xref: /qemu/target/arm/tcg/crypto_helper.c (revision 50f57e09fda4b7ffbc5ba62aad6cebf660824023)
/*
 * crypto_helper.c - emulate v8 Crypto Extensions instructions
 *
 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 */
119d935509SArd Biesheuvel 
1274c21bd0SPeter Maydell #include "qemu/osdep.h"
139d935509SArd Biesheuvel 
149d935509SArd Biesheuvel #include "cpu.h"
152ef6175aSRichard Henderson #include "exec/helper-proto.h"
16a04b68e1SRichard Henderson #include "tcg/tcg-gvec-desc.h"
176f2945cdSDaniel P. Berrange #include "crypto/aes.h"
18a04b68e1SRichard Henderson #include "vec_internal.h"
199d935509SArd Biesheuvel 
/*
 * 128-bit scratch value that can be viewed as 16 bytes, four 32-bit
 * words or two 64-bit halves.  The helpers in this file load and store
 * it through .l[] and operate on it through the CR_ST_BYTE() and
 * CR_ST_WORD() accessors.
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];
    uint32_t   words[4];
    uint64_t   l[2];
};
259d935509SArd Biesheuvel 
/*
 * Element accessors for union CRYPTO_STATE.
 *
 * When HOST_WORDS_BIGENDIAN is defined the logical index is remapped
 * ((15 - i) ^ 8 for bytes, (3 - i) ^ 2 for words), i.e. the index is
 * reversed within each 64-bit half.  Otherwise the index is used
 * directly.  Both forms expand to an lvalue.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[i])
#define CR_ST_WORD(state, i)   ((state).words[i])
#endif
33b449ca3cSArd Biesheuvel 
/*
 * The caller has not been converted to full gvec, and so only
 * modifies the low 16 bytes of the vector register.
 *
 * Asserts that the operation size encoded in @desc is exactly 16 and
 * then hands the [opr_sz, max_sz) range of @vd to clear_tail().
 * NOTE(review): clear_tail() comes from vec_internal.h and is not
 * visible here; presumably it zeroes that range - confirm.
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    int opr_sz = simd_oprsz(desc);
    int max_sz = simd_maxsz(desc);

    assert(opr_sz == 16);
    clear_tail(vd, opr_sz, max_sz);
}
46aaffebd6SRichard Henderson 
47a04b68e1SRichard Henderson static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
48a04b68e1SRichard Henderson                            uint64_t *rm, bool decrypt)
499d935509SArd Biesheuvel {
5059dcd29aSTom Musta     static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
5159dcd29aSTom Musta     static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
521a66ac61SRichard Henderson     union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
53a04b68e1SRichard Henderson     union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
549d935509SArd Biesheuvel     int i;
559d935509SArd Biesheuvel 
569d935509SArd Biesheuvel     /* xor state vector with round key */
579d935509SArd Biesheuvel     rk.l[0] ^= st.l[0];
589d935509SArd Biesheuvel     rk.l[1] ^= st.l[1];
599d935509SArd Biesheuvel 
609d935509SArd Biesheuvel     /* combine ShiftRows operation and sbox substitution */
619d935509SArd Biesheuvel     for (i = 0; i < 16; i++) {
62b449ca3cSArd Biesheuvel         CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
639d935509SArd Biesheuvel     }
649d935509SArd Biesheuvel 
651a66ac61SRichard Henderson     rd[0] = st.l[0];
661a66ac61SRichard Henderson     rd[1] = st.l[1];
679d935509SArd Biesheuvel }
689d935509SArd Biesheuvel 
69a04b68e1SRichard Henderson void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
70a04b68e1SRichard Henderson {
71a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
72a04b68e1SRichard Henderson     bool decrypt = simd_data(desc);
73a04b68e1SRichard Henderson 
74a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
75a04b68e1SRichard Henderson         do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
76a04b68e1SRichard Henderson     }
77a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
78a04b68e1SRichard Henderson }
79a04b68e1SRichard Henderson 
80a04b68e1SRichard Henderson static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
819d935509SArd Biesheuvel {
829d935509SArd Biesheuvel     static uint32_t const mc[][256] = { {
839d935509SArd Biesheuvel         /* MixColumns lookup table */
849d935509SArd Biesheuvel         0x00000000, 0x03010102, 0x06020204, 0x05030306,
859d935509SArd Biesheuvel         0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
869d935509SArd Biesheuvel         0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
879d935509SArd Biesheuvel         0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
889d935509SArd Biesheuvel         0x30101020, 0x33111122, 0x36121224, 0x35131326,
899d935509SArd Biesheuvel         0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
909d935509SArd Biesheuvel         0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
919d935509SArd Biesheuvel         0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
929d935509SArd Biesheuvel         0x60202040, 0x63212142, 0x66222244, 0x65232346,
939d935509SArd Biesheuvel         0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
949d935509SArd Biesheuvel         0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
959d935509SArd Biesheuvel         0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
969d935509SArd Biesheuvel         0x50303060, 0x53313162, 0x56323264, 0x55333366,
979d935509SArd Biesheuvel         0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
989d935509SArd Biesheuvel         0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
999d935509SArd Biesheuvel         0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
1009d935509SArd Biesheuvel         0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
1019d935509SArd Biesheuvel         0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
1029d935509SArd Biesheuvel         0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
1039d935509SArd Biesheuvel         0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
1049d935509SArd Biesheuvel         0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
1059d935509SArd Biesheuvel         0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
1069d935509SArd Biesheuvel         0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
1079d935509SArd Biesheuvel         0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
1089d935509SArd Biesheuvel         0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
1099d935509SArd Biesheuvel         0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
1109d935509SArd Biesheuvel         0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
1119d935509SArd Biesheuvel         0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
1129d935509SArd Biesheuvel         0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
1139d935509SArd Biesheuvel         0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
1149d935509SArd Biesheuvel         0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
1159d935509SArd Biesheuvel         0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
1169d935509SArd Biesheuvel         0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
1179d935509SArd Biesheuvel         0x97848413, 0x94858511, 0x91868617, 0x92878715,
1189d935509SArd Biesheuvel         0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
1199d935509SArd Biesheuvel         0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
1209d935509SArd Biesheuvel         0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
1219d935509SArd Biesheuvel         0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
1229d935509SArd Biesheuvel         0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
1239d935509SArd Biesheuvel         0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
1249d935509SArd Biesheuvel         0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
1259d935509SArd Biesheuvel         0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
1269d935509SArd Biesheuvel         0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
1279d935509SArd Biesheuvel         0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
1289d935509SArd Biesheuvel         0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
1299d935509SArd Biesheuvel         0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
1309d935509SArd Biesheuvel         0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
1319d935509SArd Biesheuvel         0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
1329d935509SArd Biesheuvel         0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
1339d935509SArd Biesheuvel         0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
1349d935509SArd Biesheuvel         0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
1359d935509SArd Biesheuvel         0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
1369d935509SArd Biesheuvel         0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
1379d935509SArd Biesheuvel         0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
1389d935509SArd Biesheuvel         0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
1399d935509SArd Biesheuvel         0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
1409d935509SArd Biesheuvel         0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
1419d935509SArd Biesheuvel         0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
1429d935509SArd Biesheuvel         0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
1439d935509SArd Biesheuvel         0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
1449d935509SArd Biesheuvel         0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
1459d935509SArd Biesheuvel         0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
1469d935509SArd Biesheuvel         0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
1479d935509SArd Biesheuvel         0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
1489d935509SArd Biesheuvel     }, {
1499d935509SArd Biesheuvel         /* Inverse MixColumns lookup table */
1509d935509SArd Biesheuvel         0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
1519d935509SArd Biesheuvel         0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
1529d935509SArd Biesheuvel         0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
1539d935509SArd Biesheuvel         0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
1549d935509SArd Biesheuvel         0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
1559d935509SArd Biesheuvel         0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
1569d935509SArd Biesheuvel         0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
1579d935509SArd Biesheuvel         0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
1589d935509SArd Biesheuvel         0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
1599d935509SArd Biesheuvel         0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
1609d935509SArd Biesheuvel         0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
1619d935509SArd Biesheuvel         0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
1629d935509SArd Biesheuvel         0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
1639d935509SArd Biesheuvel         0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
1649d935509SArd Biesheuvel         0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
1659d935509SArd Biesheuvel         0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
1669d935509SArd Biesheuvel         0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
1679d935509SArd Biesheuvel         0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
1689d935509SArd Biesheuvel         0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
1699d935509SArd Biesheuvel         0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
1709d935509SArd Biesheuvel         0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
1719d935509SArd Biesheuvel         0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
1729d935509SArd Biesheuvel         0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
1739d935509SArd Biesheuvel         0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
1749d935509SArd Biesheuvel         0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
1759d935509SArd Biesheuvel         0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
1769d935509SArd Biesheuvel         0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
1779d935509SArd Biesheuvel         0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
1789d935509SArd Biesheuvel         0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
1799d935509SArd Biesheuvel         0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
1809d935509SArd Biesheuvel         0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
1819d935509SArd Biesheuvel         0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
1829d935509SArd Biesheuvel         0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
1839d935509SArd Biesheuvel         0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
1849d935509SArd Biesheuvel         0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
1859d935509SArd Biesheuvel         0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
1869d935509SArd Biesheuvel         0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
1879d935509SArd Biesheuvel         0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
1889d935509SArd Biesheuvel         0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
1899d935509SArd Biesheuvel         0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
1909d935509SArd Biesheuvel         0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
1919d935509SArd Biesheuvel         0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
1929d935509SArd Biesheuvel         0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
1939d935509SArd Biesheuvel         0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
1949d935509SArd Biesheuvel         0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
1959d935509SArd Biesheuvel         0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
1969d935509SArd Biesheuvel         0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
1979d935509SArd Biesheuvel         0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
1989d935509SArd Biesheuvel         0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
1999d935509SArd Biesheuvel         0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
2009d935509SArd Biesheuvel         0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
2019d935509SArd Biesheuvel         0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
2029d935509SArd Biesheuvel         0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
2039d935509SArd Biesheuvel         0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
2049d935509SArd Biesheuvel         0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
2059d935509SArd Biesheuvel         0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
2069d935509SArd Biesheuvel         0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
2079d935509SArd Biesheuvel         0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
2089d935509SArd Biesheuvel         0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
2099d935509SArd Biesheuvel         0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
2109d935509SArd Biesheuvel         0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
2119d935509SArd Biesheuvel         0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
2129d935509SArd Biesheuvel         0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
2139d935509SArd Biesheuvel         0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
2149d935509SArd Biesheuvel     } };
2151a66ac61SRichard Henderson 
2161a66ac61SRichard Henderson     union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
2179d935509SArd Biesheuvel     int i;
2189d935509SArd Biesheuvel 
2199d935509SArd Biesheuvel     for (i = 0; i < 16; i += 4) {
220b449ca3cSArd Biesheuvel         CR_ST_WORD(st, i >> 2) =
221b449ca3cSArd Biesheuvel             mc[decrypt][CR_ST_BYTE(st, i)] ^
222b449ca3cSArd Biesheuvel             rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
223b449ca3cSArd Biesheuvel             rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
224b449ca3cSArd Biesheuvel             rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
2259d935509SArd Biesheuvel     }
2269d935509SArd Biesheuvel 
2271a66ac61SRichard Henderson     rd[0] = st.l[0];
2281a66ac61SRichard Henderson     rd[1] = st.l[1];
2299d935509SArd Biesheuvel }
230f1ecb913SArd Biesheuvel 
231a04b68e1SRichard Henderson void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
232a04b68e1SRichard Henderson {
233a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
234a04b68e1SRichard Henderson     bool decrypt = simd_data(desc);
235a04b68e1SRichard Henderson 
236a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
237a04b68e1SRichard Henderson         do_crypto_aesmc(vd + i, vm + i, decrypt);
238a04b68e1SRichard Henderson     }
239a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
240a04b68e1SRichard Henderson }
241a04b68e1SRichard Henderson 
/*
 * SHA-1 logical functions
 */
245f1ecb913SArd Biesheuvel 
/*
 * Bitwise "choose": for every bit position, return the bit of @y where
 * @x is 1 and the bit of @z where @x is 0.
 */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & (y ^ z)) ^ z;
}
250f1ecb913SArd Biesheuvel 
/* Bitwise parity: XOR of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    return x ^ y ^ z;
}
255f1ecb913SArd Biesheuvel 
/*
 * Bitwise majority: each result bit is 1 when at least two of the
 * corresponding bits of @x, @y and @z are 1.
 */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & y) | ((x | y) & z);
}
260f1ecb913SArd Biesheuvel 
261afc8b7d3SRichard Henderson void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
262f1ecb913SArd Biesheuvel {
263afc8b7d3SRichard Henderson     uint64_t *d = vd, *n = vn, *m = vm;
264afc8b7d3SRichard Henderson     uint64_t d0, d1;
265afc8b7d3SRichard Henderson 
266afc8b7d3SRichard Henderson     d0 = d[1] ^ d[0] ^ m[0];
267afc8b7d3SRichard Henderson     d1 = n[0] ^ d[1] ^ m[1];
268afc8b7d3SRichard Henderson     d[0] = d0;
269afc8b7d3SRichard Henderson     d[1] = d1;
270afc8b7d3SRichard Henderson 
271afc8b7d3SRichard Henderson     clear_tail_16(vd, desc);
272afc8b7d3SRichard Henderson }
273afc8b7d3SRichard Henderson 
274afc8b7d3SRichard Henderson static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
275afc8b7d3SRichard Henderson                                     uint64_t *rm, uint32_t desc,
276afc8b7d3SRichard Henderson                                     uint32_t (*fn)(union CRYPTO_STATE *d))
277afc8b7d3SRichard Henderson {
2781a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
2791a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
2801a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
281f1ecb913SArd Biesheuvel     int i;
282f1ecb913SArd Biesheuvel 
283f1ecb913SArd Biesheuvel     for (i = 0; i < 4; i++) {
284afc8b7d3SRichard Henderson         uint32_t t = fn(&d);
285f1ecb913SArd Biesheuvel 
286b449ca3cSArd Biesheuvel         t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
287b449ca3cSArd Biesheuvel              + CR_ST_WORD(m, i);
288f1ecb913SArd Biesheuvel 
289b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
290b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
291b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
292b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
293b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 0) = t;
294f1ecb913SArd Biesheuvel     }
2951a66ac61SRichard Henderson     rd[0] = d.l[0];
2961a66ac61SRichard Henderson     rd[1] = d.l[1];
297afc8b7d3SRichard Henderson 
298afc8b7d3SRichard Henderson     clear_tail_16(rd, desc);
299afc8b7d3SRichard Henderson }
300afc8b7d3SRichard Henderson 
301afc8b7d3SRichard Henderson static uint32_t do_sha1c(union CRYPTO_STATE *d)
302afc8b7d3SRichard Henderson {
303afc8b7d3SRichard Henderson     return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
304afc8b7d3SRichard Henderson }
305afc8b7d3SRichard Henderson 
306afc8b7d3SRichard Henderson void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
307afc8b7d3SRichard Henderson {
308afc8b7d3SRichard Henderson     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
309afc8b7d3SRichard Henderson }
310afc8b7d3SRichard Henderson 
311afc8b7d3SRichard Henderson static uint32_t do_sha1p(union CRYPTO_STATE *d)
312afc8b7d3SRichard Henderson {
313afc8b7d3SRichard Henderson     return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
314afc8b7d3SRichard Henderson }
315afc8b7d3SRichard Henderson 
316afc8b7d3SRichard Henderson void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
317afc8b7d3SRichard Henderson {
318afc8b7d3SRichard Henderson     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
319afc8b7d3SRichard Henderson }
320afc8b7d3SRichard Henderson 
321afc8b7d3SRichard Henderson static uint32_t do_sha1m(union CRYPTO_STATE *d)
322afc8b7d3SRichard Henderson {
323afc8b7d3SRichard Henderson     return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
324afc8b7d3SRichard Henderson }
325afc8b7d3SRichard Henderson 
326afc8b7d3SRichard Henderson void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
327afc8b7d3SRichard Henderson {
328afc8b7d3SRichard Henderson     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
329f1ecb913SArd Biesheuvel }
330f1ecb913SArd Biesheuvel 
331effa992fSRichard Henderson void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
332f1ecb913SArd Biesheuvel {
3331a66ac61SRichard Henderson     uint64_t *rd = vd;
3341a66ac61SRichard Henderson     uint64_t *rm = vm;
3351a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
336f1ecb913SArd Biesheuvel 
337b449ca3cSArd Biesheuvel     CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
338b449ca3cSArd Biesheuvel     CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
339f1ecb913SArd Biesheuvel 
3401a66ac61SRichard Henderson     rd[0] = m.l[0];
3411a66ac61SRichard Henderson     rd[1] = m.l[1];
342effa992fSRichard Henderson 
343effa992fSRichard Henderson     clear_tail_16(vd, desc);
344f1ecb913SArd Biesheuvel }
345f1ecb913SArd Biesheuvel 
346effa992fSRichard Henderson void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
347f1ecb913SArd Biesheuvel {
3481a66ac61SRichard Henderson     uint64_t *rd = vd;
3491a66ac61SRichard Henderson     uint64_t *rm = vm;
3501a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
3511a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
352f1ecb913SArd Biesheuvel 
353b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
354b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
355b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
356b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
357f1ecb913SArd Biesheuvel 
3581a66ac61SRichard Henderson     rd[0] = d.l[0];
3591a66ac61SRichard Henderson     rd[1] = d.l[1];
360effa992fSRichard Henderson 
361effa992fSRichard Henderson     clear_tail_16(vd, desc);
362f1ecb913SArd Biesheuvel }
363f1ecb913SArd Biesheuvel 
/*
 * The SHA-256 logical functions, according to
 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
 */
368f1ecb913SArd Biesheuvel 
/* SHA-256 "big sigma 0": XOR of @x rotated right by 2, 13 and 22. */
static uint32_t S0(uint32_t x)
{
    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
}
373f1ecb913SArd Biesheuvel 
/* SHA-256 "big sigma 1": XOR of @x rotated right by 6, 11 and 25. */
static uint32_t S1(uint32_t x)
{
    return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
}
378f1ecb913SArd Biesheuvel 
/*
 * SHA-256 "small sigma 0": @x rotated right by 7 and by 18, XORed with
 * @x shifted right by 3.
 */
static uint32_t s0(uint32_t x)
{
    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
}
383f1ecb913SArd Biesheuvel 
/*
 * SHA-256 "small sigma 1": @x rotated right by 17 and by 19, XORed with
 * @x shifted right by 10.
 */
static uint32_t s1(uint32_t x)
{
    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
}
388f1ecb913SArd Biesheuvel 
389effa992fSRichard Henderson void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
390f1ecb913SArd Biesheuvel {
3911a66ac61SRichard Henderson     uint64_t *rd = vd;
3921a66ac61SRichard Henderson     uint64_t *rn = vn;
3931a66ac61SRichard Henderson     uint64_t *rm = vm;
3941a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
3951a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
3961a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
397f1ecb913SArd Biesheuvel     int i;
398f1ecb913SArd Biesheuvel 
399f1ecb913SArd Biesheuvel     for (i = 0; i < 4; i++) {
400b449ca3cSArd Biesheuvel         uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
401b449ca3cSArd Biesheuvel                      + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
402b449ca3cSArd Biesheuvel                      + CR_ST_WORD(m, i);
403f1ecb913SArd Biesheuvel 
404b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
405b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
406b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
407b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
408f1ecb913SArd Biesheuvel 
409b449ca3cSArd Biesheuvel         t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
410b449ca3cSArd Biesheuvel              + S0(CR_ST_WORD(d, 0));
411f1ecb913SArd Biesheuvel 
412b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
413b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
414b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
415b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 0) = t;
416f1ecb913SArd Biesheuvel     }
417f1ecb913SArd Biesheuvel 
4181a66ac61SRichard Henderson     rd[0] = d.l[0];
4191a66ac61SRichard Henderson     rd[1] = d.l[1];
420effa992fSRichard Henderson 
421effa992fSRichard Henderson     clear_tail_16(vd, desc);
422f1ecb913SArd Biesheuvel }
423f1ecb913SArd Biesheuvel 
424effa992fSRichard Henderson void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
425f1ecb913SArd Biesheuvel {
4261a66ac61SRichard Henderson     uint64_t *rd = vd;
4271a66ac61SRichard Henderson     uint64_t *rn = vn;
4281a66ac61SRichard Henderson     uint64_t *rm = vm;
4291a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
4301a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
4311a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
432f1ecb913SArd Biesheuvel     int i;
433f1ecb913SArd Biesheuvel 
434f1ecb913SArd Biesheuvel     for (i = 0; i < 4; i++) {
435b449ca3cSArd Biesheuvel         uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
436b449ca3cSArd Biesheuvel                      + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
437b449ca3cSArd Biesheuvel                      + CR_ST_WORD(m, i);
438f1ecb913SArd Biesheuvel 
439b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
440b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
441b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
442b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
443f1ecb913SArd Biesheuvel     }
444f1ecb913SArd Biesheuvel 
4451a66ac61SRichard Henderson     rd[0] = d.l[0];
4461a66ac61SRichard Henderson     rd[1] = d.l[1];
447effa992fSRichard Henderson 
448effa992fSRichard Henderson     clear_tail_16(vd, desc);
449f1ecb913SArd Biesheuvel }
450f1ecb913SArd Biesheuvel 
451effa992fSRichard Henderson void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
452f1ecb913SArd Biesheuvel {
4531a66ac61SRichard Henderson     uint64_t *rd = vd;
4541a66ac61SRichard Henderson     uint64_t *rm = vm;
4551a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
4561a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
457f1ecb913SArd Biesheuvel 
458b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
459b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
460b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
461b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
462f1ecb913SArd Biesheuvel 
4631a66ac61SRichard Henderson     rd[0] = d.l[0];
4641a66ac61SRichard Henderson     rd[1] = d.l[1];
465effa992fSRichard Henderson 
466effa992fSRichard Henderson     clear_tail_16(vd, desc);
467f1ecb913SArd Biesheuvel }
468f1ecb913SArd Biesheuvel 
469effa992fSRichard Henderson void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
470f1ecb913SArd Biesheuvel {
4711a66ac61SRichard Henderson     uint64_t *rd = vd;
4721a66ac61SRichard Henderson     uint64_t *rn = vn;
4731a66ac61SRichard Henderson     uint64_t *rm = vm;
4741a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
4751a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
4761a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
477f1ecb913SArd Biesheuvel 
478b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
479b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
480b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
481b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
482f1ecb913SArd Biesheuvel 
4831a66ac61SRichard Henderson     rd[0] = d.l[0];
4841a66ac61SRichard Henderson     rd[1] = d.l[1];
485effa992fSRichard Henderson 
486effa992fSRichard Henderson     clear_tail_16(vd, desc);
487f1ecb913SArd Biesheuvel }
48890b827d1SArd Biesheuvel 
/*
 * The SHA-512 logical functions (same as above but using 64-bit operands)
 */
49290b827d1SArd Biesheuvel 
/*
 * 64-bit bitwise "choose": for every bit position, return the bit of
 * @y where @x is 1 and the bit of @z where @x is 0.
 */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    return (x & (y ^ z)) ^ z;
}
49790b827d1SArd Biesheuvel 
/*
 * 64-bit bitwise majority: each result bit is 1 when at least two of
 * the corresponding bits of @x, @y and @z are 1.
 */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    return (x & y) | ((x | y) & z);
}
50290b827d1SArd Biesheuvel 
/* SHA-512 "big sigma 0": XOR of @x rotated right by 28, 34 and 39. */
static uint64_t S0_512(uint64_t x)
{
    return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
}
50790b827d1SArd Biesheuvel 
/* SHA-512 "big sigma 1": XOR of @x rotated right by 14, 18 and 41. */
static uint64_t S1_512(uint64_t x)
{
    return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
}
51290b827d1SArd Biesheuvel 
/*
 * SHA-512 "small sigma 0": @x rotated right by 1 and by 8, XORed with
 * @x shifted right by 7.
 */
static uint64_t s0_512(uint64_t x)
{
    return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
}
51790b827d1SArd Biesheuvel 
/*
 * SHA-512 "small sigma 1": @x rotated right by 19 and by 61, XORed with
 * @x shifted right by 6.
 */
static uint64_t s1_512(uint64_t x)
{
    return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
}
52290b827d1SArd Biesheuvel 
523aaffebd6SRichard Henderson void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
52490b827d1SArd Biesheuvel {
52590b827d1SArd Biesheuvel     uint64_t *rd = vd;
52690b827d1SArd Biesheuvel     uint64_t *rn = vn;
52790b827d1SArd Biesheuvel     uint64_t *rm = vm;
52890b827d1SArd Biesheuvel     uint64_t d0 = rd[0];
52990b827d1SArd Biesheuvel     uint64_t d1 = rd[1];
53090b827d1SArd Biesheuvel 
53190b827d1SArd Biesheuvel     d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
53290b827d1SArd Biesheuvel     d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
53390b827d1SArd Biesheuvel 
53490b827d1SArd Biesheuvel     rd[0] = d0;
53590b827d1SArd Biesheuvel     rd[1] = d1;
536aaffebd6SRichard Henderson 
537aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
53890b827d1SArd Biesheuvel }
53990b827d1SArd Biesheuvel 
540aaffebd6SRichard Henderson void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
54190b827d1SArd Biesheuvel {
54290b827d1SArd Biesheuvel     uint64_t *rd = vd;
54390b827d1SArd Biesheuvel     uint64_t *rn = vn;
54490b827d1SArd Biesheuvel     uint64_t *rm = vm;
54590b827d1SArd Biesheuvel     uint64_t d0 = rd[0];
54690b827d1SArd Biesheuvel     uint64_t d1 = rd[1];
54790b827d1SArd Biesheuvel 
54890b827d1SArd Biesheuvel     d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
54990b827d1SArd Biesheuvel     d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
55090b827d1SArd Biesheuvel 
55190b827d1SArd Biesheuvel     rd[0] = d0;
55290b827d1SArd Biesheuvel     rd[1] = d1;
553aaffebd6SRichard Henderson 
554aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
55590b827d1SArd Biesheuvel }
55690b827d1SArd Biesheuvel 
557aaffebd6SRichard Henderson void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
55890b827d1SArd Biesheuvel {
55990b827d1SArd Biesheuvel     uint64_t *rd = vd;
56090b827d1SArd Biesheuvel     uint64_t *rn = vn;
56190b827d1SArd Biesheuvel     uint64_t d0 = rd[0];
56290b827d1SArd Biesheuvel     uint64_t d1 = rd[1];
56390b827d1SArd Biesheuvel 
56490b827d1SArd Biesheuvel     d0 += s0_512(rd[1]);
56590b827d1SArd Biesheuvel     d1 += s0_512(rn[0]);
56690b827d1SArd Biesheuvel 
56790b827d1SArd Biesheuvel     rd[0] = d0;
56890b827d1SArd Biesheuvel     rd[1] = d1;
569aaffebd6SRichard Henderson 
570aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
57190b827d1SArd Biesheuvel }
57290b827d1SArd Biesheuvel 
573aaffebd6SRichard Henderson void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
57490b827d1SArd Biesheuvel {
57590b827d1SArd Biesheuvel     uint64_t *rd = vd;
57690b827d1SArd Biesheuvel     uint64_t *rn = vn;
57790b827d1SArd Biesheuvel     uint64_t *rm = vm;
57890b827d1SArd Biesheuvel 
57990b827d1SArd Biesheuvel     rd[0] += s1_512(rn[0]) + rm[0];
58090b827d1SArd Biesheuvel     rd[1] += s1_512(rn[1]) + rm[1];
581aaffebd6SRichard Henderson 
582aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
58390b827d1SArd Biesheuvel }
58480d6f4c6SArd Biesheuvel 
585aaffebd6SRichard Henderson void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
58680d6f4c6SArd Biesheuvel {
58780d6f4c6SArd Biesheuvel     uint64_t *rd = vd;
58880d6f4c6SArd Biesheuvel     uint64_t *rn = vn;
58980d6f4c6SArd Biesheuvel     uint64_t *rm = vm;
59080d6f4c6SArd Biesheuvel     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
59180d6f4c6SArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
59280d6f4c6SArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
59380d6f4c6SArd Biesheuvel     uint32_t t;
59480d6f4c6SArd Biesheuvel 
59580d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
59680d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
59780d6f4c6SArd Biesheuvel 
59880d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
59980d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
60080d6f4c6SArd Biesheuvel 
60180d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
60280d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
60380d6f4c6SArd Biesheuvel 
60480d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
60580d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
60680d6f4c6SArd Biesheuvel 
60780d6f4c6SArd Biesheuvel     rd[0] = d.l[0];
60880d6f4c6SArd Biesheuvel     rd[1] = d.l[1];
609aaffebd6SRichard Henderson 
610aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
61180d6f4c6SArd Biesheuvel }
61280d6f4c6SArd Biesheuvel 
613aaffebd6SRichard Henderson void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
61480d6f4c6SArd Biesheuvel {
61580d6f4c6SArd Biesheuvel     uint64_t *rd = vd;
61680d6f4c6SArd Biesheuvel     uint64_t *rn = vn;
61780d6f4c6SArd Biesheuvel     uint64_t *rm = vm;
61880d6f4c6SArd Biesheuvel     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
61980d6f4c6SArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
62080d6f4c6SArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
62180d6f4c6SArd Biesheuvel     uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
62280d6f4c6SArd Biesheuvel 
62380d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 0) ^= t;
62480d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
62580d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
62680d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
62780d6f4c6SArd Biesheuvel                         ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
62880d6f4c6SArd Biesheuvel 
62980d6f4c6SArd Biesheuvel     rd[0] = d.l[0];
63080d6f4c6SArd Biesheuvel     rd[1] = d.l[1];
631aaffebd6SRichard Henderson 
632aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
63380d6f4c6SArd Biesheuvel }
63480d6f4c6SArd Biesheuvel 
63543fa36c9SRichard Henderson static inline void QEMU_ALWAYS_INLINE
63643fa36c9SRichard Henderson crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
63743fa36c9SRichard Henderson              uint32_t desc, uint32_t opcode)
63880d6f4c6SArd Biesheuvel {
63980d6f4c6SArd Biesheuvel     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
64080d6f4c6SArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
64180d6f4c6SArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
64243fa36c9SRichard Henderson     uint32_t imm2 = simd_data(desc);
64380d6f4c6SArd Biesheuvel     uint32_t t;
64480d6f4c6SArd Biesheuvel 
64580d6f4c6SArd Biesheuvel     assert(imm2 < 4);
64680d6f4c6SArd Biesheuvel 
64780d6f4c6SArd Biesheuvel     if (opcode == 0 || opcode == 2) {
64880d6f4c6SArd Biesheuvel         /* SM3TT1A, SM3TT2A */
64980d6f4c6SArd Biesheuvel         t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
65080d6f4c6SArd Biesheuvel     } else if (opcode == 1) {
65180d6f4c6SArd Biesheuvel         /* SM3TT1B */
65280d6f4c6SArd Biesheuvel         t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
65380d6f4c6SArd Biesheuvel     } else if (opcode == 3) {
65480d6f4c6SArd Biesheuvel         /* SM3TT2B */
65580d6f4c6SArd Biesheuvel         t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
65680d6f4c6SArd Biesheuvel     } else {
65743fa36c9SRichard Henderson         qemu_build_not_reached();
65880d6f4c6SArd Biesheuvel     }
65980d6f4c6SArd Biesheuvel 
66080d6f4c6SArd Biesheuvel     t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
66180d6f4c6SArd Biesheuvel 
66280d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
66380d6f4c6SArd Biesheuvel 
66480d6f4c6SArd Biesheuvel     if (opcode < 2) {
66580d6f4c6SArd Biesheuvel         /* SM3TT1A, SM3TT1B */
66680d6f4c6SArd Biesheuvel         t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
66780d6f4c6SArd Biesheuvel 
66880d6f4c6SArd Biesheuvel         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
66980d6f4c6SArd Biesheuvel     } else {
67080d6f4c6SArd Biesheuvel         /* SM3TT2A, SM3TT2B */
67180d6f4c6SArd Biesheuvel         t += CR_ST_WORD(n, 3);
67280d6f4c6SArd Biesheuvel         t ^= rol32(t, 9) ^ rol32(t, 17);
67380d6f4c6SArd Biesheuvel 
67480d6f4c6SArd Biesheuvel         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
67580d6f4c6SArd Biesheuvel     }
67680d6f4c6SArd Biesheuvel 
67780d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
67880d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 3) = t;
67980d6f4c6SArd Biesheuvel 
68080d6f4c6SArd Biesheuvel     rd[0] = d.l[0];
68180d6f4c6SArd Biesheuvel     rd[1] = d.l[1];
68243fa36c9SRichard Henderson 
68343fa36c9SRichard Henderson     clear_tail_16(rd, desc);
68480d6f4c6SArd Biesheuvel }
685b6577bcdSArd Biesheuvel 
68643fa36c9SRichard Henderson #define DO_SM3TT(NAME, OPCODE) \
68743fa36c9SRichard Henderson     void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
68843fa36c9SRichard Henderson     { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }
68943fa36c9SRichard Henderson 
69043fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt1a, 0)
69143fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt1b, 1)
69243fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt2a, 2)
69343fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt2b, 3)
69443fa36c9SRichard Henderson 
69543fa36c9SRichard Henderson #undef DO_SM3TT
69643fa36c9SRichard Henderson 
/*
 * SM4 byte substitution table, indexed by the input byte.
 * The trailing comment on each row gives the index of its first entry.
 */
static const uint8_t sm4_sbox[256] = {
    0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, /* 0x00 */
    0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, /* 0x08 */
    0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, /* 0x10 */
    0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, /* 0x18 */
    0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, /* 0x20 */
    0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, /* 0x28 */
    0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, /* 0x30 */
    0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, /* 0x38 */
    0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, /* 0x40 */
    0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, /* 0x48 */
    0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, /* 0x50 */
    0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, /* 0x58 */
    0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, /* 0x60 */
    0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, /* 0x68 */
    0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, /* 0x70 */
    0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, /* 0x78 */
    0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, /* 0x80 */
    0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, /* 0x88 */
    0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, /* 0x90 */
    0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, /* 0x98 */
    0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, /* 0xa0 */
    0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, /* 0xa8 */
    0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, /* 0xb0 */
    0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, /* 0xb8 */
    0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, /* 0xc0 */
    0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8, /* 0xc8 */
    0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, /* 0xd0 */
    0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, /* 0xd8 */
    0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, /* 0xe0 */
    0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, /* 0xe8 */
    0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, /* 0xf0 */
    0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, /* 0xf8 */
};
731b6577bcdSArd Biesheuvel 
732a04b68e1SRichard Henderson static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
733b6577bcdSArd Biesheuvel {
734a04b68e1SRichard Henderson     union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
735a04b68e1SRichard Henderson     union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
736b6577bcdSArd Biesheuvel     uint32_t t, i;
737b6577bcdSArd Biesheuvel 
738b6577bcdSArd Biesheuvel     for (i = 0; i < 4; i++) {
739b6577bcdSArd Biesheuvel         t = CR_ST_WORD(d, (i + 1) % 4) ^
740b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 2) % 4) ^
741b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 3) % 4) ^
742b6577bcdSArd Biesheuvel             CR_ST_WORD(n, i);
743b6577bcdSArd Biesheuvel 
744b6577bcdSArd Biesheuvel         t = sm4_sbox[t & 0xff] |
745b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 8) & 0xff] << 8 |
746b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 16) & 0xff] << 16 |
747b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 24) & 0xff] << 24;
748b6577bcdSArd Biesheuvel 
749b6577bcdSArd Biesheuvel         CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
750b6577bcdSArd Biesheuvel                             rol32(t, 24);
751b6577bcdSArd Biesheuvel     }
752b6577bcdSArd Biesheuvel 
753b6577bcdSArd Biesheuvel     rd[0] = d.l[0];
754b6577bcdSArd Biesheuvel     rd[1] = d.l[1];
755b6577bcdSArd Biesheuvel }
756b6577bcdSArd Biesheuvel 
757a04b68e1SRichard Henderson void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
758b6577bcdSArd Biesheuvel {
759a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
760a04b68e1SRichard Henderson 
761a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
762a04b68e1SRichard Henderson         do_crypto_sm4e(vd + i, vn + i, vm + i);
763a04b68e1SRichard Henderson     }
764a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
765a04b68e1SRichard Henderson }
766a04b68e1SRichard Henderson 
767a04b68e1SRichard Henderson static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
768a04b68e1SRichard Henderson {
769b6577bcdSArd Biesheuvel     union CRYPTO_STATE d;
770b6577bcdSArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
771b6577bcdSArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
772b6577bcdSArd Biesheuvel     uint32_t t, i;
773b6577bcdSArd Biesheuvel 
774b6577bcdSArd Biesheuvel     d = n;
775b6577bcdSArd Biesheuvel     for (i = 0; i < 4; i++) {
776b6577bcdSArd Biesheuvel         t = CR_ST_WORD(d, (i + 1) % 4) ^
777b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 2) % 4) ^
778b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 3) % 4) ^
779b6577bcdSArd Biesheuvel             CR_ST_WORD(m, i);
780b6577bcdSArd Biesheuvel 
781b6577bcdSArd Biesheuvel         t = sm4_sbox[t & 0xff] |
782b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 8) & 0xff] << 8 |
783b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 16) & 0xff] << 16 |
784b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 24) & 0xff] << 24;
785b6577bcdSArd Biesheuvel 
786b6577bcdSArd Biesheuvel         CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
787b6577bcdSArd Biesheuvel     }
788b6577bcdSArd Biesheuvel 
789b6577bcdSArd Biesheuvel     rd[0] = d.l[0];
790b6577bcdSArd Biesheuvel     rd[1] = d.l[1];
791b6577bcdSArd Biesheuvel }
792a04b68e1SRichard Henderson 
793a04b68e1SRichard Henderson void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
794a04b68e1SRichard Henderson {
795a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
796a04b68e1SRichard Henderson 
797a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
798a04b68e1SRichard Henderson         do_crypto_sm4ekey(vd + i, vn + i, vm + i);
799a04b68e1SRichard Henderson     }
800a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
801a04b68e1SRichard Henderson }
8021738860dSRichard Henderson 
8031738860dSRichard Henderson void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
8041738860dSRichard Henderson {
8051738860dSRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
8061738860dSRichard Henderson     uint64_t *d = vd, *n = vn, *m = vm;
8071738860dSRichard Henderson 
8081738860dSRichard Henderson     for (i = 0; i < opr_sz / 8; ++i) {
8091738860dSRichard Henderson         d[i] = n[i] ^ rol64(m[i], 1);
8101738860dSRichard Henderson     }
8111738860dSRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
8121738860dSRichard Henderson }
813