xref: /qemu/target/arm/tcg/crypto_helper.c (revision aaffebd6d3135b8aed7e61932af53b004d261579)
19d935509SArd Biesheuvel /*
29d935509SArd Biesheuvel  * crypto_helper.c - emulate v8 Crypto Extensions instructions
39d935509SArd Biesheuvel  *
490b827d1SArd Biesheuvel  * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
59d935509SArd Biesheuvel  *
69d935509SArd Biesheuvel  * This library is free software; you can redistribute it and/or
79d935509SArd Biesheuvel  * modify it under the terms of the GNU Lesser General Public
89d935509SArd Biesheuvel  * License as published by the Free Software Foundation; either
99d935509SArd Biesheuvel  * version 2 of the License, or (at your option) any later version.
109d935509SArd Biesheuvel  */
119d935509SArd Biesheuvel 
1274c21bd0SPeter Maydell #include "qemu/osdep.h"
139d935509SArd Biesheuvel 
149d935509SArd Biesheuvel #include "cpu.h"
152ef6175aSRichard Henderson #include "exec/helper-proto.h"
16a04b68e1SRichard Henderson #include "tcg/tcg-gvec-desc.h"
176f2945cdSDaniel P. Berrange #include "crypto/aes.h"
18a04b68e1SRichard Henderson #include "vec_internal.h"
199d935509SArd Biesheuvel 
/*
 * 128-bit working state used by the helpers in this file.  The same
 * 16 bytes can be viewed as bytes, 32-bit words or 64-bit halves; the
 * helpers load and store it through l[] and operate on it through the
 * CR_ST_BYTE()/CR_ST_WORD() accessors below.
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];
    uint32_t   words[4];
    uint64_t   l[2];
};

/*
 * Element accessors.  On big-endian hosts the index is mirrored within
 * each 64-bit half (bytes: (15 - i) ^ 8, words: (3 - i) ^ 2) so that
 * element i designates the same bits of l[] as it does on a
 * little-endian host.
 *
 * The state argument is parenthesised so that any lvalue expression
 * (e.g. *ptr) can be passed, not just a plain identifier.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[i])
#define CR_ST_WORD(state, i)   ((state).words[i])
#endif
33b449ca3cSArd Biesheuvel 
/*
 * For helpers whose callers are not yet fully converted to gvec and
 * therefore only write the low 16 bytes of the destination register:
 * check that the operation size in desc is exactly 16 and hand the
 * destination to clear_tail() with the operation and maximum sizes
 * taken from desc.
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    const int oprsz = simd_oprsz(desc);
    const int maxsz = simd_maxsz(desc);

    assert(oprsz == 16);

    clear_tail(vd, oprsz, maxsz);
}
46*aaffebd6SRichard Henderson 
47a04b68e1SRichard Henderson static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
48a04b68e1SRichard Henderson                            uint64_t *rm, bool decrypt)
499d935509SArd Biesheuvel {
5059dcd29aSTom Musta     static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
5159dcd29aSTom Musta     static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
521a66ac61SRichard Henderson     union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
53a04b68e1SRichard Henderson     union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
549d935509SArd Biesheuvel     int i;
559d935509SArd Biesheuvel 
569d935509SArd Biesheuvel     /* xor state vector with round key */
579d935509SArd Biesheuvel     rk.l[0] ^= st.l[0];
589d935509SArd Biesheuvel     rk.l[1] ^= st.l[1];
599d935509SArd Biesheuvel 
609d935509SArd Biesheuvel     /* combine ShiftRows operation and sbox substitution */
619d935509SArd Biesheuvel     for (i = 0; i < 16; i++) {
62b449ca3cSArd Biesheuvel         CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
639d935509SArd Biesheuvel     }
649d935509SArd Biesheuvel 
651a66ac61SRichard Henderson     rd[0] = st.l[0];
661a66ac61SRichard Henderson     rd[1] = st.l[1];
679d935509SArd Biesheuvel }
689d935509SArd Biesheuvel 
69a04b68e1SRichard Henderson void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
70a04b68e1SRichard Henderson {
71a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
72a04b68e1SRichard Henderson     bool decrypt = simd_data(desc);
73a04b68e1SRichard Henderson 
74a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
75a04b68e1SRichard Henderson         do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
76a04b68e1SRichard Henderson     }
77a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
78a04b68e1SRichard Henderson }
79a04b68e1SRichard Henderson 
80a04b68e1SRichard Henderson static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
819d935509SArd Biesheuvel {
829d935509SArd Biesheuvel     static uint32_t const mc[][256] = { {
839d935509SArd Biesheuvel         /* MixColumns lookup table */
849d935509SArd Biesheuvel         0x00000000, 0x03010102, 0x06020204, 0x05030306,
859d935509SArd Biesheuvel         0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
869d935509SArd Biesheuvel         0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
879d935509SArd Biesheuvel         0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
889d935509SArd Biesheuvel         0x30101020, 0x33111122, 0x36121224, 0x35131326,
899d935509SArd Biesheuvel         0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
909d935509SArd Biesheuvel         0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
919d935509SArd Biesheuvel         0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
929d935509SArd Biesheuvel         0x60202040, 0x63212142, 0x66222244, 0x65232346,
939d935509SArd Biesheuvel         0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
949d935509SArd Biesheuvel         0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
959d935509SArd Biesheuvel         0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
969d935509SArd Biesheuvel         0x50303060, 0x53313162, 0x56323264, 0x55333366,
979d935509SArd Biesheuvel         0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
989d935509SArd Biesheuvel         0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
999d935509SArd Biesheuvel         0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
1009d935509SArd Biesheuvel         0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
1019d935509SArd Biesheuvel         0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
1029d935509SArd Biesheuvel         0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
1039d935509SArd Biesheuvel         0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
1049d935509SArd Biesheuvel         0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
1059d935509SArd Biesheuvel         0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
1069d935509SArd Biesheuvel         0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
1079d935509SArd Biesheuvel         0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
1089d935509SArd Biesheuvel         0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
1099d935509SArd Biesheuvel         0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
1109d935509SArd Biesheuvel         0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
1119d935509SArd Biesheuvel         0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
1129d935509SArd Biesheuvel         0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
1139d935509SArd Biesheuvel         0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
1149d935509SArd Biesheuvel         0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
1159d935509SArd Biesheuvel         0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
1169d935509SArd Biesheuvel         0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
1179d935509SArd Biesheuvel         0x97848413, 0x94858511, 0x91868617, 0x92878715,
1189d935509SArd Biesheuvel         0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
1199d935509SArd Biesheuvel         0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
1209d935509SArd Biesheuvel         0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
1219d935509SArd Biesheuvel         0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
1229d935509SArd Biesheuvel         0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
1239d935509SArd Biesheuvel         0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
1249d935509SArd Biesheuvel         0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
1259d935509SArd Biesheuvel         0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
1269d935509SArd Biesheuvel         0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
1279d935509SArd Biesheuvel         0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
1289d935509SArd Biesheuvel         0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
1299d935509SArd Biesheuvel         0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
1309d935509SArd Biesheuvel         0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
1319d935509SArd Biesheuvel         0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
1329d935509SArd Biesheuvel         0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
1339d935509SArd Biesheuvel         0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
1349d935509SArd Biesheuvel         0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
1359d935509SArd Biesheuvel         0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
1369d935509SArd Biesheuvel         0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
1379d935509SArd Biesheuvel         0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
1389d935509SArd Biesheuvel         0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
1399d935509SArd Biesheuvel         0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
1409d935509SArd Biesheuvel         0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
1419d935509SArd Biesheuvel         0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
1429d935509SArd Biesheuvel         0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
1439d935509SArd Biesheuvel         0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
1449d935509SArd Biesheuvel         0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
1459d935509SArd Biesheuvel         0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
1469d935509SArd Biesheuvel         0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
1479d935509SArd Biesheuvel         0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
1489d935509SArd Biesheuvel     }, {
1499d935509SArd Biesheuvel         /* Inverse MixColumns lookup table */
1509d935509SArd Biesheuvel         0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
1519d935509SArd Biesheuvel         0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
1529d935509SArd Biesheuvel         0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
1539d935509SArd Biesheuvel         0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
1549d935509SArd Biesheuvel         0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
1559d935509SArd Biesheuvel         0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
1569d935509SArd Biesheuvel         0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
1579d935509SArd Biesheuvel         0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
1589d935509SArd Biesheuvel         0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
1599d935509SArd Biesheuvel         0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
1609d935509SArd Biesheuvel         0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
1619d935509SArd Biesheuvel         0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
1629d935509SArd Biesheuvel         0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
1639d935509SArd Biesheuvel         0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
1649d935509SArd Biesheuvel         0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
1659d935509SArd Biesheuvel         0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
1669d935509SArd Biesheuvel         0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
1679d935509SArd Biesheuvel         0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
1689d935509SArd Biesheuvel         0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
1699d935509SArd Biesheuvel         0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
1709d935509SArd Biesheuvel         0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
1719d935509SArd Biesheuvel         0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
1729d935509SArd Biesheuvel         0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
1739d935509SArd Biesheuvel         0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
1749d935509SArd Biesheuvel         0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
1759d935509SArd Biesheuvel         0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
1769d935509SArd Biesheuvel         0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
1779d935509SArd Biesheuvel         0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
1789d935509SArd Biesheuvel         0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
1799d935509SArd Biesheuvel         0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
1809d935509SArd Biesheuvel         0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
1819d935509SArd Biesheuvel         0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
1829d935509SArd Biesheuvel         0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
1839d935509SArd Biesheuvel         0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
1849d935509SArd Biesheuvel         0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
1859d935509SArd Biesheuvel         0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
1869d935509SArd Biesheuvel         0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
1879d935509SArd Biesheuvel         0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
1889d935509SArd Biesheuvel         0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
1899d935509SArd Biesheuvel         0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
1909d935509SArd Biesheuvel         0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
1919d935509SArd Biesheuvel         0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
1929d935509SArd Biesheuvel         0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
1939d935509SArd Biesheuvel         0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
1949d935509SArd Biesheuvel         0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
1959d935509SArd Biesheuvel         0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
1969d935509SArd Biesheuvel         0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
1979d935509SArd Biesheuvel         0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
1989d935509SArd Biesheuvel         0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
1999d935509SArd Biesheuvel         0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
2009d935509SArd Biesheuvel         0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
2019d935509SArd Biesheuvel         0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
2029d935509SArd Biesheuvel         0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
2039d935509SArd Biesheuvel         0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
2049d935509SArd Biesheuvel         0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
2059d935509SArd Biesheuvel         0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
2069d935509SArd Biesheuvel         0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
2079d935509SArd Biesheuvel         0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
2089d935509SArd Biesheuvel         0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
2099d935509SArd Biesheuvel         0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
2109d935509SArd Biesheuvel         0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
2119d935509SArd Biesheuvel         0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
2129d935509SArd Biesheuvel         0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
2139d935509SArd Biesheuvel         0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
2149d935509SArd Biesheuvel     } };
2151a66ac61SRichard Henderson 
2161a66ac61SRichard Henderson     union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
2179d935509SArd Biesheuvel     int i;
2189d935509SArd Biesheuvel 
2199d935509SArd Biesheuvel     for (i = 0; i < 16; i += 4) {
220b449ca3cSArd Biesheuvel         CR_ST_WORD(st, i >> 2) =
221b449ca3cSArd Biesheuvel             mc[decrypt][CR_ST_BYTE(st, i)] ^
222b449ca3cSArd Biesheuvel             rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
223b449ca3cSArd Biesheuvel             rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
224b449ca3cSArd Biesheuvel             rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
2259d935509SArd Biesheuvel     }
2269d935509SArd Biesheuvel 
2271a66ac61SRichard Henderson     rd[0] = st.l[0];
2281a66ac61SRichard Henderson     rd[1] = st.l[1];
2299d935509SArd Biesheuvel }
230f1ecb913SArd Biesheuvel 
231a04b68e1SRichard Henderson void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
232a04b68e1SRichard Henderson {
233a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
234a04b68e1SRichard Henderson     bool decrypt = simd_data(desc);
235a04b68e1SRichard Henderson 
236a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
237a04b68e1SRichard Henderson         do_crypto_aesmc(vd + i, vm + i, decrypt);
238a04b68e1SRichard Henderson     }
239a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
240a04b68e1SRichard Henderson }
241a04b68e1SRichard Henderson 
242f1ecb913SArd Biesheuvel /*
243f1ecb913SArd Biesheuvel  * SHA-1 logical functions
244f1ecb913SArd Biesheuvel  */
245f1ecb913SArd Biesheuvel 
/* "Choose": each result bit comes from y where x is 1, from z where x is 0. */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t from_y = x & y;
    uint32_t from_z = ~x & z;

    return from_y | from_z;
}
250f1ecb913SArd Biesheuvel 
/* "Parity": bitwise XOR of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t acc = x;

    acc ^= y;
    acc ^= z;
    return acc;
}
255f1ecb913SArd Biesheuvel 
/* "Majority": each result bit is 1 when at least two input bits are 1. */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t xy = x & y;
    uint32_t xz = x & z;
    uint32_t yz = y & z;

    return xy | xz | yz;
}
260f1ecb913SArd Biesheuvel 
2611a66ac61SRichard Henderson void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op)
262f1ecb913SArd Biesheuvel {
2631a66ac61SRichard Henderson     uint64_t *rd = vd;
2641a66ac61SRichard Henderson     uint64_t *rn = vn;
2651a66ac61SRichard Henderson     uint64_t *rm = vm;
2661a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
2671a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
2681a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
269f1ecb913SArd Biesheuvel 
270f1ecb913SArd Biesheuvel     if (op == 3) { /* sha1su0 */
271f1ecb913SArd Biesheuvel         d.l[0] ^= d.l[1] ^ m.l[0];
272f1ecb913SArd Biesheuvel         d.l[1] ^= n.l[0] ^ m.l[1];
273f1ecb913SArd Biesheuvel     } else {
274f1ecb913SArd Biesheuvel         int i;
275f1ecb913SArd Biesheuvel 
276f1ecb913SArd Biesheuvel         for (i = 0; i < 4; i++) {
277f1ecb913SArd Biesheuvel             uint32_t t;
278f1ecb913SArd Biesheuvel 
279f1ecb913SArd Biesheuvel             switch (op) {
280f1ecb913SArd Biesheuvel             case 0: /* sha1c */
281b449ca3cSArd Biesheuvel                 t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
282f1ecb913SArd Biesheuvel                 break;
283f1ecb913SArd Biesheuvel             case 1: /* sha1p */
284b449ca3cSArd Biesheuvel                 t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
285f1ecb913SArd Biesheuvel                 break;
286f1ecb913SArd Biesheuvel             case 2: /* sha1m */
287b449ca3cSArd Biesheuvel                 t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
288f1ecb913SArd Biesheuvel                 break;
289f1ecb913SArd Biesheuvel             default:
290f1ecb913SArd Biesheuvel                 g_assert_not_reached();
291f1ecb913SArd Biesheuvel             }
292b449ca3cSArd Biesheuvel             t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
293b449ca3cSArd Biesheuvel                  + CR_ST_WORD(m, i);
294f1ecb913SArd Biesheuvel 
295b449ca3cSArd Biesheuvel             CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
296b449ca3cSArd Biesheuvel             CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
297b449ca3cSArd Biesheuvel             CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
298b449ca3cSArd Biesheuvel             CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
299b449ca3cSArd Biesheuvel             CR_ST_WORD(d, 0) = t;
300f1ecb913SArd Biesheuvel         }
301f1ecb913SArd Biesheuvel     }
3021a66ac61SRichard Henderson     rd[0] = d.l[0];
3031a66ac61SRichard Henderson     rd[1] = d.l[1];
304f1ecb913SArd Biesheuvel }
305f1ecb913SArd Biesheuvel 
3061a66ac61SRichard Henderson void HELPER(crypto_sha1h)(void *vd, void *vm)
307f1ecb913SArd Biesheuvel {
3081a66ac61SRichard Henderson     uint64_t *rd = vd;
3091a66ac61SRichard Henderson     uint64_t *rm = vm;
3101a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
311f1ecb913SArd Biesheuvel 
312b449ca3cSArd Biesheuvel     CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
313b449ca3cSArd Biesheuvel     CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
314f1ecb913SArd Biesheuvel 
3151a66ac61SRichard Henderson     rd[0] = m.l[0];
3161a66ac61SRichard Henderson     rd[1] = m.l[1];
317f1ecb913SArd Biesheuvel }
318f1ecb913SArd Biesheuvel 
3191a66ac61SRichard Henderson void HELPER(crypto_sha1su1)(void *vd, void *vm)
320f1ecb913SArd Biesheuvel {
3211a66ac61SRichard Henderson     uint64_t *rd = vd;
3221a66ac61SRichard Henderson     uint64_t *rm = vm;
3231a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
3241a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
325f1ecb913SArd Biesheuvel 
326b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
327b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
328b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
329b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
330f1ecb913SArd Biesheuvel 
3311a66ac61SRichard Henderson     rd[0] = d.l[0];
3321a66ac61SRichard Henderson     rd[1] = d.l[1];
333f1ecb913SArd Biesheuvel }
334f1ecb913SArd Biesheuvel 
335f1ecb913SArd Biesheuvel /*
336f1ecb913SArd Biesheuvel  * The SHA-256 logical functions, according to
337f1ecb913SArd Biesheuvel  * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
338f1ecb913SArd Biesheuvel  */
339f1ecb913SArd Biesheuvel 
/* XOR of x rotated right by 2, 13 and 22 bits. */
static uint32_t S0(uint32_t x)
{
    uint32_t acc = ror32(x, 2);

    acc ^= ror32(x, 13);
    acc ^= ror32(x, 22);
    return acc;
}
344f1ecb913SArd Biesheuvel 
/* XOR of x rotated right by 6, 11 and 25 bits. */
static uint32_t S1(uint32_t x)
{
    uint32_t acc = ror32(x, 6);

    acc ^= ror32(x, 11);
    acc ^= ror32(x, 25);
    return acc;
}
349f1ecb913SArd Biesheuvel 
/* XOR of x rotated right by 7 and 18 bits and x shifted right by 3 bits. */
static uint32_t s0(uint32_t x)
{
    uint32_t acc = ror32(x, 7);

    acc ^= ror32(x, 18);
    acc ^= x >> 3;
    return acc;
}
354f1ecb913SArd Biesheuvel 
/* XOR of x rotated right by 17 and 19 bits and x shifted right by 10 bits. */
static uint32_t s1(uint32_t x)
{
    uint32_t acc = ror32(x, 17);

    acc ^= ror32(x, 19);
    acc ^= x >> 10;
    return acc;
}
359f1ecb913SArd Biesheuvel 
3601a66ac61SRichard Henderson void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm)
361f1ecb913SArd Biesheuvel {
3621a66ac61SRichard Henderson     uint64_t *rd = vd;
3631a66ac61SRichard Henderson     uint64_t *rn = vn;
3641a66ac61SRichard Henderson     uint64_t *rm = vm;
3651a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
3661a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
3671a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
368f1ecb913SArd Biesheuvel     int i;
369f1ecb913SArd Biesheuvel 
370f1ecb913SArd Biesheuvel     for (i = 0; i < 4; i++) {
371b449ca3cSArd Biesheuvel         uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
372b449ca3cSArd Biesheuvel                      + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
373b449ca3cSArd Biesheuvel                      + CR_ST_WORD(m, i);
374f1ecb913SArd Biesheuvel 
375b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
376b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
377b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
378b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
379f1ecb913SArd Biesheuvel 
380b449ca3cSArd Biesheuvel         t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
381b449ca3cSArd Biesheuvel              + S0(CR_ST_WORD(d, 0));
382f1ecb913SArd Biesheuvel 
383b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
384b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
385b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
386b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 0) = t;
387f1ecb913SArd Biesheuvel     }
388f1ecb913SArd Biesheuvel 
3891a66ac61SRichard Henderson     rd[0] = d.l[0];
3901a66ac61SRichard Henderson     rd[1] = d.l[1];
391f1ecb913SArd Biesheuvel }
392f1ecb913SArd Biesheuvel 
3931a66ac61SRichard Henderson void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm)
394f1ecb913SArd Biesheuvel {
3951a66ac61SRichard Henderson     uint64_t *rd = vd;
3961a66ac61SRichard Henderson     uint64_t *rn = vn;
3971a66ac61SRichard Henderson     uint64_t *rm = vm;
3981a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
3991a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
4001a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
401f1ecb913SArd Biesheuvel     int i;
402f1ecb913SArd Biesheuvel 
403f1ecb913SArd Biesheuvel     for (i = 0; i < 4; i++) {
404b449ca3cSArd Biesheuvel         uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
405b449ca3cSArd Biesheuvel                      + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
406b449ca3cSArd Biesheuvel                      + CR_ST_WORD(m, i);
407f1ecb913SArd Biesheuvel 
408b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
409b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
410b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
411b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
412f1ecb913SArd Biesheuvel     }
413f1ecb913SArd Biesheuvel 
4141a66ac61SRichard Henderson     rd[0] = d.l[0];
4151a66ac61SRichard Henderson     rd[1] = d.l[1];
416f1ecb913SArd Biesheuvel }
417f1ecb913SArd Biesheuvel 
4181a66ac61SRichard Henderson void HELPER(crypto_sha256su0)(void *vd, void *vm)
419f1ecb913SArd Biesheuvel {
4201a66ac61SRichard Henderson     uint64_t *rd = vd;
4211a66ac61SRichard Henderson     uint64_t *rm = vm;
4221a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
4231a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
424f1ecb913SArd Biesheuvel 
425b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
426b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
427b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
428b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
429f1ecb913SArd Biesheuvel 
4301a66ac61SRichard Henderson     rd[0] = d.l[0];
4311a66ac61SRichard Henderson     rd[1] = d.l[1];
432f1ecb913SArd Biesheuvel }
433f1ecb913SArd Biesheuvel 
4341a66ac61SRichard Henderson void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm)
435f1ecb913SArd Biesheuvel {
4361a66ac61SRichard Henderson     uint64_t *rd = vd;
4371a66ac61SRichard Henderson     uint64_t *rn = vn;
4381a66ac61SRichard Henderson     uint64_t *rm = vm;
4391a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
4401a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
4411a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
442f1ecb913SArd Biesheuvel 
443b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
444b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
445b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
446b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
447f1ecb913SArd Biesheuvel 
4481a66ac61SRichard Henderson     rd[0] = d.l[0];
4491a66ac61SRichard Henderson     rd[1] = d.l[1];
450f1ecb913SArd Biesheuvel }
45190b827d1SArd Biesheuvel 
45290b827d1SArd Biesheuvel /*
45390b827d1SArd Biesheuvel  * The SHA-512 logical functions (same as above but using 64-bit operands)
45490b827d1SArd Biesheuvel  */
45590b827d1SArd Biesheuvel 
/* 64-bit "choose": each result bit comes from y where x is 1, else from z. */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    uint64_t from_y = x & y;
    uint64_t from_z = ~x & z;

    return from_y | from_z;
}
46090b827d1SArd Biesheuvel 
/* 64-bit "majority": a result bit is 1 when at least two input bits are 1. */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    uint64_t xy = x & y;
    uint64_t xz = x & z;
    uint64_t yz = y & z;

    return xy | xz | yz;
}
46590b827d1SArd Biesheuvel 
/* XOR of x rotated right by 28, 34 and 39 bits. */
static uint64_t S0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 28);

    acc ^= ror64(x, 34);
    acc ^= ror64(x, 39);
    return acc;
}
47090b827d1SArd Biesheuvel 
/* XOR of x rotated right by 14, 18 and 41 bits. */
static uint64_t S1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 14);

    acc ^= ror64(x, 18);
    acc ^= ror64(x, 41);
    return acc;
}
47590b827d1SArd Biesheuvel 
/* XOR of x rotated right by 1 and 8 bits and x shifted right by 7 bits. */
static uint64_t s0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 1);

    acc ^= ror64(x, 8);
    acc ^= x >> 7;
    return acc;
}
48090b827d1SArd Biesheuvel 
/* XOR of x rotated right by 19 and 61 bits and x shifted right by 6 bits. */
static uint64_t s1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 19);

    acc ^= ror64(x, 61);
    acc ^= x >> 6;
    return acc;
}
48590b827d1SArd Biesheuvel 
486*aaffebd6SRichard Henderson void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
48790b827d1SArd Biesheuvel {
48890b827d1SArd Biesheuvel     uint64_t *rd = vd;
48990b827d1SArd Biesheuvel     uint64_t *rn = vn;
49090b827d1SArd Biesheuvel     uint64_t *rm = vm;
49190b827d1SArd Biesheuvel     uint64_t d0 = rd[0];
49290b827d1SArd Biesheuvel     uint64_t d1 = rd[1];
49390b827d1SArd Biesheuvel 
49490b827d1SArd Biesheuvel     d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
49590b827d1SArd Biesheuvel     d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
49690b827d1SArd Biesheuvel 
49790b827d1SArd Biesheuvel     rd[0] = d0;
49890b827d1SArd Biesheuvel     rd[1] = d1;
499*aaffebd6SRichard Henderson 
500*aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
50190b827d1SArd Biesheuvel }
50290b827d1SArd Biesheuvel 
503*aaffebd6SRichard Henderson void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
50490b827d1SArd Biesheuvel {
50590b827d1SArd Biesheuvel     uint64_t *rd = vd;
50690b827d1SArd Biesheuvel     uint64_t *rn = vn;
50790b827d1SArd Biesheuvel     uint64_t *rm = vm;
50890b827d1SArd Biesheuvel     uint64_t d0 = rd[0];
50990b827d1SArd Biesheuvel     uint64_t d1 = rd[1];
51090b827d1SArd Biesheuvel 
51190b827d1SArd Biesheuvel     d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
51290b827d1SArd Biesheuvel     d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
51390b827d1SArd Biesheuvel 
51490b827d1SArd Biesheuvel     rd[0] = d0;
51590b827d1SArd Biesheuvel     rd[1] = d1;
516*aaffebd6SRichard Henderson 
517*aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
51890b827d1SArd Biesheuvel }
51990b827d1SArd Biesheuvel 
520*aaffebd6SRichard Henderson void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
52190b827d1SArd Biesheuvel {
52290b827d1SArd Biesheuvel     uint64_t *rd = vd;
52390b827d1SArd Biesheuvel     uint64_t *rn = vn;
52490b827d1SArd Biesheuvel     uint64_t d0 = rd[0];
52590b827d1SArd Biesheuvel     uint64_t d1 = rd[1];
52690b827d1SArd Biesheuvel 
52790b827d1SArd Biesheuvel     d0 += s0_512(rd[1]);
52890b827d1SArd Biesheuvel     d1 += s0_512(rn[0]);
52990b827d1SArd Biesheuvel 
53090b827d1SArd Biesheuvel     rd[0] = d0;
53190b827d1SArd Biesheuvel     rd[1] = d1;
532*aaffebd6SRichard Henderson 
533*aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
53490b827d1SArd Biesheuvel }
53590b827d1SArd Biesheuvel 
536*aaffebd6SRichard Henderson void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
53790b827d1SArd Biesheuvel {
53890b827d1SArd Biesheuvel     uint64_t *rd = vd;
53990b827d1SArd Biesheuvel     uint64_t *rn = vn;
54090b827d1SArd Biesheuvel     uint64_t *rm = vm;
54190b827d1SArd Biesheuvel 
54290b827d1SArd Biesheuvel     rd[0] += s1_512(rn[0]) + rm[0];
54390b827d1SArd Biesheuvel     rd[1] += s1_512(rn[1]) + rm[1];
544*aaffebd6SRichard Henderson 
545*aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
54690b827d1SArd Biesheuvel }
54780d6f4c6SArd Biesheuvel 
548*aaffebd6SRichard Henderson void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
54980d6f4c6SArd Biesheuvel {
55080d6f4c6SArd Biesheuvel     uint64_t *rd = vd;
55180d6f4c6SArd Biesheuvel     uint64_t *rn = vn;
55280d6f4c6SArd Biesheuvel     uint64_t *rm = vm;
55380d6f4c6SArd Biesheuvel     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
55480d6f4c6SArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
55580d6f4c6SArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
55680d6f4c6SArd Biesheuvel     uint32_t t;
55780d6f4c6SArd Biesheuvel 
55880d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
55980d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
56080d6f4c6SArd Biesheuvel 
56180d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
56280d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
56380d6f4c6SArd Biesheuvel 
56480d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
56580d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
56680d6f4c6SArd Biesheuvel 
56780d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
56880d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
56980d6f4c6SArd Biesheuvel 
57080d6f4c6SArd Biesheuvel     rd[0] = d.l[0];
57180d6f4c6SArd Biesheuvel     rd[1] = d.l[1];
572*aaffebd6SRichard Henderson 
573*aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
57480d6f4c6SArd Biesheuvel }
57580d6f4c6SArd Biesheuvel 
576*aaffebd6SRichard Henderson void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
57780d6f4c6SArd Biesheuvel {
57880d6f4c6SArd Biesheuvel     uint64_t *rd = vd;
57980d6f4c6SArd Biesheuvel     uint64_t *rn = vn;
58080d6f4c6SArd Biesheuvel     uint64_t *rm = vm;
58180d6f4c6SArd Biesheuvel     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
58280d6f4c6SArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
58380d6f4c6SArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
58480d6f4c6SArd Biesheuvel     uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
58580d6f4c6SArd Biesheuvel 
58680d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 0) ^= t;
58780d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
58880d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
58980d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
59080d6f4c6SArd Biesheuvel                         ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
59180d6f4c6SArd Biesheuvel 
59280d6f4c6SArd Biesheuvel     rd[0] = d.l[0];
59380d6f4c6SArd Biesheuvel     rd[1] = d.l[1];
594*aaffebd6SRichard Henderson 
595*aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
59680d6f4c6SArd Biesheuvel }
59780d6f4c6SArd Biesheuvel 
59880d6f4c6SArd Biesheuvel void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2,
59980d6f4c6SArd Biesheuvel                           uint32_t opcode)
60080d6f4c6SArd Biesheuvel {
60180d6f4c6SArd Biesheuvel     uint64_t *rd = vd;
60280d6f4c6SArd Biesheuvel     uint64_t *rn = vn;
60380d6f4c6SArd Biesheuvel     uint64_t *rm = vm;
60480d6f4c6SArd Biesheuvel     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
60580d6f4c6SArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
60680d6f4c6SArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
60780d6f4c6SArd Biesheuvel     uint32_t t;
60880d6f4c6SArd Biesheuvel 
60980d6f4c6SArd Biesheuvel     assert(imm2 < 4);
61080d6f4c6SArd Biesheuvel 
61180d6f4c6SArd Biesheuvel     if (opcode == 0 || opcode == 2) {
61280d6f4c6SArd Biesheuvel         /* SM3TT1A, SM3TT2A */
61380d6f4c6SArd Biesheuvel         t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
61480d6f4c6SArd Biesheuvel     } else if (opcode == 1) {
61580d6f4c6SArd Biesheuvel         /* SM3TT1B */
61680d6f4c6SArd Biesheuvel         t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
61780d6f4c6SArd Biesheuvel     } else if (opcode == 3) {
61880d6f4c6SArd Biesheuvel         /* SM3TT2B */
61980d6f4c6SArd Biesheuvel         t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
62080d6f4c6SArd Biesheuvel     } else {
62180d6f4c6SArd Biesheuvel         g_assert_not_reached();
62280d6f4c6SArd Biesheuvel     }
62380d6f4c6SArd Biesheuvel 
62480d6f4c6SArd Biesheuvel     t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
62580d6f4c6SArd Biesheuvel 
62680d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
62780d6f4c6SArd Biesheuvel 
62880d6f4c6SArd Biesheuvel     if (opcode < 2) {
62980d6f4c6SArd Biesheuvel         /* SM3TT1A, SM3TT1B */
63080d6f4c6SArd Biesheuvel         t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
63180d6f4c6SArd Biesheuvel 
63280d6f4c6SArd Biesheuvel         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
63380d6f4c6SArd Biesheuvel     } else {
63480d6f4c6SArd Biesheuvel         /* SM3TT2A, SM3TT2B */
63580d6f4c6SArd Biesheuvel         t += CR_ST_WORD(n, 3);
63680d6f4c6SArd Biesheuvel         t ^= rol32(t, 9) ^ rol32(t, 17);
63780d6f4c6SArd Biesheuvel 
63880d6f4c6SArd Biesheuvel         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
63980d6f4c6SArd Biesheuvel     }
64080d6f4c6SArd Biesheuvel 
64180d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
64280d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 3) = t;
64380d6f4c6SArd Biesheuvel 
64480d6f4c6SArd Biesheuvel     rd[0] = d.l[0];
64580d6f4c6SArd Biesheuvel     rd[1] = d.l[1];
64680d6f4c6SArd Biesheuvel }
647b6577bcdSArd Biesheuvel 
/*
 * SM4 substitution box: maps each input byte to its substituted value.
 * Used by the SM4E and SM4EKEY helpers.  The leading comment on each row
 * is the index (hex) of the row's first entry.
 */
static const uint8_t sm4_sbox[256] = {
    /* 00 */ 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
    /* 08 */ 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
    /* 10 */ 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
    /* 18 */ 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
    /* 20 */ 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
    /* 28 */ 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
    /* 30 */ 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
    /* 38 */ 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
    /* 40 */ 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
    /* 48 */ 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
    /* 50 */ 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
    /* 58 */ 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
    /* 60 */ 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
    /* 68 */ 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
    /* 70 */ 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
    /* 78 */ 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
    /* 80 */ 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
    /* 88 */ 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
    /* 90 */ 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
    /* 98 */ 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
    /* a0 */ 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
    /* a8 */ 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
    /* b0 */ 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
    /* b8 */ 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
    /* c0 */ 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
    /* c8 */ 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
    /* d0 */ 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
    /* d8 */ 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
    /* e0 */ 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
    /* e8 */ 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
    /* f0 */ 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
    /* f8 */ 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
};
682b6577bcdSArd Biesheuvel 
683a04b68e1SRichard Henderson static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
684b6577bcdSArd Biesheuvel {
685a04b68e1SRichard Henderson     union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
686a04b68e1SRichard Henderson     union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
687b6577bcdSArd Biesheuvel     uint32_t t, i;
688b6577bcdSArd Biesheuvel 
689b6577bcdSArd Biesheuvel     for (i = 0; i < 4; i++) {
690b6577bcdSArd Biesheuvel         t = CR_ST_WORD(d, (i + 1) % 4) ^
691b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 2) % 4) ^
692b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 3) % 4) ^
693b6577bcdSArd Biesheuvel             CR_ST_WORD(n, i);
694b6577bcdSArd Biesheuvel 
695b6577bcdSArd Biesheuvel         t = sm4_sbox[t & 0xff] |
696b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 8) & 0xff] << 8 |
697b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 16) & 0xff] << 16 |
698b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 24) & 0xff] << 24;
699b6577bcdSArd Biesheuvel 
700b6577bcdSArd Biesheuvel         CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
701b6577bcdSArd Biesheuvel                             rol32(t, 24);
702b6577bcdSArd Biesheuvel     }
703b6577bcdSArd Biesheuvel 
704b6577bcdSArd Biesheuvel     rd[0] = d.l[0];
705b6577bcdSArd Biesheuvel     rd[1] = d.l[1];
706b6577bcdSArd Biesheuvel }
707b6577bcdSArd Biesheuvel 
708a04b68e1SRichard Henderson void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
709b6577bcdSArd Biesheuvel {
710a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
711a04b68e1SRichard Henderson 
712a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
713a04b68e1SRichard Henderson         do_crypto_sm4e(vd + i, vn + i, vm + i);
714a04b68e1SRichard Henderson     }
715a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
716a04b68e1SRichard Henderson }
717a04b68e1SRichard Henderson 
718a04b68e1SRichard Henderson static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
719a04b68e1SRichard Henderson {
720b6577bcdSArd Biesheuvel     union CRYPTO_STATE d;
721b6577bcdSArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
722b6577bcdSArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
723b6577bcdSArd Biesheuvel     uint32_t t, i;
724b6577bcdSArd Biesheuvel 
725b6577bcdSArd Biesheuvel     d = n;
726b6577bcdSArd Biesheuvel     for (i = 0; i < 4; i++) {
727b6577bcdSArd Biesheuvel         t = CR_ST_WORD(d, (i + 1) % 4) ^
728b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 2) % 4) ^
729b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 3) % 4) ^
730b6577bcdSArd Biesheuvel             CR_ST_WORD(m, i);
731b6577bcdSArd Biesheuvel 
732b6577bcdSArd Biesheuvel         t = sm4_sbox[t & 0xff] |
733b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 8) & 0xff] << 8 |
734b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 16) & 0xff] << 16 |
735b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 24) & 0xff] << 24;
736b6577bcdSArd Biesheuvel 
737b6577bcdSArd Biesheuvel         CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
738b6577bcdSArd Biesheuvel     }
739b6577bcdSArd Biesheuvel 
740b6577bcdSArd Biesheuvel     rd[0] = d.l[0];
741b6577bcdSArd Biesheuvel     rd[1] = d.l[1];
742b6577bcdSArd Biesheuvel }
743a04b68e1SRichard Henderson 
744a04b68e1SRichard Henderson void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
745a04b68e1SRichard Henderson {
746a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
747a04b68e1SRichard Henderson 
748a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
749a04b68e1SRichard Henderson         do_crypto_sm4ekey(vd + i, vn + i, vm + i);
750a04b68e1SRichard Henderson     }
751a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
752a04b68e1SRichard Henderson }
7531738860dSRichard Henderson 
7541738860dSRichard Henderson void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
7551738860dSRichard Henderson {
7561738860dSRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
7571738860dSRichard Henderson     uint64_t *d = vd, *n = vn, *m = vm;
7581738860dSRichard Henderson 
7591738860dSRichard Henderson     for (i = 0; i < opr_sz / 8; ++i) {
7601738860dSRichard Henderson         d[i] = n[i] ^ rol64(m[i], 1);
7611738860dSRichard Henderson     }
7621738860dSRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
7631738860dSRichard Henderson }
764