xref: /qemu/target/arm/tcg/crypto_helper.c (revision a04b68e1d4c4f0cd5cd7542697b1b230b84532f5)
/*
 * crypto_helper.c - emulate v8 Crypto Extensions instructions
 *
 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 */
119d935509SArd Biesheuvel 
1274c21bd0SPeter Maydell #include "qemu/osdep.h"
139d935509SArd Biesheuvel 
149d935509SArd Biesheuvel #include "cpu.h"
152ef6175aSRichard Henderson #include "exec/helper-proto.h"
16*a04b68e1SRichard Henderson #include "tcg/tcg-gvec-desc.h"
176f2945cdSDaniel P. Berrange #include "crypto/aes.h"
18*a04b68e1SRichard Henderson #include "vec_internal.h"
199d935509SArd Biesheuvel 
/*
 * One 128-bit vector value, viewable as 16 bytes, four 32-bit words or
 * two 64-bit halves.  The helpers in this file load and store it through
 * the .l[] view and address individual elements through the
 * CR_ST_BYTE() / CR_ST_WORD() accessors.
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];   /* byte view */
    uint32_t   words[4];    /* 32-bit word view */
    uint64_t   l[2];        /* 64-bit view, used for load/store */
};
259d935509SArd Biesheuvel 
/*
 * Element accessors for union CRYPTO_STATE.
 *
 * CR_ST_BYTE(state, i) and CR_ST_WORD(state, i) yield an lvalue for byte
 * or 32-bit word number i of the state.  The state is always filled
 * through its two 64-bit .l[] halves, so on a big-endian host the index
 * is remapped: element 0 then lands on the least significant byte/word
 * of .l[0] (bytes[7] / words[1]), exactly as it does without remapping
 * on a little-endian host.
 *
 * Both macro arguments are fully parenthesised so that any expression
 * (for example a dereferenced pointer) may be passed as the state.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[(i)])
#define CR_ST_WORD(state, i)   ((state).words[(i)])
#endif
33b449ca3cSArd Biesheuvel 
34*a04b68e1SRichard Henderson static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
35*a04b68e1SRichard Henderson                            uint64_t *rm, bool decrypt)
369d935509SArd Biesheuvel {
3759dcd29aSTom Musta     static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
3859dcd29aSTom Musta     static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
391a66ac61SRichard Henderson     union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
40*a04b68e1SRichard Henderson     union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
419d935509SArd Biesheuvel     int i;
429d935509SArd Biesheuvel 
439d935509SArd Biesheuvel     /* xor state vector with round key */
449d935509SArd Biesheuvel     rk.l[0] ^= st.l[0];
459d935509SArd Biesheuvel     rk.l[1] ^= st.l[1];
469d935509SArd Biesheuvel 
479d935509SArd Biesheuvel     /* combine ShiftRows operation and sbox substitution */
489d935509SArd Biesheuvel     for (i = 0; i < 16; i++) {
49b449ca3cSArd Biesheuvel         CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
509d935509SArd Biesheuvel     }
519d935509SArd Biesheuvel 
521a66ac61SRichard Henderson     rd[0] = st.l[0];
531a66ac61SRichard Henderson     rd[1] = st.l[1];
549d935509SArd Biesheuvel }
559d935509SArd Biesheuvel 
56*a04b68e1SRichard Henderson void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
57*a04b68e1SRichard Henderson {
58*a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
59*a04b68e1SRichard Henderson     bool decrypt = simd_data(desc);
60*a04b68e1SRichard Henderson 
61*a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
62*a04b68e1SRichard Henderson         do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
63*a04b68e1SRichard Henderson     }
64*a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
65*a04b68e1SRichard Henderson }
66*a04b68e1SRichard Henderson 
67*a04b68e1SRichard Henderson static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
689d935509SArd Biesheuvel {
699d935509SArd Biesheuvel     static uint32_t const mc[][256] = { {
709d935509SArd Biesheuvel         /* MixColumns lookup table */
719d935509SArd Biesheuvel         0x00000000, 0x03010102, 0x06020204, 0x05030306,
729d935509SArd Biesheuvel         0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
739d935509SArd Biesheuvel         0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
749d935509SArd Biesheuvel         0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
759d935509SArd Biesheuvel         0x30101020, 0x33111122, 0x36121224, 0x35131326,
769d935509SArd Biesheuvel         0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
779d935509SArd Biesheuvel         0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
789d935509SArd Biesheuvel         0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
799d935509SArd Biesheuvel         0x60202040, 0x63212142, 0x66222244, 0x65232346,
809d935509SArd Biesheuvel         0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
819d935509SArd Biesheuvel         0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
829d935509SArd Biesheuvel         0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
839d935509SArd Biesheuvel         0x50303060, 0x53313162, 0x56323264, 0x55333366,
849d935509SArd Biesheuvel         0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
859d935509SArd Biesheuvel         0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
869d935509SArd Biesheuvel         0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
879d935509SArd Biesheuvel         0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
889d935509SArd Biesheuvel         0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
899d935509SArd Biesheuvel         0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
909d935509SArd Biesheuvel         0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
919d935509SArd Biesheuvel         0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
929d935509SArd Biesheuvel         0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
939d935509SArd Biesheuvel         0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
949d935509SArd Biesheuvel         0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
959d935509SArd Biesheuvel         0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
969d935509SArd Biesheuvel         0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
979d935509SArd Biesheuvel         0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
989d935509SArd Biesheuvel         0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
999d935509SArd Biesheuvel         0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
1009d935509SArd Biesheuvel         0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
1019d935509SArd Biesheuvel         0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
1029d935509SArd Biesheuvel         0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
1039d935509SArd Biesheuvel         0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
1049d935509SArd Biesheuvel         0x97848413, 0x94858511, 0x91868617, 0x92878715,
1059d935509SArd Biesheuvel         0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
1069d935509SArd Biesheuvel         0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
1079d935509SArd Biesheuvel         0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
1089d935509SArd Biesheuvel         0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
1099d935509SArd Biesheuvel         0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
1109d935509SArd Biesheuvel         0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
1119d935509SArd Biesheuvel         0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
1129d935509SArd Biesheuvel         0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
1139d935509SArd Biesheuvel         0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
1149d935509SArd Biesheuvel         0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
1159d935509SArd Biesheuvel         0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
1169d935509SArd Biesheuvel         0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
1179d935509SArd Biesheuvel         0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
1189d935509SArd Biesheuvel         0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
1199d935509SArd Biesheuvel         0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
1209d935509SArd Biesheuvel         0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
1219d935509SArd Biesheuvel         0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
1229d935509SArd Biesheuvel         0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
1239d935509SArd Biesheuvel         0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
1249d935509SArd Biesheuvel         0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
1259d935509SArd Biesheuvel         0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
1269d935509SArd Biesheuvel         0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
1279d935509SArd Biesheuvel         0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
1289d935509SArd Biesheuvel         0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
1299d935509SArd Biesheuvel         0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
1309d935509SArd Biesheuvel         0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
1319d935509SArd Biesheuvel         0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
1329d935509SArd Biesheuvel         0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
1339d935509SArd Biesheuvel         0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
1349d935509SArd Biesheuvel         0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
1359d935509SArd Biesheuvel     }, {
1369d935509SArd Biesheuvel         /* Inverse MixColumns lookup table */
1379d935509SArd Biesheuvel         0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
1389d935509SArd Biesheuvel         0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
1399d935509SArd Biesheuvel         0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
1409d935509SArd Biesheuvel         0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
1419d935509SArd Biesheuvel         0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
1429d935509SArd Biesheuvel         0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
1439d935509SArd Biesheuvel         0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
1449d935509SArd Biesheuvel         0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
1459d935509SArd Biesheuvel         0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
1469d935509SArd Biesheuvel         0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
1479d935509SArd Biesheuvel         0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
1489d935509SArd Biesheuvel         0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
1499d935509SArd Biesheuvel         0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
1509d935509SArd Biesheuvel         0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
1519d935509SArd Biesheuvel         0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
1529d935509SArd Biesheuvel         0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
1539d935509SArd Biesheuvel         0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
1549d935509SArd Biesheuvel         0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
1559d935509SArd Biesheuvel         0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
1569d935509SArd Biesheuvel         0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
1579d935509SArd Biesheuvel         0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
1589d935509SArd Biesheuvel         0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
1599d935509SArd Biesheuvel         0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
1609d935509SArd Biesheuvel         0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
1619d935509SArd Biesheuvel         0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
1629d935509SArd Biesheuvel         0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
1639d935509SArd Biesheuvel         0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
1649d935509SArd Biesheuvel         0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
1659d935509SArd Biesheuvel         0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
1669d935509SArd Biesheuvel         0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
1679d935509SArd Biesheuvel         0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
1689d935509SArd Biesheuvel         0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
1699d935509SArd Biesheuvel         0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
1709d935509SArd Biesheuvel         0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
1719d935509SArd Biesheuvel         0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
1729d935509SArd Biesheuvel         0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
1739d935509SArd Biesheuvel         0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
1749d935509SArd Biesheuvel         0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
1759d935509SArd Biesheuvel         0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
1769d935509SArd Biesheuvel         0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
1779d935509SArd Biesheuvel         0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
1789d935509SArd Biesheuvel         0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
1799d935509SArd Biesheuvel         0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
1809d935509SArd Biesheuvel         0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
1819d935509SArd Biesheuvel         0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
1829d935509SArd Biesheuvel         0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
1839d935509SArd Biesheuvel         0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
1849d935509SArd Biesheuvel         0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
1859d935509SArd Biesheuvel         0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
1869d935509SArd Biesheuvel         0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
1879d935509SArd Biesheuvel         0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
1889d935509SArd Biesheuvel         0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
1899d935509SArd Biesheuvel         0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
1909d935509SArd Biesheuvel         0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
1919d935509SArd Biesheuvel         0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
1929d935509SArd Biesheuvel         0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
1939d935509SArd Biesheuvel         0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
1949d935509SArd Biesheuvel         0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
1959d935509SArd Biesheuvel         0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
1969d935509SArd Biesheuvel         0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
1979d935509SArd Biesheuvel         0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
1989d935509SArd Biesheuvel         0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
1999d935509SArd Biesheuvel         0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
2009d935509SArd Biesheuvel         0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
2019d935509SArd Biesheuvel     } };
2021a66ac61SRichard Henderson 
2031a66ac61SRichard Henderson     union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
2049d935509SArd Biesheuvel     int i;
2059d935509SArd Biesheuvel 
2069d935509SArd Biesheuvel     for (i = 0; i < 16; i += 4) {
207b449ca3cSArd Biesheuvel         CR_ST_WORD(st, i >> 2) =
208b449ca3cSArd Biesheuvel             mc[decrypt][CR_ST_BYTE(st, i)] ^
209b449ca3cSArd Biesheuvel             rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
210b449ca3cSArd Biesheuvel             rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
211b449ca3cSArd Biesheuvel             rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
2129d935509SArd Biesheuvel     }
2139d935509SArd Biesheuvel 
2141a66ac61SRichard Henderson     rd[0] = st.l[0];
2151a66ac61SRichard Henderson     rd[1] = st.l[1];
2169d935509SArd Biesheuvel }
217f1ecb913SArd Biesheuvel 
218*a04b68e1SRichard Henderson void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
219*a04b68e1SRichard Henderson {
220*a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
221*a04b68e1SRichard Henderson     bool decrypt = simd_data(desc);
222*a04b68e1SRichard Henderson 
223*a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
224*a04b68e1SRichard Henderson         do_crypto_aesmc(vd + i, vm + i, decrypt);
225*a04b68e1SRichard Henderson     }
226*a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
227*a04b68e1SRichard Henderson }
228*a04b68e1SRichard Henderson 
/*
 * SHA-1 logical functions
 */
232f1ecb913SArd Biesheuvel 
/*
 * Bitwise "choose": each result bit is taken from y where the matching
 * bit of x is set, and from z where it is clear.
 */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t differing = y ^ z;

    /* start from z and flip the differing bits that x selects */
    return z ^ (x & differing);
}
237f1ecb913SArd Biesheuvel 
/*
 * Bitwise parity: the exclusive-or of the three inputs.
 */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t acc = x;

    acc ^= y;
    acc ^= z;
    return acc;
}
242f1ecb913SArd Biesheuvel 
/*
 * Bitwise majority: a result bit is set when at least two of the
 * corresponding input bits are set.
 */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t both = x & y;      /* set in x and in y */
    uint32_t either = x | y;    /* set in x or in y */

    return both | (either & z);
}
247f1ecb913SArd Biesheuvel 
2481a66ac61SRichard Henderson void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op)
249f1ecb913SArd Biesheuvel {
2501a66ac61SRichard Henderson     uint64_t *rd = vd;
2511a66ac61SRichard Henderson     uint64_t *rn = vn;
2521a66ac61SRichard Henderson     uint64_t *rm = vm;
2531a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
2541a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
2551a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
256f1ecb913SArd Biesheuvel 
257f1ecb913SArd Biesheuvel     if (op == 3) { /* sha1su0 */
258f1ecb913SArd Biesheuvel         d.l[0] ^= d.l[1] ^ m.l[0];
259f1ecb913SArd Biesheuvel         d.l[1] ^= n.l[0] ^ m.l[1];
260f1ecb913SArd Biesheuvel     } else {
261f1ecb913SArd Biesheuvel         int i;
262f1ecb913SArd Biesheuvel 
263f1ecb913SArd Biesheuvel         for (i = 0; i < 4; i++) {
264f1ecb913SArd Biesheuvel             uint32_t t;
265f1ecb913SArd Biesheuvel 
266f1ecb913SArd Biesheuvel             switch (op) {
267f1ecb913SArd Biesheuvel             case 0: /* sha1c */
268b449ca3cSArd Biesheuvel                 t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
269f1ecb913SArd Biesheuvel                 break;
270f1ecb913SArd Biesheuvel             case 1: /* sha1p */
271b449ca3cSArd Biesheuvel                 t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
272f1ecb913SArd Biesheuvel                 break;
273f1ecb913SArd Biesheuvel             case 2: /* sha1m */
274b449ca3cSArd Biesheuvel                 t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
275f1ecb913SArd Biesheuvel                 break;
276f1ecb913SArd Biesheuvel             default:
277f1ecb913SArd Biesheuvel                 g_assert_not_reached();
278f1ecb913SArd Biesheuvel             }
279b449ca3cSArd Biesheuvel             t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
280b449ca3cSArd Biesheuvel                  + CR_ST_WORD(m, i);
281f1ecb913SArd Biesheuvel 
282b449ca3cSArd Biesheuvel             CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
283b449ca3cSArd Biesheuvel             CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
284b449ca3cSArd Biesheuvel             CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
285b449ca3cSArd Biesheuvel             CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
286b449ca3cSArd Biesheuvel             CR_ST_WORD(d, 0) = t;
287f1ecb913SArd Biesheuvel         }
288f1ecb913SArd Biesheuvel     }
2891a66ac61SRichard Henderson     rd[0] = d.l[0];
2901a66ac61SRichard Henderson     rd[1] = d.l[1];
291f1ecb913SArd Biesheuvel }
292f1ecb913SArd Biesheuvel 
2931a66ac61SRichard Henderson void HELPER(crypto_sha1h)(void *vd, void *vm)
294f1ecb913SArd Biesheuvel {
2951a66ac61SRichard Henderson     uint64_t *rd = vd;
2961a66ac61SRichard Henderson     uint64_t *rm = vm;
2971a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
298f1ecb913SArd Biesheuvel 
299b449ca3cSArd Biesheuvel     CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
300b449ca3cSArd Biesheuvel     CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
301f1ecb913SArd Biesheuvel 
3021a66ac61SRichard Henderson     rd[0] = m.l[0];
3031a66ac61SRichard Henderson     rd[1] = m.l[1];
304f1ecb913SArd Biesheuvel }
305f1ecb913SArd Biesheuvel 
3061a66ac61SRichard Henderson void HELPER(crypto_sha1su1)(void *vd, void *vm)
307f1ecb913SArd Biesheuvel {
3081a66ac61SRichard Henderson     uint64_t *rd = vd;
3091a66ac61SRichard Henderson     uint64_t *rm = vm;
3101a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
3111a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
312f1ecb913SArd Biesheuvel 
313b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
314b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
315b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
316b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
317f1ecb913SArd Biesheuvel 
3181a66ac61SRichard Henderson     rd[0] = d.l[0];
3191a66ac61SRichard Henderson     rd[1] = d.l[1];
320f1ecb913SArd Biesheuvel }
321f1ecb913SArd Biesheuvel 
/*
 * The SHA-256 logical functions, according to
 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
 */
326f1ecb913SArd Biesheuvel 
/*
 * SHA-256 logical function S0: x rotated right by 2, 13 and 22 bits,
 * xor'ed together.
 */
static uint32_t S0(uint32_t x)
{
    uint32_t acc = ror32(x, 2);

    acc ^= ror32(x, 13);
    acc ^= ror32(x, 22);
    return acc;
}
331f1ecb913SArd Biesheuvel 
/*
 * SHA-256 logical function S1: x rotated right by 6, 11 and 25 bits,
 * xor'ed together.
 */
static uint32_t S1(uint32_t x)
{
    uint32_t acc = ror32(x, 6);

    acc ^= ror32(x, 11);
    acc ^= ror32(x, 25);
    return acc;
}
336f1ecb913SArd Biesheuvel 
/*
 * SHA-256 logical function s0: x rotated right by 7 and by 18 bits,
 * xor'ed with x shifted right by 3 bits.
 */
static uint32_t s0(uint32_t x)
{
    uint32_t acc = ror32(x, 7);

    acc ^= ror32(x, 18);
    acc ^= x >> 3;
    return acc;
}
341f1ecb913SArd Biesheuvel 
/*
 * SHA-256 logical function s1: x rotated right by 17 and by 19 bits,
 * xor'ed with x shifted right by 10 bits.
 */
static uint32_t s1(uint32_t x)
{
    uint32_t acc = ror32(x, 17);

    acc ^= ror32(x, 19);
    acc ^= x >> 10;
    return acc;
}
346f1ecb913SArd Biesheuvel 
3471a66ac61SRichard Henderson void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm)
348f1ecb913SArd Biesheuvel {
3491a66ac61SRichard Henderson     uint64_t *rd = vd;
3501a66ac61SRichard Henderson     uint64_t *rn = vn;
3511a66ac61SRichard Henderson     uint64_t *rm = vm;
3521a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
3531a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
3541a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
355f1ecb913SArd Biesheuvel     int i;
356f1ecb913SArd Biesheuvel 
357f1ecb913SArd Biesheuvel     for (i = 0; i < 4; i++) {
358b449ca3cSArd Biesheuvel         uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
359b449ca3cSArd Biesheuvel                      + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
360b449ca3cSArd Biesheuvel                      + CR_ST_WORD(m, i);
361f1ecb913SArd Biesheuvel 
362b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
363b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
364b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
365b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
366f1ecb913SArd Biesheuvel 
367b449ca3cSArd Biesheuvel         t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
368b449ca3cSArd Biesheuvel              + S0(CR_ST_WORD(d, 0));
369f1ecb913SArd Biesheuvel 
370b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
371b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
372b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
373b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 0) = t;
374f1ecb913SArd Biesheuvel     }
375f1ecb913SArd Biesheuvel 
3761a66ac61SRichard Henderson     rd[0] = d.l[0];
3771a66ac61SRichard Henderson     rd[1] = d.l[1];
378f1ecb913SArd Biesheuvel }
379f1ecb913SArd Biesheuvel 
3801a66ac61SRichard Henderson void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm)
381f1ecb913SArd Biesheuvel {
3821a66ac61SRichard Henderson     uint64_t *rd = vd;
3831a66ac61SRichard Henderson     uint64_t *rn = vn;
3841a66ac61SRichard Henderson     uint64_t *rm = vm;
3851a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
3861a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
3871a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
388f1ecb913SArd Biesheuvel     int i;
389f1ecb913SArd Biesheuvel 
390f1ecb913SArd Biesheuvel     for (i = 0; i < 4; i++) {
391b449ca3cSArd Biesheuvel         uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
392b449ca3cSArd Biesheuvel                      + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
393b449ca3cSArd Biesheuvel                      + CR_ST_WORD(m, i);
394f1ecb913SArd Biesheuvel 
395b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
396b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
397b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
398b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
399f1ecb913SArd Biesheuvel     }
400f1ecb913SArd Biesheuvel 
4011a66ac61SRichard Henderson     rd[0] = d.l[0];
4021a66ac61SRichard Henderson     rd[1] = d.l[1];
403f1ecb913SArd Biesheuvel }
404f1ecb913SArd Biesheuvel 
4051a66ac61SRichard Henderson void HELPER(crypto_sha256su0)(void *vd, void *vm)
406f1ecb913SArd Biesheuvel {
4071a66ac61SRichard Henderson     uint64_t *rd = vd;
4081a66ac61SRichard Henderson     uint64_t *rm = vm;
4091a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
4101a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
411f1ecb913SArd Biesheuvel 
412b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
413b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
414b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
415b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
416f1ecb913SArd Biesheuvel 
4171a66ac61SRichard Henderson     rd[0] = d.l[0];
4181a66ac61SRichard Henderson     rd[1] = d.l[1];
419f1ecb913SArd Biesheuvel }
420f1ecb913SArd Biesheuvel 
4211a66ac61SRichard Henderson void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm)
422f1ecb913SArd Biesheuvel {
4231a66ac61SRichard Henderson     uint64_t *rd = vd;
4241a66ac61SRichard Henderson     uint64_t *rn = vn;
4251a66ac61SRichard Henderson     uint64_t *rm = vm;
4261a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
4271a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
4281a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
429f1ecb913SArd Biesheuvel 
430b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
431b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
432b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
433b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
434f1ecb913SArd Biesheuvel 
4351a66ac61SRichard Henderson     rd[0] = d.l[0];
4361a66ac61SRichard Henderson     rd[1] = d.l[1];
437f1ecb913SArd Biesheuvel }
43890b827d1SArd Biesheuvel 
/*
 * The SHA-512 logical functions (same as above but using 64-bit operands)
 */
44290b827d1SArd Biesheuvel 
/*
 * 64-bit bitwise "choose": each result bit is taken from y where the
 * matching bit of x is set, and from z where it is clear.
 */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    uint64_t differing = y ^ z;

    /* start from z and flip the differing bits that x selects */
    return z ^ (x & differing);
}
44790b827d1SArd Biesheuvel 
/*
 * 64-bit bitwise majority: a result bit is set when at least two of
 * the corresponding input bits are set.
 */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    uint64_t both = x & y;      /* set in x and in y */
    uint64_t either = x | y;    /* set in x or in y */

    return both | (either & z);
}
45290b827d1SArd Biesheuvel 
/*
 * SHA-512 logical function S0: x rotated right by 28, 34 and 39 bits,
 * xor'ed together.
 */
static uint64_t S0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 28);

    acc ^= ror64(x, 34);
    acc ^= ror64(x, 39);
    return acc;
}
45790b827d1SArd Biesheuvel 
/*
 * SHA-512 logical function S1: x rotated right by 14, 18 and 41 bits,
 * xor'ed together.
 */
static uint64_t S1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 14);

    acc ^= ror64(x, 18);
    acc ^= ror64(x, 41);
    return acc;
}
46290b827d1SArd Biesheuvel 
/*
 * SHA-512 logical function s0: x rotated right by 1 and by 8 bits,
 * xor'ed with x shifted right by 7 bits.
 */
static uint64_t s0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 1);

    acc ^= ror64(x, 8);
    acc ^= x >> 7;
    return acc;
}
46790b827d1SArd Biesheuvel 
/*
 * SHA-512 logical function s1: x rotated right by 19 and by 61 bits,
 * xor'ed with x shifted right by 6 bits.
 */
static uint64_t s1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 19);

    acc ^= ror64(x, 61);
    acc ^= x >> 6;
    return acc;
}
47290b827d1SArd Biesheuvel 
47390b827d1SArd Biesheuvel void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm)
47490b827d1SArd Biesheuvel {
47590b827d1SArd Biesheuvel     uint64_t *rd = vd;
47690b827d1SArd Biesheuvel     uint64_t *rn = vn;
47790b827d1SArd Biesheuvel     uint64_t *rm = vm;
47890b827d1SArd Biesheuvel     uint64_t d0 = rd[0];
47990b827d1SArd Biesheuvel     uint64_t d1 = rd[1];
48090b827d1SArd Biesheuvel 
48190b827d1SArd Biesheuvel     d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
48290b827d1SArd Biesheuvel     d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
48390b827d1SArd Biesheuvel 
48490b827d1SArd Biesheuvel     rd[0] = d0;
48590b827d1SArd Biesheuvel     rd[1] = d1;
48690b827d1SArd Biesheuvel }
48790b827d1SArd Biesheuvel 
48890b827d1SArd Biesheuvel void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm)
48990b827d1SArd Biesheuvel {
49090b827d1SArd Biesheuvel     uint64_t *rd = vd;
49190b827d1SArd Biesheuvel     uint64_t *rn = vn;
49290b827d1SArd Biesheuvel     uint64_t *rm = vm;
49390b827d1SArd Biesheuvel     uint64_t d0 = rd[0];
49490b827d1SArd Biesheuvel     uint64_t d1 = rd[1];
49590b827d1SArd Biesheuvel 
49690b827d1SArd Biesheuvel     d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
49790b827d1SArd Biesheuvel     d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
49890b827d1SArd Biesheuvel 
49990b827d1SArd Biesheuvel     rd[0] = d0;
50090b827d1SArd Biesheuvel     rd[1] = d1;
50190b827d1SArd Biesheuvel }
50290b827d1SArd Biesheuvel 
50390b827d1SArd Biesheuvel void HELPER(crypto_sha512su0)(void *vd, void *vn)
50490b827d1SArd Biesheuvel {
50590b827d1SArd Biesheuvel     uint64_t *rd = vd;
50690b827d1SArd Biesheuvel     uint64_t *rn = vn;
50790b827d1SArd Biesheuvel     uint64_t d0 = rd[0];
50890b827d1SArd Biesheuvel     uint64_t d1 = rd[1];
50990b827d1SArd Biesheuvel 
51090b827d1SArd Biesheuvel     d0 += s0_512(rd[1]);
51190b827d1SArd Biesheuvel     d1 += s0_512(rn[0]);
51290b827d1SArd Biesheuvel 
51390b827d1SArd Biesheuvel     rd[0] = d0;
51490b827d1SArd Biesheuvel     rd[1] = d1;
51590b827d1SArd Biesheuvel }
51690b827d1SArd Biesheuvel 
51790b827d1SArd Biesheuvel void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm)
51890b827d1SArd Biesheuvel {
51990b827d1SArd Biesheuvel     uint64_t *rd = vd;
52090b827d1SArd Biesheuvel     uint64_t *rn = vn;
52190b827d1SArd Biesheuvel     uint64_t *rm = vm;
52290b827d1SArd Biesheuvel 
52390b827d1SArd Biesheuvel     rd[0] += s1_512(rn[0]) + rm[0];
52490b827d1SArd Biesheuvel     rd[1] += s1_512(rn[1]) + rm[1];
52590b827d1SArd Biesheuvel }
52680d6f4c6SArd Biesheuvel 
52780d6f4c6SArd Biesheuvel void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm)
52880d6f4c6SArd Biesheuvel {
52980d6f4c6SArd Biesheuvel     uint64_t *rd = vd;
53080d6f4c6SArd Biesheuvel     uint64_t *rn = vn;
53180d6f4c6SArd Biesheuvel     uint64_t *rm = vm;
53280d6f4c6SArd Biesheuvel     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
53380d6f4c6SArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
53480d6f4c6SArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
53580d6f4c6SArd Biesheuvel     uint32_t t;
53680d6f4c6SArd Biesheuvel 
53780d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
53880d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
53980d6f4c6SArd Biesheuvel 
54080d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
54180d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
54280d6f4c6SArd Biesheuvel 
54380d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
54480d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
54580d6f4c6SArd Biesheuvel 
54680d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
54780d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
54880d6f4c6SArd Biesheuvel 
54980d6f4c6SArd Biesheuvel     rd[0] = d.l[0];
55080d6f4c6SArd Biesheuvel     rd[1] = d.l[1];
55180d6f4c6SArd Biesheuvel }
55280d6f4c6SArd Biesheuvel 
55380d6f4c6SArd Biesheuvel void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm)
55480d6f4c6SArd Biesheuvel {
55580d6f4c6SArd Biesheuvel     uint64_t *rd = vd;
55680d6f4c6SArd Biesheuvel     uint64_t *rn = vn;
55780d6f4c6SArd Biesheuvel     uint64_t *rm = vm;
55880d6f4c6SArd Biesheuvel     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
55980d6f4c6SArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
56080d6f4c6SArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
56180d6f4c6SArd Biesheuvel     uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
56280d6f4c6SArd Biesheuvel 
56380d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 0) ^= t;
56480d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
56580d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
56680d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
56780d6f4c6SArd Biesheuvel                         ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
56880d6f4c6SArd Biesheuvel 
56980d6f4c6SArd Biesheuvel     rd[0] = d.l[0];
57080d6f4c6SArd Biesheuvel     rd[1] = d.l[1];
57180d6f4c6SArd Biesheuvel }
57280d6f4c6SArd Biesheuvel 
57380d6f4c6SArd Biesheuvel void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2,
57480d6f4c6SArd Biesheuvel                           uint32_t opcode)
57580d6f4c6SArd Biesheuvel {
57680d6f4c6SArd Biesheuvel     uint64_t *rd = vd;
57780d6f4c6SArd Biesheuvel     uint64_t *rn = vn;
57880d6f4c6SArd Biesheuvel     uint64_t *rm = vm;
57980d6f4c6SArd Biesheuvel     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
58080d6f4c6SArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
58180d6f4c6SArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
58280d6f4c6SArd Biesheuvel     uint32_t t;
58380d6f4c6SArd Biesheuvel 
58480d6f4c6SArd Biesheuvel     assert(imm2 < 4);
58580d6f4c6SArd Biesheuvel 
58680d6f4c6SArd Biesheuvel     if (opcode == 0 || opcode == 2) {
58780d6f4c6SArd Biesheuvel         /* SM3TT1A, SM3TT2A */
58880d6f4c6SArd Biesheuvel         t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
58980d6f4c6SArd Biesheuvel     } else if (opcode == 1) {
59080d6f4c6SArd Biesheuvel         /* SM3TT1B */
59180d6f4c6SArd Biesheuvel         t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
59280d6f4c6SArd Biesheuvel     } else if (opcode == 3) {
59380d6f4c6SArd Biesheuvel         /* SM3TT2B */
59480d6f4c6SArd Biesheuvel         t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
59580d6f4c6SArd Biesheuvel     } else {
59680d6f4c6SArd Biesheuvel         g_assert_not_reached();
59780d6f4c6SArd Biesheuvel     }
59880d6f4c6SArd Biesheuvel 
59980d6f4c6SArd Biesheuvel     t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
60080d6f4c6SArd Biesheuvel 
60180d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
60280d6f4c6SArd Biesheuvel 
60380d6f4c6SArd Biesheuvel     if (opcode < 2) {
60480d6f4c6SArd Biesheuvel         /* SM3TT1A, SM3TT1B */
60580d6f4c6SArd Biesheuvel         t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
60680d6f4c6SArd Biesheuvel 
60780d6f4c6SArd Biesheuvel         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
60880d6f4c6SArd Biesheuvel     } else {
60980d6f4c6SArd Biesheuvel         /* SM3TT2A, SM3TT2B */
61080d6f4c6SArd Biesheuvel         t += CR_ST_WORD(n, 3);
61180d6f4c6SArd Biesheuvel         t ^= rol32(t, 9) ^ rol32(t, 17);
61280d6f4c6SArd Biesheuvel 
61380d6f4c6SArd Biesheuvel         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
61480d6f4c6SArd Biesheuvel     }
61580d6f4c6SArd Biesheuvel 
61680d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
61780d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 3) = t;
61880d6f4c6SArd Biesheuvel 
61980d6f4c6SArd Biesheuvel     rd[0] = d.l[0];
62080d6f4c6SArd Biesheuvel     rd[1] = d.l[1];
62180d6f4c6SArd Biesheuvel }
622b6577bcdSArd Biesheuvel 
/*
 * SM4 byte substitution table: 256 entries, indexed by the input byte.
 * Used by the SM4E and SM4EKEY helpers below.
 */
static const uint8_t sm4_sbox[256] = {
    0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
    0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
    0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
    0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
    0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
    0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
    0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
    0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
    0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
    0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
    0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
    0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
    0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
    0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
    0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
    0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
    0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
    0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
    0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
    0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
    0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
    0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
    0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
    0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
    0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
    0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
    0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
    0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
    0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
    0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
    0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
    0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
};
657b6577bcdSArd Biesheuvel 
658*a04b68e1SRichard Henderson static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
659b6577bcdSArd Biesheuvel {
660*a04b68e1SRichard Henderson     union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
661*a04b68e1SRichard Henderson     union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
662b6577bcdSArd Biesheuvel     uint32_t t, i;
663b6577bcdSArd Biesheuvel 
664b6577bcdSArd Biesheuvel     for (i = 0; i < 4; i++) {
665b6577bcdSArd Biesheuvel         t = CR_ST_WORD(d, (i + 1) % 4) ^
666b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 2) % 4) ^
667b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 3) % 4) ^
668b6577bcdSArd Biesheuvel             CR_ST_WORD(n, i);
669b6577bcdSArd Biesheuvel 
670b6577bcdSArd Biesheuvel         t = sm4_sbox[t & 0xff] |
671b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 8) & 0xff] << 8 |
672b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 16) & 0xff] << 16 |
673b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 24) & 0xff] << 24;
674b6577bcdSArd Biesheuvel 
675b6577bcdSArd Biesheuvel         CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
676b6577bcdSArd Biesheuvel                             rol32(t, 24);
677b6577bcdSArd Biesheuvel     }
678b6577bcdSArd Biesheuvel 
679b6577bcdSArd Biesheuvel     rd[0] = d.l[0];
680b6577bcdSArd Biesheuvel     rd[1] = d.l[1];
681b6577bcdSArd Biesheuvel }
682b6577bcdSArd Biesheuvel 
683*a04b68e1SRichard Henderson void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
684b6577bcdSArd Biesheuvel {
685*a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
686*a04b68e1SRichard Henderson 
687*a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
688*a04b68e1SRichard Henderson         do_crypto_sm4e(vd + i, vn + i, vm + i);
689*a04b68e1SRichard Henderson     }
690*a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
691*a04b68e1SRichard Henderson }
692*a04b68e1SRichard Henderson 
693*a04b68e1SRichard Henderson static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
694*a04b68e1SRichard Henderson {
695b6577bcdSArd Biesheuvel     union CRYPTO_STATE d;
696b6577bcdSArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
697b6577bcdSArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
698b6577bcdSArd Biesheuvel     uint32_t t, i;
699b6577bcdSArd Biesheuvel 
700b6577bcdSArd Biesheuvel     d = n;
701b6577bcdSArd Biesheuvel     for (i = 0; i < 4; i++) {
702b6577bcdSArd Biesheuvel         t = CR_ST_WORD(d, (i + 1) % 4) ^
703b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 2) % 4) ^
704b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 3) % 4) ^
705b6577bcdSArd Biesheuvel             CR_ST_WORD(m, i);
706b6577bcdSArd Biesheuvel 
707b6577bcdSArd Biesheuvel         t = sm4_sbox[t & 0xff] |
708b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 8) & 0xff] << 8 |
709b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 16) & 0xff] << 16 |
710b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 24) & 0xff] << 24;
711b6577bcdSArd Biesheuvel 
712b6577bcdSArd Biesheuvel         CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
713b6577bcdSArd Biesheuvel     }
714b6577bcdSArd Biesheuvel 
715b6577bcdSArd Biesheuvel     rd[0] = d.l[0];
716b6577bcdSArd Biesheuvel     rd[1] = d.l[1];
717b6577bcdSArd Biesheuvel }
718*a04b68e1SRichard Henderson 
719*a04b68e1SRichard Henderson void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
720*a04b68e1SRichard Henderson {
721*a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
722*a04b68e1SRichard Henderson 
723*a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
724*a04b68e1SRichard Henderson         do_crypto_sm4ekey(vd + i, vn + i, vm + i);
725*a04b68e1SRichard Henderson     }
726*a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
727*a04b68e1SRichard Henderson }
728