19d935509SArd Biesheuvel /* 29d935509SArd Biesheuvel * crypto_helper.c - emulate v8 Crypto Extensions instructions 39d935509SArd Biesheuvel * 490b827d1SArd Biesheuvel * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> 59d935509SArd Biesheuvel * 69d935509SArd Biesheuvel * This library is free software; you can redistribute it and/or 79d935509SArd Biesheuvel * modify it under the terms of the GNU Lesser General Public 89d935509SArd Biesheuvel * License as published by the Free Software Foundation; either 99d935509SArd Biesheuvel * version 2 of the License, or (at your option) any later version. 109d935509SArd Biesheuvel */ 119d935509SArd Biesheuvel 1274c21bd0SPeter Maydell #include "qemu/osdep.h" 139d935509SArd Biesheuvel 149d935509SArd Biesheuvel #include "cpu.h" 152ef6175aSRichard Henderson #include "exec/helper-proto.h" 16a04b68e1SRichard Henderson #include "tcg/tcg-gvec-desc.h" 176f2945cdSDaniel P. Berrange #include "crypto/aes.h" 18a04b68e1SRichard Henderson #include "vec_internal.h" 199d935509SArd Biesheuvel 20f1ecb913SArd Biesheuvel union CRYPTO_STATE { 219d935509SArd Biesheuvel uint8_t bytes[16]; 22f1ecb913SArd Biesheuvel uint32_t words[4]; 239d935509SArd Biesheuvel uint64_t l[2]; 249d935509SArd Biesheuvel }; 259d935509SArd Biesheuvel 26b449ca3cSArd Biesheuvel #ifdef HOST_WORDS_BIGENDIAN 27b449ca3cSArd Biesheuvel #define CR_ST_BYTE(state, i) (state.bytes[(15 - (i)) ^ 8]) 28b449ca3cSArd Biesheuvel #define CR_ST_WORD(state, i) (state.words[(3 - (i)) ^ 2]) 29b449ca3cSArd Biesheuvel #else 30b449ca3cSArd Biesheuvel #define CR_ST_BYTE(state, i) (state.bytes[i]) 31b449ca3cSArd Biesheuvel #define CR_ST_WORD(state, i) (state.words[i]) 32b449ca3cSArd Biesheuvel #endif 33b449ca3cSArd Biesheuvel 34aaffebd6SRichard Henderson /* 35aaffebd6SRichard Henderson * The caller has not been converted to full gvec, and so only 36aaffebd6SRichard Henderson * modifies the low 16 bytes of the vector register. 37aaffebd6SRichard Henderson */ 38aaffebd6SRichard Henderson static void clear_tail_16(void *vd, uint32_t desc) 39aaffebd6SRichard Henderson { 40aaffebd6SRichard Henderson int opr_sz = simd_oprsz(desc); 41aaffebd6SRichard Henderson int max_sz = simd_maxsz(desc); 42aaffebd6SRichard Henderson 43aaffebd6SRichard Henderson assert(opr_sz == 16); 44aaffebd6SRichard Henderson clear_tail(vd, opr_sz, max_sz); 45aaffebd6SRichard Henderson } 46aaffebd6SRichard Henderson 47a04b68e1SRichard Henderson static void do_crypto_aese(uint64_t *rd, uint64_t *rn, 48a04b68e1SRichard Henderson uint64_t *rm, bool decrypt) 499d935509SArd Biesheuvel { 5059dcd29aSTom Musta static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; 5159dcd29aSTom Musta static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; 521a66ac61SRichard Henderson union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; 53a04b68e1SRichard Henderson union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; 549d935509SArd Biesheuvel int i; 559d935509SArd Biesheuvel 569d935509SArd Biesheuvel /* xor state vector with round key */ 579d935509SArd Biesheuvel rk.l[0] ^= st.l[0]; 589d935509SArd Biesheuvel rk.l[1] ^= st.l[1]; 599d935509SArd Biesheuvel 609d935509SArd Biesheuvel /* combine ShiftRows operation and sbox substitution */ 619d935509SArd Biesheuvel for (i = 0; i < 16; i++) { 62b449ca3cSArd Biesheuvel CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])]; 639d935509SArd Biesheuvel } 649d935509SArd Biesheuvel 651a66ac61SRichard Henderson rd[0] = st.l[0]; 661a66ac61SRichard Henderson rd[1] = st.l[1]; 679d935509SArd Biesheuvel } 689d935509SArd Biesheuvel 69a04b68e1SRichard Henderson void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) 70a04b68e1SRichard Henderson { 71a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 72a04b68e1SRichard Henderson bool decrypt = simd_data(desc); 73a04b68e1SRichard Henderson 74a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 75a04b68e1SRichard Henderson do_crypto_aese(vd + i, vn + i, vm + i, decrypt); 76a04b68e1SRichard Henderson } 77a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 78a04b68e1SRichard Henderson } 79a04b68e1SRichard Henderson 80a04b68e1SRichard Henderson static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt) 819d935509SArd Biesheuvel { 829d935509SArd Biesheuvel static uint32_t const mc[][256] = { { 839d935509SArd Biesheuvel /* MixColumns lookup table */ 849d935509SArd Biesheuvel 0x00000000, 0x03010102, 0x06020204, 0x05030306, 859d935509SArd Biesheuvel 0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e, 869d935509SArd Biesheuvel 0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16, 879d935509SArd Biesheuvel 0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e, 889d935509SArd Biesheuvel 0x30101020, 0x33111122, 0x36121224, 0x35131326, 899d935509SArd Biesheuvel 0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e, 909d935509SArd Biesheuvel 0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36, 919d935509SArd Biesheuvel 0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e, 929d935509SArd Biesheuvel 0x60202040, 0x63212142, 0x66222244, 0x65232346, 939d935509SArd Biesheuvel 0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e, 949d935509SArd Biesheuvel 0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56, 959d935509SArd Biesheuvel 0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e, 969d935509SArd Biesheuvel 0x50303060, 0x53313162, 0x56323264, 0x55333366, 979d935509SArd Biesheuvel 0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e, 989d935509SArd Biesheuvel 0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76, 999d935509SArd Biesheuvel 0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e, 1009d935509SArd Biesheuvel 0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386, 1019d935509SArd Biesheuvel 0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e, 1029d935509SArd Biesheuvel 0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96, 1039d935509SArd Biesheuvel 0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e, 1049d935509SArd Biesheuvel 0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6, 1059d935509SArd Biesheuvel 0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae, 1069d935509SArd Biesheuvel 0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6, 1079d935509SArd Biesheuvel 0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe, 1089d935509SArd Biesheuvel 0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6, 1099d935509SArd Biesheuvel 0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce, 1109d935509SArd Biesheuvel 0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6, 1119d935509SArd Biesheuvel 0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde, 1129d935509SArd Biesheuvel 0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6, 1139d935509SArd Biesheuvel 0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee, 1149d935509SArd Biesheuvel 0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6, 1159d935509SArd Biesheuvel 0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe, 1169d935509SArd Biesheuvel 0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d, 1179d935509SArd Biesheuvel 0x97848413, 0x94858511, 0x91868617, 0x92878715, 1189d935509SArd Biesheuvel 0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d, 1199d935509SArd Biesheuvel 0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05, 1209d935509SArd Biesheuvel 0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d, 1219d935509SArd Biesheuvel 0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735, 1229d935509SArd Biesheuvel 0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d, 1239d935509SArd Biesheuvel 0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25, 1249d935509SArd Biesheuvel 0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d, 1259d935509SArd Biesheuvel 0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755, 1269d935509SArd Biesheuvel 0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d, 1279d935509SArd Biesheuvel 0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45, 1289d935509SArd Biesheuvel 0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d, 1299d935509SArd Biesheuvel 0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775, 1309d935509SArd Biesheuvel 0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d, 1319d935509SArd Biesheuvel 0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65, 1329d935509SArd Biesheuvel 0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d, 1339d935509SArd Biesheuvel 0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795, 1349d935509SArd Biesheuvel 0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d, 1359d935509SArd Biesheuvel 0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85, 1369d935509SArd Biesheuvel 0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd, 1379d935509SArd Biesheuvel 0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5, 1389d935509SArd Biesheuvel 0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad, 1399d935509SArd Biesheuvel 0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5, 1409d935509SArd Biesheuvel 0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd, 1419d935509SArd Biesheuvel 0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5, 1429d935509SArd Biesheuvel 0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd, 1439d935509SArd Biesheuvel 0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5, 1449d935509SArd Biesheuvel 0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd, 1459d935509SArd Biesheuvel 0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5, 1469d935509SArd Biesheuvel 0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed, 1479d935509SArd Biesheuvel 0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5, 1489d935509SArd Biesheuvel }, { 1499d935509SArd Biesheuvel /* Inverse MixColumns lookup table */ 1509d935509SArd Biesheuvel 0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12, 1519d935509SArd Biesheuvel 0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a, 1529d935509SArd Biesheuvel 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362, 1539d935509SArd Biesheuvel 0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a, 1549d935509SArd Biesheuvel 0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2, 1559d935509SArd Biesheuvel 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca, 1569d935509SArd Biesheuvel 0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382, 1579d935509SArd Biesheuvel 0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba, 1589d935509SArd Biesheuvel 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9, 1599d935509SArd Biesheuvel 0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1, 1609d935509SArd Biesheuvel 0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9, 1619d935509SArd Biesheuvel 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81, 1629d935509SArd Biesheuvel 0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029, 1639d935509SArd Biesheuvel 0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411, 1649d935509SArd Biesheuvel 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859, 1659d935509SArd Biesheuvel 0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61, 1669d935509SArd Biesheuvel 0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf, 1679d935509SArd Biesheuvel 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987, 1689d935509SArd Biesheuvel 0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf, 1699d935509SArd Biesheuvel 0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7, 1709d935509SArd Biesheuvel 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f, 1719d935509SArd Biesheuvel 0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967, 1729d935509SArd Biesheuvel 0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f, 1739d935509SArd Biesheuvel 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117, 1749d935509SArd Biesheuvel 0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664, 1759d935509SArd Biesheuvel 0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c, 1769d935509SArd Biesheuvel 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14, 1779d935509SArd Biesheuvel 0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c, 1789d935509SArd Biesheuvel 0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684, 1799d935509SArd Biesheuvel 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc, 1809d935509SArd Biesheuvel 0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4, 1819d935509SArd Biesheuvel 0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc, 1829d935509SArd Biesheuvel 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753, 1839d935509SArd Biesheuvel 0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b, 1849d935509SArd Biesheuvel 0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23, 1859d935509SArd Biesheuvel 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b, 1869d935509SArd Biesheuvel 0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3, 1879d935509SArd Biesheuvel 0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b, 1889d935509SArd Biesheuvel 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3, 1899d935509SArd Biesheuvel 0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb, 1909d935509SArd Biesheuvel 0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88, 1919d935509SArd Biesheuvel 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0, 1929d935509SArd Biesheuvel 0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8, 1939d935509SArd Biesheuvel 0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0, 1949d935509SArd Biesheuvel 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68, 1959d935509SArd Biesheuvel 0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850, 1969d935509SArd Biesheuvel 0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418, 1979d935509SArd Biesheuvel 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020, 1989d935509SArd Biesheuvel 0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe, 1999d935509SArd Biesheuvel 0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6, 2009d935509SArd Biesheuvel 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e, 2019d935509SArd Biesheuvel 0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6, 2029d935509SArd Biesheuvel 0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e, 2039d935509SArd Biesheuvel 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526, 2049d935509SArd Biesheuvel 0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e, 2059d935509SArd Biesheuvel 0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56, 2069d935509SArd Biesheuvel 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25, 2079d935509SArd Biesheuvel 0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d, 2089d935509SArd Biesheuvel 0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255, 2099d935509SArd Biesheuvel 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d, 2109d935509SArd Biesheuvel 0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5, 2119d935509SArd Biesheuvel 0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd, 2129d935509SArd Biesheuvel 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5, 2139d935509SArd Biesheuvel 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, 2149d935509SArd Biesheuvel } }; 2151a66ac61SRichard Henderson 2161a66ac61SRichard Henderson union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; 2179d935509SArd Biesheuvel int i; 2189d935509SArd Biesheuvel 2199d935509SArd Biesheuvel for (i = 0; i < 16; i += 4) { 220b449ca3cSArd Biesheuvel CR_ST_WORD(st, i >> 2) = 221b449ca3cSArd Biesheuvel mc[decrypt][CR_ST_BYTE(st, i)] ^ 222b449ca3cSArd Biesheuvel rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^ 223b449ca3cSArd Biesheuvel rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^ 224b449ca3cSArd Biesheuvel rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24); 2259d935509SArd Biesheuvel } 2269d935509SArd Biesheuvel 2271a66ac61SRichard Henderson rd[0] = st.l[0]; 2281a66ac61SRichard Henderson rd[1] = st.l[1]; 2299d935509SArd Biesheuvel } 230f1ecb913SArd Biesheuvel 231a04b68e1SRichard Henderson void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) 232a04b68e1SRichard Henderson { 233a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 234a04b68e1SRichard Henderson bool decrypt = simd_data(desc); 235a04b68e1SRichard Henderson 236a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 237a04b68e1SRichard Henderson do_crypto_aesmc(vd + i, vm + i, decrypt); 238a04b68e1SRichard Henderson } 239a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 240a04b68e1SRichard Henderson } 241a04b68e1SRichard Henderson 242f1ecb913SArd Biesheuvel /* 243f1ecb913SArd Biesheuvel * SHA-1 logical functions 244f1ecb913SArd Biesheuvel */ 245f1ecb913SArd Biesheuvel 246f1ecb913SArd Biesheuvel static uint32_t cho(uint32_t x, uint32_t y, uint32_t z) 247f1ecb913SArd Biesheuvel { 248f1ecb913SArd Biesheuvel return (x & (y ^ z)) ^ z; 249f1ecb913SArd Biesheuvel } 250f1ecb913SArd Biesheuvel 251f1ecb913SArd Biesheuvel static uint32_t par(uint32_t x, uint32_t y, uint32_t z) 252f1ecb913SArd Biesheuvel { 253f1ecb913SArd Biesheuvel return x ^ y ^ z; 254f1ecb913SArd Biesheuvel } 255f1ecb913SArd Biesheuvel 256f1ecb913SArd Biesheuvel static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) 257f1ecb913SArd Biesheuvel { 258f1ecb913SArd Biesheuvel return (x & y) | ((x | y) & z); 259f1ecb913SArd Biesheuvel } 260f1ecb913SArd Biesheuvel 2611a66ac61SRichard Henderson void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op) 262f1ecb913SArd Biesheuvel { 2631a66ac61SRichard Henderson uint64_t *rd = vd; 2641a66ac61SRichard Henderson uint64_t *rn = vn; 2651a66ac61SRichard Henderson uint64_t *rm = vm; 2661a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 2671a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 2681a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 269f1ecb913SArd Biesheuvel 270f1ecb913SArd Biesheuvel if (op == 3) { /* sha1su0 */ 271f1ecb913SArd Biesheuvel d.l[0] ^= d.l[1] ^ m.l[0]; 272f1ecb913SArd Biesheuvel d.l[1] ^= n.l[0] ^ m.l[1]; 273f1ecb913SArd Biesheuvel } else { 274f1ecb913SArd Biesheuvel int i; 275f1ecb913SArd Biesheuvel 276f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 277f1ecb913SArd Biesheuvel uint32_t t; 278f1ecb913SArd Biesheuvel 279f1ecb913SArd Biesheuvel switch (op) { 280f1ecb913SArd Biesheuvel case 0: /* sha1c */ 281b449ca3cSArd Biesheuvel t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); 282f1ecb913SArd Biesheuvel break; 283f1ecb913SArd Biesheuvel case 1: /* sha1p */ 284b449ca3cSArd Biesheuvel t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); 285f1ecb913SArd Biesheuvel break; 286f1ecb913SArd Biesheuvel case 2: /* sha1m */ 287b449ca3cSArd Biesheuvel t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); 288f1ecb913SArd Biesheuvel break; 289f1ecb913SArd Biesheuvel default: 290f1ecb913SArd Biesheuvel g_assert_not_reached(); 291f1ecb913SArd Biesheuvel } 292b449ca3cSArd Biesheuvel t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) 293b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 294f1ecb913SArd Biesheuvel 295b449ca3cSArd Biesheuvel CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); 296b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 297b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); 298b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 299b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = t; 300f1ecb913SArd Biesheuvel } 301f1ecb913SArd Biesheuvel } 3021a66ac61SRichard Henderson rd[0] = d.l[0]; 3031a66ac61SRichard Henderson rd[1] = d.l[1]; 304f1ecb913SArd Biesheuvel } 305f1ecb913SArd Biesheuvel 306*effa992fSRichard Henderson void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc) 307f1ecb913SArd Biesheuvel { 3081a66ac61SRichard Henderson uint64_t *rd = vd; 3091a66ac61SRichard Henderson uint64_t *rm = vm; 3101a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 311f1ecb913SArd Biesheuvel 312b449ca3cSArd Biesheuvel CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2); 313b449ca3cSArd Biesheuvel CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0; 314f1ecb913SArd Biesheuvel 3151a66ac61SRichard Henderson rd[0] = m.l[0]; 3161a66ac61SRichard Henderson rd[1] = m.l[1]; 317*effa992fSRichard Henderson 318*effa992fSRichard Henderson clear_tail_16(vd, desc); 319f1ecb913SArd Biesheuvel } 320f1ecb913SArd Biesheuvel 321*effa992fSRichard Henderson void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc) 322f1ecb913SArd Biesheuvel { 3231a66ac61SRichard Henderson uint64_t *rd = vd; 3241a66ac61SRichard Henderson uint64_t *rm = vm; 3251a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3261a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 327f1ecb913SArd Biesheuvel 328b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1); 329b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1); 330b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1); 331b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1); 332f1ecb913SArd Biesheuvel 3331a66ac61SRichard Henderson rd[0] = d.l[0]; 3341a66ac61SRichard Henderson rd[1] = d.l[1]; 335*effa992fSRichard Henderson 336*effa992fSRichard Henderson clear_tail_16(vd, desc); 337f1ecb913SArd Biesheuvel } 338f1ecb913SArd Biesheuvel 339f1ecb913SArd Biesheuvel /* 340f1ecb913SArd Biesheuvel * The SHA-256 logical functions, according to 341f1ecb913SArd Biesheuvel * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf 342f1ecb913SArd Biesheuvel */ 343f1ecb913SArd Biesheuvel 344f1ecb913SArd Biesheuvel static uint32_t S0(uint32_t x) 345f1ecb913SArd Biesheuvel { 346f1ecb913SArd Biesheuvel return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); 347f1ecb913SArd Biesheuvel } 348f1ecb913SArd Biesheuvel 349f1ecb913SArd Biesheuvel static uint32_t S1(uint32_t x) 350f1ecb913SArd Biesheuvel { 351f1ecb913SArd Biesheuvel return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); 352f1ecb913SArd Biesheuvel } 353f1ecb913SArd Biesheuvel 354f1ecb913SArd Biesheuvel static uint32_t s0(uint32_t x) 355f1ecb913SArd Biesheuvel { 356f1ecb913SArd Biesheuvel return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); 357f1ecb913SArd Biesheuvel } 358f1ecb913SArd Biesheuvel 359f1ecb913SArd Biesheuvel static uint32_t s1(uint32_t x) 360f1ecb913SArd Biesheuvel { 361f1ecb913SArd Biesheuvel return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); 362f1ecb913SArd Biesheuvel } 363f1ecb913SArd Biesheuvel 364*effa992fSRichard Henderson void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc) 365f1ecb913SArd Biesheuvel { 3661a66ac61SRichard Henderson uint64_t *rd = vd; 3671a66ac61SRichard Henderson uint64_t *rn = vn; 3681a66ac61SRichard Henderson uint64_t *rm = vm; 3691a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3701a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 3711a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 372f1ecb913SArd Biesheuvel int i; 373f1ecb913SArd Biesheuvel 374f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 375b449ca3cSArd Biesheuvel uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2)) 376b449ca3cSArd Biesheuvel + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0)) 377b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 378f1ecb913SArd Biesheuvel 379b449ca3cSArd Biesheuvel CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2); 380b449ca3cSArd Biesheuvel CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1); 381b449ca3cSArd Biesheuvel CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0); 382b449ca3cSArd Biesheuvel CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t; 383f1ecb913SArd Biesheuvel 384b449ca3cSArd Biesheuvel t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 385b449ca3cSArd Biesheuvel + S0(CR_ST_WORD(d, 0)); 386f1ecb913SArd Biesheuvel 387b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 388b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 389b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 390b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = t; 391f1ecb913SArd Biesheuvel } 392f1ecb913SArd Biesheuvel 3931a66ac61SRichard Henderson rd[0] = d.l[0]; 3941a66ac61SRichard Henderson rd[1] = d.l[1]; 395*effa992fSRichard Henderson 396*effa992fSRichard Henderson clear_tail_16(vd, desc); 397f1ecb913SArd Biesheuvel } 398f1ecb913SArd Biesheuvel 399*effa992fSRichard Henderson void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc) 400f1ecb913SArd Biesheuvel { 4011a66ac61SRichard Henderson uint64_t *rd = vd; 4021a66ac61SRichard Henderson uint64_t *rn = vn; 4031a66ac61SRichard Henderson uint64_t *rm = vm; 4041a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 4051a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 4061a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 407f1ecb913SArd Biesheuvel int i; 408f1ecb913SArd Biesheuvel 409f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 410b449ca3cSArd Biesheuvel uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 411b449ca3cSArd Biesheuvel + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0)) 412b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 413f1ecb913SArd Biesheuvel 414b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 415b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 416b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 417b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t; 418f1ecb913SArd Biesheuvel } 419f1ecb913SArd Biesheuvel 4201a66ac61SRichard Henderson rd[0] = d.l[0]; 4211a66ac61SRichard Henderson rd[1] = d.l[1]; 422*effa992fSRichard Henderson 423*effa992fSRichard Henderson clear_tail_16(vd, desc); 424f1ecb913SArd Biesheuvel } 425f1ecb913SArd Biesheuvel 426*effa992fSRichard Henderson void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc) 427f1ecb913SArd Biesheuvel { 4281a66ac61SRichard Henderson uint64_t *rd = vd; 4291a66ac61SRichard Henderson uint64_t *rm = vm; 4301a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 4311a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 432f1ecb913SArd Biesheuvel 433b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1)); 434b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2)); 435b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3)); 436b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0)); 437f1ecb913SArd Biesheuvel 4381a66ac61SRichard Henderson rd[0] = d.l[0]; 4391a66ac61SRichard Henderson rd[1] = d.l[1]; 440*effa992fSRichard Henderson 441*effa992fSRichard Henderson clear_tail_16(vd, desc); 442f1ecb913SArd Biesheuvel } 443f1ecb913SArd Biesheuvel 444*effa992fSRichard Henderson void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc) 445f1ecb913SArd Biesheuvel { 4461a66ac61SRichard Henderson uint64_t *rd = vd; 4471a66ac61SRichard Henderson uint64_t *rn = vn; 4481a66ac61SRichard Henderson uint64_t *rm = vm; 4491a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 4501a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 4511a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 452f1ecb913SArd Biesheuvel 453b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1); 454b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2); 455b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3); 456b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0); 457f1ecb913SArd Biesheuvel 4581a66ac61SRichard Henderson rd[0] = d.l[0]; 4591a66ac61SRichard Henderson rd[1] = d.l[1]; 460*effa992fSRichard Henderson 461*effa992fSRichard Henderson clear_tail_16(vd, desc); 462f1ecb913SArd Biesheuvel } 46390b827d1SArd Biesheuvel 46490b827d1SArd Biesheuvel /* 46590b827d1SArd Biesheuvel * The SHA-512 logical functions (same as above but using 64-bit operands) 46690b827d1SArd Biesheuvel */ 46790b827d1SArd Biesheuvel 46890b827d1SArd Biesheuvel static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z) 46990b827d1SArd Biesheuvel { 47090b827d1SArd Biesheuvel return (x & (y ^ z)) ^ z; 47190b827d1SArd Biesheuvel } 47290b827d1SArd Biesheuvel 47390b827d1SArd Biesheuvel static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z) 47490b827d1SArd Biesheuvel { 47590b827d1SArd Biesheuvel return (x & y) | ((x | y) & z); 47690b827d1SArd Biesheuvel } 47790b827d1SArd Biesheuvel 47890b827d1SArd Biesheuvel static uint64_t S0_512(uint64_t x) 47990b827d1SArd Biesheuvel { 48090b827d1SArd Biesheuvel return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); 48190b827d1SArd Biesheuvel } 48290b827d1SArd Biesheuvel 48390b827d1SArd Biesheuvel static uint64_t S1_512(uint64_t x) 48490b827d1SArd Biesheuvel { 48590b827d1SArd Biesheuvel return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); 48690b827d1SArd Biesheuvel } 48790b827d1SArd Biesheuvel 48890b827d1SArd Biesheuvel static uint64_t s0_512(uint64_t x) 48990b827d1SArd Biesheuvel { 49090b827d1SArd Biesheuvel return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); 49190b827d1SArd Biesheuvel } 49290b827d1SArd Biesheuvel 49390b827d1SArd Biesheuvel static uint64_t s1_512(uint64_t x) 49490b827d1SArd Biesheuvel { 49590b827d1SArd Biesheuvel return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); 49690b827d1SArd Biesheuvel } 49790b827d1SArd Biesheuvel 498aaffebd6SRichard Henderson void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) 49990b827d1SArd Biesheuvel { 50090b827d1SArd Biesheuvel uint64_t *rd = vd; 50190b827d1SArd Biesheuvel uint64_t *rn = vn; 50290b827d1SArd Biesheuvel uint64_t *rm = vm; 50390b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 50490b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 50590b827d1SArd Biesheuvel 50690b827d1SArd Biesheuvel d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]); 50790b827d1SArd Biesheuvel d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]); 50890b827d1SArd Biesheuvel 50990b827d1SArd Biesheuvel rd[0] = d0; 51090b827d1SArd Biesheuvel rd[1] = d1; 511aaffebd6SRichard Henderson 512aaffebd6SRichard Henderson clear_tail_16(vd, desc); 51390b827d1SArd Biesheuvel } 51490b827d1SArd Biesheuvel 515aaffebd6SRichard Henderson void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) 51690b827d1SArd Biesheuvel { 51790b827d1SArd Biesheuvel uint64_t *rd = vd; 51890b827d1SArd Biesheuvel uint64_t *rn = vn; 51990b827d1SArd Biesheuvel uint64_t *rm = vm; 52090b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 52190b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 52290b827d1SArd Biesheuvel 52390b827d1SArd Biesheuvel d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]); 52490b827d1SArd Biesheuvel d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]); 52590b827d1SArd Biesheuvel 52690b827d1SArd Biesheuvel rd[0] = d0; 52790b827d1SArd Biesheuvel rd[1] = d1; 528aaffebd6SRichard Henderson 529aaffebd6SRichard Henderson clear_tail_16(vd, desc); 53090b827d1SArd Biesheuvel } 53190b827d1SArd Biesheuvel 532aaffebd6SRichard Henderson void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) 53390b827d1SArd Biesheuvel { 53490b827d1SArd Biesheuvel uint64_t *rd = vd; 53590b827d1SArd Biesheuvel uint64_t *rn = vn; 53690b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 53790b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 53890b827d1SArd Biesheuvel 53990b827d1SArd Biesheuvel d0 += s0_512(rd[1]); 54090b827d1SArd Biesheuvel d1 += s0_512(rn[0]); 54190b827d1SArd Biesheuvel 54290b827d1SArd Biesheuvel rd[0] = d0; 54390b827d1SArd Biesheuvel rd[1] = d1; 544aaffebd6SRichard Henderson 545aaffebd6SRichard Henderson clear_tail_16(vd, desc); 54690b827d1SArd Biesheuvel } 54790b827d1SArd Biesheuvel 548aaffebd6SRichard Henderson void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) 54990b827d1SArd Biesheuvel { 55090b827d1SArd Biesheuvel uint64_t *rd = vd; 55190b827d1SArd Biesheuvel uint64_t *rn = vn; 55290b827d1SArd Biesheuvel uint64_t *rm = vm; 55390b827d1SArd Biesheuvel 55490b827d1SArd Biesheuvel rd[0] += s1_512(rn[0]) + rm[0]; 55590b827d1SArd Biesheuvel rd[1] += s1_512(rn[1]) + rm[1]; 556aaffebd6SRichard Henderson 557aaffebd6SRichard Henderson clear_tail_16(vd, desc); 55890b827d1SArd Biesheuvel } 55980d6f4c6SArd Biesheuvel 560aaffebd6SRichard Henderson void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) 56180d6f4c6SArd Biesheuvel { 56280d6f4c6SArd Biesheuvel uint64_t *rd = vd; 56380d6f4c6SArd Biesheuvel uint64_t *rn = vn; 56480d6f4c6SArd Biesheuvel uint64_t *rm = vm; 56580d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 56680d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 56780d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 56880d6f4c6SArd Biesheuvel uint32_t t; 56980d6f4c6SArd Biesheuvel 57080d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17); 57180d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9); 57280d6f4c6SArd Biesheuvel 57380d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17); 57480d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9); 57580d6f4c6SArd Biesheuvel 57680d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17); 57780d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9); 57880d6f4c6SArd Biesheuvel 57980d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17); 58080d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9); 58180d6f4c6SArd Biesheuvel 58280d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 58380d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 584aaffebd6SRichard Henderson 585aaffebd6SRichard Henderson clear_tail_16(vd, desc); 58680d6f4c6SArd Biesheuvel } 58780d6f4c6SArd Biesheuvel 588aaffebd6SRichard Henderson void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) 58980d6f4c6SArd Biesheuvel { 59080d6f4c6SArd Biesheuvel uint64_t *rd = vd; 59180d6f4c6SArd Biesheuvel uint64_t *rn = vn; 59280d6f4c6SArd Biesheuvel uint64_t *rm = vm; 59380d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 59480d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 59580d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 59680d6f4c6SArd Biesheuvel uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25); 59780d6f4c6SArd Biesheuvel 59880d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) ^= t; 59980d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25); 60080d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25); 60180d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^ 60280d6f4c6SArd Biesheuvel ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26); 60380d6f4c6SArd Biesheuvel 60480d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 60580d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 606aaffebd6SRichard Henderson 607aaffebd6SRichard Henderson clear_tail_16(vd, desc); 60880d6f4c6SArd Biesheuvel } 60980d6f4c6SArd Biesheuvel 61080d6f4c6SArd Biesheuvel void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, 61180d6f4c6SArd Biesheuvel uint32_t opcode) 61280d6f4c6SArd Biesheuvel { 61380d6f4c6SArd Biesheuvel uint64_t *rd = vd; 61480d6f4c6SArd Biesheuvel uint64_t *rn = vn; 61580d6f4c6SArd Biesheuvel uint64_t *rm = vm; 61680d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 61780d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 61880d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 61980d6f4c6SArd Biesheuvel uint32_t t; 62080d6f4c6SArd Biesheuvel 62180d6f4c6SArd Biesheuvel assert(imm2 < 4); 62280d6f4c6SArd Biesheuvel 62380d6f4c6SArd Biesheuvel if (opcode == 0 || opcode == 2) { 62480d6f4c6SArd Biesheuvel /* SM3TT1A, SM3TT2A */ 62580d6f4c6SArd Biesheuvel t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 62680d6f4c6SArd Biesheuvel } else if (opcode == 1) { 62780d6f4c6SArd Biesheuvel /* SM3TT1B */ 62880d6f4c6SArd Biesheuvel t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 62980d6f4c6SArd Biesheuvel } else if (opcode == 3) { 63080d6f4c6SArd Biesheuvel /* SM3TT2B */ 63180d6f4c6SArd Biesheuvel t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 63280d6f4c6SArd Biesheuvel } else { 63380d6f4c6SArd Biesheuvel g_assert_not_reached(); 63480d6f4c6SArd Biesheuvel } 63580d6f4c6SArd Biesheuvel 63680d6f4c6SArd Biesheuvel t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); 63780d6f4c6SArd Biesheuvel 63880d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1); 63980d6f4c6SArd Biesheuvel 64080d6f4c6SArd Biesheuvel if (opcode < 2) { 64180d6f4c6SArd Biesheuvel /* SM3TT1A, SM3TT1B */ 64280d6f4c6SArd Biesheuvel t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20); 64380d6f4c6SArd Biesheuvel 64480d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23); 64580d6f4c6SArd Biesheuvel } else { 64680d6f4c6SArd Biesheuvel /* SM3TT2A, SM3TT2B */ 64780d6f4c6SArd Biesheuvel t += CR_ST_WORD(n, 3); 64880d6f4c6SArd Biesheuvel t ^= rol32(t, 9) ^ rol32(t, 17); 64980d6f4c6SArd Biesheuvel 65080d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13); 65180d6f4c6SArd Biesheuvel } 65280d6f4c6SArd Biesheuvel 65380d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3); 65480d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) = t; 65580d6f4c6SArd Biesheuvel 65680d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 65780d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 65880d6f4c6SArd Biesheuvel } 659b6577bcdSArd Biesheuvel 660b6577bcdSArd Biesheuvel static uint8_t const sm4_sbox[] = { 661b6577bcdSArd Biesheuvel 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 662b6577bcdSArd Biesheuvel 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, 663b6577bcdSArd Biesheuvel 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, 664b6577bcdSArd Biesheuvel 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, 665b6577bcdSArd Biesheuvel 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, 666b6577bcdSArd Biesheuvel 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, 667b6577bcdSArd Biesheuvel 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, 668b6577bcdSArd Biesheuvel 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, 669b6577bcdSArd Biesheuvel 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, 670b6577bcdSArd Biesheuvel 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, 671b6577bcdSArd Biesheuvel 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, 672b6577bcdSArd Biesheuvel 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, 673b6577bcdSArd Biesheuvel 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, 674b6577bcdSArd Biesheuvel 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, 675b6577bcdSArd Biesheuvel 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, 676b6577bcdSArd Biesheuvel 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, 677b6577bcdSArd Biesheuvel 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, 678b6577bcdSArd Biesheuvel 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, 679b6577bcdSArd Biesheuvel 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, 680b6577bcdSArd Biesheuvel 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, 681b6577bcdSArd Biesheuvel 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, 682b6577bcdSArd Biesheuvel 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, 683b6577bcdSArd Biesheuvel 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, 684b6577bcdSArd Biesheuvel 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, 685b6577bcdSArd Biesheuvel 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, 686b6577bcdSArd Biesheuvel 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8, 687b6577bcdSArd Biesheuvel 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, 688b6577bcdSArd Biesheuvel 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, 689b6577bcdSArd Biesheuvel 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, 690b6577bcdSArd Biesheuvel 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, 691b6577bcdSArd Biesheuvel 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, 692b6577bcdSArd Biesheuvel 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, 693b6577bcdSArd Biesheuvel }; 694b6577bcdSArd Biesheuvel 695a04b68e1SRichard Henderson static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) 696b6577bcdSArd Biesheuvel { 697a04b68e1SRichard Henderson union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; 698a04b68e1SRichard Henderson union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; 699b6577bcdSArd Biesheuvel uint32_t t, i; 700b6577bcdSArd Biesheuvel 701b6577bcdSArd Biesheuvel for (i = 0; i < 4; i++) { 702b6577bcdSArd Biesheuvel t = CR_ST_WORD(d, (i + 1) % 4) ^ 703b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 2) % 4) ^ 704b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 3) % 4) ^ 705b6577bcdSArd Biesheuvel CR_ST_WORD(n, i); 706b6577bcdSArd Biesheuvel 707b6577bcdSArd Biesheuvel t = sm4_sbox[t & 0xff] | 708b6577bcdSArd Biesheuvel sm4_sbox[(t >> 8) & 0xff] << 8 | 709b6577bcdSArd Biesheuvel sm4_sbox[(t >> 16) & 0xff] << 16 | 710b6577bcdSArd Biesheuvel sm4_sbox[(t >> 24) & 0xff] << 24; 711b6577bcdSArd Biesheuvel 712b6577bcdSArd Biesheuvel CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ 713b6577bcdSArd Biesheuvel rol32(t, 24); 714b6577bcdSArd Biesheuvel } 715b6577bcdSArd Biesheuvel 716b6577bcdSArd Biesheuvel rd[0] = d.l[0]; 717b6577bcdSArd Biesheuvel rd[1] = d.l[1]; 718b6577bcdSArd Biesheuvel } 719b6577bcdSArd Biesheuvel 720a04b68e1SRichard Henderson void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) 721b6577bcdSArd Biesheuvel { 722a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 723a04b68e1SRichard Henderson 724a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 725a04b68e1SRichard Henderson do_crypto_sm4e(vd + i, vn + i, vm + i); 726a04b68e1SRichard Henderson } 727a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 728a04b68e1SRichard Henderson } 729a04b68e1SRichard Henderson 730a04b68e1SRichard Henderson static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) 731a04b68e1SRichard Henderson { 732b6577bcdSArd Biesheuvel union CRYPTO_STATE d; 733b6577bcdSArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 734b6577bcdSArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 735b6577bcdSArd Biesheuvel uint32_t t, i; 736b6577bcdSArd Biesheuvel 737b6577bcdSArd Biesheuvel d = n; 738b6577bcdSArd Biesheuvel for (i = 0; i < 4; i++) { 739b6577bcdSArd Biesheuvel t = CR_ST_WORD(d, (i + 1) % 4) ^ 740b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 2) % 4) ^ 741b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 3) % 4) ^ 742b6577bcdSArd Biesheuvel CR_ST_WORD(m, i); 743b6577bcdSArd Biesheuvel 744b6577bcdSArd Biesheuvel t = sm4_sbox[t & 0xff] | 745b6577bcdSArd Biesheuvel sm4_sbox[(t >> 8) & 0xff] << 8 | 746b6577bcdSArd Biesheuvel sm4_sbox[(t >> 16) & 0xff] << 16 | 747b6577bcdSArd Biesheuvel sm4_sbox[(t >> 24) & 0xff] << 24; 748b6577bcdSArd Biesheuvel 749b6577bcdSArd Biesheuvel CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); 750b6577bcdSArd Biesheuvel } 751b6577bcdSArd Biesheuvel 752b6577bcdSArd Biesheuvel rd[0] = d.l[0]; 753b6577bcdSArd Biesheuvel rd[1] = d.l[1]; 754b6577bcdSArd Biesheuvel } 755a04b68e1SRichard Henderson 756a04b68e1SRichard Henderson void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) 757a04b68e1SRichard Henderson { 758a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 759a04b68e1SRichard Henderson 760a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 761a04b68e1SRichard Henderson do_crypto_sm4ekey(vd + i, vn + i, vm + i); 762a04b68e1SRichard Henderson } 763a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 764a04b68e1SRichard Henderson } 7651738860dSRichard Henderson 7661738860dSRichard Henderson void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) 7671738860dSRichard Henderson { 7681738860dSRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 7691738860dSRichard Henderson uint64_t *d = vd, *n = vn, *m = vm; 7701738860dSRichard Henderson 7711738860dSRichard Henderson for (i = 0; i < opr_sz / 8; ++i) { 7721738860dSRichard Henderson d[i] = n[i] ^ rol64(m[i], 1); 7731738860dSRichard Henderson } 7741738860dSRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 7751738860dSRichard Henderson } 776