19d935509SArd Biesheuvel /* 29d935509SArd Biesheuvel * crypto_helper.c - emulate v8 Crypto Extensions instructions 39d935509SArd Biesheuvel * 490b827d1SArd Biesheuvel * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> 59d935509SArd Biesheuvel * 69d935509SArd Biesheuvel * This library is free software; you can redistribute it and/or 79d935509SArd Biesheuvel * modify it under the terms of the GNU Lesser General Public 89d935509SArd Biesheuvel * License as published by the Free Software Foundation; either 99d935509SArd Biesheuvel * version 2 of the License, or (at your option) any later version. 109d935509SArd Biesheuvel */ 119d935509SArd Biesheuvel 1274c21bd0SPeter Maydell #include "qemu/osdep.h" 139d935509SArd Biesheuvel 149d935509SArd Biesheuvel #include "cpu.h" 152ef6175aSRichard Henderson #include "exec/helper-proto.h" 16*a04b68e1SRichard Henderson #include "tcg/tcg-gvec-desc.h" 176f2945cdSDaniel P. Berrange #include "crypto/aes.h" 18*a04b68e1SRichard Henderson #include "vec_internal.h" 199d935509SArd Biesheuvel 20f1ecb913SArd Biesheuvel union CRYPTO_STATE { 219d935509SArd Biesheuvel uint8_t bytes[16]; 22f1ecb913SArd Biesheuvel uint32_t words[4]; 239d935509SArd Biesheuvel uint64_t l[2]; 249d935509SArd Biesheuvel }; 259d935509SArd Biesheuvel 26b449ca3cSArd Biesheuvel #ifdef HOST_WORDS_BIGENDIAN 27b449ca3cSArd Biesheuvel #define CR_ST_BYTE(state, i) (state.bytes[(15 - (i)) ^ 8]) 28b449ca3cSArd Biesheuvel #define CR_ST_WORD(state, i) (state.words[(3 - (i)) ^ 2]) 29b449ca3cSArd Biesheuvel #else 30b449ca3cSArd Biesheuvel #define CR_ST_BYTE(state, i) (state.bytes[i]) 31b449ca3cSArd Biesheuvel #define CR_ST_WORD(state, i) (state.words[i]) 32b449ca3cSArd Biesheuvel #endif 33b449ca3cSArd Biesheuvel 34*a04b68e1SRichard Henderson static void do_crypto_aese(uint64_t *rd, uint64_t *rn, 35*a04b68e1SRichard Henderson uint64_t *rm, bool decrypt) 369d935509SArd Biesheuvel { 3759dcd29aSTom Musta static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; 3859dcd29aSTom Musta static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; 391a66ac61SRichard Henderson union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; 40*a04b68e1SRichard Henderson union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; 419d935509SArd Biesheuvel int i; 429d935509SArd Biesheuvel 439d935509SArd Biesheuvel /* xor state vector with round key */ 449d935509SArd Biesheuvel rk.l[0] ^= st.l[0]; 459d935509SArd Biesheuvel rk.l[1] ^= st.l[1]; 469d935509SArd Biesheuvel 479d935509SArd Biesheuvel /* combine ShiftRows operation and sbox substitution */ 489d935509SArd Biesheuvel for (i = 0; i < 16; i++) { 49b449ca3cSArd Biesheuvel CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])]; 509d935509SArd Biesheuvel } 519d935509SArd Biesheuvel 521a66ac61SRichard Henderson rd[0] = st.l[0]; 531a66ac61SRichard Henderson rd[1] = st.l[1]; 549d935509SArd Biesheuvel } 559d935509SArd Biesheuvel 56*a04b68e1SRichard Henderson void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) 57*a04b68e1SRichard Henderson { 58*a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 59*a04b68e1SRichard Henderson bool decrypt = simd_data(desc); 60*a04b68e1SRichard Henderson 61*a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 62*a04b68e1SRichard Henderson do_crypto_aese(vd + i, vn + i, vm + i, decrypt); 63*a04b68e1SRichard Henderson } 64*a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 65*a04b68e1SRichard Henderson } 66*a04b68e1SRichard Henderson 67*a04b68e1SRichard Henderson static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt) 689d935509SArd Biesheuvel { 699d935509SArd Biesheuvel static uint32_t const mc[][256] = { { 709d935509SArd Biesheuvel /* MixColumns lookup table */ 719d935509SArd Biesheuvel 0x00000000, 0x03010102, 0x06020204, 0x05030306, 729d935509SArd Biesheuvel 0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e, 739d935509SArd Biesheuvel 0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16, 749d935509SArd Biesheuvel 0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e, 759d935509SArd Biesheuvel 0x30101020, 0x33111122, 0x36121224, 0x35131326, 769d935509SArd Biesheuvel 0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e, 779d935509SArd Biesheuvel 0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36, 789d935509SArd Biesheuvel 0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e, 799d935509SArd Biesheuvel 0x60202040, 0x63212142, 0x66222244, 0x65232346, 809d935509SArd Biesheuvel 0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e, 819d935509SArd Biesheuvel 0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56, 829d935509SArd Biesheuvel 0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e, 839d935509SArd Biesheuvel 0x50303060, 0x53313162, 0x56323264, 0x55333366, 849d935509SArd Biesheuvel 0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e, 859d935509SArd Biesheuvel 0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76, 869d935509SArd Biesheuvel 0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e, 879d935509SArd Biesheuvel 0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386, 889d935509SArd Biesheuvel 0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e, 899d935509SArd Biesheuvel 0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96, 909d935509SArd Biesheuvel 0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e, 919d935509SArd Biesheuvel 0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6, 929d935509SArd Biesheuvel 0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae, 939d935509SArd Biesheuvel 0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6, 949d935509SArd Biesheuvel 0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe, 959d935509SArd Biesheuvel 0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6, 969d935509SArd Biesheuvel 0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce, 979d935509SArd Biesheuvel 0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6, 989d935509SArd Biesheuvel 0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde, 999d935509SArd Biesheuvel 0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6, 1009d935509SArd Biesheuvel 0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee, 1019d935509SArd Biesheuvel 0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6, 1029d935509SArd Biesheuvel 0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe, 1039d935509SArd Biesheuvel 0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d, 1049d935509SArd Biesheuvel 0x97848413, 0x94858511, 0x91868617, 0x92878715, 1059d935509SArd Biesheuvel 0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d, 1069d935509SArd Biesheuvel 0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05, 1079d935509SArd Biesheuvel 0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d, 1089d935509SArd Biesheuvel 0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735, 1099d935509SArd Biesheuvel 0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d, 1109d935509SArd Biesheuvel 0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25, 1119d935509SArd Biesheuvel 0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d, 1129d935509SArd Biesheuvel 0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755, 1139d935509SArd Biesheuvel 0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d, 1149d935509SArd Biesheuvel 0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45, 1159d935509SArd Biesheuvel 0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d, 1169d935509SArd Biesheuvel 0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775, 1179d935509SArd Biesheuvel 0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d, 1189d935509SArd Biesheuvel 0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65, 1199d935509SArd Biesheuvel 0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d, 1209d935509SArd Biesheuvel 0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795, 1219d935509SArd Biesheuvel 0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d, 1229d935509SArd Biesheuvel 0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85, 1239d935509SArd Biesheuvel 0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd, 1249d935509SArd Biesheuvel 0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5, 1259d935509SArd Biesheuvel 0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad, 1269d935509SArd Biesheuvel 0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5, 1279d935509SArd Biesheuvel 0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd, 1289d935509SArd Biesheuvel 0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5, 1299d935509SArd Biesheuvel 0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd, 1309d935509SArd Biesheuvel 0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5, 1319d935509SArd Biesheuvel 0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd, 1329d935509SArd Biesheuvel 0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5, 1339d935509SArd Biesheuvel 0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed, 1349d935509SArd Biesheuvel 0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5, 1359d935509SArd Biesheuvel }, { 1369d935509SArd Biesheuvel /* Inverse MixColumns lookup table */ 1379d935509SArd Biesheuvel 0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12, 1389d935509SArd Biesheuvel 0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a, 1399d935509SArd Biesheuvel 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362, 1409d935509SArd Biesheuvel 0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a, 1419d935509SArd Biesheuvel 0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2, 1429d935509SArd Biesheuvel 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca, 1439d935509SArd Biesheuvel 0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382, 1449d935509SArd Biesheuvel 0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba, 1459d935509SArd Biesheuvel 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9, 1469d935509SArd Biesheuvel 0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1, 1479d935509SArd Biesheuvel 0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9, 1489d935509SArd Biesheuvel 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81, 1499d935509SArd Biesheuvel 0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029, 1509d935509SArd Biesheuvel 0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411, 1519d935509SArd Biesheuvel 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859, 1529d935509SArd Biesheuvel 0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61, 1539d935509SArd Biesheuvel 0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf, 1549d935509SArd Biesheuvel 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987, 1559d935509SArd Biesheuvel 0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf, 1569d935509SArd Biesheuvel 0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7, 1579d935509SArd Biesheuvel 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f, 1589d935509SArd Biesheuvel 0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967, 1599d935509SArd Biesheuvel 0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f, 1609d935509SArd Biesheuvel 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117, 1619d935509SArd Biesheuvel 0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664, 1629d935509SArd Biesheuvel 0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c, 1639d935509SArd Biesheuvel 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14, 1649d935509SArd Biesheuvel 0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c, 1659d935509SArd Biesheuvel 0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684, 1669d935509SArd Biesheuvel 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc, 1679d935509SArd Biesheuvel 0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4, 1689d935509SArd Biesheuvel 0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc, 1699d935509SArd Biesheuvel 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753, 1709d935509SArd Biesheuvel 0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b, 1719d935509SArd Biesheuvel 0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23, 1729d935509SArd Biesheuvel 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b, 1739d935509SArd Biesheuvel 0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3, 1749d935509SArd Biesheuvel 0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b, 1759d935509SArd Biesheuvel 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3, 1769d935509SArd Biesheuvel 0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb, 1779d935509SArd Biesheuvel 0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88, 1789d935509SArd Biesheuvel 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0, 1799d935509SArd Biesheuvel 0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8, 1809d935509SArd Biesheuvel 0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0, 1819d935509SArd Biesheuvel 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68, 1829d935509SArd Biesheuvel 0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850, 1839d935509SArd Biesheuvel 0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418, 1849d935509SArd Biesheuvel 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020, 1859d935509SArd Biesheuvel 0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe, 1869d935509SArd Biesheuvel 0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6, 1879d935509SArd Biesheuvel 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e, 1889d935509SArd Biesheuvel 0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6, 1899d935509SArd Biesheuvel 0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e, 1909d935509SArd Biesheuvel 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526, 1919d935509SArd Biesheuvel 0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e, 1929d935509SArd Biesheuvel 0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56, 1939d935509SArd Biesheuvel 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25, 1949d935509SArd Biesheuvel 0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d, 1959d935509SArd Biesheuvel 0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255, 1969d935509SArd Biesheuvel 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d, 1979d935509SArd Biesheuvel 0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5, 1989d935509SArd Biesheuvel 0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd, 1999d935509SArd Biesheuvel 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5, 2009d935509SArd Biesheuvel 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, 2019d935509SArd Biesheuvel } }; 2021a66ac61SRichard Henderson 2031a66ac61SRichard Henderson union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; 2049d935509SArd Biesheuvel int i; 2059d935509SArd Biesheuvel 2069d935509SArd Biesheuvel for (i = 0; i < 16; i += 4) { 207b449ca3cSArd Biesheuvel CR_ST_WORD(st, i >> 2) = 208b449ca3cSArd Biesheuvel mc[decrypt][CR_ST_BYTE(st, i)] ^ 209b449ca3cSArd Biesheuvel rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^ 210b449ca3cSArd Biesheuvel rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^ 211b449ca3cSArd Biesheuvel rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24); 2129d935509SArd Biesheuvel } 2139d935509SArd Biesheuvel 2141a66ac61SRichard Henderson rd[0] = st.l[0]; 2151a66ac61SRichard Henderson rd[1] = st.l[1]; 2169d935509SArd Biesheuvel } 217f1ecb913SArd Biesheuvel 218*a04b68e1SRichard Henderson void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) 219*a04b68e1SRichard Henderson { 220*a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 221*a04b68e1SRichard Henderson bool decrypt = simd_data(desc); 222*a04b68e1SRichard Henderson 223*a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 224*a04b68e1SRichard Henderson do_crypto_aesmc(vd + i, vm + i, decrypt); 225*a04b68e1SRichard Henderson } 226*a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 227*a04b68e1SRichard Henderson } 228*a04b68e1SRichard Henderson 229f1ecb913SArd Biesheuvel /* 230f1ecb913SArd Biesheuvel * SHA-1 logical functions 231f1ecb913SArd Biesheuvel */ 232f1ecb913SArd Biesheuvel 233f1ecb913SArd Biesheuvel static uint32_t cho(uint32_t x, uint32_t y, uint32_t z) 234f1ecb913SArd Biesheuvel { 235f1ecb913SArd Biesheuvel return (x & (y ^ z)) ^ z; 236f1ecb913SArd Biesheuvel } 237f1ecb913SArd Biesheuvel 238f1ecb913SArd Biesheuvel static uint32_t par(uint32_t x, uint32_t y, uint32_t z) 239f1ecb913SArd Biesheuvel { 240f1ecb913SArd Biesheuvel return x ^ y ^ z; 241f1ecb913SArd Biesheuvel } 242f1ecb913SArd Biesheuvel 243f1ecb913SArd Biesheuvel static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) 244f1ecb913SArd Biesheuvel { 245f1ecb913SArd Biesheuvel return (x & y) | ((x | y) & z); 246f1ecb913SArd Biesheuvel } 247f1ecb913SArd Biesheuvel 2481a66ac61SRichard Henderson void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op) 249f1ecb913SArd Biesheuvel { 2501a66ac61SRichard Henderson uint64_t *rd = vd; 2511a66ac61SRichard Henderson uint64_t *rn = vn; 2521a66ac61SRichard Henderson uint64_t *rm = vm; 2531a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 2541a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 2551a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 256f1ecb913SArd Biesheuvel 257f1ecb913SArd Biesheuvel if (op == 3) { /* sha1su0 */ 258f1ecb913SArd Biesheuvel d.l[0] ^= d.l[1] ^ m.l[0]; 259f1ecb913SArd Biesheuvel d.l[1] ^= n.l[0] ^ m.l[1]; 260f1ecb913SArd Biesheuvel } else { 261f1ecb913SArd Biesheuvel int i; 262f1ecb913SArd Biesheuvel 263f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 264f1ecb913SArd Biesheuvel uint32_t t; 265f1ecb913SArd Biesheuvel 266f1ecb913SArd Biesheuvel switch (op) { 267f1ecb913SArd Biesheuvel case 0: /* sha1c */ 268b449ca3cSArd Biesheuvel t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); 269f1ecb913SArd Biesheuvel break; 270f1ecb913SArd Biesheuvel case 1: /* sha1p */ 271b449ca3cSArd Biesheuvel t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); 272f1ecb913SArd Biesheuvel break; 273f1ecb913SArd Biesheuvel case 2: /* sha1m */ 274b449ca3cSArd Biesheuvel t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); 275f1ecb913SArd Biesheuvel break; 276f1ecb913SArd Biesheuvel default: 277f1ecb913SArd Biesheuvel g_assert_not_reached(); 278f1ecb913SArd Biesheuvel } 279b449ca3cSArd Biesheuvel t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) 280b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 281f1ecb913SArd Biesheuvel 282b449ca3cSArd Biesheuvel CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); 283b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 284b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); 285b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 286b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = t; 287f1ecb913SArd Biesheuvel } 288f1ecb913SArd Biesheuvel } 2891a66ac61SRichard Henderson rd[0] = d.l[0]; 2901a66ac61SRichard Henderson rd[1] = d.l[1]; 291f1ecb913SArd Biesheuvel } 292f1ecb913SArd Biesheuvel 2931a66ac61SRichard Henderson void HELPER(crypto_sha1h)(void *vd, void *vm) 294f1ecb913SArd Biesheuvel { 2951a66ac61SRichard Henderson uint64_t *rd = vd; 2961a66ac61SRichard Henderson uint64_t *rm = vm; 2971a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 298f1ecb913SArd Biesheuvel 299b449ca3cSArd Biesheuvel CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2); 300b449ca3cSArd Biesheuvel CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0; 301f1ecb913SArd Biesheuvel 3021a66ac61SRichard Henderson rd[0] = m.l[0]; 3031a66ac61SRichard Henderson rd[1] = m.l[1]; 304f1ecb913SArd Biesheuvel } 305f1ecb913SArd Biesheuvel 3061a66ac61SRichard Henderson void HELPER(crypto_sha1su1)(void *vd, void *vm) 307f1ecb913SArd Biesheuvel { 3081a66ac61SRichard Henderson uint64_t *rd = vd; 3091a66ac61SRichard Henderson uint64_t *rm = vm; 3101a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3111a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 312f1ecb913SArd Biesheuvel 313b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1); 314b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1); 315b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1); 316b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1); 317f1ecb913SArd Biesheuvel 3181a66ac61SRichard Henderson rd[0] = d.l[0]; 3191a66ac61SRichard Henderson rd[1] = d.l[1]; 320f1ecb913SArd Biesheuvel } 321f1ecb913SArd Biesheuvel 322f1ecb913SArd Biesheuvel /* 323f1ecb913SArd Biesheuvel * The SHA-256 logical functions, according to 324f1ecb913SArd Biesheuvel * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf 325f1ecb913SArd Biesheuvel */ 326f1ecb913SArd Biesheuvel 327f1ecb913SArd Biesheuvel static uint32_t S0(uint32_t x) 328f1ecb913SArd Biesheuvel { 329f1ecb913SArd Biesheuvel return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); 330f1ecb913SArd Biesheuvel } 331f1ecb913SArd Biesheuvel 332f1ecb913SArd Biesheuvel static uint32_t S1(uint32_t x) 333f1ecb913SArd Biesheuvel { 334f1ecb913SArd Biesheuvel return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); 335f1ecb913SArd Biesheuvel } 336f1ecb913SArd Biesheuvel 337f1ecb913SArd Biesheuvel static uint32_t s0(uint32_t x) 338f1ecb913SArd Biesheuvel { 339f1ecb913SArd Biesheuvel return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); 340f1ecb913SArd Biesheuvel } 341f1ecb913SArd Biesheuvel 342f1ecb913SArd Biesheuvel static uint32_t s1(uint32_t x) 343f1ecb913SArd Biesheuvel { 344f1ecb913SArd Biesheuvel return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); 345f1ecb913SArd Biesheuvel } 346f1ecb913SArd Biesheuvel 3471a66ac61SRichard Henderson void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) 348f1ecb913SArd Biesheuvel { 3491a66ac61SRichard Henderson uint64_t *rd = vd; 3501a66ac61SRichard Henderson uint64_t *rn = vn; 3511a66ac61SRichard Henderson uint64_t *rm = vm; 3521a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3531a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 3541a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 355f1ecb913SArd Biesheuvel int i; 356f1ecb913SArd Biesheuvel 357f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 358b449ca3cSArd Biesheuvel uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2)) 359b449ca3cSArd Biesheuvel + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0)) 360b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 361f1ecb913SArd Biesheuvel 362b449ca3cSArd Biesheuvel CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2); 363b449ca3cSArd Biesheuvel CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1); 364b449ca3cSArd Biesheuvel CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0); 365b449ca3cSArd Biesheuvel CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t; 366f1ecb913SArd Biesheuvel 367b449ca3cSArd Biesheuvel t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 368b449ca3cSArd Biesheuvel + S0(CR_ST_WORD(d, 0)); 369f1ecb913SArd Biesheuvel 370b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 371b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 372b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 373b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = t; 374f1ecb913SArd Biesheuvel } 375f1ecb913SArd Biesheuvel 3761a66ac61SRichard Henderson rd[0] = d.l[0]; 3771a66ac61SRichard Henderson rd[1] = d.l[1]; 378f1ecb913SArd Biesheuvel } 379f1ecb913SArd Biesheuvel 3801a66ac61SRichard Henderson void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) 381f1ecb913SArd Biesheuvel { 3821a66ac61SRichard Henderson uint64_t *rd = vd; 3831a66ac61SRichard Henderson uint64_t *rn = vn; 3841a66ac61SRichard Henderson uint64_t *rm = vm; 3851a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3861a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 3871a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 388f1ecb913SArd Biesheuvel int i; 389f1ecb913SArd Biesheuvel 390f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 391b449ca3cSArd Biesheuvel uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 392b449ca3cSArd Biesheuvel + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0)) 393b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 394f1ecb913SArd Biesheuvel 395b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 396b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 397b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 398b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t; 399f1ecb913SArd Biesheuvel } 400f1ecb913SArd Biesheuvel 4011a66ac61SRichard Henderson rd[0] = d.l[0]; 4021a66ac61SRichard Henderson rd[1] = d.l[1]; 403f1ecb913SArd Biesheuvel } 404f1ecb913SArd Biesheuvel 4051a66ac61SRichard Henderson void HELPER(crypto_sha256su0)(void *vd, void *vm) 406f1ecb913SArd Biesheuvel { 4071a66ac61SRichard Henderson uint64_t *rd = vd; 4081a66ac61SRichard Henderson uint64_t *rm = vm; 4091a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 4101a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 411f1ecb913SArd Biesheuvel 412b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1)); 413b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2)); 414b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3)); 415b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0)); 416f1ecb913SArd Biesheuvel 4171a66ac61SRichard Henderson rd[0] = d.l[0]; 4181a66ac61SRichard Henderson rd[1] = d.l[1]; 419f1ecb913SArd Biesheuvel } 420f1ecb913SArd Biesheuvel 4211a66ac61SRichard Henderson void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) 422f1ecb913SArd Biesheuvel { 4231a66ac61SRichard Henderson uint64_t *rd = vd; 4241a66ac61SRichard Henderson uint64_t *rn = vn; 4251a66ac61SRichard Henderson uint64_t *rm = vm; 4261a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 4271a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 4281a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 429f1ecb913SArd Biesheuvel 430b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1); 431b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2); 432b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3); 433b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0); 434f1ecb913SArd Biesheuvel 4351a66ac61SRichard Henderson rd[0] = d.l[0]; 4361a66ac61SRichard Henderson rd[1] = d.l[1]; 437f1ecb913SArd Biesheuvel } 43890b827d1SArd Biesheuvel 43990b827d1SArd Biesheuvel /* 44090b827d1SArd Biesheuvel * The SHA-512 logical functions (same as above but using 64-bit operands) 44190b827d1SArd Biesheuvel */ 44290b827d1SArd Biesheuvel 44390b827d1SArd Biesheuvel static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z) 44490b827d1SArd Biesheuvel { 44590b827d1SArd Biesheuvel return (x & (y ^ z)) ^ z; 44690b827d1SArd Biesheuvel } 44790b827d1SArd Biesheuvel 44890b827d1SArd Biesheuvel static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z) 44990b827d1SArd Biesheuvel { 45090b827d1SArd Biesheuvel return (x & y) | ((x | y) & z); 45190b827d1SArd Biesheuvel } 45290b827d1SArd Biesheuvel 45390b827d1SArd Biesheuvel static uint64_t S0_512(uint64_t x) 45490b827d1SArd Biesheuvel { 45590b827d1SArd Biesheuvel return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); 45690b827d1SArd Biesheuvel } 45790b827d1SArd Biesheuvel 45890b827d1SArd Biesheuvel static uint64_t S1_512(uint64_t x) 45990b827d1SArd Biesheuvel { 46090b827d1SArd Biesheuvel return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); 46190b827d1SArd Biesheuvel } 46290b827d1SArd Biesheuvel 46390b827d1SArd Biesheuvel static uint64_t s0_512(uint64_t x) 46490b827d1SArd Biesheuvel { 46590b827d1SArd Biesheuvel return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); 46690b827d1SArd Biesheuvel } 46790b827d1SArd Biesheuvel 46890b827d1SArd Biesheuvel static uint64_t s1_512(uint64_t x) 46990b827d1SArd Biesheuvel { 47090b827d1SArd Biesheuvel return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); 47190b827d1SArd Biesheuvel } 47290b827d1SArd Biesheuvel 47390b827d1SArd Biesheuvel void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) 47490b827d1SArd Biesheuvel { 47590b827d1SArd Biesheuvel uint64_t *rd = vd; 47690b827d1SArd Biesheuvel uint64_t *rn = vn; 47790b827d1SArd Biesheuvel uint64_t *rm = vm; 47890b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 47990b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 48090b827d1SArd Biesheuvel 48190b827d1SArd Biesheuvel d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]); 48290b827d1SArd Biesheuvel d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]); 48390b827d1SArd Biesheuvel 48490b827d1SArd Biesheuvel rd[0] = d0; 48590b827d1SArd Biesheuvel rd[1] = d1; 48690b827d1SArd Biesheuvel } 48790b827d1SArd Biesheuvel 48890b827d1SArd Biesheuvel void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) 48990b827d1SArd Biesheuvel { 49090b827d1SArd Biesheuvel uint64_t *rd = vd; 49190b827d1SArd Biesheuvel uint64_t *rn = vn; 49290b827d1SArd Biesheuvel uint64_t *rm = vm; 49390b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 49490b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 49590b827d1SArd Biesheuvel 49690b827d1SArd Biesheuvel d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]); 49790b827d1SArd Biesheuvel d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]); 49890b827d1SArd Biesheuvel 49990b827d1SArd Biesheuvel rd[0] = d0; 50090b827d1SArd Biesheuvel rd[1] = d1; 50190b827d1SArd Biesheuvel } 50290b827d1SArd Biesheuvel 50390b827d1SArd Biesheuvel void HELPER(crypto_sha512su0)(void *vd, void *vn) 50490b827d1SArd Biesheuvel { 50590b827d1SArd Biesheuvel uint64_t *rd = vd; 50690b827d1SArd Biesheuvel uint64_t *rn = vn; 50790b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 50890b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 50990b827d1SArd Biesheuvel 51090b827d1SArd Biesheuvel d0 += s0_512(rd[1]); 51190b827d1SArd Biesheuvel d1 += s0_512(rn[0]); 51290b827d1SArd Biesheuvel 51390b827d1SArd Biesheuvel rd[0] = d0; 51490b827d1SArd Biesheuvel rd[1] = d1; 51590b827d1SArd Biesheuvel } 51690b827d1SArd Biesheuvel 51790b827d1SArd Biesheuvel void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) 51890b827d1SArd Biesheuvel { 51990b827d1SArd Biesheuvel uint64_t *rd = vd; 52090b827d1SArd Biesheuvel uint64_t *rn = vn; 52190b827d1SArd Biesheuvel uint64_t *rm = vm; 52290b827d1SArd Biesheuvel 52390b827d1SArd Biesheuvel rd[0] += s1_512(rn[0]) + rm[0]; 52490b827d1SArd Biesheuvel rd[1] += s1_512(rn[1]) + rm[1]; 52590b827d1SArd Biesheuvel } 52680d6f4c6SArd Biesheuvel 52780d6f4c6SArd Biesheuvel void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) 52880d6f4c6SArd Biesheuvel { 52980d6f4c6SArd Biesheuvel uint64_t *rd = vd; 53080d6f4c6SArd Biesheuvel uint64_t *rn = vn; 53180d6f4c6SArd Biesheuvel uint64_t *rm = vm; 53280d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 53380d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 53480d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 53580d6f4c6SArd Biesheuvel uint32_t t; 53680d6f4c6SArd Biesheuvel 53780d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17); 53880d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9); 53980d6f4c6SArd Biesheuvel 54080d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17); 54180d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9); 54280d6f4c6SArd Biesheuvel 54380d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17); 54480d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9); 54580d6f4c6SArd Biesheuvel 54680d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17); 54780d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9); 54880d6f4c6SArd Biesheuvel 54980d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 55080d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 55180d6f4c6SArd Biesheuvel } 55280d6f4c6SArd Biesheuvel 55380d6f4c6SArd Biesheuvel void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm) 55480d6f4c6SArd Biesheuvel { 55580d6f4c6SArd Biesheuvel uint64_t *rd = vd; 55680d6f4c6SArd Biesheuvel uint64_t *rn = vn; 55780d6f4c6SArd Biesheuvel uint64_t *rm = vm; 55880d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 55980d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 56080d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 56180d6f4c6SArd Biesheuvel uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25); 56280d6f4c6SArd Biesheuvel 56380d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) ^= t; 56480d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25); 56580d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25); 56680d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^ 56780d6f4c6SArd Biesheuvel ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26); 56880d6f4c6SArd Biesheuvel 56980d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 57080d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 57180d6f4c6SArd Biesheuvel } 57280d6f4c6SArd Biesheuvel 57380d6f4c6SArd Biesheuvel void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, 57480d6f4c6SArd Biesheuvel uint32_t opcode) 57580d6f4c6SArd Biesheuvel { 57680d6f4c6SArd Biesheuvel uint64_t *rd = vd; 57780d6f4c6SArd Biesheuvel uint64_t *rn = vn; 57880d6f4c6SArd Biesheuvel uint64_t *rm = vm; 57980d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 58080d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 58180d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 58280d6f4c6SArd Biesheuvel uint32_t t; 58380d6f4c6SArd Biesheuvel 58480d6f4c6SArd Biesheuvel assert(imm2 < 4); 58580d6f4c6SArd Biesheuvel 58680d6f4c6SArd Biesheuvel if (opcode == 0 || opcode == 2) { 58780d6f4c6SArd Biesheuvel /* SM3TT1A, SM3TT2A */ 58880d6f4c6SArd Biesheuvel t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 58980d6f4c6SArd Biesheuvel } else if (opcode == 1) { 59080d6f4c6SArd Biesheuvel /* SM3TT1B */ 59180d6f4c6SArd Biesheuvel t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 59280d6f4c6SArd Biesheuvel } else if (opcode == 3) { 59380d6f4c6SArd Biesheuvel /* SM3TT2B */ 59480d6f4c6SArd Biesheuvel t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 59580d6f4c6SArd Biesheuvel } else { 59680d6f4c6SArd Biesheuvel g_assert_not_reached(); 59780d6f4c6SArd Biesheuvel } 59880d6f4c6SArd Biesheuvel 59980d6f4c6SArd Biesheuvel t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); 60080d6f4c6SArd Biesheuvel 60180d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1); 60280d6f4c6SArd Biesheuvel 60380d6f4c6SArd Biesheuvel if (opcode < 2) { 60480d6f4c6SArd Biesheuvel /* SM3TT1A, SM3TT1B */ 60580d6f4c6SArd Biesheuvel t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20); 60680d6f4c6SArd Biesheuvel 60780d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23); 60880d6f4c6SArd Biesheuvel } else { 60980d6f4c6SArd Biesheuvel /* SM3TT2A, SM3TT2B */ 61080d6f4c6SArd Biesheuvel t += CR_ST_WORD(n, 3); 61180d6f4c6SArd Biesheuvel t ^= rol32(t, 9) ^ rol32(t, 17); 61280d6f4c6SArd Biesheuvel 61380d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13); 61480d6f4c6SArd Biesheuvel } 61580d6f4c6SArd Biesheuvel 61680d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3); 61780d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) = t; 61880d6f4c6SArd Biesheuvel 61980d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 62080d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 62180d6f4c6SArd Biesheuvel } 622b6577bcdSArd Biesheuvel 623b6577bcdSArd Biesheuvel static uint8_t const sm4_sbox[] = { 624b6577bcdSArd Biesheuvel 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 625b6577bcdSArd Biesheuvel 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, 626b6577bcdSArd Biesheuvel 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, 627b6577bcdSArd Biesheuvel 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, 628b6577bcdSArd Biesheuvel 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, 629b6577bcdSArd Biesheuvel 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, 630b6577bcdSArd Biesheuvel 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, 631b6577bcdSArd Biesheuvel 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, 632b6577bcdSArd Biesheuvel 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, 633b6577bcdSArd Biesheuvel 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, 634b6577bcdSArd Biesheuvel 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, 635b6577bcdSArd Biesheuvel 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, 636b6577bcdSArd Biesheuvel 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, 637b6577bcdSArd Biesheuvel 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, 638b6577bcdSArd Biesheuvel 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, 639b6577bcdSArd Biesheuvel 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, 640b6577bcdSArd Biesheuvel 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, 641b6577bcdSArd Biesheuvel 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, 642b6577bcdSArd Biesheuvel 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, 643b6577bcdSArd Biesheuvel 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, 644b6577bcdSArd Biesheuvel 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, 645b6577bcdSArd Biesheuvel 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, 646b6577bcdSArd Biesheuvel 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, 647b6577bcdSArd Biesheuvel 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, 648b6577bcdSArd Biesheuvel 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, 649b6577bcdSArd Biesheuvel 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8, 650b6577bcdSArd Biesheuvel 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, 651b6577bcdSArd Biesheuvel 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, 652b6577bcdSArd Biesheuvel 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, 653b6577bcdSArd Biesheuvel 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, 654b6577bcdSArd Biesheuvel 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, 655b6577bcdSArd Biesheuvel 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, 656b6577bcdSArd Biesheuvel }; 657b6577bcdSArd Biesheuvel 658*a04b68e1SRichard Henderson static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) 659b6577bcdSArd Biesheuvel { 660*a04b68e1SRichard Henderson union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; 661*a04b68e1SRichard Henderson union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; 662b6577bcdSArd Biesheuvel uint32_t t, i; 663b6577bcdSArd Biesheuvel 664b6577bcdSArd Biesheuvel for (i = 0; i < 4; i++) { 665b6577bcdSArd Biesheuvel t = CR_ST_WORD(d, (i + 1) % 4) ^ 666b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 2) % 4) ^ 667b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 3) % 4) ^ 668b6577bcdSArd Biesheuvel CR_ST_WORD(n, i); 669b6577bcdSArd Biesheuvel 670b6577bcdSArd Biesheuvel t = sm4_sbox[t & 0xff] | 671b6577bcdSArd Biesheuvel sm4_sbox[(t >> 8) & 0xff] << 8 | 672b6577bcdSArd Biesheuvel sm4_sbox[(t >> 16) & 0xff] << 16 | 673b6577bcdSArd Biesheuvel sm4_sbox[(t >> 24) & 0xff] << 24; 674b6577bcdSArd Biesheuvel 675b6577bcdSArd Biesheuvel CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ 676b6577bcdSArd Biesheuvel rol32(t, 24); 677b6577bcdSArd Biesheuvel } 678b6577bcdSArd Biesheuvel 679b6577bcdSArd Biesheuvel rd[0] = d.l[0]; 680b6577bcdSArd Biesheuvel rd[1] = d.l[1]; 681b6577bcdSArd Biesheuvel } 682b6577bcdSArd Biesheuvel 683*a04b68e1SRichard Henderson void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) 684b6577bcdSArd Biesheuvel { 685*a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 686*a04b68e1SRichard Henderson 687*a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 688*a04b68e1SRichard Henderson do_crypto_sm4e(vd + i, vn + i, vm + i); 689*a04b68e1SRichard Henderson } 690*a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 691*a04b68e1SRichard Henderson } 692*a04b68e1SRichard Henderson 693*a04b68e1SRichard Henderson static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) 694*a04b68e1SRichard Henderson { 695b6577bcdSArd Biesheuvel union CRYPTO_STATE d; 696b6577bcdSArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 697b6577bcdSArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 698b6577bcdSArd Biesheuvel uint32_t t, i; 699b6577bcdSArd Biesheuvel 700b6577bcdSArd Biesheuvel d = n; 701b6577bcdSArd Biesheuvel for (i = 0; i < 4; i++) { 702b6577bcdSArd Biesheuvel t = CR_ST_WORD(d, (i + 1) % 4) ^ 703b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 2) % 4) ^ 704b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 3) % 4) ^ 705b6577bcdSArd Biesheuvel CR_ST_WORD(m, i); 706b6577bcdSArd Biesheuvel 707b6577bcdSArd Biesheuvel t = sm4_sbox[t & 0xff] | 708b6577bcdSArd Biesheuvel sm4_sbox[(t >> 8) & 0xff] << 8 | 709b6577bcdSArd Biesheuvel sm4_sbox[(t >> 16) & 0xff] << 16 | 710b6577bcdSArd Biesheuvel sm4_sbox[(t >> 24) & 0xff] << 24; 711b6577bcdSArd Biesheuvel 712b6577bcdSArd Biesheuvel CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); 713b6577bcdSArd Biesheuvel } 714b6577bcdSArd Biesheuvel 715b6577bcdSArd Biesheuvel rd[0] = d.l[0]; 716b6577bcdSArd Biesheuvel rd[1] = d.l[1]; 717b6577bcdSArd Biesheuvel } 718*a04b68e1SRichard Henderson 719*a04b68e1SRichard Henderson void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) 720*a04b68e1SRichard Henderson { 721*a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 722*a04b68e1SRichard Henderson 723*a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 724*a04b68e1SRichard Henderson do_crypto_sm4ekey(vd + i, vn + i, vm + i); 725*a04b68e1SRichard Henderson } 726*a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 727*a04b68e1SRichard Henderson } 728