1 /* 2 * crypto_helper.c - emulate v8 Crypto Extensions instructions 3 * 4 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 */ 11 12 #include "qemu/osdep.h" 13 14 #include "cpu.h" 15 #include "exec/helper-proto.h" 16 #include "tcg/tcg-gvec-desc.h" 17 #include "crypto/aes.h" 18 #include "vec_internal.h" 19 20 union CRYPTO_STATE { 21 uint8_t bytes[16]; 22 uint32_t words[4]; 23 uint64_t l[2]; 24 }; 25 26 #ifdef HOST_WORDS_BIGENDIAN 27 #define CR_ST_BYTE(state, i) (state.bytes[(15 - (i)) ^ 8]) 28 #define CR_ST_WORD(state, i) (state.words[(3 - (i)) ^ 2]) 29 #else 30 #define CR_ST_BYTE(state, i) (state.bytes[i]) 31 #define CR_ST_WORD(state, i) (state.words[i]) 32 #endif 33 34 /* 35 * The caller has not been converted to full gvec, and so only 36 * modifies the low 16 bytes of the vector register. 37 */ 38 static void clear_tail_16(void *vd, uint32_t desc) 39 { 40 int opr_sz = simd_oprsz(desc); 41 int max_sz = simd_maxsz(desc); 42 43 assert(opr_sz == 16); 44 clear_tail(vd, opr_sz, max_sz); 45 } 46 47 static void do_crypto_aese(uint64_t *rd, uint64_t *rn, 48 uint64_t *rm, bool decrypt) 49 { 50 static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; 51 static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; 52 union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; 53 union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; 54 int i; 55 56 /* xor state vector with round key */ 57 rk.l[0] ^= st.l[0]; 58 rk.l[1] ^= st.l[1]; 59 60 /* combine ShiftRows operation and sbox substitution */ 61 for (i = 0; i < 16; i++) { 62 CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])]; 63 } 64 65 rd[0] = st.l[0]; 66 rd[1] = st.l[1]; 67 } 68 69 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) 70 { 71 intptr_t i, opr_sz = simd_oprsz(desc); 72 bool decrypt = simd_data(desc); 73 74 for (i = 0; i < opr_sz; i += 16) { 75 do_crypto_aese(vd + i, vn + i, vm + i, decrypt); 76 } 77 clear_tail(vd, opr_sz, simd_maxsz(desc)); 78 } 79 80 static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt) 81 { 82 static uint32_t const mc[][256] = { { 83 /* MixColumns lookup table */ 84 0x00000000, 0x03010102, 0x06020204, 0x05030306, 85 0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e, 86 0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16, 87 0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e, 88 0x30101020, 0x33111122, 0x36121224, 0x35131326, 89 0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e, 90 0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36, 91 0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e, 92 0x60202040, 0x63212142, 0x66222244, 0x65232346, 93 0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e, 94 0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56, 95 0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e, 96 0x50303060, 0x53313162, 0x56323264, 0x55333366, 97 0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e, 98 0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76, 99 0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e, 100 0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386, 101 0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e, 102 0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96, 103 0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e, 104 0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6, 105 0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae, 106 0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6, 107 0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe, 108 0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6, 109 0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce, 110 0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6, 111 0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde, 112 0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6, 113 0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee, 114 0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6, 115 0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe, 116 0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d, 117 0x97848413, 0x94858511, 0x91868617, 0x92878715, 118 0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d, 119 0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05, 120 0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d, 121 0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735, 122 0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d, 123 0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25, 124 0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d, 125 0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755, 126 0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d, 127 0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45, 128 0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d, 129 0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775, 130 0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d, 131 0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65, 132 0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d, 133 0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795, 134 0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d, 135 0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85, 136 0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd, 137 0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5, 138 0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad, 139 0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5, 140 0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd, 141 0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5, 142 0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd, 143 0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5, 144 0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd, 145 0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5, 146 0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed, 147 0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5, 148 }, { 149 /* Inverse MixColumns lookup table */ 150 0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12, 151 0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a, 152 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362, 153 0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a, 154 0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2, 155 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca, 156 0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382, 157 0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba, 158 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9, 159 0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1, 160 0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9, 161 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81, 162 0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029, 163 0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411, 164 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859, 165 0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61, 166 0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf, 167 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987, 168 0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf, 169 0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7, 170 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f, 171 0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967, 172 0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f, 173 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117, 174 0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664, 175 0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c, 176 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14, 177 0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c, 178 0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684, 179 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc, 180 0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4, 181 0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc, 182 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753, 183 0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b, 184 0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23, 185 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b, 186 0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3, 187 0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b, 188 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3, 189 0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb, 190 0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88, 191 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0, 192 0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8, 193 0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0, 194 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68, 195 0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850, 196 0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418, 197 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020, 198 0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe, 199 0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6, 200 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e, 201 0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6, 202 0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e, 203 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526, 204 0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e, 205 0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56, 206 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25, 207 0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d, 208 0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255, 209 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d, 210 0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5, 211 0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd, 212 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5, 213 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, 214 } }; 215 216 union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; 217 int i; 218 219 for (i = 0; i < 16; i += 4) { 220 CR_ST_WORD(st, i >> 2) = 221 mc[decrypt][CR_ST_BYTE(st, i)] ^ 222 rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^ 223 rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^ 224 rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24); 225 } 226 227 rd[0] = st.l[0]; 228 rd[1] = st.l[1]; 229 } 230 231 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) 232 { 233 intptr_t i, opr_sz = simd_oprsz(desc); 234 bool decrypt = simd_data(desc); 235 236 for (i = 0; i < opr_sz; i += 16) { 237 do_crypto_aesmc(vd + i, vm + i, decrypt); 238 } 239 clear_tail(vd, opr_sz, simd_maxsz(desc)); 240 } 241 242 /* 243 * SHA-1 logical functions 244 */ 245 246 static uint32_t cho(uint32_t x, uint32_t y, uint32_t z) 247 { 248 return (x & (y ^ z)) ^ z; 249 } 250 251 static uint32_t par(uint32_t x, uint32_t y, uint32_t z) 252 { 253 return x ^ y ^ z; 254 } 255 256 static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) 257 { 258 return (x & y) | ((x | y) & z); 259 } 260 261 void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op) 262 { 263 uint64_t *rd = vd; 264 uint64_t *rn = vn; 265 uint64_t *rm = vm; 266 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 267 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 268 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 269 270 if (op == 3) { /* sha1su0 */ 271 d.l[0] ^= d.l[1] ^ m.l[0]; 272 d.l[1] ^= n.l[0] ^ m.l[1]; 273 } else { 274 int i; 275 276 for (i = 0; i < 4; i++) { 277 uint32_t t; 278 279 switch (op) { 280 case 0: /* sha1c */ 281 t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); 282 break; 283 case 1: /* sha1p */ 284 t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); 285 break; 286 case 2: /* sha1m */ 287 t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); 288 break; 289 default: 290 g_assert_not_reached(); 291 } 292 t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) 293 + CR_ST_WORD(m, i); 294 295 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); 296 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 297 CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); 298 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 299 CR_ST_WORD(d, 0) = t; 300 } 301 } 302 rd[0] = d.l[0]; 303 rd[1] = d.l[1]; 304 } 305 306 void HELPER(crypto_sha1h)(void *vd, void *vm) 307 { 308 uint64_t *rd = vd; 309 uint64_t *rm = vm; 310 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 311 312 CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2); 313 CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0; 314 315 rd[0] = m.l[0]; 316 rd[1] = m.l[1]; 317 } 318 319 void HELPER(crypto_sha1su1)(void *vd, void *vm) 320 { 321 uint64_t *rd = vd; 322 uint64_t *rm = vm; 323 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 324 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 325 326 CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1); 327 CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1); 328 CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1); 329 CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1); 330 331 rd[0] = d.l[0]; 332 rd[1] = d.l[1]; 333 } 334 335 /* 336 * The SHA-256 logical functions, according to 337 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf 338 */ 339 340 static uint32_t S0(uint32_t x) 341 { 342 return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); 343 } 344 345 static uint32_t S1(uint32_t x) 346 { 347 return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); 348 } 349 350 static uint32_t s0(uint32_t x) 351 { 352 return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); 353 } 354 355 static uint32_t s1(uint32_t x) 356 { 357 return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); 358 } 359 360 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) 361 { 362 uint64_t *rd = vd; 363 uint64_t *rn = vn; 364 uint64_t *rm = vm; 365 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 366 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 367 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 368 int i; 369 370 for (i = 0; i < 4; i++) { 371 uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2)) 372 + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0)) 373 + CR_ST_WORD(m, i); 374 375 CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2); 376 CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1); 377 CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0); 378 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t; 379 380 t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 381 + S0(CR_ST_WORD(d, 0)); 382 383 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 384 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 385 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 386 CR_ST_WORD(d, 0) = t; 387 } 388 389 rd[0] = d.l[0]; 390 rd[1] = d.l[1]; 391 } 392 393 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) 394 { 395 uint64_t *rd = vd; 396 uint64_t *rn = vn; 397 uint64_t *rm = vm; 398 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 399 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 400 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 401 int i; 402 403 for (i = 0; i < 4; i++) { 404 uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 405 + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0)) 406 + CR_ST_WORD(m, i); 407 408 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 409 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 410 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 411 CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t; 412 } 413 414 rd[0] = d.l[0]; 415 rd[1] = d.l[1]; 416 } 417 418 void HELPER(crypto_sha256su0)(void *vd, void *vm) 419 { 420 uint64_t *rd = vd; 421 uint64_t *rm = vm; 422 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 423 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 424 425 CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1)); 426 CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2)); 427 CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3)); 428 CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0)); 429 430 rd[0] = d.l[0]; 431 rd[1] = d.l[1]; 432 } 433 434 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) 435 { 436 uint64_t *rd = vd; 437 uint64_t *rn = vn; 438 uint64_t *rm = vm; 439 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 440 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 441 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 442 443 CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1); 444 CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2); 445 CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3); 446 CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0); 447 448 rd[0] = d.l[0]; 449 rd[1] = d.l[1]; 450 } 451 452 /* 453 * The SHA-512 logical functions (same as above but using 64-bit operands) 454 */ 455 456 static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z) 457 { 458 return (x & (y ^ z)) ^ z; 459 } 460 461 static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z) 462 { 463 return (x & y) | ((x | y) & z); 464 } 465 466 static uint64_t S0_512(uint64_t x) 467 { 468 return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); 469 } 470 471 static uint64_t S1_512(uint64_t x) 472 { 473 return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); 474 } 475 476 static uint64_t s0_512(uint64_t x) 477 { 478 return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); 479 } 480 481 static uint64_t s1_512(uint64_t x) 482 { 483 return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); 484 } 485 486 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) 487 { 488 uint64_t *rd = vd; 489 uint64_t *rn = vn; 490 uint64_t *rm = vm; 491 uint64_t d0 = rd[0]; 492 uint64_t d1 = rd[1]; 493 494 d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]); 495 d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]); 496 497 rd[0] = d0; 498 rd[1] = d1; 499 500 clear_tail_16(vd, desc); 501 } 502 503 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) 504 { 505 uint64_t *rd = vd; 506 uint64_t *rn = vn; 507 uint64_t *rm = vm; 508 uint64_t d0 = rd[0]; 509 uint64_t d1 = rd[1]; 510 511 d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]); 512 d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]); 513 514 rd[0] = d0; 515 rd[1] = d1; 516 517 clear_tail_16(vd, desc); 518 } 519 520 void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) 521 { 522 uint64_t *rd = vd; 523 uint64_t *rn = vn; 524 uint64_t d0 = rd[0]; 525 uint64_t d1 = rd[1]; 526 527 d0 += s0_512(rd[1]); 528 d1 += s0_512(rn[0]); 529 530 rd[0] = d0; 531 rd[1] = d1; 532 533 clear_tail_16(vd, desc); 534 } 535 536 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) 537 { 538 uint64_t *rd = vd; 539 uint64_t *rn = vn; 540 uint64_t *rm = vm; 541 542 rd[0] += s1_512(rn[0]) + rm[0]; 543 rd[1] += s1_512(rn[1]) + rm[1]; 544 545 clear_tail_16(vd, desc); 546 } 547 548 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) 549 { 550 uint64_t *rd = vd; 551 uint64_t *rn = vn; 552 uint64_t *rm = vm; 553 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 554 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 555 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 556 uint32_t t; 557 558 t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17); 559 CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9); 560 561 t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17); 562 CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9); 563 564 t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17); 565 CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9); 566 567 t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17); 568 CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9); 569 570 rd[0] = d.l[0]; 571 rd[1] = d.l[1]; 572 573 clear_tail_16(vd, desc); 574 } 575 576 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) 577 { 578 uint64_t *rd = vd; 579 uint64_t *rn = vn; 580 uint64_t *rm = vm; 581 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 582 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 583 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 584 uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25); 585 586 CR_ST_WORD(d, 0) ^= t; 587 CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25); 588 CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25); 589 CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^ 590 ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26); 591 592 rd[0] = d.l[0]; 593 rd[1] = d.l[1]; 594 595 clear_tail_16(vd, desc); 596 } 597 598 void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, 599 uint32_t opcode) 600 { 601 uint64_t *rd = vd; 602 uint64_t *rn = vn; 603 uint64_t *rm = vm; 604 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 605 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 606 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 607 uint32_t t; 608 609 assert(imm2 < 4); 610 611 if (opcode == 0 || opcode == 2) { 612 /* SM3TT1A, SM3TT2A */ 613 t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 614 } else if (opcode == 1) { 615 /* SM3TT1B */ 616 t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 617 } else if (opcode == 3) { 618 /* SM3TT2B */ 619 t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 620 } else { 621 g_assert_not_reached(); 622 } 623 624 t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); 625 626 CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1); 627 628 if (opcode < 2) { 629 /* SM3TT1A, SM3TT1B */ 630 t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20); 631 632 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23); 633 } else { 634 /* SM3TT2A, SM3TT2B */ 635 t += CR_ST_WORD(n, 3); 636 t ^= rol32(t, 9) ^ rol32(t, 17); 637 638 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13); 639 } 640 641 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3); 642 CR_ST_WORD(d, 3) = t; 643 644 rd[0] = d.l[0]; 645 rd[1] = d.l[1]; 646 } 647 648 static uint8_t const sm4_sbox[] = { 649 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 650 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, 651 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, 652 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, 653 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, 654 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, 655 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, 656 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, 657 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, 658 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, 659 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, 660 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, 661 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, 662 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, 663 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, 664 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, 665 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, 666 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, 667 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, 668 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, 669 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, 670 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, 671 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, 672 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, 673 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, 674 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8, 675 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, 676 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, 677 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, 678 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, 679 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, 680 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, 681 }; 682 683 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) 684 { 685 union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; 686 union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; 687 uint32_t t, i; 688 689 for (i = 0; i < 4; i++) { 690 t = CR_ST_WORD(d, (i + 1) % 4) ^ 691 CR_ST_WORD(d, (i + 2) % 4) ^ 692 CR_ST_WORD(d, (i + 3) % 4) ^ 693 CR_ST_WORD(n, i); 694 695 t = sm4_sbox[t & 0xff] | 696 sm4_sbox[(t >> 8) & 0xff] << 8 | 697 sm4_sbox[(t >> 16) & 0xff] << 16 | 698 sm4_sbox[(t >> 24) & 0xff] << 24; 699 700 CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ 701 rol32(t, 24); 702 } 703 704 rd[0] = d.l[0]; 705 rd[1] = d.l[1]; 706 } 707 708 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) 709 { 710 intptr_t i, opr_sz = simd_oprsz(desc); 711 712 for (i = 0; i < opr_sz; i += 16) { 713 do_crypto_sm4e(vd + i, vn + i, vm + i); 714 } 715 clear_tail(vd, opr_sz, simd_maxsz(desc)); 716 } 717 718 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) 719 { 720 union CRYPTO_STATE d; 721 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 722 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 723 uint32_t t, i; 724 725 d = n; 726 for (i = 0; i < 4; i++) { 727 t = CR_ST_WORD(d, (i + 1) % 4) ^ 728 CR_ST_WORD(d, (i + 2) % 4) ^ 729 CR_ST_WORD(d, (i + 3) % 4) ^ 730 CR_ST_WORD(m, i); 731 732 t = sm4_sbox[t & 0xff] | 733 sm4_sbox[(t >> 8) & 0xff] << 8 | 734 sm4_sbox[(t >> 16) & 0xff] << 16 | 735 sm4_sbox[(t >> 24) & 0xff] << 24; 736 737 CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); 738 } 739 740 rd[0] = d.l[0]; 741 rd[1] = d.l[1]; 742 } 743 744 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) 745 { 746 intptr_t i, opr_sz = simd_oprsz(desc); 747 748 for (i = 0; i < opr_sz; i += 16) { 749 do_crypto_sm4ekey(vd + i, vn + i, vm + i); 750 } 751 clear_tail(vd, opr_sz, simd_maxsz(desc)); 752 } 753 754 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) 755 { 756 intptr_t i, opr_sz = simd_oprsz(desc); 757 uint64_t *d = vd, *n = vn, *m = vm; 758 759 for (i = 0; i < opr_sz / 8; ++i) { 760 d[i] = n[i] ^ rol64(m[i], 1); 761 } 762 clear_tail(vd, opr_sz, simd_maxsz(desc)); 763 } 764