19d935509SArd Biesheuvel /* 29d935509SArd Biesheuvel * crypto_helper.c - emulate v8 Crypto Extensions instructions 39d935509SArd Biesheuvel * 490b827d1SArd Biesheuvel * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> 59d935509SArd Biesheuvel * 69d935509SArd Biesheuvel * This library is free software; you can redistribute it and/or 79d935509SArd Biesheuvel * modify it under the terms of the GNU Lesser General Public 89d935509SArd Biesheuvel * License as published by the Free Software Foundation; either 950f57e09SChetan Pant * version 2.1 of the License, or (at your option) any later version. 109d935509SArd Biesheuvel */ 119d935509SArd Biesheuvel 1274c21bd0SPeter Maydell #include "qemu/osdep.h" 139d935509SArd Biesheuvel 149d935509SArd Biesheuvel #include "cpu.h" 152ef6175aSRichard Henderson #include "exec/helper-proto.h" 16a04b68e1SRichard Henderson #include "tcg/tcg-gvec-desc.h" 176f2945cdSDaniel P. Berrange #include "crypto/aes.h" 18c29da5a7SWeiwei Li #include "crypto/sm4.h" 19a04b68e1SRichard Henderson #include "vec_internal.h" 209d935509SArd Biesheuvel 21f1ecb913SArd Biesheuvel union CRYPTO_STATE { 229d935509SArd Biesheuvel uint8_t bytes[16]; 23f1ecb913SArd Biesheuvel uint32_t words[4]; 249d935509SArd Biesheuvel uint64_t l[2]; 259d935509SArd Biesheuvel }; 269d935509SArd Biesheuvel 27e03b5686SMarc-André Lureau #if HOST_BIG_ENDIAN 28afc8b7d3SRichard Henderson #define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8]) 29afc8b7d3SRichard Henderson #define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2]) 30b449ca3cSArd Biesheuvel #else 31afc8b7d3SRichard Henderson #define CR_ST_BYTE(state, i) ((state).bytes[i]) 32afc8b7d3SRichard Henderson #define CR_ST_WORD(state, i) ((state).words[i]) 33b449ca3cSArd Biesheuvel #endif 34b449ca3cSArd Biesheuvel 35aaffebd6SRichard Henderson /* 36aaffebd6SRichard Henderson * The caller has not been converted to full gvec, and so only 37aaffebd6SRichard Henderson * modifies the low 16 bytes of the vector register. 38aaffebd6SRichard Henderson */ 39aaffebd6SRichard Henderson static void clear_tail_16(void *vd, uint32_t desc) 40aaffebd6SRichard Henderson { 41aaffebd6SRichard Henderson int opr_sz = simd_oprsz(desc); 42aaffebd6SRichard Henderson int max_sz = simd_maxsz(desc); 43aaffebd6SRichard Henderson 44aaffebd6SRichard Henderson assert(opr_sz == 16); 45aaffebd6SRichard Henderson clear_tail(vd, opr_sz, max_sz); 46aaffebd6SRichard Henderson } 47aaffebd6SRichard Henderson 48*0f23908cSRichard Henderson static void do_crypto_aese(uint64_t *rd, uint64_t *rn, uint64_t *rm, 49*0f23908cSRichard Henderson const uint8_t *sbox, const uint8_t *shift) 509d935509SArd Biesheuvel { 511a66ac61SRichard Henderson union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; 52a04b68e1SRichard Henderson union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; 539d935509SArd Biesheuvel int i; 549d935509SArd Biesheuvel 559d935509SArd Biesheuvel /* xor state vector with round key */ 569d935509SArd Biesheuvel rk.l[0] ^= st.l[0]; 579d935509SArd Biesheuvel rk.l[1] ^= st.l[1]; 589d935509SArd Biesheuvel 599d935509SArd Biesheuvel /* combine ShiftRows operation and sbox substitution */ 609d935509SArd Biesheuvel for (i = 0; i < 16; i++) { 61*0f23908cSRichard Henderson CR_ST_BYTE(st, i) = sbox[CR_ST_BYTE(rk, shift[i])]; 629d935509SArd Biesheuvel } 639d935509SArd Biesheuvel 641a66ac61SRichard Henderson rd[0] = st.l[0]; 651a66ac61SRichard Henderson rd[1] = st.l[1]; 669d935509SArd Biesheuvel } 679d935509SArd Biesheuvel 68a04b68e1SRichard Henderson void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) 69a04b68e1SRichard Henderson { 70a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 71a04b68e1SRichard Henderson 72a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 73*0f23908cSRichard Henderson do_crypto_aese(vd + i, vn + i, vm + i, AES_sbox, AES_shifts); 74a04b68e1SRichard Henderson } 75a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 76a04b68e1SRichard Henderson } 77a04b68e1SRichard Henderson 78*0f23908cSRichard Henderson void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc) 79*0f23908cSRichard Henderson { 80*0f23908cSRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 81*0f23908cSRichard Henderson 82*0f23908cSRichard Henderson for (i = 0; i < opr_sz; i += 16) { 83*0f23908cSRichard Henderson do_crypto_aese(vd + i, vn + i, vm + i, AES_isbox, AES_ishifts); 84*0f23908cSRichard Henderson } 85*0f23908cSRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 86*0f23908cSRichard Henderson } 87*0f23908cSRichard Henderson 88*0f23908cSRichard Henderson static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, const uint32_t *mc) 899d935509SArd Biesheuvel { 901a66ac61SRichard Henderson union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; 919d935509SArd Biesheuvel int i; 929d935509SArd Biesheuvel 939d935509SArd Biesheuvel for (i = 0; i < 16; i += 4) { 94b449ca3cSArd Biesheuvel CR_ST_WORD(st, i >> 2) = 95fb250c59SRichard Henderson mc[CR_ST_BYTE(st, i)] ^ 96fb250c59SRichard Henderson rol32(mc[CR_ST_BYTE(st, i + 1)], 8) ^ 97fb250c59SRichard Henderson rol32(mc[CR_ST_BYTE(st, i + 2)], 16) ^ 98fb250c59SRichard Henderson rol32(mc[CR_ST_BYTE(st, i + 3)], 24); 999d935509SArd Biesheuvel } 1009d935509SArd Biesheuvel 1011a66ac61SRichard Henderson rd[0] = st.l[0]; 1021a66ac61SRichard Henderson rd[1] = st.l[1]; 1039d935509SArd Biesheuvel } 104f1ecb913SArd Biesheuvel 105a04b68e1SRichard Henderson void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) 106a04b68e1SRichard Henderson { 107a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 108a04b68e1SRichard Henderson 109a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 110*0f23908cSRichard Henderson do_crypto_aesmc(vd + i, vm + i, AES_mc_rot); 111*0f23908cSRichard Henderson } 112*0f23908cSRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 113*0f23908cSRichard Henderson } 114*0f23908cSRichard Henderson 115*0f23908cSRichard Henderson void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc) 116*0f23908cSRichard Henderson { 117*0f23908cSRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 118*0f23908cSRichard Henderson 119*0f23908cSRichard Henderson for (i = 0; i < opr_sz; i += 16) { 120*0f23908cSRichard Henderson do_crypto_aesmc(vd + i, vm + i, AES_imc_rot); 121a04b68e1SRichard Henderson } 122a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 123a04b68e1SRichard Henderson } 124a04b68e1SRichard Henderson 125f1ecb913SArd Biesheuvel /* 126f1ecb913SArd Biesheuvel * SHA-1 logical functions 127f1ecb913SArd Biesheuvel */ 128f1ecb913SArd Biesheuvel 129f1ecb913SArd Biesheuvel static uint32_t cho(uint32_t x, uint32_t y, uint32_t z) 130f1ecb913SArd Biesheuvel { 131f1ecb913SArd Biesheuvel return (x & (y ^ z)) ^ z; 132f1ecb913SArd Biesheuvel } 133f1ecb913SArd Biesheuvel 134f1ecb913SArd Biesheuvel static uint32_t par(uint32_t x, uint32_t y, uint32_t z) 135f1ecb913SArd Biesheuvel { 136f1ecb913SArd Biesheuvel return x ^ y ^ z; 137f1ecb913SArd Biesheuvel } 138f1ecb913SArd Biesheuvel 139f1ecb913SArd Biesheuvel static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) 140f1ecb913SArd Biesheuvel { 141f1ecb913SArd Biesheuvel return (x & y) | ((x | y) & z); 142f1ecb913SArd Biesheuvel } 143f1ecb913SArd Biesheuvel 144afc8b7d3SRichard Henderson void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc) 145f1ecb913SArd Biesheuvel { 146afc8b7d3SRichard Henderson uint64_t *d = vd, *n = vn, *m = vm; 147afc8b7d3SRichard Henderson uint64_t d0, d1; 148afc8b7d3SRichard Henderson 149afc8b7d3SRichard Henderson d0 = d[1] ^ d[0] ^ m[0]; 150afc8b7d3SRichard Henderson d1 = n[0] ^ d[1] ^ m[1]; 151afc8b7d3SRichard Henderson d[0] = d0; 152afc8b7d3SRichard Henderson d[1] = d1; 153afc8b7d3SRichard Henderson 154afc8b7d3SRichard Henderson clear_tail_16(vd, desc); 155afc8b7d3SRichard Henderson } 156afc8b7d3SRichard Henderson 157afc8b7d3SRichard Henderson static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn, 158afc8b7d3SRichard Henderson uint64_t *rm, uint32_t desc, 159afc8b7d3SRichard Henderson uint32_t (*fn)(union CRYPTO_STATE *d)) 160afc8b7d3SRichard Henderson { 1611a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 1621a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 1631a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 164f1ecb913SArd Biesheuvel int i; 165f1ecb913SArd Biesheuvel 166f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 167afc8b7d3SRichard Henderson uint32_t t = fn(&d); 168f1ecb913SArd Biesheuvel 169b449ca3cSArd Biesheuvel t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) 170b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 171f1ecb913SArd Biesheuvel 172b449ca3cSArd Biesheuvel CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); 173b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 174b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); 175b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 176b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = t; 177f1ecb913SArd Biesheuvel } 1781a66ac61SRichard Henderson rd[0] = d.l[0]; 1791a66ac61SRichard Henderson rd[1] = d.l[1]; 180afc8b7d3SRichard Henderson 181afc8b7d3SRichard Henderson clear_tail_16(rd, desc); 182afc8b7d3SRichard Henderson } 183afc8b7d3SRichard Henderson 184afc8b7d3SRichard Henderson static uint32_t do_sha1c(union CRYPTO_STATE *d) 185afc8b7d3SRichard Henderson { 186afc8b7d3SRichard Henderson return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 187afc8b7d3SRichard Henderson } 188afc8b7d3SRichard Henderson 189afc8b7d3SRichard Henderson void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc) 190afc8b7d3SRichard Henderson { 191afc8b7d3SRichard Henderson crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c); 192afc8b7d3SRichard Henderson } 193afc8b7d3SRichard Henderson 194afc8b7d3SRichard Henderson static uint32_t do_sha1p(union CRYPTO_STATE *d) 195afc8b7d3SRichard Henderson { 196afc8b7d3SRichard Henderson return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 197afc8b7d3SRichard Henderson } 198afc8b7d3SRichard Henderson 199afc8b7d3SRichard Henderson void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc) 200afc8b7d3SRichard Henderson { 201afc8b7d3SRichard Henderson crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p); 202afc8b7d3SRichard Henderson } 203afc8b7d3SRichard Henderson 204afc8b7d3SRichard Henderson static uint32_t do_sha1m(union CRYPTO_STATE *d) 205afc8b7d3SRichard Henderson { 206afc8b7d3SRichard Henderson return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 207afc8b7d3SRichard Henderson } 208afc8b7d3SRichard Henderson 209afc8b7d3SRichard Henderson void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc) 210afc8b7d3SRichard Henderson { 211afc8b7d3SRichard Henderson crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m); 212f1ecb913SArd Biesheuvel } 213f1ecb913SArd Biesheuvel 214effa992fSRichard Henderson void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc) 215f1ecb913SArd Biesheuvel { 2161a66ac61SRichard Henderson uint64_t *rd = vd; 2171a66ac61SRichard Henderson uint64_t *rm = vm; 2181a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 219f1ecb913SArd Biesheuvel 220b449ca3cSArd Biesheuvel CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2); 221b449ca3cSArd Biesheuvel CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0; 222f1ecb913SArd Biesheuvel 2231a66ac61SRichard Henderson rd[0] = m.l[0]; 2241a66ac61SRichard Henderson rd[1] = m.l[1]; 225effa992fSRichard Henderson 226effa992fSRichard Henderson clear_tail_16(vd, desc); 227f1ecb913SArd Biesheuvel } 228f1ecb913SArd Biesheuvel 229effa992fSRichard Henderson void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc) 230f1ecb913SArd Biesheuvel { 2311a66ac61SRichard Henderson uint64_t *rd = vd; 2321a66ac61SRichard Henderson uint64_t *rm = vm; 2331a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 2341a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 235f1ecb913SArd Biesheuvel 236b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1); 237b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1); 238b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1); 239b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1); 240f1ecb913SArd Biesheuvel 2411a66ac61SRichard Henderson rd[0] = d.l[0]; 2421a66ac61SRichard Henderson rd[1] = d.l[1]; 243effa992fSRichard Henderson 244effa992fSRichard Henderson clear_tail_16(vd, desc); 245f1ecb913SArd Biesheuvel } 246f1ecb913SArd Biesheuvel 247f1ecb913SArd Biesheuvel /* 248f1ecb913SArd Biesheuvel * The SHA-256 logical functions, according to 249f1ecb913SArd Biesheuvel * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf 250f1ecb913SArd Biesheuvel */ 251f1ecb913SArd Biesheuvel 252f1ecb913SArd Biesheuvel static uint32_t S0(uint32_t x) 253f1ecb913SArd Biesheuvel { 254f1ecb913SArd Biesheuvel return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); 255f1ecb913SArd Biesheuvel } 256f1ecb913SArd Biesheuvel 257f1ecb913SArd Biesheuvel static uint32_t S1(uint32_t x) 258f1ecb913SArd Biesheuvel { 259f1ecb913SArd Biesheuvel return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); 260f1ecb913SArd Biesheuvel } 261f1ecb913SArd Biesheuvel 262f1ecb913SArd Biesheuvel static uint32_t s0(uint32_t x) 263f1ecb913SArd Biesheuvel { 264f1ecb913SArd Biesheuvel return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); 265f1ecb913SArd Biesheuvel } 266f1ecb913SArd Biesheuvel 267f1ecb913SArd Biesheuvel static uint32_t s1(uint32_t x) 268f1ecb913SArd Biesheuvel { 269f1ecb913SArd Biesheuvel return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); 270f1ecb913SArd Biesheuvel } 271f1ecb913SArd Biesheuvel 272effa992fSRichard Henderson void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc) 273f1ecb913SArd Biesheuvel { 2741a66ac61SRichard Henderson uint64_t *rd = vd; 2751a66ac61SRichard Henderson uint64_t *rn = vn; 2761a66ac61SRichard Henderson uint64_t *rm = vm; 2771a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 2781a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 2791a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 280f1ecb913SArd Biesheuvel int i; 281f1ecb913SArd Biesheuvel 282f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 283b449ca3cSArd Biesheuvel uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2)) 284b449ca3cSArd Biesheuvel + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0)) 285b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 286f1ecb913SArd Biesheuvel 287b449ca3cSArd Biesheuvel CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2); 288b449ca3cSArd Biesheuvel CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1); 289b449ca3cSArd Biesheuvel CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0); 290b449ca3cSArd Biesheuvel CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t; 291f1ecb913SArd Biesheuvel 292b449ca3cSArd Biesheuvel t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 293b449ca3cSArd Biesheuvel + S0(CR_ST_WORD(d, 0)); 294f1ecb913SArd Biesheuvel 295b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 296b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 297b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 298b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = t; 299f1ecb913SArd Biesheuvel } 300f1ecb913SArd Biesheuvel 3011a66ac61SRichard Henderson rd[0] = d.l[0]; 3021a66ac61SRichard Henderson rd[1] = d.l[1]; 303effa992fSRichard Henderson 304effa992fSRichard Henderson clear_tail_16(vd, desc); 305f1ecb913SArd Biesheuvel } 306f1ecb913SArd Biesheuvel 307effa992fSRichard Henderson void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc) 308f1ecb913SArd Biesheuvel { 3091a66ac61SRichard Henderson uint64_t *rd = vd; 3101a66ac61SRichard Henderson uint64_t *rn = vn; 3111a66ac61SRichard Henderson uint64_t *rm = vm; 3121a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3131a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 3141a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 315f1ecb913SArd Biesheuvel int i; 316f1ecb913SArd Biesheuvel 317f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 318b449ca3cSArd Biesheuvel uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 319b449ca3cSArd Biesheuvel + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0)) 320b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 321f1ecb913SArd Biesheuvel 322b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 323b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 324b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 325b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t; 326f1ecb913SArd Biesheuvel } 327f1ecb913SArd Biesheuvel 3281a66ac61SRichard Henderson rd[0] = d.l[0]; 3291a66ac61SRichard Henderson rd[1] = d.l[1]; 330effa992fSRichard Henderson 331effa992fSRichard Henderson clear_tail_16(vd, desc); 332f1ecb913SArd Biesheuvel } 333f1ecb913SArd Biesheuvel 334effa992fSRichard Henderson void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc) 335f1ecb913SArd Biesheuvel { 3361a66ac61SRichard Henderson uint64_t *rd = vd; 3371a66ac61SRichard Henderson uint64_t *rm = vm; 3381a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3391a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 340f1ecb913SArd Biesheuvel 341b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1)); 342b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2)); 343b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3)); 344b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0)); 345f1ecb913SArd Biesheuvel 3461a66ac61SRichard Henderson rd[0] = d.l[0]; 3471a66ac61SRichard Henderson rd[1] = d.l[1]; 348effa992fSRichard Henderson 349effa992fSRichard Henderson clear_tail_16(vd, desc); 350f1ecb913SArd Biesheuvel } 351f1ecb913SArd Biesheuvel 352effa992fSRichard Henderson void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc) 353f1ecb913SArd Biesheuvel { 3541a66ac61SRichard Henderson uint64_t *rd = vd; 3551a66ac61SRichard Henderson uint64_t *rn = vn; 3561a66ac61SRichard Henderson uint64_t *rm = vm; 3571a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3581a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 3591a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 360f1ecb913SArd Biesheuvel 361b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1); 362b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2); 363b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3); 364b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0); 365f1ecb913SArd Biesheuvel 3661a66ac61SRichard Henderson rd[0] = d.l[0]; 3671a66ac61SRichard Henderson rd[1] = d.l[1]; 368effa992fSRichard Henderson 369effa992fSRichard Henderson clear_tail_16(vd, desc); 370f1ecb913SArd Biesheuvel } 37190b827d1SArd Biesheuvel 37290b827d1SArd Biesheuvel /* 37390b827d1SArd Biesheuvel * The SHA-512 logical functions (same as above but using 64-bit operands) 37490b827d1SArd Biesheuvel */ 37590b827d1SArd Biesheuvel 37690b827d1SArd Biesheuvel static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z) 37790b827d1SArd Biesheuvel { 37890b827d1SArd Biesheuvel return (x & (y ^ z)) ^ z; 37990b827d1SArd Biesheuvel } 38090b827d1SArd Biesheuvel 38190b827d1SArd Biesheuvel static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z) 38290b827d1SArd Biesheuvel { 38390b827d1SArd Biesheuvel return (x & y) | ((x | y) & z); 38490b827d1SArd Biesheuvel } 38590b827d1SArd Biesheuvel 38690b827d1SArd Biesheuvel static uint64_t S0_512(uint64_t x) 38790b827d1SArd Biesheuvel { 38890b827d1SArd Biesheuvel return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); 38990b827d1SArd Biesheuvel } 39090b827d1SArd Biesheuvel 39190b827d1SArd Biesheuvel static uint64_t S1_512(uint64_t x) 39290b827d1SArd Biesheuvel { 39390b827d1SArd Biesheuvel return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); 39490b827d1SArd Biesheuvel } 39590b827d1SArd Biesheuvel 39690b827d1SArd Biesheuvel static uint64_t s0_512(uint64_t x) 39790b827d1SArd Biesheuvel { 39890b827d1SArd Biesheuvel return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); 39990b827d1SArd Biesheuvel } 40090b827d1SArd Biesheuvel 40190b827d1SArd Biesheuvel static uint64_t s1_512(uint64_t x) 40290b827d1SArd Biesheuvel { 40390b827d1SArd Biesheuvel return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); 40490b827d1SArd Biesheuvel } 40590b827d1SArd Biesheuvel 406aaffebd6SRichard Henderson void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) 40790b827d1SArd Biesheuvel { 40890b827d1SArd Biesheuvel uint64_t *rd = vd; 40990b827d1SArd Biesheuvel uint64_t *rn = vn; 41090b827d1SArd Biesheuvel uint64_t *rm = vm; 41190b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 41290b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 41390b827d1SArd Biesheuvel 41490b827d1SArd Biesheuvel d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]); 41590b827d1SArd Biesheuvel d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]); 41690b827d1SArd Biesheuvel 41790b827d1SArd Biesheuvel rd[0] = d0; 41890b827d1SArd Biesheuvel rd[1] = d1; 419aaffebd6SRichard Henderson 420aaffebd6SRichard Henderson clear_tail_16(vd, desc); 42190b827d1SArd Biesheuvel } 42290b827d1SArd Biesheuvel 423aaffebd6SRichard Henderson void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) 42490b827d1SArd Biesheuvel { 42590b827d1SArd Biesheuvel uint64_t *rd = vd; 42690b827d1SArd Biesheuvel uint64_t *rn = vn; 42790b827d1SArd Biesheuvel uint64_t *rm = vm; 42890b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 42990b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 43090b827d1SArd Biesheuvel 43190b827d1SArd Biesheuvel d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]); 43290b827d1SArd Biesheuvel d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]); 43390b827d1SArd Biesheuvel 43490b827d1SArd Biesheuvel rd[0] = d0; 43590b827d1SArd Biesheuvel rd[1] = d1; 436aaffebd6SRichard Henderson 437aaffebd6SRichard Henderson clear_tail_16(vd, desc); 43890b827d1SArd Biesheuvel } 43990b827d1SArd Biesheuvel 440aaffebd6SRichard Henderson void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) 44190b827d1SArd Biesheuvel { 44290b827d1SArd Biesheuvel uint64_t *rd = vd; 44390b827d1SArd Biesheuvel uint64_t *rn = vn; 44490b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 44590b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 44690b827d1SArd Biesheuvel 44790b827d1SArd Biesheuvel d0 += s0_512(rd[1]); 44890b827d1SArd Biesheuvel d1 += s0_512(rn[0]); 44990b827d1SArd Biesheuvel 45090b827d1SArd Biesheuvel rd[0] = d0; 45190b827d1SArd Biesheuvel rd[1] = d1; 452aaffebd6SRichard Henderson 453aaffebd6SRichard Henderson clear_tail_16(vd, desc); 45490b827d1SArd Biesheuvel } 45590b827d1SArd Biesheuvel 456aaffebd6SRichard Henderson void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) 45790b827d1SArd Biesheuvel { 45890b827d1SArd Biesheuvel uint64_t *rd = vd; 45990b827d1SArd Biesheuvel uint64_t *rn = vn; 46090b827d1SArd Biesheuvel uint64_t *rm = vm; 46190b827d1SArd Biesheuvel 46290b827d1SArd Biesheuvel rd[0] += s1_512(rn[0]) + rm[0]; 46390b827d1SArd Biesheuvel rd[1] += s1_512(rn[1]) + rm[1]; 464aaffebd6SRichard Henderson 465aaffebd6SRichard Henderson clear_tail_16(vd, desc); 46690b827d1SArd Biesheuvel } 46780d6f4c6SArd Biesheuvel 468aaffebd6SRichard Henderson void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) 46980d6f4c6SArd Biesheuvel { 47080d6f4c6SArd Biesheuvel uint64_t *rd = vd; 47180d6f4c6SArd Biesheuvel uint64_t *rn = vn; 47280d6f4c6SArd Biesheuvel uint64_t *rm = vm; 47380d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 47480d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 47580d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 47680d6f4c6SArd Biesheuvel uint32_t t; 47780d6f4c6SArd Biesheuvel 47880d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17); 47980d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9); 48080d6f4c6SArd Biesheuvel 48180d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17); 48280d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9); 48380d6f4c6SArd Biesheuvel 48480d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17); 48580d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9); 48680d6f4c6SArd Biesheuvel 48780d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17); 48880d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9); 48980d6f4c6SArd Biesheuvel 49080d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 49180d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 492aaffebd6SRichard Henderson 493aaffebd6SRichard Henderson clear_tail_16(vd, desc); 49480d6f4c6SArd Biesheuvel } 49580d6f4c6SArd Biesheuvel 496aaffebd6SRichard Henderson void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) 49780d6f4c6SArd Biesheuvel { 49880d6f4c6SArd Biesheuvel uint64_t *rd = vd; 49980d6f4c6SArd Biesheuvel uint64_t *rn = vn; 50080d6f4c6SArd Biesheuvel uint64_t *rm = vm; 50180d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 50280d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 50380d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 50480d6f4c6SArd Biesheuvel uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25); 50580d6f4c6SArd Biesheuvel 50680d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) ^= t; 50780d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25); 50880d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25); 50980d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^ 51080d6f4c6SArd Biesheuvel ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26); 51180d6f4c6SArd Biesheuvel 51280d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 51380d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 514aaffebd6SRichard Henderson 515aaffebd6SRichard Henderson clear_tail_16(vd, desc); 51680d6f4c6SArd Biesheuvel } 51780d6f4c6SArd Biesheuvel 51843fa36c9SRichard Henderson static inline void QEMU_ALWAYS_INLINE 51943fa36c9SRichard Henderson crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm, 52043fa36c9SRichard Henderson uint32_t desc, uint32_t opcode) 52180d6f4c6SArd Biesheuvel { 52280d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 52380d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 52480d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 52543fa36c9SRichard Henderson uint32_t imm2 = simd_data(desc); 52680d6f4c6SArd Biesheuvel uint32_t t; 52780d6f4c6SArd Biesheuvel 52880d6f4c6SArd Biesheuvel assert(imm2 < 4); 52980d6f4c6SArd Biesheuvel 53080d6f4c6SArd Biesheuvel if (opcode == 0 || opcode == 2) { 53180d6f4c6SArd Biesheuvel /* SM3TT1A, SM3TT2A */ 53280d6f4c6SArd Biesheuvel t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 53380d6f4c6SArd Biesheuvel } else if (opcode == 1) { 53480d6f4c6SArd Biesheuvel /* SM3TT1B */ 53580d6f4c6SArd Biesheuvel t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 53680d6f4c6SArd Biesheuvel } else if (opcode == 3) { 53780d6f4c6SArd Biesheuvel /* SM3TT2B */ 53880d6f4c6SArd Biesheuvel t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 53980d6f4c6SArd Biesheuvel } else { 54043fa36c9SRichard Henderson qemu_build_not_reached(); 54180d6f4c6SArd Biesheuvel } 54280d6f4c6SArd Biesheuvel 54380d6f4c6SArd Biesheuvel t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); 54480d6f4c6SArd Biesheuvel 54580d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1); 54680d6f4c6SArd Biesheuvel 54780d6f4c6SArd Biesheuvel if (opcode < 2) { 54880d6f4c6SArd Biesheuvel /* SM3TT1A, SM3TT1B */ 54980d6f4c6SArd Biesheuvel t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20); 55080d6f4c6SArd Biesheuvel 55180d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23); 55280d6f4c6SArd Biesheuvel } else { 55380d6f4c6SArd Biesheuvel /* SM3TT2A, SM3TT2B */ 55480d6f4c6SArd Biesheuvel t += CR_ST_WORD(n, 3); 55580d6f4c6SArd Biesheuvel t ^= rol32(t, 9) ^ rol32(t, 17); 55680d6f4c6SArd Biesheuvel 55780d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13); 55880d6f4c6SArd Biesheuvel } 55980d6f4c6SArd Biesheuvel 56080d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3); 56180d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) = t; 56280d6f4c6SArd Biesheuvel 56380d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 56480d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 56543fa36c9SRichard Henderson 56643fa36c9SRichard Henderson clear_tail_16(rd, desc); 56780d6f4c6SArd Biesheuvel } 568b6577bcdSArd Biesheuvel 56943fa36c9SRichard Henderson #define DO_SM3TT(NAME, OPCODE) \ 57043fa36c9SRichard Henderson void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ 57143fa36c9SRichard Henderson { crypto_sm3tt(vd, vn, vm, desc, OPCODE); } 57243fa36c9SRichard Henderson 57343fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt1a, 0) 57443fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt1b, 1) 57543fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt2a, 2) 57643fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt2b, 3) 57743fa36c9SRichard Henderson 57843fa36c9SRichard Henderson #undef DO_SM3TT 57943fa36c9SRichard Henderson 580a04b68e1SRichard Henderson static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) 581b6577bcdSArd Biesheuvel { 582a04b68e1SRichard Henderson union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; 583a04b68e1SRichard Henderson union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; 584b6577bcdSArd Biesheuvel uint32_t t, i; 585b6577bcdSArd Biesheuvel 586b6577bcdSArd Biesheuvel for (i = 0; i < 4; i++) { 587b6577bcdSArd Biesheuvel t = CR_ST_WORD(d, (i + 1) % 4) ^ 588b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 2) % 4) ^ 589b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 3) % 4) ^ 590b6577bcdSArd Biesheuvel CR_ST_WORD(n, i); 591b6577bcdSArd Biesheuvel 592b6577bcdSArd Biesheuvel t = sm4_sbox[t & 0xff] | 593b6577bcdSArd Biesheuvel sm4_sbox[(t >> 8) & 0xff] << 8 | 594b6577bcdSArd Biesheuvel sm4_sbox[(t >> 16) & 0xff] << 16 | 595b6577bcdSArd Biesheuvel sm4_sbox[(t >> 24) & 0xff] << 24; 596b6577bcdSArd Biesheuvel 597b6577bcdSArd Biesheuvel CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ 598b6577bcdSArd Biesheuvel rol32(t, 24); 599b6577bcdSArd Biesheuvel } 600b6577bcdSArd Biesheuvel 601b6577bcdSArd Biesheuvel rd[0] = d.l[0]; 602b6577bcdSArd Biesheuvel rd[1] = d.l[1]; 603b6577bcdSArd Biesheuvel } 604b6577bcdSArd Biesheuvel 605a04b68e1SRichard Henderson void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) 606b6577bcdSArd Biesheuvel { 607a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 608a04b68e1SRichard Henderson 609a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 610a04b68e1SRichard Henderson do_crypto_sm4e(vd + i, vn + i, vm + i); 611a04b68e1SRichard Henderson } 612a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 613a04b68e1SRichard Henderson } 614a04b68e1SRichard Henderson 615a04b68e1SRichard Henderson static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) 616a04b68e1SRichard Henderson { 617b6577bcdSArd Biesheuvel union CRYPTO_STATE d; 618b6577bcdSArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 619b6577bcdSArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 620b6577bcdSArd Biesheuvel uint32_t t, i; 621b6577bcdSArd Biesheuvel 622b6577bcdSArd Biesheuvel d = n; 623b6577bcdSArd Biesheuvel for (i = 0; i < 4; i++) { 624b6577bcdSArd Biesheuvel t = CR_ST_WORD(d, (i + 1) % 4) ^ 625b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 2) % 4) ^ 626b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 3) % 4) ^ 627b6577bcdSArd Biesheuvel CR_ST_WORD(m, i); 628b6577bcdSArd Biesheuvel 629b6577bcdSArd Biesheuvel t = sm4_sbox[t & 0xff] | 630b6577bcdSArd Biesheuvel sm4_sbox[(t >> 8) & 0xff] << 8 | 631b6577bcdSArd Biesheuvel sm4_sbox[(t >> 16) & 0xff] << 16 | 632b6577bcdSArd Biesheuvel sm4_sbox[(t >> 24) & 0xff] << 24; 633b6577bcdSArd Biesheuvel 634b6577bcdSArd Biesheuvel CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); 635b6577bcdSArd Biesheuvel } 636b6577bcdSArd Biesheuvel 637b6577bcdSArd Biesheuvel rd[0] = d.l[0]; 638b6577bcdSArd Biesheuvel rd[1] = d.l[1]; 639b6577bcdSArd Biesheuvel } 640a04b68e1SRichard Henderson 641a04b68e1SRichard Henderson void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) 642a04b68e1SRichard Henderson { 643a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 644a04b68e1SRichard Henderson 645a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 646a04b68e1SRichard Henderson do_crypto_sm4ekey(vd + i, vn + i, vm + i); 647a04b68e1SRichard Henderson } 648a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 649a04b68e1SRichard Henderson } 6501738860dSRichard Henderson 6511738860dSRichard Henderson void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) 6521738860dSRichard Henderson { 6531738860dSRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 6541738860dSRichard Henderson uint64_t *d = vd, *n = vn, *m = vm; 6551738860dSRichard Henderson 6561738860dSRichard Henderson for (i = 0; i < opr_sz / 8; ++i) { 6571738860dSRichard Henderson d[i] = n[i] ^ rol64(m[i], 1); 6581738860dSRichard Henderson } 6591738860dSRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 6601738860dSRichard Henderson } 661