19d935509SArd Biesheuvel /* 29d935509SArd Biesheuvel * crypto_helper.c - emulate v8 Crypto Extensions instructions 39d935509SArd Biesheuvel * 490b827d1SArd Biesheuvel * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> 59d935509SArd Biesheuvel * 69d935509SArd Biesheuvel * This library is free software; you can redistribute it and/or 79d935509SArd Biesheuvel * modify it under the terms of the GNU Lesser General Public 89d935509SArd Biesheuvel * License as published by the Free Software Foundation; either 950f57e09SChetan Pant * version 2.1 of the License, or (at your option) any later version. 109d935509SArd Biesheuvel */ 119d935509SArd Biesheuvel 1274c21bd0SPeter Maydell #include "qemu/osdep.h" 139d935509SArd Biesheuvel 149d935509SArd Biesheuvel #include "cpu.h" 152ef6175aSRichard Henderson #include "exec/helper-proto.h" 16a04b68e1SRichard Henderson #include "tcg/tcg-gvec-desc.h" 176f2945cdSDaniel P. Berrange #include "crypto/aes.h" 18552d8924SRichard Henderson #include "crypto/aes-round.h" 19c29da5a7SWeiwei Li #include "crypto/sm4.h" 20a04b68e1SRichard Henderson #include "vec_internal.h" 219d935509SArd Biesheuvel 22f1ecb913SArd Biesheuvel union CRYPTO_STATE { 239d935509SArd Biesheuvel uint8_t bytes[16]; 24f1ecb913SArd Biesheuvel uint32_t words[4]; 259d935509SArd Biesheuvel uint64_t l[2]; 269d935509SArd Biesheuvel }; 279d935509SArd Biesheuvel 28e03b5686SMarc-André Lureau #if HOST_BIG_ENDIAN 29afc8b7d3SRichard Henderson #define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8]) 30afc8b7d3SRichard Henderson #define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2]) 31b449ca3cSArd Biesheuvel #else 32afc8b7d3SRichard Henderson #define CR_ST_BYTE(state, i) ((state).bytes[i]) 33afc8b7d3SRichard Henderson #define CR_ST_WORD(state, i) ((state).words[i]) 34b449ca3cSArd Biesheuvel #endif 35b449ca3cSArd Biesheuvel 36aaffebd6SRichard Henderson /* 37aaffebd6SRichard Henderson * The caller has not been converted to full gvec, and so only 38aaffebd6SRichard Henderson * modifies the low 16 bytes of the vector register. 39aaffebd6SRichard Henderson */ 40aaffebd6SRichard Henderson static void clear_tail_16(void *vd, uint32_t desc) 41aaffebd6SRichard Henderson { 42aaffebd6SRichard Henderson int opr_sz = simd_oprsz(desc); 43aaffebd6SRichard Henderson int max_sz = simd_maxsz(desc); 44aaffebd6SRichard Henderson 45aaffebd6SRichard Henderson assert(opr_sz == 16); 46aaffebd6SRichard Henderson clear_tail(vd, opr_sz, max_sz); 47aaffebd6SRichard Henderson } 48aaffebd6SRichard Henderson 49552d8924SRichard Henderson static const AESState aes_zero = { }; 50552d8924SRichard Henderson 51a04b68e1SRichard Henderson void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) 52a04b68e1SRichard Henderson { 53a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 54a04b68e1SRichard Henderson 55a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 56552d8924SRichard Henderson AESState *ad = (AESState *)(vd + i); 57552d8924SRichard Henderson AESState *st = (AESState *)(vn + i); 58552d8924SRichard Henderson AESState *rk = (AESState *)(vm + i); 59552d8924SRichard Henderson AESState t; 60552d8924SRichard Henderson 61552d8924SRichard Henderson /* 62552d8924SRichard Henderson * Our uint64_t are in the wrong order for big-endian. 63552d8924SRichard Henderson * The Arm AddRoundKey comes first, while the API AddRoundKey 64552d8924SRichard Henderson * comes last: perform the xor here, and provide zero to API. 65552d8924SRichard Henderson */ 66552d8924SRichard Henderson if (HOST_BIG_ENDIAN) { 67552d8924SRichard Henderson t.d[0] = st->d[1] ^ rk->d[1]; 68552d8924SRichard Henderson t.d[1] = st->d[0] ^ rk->d[0]; 69552d8924SRichard Henderson aesenc_SB_SR_AK(&t, &t, &aes_zero, false); 70552d8924SRichard Henderson ad->d[0] = t.d[1]; 71552d8924SRichard Henderson ad->d[1] = t.d[0]; 72552d8924SRichard Henderson } else { 73552d8924SRichard Henderson t.v = st->v ^ rk->v; 74552d8924SRichard Henderson aesenc_SB_SR_AK(ad, &t, &aes_zero, false); 75552d8924SRichard Henderson } 76a04b68e1SRichard Henderson } 77a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 78a04b68e1SRichard Henderson } 79a04b68e1SRichard Henderson 800f23908cSRichard Henderson void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc) 810f23908cSRichard Henderson { 820f23908cSRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 830f23908cSRichard Henderson 840f23908cSRichard Henderson for (i = 0; i < opr_sz; i += 16) { 85*2a8b545fSRichard Henderson AESState *ad = (AESState *)(vd + i); 86*2a8b545fSRichard Henderson AESState *st = (AESState *)(vn + i); 87*2a8b545fSRichard Henderson AESState *rk = (AESState *)(vm + i); 88*2a8b545fSRichard Henderson AESState t; 89*2a8b545fSRichard Henderson 90*2a8b545fSRichard Henderson /* Our uint64_t are in the wrong order for big-endian. */ 91*2a8b545fSRichard Henderson if (HOST_BIG_ENDIAN) { 92*2a8b545fSRichard Henderson t.d[0] = st->d[1] ^ rk->d[1]; 93*2a8b545fSRichard Henderson t.d[1] = st->d[0] ^ rk->d[0]; 94*2a8b545fSRichard Henderson aesdec_ISB_ISR_AK(&t, &t, &aes_zero, false); 95*2a8b545fSRichard Henderson ad->d[0] = t.d[1]; 96*2a8b545fSRichard Henderson ad->d[1] = t.d[0]; 97*2a8b545fSRichard Henderson } else { 98*2a8b545fSRichard Henderson t.v = st->v ^ rk->v; 99*2a8b545fSRichard Henderson aesdec_ISB_ISR_AK(ad, &t, &aes_zero, false); 100*2a8b545fSRichard Henderson } 1010f23908cSRichard Henderson } 1020f23908cSRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 1030f23908cSRichard Henderson } 1040f23908cSRichard Henderson 1050f23908cSRichard Henderson static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, const uint32_t *mc) 1069d935509SArd Biesheuvel { 1071a66ac61SRichard Henderson union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; 1089d935509SArd Biesheuvel int i; 1099d935509SArd Biesheuvel 1109d935509SArd Biesheuvel for (i = 0; i < 16; i += 4) { 111b449ca3cSArd Biesheuvel CR_ST_WORD(st, i >> 2) = 112fb250c59SRichard Henderson mc[CR_ST_BYTE(st, i)] ^ 113fb250c59SRichard Henderson rol32(mc[CR_ST_BYTE(st, i + 1)], 8) ^ 114fb250c59SRichard Henderson rol32(mc[CR_ST_BYTE(st, i + 2)], 16) ^ 115fb250c59SRichard Henderson rol32(mc[CR_ST_BYTE(st, i + 3)], 24); 1169d935509SArd Biesheuvel } 1179d935509SArd Biesheuvel 1181a66ac61SRichard Henderson rd[0] = st.l[0]; 1191a66ac61SRichard Henderson rd[1] = st.l[1]; 1209d935509SArd Biesheuvel } 121f1ecb913SArd Biesheuvel 122a04b68e1SRichard Henderson void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) 123a04b68e1SRichard Henderson { 124a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 125a04b68e1SRichard Henderson 126a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 1270f23908cSRichard Henderson do_crypto_aesmc(vd + i, vm + i, AES_mc_rot); 1280f23908cSRichard Henderson } 1290f23908cSRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 1300f23908cSRichard Henderson } 1310f23908cSRichard Henderson 1320f23908cSRichard Henderson void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc) 1330f23908cSRichard Henderson { 1340f23908cSRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 1350f23908cSRichard Henderson 1360f23908cSRichard Henderson for (i = 0; i < opr_sz; i += 16) { 1370f23908cSRichard Henderson do_crypto_aesmc(vd + i, vm + i, AES_imc_rot); 138a04b68e1SRichard Henderson } 139a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 140a04b68e1SRichard Henderson } 141a04b68e1SRichard Henderson 142f1ecb913SArd Biesheuvel /* 143f1ecb913SArd Biesheuvel * SHA-1 logical functions 144f1ecb913SArd Biesheuvel */ 145f1ecb913SArd Biesheuvel 146f1ecb913SArd Biesheuvel static uint32_t cho(uint32_t x, uint32_t y, uint32_t z) 147f1ecb913SArd Biesheuvel { 148f1ecb913SArd Biesheuvel return (x & (y ^ z)) ^ z; 149f1ecb913SArd Biesheuvel } 150f1ecb913SArd Biesheuvel 151f1ecb913SArd Biesheuvel static uint32_t par(uint32_t x, uint32_t y, uint32_t z) 152f1ecb913SArd Biesheuvel { 153f1ecb913SArd Biesheuvel return x ^ y ^ z; 154f1ecb913SArd Biesheuvel } 155f1ecb913SArd Biesheuvel 156f1ecb913SArd Biesheuvel static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) 157f1ecb913SArd Biesheuvel { 158f1ecb913SArd Biesheuvel return (x & y) | ((x | y) & z); 159f1ecb913SArd Biesheuvel } 160f1ecb913SArd Biesheuvel 161afc8b7d3SRichard Henderson void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc) 162f1ecb913SArd Biesheuvel { 163afc8b7d3SRichard Henderson uint64_t *d = vd, *n = vn, *m = vm; 164afc8b7d3SRichard Henderson uint64_t d0, d1; 165afc8b7d3SRichard Henderson 166afc8b7d3SRichard Henderson d0 = d[1] ^ d[0] ^ m[0]; 167afc8b7d3SRichard Henderson d1 = n[0] ^ d[1] ^ m[1]; 168afc8b7d3SRichard Henderson d[0] = d0; 169afc8b7d3SRichard Henderson d[1] = d1; 170afc8b7d3SRichard Henderson 171afc8b7d3SRichard Henderson clear_tail_16(vd, desc); 172afc8b7d3SRichard Henderson } 173afc8b7d3SRichard Henderson 174afc8b7d3SRichard Henderson static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn, 175afc8b7d3SRichard Henderson uint64_t *rm, uint32_t desc, 176afc8b7d3SRichard Henderson uint32_t (*fn)(union CRYPTO_STATE *d)) 177afc8b7d3SRichard Henderson { 1781a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 1791a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 1801a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 181f1ecb913SArd Biesheuvel int i; 182f1ecb913SArd Biesheuvel 183f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 184afc8b7d3SRichard Henderson uint32_t t = fn(&d); 185f1ecb913SArd Biesheuvel 186b449ca3cSArd Biesheuvel t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) 187b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 188f1ecb913SArd Biesheuvel 189b449ca3cSArd Biesheuvel CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); 190b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 191b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); 192b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 193b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = t; 194f1ecb913SArd Biesheuvel } 1951a66ac61SRichard Henderson rd[0] = d.l[0]; 1961a66ac61SRichard Henderson rd[1] = d.l[1]; 197afc8b7d3SRichard Henderson 198afc8b7d3SRichard Henderson clear_tail_16(rd, desc); 199afc8b7d3SRichard Henderson } 200afc8b7d3SRichard Henderson 201afc8b7d3SRichard Henderson static uint32_t do_sha1c(union CRYPTO_STATE *d) 202afc8b7d3SRichard Henderson { 203afc8b7d3SRichard Henderson return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 204afc8b7d3SRichard Henderson } 205afc8b7d3SRichard Henderson 206afc8b7d3SRichard Henderson void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc) 207afc8b7d3SRichard Henderson { 208afc8b7d3SRichard Henderson crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c); 209afc8b7d3SRichard Henderson } 210afc8b7d3SRichard Henderson 211afc8b7d3SRichard Henderson static uint32_t do_sha1p(union CRYPTO_STATE *d) 212afc8b7d3SRichard Henderson { 213afc8b7d3SRichard Henderson return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 214afc8b7d3SRichard Henderson } 215afc8b7d3SRichard Henderson 216afc8b7d3SRichard Henderson void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc) 217afc8b7d3SRichard Henderson { 218afc8b7d3SRichard Henderson crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p); 219afc8b7d3SRichard Henderson } 220afc8b7d3SRichard Henderson 221afc8b7d3SRichard Henderson static uint32_t do_sha1m(union CRYPTO_STATE *d) 222afc8b7d3SRichard Henderson { 223afc8b7d3SRichard Henderson return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 224afc8b7d3SRichard Henderson } 225afc8b7d3SRichard Henderson 226afc8b7d3SRichard Henderson void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc) 227afc8b7d3SRichard Henderson { 228afc8b7d3SRichard Henderson crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m); 229f1ecb913SArd Biesheuvel } 230f1ecb913SArd Biesheuvel 231effa992fSRichard Henderson void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc) 232f1ecb913SArd Biesheuvel { 2331a66ac61SRichard Henderson uint64_t *rd = vd; 2341a66ac61SRichard Henderson uint64_t *rm = vm; 2351a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 236f1ecb913SArd Biesheuvel 237b449ca3cSArd Biesheuvel CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2); 238b449ca3cSArd Biesheuvel CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0; 239f1ecb913SArd Biesheuvel 2401a66ac61SRichard Henderson rd[0] = m.l[0]; 2411a66ac61SRichard Henderson rd[1] = m.l[1]; 242effa992fSRichard Henderson 243effa992fSRichard Henderson clear_tail_16(vd, desc); 244f1ecb913SArd Biesheuvel } 245f1ecb913SArd Biesheuvel 246effa992fSRichard Henderson void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc) 247f1ecb913SArd Biesheuvel { 2481a66ac61SRichard Henderson uint64_t *rd = vd; 2491a66ac61SRichard Henderson uint64_t *rm = vm; 2501a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 2511a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 252f1ecb913SArd Biesheuvel 253b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1); 254b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1); 255b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1); 256b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1); 257f1ecb913SArd Biesheuvel 2581a66ac61SRichard Henderson rd[0] = d.l[0]; 2591a66ac61SRichard Henderson rd[1] = d.l[1]; 260effa992fSRichard Henderson 261effa992fSRichard Henderson clear_tail_16(vd, desc); 262f1ecb913SArd Biesheuvel } 263f1ecb913SArd Biesheuvel 264f1ecb913SArd Biesheuvel /* 265f1ecb913SArd Biesheuvel * The SHA-256 logical functions, according to 266f1ecb913SArd Biesheuvel * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf 267f1ecb913SArd Biesheuvel */ 268f1ecb913SArd Biesheuvel 269f1ecb913SArd Biesheuvel static uint32_t S0(uint32_t x) 270f1ecb913SArd Biesheuvel { 271f1ecb913SArd Biesheuvel return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); 272f1ecb913SArd Biesheuvel } 273f1ecb913SArd Biesheuvel 274f1ecb913SArd Biesheuvel static uint32_t S1(uint32_t x) 275f1ecb913SArd Biesheuvel { 276f1ecb913SArd Biesheuvel return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); 277f1ecb913SArd Biesheuvel } 278f1ecb913SArd Biesheuvel 279f1ecb913SArd Biesheuvel static uint32_t s0(uint32_t x) 280f1ecb913SArd Biesheuvel { 281f1ecb913SArd Biesheuvel return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); 282f1ecb913SArd Biesheuvel } 283f1ecb913SArd Biesheuvel 284f1ecb913SArd Biesheuvel static uint32_t s1(uint32_t x) 285f1ecb913SArd Biesheuvel { 286f1ecb913SArd Biesheuvel return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); 287f1ecb913SArd Biesheuvel } 288f1ecb913SArd Biesheuvel 289effa992fSRichard Henderson void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc) 290f1ecb913SArd Biesheuvel { 2911a66ac61SRichard Henderson uint64_t *rd = vd; 2921a66ac61SRichard Henderson uint64_t *rn = vn; 2931a66ac61SRichard Henderson uint64_t *rm = vm; 2941a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 2951a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 2961a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 297f1ecb913SArd Biesheuvel int i; 298f1ecb913SArd Biesheuvel 299f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 300b449ca3cSArd Biesheuvel uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2)) 301b449ca3cSArd Biesheuvel + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0)) 302b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 303f1ecb913SArd Biesheuvel 304b449ca3cSArd Biesheuvel CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2); 305b449ca3cSArd Biesheuvel CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1); 306b449ca3cSArd Biesheuvel CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0); 307b449ca3cSArd Biesheuvel CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t; 308f1ecb913SArd Biesheuvel 309b449ca3cSArd Biesheuvel t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 310b449ca3cSArd Biesheuvel + S0(CR_ST_WORD(d, 0)); 311f1ecb913SArd Biesheuvel 312b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 313b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 314b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 315b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = t; 316f1ecb913SArd Biesheuvel } 317f1ecb913SArd Biesheuvel 3181a66ac61SRichard Henderson rd[0] = d.l[0]; 3191a66ac61SRichard Henderson rd[1] = d.l[1]; 320effa992fSRichard Henderson 321effa992fSRichard Henderson clear_tail_16(vd, desc); 322f1ecb913SArd Biesheuvel } 323f1ecb913SArd Biesheuvel 324effa992fSRichard Henderson void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc) 325f1ecb913SArd Biesheuvel { 3261a66ac61SRichard Henderson uint64_t *rd = vd; 3271a66ac61SRichard Henderson uint64_t *rn = vn; 3281a66ac61SRichard Henderson uint64_t *rm = vm; 3291a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3301a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 3311a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 332f1ecb913SArd Biesheuvel int i; 333f1ecb913SArd Biesheuvel 334f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 335b449ca3cSArd Biesheuvel uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 336b449ca3cSArd Biesheuvel + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0)) 337b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 338f1ecb913SArd Biesheuvel 339b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 340b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 341b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 342b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t; 343f1ecb913SArd Biesheuvel } 344f1ecb913SArd Biesheuvel 3451a66ac61SRichard Henderson rd[0] = d.l[0]; 3461a66ac61SRichard Henderson rd[1] = d.l[1]; 347effa992fSRichard Henderson 348effa992fSRichard Henderson clear_tail_16(vd, desc); 349f1ecb913SArd Biesheuvel } 350f1ecb913SArd Biesheuvel 351effa992fSRichard Henderson void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc) 352f1ecb913SArd Biesheuvel { 3531a66ac61SRichard Henderson uint64_t *rd = vd; 3541a66ac61SRichard Henderson uint64_t *rm = vm; 3551a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3561a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 357f1ecb913SArd Biesheuvel 358b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1)); 359b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2)); 360b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3)); 361b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0)); 362f1ecb913SArd Biesheuvel 3631a66ac61SRichard Henderson rd[0] = d.l[0]; 3641a66ac61SRichard Henderson rd[1] = d.l[1]; 365effa992fSRichard Henderson 366effa992fSRichard Henderson clear_tail_16(vd, desc); 367f1ecb913SArd Biesheuvel } 368f1ecb913SArd Biesheuvel 369effa992fSRichard Henderson void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc) 370f1ecb913SArd Biesheuvel { 3711a66ac61SRichard Henderson uint64_t *rd = vd; 3721a66ac61SRichard Henderson uint64_t *rn = vn; 3731a66ac61SRichard Henderson uint64_t *rm = vm; 3741a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3751a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 3761a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 377f1ecb913SArd Biesheuvel 378b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1); 379b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2); 380b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3); 381b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0); 382f1ecb913SArd Biesheuvel 3831a66ac61SRichard Henderson rd[0] = d.l[0]; 3841a66ac61SRichard Henderson rd[1] = d.l[1]; 385effa992fSRichard Henderson 386effa992fSRichard Henderson clear_tail_16(vd, desc); 387f1ecb913SArd Biesheuvel } 38890b827d1SArd Biesheuvel 38990b827d1SArd Biesheuvel /* 39090b827d1SArd Biesheuvel * The SHA-512 logical functions (same as above but using 64-bit operands) 39190b827d1SArd Biesheuvel */ 39290b827d1SArd Biesheuvel 39390b827d1SArd Biesheuvel static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z) 39490b827d1SArd Biesheuvel { 39590b827d1SArd Biesheuvel return (x & (y ^ z)) ^ z; 39690b827d1SArd Biesheuvel } 39790b827d1SArd Biesheuvel 39890b827d1SArd Biesheuvel static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z) 39990b827d1SArd Biesheuvel { 40090b827d1SArd Biesheuvel return (x & y) | ((x | y) & z); 40190b827d1SArd Biesheuvel } 40290b827d1SArd Biesheuvel 40390b827d1SArd Biesheuvel static uint64_t S0_512(uint64_t x) 40490b827d1SArd Biesheuvel { 40590b827d1SArd Biesheuvel return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); 40690b827d1SArd Biesheuvel } 40790b827d1SArd Biesheuvel 40890b827d1SArd Biesheuvel static uint64_t S1_512(uint64_t x) 40990b827d1SArd Biesheuvel { 41090b827d1SArd Biesheuvel return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); 41190b827d1SArd Biesheuvel } 41290b827d1SArd Biesheuvel 41390b827d1SArd Biesheuvel static uint64_t s0_512(uint64_t x) 41490b827d1SArd Biesheuvel { 41590b827d1SArd Biesheuvel return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); 41690b827d1SArd Biesheuvel } 41790b827d1SArd Biesheuvel 41890b827d1SArd Biesheuvel static uint64_t s1_512(uint64_t x) 41990b827d1SArd Biesheuvel { 42090b827d1SArd Biesheuvel return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); 42190b827d1SArd Biesheuvel } 42290b827d1SArd Biesheuvel 423aaffebd6SRichard Henderson void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) 42490b827d1SArd Biesheuvel { 42590b827d1SArd Biesheuvel uint64_t *rd = vd; 42690b827d1SArd Biesheuvel uint64_t *rn = vn; 42790b827d1SArd Biesheuvel uint64_t *rm = vm; 42890b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 42990b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 43090b827d1SArd Biesheuvel 43190b827d1SArd Biesheuvel d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]); 43290b827d1SArd Biesheuvel d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]); 43390b827d1SArd Biesheuvel 43490b827d1SArd Biesheuvel rd[0] = d0; 43590b827d1SArd Biesheuvel rd[1] = d1; 436aaffebd6SRichard Henderson 437aaffebd6SRichard Henderson clear_tail_16(vd, desc); 43890b827d1SArd Biesheuvel } 43990b827d1SArd Biesheuvel 440aaffebd6SRichard Henderson void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) 44190b827d1SArd Biesheuvel { 44290b827d1SArd Biesheuvel uint64_t *rd = vd; 44390b827d1SArd Biesheuvel uint64_t *rn = vn; 44490b827d1SArd Biesheuvel uint64_t *rm = vm; 44590b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 44690b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 44790b827d1SArd Biesheuvel 44890b827d1SArd Biesheuvel d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]); 44990b827d1SArd Biesheuvel d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]); 45090b827d1SArd Biesheuvel 45190b827d1SArd Biesheuvel rd[0] = d0; 45290b827d1SArd Biesheuvel rd[1] = d1; 453aaffebd6SRichard Henderson 454aaffebd6SRichard Henderson clear_tail_16(vd, desc); 45590b827d1SArd Biesheuvel } 45690b827d1SArd Biesheuvel 457aaffebd6SRichard Henderson void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) 45890b827d1SArd Biesheuvel { 45990b827d1SArd Biesheuvel uint64_t *rd = vd; 46090b827d1SArd Biesheuvel uint64_t *rn = vn; 46190b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 46290b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 46390b827d1SArd Biesheuvel 46490b827d1SArd Biesheuvel d0 += s0_512(rd[1]); 46590b827d1SArd Biesheuvel d1 += s0_512(rn[0]); 46690b827d1SArd Biesheuvel 46790b827d1SArd Biesheuvel rd[0] = d0; 46890b827d1SArd Biesheuvel rd[1] = d1; 469aaffebd6SRichard Henderson 470aaffebd6SRichard Henderson clear_tail_16(vd, desc); 47190b827d1SArd Biesheuvel } 47290b827d1SArd Biesheuvel 473aaffebd6SRichard Henderson void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) 47490b827d1SArd Biesheuvel { 47590b827d1SArd Biesheuvel uint64_t *rd = vd; 47690b827d1SArd Biesheuvel uint64_t *rn = vn; 47790b827d1SArd Biesheuvel uint64_t *rm = vm; 47890b827d1SArd Biesheuvel 47990b827d1SArd Biesheuvel rd[0] += s1_512(rn[0]) + rm[0]; 48090b827d1SArd Biesheuvel rd[1] += s1_512(rn[1]) + rm[1]; 481aaffebd6SRichard Henderson 482aaffebd6SRichard Henderson clear_tail_16(vd, desc); 48390b827d1SArd Biesheuvel } 48480d6f4c6SArd Biesheuvel 485aaffebd6SRichard Henderson void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) 48680d6f4c6SArd Biesheuvel { 48780d6f4c6SArd Biesheuvel uint64_t *rd = vd; 48880d6f4c6SArd Biesheuvel uint64_t *rn = vn; 48980d6f4c6SArd Biesheuvel uint64_t *rm = vm; 49080d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 49180d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 49280d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 49380d6f4c6SArd Biesheuvel uint32_t t; 49480d6f4c6SArd Biesheuvel 49580d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17); 49680d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9); 49780d6f4c6SArd Biesheuvel 49880d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17); 49980d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9); 50080d6f4c6SArd Biesheuvel 50180d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17); 50280d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9); 50380d6f4c6SArd Biesheuvel 50480d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17); 50580d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9); 50680d6f4c6SArd Biesheuvel 50780d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 50880d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 509aaffebd6SRichard Henderson 510aaffebd6SRichard Henderson clear_tail_16(vd, desc); 51180d6f4c6SArd Biesheuvel } 51280d6f4c6SArd Biesheuvel 513aaffebd6SRichard Henderson void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) 51480d6f4c6SArd Biesheuvel { 51580d6f4c6SArd Biesheuvel uint64_t *rd = vd; 51680d6f4c6SArd Biesheuvel uint64_t *rn = vn; 51780d6f4c6SArd Biesheuvel uint64_t *rm = vm; 51880d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 51980d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 52080d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 52180d6f4c6SArd Biesheuvel uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25); 52280d6f4c6SArd Biesheuvel 52380d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) ^= t; 52480d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25); 52580d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25); 52680d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^ 52780d6f4c6SArd Biesheuvel ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26); 52880d6f4c6SArd Biesheuvel 52980d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 53080d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 531aaffebd6SRichard Henderson 532aaffebd6SRichard Henderson clear_tail_16(vd, desc); 53380d6f4c6SArd Biesheuvel } 53480d6f4c6SArd Biesheuvel 53543fa36c9SRichard Henderson static inline void QEMU_ALWAYS_INLINE 53643fa36c9SRichard Henderson crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm, 53743fa36c9SRichard Henderson uint32_t desc, uint32_t opcode) 53880d6f4c6SArd Biesheuvel { 53980d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 54080d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 54180d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 54243fa36c9SRichard Henderson uint32_t imm2 = simd_data(desc); 54380d6f4c6SArd Biesheuvel uint32_t t; 54480d6f4c6SArd Biesheuvel 54580d6f4c6SArd Biesheuvel assert(imm2 < 4); 54680d6f4c6SArd Biesheuvel 54780d6f4c6SArd Biesheuvel if (opcode == 0 || opcode == 2) { 54880d6f4c6SArd Biesheuvel /* SM3TT1A, SM3TT2A */ 54980d6f4c6SArd Biesheuvel t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 55080d6f4c6SArd Biesheuvel } else if (opcode == 1) { 55180d6f4c6SArd Biesheuvel /* SM3TT1B */ 55280d6f4c6SArd Biesheuvel t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 55380d6f4c6SArd Biesheuvel } else if (opcode == 3) { 55480d6f4c6SArd Biesheuvel /* SM3TT2B */ 55580d6f4c6SArd Biesheuvel t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 55680d6f4c6SArd Biesheuvel } else { 55743fa36c9SRichard Henderson qemu_build_not_reached(); 55880d6f4c6SArd Biesheuvel } 55980d6f4c6SArd Biesheuvel 56080d6f4c6SArd Biesheuvel t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); 56180d6f4c6SArd Biesheuvel 56280d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1); 56380d6f4c6SArd Biesheuvel 56480d6f4c6SArd Biesheuvel if (opcode < 2) { 56580d6f4c6SArd Biesheuvel /* SM3TT1A, SM3TT1B */ 56680d6f4c6SArd Biesheuvel t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20); 56780d6f4c6SArd Biesheuvel 56880d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23); 56980d6f4c6SArd Biesheuvel } else { 57080d6f4c6SArd Biesheuvel /* SM3TT2A, SM3TT2B */ 57180d6f4c6SArd Biesheuvel t += CR_ST_WORD(n, 3); 57280d6f4c6SArd Biesheuvel t ^= rol32(t, 9) ^ rol32(t, 17); 57380d6f4c6SArd Biesheuvel 57480d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13); 57580d6f4c6SArd Biesheuvel } 57680d6f4c6SArd Biesheuvel 57780d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3); 57880d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) = t; 57980d6f4c6SArd Biesheuvel 58080d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 58180d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 58243fa36c9SRichard Henderson 58343fa36c9SRichard Henderson clear_tail_16(rd, desc); 58480d6f4c6SArd Biesheuvel } 585b6577bcdSArd Biesheuvel 58643fa36c9SRichard Henderson #define DO_SM3TT(NAME, OPCODE) \ 58743fa36c9SRichard Henderson void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ 58843fa36c9SRichard Henderson { crypto_sm3tt(vd, vn, vm, desc, OPCODE); } 58943fa36c9SRichard Henderson 59043fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt1a, 0) 59143fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt1b, 1) 59243fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt2a, 2) 59343fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt2b, 3) 59443fa36c9SRichard Henderson 59543fa36c9SRichard Henderson #undef DO_SM3TT 59643fa36c9SRichard Henderson 597a04b68e1SRichard Henderson static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) 598b6577bcdSArd Biesheuvel { 599a04b68e1SRichard Henderson union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; 600a04b68e1SRichard Henderson union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; 601b6577bcdSArd Biesheuvel uint32_t t, i; 602b6577bcdSArd Biesheuvel 603b6577bcdSArd Biesheuvel for (i = 0; i < 4; i++) { 604b6577bcdSArd Biesheuvel t = CR_ST_WORD(d, (i + 1) % 4) ^ 605b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 2) % 4) ^ 606b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 3) % 4) ^ 607b6577bcdSArd Biesheuvel CR_ST_WORD(n, i); 608b6577bcdSArd Biesheuvel 609b6577bcdSArd Biesheuvel t = sm4_sbox[t & 0xff] | 610b6577bcdSArd Biesheuvel sm4_sbox[(t >> 8) & 0xff] << 8 | 611b6577bcdSArd Biesheuvel sm4_sbox[(t >> 16) & 0xff] << 16 | 612b6577bcdSArd Biesheuvel sm4_sbox[(t >> 24) & 0xff] << 24; 613b6577bcdSArd Biesheuvel 614b6577bcdSArd Biesheuvel CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ 615b6577bcdSArd Biesheuvel rol32(t, 24); 616b6577bcdSArd Biesheuvel } 617b6577bcdSArd Biesheuvel 618b6577bcdSArd Biesheuvel rd[0] = d.l[0]; 619b6577bcdSArd Biesheuvel rd[1] = d.l[1]; 620b6577bcdSArd Biesheuvel } 621b6577bcdSArd Biesheuvel 622a04b68e1SRichard Henderson void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) 623b6577bcdSArd Biesheuvel { 624a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 625a04b68e1SRichard Henderson 626a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 627a04b68e1SRichard Henderson do_crypto_sm4e(vd + i, vn + i, vm + i); 628a04b68e1SRichard Henderson } 629a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 630a04b68e1SRichard Henderson } 631a04b68e1SRichard Henderson 632a04b68e1SRichard Henderson static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) 633a04b68e1SRichard Henderson { 634b6577bcdSArd Biesheuvel union CRYPTO_STATE d; 635b6577bcdSArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 636b6577bcdSArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 637b6577bcdSArd Biesheuvel uint32_t t, i; 638b6577bcdSArd Biesheuvel 639b6577bcdSArd Biesheuvel d = n; 640b6577bcdSArd Biesheuvel for (i = 0; i < 4; i++) { 641b6577bcdSArd Biesheuvel t = CR_ST_WORD(d, (i + 1) % 4) ^ 642b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 2) % 4) ^ 643b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 3) % 4) ^ 644b6577bcdSArd Biesheuvel CR_ST_WORD(m, i); 645b6577bcdSArd Biesheuvel 646b6577bcdSArd Biesheuvel t = sm4_sbox[t & 0xff] | 647b6577bcdSArd Biesheuvel sm4_sbox[(t >> 8) & 0xff] << 8 | 648b6577bcdSArd Biesheuvel sm4_sbox[(t >> 16) & 0xff] << 16 | 649b6577bcdSArd Biesheuvel sm4_sbox[(t >> 24) & 0xff] << 24; 650b6577bcdSArd Biesheuvel 651b6577bcdSArd Biesheuvel CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); 652b6577bcdSArd Biesheuvel } 653b6577bcdSArd Biesheuvel 654b6577bcdSArd Biesheuvel rd[0] = d.l[0]; 655b6577bcdSArd Biesheuvel rd[1] = d.l[1]; 656b6577bcdSArd Biesheuvel } 657a04b68e1SRichard Henderson 658a04b68e1SRichard Henderson void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) 659a04b68e1SRichard Henderson { 660a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 661a04b68e1SRichard Henderson 662a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 663a04b68e1SRichard Henderson do_crypto_sm4ekey(vd + i, vn + i, vm + i); 664a04b68e1SRichard Henderson } 665a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 666a04b68e1SRichard Henderson } 6671738860dSRichard Henderson 6681738860dSRichard Henderson void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) 6691738860dSRichard Henderson { 6701738860dSRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 6711738860dSRichard Henderson uint64_t *d = vd, *n = vn, *m = vm; 6721738860dSRichard Henderson 6731738860dSRichard Henderson for (i = 0; i < opr_sz / 8; ++i) { 6741738860dSRichard Henderson d[i] = n[i] ^ rol64(m[i], 1); 6751738860dSRichard Henderson } 6761738860dSRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 6771738860dSRichard Henderson } 678