19d935509SArd Biesheuvel /* 29d935509SArd Biesheuvel * crypto_helper.c - emulate v8 Crypto Extensions instructions 39d935509SArd Biesheuvel * 490b827d1SArd Biesheuvel * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> 59d935509SArd Biesheuvel * 69d935509SArd Biesheuvel * This library is free software; you can redistribute it and/or 79d935509SArd Biesheuvel * modify it under the terms of the GNU Lesser General Public 89d935509SArd Biesheuvel * License as published by the Free Software Foundation; either 950f57e09SChetan Pant * version 2.1 of the License, or (at your option) any later version. 109d935509SArd Biesheuvel */ 119d935509SArd Biesheuvel 1274c21bd0SPeter Maydell #include "qemu/osdep.h" 139d935509SArd Biesheuvel 149d935509SArd Biesheuvel #include "cpu.h" 152ef6175aSRichard Henderson #include "exec/helper-proto.h" 16a04b68e1SRichard Henderson #include "tcg/tcg-gvec-desc.h" 17552d8924SRichard Henderson #include "crypto/aes-round.h" 18c29da5a7SWeiwei Li #include "crypto/sm4.h" 19a04b68e1SRichard Henderson #include "vec_internal.h" 209d935509SArd Biesheuvel 21f1ecb913SArd Biesheuvel union CRYPTO_STATE { 229d935509SArd Biesheuvel uint8_t bytes[16]; 23f1ecb913SArd Biesheuvel uint32_t words[4]; 249d935509SArd Biesheuvel uint64_t l[2]; 259d935509SArd Biesheuvel }; 269d935509SArd Biesheuvel 27e03b5686SMarc-André Lureau #if HOST_BIG_ENDIAN 28afc8b7d3SRichard Henderson #define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8]) 29afc8b7d3SRichard Henderson #define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2]) 30b449ca3cSArd Biesheuvel #else 31afc8b7d3SRichard Henderson #define CR_ST_BYTE(state, i) ((state).bytes[i]) 32afc8b7d3SRichard Henderson #define CR_ST_WORD(state, i) ((state).words[i]) 33b449ca3cSArd Biesheuvel #endif 34b449ca3cSArd Biesheuvel 35aaffebd6SRichard Henderson /* 36aaffebd6SRichard Henderson * The caller has not been converted to full gvec, and so only 37aaffebd6SRichard Henderson * modifies the low 16 bytes of the vector register. 38aaffebd6SRichard Henderson */ 39aaffebd6SRichard Henderson static void clear_tail_16(void *vd, uint32_t desc) 40aaffebd6SRichard Henderson { 41aaffebd6SRichard Henderson int opr_sz = simd_oprsz(desc); 42aaffebd6SRichard Henderson int max_sz = simd_maxsz(desc); 43aaffebd6SRichard Henderson 44aaffebd6SRichard Henderson assert(opr_sz == 16); 45aaffebd6SRichard Henderson clear_tail(vd, opr_sz, max_sz); 46aaffebd6SRichard Henderson } 47aaffebd6SRichard Henderson 48552d8924SRichard Henderson static const AESState aes_zero = { }; 49552d8924SRichard Henderson 50a04b68e1SRichard Henderson void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) 51a04b68e1SRichard Henderson { 52a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 53a04b68e1SRichard Henderson 54a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 55552d8924SRichard Henderson AESState *ad = (AESState *)(vd + i); 56552d8924SRichard Henderson AESState *st = (AESState *)(vn + i); 57552d8924SRichard Henderson AESState *rk = (AESState *)(vm + i); 58552d8924SRichard Henderson AESState t; 59552d8924SRichard Henderson 60552d8924SRichard Henderson /* 61552d8924SRichard Henderson * Our uint64_t are in the wrong order for big-endian. 62552d8924SRichard Henderson * The Arm AddRoundKey comes first, while the API AddRoundKey 63552d8924SRichard Henderson * comes last: perform the xor here, and provide zero to API. 64552d8924SRichard Henderson */ 65552d8924SRichard Henderson if (HOST_BIG_ENDIAN) { 66552d8924SRichard Henderson t.d[0] = st->d[1] ^ rk->d[1]; 67552d8924SRichard Henderson t.d[1] = st->d[0] ^ rk->d[0]; 68552d8924SRichard Henderson aesenc_SB_SR_AK(&t, &t, &aes_zero, false); 69552d8924SRichard Henderson ad->d[0] = t.d[1]; 70552d8924SRichard Henderson ad->d[1] = t.d[0]; 71552d8924SRichard Henderson } else { 72552d8924SRichard Henderson t.v = st->v ^ rk->v; 73552d8924SRichard Henderson aesenc_SB_SR_AK(ad, &t, &aes_zero, false); 74552d8924SRichard Henderson } 75a04b68e1SRichard Henderson } 76a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 77a04b68e1SRichard Henderson } 78a04b68e1SRichard Henderson 790f23908cSRichard Henderson void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc) 800f23908cSRichard Henderson { 810f23908cSRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 820f23908cSRichard Henderson 830f23908cSRichard Henderson for (i = 0; i < opr_sz; i += 16) { 842a8b545fSRichard Henderson AESState *ad = (AESState *)(vd + i); 852a8b545fSRichard Henderson AESState *st = (AESState *)(vn + i); 862a8b545fSRichard Henderson AESState *rk = (AESState *)(vm + i); 872a8b545fSRichard Henderson AESState t; 882a8b545fSRichard Henderson 892a8b545fSRichard Henderson /* Our uint64_t are in the wrong order for big-endian. */ 902a8b545fSRichard Henderson if (HOST_BIG_ENDIAN) { 912a8b545fSRichard Henderson t.d[0] = st->d[1] ^ rk->d[1]; 922a8b545fSRichard Henderson t.d[1] = st->d[0] ^ rk->d[0]; 932a8b545fSRichard Henderson aesdec_ISB_ISR_AK(&t, &t, &aes_zero, false); 942a8b545fSRichard Henderson ad->d[0] = t.d[1]; 952a8b545fSRichard Henderson ad->d[1] = t.d[0]; 962a8b545fSRichard Henderson } else { 972a8b545fSRichard Henderson t.v = st->v ^ rk->v; 982a8b545fSRichard Henderson aesdec_ISB_ISR_AK(ad, &t, &aes_zero, false); 992a8b545fSRichard Henderson } 1000f23908cSRichard Henderson } 1010f23908cSRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 1020f23908cSRichard Henderson } 1030f23908cSRichard Henderson 104a04b68e1SRichard Henderson void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) 105a04b68e1SRichard Henderson { 106a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 107a04b68e1SRichard Henderson 108a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 1098b103ed7SRichard Henderson AESState *ad = (AESState *)(vd + i); 1108b103ed7SRichard Henderson AESState *st = (AESState *)(vm + i); 1118b103ed7SRichard Henderson AESState t; 1128b103ed7SRichard Henderson 1138b103ed7SRichard Henderson /* Our uint64_t are in the wrong order for big-endian. */ 1148b103ed7SRichard Henderson if (HOST_BIG_ENDIAN) { 1158b103ed7SRichard Henderson t.d[0] = st->d[1]; 1168b103ed7SRichard Henderson t.d[1] = st->d[0]; 1178b103ed7SRichard Henderson aesenc_MC(&t, &t, false); 1188b103ed7SRichard Henderson ad->d[0] = t.d[1]; 1198b103ed7SRichard Henderson ad->d[1] = t.d[0]; 1208b103ed7SRichard Henderson } else { 1218b103ed7SRichard Henderson aesenc_MC(ad, st, false); 1228b103ed7SRichard Henderson } 1230f23908cSRichard Henderson } 1240f23908cSRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 1250f23908cSRichard Henderson } 1260f23908cSRichard Henderson 1270f23908cSRichard Henderson void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc) 1280f23908cSRichard Henderson { 1290f23908cSRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 1300f23908cSRichard Henderson 1310f23908cSRichard Henderson for (i = 0; i < opr_sz; i += 16) { 132bdb01515SRichard Henderson AESState *ad = (AESState *)(vd + i); 133bdb01515SRichard Henderson AESState *st = (AESState *)(vm + i); 134bdb01515SRichard Henderson AESState t; 135bdb01515SRichard Henderson 136bdb01515SRichard Henderson /* Our uint64_t are in the wrong order for big-endian. */ 137bdb01515SRichard Henderson if (HOST_BIG_ENDIAN) { 138bdb01515SRichard Henderson t.d[0] = st->d[1]; 139bdb01515SRichard Henderson t.d[1] = st->d[0]; 140bdb01515SRichard Henderson aesdec_IMC(&t, &t, false); 141bdb01515SRichard Henderson ad->d[0] = t.d[1]; 142bdb01515SRichard Henderson ad->d[1] = t.d[0]; 143bdb01515SRichard Henderson } else { 144bdb01515SRichard Henderson aesdec_IMC(ad, st, false); 145bdb01515SRichard Henderson } 146a04b68e1SRichard Henderson } 147a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 148a04b68e1SRichard Henderson } 149a04b68e1SRichard Henderson 150f1ecb913SArd Biesheuvel /* 151f1ecb913SArd Biesheuvel * SHA-1 logical functions 152f1ecb913SArd Biesheuvel */ 153f1ecb913SArd Biesheuvel 154f1ecb913SArd Biesheuvel static uint32_t cho(uint32_t x, uint32_t y, uint32_t z) 155f1ecb913SArd Biesheuvel { 156f1ecb913SArd Biesheuvel return (x & (y ^ z)) ^ z; 157f1ecb913SArd Biesheuvel } 158f1ecb913SArd Biesheuvel 159f1ecb913SArd Biesheuvel static uint32_t par(uint32_t x, uint32_t y, uint32_t z) 160f1ecb913SArd Biesheuvel { 161f1ecb913SArd Biesheuvel return x ^ y ^ z; 162f1ecb913SArd Biesheuvel } 163f1ecb913SArd Biesheuvel 164f1ecb913SArd Biesheuvel static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) 165f1ecb913SArd Biesheuvel { 166f1ecb913SArd Biesheuvel return (x & y) | ((x | y) & z); 167f1ecb913SArd Biesheuvel } 168f1ecb913SArd Biesheuvel 169afc8b7d3SRichard Henderson void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc) 170f1ecb913SArd Biesheuvel { 171afc8b7d3SRichard Henderson uint64_t *d = vd, *n = vn, *m = vm; 172afc8b7d3SRichard Henderson uint64_t d0, d1; 173afc8b7d3SRichard Henderson 174afc8b7d3SRichard Henderson d0 = d[1] ^ d[0] ^ m[0]; 175afc8b7d3SRichard Henderson d1 = n[0] ^ d[1] ^ m[1]; 176afc8b7d3SRichard Henderson d[0] = d0; 177afc8b7d3SRichard Henderson d[1] = d1; 178afc8b7d3SRichard Henderson 179afc8b7d3SRichard Henderson clear_tail_16(vd, desc); 180afc8b7d3SRichard Henderson } 181afc8b7d3SRichard Henderson 182afc8b7d3SRichard Henderson static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn, 183afc8b7d3SRichard Henderson uint64_t *rm, uint32_t desc, 184afc8b7d3SRichard Henderson uint32_t (*fn)(union CRYPTO_STATE *d)) 185afc8b7d3SRichard Henderson { 1861a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 1871a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 1881a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 189f1ecb913SArd Biesheuvel int i; 190f1ecb913SArd Biesheuvel 191f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 192afc8b7d3SRichard Henderson uint32_t t = fn(&d); 193f1ecb913SArd Biesheuvel 194b449ca3cSArd Biesheuvel t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) 195b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 196f1ecb913SArd Biesheuvel 197b449ca3cSArd Biesheuvel CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); 198b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 199b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); 200b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 201b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = t; 202f1ecb913SArd Biesheuvel } 2031a66ac61SRichard Henderson rd[0] = d.l[0]; 2041a66ac61SRichard Henderson rd[1] = d.l[1]; 205afc8b7d3SRichard Henderson 206afc8b7d3SRichard Henderson clear_tail_16(rd, desc); 207afc8b7d3SRichard Henderson } 208afc8b7d3SRichard Henderson 209afc8b7d3SRichard Henderson static uint32_t do_sha1c(union CRYPTO_STATE *d) 210afc8b7d3SRichard Henderson { 211afc8b7d3SRichard Henderson return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 212afc8b7d3SRichard Henderson } 213afc8b7d3SRichard Henderson 214afc8b7d3SRichard Henderson void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc) 215afc8b7d3SRichard Henderson { 216afc8b7d3SRichard Henderson crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c); 217afc8b7d3SRichard Henderson } 218afc8b7d3SRichard Henderson 219afc8b7d3SRichard Henderson static uint32_t do_sha1p(union CRYPTO_STATE *d) 220afc8b7d3SRichard Henderson { 221afc8b7d3SRichard Henderson return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 222afc8b7d3SRichard Henderson } 223afc8b7d3SRichard Henderson 224afc8b7d3SRichard Henderson void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc) 225afc8b7d3SRichard Henderson { 226afc8b7d3SRichard Henderson crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p); 227afc8b7d3SRichard Henderson } 228afc8b7d3SRichard Henderson 229afc8b7d3SRichard Henderson static uint32_t do_sha1m(union CRYPTO_STATE *d) 230afc8b7d3SRichard Henderson { 231afc8b7d3SRichard Henderson return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 232afc8b7d3SRichard Henderson } 233afc8b7d3SRichard Henderson 234afc8b7d3SRichard Henderson void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc) 235afc8b7d3SRichard Henderson { 236afc8b7d3SRichard Henderson crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m); 237f1ecb913SArd Biesheuvel } 238f1ecb913SArd Biesheuvel 239effa992fSRichard Henderson void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc) 240f1ecb913SArd Biesheuvel { 2411a66ac61SRichard Henderson uint64_t *rd = vd; 2421a66ac61SRichard Henderson uint64_t *rm = vm; 2431a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 244f1ecb913SArd Biesheuvel 245b449ca3cSArd Biesheuvel CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2); 246b449ca3cSArd Biesheuvel CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0; 247f1ecb913SArd Biesheuvel 2481a66ac61SRichard Henderson rd[0] = m.l[0]; 2491a66ac61SRichard Henderson rd[1] = m.l[1]; 250effa992fSRichard Henderson 251effa992fSRichard Henderson clear_tail_16(vd, desc); 252f1ecb913SArd Biesheuvel } 253f1ecb913SArd Biesheuvel 254effa992fSRichard Henderson void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc) 255f1ecb913SArd Biesheuvel { 2561a66ac61SRichard Henderson uint64_t *rd = vd; 2571a66ac61SRichard Henderson uint64_t *rm = vm; 2581a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 2591a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 260f1ecb913SArd Biesheuvel 261b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1); 262b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1); 263b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1); 264b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1); 265f1ecb913SArd Biesheuvel 2661a66ac61SRichard Henderson rd[0] = d.l[0]; 2671a66ac61SRichard Henderson rd[1] = d.l[1]; 268effa992fSRichard Henderson 269effa992fSRichard Henderson clear_tail_16(vd, desc); 270f1ecb913SArd Biesheuvel } 271f1ecb913SArd Biesheuvel 272f1ecb913SArd Biesheuvel /* 273f1ecb913SArd Biesheuvel * The SHA-256 logical functions, according to 274f1ecb913SArd Biesheuvel * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf 275f1ecb913SArd Biesheuvel */ 276f1ecb913SArd Biesheuvel 277f1ecb913SArd Biesheuvel static uint32_t S0(uint32_t x) 278f1ecb913SArd Biesheuvel { 279f1ecb913SArd Biesheuvel return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); 280f1ecb913SArd Biesheuvel } 281f1ecb913SArd Biesheuvel 282f1ecb913SArd Biesheuvel static uint32_t S1(uint32_t x) 283f1ecb913SArd Biesheuvel { 284f1ecb913SArd Biesheuvel return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); 285f1ecb913SArd Biesheuvel } 286f1ecb913SArd Biesheuvel 287f1ecb913SArd Biesheuvel static uint32_t s0(uint32_t x) 288f1ecb913SArd Biesheuvel { 289f1ecb913SArd Biesheuvel return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); 290f1ecb913SArd Biesheuvel } 291f1ecb913SArd Biesheuvel 292f1ecb913SArd Biesheuvel static uint32_t s1(uint32_t x) 293f1ecb913SArd Biesheuvel { 294f1ecb913SArd Biesheuvel return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); 295f1ecb913SArd Biesheuvel } 296f1ecb913SArd Biesheuvel 297effa992fSRichard Henderson void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc) 298f1ecb913SArd Biesheuvel { 2991a66ac61SRichard Henderson uint64_t *rd = vd; 3001a66ac61SRichard Henderson uint64_t *rn = vn; 3011a66ac61SRichard Henderson uint64_t *rm = vm; 3021a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3031a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 3041a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 305f1ecb913SArd Biesheuvel int i; 306f1ecb913SArd Biesheuvel 307f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 308b449ca3cSArd Biesheuvel uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2)) 309b449ca3cSArd Biesheuvel + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0)) 310b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 311f1ecb913SArd Biesheuvel 312b449ca3cSArd Biesheuvel CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2); 313b449ca3cSArd Biesheuvel CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1); 314b449ca3cSArd Biesheuvel CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0); 315b449ca3cSArd Biesheuvel CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t; 316f1ecb913SArd Biesheuvel 317b449ca3cSArd Biesheuvel t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 318b449ca3cSArd Biesheuvel + S0(CR_ST_WORD(d, 0)); 319f1ecb913SArd Biesheuvel 320b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 321b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 322b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 323b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = t; 324f1ecb913SArd Biesheuvel } 325f1ecb913SArd Biesheuvel 3261a66ac61SRichard Henderson rd[0] = d.l[0]; 3271a66ac61SRichard Henderson rd[1] = d.l[1]; 328effa992fSRichard Henderson 329effa992fSRichard Henderson clear_tail_16(vd, desc); 330f1ecb913SArd Biesheuvel } 331f1ecb913SArd Biesheuvel 332effa992fSRichard Henderson void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc) 333f1ecb913SArd Biesheuvel { 3341a66ac61SRichard Henderson uint64_t *rd = vd; 3351a66ac61SRichard Henderson uint64_t *rn = vn; 3361a66ac61SRichard Henderson uint64_t *rm = vm; 3371a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3381a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 3391a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 340f1ecb913SArd Biesheuvel int i; 341f1ecb913SArd Biesheuvel 342f1ecb913SArd Biesheuvel for (i = 0; i < 4; i++) { 343b449ca3cSArd Biesheuvel uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 344b449ca3cSArd Biesheuvel + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0)) 345b449ca3cSArd Biesheuvel + CR_ST_WORD(m, i); 346f1ecb913SArd Biesheuvel 347b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 348b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 349b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 350b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t; 351f1ecb913SArd Biesheuvel } 352f1ecb913SArd Biesheuvel 3531a66ac61SRichard Henderson rd[0] = d.l[0]; 3541a66ac61SRichard Henderson rd[1] = d.l[1]; 355effa992fSRichard Henderson 356effa992fSRichard Henderson clear_tail_16(vd, desc); 357f1ecb913SArd Biesheuvel } 358f1ecb913SArd Biesheuvel 359effa992fSRichard Henderson void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc) 360f1ecb913SArd Biesheuvel { 3611a66ac61SRichard Henderson uint64_t *rd = vd; 3621a66ac61SRichard Henderson uint64_t *rm = vm; 3631a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3641a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 365f1ecb913SArd Biesheuvel 366b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1)); 367b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2)); 368b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3)); 369b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0)); 370f1ecb913SArd Biesheuvel 3711a66ac61SRichard Henderson rd[0] = d.l[0]; 3721a66ac61SRichard Henderson rd[1] = d.l[1]; 373effa992fSRichard Henderson 374effa992fSRichard Henderson clear_tail_16(vd, desc); 375f1ecb913SArd Biesheuvel } 376f1ecb913SArd Biesheuvel 377effa992fSRichard Henderson void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc) 378f1ecb913SArd Biesheuvel { 3791a66ac61SRichard Henderson uint64_t *rd = vd; 3801a66ac61SRichard Henderson uint64_t *rn = vn; 3811a66ac61SRichard Henderson uint64_t *rm = vm; 3821a66ac61SRichard Henderson union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 3831a66ac61SRichard Henderson union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 3841a66ac61SRichard Henderson union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 385f1ecb913SArd Biesheuvel 386b449ca3cSArd Biesheuvel CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1); 387b449ca3cSArd Biesheuvel CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2); 388b449ca3cSArd Biesheuvel CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3); 389b449ca3cSArd Biesheuvel CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0); 390f1ecb913SArd Biesheuvel 3911a66ac61SRichard Henderson rd[0] = d.l[0]; 3921a66ac61SRichard Henderson rd[1] = d.l[1]; 393effa992fSRichard Henderson 394effa992fSRichard Henderson clear_tail_16(vd, desc); 395f1ecb913SArd Biesheuvel } 39690b827d1SArd Biesheuvel 39790b827d1SArd Biesheuvel /* 39890b827d1SArd Biesheuvel * The SHA-512 logical functions (same as above but using 64-bit operands) 39990b827d1SArd Biesheuvel */ 40090b827d1SArd Biesheuvel 40190b827d1SArd Biesheuvel static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z) 40290b827d1SArd Biesheuvel { 40390b827d1SArd Biesheuvel return (x & (y ^ z)) ^ z; 40490b827d1SArd Biesheuvel } 40590b827d1SArd Biesheuvel 40690b827d1SArd Biesheuvel static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z) 40790b827d1SArd Biesheuvel { 40890b827d1SArd Biesheuvel return (x & y) | ((x | y) & z); 40990b827d1SArd Biesheuvel } 41090b827d1SArd Biesheuvel 41190b827d1SArd Biesheuvel static uint64_t S0_512(uint64_t x) 41290b827d1SArd Biesheuvel { 41390b827d1SArd Biesheuvel return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); 41490b827d1SArd Biesheuvel } 41590b827d1SArd Biesheuvel 41690b827d1SArd Biesheuvel static uint64_t S1_512(uint64_t x) 41790b827d1SArd Biesheuvel { 41890b827d1SArd Biesheuvel return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); 41990b827d1SArd Biesheuvel } 42090b827d1SArd Biesheuvel 42190b827d1SArd Biesheuvel static uint64_t s0_512(uint64_t x) 42290b827d1SArd Biesheuvel { 42390b827d1SArd Biesheuvel return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); 42490b827d1SArd Biesheuvel } 42590b827d1SArd Biesheuvel 42690b827d1SArd Biesheuvel static uint64_t s1_512(uint64_t x) 42790b827d1SArd Biesheuvel { 42890b827d1SArd Biesheuvel return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); 42990b827d1SArd Biesheuvel } 43090b827d1SArd Biesheuvel 431aaffebd6SRichard Henderson void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) 43290b827d1SArd Biesheuvel { 43390b827d1SArd Biesheuvel uint64_t *rd = vd; 43490b827d1SArd Biesheuvel uint64_t *rn = vn; 43590b827d1SArd Biesheuvel uint64_t *rm = vm; 43690b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 43790b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 43890b827d1SArd Biesheuvel 43990b827d1SArd Biesheuvel d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]); 44090b827d1SArd Biesheuvel d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]); 44190b827d1SArd Biesheuvel 44290b827d1SArd Biesheuvel rd[0] = d0; 44390b827d1SArd Biesheuvel rd[1] = d1; 444aaffebd6SRichard Henderson 445aaffebd6SRichard Henderson clear_tail_16(vd, desc); 44690b827d1SArd Biesheuvel } 44790b827d1SArd Biesheuvel 448aaffebd6SRichard Henderson void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) 44990b827d1SArd Biesheuvel { 45090b827d1SArd Biesheuvel uint64_t *rd = vd; 45190b827d1SArd Biesheuvel uint64_t *rn = vn; 45290b827d1SArd Biesheuvel uint64_t *rm = vm; 45390b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 45490b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 45590b827d1SArd Biesheuvel 45690b827d1SArd Biesheuvel d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]); 45790b827d1SArd Biesheuvel d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]); 45890b827d1SArd Biesheuvel 45990b827d1SArd Biesheuvel rd[0] = d0; 46090b827d1SArd Biesheuvel rd[1] = d1; 461aaffebd6SRichard Henderson 462aaffebd6SRichard Henderson clear_tail_16(vd, desc); 46390b827d1SArd Biesheuvel } 46490b827d1SArd Biesheuvel 465aaffebd6SRichard Henderson void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) 46690b827d1SArd Biesheuvel { 46790b827d1SArd Biesheuvel uint64_t *rd = vd; 46890b827d1SArd Biesheuvel uint64_t *rn = vn; 46990b827d1SArd Biesheuvel uint64_t d0 = rd[0]; 47090b827d1SArd Biesheuvel uint64_t d1 = rd[1]; 47190b827d1SArd Biesheuvel 47290b827d1SArd Biesheuvel d0 += s0_512(rd[1]); 47390b827d1SArd Biesheuvel d1 += s0_512(rn[0]); 47490b827d1SArd Biesheuvel 47590b827d1SArd Biesheuvel rd[0] = d0; 47690b827d1SArd Biesheuvel rd[1] = d1; 477aaffebd6SRichard Henderson 478aaffebd6SRichard Henderson clear_tail_16(vd, desc); 47990b827d1SArd Biesheuvel } 48090b827d1SArd Biesheuvel 481aaffebd6SRichard Henderson void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) 48290b827d1SArd Biesheuvel { 48390b827d1SArd Biesheuvel uint64_t *rd = vd; 48490b827d1SArd Biesheuvel uint64_t *rn = vn; 48590b827d1SArd Biesheuvel uint64_t *rm = vm; 48690b827d1SArd Biesheuvel 48790b827d1SArd Biesheuvel rd[0] += s1_512(rn[0]) + rm[0]; 48890b827d1SArd Biesheuvel rd[1] += s1_512(rn[1]) + rm[1]; 489aaffebd6SRichard Henderson 490aaffebd6SRichard Henderson clear_tail_16(vd, desc); 49190b827d1SArd Biesheuvel } 49280d6f4c6SArd Biesheuvel 493aaffebd6SRichard Henderson void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) 49480d6f4c6SArd Biesheuvel { 49580d6f4c6SArd Biesheuvel uint64_t *rd = vd; 49680d6f4c6SArd Biesheuvel uint64_t *rn = vn; 49780d6f4c6SArd Biesheuvel uint64_t *rm = vm; 49880d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 49980d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 50080d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 50180d6f4c6SArd Biesheuvel uint32_t t; 50280d6f4c6SArd Biesheuvel 50380d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17); 50480d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9); 50580d6f4c6SArd Biesheuvel 50680d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17); 50780d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9); 50880d6f4c6SArd Biesheuvel 50980d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17); 51080d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9); 51180d6f4c6SArd Biesheuvel 51280d6f4c6SArd Biesheuvel t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17); 51380d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9); 51480d6f4c6SArd Biesheuvel 51580d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 51680d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 517aaffebd6SRichard Henderson 518aaffebd6SRichard Henderson clear_tail_16(vd, desc); 51980d6f4c6SArd Biesheuvel } 52080d6f4c6SArd Biesheuvel 521aaffebd6SRichard Henderson void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) 52280d6f4c6SArd Biesheuvel { 52380d6f4c6SArd Biesheuvel uint64_t *rd = vd; 52480d6f4c6SArd Biesheuvel uint64_t *rn = vn; 52580d6f4c6SArd Biesheuvel uint64_t *rm = vm; 52680d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 52780d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 52880d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 52980d6f4c6SArd Biesheuvel uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25); 53080d6f4c6SArd Biesheuvel 53180d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) ^= t; 53280d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25); 53380d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25); 53480d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^ 53580d6f4c6SArd Biesheuvel ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26); 53680d6f4c6SArd Biesheuvel 53780d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 53880d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 539aaffebd6SRichard Henderson 540aaffebd6SRichard Henderson clear_tail_16(vd, desc); 54180d6f4c6SArd Biesheuvel } 54280d6f4c6SArd Biesheuvel 54343fa36c9SRichard Henderson static inline void QEMU_ALWAYS_INLINE 54443fa36c9SRichard Henderson crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm, 54543fa36c9SRichard Henderson uint32_t desc, uint32_t opcode) 54680d6f4c6SArd Biesheuvel { 54780d6f4c6SArd Biesheuvel union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 54880d6f4c6SArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 54980d6f4c6SArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 55043fa36c9SRichard Henderson uint32_t imm2 = simd_data(desc); 55180d6f4c6SArd Biesheuvel uint32_t t; 55280d6f4c6SArd Biesheuvel 55380d6f4c6SArd Biesheuvel assert(imm2 < 4); 55480d6f4c6SArd Biesheuvel 55580d6f4c6SArd Biesheuvel if (opcode == 0 || opcode == 2) { 55680d6f4c6SArd Biesheuvel /* SM3TT1A, SM3TT2A */ 55780d6f4c6SArd Biesheuvel t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 55880d6f4c6SArd Biesheuvel } else if (opcode == 1) { 55980d6f4c6SArd Biesheuvel /* SM3TT1B */ 56080d6f4c6SArd Biesheuvel t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 56180d6f4c6SArd Biesheuvel } else if (opcode == 3) { 56280d6f4c6SArd Biesheuvel /* SM3TT2B */ 56380d6f4c6SArd Biesheuvel t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 56480d6f4c6SArd Biesheuvel } else { 56543fa36c9SRichard Henderson qemu_build_not_reached(); 56680d6f4c6SArd Biesheuvel } 56780d6f4c6SArd Biesheuvel 56880d6f4c6SArd Biesheuvel t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); 56980d6f4c6SArd Biesheuvel 57080d6f4c6SArd Biesheuvel CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1); 57180d6f4c6SArd Biesheuvel 57280d6f4c6SArd Biesheuvel if (opcode < 2) { 57380d6f4c6SArd Biesheuvel /* SM3TT1A, SM3TT1B */ 57480d6f4c6SArd Biesheuvel t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20); 57580d6f4c6SArd Biesheuvel 57680d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23); 57780d6f4c6SArd Biesheuvel } else { 57880d6f4c6SArd Biesheuvel /* SM3TT2A, SM3TT2B */ 57980d6f4c6SArd Biesheuvel t += CR_ST_WORD(n, 3); 58080d6f4c6SArd Biesheuvel t ^= rol32(t, 9) ^ rol32(t, 17); 58180d6f4c6SArd Biesheuvel 58280d6f4c6SArd Biesheuvel CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13); 58380d6f4c6SArd Biesheuvel } 58480d6f4c6SArd Biesheuvel 58580d6f4c6SArd Biesheuvel CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3); 58680d6f4c6SArd Biesheuvel CR_ST_WORD(d, 3) = t; 58780d6f4c6SArd Biesheuvel 58880d6f4c6SArd Biesheuvel rd[0] = d.l[0]; 58980d6f4c6SArd Biesheuvel rd[1] = d.l[1]; 59043fa36c9SRichard Henderson 59143fa36c9SRichard Henderson clear_tail_16(rd, desc); 59280d6f4c6SArd Biesheuvel } 593b6577bcdSArd Biesheuvel 59443fa36c9SRichard Henderson #define DO_SM3TT(NAME, OPCODE) \ 59543fa36c9SRichard Henderson void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ 59643fa36c9SRichard Henderson { crypto_sm3tt(vd, vn, vm, desc, OPCODE); } 59743fa36c9SRichard Henderson 59843fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt1a, 0) 59943fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt1b, 1) 60043fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt2a, 2) 60143fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt2b, 3) 60243fa36c9SRichard Henderson 60343fa36c9SRichard Henderson #undef DO_SM3TT 60443fa36c9SRichard Henderson 605a04b68e1SRichard Henderson static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) 606b6577bcdSArd Biesheuvel { 607a04b68e1SRichard Henderson union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; 608a04b68e1SRichard Henderson union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; 609b6577bcdSArd Biesheuvel uint32_t t, i; 610b6577bcdSArd Biesheuvel 611b6577bcdSArd Biesheuvel for (i = 0; i < 4; i++) { 612b6577bcdSArd Biesheuvel t = CR_ST_WORD(d, (i + 1) % 4) ^ 613b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 2) % 4) ^ 614b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 3) % 4) ^ 615b6577bcdSArd Biesheuvel CR_ST_WORD(n, i); 616b6577bcdSArd Biesheuvel 617*f6ef550fSMax Chou t = sm4_subword(t); 618b6577bcdSArd Biesheuvel 619b6577bcdSArd Biesheuvel CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ 620b6577bcdSArd Biesheuvel rol32(t, 24); 621b6577bcdSArd Biesheuvel } 622b6577bcdSArd Biesheuvel 623b6577bcdSArd Biesheuvel rd[0] = d.l[0]; 624b6577bcdSArd Biesheuvel rd[1] = d.l[1]; 625b6577bcdSArd Biesheuvel } 626b6577bcdSArd Biesheuvel 627a04b68e1SRichard Henderson void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) 628b6577bcdSArd Biesheuvel { 629a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 630a04b68e1SRichard Henderson 631a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 632a04b68e1SRichard Henderson do_crypto_sm4e(vd + i, vn + i, vm + i); 633a04b68e1SRichard Henderson } 634a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 635a04b68e1SRichard Henderson } 636a04b68e1SRichard Henderson 637a04b68e1SRichard Henderson static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) 638a04b68e1SRichard Henderson { 639b6577bcdSArd Biesheuvel union CRYPTO_STATE d; 640b6577bcdSArd Biesheuvel union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 641b6577bcdSArd Biesheuvel union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 642b6577bcdSArd Biesheuvel uint32_t t, i; 643b6577bcdSArd Biesheuvel 644b6577bcdSArd Biesheuvel d = n; 645b6577bcdSArd Biesheuvel for (i = 0; i < 4; i++) { 646b6577bcdSArd Biesheuvel t = CR_ST_WORD(d, (i + 1) % 4) ^ 647b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 2) % 4) ^ 648b6577bcdSArd Biesheuvel CR_ST_WORD(d, (i + 3) % 4) ^ 649b6577bcdSArd Biesheuvel CR_ST_WORD(m, i); 650b6577bcdSArd Biesheuvel 651*f6ef550fSMax Chou t = sm4_subword(t); 652b6577bcdSArd Biesheuvel 653b6577bcdSArd Biesheuvel CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); 654b6577bcdSArd Biesheuvel } 655b6577bcdSArd Biesheuvel 656b6577bcdSArd Biesheuvel rd[0] = d.l[0]; 657b6577bcdSArd Biesheuvel rd[1] = d.l[1]; 658b6577bcdSArd Biesheuvel } 659a04b68e1SRichard Henderson 660a04b68e1SRichard Henderson void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) 661a04b68e1SRichard Henderson { 662a04b68e1SRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 663a04b68e1SRichard Henderson 664a04b68e1SRichard Henderson for (i = 0; i < opr_sz; i += 16) { 665a04b68e1SRichard Henderson do_crypto_sm4ekey(vd + i, vn + i, vm + i); 666a04b68e1SRichard Henderson } 667a04b68e1SRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 668a04b68e1SRichard Henderson } 6691738860dSRichard Henderson 6701738860dSRichard Henderson void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) 6711738860dSRichard Henderson { 6721738860dSRichard Henderson intptr_t i, opr_sz = simd_oprsz(desc); 6731738860dSRichard Henderson uint64_t *d = vd, *n = vn, *m = vm; 6741738860dSRichard Henderson 6751738860dSRichard Henderson for (i = 0; i < opr_sz / 8; ++i) { 6761738860dSRichard Henderson d[i] = n[i] ^ rol64(m[i], 1); 6771738860dSRichard Henderson } 6781738860dSRichard Henderson clear_tail(vd, opr_sz, simd_maxsz(desc)); 6791738860dSRichard Henderson } 680