1 /* 2 * crypto_helper.c - emulate v8 Crypto Extensions instructions 3 * 4 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 */ 11 12 #include "qemu/osdep.h" 13 14 #include "cpu.h" 15 #include "exec/helper-proto.h" 16 #include "tcg/tcg-gvec-desc.h" 17 #include "crypto/aes.h" 18 #include "crypto/aes-round.h" 19 #include "crypto/sm4.h" 20 #include "vec_internal.h" 21 22 union CRYPTO_STATE { 23 uint8_t bytes[16]; 24 uint32_t words[4]; 25 uint64_t l[2]; 26 }; 27 28 #if HOST_BIG_ENDIAN 29 #define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8]) 30 #define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2]) 31 #else 32 #define CR_ST_BYTE(state, i) ((state).bytes[i]) 33 #define CR_ST_WORD(state, i) ((state).words[i]) 34 #endif 35 36 /* 37 * The caller has not been converted to full gvec, and so only 38 * modifies the low 16 bytes of the vector register. 39 */ 40 static void clear_tail_16(void *vd, uint32_t desc) 41 { 42 int opr_sz = simd_oprsz(desc); 43 int max_sz = simd_maxsz(desc); 44 45 assert(opr_sz == 16); 46 clear_tail(vd, opr_sz, max_sz); 47 } 48 49 static const AESState aes_zero = { }; 50 51 static void do_crypto_aese(uint64_t *rd, uint64_t *rn, uint64_t *rm, 52 const uint8_t *sbox, const uint8_t *shift) 53 { 54 union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; 55 union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; 56 int i; 57 58 /* xor state vector with round key */ 59 rk.l[0] ^= st.l[0]; 60 rk.l[1] ^= st.l[1]; 61 62 /* combine ShiftRows operation and sbox substitution */ 63 for (i = 0; i < 16; i++) { 64 CR_ST_BYTE(st, i) = sbox[CR_ST_BYTE(rk, shift[i])]; 65 } 66 67 rd[0] = st.l[0]; 68 rd[1] = st.l[1]; 69 } 70 71 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) 72 { 73 intptr_t i, opr_sz = simd_oprsz(desc); 74 75 for (i = 0; i < opr_sz; i += 16) { 76 AESState *ad = (AESState *)(vd + i); 77 AESState *st = (AESState *)(vn + i); 78 AESState *rk = (AESState *)(vm + i); 79 AESState t; 80 81 /* 82 * Our uint64_t are in the wrong order for big-endian. 83 * The Arm AddRoundKey comes first, while the API AddRoundKey 84 * comes last: perform the xor here, and provide zero to API. 85 */ 86 if (HOST_BIG_ENDIAN) { 87 t.d[0] = st->d[1] ^ rk->d[1]; 88 t.d[1] = st->d[0] ^ rk->d[0]; 89 aesenc_SB_SR_AK(&t, &t, &aes_zero, false); 90 ad->d[0] = t.d[1]; 91 ad->d[1] = t.d[0]; 92 } else { 93 t.v = st->v ^ rk->v; 94 aesenc_SB_SR_AK(ad, &t, &aes_zero, false); 95 } 96 } 97 clear_tail(vd, opr_sz, simd_maxsz(desc)); 98 } 99 100 void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc) 101 { 102 intptr_t i, opr_sz = simd_oprsz(desc); 103 104 for (i = 0; i < opr_sz; i += 16) { 105 do_crypto_aese(vd + i, vn + i, vm + i, AES_isbox, AES_ishifts); 106 } 107 clear_tail(vd, opr_sz, simd_maxsz(desc)); 108 } 109 110 static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, const uint32_t *mc) 111 { 112 union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; 113 int i; 114 115 for (i = 0; i < 16; i += 4) { 116 CR_ST_WORD(st, i >> 2) = 117 mc[CR_ST_BYTE(st, i)] ^ 118 rol32(mc[CR_ST_BYTE(st, i + 1)], 8) ^ 119 rol32(mc[CR_ST_BYTE(st, i + 2)], 16) ^ 120 rol32(mc[CR_ST_BYTE(st, i + 3)], 24); 121 } 122 123 rd[0] = st.l[0]; 124 rd[1] = st.l[1]; 125 } 126 127 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) 128 { 129 intptr_t i, opr_sz = simd_oprsz(desc); 130 131 for (i = 0; i < opr_sz; i += 16) { 132 do_crypto_aesmc(vd + i, vm + i, AES_mc_rot); 133 } 134 clear_tail(vd, opr_sz, simd_maxsz(desc)); 135 } 136 137 void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc) 138 { 139 intptr_t i, opr_sz = simd_oprsz(desc); 140 141 for (i = 0; i < opr_sz; i += 16) { 142 do_crypto_aesmc(vd + i, vm + i, AES_imc_rot); 143 } 144 clear_tail(vd, opr_sz, simd_maxsz(desc)); 145 } 146 147 /* 148 * SHA-1 logical functions 149 */ 150 151 static uint32_t cho(uint32_t x, uint32_t y, uint32_t z) 152 { 153 return (x & (y ^ z)) ^ z; 154 } 155 156 static uint32_t par(uint32_t x, uint32_t y, uint32_t z) 157 { 158 return x ^ y ^ z; 159 } 160 161 static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) 162 { 163 return (x & y) | ((x | y) & z); 164 } 165 166 void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc) 167 { 168 uint64_t *d = vd, *n = vn, *m = vm; 169 uint64_t d0, d1; 170 171 d0 = d[1] ^ d[0] ^ m[0]; 172 d1 = n[0] ^ d[1] ^ m[1]; 173 d[0] = d0; 174 d[1] = d1; 175 176 clear_tail_16(vd, desc); 177 } 178 179 static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn, 180 uint64_t *rm, uint32_t desc, 181 uint32_t (*fn)(union CRYPTO_STATE *d)) 182 { 183 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 184 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 185 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 186 int i; 187 188 for (i = 0; i < 4; i++) { 189 uint32_t t = fn(&d); 190 191 t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) 192 + CR_ST_WORD(m, i); 193 194 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); 195 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 196 CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); 197 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 198 CR_ST_WORD(d, 0) = t; 199 } 200 rd[0] = d.l[0]; 201 rd[1] = d.l[1]; 202 203 clear_tail_16(rd, desc); 204 } 205 206 static uint32_t do_sha1c(union CRYPTO_STATE *d) 207 { 208 return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 209 } 210 211 void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc) 212 { 213 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c); 214 } 215 216 static uint32_t do_sha1p(union CRYPTO_STATE *d) 217 { 218 return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 219 } 220 221 void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc) 222 { 223 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p); 224 } 225 226 static uint32_t do_sha1m(union CRYPTO_STATE *d) 227 { 228 return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 229 } 230 231 void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc) 232 { 233 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m); 234 } 235 236 void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc) 237 { 238 uint64_t *rd = vd; 239 uint64_t *rm = vm; 240 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 241 242 CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2); 243 CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0; 244 245 rd[0] = m.l[0]; 246 rd[1] = m.l[1]; 247 248 clear_tail_16(vd, desc); 249 } 250 251 void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc) 252 { 253 uint64_t *rd = vd; 254 uint64_t *rm = vm; 255 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 256 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 257 258 CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1); 259 CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1); 260 CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1); 261 CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1); 262 263 rd[0] = d.l[0]; 264 rd[1] = d.l[1]; 265 266 clear_tail_16(vd, desc); 267 } 268 269 /* 270 * The SHA-256 logical functions, according to 271 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf 272 */ 273 274 static uint32_t S0(uint32_t x) 275 { 276 return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); 277 } 278 279 static uint32_t S1(uint32_t x) 280 { 281 return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); 282 } 283 284 static uint32_t s0(uint32_t x) 285 { 286 return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); 287 } 288 289 static uint32_t s1(uint32_t x) 290 { 291 return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); 292 } 293 294 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc) 295 { 296 uint64_t *rd = vd; 297 uint64_t *rn = vn; 298 uint64_t *rm = vm; 299 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 300 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 301 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 302 int i; 303 304 for (i = 0; i < 4; i++) { 305 uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2)) 306 + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0)) 307 + CR_ST_WORD(m, i); 308 309 CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2); 310 CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1); 311 CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0); 312 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t; 313 314 t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 315 + S0(CR_ST_WORD(d, 0)); 316 317 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 318 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 319 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 320 CR_ST_WORD(d, 0) = t; 321 } 322 323 rd[0] = d.l[0]; 324 rd[1] = d.l[1]; 325 326 clear_tail_16(vd, desc); 327 } 328 329 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc) 330 { 331 uint64_t *rd = vd; 332 uint64_t *rn = vn; 333 uint64_t *rm = vm; 334 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 335 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 336 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 337 int i; 338 339 for (i = 0; i < 4; i++) { 340 uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 341 + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0)) 342 + CR_ST_WORD(m, i); 343 344 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 345 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 346 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 347 CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t; 348 } 349 350 rd[0] = d.l[0]; 351 rd[1] = d.l[1]; 352 353 clear_tail_16(vd, desc); 354 } 355 356 void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc) 357 { 358 uint64_t *rd = vd; 359 uint64_t *rm = vm; 360 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 361 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 362 363 CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1)); 364 CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2)); 365 CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3)); 366 CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0)); 367 368 rd[0] = d.l[0]; 369 rd[1] = d.l[1]; 370 371 clear_tail_16(vd, desc); 372 } 373 374 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc) 375 { 376 uint64_t *rd = vd; 377 uint64_t *rn = vn; 378 uint64_t *rm = vm; 379 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 380 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 381 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 382 383 CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1); 384 CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2); 385 CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3); 386 CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0); 387 388 rd[0] = d.l[0]; 389 rd[1] = d.l[1]; 390 391 clear_tail_16(vd, desc); 392 } 393 394 /* 395 * The SHA-512 logical functions (same as above but using 64-bit operands) 396 */ 397 398 static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z) 399 { 400 return (x & (y ^ z)) ^ z; 401 } 402 403 static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z) 404 { 405 return (x & y) | ((x | y) & z); 406 } 407 408 static uint64_t S0_512(uint64_t x) 409 { 410 return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); 411 } 412 413 static uint64_t S1_512(uint64_t x) 414 { 415 return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); 416 } 417 418 static uint64_t s0_512(uint64_t x) 419 { 420 return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); 421 } 422 423 static uint64_t s1_512(uint64_t x) 424 { 425 return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); 426 } 427 428 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) 429 { 430 uint64_t *rd = vd; 431 uint64_t *rn = vn; 432 uint64_t *rm = vm; 433 uint64_t d0 = rd[0]; 434 uint64_t d1 = rd[1]; 435 436 d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]); 437 d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]); 438 439 rd[0] = d0; 440 rd[1] = d1; 441 442 clear_tail_16(vd, desc); 443 } 444 445 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) 446 { 447 uint64_t *rd = vd; 448 uint64_t *rn = vn; 449 uint64_t *rm = vm; 450 uint64_t d0 = rd[0]; 451 uint64_t d1 = rd[1]; 452 453 d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]); 454 d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]); 455 456 rd[0] = d0; 457 rd[1] = d1; 458 459 clear_tail_16(vd, desc); 460 } 461 462 void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) 463 { 464 uint64_t *rd = vd; 465 uint64_t *rn = vn; 466 uint64_t d0 = rd[0]; 467 uint64_t d1 = rd[1]; 468 469 d0 += s0_512(rd[1]); 470 d1 += s0_512(rn[0]); 471 472 rd[0] = d0; 473 rd[1] = d1; 474 475 clear_tail_16(vd, desc); 476 } 477 478 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) 479 { 480 uint64_t *rd = vd; 481 uint64_t *rn = vn; 482 uint64_t *rm = vm; 483 484 rd[0] += s1_512(rn[0]) + rm[0]; 485 rd[1] += s1_512(rn[1]) + rm[1]; 486 487 clear_tail_16(vd, desc); 488 } 489 490 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) 491 { 492 uint64_t *rd = vd; 493 uint64_t *rn = vn; 494 uint64_t *rm = vm; 495 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 496 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 497 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 498 uint32_t t; 499 500 t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17); 501 CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9); 502 503 t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17); 504 CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9); 505 506 t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17); 507 CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9); 508 509 t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17); 510 CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9); 511 512 rd[0] = d.l[0]; 513 rd[1] = d.l[1]; 514 515 clear_tail_16(vd, desc); 516 } 517 518 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) 519 { 520 uint64_t *rd = vd; 521 uint64_t *rn = vn; 522 uint64_t *rm = vm; 523 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 524 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 525 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 526 uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25); 527 528 CR_ST_WORD(d, 0) ^= t; 529 CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25); 530 CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25); 531 CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^ 532 ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26); 533 534 rd[0] = d.l[0]; 535 rd[1] = d.l[1]; 536 537 clear_tail_16(vd, desc); 538 } 539 540 static inline void QEMU_ALWAYS_INLINE 541 crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm, 542 uint32_t desc, uint32_t opcode) 543 { 544 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 545 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 546 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 547 uint32_t imm2 = simd_data(desc); 548 uint32_t t; 549 550 assert(imm2 < 4); 551 552 if (opcode == 0 || opcode == 2) { 553 /* SM3TT1A, SM3TT2A */ 554 t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 555 } else if (opcode == 1) { 556 /* SM3TT1B */ 557 t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 558 } else if (opcode == 3) { 559 /* SM3TT2B */ 560 t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 561 } else { 562 qemu_build_not_reached(); 563 } 564 565 t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); 566 567 CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1); 568 569 if (opcode < 2) { 570 /* SM3TT1A, SM3TT1B */ 571 t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20); 572 573 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23); 574 } else { 575 /* SM3TT2A, SM3TT2B */ 576 t += CR_ST_WORD(n, 3); 577 t ^= rol32(t, 9) ^ rol32(t, 17); 578 579 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13); 580 } 581 582 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3); 583 CR_ST_WORD(d, 3) = t; 584 585 rd[0] = d.l[0]; 586 rd[1] = d.l[1]; 587 588 clear_tail_16(rd, desc); 589 } 590 591 #define DO_SM3TT(NAME, OPCODE) \ 592 void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ 593 { crypto_sm3tt(vd, vn, vm, desc, OPCODE); } 594 595 DO_SM3TT(crypto_sm3tt1a, 0) 596 DO_SM3TT(crypto_sm3tt1b, 1) 597 DO_SM3TT(crypto_sm3tt2a, 2) 598 DO_SM3TT(crypto_sm3tt2b, 3) 599 600 #undef DO_SM3TT 601 602 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) 603 { 604 union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; 605 union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; 606 uint32_t t, i; 607 608 for (i = 0; i < 4; i++) { 609 t = CR_ST_WORD(d, (i + 1) % 4) ^ 610 CR_ST_WORD(d, (i + 2) % 4) ^ 611 CR_ST_WORD(d, (i + 3) % 4) ^ 612 CR_ST_WORD(n, i); 613 614 t = sm4_sbox[t & 0xff] | 615 sm4_sbox[(t >> 8) & 0xff] << 8 | 616 sm4_sbox[(t >> 16) & 0xff] << 16 | 617 sm4_sbox[(t >> 24) & 0xff] << 24; 618 619 CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ 620 rol32(t, 24); 621 } 622 623 rd[0] = d.l[0]; 624 rd[1] = d.l[1]; 625 } 626 627 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) 628 { 629 intptr_t i, opr_sz = simd_oprsz(desc); 630 631 for (i = 0; i < opr_sz; i += 16) { 632 do_crypto_sm4e(vd + i, vn + i, vm + i); 633 } 634 clear_tail(vd, opr_sz, simd_maxsz(desc)); 635 } 636 637 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) 638 { 639 union CRYPTO_STATE d; 640 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 641 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 642 uint32_t t, i; 643 644 d = n; 645 for (i = 0; i < 4; i++) { 646 t = CR_ST_WORD(d, (i + 1) % 4) ^ 647 CR_ST_WORD(d, (i + 2) % 4) ^ 648 CR_ST_WORD(d, (i + 3) % 4) ^ 649 CR_ST_WORD(m, i); 650 651 t = sm4_sbox[t & 0xff] | 652 sm4_sbox[(t >> 8) & 0xff] << 8 | 653 sm4_sbox[(t >> 16) & 0xff] << 16 | 654 sm4_sbox[(t >> 24) & 0xff] << 24; 655 656 CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); 657 } 658 659 rd[0] = d.l[0]; 660 rd[1] = d.l[1]; 661 } 662 663 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) 664 { 665 intptr_t i, opr_sz = simd_oprsz(desc); 666 667 for (i = 0; i < opr_sz; i += 16) { 668 do_crypto_sm4ekey(vd + i, vn + i, vm + i); 669 } 670 clear_tail(vd, opr_sz, simd_maxsz(desc)); 671 } 672 673 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) 674 { 675 intptr_t i, opr_sz = simd_oprsz(desc); 676 uint64_t *d = vd, *n = vn, *m = vm; 677 678 for (i = 0; i < opr_sz / 8; ++i) { 679 d[i] = n[i] ^ rol64(m[i], 1); 680 } 681 clear_tail(vd, opr_sz, simd_maxsz(desc)); 682 } 683