/*
 * crypto_helper.c - emulate v8 Crypto Extensions instructions
 *
 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/bitops.h"

#include "tcg/tcg-gvec-desc.h"
#include "crypto/aes-round.h"
#include "crypto/sm4.h"
#include "vec_internal.h"

#define HELPER_H "tcg/helper.h"
#include "exec/helper-proto.h.inc"

/*
 * 16-byte working state, viewable as 16 bytes, four 32-bit words or
 * two 64-bit halves.  The helpers below fill and store it through l[]
 * and operate on it through the CR_ST_* accessors.
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];
    uint32_t   words[4];
    uint64_t   l[2];
};

/*
 * Element accessors that give the same logical element numbering on
 * either host endianness.  The state is loaded as two host-order
 * uint64_t values, so on a big-endian host the elements inside each
 * 64-bit half sit in reverse order:
 *   - words: (3 - i) ^ 2 maps logical 0,1,2,3 to words[1,0,3,2];
 *   - bytes: (15 - i) ^ 8 reverses the bytes within each 8-byte half.
 * On a little-endian host the index is used unchanged.
 */
#if HOST_BIG_ENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[i])
#define CR_ST_WORD(state, i)   ((state).words[i])
#endif

/*
 * The caller has not been converted to full gvec, and so only
 * modifies the low 16 bytes of the vector register.
 *
 * Asserts that the operation size encoded in @desc is exactly 16 and
 * then hands the destination to clear_tail() with the operation and
 * maximum sizes taken from @desc.
 * NOTE(review): clear_tail() is defined outside this file; presumably it
 * zeroes the bytes of @vd between opr_sz and max_sz - confirm.
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    int opr_sz = simd_oprsz(desc);
    int max_sz = simd_maxsz(desc);

    assert(opr_sz == 16);
    clear_tail(vd, opr_sz, max_sz);
}

/*
 * All-zero round key.  The AES helpers below xor the round key into the
 * state themselves and pass this to the round API as its key argument.
 */
static const AESState aes_zero = { };

/*
 * Helper for crypto_aese.
 *
 * For every 16-byte chunk covered by simd_oprsz(desc): xor the state
 * from @vn with the round key from @vm, run aesenc_SB_SR_AK() on the
 * result with a zero key, and store the output to @vd.  clear_tail()
 * is then applied to @vd.
 */
void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        AESState *ad = (AESState *)(vd + i);
        AESState *st = (AESState *)(vn + i);
        AESState *rk = (AESState *)(vm + i);
        AESState t;

        /*
         * Our uint64_t are in the wrong order for big-endian.
         * The Arm AddRoundKey comes first, while the API AddRoundKey
         * comes last: perform the xor here, and provide zero to API.
         */
        if (HOST_BIG_ENDIAN) {
            /* Swap the two 64-bit halves on the way in and on the way out. */
            t.d[0] = st->d[1] ^ rk->d[1];
            t.d[1] = st->d[0] ^ rk->d[0];
            aesenc_SB_SR_AK(&t, &t, &aes_zero, false);
            ad->d[0] = t.d[1];
            ad->d[1] = t.d[0];
        } else {
            t.v = st->v ^ rk->v;
            aesenc_SB_SR_AK(ad, &t, &aes_zero, false);
        }
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

/*
 * Helper for crypto_aesd.
 *
 * Same structure as crypto_aese, but the per-chunk transform is
 * aesdec_ISB_ISR_AK(): the state from @vn is xored with the key from
 * @vm first, and a zero key is passed to the API.
 */
void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        AESState *ad = (AESState *)(vd + i);
        AESState *st = (AESState *)(vn + i);
        AESState *rk = (AESState *)(vm + i);
        AESState t;

        /* Our uint64_t are in the wrong order for big-endian. */
        if (HOST_BIG_ENDIAN) {
            t.d[0] = st->d[1] ^ rk->d[1];
            t.d[1] = st->d[0] ^ rk->d[0];
            aesdec_ISB_ISR_AK(&t, &t, &aes_zero, false);
            ad->d[0] = t.d[1];
            ad->d[1] = t.d[0];
        } else {
            t.v = st->v ^ rk->v;
            aesdec_ISB_ISR_AK(ad, &t, &aes_zero, false);
        }
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

/*
 * Helper for crypto_aesmc.
 *
 * Applies aesenc_MC() to every 16-byte chunk of @vm and stores the
 * result to @vd; no key is involved.  On big-endian hosts the 64-bit
 * halves are swapped through a temporary before and after the call.
 */
void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        AESState *ad = (AESState *)(vd + i);
        AESState *st = (AESState *)(vm + i);
        AESState t;

        /* Our uint64_t are in the wrong order for big-endian. */
        if (HOST_BIG_ENDIAN) {
            t.d[0] = st->d[1];
            t.d[1] = st->d[0];
            aesenc_MC(&t, &t, false);
            ad->d[0] = t.d[1];
            ad->d[1] = t.d[0];
        } else {
            aesenc_MC(ad, st, false);
        }
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

/*
 * Helper for crypto_aesimc.
 *
 * Same structure as crypto_aesmc, but the per-chunk transform is
 * aesdec_IMC().
 */
void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        AESState *ad = (AESState *)(vd + i);
        AESState *st = (AESState *)(vm + i);
        AESState t;

        /* Our uint64_t are in the wrong order for big-endian. */
        if (HOST_BIG_ENDIAN) {
            t.d[0] = st->d[1];
            t.d[1] = st->d[0];
            aesdec_IMC(&t, &t, false);
            ad->d[0] = t.d[1];
            ad->d[1] = t.d[0];
        } else {
            aesdec_IMC(ad, st, false);
        }
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

/*
 * SHA-1 logical functions
 */

/* Bitwise choose: each result bit is y's where x is set, else z's. */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & (y ^ z)) ^ z;
}

/* Bitwise parity: xor of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    return x ^ y ^ z;
}

/* Bitwise majority: a result bit is set when at least two inputs have it. */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & y) | ((x | y) & z);
}

/*
 * Helper for crypto_sha1su0.
 *
 * Works on 64-bit halves:
 *   new d[0] = d[1] ^ d[0] ^ m[0]
 *   new d[1] = n[0] ^ d[1] ^ m[1]
 * Both results are computed from the old d[] before either is stored.
 */
void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *d = vd, *n = vn, *m = vm;
    uint64_t d0, d1;

    d0 = d[1] ^ d[0] ^ m[0];
    d1 = n[0] ^ d[1] ^ m[1];
    d[0] = d0;
    d[1] = d1;

    clear_tail_16(vd, desc);
}

/*
 * Common body of the three-register SHA-1 helpers (sha1c/sha1p/sha1m).
 *
 * @rd:   destination; also supplies the initial four-word state d
 * @rn:   only word 0 of n is used, as a running fifth value
 * @rm:   supplies one word per iteration (word i on iteration i)
 * @desc: descriptor forwarded to clear_tail_16()
 * @fn:   callback returning the logical-function value for the current d
 *
 * Runs four iterations.  Each computes
 *   t = fn(d) + rol32(d0, 5) + n0 + m[i]
 * then moves old d3 into n0, shifts d0..d2 up one word (with the old d1
 * rotated right by 2 on its way into d2) and stores t in d0.
 * Only d is written back to @rd; local copies are used so the source
 * registers are not modified.
 */
static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
                                    uint64_t *rm, uint32_t desc,
                                    uint32_t (*fn)(union CRYPTO_STATE *d))
{
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    int i;

    for (i = 0; i < 4; i++) {
        uint32_t t = fn(&d);

        t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
             + CR_ST_WORD(m, i);

        /* Assignment order matters: each line reads a not-yet-updated word. */
        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
        CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
        CR_ST_WORD(d, 0) = t;
    }
    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(rd, desc);
}

/* Round function for crypto_sha1c: cho() over words 1, 2 and 3 of d. */
static uint32_t do_sha1c(union CRYPTO_STATE *d)
{
    return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
}

/* Helper for crypto_sha1c: crypto_sha1_3reg() using cho(). */
void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
}

/* Round function for crypto_sha1p: par() over words 1, 2 and 3 of d. */
static uint32_t do_sha1p(union CRYPTO_STATE *d)
{
    return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
}

/* Helper for crypto_sha1p: crypto_sha1_3reg() using par(). */
void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
}

/* Round function for crypto_sha1m: maj() over words 1, 2 and 3 of d. */
static uint32_t do_sha1m(union CRYPTO_STATE *d)
{
    return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
}

/* Helper for crypto_sha1m: crypto_sha1_3reg() using maj(). */
void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
}

/*
 * Helper for crypto_sha1h.
 *
 * Result word 0 is word 0 of @vm rotated right by 2; result words
 * 1..3 are zero.
 */
void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rm = vm;
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };

    CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
    CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;

    rd[0] = m.l[0];
    rd[1] = m.l[1];

    clear_tail_16(vd, desc);
}

/*
 * Helper for crypto_sha1su1.
 *
 * d[i] = rol32(d[i] ^ m[i + 1], 1) for i = 0..2, then
 * d[3] = rol32(d[3] ^ d[0], 1) using the already-updated d[0].
 */
void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };

    CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
    CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
    CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
    /* Reads the new value of word 0 written three lines above. */
    CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

/*
 * The SHA-256 logical functions, according to
 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
 */

/* xor of x rotated right by 2, 13 and 22. */
static uint32_t S0(uint32_t x)
{
    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
}

/* xor of x rotated right by 6, 11 and 25. */
static uint32_t S1(uint32_t x)
{
    return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
}

/* xor of x rotated right by 7 and 18, and x shifted right by 3. */
static uint32_t s0(uint32_t x)
{
    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
}

/* xor of x rotated right by 17 and 19, and x shifted right by 10. */
static uint32_t s1(uint32_t x)
{
    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
}

/*
 * Helper for crypto_sha256h.
 *
 * Runs four iterations over two four-word states, d (from @vd) and
 * n (from @vn), consuming word i of @vm on iteration i:
 *   t  = cho(n0, n1, n2) + n3 + S1(n0) + m[i]
 *   n  is shifted up one word, with n0 = d3 + t
 *   t += maj(d0, d1, d2) + S0(d0)
 *   d  is shifted up one word, with d0 = t
 * Both states evolve inside the loop but only d is written back to @vd.
 */
void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    int i;

    for (i = 0; i < 4; i++) {
        uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
                     + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
                     + CR_ST_WORD(m, i);

        CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
        CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
        CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
        /* d3 is read here, before the d state is shifted below. */
        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;

        t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
             + S0(CR_ST_WORD(d, 0));

        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
        CR_ST_WORD(d, 0) = t;
    }

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

/*
 * Helper for crypto_sha256h2.
 *
 * Runs four iterations over the four-word state d (from @vd):
 *   t = cho(d0, d1, d2) + d3 + S1(d0) + m[i]
 *   d is shifted up one word, with d0 = n[3 - i] + t
 * n (from @vn) is only read, in descending word order.
 */
void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    int i;

    for (i = 0; i < 4; i++) {
        uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
                     + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
                     + CR_ST_WORD(m, i);

        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
        CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
    }

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

/*
 * Helper for crypto_sha256su0.
 *
 * d[i] += s0(d[i + 1]) for i = 0..2, and d[3] += s0(m[0]).
 * Each line reads a word of d that has not been updated yet.
 */
void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };

    CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
    CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
    CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
    CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

/*
 * Helper for crypto_sha256su1.
 *
 *   d0 += s1(m2) + n1
 *   d1 += s1(m3) + n2
 *   d2 += s1(d0) + n3     (d0 as updated above)
 *   d3 += s1(d1) + m0     (d1 as updated above)
 */
void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };

    CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
    CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
    /* The next two lines depend on the new d0 and d1 written above. */
    CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
    CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

/*
 * The SHA-512 logical functions (same as above but using 64-bit operands)
 */

/* 64-bit bitwise choose: y where x is set, else z. */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    return (x & (y ^ z)) ^ z;
}

/* 64-bit bitwise majority of the three inputs. */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    return (x & y) | ((x | y) & z);
}

/* xor of x rotated right by 28, 34 and 39. */
static uint64_t S0_512(uint64_t x)
{
    return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
}

/* xor of x rotated right by 14, 18 and 41. */
static uint64_t S1_512(uint64_t x)
{
    return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
}

/* xor of x rotated right by 1 and 8, and x shifted right by 7. */
static uint64_t s0_512(uint64_t x)
{
    return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
}

/* xor of x rotated right by 19 and 61, and x shifted right by 6. */
static uint64_t s1_512(uint64_t x)
{
    return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
}

/*
 * Helper for crypto_sha512h.
 *
 * Operates directly on the two 64-bit elements, so no endian
 * accessors are needed:
 *   d1 += S1_512(m1) + cho512(m1, n0, n1)
 *   d0 += S1_512(d1 + m0) + cho512(d1 + m0, m1, n0)   (new d1)
 * Results are held in locals until both are computed.
 */
void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    uint64_t d0 = rd[0];
    uint64_t d1 = rd[1];

    d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
    d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);

    rd[0] = d0;
    rd[1] = d1;

    clear_tail_16(vd, desc);
}

/*
 * Helper for crypto_sha512h2.
 *
 *   d1 += S0_512(m0) + maj512(n0, m1, m0)
 *   d0 += S0_512(d1) + maj512(d1, m0, m1)             (new d1)
 */
void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    uint64_t d0 = rd[0];
    uint64_t d1 = rd[1];

    d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
    d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);

    rd[0] = d0;
    rd[1] = d1;

    clear_tail_16(vd, desc);
}

/*
 * Helper for crypto_sha512su0.
 *
 *   d0 += s0_512(d1)      (d1 as originally loaded from @vd)
 *   d1 += s0_512(n0)
 */
void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t d0 = rd[0];
    uint64_t d1 = rd[1];

    d0 += s0_512(rd[1]);
    d1 += s0_512(rn[0]);

    rd[0] = d0;
    rd[1] = d1;

    clear_tail_16(vd, desc);
}

/*
 * Helper for crypto_sha512su1.
 *
 * Element-wise: d[i] += s1_512(n[i]) + m[i] for i = 0, 1.  Each
 * element depends only on same-index inputs, and the destination is
 * updated in place without temporaries.
 */
void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;

    rd[0] += s1_512(rn[0]) + rm[0];
    rd[1] += s1_512(rn[1]) + rm[1];

    clear_tail_16(vd, desc);
}

/*
 * Helper for crypto_sm3partw1.
 *
 * For i = 0..2:
 *   t    = d[i] ^ n[i] ^ ror32(m[i + 1], 17)
 *   d[i] = t ^ ror32(t, 17) ^ ror32(t, 9)
 * Word 3 uses the already-updated d[0] in place of an m word:
 *   t    = d[3] ^ n[3] ^ ror32(d[0], 17)
 *   d[3] = t ^ ror32(t, 17) ^ ror32(t, 9)
 */
void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    uint32_t t;

    t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
    CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);

    t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
    CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);

    t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
    CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);

    /* Uses the new d[0] computed above. */
    t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
    CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

/*
 * Helper for crypto_sm3partw2.
 *
 * With t = n[0] ^ ror32(m[0], 25):
 *   d[0] ^= t
 *   d[i] ^= n[i] ^ ror32(m[i], 25)                    for i = 1, 2
 *   d[3] ^= n[3] ^ ror32(m[3], 25)
 *           ^ ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26)
 */
void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);

    CR_ST_WORD(d, 0) ^= t;
    CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
    CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
    CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
                        ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

/*
 * Common body of the four SM3TT helpers.
 *
 * @rd:     destination; also supplies the four-word state d
 * @rn:     only word 3 of n is used
 * @rm:     one word of m is used, selected by the immediate in @desc
 * @desc:   descriptor; simd_data(desc) is the word index (asserted < 4)
 * @opcode: 0 = SM3TT1A, 1 = SM3TT1B, 2 = SM3TT2A, 3 = SM3TT2B
 *
 * t starts as par/maj/cho of (d3, d2, d1) depending on @opcode, then
 * d0 and m[imm2] are added.  The TT1 forms add n3 ^ ror32(d3, 20) and
 * rotate old d2 right by 23; the TT2 forms add n3, fold t with
 * rol32 by 9 and 17, and rotate old d2 right by 13.  The state is then
 * shifted down one word with t becoming the new d3.
 *
 * NOTE(review): qemu_build_not_reached() is defined outside this file;
 * presumably it relies on @opcode being a compile-time constant after
 * forced inlining - confirm.
 */
static inline void QEMU_ALWAYS_INLINE
crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
             uint32_t desc, uint32_t opcode)
{
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    uint32_t imm2 = simd_data(desc);
    uint32_t t;

    assert(imm2 < 4);

    if (opcode == 0 || opcode == 2) {
        /* SM3TT1A, SM3TT2A */
        t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
    } else if (opcode == 1) {
        /* SM3TT1B */
        t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
    } else if (opcode == 3) {
        /* SM3TT2B */
        t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
    } else {
        qemu_build_not_reached();
    }

    t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);

    /* From here on d0 holds old d1; d1 is rewritten in the branch below. */
    CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);

    if (opcode < 2) {
        /* SM3TT1A, SM3TT1B */
        t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);

        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
    } else {
        /* SM3TT2A, SM3TT2B */
        t += CR_ST_WORD(n, 3);
        t ^= rol32(t, 9) ^ rol32(t, 17);

        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
    }

    CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
    CR_ST_WORD(d, 3) = t;

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(rd, desc);
}

/*
 * Define HELPER(NAME) as a thin wrapper that calls crypto_sm3tt() with
 * the literal OPCODE, giving one helper per SM3TT variant.
 */
#define DO_SM3TT(NAME, OPCODE) \
    void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
    { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }

DO_SM3TT(crypto_sm3tt1a, 0)
DO_SM3TT(crypto_sm3tt1b, 1)
DO_SM3TT(crypto_sm3tt2a, 2)
DO_SM3TT(crypto_sm3tt2b, 3)

#undef DO_SM3TT

/*
 * One 16-byte step for crypto_sm4e.
 *
 * Note the naming: the working state d is loaded from @rn and the
 * per-iteration words n are loaded from @rm; @rd is only written.
 *
 * For i = 0..3:
 *   t     = d[(i+1)%4] ^ d[(i+2)%4] ^ d[(i+3)%4] ^ n[i]
 *   t     = sm4_subword(t)
 *   d[i] ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ rol32(t, 24)
 * Later iterations therefore read words updated by earlier ones.
 */
static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
{
    union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
    uint32_t t, i;

    for (i = 0; i < 4; i++) {
        t = CR_ST_WORD(d, (i + 1) % 4) ^
            CR_ST_WORD(d, (i + 2) % 4) ^
            CR_ST_WORD(d, (i + 3) % 4) ^
            CR_ST_WORD(n, i);

        t = sm4_subword(t);

        CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
                            rol32(t, 24);
    }

    rd[0] = d.l[0];
    rd[1] = d.l[1];
}

/*
 * Helper for crypto_sm4e: apply do_crypto_sm4e() to every 16-byte
 * chunk covered by simd_oprsz(desc), then call clear_tail() on @vd.
 */
void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        do_crypto_sm4e(vd + i, vn + i, vm + i);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

/*
 * One 16-byte step for crypto_sm4ekey.
 *
 * The working state d starts as a copy of n (from @rn); m (from @rm)
 * supplies one word per iteration.
 *
 * For i = 0..3:
 *   t     = d[(i+1)%4] ^ d[(i+2)%4] ^ d[(i+3)%4] ^ m[i]
 *   t     = sm4_subword(t)
 *   d[i] ^= t ^ rol32(t, 13) ^ rol32(t, 23)
 */
static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
{
    union CRYPTO_STATE d;
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    uint32_t t, i;

    d = n;
    for (i = 0; i < 4; i++) {
        t = CR_ST_WORD(d, (i + 1) % 4) ^
            CR_ST_WORD(d, (i + 2) % 4) ^
            CR_ST_WORD(d, (i + 3) % 4) ^
            CR_ST_WORD(m, i);

        t = sm4_subword(t);

        CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
    }

    rd[0] = d.l[0];
    rd[1] = d.l[1];
}

/*
 * Helper for crypto_sm4ekey: apply do_crypto_sm4ekey() to every
 * 16-byte chunk covered by simd_oprsz(desc), then call clear_tail()
 * on @vd.
 */
void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        do_crypto_sm4ekey(vd + i, vn + i, vm + i);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

/*
 * Helper for crypto_rax1.
 *
 * For each 64-bit element covered by simd_oprsz(desc):
 *   d[i] = n[i] ^ rol64(m[i], 1)
 * followed by clear_tail() on @vd.
 */
void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);
    uint64_t *d = vd, *n = vn, *m = vm;

    for (i = 0; i < opr_sz / 8; ++i) {
        d[i] = n[i] ^ rol64(m[i], 1);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}