/*
 * crypto_helper.c - emulate v8 Crypto Extensions instructions
 *
 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 */

#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/helper-proto.h"
#include "tcg/tcg-gvec-desc.h"
#include "crypto/aes.h"
#include "crypto/aes-round.h"
#include "crypto/sm4.h"
#include "vec_internal.h"

union CRYPTO_STATE {
    uint8_t    bytes[16];
    uint32_t   words[4];
    uint64_t   l[2];
};

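/*
 * CR_ST_BYTE/CR_ST_WORD index the state in Arm element order, i.e.
 * element i counts from the least significant end of l[0].  On a
 * big-endian host the bytes within each uint64_t are stored in the
 * opposite order, so the indices are remapped: e.g. element 0 lives
 * at bytes[(15 - 0) ^ 8] == bytes[7].
 */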
#if HOST_BIG_ENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[i])
#define CR_ST_WORD(state, i)   ((state).words[i])
#endif

/*
 * The caller has not been converted to full gvec, and so only
 * modifies the low 16 bytes of the vector register.
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    int opr_sz = simd_oprsz(desc);
    int max_sz = simd_maxsz(desc);

    assert(opr_sz == 16);
    clear_tail(vd, opr_sz, max_sz);
}

static const AESState aes_zero = { };

void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        AESState *ad = (AESState *)(vd + i);
        AESState *st = (AESState *)(vn + i);
        AESState *rk = (AESState *)(vm + i);
        AESState t;

        /*
         * Our uint64_t are in the wrong order for big-endian.
         * The Arm AddRoundKey comes first, while the API AddRoundKey
         * comes last: perform the xor here, and provide zero to API.
         */
        if (HOST_BIG_ENDIAN) {
            t.d[0] = st->d[1] ^ rk->d[1];
            t.d[1] = st->d[0] ^ rk->d[0];
            aesenc_SB_SR_AK(&t, &t, &aes_zero, false);
            ad->d[0] = t.d[1];
            ad->d[1] = t.d[0];
        } else {
            t.v = st->v ^ rk->v;
            aesenc_SB_SR_AK(ad, &t, &aes_zero, false);
        }
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        AESState *ad = (AESState *)(vd + i);
        AESState *st = (AESState *)(vn + i);
        AESState *rk = (AESState *)(vm + i);
        AESState t;

        /* Our uint64_t are in the wrong order for big-endian. */
        if (HOST_BIG_ENDIAN) {
            t.d[0] = st->d[1] ^ rk->d[1];
            t.d[1] = st->d[0] ^ rk->d[0];
            aesdec_ISB_ISR_AK(&t, &t, &aes_zero, false);
            ad->d[0] = t.d[1];
            ad->d[1] = t.d[0];
        } else {
            t.v = st->v ^ rk->v;
            aesdec_ISB_ISR_AK(ad, &t, &aes_zero, false);
        }
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, const uint32_t *mc)
{
    union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
    int i;

    for (i = 0; i < 16; i += 4) {
        CR_ST_WORD(st, i >> 2) =
            mc[CR_ST_BYTE(st, i)] ^
            rol32(mc[CR_ST_BYTE(st, i + 1)], 8) ^
            rol32(mc[CR_ST_BYTE(st, i + 2)], 16) ^
            rol32(mc[CR_ST_BYTE(st, i + 3)], 24);
    }

    rd[0] = st.l[0];
    rd[1] = st.l[1];
}

void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        do_crypto_aesmc(vd + i, vm + i, AES_mc_rot);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        do_crypto_aesmc(vd + i, vm + i, AES_imc_rot);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

/*
 * SHA-1 logical functions
 */

static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & (y ^ z)) ^ z;
}

static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    return x ^ y ^ z;
}

static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & y) | ((x | y) & z);
}

void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *d = vd, *n = vn, *m = vm;
    uint64_t d0, d1;

    d0 = d[1] ^ d[0] ^ m[0];
    d1 = n[0] ^ d[1] ^ m[1];
    d[0] = d0;
    d[1] = d1;

    clear_tail_16(vd, desc);
}

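/*
 * Four rounds of SHA-1 per call: d holds the a..d working variables
 * (word 0 is 'a'), word 0 of n holds 'e', and m supplies the four
 * schedule words.  fn selects the round function applied to b, c, d:
 * Choose for SHA1C, Parity for SHA1P, Majority for SHA1M.
 */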
static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
                                    uint64_t *rm, uint32_t desc,
                                    uint32_t (*fn)(union CRYPTO_STATE *d))
{
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    int i;

    for (i = 0; i < 4; i++) {
        uint32_t t = fn(&d);

        t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
             + CR_ST_WORD(m, i);

        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
        CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
        CR_ST_WORD(d, 0) = t;
    }
    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(rd, desc);
}

static uint32_t do_sha1c(union CRYPTO_STATE *d)
{
    return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
}

void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
}

static uint32_t do_sha1p(union CRYPTO_STATE *d)
{
    return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
}

void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
}

static uint32_t do_sha1m(union CRYPTO_STATE *d)
{
    return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
}

void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
}

void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rm = vm;
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };

    CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
    CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;

    rd[0] = m.l[0];
    rd[1] = m.l[1];

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };

    CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
    CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
    CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
    CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

/*
 * The SHA-256 logical functions, according to
 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
 */

static uint32_t S0(uint32_t x)
{
    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
}

static uint32_t S1(uint32_t x)
{
    return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
}

static uint32_t s0(uint32_t x)
{
    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
}

static uint32_t s1(uint32_t x)
{
    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
}

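/*
 * Four rounds of the SHA-256 compression function per call.  In
 * crypto_sha256h, d holds the a..d working variables (word 0 is 'a'),
 * n holds e..h, and m supplies the four schedule words; the result is
 * the updated a..d half.  crypto_sha256h2 applies the same four rounds
 * to the e..h half, pulling the pre-round d, c, b, a values from n.
 */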
void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    int i;

    for (i = 0; i < 4; i++) {
        uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
                     + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
                     + CR_ST_WORD(m, i);

        CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
        CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
        CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;

        t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
             + S0(CR_ST_WORD(d, 0));

        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
        CR_ST_WORD(d, 0) = t;
    }

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    int i;

    for (i = 0; i < 4; i++) {
        uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
                     + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
                     + CR_ST_WORD(m, i);

        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
        CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
    }

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };

    CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
    CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
    CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
    CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };

    CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
    CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
    CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
    CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

/*
 * The SHA-512 logical functions (same as above but using 64-bit operands)
 */

static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    return (x & (y ^ z)) ^ z;
}

static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    return (x & y) | ((x | y) & z);
}

static uint64_t S0_512(uint64_t x)
{
    return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
}

static uint64_t S1_512(uint64_t x)
{
    return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
}

static uint64_t s0_512(uint64_t x)
{
    return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
}

static uint64_t s1_512(uint64_t x)
{
    return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
}

void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    uint64_t d0 = rd[0];
    uint64_t d1 = rd[1];

    d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
    d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);

    rd[0] = d0;
    rd[1] = d1;

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    uint64_t d0 = rd[0];
    uint64_t d1 = rd[1];

    d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
    d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);

    rd[0] = d0;
    rd[1] = d1;

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t d0 = rd[0];
    uint64_t d1 = rd[1];

    d0 += s0_512(rd[1]);
    d1 += s0_512(rn[0]);

    rd[0] = d0;
    rd[1] = d1;

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;

    rd[0] += s1_512(rn[0]) + rm[0];
    rd[1] += s1_512(rn[1]) + rm[1];

    clear_tail_16(vd, desc);
}

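/*
 * SM3 message-expansion helpers.  The recurring term
 * t ^ ror32(t, 17) ^ ror32(t, 9) is the SM3 permutation
 * P1(x) = x ^ rol32(x, 15) ^ rol32(x, 23).
 */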
void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    uint32_t t;

    t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
    CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);

    t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
    CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);

    t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
    CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);

    t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
    CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);

    CR_ST_WORD(d, 0) ^= t;
    CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
    CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
    CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
                        ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

static inline void QEMU_ALWAYS_INLINE
crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
             uint32_t desc, uint32_t opcode)
{
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    uint32_t imm2 = simd_data(desc);
    uint32_t t;

    assert(imm2 < 4);

    if (opcode == 0 || opcode == 2) {
        /* SM3TT1A, SM3TT2A */
        t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
    } else if (opcode == 1) {
        /* SM3TT1B */
        t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
    } else if (opcode == 3) {
        /* SM3TT2B */
        t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
    } else {
        qemu_build_not_reached();
    }

    t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);

    CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);

    if (opcode < 2) {
        /* SM3TT1A, SM3TT1B */
        t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);

        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
    } else {
        /* SM3TT2A, SM3TT2B */
        t += CR_ST_WORD(n, 3);
        t ^= rol32(t, 9) ^ rol32(t, 17);

        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
    }

    CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
    CR_ST_WORD(d, 3) = t;

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(rd, desc);
}

#define DO_SM3TT(NAME, OPCODE) \
    void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
    { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }

DO_SM3TT(crypto_sm3tt1a, 0)
DO_SM3TT(crypto_sm3tt1b, 1)
DO_SM3TT(crypto_sm3tt2a, 2)
DO_SM3TT(crypto_sm3tt2b, 3)

#undef DO_SM3TT

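/*
 * One SM4 round per state word: XOR the other three state words with a
 * round-key word, push each byte through the SM4 S-box, then apply the
 * cipher's linear transform L(x) = x ^ rol32(x, 2) ^ rol32(x, 10) ^
 * rol32(x, 18) ^ rol32(x, 24).  The key-schedule helper below uses the
 * variant L'(x) = x ^ rol32(x, 13) ^ rol32(x, 23).
 */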
static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
{
    union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
    uint32_t t, i;

    for (i = 0; i < 4; i++) {
        t = CR_ST_WORD(d, (i + 1) % 4) ^
            CR_ST_WORD(d, (i + 2) % 4) ^
            CR_ST_WORD(d, (i + 3) % 4) ^
            CR_ST_WORD(n, i);

        t = sm4_sbox[t & 0xff] |
            sm4_sbox[(t >> 8) & 0xff] << 8 |
            sm4_sbox[(t >> 16) & 0xff] << 16 |
            sm4_sbox[(t >> 24) & 0xff] << 24;

        CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
                            rol32(t, 24);
    }

    rd[0] = d.l[0];
    rd[1] = d.l[1];
}

void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        do_crypto_sm4e(vd + i, vn + i, vm + i);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
{
    union CRYPTO_STATE d;
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    uint32_t t, i;

    d = n;
    for (i = 0; i < 4; i++) {
        t = CR_ST_WORD(d, (i + 1) % 4) ^
            CR_ST_WORD(d, (i + 2) % 4) ^
            CR_ST_WORD(d, (i + 3) % 4) ^
            CR_ST_WORD(m, i);

        t = sm4_sbox[t & 0xff] |
            sm4_sbox[(t >> 8) & 0xff] << 8 |
            sm4_sbox[(t >> 16) & 0xff] << 16 |
            sm4_sbox[(t >> 24) & 0xff] << 24;

        CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
    }

    rd[0] = d.l[0];
    rd[1] = d.l[1];
}

void HELPER(crypto_sm4ekey)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        do_crypto_sm4ekey(vd + i, vn + i, vm + i);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);
    uint64_t *d = vd, *n = vn, *m = vm;

    for (i = 0; i < opr_sz / 8; ++i) {
        d[i] = n[i] ^ rol64(m[i], 1);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}