1 /* 2 * crypto_helper.c - emulate v8 Crypto Extensions instructions 3 * 4 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 */ 11 12 #include "qemu/osdep.h" 13 14 #include "cpu.h" 15 #include "exec/helper-proto.h" 16 #include "tcg/tcg-gvec-desc.h" 17 #include "crypto/aes.h" 18 #include "crypto/sm4.h" 19 #include "vec_internal.h" 20 21 union CRYPTO_STATE { 22 uint8_t bytes[16]; 23 uint32_t words[4]; 24 uint64_t l[2]; 25 }; 26 27 #if HOST_BIG_ENDIAN 28 #define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8]) 29 #define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2]) 30 #else 31 #define CR_ST_BYTE(state, i) ((state).bytes[i]) 32 #define CR_ST_WORD(state, i) ((state).words[i]) 33 #endif 34 35 /* 36 * The caller has not been converted to full gvec, and so only 37 * modifies the low 16 bytes of the vector register. 38 */ 39 static void clear_tail_16(void *vd, uint32_t desc) 40 { 41 int opr_sz = simd_oprsz(desc); 42 int max_sz = simd_maxsz(desc); 43 44 assert(opr_sz == 16); 45 clear_tail(vd, opr_sz, max_sz); 46 } 47 48 static void do_crypto_aese(uint64_t *rd, uint64_t *rn, uint64_t *rm, 49 const uint8_t *sbox, const uint8_t *shift) 50 { 51 union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; 52 union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; 53 int i; 54 55 /* xor state vector with round key */ 56 rk.l[0] ^= st.l[0]; 57 rk.l[1] ^= st.l[1]; 58 59 /* combine ShiftRows operation and sbox substitution */ 60 for (i = 0; i < 16; i++) { 61 CR_ST_BYTE(st, i) = sbox[CR_ST_BYTE(rk, shift[i])]; 62 } 63 64 rd[0] = st.l[0]; 65 rd[1] = st.l[1]; 66 } 67 68 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) 69 { 70 intptr_t i, opr_sz = simd_oprsz(desc); 71 72 for (i = 0; i < opr_sz; i += 16) { 73 do_crypto_aese(vd + i, vn + i, vm + i, AES_sbox, AES_shifts); 74 } 75 clear_tail(vd, opr_sz, simd_maxsz(desc)); 76 } 77 78 void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc) 79 { 80 intptr_t i, opr_sz = simd_oprsz(desc); 81 82 for (i = 0; i < opr_sz; i += 16) { 83 do_crypto_aese(vd + i, vn + i, vm + i, AES_isbox, AES_ishifts); 84 } 85 clear_tail(vd, opr_sz, simd_maxsz(desc)); 86 } 87 88 static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, const uint32_t *mc) 89 { 90 union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; 91 int i; 92 93 for (i = 0; i < 16; i += 4) { 94 CR_ST_WORD(st, i >> 2) = 95 mc[CR_ST_BYTE(st, i)] ^ 96 rol32(mc[CR_ST_BYTE(st, i + 1)], 8) ^ 97 rol32(mc[CR_ST_BYTE(st, i + 2)], 16) ^ 98 rol32(mc[CR_ST_BYTE(st, i + 3)], 24); 99 } 100 101 rd[0] = st.l[0]; 102 rd[1] = st.l[1]; 103 } 104 105 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) 106 { 107 intptr_t i, opr_sz = simd_oprsz(desc); 108 109 for (i = 0; i < opr_sz; i += 16) { 110 do_crypto_aesmc(vd + i, vm + i, AES_mc_rot); 111 } 112 clear_tail(vd, opr_sz, simd_maxsz(desc)); 113 } 114 115 void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc) 116 { 117 intptr_t i, opr_sz = simd_oprsz(desc); 118 119 for (i = 0; i < opr_sz; i += 16) { 120 do_crypto_aesmc(vd + i, vm + i, AES_imc_rot); 121 } 122 clear_tail(vd, opr_sz, simd_maxsz(desc)); 123 } 124 125 /* 126 * SHA-1 logical functions 127 */ 128 129 static uint32_t cho(uint32_t x, uint32_t y, uint32_t z) 130 { 131 return (x & (y ^ z)) ^ z; 132 } 133 134 static uint32_t par(uint32_t x, uint32_t y, uint32_t z) 135 { 136 return x ^ y ^ z; 137 } 138 139 static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) 140 { 141 return (x & y) | ((x | y) & z); 142 } 143 144 void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc) 145 { 146 uint64_t *d = vd, *n = vn, *m = vm; 147 uint64_t d0, d1; 148 149 d0 = d[1] ^ d[0] ^ m[0]; 150 d1 = n[0] ^ d[1] ^ m[1]; 151 d[0] = d0; 152 d[1] = d1; 153 154 clear_tail_16(vd, desc); 155 } 156 157 static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn, 158 uint64_t *rm, uint32_t desc, 159 uint32_t (*fn)(union CRYPTO_STATE *d)) 160 { 161 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 162 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 163 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 164 int i; 165 166 for (i = 0; i < 4; i++) { 167 uint32_t t = fn(&d); 168 169 t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) 170 + CR_ST_WORD(m, i); 171 172 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); 173 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 174 CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); 175 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 176 CR_ST_WORD(d, 0) = t; 177 } 178 rd[0] = d.l[0]; 179 rd[1] = d.l[1]; 180 181 clear_tail_16(rd, desc); 182 } 183 184 static uint32_t do_sha1c(union CRYPTO_STATE *d) 185 { 186 return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 187 } 188 189 void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc) 190 { 191 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c); 192 } 193 194 static uint32_t do_sha1p(union CRYPTO_STATE *d) 195 { 196 return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 197 } 198 199 void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc) 200 { 201 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p); 202 } 203 204 static uint32_t do_sha1m(union CRYPTO_STATE *d) 205 { 206 return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 207 } 208 209 void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc) 210 { 211 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m); 212 } 213 214 void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc) 215 { 216 uint64_t *rd = vd; 217 uint64_t *rm = vm; 218 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 219 220 CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2); 221 CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0; 222 223 rd[0] = m.l[0]; 224 rd[1] = m.l[1]; 225 226 clear_tail_16(vd, desc); 227 } 228 229 void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc) 230 { 231 uint64_t *rd = vd; 232 uint64_t *rm = vm; 233 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 234 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 235 236 CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1); 237 CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1); 238 CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1); 239 CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1); 240 241 rd[0] = d.l[0]; 242 rd[1] = d.l[1]; 243 244 clear_tail_16(vd, desc); 245 } 246 247 /* 248 * The SHA-256 logical functions, according to 249 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf 250 */ 251 252 static uint32_t S0(uint32_t x) 253 { 254 return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); 255 } 256 257 static uint32_t S1(uint32_t x) 258 { 259 return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); 260 } 261 262 static uint32_t s0(uint32_t x) 263 { 264 return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); 265 } 266 267 static uint32_t s1(uint32_t x) 268 { 269 return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); 270 } 271 272 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc) 273 { 274 uint64_t *rd = vd; 275 uint64_t *rn = vn; 276 uint64_t *rm = vm; 277 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 278 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 279 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 280 int i; 281 282 for (i = 0; i < 4; i++) { 283 uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2)) 284 + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0)) 285 + CR_ST_WORD(m, i); 286 287 CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2); 288 CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1); 289 CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0); 290 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t; 291 292 t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 293 + S0(CR_ST_WORD(d, 0)); 294 295 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 296 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 297 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 298 CR_ST_WORD(d, 0) = t; 299 } 300 301 rd[0] = d.l[0]; 302 rd[1] = d.l[1]; 303 304 clear_tail_16(vd, desc); 305 } 306 307 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc) 308 { 309 uint64_t *rd = vd; 310 uint64_t *rn = vn; 311 uint64_t *rm = vm; 312 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 313 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 314 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 315 int i; 316 317 for (i = 0; i < 4; i++) { 318 uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 319 + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0)) 320 + CR_ST_WORD(m, i); 321 322 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 323 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 324 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 325 CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t; 326 } 327 328 rd[0] = d.l[0]; 329 rd[1] = d.l[1]; 330 331 clear_tail_16(vd, desc); 332 } 333 334 void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc) 335 { 336 uint64_t *rd = vd; 337 uint64_t *rm = vm; 338 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 339 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 340 341 CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1)); 342 CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2)); 343 CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3)); 344 CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0)); 345 346 rd[0] = d.l[0]; 347 rd[1] = d.l[1]; 348 349 clear_tail_16(vd, desc); 350 } 351 352 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc) 353 { 354 uint64_t *rd = vd; 355 uint64_t *rn = vn; 356 uint64_t *rm = vm; 357 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 358 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 359 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 360 361 CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1); 362 CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2); 363 CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3); 364 CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0); 365 366 rd[0] = d.l[0]; 367 rd[1] = d.l[1]; 368 369 clear_tail_16(vd, desc); 370 } 371 372 /* 373 * The SHA-512 logical functions (same as above but using 64-bit operands) 374 */ 375 376 static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z) 377 { 378 return (x & (y ^ z)) ^ z; 379 } 380 381 static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z) 382 { 383 return (x & y) | ((x | y) & z); 384 } 385 386 static uint64_t S0_512(uint64_t x) 387 { 388 return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); 389 } 390 391 static uint64_t S1_512(uint64_t x) 392 { 393 return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); 394 } 395 396 static uint64_t s0_512(uint64_t x) 397 { 398 return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); 399 } 400 401 static uint64_t s1_512(uint64_t x) 402 { 403 return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); 404 } 405 406 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) 407 { 408 uint64_t *rd = vd; 409 uint64_t *rn = vn; 410 uint64_t *rm = vm; 411 uint64_t d0 = rd[0]; 412 uint64_t d1 = rd[1]; 413 414 d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]); 415 d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]); 416 417 rd[0] = d0; 418 rd[1] = d1; 419 420 clear_tail_16(vd, desc); 421 } 422 423 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) 424 { 425 uint64_t *rd = vd; 426 uint64_t *rn = vn; 427 uint64_t *rm = vm; 428 uint64_t d0 = rd[0]; 429 uint64_t d1 = rd[1]; 430 431 d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]); 432 d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]); 433 434 rd[0] = d0; 435 rd[1] = d1; 436 437 clear_tail_16(vd, desc); 438 } 439 440 void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) 441 { 442 uint64_t *rd = vd; 443 uint64_t *rn = vn; 444 uint64_t d0 = rd[0]; 445 uint64_t d1 = rd[1]; 446 447 d0 += s0_512(rd[1]); 448 d1 += s0_512(rn[0]); 449 450 rd[0] = d0; 451 rd[1] = d1; 452 453 clear_tail_16(vd, desc); 454 } 455 456 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) 457 { 458 uint64_t *rd = vd; 459 uint64_t *rn = vn; 460 uint64_t *rm = vm; 461 462 rd[0] += s1_512(rn[0]) + rm[0]; 463 rd[1] += s1_512(rn[1]) + rm[1]; 464 465 clear_tail_16(vd, desc); 466 } 467 468 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) 469 { 470 uint64_t *rd = vd; 471 uint64_t *rn = vn; 472 uint64_t *rm = vm; 473 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 474 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 475 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 476 uint32_t t; 477 478 t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17); 479 CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9); 480 481 t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17); 482 CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9); 483 484 t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17); 485 CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9); 486 487 t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17); 488 CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9); 489 490 rd[0] = d.l[0]; 491 rd[1] = d.l[1]; 492 493 clear_tail_16(vd, desc); 494 } 495 496 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) 497 { 498 uint64_t *rd = vd; 499 uint64_t *rn = vn; 500 uint64_t *rm = vm; 501 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 502 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 503 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 504 uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25); 505 506 CR_ST_WORD(d, 0) ^= t; 507 CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25); 508 CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25); 509 CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^ 510 ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26); 511 512 rd[0] = d.l[0]; 513 rd[1] = d.l[1]; 514 515 clear_tail_16(vd, desc); 516 } 517 518 static inline void QEMU_ALWAYS_INLINE 519 crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm, 520 uint32_t desc, uint32_t opcode) 521 { 522 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 523 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 524 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 525 uint32_t imm2 = simd_data(desc); 526 uint32_t t; 527 528 assert(imm2 < 4); 529 530 if (opcode == 0 || opcode == 2) { 531 /* SM3TT1A, SM3TT2A */ 532 t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 533 } else if (opcode == 1) { 534 /* SM3TT1B */ 535 t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 536 } else if (opcode == 3) { 537 /* SM3TT2B */ 538 t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 539 } else { 540 qemu_build_not_reached(); 541 } 542 543 t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); 544 545 CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1); 546 547 if (opcode < 2) { 548 /* SM3TT1A, SM3TT1B */ 549 t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20); 550 551 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23); 552 } else { 553 /* SM3TT2A, SM3TT2B */ 554 t += CR_ST_WORD(n, 3); 555 t ^= rol32(t, 9) ^ rol32(t, 17); 556 557 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13); 558 } 559 560 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3); 561 CR_ST_WORD(d, 3) = t; 562 563 rd[0] = d.l[0]; 564 rd[1] = d.l[1]; 565 566 clear_tail_16(rd, desc); 567 } 568 569 #define DO_SM3TT(NAME, OPCODE) \ 570 void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ 571 { crypto_sm3tt(vd, vn, vm, desc, OPCODE); } 572 573 DO_SM3TT(crypto_sm3tt1a, 0) 574 DO_SM3TT(crypto_sm3tt1b, 1) 575 DO_SM3TT(crypto_sm3tt2a, 2) 576 DO_SM3TT(crypto_sm3tt2b, 3) 577 578 #undef DO_SM3TT 579 580 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) 581 { 582 union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; 583 union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; 584 uint32_t t, i; 585 586 for (i = 0; i < 4; i++) { 587 t = CR_ST_WORD(d, (i + 1) % 4) ^ 588 CR_ST_WORD(d, (i + 2) % 4) ^ 589 CR_ST_WORD(d, (i + 3) % 4) ^ 590 CR_ST_WORD(n, i); 591 592 t = sm4_sbox[t & 0xff] | 593 sm4_sbox[(t >> 8) & 0xff] << 8 | 594 sm4_sbox[(t >> 16) & 0xff] << 16 | 595 sm4_sbox[(t >> 24) & 0xff] << 24; 596 597 CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ 598 rol32(t, 24); 599 } 600 601 rd[0] = d.l[0]; 602 rd[1] = d.l[1]; 603 } 604 605 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) 606 { 607 intptr_t i, opr_sz = simd_oprsz(desc); 608 609 for (i = 0; i < opr_sz; i += 16) { 610 do_crypto_sm4e(vd + i, vn + i, vm + i); 611 } 612 clear_tail(vd, opr_sz, simd_maxsz(desc)); 613 } 614 615 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) 616 { 617 union CRYPTO_STATE d; 618 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 619 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 620 uint32_t t, i; 621 622 d = n; 623 for (i = 0; i < 4; i++) { 624 t = CR_ST_WORD(d, (i + 1) % 4) ^ 625 CR_ST_WORD(d, (i + 2) % 4) ^ 626 CR_ST_WORD(d, (i + 3) % 4) ^ 627 CR_ST_WORD(m, i); 628 629 t = sm4_sbox[t & 0xff] | 630 sm4_sbox[(t >> 8) & 0xff] << 8 | 631 sm4_sbox[(t >> 16) & 0xff] << 16 | 632 sm4_sbox[(t >> 24) & 0xff] << 24; 633 634 CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); 635 } 636 637 rd[0] = d.l[0]; 638 rd[1] = d.l[1]; 639 } 640 641 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) 642 { 643 intptr_t i, opr_sz = simd_oprsz(desc); 644 645 for (i = 0; i < opr_sz; i += 16) { 646 do_crypto_sm4ekey(vd + i, vn + i, vm + i); 647 } 648 clear_tail(vd, opr_sz, simd_maxsz(desc)); 649 } 650 651 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) 652 { 653 intptr_t i, opr_sz = simd_oprsz(desc); 654 uint64_t *d = vd, *n = vn, *m = vm; 655 656 for (i = 0; i < opr_sz / 8; ++i) { 657 d[i] = n[i] ^ rol64(m[i], 1); 658 } 659 clear_tail(vd, opr_sz, simd_maxsz(desc)); 660 } 661