Lines matching "+full:y +full:-rc" in sm3-neon-core.S
1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * sm3-neon-core.S - SM3 secure hash using NEON instructions
43 #define rc w5
92 ror o, a, #(32 - n);
96 #define GG1_1(x, y, z, o, t) \
97 eor o, x, y;
98 #define GG1_2(x, y, z, o, t) \
100 #define GG1_3(x, y, z, o, t)
102 #define FF1_1(x, y, z, o, t) GG1_1(x, y, z, o, t)
103 #define FF1_2(x, y, z, o, t)
104 #define FF1_3(x, y, z, o, t) GG1_2(x, y, z, o, t)
106 #define GG2_1(x, y, z, o, t) \
108 #define GG2_2(x, y, z, o, t) \
109 and t, y, x;
110 #define GG2_3(x, y, z, o, t) \
113 #define FF2_1(x, y, z, o, t) \
114 eor o, x, y;
115 #define FF2_2(x, y, z, o, t) \
116 and t, x, y; \
118 #define FF2_3(x, y, z, o, t) \
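The _1/_2/_3 suffixes split each boolean function into single instructions so the round macros can interleave them with unrelated work (note the in-order CPU comments further down). FF1/GG1 serve rounds 0-15 and FF2/GG2 rounds 16-63; the visible fragments suggest FF2 builds the majority function as ((x ^ y) & z) ^ (x & y) and GG2 builds the choose function from disjoint AND terms. A minimal C reference for what these macros compute, taken from the SM3 standard (GB/T 32905-2016) rather than from this file:

#include <stdint.h>

/* SM3 boolean functions. Rounds 0-15 use the XOR form;
 * rounds 16-63 use majority (FF) and choose (GG). */
static inline uint32_t ff1(uint32_t x, uint32_t y, uint32_t z)
{
	return x ^ y ^ z;
}

static inline uint32_t ff2(uint32_t x, uint32_t y, uint32_t z)
{
	return (x & y) | (x & z) | (y & z);
}

static inline uint32_t gg1(uint32_t x, uint32_t y, uint32_t z)
{
	return x ^ y ^ z;
}

static inline uint32_t gg2(uint32_t x, uint32_t y, uint32_t z)
{
	return (x & y) | (~x & z);
}

Rewriting the 16-63 forms as two-operand steps is what lets each step issue as a single AArch64 instruction.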
148 eor h, t3, t3, ror #(32-9); \
153 eor h, h, t3, ror #(32-17); /* P0(t3) => h */
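AArch64 has no rotate-left instruction, so the code expresses rol(x, n) as ror(x, 32 - n) throughout (the generic fragment at line 92 above). The two eor lines here fold t3 with its rotations by 9 and 17, i.e. SM3's P0 permutation. In C, as a sketch of the standard's definition (rol32 is a helper name introduced here, not taken from the file):

#include <stdint.h>

/* Rotate left; the assembly uses ror #(32 - n) for the same effect. */
static inline uint32_t rol32(uint32_t x, unsigned int n)
{
	return (x << n) | (x >> (32 - n));
}

/* P0 diffuses TT2 into the new 'e' word at the end of each round. */
static inline uint32_t p0(uint32_t x)
{
	return x ^ rol32(x, 9) ^ rol32(x, 17);
}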
166 /* Byte-swapped input address. */
172 (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))
174 /* Rounds 1-12, byte-swapped input block addresses. */
178 /* Rounds 1-12, expanded input block addresses. */
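In the stack-address macro body at line 172, the ((((round) / 3) - 4) % 2) * 64 term alternates between two 64-byte stack slots, one per three-round scheduling group, presumably so freshly expanded words never overwrite words the current rounds are still reading. A quick standalone check of that selector term:

#include <stdio.h>

/* Evaluate the slot-selector term for each three-round schedule group. */
int main(void)
{
	for (int round = 12; round <= 63; round += 3)
		printf("group at round %2d -> slot %d\n",
		       round, ((round / 3) - 4) % 2);
	return 0;
}

The output alternates 0, 1, 0, 1, ..., confirming the ping-pong layout.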
183 * Interleaving within round function needed for in-order CPUs. */
243 * Interleaving within round function needed for in-order CPUs. */
245 /* Load (w[i - 16]) => XTMP0 */ \
246 /* Load (w[i - 13]) => XTMP5 */ \
255 /* w[i - 9] == w3 */ \
259 /* w[i - 3] == w5 */ \
267 sri XTMP2.4s, w5.4s, #(32-15);
269 sri XTMP1.4s, XTMP5.4s, #(32-7);
273 /* w[i - 6] == w4 */ \
284 sri XTMP3.4s, XTMP0.4s, #(32-15);
286 sri XTMP4.4s, XTMP0.4s, #(32-23);
290 /* Load (w[i - 3]) => XTMP2 */ \
301 st1 {XTMP2.16b-XTMP3.16b}, [addr0];
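The precalc fragments above load w[i - 16], w[i - 13], w[i - 9], w[i - 6] and w[i - 3] and rotate by 15, 7, 15 and 23 (the sri #(32 - n) halves of NEON's shl/sri rotate idiom); these are exactly the pieces of SM3's message expansion, computed four lanes at a time in the .4s registers, with the st1 at line 301 spilling the results into the stack slots described earlier. A scalar C reference of the recurrence, using rol32 from the sketch above (the kernel code itself never materializes a 68-entry array):

/* SM3 message expansion, scalar reference:
 * W[j] = P1(W[j-16] ^ W[j-9] ^ rol(W[j-3], 15))
 *        ^ rol(W[j-13], 7) ^ W[j-6],   16 <= j <= 67. */
static inline uint32_t p1(uint32_t x)
{
	return x ^ rol32(x, 15) ^ rol32(x, 23);
}

static void sm3_expand(uint32_t w[68])
{
	for (int j = 16; j < 68; j++)
		w[j] = p1(w[j - 16] ^ w[j - 9] ^ rol32(w[j - 3], 15)) ^
		       rol32(w[j - 13], 7) ^ w[j - 6];
}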
348 * Transform blocks*64 bytes (blocks*16 32-bit words) at 'src'.
357 ldp rc, rd, [RSTATE, #8]
361 stp x28, x29, [sp, #-16]!
362 stp x19, x20, [sp, #-16]!
363 stp x21, x22, [sp, #-16]!
364 stp x23, x24, [sp, #-16]!
365 stp x25, x26, [sp, #-16]!
400 /* Transform 0-3 */
401 R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 0, 0, IW, _, 0)
402 R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 1, 1, IW, _, 0)
403 R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 2, 2, IW, _, 0)
404 R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 3, 3, IW, _, 0)
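Successive R1/R2 invocations rotate their register argument lists (ra, rb, rc, rd, ... then rd, ra, rb, rc, ...) instead of moving values between registers, so SM3's end-of-round shuffle of a..h costs no instructions. R1 applies the round 0-15 variant of FF/GG, R2 the round 16-63 variant, and the k_even/k_odd/KL arguments appear to drive the round-constant loads. A scalar C sketch of one R2-style round, following the standard (tj stands for the pre-rotated constant rol32(T_j, j mod 32); ff2, gg2, p0 and rol32 come from the sketches above):

/* One SM3 round for j >= 16; v[0..7] = a..h, w = W[j], and
 * wx = W'[j] = W[j] ^ W[j + 4]. Swap in ff1/gg1 for j < 16. */
static void sm3_round(uint32_t v[8], uint32_t tj, uint32_t w, uint32_t wx)
{
	uint32_t ss1 = rol32(rol32(v[0], 12) + v[4] + tj, 7);
	uint32_t ss2 = ss1 ^ rol32(v[0], 12);
	uint32_t tt1 = ff2(v[0], v[1], v[2]) + v[3] + ss2 + wx;
	uint32_t tt2 = gg2(v[4], v[5], v[6]) + v[7] + ss1 + w;

	v[3] = v[2];
	v[2] = rol32(v[1], 9);
	v[1] = v[0];
	v[0] = tt1;
	v[7] = v[6];
	v[6] = rol32(v[5], 19);
	v[5] = v[4];
	v[4] = p0(tt2);
}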
406 /* Transform 4-7 + Precalc 12-14 */
407 R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 4, 0, IW, _, 0)
408 R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 5, 1, IW, _, 0)
409 R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 6, 2, IW, SCHED_W_W0W1W2W3W4W5_1, 12)
410 R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 7, 3, IW, SCHED_W_W0W1W2W3W4W5_2, 12)
412 /* Transform 8-11 + Precalc 12-17 */
413 R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 8, 0, IW, SCHED_W_W0W1W2W3W4W5_3, 12)
414 R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 9, 1, IW, SCHED_W_W1W2W3W4W5W0_1, 15)
415 R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 10, 2, IW, SCHED_W_W1W2W3W4W5W0_2, 15)
416 R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 11, 3, IW, SCHED_W_W1W2W3W4W5W0_3, 15)
418 /* Transform 12-14 + Precalc 18-20 */
419 R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 12, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 18)
420 R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 13, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 18)
421 R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 14, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 18)
423 /* Transform 15-17 + Precalc 21-23 */
424 R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 15, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 21)
425 R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 16, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 21)
426 R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 17, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 21)
428 /* Transform 18-20 + Precalc 24-26 */
429 R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 18, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 24)
430 R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 19, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 24)
431 R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 20, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 24)
433 /* Transform 21-23 + Precalc 27-29 */
434 R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 21, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 27)
435 R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 22, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 27)
436 R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 23, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 27)
438 /* Transform 24-26 + Precalc 30-32 */
439 R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 24, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 30)
440 R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 25, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 30)
441 R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 26, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 30)
443 /* Transform 27-29 + Precalc 33-35 */
444 R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 27, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 33)
445 R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 28, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 33)
446 R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 29, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 33)
448 /* Transform 30-32 + Precalc 36-38 */
449 R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 30, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 36)
450 R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 31, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 36)
451 R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 32, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 36)
453 /* Transform 33-35 + Precalc 39-41 */
454 R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 33, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 39)
455 R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 34, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 39)
456 R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 35, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 39)
458 /* Transform 36-38 + Precalc 42-44 */
459 R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 36, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 42)
460 R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 37, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 42)
461 R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 38, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 42)
463 /* Transform 39-41 + Precalc 45-47 */
464 R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 39, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 45)
465 R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 40, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 45)
466 R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 41, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 45)
468 /* Transform 42-44 + Precalc 48-50 */
469 R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 42, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 48)
470 R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 43, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 48)
471 R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 44, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 48)
473 /* Transform 45-47 + Precalc 51-53 */
474 R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 45, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 51)
475 R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 46, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 51)
476 R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 47, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 51)
478 /* Transform 48-50 + Precalc 54-56 */
479 R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 48, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 54)
480 R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 49, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 54)
481 R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 50, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 54)
483 /* Transform 51-53 + Precalc 57-59 */
484 R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 51, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 57)
485 R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 52, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 57)
486 R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 53, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 57)
488 /* Transform 54-56 + Precalc 60-62 */
489 R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 54, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 60)
490 R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 55, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 60)
491 R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 56, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 60)
493 /* Transform 57-59 + Precalc 63 */
494 R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 57, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 63)
495 R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 58, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 63)
496 R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 59, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 63)
499 R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 60, 0, XW, _, _)
503 /* Transform 61-63 + Preload next block */
504 R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 61, 1, XW, LOAD_W_VEC_1, _)
506 R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, LOAD_W_VEC_2, _)
508 R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 63, 0, XW, LOAD_W_VEC_3, _)
514 eor rc, rc, s2
520 stp rc, rd, [RSTATE, #8]
528 /* Transform 61-63 */
529 R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 61, 1, XW, _, _)
531 R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, _, _)
533 R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 63, 0, XW, _, _)
542 eor rc, rc, s2
554 stp rc, rd, [RSTATE, #8]
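The eor/stp pairs here and in the looping path above are SM3's feed-forward: the previous chaining value is XORed into the final working variables (unlike SHA-2, which adds it modulo 2^32) and the result is stored back through RSTATE. In C terms:

/* SM3 feed-forward: V(i+1) = working variables XOR V(i). */
static void sm3_feed_forward(uint32_t state[8], const uint32_t v[8])
{
	for (int i = 0; i < 8; i++)
		state[i] ^= v[i];
}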
566 st1 {W0.16b-W3.16b}, [addr0], #64
567 st1 {W0.16b-W3.16b}, [addr0], #64
568 st1 {W0.16b-W3.16b}, [addr0]