Matching lines (each prefixed with its line number in the source file) from the RAID-6 syndrome calculation using RISC-V vector instructions.
1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * RAID-6 syndrome calculation using RISC-V vector instructions
9 * Copyright 2002-2004 H. Peter Anvin
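The matched lines below come from the syndrome-generation (gen_syndrome) and partial-update (xor_syndrome) routines, unrolled 1x, 2x, 4x and 8x across the vector registers. All of them implement the same per-byte recurrence: P is the plain XOR of the data disks, and Q is a Horner evaluation in GF(2^8) where the running value is multiplied by 2 (polynomial 0x11d) before each new data block is XORed in. A minimal scalar sketch of that recurrence, with hypothetical helper names not taken from the file:

#include <stddef.h>
#include <stdint.h>

/* Multiply by 2 in GF(2^8) with the RAID-6 polynomial 0x11d:
 * shift left and fold the carried-out bit back in as 0x1d. */
static inline uint8_t gf2_mul2(uint8_t a)
{
        return (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1d : 0));
}

/* Scalar reference for the P/Q recurrence the vector code implements.
 * dptr[0..disks-3] are data blocks, dptr[disks-2] is P, dptr[disks-1] is Q. */
static void gen_syndrome_scalar(int disks, size_t bytes, uint8_t **dptr)
{
        int z0 = disks - 3;             /* highest data disk */
        uint8_t *p = dptr[disks - 2];   /* XOR parity */
        uint8_t *q = dptr[disks - 1];   /* RS syndrome */

        for (size_t d = 0; d < bytes; d++) {
                uint8_t wp = dptr[z0][d];
                uint8_t wq = wp;        /* wq and wp start from the same block */

                for (int z = z0 - 1; z >= 0; z--) {
                        uint8_t wd = dptr[z][d];
                        wq = gf2_mul2(wq) ^ wd; /* Horner step for Q */
                        wp ^= wd;               /* plain XOR for P */
                }
                p[d] = wp;
                q[d] = wq;
        }
}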
33 z0 = disks - 3; /* Highest data disk */
37 asm volatile (".option push\n"
38 ".option arch,+v\n"
39 "vsetvli %0, x0, e8, m1, ta, ma\n"
40 ".option pop\n"
47 asm volatile (".option push\n"
48 ".option arch,+v\n"
49 "vle8.v v0, (%[wp0])\n"
50 "vle8.v v1, (%[wp0])\n"
51 ".option pop\n"
56 for (z = z0 - 1 ; z >= 0 ; z--) {
66 asm volatile (".option push\n"
67 ".option arch,+v\n"
68 "vsra.vi v2, v1, 7\n"
69 "vsll.vi v3, v1, 1\n"
70 "vand.vx v2, v2, %[x1d]\n"
71 "vxor.vv v3, v3, v2\n"
72 "vle8.v v2, (%[wd0])\n"
73 "vxor.vv v1, v3, v2\n"
74 "vxor.vv v0, v0, v2\n"
75 ".option pop\n"
86 asm volatile (".option push\n"
87 ".option arch,+v\n"
88 "vse8.v v0, (%[wp0])\n"
89 "vse8.v v1, (%[wq0])\n"
90 ".option pop\n"
107 p = dptr[disks - 2]; /* XOR parity */
108 q = dptr[disks - 1]; /* RS syndrome */
110 asm volatile (".option push\n"
111 ".option arch,+v\n"
112 "vsetvli %0, x0, e8, m1, ta, ma\n"
113 ".option pop\n"
120 asm volatile (".option push\n"
121 ".option arch,+v\n"
122 "vle8.v v0, (%[wp0])\n"
123 "vle8.v v1, (%[wp0])\n"
124 ".option pop\n"
130 for (z = z0 - 1; z >= start; z--) {
140 asm volatile (".option push\n"
141 ".option arch,+v\n"
142 "vsra.vi v2, v1, 7\n"
143 "vsll.vi v3, v1, 1\n"
144 "vand.vx v2, v2, %[x1d]\n"
145 "vxor.vv v3, v3, v2\n"
146 "vle8.v v2, (%[wd0])\n"
147 "vxor.vv v1, v3, v2\n"
148 "vxor.vv v0, v0, v2\n"
149 ".option pop\n"
157 for (z = start - 1; z >= 0; z--) {
164 asm volatile (".option push\n"
165 ".option arch,+v\n"
166 "vsra.vi v2, v1, 7\n"
167 "vsll.vi v3, v1, 1\n"
168 "vand.vx v2, v2, %[x1d]\n"
169 "vxor.vv v1, v3, v2\n"
170 ".option pop\n"
181 asm volatile (".option push\n"
182 ".option arch,+v\n"
183 "vle8.v v2, (%[wp0])\n"
184 "vle8.v v3, (%[wq0])\n"
185 "vxor.vv v2, v2, v0\n"
186 "vxor.vv v3, v3, v1\n"
187 "vse8.v v2, (%[wp0])\n"
188 "vse8.v v3, (%[wq0])\n"
189 ".option pop\n"
204 z0 = disks - 3; /* Highest data disk */
208 asm volatile (".option push\n"
209 ".option arch,+v\n"
210 "vsetvli %0, x0, e8, m1, ta, ma\n"
211 ".option pop\n"
221 asm volatile (".option push\n"
222 ".option arch,+v\n"
223 "vle8.v v0, (%[wp0])\n"
224 "vle8.v v1, (%[wp0])\n"
225 "vle8.v v4, (%[wp1])\n"
226 "vle8.v v5, (%[wp1])\n"
227 ".option pop\n"
233 for (z = z0 - 1; z >= 0; z--) {
243 asm volatile (".option push\n"
244 ".option arch,+v\n"
245 "vsra.vi v2, v1, 7\n"
246 "vsll.vi v3, v1, 1\n"
247 "vand.vx v2, v2, %[x1d]\n"
248 "vxor.vv v3, v3, v2\n"
249 "vle8.v v2, (%[wd0])\n"
250 "vxor.vv v1, v3, v2\n"
251 "vxor.vv v0, v0, v2\n"
253 "vsra.vi v6, v5, 7\n"
254 "vsll.vi v7, v5, 1\n"
255 "vand.vx v6, v6, %[x1d]\n"
256 "vxor.vv v7, v7, v6\n"
257 "vle8.v v6, (%[wd1])\n"
258 "vxor.vv v5, v7, v6\n"
259 "vxor.vv v4, v4, v6\n"
260 ".option pop\n"
272 asm volatile (".option push\n"
273 ".option arch,+v\n"
274 "vse8.v v0, (%[wp0])\n"
275 "vse8.v v1, (%[wq0])\n"
276 "vse8.v v4, (%[wp1])\n"
277 "vse8.v v5, (%[wq1])\n"
278 ".option pop\n"
297 p = dptr[disks - 2]; /* XOR parity */
298 q = dptr[disks - 1]; /* RS syndrome */
300 asm volatile (".option push\n"
301 ".option arch,+v\n"
302 "vsetvli %0, x0, e8, m1, ta, ma\n"
303 ".option pop\n"
313 asm volatile (".option push\n"
314 ".option arch,+v\n"
315 "vle8.v v0, (%[wp0])\n"
316 "vle8.v v1, (%[wp0])\n"
317 "vle8.v v4, (%[wp1])\n"
318 "vle8.v v5, (%[wp1])\n"
319 ".option pop\n"
326 for (z = z0 - 1; z >= start; z--) {
336 asm volatile (".option push\n"
337 ".option arch,+v\n"
338 "vsra.vi v2, v1, 7\n"
339 "vsll.vi v3, v1, 1\n"
340 "vand.vx v2, v2, %[x1d]\n"
341 "vxor.vv v3, v3, v2\n"
342 "vle8.v v2, (%[wd0])\n"
343 "vxor.vv v1, v3, v2\n"
344 "vxor.vv v0, v0, v2\n"
346 "vsra.vi v6, v5, 7\n"
347 "vsll.vi v7, v5, 1\n"
348 "vand.vx v6, v6, %[x1d]\n"
349 "vxor.vv v7, v7, v6\n"
350 "vle8.v v6, (%[wd1])\n"
351 "vxor.vv v5, v7, v6\n"
352 "vxor.vv v4, v4, v6\n"
353 ".option pop\n"
362 for (z = start - 1; z >= 0; z--) {
369 asm volatile (".option push\n"
370 ".option arch,+v\n"
371 "vsra.vi v2, v1, 7\n"
372 "vsll.vi v3, v1, 1\n"
373 "vand.vx v2, v2, %[x1d]\n"
374 "vxor.vv v1, v3, v2\n"
376 "vsra.vi v6, v5, 7\n"
377 "vsll.vi v7, v5, 1\n"
378 "vand.vx v6, v6, %[x1d]\n"
379 "vxor.vv v5, v7, v6\n"
380 ".option pop\n"
392 asm volatile (".option push\n"
393 ".option arch,+v\n"
394 "vle8.v v2, (%[wp0])\n"
395 "vle8.v v3, (%[wq0])\n"
396 "vxor.vv v2, v2, v0\n"
397 "vxor.vv v3, v3, v1\n"
398 "vse8.v v2, (%[wp0])\n"
399 "vse8.v v3, (%[wq0])\n"
401 "vle8.v v6, (%[wp1])\n"
402 "vle8.v v7, (%[wq1])\n"
403 "vxor.vv v6, v6, v4\n"
404 "vxor.vv v7, v7, v5\n"
405 "vse8.v v6, (%[wp1])\n"
406 "vse8.v v7, (%[wq1])\n"
407 ".option pop\n"
424 z0 = disks - 3; /* Highest data disk */
428 asm volatile (".option push\n"
429 ".option arch,+v\n"
430 "vsetvli %0, x0, e8, m1, ta, ma\n"
431 ".option pop\n"
443 asm volatile (".option push\n"
444 ".option arch,+v\n"
445 "vle8.v v0, (%[wp0])\n"
446 "vle8.v v1, (%[wp0])\n"
447 "vle8.v v4, (%[wp1])\n"
448 "vle8.v v5, (%[wp1])\n"
449 "vle8.v v8, (%[wp2])\n"
450 "vle8.v v9, (%[wp2])\n"
451 "vle8.v v12, (%[wp3])\n"
452 "vle8.v v13, (%[wp3])\n"
453 ".option pop\n"
461 for (z = z0 - 1; z >= 0; z--) {
471 asm volatile (".option push\n"
472 ".option arch,+v\n"
473 "vsra.vi v2, v1, 7\n"
474 "vsll.vi v3, v1, 1\n"
475 "vand.vx v2, v2, %[x1d]\n"
476 "vxor.vv v3, v3, v2\n"
477 "vle8.v v2, (%[wd0])\n"
478 "vxor.vv v1, v3, v2\n"
479 "vxor.vv v0, v0, v2\n"
481 "vsra.vi v6, v5, 7\n"
482 "vsll.vi v7, v5, 1\n"
483 "vand.vx v6, v6, %[x1d]\n"
484 "vxor.vv v7, v7, v6\n"
485 "vle8.v v6, (%[wd1])\n"
486 "vxor.vv v5, v7, v6\n"
487 "vxor.vv v4, v4, v6\n"
489 "vsra.vi v10, v9, 7\n"
490 "vsll.vi v11, v9, 1\n"
491 "vand.vx v10, v10, %[x1d]\n"
492 "vxor.vv v11, v11, v10\n"
493 "vle8.v v10, (%[wd2])\n"
494 "vxor.vv v9, v11, v10\n"
495 "vxor.vv v8, v8, v10\n"
497 "vsra.vi v14, v13, 7\n"
498 "vsll.vi v15, v13, 1\n"
499 "vand.vx v14, v14, %[x1d]\n"
500 "vxor.vv v15, v15, v14\n"
501 "vle8.v v14, (%[wd3])\n"
502 "vxor.vv v13, v15, v14\n"
503 "vxor.vv v12, v12, v14\n"
504 ".option pop\n"
518 asm volatile (".option push\n"
519 ".option arch,+v\n"
520 "vse8.v v0, (%[wp0])\n"
521 "vse8.v v1, (%[wq0])\n"
522 "vse8.v v4, (%[wp1])\n"
523 "vse8.v v5, (%[wq1])\n"
524 "vse8.v v8, (%[wp2])\n"
525 "vse8.v v9, (%[wq2])\n"
526 "vse8.v v12, (%[wp3])\n"
527 "vse8.v v13, (%[wq3])\n"
528 ".option pop\n"
551 p = dptr[disks - 2]; /* XOR parity */
552 q = dptr[disks - 1]; /* RS syndrome */
554 asm volatile (".option push\n"
555 ".option arch,+v\n"
556 "vsetvli %0, x0, e8, m1, ta, ma\n"
557 ".option pop\n"
569 asm volatile (".option push\n"
570 ".option arch,+v\n"
571 "vle8.v v0, (%[wp0])\n"
572 "vle8.v v1, (%[wp0])\n"
573 "vle8.v v4, (%[wp1])\n"
574 "vle8.v v5, (%[wp1])\n"
575 "vle8.v v8, (%[wp2])\n"
576 "vle8.v v9, (%[wp2])\n"
577 "vle8.v v12, (%[wp3])\n"
578 "vle8.v v13, (%[wp3])\n"
579 ".option pop\n"
588 for (z = z0 - 1; z >= start; z--) {
598 asm volatile (".option push\n"
599 ".option arch,+v\n"
600 "vsra.vi v2, v1, 7\n"
601 "vsll.vi v3, v1, 1\n"
602 "vand.vx v2, v2, %[x1d]\n"
603 "vxor.vv v3, v3, v2\n"
604 "vle8.v v2, (%[wd0])\n"
605 "vxor.vv v1, v3, v2\n"
606 "vxor.vv v0, v0, v2\n"
608 "vsra.vi v6, v5, 7\n"
609 "vsll.vi v7, v5, 1\n"
610 "vand.vx v6, v6, %[x1d]\n"
611 "vxor.vv v7, v7, v6\n"
612 "vle8.v v6, (%[wd1])\n"
613 "vxor.vv v5, v7, v6\n"
614 "vxor.vv v4, v4, v6\n"
616 "vsra.vi v10, v9, 7\n"
617 "vsll.vi v11, v9, 1\n"
618 "vand.vx v10, v10, %[x1d]\n"
619 "vxor.vv v11, v11, v10\n"
620 "vle8.v v10, (%[wd2])\n"
621 "vxor.vv v9, v11, v10\n"
622 "vxor.vv v8, v8, v10\n"
624 "vsra.vi v14, v13, 7\n"
625 "vsll.vi v15, v13, 1\n"
626 "vand.vx v14, v14, %[x1d]\n"
627 "vxor.vv v15, v15, v14\n"
628 "vle8.v v14, (%[wd3])\n"
629 "vxor.vv v13, v15, v14\n"
630 "vxor.vv v12, v12, v14\n"
631 ".option pop\n"
642 for (z = start - 1; z >= 0; z--) {
649 asm volatile (".option push\n"
650 ".option arch,+v\n"
651 "vsra.vi v2, v1, 7\n"
652 "vsll.vi v3, v1, 1\n"
653 "vand.vx v2, v2, %[x1d]\n"
654 "vxor.vv v1, v3, v2\n"
656 "vsra.vi v6, v5, 7\n"
657 "vsll.vi v7, v5, 1\n"
658 "vand.vx v6, v6, %[x1d]\n"
659 "vxor.vv v5, v7, v6\n"
661 "vsra.vi v10, v9, 7\n"
662 "vsll.vi v11, v9, 1\n"
663 "vand.vx v10, v10, %[x1d]\n"
664 "vxor.vv v9, v11, v10\n"
666 "vsra.vi v14, v13, 7\n"
667 "vsll.vi v15, v13, 1\n"
668 "vand.vx v14, v14, %[x1d]\n"
669 "vxor.vv v13, v15, v14\n"
670 ".option pop\n"
684 asm volatile (".option push\n"
685 ".option arch,+v\n"
686 "vle8.v v2, (%[wp0])\n"
687 "vle8.v v3, (%[wq0])\n"
688 "vxor.vv v2, v2, v0\n"
689 "vxor.vv v3, v3, v1\n"
690 "vse8.v v2, (%[wp0])\n"
691 "vse8.v v3, (%[wq0])\n"
693 "vle8.v v6, (%[wp1])\n"
694 "vle8.v v7, (%[wq1])\n"
695 "vxor.vv v6, v6, v4\n"
696 "vxor.vv v7, v7, v5\n"
697 "vse8.v v6, (%[wp1])\n"
698 "vse8.v v7, (%[wq1])\n"
700 "vle8.v v10, (%[wp2])\n"
701 "vle8.v v11, (%[wq2])\n"
702 "vxor.vv v10, v10, v8\n"
703 "vxor.vv v11, v11, v9\n"
704 "vse8.v v10, (%[wp2])\n"
705 "vse8.v v11, (%[wq2])\n"
707 "vle8.v v14, (%[wp3])\n"
708 "vle8.v v15, (%[wq3])\n"
709 "vxor.vv v14, v14, v12\n"
710 "vxor.vv v15, v15, v13\n"
711 "vse8.v v14, (%[wp3])\n"
712 "vse8.v v15, (%[wq3])\n"
713 ".option pop\n"
734 z0 = disks - 3; /* Highest data disk */
738 asm volatile (".option push\n"
739 ".option arch,+v\n"
740 "vsetvli %0, x0, e8, m1, ta, ma\n"
741 ".option pop\n"
757 asm volatile (".option push\n"
758 ".option arch,+v\n"
759 "vle8.v v0, (%[wp0])\n"
760 "vle8.v v1, (%[wp0])\n"
761 "vle8.v v4, (%[wp1])\n"
762 "vle8.v v5, (%[wp1])\n"
763 "vle8.v v8, (%[wp2])\n"
764 "vle8.v v9, (%[wp2])\n"
765 "vle8.v v12, (%[wp3])\n"
766 "vle8.v v13, (%[wp3])\n"
767 "vle8.v v16, (%[wp4])\n"
768 "vle8.v v17, (%[wp4])\n"
769 "vle8.v v20, (%[wp5])\n"
770 "vle8.v v21, (%[wp5])\n"
771 "vle8.v v24, (%[wp6])\n"
772 "vle8.v v25, (%[wp6])\n"
773 "vle8.v v28, (%[wp7])\n"
774 "vle8.v v29, (%[wp7])\n"
775 ".option pop\n"
787 for (z = z0 - 1; z >= 0; z--) {
797 asm volatile (".option push\n"
798 ".option arch,+v\n"
799 "vsra.vi v2, v1, 7\n"
800 "vsll.vi v3, v1, 1\n"
801 "vand.vx v2, v2, %[x1d]\n"
802 "vxor.vv v3, v3, v2\n"
803 "vle8.v v2, (%[wd0])\n"
804 "vxor.vv v1, v3, v2\n"
805 "vxor.vv v0, v0, v2\n"
807 "vsra.vi v6, v5, 7\n"
808 "vsll.vi v7, v5, 1\n"
809 "vand.vx v6, v6, %[x1d]\n"
810 "vxor.vv v7, v7, v6\n"
811 "vle8.v v6, (%[wd1])\n"
812 "vxor.vv v5, v7, v6\n"
813 "vxor.vv v4, v4, v6\n"
815 "vsra.vi v10, v9, 7\n"
816 "vsll.vi v11, v9, 1\n"
817 "vand.vx v10, v10, %[x1d]\n"
818 "vxor.vv v11, v11, v10\n"
819 "vle8.v v10, (%[wd2])\n"
820 "vxor.vv v9, v11, v10\n"
821 "vxor.vv v8, v8, v10\n"
823 "vsra.vi v14, v13, 7\n"
824 "vsll.vi v15, v13, 1\n"
825 "vand.vx v14, v14, %[x1d]\n"
826 "vxor.vv v15, v15, v14\n"
827 "vle8.v v14, (%[wd3])\n"
828 "vxor.vv v13, v15, v14\n"
829 "vxor.vv v12, v12, v14\n"
831 "vsra.vi v18, v17, 7\n"
832 "vsll.vi v19, v17, 1\n"
833 "vand.vx v18, v18, %[x1d]\n"
834 "vxor.vv v19, v19, v18\n"
835 "vle8.v v18, (%[wd4])\n"
836 "vxor.vv v17, v19, v18\n"
837 "vxor.vv v16, v16, v18\n"
839 "vsra.vi v22, v21, 7\n"
840 "vsll.vi v23, v21, 1\n"
841 "vand.vx v22, v22, %[x1d]\n"
842 "vxor.vv v23, v23, v22\n"
843 "vle8.v v22, (%[wd5])\n"
844 "vxor.vv v21, v23, v22\n"
845 "vxor.vv v20, v20, v22\n"
847 "vsra.vi v26, v25, 7\n"
848 "vsll.vi v27, v25, 1\n"
849 "vand.vx v26, v26, %[x1d]\n"
850 "vxor.vv v27, v27, v26\n"
851 "vle8.v v26, (%[wd6])\n"
852 "vxor.vv v25, v27, v26\n"
853 "vxor.vv v24, v24, v26\n"
855 "vsra.vi v30, v29, 7\n"
856 "vsll.vi v31, v29, 1\n"
857 "vand.vx v30, v30, %[x1d]\n"
858 "vxor.vv v31, v31, v30\n"
859 "vle8.v v30, (%[wd7])\n"
860 "vxor.vv v29, v31, v30\n"
861 "vxor.vv v28, v28, v30\n"
862 ".option pop\n"
880 asm volatile (".option push\n"
881 ".option arch,+v\n"
882 "vse8.v v0, (%[wp0])\n"
883 "vse8.v v1, (%[wq0])\n"
884 "vse8.v v4, (%[wp1])\n"
885 "vse8.v v5, (%[wq1])\n"
886 "vse8.v v8, (%[wp2])\n"
887 "vse8.v v9, (%[wq2])\n"
888 "vse8.v v12, (%[wp3])\n"
889 "vse8.v v13, (%[wq3])\n"
890 "vse8.v v16, (%[wp4])\n"
891 "vse8.v v17, (%[wq4])\n"
892 "vse8.v v20, (%[wp5])\n"
893 "vse8.v v21, (%[wq5])\n"
894 "vse8.v v24, (%[wp6])\n"
895 "vse8.v v25, (%[wq6])\n"
896 "vse8.v v28, (%[wp7])\n"
897 "vse8.v v29, (%[wq7])\n"
898 ".option pop\n"
929 p = dptr[disks - 2]; /* XOR parity */
930 q = dptr[disks - 1]; /* RS syndrome */
932 asm volatile (".option push\n"
933 ".option arch,+v\n"
934 "vsetvli %0, x0, e8, m1, ta, ma\n"
935 ".option pop\n"
951 asm volatile (".option push\n"
952 ".option arch,+v\n"
953 "vle8.v v0, (%[wp0])\n"
954 "vle8.v v1, (%[wp0])\n"
955 "vle8.v v4, (%[wp1])\n"
956 "vle8.v v5, (%[wp1])\n"
957 "vle8.v v8, (%[wp2])\n"
958 "vle8.v v9, (%[wp2])\n"
959 "vle8.v v12, (%[wp3])\n"
960 "vle8.v v13, (%[wp3])\n"
961 "vle8.v v16, (%[wp4])\n"
962 "vle8.v v17, (%[wp4])\n"
963 "vle8.v v20, (%[wp5])\n"
964 "vle8.v v21, (%[wp5])\n"
965 "vle8.v v24, (%[wp6])\n"
966 "vle8.v v25, (%[wp6])\n"
967 "vle8.v v28, (%[wp7])\n"
968 "vle8.v v29, (%[wp7])\n"
969 ".option pop\n"
982 for (z = z0 - 1; z >= start; z--) {
992 asm volatile (".option push\n"
993 ".option arch,+v\n"
994 "vsra.vi v2, v1, 7\n"
995 "vsll.vi v3, v1, 1\n"
996 "vand.vx v2, v2, %[x1d]\n"
997 "vxor.vv v3, v3, v2\n"
998 "vle8.v v2, (%[wd0])\n"
999 "vxor.vv v1, v3, v2\n"
1000 "vxor.vv v0, v0, v2\n"
1002 "vsra.vi v6, v5, 7\n"
1003 "vsll.vi v7, v5, 1\n"
1004 "vand.vx v6, v6, %[x1d]\n"
1005 "vxor.vv v7, v7, v6\n"
1006 "vle8.v v6, (%[wd1])\n"
1007 "vxor.vv v5, v7, v6\n"
1008 "vxor.vv v4, v4, v6\n"
1010 "vsra.vi v10, v9, 7\n"
1011 "vsll.vi v11, v9, 1\n"
1012 "vand.vx v10, v10, %[x1d]\n"
1013 "vxor.vv v11, v11, v10\n"
1014 "vle8.v v10, (%[wd2])\n"
1015 "vxor.vv v9, v11, v10\n"
1016 "vxor.vv v8, v8, v10\n"
1018 "vsra.vi v14, v13, 7\n"
1019 "vsll.vi v15, v13, 1\n"
1020 "vand.vx v14, v14, %[x1d]\n"
1021 "vxor.vv v15, v15, v14\n"
1022 "vle8.v v14, (%[wd3])\n"
1023 "vxor.vv v13, v15, v14\n"
1024 "vxor.vv v12, v12, v14\n"
1026 "vsra.vi v18, v17, 7\n"
1027 "vsll.vi v19, v17, 1\n"
1028 "vand.vx v18, v18, %[x1d]\n"
1029 "vxor.vv v19, v19, v18\n"
1030 "vle8.v v18, (%[wd4])\n"
1031 "vxor.vv v17, v19, v18\n"
1032 "vxor.vv v16, v16, v18\n"
1034 "vsra.vi v22, v21, 7\n"
1035 "vsll.vi v23, v21, 1\n"
1036 "vand.vx v22, v22, %[x1d]\n"
1037 "vxor.vv v23, v23, v22\n"
1038 "vle8.v v22, (%[wd5])\n"
1039 "vxor.vv v21, v23, v22\n"
1040 "vxor.vv v20, v20, v22\n"
1042 "vsra.vi v26, v25, 7\n"
1043 "vsll.vi v27, v25, 1\n"
1044 "vand.vx v26, v26, %[x1d]\n"
1045 "vxor.vv v27, v27, v26\n"
1046 "vle8.v v26, (%[wd6])\n"
1047 "vxor.vv v25, v27, v26\n"
1048 "vxor.vv v24, v24, v26\n"
1050 "vsra.vi v30, v29, 7\n"
1051 "vsll.vi v31, v29, 1\n"
1052 "vand.vx v30, v30, %[x1d]\n"
1053 "vxor.vv v31, v31, v30\n"
1054 "vle8.v v30, (%[wd7])\n"
1055 "vxor.vv v29, v31, v30\n"
1056 "vxor.vv v28, v28, v30\n"
1057 ".option pop\n"
1072 for (z = start - 1; z >= 0; z--) {
1079 asm volatile (".option push\n"
1080 ".option arch,+v\n"
1081 "vsra.vi v2, v1, 7\n"
1082 "vsll.vi v3, v1, 1\n"
1083 "vand.vx v2, v2, %[x1d]\n"
1084 "vxor.vv v1, v3, v2\n"
1086 "vsra.vi v6, v5, 7\n"
1087 "vsll.vi v7, v5, 1\n"
1088 "vand.vx v6, v6, %[x1d]\n"
1089 "vxor.vv v5, v7, v6\n"
1091 "vsra.vi v10, v9, 7\n"
1092 "vsll.vi v11, v9, 1\n"
1093 "vand.vx v10, v10, %[x1d]\n"
1094 "vxor.vv v9, v11, v10\n"
1096 "vsra.vi v14, v13, 7\n"
1097 "vsll.vi v15, v13, 1\n"
1098 "vand.vx v14, v14, %[x1d]\n"
1099 "vxor.vv v13, v15, v14\n"
1101 "vsra.vi v18, v17, 7\n"
1102 "vsll.vi v19, v17, 1\n"
1103 "vand.vx v18, v18, %[x1d]\n"
1104 "vxor.vv v17, v19, v18\n"
1106 "vsra.vi v22, v21, 7\n"
1107 "vsll.vi v23, v21, 1\n"
1108 "vand.vx v22, v22, %[x1d]\n"
1109 "vxor.vv v21, v23, v22\n"
1111 "vsra.vi v26, v25, 7\n"
1112 "vsll.vi v27, v25, 1\n"
1113 "vand.vx v26, v26, %[x1d]\n"
1114 "vxor.vv v25, v27, v26\n"
1116 "vsra.vi v30, v29, 7\n"
1117 "vsll.vi v31, v29, 1\n"
1118 "vand.vx v30, v30, %[x1d]\n"
1119 "vxor.vv v29, v31, v30\n"
1120 ".option pop\n"
1138 asm volatile (".option push\n"
1139 ".option arch,+v\n"
1140 "vle8.v v2, (%[wp0])\n"
1141 "vle8.v v3, (%[wq0])\n"
1142 "vxor.vv v2, v2, v0\n"
1143 "vxor.vv v3, v3, v1\n"
1144 "vse8.v v2, (%[wp0])\n"
1145 "vse8.v v3, (%[wq0])\n"
1147 "vle8.v v6, (%[wp1])\n"
1148 "vle8.v v7, (%[wq1])\n"
1149 "vxor.vv v6, v6, v4\n"
1150 "vxor.vv v7, v7, v5\n"
1151 "vse8.v v6, (%[wp1])\n"
1152 "vse8.v v7, (%[wq1])\n"
1154 "vle8.v v10, (%[wp2])\n"
1155 "vle8.v v11, (%[wq2])\n"
1156 "vxor.vv v10, v10, v8\n"
1157 "vxor.vv v11, v11, v9\n"
1158 "vse8.v v10, (%[wp2])\n"
1159 "vse8.v v11, (%[wq2])\n"
1161 "vle8.v v14, (%[wp3])\n"
1162 "vle8.v v15, (%[wq3])\n"
1163 "vxor.vv v14, v14, v12\n"
1164 "vxor.vv v15, v15, v13\n"
1165 "vse8.v v14, (%[wp3])\n"
1166 "vse8.v v15, (%[wq3])\n"
1168 "vle8.v v18, (%[wp4])\n"
1169 "vle8.v v19, (%[wq4])\n"
1170 "vxor.vv v18, v18, v16\n"
1171 "vxor.vv v19, v19, v17\n"
1172 "vse8.v v18, (%[wp4])\n"
1173 "vse8.v v19, (%[wq4])\n"
1175 "vle8.v v22, (%[wp5])\n"
1176 "vle8.v v23, (%[wq5])\n"
1177 "vxor.vv v22, v22, v20\n"
1178 "vxor.vv v23, v23, v21\n"
1179 "vse8.v v22, (%[wp5])\n"
1180 "vse8.v v23, (%[wq5])\n"
1182 "vle8.v v26, (%[wp6])\n"
1183 "vle8.v v27, (%[wq6])\n"
1184 "vxor.vv v26, v26, v24\n"
1185 "vxor.vv v27, v27, v25\n"
1186 "vse8.v v26, (%[wp6])\n"
1187 "vse8.v v27, (%[wq6])\n"
1189 "vle8.v v30, (%[wp7])\n"
1190 "vle8.v v31, (%[wq7])\n"
1191 "vxor.vv v30, v30, v28\n"
1192 "vxor.vv v31, v31, v29\n"
1193 "vse8.v v30, (%[wp7])\n"
1194 "vse8.v v31, (%[wq7])\n"
1195 ".option pop\n"