// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RAID-6 syndrome calculation using RISC-V vector instructions
 *
 * Copyright 2024 Institute of Software, CAS.
 * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
 *
 * Based on neon.uc:
 *	Copyright 2002-2004 H. Peter Anvin
 */

#include <asm/simd.h>
#include <asm/vector.h>
#include <crypto/internal/simd.h>
#include <linux/raid/pq.h>
#include <linux/types.h>
#include "rvv.h"

#define NSIZE	(riscv_v_vsize / 32) /* NSIZE = vlenb, bytes per vector register */

static int rvv_has_vector(void)
{
	return has_vector();
}
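
/*
 * Illustrative scalar sketch (an addition for reference only, never
 * called by the vector paths below) of the per-byte step that each
 * inline-asm block performs.  MASK() is the arithmetic shift right by
 * 7, SHLBYTE() the left shift by one, and the 0x1d constant is the
 * reduced GF(2^8) generator polynomial, so the whole sequence
 * multiplies a syndrome byte by 2 over GF(2^8).
 */
static inline u8 raid6_rvv_gfmul2_sketch(u8 wq)
{
	u8 w2 = (u8)((s8)wq >> 7);	/* 0xff when the top bit is set, else 0 */
	u8 w1 = wq << 1;		/* shift left by one bit */

	w2 &= 0x1d;			/* fold in the reduction polynomial */
	return w1 ^ w2;			/* wq * 2 over GF(2^8) */
}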

static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	unsigned long d;
	int z, z0;
	u8 *p, *q;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];		/* XOR parity */
	q = dptr[z0 + 2];		/* RS syndrome */

	asm volatile (".option	push\n"
		      ".option	arch,+v\n"
		      "vsetvli	t0, x0, e8, m1, ta, ma\n"
		      ".option	pop\n"
	);

	/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
	for (d = 0; d < bytes; d += NSIZE * 1) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vle8.v	v0, (%[wp0])\n"
			      "vle8.v	v1, (%[wp0])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option	push\n"
				      ".option	arch,+v\n"
				      "vsra.vi	v2, v1, 7\n"
				      "vsll.vi	v3, v1, 1\n"
				      "vand.vx	v2, v2, %[x1d]\n"
				      "vxor.vv	v3, v3, v2\n"
				      "vle8.v	v2, (%[wd0])\n"
				      "vxor.vv	v1, v3, v2\n"
				      "vxor.vv	v0, v0, v2\n"
				      ".option	pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vse8.v	v0, (%[wp0])\n"
			      "vse8.v	v1, (%[wq0])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0])
		);
	}
}

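/*
 * The _xor_syndrome_ variants below fold the contribution of data disks
 * start..stop into already existing P/Q pages: the Horner-style Q
 * recurrence starts at disk "stop" since higher disks add nothing to
 * the delta ("right side" optimization), disks below "start" only get
 * the multiply-by-2 step so the accumulated value ends up weighted by
 * the right power of the generator ("left side" optimization), and the
 * results are then XORed into P and Q rather than stored.
 */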
static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option	push\n"
		      ".option	arch,+v\n"
		      "vsetvli	t0, x0, e8, m1, ta, ma\n"
		      ".option	pop\n"
	);

	/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
	for (d = 0; d < bytes; d += NSIZE * 1) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vle8.v	v0, (%[wp0])\n"
			      "vle8.v	v1, (%[wp0])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option	push\n"
				      ".option	arch,+v\n"
				      "vsra.vi	v2, v1, 7\n"
				      "vsll.vi	v3, v1, 1\n"
				      "vand.vx	v2, v2, %[x1d]\n"
				      "vxor.vv	v3, v3, v2\n"
				      "vle8.v	v2, (%[wd0])\n"
				      "vxor.vv	v1, v3, v2\n"
				      "vxor.vv	v0, v0, v2\n"
				      ".option	pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option	push\n"
				      ".option	arch,+v\n"
				      "vsra.vi	v2, v1, 7\n"
				      "vsll.vi	v3, v1, 1\n"
				      "vand.vx	v2, v2, %[x1d]\n"
				      "vxor.vv	v1, v3, v2\n"
				      ".option	pop\n"
				      : :
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vle8.v	v2, (%[wp0])\n"
			      "vle8.v	v3, (%[wq0])\n"
			      "vxor.vv	v2, v2, v0\n"
			      "vxor.vv	v3, v3, v1\n"
			      "vse8.v	v2, (%[wp0])\n"
			      "vse8.v	v3, (%[wq0])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0])
		);
	}
}

static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	unsigned long d;
	int z, z0;
	u8 *p, *q;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];		/* XOR parity */
	q = dptr[z0 + 2];		/* RS syndrome */

	asm volatile (".option	push\n"
		      ".option	arch,+v\n"
		      "vsetvli	t0, x0, e8, m1, ta, ma\n"
		      ".option	pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 */
	for (d = 0; d < bytes; d += NSIZE * 2) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vle8.v	v0, (%[wp0])\n"
			      "vle8.v	v1, (%[wp0])\n"
			      "vle8.v	v4, (%[wp1])\n"
			      "vle8.v	v5, (%[wp1])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			      [wp1]"r"(&dptr[z0][d + 1 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option	push\n"
				      ".option	arch,+v\n"
				      "vsra.vi	v2, v1, 7\n"
				      "vsll.vi	v3, v1, 1\n"
				      "vand.vx	v2, v2, %[x1d]\n"
				      "vxor.vv	v3, v3, v2\n"
				      "vle8.v	v2, (%[wd0])\n"
				      "vxor.vv	v1, v3, v2\n"
				      "vxor.vv	v0, v0, v2\n"

				      "vsra.vi	v6, v5, 7\n"
				      "vsll.vi	v7, v5, 1\n"
				      "vand.vx	v6, v6, %[x1d]\n"
				      "vxor.vv	v7, v7, v6\n"
				      "vle8.v	v6, (%[wd1])\n"
				      "vxor.vv	v5, v7, v6\n"
				      "vxor.vv	v4, v4, v6\n"
				      ".option	pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vse8.v	v0, (%[wp0])\n"
			      "vse8.v	v1, (%[wq0])\n"
			      "vse8.v	v4, (%[wp1])\n"
			      "vse8.v	v5, (%[wq1])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0]),
			      [wp1]"r"(&p[d + NSIZE * 1]),
			      [wq1]"r"(&q[d + NSIZE * 1])
		);
	}
}

static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option	push\n"
		      ".option	arch,+v\n"
		      "vsetvli	t0, x0, e8, m1, ta, ma\n"
		      ".option	pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 */
	for (d = 0; d < bytes; d += NSIZE * 2) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vle8.v	v0, (%[wp0])\n"
			      "vle8.v	v1, (%[wp0])\n"
			      "vle8.v	v4, (%[wp1])\n"
			      "vle8.v	v5, (%[wp1])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			      [wp1]"r"(&dptr[z0][d + 1 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option	push\n"
				      ".option	arch,+v\n"
				      "vsra.vi	v2, v1, 7\n"
				      "vsll.vi	v3, v1, 1\n"
				      "vand.vx	v2, v2, %[x1d]\n"
				      "vxor.vv	v3, v3, v2\n"
				      "vle8.v	v2, (%[wd0])\n"
				      "vxor.vv	v1, v3, v2\n"
				      "vxor.vv	v0, v0, v2\n"

				      "vsra.vi	v6, v5, 7\n"
				      "vsll.vi	v7, v5, 1\n"
				      "vand.vx	v6, v6, %[x1d]\n"
				      "vxor.vv	v7, v7, v6\n"
				      "vle8.v	v6, (%[wd1])\n"
				      "vxor.vv	v5, v7, v6\n"
				      "vxor.vv	v4, v4, v6\n"
				      ".option	pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option	push\n"
				      ".option	arch,+v\n"
				      "vsra.vi	v2, v1, 7\n"
				      "vsll.vi	v3, v1, 1\n"
				      "vand.vx	v2, v2, %[x1d]\n"
				      "vxor.vv	v1, v3, v2\n"

				      "vsra.vi	v6, v5, 7\n"
				      "vsll.vi	v7, v5, 1\n"
				      "vand.vx	v6, v6, %[x1d]\n"
				      "vxor.vv	v5, v7, v6\n"
				      ".option	pop\n"
				      : :
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vle8.v	v2, (%[wp0])\n"
			      "vle8.v	v3, (%[wq0])\n"
			      "vxor.vv	v2, v2, v0\n"
			      "vxor.vv	v3, v3, v1\n"
			      "vse8.v	v2, (%[wp0])\n"
			      "vse8.v	v3, (%[wq0])\n"

			      "vle8.v	v6, (%[wp1])\n"
			      "vle8.v	v7, (%[wq1])\n"
			      "vxor.vv	v6, v6, v4\n"
			      "vxor.vv	v7, v7, v5\n"
			      "vse8.v	v6, (%[wp1])\n"
			      "vse8.v	v7, (%[wq1])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0]),
			      [wp1]"r"(&p[d + NSIZE * 1]),
			      [wq1]"r"(&q[d + NSIZE * 1])
		);
	}
}

static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	unsigned long d;
	int z, z0;
	u8 *p, *q;

	z0 = disks - 3;	/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option	push\n"
		      ".option	arch,+v\n"
		      "vsetvli	t0, x0, e8, m1, ta, ma\n"
		      ".option	pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 */
	for (d = 0; d < bytes; d += NSIZE * 4) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vle8.v	v0, (%[wp0])\n"
			      "vle8.v	v1, (%[wp0])\n"
			      "vle8.v	v4, (%[wp1])\n"
			      "vle8.v	v5, (%[wp1])\n"
			      "vle8.v	v8, (%[wp2])\n"
			      "vle8.v	v9, (%[wp2])\n"
			      "vle8.v	v12, (%[wp3])\n"
			      "vle8.v	v13, (%[wp3])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			      [wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
			      [wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
			      [wp3]"r"(&dptr[z0][d + 3 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option	push\n"
				      ".option	arch,+v\n"
				      "vsra.vi	v2, v1, 7\n"
				      "vsll.vi	v3, v1, 1\n"
				      "vand.vx	v2, v2, %[x1d]\n"
				      "vxor.vv	v3, v3, v2\n"
				      "vle8.v	v2, (%[wd0])\n"
				      "vxor.vv	v1, v3, v2\n"
				      "vxor.vv	v0, v0, v2\n"

				      "vsra.vi	v6, v5, 7\n"
				      "vsll.vi	v7, v5, 1\n"
				      "vand.vx	v6, v6, %[x1d]\n"
				      "vxor.vv	v7, v7, v6\n"
				      "vle8.v	v6, (%[wd1])\n"
				      "vxor.vv	v5, v7, v6\n"
				      "vxor.vv	v4, v4, v6\n"

				      "vsra.vi	v10, v9, 7\n"
				      "vsll.vi	v11, v9, 1\n"
				      "vand.vx	v10, v10, %[x1d]\n"
				      "vxor.vv	v11, v11, v10\n"
				      "vle8.v	v10, (%[wd2])\n"
				      "vxor.vv	v9, v11, v10\n"
				      "vxor.vv	v8, v8, v10\n"

				      "vsra.vi	v14, v13, 7\n"
				      "vsll.vi	v15, v13, 1\n"
				      "vand.vx	v14, v14, %[x1d]\n"
				      "vxor.vv	v15, v15, v14\n"
				      "vle8.v	v14, (%[wd3])\n"
				      "vxor.vv	v13, v15, v14\n"
				      "vxor.vv	v12, v12, v14\n"
				      ".option	pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				      [wd2]"r"(&dptr[z][d + 2 * NSIZE]),
				      [wd3]"r"(&dptr[z][d + 3 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vse8.v	v0, (%[wp0])\n"
			      "vse8.v	v1, (%[wq0])\n"
			      "vse8.v	v4, (%[wp1])\n"
			      "vse8.v	v5, (%[wq1])\n"
			      "vse8.v	v8, (%[wp2])\n"
			      "vse8.v	v9, (%[wq2])\n"
			      "vse8.v	v12, (%[wp3])\n"
			      "vse8.v	v13, (%[wq3])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0]),
			      [wp1]"r"(&p[d + NSIZE * 1]),
			      [wq1]"r"(&q[d + NSIZE * 1]),
			      [wp2]"r"(&p[d + NSIZE * 2]),
			      [wq2]"r"(&q[d + NSIZE * 2]),
			      [wp3]"r"(&p[d + NSIZE * 3]),
			      [wq3]"r"(&q[d + NSIZE * 3])
		);
	}
}

static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option	push\n"
		      ".option	arch,+v\n"
		      "vsetvli	t0, x0, e8, m1, ta, ma\n"
		      ".option	pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 */
	for (d = 0; d < bytes; d += NSIZE * 4) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vle8.v	v0, (%[wp0])\n"
			      "vle8.v	v1, (%[wp0])\n"
			      "vle8.v	v4, (%[wp1])\n"
			      "vle8.v	v5, (%[wp1])\n"
			      "vle8.v	v8, (%[wp2])\n"
			      "vle8.v	v9, (%[wp2])\n"
			      "vle8.v	v12, (%[wp3])\n"
			      "vle8.v	v13, (%[wp3])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			      [wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
			      [wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
			      [wp3]"r"(&dptr[z0][d + 3 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option	push\n"
				      ".option	arch,+v\n"
				      "vsra.vi	v2, v1, 7\n"
				      "vsll.vi	v3, v1, 1\n"
				      "vand.vx	v2, v2, %[x1d]\n"
				      "vxor.vv	v3, v3, v2\n"
				      "vle8.v	v2, (%[wd0])\n"
				      "vxor.vv	v1, v3, v2\n"
				      "vxor.vv	v0, v0, v2\n"

				      "vsra.vi	v6, v5, 7\n"
				      "vsll.vi	v7, v5, 1\n"
				      "vand.vx	v6, v6, %[x1d]\n"
				      "vxor.vv	v7, v7, v6\n"
				      "vle8.v	v6, (%[wd1])\n"
				      "vxor.vv	v5, v7, v6\n"
				      "vxor.vv	v4, v4, v6\n"

				      "vsra.vi	v10, v9, 7\n"
				      "vsll.vi	v11, v9, 1\n"
				      "vand.vx	v10, v10, %[x1d]\n"
				      "vxor.vv	v11, v11, v10\n"
				      "vle8.v	v10, (%[wd2])\n"
				      "vxor.vv	v9, v11, v10\n"
				      "vxor.vv	v8, v8, v10\n"

				      "vsra.vi	v14, v13, 7\n"
				      "vsll.vi	v15, v13, 1\n"
				      "vand.vx	v14, v14, %[x1d]\n"
				      "vxor.vv	v15, v15, v14\n"
				      "vle8.v	v14, (%[wd3])\n"
				      "vxor.vv	v13, v15, v14\n"
				      "vxor.vv	v12, v12, v14\n"
				      ".option	pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				      [wd2]"r"(&dptr[z][d + 2 * NSIZE]),
				      [wd3]"r"(&dptr[z][d + 3 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option	push\n"
				      ".option	arch,+v\n"
				      "vsra.vi	v2, v1, 7\n"
				      "vsll.vi	v3, v1, 1\n"
				      "vand.vx	v2, v2, %[x1d]\n"
				      "vxor.vv	v1, v3, v2\n"

				      "vsra.vi	v6, v5, 7\n"
				      "vsll.vi	v7, v5, 1\n"
				      "vand.vx	v6, v6, %[x1d]\n"
				      "vxor.vv	v5, v7, v6\n"

				      "vsra.vi	v10, v9, 7\n"
				      "vsll.vi	v11, v9, 1\n"
				      "vand.vx	v10, v10, %[x1d]\n"
				      "vxor.vv	v9, v11, v10\n"

				      "vsra.vi	v14, v13, 7\n"
				      "vsll.vi	v15, v13, 1\n"
				      "vand.vx	v14, v14, %[x1d]\n"
				      "vxor.vv	v13, v15, v14\n"
				      ".option	pop\n"
				      : :
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 * v8:wp2, v9:wq2, v10:p2, v11:q2
		 * v12:wp3, v13:wq3, v14:p3, v15:q3
		 */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vle8.v	v2, (%[wp0])\n"
			      "vle8.v	v3, (%[wq0])\n"
			      "vxor.vv	v2, v2, v0\n"
			      "vxor.vv	v3, v3, v1\n"
			      "vse8.v	v2, (%[wp0])\n"
			      "vse8.v	v3, (%[wq0])\n"

			      "vle8.v	v6, (%[wp1])\n"
			      "vle8.v	v7, (%[wq1])\n"
			      "vxor.vv	v6, v6, v4\n"
			      "vxor.vv	v7, v7, v5\n"
			      "vse8.v	v6, (%[wp1])\n"
			      "vse8.v	v7, (%[wq1])\n"

			      "vle8.v	v10, (%[wp2])\n"
			      "vle8.v	v11, (%[wq2])\n"
			      "vxor.vv	v10, v10, v8\n"
			      "vxor.vv	v11, v11, v9\n"
			      "vse8.v	v10, (%[wp2])\n"
			      "vse8.v	v11, (%[wq2])\n"

			      "vle8.v	v14, (%[wp3])\n"
			      "vle8.v	v15, (%[wq3])\n"
			      "vxor.vv	v14, v14, v12\n"
			      "vxor.vv	v15, v15, v13\n"
			      "vse8.v	v14, (%[wp3])\n"
			      "vse8.v	v15, (%[wq3])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0]),
			      [wp1]"r"(&p[d + NSIZE * 1]),
			      [wq1]"r"(&q[d + NSIZE * 1]),
			      [wp2]"r"(&p[d + NSIZE * 2]),
			      [wq2]"r"(&q[d + NSIZE * 2]),
			      [wp3]"r"(&p[d + NSIZE * 3]),
			      [wq3]"r"(&q[d + NSIZE * 3])
		);
	}
}

static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	unsigned long d;
	int z, z0;
	u8 *p, *q;

	z0 = disks - 3;	/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option	push\n"
		      ".option	arch,+v\n"
		      "vsetvli	t0, x0, e8, m1, ta, ma\n"
		      ".option	pop\n"
	);

	/*
	 * v0:wp0,   v1:wq0,  v2:wd0/w20,  v3:w10
	 * v4:wp1,   v5:wq1,  v6:wd1/w21,  v7:w11
	 * v8:wp2,   v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14
	 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15
	 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16
	 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17
	 */
	for (d = 0; d < bytes; d += NSIZE * 8) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vle8.v	v0, (%[wp0])\n"
			      "vle8.v	v1, (%[wp0])\n"
			      "vle8.v	v4, (%[wp1])\n"
			      "vle8.v	v5, (%[wp1])\n"
			      "vle8.v	v8, (%[wp2])\n"
			      "vle8.v	v9, (%[wp2])\n"
			      "vle8.v	v12, (%[wp3])\n"
			      "vle8.v	v13, (%[wp3])\n"
			      "vle8.v	v16, (%[wp4])\n"
			      "vle8.v	v17, (%[wp4])\n"
			      "vle8.v	v20, (%[wp5])\n"
			      "vle8.v	v21, (%[wp5])\n"
			      "vle8.v	v24, (%[wp6])\n"
			      "vle8.v	v25, (%[wp6])\n"
			      "vle8.v	v28, (%[wp7])\n"
			      "vle8.v	v29, (%[wp7])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			      [wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
			      [wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
			      [wp3]"r"(&dptr[z0][d + 3 * NSIZE]),
			      [wp4]"r"(&dptr[z0][d + 4 * NSIZE]),
			      [wp5]"r"(&dptr[z0][d + 5 * NSIZE]),
			      [wp6]"r"(&dptr[z0][d + 6 * NSIZE]),
			      [wp7]"r"(&dptr[z0][d + 7 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option	push\n"
				      ".option	arch,+v\n"
				      "vsra.vi	v2, v1, 7\n"
				      "vsll.vi	v3, v1, 1\n"
				      "vand.vx	v2, v2, %[x1d]\n"
				      "vxor.vv	v3, v3, v2\n"
				      "vle8.v	v2, (%[wd0])\n"
				      "vxor.vv	v1, v3, v2\n"
				      "vxor.vv	v0, v0, v2\n"

				      "vsra.vi	v6, v5, 7\n"
				      "vsll.vi	v7, v5, 1\n"
				      "vand.vx	v6, v6, %[x1d]\n"
				      "vxor.vv	v7, v7, v6\n"
				      "vle8.v	v6, (%[wd1])\n"
				      "vxor.vv	v5, v7, v6\n"
				      "vxor.vv	v4, v4, v6\n"

				      "vsra.vi	v10, v9, 7\n"
				      "vsll.vi	v11, v9, 1\n"
				      "vand.vx	v10, v10, %[x1d]\n"
				      "vxor.vv	v11, v11, v10\n"
				      "vle8.v	v10, (%[wd2])\n"
				      "vxor.vv	v9, v11, v10\n"
				      "vxor.vv	v8, v8, v10\n"

				      "vsra.vi	v14, v13, 7\n"
				      "vsll.vi	v15, v13, 1\n"
				      "vand.vx	v14, v14, %[x1d]\n"
				      "vxor.vv	v15, v15, v14\n"
				      "vle8.v	v14, (%[wd3])\n"
				      "vxor.vv	v13, v15, v14\n"
				      "vxor.vv	v12, v12, v14\n"

				      "vsra.vi	v18, v17, 7\n"
				      "vsll.vi	v19, v17, 1\n"
				      "vand.vx	v18, v18, %[x1d]\n"
				      "vxor.vv	v19, v19, v18\n"
				      "vle8.v	v18, (%[wd4])\n"
				      "vxor.vv	v17, v19, v18\n"
				      "vxor.vv	v16, v16, v18\n"

				      "vsra.vi	v22, v21, 7\n"
				      "vsll.vi	v23, v21, 1\n"
				      "vand.vx	v22, v22, %[x1d]\n"
				      "vxor.vv	v23, v23, v22\n"
				      "vle8.v	v22, (%[wd5])\n"
				      "vxor.vv	v21, v23, v22\n"
				      "vxor.vv	v20, v20, v22\n"

				      "vsra.vi	v26, v25, 7\n"
				      "vsll.vi	v27, v25, 1\n"
				      "vand.vx	v26, v26, %[x1d]\n"
				      "vxor.vv	v27, v27, v26\n"
				      "vle8.v	v26, (%[wd6])\n"
				      "vxor.vv	v25, v27, v26\n"
				      "vxor.vv	v24, v24, v26\n"

				      "vsra.vi	v30, v29, 7\n"
				      "vsll.vi	v31, v29, 1\n"
				      "vand.vx	v30, v30, %[x1d]\n"
				      "vxor.vv	v31, v31, v30\n"
				      "vle8.v	v30, (%[wd7])\n"
				      "vxor.vv	v29, v31, v30\n"
				      "vxor.vv	v28, v28, v30\n"
				      ".option	pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				      [wd2]"r"(&dptr[z][d + 2 * NSIZE]),
				      [wd3]"r"(&dptr[z][d + 3 * NSIZE]),
				      [wd4]"r"(&dptr[z][d + 4 * NSIZE]),
				      [wd5]"r"(&dptr[z][d + 5 * NSIZE]),
				      [wd6]"r"(&dptr[z][d + 6 * NSIZE]),
				      [wd7]"r"(&dptr[z][d + 7 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vse8.v	v0, (%[wp0])\n"
			      "vse8.v	v1, (%[wq0])\n"
			      "vse8.v	v4, (%[wp1])\n"
			      "vse8.v	v5, (%[wq1])\n"
			      "vse8.v	v8, (%[wp2])\n"
			      "vse8.v	v9, (%[wq2])\n"
			      "vse8.v	v12, (%[wp3])\n"
			      "vse8.v	v13, (%[wq3])\n"
			      "vse8.v	v16, (%[wp4])\n"
			      "vse8.v	v17, (%[wq4])\n"
			      "vse8.v	v20, (%[wp5])\n"
			      "vse8.v	v21, (%[wq5])\n"
			      "vse8.v	v24, (%[wp6])\n"
			      "vse8.v	v25, (%[wq6])\n"
			      "vse8.v	v28, (%[wp7])\n"
			      "vse8.v	v29, (%[wq7])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0]),
			      [wp1]"r"(&p[d + NSIZE * 1]),
			      [wq1]"r"(&q[d + NSIZE * 1]),
			      [wp2]"r"(&p[d + NSIZE * 2]),
			      [wq2]"r"(&q[d + NSIZE * 2]),
			      [wp3]"r"(&p[d + NSIZE * 3]),
			      [wq3]"r"(&q[d + NSIZE * 3]),
			      [wp4]"r"(&p[d + NSIZE * 4]),
			      [wq4]"r"(&q[d + NSIZE * 4]),
			      [wp5]"r"(&p[d + NSIZE * 5]),
			      [wq5]"r"(&q[d + NSIZE * 5]),
			      [wp6]"r"(&p[d + NSIZE * 6]),
			      [wq6]"r"(&q[d + NSIZE * 6]),
			      [wp7]"r"(&p[d + NSIZE * 7]),
			      [wq7]"r"(&q[d + NSIZE * 7])
		);
	}
}

static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option	push\n"
		      ".option	arch,+v\n"
		      "vsetvli	t0, x0, e8, m1, ta, ma\n"
		      ".option	pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14
	 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15
	 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16
	 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17
	 */
	for (d = 0; d < bytes; d += NSIZE * 8) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vle8.v	v0, (%[wp0])\n"
			      "vle8.v	v1, (%[wp0])\n"
			      "vle8.v	v4, (%[wp1])\n"
			      "vle8.v	v5, (%[wp1])\n"
			      "vle8.v	v8, (%[wp2])\n"
			      "vle8.v	v9, (%[wp2])\n"
			      "vle8.v	v12, (%[wp3])\n"
			      "vle8.v	v13, (%[wp3])\n"
			      "vle8.v	v16, (%[wp4])\n"
			      "vle8.v	v17, (%[wp4])\n"
			      "vle8.v	v20, (%[wp5])\n"
			      "vle8.v	v21, (%[wp5])\n"
			      "vle8.v	v24, (%[wp6])\n"
			      "vle8.v	v25, (%[wp6])\n"
			      "vle8.v	v28, (%[wp7])\n"
			      "vle8.v	v29, (%[wp7])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			      [wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
			      [wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
			      [wp3]"r"(&dptr[z0][d + 3 * NSIZE]),
			      [wp4]"r"(&dptr[z0][d + 4 * NSIZE]),
			      [wp5]"r"(&dptr[z0][d + 5 * NSIZE]),
			      [wp6]"r"(&dptr[z0][d + 6 * NSIZE]),
			      [wp7]"r"(&dptr[z0][d + 7 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option	push\n"
				      ".option	arch,+v\n"
				      "vsra.vi	v2, v1, 7\n"
				      "vsll.vi	v3, v1, 1\n"
				      "vand.vx	v2, v2, %[x1d]\n"
				      "vxor.vv	v3, v3, v2\n"
				      "vle8.v	v2, (%[wd0])\n"
				      "vxor.vv	v1, v3, v2\n"
				      "vxor.vv	v0, v0, v2\n"

				      "vsra.vi	v6, v5, 7\n"
				      "vsll.vi	v7, v5, 1\n"
				      "vand.vx	v6, v6, %[x1d]\n"
				      "vxor.vv	v7, v7, v6\n"
				      "vle8.v	v6, (%[wd1])\n"
				      "vxor.vv	v5, v7, v6\n"
				      "vxor.vv	v4, v4, v6\n"

				      "vsra.vi	v10, v9, 7\n"
				      "vsll.vi	v11, v9, 1\n"
				      "vand.vx	v10, v10, %[x1d]\n"
				      "vxor.vv	v11, v11, v10\n"
				      "vle8.v	v10, (%[wd2])\n"
				      "vxor.vv	v9, v11, v10\n"
				      "vxor.vv	v8, v8, v10\n"

				      "vsra.vi	v14, v13, 7\n"
				      "vsll.vi	v15, v13, 1\n"
				      "vand.vx	v14, v14, %[x1d]\n"
				      "vxor.vv	v15, v15, v14\n"
				      "vle8.v	v14, (%[wd3])\n"
				      "vxor.vv	v13, v15, v14\n"
				      "vxor.vv	v12, v12, v14\n"

				      "vsra.vi	v18, v17, 7\n"
				      "vsll.vi	v19, v17, 1\n"
				      "vand.vx	v18, v18, %[x1d]\n"
				      "vxor.vv	v19, v19, v18\n"
				      "vle8.v	v18, (%[wd4])\n"
				      "vxor.vv	v17, v19, v18\n"
				      "vxor.vv	v16, v16, v18\n"

				      "vsra.vi	v22, v21, 7\n"
				      "vsll.vi	v23, v21, 1\n"
				      "vand.vx	v22, v22, %[x1d]\n"
				      "vxor.vv	v23, v23, v22\n"
				      "vle8.v	v22, (%[wd5])\n"
				      "vxor.vv	v21, v23, v22\n"
				      "vxor.vv	v20, v20, v22\n"

				      "vsra.vi	v26, v25, 7\n"
				      "vsll.vi	v27, v25, 1\n"
				      "vand.vx	v26, v26, %[x1d]\n"
				      "vxor.vv	v27, v27, v26\n"
				      "vle8.v	v26, (%[wd6])\n"
				      "vxor.vv	v25, v27, v26\n"
				      "vxor.vv	v24, v24, v26\n"

				      "vsra.vi	v30, v29, 7\n"
				      "vsll.vi	v31, v29, 1\n"
				      "vand.vx	v30, v30, %[x1d]\n"
				      "vxor.vv	v31, v31, v30\n"
				      "vle8.v	v30, (%[wd7])\n"
				      "vxor.vv	v29, v31, v30\n"
				      "vxor.vv	v28, v28, v30\n"
				      ".option	pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				      [wd2]"r"(&dptr[z][d + 2 * NSIZE]),
				      [wd3]"r"(&dptr[z][d + 3 * NSIZE]),
				      [wd4]"r"(&dptr[z][d + 4 * NSIZE]),
				      [wd5]"r"(&dptr[z][d + 5 * NSIZE]),
				      [wd6]"r"(&dptr[z][d + 6 * NSIZE]),
				      [wd7]"r"(&dptr[z][d + 7 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option	push\n"
				      ".option	arch,+v\n"
				      "vsra.vi	v2, v1, 7\n"
				      "vsll.vi	v3, v1, 1\n"
				      "vand.vx	v2, v2, %[x1d]\n"
				      "vxor.vv	v1, v3, v2\n"

				      "vsra.vi	v6, v5, 7\n"
				      "vsll.vi	v7, v5, 1\n"
				      "vand.vx	v6, v6, %[x1d]\n"
				      "vxor.vv	v5, v7, v6\n"

				      "vsra.vi	v10, v9, 7\n"
				      "vsll.vi	v11, v9, 1\n"
				      "vand.vx	v10, v10, %[x1d]\n"
				      "vxor.vv	v9, v11, v10\n"

				      "vsra.vi	v14, v13, 7\n"
				      "vsll.vi	v15, v13, 1\n"
				      "vand.vx	v14, v14, %[x1d]\n"
				      "vxor.vv	v13, v15, v14\n"

				      "vsra.vi	v18, v17, 7\n"
				      "vsll.vi	v19, v17, 1\n"
				      "vand.vx	v18, v18, %[x1d]\n"
				      "vxor.vv	v17, v19, v18\n"

				      "vsra.vi	v22, v21, 7\n"
				      "vsll.vi	v23, v21, 1\n"
				      "vand.vx	v22, v22, %[x1d]\n"
				      "vxor.vv	v21, v23, v22\n"

				      "vsra.vi	v26, v25, 7\n"
				      "vsll.vi	v27, v25, 1\n"
				      "vand.vx	v26, v26, %[x1d]\n"
				      "vxor.vv	v25, v27, v26\n"

				      "vsra.vi	v30, v29, 7\n"
				      "vsll.vi	v31, v29, 1\n"
				      "vand.vx	v30, v30, %[x1d]\n"
				      "vxor.vv	v29, v31, v30\n"
				      ".option	pop\n"
				      : :
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 * v8:wp2, v9:wq2, v10:p2, v11:q2
		 * v12:wp3, v13:wq3, v14:p3, v15:q3
		 * v16:wp4, v17:wq4, v18:p4, v19:q4
		 * v20:wp5, v21:wq5, v22:p5, v23:q5
		 * v24:wp6, v25:wq6, v26:p6, v27:q6
		 * v28:wp7, v29:wq7, v30:p7, v31:q7
		 */
		asm volatile (".option	push\n"
			      ".option	arch,+v\n"
			      "vle8.v	v2, (%[wp0])\n"
			      "vle8.v	v3, (%[wq0])\n"
			      "vxor.vv	v2, v2, v0\n"
			      "vxor.vv	v3, v3, v1\n"
			      "vse8.v	v2, (%[wp0])\n"
			      "vse8.v	v3, (%[wq0])\n"

			      "vle8.v	v6, (%[wp1])\n"
			      "vle8.v	v7, (%[wq1])\n"
			      "vxor.vv	v6, v6, v4\n"
			      "vxor.vv	v7, v7, v5\n"
			      "vse8.v	v6, (%[wp1])\n"
			      "vse8.v	v7, (%[wq1])\n"

			      "vle8.v	v10, (%[wp2])\n"
			      "vle8.v	v11, (%[wq2])\n"
			      "vxor.vv	v10, v10, v8\n"
			      "vxor.vv	v11, v11, v9\n"
			      "vse8.v	v10, (%[wp2])\n"
			      "vse8.v	v11, (%[wq2])\n"

			      "vle8.v	v14, (%[wp3])\n"
			      "vle8.v	v15, (%[wq3])\n"
			      "vxor.vv	v14, v14, v12\n"
			      "vxor.vv	v15, v15, v13\n"
			      "vse8.v	v14, (%[wp3])\n"
			      "vse8.v	v15, (%[wq3])\n"

			      "vle8.v	v18, (%[wp4])\n"
			      "vle8.v	v19, (%[wq4])\n"
			      "vxor.vv	v18, v18, v16\n"
			      "vxor.vv	v19, v19, v17\n"
			      "vse8.v	v18, (%[wp4])\n"
			      "vse8.v	v19, (%[wq4])\n"

			      "vle8.v	v22, (%[wp5])\n"
			      "vle8.v	v23, (%[wq5])\n"
			      "vxor.vv	v22, v22, v20\n"
			      "vxor.vv	v23, v23, v21\n"
			      "vse8.v	v22, (%[wp5])\n"
			      "vse8.v	v23, (%[wq5])\n"

			      "vle8.v	v26, (%[wp6])\n"
			      "vle8.v	v27, (%[wq6])\n"
			      "vxor.vv	v26, v26, v24\n"
			      "vxor.vv	v27, v27, v25\n"
			      "vse8.v	v26, (%[wp6])\n"
			      "vse8.v	v27, (%[wq6])\n"

			      "vle8.v	v30, (%[wp7])\n"
			      "vle8.v	v31, (%[wq7])\n"
			      "vxor.vv	v30, v30, v28\n"
			      "vxor.vv	v31, v31, v29\n"
			      "vse8.v	v30, (%[wp7])\n"
			      "vse8.v	v31, (%[wq7])\n"
			      ".option	pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0]),
			      [wp1]"r"(&p[d + NSIZE * 1]),
			      [wq1]"r"(&q[d + NSIZE * 1]),
			      [wp2]"r"(&p[d + NSIZE * 2]),
			      [wq2]"r"(&q[d + NSIZE * 2]),
			      [wp3]"r"(&p[d + NSIZE * 3]),
			      [wq3]"r"(&q[d + NSIZE * 3]),
			      [wp4]"r"(&p[d + NSIZE * 4]),
			      [wq4]"r"(&q[d + NSIZE * 4]),
			      [wp5]"r"(&p[d + NSIZE * 5]),
			      [wq5]"r"(&q[d + NSIZE * 5]),
			      [wp6]"r"(&p[d + NSIZE * 6]),
			      [wq6]"r"(&q[d + NSIZE * 6]),
			      [wp7]"r"(&p[d + NSIZE * 7]),
			      [wq7]"r"(&q[d + NSIZE * 7])
		);
	}
}

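/*
 * RAID6_RVV_WRAPPER() comes from rvv.h; it is expected to emit the
 * public gen_syndrome()/xor_syndrome() entry points that run the
 * *_real() routines above inside a kernel-mode vector context, plus
 * the matching raid6_calls descriptors for each unroll factor.
 */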
RAID6_RVV_WRAPPER(1);
RAID6_RVV_WRAPPER(2);
RAID6_RVV_WRAPPER(4);
RAID6_RVV_WRAPPER(8);