// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RAID-6 syndrome calculation using RISC-V vector instructions
 *
 * Copyright 2024 Institute of Software, CAS.
 * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
 *
 * Based on neon.uc:
 *	Copyright 2002-2004 H. Peter Anvin
 */

#include <asm/simd.h>
#include <asm/vector.h>
#include <crypto/internal/simd.h>
#include <linux/raid/pq.h>
#include <linux/types.h>
#include "rvv.h"

#define NSIZE	(riscv_v_vsize / 32) /* NSIZE = vlenb */

static int rvv_has_vector(void)
{
	return has_vector();
}

static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	unsigned long d;
	int z, z0;
	u8 *p, *q;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
			".option arch,+v\n"
			"vsetvli t0, x0, e8, m1, ta, ma\n"
			".option pop\n"
	);

	/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
	for (d = 0; d < bytes; d += NSIZE * 1) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vle8.v v0, (%[wp0])\n"
				"vle8.v v1, (%[wp0])\n"
				".option pop\n"
				: :
				[wp0]"r"(&dptr[z0][d + 0 * NSIZE])
		);

		for (z = z0 - 1 ; z >= 0 ; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
					".option arch,+v\n"
					"vsra.vi v2, v1, 7\n"
					"vsll.vi v3, v1, 1\n"
					"vand.vx v2, v2, %[x1d]\n"
					"vxor.vv v3, v3, v2\n"
					"vle8.v v2, (%[wd0])\n"
					"vxor.vv v1, v3, v2\n"
					"vxor.vv v0, v0, v2\n"
					".option pop\n"
					: :
					[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
					[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vse8.v v0, (%[wp0])\n"
				"vse8.v v1, (%[wq0])\n"
				".option pop\n"
				: :
				[wp0]"r"(&p[d + NSIZE * 0]),
				[wq0]"r"(&q[d + NSIZE * 0])
		);
	}
}

static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
			".option arch,+v\n"
			"vsetvli t0, x0, e8, m1, ta, ma\n"
			".option pop\n"
	);

	/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
	for (d = 0 ; d < bytes ; d += NSIZE * 1) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vle8.v v0, (%[wp0])\n"
				"vle8.v v1, (%[wp0])\n"
				".option pop\n"
				: :
				[wp0]"r"(&dptr[z0][d + 0 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
					".option arch,+v\n"
					"vsra.vi v2, v1, 7\n"
					"vsll.vi v3, v1, 1\n"
					"vand.vx v2, v2, %[x1d]\n"
					"vxor.vv v3, v3, v2\n"
					"vle8.v v2, (%[wd0])\n"
					"vxor.vv v1, v3, v2\n"
					"vxor.vv v0, v0, v2\n"
					".option pop\n"
					: :
					[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
					[x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
					".option arch,+v\n"
					"vsra.vi v2, v1, 7\n"
					"vsll.vi v3, v1, 1\n"
					"vand.vx v2, v2, %[x1d]\n"
					"vxor.vv v1, v3, v2\n"
					".option pop\n"
					: :
					[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vle8.v v2, (%[wp0])\n"
				"vle8.v v3, (%[wq0])\n"
				"vxor.vv v2, v2, v0\n"
				"vxor.vv v3, v3, v1\n"
				"vse8.v v2, (%[wp0])\n"
				"vse8.v v3, (%[wq0])\n"
				".option pop\n"
				: :
				[wp0]"r"(&p[d + NSIZE * 0]),
				[wq0]"r"(&q[d + NSIZE * 0])
		);
	}
}

static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	unsigned long d;
	int z, z0;
	u8 *p, *q;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
			".option arch,+v\n"
			"vsetvli t0, x0, e8, m1, ta, ma\n"
			".option pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 */
	for (d = 0; d < bytes; d += NSIZE * 2) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vle8.v v0, (%[wp0])\n"
				"vle8.v v1, (%[wp0])\n"
				"vle8.v v4, (%[wp1])\n"
				"vle8.v v5, (%[wp1])\n"
				".option pop\n"
				: :
				[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
				[wp1]"r"(&dptr[z0][d + 1 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
					".option arch,+v\n"
					"vsra.vi v2, v1, 7\n"
					"vsll.vi v3, v1, 1\n"
					"vand.vx v2, v2, %[x1d]\n"
					"vxor.vv v3, v3, v2\n"
					"vle8.v v2, (%[wd0])\n"
					"vxor.vv v1, v3, v2\n"
					"vxor.vv v0, v0, v2\n"

					"vsra.vi v6, v5, 7\n"
					"vsll.vi v7, v5, 1\n"
					"vand.vx v6, v6, %[x1d]\n"
					"vxor.vv v7, v7, v6\n"
					"vle8.v v6, (%[wd1])\n"
					"vxor.vv v5, v7, v6\n"
					"vxor.vv v4, v4, v6\n"
					".option pop\n"
					: :
					[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
					[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
					[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vse8.v v0, (%[wp0])\n"
				"vse8.v v1, (%[wq0])\n"
				"vse8.v v4, (%[wp1])\n"
				"vse8.v v5, (%[wq1])\n"
				".option pop\n"
				: :
				[wp0]"r"(&p[d + NSIZE * 0]),
				[wq0]"r"(&q[d + NSIZE * 0]),
				[wp1]"r"(&p[d + NSIZE * 1]),
				[wq1]"r"(&q[d + NSIZE * 1])
		);
	}
}

static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
			".option arch,+v\n"
			"vsetvli t0, x0, e8, m1, ta, ma\n"
			".option pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 */
	for (d = 0; d < bytes; d += NSIZE * 2) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vle8.v v0, (%[wp0])\n"
				"vle8.v v1, (%[wp0])\n"
				"vle8.v v4, (%[wp1])\n"
				"vle8.v v5, (%[wp1])\n"
				".option pop\n"
				: :
				[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
				[wp1]"r"(&dptr[z0][d + 1 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
					".option arch,+v\n"
					"vsra.vi v2, v1, 7\n"
					"vsll.vi v3, v1, 1\n"
					"vand.vx v2, v2, %[x1d]\n"
					"vxor.vv v3, v3, v2\n"
					"vle8.v v2, (%[wd0])\n"
					"vxor.vv v1, v3, v2\n"
					"vxor.vv v0, v0, v2\n"

					"vsra.vi v6, v5, 7\n"
					"vsll.vi v7, v5, 1\n"
					"vand.vx v6, v6, %[x1d]\n"
					"vxor.vv v7, v7, v6\n"
					"vle8.v v6, (%[wd1])\n"
					"vxor.vv v5, v7, v6\n"
					"vxor.vv v4, v4, v6\n"
					".option pop\n"
					: :
					[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
					[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
					[x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
					".option arch,+v\n"
					"vsra.vi v2, v1, 7\n"
					"vsll.vi v3, v1, 1\n"
					"vand.vx v2, v2, %[x1d]\n"
					"vxor.vv v1, v3, v2\n"

					"vsra.vi v6, v5, 7\n"
					"vsll.vi v7, v5, 1\n"
					"vand.vx v6, v6, %[x1d]\n"
					"vxor.vv v5, v7, v6\n"
					".option pop\n"
					: :
					[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vle8.v v2, (%[wp0])\n"
				"vle8.v v3, (%[wq0])\n"
				"vxor.vv v2, v2, v0\n"
				"vxor.vv v3, v3, v1\n"
				"vse8.v v2, (%[wp0])\n"
				"vse8.v v3, (%[wq0])\n"

				"vle8.v v6, (%[wp1])\n"
				"vle8.v v7, (%[wq1])\n"
				"vxor.vv v6, v6, v4\n"
				"vxor.vv v7, v7, v5\n"
				"vse8.v v6, (%[wp1])\n"
				"vse8.v v7, (%[wq1])\n"
				".option pop\n"
				: :
				[wp0]"r"(&p[d + NSIZE * 0]),
				[wq0]"r"(&q[d + NSIZE * 0]),
				[wp1]"r"(&p[d + NSIZE * 1]),
				[wq1]"r"(&q[d + NSIZE * 1])
		);
	}
}

static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	unsigned long d;
	int z, z0;
	u8 *p, *q;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
			".option arch,+v\n"
			"vsetvli t0, x0, e8, m1, ta, ma\n"
			".option pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 */
	for (d = 0; d < bytes; d += NSIZE * 4) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vle8.v v0, (%[wp0])\n"
				"vle8.v v1, (%[wp0])\n"
				"vle8.v v4, (%[wp1])\n"
				"vle8.v v5, (%[wp1])\n"
				"vle8.v v8, (%[wp2])\n"
				"vle8.v v9, (%[wp2])\n"
				"vle8.v v12, (%[wp3])\n"
				"vle8.v v13, (%[wp3])\n"
				".option pop\n"
				: :
				[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
				[wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
				[wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
				[wp3]"r"(&dptr[z0][d + 3 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
					".option arch,+v\n"
					"vsra.vi v2, v1, 7\n"
					"vsll.vi v3, v1, 1\n"
					"vand.vx v2, v2, %[x1d]\n"
					"vxor.vv v3, v3, v2\n"
					"vle8.v v2, (%[wd0])\n"
					"vxor.vv v1, v3, v2\n"
					"vxor.vv v0, v0, v2\n"

					"vsra.vi v6, v5, 7\n"
					"vsll.vi v7, v5, 1\n"
					"vand.vx v6, v6, %[x1d]\n"
					"vxor.vv v7, v7, v6\n"
					"vle8.v v6, (%[wd1])\n"
					"vxor.vv v5, v7, v6\n"
					"vxor.vv v4, v4, v6\n"

					"vsra.vi v10, v9, 7\n"
					"vsll.vi v11, v9, 1\n"
					"vand.vx v10, v10, %[x1d]\n"
					"vxor.vv v11, v11, v10\n"
					"vle8.v v10, (%[wd2])\n"
					"vxor.vv v9, v11, v10\n"
					"vxor.vv v8, v8, v10\n"

					"vsra.vi v14, v13, 7\n"
					"vsll.vi v15, v13, 1\n"
					"vand.vx v14, v14, %[x1d]\n"
					"vxor.vv v15, v15, v14\n"
					"vle8.v v14, (%[wd3])\n"
					"vxor.vv v13, v15, v14\n"
					"vxor.vv v12, v12, v14\n"
					".option pop\n"
					: :
					[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
					[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
					[wd2]"r"(&dptr[z][d + 2 * NSIZE]),
					[wd3]"r"(&dptr[z][d + 3 * NSIZE]),
					[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vse8.v v0, (%[wp0])\n"
				"vse8.v v1, (%[wq0])\n"
				"vse8.v v4, (%[wp1])\n"
				"vse8.v v5, (%[wq1])\n"
				"vse8.v v8, (%[wp2])\n"
				"vse8.v v9, (%[wq2])\n"
				"vse8.v v12, (%[wp3])\n"
				"vse8.v v13, (%[wq3])\n"
				".option pop\n"
				: :
				[wp0]"r"(&p[d + NSIZE * 0]),
				[wq0]"r"(&q[d + NSIZE * 0]),
				[wp1]"r"(&p[d + NSIZE * 1]),
				[wq1]"r"(&q[d + NSIZE * 1]),
				[wp2]"r"(&p[d + NSIZE * 2]),
				[wq2]"r"(&q[d + NSIZE * 2]),
				[wp3]"r"(&p[d + NSIZE * 3]),
				[wq3]"r"(&q[d + NSIZE * 3])
		);
	}
}

static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
			".option arch,+v\n"
			"vsetvli t0, x0, e8, m1, ta, ma\n"
			".option pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 */
	for (d = 0; d < bytes; d += NSIZE * 4) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vle8.v v0, (%[wp0])\n"
				"vle8.v v1, (%[wp0])\n"
				"vle8.v v4, (%[wp1])\n"
				"vle8.v v5, (%[wp1])\n"
				"vle8.v v8, (%[wp2])\n"
				"vle8.v v9, (%[wp2])\n"
				"vle8.v v12, (%[wp3])\n"
				"vle8.v v13, (%[wp3])\n"
				".option pop\n"
				: :
				[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
				[wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
				[wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
				[wp3]"r"(&dptr[z0][d + 3 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
					".option arch,+v\n"
					"vsra.vi v2, v1, 7\n"
					"vsll.vi v3, v1, 1\n"
					"vand.vx v2, v2, %[x1d]\n"
					"vxor.vv v3, v3, v2\n"
					"vle8.v v2, (%[wd0])\n"
					"vxor.vv v1, v3, v2\n"
					"vxor.vv v0, v0, v2\n"

					"vsra.vi v6, v5, 7\n"
					"vsll.vi v7, v5, 1\n"
					"vand.vx v6, v6, %[x1d]\n"
					"vxor.vv v7, v7, v6\n"
					"vle8.v v6, (%[wd1])\n"
					"vxor.vv v5, v7, v6\n"
					"vxor.vv v4, v4, v6\n"

					"vsra.vi v10, v9, 7\n"
					"vsll.vi v11, v9, 1\n"
					"vand.vx v10, v10, %[x1d]\n"
					"vxor.vv v11, v11, v10\n"
					"vle8.v v10, (%[wd2])\n"
					"vxor.vv v9, v11, v10\n"
					"vxor.vv v8, v8, v10\n"

					"vsra.vi v14, v13, 7\n"
					"vsll.vi v15, v13, 1\n"
					"vand.vx v14, v14, %[x1d]\n"
					"vxor.vv v15, v15, v14\n"
					"vle8.v v14, (%[wd3])\n"
					"vxor.vv v13, v15, v14\n"
					"vxor.vv v12, v12, v14\n"
					".option pop\n"
					: :
					[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
					[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
					[wd2]"r"(&dptr[z][d + 2 * NSIZE]),
					[wd3]"r"(&dptr[z][d + 3 * NSIZE]),
					[x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
					".option arch,+v\n"
					"vsra.vi v2, v1, 7\n"
					"vsll.vi v3, v1, 1\n"
					"vand.vx v2, v2, %[x1d]\n"
					"vxor.vv v1, v3, v2\n"

					"vsra.vi v6, v5, 7\n"
					"vsll.vi v7, v5, 1\n"
					"vand.vx v6, v6, %[x1d]\n"
					"vxor.vv v5, v7, v6\n"

					"vsra.vi v10, v9, 7\n"
					"vsll.vi v11, v9, 1\n"
					"vand.vx v10, v10, %[x1d]\n"
					"vxor.vv v9, v11, v10\n"

					"vsra.vi v14, v13, 7\n"
					"vsll.vi v15, v13, 1\n"
					"vand.vx v14, v14, %[x1d]\n"
					"vxor.vv v13, v15, v14\n"
					".option pop\n"
					: :
					[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 * v8:wp2, v9:wq2, v10:p2, v11:q2
		 * v12:wp3, v13:wq3, v14:p3, v15:q3
		 */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vle8.v v2, (%[wp0])\n"
				"vle8.v v3, (%[wq0])\n"
				"vxor.vv v2, v2, v0\n"
				"vxor.vv v3, v3, v1\n"
				"vse8.v v2, (%[wp0])\n"
				"vse8.v v3, (%[wq0])\n"

				"vle8.v v6, (%[wp1])\n"
				"vle8.v v7, (%[wq1])\n"
				"vxor.vv v6, v6, v4\n"
				"vxor.vv v7, v7, v5\n"
				"vse8.v v6, (%[wp1])\n"
				"vse8.v v7, (%[wq1])\n"

				"vle8.v v10, (%[wp2])\n"
				"vle8.v v11, (%[wq2])\n"
				"vxor.vv v10, v10, v8\n"
				"vxor.vv v11, v11, v9\n"
				"vse8.v v10, (%[wp2])\n"
				"vse8.v v11, (%[wq2])\n"

				"vle8.v v14, (%[wp3])\n"
				"vle8.v v15, (%[wq3])\n"
				"vxor.vv v14, v14, v12\n"
				"vxor.vv v15, v15, v13\n"
				"vse8.v v14, (%[wp3])\n"
				"vse8.v v15, (%[wq3])\n"
				".option pop\n"
				: :
				[wp0]"r"(&p[d + NSIZE * 0]),
				[wq0]"r"(&q[d + NSIZE * 0]),
				[wp1]"r"(&p[d + NSIZE * 1]),
				[wq1]"r"(&q[d + NSIZE * 1]),
				[wp2]"r"(&p[d + NSIZE * 2]),
				[wq2]"r"(&q[d + NSIZE * 2]),
				[wp3]"r"(&p[d + NSIZE * 3]),
				[wq3]"r"(&q[d + NSIZE * 3])
		);
	}
}

static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	unsigned long d;
	int z, z0;
	u8 *p, *q;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
			".option arch,+v\n"
			"vsetvli t0, x0, e8, m1, ta, ma\n"
			".option pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14
	 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15
	 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16
	 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17
	 */
	for (d = 0; d < bytes; d += NSIZE * 8) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vle8.v v0, (%[wp0])\n"
				"vle8.v v1, (%[wp0])\n"
				"vle8.v v4, (%[wp1])\n"
				"vle8.v v5, (%[wp1])\n"
				"vle8.v v8, (%[wp2])\n"
				"vle8.v v9, (%[wp2])\n"
				"vle8.v v12, (%[wp3])\n"
				"vle8.v v13, (%[wp3])\n"
				"vle8.v v16, (%[wp4])\n"
				"vle8.v v17, (%[wp4])\n"
				"vle8.v v20, (%[wp5])\n"
				"vle8.v v21, (%[wp5])\n"
				"vle8.v v24, (%[wp6])\n"
				"vle8.v v25, (%[wp6])\n"
				"vle8.v v28, (%[wp7])\n"
				"vle8.v v29, (%[wp7])\n"
				".option pop\n"
				: :
				[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
				[wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
				[wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
				[wp3]"r"(&dptr[z0][d + 3 * NSIZE]),
				[wp4]"r"(&dptr[z0][d + 4 * NSIZE]),
				[wp5]"r"(&dptr[z0][d + 5 * NSIZE]),
				[wp6]"r"(&dptr[z0][d + 6 * NSIZE]),
				[wp7]"r"(&dptr[z0][d + 7 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
					".option arch,+v\n"
					"vsra.vi v2, v1, 7\n"
					"vsll.vi v3, v1, 1\n"
					"vand.vx v2, v2, %[x1d]\n"
					"vxor.vv v3, v3, v2\n"
					"vle8.v v2, (%[wd0])\n"
					"vxor.vv v1, v3, v2\n"
					"vxor.vv v0, v0, v2\n"

					"vsra.vi v6, v5, 7\n"
					"vsll.vi v7, v5, 1\n"
					"vand.vx v6, v6, %[x1d]\n"
					"vxor.vv v7, v7, v6\n"
					"vle8.v v6, (%[wd1])\n"
					"vxor.vv v5, v7, v6\n"
					"vxor.vv v4, v4, v6\n"

					"vsra.vi v10, v9, 7\n"
					"vsll.vi v11, v9, 1\n"
					"vand.vx v10, v10, %[x1d]\n"
					"vxor.vv v11, v11, v10\n"
					"vle8.v v10, (%[wd2])\n"
					"vxor.vv v9, v11, v10\n"
					"vxor.vv v8, v8, v10\n"

					"vsra.vi v14, v13, 7\n"
					"vsll.vi v15, v13, 1\n"
					"vand.vx v14, v14, %[x1d]\n"
					"vxor.vv v15, v15, v14\n"
					"vle8.v v14, (%[wd3])\n"
					"vxor.vv v13, v15, v14\n"
					"vxor.vv v12, v12, v14\n"

					"vsra.vi v18, v17, 7\n"
					"vsll.vi v19, v17, 1\n"
					"vand.vx v18, v18, %[x1d]\n"
					"vxor.vv v19, v19, v18\n"
					"vle8.v v18, (%[wd4])\n"
					"vxor.vv v17, v19, v18\n"
					"vxor.vv v16, v16, v18\n"

					"vsra.vi v22, v21, 7\n"
					"vsll.vi v23, v21, 1\n"
					"vand.vx v22, v22, %[x1d]\n"
					"vxor.vv v23, v23, v22\n"
					"vle8.v v22, (%[wd5])\n"
					"vxor.vv v21, v23, v22\n"
					"vxor.vv v20, v20, v22\n"

					"vsra.vi v26, v25, 7\n"
					"vsll.vi v27, v25, 1\n"
					"vand.vx v26, v26, %[x1d]\n"
					"vxor.vv v27, v27, v26\n"
					"vle8.v v26, (%[wd6])\n"
					"vxor.vv v25, v27, v26\n"
					"vxor.vv v24, v24, v26\n"

					"vsra.vi v30, v29, 7\n"
					"vsll.vi v31, v29, 1\n"
					"vand.vx v30, v30, %[x1d]\n"
					"vxor.vv v31, v31, v30\n"
					"vle8.v v30, (%[wd7])\n"
					"vxor.vv v29, v31, v30\n"
					"vxor.vv v28, v28, v30\n"
					".option pop\n"
					: :
					[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
					[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
					[wd2]"r"(&dptr[z][d + 2 * NSIZE]),
					[wd3]"r"(&dptr[z][d + 3 * NSIZE]),
					[wd4]"r"(&dptr[z][d + 4 * NSIZE]),
					[wd5]"r"(&dptr[z][d + 5 * NSIZE]),
					[wd6]"r"(&dptr[z][d + 6 * NSIZE]),
					[wd7]"r"(&dptr[z][d + 7 * NSIZE]),
					[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vse8.v v0, (%[wp0])\n"
				"vse8.v v1, (%[wq0])\n"
				"vse8.v v4, (%[wp1])\n"
				"vse8.v v5, (%[wq1])\n"
				"vse8.v v8, (%[wp2])\n"
				"vse8.v v9, (%[wq2])\n"
				"vse8.v v12, (%[wp3])\n"
				"vse8.v v13, (%[wq3])\n"
				"vse8.v v16, (%[wp4])\n"
				"vse8.v v17, (%[wq4])\n"
				"vse8.v v20, (%[wp5])\n"
				"vse8.v v21, (%[wq5])\n"
				"vse8.v v24, (%[wp6])\n"
				"vse8.v v25, (%[wq6])\n"
				"vse8.v v28, (%[wp7])\n"
				"vse8.v v29, (%[wq7])\n"
				".option pop\n"
				: :
				[wp0]"r"(&p[d + NSIZE * 0]),
				[wq0]"r"(&q[d + NSIZE * 0]),
				[wp1]"r"(&p[d + NSIZE * 1]),
				[wq1]"r"(&q[d + NSIZE * 1]),
				[wp2]"r"(&p[d + NSIZE * 2]),
				[wq2]"r"(&q[d + NSIZE * 2]),
				[wp3]"r"(&p[d + NSIZE * 3]),
				[wq3]"r"(&q[d + NSIZE * 3]),
				[wp4]"r"(&p[d + NSIZE * 4]),
				[wq4]"r"(&q[d + NSIZE * 4]),
				[wp5]"r"(&p[d + NSIZE * 5]),
				[wq5]"r"(&q[d + NSIZE * 5]),
				[wp6]"r"(&p[d + NSIZE * 6]),
				[wq6]"r"(&q[d + NSIZE * 6]),
				[wp7]"r"(&p[d + NSIZE * 7]),
				[wq7]"r"(&q[d + NSIZE * 7])
		);
	}
}

static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
			".option arch,+v\n"
			"vsetvli t0, x0, e8, m1, ta, ma\n"
			".option pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14
	 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15
	 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16
	 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17
	 */
	for (d = 0; d < bytes; d += NSIZE * 8) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vle8.v v0, (%[wp0])\n"
				"vle8.v v1, (%[wp0])\n"
				"vle8.v v4, (%[wp1])\n"
				"vle8.v v5, (%[wp1])\n"
				"vle8.v v8, (%[wp2])\n"
				"vle8.v v9, (%[wp2])\n"
				"vle8.v v12, (%[wp3])\n"
				"vle8.v v13, (%[wp3])\n"
				"vle8.v v16, (%[wp4])\n"
				"vle8.v v17, (%[wp4])\n"
				"vle8.v v20, (%[wp5])\n"
				"vle8.v v21, (%[wp5])\n"
				"vle8.v v24, (%[wp6])\n"
				"vle8.v v25, (%[wp6])\n"
				"vle8.v v28, (%[wp7])\n"
				"vle8.v v29, (%[wp7])\n"
				".option pop\n"
				: :
				[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
				[wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
				[wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
				[wp3]"r"(&dptr[z0][d + 3 * NSIZE]),
				[wp4]"r"(&dptr[z0][d + 4 * NSIZE]),
				[wp5]"r"(&dptr[z0][d + 5 * NSIZE]),
				[wp6]"r"(&dptr[z0][d + 6 * NSIZE]),
				[wp7]"r"(&dptr[z0][d + 7 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
					".option arch,+v\n"
					"vsra.vi v2, v1, 7\n"
					"vsll.vi v3, v1, 1\n"
					"vand.vx v2, v2, %[x1d]\n"
					"vxor.vv v3, v3, v2\n"
					"vle8.v v2, (%[wd0])\n"
					"vxor.vv v1, v3, v2\n"
					"vxor.vv v0, v0, v2\n"

					"vsra.vi v6, v5, 7\n"
					"vsll.vi v7, v5, 1\n"
					"vand.vx v6, v6, %[x1d]\n"
					"vxor.vv v7, v7, v6\n"
					"vle8.v v6, (%[wd1])\n"
					"vxor.vv v5, v7, v6\n"
					"vxor.vv v4, v4, v6\n"

					"vsra.vi v10, v9, 7\n"
					"vsll.vi v11, v9, 1\n"
					"vand.vx v10, v10, %[x1d]\n"
					"vxor.vv v11, v11, v10\n"
					"vle8.v v10, (%[wd2])\n"
					"vxor.vv v9, v11, v10\n"
					"vxor.vv v8, v8, v10\n"

					"vsra.vi v14, v13, 7\n"
					"vsll.vi v15, v13, 1\n"
					"vand.vx v14, v14, %[x1d]\n"
					"vxor.vv v15, v15, v14\n"
					"vle8.v v14, (%[wd3])\n"
					"vxor.vv v13, v15, v14\n"
					"vxor.vv v12, v12, v14\n"

					"vsra.vi v18, v17, 7\n"
					"vsll.vi v19, v17, 1\n"
					"vand.vx v18, v18, %[x1d]\n"
					"vxor.vv v19, v19, v18\n"
					"vle8.v v18, (%[wd4])\n"
					"vxor.vv v17, v19, v18\n"
					"vxor.vv v16, v16, v18\n"

					"vsra.vi v22, v21, 7\n"
					"vsll.vi v23, v21, 1\n"
					"vand.vx v22, v22, %[x1d]\n"
					"vxor.vv v23, v23, v22\n"
					"vle8.v v22, (%[wd5])\n"
					"vxor.vv v21, v23, v22\n"
					"vxor.vv v20, v20, v22\n"

					"vsra.vi v26, v25, 7\n"
					"vsll.vi v27, v25, 1\n"
					"vand.vx v26, v26, %[x1d]\n"
					"vxor.vv v27, v27, v26\n"
					"vle8.v v26, (%[wd6])\n"
					"vxor.vv v25, v27, v26\n"
					"vxor.vv v24, v24, v26\n"

					"vsra.vi v30, v29, 7\n"
					"vsll.vi v31, v29, 1\n"
					"vand.vx v30, v30, %[x1d]\n"
					"vxor.vv v31, v31, v30\n"
					"vle8.v v30, (%[wd7])\n"
					"vxor.vv v29, v31, v30\n"
					"vxor.vv v28, v28, v30\n"
					".option pop\n"
					: :
					[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
					[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
					[wd2]"r"(&dptr[z][d + 2 * NSIZE]),
					[wd3]"r"(&dptr[z][d + 3 * NSIZE]),
					[wd4]"r"(&dptr[z][d + 4 * NSIZE]),
					[wd5]"r"(&dptr[z][d + 5 * NSIZE]),
					[wd6]"r"(&dptr[z][d + 6 * NSIZE]),
					[wd7]"r"(&dptr[z][d + 7 * NSIZE]),
					[x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
					".option arch,+v\n"
					"vsra.vi v2, v1, 7\n"
					"vsll.vi v3, v1, 1\n"
					"vand.vx v2, v2, %[x1d]\n"
					"vxor.vv v1, v3, v2\n"

					"vsra.vi v6, v5, 7\n"
					"vsll.vi v7, v5, 1\n"
					"vand.vx v6, v6, %[x1d]\n"
					"vxor.vv v5, v7, v6\n"

					"vsra.vi v10, v9, 7\n"
					"vsll.vi v11, v9, 1\n"
					"vand.vx v10, v10, %[x1d]\n"
					"vxor.vv v9, v11, v10\n"

					"vsra.vi v14, v13, 7\n"
					"vsll.vi v15, v13, 1\n"
					"vand.vx v14, v14, %[x1d]\n"
					"vxor.vv v13, v15, v14\n"

					"vsra.vi v18, v17, 7\n"
					"vsll.vi v19, v17, 1\n"
					"vand.vx v18, v18, %[x1d]\n"
					"vxor.vv v17, v19, v18\n"

					"vsra.vi v22, v21, 7\n"
					"vsll.vi v23, v21, 1\n"
					"vand.vx v22, v22, %[x1d]\n"
					"vxor.vv v21, v23, v22\n"

					"vsra.vi v26, v25, 7\n"
					"vsll.vi v27, v25, 1\n"
					"vand.vx v26, v26, %[x1d]\n"
					"vxor.vv v25, v27, v26\n"

					"vsra.vi v30, v29, 7\n"
					"vsll.vi v31, v29, 1\n"
					"vand.vx v30, v30, %[x1d]\n"
					"vxor.vv v29, v31, v30\n"
					".option pop\n"
					: :
					[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 * v8:wp2, v9:wq2, v10:p2, v11:q2
		 * v12:wp3, v13:wq3, v14:p3, v15:q3
		 * v16:wp4, v17:wq4, v18:p4, v19:q4
		 * v20:wp5, v21:wq5, v22:p5, v23:q5
		 * v24:wp6, v25:wq6, v26:p6, v27:q6
		 * v28:wp7, v29:wq7, v30:p7, v31:q7
		 */
		asm volatile (".option push\n"
				".option arch,+v\n"
				"vle8.v v2, (%[wp0])\n"
				"vle8.v v3, (%[wq0])\n"
				"vxor.vv v2, v2, v0\n"
				"vxor.vv v3, v3, v1\n"
				"vse8.v v2, (%[wp0])\n"
				"vse8.v v3, (%[wq0])\n"

				"vle8.v v6, (%[wp1])\n"
				"vle8.v v7, (%[wq1])\n"
				"vxor.vv v6, v6, v4\n"
				"vxor.vv v7, v7, v5\n"
				"vse8.v v6, (%[wp1])\n"
				"vse8.v v7, (%[wq1])\n"

				"vle8.v v10, (%[wp2])\n"
				"vle8.v v11, (%[wq2])\n"
				"vxor.vv v10, v10, v8\n"
				"vxor.vv v11, v11, v9\n"
				"vse8.v v10, (%[wp2])\n"
				"vse8.v v11, (%[wq2])\n"

				"vle8.v v14, (%[wp3])\n"
				"vle8.v v15, (%[wq3])\n"
				"vxor.vv v14, v14, v12\n"
				"vxor.vv v15, v15, v13\n"
				"vse8.v v14, (%[wp3])\n"
				"vse8.v v15, (%[wq3])\n"

				"vle8.v v18, (%[wp4])\n"
				"vle8.v v19, (%[wq4])\n"
				"vxor.vv v18, v18, v16\n"
				"vxor.vv v19, v19, v17\n"
				"vse8.v v18, (%[wp4])\n"
				"vse8.v v19, (%[wq4])\n"

				"vle8.v v22, (%[wp5])\n"
				"vle8.v v23, (%[wq5])\n"
				"vxor.vv v22, v22, v20\n"
				"vxor.vv v23, v23, v21\n"
				"vse8.v v22, (%[wp5])\n"
				"vse8.v v23, (%[wq5])\n"

				"vle8.v v26, (%[wp6])\n"
				"vle8.v v27, (%[wq6])\n"
				"vxor.vv v26, v26, v24\n"
				"vxor.vv v27, v27, v25\n"
				"vse8.v v26, (%[wp6])\n"
				"vse8.v v27, (%[wq6])\n"

				"vle8.v v30, (%[wp7])\n"
				"vle8.v v31, (%[wq7])\n"
				"vxor.vv v30, v30, v28\n"
				"vxor.vv v31, v31, v29\n"
				"vse8.v v30, (%[wp7])\n"
				"vse8.v v31, (%[wq7])\n"
				".option pop\n"
				: :
				[wp0]"r"(&p[d + NSIZE * 0]),
				[wq0]"r"(&q[d + NSIZE * 0]),
				[wp1]"r"(&p[d + NSIZE * 1]),
				[wq1]"r"(&q[d + NSIZE * 1]),
				[wp2]"r"(&p[d + NSIZE * 2]),
				[wq2]"r"(&q[d + NSIZE * 2]),
				[wp3]"r"(&p[d + NSIZE * 3]),
				[wq3]"r"(&q[d + NSIZE * 3]),
				[wp4]"r"(&p[d + NSIZE * 4]),
				[wq4]"r"(&q[d + NSIZE * 4]),
				[wp5]"r"(&p[d + NSIZE * 5]),
				[wq5]"r"(&q[d + NSIZE * 5]),
				[wp6]"r"(&p[d + NSIZE * 6]),
				[wq6]"r"(&q[d + NSIZE * 6]),
				[wp7]"r"(&p[d + NSIZE * 7]),
				[wq7]"r"(&q[d + NSIZE * 7])
		);
	}
}

RAID6_RVV_WRAPPER(1);
RAID6_RVV_WRAPPER(2);
RAID6_RVV_WRAPPER(4);
RAID6_RVV_WRAPPER(8);