1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2016 Intel Corporation 4 * 5 * Author: Gayatri Kammela <gayatri.kammela@intel.com> 6 * Author: Megha Dey <megha.dey@linux.intel.com> 7 */ 8 9 #include <linux/raid/pq.h> 10 #include "x86.h" 11 12 static int raid6_has_avx512(void) 13 { 14 return boot_cpu_has(X86_FEATURE_AVX2) && 15 boot_cpu_has(X86_FEATURE_AVX) && 16 boot_cpu_has(X86_FEATURE_AVX512F) && 17 boot_cpu_has(X86_FEATURE_AVX512BW) && 18 boot_cpu_has(X86_FEATURE_AVX512VL) && 19 boot_cpu_has(X86_FEATURE_AVX512DQ); 20 } 21 22 static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila, 23 int failb, void **ptrs) 24 { 25 u8 *p, *q, *dp, *dq; 26 const u8 *pbmul; /* P multiplier table for B data */ 27 const u8 *qmul; /* Q multiplier table (for both) */ 28 const u8 x0f = 0x0f; 29 30 p = (u8 *)ptrs[disks-2]; 31 q = (u8 *)ptrs[disks-1]; 32 33 /* 34 * Compute syndrome with zero for the missing data pages 35 * Use the dead data pages as temporary storage for 36 * delta p and delta q 37 */ 38 39 dp = (u8 *)ptrs[faila]; 40 ptrs[faila] = (void *)raid6_empty_zero_page; 41 ptrs[disks-2] = dp; 42 dq = (u8 *)ptrs[failb]; 43 ptrs[failb] = (void *)raid6_empty_zero_page; 44 ptrs[disks-1] = dq; 45 46 raid6_call.gen_syndrome(disks, bytes, ptrs); 47 48 /* Restore pointer table */ 49 ptrs[faila] = dp; 50 ptrs[failb] = dq; 51 ptrs[disks-2] = p; 52 ptrs[disks-1] = q; 53 54 /* Now, pick the proper data tables */ 55 pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; 56 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ 57 raid6_gfexp[failb]]]; 58 59 kernel_fpu_begin(); 60 61 /* zmm0 = x0f[16] */ 62 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f)); 63 64 while (bytes) { 65 #ifdef CONFIG_X86_64 66 asm volatile("vmovdqa64 %0, %%zmm1\n\t" 67 "vmovdqa64 %1, %%zmm9\n\t" 68 "vmovdqa64 %2, %%zmm0\n\t" 69 "vmovdqa64 %3, %%zmm8\n\t" 70 "vpxorq %4, %%zmm1, %%zmm1\n\t" 71 "vpxorq %5, %%zmm9, %%zmm9\n\t" 72 "vpxorq %6, %%zmm0, %%zmm0\n\t" 73 "vpxorq %7, %%zmm8, %%zmm8" 74 : 75 : "m" (q[0]), "m" (q[64]), "m" (p[0]), 76 "m" (p[64]), "m" (dq[0]), "m" (dq[64]), 77 "m" (dp[0]), "m" (dp[64])); 78 79 /* 80 * 1 = dq[0] ^ q[0] 81 * 9 = dq[64] ^ q[64] 82 * 0 = dp[0] ^ p[0] 83 * 8 = dp[64] ^ p[64] 84 */ 85 86 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" 87 "vbroadcasti64x2 %1, %%zmm5" 88 : 89 : "m" (qmul[0]), "m" (qmul[16])); 90 91 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t" 92 "vpsraw $4, %%zmm9, %%zmm12\n\t" 93 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t" 94 "vpandq %%zmm7, %%zmm9, %%zmm9\n\t" 95 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" 96 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t" 97 "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t" 98 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t" 99 "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t" 100 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t" 101 "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t" 102 "vpxorq %%zmm4, %%zmm5, %%zmm5" 103 : 104 : ); 105 106 /* 107 * 5 = qx[0] 108 * 15 = qx[64] 109 */ 110 111 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" 112 "vbroadcasti64x2 %1, %%zmm1\n\t" 113 "vpsraw $4, %%zmm0, %%zmm2\n\t" 114 "vpsraw $4, %%zmm8, %%zmm6\n\t" 115 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t" 116 "vpandq %%zmm7, %%zmm8, %%zmm14\n\t" 117 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t" 118 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" 119 "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t" 120 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t" 121 "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t" 122 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t" 123 "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t" 124 "vpxorq %%zmm12, %%zmm13, %%zmm13" 125 : 126 : "m" (pbmul[0]), "m" (pbmul[16])); 127 128 /* 129 * 1 = pbmul[px[0]] 130 * 13 = pbmul[px[64]] 131 */ 132 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t" 133 "vpxorq %%zmm15, %%zmm13, %%zmm13" 134 : 135 : ); 136 137 /* 138 * 1 = db = DQ 139 * 13 = db[64] = DQ[64] 140 */ 141 asm volatile("vmovdqa64 %%zmm1, %0\n\t" 142 "vmovdqa64 %%zmm13,%1\n\t" 143 "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t" 144 "vpxorq %%zmm13, %%zmm8, %%zmm8" 145 : 146 : "m" (dq[0]), "m" (dq[64])); 147 148 asm volatile("vmovdqa64 %%zmm0, %0\n\t" 149 "vmovdqa64 %%zmm8, %1" 150 : 151 : "m" (dp[0]), "m" (dp[64])); 152 153 bytes -= 128; 154 p += 128; 155 q += 128; 156 dp += 128; 157 dq += 128; 158 #else 159 asm volatile("vmovdqa64 %0, %%zmm1\n\t" 160 "vmovdqa64 %1, %%zmm0\n\t" 161 "vpxorq %2, %%zmm1, %%zmm1\n\t" 162 "vpxorq %3, %%zmm0, %%zmm0" 163 : 164 : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp)); 165 166 /* 1 = dq ^ q; 0 = dp ^ p */ 167 168 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" 169 "vbroadcasti64x2 %1, %%zmm5" 170 : 171 : "m" (qmul[0]), "m" (qmul[16])); 172 173 /* 174 * 1 = dq ^ q 175 * 3 = dq ^ p >> 4 176 */ 177 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t" 178 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t" 179 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" 180 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t" 181 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t" 182 "vpxorq %%zmm4, %%zmm5, %%zmm5" 183 : 184 : ); 185 186 /* 5 = qx */ 187 188 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" 189 "vbroadcasti64x2 %1, %%zmm1" 190 : 191 : "m" (pbmul[0]), "m" (pbmul[16])); 192 193 asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t" 194 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t" 195 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t" 196 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t" 197 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t" 198 "vpxorq %%zmm4, %%zmm1, %%zmm1" 199 : 200 : ); 201 202 /* 1 = pbmul[px] */ 203 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t" 204 /* 1 = db = DQ */ 205 "vmovdqa64 %%zmm1, %0\n\t" 206 : 207 : "m" (dq[0])); 208 209 asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t" 210 "vmovdqa64 %%zmm0, %0" 211 : 212 : "m" (dp[0])); 213 214 bytes -= 64; 215 p += 64; 216 q += 64; 217 dp += 64; 218 dq += 64; 219 #endif 220 } 221 222 kernel_fpu_end(); 223 } 224 225 static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila, 226 void **ptrs) 227 { 228 u8 *p, *q, *dq; 229 const u8 *qmul; /* Q multiplier table */ 230 const u8 x0f = 0x0f; 231 232 p = (u8 *)ptrs[disks-2]; 233 q = (u8 *)ptrs[disks-1]; 234 235 /* 236 * Compute syndrome with zero for the missing data page 237 * Use the dead data page as temporary storage for delta q 238 */ 239 240 dq = (u8 *)ptrs[faila]; 241 ptrs[faila] = (void *)raid6_empty_zero_page; 242 ptrs[disks-1] = dq; 243 244 raid6_call.gen_syndrome(disks, bytes, ptrs); 245 246 /* Restore pointer table */ 247 ptrs[faila] = dq; 248 ptrs[disks-1] = q; 249 250 /* Now, pick the proper data tables */ 251 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; 252 253 kernel_fpu_begin(); 254 255 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f)); 256 257 while (bytes) { 258 #ifdef CONFIG_X86_64 259 asm volatile("vmovdqa64 %0, %%zmm3\n\t" 260 "vmovdqa64 %1, %%zmm8\n\t" 261 "vpxorq %2, %%zmm3, %%zmm3\n\t" 262 "vpxorq %3, %%zmm8, %%zmm8" 263 : 264 : "m" (dq[0]), "m" (dq[64]), "m" (q[0]), 265 "m" (q[64])); 266 267 /* 268 * 3 = q[0] ^ dq[0] 269 * 8 = q[64] ^ dq[64] 270 */ 271 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t" 272 "vmovapd %%zmm0, %%zmm13\n\t" 273 "vbroadcasti64x2 %1, %%zmm1\n\t" 274 "vmovapd %%zmm1, %%zmm14" 275 : 276 : "m" (qmul[0]), "m" (qmul[16])); 277 278 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t" 279 "vpsraw $4, %%zmm8, %%zmm12\n\t" 280 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" 281 "vpandq %%zmm7, %%zmm8, %%zmm8\n\t" 282 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" 283 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t" 284 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t" 285 "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t" 286 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t" 287 "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t" 288 "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t" 289 "vpxorq %%zmm13, %%zmm14, %%zmm14" 290 : 291 : ); 292 293 /* 294 * 1 = qmul[q[0] ^ dq[0]] 295 * 14 = qmul[q[64] ^ dq[64]] 296 */ 297 asm volatile("vmovdqa64 %0, %%zmm2\n\t" 298 "vmovdqa64 %1, %%zmm12\n\t" 299 "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t" 300 "vpxorq %%zmm14, %%zmm12, %%zmm12" 301 : 302 : "m" (p[0]), "m" (p[64])); 303 304 /* 305 * 2 = p[0] ^ qmul[q[0] ^ dq[0]] 306 * 12 = p[64] ^ qmul[q[64] ^ dq[64]] 307 */ 308 309 asm volatile("vmovdqa64 %%zmm1, %0\n\t" 310 "vmovdqa64 %%zmm14, %1\n\t" 311 "vmovdqa64 %%zmm2, %2\n\t" 312 "vmovdqa64 %%zmm12,%3" 313 : 314 : "m" (dq[0]), "m" (dq[64]), "m" (p[0]), 315 "m" (p[64])); 316 317 bytes -= 128; 318 p += 128; 319 q += 128; 320 dq += 128; 321 #else 322 asm volatile("vmovdqa64 %0, %%zmm3\n\t" 323 "vpxorq %1, %%zmm3, %%zmm3" 324 : 325 : "m" (dq[0]), "m" (q[0])); 326 327 /* 3 = q ^ dq */ 328 329 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t" 330 "vbroadcasti64x2 %1, %%zmm1" 331 : 332 : "m" (qmul[0]), "m" (qmul[16])); 333 334 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t" 335 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" 336 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" 337 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t" 338 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t" 339 "vpxorq %%zmm0, %%zmm1, %%zmm1" 340 : 341 : ); 342 343 /* 1 = qmul[q ^ dq] */ 344 345 asm volatile("vmovdqa64 %0, %%zmm2\n\t" 346 "vpxorq %%zmm1, %%zmm2, %%zmm2" 347 : 348 : "m" (p[0])); 349 350 /* 2 = p ^ qmul[q ^ dq] */ 351 352 asm volatile("vmovdqa64 %%zmm1, %0\n\t" 353 "vmovdqa64 %%zmm2, %1" 354 : 355 : "m" (dq[0]), "m" (p[0])); 356 357 bytes -= 64; 358 p += 64; 359 q += 64; 360 dq += 64; 361 #endif 362 } 363 364 kernel_fpu_end(); 365 } 366 367 const struct raid6_recov_calls raid6_recov_avx512 = { 368 .data2 = raid6_2data_recov_avx512, 369 .datap = raid6_datap_recov_avx512, 370 .valid = raid6_has_avx512, 371 #ifdef CONFIG_X86_64 372 .name = "avx512x2", 373 #else 374 .name = "avx512x1", 375 #endif 376 .priority = 3, 377 }; 378