// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 */

#include <linux/raid/pq.h>
#include "x86.h"

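/*
 * The recovery loops below operate on full 512-bit zmm registers:
 * vpbroadcastb, vpsraw and vpshufb on zmm require AVX512BW,
 * vbroadcasti64x2 requires AVX512DQ, and the vpandq/vpxorq ALU ops
 * require AVX512F; hence the feature tests below.
 */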
static int raid6_has_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512BW) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		boot_cpu_has(X86_FEATURE_AVX512DQ);
}

static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
				     int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute the syndrome with zeros for the missing data pages.
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q.
	 */

	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dp;
	ptrs[failb]   = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;
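
	/*
	 * dp now holds P recomputed with the failed pages as zero and dq
	 * the matching Q, so with a = faila, b = failb and g the GF(256)
	 * generator:
	 *   p ^ dp = Da ^ Db		(delta P, "px")
	 *   q ^ dq = g^a*Da ^ g^b*Db	(delta Q, "qx" before scaling)
	 */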

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];
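
	/*
	 * pbmul multiplies by 1/(g^(b-a) ^ 1) and qmul by 1/(g^a ^ g^b),
	 * which solves the two equations above as
	 *   Db = pbmul[px] ^ qmul[qx],  Da = Db ^ px
	 * The scalar loop in lib/raid6/recov.c (which uses the 256-entry
	 * raid6_gfmul tables rather than the nibble-split raid6_vgfmul
	 * tables used here) is essentially:
	 *
	 *	while (bytes--) {
	 *		px    = *p ^ *dp;
	 *		qx    = qmul[*q ^ *dq];
	 *		*dq++ = db = pbmul[px] ^ qx;
	 *		*dp++ = db ^ px;
	 *		p++; q++;
	 *	}
	 */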

	kernel_fpu_begin();

	/* zmm7 = 0x0f in each of the 64 bytes */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
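
	/*
	 * The GF(256) multiplies below are done one nibble at a time:
	 * vpshufb treats its table operand as 16-entry lookup tables (one
	 * per 128-bit lane) indexed by the low four bits of each byte, so
	 * zmm7 masks each byte down to a nibble and a full product is
	 * assembled as table_lo[x & 0xf] ^ table_hi[x >> 4].
	 */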

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm9\n\t"
			     "vmovdqa64 %2, %%zmm0\n\t"
			     "vmovdqa64 %3, %%zmm8\n\t"
			     "vpxorq %4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %5, %%zmm9, %%zmm9\n\t"
			     "vpxorq %6, %%zmm0, %%zmm0\n\t"
			     "vpxorq %7, %%zmm8, %%zmm8"
			     :
			     : "m" (q[0]), "m" (q[64]), "m" (p[0]),
			       "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
			       "m" (dp[0]), "m" (dp[64]));

		/*
		 * 1 = dq[0]  ^ q[0]
		 * 9 = dq[64] ^ q[64]
		 * 0 = dp[0]  ^ p[0]
		 * 8 = dp[64] ^ p[64]
		 */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));
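
		/*
		 * raid6_vgfmul[c][0..15] holds c * {0..15} and [16..31]
		 * holds c * {0x00, 0x10, ..., 0xf0}; vbroadcasti64x2
		 * replicates each 16-byte table into all four 128-bit
		 * lanes, since vpshufb looks up within each lane.
		 */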

		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpsraw $4, %%zmm9, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );
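
		/*
		 * AVX-512 has no per-byte shift, so the high nibbles are
		 * extracted with a 16-bit vpsraw and the bits shifted in
		 * from the neighbouring byte are cleared by the vpandq
		 * with zmm7.
		 */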

		/*
		 * 5 = qx[0]
		 * 15 = qx[64]
		 */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpsraw $4, %%zmm8, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm12, %%zmm13, %%zmm13"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/*
		 * 1  = pbmul[px[0]]
		 * 13 = pbmul[px[64]]
		 */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm15, %%zmm13, %%zmm13"
			     :
			     : );

		/*
		 * 1 = db = DQ
		 * 13 = db[64] = DQ[64]
		 */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm13, %1\n\t"
			     "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vpxorq %%zmm13, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]));
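
		/*
		 * 0 = da = DP
		 * 8 = da[64] = DP[64]
		 */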

		asm volatile("vmovdqa64 %%zmm0, %0\n\t"
			     "vmovdqa64 %%zmm8, %1"
			     :
			     : "m" (dp[0]), "m" (dp[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dp += 128;
		dq += 128;
#else
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm0\n\t"
			     "vpxorq %2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %3, %%zmm0, %%zmm0"
			     :
			     : "m" (*q), "m" (*p), "m" (*dq), "m" (*dp));

		/* 1 = dq ^ q;  0 = dp ^ p */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * 1 = dq ^ q
		 * 3 = (dq ^ q) >> 4
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/* 5 = qx */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = pbmul[px] */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     /* 1 = db = DQ */
			     "vmovdqa64 %%zmm1, %0"
			     :
			     : "m" (dq[0]));

		asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vmovdqa64 %%zmm0, %0"
			     :
			     : "m" (dp[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}

static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
				     void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute the syndrome with zeros for the missing data page.
	 * Use the dead data page as temporary storage for delta q.
	 */

	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dq;
	ptrs[disks-1] = q;
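
	/*
	 * gen_syndrome also rewrote p in place with Da treated as zero,
	 * so at this point
	 *   q ^ dq = g^a * Da    and    p = P ^ Da
	 */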

	/* Now, pick the proper data tables */
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
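
	/*
	 * qmul multiplies by 1/g^a, so the scalar equivalent of the loop
	 * below (see lib/raid6/recov.c) is essentially:
	 *
	 *	while (bytes--) {
	 *		*p++ ^= *dq = qmul[*q ^ *dq];
	 *		q++; dq++;
	 *	}
	 *
	 * recovering Da into dq and folding it back into p.
	 */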

	kernel_fpu_begin();

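	/* zmm7 = 0x0f in each of the 64 bytes */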
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vmovdqa64 %1, %%zmm8\n\t"
			     "vpxorq %2, %%zmm3, %%zmm3\n\t"
			     "vpxorq %3, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
			       "m" (q[64]));

		/*
		 * 3 = q[0] ^ dq[0]
		 * 8 = q[64] ^ dq[64]
		 */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vmovapd %%zmm0, %%zmm13\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vmovapd %%zmm1, %%zmm14"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));
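
		/*
		 * zmm13/zmm14 are spare copies of the two nibble tables:
		 * the vpshufb forms below overwrite their table register,
		 * so each 64-byte column needs its own copy (the vmovapd
		 * is just a full-width register copy here).
		 */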

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpsraw $4, %%zmm8, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm13, %%zmm14, %%zmm14"
			     :
			     : );

		/*
		 * 1  = qmul[q[0]  ^ dq[0]]
		 * 14 = qmul[q[64] ^ dq[64]]
		 */
		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vmovdqa64 %1, %%zmm12\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
			     "vpxorq %%zmm14, %%zmm12, %%zmm12"
			     :
			     : "m" (p[0]), "m" (p[64]));

		/*
		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
		 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
		 */

		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm14, %1\n\t"
			     "vmovdqa64 %%zmm2, %2\n\t"
			     "vmovdqa64 %%zmm12, %3"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
			       "m" (p[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dq += 128;
#else
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vpxorq %1, %%zmm3, %%zmm3"
			     :
			     : "m" (dq[0]), "m" (q[0]));

		/* 3 = q ^ dq */

		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2"
			     :
			     : "m" (p[0]));

		/* 2 = p ^ qmul[q ^ dq] */

		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm2, %1"
			     :
			     : "m" (dq[0]), "m" (p[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}

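/*
 * Recovery-algorithm descriptor: the selection code in lib/raid6/algos.c
 * picks the valid implementation with the highest priority (the AVX2 and
 * SSSE3 variants register priorities 2 and 1, respectively).
 */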
const struct raid6_recov_calls raid6_recov_avx512 = {
	.data2 = raid6_2data_recov_avx512,
	.datap = raid6_datap_recov_avx512,
	.valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
	.name = "avx512x2",
#else
	.name = "avx512x1",
#endif
	.priority = 3,
};