// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2022 Intel Corporation. */

#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/nmi.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <asm/msr.h>

#include "ifs.h"

/*
 * Note all code and data in this file is protected by
 * ifs_sem. On HT systems all threads on a core will
 * execute together, but only the first thread on the
 * core will update results of the test.
 */

#define CREATE_TRACE_POINTS
#include <trace/events/intel_ifs.h>

/* Max retries on the same chunk */
#define MAX_IFS_RETRIES  5

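/*
 * Parameters handed to each sibling thread when a scan test is run
 * under stop_machine(); only the first thread on the core passes the
 * resulting status back.
 */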
struct run_params {
	struct ifs_data *ifsd;
	union ifs_scan *activate;
	union ifs_status status;
};

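/* Equivalent parameter block for the Structural Based Functional Test at Field (SBAF) */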
struct sbaf_run_params {
	struct ifs_data *ifsd;
	int *retry_cnt;
	union ifs_sbaf *activate;
	union ifs_sbaf_status status;
};

/*
 * Number of TSC cycles that a logical CPU will wait in
 * WRMSR(ACTIVATE_SCAN) for the other logical CPU on the core to join.
 */
#define IFS_THREAD_WAIT 100000

enum ifs_status_err_code {
	IFS_NO_ERROR				= 0,
	IFS_OTHER_THREAD_COULD_NOT_JOIN		= 1,
	IFS_INTERRUPTED_BEFORE_RENDEZVOUS	= 2,
	IFS_POWER_MGMT_INADEQUATE_FOR_SCAN	= 3,
	IFS_INVALID_CHUNK_RANGE			= 4,
	IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS	= 5,
	IFS_CORE_NOT_CAPABLE_CURRENTLY		= 6,
	IFS_UNASSIGNED_ERROR_CODE		= 7,
	IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT	= 8,
	IFS_INTERRUPTED_DURING_EXECUTION	= 9,
	IFS_UNASSIGNED_ERROR_CODE_0xA		= 0xA,
	IFS_CORRUPTED_CHUNK			= 0xB,
};

static const char * const scan_test_status[] = {
	[IFS_NO_ERROR] = "SCAN no error",
	[IFS_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.",
	[IFS_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SCAN coordination.",
	[IFS_POWER_MGMT_INADEQUATE_FOR_SCAN] =
	"Core Abort SCAN Response due to power management condition.",
	[IFS_INVALID_CHUNK_RANGE] = "Non valid chunks in the range",
	[IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.",
	[IFS_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SCAN currently",
	[IFS_UNASSIGNED_ERROR_CODE] = "Unassigned error code 0x7",
	[IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT] =
	"Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently",
	[IFS_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SCAN start",
	[IFS_UNASSIGNED_ERROR_CODE_0xA] = "Unassigned error code 0xA",
	[IFS_CORRUPTED_CHUNK] = "Scan operation aborted due to corrupted image. Try reloading",
};

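/* Log why the scan test did not run to completion on the tested core */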
static void message_not_tested(struct device *dev, int cpu, union ifs_status status)
{
	struct ifs_data *ifsd = ifs_get_data(dev);

	/*
	 * control_error is set when the microcode runs into a problem
	 * loading the image from the reserved BIOS memory, or it has
	 * been corrupted. Reloading the image may fix this issue.
	 */
	if (status.control_error) {
		dev_warn(dev, "CPU(s) %*pbl: Scan controller error. Batch: %02x version: 0x%x\n",
			 cpumask_pr_args(cpu_smt_mask(cpu)), ifsd->cur_batch, ifsd->loaded_version);
		return;
	}

	if (status.error_code < ARRAY_SIZE(scan_test_status)) {
		dev_info(dev, "CPU(s) %*pbl: SCAN operation did not start. %s\n",
			 cpumask_pr_args(cpu_smt_mask(cpu)),
			 scan_test_status[status.error_code]);
	} else if (status.error_code == IFS_SW_TIMEOUT) {
		dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n",
			 cpumask_pr_args(cpu_smt_mask(cpu)));
	} else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) {
		dev_info(dev, "CPU(s) %*pbl: %s\n",
			 cpumask_pr_args(cpu_smt_mask(cpu)),
			 "Not all scan chunks were executed. Maximum forward progress retries exceeded");
	} else {
		dev_info(dev, "CPU(s) %*pbl: SCAN unknown status %llx\n",
			 cpumask_pr_args(cpu_smt_mask(cpu)), status.data);
	}
}

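/* Report a scan signature mismatch on the tested core */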
static void message_fail(struct device *dev, int cpu, union ifs_status status)
{
	struct ifs_data *ifsd = ifs_get_data(dev);

	/*
	 * signature_error is set when the output from the scan chains does not
	 * match the expected signature. This might be a transient problem (e.g.
	 * due to a bit flip from an alpha particle or neutron). If the problem
	 * repeats on a subsequent test, then it indicates an actual problem in
	 * the core being tested.
	 */
	if (status.signature_error) {
		dev_err(dev, "CPU(s) %*pbl: test signature incorrect. Batch: %02x version: 0x%x\n",
			cpumask_pr_args(cpu_smt_mask(cpu)), ifsd->cur_batch, ifsd->loaded_version);
	}
}

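/* Decide from the status whether the interrupted scan can safely be retried */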
static bool can_restart(union ifs_status status)
{
	enum ifs_status_err_code err_code = status.error_code;

	/* Signature for chunk is bad, or scan test failed */
	if (status.signature_error || status.control_error)
		return false;

	switch (err_code) {
	case IFS_NO_ERROR:
	case IFS_OTHER_THREAD_COULD_NOT_JOIN:
	case IFS_INTERRUPTED_BEFORE_RENDEZVOUS:
	case IFS_POWER_MGMT_INADEQUATE_FOR_SCAN:
	case IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT:
	case IFS_INTERRUPTED_DURING_EXECUTION:
		return true;
	case IFS_INVALID_CHUNK_RANGE:
	case IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS:
	case IFS_CORE_NOT_CAPABLE_CURRENTLY:
	case IFS_UNASSIGNED_ERROR_CODE:
	case IFS_UNASSIGNED_ERROR_CODE_0xA:
	case IFS_CORRUPTED_CHUNK:
		break;
	}
	return false;
}

#define SPINUNIT 100 /* 100 nsec */
static atomic_t array_cpus_in;
static atomic_t scan_cpus_in;
static atomic_t sbaf_cpus_in;

/*
 * Simplified cpu sibling rendezvous loop based on microcode loader __wait_for_cpus()
 */
static void wait_for_sibling_cpu(atomic_t *t, long long timeout)
{
	int cpu = smp_processor_id();
	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
	int all_cpus = cpumask_weight(smt_mask);

	atomic_inc(t);
	while (atomic_read(t) < all_cpus) {
		if (timeout < SPINUNIT)
			return;
		ndelay(SPINUNIT);
		timeout -= SPINUNIT;
		touch_nmi_watchdog();
	}
}

/*
 * Execute the scan. Called "simultaneously" on all threads of a core
 * at high priority using the stop_cpus mechanism.
 */
static int doscan(void *data)
{
	int cpu = smp_processor_id(), start, stop;
	struct run_params *params = data;
	union ifs_status status;
	struct ifs_data *ifsd;
	int first;

	ifsd = params->ifsd;

	if (ifsd->generation) {
		start = params->activate->gen2.start;
		stop = params->activate->gen2.stop;
	} else {
		start = params->activate->gen0.start;
		stop = params->activate->gen0.stop;
	}

	/* Only the first logical CPU on a core reports result */
	first = cpumask_first(cpu_smt_mask(cpu));

	wait_for_sibling_cpu(&scan_cpus_in, NSEC_PER_SEC);

	/*
	 * This WRMSR will wait for other HT threads to also write
	 * to this MSR (at most for activate.delay cycles). Then it
	 * starts scan of each requested chunk. The core scan happens
	 * during the "execution" of the WRMSR. This instruction can
	 * take up to 200 milliseconds (in the case where all chunks
	 * are processed in a single pass) before it retires.
	 */
	wrmsrq(MSR_ACTIVATE_SCAN, params->activate->data);
	rdmsrq(MSR_SCAN_STATUS, status.data);

	trace_ifs_status(ifsd->cur_batch, start, stop, status.data);

	/* Pass back the result of the scan */
	if (cpu == first)
		params->status = status;

	return 0;
}

/*
 * Use stop_core_cpuslocked() to synchronize writing to MSR_ACTIVATE_SCAN
 * on all threads of the core to be tested. Loop if necessary to complete
 * run of all chunks. Include some defensive tests to make sure forward
 * progress is made, and that the whole test completes in a reasonable time.
 */
static void ifs_test_core(int cpu, struct device *dev)
{
	union ifs_status status = {};
	union ifs_scan activate;
	unsigned long timeout;
	struct ifs_data *ifsd;
	int to_start, to_stop;
	int status_chunk;
	struct run_params params;
	int retries;

	ifsd = ifs_get_data(dev);

	activate.gen0.rsvd = 0;
	activate.delay = IFS_THREAD_WAIT;
	activate.sigmce = 0;
	to_start = 0;
	to_stop = ifsd->valid_chunks - 1;

	params.ifsd = ifs_get_data(dev);

	if (ifsd->generation) {
		activate.gen2.start = to_start;
		activate.gen2.stop = to_stop;
	} else {
		activate.gen0.start = to_start;
		activate.gen0.stop = to_stop;
	}

	timeout = jiffies + HZ / 2;
	retries = MAX_IFS_RETRIES;

	while (to_start <= to_stop) {
		if (time_after(jiffies, timeout)) {
			status.error_code = IFS_SW_TIMEOUT;
			break;
		}

		params.activate = &activate;
		atomic_set(&scan_cpus_in, 0);
		stop_core_cpuslocked(cpu, doscan, &params);

		status = params.status;

		/* Some cases can be retried, give up for others */
		if (!can_restart(status))
			break;

		status_chunk = ifsd->generation ? status.gen2.chunk_num : status.gen0.chunk_num;
		if (status_chunk == to_start) {
			/* Check for forward progress */
			if (--retries == 0) {
				if (status.error_code == IFS_NO_ERROR)
					status.error_code = IFS_SW_PARTIAL_COMPLETION;
				break;
			}
		} else {
			retries = MAX_IFS_RETRIES;
			if (ifsd->generation)
				activate.gen2.start = status_chunk;
			else
				activate.gen0.start = status_chunk;
			to_start = status_chunk;
		}
	}

	/* Update status for this core */
	ifsd->scan_details = status.data;

	if (status.signature_error) {
		ifsd->status = SCAN_TEST_FAIL;
		message_fail(dev, cpu, status);
	} else if (status.control_error || status.error_code) {
		ifsd->status = SCAN_NOT_TESTED;
		message_not_tested(dev, cpu, status);
	} else {
		ifsd->status = SCAN_TEST_PASS;
	}
}

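/* Run the array BIST; executed on every sibling thread of the core via stop_machine */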
static int do_array_test(void *data)
{
	union ifs_array *command = data;
	int cpu = smp_processor_id();
	int first;

	wait_for_sibling_cpu(&array_cpus_in, NSEC_PER_SEC);

	/*
	 * Only one logical CPU on a core needs to trigger the Array test via MSR write.
	 */
	first = cpumask_first(cpu_smt_mask(cpu));

	if (cpu == first) {
		wrmsrq(MSR_ARRAY_BIST, command->data);
		/* Pass back the result of the test */
		rdmsrq(MSR_ARRAY_BIST, command->data);
	}

	return 0;
}

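/*
 * Loop the array test until every array in the bitmask has been tested,
 * the hardware reports an error, or the half-second software timeout expires.
 */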
static void ifs_array_test_core(int cpu, struct device *dev)
{
	union ifs_array command = {};
	bool timed_out = false;
	struct ifs_data *ifsd;
	unsigned long timeout;

	ifsd = ifs_get_data(dev);

	command.array_bitmask = ~0U;
	timeout = jiffies + HZ / 2;

	do {
		if (time_after(jiffies, timeout)) {
			timed_out = true;
			break;
		}
		atomic_set(&array_cpus_in, 0);
		stop_core_cpuslocked(cpu, do_array_test, &command);

		if (command.ctrl_result)
			break;
	} while (command.array_bitmask);

	ifsd->scan_details = command.data;

	if (command.ctrl_result)
		ifsd->status = SCAN_TEST_FAIL;
	else if (timed_out || command.array_bitmask)
		ifsd->status = SCAN_NOT_TESTED;
	else
		ifsd->status = SCAN_TEST_PASS;
}

#define ARRAY_GEN1_TEST_ALL_ARRAYS	0x0ULL
#define ARRAY_GEN1_STATUS_FAIL		0x1ULL

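/* Gen1 array test: a single MSR write tests all arrays, so no polling loop is needed */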
static int do_array_test_gen1(void *status)
{
	int cpu = smp_processor_id();
	int first;

	first = cpumask_first(cpu_smt_mask(cpu));

	if (cpu == first) {
		wrmsrq(MSR_ARRAY_TRIGGER, ARRAY_GEN1_TEST_ALL_ARRAYS);
		rdmsrq(MSR_ARRAY_STATUS, *((u64 *)status));
	}

	return 0;
}

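/* Run the gen1 array test on a core and record pass/fail from the status MSR */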
static void ifs_array_test_gen1(int cpu, struct device *dev)
{
	struct ifs_data *ifsd = ifs_get_data(dev);
	u64 status = 0;

	stop_core_cpuslocked(cpu, do_array_test_gen1, &status);
	ifsd->scan_details = status;

	if (status & ARRAY_GEN1_STATUS_FAIL)
		ifsd->status = SCAN_TEST_FAIL;
	else
		ifsd->status = SCAN_TEST_PASS;
}

#define SBAF_STATUS_PASS			0
#define SBAF_STATUS_SIGN_FAIL			1
#define SBAF_STATUS_INTR			2
#define SBAF_STATUS_TEST_FAIL			3

enum sbaf_status_err_code {
	IFS_SBAF_NO_ERROR				= 0,
	IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN		= 1,
	IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS		= 2,
	IFS_SBAF_UNASSIGNED_ERROR_CODE3			= 3,
	IFS_SBAF_INVALID_BUNDLE_INDEX			= 4,
	IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS		= 5,
	IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY		= 6,
	IFS_SBAF_UNASSIGNED_ERROR_CODE7			= 7,
	IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT	= 8,
	IFS_SBAF_INTERRUPTED_DURING_EXECUTION		= 9,
	IFS_SBAF_INVALID_PROGRAM_INDEX			= 0xA,
	IFS_SBAF_CORRUPTED_CHUNK			= 0xB,
	IFS_SBAF_DID_NOT_START				= 0xC,
};

static const char * const sbaf_test_status[] = {
	[IFS_SBAF_NO_ERROR] = "SBAF no error",
	[IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.",
	[IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SBAF coordination.",
	[IFS_SBAF_UNASSIGNED_ERROR_CODE3] = "Unassigned error code 0x3",
	[IFS_SBAF_INVALID_BUNDLE_INDEX] = "Non-valid sbaf bundles. Reload test image",
	[IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.",
	[IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SBAF currently",
	[IFS_SBAF_UNASSIGNED_ERROR_CODE7] = "Unassigned error code 0x7",
	[IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently",
	[IFS_SBAF_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SBAF start",
	[IFS_SBAF_INVALID_PROGRAM_INDEX] = "SBAF program index not valid",
	[IFS_SBAF_CORRUPTED_CHUNK] = "SBAF operation aborted due to corrupted chunk",
	[IFS_SBAF_DID_NOT_START] = "SBAF operation did not start",
};

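/* Log why the SBAF test did not run to completion on the tested core */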
static void sbaf_message_not_tested(struct device *dev, int cpu, u64 status_data)
{
	union ifs_sbaf_status status = (union ifs_sbaf_status)status_data;

	if (status.error_code < ARRAY_SIZE(sbaf_test_status)) {
		dev_info(dev, "CPU(s) %*pbl: SBAF operation did not start. %s\n",
			 cpumask_pr_args(cpu_smt_mask(cpu)),
			 sbaf_test_status[status.error_code]);
	} else if (status.error_code == IFS_SW_TIMEOUT) {
		dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n",
			 cpumask_pr_args(cpu_smt_mask(cpu)));
	} else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) {
		dev_info(dev, "CPU(s) %*pbl: %s\n",
			 cpumask_pr_args(cpu_smt_mask(cpu)),
			 "Not all SBAF bundles executed. Maximum forward progress retries exceeded");
	} else {
		dev_info(dev, "CPU(s) %*pbl: SBAF unknown status %llx\n",
			 cpumask_pr_args(cpu_smt_mask(cpu)), status.data);
	}
}

static void sbaf_message_fail(struct device *dev, int cpu, union ifs_sbaf_status status)
{
	/* Failed signature check is set when SBAF signature did not match the expected value */
	if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL) {
		dev_err(dev, "CPU(s) %*pbl: Failed signature check\n",
			cpumask_pr_args(cpu_smt_mask(cpu)));
	}

	/* Failed to reach end of test */
	if (status.sbaf_status == SBAF_STATUS_TEST_FAIL) {
		dev_err(dev, "CPU(s) %*pbl: Failed to complete test\n",
			cpumask_pr_args(cpu_smt_mask(cpu)));
	}
}

static bool sbaf_bundle_completed(union ifs_sbaf_status status)
{
	return !(status.sbaf_status || status.error_code);
}

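/* Decide from the status whether the interrupted SBAF test can safely be retried */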
static bool sbaf_can_restart(union ifs_sbaf_status status)
{
	enum sbaf_status_err_code err_code = status.error_code;

	/* Signature check failed, or the SBAF test itself failed */
	if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL ||
	    status.sbaf_status == SBAF_STATUS_TEST_FAIL)
		return false;

	switch (err_code) {
	case IFS_SBAF_NO_ERROR:
	case IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN:
	case IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS:
	case IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT:
	case IFS_SBAF_INTERRUPTED_DURING_EXECUTION:
		return true;
	case IFS_SBAF_UNASSIGNED_ERROR_CODE3:
	case IFS_SBAF_INVALID_BUNDLE_INDEX:
	case IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS:
	case IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY:
	case IFS_SBAF_UNASSIGNED_ERROR_CODE7:
	case IFS_SBAF_INVALID_PROGRAM_INDEX:
	case IFS_SBAF_CORRUPTED_CHUNK:
	case IFS_SBAF_DID_NOT_START:
		break;
	}
	return false;
}

/*
 * Execute the SBAF test. Called "simultaneously" on all threads of a core
 * at high priority using the stop_cpus mechanism.
 */
static int dosbaf(void *data)
{
	struct sbaf_run_params *run_params = data;
	int cpu = smp_processor_id();
	union ifs_sbaf_status status;
	struct ifs_data *ifsd;
	int first;

	ifsd = run_params->ifsd;

	/* Only the first logical CPU on a core reports result */
	first = cpumask_first(cpu_smt_mask(cpu));
	wait_for_sibling_cpu(&sbaf_cpus_in, NSEC_PER_SEC);

	/*
	 * This WRMSR will wait for other HT threads to also write
	 * to this MSR (at most for activate.delay cycles). Then it
	 * starts scan of each requested bundle. The core test happens
	 * during the "execution" of the WRMSR.
	 */
	wrmsrq(MSR_ACTIVATE_SBAF, run_params->activate->data);
	rdmsrq(MSR_SBAF_STATUS, status.data);
	trace_ifs_sbaf(ifsd->cur_batch, *run_params->activate, status);

	/* Pass back the result of the test */
	if (cpu == first)
		run_params->status = status;

	return 0;
}

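/*
 * Drive the SBAF test bundle by bundle, retrying interrupted bundles while
 * forward progress is being made and giving up after MAX_IFS_RETRIES or a
 * two second software timeout.
 */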
static void ifs_sbaf_test_core(int cpu, struct device *dev)
{
	struct sbaf_run_params run_params;
	union ifs_sbaf_status status = {};
	union ifs_sbaf activate;
	unsigned long timeout;
	struct ifs_data *ifsd;
	int stop_bundle;
	int retries;

	ifsd = ifs_get_data(dev);

	activate.data = 0;
	activate.delay = IFS_THREAD_WAIT;

	timeout = jiffies + 2 * HZ;
	retries = MAX_IFS_RETRIES;
	activate.bundle_idx = 0;
	stop_bundle = ifsd->max_bundle;

	while (activate.bundle_idx <= stop_bundle) {
		if (time_after(jiffies, timeout)) {
			status.error_code = IFS_SW_TIMEOUT;
			break;
		}

		atomic_set(&sbaf_cpus_in, 0);

		run_params.ifsd = ifsd;
		run_params.activate = &activate;
		run_params.retry_cnt = &retries;
		stop_core_cpuslocked(cpu, dosbaf, &run_params);

		status = run_params.status;

		if (sbaf_bundle_completed(status)) {
			activate.bundle_idx = status.bundle_idx + 1;
			activate.pgm_idx = 0;
			retries = MAX_IFS_RETRIES;
			continue;
		}

		/* Some cases can be retried, give up for others */
		if (!sbaf_can_restart(status))
			break;

		if (status.pgm_idx == activate.pgm_idx) {
			/* No forward progress, retry up to MAX_IFS_RETRIES times */
			if (--retries == 0) {
				if (status.error_code == IFS_NO_ERROR)
					status.error_code = IFS_SW_PARTIAL_COMPLETION;
				break;
			}
		} else {
			/* Some progress was made and more programs remain in the bundle; reset retries */
			retries = MAX_IFS_RETRIES;
			activate.bundle_idx = status.bundle_idx;
			activate.pgm_idx = status.pgm_idx;
		}
	}

	/* Update status for this core */
	ifsd->scan_details = status.data;

	if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL ||
	    status.sbaf_status == SBAF_STATUS_TEST_FAIL) {
		ifsd->status = SCAN_TEST_FAIL;
		sbaf_message_fail(dev, cpu, status);
	} else if (status.error_code || status.sbaf_status == SBAF_STATUS_INTR ||
		   (activate.bundle_idx < stop_bundle)) {
		ifsd->status = SCAN_NOT_TESTED;
		sbaf_message_not_tested(dev, cpu, status.data);
	} else {
		ifsd->status = SCAN_TEST_PASS;
	}
}

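/*
 * Example (assuming the standard intel_ifs sysfs interface; the exact path
 * may differ by kernel version): a test on the core containing CPU 3 is
 * typically requested from user space with
 *   echo 3 > /sys/devices/virtual/misc/intel_ifs_0/run_test
 * which reaches do_core_test() below via the driver's sysfs store hook.
 */
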
/*
 * Initiate a per-core test. It wakes up the work queue threads on the target
 * cpu and its sibling cpu. Once all sibling threads wake up, the scan test is
 * executed, and the caller waits for all sibling threads to finish the test.
 */
int do_core_test(int cpu, struct device *dev)
{
	const struct ifs_test_caps *test = ifs_get_test_caps(dev);
	struct ifs_data *ifsd = ifs_get_data(dev);
	int ret = 0;

	/* Prevent CPUs from being taken offline during the scan test */
	cpus_read_lock();

	if (!cpu_online(cpu)) {
		dev_info(dev, "cannot test on the offline cpu %d\n", cpu);
		ret = -EINVAL;
		goto out;
	}

	switch (test->test_num) {
	case IFS_TYPE_SAF:
		if (!ifsd->loaded)
			ret = -EPERM;
		else
			ifs_test_core(cpu, dev);
		break;
	case IFS_TYPE_ARRAY_BIST:
		if (ifsd->array_gen == ARRAY_GEN0)
			ifs_array_test_core(cpu, dev);
		else
			ifs_array_test_gen1(cpu, dev);
		break;
	case IFS_TYPE_SBAF:
		if (!ifsd->loaded)
			ret = -EPERM;
		else
			ifs_sbaf_test_core(cpu, dev);
		break;
	default:
		ret = -EINVAL;
	}
out:
	cpus_read_unlock();
	return ret;
}
664