1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5 
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
8 
9 #include "gem/i915_gem_internal.h"
10 
11 #include "i915_reg.h"
12 #include "intel_engine_heartbeat.h"
13 #include "intel_engine_pm.h"
14 #include "intel_engine_regs.h"
15 #include "intel_gpu_commands.h"
16 #include "intel_gt_clock_utils.h"
17 #include "intel_gt_pm.h"
18 #include "intel_rc6.h"
19 #include "selftest_engine_heartbeat.h"
20 #include "selftest_rps.h"
21 #include "selftests/igt_flush_test.h"
22 #include "selftests/igt_spinner.h"
23 #include "selftests/librapl.h"
24 
/*
 * Try to isolate the impact of cstates from determining frequency response.
 *
 * The value is passed to cpu_latency_qos_add_request() by the frequency
 * tests in this file; that request is only made when CPU_LATENCY >= 0.
 */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
27 
/*
 * Work handler that does nothing. The live tests install it as
 * rps->work.func while they run and put the saved handler back on exit.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
	/* intentionally empty */
}
31 
/*
 * sort() comparator ordering u64 elements ascending.
 *
 * Returns -1, 0 or 1. The result is derived from comparisons rather than a
 * subtraction, so a 64-bit difference is never squeezed into the int return.
 */
static int cmp_u64(const void *A, const void *B)
{
	const u64 lhs = *(const u64 *)A;
	const u64 rhs = *(const u64 *)B;

	return (lhs > rhs) - (lhs < rhs);
}
43 
/*
 * sort() comparator ordering u32 elements ascending.
 *
 * Returns -1, 0 or 1, computed from comparisons so that large unsigned
 * values cannot flip the sign of the result.
 */
static int cmp_u32(const void *A, const void *B)
{
	const u32 lhs = *(const u32 *)A;
	const u32 rhs = *(const u32 *)B;

	return (lhs > rhs) - (lhs < rhs);
}
55 
56 static struct i915_vma *
create_spin_counter(struct intel_engine_cs * engine,struct i915_address_space * vm,bool srm,u32 ** cancel,u32 ** counter)57 create_spin_counter(struct intel_engine_cs *engine,
58 		    struct i915_address_space *vm,
59 		    bool srm,
60 		    u32 **cancel,
61 		    u32 **counter)
62 {
63 	enum {
64 		COUNT,
65 		INC,
66 		__NGPR__,
67 	};
68 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
69 	struct drm_i915_gem_object *obj;
70 	struct i915_vma *vma;
71 	unsigned long end;
72 	u32 *base, *cs;
73 	int loop, i;
74 	int err;
75 
76 	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
77 	if (IS_ERR(obj))
78 		return ERR_CAST(obj);
79 
80 	end = obj->base.size / sizeof(u32) - 1;
81 
82 	vma = i915_vma_instance(obj, vm, NULL);
83 	if (IS_ERR(vma)) {
84 		err = PTR_ERR(vma);
85 		goto err_put;
86 	}
87 
88 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
89 	if (err)
90 		goto err_unlock;
91 
92 	i915_vma_lock(vma);
93 
94 	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
95 	if (IS_ERR(base)) {
96 		err = PTR_ERR(base);
97 		goto err_unpin;
98 	}
99 	cs = base;
100 
101 	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
102 	for (i = 0; i < __NGPR__; i++) {
103 		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
104 		*cs++ = 0;
105 		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
106 		*cs++ = 0;
107 	}
108 
109 	*cs++ = MI_LOAD_REGISTER_IMM(1);
110 	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
111 	*cs++ = 1;
112 
113 	loop = cs - base;
114 
115 	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
116 	for (i = 0; i < 1024; i++) {
117 		*cs++ = MI_MATH(4);
118 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
119 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
120 		*cs++ = MI_MATH_ADD;
121 		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
122 
123 		if (srm) {
124 			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
125 			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
126 			*cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
127 			*cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
128 		}
129 	}
130 
131 	*cs++ = MI_BATCH_BUFFER_START_GEN8;
132 	*cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
133 	*cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
134 	GEM_BUG_ON(cs - base > end);
135 
136 	i915_gem_object_flush_map(obj);
137 
138 	*cancel = base + loop;
139 	*counter = srm ? memset32(base + end, 0, 1) : NULL;
140 	return vma;
141 
142 err_unpin:
143 	i915_vma_unpin(vma);
144 err_unlock:
145 	i915_vma_unlock(vma);
146 err_put:
147 	i915_gem_object_put(obj);
148 	return ERR_PTR(err);
149 }
150 
/*
 * Poll read_cagf() until one of the following holds, and return the last
 * value read:
 *  - @timeout_ms has expired,
 *  - the reading equals @freq,
 *  - every slot of the 64-entry history already holds the current reading,
 *    i.e. the value has stopped moving.
 *
 * The pause between polls starts at 20us, doubles each round and is capped
 * at 20 * @timeout_ms microseconds.
 */
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 samples[64];
	unsigned long deadline;
	u8 slot = 0;
	int delay;

	/* Seed the history with the target so that no early match is possible */
	memset(samples, freq, sizeof(samples));
	delay = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	deadline = jiffies + msecs_to_jiffies(timeout_ms);
	for (;;) {
		u8 act = read_cagf(rps);

		/* Out of time: report whatever we saw last */
		if (time_after(jiffies, deadline))
			return act;

		/* Reached the requested frequency */
		if (act == freq)
			return act;

		/* The whole window already reads the same: it has settled */
		if (!memchr_inv(samples, act, sizeof(samples)))
			return act;

		samples[slot] = act;
		slot = (slot + 1) % ARRAY_SIZE(samples);

		usleep_range(delay, 2 * delay);
		delay *= 2;
		if (delay > timeout_ms * 20)
			delay = timeout_ms * 20;
	}
}
187 
/*
 * Request @freq under rps->lock, retrying intel_rps_set() for up to 50ms,
 * then wait (up to another 50ms) for the hardware to report it.
 *
 * Returns 0 if the request itself kept failing, otherwise the frequency
 * returned by wait_for_freq(), which may differ from @freq.
 */
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	int timed_out;

	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));

	timed_out = wait_for(!intel_rps_set(rps, freq), 50);
	if (!timed_out) {
		GEM_BUG_ON(rps->last_freq != freq);
	}

	mutex_unlock(&rps->lock);

	if (timed_out)
		return 0;

	return wait_for_freq(rps, freq, 50);
}
201 
show_pstate_limits(struct intel_rps * rps)202 static void show_pstate_limits(struct intel_rps *rps)
203 {
204 	struct drm_i915_private *i915 = rps_to_i915(rps);
205 
206 	if (IS_BROXTON(i915)) {
207 		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
208 			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
209 			intel_uncore_read(rps_to_uncore(rps),
210 					  BXT_RP_STATE_CAP));
211 	} else if (GRAPHICS_VER(i915) == 9) {
212 		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
213 			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
214 			intel_uncore_read(rps_to_uncore(rps),
215 					  GEN9_RP_STATE_LIMITS));
216 	}
217 }
218 
live_rps_clock_interval(void * arg)219 int live_rps_clock_interval(void *arg)
220 {
221 	struct intel_gt *gt = arg;
222 	struct intel_rps *rps = &gt->rps;
223 	void (*saved_work)(struct work_struct *wrk);
224 	struct intel_engine_cs *engine;
225 	enum intel_engine_id id;
226 	struct igt_spinner spin;
227 	intel_wakeref_t wakeref;
228 	int err = 0;
229 
230 	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
231 		return 0;
232 
233 	if (igt_spinner_init(&spin, gt))
234 		return -ENOMEM;
235 
236 	intel_gt_pm_wait_for_idle(gt);
237 	saved_work = rps->work.func;
238 	rps->work.func = dummy_rps_work;
239 
240 	wakeref = intel_gt_pm_get(gt);
241 	intel_rps_disable(&gt->rps);
242 
243 	intel_gt_check_clock_frequency(gt);
244 
245 	for_each_engine(engine, gt, id) {
246 		struct i915_request *rq;
247 		u32 cycles;
248 		u64 dt;
249 
250 		if (!intel_engine_can_store_dword(engine))
251 			continue;
252 
253 		st_engine_heartbeat_disable(engine);
254 
255 		rq = igt_spinner_create_request(&spin,
256 						engine->kernel_context,
257 						MI_NOOP);
258 		if (IS_ERR(rq)) {
259 			st_engine_heartbeat_enable(engine);
260 			err = PTR_ERR(rq);
261 			break;
262 		}
263 
264 		i915_request_add(rq);
265 
266 		if (!igt_wait_for_spinner(&spin, rq)) {
267 			pr_err("%s: RPS spinner did not start\n",
268 			       engine->name);
269 			igt_spinner_end(&spin);
270 			st_engine_heartbeat_enable(engine);
271 			intel_gt_set_wedged(engine->gt);
272 			err = -EIO;
273 			break;
274 		}
275 
276 		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
277 
278 		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
279 
280 		/* Set the evaluation interval to infinity! */
281 		intel_uncore_write_fw(gt->uncore,
282 				      GEN6_RP_UP_EI, 0xffffffff);
283 		intel_uncore_write_fw(gt->uncore,
284 				      GEN6_RP_UP_THRESHOLD, 0xffffffff);
285 
286 		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
287 				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
288 
289 		if (wait_for(intel_uncore_read_fw(gt->uncore,
290 						  GEN6_RP_CUR_UP_EI),
291 			     10)) {
292 			/* Just skip the test; assume lack of HW support */
293 			pr_notice("%s: rps evaluation interval not ticking\n",
294 				  engine->name);
295 			err = -ENODEV;
296 		} else {
297 			ktime_t dt_[5];
298 			u32 cycles_[5];
299 			int i;
300 
301 			for (i = 0; i < 5; i++) {
302 				preempt_disable();
303 
304 				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
305 				dt_[i] = ktime_get();
306 
307 				udelay(1000);
308 
309 				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
310 				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
311 
312 				preempt_enable();
313 			}
314 
315 			/* Use the median of both cycle/dt; close enough */
316 			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
317 			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
318 			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
319 			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
320 		}
321 
322 		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
323 		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
324 
325 		igt_spinner_end(&spin);
326 		st_engine_heartbeat_enable(engine);
327 
328 		if (err == 0) {
329 			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
330 			u32 expected =
331 				intel_gt_ns_to_pm_interval(gt, dt);
332 
333 			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
334 				engine->name, cycles, time, dt, expected,
335 				gt->clock_frequency / 1000);
336 
337 			if (10 * time < 8 * dt ||
338 			    8 * time > 10 * dt) {
339 				pr_err("%s: rps clock time does not match walltime!\n",
340 				       engine->name);
341 				err = -EINVAL;
342 			}
343 
344 			if (10 * expected < 8 * cycles ||
345 			    8 * expected > 10 * cycles) {
346 				pr_err("%s: walltime does not match rps clock ticks!\n",
347 				       engine->name);
348 				err = -EINVAL;
349 			}
350 		}
351 
352 		if (igt_flush_test(gt->i915))
353 			err = -EIO;
354 
355 		break; /* once is enough */
356 	}
357 
358 	intel_rps_enable(&gt->rps);
359 	intel_gt_pm_put(gt, wakeref);
360 
361 	igt_spinner_fini(&spin);
362 
363 	intel_gt_pm_wait_for_idle(gt);
364 	rps->work.func = saved_work;
365 
366 	if (err == -ENODEV) /* skipped, don't report a fail */
367 		err = 0;
368 
369 	return err;
370 }
371 
live_rps_control(void * arg)372 int live_rps_control(void *arg)
373 {
374 	struct intel_gt *gt = arg;
375 	struct intel_rps *rps = &gt->rps;
376 	void (*saved_work)(struct work_struct *wrk);
377 	struct intel_engine_cs *engine;
378 	enum intel_engine_id id;
379 	struct igt_spinner spin;
380 	intel_wakeref_t wakeref;
381 	int err = 0;
382 
383 	/*
384 	 * Check that the actual frequency matches our requested frequency,
385 	 * to verify our control mechanism. We have to be careful that the
386 	 * PCU may throttle the GPU in which case the actual frequency used
387 	 * will be lowered than requested.
388 	 */
389 
390 	if (!intel_rps_is_enabled(rps))
391 		return 0;
392 
393 	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
394 		return 0;
395 
396 	if (igt_spinner_init(&spin, gt))
397 		return -ENOMEM;
398 
399 	intel_gt_pm_wait_for_idle(gt);
400 	saved_work = rps->work.func;
401 	rps->work.func = dummy_rps_work;
402 
403 	wakeref = intel_gt_pm_get(gt);
404 	for_each_engine(engine, gt, id) {
405 		struct i915_request *rq;
406 		ktime_t min_dt, max_dt;
407 		int f, limit;
408 		int min, max;
409 
410 		if (!intel_engine_can_store_dword(engine))
411 			continue;
412 
413 		st_engine_heartbeat_disable(engine);
414 
415 		rq = igt_spinner_create_request(&spin,
416 						engine->kernel_context,
417 						MI_NOOP);
418 		if (IS_ERR(rq)) {
419 			err = PTR_ERR(rq);
420 			break;
421 		}
422 
423 		i915_request_add(rq);
424 
425 		if (!igt_wait_for_spinner(&spin, rq)) {
426 			pr_err("%s: RPS spinner did not start\n",
427 			       engine->name);
428 			igt_spinner_end(&spin);
429 			st_engine_heartbeat_enable(engine);
430 			intel_gt_set_wedged(engine->gt);
431 			err = -EIO;
432 			break;
433 		}
434 
435 		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
436 			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
437 			       engine->name, rps->min_freq, read_cagf(rps));
438 			igt_spinner_end(&spin);
439 			st_engine_heartbeat_enable(engine);
440 			show_pstate_limits(rps);
441 			err = -EINVAL;
442 			break;
443 		}
444 
445 		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
446 			if (rps_set_check(rps, f) < f)
447 				break;
448 		}
449 
450 		limit = rps_set_check(rps, f);
451 
452 		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
453 			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
454 			       engine->name, rps->min_freq, read_cagf(rps));
455 			igt_spinner_end(&spin);
456 			st_engine_heartbeat_enable(engine);
457 			show_pstate_limits(rps);
458 			err = -EINVAL;
459 			break;
460 		}
461 
462 		max_dt = ktime_get();
463 		max = rps_set_check(rps, limit);
464 		max_dt = ktime_sub(ktime_get(), max_dt);
465 
466 		min_dt = ktime_get();
467 		min = rps_set_check(rps, rps->min_freq);
468 		min_dt = ktime_sub(ktime_get(), min_dt);
469 
470 		igt_spinner_end(&spin);
471 		st_engine_heartbeat_enable(engine);
472 
473 		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
474 			engine->name,
475 			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
476 			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
477 			limit, intel_gpu_freq(rps, limit),
478 			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
479 
480 		if (limit != rps->max_freq) {
481 			u32 throttle = intel_uncore_read(gt->uncore,
482 							 intel_gt_perf_limit_reasons_reg(gt));
483 
484 			pr_warn("%s: GPU throttled with reasons 0x%08x\n",
485 				engine->name, throttle & GT0_PERF_LIMIT_REASONS_MASK);
486 			show_pstate_limits(rps);
487 		}
488 
489 		if (igt_flush_test(gt->i915)) {
490 			err = -EIO;
491 			break;
492 		}
493 	}
494 	intel_gt_pm_put(gt, wakeref);
495 
496 	igt_spinner_fini(&spin);
497 
498 	intel_gt_pm_wait_for_idle(gt);
499 	rps->work.func = saved_work;
500 
501 	return err;
502 }
503 
show_pcu_config(struct intel_rps * rps)504 static void show_pcu_config(struct intel_rps *rps)
505 {
506 	struct drm_i915_private *i915 = rps_to_i915(rps);
507 	unsigned int max_gpu_freq, min_gpu_freq;
508 	intel_wakeref_t wakeref;
509 	int gpu_freq;
510 
511 	if (!HAS_LLC(i915))
512 		return;
513 
514 	min_gpu_freq = rps->min_freq;
515 	max_gpu_freq = rps->max_freq;
516 	if (GRAPHICS_VER(i915) >= 9) {
517 		/* Convert GT frequency to 50 HZ units */
518 		min_gpu_freq /= GEN9_FREQ_SCALER;
519 		max_gpu_freq /= GEN9_FREQ_SCALER;
520 	}
521 
522 	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
523 
524 	pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
525 	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
526 		int ia_freq = gpu_freq;
527 
528 		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
529 			       &ia_freq, NULL);
530 
531 		pr_info("%5d  %5d  %5d\n",
532 			gpu_freq * 50,
533 			((ia_freq >> 0) & 0xff) * 100,
534 			((ia_freq >> 8) & 0xff) * 100);
535 	}
536 
537 	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
538 }
539 
__measure_frequency(u32 * cntr,int duration_ms)540 static u64 __measure_frequency(u32 *cntr, int duration_ms)
541 {
542 	u64 dc, dt;
543 
544 	dc = READ_ONCE(*cntr);
545 	dt = ktime_get();
546 	usleep_range(1000 * duration_ms, 2000 * duration_ms);
547 	dc = READ_ONCE(*cntr) - dc;
548 	dt = ktime_get() - dt;
549 
550 	return div64_u64(1000 * 1000 * dc, dt);
551 }
552 
measure_frequency_at(struct intel_rps * rps,u32 * cntr,int * freq)553 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
554 {
555 	u64 x[5];
556 	int i;
557 
558 	*freq = rps_set_check(rps, *freq);
559 	for (i = 0; i < 5; i++)
560 		x[i] = __measure_frequency(cntr, 2);
561 	*freq = (*freq + read_cagf(rps)) / 2;
562 
563 	/* A simple triangle filter for better result stability */
564 	sort(x, 5, sizeof(*x), cmp_u64, NULL);
565 	return div_u64(x[1] + 2 * x[2] + x[3], 4);
566 }
567 
__measure_cs_frequency(struct intel_engine_cs * engine,int duration_ms)568 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
569 				  int duration_ms)
570 {
571 	u64 dc, dt;
572 
573 	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
574 	dt = ktime_get();
575 	usleep_range(1000 * duration_ms, 2000 * duration_ms);
576 	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
577 	dt = ktime_get() - dt;
578 
579 	return div64_u64(1000 * 1000 * dc, dt);
580 }
581 
measure_cs_frequency_at(struct intel_rps * rps,struct intel_engine_cs * engine,int * freq)582 static u64 measure_cs_frequency_at(struct intel_rps *rps,
583 				   struct intel_engine_cs *engine,
584 				   int *freq)
585 {
586 	u64 x[5];
587 	int i;
588 
589 	*freq = rps_set_check(rps, *freq);
590 	for (i = 0; i < 5; i++)
591 		x[i] = __measure_cs_frequency(engine, 2);
592 	*freq = (*freq + read_cagf(rps)) / 2;
593 
594 	/* A simple triangle filter for better result stability */
595 	sort(x, 5, sizeof(*x), cmp_u64, NULL);
596 	return div_u64(x[1] + 2 * x[2] + x[3], 4);
597 }
598 
/*
 * True when @x / @y lies strictly between @f_n / @f_d and @f_d / @f_n,
 * evaluated with multiplications only (no division, no rounding).
 */
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	/* Lower bound: x/y must exceed f_n/f_d */
	if (f_d * x <= f_n * y)
		return false;

	/* Upper bound: x/y must stay below f_d/f_n */
	return f_n * x < f_d * y;
}
603 
live_rps_frequency_cs(void * arg)604 int live_rps_frequency_cs(void *arg)
605 {
606 	void (*saved_work)(struct work_struct *wrk);
607 	struct intel_gt *gt = arg;
608 	struct intel_rps *rps = &gt->rps;
609 	struct intel_engine_cs *engine;
610 	struct pm_qos_request qos;
611 	enum intel_engine_id id;
612 	int err = 0;
613 
614 	/*
615 	 * The premise is that the GPU does change frequency at our behest.
616 	 * Let's check there is a correspondence between the requested
617 	 * frequency, the actual frequency, and the observed clock rate.
618 	 */
619 
620 	if (!intel_rps_is_enabled(rps))
621 		return 0;
622 
623 	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
624 		return 0;
625 
626 	if (CPU_LATENCY >= 0)
627 		cpu_latency_qos_add_request(&qos, CPU_LATENCY);
628 
629 	intel_gt_pm_wait_for_idle(gt);
630 	saved_work = rps->work.func;
631 	rps->work.func = dummy_rps_work;
632 
633 	for_each_engine(engine, gt, id) {
634 		struct i915_request *rq;
635 		struct i915_vma *vma;
636 		u32 *cancel, *cntr;
637 		struct {
638 			u64 count;
639 			int freq;
640 		} min, max;
641 
642 		st_engine_heartbeat_disable(engine);
643 
644 		vma = create_spin_counter(engine,
645 					  engine->kernel_context->vm, false,
646 					  &cancel, &cntr);
647 		if (IS_ERR(vma)) {
648 			err = PTR_ERR(vma);
649 			st_engine_heartbeat_enable(engine);
650 			break;
651 		}
652 
653 		rq = intel_engine_create_kernel_request(engine);
654 		if (IS_ERR(rq)) {
655 			err = PTR_ERR(rq);
656 			goto err_vma;
657 		}
658 
659 		err = i915_vma_move_to_active(vma, rq, 0);
660 		if (!err)
661 			err = rq->engine->emit_bb_start(rq,
662 							i915_vma_offset(vma),
663 							PAGE_SIZE, 0);
664 		i915_request_add(rq);
665 		if (err)
666 			goto err_vma;
667 
668 		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
669 			     10)) {
670 			pr_err("%s: timed loop did not start\n",
671 			       engine->name);
672 			goto err_vma;
673 		}
674 
675 		min.freq = rps->min_freq;
676 		min.count = measure_cs_frequency_at(rps, engine, &min.freq);
677 
678 		max.freq = rps->max_freq;
679 		max.count = measure_cs_frequency_at(rps, engine, &max.freq);
680 
681 		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
682 			engine->name,
683 			min.count, intel_gpu_freq(rps, min.freq),
684 			max.count, intel_gpu_freq(rps, max.freq),
685 			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
686 						     max.freq * min.count));
687 
688 		if (!scaled_within(max.freq * min.count,
689 				   min.freq * max.count,
690 				   2, 3)) {
691 			int f;
692 
693 			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
694 			       engine->name,
695 			       max.freq * min.count,
696 			       min.freq * max.count);
697 			show_pcu_config(rps);
698 
699 			for (f = min.freq + 1; f <= rps->max_freq; f++) {
700 				int act = f;
701 				u64 count;
702 
703 				count = measure_cs_frequency_at(rps, engine, &act);
704 				if (act < f)
705 					break;
706 
707 				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
708 					engine->name,
709 					act, intel_gpu_freq(rps, act), count,
710 					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
711 								     act * min.count));
712 
713 				f = act; /* may skip ahead [pcu granularity] */
714 			}
715 
716 			err = -EINTR; /* ignore error, continue on with test */
717 		}
718 
719 err_vma:
720 		*cancel = MI_BATCH_BUFFER_END;
721 		i915_gem_object_flush_map(vma->obj);
722 		i915_gem_object_unpin_map(vma->obj);
723 		i915_vma_unpin(vma);
724 		i915_vma_unlock(vma);
725 		i915_vma_put(vma);
726 
727 		st_engine_heartbeat_enable(engine);
728 		if (igt_flush_test(gt->i915))
729 			err = -EIO;
730 		if (err)
731 			break;
732 	}
733 
734 	intel_gt_pm_wait_for_idle(gt);
735 	rps->work.func = saved_work;
736 
737 	if (CPU_LATENCY >= 0)
738 		cpu_latency_qos_remove_request(&qos);
739 
740 	return err;
741 }
742 
live_rps_frequency_srm(void * arg)743 int live_rps_frequency_srm(void *arg)
744 {
745 	void (*saved_work)(struct work_struct *wrk);
746 	struct intel_gt *gt = arg;
747 	struct intel_rps *rps = &gt->rps;
748 	struct intel_engine_cs *engine;
749 	struct pm_qos_request qos;
750 	enum intel_engine_id id;
751 	int err = 0;
752 
753 	/*
754 	 * The premise is that the GPU does change frequency at our behest.
755 	 * Let's check there is a correspondence between the requested
756 	 * frequency, the actual frequency, and the observed clock rate.
757 	 */
758 
759 	if (!intel_rps_is_enabled(rps))
760 		return 0;
761 
762 	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
763 		return 0;
764 
765 	if (CPU_LATENCY >= 0)
766 		cpu_latency_qos_add_request(&qos, CPU_LATENCY);
767 
768 	intel_gt_pm_wait_for_idle(gt);
769 	saved_work = rps->work.func;
770 	rps->work.func = dummy_rps_work;
771 
772 	for_each_engine(engine, gt, id) {
773 		struct i915_request *rq;
774 		struct i915_vma *vma;
775 		u32 *cancel, *cntr;
776 		struct {
777 			u64 count;
778 			int freq;
779 		} min, max;
780 
781 		st_engine_heartbeat_disable(engine);
782 
783 		vma = create_spin_counter(engine,
784 					  engine->kernel_context->vm, true,
785 					  &cancel, &cntr);
786 		if (IS_ERR(vma)) {
787 			err = PTR_ERR(vma);
788 			st_engine_heartbeat_enable(engine);
789 			break;
790 		}
791 
792 		rq = intel_engine_create_kernel_request(engine);
793 		if (IS_ERR(rq)) {
794 			err = PTR_ERR(rq);
795 			goto err_vma;
796 		}
797 
798 		err = i915_vma_move_to_active(vma, rq, 0);
799 		if (!err)
800 			err = rq->engine->emit_bb_start(rq,
801 							i915_vma_offset(vma),
802 							PAGE_SIZE, 0);
803 		i915_request_add(rq);
804 		if (err)
805 			goto err_vma;
806 
807 		if (wait_for(READ_ONCE(*cntr), 10)) {
808 			pr_err("%s: timed loop did not start\n",
809 			       engine->name);
810 			goto err_vma;
811 		}
812 
813 		min.freq = rps->min_freq;
814 		min.count = measure_frequency_at(rps, cntr, &min.freq);
815 
816 		max.freq = rps->max_freq;
817 		max.count = measure_frequency_at(rps, cntr, &max.freq);
818 
819 		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
820 			engine->name,
821 			min.count, intel_gpu_freq(rps, min.freq),
822 			max.count, intel_gpu_freq(rps, max.freq),
823 			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
824 						     max.freq * min.count));
825 
826 		if (!scaled_within(max.freq * min.count,
827 				   min.freq * max.count,
828 				   1, 2)) {
829 			int f;
830 
831 			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
832 			       engine->name,
833 			       max.freq * min.count,
834 			       min.freq * max.count);
835 			show_pcu_config(rps);
836 
837 			for (f = min.freq + 1; f <= rps->max_freq; f++) {
838 				int act = f;
839 				u64 count;
840 
841 				count = measure_frequency_at(rps, cntr, &act);
842 				if (act < f)
843 					break;
844 
845 				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
846 					engine->name,
847 					act, intel_gpu_freq(rps, act), count,
848 					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
849 								     act * min.count));
850 
851 				f = act; /* may skip ahead [pcu granularity] */
852 			}
853 
854 			err = -EINTR; /* ignore error, continue on with test */
855 		}
856 
857 err_vma:
858 		*cancel = MI_BATCH_BUFFER_END;
859 		i915_gem_object_flush_map(vma->obj);
860 		i915_gem_object_unpin_map(vma->obj);
861 		i915_vma_unpin(vma);
862 		i915_vma_unlock(vma);
863 		i915_vma_put(vma);
864 
865 		st_engine_heartbeat_enable(engine);
866 		if (igt_flush_test(gt->i915))
867 			err = -EIO;
868 		if (err)
869 			break;
870 	}
871 
872 	intel_gt_pm_wait_for_idle(gt);
873 	rps->work.func = saved_work;
874 
875 	if (CPU_LATENCY >= 0)
876 		cpu_latency_qos_remove_request(&qos);
877 
878 	return err;
879 }
880 
sleep_for_ei(struct intel_rps * rps,int timeout_us)881 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
882 {
883 	/* Flush any previous EI */
884 	usleep_range(timeout_us, 2 * timeout_us);
885 
886 	/* Reset the interrupt status */
887 	rps_disable_interrupts(rps);
888 	GEM_BUG_ON(rps->pm_iir);
889 	rps_enable_interrupts(rps);
890 
891 	/* And then wait for the timeout, for real this time */
892 	usleep_range(2 * timeout_us, 3 * timeout_us);
893 }
894 
__rps_up_interrupt(struct intel_rps * rps,struct intel_engine_cs * engine,struct igt_spinner * spin)895 static int __rps_up_interrupt(struct intel_rps *rps,
896 			      struct intel_engine_cs *engine,
897 			      struct igt_spinner *spin)
898 {
899 	struct intel_uncore *uncore = engine->uncore;
900 	struct i915_request *rq;
901 	u32 timeout;
902 
903 	if (!intel_engine_can_store_dword(engine))
904 		return 0;
905 
906 	rps_set_check(rps, rps->min_freq);
907 
908 	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
909 	if (IS_ERR(rq))
910 		return PTR_ERR(rq);
911 
912 	i915_request_get(rq);
913 	i915_request_add(rq);
914 
915 	if (!igt_wait_for_spinner(spin, rq)) {
916 		pr_err("%s: RPS spinner did not start\n",
917 		       engine->name);
918 		i915_request_put(rq);
919 		intel_gt_set_wedged(engine->gt);
920 		return -EIO;
921 	}
922 
923 	if (!intel_rps_is_active(rps)) {
924 		pr_err("%s: RPS not enabled on starting spinner\n",
925 		       engine->name);
926 		igt_spinner_end(spin);
927 		i915_request_put(rq);
928 		return -EINVAL;
929 	}
930 
931 	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
932 		pr_err("%s: RPS did not register UP interrupt\n",
933 		       engine->name);
934 		i915_request_put(rq);
935 		return -EINVAL;
936 	}
937 
938 	if (rps->last_freq != rps->min_freq) {
939 		pr_err("%s: RPS did not program min frequency\n",
940 		       engine->name);
941 		i915_request_put(rq);
942 		return -EINVAL;
943 	}
944 
945 	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
946 	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
947 	timeout = DIV_ROUND_UP(timeout, 1000);
948 
949 	sleep_for_ei(rps, timeout);
950 	GEM_BUG_ON(i915_request_completed(rq));
951 
952 	igt_spinner_end(spin);
953 	i915_request_put(rq);
954 
955 	if (rps->cur_freq != rps->min_freq) {
956 		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
957 		       engine->name, intel_rps_read_actual_frequency(rps));
958 		return -EINVAL;
959 	}
960 
961 	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
962 		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
963 		       engine->name, rps->pm_iir,
964 		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
965 		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
966 		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
967 		return -EINVAL;
968 	}
969 
970 	return 0;
971 }
972 
__rps_down_interrupt(struct intel_rps * rps,struct intel_engine_cs * engine)973 static int __rps_down_interrupt(struct intel_rps *rps,
974 				struct intel_engine_cs *engine)
975 {
976 	struct intel_uncore *uncore = engine->uncore;
977 	u32 timeout;
978 
979 	rps_set_check(rps, rps->max_freq);
980 
981 	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
982 		pr_err("%s: RPS did not register DOWN interrupt\n",
983 		       engine->name);
984 		return -EINVAL;
985 	}
986 
987 	if (rps->last_freq != rps->max_freq) {
988 		pr_err("%s: RPS did not program max frequency\n",
989 		       engine->name);
990 		return -EINVAL;
991 	}
992 
993 	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
994 	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
995 	timeout = DIV_ROUND_UP(timeout, 1000);
996 
997 	sleep_for_ei(rps, timeout);
998 
999 	if (rps->cur_freq != rps->max_freq) {
1000 		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
1001 		       engine->name,
1002 		       intel_rps_read_actual_frequency(rps));
1003 		return -EINVAL;
1004 	}
1005 
1006 	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1007 		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1008 		       engine->name, rps->pm_iir,
1009 		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1010 		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1011 		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1012 		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1013 		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1014 		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
1015 		return -EINVAL;
1016 	}
1017 
1018 	return 0;
1019 }
1020 
live_rps_interrupt(void * arg)1021 int live_rps_interrupt(void *arg)
1022 {
1023 	struct intel_gt *gt = arg;
1024 	struct intel_rps *rps = &gt->rps;
1025 	void (*saved_work)(struct work_struct *wrk);
1026 	struct intel_engine_cs *engine;
1027 	enum intel_engine_id id;
1028 	struct igt_spinner spin;
1029 	intel_wakeref_t wakeref;
1030 	u32 pm_events;
1031 	int err = 0;
1032 
1033 	/*
1034 	 * First, let's check whether or not we are receiving interrupts.
1035 	 */
1036 
1037 	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
1038 		return 0;
1039 
1040 	pm_events = 0;
1041 	with_intel_gt_pm(gt, wakeref)
1042 		pm_events = rps->pm_events;
1043 	if (!pm_events) {
1044 		pr_err("No RPS PM events registered, but RPS is enabled?\n");
1045 		return -ENODEV;
1046 	}
1047 
1048 	if (igt_spinner_init(&spin, gt))
1049 		return -ENOMEM;
1050 
1051 	intel_gt_pm_wait_for_idle(gt);
1052 	saved_work = rps->work.func;
1053 	rps->work.func = dummy_rps_work;
1054 
1055 	for_each_engine(engine, gt, id) {
1056 		/* Keep the engine busy with a spinner; expect an UP! */
1057 		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1058 			intel_gt_pm_wait_for_idle(engine->gt);
1059 			GEM_BUG_ON(intel_rps_is_active(rps));
1060 
1061 			st_engine_heartbeat_disable(engine);
1062 
1063 			err = __rps_up_interrupt(rps, engine, &spin);
1064 
1065 			st_engine_heartbeat_enable(engine);
1066 			if (err)
1067 				goto out;
1068 
1069 			intel_gt_pm_wait_for_idle(engine->gt);
1070 		}
1071 
1072 		/* Keep the engine awake but idle and check for DOWN */
1073 		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1074 			st_engine_heartbeat_disable(engine);
1075 			intel_rc6_disable(&gt->rc6);
1076 
1077 			err = __rps_down_interrupt(rps, engine);
1078 
1079 			intel_rc6_enable(&gt->rc6);
1080 			st_engine_heartbeat_enable(engine);
1081 			if (err)
1082 				goto out;
1083 		}
1084 	}
1085 
1086 out:
1087 	if (igt_flush_test(gt->i915))
1088 		err = -EIO;
1089 
1090 	igt_spinner_fini(&spin);
1091 
1092 	intel_gt_pm_wait_for_idle(gt);
1093 	rps->work.func = saved_work;
1094 
1095 	return err;
1096 }
1097 
__measure_power(int duration_ms)1098 static u64 __measure_power(int duration_ms)
1099 {
1100 	u64 dE, dt;
1101 
1102 	dE = librapl_energy_uJ();
1103 	dt = ktime_get();
1104 	usleep_range(1000 * duration_ms, 2000 * duration_ms);
1105 	dE = librapl_energy_uJ() - dE;
1106 	dt = ktime_get() - dt;
1107 
1108 	return div64_u64(1000 * 1000 * dE, dt);
1109 }
1110 
measure_power(struct intel_rps * rps,int * freq)1111 static u64 measure_power(struct intel_rps *rps, int *freq)
1112 {
1113 	u64 x[5];
1114 	int i;
1115 
1116 	for (i = 0; i < 5; i++)
1117 		x[i] = __measure_power(5);
1118 
1119 	*freq = (*freq + read_cagf(rps)) / 2;
1120 
1121 	/* A simple triangle filter for better result stability */
1122 	sort(x, 5, sizeof(*x), cmp_u64, NULL);
1123 	return div_u64(x[1] + 2 * x[2] + x[3], 4);
1124 }
1125 
measure_power_at(struct intel_rps * rps,int * freq)1126 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1127 {
1128 	*freq = rps_set_check(rps, *freq);
1129 	msleep(100);
1130 	return measure_power(rps, freq);
1131 }
1132 
live_rps_power(void * arg)1133 int live_rps_power(void *arg)
1134 {
1135 	struct intel_gt *gt = arg;
1136 	struct intel_rps *rps = &gt->rps;
1137 	void (*saved_work)(struct work_struct *wrk);
1138 	struct intel_engine_cs *engine;
1139 	enum intel_engine_id id;
1140 	struct igt_spinner spin;
1141 	int err = 0;
1142 
1143 	/*
1144 	 * Our fundamental assumption is that running at lower frequency
1145 	 * actually saves power. Let's see if our RAPL measurement support
1146 	 * that theory.
1147 	 */
1148 
1149 	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1150 		return 0;
1151 
1152 	if (!librapl_supported(gt->i915))
1153 		return 0;
1154 
1155 	if (igt_spinner_init(&spin, gt))
1156 		return -ENOMEM;
1157 
1158 	intel_gt_pm_wait_for_idle(gt);
1159 	saved_work = rps->work.func;
1160 	rps->work.func = dummy_rps_work;
1161 
1162 	for_each_engine(engine, gt, id) {
1163 		struct i915_request *rq;
1164 		struct {
1165 			u64 power;
1166 			int freq;
1167 		} min, max;
1168 
1169 		if (!intel_engine_can_store_dword(engine))
1170 			continue;
1171 
1172 		st_engine_heartbeat_disable(engine);
1173 
1174 		rq = igt_spinner_create_request(&spin,
1175 						engine->kernel_context,
1176 						MI_NOOP);
1177 		if (IS_ERR(rq)) {
1178 			st_engine_heartbeat_enable(engine);
1179 			err = PTR_ERR(rq);
1180 			break;
1181 		}
1182 
1183 		i915_request_add(rq);
1184 
1185 		if (!igt_wait_for_spinner(&spin, rq)) {
1186 			pr_err("%s: RPS spinner did not start\n",
1187 			       engine->name);
1188 			igt_spinner_end(&spin);
1189 			st_engine_heartbeat_enable(engine);
1190 			intel_gt_set_wedged(engine->gt);
1191 			err = -EIO;
1192 			break;
1193 		}
1194 
1195 		max.freq = rps->max_freq;
1196 		max.power = measure_power_at(rps, &max.freq);
1197 
1198 		min.freq = rps->min_freq;
1199 		min.power = measure_power_at(rps, &min.freq);
1200 
1201 		igt_spinner_end(&spin);
1202 		st_engine_heartbeat_enable(engine);
1203 
1204 		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1205 			engine->name,
1206 			min.power, intel_gpu_freq(rps, min.freq),
1207 			max.power, intel_gpu_freq(rps, max.freq));
1208 
1209 		if (10 * min.freq >= 9 * max.freq) {
1210 			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1211 				  min.freq, intel_gpu_freq(rps, min.freq),
1212 				  max.freq, intel_gpu_freq(rps, max.freq));
1213 			continue;
1214 		}
1215 
1216 		if (11 * min.power > 10 * max.power) {
1217 			pr_err("%s: did not conserve power when setting lower frequency!\n",
1218 			       engine->name);
1219 			err = -EINVAL;
1220 			break;
1221 		}
1222 
1223 		if (igt_flush_test(gt->i915)) {
1224 			err = -EIO;
1225 			break;
1226 		}
1227 	}
1228 
1229 	igt_spinner_fini(&spin);
1230 
1231 	intel_gt_pm_wait_for_idle(gt);
1232 	rps->work.func = saved_work;
1233 
1234 	return err;
1235 }
1236 
live_rps_dynamic(void * arg)1237 int live_rps_dynamic(void *arg)
1238 {
1239 	struct intel_gt *gt = arg;
1240 	struct intel_rps *rps = &gt->rps;
1241 	struct intel_engine_cs *engine;
1242 	enum intel_engine_id id;
1243 	struct igt_spinner spin;
1244 	int err = 0;
1245 
1246 	/*
1247 	 * We've looked at the bascs, and have established that we
1248 	 * can change the clock frequency and that the HW will generate
1249 	 * interrupts based on load. Now we check how we integrate those
1250 	 * moving parts into dynamic reclocking based on load.
1251 	 */
1252 
1253 	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1254 		return 0;
1255 
1256 	if (igt_spinner_init(&spin, gt))
1257 		return -ENOMEM;
1258 
1259 	if (intel_rps_has_interrupts(rps))
1260 		pr_info("RPS has interrupt support\n");
1261 	if (intel_rps_uses_timer(rps))
1262 		pr_info("RPS has timer support\n");
1263 
1264 	for_each_engine(engine, gt, id) {
1265 		struct i915_request *rq;
1266 		struct {
1267 			ktime_t dt;
1268 			u8 freq;
1269 		} min, max;
1270 
1271 		if (!intel_engine_can_store_dword(engine))
1272 			continue;
1273 
1274 		intel_gt_pm_wait_for_idle(gt);
1275 		GEM_BUG_ON(intel_rps_is_active(rps));
1276 		rps->cur_freq = rps->min_freq;
1277 
1278 		intel_engine_pm_get(engine);
1279 		intel_rc6_disable(&gt->rc6);
1280 		GEM_BUG_ON(rps->last_freq != rps->min_freq);
1281 
1282 		rq = igt_spinner_create_request(&spin,
1283 						engine->kernel_context,
1284 						MI_NOOP);
1285 		if (IS_ERR(rq)) {
1286 			err = PTR_ERR(rq);
1287 			goto err;
1288 		}
1289 
1290 		i915_request_add(rq);
1291 
1292 		max.dt = ktime_get();
1293 		max.freq = wait_for_freq(rps, rps->max_freq, 500);
1294 		max.dt = ktime_sub(ktime_get(), max.dt);
1295 
1296 		igt_spinner_end(&spin);
1297 
1298 		min.dt = ktime_get();
1299 		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1300 		min.dt = ktime_sub(ktime_get(), min.dt);
1301 
1302 		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1303 			engine->name,
1304 			max.freq, intel_gpu_freq(rps, max.freq),
1305 			ktime_to_ns(max.dt),
1306 			min.freq, intel_gpu_freq(rps, min.freq),
1307 			ktime_to_ns(min.dt));
1308 		if (min.freq >= max.freq) {
1309 			pr_err("%s: dynamic reclocking of spinner failed\n!",
1310 			       engine->name);
1311 			err = -EINVAL;
1312 		}
1313 
1314 err:
1315 		intel_rc6_enable(&gt->rc6);
1316 		intel_engine_pm_put(engine);
1317 
1318 		if (igt_flush_test(gt->i915))
1319 			err = -EIO;
1320 		if (err)
1321 			break;
1322 	}
1323 
1324 	igt_spinner_fini(&spin);
1325 
1326 	return err;
1327 }
1328