1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2020 Intel Corporation
4 */
5
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
8
9 #include "gem/i915_gem_internal.h"
10
11 #include "i915_reg.h"
12 #include "intel_engine_heartbeat.h"
13 #include "intel_engine_pm.h"
14 #include "intel_engine_regs.h"
15 #include "intel_gpu_commands.h"
16 #include "intel_gt_clock_utils.h"
17 #include "intel_gt_pm.h"
18 #include "intel_rc6.h"
19 #include "selftest_engine_heartbeat.h"
20 #include "selftest_rps.h"
21 #include "selftests/igt_flush_test.h"
22 #include "selftests/igt_spinner.h"
23 #include "selftests/librapl.h"
24
/*
 * Try to isolate the impact of cstates from determining frequency response.
 *
 * The frequency tests register a CPU latency QoS request with this value
 * whenever it is non-negative:
 *   -1 to disable pm_qos, 0 to disable cstates
 */
#define CPU_LATENCY (0)
27
/*
 * No-op work handler.
 *
 * The live tests in this file temporarily install it as rps->work.func
 * (saving the previous handler and restoring it before returning).
 */
static void dummy_rps_work(struct work_struct *wrk)
{
	/* intentionally empty */
}
31
cmp_u64(const void * A,const void * B)32 static int cmp_u64(const void *A, const void *B)
33 {
34 const u64 *a = A, *b = B;
35
36 if (*a < *b)
37 return -1;
38 else if (*a > *b)
39 return 1;
40 else
41 return 0;
42 }
43
cmp_u32(const void * A,const void * B)44 static int cmp_u32(const void *A, const void *B)
45 {
46 const u32 *a = A, *b = B;
47
48 if (*a < *b)
49 return -1;
50 else if (*a > *b)
51 return 1;
52 else
53 return 0;
54 }
55
56 static struct i915_vma *
create_spin_counter(struct intel_engine_cs * engine,struct i915_address_space * vm,bool srm,u32 ** cancel,u32 ** counter)57 create_spin_counter(struct intel_engine_cs *engine,
58 struct i915_address_space *vm,
59 bool srm,
60 u32 **cancel,
61 u32 **counter)
62 {
63 enum {
64 COUNT,
65 INC,
66 __NGPR__,
67 };
68 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
69 struct drm_i915_gem_object *obj;
70 struct i915_vma *vma;
71 unsigned long end;
72 u32 *base, *cs;
73 int loop, i;
74 int err;
75
76 obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
77 if (IS_ERR(obj))
78 return ERR_CAST(obj);
79
80 end = obj->base.size / sizeof(u32) - 1;
81
82 vma = i915_vma_instance(obj, vm, NULL);
83 if (IS_ERR(vma)) {
84 err = PTR_ERR(vma);
85 goto err_put;
86 }
87
88 err = i915_vma_pin(vma, 0, 0, PIN_USER);
89 if (err)
90 goto err_unlock;
91
92 i915_vma_lock(vma);
93
94 base = i915_gem_object_pin_map(obj, I915_MAP_WC);
95 if (IS_ERR(base)) {
96 err = PTR_ERR(base);
97 goto err_unpin;
98 }
99 cs = base;
100
101 *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
102 for (i = 0; i < __NGPR__; i++) {
103 *cs++ = i915_mmio_reg_offset(CS_GPR(i));
104 *cs++ = 0;
105 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
106 *cs++ = 0;
107 }
108
109 *cs++ = MI_LOAD_REGISTER_IMM(1);
110 *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
111 *cs++ = 1;
112
113 loop = cs - base;
114
115 /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
116 for (i = 0; i < 1024; i++) {
117 *cs++ = MI_MATH(4);
118 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
119 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
120 *cs++ = MI_MATH_ADD;
121 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
122
123 if (srm) {
124 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
125 *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
126 *cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
127 *cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
128 }
129 }
130
131 *cs++ = MI_BATCH_BUFFER_START_GEN8;
132 *cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
133 *cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
134 GEM_BUG_ON(cs - base > end);
135
136 i915_gem_object_flush_map(obj);
137
138 *cancel = base + loop;
139 *counter = srm ? memset32(base + end, 0, 1) : NULL;
140 return vma;
141
142 err_unpin:
143 i915_vma_unpin(vma);
144 err_unlock:
145 i915_vma_unlock(vma);
146 err_put:
147 i915_gem_object_put(obj);
148 return ERR_PTR(err);
149 }
150
/*
 * Poll read_cagf() until the frequency settles.
 *
 * @rps:        RPS state to sample
 * @freq:       frequency we are waiting for
 * @timeout_ms: give up after this many milliseconds
 *
 * Returns the last sampled value. Sampling stops when the target is reached,
 * when the timeout expires, or when the sample matches every entry of the
 * 64-entry history (which is pre-filled with @freq). The delay between
 * samples starts at 20us and doubles each round, capped at 20 * @timeout_ms.
 */
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	unsigned long deadline;
	u8 samples[64];
	int delay_us = 20;
	u8 slot = 0;

	memset(samples, freq, sizeof(samples));

	/* The PCU does not change instantly, but drifts towards the goal? */
	deadline = jiffies + msecs_to_jiffies(timeout_ms);
	for (;;) {
		const u8 cur = read_cagf(rps);

		/* Out of time, or target acquired */
		if (time_after(jiffies, deadline) || cur == freq)
			return cur;

		/* No change within the recorded history? Then give up waiting */
		if (!memchr_inv(samples, cur, sizeof(samples)))
			return cur;

		samples[slot] = cur;
		slot = (slot + 1) % ARRAY_SIZE(samples);

		usleep_range(delay_us, 2 * delay_us);

		/* Exponential backoff, clamped */
		delay_us *= 2;
		if (delay_us > timeout_ms * 20)
			delay_us = timeout_ms * 20;
	}
}
187
/*
 * Request @freq under rps->lock and wait for the hardware to follow.
 *
 * Returns 0 if intel_rps_set() kept failing until wait_for() gave up,
 * otherwise the value reported by wait_for_freq() with a 50ms timeout.
 */
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	int timed_out;

	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));

	timed_out = wait_for(!intel_rps_set(rps, freq), 50);
	if (!timed_out) {
		/* The request was accepted, so it must have been recorded */
		GEM_BUG_ON(rps->last_freq != freq);
	}

	mutex_unlock(&rps->lock);

	if (timed_out)
		return 0;

	return wait_for_freq(rps, freq, 50);
}
201
show_pstate_limits(struct intel_rps * rps)202 static void show_pstate_limits(struct intel_rps *rps)
203 {
204 struct drm_i915_private *i915 = rps_to_i915(rps);
205
206 if (IS_BROXTON(i915)) {
207 pr_info("P_STATE_CAP[%x]: 0x%08x\n",
208 i915_mmio_reg_offset(BXT_RP_STATE_CAP),
209 intel_uncore_read(rps_to_uncore(rps),
210 BXT_RP_STATE_CAP));
211 } else if (GRAPHICS_VER(i915) == 9) {
212 pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
213 i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
214 intel_uncore_read(rps_to_uncore(rps),
215 GEN9_RP_STATE_LIMITS));
216 }
217 }
218
live_rps_clock_interval(void * arg)219 int live_rps_clock_interval(void *arg)
220 {
221 struct intel_gt *gt = arg;
222 struct intel_rps *rps = >->rps;
223 void (*saved_work)(struct work_struct *wrk);
224 struct intel_engine_cs *engine;
225 enum intel_engine_id id;
226 struct igt_spinner spin;
227 intel_wakeref_t wakeref;
228 int err = 0;
229
230 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
231 return 0;
232
233 if (igt_spinner_init(&spin, gt))
234 return -ENOMEM;
235
236 intel_gt_pm_wait_for_idle(gt);
237 saved_work = rps->work.func;
238 rps->work.func = dummy_rps_work;
239
240 wakeref = intel_gt_pm_get(gt);
241 intel_rps_disable(>->rps);
242
243 intel_gt_check_clock_frequency(gt);
244
245 for_each_engine(engine, gt, id) {
246 struct i915_request *rq;
247 u32 cycles;
248 u64 dt;
249
250 if (!intel_engine_can_store_dword(engine))
251 continue;
252
253 st_engine_heartbeat_disable(engine);
254
255 rq = igt_spinner_create_request(&spin,
256 engine->kernel_context,
257 MI_NOOP);
258 if (IS_ERR(rq)) {
259 st_engine_heartbeat_enable(engine);
260 err = PTR_ERR(rq);
261 break;
262 }
263
264 i915_request_add(rq);
265
266 if (!igt_wait_for_spinner(&spin, rq)) {
267 pr_err("%s: RPS spinner did not start\n",
268 engine->name);
269 igt_spinner_end(&spin);
270 st_engine_heartbeat_enable(engine);
271 intel_gt_set_wedged(engine->gt);
272 err = -EIO;
273 break;
274 }
275
276 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
277
278 intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
279
280 /* Set the evaluation interval to infinity! */
281 intel_uncore_write_fw(gt->uncore,
282 GEN6_RP_UP_EI, 0xffffffff);
283 intel_uncore_write_fw(gt->uncore,
284 GEN6_RP_UP_THRESHOLD, 0xffffffff);
285
286 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
287 GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
288
289 if (wait_for(intel_uncore_read_fw(gt->uncore,
290 GEN6_RP_CUR_UP_EI),
291 10)) {
292 /* Just skip the test; assume lack of HW support */
293 pr_notice("%s: rps evaluation interval not ticking\n",
294 engine->name);
295 err = -ENODEV;
296 } else {
297 ktime_t dt_[5];
298 u32 cycles_[5];
299 int i;
300
301 for (i = 0; i < 5; i++) {
302 preempt_disable();
303
304 cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
305 dt_[i] = ktime_get();
306
307 udelay(1000);
308
309 cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
310 dt_[i] = ktime_sub(ktime_get(), dt_[i]);
311
312 preempt_enable();
313 }
314
315 /* Use the median of both cycle/dt; close enough */
316 sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
317 cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
318 sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
319 dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
320 }
321
322 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
323 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
324
325 igt_spinner_end(&spin);
326 st_engine_heartbeat_enable(engine);
327
328 if (err == 0) {
329 u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
330 u32 expected =
331 intel_gt_ns_to_pm_interval(gt, dt);
332
333 pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
334 engine->name, cycles, time, dt, expected,
335 gt->clock_frequency / 1000);
336
337 if (10 * time < 8 * dt ||
338 8 * time > 10 * dt) {
339 pr_err("%s: rps clock time does not match walltime!\n",
340 engine->name);
341 err = -EINVAL;
342 }
343
344 if (10 * expected < 8 * cycles ||
345 8 * expected > 10 * cycles) {
346 pr_err("%s: walltime does not match rps clock ticks!\n",
347 engine->name);
348 err = -EINVAL;
349 }
350 }
351
352 if (igt_flush_test(gt->i915))
353 err = -EIO;
354
355 break; /* once is enough */
356 }
357
358 intel_rps_enable(>->rps);
359 intel_gt_pm_put(gt, wakeref);
360
361 igt_spinner_fini(&spin);
362
363 intel_gt_pm_wait_for_idle(gt);
364 rps->work.func = saved_work;
365
366 if (err == -ENODEV) /* skipped, don't report a fail */
367 err = 0;
368
369 return err;
370 }
371
live_rps_control(void * arg)372 int live_rps_control(void *arg)
373 {
374 struct intel_gt *gt = arg;
375 struct intel_rps *rps = >->rps;
376 void (*saved_work)(struct work_struct *wrk);
377 struct intel_engine_cs *engine;
378 enum intel_engine_id id;
379 struct igt_spinner spin;
380 intel_wakeref_t wakeref;
381 int err = 0;
382
383 /*
384 * Check that the actual frequency matches our requested frequency,
385 * to verify our control mechanism. We have to be careful that the
386 * PCU may throttle the GPU in which case the actual frequency used
387 * will be lowered than requested.
388 */
389
390 if (!intel_rps_is_enabled(rps))
391 return 0;
392
393 if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
394 return 0;
395
396 if (igt_spinner_init(&spin, gt))
397 return -ENOMEM;
398
399 intel_gt_pm_wait_for_idle(gt);
400 saved_work = rps->work.func;
401 rps->work.func = dummy_rps_work;
402
403 wakeref = intel_gt_pm_get(gt);
404 for_each_engine(engine, gt, id) {
405 struct i915_request *rq;
406 ktime_t min_dt, max_dt;
407 int f, limit;
408 int min, max;
409
410 if (!intel_engine_can_store_dword(engine))
411 continue;
412
413 st_engine_heartbeat_disable(engine);
414
415 rq = igt_spinner_create_request(&spin,
416 engine->kernel_context,
417 MI_NOOP);
418 if (IS_ERR(rq)) {
419 err = PTR_ERR(rq);
420 break;
421 }
422
423 i915_request_add(rq);
424
425 if (!igt_wait_for_spinner(&spin, rq)) {
426 pr_err("%s: RPS spinner did not start\n",
427 engine->name);
428 igt_spinner_end(&spin);
429 st_engine_heartbeat_enable(engine);
430 intel_gt_set_wedged(engine->gt);
431 err = -EIO;
432 break;
433 }
434
435 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
436 pr_err("%s: could not set minimum frequency [%x], only %x!\n",
437 engine->name, rps->min_freq, read_cagf(rps));
438 igt_spinner_end(&spin);
439 st_engine_heartbeat_enable(engine);
440 show_pstate_limits(rps);
441 err = -EINVAL;
442 break;
443 }
444
445 for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
446 if (rps_set_check(rps, f) < f)
447 break;
448 }
449
450 limit = rps_set_check(rps, f);
451
452 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
453 pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
454 engine->name, rps->min_freq, read_cagf(rps));
455 igt_spinner_end(&spin);
456 st_engine_heartbeat_enable(engine);
457 show_pstate_limits(rps);
458 err = -EINVAL;
459 break;
460 }
461
462 max_dt = ktime_get();
463 max = rps_set_check(rps, limit);
464 max_dt = ktime_sub(ktime_get(), max_dt);
465
466 min_dt = ktime_get();
467 min = rps_set_check(rps, rps->min_freq);
468 min_dt = ktime_sub(ktime_get(), min_dt);
469
470 igt_spinner_end(&spin);
471 st_engine_heartbeat_enable(engine);
472
473 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
474 engine->name,
475 rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
476 rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
477 limit, intel_gpu_freq(rps, limit),
478 min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
479
480 if (limit != rps->max_freq) {
481 u32 throttle = intel_uncore_read(gt->uncore,
482 intel_gt_perf_limit_reasons_reg(gt));
483
484 pr_warn("%s: GPU throttled with reasons 0x%08x\n",
485 engine->name, throttle & GT0_PERF_LIMIT_REASONS_MASK);
486 show_pstate_limits(rps);
487 }
488
489 if (igt_flush_test(gt->i915)) {
490 err = -EIO;
491 break;
492 }
493 }
494 intel_gt_pm_put(gt, wakeref);
495
496 igt_spinner_fini(&spin);
497
498 intel_gt_pm_wait_for_idle(gt);
499 rps->work.func = saved_work;
500
501 return err;
502 }
503
show_pcu_config(struct intel_rps * rps)504 static void show_pcu_config(struct intel_rps *rps)
505 {
506 struct drm_i915_private *i915 = rps_to_i915(rps);
507 unsigned int max_gpu_freq, min_gpu_freq;
508 intel_wakeref_t wakeref;
509 int gpu_freq;
510
511 if (!HAS_LLC(i915))
512 return;
513
514 min_gpu_freq = rps->min_freq;
515 max_gpu_freq = rps->max_freq;
516 if (GRAPHICS_VER(i915) >= 9) {
517 /* Convert GT frequency to 50 HZ units */
518 min_gpu_freq /= GEN9_FREQ_SCALER;
519 max_gpu_freq /= GEN9_FREQ_SCALER;
520 }
521
522 wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
523
524 pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
525 for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
526 int ia_freq = gpu_freq;
527
528 snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
529 &ia_freq, NULL);
530
531 pr_info("%5d %5d %5d\n",
532 gpu_freq * 50,
533 ((ia_freq >> 0) & 0xff) * 100,
534 ((ia_freq >> 8) & 0xff) * 100);
535 }
536
537 intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
538 }
539
__measure_frequency(u32 * cntr,int duration_ms)540 static u64 __measure_frequency(u32 *cntr, int duration_ms)
541 {
542 u64 dc, dt;
543
544 dc = READ_ONCE(*cntr);
545 dt = ktime_get();
546 usleep_range(1000 * duration_ms, 2000 * duration_ms);
547 dc = READ_ONCE(*cntr) - dc;
548 dt = ktime_get() - dt;
549
550 return div64_u64(1000 * 1000 * dc, dt);
551 }
552
measure_frequency_at(struct intel_rps * rps,u32 * cntr,int * freq)553 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
554 {
555 u64 x[5];
556 int i;
557
558 *freq = rps_set_check(rps, *freq);
559 for (i = 0; i < 5; i++)
560 x[i] = __measure_frequency(cntr, 2);
561 *freq = (*freq + read_cagf(rps)) / 2;
562
563 /* A simple triangle filter for better result stability */
564 sort(x, 5, sizeof(*x), cmp_u64, NULL);
565 return div_u64(x[1] + 2 * x[2] + x[3], 4);
566 }
567
__measure_cs_frequency(struct intel_engine_cs * engine,int duration_ms)568 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
569 int duration_ms)
570 {
571 u64 dc, dt;
572
573 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
574 dt = ktime_get();
575 usleep_range(1000 * duration_ms, 2000 * duration_ms);
576 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
577 dt = ktime_get() - dt;
578
579 return div64_u64(1000 * 1000 * dc, dt);
580 }
581
measure_cs_frequency_at(struct intel_rps * rps,struct intel_engine_cs * engine,int * freq)582 static u64 measure_cs_frequency_at(struct intel_rps *rps,
583 struct intel_engine_cs *engine,
584 int *freq)
585 {
586 u64 x[5];
587 int i;
588
589 *freq = rps_set_check(rps, *freq);
590 for (i = 0; i < 5; i++)
591 x[i] = __measure_cs_frequency(engine, 2);
592 *freq = (*freq + read_cagf(rps)) / 2;
593
594 /* A simple triangle filter for better result stability */
595 sort(x, 5, sizeof(*x), cmp_u64, NULL);
596 return div_u64(x[1] + 2 * x[2] + x[3], 4);
597 }
598
scaled_within(u64 x,u64 y,u32 f_n,u32 f_d)599 static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
600 {
601 return f_d * x > f_n * y && f_n * x < f_d * y;
602 }
603
live_rps_frequency_cs(void * arg)604 int live_rps_frequency_cs(void *arg)
605 {
606 void (*saved_work)(struct work_struct *wrk);
607 struct intel_gt *gt = arg;
608 struct intel_rps *rps = >->rps;
609 struct intel_engine_cs *engine;
610 struct pm_qos_request qos;
611 enum intel_engine_id id;
612 int err = 0;
613
614 /*
615 * The premise is that the GPU does change frequency at our behest.
616 * Let's check there is a correspondence between the requested
617 * frequency, the actual frequency, and the observed clock rate.
618 */
619
620 if (!intel_rps_is_enabled(rps))
621 return 0;
622
623 if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
624 return 0;
625
626 if (CPU_LATENCY >= 0)
627 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
628
629 intel_gt_pm_wait_for_idle(gt);
630 saved_work = rps->work.func;
631 rps->work.func = dummy_rps_work;
632
633 for_each_engine(engine, gt, id) {
634 struct i915_request *rq;
635 struct i915_vma *vma;
636 u32 *cancel, *cntr;
637 struct {
638 u64 count;
639 int freq;
640 } min, max;
641
642 st_engine_heartbeat_disable(engine);
643
644 vma = create_spin_counter(engine,
645 engine->kernel_context->vm, false,
646 &cancel, &cntr);
647 if (IS_ERR(vma)) {
648 err = PTR_ERR(vma);
649 st_engine_heartbeat_enable(engine);
650 break;
651 }
652
653 rq = intel_engine_create_kernel_request(engine);
654 if (IS_ERR(rq)) {
655 err = PTR_ERR(rq);
656 goto err_vma;
657 }
658
659 err = i915_vma_move_to_active(vma, rq, 0);
660 if (!err)
661 err = rq->engine->emit_bb_start(rq,
662 i915_vma_offset(vma),
663 PAGE_SIZE, 0);
664 i915_request_add(rq);
665 if (err)
666 goto err_vma;
667
668 if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
669 10)) {
670 pr_err("%s: timed loop did not start\n",
671 engine->name);
672 goto err_vma;
673 }
674
675 min.freq = rps->min_freq;
676 min.count = measure_cs_frequency_at(rps, engine, &min.freq);
677
678 max.freq = rps->max_freq;
679 max.count = measure_cs_frequency_at(rps, engine, &max.freq);
680
681 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
682 engine->name,
683 min.count, intel_gpu_freq(rps, min.freq),
684 max.count, intel_gpu_freq(rps, max.freq),
685 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
686 max.freq * min.count));
687
688 if (!scaled_within(max.freq * min.count,
689 min.freq * max.count,
690 2, 3)) {
691 int f;
692
693 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
694 engine->name,
695 max.freq * min.count,
696 min.freq * max.count);
697 show_pcu_config(rps);
698
699 for (f = min.freq + 1; f <= rps->max_freq; f++) {
700 int act = f;
701 u64 count;
702
703 count = measure_cs_frequency_at(rps, engine, &act);
704 if (act < f)
705 break;
706
707 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
708 engine->name,
709 act, intel_gpu_freq(rps, act), count,
710 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
711 act * min.count));
712
713 f = act; /* may skip ahead [pcu granularity] */
714 }
715
716 err = -EINTR; /* ignore error, continue on with test */
717 }
718
719 err_vma:
720 *cancel = MI_BATCH_BUFFER_END;
721 i915_gem_object_flush_map(vma->obj);
722 i915_gem_object_unpin_map(vma->obj);
723 i915_vma_unpin(vma);
724 i915_vma_unlock(vma);
725 i915_vma_put(vma);
726
727 st_engine_heartbeat_enable(engine);
728 if (igt_flush_test(gt->i915))
729 err = -EIO;
730 if (err)
731 break;
732 }
733
734 intel_gt_pm_wait_for_idle(gt);
735 rps->work.func = saved_work;
736
737 if (CPU_LATENCY >= 0)
738 cpu_latency_qos_remove_request(&qos);
739
740 return err;
741 }
742
live_rps_frequency_srm(void * arg)743 int live_rps_frequency_srm(void *arg)
744 {
745 void (*saved_work)(struct work_struct *wrk);
746 struct intel_gt *gt = arg;
747 struct intel_rps *rps = >->rps;
748 struct intel_engine_cs *engine;
749 struct pm_qos_request qos;
750 enum intel_engine_id id;
751 int err = 0;
752
753 /*
754 * The premise is that the GPU does change frequency at our behest.
755 * Let's check there is a correspondence between the requested
756 * frequency, the actual frequency, and the observed clock rate.
757 */
758
759 if (!intel_rps_is_enabled(rps))
760 return 0;
761
762 if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
763 return 0;
764
765 if (CPU_LATENCY >= 0)
766 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
767
768 intel_gt_pm_wait_for_idle(gt);
769 saved_work = rps->work.func;
770 rps->work.func = dummy_rps_work;
771
772 for_each_engine(engine, gt, id) {
773 struct i915_request *rq;
774 struct i915_vma *vma;
775 u32 *cancel, *cntr;
776 struct {
777 u64 count;
778 int freq;
779 } min, max;
780
781 st_engine_heartbeat_disable(engine);
782
783 vma = create_spin_counter(engine,
784 engine->kernel_context->vm, true,
785 &cancel, &cntr);
786 if (IS_ERR(vma)) {
787 err = PTR_ERR(vma);
788 st_engine_heartbeat_enable(engine);
789 break;
790 }
791
792 rq = intel_engine_create_kernel_request(engine);
793 if (IS_ERR(rq)) {
794 err = PTR_ERR(rq);
795 goto err_vma;
796 }
797
798 err = i915_vma_move_to_active(vma, rq, 0);
799 if (!err)
800 err = rq->engine->emit_bb_start(rq,
801 i915_vma_offset(vma),
802 PAGE_SIZE, 0);
803 i915_request_add(rq);
804 if (err)
805 goto err_vma;
806
807 if (wait_for(READ_ONCE(*cntr), 10)) {
808 pr_err("%s: timed loop did not start\n",
809 engine->name);
810 goto err_vma;
811 }
812
813 min.freq = rps->min_freq;
814 min.count = measure_frequency_at(rps, cntr, &min.freq);
815
816 max.freq = rps->max_freq;
817 max.count = measure_frequency_at(rps, cntr, &max.freq);
818
819 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
820 engine->name,
821 min.count, intel_gpu_freq(rps, min.freq),
822 max.count, intel_gpu_freq(rps, max.freq),
823 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
824 max.freq * min.count));
825
826 if (!scaled_within(max.freq * min.count,
827 min.freq * max.count,
828 1, 2)) {
829 int f;
830
831 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
832 engine->name,
833 max.freq * min.count,
834 min.freq * max.count);
835 show_pcu_config(rps);
836
837 for (f = min.freq + 1; f <= rps->max_freq; f++) {
838 int act = f;
839 u64 count;
840
841 count = measure_frequency_at(rps, cntr, &act);
842 if (act < f)
843 break;
844
845 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
846 engine->name,
847 act, intel_gpu_freq(rps, act), count,
848 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
849 act * min.count));
850
851 f = act; /* may skip ahead [pcu granularity] */
852 }
853
854 err = -EINTR; /* ignore error, continue on with test */
855 }
856
857 err_vma:
858 *cancel = MI_BATCH_BUFFER_END;
859 i915_gem_object_flush_map(vma->obj);
860 i915_gem_object_unpin_map(vma->obj);
861 i915_vma_unpin(vma);
862 i915_vma_unlock(vma);
863 i915_vma_put(vma);
864
865 st_engine_heartbeat_enable(engine);
866 if (igt_flush_test(gt->i915))
867 err = -EIO;
868 if (err)
869 break;
870 }
871
872 intel_gt_pm_wait_for_idle(gt);
873 rps->work.func = saved_work;
874
875 if (CPU_LATENCY >= 0)
876 cpu_latency_qos_remove_request(&qos);
877
878 return err;
879 }
880
sleep_for_ei(struct intel_rps * rps,int timeout_us)881 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
882 {
883 /* Flush any previous EI */
884 usleep_range(timeout_us, 2 * timeout_us);
885
886 /* Reset the interrupt status */
887 rps_disable_interrupts(rps);
888 GEM_BUG_ON(rps->pm_iir);
889 rps_enable_interrupts(rps);
890
891 /* And then wait for the timeout, for real this time */
892 usleep_range(2 * timeout_us, 3 * timeout_us);
893 }
894
__rps_up_interrupt(struct intel_rps * rps,struct intel_engine_cs * engine,struct igt_spinner * spin)895 static int __rps_up_interrupt(struct intel_rps *rps,
896 struct intel_engine_cs *engine,
897 struct igt_spinner *spin)
898 {
899 struct intel_uncore *uncore = engine->uncore;
900 struct i915_request *rq;
901 u32 timeout;
902
903 if (!intel_engine_can_store_dword(engine))
904 return 0;
905
906 rps_set_check(rps, rps->min_freq);
907
908 rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
909 if (IS_ERR(rq))
910 return PTR_ERR(rq);
911
912 i915_request_get(rq);
913 i915_request_add(rq);
914
915 if (!igt_wait_for_spinner(spin, rq)) {
916 pr_err("%s: RPS spinner did not start\n",
917 engine->name);
918 i915_request_put(rq);
919 intel_gt_set_wedged(engine->gt);
920 return -EIO;
921 }
922
923 if (!intel_rps_is_active(rps)) {
924 pr_err("%s: RPS not enabled on starting spinner\n",
925 engine->name);
926 igt_spinner_end(spin);
927 i915_request_put(rq);
928 return -EINVAL;
929 }
930
931 if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
932 pr_err("%s: RPS did not register UP interrupt\n",
933 engine->name);
934 i915_request_put(rq);
935 return -EINVAL;
936 }
937
938 if (rps->last_freq != rps->min_freq) {
939 pr_err("%s: RPS did not program min frequency\n",
940 engine->name);
941 i915_request_put(rq);
942 return -EINVAL;
943 }
944
945 timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
946 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
947 timeout = DIV_ROUND_UP(timeout, 1000);
948
949 sleep_for_ei(rps, timeout);
950 GEM_BUG_ON(i915_request_completed(rq));
951
952 igt_spinner_end(spin);
953 i915_request_put(rq);
954
955 if (rps->cur_freq != rps->min_freq) {
956 pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
957 engine->name, intel_rps_read_actual_frequency(rps));
958 return -EINVAL;
959 }
960
961 if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
962 pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
963 engine->name, rps->pm_iir,
964 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
965 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
966 intel_uncore_read(uncore, GEN6_RP_UP_EI));
967 return -EINVAL;
968 }
969
970 return 0;
971 }
972
__rps_down_interrupt(struct intel_rps * rps,struct intel_engine_cs * engine)973 static int __rps_down_interrupt(struct intel_rps *rps,
974 struct intel_engine_cs *engine)
975 {
976 struct intel_uncore *uncore = engine->uncore;
977 u32 timeout;
978
979 rps_set_check(rps, rps->max_freq);
980
981 if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
982 pr_err("%s: RPS did not register DOWN interrupt\n",
983 engine->name);
984 return -EINVAL;
985 }
986
987 if (rps->last_freq != rps->max_freq) {
988 pr_err("%s: RPS did not program max frequency\n",
989 engine->name);
990 return -EINVAL;
991 }
992
993 timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
994 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
995 timeout = DIV_ROUND_UP(timeout, 1000);
996
997 sleep_for_ei(rps, timeout);
998
999 if (rps->cur_freq != rps->max_freq) {
1000 pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
1001 engine->name,
1002 intel_rps_read_actual_frequency(rps));
1003 return -EINVAL;
1004 }
1005
1006 if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1007 pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1008 engine->name, rps->pm_iir,
1009 intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1010 intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1011 intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1012 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1013 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1014 intel_uncore_read(uncore, GEN6_RP_UP_EI));
1015 return -EINVAL;
1016 }
1017
1018 return 0;
1019 }
1020
live_rps_interrupt(void * arg)1021 int live_rps_interrupt(void *arg)
1022 {
1023 struct intel_gt *gt = arg;
1024 struct intel_rps *rps = >->rps;
1025 void (*saved_work)(struct work_struct *wrk);
1026 struct intel_engine_cs *engine;
1027 enum intel_engine_id id;
1028 struct igt_spinner spin;
1029 intel_wakeref_t wakeref;
1030 u32 pm_events;
1031 int err = 0;
1032
1033 /*
1034 * First, let's check whether or not we are receiving interrupts.
1035 */
1036
1037 if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
1038 return 0;
1039
1040 pm_events = 0;
1041 with_intel_gt_pm(gt, wakeref)
1042 pm_events = rps->pm_events;
1043 if (!pm_events) {
1044 pr_err("No RPS PM events registered, but RPS is enabled?\n");
1045 return -ENODEV;
1046 }
1047
1048 if (igt_spinner_init(&spin, gt))
1049 return -ENOMEM;
1050
1051 intel_gt_pm_wait_for_idle(gt);
1052 saved_work = rps->work.func;
1053 rps->work.func = dummy_rps_work;
1054
1055 for_each_engine(engine, gt, id) {
1056 /* Keep the engine busy with a spinner; expect an UP! */
1057 if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1058 intel_gt_pm_wait_for_idle(engine->gt);
1059 GEM_BUG_ON(intel_rps_is_active(rps));
1060
1061 st_engine_heartbeat_disable(engine);
1062
1063 err = __rps_up_interrupt(rps, engine, &spin);
1064
1065 st_engine_heartbeat_enable(engine);
1066 if (err)
1067 goto out;
1068
1069 intel_gt_pm_wait_for_idle(engine->gt);
1070 }
1071
1072 /* Keep the engine awake but idle and check for DOWN */
1073 if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1074 st_engine_heartbeat_disable(engine);
1075 intel_rc6_disable(>->rc6);
1076
1077 err = __rps_down_interrupt(rps, engine);
1078
1079 intel_rc6_enable(>->rc6);
1080 st_engine_heartbeat_enable(engine);
1081 if (err)
1082 goto out;
1083 }
1084 }
1085
1086 out:
1087 if (igt_flush_test(gt->i915))
1088 err = -EIO;
1089
1090 igt_spinner_fini(&spin);
1091
1092 intel_gt_pm_wait_for_idle(gt);
1093 rps->work.func = saved_work;
1094
1095 return err;
1096 }
1097
__measure_power(int duration_ms)1098 static u64 __measure_power(int duration_ms)
1099 {
1100 u64 dE, dt;
1101
1102 dE = librapl_energy_uJ();
1103 dt = ktime_get();
1104 usleep_range(1000 * duration_ms, 2000 * duration_ms);
1105 dE = librapl_energy_uJ() - dE;
1106 dt = ktime_get() - dt;
1107
1108 return div64_u64(1000 * 1000 * dE, dt);
1109 }
1110
measure_power(struct intel_rps * rps,int * freq)1111 static u64 measure_power(struct intel_rps *rps, int *freq)
1112 {
1113 u64 x[5];
1114 int i;
1115
1116 for (i = 0; i < 5; i++)
1117 x[i] = __measure_power(5);
1118
1119 *freq = (*freq + read_cagf(rps)) / 2;
1120
1121 /* A simple triangle filter for better result stability */
1122 sort(x, 5, sizeof(*x), cmp_u64, NULL);
1123 return div_u64(x[1] + 2 * x[2] + x[3], 4);
1124 }
1125
measure_power_at(struct intel_rps * rps,int * freq)1126 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1127 {
1128 *freq = rps_set_check(rps, *freq);
1129 msleep(100);
1130 return measure_power(rps, freq);
1131 }
1132
live_rps_power(void * arg)1133 int live_rps_power(void *arg)
1134 {
1135 struct intel_gt *gt = arg;
1136 struct intel_rps *rps = >->rps;
1137 void (*saved_work)(struct work_struct *wrk);
1138 struct intel_engine_cs *engine;
1139 enum intel_engine_id id;
1140 struct igt_spinner spin;
1141 int err = 0;
1142
1143 /*
1144 * Our fundamental assumption is that running at lower frequency
1145 * actually saves power. Let's see if our RAPL measurement support
1146 * that theory.
1147 */
1148
1149 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1150 return 0;
1151
1152 if (!librapl_supported(gt->i915))
1153 return 0;
1154
1155 if (igt_spinner_init(&spin, gt))
1156 return -ENOMEM;
1157
1158 intel_gt_pm_wait_for_idle(gt);
1159 saved_work = rps->work.func;
1160 rps->work.func = dummy_rps_work;
1161
1162 for_each_engine(engine, gt, id) {
1163 struct i915_request *rq;
1164 struct {
1165 u64 power;
1166 int freq;
1167 } min, max;
1168
1169 if (!intel_engine_can_store_dword(engine))
1170 continue;
1171
1172 st_engine_heartbeat_disable(engine);
1173
1174 rq = igt_spinner_create_request(&spin,
1175 engine->kernel_context,
1176 MI_NOOP);
1177 if (IS_ERR(rq)) {
1178 st_engine_heartbeat_enable(engine);
1179 err = PTR_ERR(rq);
1180 break;
1181 }
1182
1183 i915_request_add(rq);
1184
1185 if (!igt_wait_for_spinner(&spin, rq)) {
1186 pr_err("%s: RPS spinner did not start\n",
1187 engine->name);
1188 igt_spinner_end(&spin);
1189 st_engine_heartbeat_enable(engine);
1190 intel_gt_set_wedged(engine->gt);
1191 err = -EIO;
1192 break;
1193 }
1194
1195 max.freq = rps->max_freq;
1196 max.power = measure_power_at(rps, &max.freq);
1197
1198 min.freq = rps->min_freq;
1199 min.power = measure_power_at(rps, &min.freq);
1200
1201 igt_spinner_end(&spin);
1202 st_engine_heartbeat_enable(engine);
1203
1204 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1205 engine->name,
1206 min.power, intel_gpu_freq(rps, min.freq),
1207 max.power, intel_gpu_freq(rps, max.freq));
1208
1209 if (10 * min.freq >= 9 * max.freq) {
1210 pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1211 min.freq, intel_gpu_freq(rps, min.freq),
1212 max.freq, intel_gpu_freq(rps, max.freq));
1213 continue;
1214 }
1215
1216 if (11 * min.power > 10 * max.power) {
1217 pr_err("%s: did not conserve power when setting lower frequency!\n",
1218 engine->name);
1219 err = -EINVAL;
1220 break;
1221 }
1222
1223 if (igt_flush_test(gt->i915)) {
1224 err = -EIO;
1225 break;
1226 }
1227 }
1228
1229 igt_spinner_fini(&spin);
1230
1231 intel_gt_pm_wait_for_idle(gt);
1232 rps->work.func = saved_work;
1233
1234 return err;
1235 }
1236
live_rps_dynamic(void * arg)1237 int live_rps_dynamic(void *arg)
1238 {
1239 struct intel_gt *gt = arg;
1240 struct intel_rps *rps = >->rps;
1241 struct intel_engine_cs *engine;
1242 enum intel_engine_id id;
1243 struct igt_spinner spin;
1244 int err = 0;
1245
1246 /*
1247 * We've looked at the bascs, and have established that we
1248 * can change the clock frequency and that the HW will generate
1249 * interrupts based on load. Now we check how we integrate those
1250 * moving parts into dynamic reclocking based on load.
1251 */
1252
1253 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1254 return 0;
1255
1256 if (igt_spinner_init(&spin, gt))
1257 return -ENOMEM;
1258
1259 if (intel_rps_has_interrupts(rps))
1260 pr_info("RPS has interrupt support\n");
1261 if (intel_rps_uses_timer(rps))
1262 pr_info("RPS has timer support\n");
1263
1264 for_each_engine(engine, gt, id) {
1265 struct i915_request *rq;
1266 struct {
1267 ktime_t dt;
1268 u8 freq;
1269 } min, max;
1270
1271 if (!intel_engine_can_store_dword(engine))
1272 continue;
1273
1274 intel_gt_pm_wait_for_idle(gt);
1275 GEM_BUG_ON(intel_rps_is_active(rps));
1276 rps->cur_freq = rps->min_freq;
1277
1278 intel_engine_pm_get(engine);
1279 intel_rc6_disable(>->rc6);
1280 GEM_BUG_ON(rps->last_freq != rps->min_freq);
1281
1282 rq = igt_spinner_create_request(&spin,
1283 engine->kernel_context,
1284 MI_NOOP);
1285 if (IS_ERR(rq)) {
1286 err = PTR_ERR(rq);
1287 goto err;
1288 }
1289
1290 i915_request_add(rq);
1291
1292 max.dt = ktime_get();
1293 max.freq = wait_for_freq(rps, rps->max_freq, 500);
1294 max.dt = ktime_sub(ktime_get(), max.dt);
1295
1296 igt_spinner_end(&spin);
1297
1298 min.dt = ktime_get();
1299 min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1300 min.dt = ktime_sub(ktime_get(), min.dt);
1301
1302 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1303 engine->name,
1304 max.freq, intel_gpu_freq(rps, max.freq),
1305 ktime_to_ns(max.dt),
1306 min.freq, intel_gpu_freq(rps, min.freq),
1307 ktime_to_ns(min.dt));
1308 if (min.freq >= max.freq) {
1309 pr_err("%s: dynamic reclocking of spinner failed\n!",
1310 engine->name);
1311 err = -EINVAL;
1312 }
1313
1314 err:
1315 intel_rc6_enable(>->rc6);
1316 intel_engine_pm_put(engine);
1317
1318 if (igt_flush_test(gt->i915))
1319 err = -EIO;
1320 if (err)
1321 break;
1322 }
1323
1324 igt_spinner_fini(&spin);
1325
1326 return err;
1327 }
1328