xref: /linux/drivers/gpu/drm/i915/gt/selftest_rc6.c (revision ab93e0dd72c37d378dd936f031ffb83ff2bd87ce)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2019 Intel Corporation
4  */
5 
6 #include "intel_context.h"
7 #include "intel_engine_pm.h"
8 #include "intel_gpu_commands.h"
9 #include "intel_gt_requests.h"
10 #include "intel_ring.h"
11 #include "intel_rps.h"
12 #include "selftest_rc6.h"
13 
14 #include "selftests/i915_random.h"
15 #include "selftests/librapl.h"
16 
rc6_residency(struct intel_rc6 * rc6)17 static u64 rc6_residency(struct intel_rc6 *rc6)
18 {
19 	u64 result;
20 
21 	/* XXX VLV_GT_MEDIA_RC6? */
22 
23 	result = intel_rc6_residency_ns(rc6, INTEL_RC6_RES_RC6);
24 	if (HAS_RC6p(rc6_to_i915(rc6)))
25 		result += intel_rc6_residency_ns(rc6, INTEL_RC6_RES_RC6p);
26 	if (HAS_RC6pp(rc6_to_i915(rc6)))
27 		result += intel_rc6_residency_ns(rc6, INTEL_RC6_RES_RC6pp);
28 
29 	return result;
30 }
31 
/*
 * live_rc6_manual - verify manual control over RC6 entry/exit
 *
 * With RC6 forcibly disabled the residency counter must not advance over a
 * 1s sleep; after parking the GT (manual RC6 entry) it must. Where RAPL
 * energy readings are available (librapl), additionally check that RC0
 * draws measurable power and that RC6 draws less than half of RC0.
 *
 * Returns 0 on success (or when the test is not applicable), -EINVAL on
 * any mismatch.
 */
int live_rc6_manual(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rc6 *rc6 = &gt->rc6;
	struct intel_rps *rps = &gt->rps;
	intel_wakeref_t wakeref;
	u64 rc0_sample_energy[2];	/* [0] = start uJ, [1] = delta over sleep */
	u64 rc6_sample_energy[2];	/* ditto for the RC6 window */
	u64 sleep_time = 1000;		/* ms */
	u32 rc0_freq = 0;
	u32 rc6_freq = 0;
	u64 rc0_power;
	u64 rc6_power;
	bool has_power;
	u64 threshold;
	ktime_t dt;
	u64 res[2];			/* RC6 residency before/after each window */
	int err = 0;
	u64 diff;


	/*
	 * Our claim is that we can "encourage" the GPU to enter rc6 at will.
	 * Let's try it!
	 */

	if (!rc6->enabled)
		return 0;

	/* bsw/byt use a PCU and decouple RC6 from our manual control */
	if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
		return 0;

	has_power = librapl_supported(gt->i915);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	/* Force RC6 off for starters */
	__intel_rc6_disable(rc6);
	/* wakeup is not immediate, takes about 100us on icl */
	usleep_range(1000, 2000);

	res[0] = rc6_residency(rc6);

	/* Sample energy/residency over a fixed window while RC6 is disabled */
	dt = ktime_get();
	rc0_sample_energy[0] = librapl_energy_uJ();
	msleep(sleep_time);
	rc0_sample_energy[1] = librapl_energy_uJ() - rc0_sample_energy[0];
	dt = ktime_sub(ktime_get(), dt);
	res[1] = rc6_residency(rc6);
	rc0_freq = intel_rps_read_actual_frequency_fw(rps);
	/* >> 10 approximates ns -> us (2^10 ~ 1000) */
	if ((res[1] - res[0]) >> 10) {
		pr_err("RC6 residency increased by %lldus while disabled for 1000ms!\n",
		       (res[1] - res[0]) >> 10);
		err = -EINVAL;
		goto out_unlock;
	}

	if (has_power) {
		/* Average power over the window: uJ * (ns/s) / ns -> uW */
		rc0_power = div64_u64(NSEC_PER_SEC * rc0_sample_energy[1],
				      ktime_to_ns(dt));

		/* An awake GPU must draw some power; zero means RAPL is lying */
		if (!rc0_power) {
			if (rc0_freq)
				pr_debug("No power measured while in RC0! GPU Freq: %uMHz in RC0\n",
					 rc0_freq);
			else
				pr_err("No power and freq measured while in RC0\n");
			err = -EINVAL;
			goto out_unlock;
		}
	}

	/* Manually enter RC6 */
	intel_rc6_park(rc6);

	res[0] = rc6_residency(rc6);
	/* Release any pending forcewake so the GT is free to sleep */
	intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL);
	dt = ktime_get();
	rc6_sample_energy[0] = librapl_energy_uJ();
	msleep(sleep_time);
	rc6_freq = intel_rps_read_actual_frequency_fw(rps);
	rc6_sample_energy[1] = librapl_energy_uJ() - rc6_sample_energy[0];
	dt = ktime_sub(ktime_get(), dt);
	res[1] = rc6_residency(rc6);
	if (res[1] == res[0]) {
		pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x, residency=%lld\n",
		       intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE),
		       intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL),
		       res[0]);
		err = -EINVAL;
	}

	if (has_power) {
		rc6_power = div64_u64(NSEC_PER_SEC * rc6_sample_energy[1],
				      ktime_to_ns(dt));
		pr_info("GPU consumed %lluuW in RC0 and %lluuW in RC6\n",
			rc0_power, rc6_power);

		/* Expect RC6 to draw less than half the RC0 power */
		if (2 * rc6_power > rc0_power) {
			pr_err("GPU leaked energy while in RC6!\n"
			       "GPU Freq: %uMHz in RC6 and %uMHz in RC0\n"
			       "RC0 energy before & after sleep respectively: %lluuJ %lluuJ\n"
			       "RC6 energy before & after sleep respectively: %lluuJ %lluuJ\n",
			       rc6_freq, rc0_freq, rc0_sample_energy[0], rc0_sample_energy[1],
			       rc6_sample_energy[0], rc6_sample_energy[1]);

			/* Distinguish "leaked power" from "never really slept":
			 * require at least 90% of the sleep spent in RC6.
			 */
			diff = res[1] - res[0];
			threshold = (9 * NSEC_PER_MSEC * sleep_time) / 10;
			if (diff < threshold)
				pr_err("Did not enter RC6 properly, RC6 start residency=%lluns, RC6 end residency=%lluns\n",
				       res[0], res[1]);
			err = -EINVAL;
			goto out_unlock;
		}
	}

	/* Restore what should have been the original state! */
	intel_rc6_unpark(rc6);

out_unlock:
	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	return err;
}
155 
/*
 * Submit a request on @ce that stores GEN8_RC6_CTX_INFO into the context
 * timeline's HWSP via MI_STORE_REGISTER_MEM, and return a CPU pointer to
 * the dword where the value will be written (or an ERR_PTR). The caller
 * must wait for the request to complete before reading the result.
 */
static const u32 *__live_rc6_ctx(struct intel_context *ce)
{
	struct i915_request *rq;
	const u32 *result;
	u32 cmd;
	u32 *cs;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return ERR_CAST(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		/* Flush the request even on failure so it is retired */
		i915_request_add(rq);
		return cs;
	}

	cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
	/* gen8+ SRM takes a 64b address, i.e. one extra dword of length */
	if (GRAPHICS_VER(rq->i915) >= 8)
		cmd++;

	*cs++ = cmd;
	*cs++ = i915_mmio_reg_offset(GEN8_RC6_CTX_INFO);
	*cs++ = ce->timeline->hwsp_offset + 8;	/* 2 dwords past the seqno */
	*cs++ = 0;				/* upper address dword on gen8+ */
	intel_ring_advance(rq, cs);

	/* CPU view of the same HWSP slot targeted by the SRM above */
	result = rq->hwsp_seqno + 2;
	i915_request_add(rq);

	return result;
}
188 
189 static struct intel_engine_cs **
randomised_engines(struct intel_gt * gt,struct rnd_state * prng,unsigned int * count)190 randomised_engines(struct intel_gt *gt,
191 		   struct rnd_state *prng,
192 		   unsigned int *count)
193 {
194 	struct intel_engine_cs *engine, **engines;
195 	enum intel_engine_id id;
196 	int n;
197 
198 	n = 0;
199 	for_each_engine(engine, gt, id)
200 		n++;
201 	if (!n)
202 		return NULL;
203 
204 	engines = kmalloc_array(n, sizeof(*engines), GFP_KERNEL);
205 	if (!engines)
206 		return NULL;
207 
208 	n = 0;
209 	for_each_engine(engine, gt, id)
210 		engines[n++] = engine;
211 
212 	i915_prandom_shuffle(engines, sizeof(*engines), n, prng);
213 
214 	*count = n;
215 	return engines;
216 }
217 
/*
 * live_rc6_ctx_wa - poke the RC6 context-info register from every engine
 *
 * A read of CTX_INFO is known to upset rc6 on affected parts; the
 * workaround should keep the GPU alive regardless. Run the read twice
 * from a throwaway context on each engine (in random order) and fail if
 * any engine-reset count increased, which would mean the w/a is broken.
 *
 * Returns 0 on success, negative errno on failure; wedges the GT on
 * timeout waiting for idle.
 */
int live_rc6_ctx_wa(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs **engines;
	unsigned int n, count;
	I915_RND_STATE(prng);
	int err = 0;

	/* A read of CTX_INFO upsets rc6. Poke the bear! */
	if (GRAPHICS_VER(gt->i915) < 8)
		return 0;

	engines = randomised_engines(gt, &prng, &count);
	if (!engines)
		return 0;

	for (n = 0; n < count; n++) {
		struct intel_engine_cs *engine = engines[n];
		int pass;

		for (pass = 0; pass < 2; pass++) {
			struct i915_gpu_error *error = &gt->i915->gpu_error;
			struct intel_context *ce;
			/* Snapshot the reset count so we can spot new resets */
			unsigned int resets =
				i915_reset_engine_count(error, engine);
			const u32 *res;

			/* Use a sacrificial context */
			ce = intel_context_create(engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			/* Hold the engine awake across the request submission */
			intel_engine_pm_get(engine);
			res = __live_rc6_ctx(ce);
			intel_engine_pm_put(engine);
			intel_context_put(ce);
			if (IS_ERR(res)) {
				err = PTR_ERR(res);
				goto out;
			}

			if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
				intel_gt_set_wedged(gt);
				err = -ETIME;
				goto out;
			}

			/* Let the GT park (and so enter rc6) before checking */
			intel_gt_pm_wait_for_idle(gt);
			pr_debug("%s: CTX_INFO=%0x\n",
				 engine->name, READ_ONCE(*res));

			/* Any new reset means the CTX_INFO read killed rc6 */
			if (resets !=
			    i915_reset_engine_count(error, engine)) {
				pr_err("%s: GPU reset required\n",
				       engine->name);
				add_taint_for_CI(gt->i915, TAINT_WARN);
				err = -EIO;
				goto out;
			}
		}
	}

out:
	kfree(engines);
	return err;
}
286