1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2018 Intel Corporation
4  */
5 
6 #include <linux/prime_numbers.h>
7 
8 #include "gem/i915_gem_internal.h"
9 
10 #include "i915_drv.h"
11 #include "i915_selftest.h"
12 #include "intel_engine_heartbeat.h"
13 #include "intel_engine_pm.h"
14 #include "intel_reset.h"
15 #include "intel_ring.h"
16 #include "selftest_engine_heartbeat.h"
17 #include "selftests/i915_random.h"
18 #include "selftests/igt_flush_test.h"
19 #include "selftests/igt_live_test.h"
20 #include "selftests/igt_spinner.h"
21 #include "selftests/lib_sw_fence.h"
22 #include "shmem_utils.h"
23 
24 #include "gem/selftests/igt_gem_utils.h"
25 #include "gem/selftests/mock_context.h"
26 
27 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
28 #define NUM_GPR 16
29 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
30 
31 #define LRI_HEADER MI_INSTR(0x22, 0)
32 #define LRI_LENGTH_MASK GENMASK(7, 0)
33 
34 static struct i915_vma *create_scratch(struct intel_gt *gt)
35 {
36 	return __vm_create_scratch_for_read_pinned(&gt->ggtt->vm, PAGE_SIZE);
37 }
38 
39 static bool is_active(struct i915_request *rq)
40 {
41 	if (i915_request_is_active(rq))
42 		return true;
43 
44 	if (i915_request_on_hold(rq))
45 		return true;
46 
47 	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
48 		return true;
49 
50 	return false;
51 }
52 
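/*
 * Wait for the backend to acknowledge the request: either it has already
 * completed, or it is active on the HW with nothing still pending in the
 * ELSP. Returns -ETIME if that takes longer than @timeout jiffies.
 */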
53 static int wait_for_submit(struct intel_engine_cs *engine,
54 			   struct i915_request *rq,
55 			   unsigned long timeout)
56 {
57 	/* Ignore our own attempts to suppress excess tasklets */
58 	tasklet_hi_schedule(&engine->sched_engine->tasklet);
59 
60 	timeout += jiffies;
61 	do {
62 		bool done = time_after(jiffies, timeout);
63 
64 		if (i915_request_completed(rq)) /* that was quick! */
65 			return 0;
66 
67 		/* Wait until the HW has acknowledged the submission (or err) */
68 		intel_engine_flush_submission(engine);
69 		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
70 			return 0;
71 
72 		if (done)
73 			return -ETIME;
74 
75 		cond_resched();
76 	} while (1);
77 }
78 
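/*
 * Emit a barrier-priority request on @ce that writes 1 into the given
 * status page slot, releasing any MI_SEMAPHORE_WAIT polling on it.
 */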
79 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
80 {
81 	const u32 offset =
82 		i915_ggtt_offset(ce->engine->status_page.vma) +
83 		offset_in_page(slot);
84 	struct i915_request *rq;
85 	u32 *cs;
86 
87 	rq = intel_context_create_request(ce);
88 	if (IS_ERR(rq))
89 		return PTR_ERR(rq);
90 
91 	cs = intel_ring_begin(rq, 4);
92 	if (IS_ERR(cs)) {
93 		i915_request_add(rq);
94 		return PTR_ERR(cs);
95 	}
96 
97 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
98 	*cs++ = offset;
99 	*cs++ = 0;
100 	*cs++ = 1;
101 
102 	intel_ring_advance(rq, cs);
103 
104 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
105 	i915_request_add(rq);
106 	return 0;
107 }
108 
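/*
 * Queue a kernel request behind everything on @ce's timeline and wait for
 * it, so that on return @ce has been switched out and its state saved.
 */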
109 static int context_flush(struct intel_context *ce, long timeout)
110 {
111 	struct i915_request *rq;
112 	struct dma_fence *fence;
113 	int err = 0;
114 
115 	rq = intel_engine_create_kernel_request(ce->engine);
116 	if (IS_ERR(rq))
117 		return PTR_ERR(rq);
118 
119 	fence = i915_active_fence_get(&ce->timeline->last_request);
120 	if (fence) {
121 		i915_request_await_dma_fence(rq, fence);
122 		dma_fence_put(fence);
123 	}
124 
125 	rq = i915_request_get(rq);
126 	i915_request_add(rq);
127 	if (i915_request_wait(rq, 0, timeout) < 0)
128 		err = -ETIME;
129 	i915_request_put(rq);
130 
131 	rmb(); /* We know the request is written, make sure all state is too! */
132 	return err;
133 }
134 
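/*
 * Mask of the register-offset bits the HW compares when the LRI/LRM
 * CS_MMIO (engine-relative) bit is set; the valid range depends on the
 * graphics version and engine class.
 */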
135 static int get_lri_mask(struct intel_engine_cs *engine, u32 lri)
136 {
137 	if ((lri & MI_LRI_LRM_CS_MMIO) == 0)
138 		return ~0u;
139 
140 	if (GRAPHICS_VER(engine->i915) < 12)
141 		return 0xfff;
142 
143 	switch (engine->class) {
144 	default:
145 	case RENDER_CLASS:
146 	case COMPUTE_CLASS:
147 		return 0x07ff;
148 	case COPY_ENGINE_CLASS:
149 		return 0x0fff;
150 	case VIDEO_DECODE_CLASS:
151 	case VIDEO_ENHANCEMENT_CLASS:
152 		return 0x3fff;
153 	}
154 }
155 
156 static int live_lrc_layout(void *arg)
157 {
158 	struct intel_gt *gt = arg;
159 	struct intel_engine_cs *engine;
160 	enum intel_engine_id id;
161 	u32 *lrc;
162 	int err;
163 
164 	/*
165 	 * Check that the register offsets we use to create the initial reg
166 	 * state match the layout saved by the HW.
167 	 */
168 
169 	lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */
170 	if (!lrc)
171 		return -ENOMEM;
172 	GEM_BUG_ON(offset_in_page(lrc));
173 
174 	err = 0;
175 	for_each_engine(engine, gt, id) {
176 		u32 *hw;
177 		int dw;
178 
179 		if (!engine->default_state)
180 			continue;
181 
182 		hw = shmem_pin_map(engine->default_state);
183 		if (!hw) {
184 			err = -ENOMEM;
185 			break;
186 		}
187 		hw += LRC_STATE_OFFSET / sizeof(*hw);
188 
189 		__lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE),
190 				engine->kernel_context, engine, true);
191 
192 		dw = 0;
193 		do {
194 			u32 lri = READ_ONCE(hw[dw]);
195 			u32 lri_mask;
196 
197 			if (lri == 0) {
198 				dw++;
199 				continue;
200 			}
201 
202 			if (lrc[dw] == 0) {
203 				pr_debug("%s: skipped instruction %x at dword %d\n",
204 					 engine->name, lri, dw);
205 				dw++;
206 				continue;
207 			}
208 
209 			if ((lri & GENMASK(31, 23)) != LRI_HEADER) {
210 				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
211 				       engine->name, dw, lri);
212 				err = -EINVAL;
213 				break;
214 			}
215 
216 			if (lrc[dw] != lri) {
217 				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
218 				       engine->name, dw, lri, lrc[dw]);
219 				err = -EINVAL;
220 				break;
221 			}
222 
223 			/*
224 			 * When bit 19 of the MI_LOAD_REGISTER_IMM opcode is
225 			 * set on Gen12+ devices, the HW ignores certain bits
226 			 * of the register address offset and instead checks
227 			 * only the following bits for valid address ranges
228 			 * on specific engines:
229 			 * RCS && CCS: BITS(0 - 10)
230 			 * BCS: BITS(0 - 11)
231 			 * VECS && VCS: BITS(0 - 13)
232 			 */
233 			lri_mask = get_lri_mask(engine, lri);
234 
235 			lri &= 0x7f;
236 			lri++;
237 			dw++;
238 
239 			while (lri) {
240 				u32 offset = READ_ONCE(hw[dw]);
241 
242 				if ((offset ^ lrc[dw]) & lri_mask) {
243 					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
244 					       engine->name, dw, offset, lrc[dw]);
245 					err = -EINVAL;
246 					break;
247 				}
248 
249 				/*
250 				 * Skip over the actual register value as we
251 				 * expect that to differ.
252 				 */
253 				dw += 2;
254 				lri -= 2;
255 			}
256 		} while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
257 
258 		if (err) {
259 			pr_info("%s: HW register image:\n", engine->name);
260 			igt_hexdump(hw, PAGE_SIZE);
261 
262 			pr_info("%s: SW register image:\n", engine->name);
263 			igt_hexdump(lrc, PAGE_SIZE);
264 		}
265 
266 		shmem_unpin_map(engine->default_state, hw);
267 		if (err)
268 			break;
269 	}
270 
271 	free_page((unsigned long)lrc);
272 	return err;
273 }
274 
275 static int find_offset(const u32 *lri, u32 offset)
276 {
277 	int i;
278 
279 	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
280 		if (lri[i] == offset)
281 			return i;
282 
283 	return -1;
284 }
285 
286 static int live_lrc_fixed(void *arg)
287 {
288 	struct intel_gt *gt = arg;
289 	struct intel_engine_cs *engine;
290 	enum intel_engine_id id;
291 	int err = 0;
292 
293 	/*
294 	 * Check the assumed register offsets match the actual locations in
295 	 * the context image.
296 	 */
297 
298 	for_each_engine(engine, gt, id) {
299 		const struct {
300 			u32 reg;
301 			u32 offset;
302 			const char *name;
303 		} tbl[] = {
304 			{
305 				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
306 				CTX_RING_START - 1,
307 				"RING_START"
308 			},
309 			{
310 				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
311 				CTX_RING_CTL - 1,
312 				"RING_CTL"
313 			},
314 			{
315 				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
316 				CTX_RING_HEAD - 1,
317 				"RING_HEAD"
318 			},
319 			{
320 				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
321 				CTX_RING_TAIL - 1,
322 				"RING_TAIL"
323 			},
324 			{
325 				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
326 				lrc_ring_mi_mode(engine),
327 				"RING_MI_MODE"
328 			},
329 			{
330 				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
331 				CTX_BB_STATE - 1,
332 				"BB_STATE"
333 			},
334 			{
335 				i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
336 				lrc_ring_wa_bb_per_ctx(engine),
337 				"RING_BB_PER_CTX_PTR"
338 			},
339 			{
340 				i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
341 				lrc_ring_indirect_ptr(engine),
342 				"RING_INDIRECT_CTX_PTR"
343 			},
344 			{
345 				i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
346 				lrc_ring_indirect_offset(engine),
347 				"RING_INDIRECT_CTX_OFFSET"
348 			},
349 			{
350 				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
351 				CTX_TIMESTAMP - 1,
352 				"RING_CTX_TIMESTAMP"
353 			},
354 			{
355 				i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
356 				lrc_ring_gpr0(engine),
357 				"RING_CS_GPR0"
358 			},
359 			{
360 				i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
361 				lrc_ring_cmd_buf_cctl(engine),
362 				"RING_CMD_BUF_CCTL"
363 			},
364 			{
365 				i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)),
366 				lrc_ring_bb_offset(engine),
367 				"RING_BB_OFFSET"
368 			},
369 			{ },
370 		}, *t;
371 		u32 *hw;
372 
373 		if (!engine->default_state)
374 			continue;
375 
376 		hw = shmem_pin_map(engine->default_state);
377 		if (!hw) {
378 			err = -ENOMEM;
379 			break;
380 		}
381 		hw += LRC_STATE_OFFSET / sizeof(*hw);
382 
383 		for (t = tbl; t->name; t++) {
384 			int dw = find_offset(hw, t->reg);
385 
386 			if (dw != t->offset) {
387 				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
388 				       engine->name,
389 				       t->name,
390 				       t->reg,
391 				       dw,
392 				       t->offset);
393 				err = -EINVAL;
394 			}
395 		}
396 
397 		shmem_unpin_map(engine->default_state, hw);
398 	}
399 
400 	return err;
401 }
402 
403 static int __live_lrc_state(struct intel_engine_cs *engine,
404 			    struct i915_vma *scratch)
405 {
406 	struct intel_context *ce;
407 	struct i915_request *rq;
408 	struct i915_gem_ww_ctx ww;
409 	enum {
410 		RING_START_IDX = 0,
411 		RING_TAIL_IDX,
412 		MAX_IDX
413 	};
414 	u32 expected[MAX_IDX];
415 	u32 *cs;
416 	int err;
417 	int n;
418 
419 	ce = intel_context_create(engine);
420 	if (IS_ERR(ce))
421 		return PTR_ERR(ce);
422 
423 	i915_gem_ww_ctx_init(&ww, false);
424 retry:
425 	err = i915_gem_object_lock(scratch->obj, &ww);
426 	if (!err)
427 		err = intel_context_pin_ww(ce, &ww);
428 	if (err)
429 		goto err_put;
430 
431 	rq = i915_request_create(ce);
432 	if (IS_ERR(rq)) {
433 		err = PTR_ERR(rq);
434 		goto err_unpin;
435 	}
436 
437 	cs = intel_ring_begin(rq, 4 * MAX_IDX);
438 	if (IS_ERR(cs)) {
439 		err = PTR_ERR(cs);
440 		i915_request_add(rq);
441 		goto err_unpin;
442 	}
443 
444 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
445 	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
446 	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
447 	*cs++ = 0;
448 
449 	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
450 
451 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
452 	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
453 	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
454 	*cs++ = 0;
455 
456 	err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
457 
458 	i915_request_get(rq);
459 	i915_request_add(rq);
460 	if (err)
461 		goto err_rq;
462 
463 	intel_engine_flush_submission(engine);
464 	expected[RING_TAIL_IDX] = ce->ring->tail;
465 
466 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
467 		err = -ETIME;
468 		goto err_rq;
469 	}
470 
471 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
472 	if (IS_ERR(cs)) {
473 		err = PTR_ERR(cs);
474 		goto err_rq;
475 	}
476 
477 	for (n = 0; n < MAX_IDX; n++) {
478 		if (cs[n] != expected[n]) {
479 			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
480 			       engine->name, n, cs[n], expected[n]);
481 			err = -EINVAL;
482 			break;
483 		}
484 	}
485 
486 	i915_gem_object_unpin_map(scratch->obj);
487 
488 err_rq:
489 	i915_request_put(rq);
490 err_unpin:
491 	intel_context_unpin(ce);
492 err_put:
493 	if (err == -EDEADLK) {
494 		err = i915_gem_ww_ctx_backoff(&ww);
495 		if (!err)
496 			goto retry;
497 	}
498 	i915_gem_ww_ctx_fini(&ww);
499 	intel_context_put(ce);
500 	return err;
501 }
502 
503 static int live_lrc_state(void *arg)
504 {
505 	struct intel_gt *gt = arg;
506 	struct intel_engine_cs *engine;
507 	struct i915_vma *scratch;
508 	enum intel_engine_id id;
509 	int err = 0;
510 
511 	/*
512 	 * Check the live register state matches what we expect for this
513 	 * intel_context.
514 	 */
515 
516 	scratch = create_scratch(gt);
517 	if (IS_ERR(scratch))
518 		return PTR_ERR(scratch);
519 
520 	for_each_engine(engine, gt, id) {
521 		err = __live_lrc_state(engine, scratch);
522 		if (err)
523 			break;
524 	}
525 
526 	if (igt_flush_test(gt->i915))
527 		err = -EIO;
528 
529 	i915_vma_unpin_and_release(&scratch, 0);
530 	return err;
531 }
532 
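/*
 * Dirty every CS_GPR dword of @ce with a non-zero value (STACK_MAGIC)
 * using a single barrier-priority MI_LOAD_REGISTER_IMM.
 */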
533 static int gpr_make_dirty(struct intel_context *ce)
534 {
535 	struct i915_request *rq;
536 	u32 *cs;
537 	int n;
538 
539 	rq = intel_context_create_request(ce);
540 	if (IS_ERR(rq))
541 		return PTR_ERR(rq);
542 
543 	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
544 	if (IS_ERR(cs)) {
545 		i915_request_add(rq);
546 		return PTR_ERR(cs);
547 	}
548 
549 	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
550 	for (n = 0; n < NUM_GPR_DW; n++) {
551 		*cs++ = CS_GPR(ce->engine, n);
552 		*cs++ = STACK_MAGIC;
553 	}
554 	*cs++ = MI_NOOP;
555 
556 	intel_ring_advance(rq, cs);
557 
558 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
559 	i915_request_add(rq);
560 
561 	return 0;
562 }
563 
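/*
 * Build a request that blocks on a semaphore in the status page and then
 * stores every CS_GPR dword into @scratch, so that the GPR contents can
 * be inspected after the context has been restored.
 */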
564 static struct i915_request *
565 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
566 {
567 	const u32 offset =
568 		i915_ggtt_offset(ce->engine->status_page.vma) +
569 		offset_in_page(slot);
570 	struct i915_request *rq;
571 	u32 *cs;
572 	int err;
573 	int n;
574 
575 	rq = intel_context_create_request(ce);
576 	if (IS_ERR(rq))
577 		return rq;
578 
579 	cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
580 	if (IS_ERR(cs)) {
581 		i915_request_add(rq);
582 		return ERR_CAST(cs);
583 	}
584 
585 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
586 	*cs++ = MI_NOOP;
587 
588 	*cs++ = MI_SEMAPHORE_WAIT |
589 		MI_SEMAPHORE_GLOBAL_GTT |
590 		MI_SEMAPHORE_POLL |
591 		MI_SEMAPHORE_SAD_NEQ_SDD;
592 	*cs++ = 0;
593 	*cs++ = offset;
594 	*cs++ = 0;
595 
596 	for (n = 0; n < NUM_GPR_DW; n++) {
597 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
598 		*cs++ = CS_GPR(ce->engine, n);
599 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
600 		*cs++ = 0;
601 	}
602 
603 	err = igt_vma_move_to_active_unlocked(scratch, rq, EXEC_OBJECT_WRITE);
604 
605 	i915_request_get(rq);
606 	i915_request_add(rq);
607 	if (err) {
608 		i915_request_put(rq);
609 		rq = ERR_PTR(err);
610 	}
611 
612 	return rq;
613 }
614 
615 static int __live_lrc_gpr(struct intel_engine_cs *engine,
616 			  struct i915_vma *scratch,
617 			  bool preempt)
618 {
619 	u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
620 	struct intel_context *ce;
621 	struct i915_request *rq;
622 	u32 *cs;
623 	int err;
624 	int n;
625 
626 	if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS)
627 		return 0; /* GPR only on rcs0 for gen8 */
628 
629 	err = gpr_make_dirty(engine->kernel_context);
630 	if (err)
631 		return err;
632 
633 	ce = intel_context_create(engine);
634 	if (IS_ERR(ce))
635 		return PTR_ERR(ce);
636 
637 	rq = __gpr_read(ce, scratch, slot);
638 	if (IS_ERR(rq)) {
639 		err = PTR_ERR(rq);
640 		goto err_put;
641 	}
642 
643 	err = wait_for_submit(engine, rq, HZ / 2);
644 	if (err)
645 		goto err_rq;
646 
647 	if (preempt) {
648 		err = gpr_make_dirty(engine->kernel_context);
649 		if (err)
650 			goto err_rq;
651 
652 		err = emit_semaphore_signal(engine->kernel_context, slot);
653 		if (err)
654 			goto err_rq;
655 
656 		err = wait_for_submit(engine, rq, HZ / 2);
657 		if (err)
658 			goto err_rq;
659 	} else {
660 		slot[0] = 1;
661 		wmb();
662 	}
663 
664 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
665 		err = -ETIME;
666 		goto err_rq;
667 	}
668 
669 	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
670 	if (IS_ERR(cs)) {
671 		err = PTR_ERR(cs);
672 		goto err_rq;
673 	}
674 
675 	for (n = 0; n < NUM_GPR_DW; n++) {
676 		if (cs[n]) {
677 			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
678 			       engine->name,
679 			       n / 2, n & 1 ? "udw" : "ldw",
680 			       cs[n]);
681 			err = -EINVAL;
682 			break;
683 		}
684 	}
685 
686 	i915_gem_object_unpin_map(scratch->obj);
687 
688 err_rq:
689 	memset32(&slot[0], -1, 4);
690 	wmb();
691 	i915_request_put(rq);
692 err_put:
693 	intel_context_put(ce);
694 	return err;
695 }
696 
697 static int live_lrc_gpr(void *arg)
698 {
699 	struct intel_gt *gt = arg;
700 	struct intel_engine_cs *engine;
701 	struct i915_vma *scratch;
702 	enum intel_engine_id id;
703 	int err = 0;
704 
705 	/*
706 	 * Check that the GPRs are cleared in new contexts, as we need to
707 	 * avoid leaking any information from previous contexts.
708 	 */
709 
710 	scratch = create_scratch(gt);
711 	if (IS_ERR(scratch))
712 		return PTR_ERR(scratch);
713 
714 	for_each_engine(engine, gt, id) {
715 		st_engine_heartbeat_disable(engine);
716 
717 		err = __live_lrc_gpr(engine, scratch, false);
718 		if (err)
719 			goto err;
720 
721 		err = __live_lrc_gpr(engine, scratch, true);
722 		if (err)
723 			goto err;
724 
725 err:
726 		st_engine_heartbeat_enable(engine);
727 		if (igt_flush_test(gt->i915))
728 			err = -EIO;
729 		if (err)
730 			break;
731 	}
732 
733 	i915_vma_unpin_and_release(&scratch, 0);
734 	return err;
735 }
736 
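/*
 * Build a request that blocks on a semaphore in the status page and then
 * stores RING_CTX_TIMESTAMP into status page slot @idx.
 */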
737 static struct i915_request *
738 create_timestamp(struct intel_context *ce, void *slot, int idx)
739 {
740 	const u32 offset =
741 		i915_ggtt_offset(ce->engine->status_page.vma) +
742 		offset_in_page(slot);
743 	struct i915_request *rq;
744 	u32 *cs;
745 	int err;
746 
747 	rq = intel_context_create_request(ce);
748 	if (IS_ERR(rq))
749 		return rq;
750 
751 	cs = intel_ring_begin(rq, 10);
752 	if (IS_ERR(cs)) {
753 		err = PTR_ERR(cs);
754 		goto err;
755 	}
756 
757 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
758 	*cs++ = MI_NOOP;
759 
760 	*cs++ = MI_SEMAPHORE_WAIT |
761 		MI_SEMAPHORE_GLOBAL_GTT |
762 		MI_SEMAPHORE_POLL |
763 		MI_SEMAPHORE_SAD_NEQ_SDD;
764 	*cs++ = 0;
765 	*cs++ = offset;
766 	*cs++ = 0;
767 
768 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
769 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
770 	*cs++ = offset + idx * sizeof(u32);
771 	*cs++ = 0;
772 
773 	intel_ring_advance(rq, cs);
774 
775 	err = 0;
776 err:
777 	i915_request_get(rq);
778 	i915_request_add(rq);
779 	if (err) {
780 		i915_request_put(rq);
781 		return ERR_PTR(err);
782 	}
783 
784 	return rq;
785 }
786 
787 struct lrc_timestamp {
788 	struct intel_engine_cs *engine;
789 	struct intel_context *ce[2];
790 	u32 poison;
791 };
792 
793 static bool timestamp_advanced(u32 start, u32 end)
794 {
795 	return (s32)(end - start) > 0;
796 }
797 
798 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
799 {
800 	u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
801 	struct i915_request *rq;
802 	u32 timestamp;
803 	int err = 0;
804 
805 	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
806 	rq = create_timestamp(arg->ce[0], slot, 1);
807 	if (IS_ERR(rq))
808 		return PTR_ERR(rq);
809 
810 	err = wait_for_submit(rq->engine, rq, HZ / 2);
811 	if (err)
812 		goto err;
813 
814 	if (preempt) {
815 		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
816 		err = emit_semaphore_signal(arg->ce[1], slot);
817 		if (err)
818 			goto err;
819 	} else {
820 		slot[0] = 1;
821 		wmb();
822 	}
823 
824 	/* And wait for switch to kernel (to save our context to memory) */
825 	err = context_flush(arg->ce[0], HZ / 2);
826 	if (err)
827 		goto err;
828 
829 	if (!timestamp_advanced(arg->poison, slot[1])) {
830 		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
831 		       arg->engine->name, preempt ? "preempt" : "simple",
832 		       arg->poison, slot[1]);
833 		err = -EINVAL;
834 	}
835 
836 	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
837 	if (!timestamp_advanced(slot[1], timestamp)) {
838 		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
839 		       arg->engine->name, preempt ? "preempt" : "simple",
840 		       slot[1], timestamp);
841 		err = -EINVAL;
842 	}
843 
844 err:
845 	memset32(slot, -1, 4);
846 	i915_request_put(rq);
847 	return err;
848 }
849 
850 static int live_lrc_timestamp(void *arg)
851 {
852 	struct lrc_timestamp data = {};
853 	struct intel_gt *gt = arg;
854 	enum intel_engine_id id;
855 	const u32 poison[] = {
856 		0,
857 		S32_MAX,
858 		(u32)S32_MAX + 1,
859 		U32_MAX,
860 	};
861 
862 	/*
863 	 * This test was designed to isolate a hardware bug.
864 	 * The bug was found and fixed in later generations, but the
865 	 * test now pollutes our CI on earlier generations.
866 	 */
867 	if (GRAPHICS_VER(gt->i915) == 12)
868 		return 0;
869 
870 	/*
871 	 * We want to verify that the timestamp is saved and restored across
872 	 * context switches and is monotonic.
873 	 *
874 	 * So we do this with a little bit of LRC poisoning to check various
875 	 * boundary conditions, and see what happens if we preempt the context
876 	 * with a second request (carrying more poison into the timestamp).
877 	 */
878 
879 	for_each_engine(data.engine, gt, id) {
880 		int i, err = 0;
881 
882 		st_engine_heartbeat_disable(data.engine);
883 
884 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
885 			struct intel_context *tmp;
886 
887 			tmp = intel_context_create(data.engine);
888 			if (IS_ERR(tmp)) {
889 				err = PTR_ERR(tmp);
890 				goto err;
891 			}
892 
893 			err = intel_context_pin(tmp);
894 			if (err) {
895 				intel_context_put(tmp);
896 				goto err;
897 			}
898 
899 			data.ce[i] = tmp;
900 		}
901 
902 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
903 			data.poison = poison[i];
904 
905 			err = __lrc_timestamp(&data, false);
906 			if (err)
907 				break;
908 
909 			err = __lrc_timestamp(&data, true);
910 			if (err)
911 				break;
912 		}
913 
914 err:
915 		st_engine_heartbeat_enable(data.engine);
916 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
917 			if (!data.ce[i])
918 				break;
919 
920 			intel_context_unpin(data.ce[i]);
921 			intel_context_put(data.ce[i]);
922 		}
923 
924 		if (igt_flush_test(gt->i915))
925 			err = -EIO;
926 		if (err)
927 			return err;
928 	}
929 
930 	return 0;
931 }
932 
933 static struct i915_vma *
934 create_user_vma(struct i915_address_space *vm, unsigned long size)
935 {
936 	struct drm_i915_gem_object *obj;
937 	struct i915_vma *vma;
938 	int err;
939 
940 	obj = i915_gem_object_create_internal(vm->i915, size);
941 	if (IS_ERR(obj))
942 		return ERR_CAST(obj);
943 
944 	vma = i915_vma_instance(obj, vm, NULL);
945 	if (IS_ERR(vma)) {
946 		i915_gem_object_put(obj);
947 		return vma;
948 	}
949 
950 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
951 	if (err) {
952 		i915_gem_object_put(obj);
953 		return ERR_PTR(err);
954 	}
955 
956 	return vma;
957 }
958 
959 static u32 safe_poison(u32 offset, u32 poison)
960 {
961 	/*
962 	 * Do not enable predication as it will nop all subsequent commands,
963 	 * not only disabling the tests (by preventing all the other SRM) but
964 	 * also preventing the arbitration events at the end of the request.
965 	 */
966 	if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
967 		poison &= ~REG_BIT(0);
968 
969 	return poison;
970 }
971 
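/*
 * Build a user batch that walks the MI_LOAD_REGISTER_IMMs in the default
 * context image and emits an SRM for each register listed, dumping the
 * live register values into @scratch.
 */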
972 static struct i915_vma *
973 store_context(struct intel_context *ce, struct i915_vma *scratch)
974 {
975 	struct i915_vma *batch;
976 	u32 dw, x, *cs, *hw;
977 	u32 *defaults;
978 
979 	batch = create_user_vma(ce->vm, SZ_64K);
980 	if (IS_ERR(batch))
981 		return batch;
982 
983 	cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
984 	if (IS_ERR(cs)) {
985 		i915_vma_put(batch);
986 		return ERR_CAST(cs);
987 	}
988 
989 	defaults = shmem_pin_map(ce->engine->default_state);
990 	if (!defaults) {
991 		i915_gem_object_unpin_map(batch->obj);
992 		i915_vma_put(batch);
993 		return ERR_PTR(-ENOMEM);
994 	}
995 
996 	x = 0;
997 	dw = 0;
998 	hw = defaults;
999 	hw += LRC_STATE_OFFSET / sizeof(*hw);
1000 	do {
1001 		u32 len = hw[dw] & LRI_LENGTH_MASK;
1002 
1003 		/*
1004 		 * Keep it simple, skip parsing complex commands
1005 		 *
1006 		 * At present, there are no more MI_LOAD_REGISTER_IMM
1007 		 * commands after the first 3D state command. Rather
1008 		 * than include a table (see i915_cmd_parser.c) of all
1009 		 * the possible commands and their instruction lengths
1010 		 * (or mask for variable length instructions), assume
1011 		 * we have gathered the complete list of registers and
1012 		 * bail out.
1013 		 */
1014 		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1015 			break;
1016 
1017 		if (hw[dw] == 0) {
1018 			dw++;
1019 			continue;
1020 		}
1021 
1022 		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1023 			/* Assume all other MI commands match LRI length mask */
1024 			dw += len + 2;
1025 			continue;
1026 		}
1027 
1028 		if (!len) {
1029 			pr_err("%s: invalid LRI found in context image\n",
1030 			       ce->engine->name);
1031 			igt_hexdump(defaults, PAGE_SIZE);
1032 			break;
1033 		}
1034 
1035 		dw++;
1036 		len = (len + 1) / 2;
1037 		while (len--) {
1038 			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
1039 			*cs++ = hw[dw];
1040 			*cs++ = lower_32_bits(i915_vma_offset(scratch) + x);
1041 			*cs++ = upper_32_bits(i915_vma_offset(scratch) + x);
1042 
1043 			dw += 2;
1044 			x += 4;
1045 		}
1046 	} while (dw < PAGE_SIZE / sizeof(u32) &&
1047 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1048 
1049 	*cs++ = MI_BATCH_BUFFER_END;
1050 
1051 	shmem_unpin_map(ce->engine->default_state, defaults);
1052 
1053 	i915_gem_object_flush_map(batch->obj);
1054 	i915_gem_object_unpin_map(batch->obj);
1055 
1056 	return batch;
1057 }
1058 
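/*
 * Submit a request on @ce that dumps its register state into @before,
 * waits on @sema, then dumps the state again into @after so we can see
 * whether anything changed while we were waiting.
 */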
1059 static struct i915_request *
1060 record_registers(struct intel_context *ce,
1061 		 struct i915_vma *before,
1062 		 struct i915_vma *after,
1063 		 u32 *sema)
1064 {
1065 	struct i915_vma *b_before, *b_after;
1066 	struct i915_request *rq;
1067 	u32 *cs;
1068 	int err;
1069 
1070 	b_before = store_context(ce, before);
1071 	if (IS_ERR(b_before))
1072 		return ERR_CAST(b_before);
1073 
1074 	b_after = store_context(ce, after);
1075 	if (IS_ERR(b_after)) {
1076 		rq = ERR_CAST(b_after);
1077 		goto err_before;
1078 	}
1079 
1080 	rq = intel_context_create_request(ce);
1081 	if (IS_ERR(rq))
1082 		goto err_after;
1083 
1084 	err = igt_vma_move_to_active_unlocked(before, rq, EXEC_OBJECT_WRITE);
1085 	if (err)
1086 		goto err_rq;
1087 
1088 	err = igt_vma_move_to_active_unlocked(b_before, rq, 0);
1089 	if (err)
1090 		goto err_rq;
1091 
1092 	err = igt_vma_move_to_active_unlocked(after, rq, EXEC_OBJECT_WRITE);
1093 	if (err)
1094 		goto err_rq;
1095 
1096 	err = igt_vma_move_to_active_unlocked(b_after, rq, 0);
1097 	if (err)
1098 		goto err_rq;
1099 
1100 	cs = intel_ring_begin(rq, 14);
1101 	if (IS_ERR(cs)) {
1102 		err = PTR_ERR(cs);
1103 		goto err_rq;
1104 	}
1105 
1106 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1107 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1108 	*cs++ = lower_32_bits(i915_vma_offset(b_before));
1109 	*cs++ = upper_32_bits(i915_vma_offset(b_before));
1110 
1111 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1112 	*cs++ = MI_SEMAPHORE_WAIT |
1113 		MI_SEMAPHORE_GLOBAL_GTT |
1114 		MI_SEMAPHORE_POLL |
1115 		MI_SEMAPHORE_SAD_NEQ_SDD;
1116 	*cs++ = 0;
1117 	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1118 		offset_in_page(sema);
1119 	*cs++ = 0;
1120 	*cs++ = MI_NOOP;
1121 
1122 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1123 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1124 	*cs++ = lower_32_bits(i915_vma_offset(b_after));
1125 	*cs++ = upper_32_bits(i915_vma_offset(b_after));
1126 
1127 	intel_ring_advance(rq, cs);
1128 
1129 	WRITE_ONCE(*sema, 0);
1130 	i915_request_get(rq);
1131 	i915_request_add(rq);
1132 err_after:
1133 	i915_vma_put(b_after);
1134 err_before:
1135 	i915_vma_put(b_before);
1136 	return rq;
1137 
1138 err_rq:
1139 	i915_request_add(rq);
1140 	rq = ERR_PTR(err);
1141 	goto err_after;
1142 }
1143 
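/*
 * Build a user batch of MI_LOAD_REGISTER_IMMs that rewrites every register
 * listed in the default context image with @poison (adjusted by
 * safe_poison() where full poisoning would be harmful).
 */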
1144 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
1145 {
1146 	struct i915_vma *batch;
1147 	u32 dw, *cs, *hw;
1148 	u32 *defaults;
1149 
1150 	batch = create_user_vma(ce->vm, SZ_64K);
1151 	if (IS_ERR(batch))
1152 		return batch;
1153 
1154 	cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
1155 	if (IS_ERR(cs)) {
1156 		i915_vma_put(batch);
1157 		return ERR_CAST(cs);
1158 	}
1159 
1160 	defaults = shmem_pin_map(ce->engine->default_state);
1161 	if (!defaults) {
1162 		i915_gem_object_unpin_map(batch->obj);
1163 		i915_vma_put(batch);
1164 		return ERR_PTR(-ENOMEM);
1165 	}
1166 
1167 	dw = 0;
1168 	hw = defaults;
1169 	hw += LRC_STATE_OFFSET / sizeof(*hw);
1170 	do {
1171 		u32 len = hw[dw] & LRI_LENGTH_MASK;
1172 
1173 		/* For simplicity, break parsing at the first complex command */
1174 		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1175 			break;
1176 
1177 		if (hw[dw] == 0) {
1178 			dw++;
1179 			continue;
1180 		}
1181 
1182 		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1183 			dw += len + 2;
1184 			continue;
1185 		}
1186 
1187 		if (!len) {
1188 			pr_err("%s: invalid LRI found in context image\n",
1189 			       ce->engine->name);
1190 			igt_hexdump(defaults, PAGE_SIZE);
1191 			break;
1192 		}
1193 
1194 		dw++;
1195 		len = (len + 1) / 2;
1196 		*cs++ = MI_LOAD_REGISTER_IMM(len);
1197 		while (len--) {
1198 			*cs++ = hw[dw];
1199 			*cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine,
1200 								  MI_LRI_LRM_CS_MMIO),
1201 					    poison);
1202 			dw += 2;
1203 		}
1204 	} while (dw < PAGE_SIZE / sizeof(u32) &&
1205 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1206 
1207 	*cs++ = MI_BATCH_BUFFER_END;
1208 
1209 	shmem_unpin_map(ce->engine->default_state, defaults);
1210 
1211 	i915_gem_object_flush_map(batch->obj);
1212 	i915_gem_object_unpin_map(batch->obj);
1213 
1214 	return batch;
1215 }
1216 
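/*
 * Run the poisoning batch from load_context() on @ce, then write 1 into
 * @sema so that the recording context can resume and capture the result.
 */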
1217 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
1218 {
1219 	struct i915_request *rq;
1220 	struct i915_vma *batch;
1221 	u32 *cs;
1222 	int err;
1223 
1224 	batch = load_context(ce, poison);
1225 	if (IS_ERR(batch))
1226 		return PTR_ERR(batch);
1227 
1228 	rq = intel_context_create_request(ce);
1229 	if (IS_ERR(rq)) {
1230 		err = PTR_ERR(rq);
1231 		goto err_batch;
1232 	}
1233 
1234 	err = igt_vma_move_to_active_unlocked(batch, rq, 0);
1235 	if (err)
1236 		goto err_rq;
1237 
1238 	cs = intel_ring_begin(rq, 8);
1239 	if (IS_ERR(cs)) {
1240 		err = PTR_ERR(cs);
1241 		goto err_rq;
1242 	}
1243 
1244 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1245 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1246 	*cs++ = lower_32_bits(i915_vma_offset(batch));
1247 	*cs++ = upper_32_bits(i915_vma_offset(batch));
1248 
1249 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1250 	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1251 		offset_in_page(sema);
1252 	*cs++ = 0;
1253 	*cs++ = 1;
1254 
1255 	intel_ring_advance(rq, cs);
1256 
1257 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1258 err_rq:
1259 	i915_request_add(rq);
1260 err_batch:
1261 	i915_vma_put(batch);
1262 	return err;
1263 }
1264 
1265 static bool is_moving(u32 a, u32 b)
1266 {
1267 	return a != b;
1268 }
1269 
1270 static int compare_isolation(struct intel_engine_cs *engine,
1271 			     struct i915_vma *ref[2],
1272 			     struct i915_vma *result[2],
1273 			     struct intel_context *ce,
1274 			     u32 poison)
1275 {
1276 	u32 x, dw, *hw, *lrc;
1277 	u32 *A[2], *B[2];
1278 	u32 *defaults;
1279 	int err = 0;
1280 
1281 	A[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC);
1282 	if (IS_ERR(A[0]))
1283 		return PTR_ERR(A[0]);
1284 
1285 	A[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC);
1286 	if (IS_ERR(A[1])) {
1287 		err = PTR_ERR(A[1]);
1288 		goto err_A0;
1289 	}
1290 
1291 	B[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC);
1292 	if (IS_ERR(B[0])) {
1293 		err = PTR_ERR(B[0]);
1294 		goto err_A1;
1295 	}
1296 
1297 	B[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC);
1298 	if (IS_ERR(B[1])) {
1299 		err = PTR_ERR(B[1]);
1300 		goto err_B0;
1301 	}
1302 
1303 	lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
1304 					       intel_gt_coherent_map_type(engine->gt,
1305 									  ce->state->obj,
1306 									  false));
1307 	if (IS_ERR(lrc)) {
1308 		err = PTR_ERR(lrc);
1309 		goto err_B1;
1310 	}
1311 	lrc += LRC_STATE_OFFSET / sizeof(*hw);
1312 
1313 	defaults = shmem_pin_map(ce->engine->default_state);
1314 	if (!defaults) {
1315 		err = -ENOMEM;
1316 		goto err_lrc;
1317 	}
1318 
1319 	x = 0;
1320 	dw = 0;
1321 	hw = defaults;
1322 	hw += LRC_STATE_OFFSET / sizeof(*hw);
1323 	do {
1324 		u32 len = hw[dw] & LRI_LENGTH_MASK;
1325 
1326 		/* For simplicity, break parsing at the first complex command */
1327 		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1328 			break;
1329 
1330 		if (hw[dw] == 0) {
1331 			dw++;
1332 			continue;
1333 		}
1334 
1335 		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1336 			dw += len + 2;
1337 			continue;
1338 		}
1339 
1340 		if (!len) {
1341 			pr_err("%s: invalid LRI found in context image\n",
1342 			       engine->name);
1343 			igt_hexdump(defaults, PAGE_SIZE);
1344 			break;
1345 		}
1346 
1347 		dw++;
1348 		len = (len + 1) / 2;
1349 		while (len--) {
1350 			if (!is_moving(A[0][x], A[1][x]) &&
1351 			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
1352 				switch (hw[dw] & 4095) {
1353 				case 0x30: /* RING_HEAD */
1354 				case 0x34: /* RING_TAIL */
1355 					break;
1356 
1357 				default:
1358 					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
1359 					       engine->name, dw,
1360 					       hw[dw], hw[dw + 1],
1361 					       A[0][x], B[0][x], B[1][x],
1362 					       poison, lrc[dw + 1]);
1363 					err = -EINVAL;
1364 				}
1365 			}
1366 			dw += 2;
1367 			x++;
1368 		}
1369 	} while (dw < PAGE_SIZE / sizeof(u32) &&
1370 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1371 
1372 	shmem_unpin_map(ce->engine->default_state, defaults);
1373 err_lrc:
1374 	i915_gem_object_unpin_map(ce->state->obj);
1375 err_B1:
1376 	i915_gem_object_unpin_map(result[1]->obj);
1377 err_B0:
1378 	i915_gem_object_unpin_map(result[0]->obj);
1379 err_A1:
1380 	i915_gem_object_unpin_map(ref[1]->obj);
1381 err_A0:
1382 	i915_gem_object_unpin_map(ref[0]->obj);
1383 	return err;
1384 }
1385 
1386 static struct i915_vma *
1387 create_result_vma(struct i915_address_space *vm, unsigned long sz)
1388 {
1389 	struct i915_vma *vma;
1390 	void *ptr;
1391 
1392 	vma = create_user_vma(vm, sz);
1393 	if (IS_ERR(vma))
1394 		return vma;
1395 
1396 	/* Set the results to a known value distinct from the poison */
1397 	ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
1398 	if (IS_ERR(ptr)) {
1399 		i915_vma_put(vma);
1400 		return ERR_CAST(ptr);
1401 	}
1402 
1403 	memset(ptr, POISON_INUSE, vma->size);
1404 	i915_gem_object_flush_map(vma->obj);
1405 	i915_gem_object_unpin_map(vma->obj);
1406 
1407 	return vma;
1408 }
1409 
1410 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
1411 {
1412 	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
1413 	struct i915_vma *ref[2], *result[2];
1414 	struct intel_context *A, *B;
1415 	struct i915_request *rq;
1416 	int err;
1417 
1418 	A = intel_context_create(engine);
1419 	if (IS_ERR(A))
1420 		return PTR_ERR(A);
1421 
1422 	B = intel_context_create(engine);
1423 	if (IS_ERR(B)) {
1424 		err = PTR_ERR(B);
1425 		goto err_A;
1426 	}
1427 
1428 	ref[0] = create_result_vma(A->vm, SZ_64K);
1429 	if (IS_ERR(ref[0])) {
1430 		err = PTR_ERR(ref[0]);
1431 		goto err_B;
1432 	}
1433 
1434 	ref[1] = create_result_vma(A->vm, SZ_64K);
1435 	if (IS_ERR(ref[1])) {
1436 		err = PTR_ERR(ref[1]);
1437 		goto err_ref0;
1438 	}
1439 
1440 	rq = record_registers(A, ref[0], ref[1], sema);
1441 	if (IS_ERR(rq)) {
1442 		err = PTR_ERR(rq);
1443 		goto err_ref1;
1444 	}
1445 
1446 	WRITE_ONCE(*sema, 1);
1447 	wmb();
1448 
1449 	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1450 		i915_request_put(rq);
1451 		err = -ETIME;
1452 		goto err_ref1;
1453 	}
1454 	i915_request_put(rq);
1455 
1456 	result[0] = create_result_vma(A->vm, SZ_64K);
1457 	if (IS_ERR(result[0])) {
1458 		err = PTR_ERR(result[0]);
1459 		goto err_ref1;
1460 	}
1461 
1462 	result[1] = create_result_vma(A->vm, SZ_64K);
1463 	if (IS_ERR(result[1])) {
1464 		err = PTR_ERR(result[1]);
1465 		goto err_result0;
1466 	}
1467 
1468 	rq = record_registers(A, result[0], result[1], sema);
1469 	if (IS_ERR(rq)) {
1470 		err = PTR_ERR(rq);
1471 		goto err_result1;
1472 	}
1473 
1474 	err = poison_registers(B, poison, sema);
1475 	if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) {
1476 		pr_err("%s(%s): wait for results timed out\n",
1477 		       __func__, engine->name);
1478 		err = -ETIME;
1479 	}
1480 
1481 	/* Always cancel the semaphore wait, just in case the GPU gets stuck */
1482 	WRITE_ONCE(*sema, -1);
1483 	i915_request_put(rq);
1484 	if (err)
1485 		goto err_result1;
1486 
1487 	err = compare_isolation(engine, ref, result, A, poison);
1488 
1489 err_result1:
1490 	i915_vma_put(result[1]);
1491 err_result0:
1492 	i915_vma_put(result[0]);
1493 err_ref1:
1494 	i915_vma_put(ref[1]);
1495 err_ref0:
1496 	i915_vma_put(ref[0]);
1497 err_B:
1498 	intel_context_put(B);
1499 err_A:
1500 	intel_context_put(A);
1501 	return err;
1502 }
1503 
1504 static bool skip_isolation(const struct intel_engine_cs *engine)
1505 {
1506 	if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9)
1507 		return true;
1508 
1509 	if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11)
1510 		return true;
1511 
1512 	return false;
1513 }
1514 
1515 static int live_lrc_isolation(void *arg)
1516 {
1517 	struct intel_gt *gt = arg;
1518 	struct intel_engine_cs *engine;
1519 	enum intel_engine_id id;
1520 	const u32 poison[] = {
1521 		STACK_MAGIC,
1522 		0x3a3a3a3a,
1523 		0x5c5c5c5c,
1524 		0xffffffff,
1525 		0xffff0000,
1526 	};
1527 	int err = 0;
1528 
1529 	/*
1530 	 * Our goal is to try to verify that per-context state cannot be
1531 	 * tampered with by another non-privileged client.
1532 	 *
1533 	 * We take the list of context registers from the LRI in the default
1534 	 * context image and attempt to modify that list from a remote context.
1535 	 */
1536 
1537 	for_each_engine(engine, gt, id) {
1538 		int i;
1539 
1540 		/* Just don't even ask */
1541 		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
1542 		    skip_isolation(engine))
1543 			continue;
1544 
1545 		intel_engine_pm_get(engine);
1546 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
1547 			int result;
1548 
1549 			result = __lrc_isolation(engine, poison[i]);
1550 			if (result && !err)
1551 				err = result;
1552 
1553 			result = __lrc_isolation(engine, ~poison[i]);
1554 			if (result && !err)
1555 				err = result;
1556 		}
1557 		intel_engine_pm_put(engine);
1558 		if (igt_flush_test(gt->i915)) {
1559 			err = -EIO;
1560 			break;
1561 		}
1562 	}
1563 
1564 	return err;
1565 }
1566 
1567 static int wabb_ctx_submit_req(struct intel_context *ce)
1568 {
1569 	struct i915_request *rq;
1570 	int err = 0;
1571 
1572 	rq = intel_context_create_request(ce);
1573 	if (IS_ERR(rq))
1574 		return PTR_ERR(rq);
1575 
1576 	i915_request_get(rq);
1577 	i915_request_add(rq);
1578 
1579 	if (i915_request_wait(rq, 0, HZ / 5) < 0)
1580 		err = -ETIME;
1581 
1582 	i915_request_put(rq);
1583 
1584 	return err;
1585 }
1586 
1587 #define CTX_BB_CANARY_OFFSET (3 * 1024)
1588 #define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
1589 
1590 static u32 *
1591 emit_wabb_ctx_canary(const struct intel_context *ce,
1592 		     u32 *cs, bool per_ctx)
1593 {
1594 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
1595 		MI_SRM_LRM_GLOBAL_GTT |
1596 		MI_LRI_LRM_CS_MMIO;
1597 	*cs++ = i915_mmio_reg_offset(RING_START(0));
1598 	*cs++ = i915_ggtt_offset(ce->state) +
1599 		context_wa_bb_offset(ce) +
1600 		CTX_BB_CANARY_OFFSET +
1601 		(per_ctx ? PAGE_SIZE : 0);
1602 	*cs++ = 0;
1603 
1604 	return cs;
1605 }
1606 
1607 static u32 *
1608 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1609 {
1610 	return emit_wabb_ctx_canary(ce, cs, false);
1611 }
1612 
1613 static u32 *
1614 emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1615 {
1616 	return emit_wabb_ctx_canary(ce, cs, true);
1617 }
1618 
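/*
 * Plant a canary value in the wa_bb page and install a workaround batch
 * (per-context or indirect) that overwrites it with RING_START when the
 * context is restored.
 */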
1619 static void
1620 wabb_ctx_setup(struct intel_context *ce, bool per_ctx)
1621 {
1622 	u32 *cs = context_wabb(ce, per_ctx);
1623 
1624 	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
1625 
1626 	if (per_ctx)
1627 		setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary);
1628 	else
1629 		setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
1630 }
1631 
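/*
 * The canary batch stored RING_START into the wa_bb page; compare that
 * value with CTX_RING_START in the context image to prove the batch ran
 * for this particular context.
 */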
1632 static bool check_ring_start(struct intel_context *ce, bool per_ctx)
1633 {
1634 	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
1635 		LRC_STATE_OFFSET + context_wa_bb_offset(ce) +
1636 		(per_ctx ? PAGE_SIZE : 0);
1637 
1638 	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
1639 		return true;
1640 
1641 	pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
1642 	       ctx_bb[CTX_BB_CANARY_INDEX],
1643 	       ce->lrc_reg_state[CTX_RING_START]);
1644 
1645 	return false;
1646 }
1647 
1648 static int wabb_ctx_check(struct intel_context *ce, bool per_ctx)
1649 {
1650 	int err;
1651 
1652 	err = wabb_ctx_submit_req(ce);
1653 	if (err)
1654 		return err;
1655 
1656 	if (!check_ring_start(ce, per_ctx))
1657 		return -EINVAL;
1658 
1659 	return 0;
1660 }
1661 
1662 static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx)
1663 {
1664 	struct intel_context *a, *b;
1665 	int err;
1666 
1667 	a = intel_context_create(engine);
1668 	if (IS_ERR(a))
1669 		return PTR_ERR(a);
1670 	err = intel_context_pin(a);
1671 	if (err)
1672 		goto put_a;
1673 
1674 	b = intel_context_create(engine);
1675 	if (IS_ERR(b)) {
1676 		err = PTR_ERR(b);
1677 		goto unpin_a;
1678 	}
1679 	err = intel_context_pin(b);
1680 	if (err)
1681 		goto put_b;
1682 
1683 	/* We use the already reserved extra page in context state */
1684 	if (!a->wa_bb_page) {
1685 		GEM_BUG_ON(b->wa_bb_page);
1686 		GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12);
1687 		goto unpin_b;
1688 	}
1689 
1690 	/*
1691 	 * In order to test that our per-context bb is truly per context,
1692 	 * and executes at the intended spot in the context restore process,
1693 	 * make the batch store the ring start value to memory.
1694 	 * As ring start is restored prior to running the indirect ctx bb, and
1695 	 * as it will differ for each context, it fits this purpose.
1696 	 */
1697 	wabb_ctx_setup(a, per_ctx);
1698 	wabb_ctx_setup(b, per_ctx);
1699 
1700 	err = wabb_ctx_check(a, per_ctx);
1701 	if (err)
1702 		goto unpin_b;
1703 
1704 	err = wabb_ctx_check(b, per_ctx);
1705 
1706 unpin_b:
1707 	intel_context_unpin(b);
1708 put_b:
1709 	intel_context_put(b);
1710 unpin_a:
1711 	intel_context_unpin(a);
1712 put_a:
1713 	intel_context_put(a);
1714 
1715 	return err;
1716 }
1717 
1718 static int lrc_wabb_ctx(void *arg, bool per_ctx)
1719 {
1720 	struct intel_gt *gt = arg;
1721 	struct intel_engine_cs *engine;
1722 	enum intel_engine_id id;
1723 	int err = 0;
1724 
1725 	for_each_engine(engine, gt, id) {
1726 		intel_engine_pm_get(engine);
1727 		err = __lrc_wabb_ctx(engine, per_ctx);
1728 		intel_engine_pm_put(engine);
1729 
1730 		if (igt_flush_test(gt->i915))
1731 			err = -EIO;
1732 
1733 		if (err)
1734 			break;
1735 	}
1736 
1737 	return err;
1738 }
1739 
1740 static int live_lrc_indirect_ctx_bb(void *arg)
1741 {
1742 	return lrc_wabb_ctx(arg, false);
1743 }
1744 
1745 static int live_lrc_per_ctx_bb(void *arg)
1746 {
1747 	return lrc_wabb_ctx(arg, true);
1748 }
1749 
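/*
 * Reset the engine from bottom-half context, with the reset bit held and
 * the submission tasklet disabled, unless the hung request has already
 * been marked with a fence error.
 */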
1750 static void garbage_reset(struct intel_engine_cs *engine,
1751 			  struct i915_request *rq)
1752 {
1753 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
1754 	unsigned long *lock = &engine->gt->reset.flags;
1755 
1756 	local_bh_disable();
1757 	if (!test_and_set_bit(bit, lock)) {
1758 		tasklet_disable(&engine->sched_engine->tasklet);
1759 
1760 		if (!rq->fence.error)
1761 			__intel_engine_reset_bh(engine, NULL);
1762 
1763 		tasklet_enable(&engine->sched_engine->tasklet);
1764 		clear_and_wake_up_bit(bit, lock);
1765 	}
1766 	local_bh_enable();
1767 }
1768 
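/*
 * Scribble random bytes over the register state of @ce and submit a
 * request on it, handing the HW a thoroughly corrupted context image.
 */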
1769 static struct i915_request *garbage(struct intel_context *ce,
1770 				    struct rnd_state *prng)
1771 {
1772 	struct i915_request *rq;
1773 	int err;
1774 
1775 	err = intel_context_pin(ce);
1776 	if (err)
1777 		return ERR_PTR(err);
1778 
1779 	prandom_bytes_state(prng,
1780 			    ce->lrc_reg_state,
1781 			    ce->engine->context_size -
1782 			    LRC_STATE_OFFSET);
1783 
1784 	rq = intel_context_create_request(ce);
1785 	if (IS_ERR(rq)) {
1786 		err = PTR_ERR(rq);
1787 		goto err_unpin;
1788 	}
1789 
1790 	i915_request_get(rq);
1791 	i915_request_add(rq);
1792 	return rq;
1793 
1794 err_unpin:
1795 	intel_context_unpin(ce);
1796 	return ERR_PTR(err);
1797 }
1798 
1799 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
1800 {
1801 	struct intel_context *ce;
1802 	struct i915_request *hang;
1803 	int err = 0;
1804 
1805 	ce = intel_context_create(engine);
1806 	if (IS_ERR(ce))
1807 		return PTR_ERR(ce);
1808 
1809 	hang = garbage(ce, prng);
1810 	if (IS_ERR(hang)) {
1811 		err = PTR_ERR(hang);
1812 		goto err_ce;
1813 	}
1814 
1815 	if (wait_for_submit(engine, hang, HZ / 2)) {
1816 		i915_request_put(hang);
1817 		err = -ETIME;
1818 		goto err_ce;
1819 	}
1820 
1821 	intel_context_set_banned(ce);
1822 	garbage_reset(engine, hang);
1823 
1824 	intel_engine_flush_submission(engine);
1825 	if (!hang->fence.error) {
1826 		i915_request_put(hang);
1827 		pr_err("%s: corrupted context was not reset\n",
1828 		       engine->name);
1829 		err = -EINVAL;
1830 		goto err_ce;
1831 	}
1832 
1833 	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
1834 		pr_err("%s: corrupted context did not recover\n",
1835 		       engine->name);
1836 		i915_request_put(hang);
1837 		err = -EIO;
1838 		goto err_ce;
1839 	}
1840 	i915_request_put(hang);
1841 
1842 err_ce:
1843 	intel_context_put(ce);
1844 	return err;
1845 }
1846 
1847 static int live_lrc_garbage(void *arg)
1848 {
1849 	struct intel_gt *gt = arg;
1850 	struct intel_engine_cs *engine;
1851 	enum intel_engine_id id;
1852 
1853 	/*
1854 	 * Verify that we can recover if one context state is completely
1855 	 * corrupted.
1856 	 */
1857 
1858 	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
1859 		return 0;
1860 
1861 	for_each_engine(engine, gt, id) {
1862 		I915_RND_STATE(prng);
1863 		int err = 0, i;
1864 
1865 		if (!intel_has_reset_engine(engine->gt))
1866 			continue;
1867 
1868 		intel_engine_pm_get(engine);
1869 		for (i = 0; i < 3; i++) {
1870 			err = __lrc_garbage(engine, &prng);
1871 			if (err)
1872 				break;
1873 		}
1874 		intel_engine_pm_put(engine);
1875 
1876 		if (igt_flush_test(gt->i915))
1877 			err = -EIO;
1878 		if (err)
1879 			return err;
1880 	}
1881 
1882 	return 0;
1883 }
1884 
1885 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
1886 {
1887 	struct intel_context *ce;
1888 	struct i915_request *rq;
1889 	IGT_TIMEOUT(end_time);
1890 	int err;
1891 
1892 	ce = intel_context_create(engine);
1893 	if (IS_ERR(ce))
1894 		return PTR_ERR(ce);
1895 
1896 	ce->stats.runtime.num_underflow = 0;
1897 	ce->stats.runtime.max_underflow = 0;
1898 
1899 	do {
1900 		unsigned int loop = 1024;
1901 
1902 		while (loop) {
1903 			rq = intel_context_create_request(ce);
1904 			if (IS_ERR(rq)) {
1905 				err = PTR_ERR(rq);
1906 				goto err_rq;
1907 			}
1908 
1909 			if (--loop == 0)
1910 				i915_request_get(rq);
1911 
1912 			i915_request_add(rq);
1913 		}
1914 
1915 		if (__igt_timeout(end_time, NULL))
1916 			break;
1917 
1918 		i915_request_put(rq);
1919 	} while (1);
1920 
1921 	err = i915_request_wait(rq, 0, HZ / 5);
1922 	if (err < 0) {
1923 		pr_err("%s: request not completed!\n", engine->name);
1924 		goto err_wait;
1925 	}
1926 
1927 	igt_flush_test(engine->i915);
1928 
1929 	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
1930 		engine->name,
1931 		intel_context_get_total_runtime_ns(ce),
1932 		intel_context_get_avg_runtime_ns(ce));
1933 
1934 	err = 0;
1935 	if (ce->stats.runtime.num_underflow) {
1936 		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
1937 		       engine->name,
1938 		       ce->stats.runtime.num_underflow,
1939 		       ce->stats.runtime.max_underflow);
1940 		GEM_TRACE_DUMP();
1941 		err = -EOVERFLOW;
1942 	}
1943 
1944 err_wait:
1945 	i915_request_put(rq);
1946 err_rq:
1947 	intel_context_put(ce);
1948 	return err;
1949 }
1950 
1951 static int live_pphwsp_runtime(void *arg)
1952 {
1953 	struct intel_gt *gt = arg;
1954 	struct intel_engine_cs *engine;
1955 	enum intel_engine_id id;
1956 	int err = 0;
1957 
1958 	/*
1959 	 * Check that the cumulative context runtime, as stored in pphwsp[16],
1960 	 * is monotonic.
1961 	 */
1962 
1963 	for_each_engine(engine, gt, id) {
1964 		err = __live_pphwsp_runtime(engine);
1965 		if (err)
1966 			break;
1967 	}
1968 
1969 	if (igt_flush_test(gt->i915))
1970 		err = -EIO;
1971 
1972 	return err;
1973 }
1974 
1975 int intel_lrc_live_selftests(struct drm_i915_private *i915)
1976 {
1977 	static const struct i915_subtest tests[] = {
1978 		SUBTEST(live_lrc_layout),
1979 		SUBTEST(live_lrc_fixed),
1980 		SUBTEST(live_lrc_state),
1981 		SUBTEST(live_lrc_gpr),
1982 		SUBTEST(live_lrc_isolation),
1983 		SUBTEST(live_lrc_timestamp),
1984 		SUBTEST(live_lrc_garbage),
1985 		SUBTEST(live_pphwsp_runtime),
1986 		SUBTEST(live_lrc_indirect_ctx_bb),
1987 		SUBTEST(live_lrc_per_ctx_bb),
1988 	};
1989 
1990 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
1991 		return 0;
1992 
1993 	return intel_gt_live_subtests(tests, to_gt(i915));
1994 }
1995