1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2018 Intel Corporation
4 */
5
6 #include <linux/prime_numbers.h>
7
8 #include "gem/i915_gem_internal.h"
9
10 #include "i915_drv.h"
11 #include "i915_selftest.h"
12 #include "intel_engine_heartbeat.h"
13 #include "intel_engine_pm.h"
14 #include "intel_reset.h"
15 #include "intel_ring.h"
16 #include "selftest_engine_heartbeat.h"
17 #include "selftests/i915_random.h"
18 #include "selftests/igt_flush_test.h"
19 #include "selftests/igt_live_test.h"
20 #include "selftests/igt_spinner.h"
21 #include "selftests/lib_sw_fence.h"
22 #include "shmem_utils.h"
23
24 #include "gem/selftests/igt_gem_utils.h"
25 #include "gem/selftests/mock_context.h"
26
27 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
28 #define NUM_GPR 16
29 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
30
31 #define LRI_HEADER MI_INSTR(0x22, 0)
32 #define LRI_LENGTH_MASK GENMASK(7, 0)
33
34 static struct i915_vma *create_scratch(struct intel_gt *gt)
35 {
36 return __vm_create_scratch_for_read_pinned(&gt->ggtt->vm, PAGE_SIZE);
37 }
38
39 static bool is_active(struct i915_request *rq)
40 {
41 if (i915_request_is_active(rq))
42 return true;
43
44 if (i915_request_on_hold(rq))
45 return true;
46
47 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
48 return true;
49
50 return false;
51 }
52
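/*
 * Poll until the HW has acknowledged submission of @rq (no ELSP write
 * still pending and the request executing, on hold or started), giving
 * the submission tasklet a kick as we go. Returns 0 once the request is
 * in flight (or already completed), -ETIME after @timeout jiffies.
 */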
53 static int wait_for_submit(struct intel_engine_cs *engine,
54 struct i915_request *rq,
55 unsigned long timeout)
56 {
57 /* Ignore our own attempts to suppress excess tasklets */
58 tasklet_hi_schedule(&engine->sched_engine->tasklet);
59
60 timeout += jiffies;
61 do {
62 bool done = time_after(jiffies, timeout);
63
64 if (i915_request_completed(rq)) /* that was quick! */
65 return 0;
66
67 /* Wait until the HW has acknowledged the submission (or err) */
68 intel_engine_flush_submission(engine);
69 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
70 return 0;
71
72 if (done)
73 return -ETIME;
74
75 cond_resched();
76 } while (1);
77 }
78
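/*
 * Submit a barrier-priority request on @ce that writes 1 into the given
 * status-page @slot, releasing any MI_SEMAPHORE_WAIT polling on it.
 */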
79 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
80 {
81 const u32 offset =
82 i915_ggtt_offset(ce->engine->status_page.vma) +
83 offset_in_page(slot);
84 struct i915_request *rq;
85 u32 *cs;
86
87 rq = intel_context_create_request(ce);
88 if (IS_ERR(rq))
89 return PTR_ERR(rq);
90
91 cs = intel_ring_begin(rq, 4);
92 if (IS_ERR(cs)) {
93 i915_request_add(rq);
94 return PTR_ERR(cs);
95 }
96
97 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
98 *cs++ = offset;
99 *cs++ = 0;
100 *cs++ = 1;
101
102 intel_ring_advance(rq, cs);
103
104 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
105 i915_request_add(rq);
106 return 0;
107 }
108
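/*
 * Queue a kernel request behind the last request on @ce's timeline and
 * wait for it, ensuring @ce has been switched out and its context image
 * written back to memory before we go poking at it.
 */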
109 static int context_flush(struct intel_context *ce, long timeout)
110 {
111 struct i915_request *rq;
112 struct dma_fence *fence;
113 int err = 0;
114
115 rq = intel_engine_create_kernel_request(ce->engine);
116 if (IS_ERR(rq))
117 return PTR_ERR(rq);
118
119 fence = i915_active_fence_get(&ce->timeline->last_request);
120 if (fence) {
121 i915_request_await_dma_fence(rq, fence);
122 dma_fence_put(fence);
123 }
124
125 rq = i915_request_get(rq);
126 i915_request_add(rq);
127 if (i915_request_wait(rq, 0, timeout) < 0)
128 err = -ETIME;
129 i915_request_put(rq);
130
131 rmb(); /* We know the request is written, make sure all state is too! */
132 return err;
133 }
134
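/*
 * Mask of register-offset bits the HW compares for an LRI: all bits
 * unless MI_LRI_LRM_CS_MMIO is set, in which case only the low bits are
 * significant (see the bit 19 note in live_lrc_layout()).
 */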
135 static int get_lri_mask(struct intel_engine_cs *engine, u32 lri)
136 {
137 if ((lri & MI_LRI_LRM_CS_MMIO) == 0)
138 return ~0u;
139
140 if (GRAPHICS_VER(engine->i915) < 12)
141 return 0xfff;
142
143 switch (engine->class) {
144 default:
145 case RENDER_CLASS:
146 case COMPUTE_CLASS:
147 return 0x07ff;
148 case COPY_ENGINE_CLASS:
149 return 0x0fff;
150 case VIDEO_DECODE_CLASS:
151 case VIDEO_ENHANCEMENT_CLASS:
152 return 0x3fff;
153 }
154 }
155
156 static int live_lrc_layout(void *arg)
157 {
158 struct intel_gt *gt = arg;
159 struct intel_engine_cs *engine;
160 enum intel_engine_id id;
161 u32 *lrc;
162 int err;
163
164 /*
165 * Check that the register offsets we use to create the initial reg state
166 * match the layout saved by HW.
167 */
168
169 lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */
170 if (!lrc)
171 return -ENOMEM;
172 GEM_BUG_ON(offset_in_page(lrc));
173
174 err = 0;
175 for_each_engine(engine, gt, id) {
176 u32 *hw;
177 int dw;
178
179 if (!engine->default_state)
180 continue;
181
182 hw = shmem_pin_map(engine->default_state);
183 if (!hw) {
184 err = -ENOMEM;
185 break;
186 }
187 hw += LRC_STATE_OFFSET / sizeof(*hw);
188
189 __lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE),
190 engine->kernel_context, engine, true);
191
192 dw = 0;
193 do {
194 u32 lri = READ_ONCE(hw[dw]);
195 u32 lri_mask;
196
197 if (lri == 0) {
198 dw++;
199 continue;
200 }
201
202 if (lrc[dw] == 0) {
203 pr_debug("%s: skipped instruction %x at dword %d\n",
204 engine->name, lri, dw);
205 dw++;
206 continue;
207 }
208
209 if ((lri & GENMASK(31, 23)) != LRI_HEADER) {
210 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
211 engine->name, dw, lri);
212 err = -EINVAL;
213 break;
214 }
215
216 if (lrc[dw] != lri) {
217 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
218 engine->name, dw, lri, lrc[dw]);
219 err = -EINVAL;
220 break;
221 }
222
223 /*
224 * When bit 19 of MI_LOAD_REGISTER_IMM instruction
225 * opcode is set on Gen12+ devices, HW does not
226 * care about certain register address offsets, and
227 * instead checks the following for valid address
228 * ranges on specific engines:
229 * RCS && CCS: BITS(0 - 10)
230 * BCS: BITS(0 - 11)
231 * VECS && VCS: BITS(0 - 13)
232 */
233 lri_mask = get_lri_mask(engine, lri);
234
235 lri &= 0x7f;
236 lri++;
237 dw++;
238
239 while (lri) {
240 u32 offset = READ_ONCE(hw[dw]);
241
242 if ((offset ^ lrc[dw]) & lri_mask) {
243 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
244 engine->name, dw, offset, lrc[dw]);
245 err = -EINVAL;
246 break;
247 }
248
249 /*
250 * Skip over the actual register value as we
251 * expect that to differ.
252 */
253 dw += 2;
254 lri -= 2;
255 }
256 } while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
257
258 if (err) {
259 pr_info("%s: HW register image:\n", engine->name);
260 igt_hexdump(hw, PAGE_SIZE);
261
262 pr_info("%s: SW register image:\n", engine->name);
263 igt_hexdump(lrc, PAGE_SIZE);
264 }
265
266 shmem_unpin_map(engine->default_state, hw);
267 if (err)
268 break;
269 }
270
271 free_page((unsigned long)lrc);
272 return err;
273 }
274
275 static int find_offset(const u32 *lri, u32 offset)
276 {
277 int i;
278
279 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
280 if (lri[i] == offset)
281 return i;
282
283 return -1;
284 }
285
286 static int live_lrc_fixed(void *arg)
287 {
288 struct intel_gt *gt = arg;
289 struct intel_engine_cs *engine;
290 enum intel_engine_id id;
291 int err = 0;
292
293 /*
294 * Check the assumed register offsets match the actual locations in
295 * the context image.
296 */
297
298 for_each_engine(engine, gt, id) {
299 const struct {
300 u32 reg;
301 u32 offset;
302 const char *name;
303 } tbl[] = {
304 {
305 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
306 CTX_RING_START - 1,
307 "RING_START"
308 },
309 {
310 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
311 CTX_RING_CTL - 1,
312 "RING_CTL"
313 },
314 {
315 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
316 CTX_RING_HEAD - 1,
317 "RING_HEAD"
318 },
319 {
320 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
321 CTX_RING_TAIL - 1,
322 "RING_TAIL"
323 },
324 {
325 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
326 lrc_ring_mi_mode(engine),
327 "RING_MI_MODE"
328 },
329 {
330 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
331 CTX_BB_STATE - 1,
332 "BB_STATE"
333 },
334 {
335 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
336 lrc_ring_wa_bb_per_ctx(engine),
337 "RING_BB_PER_CTX_PTR"
338 },
339 {
340 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
341 lrc_ring_indirect_ptr(engine),
342 "RING_INDIRECT_CTX_PTR"
343 },
344 {
345 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
346 lrc_ring_indirect_offset(engine),
347 "RING_INDIRECT_CTX_OFFSET"
348 },
349 {
350 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
351 CTX_TIMESTAMP - 1,
352 "RING_CTX_TIMESTAMP"
353 },
354 {
355 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
356 lrc_ring_gpr0(engine),
357 "RING_CS_GPR0"
358 },
359 {
360 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
361 lrc_ring_cmd_buf_cctl(engine),
362 "RING_CMD_BUF_CCTL"
363 },
364 {
365 i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)),
366 lrc_ring_bb_offset(engine),
367 "RING_BB_OFFSET"
368 },
369 { },
370 }, *t;
371 u32 *hw;
372
373 if (!engine->default_state)
374 continue;
375
376 hw = shmem_pin_map(engine->default_state);
377 if (!hw) {
378 err = -ENOMEM;
379 break;
380 }
381 hw += LRC_STATE_OFFSET / sizeof(*hw);
382
383 for (t = tbl; t->name; t++) {
384 int dw = find_offset(hw, t->reg);
385
386 if (dw != t->offset) {
387 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
388 engine->name,
389 t->name,
390 t->reg,
391 dw,
392 t->offset);
393 err = -EINVAL;
394 }
395 }
396
397 shmem_unpin_map(engine->default_state, hw);
398 }
399
400 return err;
401 }
402
403 static int __live_lrc_state(struct intel_engine_cs *engine,
404 struct i915_vma *scratch)
405 {
406 struct intel_context *ce;
407 struct i915_request *rq;
408 struct i915_gem_ww_ctx ww;
409 enum {
410 RING_START_IDX = 0,
411 RING_TAIL_IDX,
412 MAX_IDX
413 };
414 u32 expected[MAX_IDX];
415 u32 *cs;
416 int err;
417 int n;
418
419 ce = intel_context_create(engine);
420 if (IS_ERR(ce))
421 return PTR_ERR(ce);
422
423 i915_gem_ww_ctx_init(&ww, false);
424 retry:
425 err = i915_gem_object_lock(scratch->obj, &ww);
426 if (!err)
427 err = intel_context_pin_ww(ce, &ww);
428 if (err)
429 goto err_put;
430
431 rq = i915_request_create(ce);
432 if (IS_ERR(rq)) {
433 err = PTR_ERR(rq);
434 goto err_unpin;
435 }
436
437 cs = intel_ring_begin(rq, 4 * MAX_IDX);
438 if (IS_ERR(cs)) {
439 err = PTR_ERR(cs);
440 i915_request_add(rq);
441 goto err_unpin;
442 }
443
444 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
445 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
446 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
447 *cs++ = 0;
448
449 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
450
451 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
452 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
453 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
454 *cs++ = 0;
455
456 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
457
458 i915_request_get(rq);
459 i915_request_add(rq);
460 if (err)
461 goto err_rq;
462
463 intel_engine_flush_submission(engine);
464 expected[RING_TAIL_IDX] = ce->ring->tail;
465
466 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
467 err = -ETIME;
468 goto err_rq;
469 }
470
471 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
472 if (IS_ERR(cs)) {
473 err = PTR_ERR(cs);
474 goto err_rq;
475 }
476
477 for (n = 0; n < MAX_IDX; n++) {
478 if (cs[n] != expected[n]) {
479 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
480 engine->name, n, cs[n], expected[n]);
481 err = -EINVAL;
482 break;
483 }
484 }
485
486 i915_gem_object_unpin_map(scratch->obj);
487
488 err_rq:
489 i915_request_put(rq);
490 err_unpin:
491 intel_context_unpin(ce);
492 err_put:
493 if (err == -EDEADLK) {
494 err = i915_gem_ww_ctx_backoff(&ww);
495 if (!err)
496 goto retry;
497 }
498 i915_gem_ww_ctx_fini(&ww);
499 intel_context_put(ce);
500 return err;
501 }
502
503 static int live_lrc_state(void *arg)
504 {
505 struct intel_gt *gt = arg;
506 struct intel_engine_cs *engine;
507 struct i915_vma *scratch;
508 enum intel_engine_id id;
509 int err = 0;
510
511 /*
512 * Check the live register state matches what we expect for this
513 * intel_context.
514 */
515
516 scratch = create_scratch(gt);
517 if (IS_ERR(scratch))
518 return PTR_ERR(scratch);
519
520 for_each_engine(engine, gt, id) {
521 err = __live_lrc_state(engine, scratch);
522 if (err)
523 break;
524 }
525
526 if (igt_flush_test(gt->i915))
527 err = -EIO;
528
529 i915_vma_unpin_and_release(&scratch, 0);
530 return err;
531 }
532
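/* Fill the engine's CS_GPR registers with a non-zero value via @ce. */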
533 static int gpr_make_dirty(struct intel_context *ce)
534 {
535 struct i915_request *rq;
536 u32 *cs;
537 int n;
538
539 rq = intel_context_create_request(ce);
540 if (IS_ERR(rq))
541 return PTR_ERR(rq);
542
543 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
544 if (IS_ERR(cs)) {
545 i915_request_add(rq);
546 return PTR_ERR(cs);
547 }
548
549 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
550 for (n = 0; n < NUM_GPR_DW; n++) {
551 *cs++ = CS_GPR(ce->engine, n);
552 *cs++ = STACK_MAGIC;
553 }
554 *cs++ = MI_NOOP;
555
556 intel_ring_advance(rq, cs);
557
558 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
559 i915_request_add(rq);
560
561 return 0;
562 }
563
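/*
 * Build a request on @ce that blocks on the status-page semaphore @slot
 * and then stores every CS_GPR into @scratch for later inspection.
 */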
564 static struct i915_request *
565 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
566 {
567 const u32 offset =
568 i915_ggtt_offset(ce->engine->status_page.vma) +
569 offset_in_page(slot);
570 struct i915_request *rq;
571 u32 *cs;
572 int err;
573 int n;
574
575 rq = intel_context_create_request(ce);
576 if (IS_ERR(rq))
577 return rq;
578
579 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
580 if (IS_ERR(cs)) {
581 i915_request_add(rq);
582 return ERR_CAST(cs);
583 }
584
585 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
586 *cs++ = MI_NOOP;
587
588 *cs++ = MI_SEMAPHORE_WAIT |
589 MI_SEMAPHORE_GLOBAL_GTT |
590 MI_SEMAPHORE_POLL |
591 MI_SEMAPHORE_SAD_NEQ_SDD;
592 *cs++ = 0;
593 *cs++ = offset;
594 *cs++ = 0;
595
596 for (n = 0; n < NUM_GPR_DW; n++) {
597 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
598 *cs++ = CS_GPR(ce->engine, n);
599 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
600 *cs++ = 0;
601 }
602
603 err = igt_vma_move_to_active_unlocked(scratch, rq, EXEC_OBJECT_WRITE);
604
605 i915_request_get(rq);
606 i915_request_add(rq);
607 if (err) {
608 i915_request_put(rq);
609 rq = ERR_PTR(err);
610 }
611
612 return rq;
613 }
614
615 static int __live_lrc_gpr(struct intel_engine_cs *engine,
616 struct i915_vma *scratch,
617 bool preempt)
618 {
619 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
620 struct intel_context *ce;
621 struct i915_request *rq;
622 u32 *cs;
623 int err;
624 int n;
625
626 if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS)
627 return 0; /* GPR only on rcs0 for gen8 */
628
629 err = gpr_make_dirty(engine->kernel_context);
630 if (err)
631 return err;
632
633 ce = intel_context_create(engine);
634 if (IS_ERR(ce))
635 return PTR_ERR(ce);
636
637 rq = __gpr_read(ce, scratch, slot);
638 if (IS_ERR(rq)) {
639 err = PTR_ERR(rq);
640 goto err_put;
641 }
642
643 err = wait_for_submit(engine, rq, HZ / 2);
644 if (err)
645 goto err_rq;
646
647 if (preempt) {
648 err = gpr_make_dirty(engine->kernel_context);
649 if (err)
650 goto err_rq;
651
652 err = emit_semaphore_signal(engine->kernel_context, slot);
653 if (err)
654 goto err_rq;
655
656 err = wait_for_submit(engine, rq, HZ / 2);
657 if (err)
658 goto err_rq;
659 } else {
660 slot[0] = 1;
661 wmb();
662 }
663
664 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
665 err = -ETIME;
666 goto err_rq;
667 }
668
669 cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
670 if (IS_ERR(cs)) {
671 err = PTR_ERR(cs);
672 goto err_rq;
673 }
674
675 for (n = 0; n < NUM_GPR_DW; n++) {
676 if (cs[n]) {
677 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
678 engine->name,
679 n / 2, n & 1 ? "udw" : "ldw",
680 cs[n]);
681 err = -EINVAL;
682 break;
683 }
684 }
685
686 i915_gem_object_unpin_map(scratch->obj);
687
688 err_rq:
689 memset32(&slot[0], -1, 4);
690 wmb();
691 i915_request_put(rq);
692 err_put:
693 intel_context_put(ce);
694 return err;
695 }
696
697 static int live_lrc_gpr(void *arg)
698 {
699 struct intel_gt *gt = arg;
700 struct intel_engine_cs *engine;
701 struct i915_vma *scratch;
702 enum intel_engine_id id;
703 int err = 0;
704
705 /*
706 * Check that GPR registers are cleared in new contexts as we need
707 * to avoid leaking any information from previous contexts.
708 */
709
710 scratch = create_scratch(gt);
711 if (IS_ERR(scratch))
712 return PTR_ERR(scratch);
713
714 for_each_engine(engine, gt, id) {
715 st_engine_heartbeat_disable(engine);
716
717 err = __live_lrc_gpr(engine, scratch, false);
718 if (err)
719 goto err;
720
721 err = __live_lrc_gpr(engine, scratch, true);
722 if (err)
723 goto err;
724
725 err:
726 st_engine_heartbeat_enable(engine);
727 if (igt_flush_test(gt->i915))
728 err = -EIO;
729 if (err)
730 break;
731 }
732
733 i915_vma_unpin_and_release(&scratch, 0);
734 return err;
735 }
736
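/*
 * Build a request on @ce that blocks on the status-page semaphore @slot
 * and then stores RING_CTX_TIMESTAMP back into the status page at @idx.
 */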
737 static struct i915_request *
738 create_timestamp(struct intel_context *ce, void *slot, int idx)
739 {
740 const u32 offset =
741 i915_ggtt_offset(ce->engine->status_page.vma) +
742 offset_in_page(slot);
743 struct i915_request *rq;
744 u32 *cs;
745 int err;
746
747 rq = intel_context_create_request(ce);
748 if (IS_ERR(rq))
749 return rq;
750
751 cs = intel_ring_begin(rq, 10);
752 if (IS_ERR(cs)) {
753 err = PTR_ERR(cs);
754 goto err;
755 }
756
757 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
758 *cs++ = MI_NOOP;
759
760 *cs++ = MI_SEMAPHORE_WAIT |
761 MI_SEMAPHORE_GLOBAL_GTT |
762 MI_SEMAPHORE_POLL |
763 MI_SEMAPHORE_SAD_NEQ_SDD;
764 *cs++ = 0;
765 *cs++ = offset;
766 *cs++ = 0;
767
768 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
769 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
770 *cs++ = offset + idx * sizeof(u32);
771 *cs++ = 0;
772
773 intel_ring_advance(rq, cs);
774
775 err = 0;
776 err:
777 i915_request_get(rq);
778 i915_request_add(rq);
779 if (err) {
780 i915_request_put(rq);
781 return ERR_PTR(err);
782 }
783
784 return rq;
785 }
786
787 struct lrc_timestamp {
788 struct intel_engine_cs *engine;
789 struct intel_context *ce[2];
790 u32 poison;
791 };
792
793 static bool timestamp_advanced(u32 start, u32 end)
794 {
795 return (s32)(end - start) > 0;
796 }
797
798 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
799 {
800 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
801 struct i915_request *rq;
802 u32 timestamp;
803 int err = 0;
804
805 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
806 rq = create_timestamp(arg->ce[0], slot, 1);
807 if (IS_ERR(rq))
808 return PTR_ERR(rq);
809
810 err = wait_for_submit(rq->engine, rq, HZ / 2);
811 if (err)
812 goto err;
813
814 if (preempt) {
815 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
816 err = emit_semaphore_signal(arg->ce[1], slot);
817 if (err)
818 goto err;
819 } else {
820 slot[0] = 1;
821 wmb();
822 }
823
824 /* And wait for switch to kernel (to save our context to memory) */
825 err = context_flush(arg->ce[0], HZ / 2);
826 if (err)
827 goto err;
828
829 if (!timestamp_advanced(arg->poison, slot[1])) {
830 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
831 arg->engine->name, preempt ? "preempt" : "simple",
832 arg->poison, slot[1]);
833 err = -EINVAL;
834 }
835
836 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
837 if (!timestamp_advanced(slot[1], timestamp)) {
838 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
839 arg->engine->name, preempt ? "preempt" : "simple",
840 slot[1], timestamp);
841 err = -EINVAL;
842 }
843
844 err:
845 memset32(slot, -1, 4);
846 i915_request_put(rq);
847 return err;
848 }
849
850 static int live_lrc_timestamp(void *arg)
851 {
852 struct lrc_timestamp data = {};
853 struct intel_gt *gt = arg;
854 enum intel_engine_id id;
855 const u32 poison[] = {
856 0,
857 S32_MAX,
858 (u32)S32_MAX + 1,
859 U32_MAX,
860 };
861
862 /*
863 * This test was designed to isolate a hardware bug.
864 * The bug was found and fixed in future generations but
865 * now the test pollutes our CI on previous generations.
866 */
867 if (GRAPHICS_VER(gt->i915) == 12)
868 return 0;
869
870 /*
871 * We want to verify that the timestamp is saved and restored across
872 * context switches and is monotonic.
873 *
874 * So we do this with a little bit of LRC poisoning to check various
875 * boundary conditions, and see what happens if we preempt the context
876 * with a second request (carrying more poison into the timestamp).
877 */
878
879 for_each_engine(data.engine, gt, id) {
880 int i, err = 0;
881
882 st_engine_heartbeat_disable(data.engine);
883
884 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
885 struct intel_context *tmp;
886
887 tmp = intel_context_create(data.engine);
888 if (IS_ERR(tmp)) {
889 err = PTR_ERR(tmp);
890 goto err;
891 }
892
893 err = intel_context_pin(tmp);
894 if (err) {
895 intel_context_put(tmp);
896 goto err;
897 }
898
899 data.ce[i] = tmp;
900 }
901
902 for (i = 0; i < ARRAY_SIZE(poison); i++) {
903 data.poison = poison[i];
904
905 err = __lrc_timestamp(&data, false);
906 if (err)
907 break;
908
909 err = __lrc_timestamp(&data, true);
910 if (err)
911 break;
912 }
913
914 err:
915 st_engine_heartbeat_enable(data.engine);
916 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
917 if (!data.ce[i])
918 break;
919
920 intel_context_unpin(data.ce[i]);
921 intel_context_put(data.ce[i]);
922 }
923
924 if (igt_flush_test(gt->i915))
925 err = -EIO;
926 if (err)
927 return err;
928 }
929
930 return 0;
931 }
932
933 static struct i915_vma *
934 create_user_vma(struct i915_address_space *vm, unsigned long size)
935 {
936 struct drm_i915_gem_object *obj;
937 struct i915_vma *vma;
938 int err;
939
940 obj = i915_gem_object_create_internal(vm->i915, size);
941 if (IS_ERR(obj))
942 return ERR_CAST(obj);
943
944 vma = i915_vma_instance(obj, vm, NULL);
945 if (IS_ERR(vma)) {
946 i915_gem_object_put(obj);
947 return vma;
948 }
949
950 err = i915_vma_pin(vma, 0, 0, PIN_USER);
951 if (err) {
952 i915_gem_object_put(obj);
953 return ERR_PTR(err);
954 }
955
956 return vma;
957 }
958
959 static u32 safe_poison(u32 offset, u32 poison)
960 {
961 /*
962 * Do not enable predication as it will nop all subsequent commands,
963 * not only disabling the tests (by preventing all the other SRM) but
964 * also preventing the arbitration events at the end of the request.
965 */
966 if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
967 poison &= ~REG_BIT(0);
968
969 return poison;
970 }
971
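/*
 * Build a user batch that walks the LRI lists in the default context
 * image and emits an SRM for each register named there, dumping the live
 * values into @scratch.
 */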
972 static struct i915_vma *
973 store_context(struct intel_context *ce, struct i915_vma *scratch)
974 {
975 struct i915_vma *batch;
976 u32 dw, x, *cs, *hw;
977 u32 *defaults;
978
979 batch = create_user_vma(ce->vm, SZ_64K);
980 if (IS_ERR(batch))
981 return batch;
982
983 cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
984 if (IS_ERR(cs)) {
985 i915_vma_put(batch);
986 return ERR_CAST(cs);
987 }
988
989 defaults = shmem_pin_map(ce->engine->default_state);
990 if (!defaults) {
991 i915_gem_object_unpin_map(batch->obj);
992 i915_vma_put(batch);
993 return ERR_PTR(-ENOMEM);
994 }
995
996 x = 0;
997 dw = 0;
998 hw = defaults;
999 hw += LRC_STATE_OFFSET / sizeof(*hw);
1000 do {
1001 u32 len = hw[dw] & LRI_LENGTH_MASK;
1002
1003 /*
1004 * Keep it simple, skip parsing complex commands
1005 *
1006 * At present, there are no more MI_LOAD_REGISTER_IMM
1007 * commands after the first 3D state command. Rather
1008 * than include a table (see i915_cmd_parser.c) of all
1009 * the possible commands and their instruction lengths
1010 * (or mask for variable length instructions), assume
1011 * we have gathered the complete list of registers and
1012 * bail out.
1013 */
1014 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1015 break;
1016
1017 if (hw[dw] == 0) {
1018 dw++;
1019 continue;
1020 }
1021
1022 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1023 /* Assume all other MI commands match LRI length mask */
1024 dw += len + 2;
1025 continue;
1026 }
1027
1028 if (!len) {
1029 pr_err("%s: invalid LRI found in context image\n",
1030 ce->engine->name);
1031 igt_hexdump(defaults, PAGE_SIZE);
1032 break;
1033 }
1034
1035 dw++;
1036 len = (len + 1) / 2;
1037 while (len--) {
1038 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
1039 *cs++ = hw[dw];
1040 *cs++ = lower_32_bits(i915_vma_offset(scratch) + x);
1041 *cs++ = upper_32_bits(i915_vma_offset(scratch) + x);
1042
1043 dw += 2;
1044 x += 4;
1045 }
1046 } while (dw < PAGE_SIZE / sizeof(u32) &&
1047 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1048
1049 *cs++ = MI_BATCH_BUFFER_END;
1050
1051 shmem_unpin_map(ce->engine->default_state, defaults);
1052
1053 i915_gem_object_flush_map(batch->obj);
1054 i915_gem_object_unpin_map(batch->obj);
1055
1056 return batch;
1057 }
1058
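/*
 * Submit a request on @ce that dumps the context registers into @before,
 * blocks on the @sema semaphore (giving another context the chance to
 * poison those registers), then dumps them again into @after.
 */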
1059 static struct i915_request *
1060 record_registers(struct intel_context *ce,
1061 struct i915_vma *before,
1062 struct i915_vma *after,
1063 u32 *sema)
1064 {
1065 struct i915_vma *b_before, *b_after;
1066 struct i915_request *rq;
1067 u32 *cs;
1068 int err;
1069
1070 b_before = store_context(ce, before);
1071 if (IS_ERR(b_before))
1072 return ERR_CAST(b_before);
1073
1074 b_after = store_context(ce, after);
1075 if (IS_ERR(b_after)) {
1076 rq = ERR_CAST(b_after);
1077 goto err_before;
1078 }
1079
1080 rq = intel_context_create_request(ce);
1081 if (IS_ERR(rq))
1082 goto err_after;
1083
1084 err = igt_vma_move_to_active_unlocked(before, rq, EXEC_OBJECT_WRITE);
1085 if (err)
1086 goto err_rq;
1087
1088 err = igt_vma_move_to_active_unlocked(b_before, rq, 0);
1089 if (err)
1090 goto err_rq;
1091
1092 err = igt_vma_move_to_active_unlocked(after, rq, EXEC_OBJECT_WRITE);
1093 if (err)
1094 goto err_rq;
1095
1096 err = igt_vma_move_to_active_unlocked(b_after, rq, 0);
1097 if (err)
1098 goto err_rq;
1099
1100 cs = intel_ring_begin(rq, 14);
1101 if (IS_ERR(cs)) {
1102 err = PTR_ERR(cs);
1103 goto err_rq;
1104 }
1105
1106 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1107 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1108 *cs++ = lower_32_bits(i915_vma_offset(b_before));
1109 *cs++ = upper_32_bits(i915_vma_offset(b_before));
1110
1111 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1112 *cs++ = MI_SEMAPHORE_WAIT |
1113 MI_SEMAPHORE_GLOBAL_GTT |
1114 MI_SEMAPHORE_POLL |
1115 MI_SEMAPHORE_SAD_NEQ_SDD;
1116 *cs++ = 0;
1117 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1118 offset_in_page(sema);
1119 *cs++ = 0;
1120 *cs++ = MI_NOOP;
1121
1122 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1123 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1124 *cs++ = lower_32_bits(i915_vma_offset(b_after));
1125 *cs++ = upper_32_bits(i915_vma_offset(b_after));
1126
1127 intel_ring_advance(rq, cs);
1128
1129 WRITE_ONCE(*sema, 0);
1130 i915_request_get(rq);
1131 i915_request_add(rq);
1132 err_after:
1133 i915_vma_put(b_after);
1134 err_before:
1135 i915_vma_put(b_before);
1136 return rq;
1137
1138 err_rq:
1139 i915_request_add(rq);
1140 rq = ERR_PTR(err);
1141 goto err_after;
1142 }
1143
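/*
 * Build a user batch that replays the LRI lists from the default context
 * image, loading @poison (suitably masked) into every register instead
 * of the default values.
 */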
1144 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
1145 {
1146 struct i915_vma *batch;
1147 u32 dw, *cs, *hw;
1148 u32 *defaults;
1149
1150 batch = create_user_vma(ce->vm, SZ_64K);
1151 if (IS_ERR(batch))
1152 return batch;
1153
1154 cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
1155 if (IS_ERR(cs)) {
1156 i915_vma_put(batch);
1157 return ERR_CAST(cs);
1158 }
1159
1160 defaults = shmem_pin_map(ce->engine->default_state);
1161 if (!defaults) {
1162 i915_gem_object_unpin_map(batch->obj);
1163 i915_vma_put(batch);
1164 return ERR_PTR(-ENOMEM);
1165 }
1166
1167 dw = 0;
1168 hw = defaults;
1169 hw += LRC_STATE_OFFSET / sizeof(*hw);
1170 do {
1171 u32 len = hw[dw] & LRI_LENGTH_MASK;
1172
1173 /* For simplicity, break parsing at the first complex command */
1174 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1175 break;
1176
1177 if (hw[dw] == 0) {
1178 dw++;
1179 continue;
1180 }
1181
1182 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1183 dw += len + 2;
1184 continue;
1185 }
1186
1187 if (!len) {
1188 pr_err("%s: invalid LRI found in context image\n",
1189 ce->engine->name);
1190 igt_hexdump(defaults, PAGE_SIZE);
1191 break;
1192 }
1193
1194 dw++;
1195 len = (len + 1) / 2;
1196 *cs++ = MI_LOAD_REGISTER_IMM(len);
1197 while (len--) {
1198 *cs++ = hw[dw];
1199 *cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine,
1200 MI_LRI_LRM_CS_MMIO),
1201 poison);
1202 dw += 2;
1203 }
1204 } while (dw < PAGE_SIZE / sizeof(u32) &&
1205 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1206
1207 *cs++ = MI_BATCH_BUFFER_END;
1208
1209 shmem_unpin_map(ce->engine->default_state, defaults);
1210
1211 i915_gem_object_flush_map(batch->obj);
1212 i915_gem_object_unpin_map(batch->obj);
1213
1214 return batch;
1215 }
1216
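/*
 * Run the poisoning batch on @ce and then signal the status-page
 * semaphore so that the recording context may resume and read back the
 * registers.
 */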
1217 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
1218 {
1219 struct i915_request *rq;
1220 struct i915_vma *batch;
1221 u32 *cs;
1222 int err;
1223
1224 batch = load_context(ce, poison);
1225 if (IS_ERR(batch))
1226 return PTR_ERR(batch);
1227
1228 rq = intel_context_create_request(ce);
1229 if (IS_ERR(rq)) {
1230 err = PTR_ERR(rq);
1231 goto err_batch;
1232 }
1233
1234 err = igt_vma_move_to_active_unlocked(batch, rq, 0);
1235 if (err)
1236 goto err_rq;
1237
1238 cs = intel_ring_begin(rq, 8);
1239 if (IS_ERR(cs)) {
1240 err = PTR_ERR(cs);
1241 goto err_rq;
1242 }
1243
1244 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1245 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1246 *cs++ = lower_32_bits(i915_vma_offset(batch));
1247 *cs++ = upper_32_bits(i915_vma_offset(batch));
1248
1249 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1250 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1251 offset_in_page(sema);
1252 *cs++ = 0;
1253 *cs++ = 1;
1254
1255 intel_ring_advance(rq, cs);
1256
1257 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1258 err_rq:
1259 i915_request_add(rq);
1260 err_batch:
1261 i915_vma_put(batch);
1262 return err;
1263 }
1264
1265 static bool is_moving(u32 a, u32 b)
1266 {
1267 return a != b;
1268 }
1269
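/*
 * Compare the reference dumps against the dumps taken after the poisoning
 * attempt: any register that was stable across the reference runs but
 * differs in the result dumps (other than RING_HEAD/TAIL) indicates a
 * leak between contexts.
 */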
1270 static int compare_isolation(struct intel_engine_cs *engine,
1271 struct i915_vma *ref[2],
1272 struct i915_vma *result[2],
1273 struct intel_context *ce,
1274 u32 poison)
1275 {
1276 u32 x, dw, *hw, *lrc;
1277 u32 *A[2], *B[2];
1278 u32 *defaults;
1279 int err = 0;
1280
1281 A[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC);
1282 if (IS_ERR(A[0]))
1283 return PTR_ERR(A[0]);
1284
1285 A[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC);
1286 if (IS_ERR(A[1])) {
1287 err = PTR_ERR(A[1]);
1288 goto err_A0;
1289 }
1290
1291 B[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC);
1292 if (IS_ERR(B[0])) {
1293 err = PTR_ERR(B[0]);
1294 goto err_A1;
1295 }
1296
1297 B[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC);
1298 if (IS_ERR(B[1])) {
1299 err = PTR_ERR(B[1]);
1300 goto err_B0;
1301 }
1302
1303 lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
1304 intel_gt_coherent_map_type(engine->gt,
1305 ce->state->obj,
1306 false));
1307 if (IS_ERR(lrc)) {
1308 err = PTR_ERR(lrc);
1309 goto err_B1;
1310 }
1311 lrc += LRC_STATE_OFFSET / sizeof(*hw);
1312
1313 defaults = shmem_pin_map(ce->engine->default_state);
1314 if (!defaults) {
1315 err = -ENOMEM;
1316 goto err_lrc;
1317 }
1318
1319 x = 0;
1320 dw = 0;
1321 hw = defaults;
1322 hw += LRC_STATE_OFFSET / sizeof(*hw);
1323 do {
1324 u32 len = hw[dw] & LRI_LENGTH_MASK;
1325
1326 /* For simplicity, break parsing at the first complex command */
1327 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1328 break;
1329
1330 if (hw[dw] == 0) {
1331 dw++;
1332 continue;
1333 }
1334
1335 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1336 dw += len + 2;
1337 continue;
1338 }
1339
1340 if (!len) {
1341 pr_err("%s: invalid LRI found in context image\n",
1342 engine->name);
1343 igt_hexdump(defaults, PAGE_SIZE);
1344 break;
1345 }
1346
1347 dw++;
1348 len = (len + 1) / 2;
1349 while (len--) {
1350 if (!is_moving(A[0][x], A[1][x]) &&
1351 (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
1352 switch (hw[dw] & 4095) {
1353 case 0x30: /* RING_HEAD */
1354 case 0x34: /* RING_TAIL */
1355 break;
1356
1357 default:
1358 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
1359 engine->name, dw,
1360 hw[dw], hw[dw + 1],
1361 A[0][x], B[0][x], B[1][x],
1362 poison, lrc[dw + 1]);
1363 err = -EINVAL;
1364 }
1365 }
1366 dw += 2;
1367 x++;
1368 }
1369 } while (dw < PAGE_SIZE / sizeof(u32) &&
1370 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1371
1372 shmem_unpin_map(ce->engine->default_state, defaults);
1373 err_lrc:
1374 i915_gem_object_unpin_map(ce->state->obj);
1375 err_B1:
1376 i915_gem_object_unpin_map(result[1]->obj);
1377 err_B0:
1378 i915_gem_object_unpin_map(result[0]->obj);
1379 err_A1:
1380 i915_gem_object_unpin_map(ref[1]->obj);
1381 err_A0:
1382 i915_gem_object_unpin_map(ref[0]->obj);
1383 return err;
1384 }
1385
1386 static struct i915_vma *
1387 create_result_vma(struct i915_address_space *vm, unsigned long sz)
1388 {
1389 struct i915_vma *vma;
1390 void *ptr;
1391
1392 vma = create_user_vma(vm, sz);
1393 if (IS_ERR(vma))
1394 return vma;
1395
1396 /* Set the results to a known value distinct from the poison */
1397 ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
1398 if (IS_ERR(ptr)) {
1399 i915_vma_put(vma);
1400 return ERR_CAST(ptr);
1401 }
1402
1403 memset(ptr, POISON_INUSE, vma->size);
1404 i915_gem_object_flush_map(vma->obj);
1405 i915_gem_object_unpin_map(vma->obj);
1406
1407 return vma;
1408 }
1409
1410 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
1411 {
1412 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
1413 struct i915_vma *ref[2], *result[2];
1414 struct intel_context *A, *B;
1415 struct i915_request *rq;
1416 int err;
1417
1418 A = intel_context_create(engine);
1419 if (IS_ERR(A))
1420 return PTR_ERR(A);
1421
1422 B = intel_context_create(engine);
1423 if (IS_ERR(B)) {
1424 err = PTR_ERR(B);
1425 goto err_A;
1426 }
1427
1428 ref[0] = create_result_vma(A->vm, SZ_64K);
1429 if (IS_ERR(ref[0])) {
1430 err = PTR_ERR(ref[0]);
1431 goto err_B;
1432 }
1433
1434 ref[1] = create_result_vma(A->vm, SZ_64K);
1435 if (IS_ERR(ref[1])) {
1436 err = PTR_ERR(ref[1]);
1437 goto err_ref0;
1438 }
1439
1440 rq = record_registers(A, ref[0], ref[1], sema);
1441 if (IS_ERR(rq)) {
1442 err = PTR_ERR(rq);
1443 goto err_ref1;
1444 }
1445
1446 WRITE_ONCE(*sema, 1);
1447 wmb();
1448
1449 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1450 i915_request_put(rq);
1451 err = -ETIME;
1452 goto err_ref1;
1453 }
1454 i915_request_put(rq);
1455
1456 result[0] = create_result_vma(A->vm, SZ_64K);
1457 if (IS_ERR(result[0])) {
1458 err = PTR_ERR(result[0]);
1459 goto err_ref1;
1460 }
1461
1462 result[1] = create_result_vma(A->vm, SZ_64K);
1463 if (IS_ERR(result[1])) {
1464 err = PTR_ERR(result[1]);
1465 goto err_result0;
1466 }
1467
1468 rq = record_registers(A, result[0], result[1], sema);
1469 if (IS_ERR(rq)) {
1470 err = PTR_ERR(rq);
1471 goto err_result1;
1472 }
1473
1474 err = poison_registers(B, poison, sema);
1475 if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) {
1476 pr_err("%s(%s): wait for results timed out\n",
1477 __func__, engine->name);
1478 err = -ETIME;
1479 }
1480
1481 /* Always cancel the semaphore wait, just in case the GPU gets stuck */
1482 WRITE_ONCE(*sema, -1);
1483 i915_request_put(rq);
1484 if (err)
1485 goto err_result1;
1486
1487 err = compare_isolation(engine, ref, result, A, poison);
1488
1489 err_result1:
1490 i915_vma_put(result[1]);
1491 err_result0:
1492 i915_vma_put(result[0]);
1493 err_ref1:
1494 i915_vma_put(ref[1]);
1495 err_ref0:
1496 i915_vma_put(ref[0]);
1497 err_B:
1498 intel_context_put(B);
1499 err_A:
1500 intel_context_put(A);
1501 return err;
1502 }
1503
1504 static bool skip_isolation(const struct intel_engine_cs *engine)
1505 {
1506 if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9)
1507 return true;
1508
1509 if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11)
1510 return true;
1511
1512 return false;
1513 }
1514
1515 static int live_lrc_isolation(void *arg)
1516 {
1517 struct intel_gt *gt = arg;
1518 struct intel_engine_cs *engine;
1519 enum intel_engine_id id;
1520 const u32 poison[] = {
1521 STACK_MAGIC,
1522 0x3a3a3a3a,
1523 0x5c5c5c5c,
1524 0xffffffff,
1525 0xffff0000,
1526 };
1527 int err = 0;
1528
1529 /*
1530 * Our goal is to try to verify that per-context state cannot be
1531 * tampered with by another non-privileged client.
1532 *
1533 * We take the list of context registers from the LRI in the default
1534 * context image and attempt to modify that list from a remote context.
1535 */
1536
1537 for_each_engine(engine, gt, id) {
1538 int i;
1539
1540 /* Just don't even ask */
1541 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
1542 skip_isolation(engine))
1543 continue;
1544
1545 intel_engine_pm_get(engine);
1546 for (i = 0; i < ARRAY_SIZE(poison); i++) {
1547 int result;
1548
1549 result = __lrc_isolation(engine, poison[i]);
1550 if (result && !err)
1551 err = result;
1552
1553 result = __lrc_isolation(engine, ~poison[i]);
1554 if (result && !err)
1555 err = result;
1556 }
1557 intel_engine_pm_put(engine);
1558 if (igt_flush_test(gt->i915)) {
1559 err = -EIO;
1560 break;
1561 }
1562 }
1563
1564 return err;
1565 }
1566
1567 static int wabb_ctx_submit_req(struct intel_context *ce)
1568 {
1569 struct i915_request *rq;
1570 int err = 0;
1571
1572 rq = intel_context_create_request(ce);
1573 if (IS_ERR(rq))
1574 return PTR_ERR(rq);
1575
1576 i915_request_get(rq);
1577 i915_request_add(rq);
1578
1579 if (i915_request_wait(rq, 0, HZ / 5) < 0)
1580 err = -ETIME;
1581
1582 i915_request_put(rq);
1583
1584 return err;
1585 }
1586
1587 #define CTX_BB_CANARY_OFFSET (3 * 1024)
1588 #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
1589
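/*
 * Emit an SRM that copies RING_START into the canary slot of the
 * context's wa_bb page (or the per-ctx page when @per_ctx is set).
 */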
1590 static u32 *
1591 emit_wabb_ctx_canary(const struct intel_context *ce,
1592 u32 *cs, bool per_ctx)
1593 {
1594 *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
1595 MI_SRM_LRM_GLOBAL_GTT |
1596 MI_LRI_LRM_CS_MMIO;
1597 *cs++ = i915_mmio_reg_offset(RING_START(0));
1598 *cs++ = i915_ggtt_offset(ce->state) +
1599 context_wa_bb_offset(ce) +
1600 CTX_BB_CANARY_OFFSET +
1601 (per_ctx ? PAGE_SIZE : 0);
1602 *cs++ = 0;
1603
1604 return cs;
1605 }
1606
1607 static u32 *
1608 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1609 {
1610 return emit_wabb_ctx_canary(ce, cs, false);
1611 }
1612
1613 static u32 *
1614 emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1615 {
1616 return emit_wabb_ctx_canary(ce, cs, true);
1617 }
1618
1619 static void
1620 wabb_ctx_setup(struct intel_context *ce, bool per_ctx)
1621 {
1622 u32 *cs = context_wabb(ce, per_ctx);
1623
1624 cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
1625
1626 if (per_ctx)
1627 setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary);
1628 else
1629 setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
1630 }
1631
1632 static bool check_ring_start(struct intel_context *ce, bool per_ctx)
1633 {
1634 const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
1635 LRC_STATE_OFFSET + context_wa_bb_offset(ce) +
1636 (per_ctx ? PAGE_SIZE : 0);
1637
1638 if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
1639 return true;
1640
1641 pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
1642 ctx_bb[CTX_BB_CANARY_INDEX],
1643 ce->lrc_reg_state[CTX_RING_START]);
1644
1645 return false;
1646 }
1647
1648 static int wabb_ctx_check(struct intel_context *ce, bool per_ctx)
1649 {
1650 int err;
1651
1652 err = wabb_ctx_submit_req(ce);
1653 if (err)
1654 return err;
1655
1656 if (!check_ring_start(ce, per_ctx))
1657 return -EINVAL;
1658
1659 return 0;
1660 }
1661
1662 static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx)
1663 {
1664 struct intel_context *a, *b;
1665 int err;
1666
1667 a = intel_context_create(engine);
1668 if (IS_ERR(a))
1669 return PTR_ERR(a);
1670 err = intel_context_pin(a);
1671 if (err)
1672 goto put_a;
1673
1674 b = intel_context_create(engine);
1675 if (IS_ERR(b)) {
1676 err = PTR_ERR(b);
1677 goto unpin_a;
1678 }
1679 err = intel_context_pin(b);
1680 if (err)
1681 goto put_b;
1682
1683 /* We use the already reserved extra page in context state */
1684 if (!a->wa_bb_page) {
1685 GEM_BUG_ON(b->wa_bb_page);
1686 GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12);
1687 goto unpin_b;
1688 }
1689
1690 /*
1691 * In order to test that our per context bb is truly per context,
1692 * and executes at the intended spot in the context restore process,
1693 * make the batch store the ring start value to memory.
1694 * As the ring start is restored before the indirect ctx bb runs, and
1695 * as it will be different for each context, it fits this purpose.
1696 */
1697 wabb_ctx_setup(a, per_ctx);
1698 wabb_ctx_setup(b, per_ctx);
1699
1700 err = wabb_ctx_check(a, per_ctx);
1701 if (err)
1702 goto unpin_b;
1703
1704 err = wabb_ctx_check(b, per_ctx);
1705
1706 unpin_b:
1707 intel_context_unpin(b);
1708 put_b:
1709 intel_context_put(b);
1710 unpin_a:
1711 intel_context_unpin(a);
1712 put_a:
1713 intel_context_put(a);
1714
1715 return err;
1716 }
1717
1718 static int lrc_wabb_ctx(void *arg, bool per_ctx)
1719 {
1720 struct intel_gt *gt = arg;
1721 struct intel_engine_cs *engine;
1722 enum intel_engine_id id;
1723 int err = 0;
1724
1725 for_each_engine(engine, gt, id) {
1726 intel_engine_pm_get(engine);
1727 err = __lrc_wabb_ctx(engine, per_ctx);
1728 intel_engine_pm_put(engine);
1729
1730 if (igt_flush_test(gt->i915))
1731 err = -EIO;
1732
1733 if (err)
1734 break;
1735 }
1736
1737 return err;
1738 }
1739
1740 static int live_lrc_indirect_ctx_bb(void *arg)
1741 {
1742 return lrc_wabb_ctx(arg, false);
1743 }
1744
1745 static int live_lrc_per_ctx_bb(void *arg)
1746 {
1747 return lrc_wabb_ctx(arg, true);
1748 }
1749
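/*
 * Trigger an engine reset underneath the corrupted context, taking the
 * per-engine reset bit and disabling the submission tasklet around it.
 */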
1750 static void garbage_reset(struct intel_engine_cs *engine,
1751 struct i915_request *rq)
1752 {
1753 const unsigned int bit = I915_RESET_ENGINE + engine->id;
1754 unsigned long *lock = &engine->gt->reset.flags;
1755
1756 local_bh_disable();
1757 if (!test_and_set_bit(bit, lock)) {
1758 tasklet_disable(&engine->sched_engine->tasklet);
1759
1760 if (!rq->fence.error)
1761 __intel_engine_reset_bh(engine, NULL);
1762
1763 tasklet_enable(&engine->sched_engine->tasklet);
1764 clear_and_wake_up_bit(bit, lock);
1765 }
1766 local_bh_enable();
1767 }
1768
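/*
 * Pin @ce, scribble random bytes over its register state and submit a
 * request to it, returning the request so the caller can watch the
 * fallout.
 */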
1769 static struct i915_request *garbage(struct intel_context *ce,
1770 struct rnd_state *prng)
1771 {
1772 struct i915_request *rq;
1773 int err;
1774
1775 err = intel_context_pin(ce);
1776 if (err)
1777 return ERR_PTR(err);
1778
1779 prandom_bytes_state(prng,
1780 ce->lrc_reg_state,
1781 ce->engine->context_size -
1782 LRC_STATE_OFFSET);
1783
1784 rq = intel_context_create_request(ce);
1785 if (IS_ERR(rq)) {
1786 err = PTR_ERR(rq);
1787 goto err_unpin;
1788 }
1789
1790 i915_request_get(rq);
1791 i915_request_add(rq);
1792 return rq;
1793
1794 err_unpin:
1795 intel_context_unpin(ce);
1796 return ERR_PTR(err);
1797 }
1798
1799 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
1800 {
1801 struct intel_context *ce;
1802 struct i915_request *hang;
1803 int err = 0;
1804
1805 ce = intel_context_create(engine);
1806 if (IS_ERR(ce))
1807 return PTR_ERR(ce);
1808
1809 hang = garbage(ce, prng);
1810 if (IS_ERR(hang)) {
1811 err = PTR_ERR(hang);
1812 goto err_ce;
1813 }
1814
1815 if (wait_for_submit(engine, hang, HZ / 2)) {
1816 i915_request_put(hang);
1817 err = -ETIME;
1818 goto err_ce;
1819 }
1820
1821 intel_context_set_banned(ce);
1822 garbage_reset(engine, hang);
1823
1824 intel_engine_flush_submission(engine);
1825 if (!hang->fence.error) {
1826 i915_request_put(hang);
1827 pr_err("%s: corrupted context was not reset\n",
1828 engine->name);
1829 err = -EINVAL;
1830 goto err_ce;
1831 }
1832
1833 if (i915_request_wait(hang, 0, HZ / 2) < 0) {
1834 pr_err("%s: corrupted context did not recover\n",
1835 engine->name);
1836 i915_request_put(hang);
1837 err = -EIO;
1838 goto err_ce;
1839 }
1840 i915_request_put(hang);
1841
1842 err_ce:
1843 intel_context_put(ce);
1844 return err;
1845 }
1846
1847 static int live_lrc_garbage(void *arg)
1848 {
1849 struct intel_gt *gt = arg;
1850 struct intel_engine_cs *engine;
1851 enum intel_engine_id id;
1852
1853 /*
1854 * Verify that we can recover if one context state is completely
1855 * corrupted.
1856 */
1857
1858 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
1859 return 0;
1860
1861 for_each_engine(engine, gt, id) {
1862 I915_RND_STATE(prng);
1863 int err = 0, i;
1864
1865 if (!intel_has_reset_engine(engine->gt))
1866 continue;
1867
1868 intel_engine_pm_get(engine);
1869 for (i = 0; i < 3; i++) {
1870 err = __lrc_garbage(engine, &prng);
1871 if (err)
1872 break;
1873 }
1874 intel_engine_pm_put(engine);
1875
1876 if (igt_flush_test(gt->i915))
1877 err = -EIO;
1878 if (err)
1879 return err;
1880 }
1881
1882 return 0;
1883 }
1884
1885 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
1886 {
1887 struct intel_context *ce;
1888 struct i915_request *rq;
1889 IGT_TIMEOUT(end_time);
1890 int err;
1891
1892 ce = intel_context_create(engine);
1893 if (IS_ERR(ce))
1894 return PTR_ERR(ce);
1895
1896 ce->stats.runtime.num_underflow = 0;
1897 ce->stats.runtime.max_underflow = 0;
1898
1899 do {
1900 unsigned int loop = 1024;
1901
1902 while (loop) {
1903 rq = intel_context_create_request(ce);
1904 if (IS_ERR(rq)) {
1905 err = PTR_ERR(rq);
1906 goto err_rq;
1907 }
1908
1909 if (--loop == 0)
1910 i915_request_get(rq);
1911
1912 i915_request_add(rq);
1913 }
1914
1915 if (__igt_timeout(end_time, NULL))
1916 break;
1917
1918 i915_request_put(rq);
1919 } while (1);
1920
1921 err = i915_request_wait(rq, 0, HZ / 5);
1922 if (err < 0) {
1923 pr_err("%s: request not completed!\n", engine->name);
1924 goto err_wait;
1925 }
1926
1927 igt_flush_test(engine->i915);
1928
1929 pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
1930 engine->name,
1931 intel_context_get_total_runtime_ns(ce),
1932 intel_context_get_avg_runtime_ns(ce));
1933
1934 err = 0;
1935 if (ce->stats.runtime.num_underflow) {
1936 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
1937 engine->name,
1938 ce->stats.runtime.num_underflow,
1939 ce->stats.runtime.max_underflow);
1940 GEM_TRACE_DUMP();
1941 err = -EOVERFLOW;
1942 }
1943
1944 err_wait:
1945 i915_request_put(rq);
1946 err_rq:
1947 intel_context_put(ce);
1948 return err;
1949 }
1950
1951 static int live_pphwsp_runtime(void *arg)
1952 {
1953 struct intel_gt *gt = arg;
1954 struct intel_engine_cs *engine;
1955 enum intel_engine_id id;
1956 int err = 0;
1957
1958 /*
1959 * Check that the cumulative context runtime, as stored in the pphwsp[16],
1960 * is monotonic.
1961 */
1962
1963 for_each_engine(engine, gt, id) {
1964 err = __live_pphwsp_runtime(engine);
1965 if (err)
1966 break;
1967 }
1968
1969 if (igt_flush_test(gt->i915))
1970 err = -EIO;
1971
1972 return err;
1973 }
1974
1975 int intel_lrc_live_selftests(struct drm_i915_private *i915)
1976 {
1977 static const struct i915_subtest tests[] = {
1978 SUBTEST(live_lrc_layout),
1979 SUBTEST(live_lrc_fixed),
1980 SUBTEST(live_lrc_state),
1981 SUBTEST(live_lrc_gpr),
1982 SUBTEST(live_lrc_isolation),
1983 SUBTEST(live_lrc_timestamp),
1984 SUBTEST(live_lrc_garbage),
1985 SUBTEST(live_pphwsp_runtime),
1986 SUBTEST(live_lrc_indirect_ctx_bb),
1987 SUBTEST(live_lrc_per_ctx_bb),
1988 };
1989
1990 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
1991 return 0;
1992
1993 return intel_gt_live_subtests(tests, to_gt(i915));
1994 }
1995