// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <generated/xe_wa_oob.h>

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
#include "xe_wa.h"

#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)
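
/*
 * Illustrative note: xe_lrc_init() below builds the descriptor as
 * LRC_VALID | FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT),
 * optionally OR'ing in LRC_PRIVILEGE (PPGTT mode) and, before graphics
 * version 12.50, the engine class/instance; xe_lrc_descriptor() then ORs
 * in the context's PPHWSP GGTT address.
 */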

#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size = 4 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			size = 3 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size = 2 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: When creating a MI_LOAD_REGISTER_IMM command, allow setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs, or number of registers to set values for in case
 *        of MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 6 bits, additional bytes
 *      follow for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
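 *
 * Worked example (illustrative, not taken from the tables below): the
 * encoded sequence LRI(2, POSTED), REG(0x034), REG(0x030) decodes into
 *   MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | MI_LRI_FORCE_POSTED |
 *   MI_LRI_LRM_CS_MMIO,
 *   mmio_base + 0x034, <value>, mmio_base + 0x030, <value>
 * where each <value> dword is left for later initialization.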
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),                 /* [0x00] */ \
	LRI(15, POSTED),        /* [0x01] */ \
	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),             /* [0x0e] BB_ADDR */ \
	REG(0x110),             /* [0x10] BB_STATE */ \
	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),             /* [0x18] CCID */ \
	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),                 /* [0x20] */ \
	LRI(9, POSTED),         /* [0x21] */ \
	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),           /* [0x28] dummy reg */ \
	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),           /* [0x30] PTBP_UDW */ \
	REG16(0x270)            /* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),                 /* [0x34] */
	LRI(2, POSTED),         /* [0x36] */
	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

	NOP(6),                 /* [0x41] */
	LRI(1, 0),              /* [0x47] */
	REG(0x0c8),             /* [0x48] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),         /* [0x34] */
	LRI(2, POSTED),         /* [0x41] */
	REG16(0x200),           /* [0x42] BCS_SWCTRL */
	REG16(0x204),           /* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),                 /* [0x00] */
	LRI(5, POSTED),         /* [0x01] */
	REG(0x034),             /* [0x02] RING_BUFFER_HEAD */
	REG(0x030),             /* [0x04] RING_BUFFER_TAIL */
	REG(0x038),             /* [0x06] RING_BUFFER_START */
	REG(0x048),             /* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),                 /* [0x0c] */
	LRI(9, POSTED),         /* [0x11] */
	REG(0x168),             /* [0x12] BB_ADDR_UDW */
	REG(0x140),             /* [0x14] BB_ADDR */
	REG(0x110),             /* [0x16] BB_STATE */
	REG16(0x588),           /* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x22] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x24] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x26] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x28] BB_STACK_WRITE_PORT */

	NOP(12),                 /* [0x00] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);
	u8 num_regs;

	if (!xe_device_uses_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	num_regs = xe_device_has_msix(xe) ? 3 : 2;
	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);

	if (xe_device_has_msix(xe)) {
		regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
		/* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
	}
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

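/*
 * Illustrative note for reset_stop_ring() below: MI_MODE is a masked
 * register, so the upper 16 bits of the value dword act as a write-enable
 * mask for the lower 16. Clearing STOP_RING while setting STOP_RING << 16
 * asks the hardware to clear just that bit on context restore.
 */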
static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096
#define LRC_PPHWSP_SIZE SZ_4K
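
/*
 * Driver-defined PPHWSP layout implied by the offsets above (all within
 * the 4K PPHWSP page):
 *   [512]  seqno
 *   [520]  start seqno
 *   [528]  ctx job timestamp
 *   [2048] parallel scratch area
 *   [2096] engine id
 */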

u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* This is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* Indirect ring state page is at the very end of LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

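/*
 * For example, DECL_MAP_ADDR_HELPERS(seqno) expands to
 * __xe_lrc_seqno_map(), an iosys_map advanced by __xe_lrc_seqno_offset(),
 * and __xe_lrc_seqno_ggtt_addr(), the matching GGTT address.
 */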
DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)
DECL_MAP_ADDR_HELPERS(engine_id)

#undef DECL_MAP_ADDR_HELPERS

/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp udw GGTT address
 */
u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;
	u32 ldw, udw = 0;

	map = __xe_lrc_ctx_timestamp_map(lrc);
	ldw = xe_map_read32(xe, &map);

	if (xe->info.has_64bit_timestamp) {
		map = __xe_lrc_ctx_timestamp_udw_map(lrc);
		udw = xe_map_read32(xe, &map);
	}

	return (u64)udw << 32 | ldw;
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_job_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process HW Status Page (PPHWSP) */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt));

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
	xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
}

/*
 * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active
 * context run ticks.
 * @lrc: Pointer to the lrc.
 *
 * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the
 * context, but only gets updated when the context switches out. In order to
 * check how long a context has been active before it switches out, two things
 * are required:
 *
 * (1) Determine if the context is running:
 * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in
 * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is
 * initialized. During a query, we just check for this value to determine if the
 * context is active. If the context switched out, it would overwrite this
 * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as
 * the last part of context restore, so reusing this LRC location will not
 * clobber anything.
 *
 * (2) Calculate the time that the context has been active for:
 * The CTX_TIMESTAMP ticks only when the context is active. If a context is
 * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization.
 * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific
 * engine instance. Since we do not know which instance the context is running
 * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and
 * store it in the PPHWSP.
 */
#define CONTEXT_ACTIVE 1ULL
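
/*
 * Illustrative consequence for readers of CTX_TIMESTAMP in the LRC:
 * CONTEXT_ACTIVE means the context is currently scheduled in (the WA BB
 * ran but no switch-out has happened yet); any other non-zero value is
 * the accumulated run ticks written at the last switch-out; 0 means the
 * context has never been scheduled.
 */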
static void xe_lrc_setup_utilization(struct xe_lrc *lrc)
{
	u32 *cmd;

	cmd = lrc->bb_per_ctx_bo->vmap.vaddr;

	*cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
	*cmd++ = ENGINE_ID(0).addr;
	*cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc);
	*cmd++ = 0;

	*cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
	*cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
	*cmd++ = 0;
	*cmd++ = lower_32_bits(CONTEXT_ACTIVE);

	if (lrc_to_xe(lrc)->info.has_64bit_timestamp) {
		*cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
		*cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
		*cmd++ = 0;
		*cmd++ = upper_32_bits(CONTEXT_ACTIVE);
	}

	*cmd++ = MI_BATCH_BUFFER_END;

	xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
			     xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size, u16 msix_vec,
		       u32 init_flags)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	u32 bo_flags;
	int err;

	kref_init(&lrc->refcount);
	lrc->gt = gt;
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
		   XE_BO_FLAG_GGTT_INVALIDATE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
				       ttm_bo_type_kernel,
				       bo_flags);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
						  ttm_bo_type_kernel,
						  bo_flags);
	if (IS_ERR(lrc->bb_per_ctx_bo)) {
		err = PTR_ERR(lrc->bb_per_ctx_bo);
		goto err_lrc_finish;
	}

	lrc->size = lrc_size;
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process HW Status Page (PPHWSP) and LRC / context state
	 * to known values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_device_has_msix(xe)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
				     xe_memirq_status_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
				     xe_memirq_source_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	if (init_flags & XE_LRC_CREATE_RUNALONE)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     _MASKED_BIT_ENABLE(CTX_CTRL_RUN_ALONE));

	if (init_flags & XE_LRC_CREATE_PXP)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE));

	lrc->ctx_timestamp = 0;
	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
	if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
		xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	xe_lrc_setup_utilization(lrc);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 * @msix_vec: MSI-X interrupt vector (for platforms that support it)
 * @flags: LRC initialization flags
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return pointer to created LRC upon success and an error pointer
 * upon failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size, u16 msix_vec, u32 flags)
{
	struct xe_lrc *lrc;
	int err;

	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
	}

	return lrc;
}

/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0, release resources held by the Logical Ring Context
 * (LRC) and free the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}

void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

static u32 xe_lrc_ring_start(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START);
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_START);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

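/*
 * Example (illustrative): for a 4 KiB ring, head == tail yields 4096
 * bytes free, while head == 0 and tail == 4092 yields
 * ((0 - 4092 - 1) & 4095) + 1 == 4 bytes free.
 */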
u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

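/*
 * Example wrap-around (illustrative): with ring.size == 4096 and
 * ring.tail == 4088, writing 16 bytes places 8 bytes at [4088, 4096) and
 * the remaining 8 at [0, 8), leaving ring.tail == 8.
 */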
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

/**
 * xe_lrc_engine_id() - Read engine id value
 * @lrc: Pointer to the lrc.
 *
 * Returns: engine id value
1352  */
xe_lrc_engine_id(struct xe_lrc * lrc)1353 static u32 xe_lrc_engine_id(struct xe_lrc *lrc)
1354 {
1355 	struct xe_device *xe = lrc_to_xe(lrc);
1356 	struct iosys_map map;
1357 
1358 	map = __xe_lrc_engine_id_map(lrc);
1359 	return xe_map_read32(xe, &map);
1360 }
1361 
instr_dw(u32 cmd_header)1362 static int instr_dw(u32 cmd_header)
1363 {
1364 	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
1365 	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
1366 	    GFXPIPE_SINGLE_DW_CMD(0, 0))
1367 		return 1;
1368 
1369 	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
1370 	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
1371 		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;
1372 
1373 	/* Most instructions have the # of dwords (minus 2) in 7:0 */
1374 	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
1375 }
1376 
dump_mi_command(struct drm_printer * p,struct xe_gt * gt,u32 * dw,int remaining_dw)1377 static int dump_mi_command(struct drm_printer *p,
1378 			   struct xe_gt *gt,
1379 			   u32 *dw,
1380 			   int remaining_dw)
1381 {
1382 	u32 inst_header = *dw;
1383 	u32 numdw = instr_dw(inst_header);
1384 	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
1385 	int num_noop;
1386 
1387 	/* First check for commands that don't have/use a '# DW' field */
1388 	switch (inst_header & MI_OPCODE) {
1389 	case MI_NOOP:
1390 		num_noop = 1;
1391 		while (num_noop < remaining_dw &&
1392 		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
1393 			num_noop++;
1394 		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
1395 		return num_noop;
1396 
1397 	case MI_TOPOLOGY_FILTER:
1398 		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
1399 		return 1;
1400 
1401 	case MI_BATCH_BUFFER_END:
1402 		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
1403 		/* Return 'remaining_dw' to consume the rest of the LRC */
1404 		return remaining_dw;
1405 	}
1406 
1407 	/*
1408 	 * Any remaining commands include a # of dwords.  We should make sure
1409 	 * it doesn't exceed the remaining size of the LRC.
1410 	 */
1411 	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1412 		numdw = remaining_dw;
1413 
1414 	switch (inst_header & MI_OPCODE) {
1415 	case MI_LOAD_REGISTER_IMM:
1416 		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
1417 			   inst_header, (numdw - 1) / 2);
1418 		for (int i = 1; i < numdw; i += 2)
1419 			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
1420 		return numdw;
1421 
1422 	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
1423 		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
1424 			   inst_header,
1425 			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
1426 			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
1427 		if (numdw == 4)
1428 			drm_printf(p, " - %#6x = %#010llx\n",
1429 				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
1430 		else
1431 			drm_printf(p, " - %*ph (%s)\n",
1432 				   (int)sizeof(u32) * (numdw - 1), dw + 1,
1433 				   numdw < 4 ? "truncated" : "malformed");
1434 		return numdw;
1435 
1436 	case MI_FORCE_WAKEUP:
1437 		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
1438 		return numdw;
1439 
1440 	default:
1441 		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
1442 			   inst_header, opcode, numdw);
1443 		return numdw;
1444 	}
1445 }
1446 
dump_gfxpipe_command(struct drm_printer * p,struct xe_gt * gt,u32 * dw,int remaining_dw)1447 static int dump_gfxpipe_command(struct drm_printer *p,
1448 				struct xe_gt *gt,
1449 				u32 *dw,
1450 				int remaining_dw)
1451 {
1452 	u32 numdw = instr_dw(*dw);
1453 	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
1454 	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
1455 	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);
1456 
1457 	/*
1458 	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
1459 	 * remaining size of the LRC.
1460 	 */
1461 	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1462 		numdw = remaining_dw;
1463 
1464 	switch (*dw & GFXPIPE_MATCH_MASK) {
1465 #define MATCH(cmd) \
1466 	case cmd: \
1467 		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
1468 		return numdw
1469 #define MATCH3D(cmd) \
1470 	case CMD_##cmd: \
1471 		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
1472 		return numdw
1473 
1474 	MATCH(STATE_BASE_ADDRESS);
1475 	MATCH(STATE_SIP);
1476 	MATCH(GPGPU_CSR_BASE_ADDRESS);
1477 	MATCH(STATE_COMPUTE_MODE);
1478 	MATCH3D(3DSTATE_BTD);
1479 	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
1480 	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);
1481 
1482 	MATCH3D(3DSTATE_VF_STATISTICS);
1483 
1484 	MATCH(PIPELINE_SELECT);
1485 
1486 	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
1487 	MATCH3D(3DSTATE_CLEAR_PARAMS);
1488 	MATCH3D(3DSTATE_DEPTH_BUFFER);
1489 	MATCH3D(3DSTATE_STENCIL_BUFFER);
1490 	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
1491 	MATCH3D(3DSTATE_VERTEX_BUFFERS);
1492 	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
1493 	MATCH3D(3DSTATE_INDEX_BUFFER);
1494 	MATCH3D(3DSTATE_VF);
1495 	MATCH3D(3DSTATE_MULTISAMPLE);
1496 	MATCH3D(3DSTATE_CC_STATE_POINTERS);
1497 	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
1498 	MATCH3D(3DSTATE_VS);
1499 	MATCH3D(3DSTATE_GS);
1500 	MATCH3D(3DSTATE_CLIP);
1501 	MATCH3D(3DSTATE_SF);
1502 	MATCH3D(3DSTATE_WM);
1503 	MATCH3D(3DSTATE_CONSTANT_VS);
1504 	MATCH3D(3DSTATE_CONSTANT_GS);
1505 	MATCH3D(3DSTATE_CONSTANT_PS);
1506 	MATCH3D(3DSTATE_SAMPLE_MASK);
1507 	MATCH3D(3DSTATE_CONSTANT_HS);
1508 	MATCH3D(3DSTATE_CONSTANT_DS);
1509 	MATCH3D(3DSTATE_HS);
1510 	MATCH3D(3DSTATE_TE);
1511 	MATCH3D(3DSTATE_DS);
1512 	MATCH3D(3DSTATE_STREAMOUT);
1513 	MATCH3D(3DSTATE_SBE);
1514 	MATCH3D(3DSTATE_PS);
1515 	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
1516 	MATCH3D(3DSTATE_CPS_POINTERS);
1517 	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
1518 	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
1519 	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
1520 	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
1521 	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
1522 	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
1523 	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
1524 	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
1525 	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
1526 	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
1527 	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
1528 	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
1529 	MATCH3D(3DSTATE_VF_INSTANCING);
1530 	MATCH3D(3DSTATE_VF_SGVS);
1531 	MATCH3D(3DSTATE_VF_TOPOLOGY);
1532 	MATCH3D(3DSTATE_WM_CHROMAKEY);
1533 	MATCH3D(3DSTATE_PS_BLEND);
1534 	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
1535 	MATCH3D(3DSTATE_PS_EXTRA);
1536 	MATCH3D(3DSTATE_RASTER);
1537 	MATCH3D(3DSTATE_SBE_SWIZ);
1538 	MATCH3D(3DSTATE_WM_HZ_OP);
1539 	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
1540 	MATCH3D(3DSTATE_VF_SGVS_2);
1541 	MATCH3D(3DSTATE_VFG);
1542 	MATCH3D(3DSTATE_URB_ALLOC_VS);
1543 	MATCH3D(3DSTATE_URB_ALLOC_HS);
1544 	MATCH3D(3DSTATE_URB_ALLOC_DS);
1545 	MATCH3D(3DSTATE_URB_ALLOC_GS);
1546 	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
1547 	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
1548 	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
1549 	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
1550 	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
1551 	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
1552 	MATCH3D(3DSTATE_AMFS);
1553 	MATCH3D(3DSTATE_DEPTH_BOUNDS);
1554 	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
1555 	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
1556 	MATCH3D(3DSTATE_MESH_CONTROL);
1557 	MATCH3D(3DSTATE_MESH_DISTRIB);
1558 	MATCH3D(3DSTATE_TASK_REDISTRIB);
1559 	MATCH3D(3DSTATE_MESH_SHADER);
1560 	MATCH3D(3DSTATE_MESH_SHADER_DATA);
1561 	MATCH3D(3DSTATE_TASK_CONTROL);
1562 	MATCH3D(3DSTATE_TASK_SHADER);
1563 	MATCH3D(3DSTATE_TASK_SHADER_DATA);
1564 	MATCH3D(3DSTATE_URB_ALLOC_MESH);
1565 	MATCH3D(3DSTATE_URB_ALLOC_TASK);
1566 	MATCH3D(3DSTATE_CLIP_MESH);
1567 	MATCH3D(3DSTATE_SBE_MESH);
1568 	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
1569 
1570 	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
1571 	MATCH3D(3DSTATE_CHROMA_KEY);
1572 	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
1573 	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
1574 	MATCH3D(3DSTATE_LINE_STIPPLE);
1575 	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
1576 	MATCH3D(3DSTATE_MONOFILTER_SIZE);
1577 	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
1578 	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
1579 	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
1580 	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
1581 	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
1582 	MATCH3D(3DSTATE_SO_DECL_LIST);
1583 	MATCH3D(3DSTATE_SO_BUFFER);
1584 	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
1585 	MATCH3D(3DSTATE_SAMPLE_PATTERN);
1586 	MATCH3D(3DSTATE_3D_MODE);
1587 	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
1588 	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
1589 	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);
1590 
1591 	default:
1592 		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
1593 			   *dw, pipeline, opcode, subopcode, numdw);
1594 		return numdw;
1595 	}
1596 }
1597 
dump_gfx_state_command(struct drm_printer * p,struct xe_gt * gt,u32 * dw,int remaining_dw)1598 static int dump_gfx_state_command(struct drm_printer *p,
1599 				  struct xe_gt *gt,
1600 				  u32 *dw,
1601 				  int remaining_dw)
1602 {
1603 	u32 numdw = instr_dw(*dw);
1604 	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);
1605 
1606 	/*
1607 	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
1608 	 * remaining size of the LRC.
1609 	 */
1610 	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1611 		numdw = remaining_dw;
1612 
1613 	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
1614 	MATCH(STATE_WRITE_INLINE);
1615 
1616 	default:
1617 		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
1618 			   *dw, opcode, numdw);
1619 		return numdw;
1620 	}
1621 }
1622 
xe_lrc_dump_default(struct drm_printer * p,struct xe_gt * gt,enum xe_engine_class hwe_class)1623 void xe_lrc_dump_default(struct drm_printer *p,
1624 			 struct xe_gt *gt,
1625 			 enum xe_engine_class hwe_class)
1626 {
1627 	u32 *dw;
1628 	int remaining_dw, num_dw;
1629 
1630 	if (!gt->default_lrc[hwe_class]) {
1631 		drm_printf(p, "No default LRC for class %d\n", hwe_class);
1632 		return;
1633 	}
1634 
1635 	/*
1636 	 * Skip the beginning of the LRC since it contains the per-process
1637 	 * hardware status page.
1638 	 */
1639 	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
1640 	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;
1641 
1642 	while (remaining_dw > 0) {
1643 		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
1644 			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
1645 		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
1646 			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
1647 		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
1648 			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
1649 		} else {
1650 			num_dw = min(instr_dw(*dw), remaining_dw);
1651 			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
1652 				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
1653 				   num_dw);
1654 		}
1655 
1656 		dw += num_dw;
1657 		remaining_dw -= num_dw;
1658 	}
1659 }
1660 
1661 struct instr_state {
1662 	u32 instr;
1663 	u16 num_dw;
1664 };
1665 
1666 static const struct instr_state xe_hpg_svg_state[] = {
1667 	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
1668 	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
1669 	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
1670 	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
1671 	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
1672 	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
1673 	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
1674 	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
1675 	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
1676 	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
1677 	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
1678 	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
1679 	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
1680 	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
1681 	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
1682 	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
1683 	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
1684 	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
1685 	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
1686 	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
1687 	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
1688 	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
1689 	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
1690 	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
1691 	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
1692 	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
1693 	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
1694 	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
1695 	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
1696 	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
1697 	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
1698 	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
1699 	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
1700 	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
1701 	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
1702 	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
1703 	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
1704 	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
1705 	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
1706 	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
1707 	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
1708 	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
1709 	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
1710 	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
1711 	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
1712 	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
1713 	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
1714 	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
1715 	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
1716 	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
1717 };
1718 
xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue * q,struct xe_bb * bb)1719 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
1720 {
1721 	struct xe_gt *gt = q->hwe->gt;
1722 	struct xe_device *xe = gt_to_xe(gt);
1723 	const struct instr_state *state_table = NULL;
1724 	int state_table_size = 0;
1725 
	/*
	 * Wa_14019789679
	 *
	 * If the driver doesn't explicitly emit the SVG instructions while
	 * setting up the default LRC, the context switch will write 0's
	 * (noops) into the LRC memory rather than the expected instruction
	 * headers.  Application contexts start out as a copy of the default
	 * LRC, and if they also do not emit specific settings for some SVG
	 * state, then on context restore they'll unintentionally inherit
	 * whatever state setting the previous context had programmed into the
	 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
	 * prevent the hardware from resetting that state back to any specific
	 * value).
	 *
	 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
	 * since that's a specific state setting that can easily cause GPU
	 * hangs if unintentionally inherited.  However, to be safe we'll
	 * continue to emit all of the SVG state, since it's best not to leak
	 * any of the state between contexts, even if that leakage is harmless.
	 */
	if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
	}

	if (!state_table) {
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}
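
/*
 * Illustrative sketch (not driver code): what the emission loop above does
 * for one multi-dword table entry. The multi-dword GFXPIPE instructions
 * emitted here carry their length in the header as "total dwords - 2", so
 * for CMD_3DSTATE_URB_ALLOC_VS with num_dw = 3 the loop is equivalent to:
 *
 *	bb->cs[bb->len] = CMD_3DSTATE_URB_ALLOC_VS | (3 - 2);
 *	bb->len += 3;	// header plus two payload dwords, values left unset
 *
 * Single-dword instructions (pipeline field == PIPELINE_SINGLE_DW) have no
 * length field and are emitted as the bare opcode.
 */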

struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	if (lrc->bo->vm)
		xe_vm_get(lrc->bo->vm);

	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
	snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
	snapshot->start = xe_lrc_ring_start(lrc);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
	snapshot->lrc_snapshot = NULL;
	snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
	return snapshot;
}

void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct xe_vm *vm;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	vm = bo->vm;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);
put_bo:
	xe_bo_put(bo);
	if (vm)
		xe_vm_put(vm);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tHW Ring address: 0x%08x\n",
		   snapshot->ring_addr);
	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
		   snapshot->indirect_context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWCTX].data: ");
	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}
	drm_puts(p, "\n");
}
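
/*
 * Note on the dump format above: ascii85_encode() from <linux/ascii85.h>
 * turns each 32-bit word into either the single character 'z' (for a zero
 * word) or a five-character group, so large runs of zeroed context memory
 * stay compact in the printed snapshot.
 */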

void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kvfree(snapshot->lrc_snapshot);
	if (snapshot->lrc_bo) {
		struct xe_vm *vm;

		vm = snapshot->lrc_bo->vm;
		xe_bo_put(snapshot->lrc_bo);
		if (vm)
			xe_vm_put(vm);
	}
	kfree(snapshot);
}
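
/*
 * Usage sketch (illustrative only): the snapshot API above is deliberately
 * two-phased so that the initial capture can run in contexts that cannot
 * sleep (kmalloc with GFP_NOWAIT, no BO mapping), while the heavyweight
 * copy is deferred to xe_lrc_snapshot_capture_delayed(), which may sleep.
 * A hypothetical caller would do:
 *
 *	struct xe_lrc_snapshot *snap = xe_lrc_snapshot_capture(lrc);
 *
 *	// ... later, from a context that may sleep:
 *	xe_lrc_snapshot_capture_delayed(snap);
 *	xe_lrc_snapshot_print(snap, p);		// p is a struct drm_printer
 *	xe_lrc_snapshot_free(snap);
 */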

static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
{
	u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id);
	u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id);
	struct xe_hw_engine *hwe;
	u64 val;

	hwe = xe_gt_hw_engine(lrc->gt, class, instance, false);
	if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe),
			    "Unexpected engine class:instance %d:%d for context utilization\n",
			    class, instance))
		return -1;

	if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
		val = xe_mmio_read64_2x32(&hwe->gt->mmio,
					  RING_CTX_TIMESTAMP(hwe->mmio_base));
	else
		val = xe_mmio_read32(&hwe->gt->mmio,
				     RING_CTX_TIMESTAMP(hwe->mmio_base));

	*reg_ctx_ts = val;

	return 0;
}

/**
 * xe_lrc_update_timestamp() - Update ctx timestamp
 * @lrc: Pointer to the lrc.
 * @old_ts: Old timestamp value
 *
 * Populate @old_ts with the current saved ctx timestamp, read the new ctx
 * timestamp and update the saved value. With support for active contexts,
 * the calculation may be slightly racy, so the LRC value is read again to
 * ensure the context is still active before trusting the register value.
 *
 * Returns: New ctx timestamp value
 */
u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
{
	u64 lrc_ts, reg_ts;
	u32 engine_id;

	*old_ts = lrc->ctx_timestamp;

	lrc_ts = xe_lrc_ctx_timestamp(lrc);
	/* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
	if (IS_SRIOV_VF(lrc_to_xe(lrc))) {
		lrc->ctx_timestamp = lrc_ts;
		goto done;
	}

	if (lrc_ts == CONTEXT_ACTIVE) {
		engine_id = xe_lrc_engine_id(lrc);
		if (!get_ctx_timestamp(lrc, engine_id, &reg_ts))
			lrc->ctx_timestamp = reg_ts;

		/* read lrc again to ensure context is still active */
		lrc_ts = xe_lrc_ctx_timestamp(lrc);
	}

	/*
	 * If the context switched out, just use the lrc_ts. Note that this
	 * needs to be a separate if condition.
	 */
	if (lrc_ts != CONTEXT_ACTIVE)
		lrc->ctx_timestamp = lrc_ts;

done:
	trace_xe_lrc_update_timestamp(lrc, *old_ts);

	return lrc->ctx_timestamp;
}
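
/*
 * Usage sketch (illustrative only): a caller accounting per-context runtime
 * would accumulate the delta between successive reads:
 *
 *	u64 old_ts, new_ts;
 *
 *	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
 *	runtime += new_ts - old_ts;	// ticks elapsed since the last call
 *
 * where "runtime" is a hypothetical accumulator owned by the caller.
 */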

/**
 * xe_lrc_ring_is_idle() - LRC is idle
 * @lrc: Pointer to the lrc.
 *
 * Compare LRC ring head and tail to determine if the ring is idle.
 *
 * Return: True if the ring is idle, false otherwise
 */
bool xe_lrc_ring_is_idle(struct xe_lrc *lrc)
{
	return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc);
}