/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022-2023 Intel Corporation
 */

#ifndef _XE_GT_TYPES_H_
#define _XE_GT_TYPES_H_

#include "xe_device_types.h"
#include "xe_force_wake_types.h"
#include "xe_gt_idle_types.h"
#include "xe_gt_sriov_pf_types.h"
#include "xe_gt_sriov_vf_types.h"
#include "xe_gt_stats_types.h"
#include "xe_hw_engine_types.h"
#include "xe_hw_fence_types.h"
#include "xe_oa_types.h"
#include "xe_reg_sr_types.h"
#include "xe_sa_types.h"
#include "xe_uc_types.h"

struct xe_exec_queue_ops;
struct xe_migrate;
struct xe_ring_ops;

enum xe_gt_type {
	XE_GT_TYPE_UNINITIALIZED,
	XE_GT_TYPE_MAIN,
	XE_GT_TYPE_MEDIA,
};

enum xe_gt_eu_type {
	XE_GT_EU_TYPE_SIMD8,
	XE_GT_EU_TYPE_SIMD16,
};

#define XE_MAX_DSS_FUSE_REGS		3
#define XE_MAX_DSS_FUSE_BITS		(32 * XE_MAX_DSS_FUSE_REGS)
#define XE_MAX_EU_FUSE_REGS		1
#define XE_MAX_EU_FUSE_BITS		(32 * XE_MAX_EU_FUSE_REGS)
#define XE_MAX_L3_BANK_MASK_BITS	64

typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)];
typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)];
typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_MASK_BITS)];
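
/*
 * The fuse masks above are plain kernel bitmaps, so the generic bitmap
 * helpers apply.  Illustrative sketch only (not part of this header's API):
 * counting the geometry-capable dual-subslices of a GT could look like
 *
 *	n_dss = bitmap_weight(gt->fuse_topo.g_dss_mask, XE_MAX_DSS_FUSE_BITS);
 */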

struct xe_mmio_range {
	u32 start;
	u32 end;
};

/*
 * The hardware has multiple kinds of multicast register ranges that need
 * special register steering (and future platforms are expected to add
 * additional types).
 *
 * During driver startup, we initialize the steering control register to
 * direct reads to a slice/subslice that is valid for the 'subslice' class
 * of multicast registers.  If another type of steering does not have any
 * overlap in valid steering targets with 'subslice' style registers, we will
 * need to explicitly re-steer reads of registers of the other type.
 *
 * Only the replication types that may need additional non-default steering
 * are listed here.
 */
enum xe_steering_type {
	L3BANK,
	MSLICE,
	LNCF,
	DSS,
	OADDRM,
	SQIDI_PSMI,

	/*
	 * On some platforms there are multiple types of MCR registers that
	 * will always return a non-terminated value at instance (0, 0).  We'll
	 * lump those all into a single category to keep things simple.
	 */
	INSTANCE0,

	/*
	 * Register ranges that don't need special steering for each register:
	 * it's sufficient to keep the HW-default for the selector, or only
	 * change it once, on GT initialization. This needs to be the last
	 * steering type.
	 */
	IMPLICIT_STEERING,
	NUM_STEERING_TYPES
};
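
/*
 * Illustrative sketch only: each steering type indexes the per-GT steering
 * table declared further below in struct xe_gt, e.g. a hypothetical caller
 * resolving the L3 bank steering target would do something like
 *
 *	group = gt->steering[L3BANK].group_target;
 *	instance = gt->steering[L3BANK].instance_target;
 */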

#define gt_to_tile(gt__)							\
	_Generic(gt__,								\
		 const struct xe_gt * : (const struct xe_tile *)((gt__)->tile),	\
		 struct xe_gt * : (gt__)->tile)

#define gt_to_xe(gt__)										\
	_Generic(gt__,										\
		 const struct xe_gt * : (const struct xe_device *)(gt_to_tile(gt__)->xe),	\
		 struct xe_gt * : gt_to_tile(gt__)->xe)
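
/*
 * Illustrative usage only (hypothetical function): the _Generic selections
 * above preserve the constness of the GT pointer that is passed in, e.g.
 *
 *	static void example(const struct xe_gt *gt)
 *	{
 *		const struct xe_tile *tile = gt_to_tile(gt);
 *		const struct xe_device *xe = gt_to_xe(gt);
 *		...
 *	}
 */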

/**
 * struct xe_gt - A "Graphics Technology" unit of the GPU
 *
 * A GT ("Graphics Technology") is the subset of a GPU primarily responsible
 * for implementing the graphics, compute, and/or media IP.  It encapsulates
 * the hardware engines, programmable execution units, and GuC.  Each GT has
 * its own handling of power management (RC6+forcewake) and multicast register
 * steering.
 *
 * A GPU/tile may have a single GT that supplies all graphics, compute, and
 * media functionality, or the graphics/compute and media may be split into
 * separate GTs within a tile.
 */
struct xe_gt {
	/** @tile: Backpointer to GT's tile */
	struct xe_tile *tile;

	/** @info: GT info */
	struct {
		/** @info.type: type of GT */
		enum xe_gt_type type;
		/** @info.reference_clock: clock frequency */
		u32 reference_clock;
		/** @info.timestamp_base: GT timestamp base */
		u32 timestamp_base;
		/**
		 * @info.engine_mask: mask of engines present on GT. Some of
		 * them may be reserved at runtime and not available to
		 * userspace. See @user_engines.mask
		 */
		u64 engine_mask;
		/** @info.gmdid: raw GMD_ID value from hardware */
		u32 gmdid;
		/** @info.id: Unique ID of this GT within the PCI Device */
		u8 id;
		/** @info.has_indirect_ring_state: GT has indirect ring state support */
		u8 has_indirect_ring_state:1;
	} info;

#if IS_ENABLED(CONFIG_DEBUG_FS)
	/** @stats: GT stats */
	struct {
		/** @stats.counters: counters for various GT stats */
		atomic64_t counters[__XE_GT_STATS_NUM_IDS];
	} stats;
#endif

	/**
	 * @mmio: mmio info for GT.  All GTs within a tile share the same
	 * register space, but have their own copy of GSI registers at a
	 * specific offset.
	 */
	struct xe_mmio mmio;

	/**
	 * @pm: power management info for GT.  The driver uses the GT's
	 * "force wake" interface to wake up specific parts of the GT hardware
	 * from C6 sleep states and ensure the hardware remains awake while it
	 * is being actively used.
	 */
	struct {
		/** @pm.fw: force wake for GT */
		struct xe_force_wake fw;
	} pm;

	/** @sriov: virtualization data related to GT */
	union {
		/** @sriov.pf: PF data. Valid only if driver is running as PF */
		struct xe_gt_sriov_pf pf;
		/** @sriov.vf: VF data. Valid only if driver is running as VF */
		struct xe_gt_sriov_vf vf;
	} sriov;

	/**
	 * @reg_sr: table with registers to be restored on GT init/resume/reset
	 */
	struct xe_reg_sr reg_sr;

	/** @reset: state for GT resets */
	struct {
		/**
		 * @reset.worker: work item so GT resets can be done
		 * asynchronously, allowing the reset code to safely flush all
		 * code paths
		 */
		struct work_struct worker;
	} reset;

	/** @tlb_invalidation: TLB invalidation state */
	struct {
		/** @tlb_invalidation.seqno: TLB invalidation seqno, protected by CT lock */
#define TLB_INVALIDATION_SEQNO_MAX	0x100000
		int seqno;
		/**
		 * @tlb_invalidation.seqno_recv: last received TLB invalidation seqno,
		 * protected by CT lock
		 */
		int seqno_recv;
		/**
		 * @tlb_invalidation.pending_fences: list of pending fences waiting on TLB
		 * invalidations, protected by CT lock
		 */
		struct list_head pending_fences;
		/**
		 * @tlb_invalidation.pending_lock: protects @tlb_invalidation.pending_fences
		 * and updating @tlb_invalidation.seqno_recv.
		 */
		spinlock_t pending_lock;
		/**
		 * @tlb_invalidation.fence_tdr: schedules a delayed call to
		 * xe_gt_tlb_fence_timeout after the timeout interval is over.
		 */
		struct delayed_work fence_tdr;
		/** @tlb_invalidation.lock: protects TLB invalidation fences */
		spinlock_t lock;
	} tlb_invalidation;
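
	/*
	 * Illustrative sketch only, not necessarily the driver's exact
	 * scheme: a wrapping seqno allocation under the CT lock, reserving 0
	 * as an invalid value, could look like
	 *
	 *	seqno = gt->tlb_invalidation.seqno++;
	 *	if (gt->tlb_invalidation.seqno == TLB_INVALIDATION_SEQNO_MAX)
	 *		gt->tlb_invalidation.seqno = 1;
	 */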

	/**
	 * @ccs_mode: Number of compute engines enabled.
	 * Allows fixed mapping of available compute slices to compute engines.
	 * By default only the first available compute engine is enabled and all
	 * available compute slices are allocated to it.
	 */
	u32 ccs_mode;

	/** @usm: unified shared memory state */
	struct {
		/**
		 * @usm.bb_pool: Pool from which batchbuffers for USM operations
		 * (e.g. migrations, fixing page tables) are allocated. A
		 * dedicated pool is needed so USM operations do not get blocked
		 * behind any user operations which may have resulted in a
		 * fault.
		 */
		struct xe_sa_manager *bb_pool;
		/**
		 * @usm.reserved_bcs_instance: reserved BCS instance used for USM
		 * operations (e.g. migrations, fixing page tables)
		 */
		u16 reserved_bcs_instance;
		/** @usm.pf_wq: page fault work queue, unbound, high priority */
		struct workqueue_struct *pf_wq;
		/** @usm.acc_wq: access counter work queue, unbound, high priority */
		struct workqueue_struct *acc_wq;
		/**
		 * @usm.pf_queue: Page fault queue used to sync faults so that
		 * faults can be processed outside the GuC CT lock. The queue is
		 * sized so it can sync all possible faults (1 per physical
		 * engine). Multiple queues exist so page faults from different
		 * VMs can be processed in parallel; see the illustrative note
		 * following this definition.
		 */
		struct pf_queue {
			/** @usm.pf_queue.gt: back pointer to GT */
			struct xe_gt *gt;
			/** @usm.pf_queue.data: data in the page fault queue */
			u32 *data;
			/**
			 * @usm.pf_queue.num_dw: number of DWORDS in the page
			 * fault queue. Dynamically calculated based on the number
			 * of compute resources available.
			 */
			u32 num_dw;
			/**
			 * @usm.pf_queue.tail: tail pointer in DWs for page fault queue,
			 * moved by worker which processes faults (consumer).
			 */
			u16 tail;
			/**
			 * @usm.pf_queue.head: head pointer in DWs for page fault queue,
			 * moved by G2H handler (producer).
			 */
			u16 head;
			/** @usm.pf_queue.lock: protects page fault queue */
			spinlock_t lock;
			/** @usm.pf_queue.worker: to process page faults */
			struct work_struct worker;
#define NUM_PF_QUEUE	4
		} pf_queue[NUM_PF_QUEUE];
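
		/*
		 * Illustrative note only (hypothetical arithmetic, not code
		 * from this driver): @head and @tail above are dword indices
		 * into @data, with the G2H handler producing at head and the
		 * worker consuming at tail, so the amount of queued fault
		 * data wraps within @num_dw, e.g.
		 *
		 *	queued_dw = (head - tail + num_dw) % num_dw;
		 */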
		/**
		 * @usm.acc_queue: Same as the page fault queue, as access
		 * counters cannot be processed under the CT lock either.
		 */
		struct acc_queue {
			/** @usm.acc_queue.gt: back pointer to GT */
			struct xe_gt *gt;
#define ACC_QUEUE_NUM_DW	128
			/** @usm.acc_queue.data: data in the access counter queue */
			u32 data[ACC_QUEUE_NUM_DW];
			/**
			 * @usm.acc_queue.tail: tail pointer in DWs for access counter queue,
			 * moved by worker which processes counters
			 * (consumer).
			 */
			u16 tail;
			/**
			 * @usm.acc_queue.head: head pointer in DWs for access counter queue,
			 * moved by G2H handler (producer).
			 */
			u16 head;
			/** @usm.acc_queue.lock: protects access counter queue */
			spinlock_t lock;
			/** @usm.acc_queue.worker: to process access counters */
			struct work_struct worker;
#define NUM_ACC_QUEUE	4
		} acc_queue[NUM_ACC_QUEUE];
	} usm;

	/** @ordered_wq: used to serialize GT resets and TDRs */
	struct workqueue_struct *ordered_wq;

	/** @uc: micro controllers on the GT */
	struct xe_uc uc;

	/** @gtidle: idle properties of GT */
	struct xe_gt_idle gtidle;

	/** @exec_queue_ops: submission backend exec queue operations */
	const struct xe_exec_queue_ops *exec_queue_ops;

	/**
	 * @ring_ops: ring operations for this hw engine (1 per engine class)
	 */
	const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX];

	/** @fence_irq: fence IRQs (1 per engine class) */
	struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX];

	/** @default_lrc: default LRC state */
	void *default_lrc[XE_ENGINE_CLASS_MAX];

	/** @hw_engines: hardware engines on the GT */
	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];

	/** @eclass: per hardware engine class interface on the GT */
	struct xe_hw_engine_class_intf eclass[XE_ENGINE_CLASS_MAX];

	/** @sysfs: sysfs' kobj used by xe_gt_sysfs */
	struct kobject *sysfs;

	/** @freq: Main GT freq sysfs control */
	struct kobject *freq;

	/** @mocs: MOCS (memory object control state) index info */
	struct {
		/** @mocs.uc_index: UC index */
		u8 uc_index;
		/** @mocs.wb_index: WB index, only used on L3_CCS platforms */
		u8 wb_index;
	} mocs;

	/** @fuse_topo: GT topology reported by fuse registers */
	struct {
		/** @fuse_topo.g_dss_mask: dual-subslices usable by geometry */
		xe_dss_mask_t g_dss_mask;

		/** @fuse_topo.c_dss_mask: dual-subslices usable by compute */
		xe_dss_mask_t c_dss_mask;

		/** @fuse_topo.eu_mask_per_dss: EU mask per DSS */
		xe_eu_mask_t eu_mask_per_dss;

		/** @fuse_topo.l3_bank_mask: L3 bank mask */
		xe_l3_bank_mask_t l3_bank_mask;

		/**
		 * @fuse_topo.eu_type: type/width of EU stored in
		 * fuse_topo.eu_mask_per_dss
		 */
		enum xe_gt_eu_type eu_type;
	} fuse_topo;

	/** @steering: register steering for individual HW units */
	struct {
		/** @steering.ranges: register ranges used for this steering type */
		const struct xe_mmio_range *ranges;

		/** @steering.group_target: target to steer accesses to */
		u16 group_target;
		/** @steering.instance_target: instance to steer accesses to */
		u16 instance_target;
	} steering[NUM_STEERING_TYPES];

	/**
	 * @steering_dss_per_grp: number of DSS per steering group (gslice,
	 *    cslice, etc.).
	 */
	unsigned int steering_dss_per_grp;

	/**
	 * @mcr_lock: protects the MCR_SELECTOR register for the duration
	 *    of a steered operation
	 */
	spinlock_t mcr_lock;

	/**
	 * @global_invl_lock: protects the global invalidation register for
	 *    the duration of a global L2 cache invalidation
	 */
	spinlock_t global_invl_lock;

	/** @wa_active: keep track of active workarounds */
	struct {
		/** @wa_active.gt: bitmap with active GT workarounds */
		unsigned long *gt;
		/** @wa_active.engine: bitmap with active engine workarounds */
		unsigned long *engine;
		/** @wa_active.lrc: bitmap with active LRC workarounds */
		unsigned long *lrc;
		/** @wa_active.oob: bitmap with active OOB workarounds */
		unsigned long *oob;
		/**
		 * @wa_active.oob_initialized: mark OOB workarounds as initialized
		 * to help detect misuse of XE_WA() - it can only be called on
		 * initialization after OOB WAs have been processed
		 */
		bool oob_initialized;
	} wa_active;

	/** @tuning_active: keep track of active tunings */
	struct {
		/** @tuning_active.gt: bitmap with active GT tunings */
		unsigned long *gt;
		/** @tuning_active.engine: bitmap with active engine tunings */
		unsigned long *engine;
		/** @tuning_active.lrc: bitmap with active LRC tunings */
		unsigned long *lrc;
	} tuning_active;

	/** @user_engines: engines present in GT and available to userspace */
	struct {
		/**
		 * @user_engines.mask: like @info->engine_mask, but taking into
		 * consideration only the engines available to userspace
		 */
		u64 mask;

		/**
		 * @user_engines.instances_per_class: aggregate per class the
		 * number of engines available to userspace
		 */
		u8 instances_per_class[XE_ENGINE_CLASS_MAX];
	} user_engines;

	/** @oa: OA observation subsystem per-GT info */
	struct xe_oa_gt oa;

	/** @eu_stall: EU stall counters subsystem per-GT info */
	struct xe_eu_stall_gt *eu_stall;
};

#endif