xref: /linux/drivers/gpu/drm/xe/xe_hw_engine_group.c (revision 89748acdf226fd1a8775ff6fa2703f8412b286c8)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_assert.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_engine_group.h"
#include "xe_vm.h"

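/*
 * Worker that resumes the faulting long-running (LR) exec queues of a
 * group once the group has switched back to EXEC_MODE_LR.
 */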
static void
hw_engine_group_resume_lr_jobs_func(struct work_struct *w)
{
	struct xe_exec_queue *q;
	struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work);
	int err;
	enum xe_hw_engine_group_execution_mode previous_mode;

	err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode);
	if (err)
		return;

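	/*
	 * The group was already executing in LR mode, so no faulting LR
	 * queues were suspended and there is nothing to resume.
	 */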
	if (previous_mode == EXEC_MODE_LR)
		goto put;

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		q->ops->resume(q);
	}

put:
	xe_hw_engine_group_put(group);
}

static struct xe_hw_engine_group *
hw_engine_group_alloc(struct xe_device *xe)
{
	struct xe_hw_engine_group *group;
	int err;

	group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
	if (!group->resume_wq)
		return ERR_PTR(-ENOMEM);

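	/*
	 * Register a managed release action so the workqueue is destroyed
	 * when the drm device is released (or immediately if adding the
	 * action fails).
	 */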
	err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq);
	if (err)
		return ERR_PTR(err);

	init_rwsem(&group->mode_sem);
	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
	INIT_LIST_HEAD(&group->exec_queue_list);

	return group;
}

/**
 * xe_hw_engine_setup_groups() - Set up the hw engine groups for the gt
 * @gt: The gt for which groups are set up
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_hw_engine_setup_groups(struct xe_gt *gt)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
	struct xe_device *xe = gt_to_xe(gt);

	group_rcs_ccs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_rcs_ccs))
		return PTR_ERR(group_rcs_ccs);

	group_bcs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_bcs))
		return PTR_ERR(group_bcs);

	group_vcs_vecs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_vcs_vecs))
		return PTR_ERR(group_vcs_vecs);

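	/*
	 * Group engines by class: render and compute share a group, video
	 * decode and video enhance share a group, and copy engines form
	 * their own group. OTHER-class engines are not assigned to a group.
	 */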
	for_each_hw_engine(hwe, gt, id) {
		switch (hwe->class) {
		case XE_ENGINE_CLASS_COPY:
			hwe->hw_engine_group = group_bcs;
			break;
		case XE_ENGINE_CLASS_RENDER:
		case XE_ENGINE_CLASS_COMPUTE:
			hwe->hw_engine_group = group_rcs_ccs;
			break;
		case XE_ENGINE_CLASS_VIDEO_DECODE:
		case XE_ENGINE_CLASS_VIDEO_ENHANCE:
			hwe->hw_engine_group = group_vcs_vecs;
			break;
		case XE_ENGINE_CLASS_OTHER:
			break;
		default:
			drm_warn(&xe->drm, "NOT POSSIBLE");
		}
	}

	return 0;
}

/**
 * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 *
 * Return: 0 on success,
 *	    -EINTR if the lock could not be acquired,
 *	    or a negative error code if suspending the queue failed.
 */
int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	int err;
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
	xe_assert(xe, q->vm);

	if (xe_vm_in_preempt_fence_mode(q->vm))
		return 0;

	err = down_write_killable(&group->mode_sem);
	if (err)
		return err;

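	/*
	 * The new queue's VM uses page faults but the group is currently
	 * executing dma-fence jobs: park the queue now and kick the resume
	 * worker, which restarts it once the group switches back to LR mode.
	 */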
	if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) {
		q->ops->suspend(q);
		err = q->ops->suspend_wait(q);
		if (err)
			goto err_suspend;

		xe_hw_engine_group_resume_faulting_lr_jobs(group);
	}

	list_add(&q->hw_engine_group_link, &group->exec_queue_list);
	up_write(&group->mode_sem);

	return 0;

err_suspend:
	up_write(&group->mode_sem);
	return err;
}
ALLOW_ERROR_INJECTION(xe_hw_engine_group_add_exec_queue, ERRNO);

/**
 * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 */
void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, q->vm);

	down_write(&group->mode_sem);

	if (!list_empty(&q->hw_engine_group_link))
		list_del(&q->hw_engine_group_link);

	up_write(&group->mode_sem);
}

/**
 * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's
 * faulting LR jobs
 * @group: The hw engine group
 */
void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	queue_work(group->resume_wq, &group->resume_work);
}

/**
 * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group
 * @group: The hw engine group
 *
 * Return: 0 on success, negative error code on error.
 */
static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	int err;
	struct xe_exec_queue *q;
	bool need_resume = false;

	lockdep_assert_held_write(&group->mode_sem);

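	/*
	 * Two passes: issue all suspend requests first, then wait for each,
	 * so the suspends can make progress in parallel.
	 */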
	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		need_resume = true;
		q->ops->suspend(q);
	}

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		err = q->ops->suspend_wait(q);
		if (err)
			goto err_suspend;
	}

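	/*
	 * The faulting LR queues are only parked to let dma-fence jobs run;
	 * schedule the resume worker so they are restarted once the group
	 * can switch back to LR mode.
	 */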
	if (need_resume)
		xe_hw_engine_group_resume_faulting_lr_jobs(group);

	return 0;

err_suspend:
	up_write(&group->mode_sem);
	return err;
}

/**
 * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete
 * @group: The hw engine group
 *
 * This function is not meant to be called directly from a user IOCTL as dma_fence_wait()
 * is not interruptible.
 *
 * Return: 0 on success,
 *	   -ETIME if waiting for one job failed
 */
static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group)
{
	long timeout;
	struct xe_exec_queue *q;
	struct dma_fence *fence;

	lockdep_assert_held_write(&group->mode_sem);

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (xe_vm_in_lr_mode(q->vm))
			continue;

		fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm);
		timeout = dma_fence_wait(fence, false);
		dma_fence_put(fence);

		if (timeout < 0)
			return -ETIME;
	}

	return 0;
}

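/*
 * Toggle the group's execution mode: suspend the faulting LR jobs before
 * entering dma-fence mode, or wait for the dma-fence jobs to complete
 * before entering LR mode. Called with mode_sem held for write.
 */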
static int switch_mode(struct xe_hw_engine_group *group)
{
	int err = 0;
	enum xe_hw_engine_group_execution_mode new_mode;

	lockdep_assert_held_write(&group->mode_sem);

	switch (group->cur_mode) {
	case EXEC_MODE_LR:
		new_mode = EXEC_MODE_DMA_FENCE;
		err = xe_hw_engine_group_suspend_faulting_lr_jobs(group);
		break;
	case EXEC_MODE_DMA_FENCE:
		new_mode = EXEC_MODE_LR;
		err = xe_hw_engine_group_wait_for_dma_fence_jobs(group);
		break;
	}

	if (err)
		return err;

	group->cur_mode = new_mode;

	return 0;
}

/**
 * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode
 * @group: The hw engine group
 * @new_mode: The new execution mode
 * @previous_mode: Pointer to the previous mode, provided for use by the caller
 *
 * Return: 0 if successful, -EINTR if locking failed, or a negative error code
 * if switching modes failed.
 */
int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
				enum xe_hw_engine_group_execution_mode new_mode,
				enum xe_hw_engine_group_execution_mode *previous_mode)
__acquires(&group->mode_sem)
{
	int err = down_read_interruptible(&group->mode_sem);

	if (err)
		return err;

	*previous_mode = group->cur_mode;

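	/*
	 * A mode switch is needed: upgrade to the write lock and re-check
	 * cur_mode, since another thread may have switched modes while the
	 * read lock was dropped, then downgrade so the caller is left
	 * holding the read lock.
	 */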
	if (new_mode != group->cur_mode) {
		up_read(&group->mode_sem);
		err = down_write_killable(&group->mode_sem);
		if (err)
			return err;

		if (new_mode != group->cur_mode) {
			err = switch_mode(group);
			if (err) {
				up_write(&group->mode_sem);
				return err;
			}
		}
		downgrade_write(&group->mode_sem);
	}

	return err;
}

/**
 * xe_hw_engine_group_put() - Put the group
 * @group: The hw engine group
 */
void xe_hw_engine_group_put(struct xe_hw_engine_group *group)
__releases(&group->mode_sem)
{
	up_read(&group->mode_sem);
}

/**
 * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue
 * @q: The exec_queue
 *
 * Return: The execution mode used by the exec queue's VM
 */
enum xe_hw_engine_group_execution_mode
xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q)
{
	if (xe_vm_in_fault_mode(q->vm))
		return EXEC_MODE_LR;
	else
		return EXEC_MODE_DMA_FENCE;
}
349