// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_assert.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_engine_group.h"
#include "xe_vm.h"

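/*
 * A hw engine group ties together engines that share hardware resources
 * (render with compute, video decode with video enhance, copy on its own)
 * so that faulting long-running (LR) jobs and dma-fence jobs never execute
 * concurrently on the same group. A group runs in one of two modes,
 * EXEC_MODE_LR or EXEC_MODE_DMA_FENCE, serialized by mode_sem: switching to
 * dma-fence mode suspends the group's faulting LR queues, switching back to
 * LR mode waits for the outstanding dma-fence jobs, and the suspended LR
 * queues are then resumed asynchronously from a workqueue.
 */
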
static void
hw_engine_group_resume_lr_jobs_func(struct work_struct *w)
{
	struct xe_exec_queue *q;
	struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work);
	int err;
	enum xe_hw_engine_group_execution_mode previous_mode;

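	/*
	 * Switch the group back to LR mode if needed; this also takes the
	 * mode_sem read lock, which is released by the put below.
	 */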
	err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode);
	if (err)
		return;

	if (previous_mode == EXEC_MODE_LR)
		goto put;

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		q->ops->resume(q);
	}

put:
	xe_hw_engine_group_put(group);
}

static struct xe_hw_engine_group *
hw_engine_group_alloc(struct xe_device *xe)
{
	struct xe_hw_engine_group *group;
	int err;

	group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

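	/* Dedicated workqueue used to resume the group's faulting LR jobs asynchronously */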
	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
	if (!group->resume_wq)
		return ERR_PTR(-ENOMEM);

	err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq);
	if (err)
		return ERR_PTR(err);

	init_rwsem(&group->mode_sem);
	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
	INIT_LIST_HEAD(&group->exec_queue_list);

	return group;
}

/**
 * xe_hw_engine_setup_groups() - Set up the hw engine groups for the gt
 * @gt: The gt for which groups are set up
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_hw_engine_setup_groups(struct xe_gt *gt)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
	struct xe_device *xe = gt_to_xe(gt);

	group_rcs_ccs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_rcs_ccs))
		return PTR_ERR(group_rcs_ccs);

	group_bcs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_bcs))
		return PTR_ERR(group_bcs);

	group_vcs_vecs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_vcs_vecs))
		return PTR_ERR(group_vcs_vecs);

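	/* Render and compute share a group, as do video decode and enhance; copy gets its own */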
	for_each_hw_engine(hwe, gt, id) {
		switch (hwe->class) {
		case XE_ENGINE_CLASS_COPY:
			hwe->hw_engine_group = group_bcs;
			break;
		case XE_ENGINE_CLASS_RENDER:
		case XE_ENGINE_CLASS_COMPUTE:
			hwe->hw_engine_group = group_rcs_ccs;
			break;
		case XE_ENGINE_CLASS_VIDEO_DECODE:
		case XE_ENGINE_CLASS_VIDEO_ENHANCE:
			hwe->hw_engine_group = group_vcs_vecs;
			break;
		case XE_ENGINE_CLASS_OTHER:
			break;
		default:
			drm_warn(&xe->drm, "NOT POSSIBLE");
		}
	}

	return 0;
}

/**
 * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 *
 * Return: 0 on success,
 *	    -EINTR if the lock could not be acquired
 */
int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	int err;
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
	xe_assert(xe, q->vm);

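	/* Exec queues of VMs in preempt fence mode are not tracked by hw engine groups */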
	if (xe_vm_in_preempt_fence_mode(q->vm))
		return 0;

	err = down_write_killable(&group->mode_sem);
	if (err)
		return err;

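	/*
	 * A faulting LR queue cannot run while the group executes dma-fence
	 * jobs: suspend it now and queue the work that will switch the group
	 * back to LR mode and resume it once those jobs have completed.
	 */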
	if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) {
		q->ops->suspend(q);
		err = q->ops->suspend_wait(q);
		if (err)
			goto err_suspend;

		xe_hw_engine_group_resume_faulting_lr_jobs(group);
	}

	list_add(&q->hw_engine_group_link, &group->exec_queue_list);
	up_write(&group->mode_sem);

	return 0;

err_suspend:
	up_write(&group->mode_sem);
	return err;
}
ALLOW_ERROR_INJECTION(xe_hw_engine_group_add_exec_queue, ERRNO);

/**
 * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 */
void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, q->vm);

	down_write(&group->mode_sem);

	if (!list_empty(&q->hw_engine_group_link))
		list_del(&q->hw_engine_group_link);

	up_write(&group->mode_sem);
}

/**
 * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's
 * faulting LR jobs
 * @group: The hw engine group
 */
void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	queue_work(group->resume_wq, &group->resume_work);
}

/**
 * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group
 * @group: The hw engine group
 *
 * Return: 0 on success, negative error code on error.
 */
static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	int err;
	struct xe_exec_queue *q;
	bool need_resume = false;

	lockdep_assert_held_write(&group->mode_sem);

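	/* First pass: trigger the suspend of every faulting LR queue of the group */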
	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		need_resume = true;
		q->ops->suspend(q);
	}

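	/* Second pass: wait for all the suspends to complete */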
	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		err = q->ops->suspend_wait(q);
		if (err)
			return err;
	}

	if (need_resume)
		xe_hw_engine_group_resume_faulting_lr_jobs(group);

	return 0;
}

/**
 * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete
 * @group: The hw engine group
 *
 * This function is not meant to be called directly from a user IOCTL as dma_fence_wait()
 * is not interruptible.
 *
 * Return: 0 on success,
 *	    -ETIME if waiting for one job failed
 */
static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group)
{
	long timeout;
	struct xe_exec_queue *q;
	struct dma_fence *fence;

	lockdep_assert_held_write(&group->mode_sem);

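	/* Wait on the last fence of every exec queue of the group whose VM is not in LR mode */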
	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (xe_vm_in_lr_mode(q->vm))
			continue;

		fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm);
		timeout = dma_fence_wait(fence, false);
		dma_fence_put(fence);

		if (timeout < 0)
			return -ETIME;
	}

	return 0;
}

static int switch_mode(struct xe_hw_engine_group *group)
{
	int err = 0;
	enum xe_hw_engine_group_execution_mode new_mode;

	lockdep_assert_held_write(&group->mode_sem);

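	/*
	 * Entering dma-fence mode requires the faulting LR jobs of the group
	 * to be suspended; entering LR mode requires the dma-fence jobs to
	 * have completed.
	 */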
	switch (group->cur_mode) {
	case EXEC_MODE_LR:
		new_mode = EXEC_MODE_DMA_FENCE;
		err = xe_hw_engine_group_suspend_faulting_lr_jobs(group);
		break;
	case EXEC_MODE_DMA_FENCE:
		new_mode = EXEC_MODE_LR;
		err = xe_hw_engine_group_wait_for_dma_fence_jobs(group);
		break;
	}

	if (err)
		return err;

	group->cur_mode = new_mode;

	return 0;
}

/**
 * xe_hw_engine_group_get_mode() - Get the group and ensure it executes in the requested mode
 * @group: The hw engine group
 * @new_mode: The new execution mode
 * @previous_mode: Pointer to the previous mode provided for use by caller
 *
 * Return: 0 if successful, -EINTR if locking failed.
 */
int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
				enum xe_hw_engine_group_execution_mode new_mode,
				enum xe_hw_engine_group_execution_mode *previous_mode)
__acquires(&group->mode_sem)
{
	int err = down_read_interruptible(&group->mode_sem);

	if (err)
		return err;

	*previous_mode = group->cur_mode;

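	/*
	 * The requested mode differs from the current one: upgrade to the
	 * write lock, re-check in case another writer switched modes in the
	 * meantime, switch if still needed, then downgrade back to a read
	 * lock.
	 */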
	if (new_mode != group->cur_mode) {
		up_read(&group->mode_sem);
		err = down_write_killable(&group->mode_sem);
		if (err)
			return err;

		if (new_mode != group->cur_mode) {
			err = switch_mode(group);
			if (err) {
				up_write(&group->mode_sem);
				return err;
			}
		}
		downgrade_write(&group->mode_sem);
	}

	return err;
}

/**
 * xe_hw_engine_group_put() - Put the group
 * @group: The hw engine group
 */
void xe_hw_engine_group_put(struct xe_hw_engine_group *group)
__releases(&group->mode_sem)
{
	up_read(&group->mode_sem);
}

/**
 * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue
 * @q: The exec_queue
 *
 * Return: The execution mode to use for the exec queue's hw engine group.
 */
enum xe_hw_engine_group_execution_mode
xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q)
{
	if (xe_vm_in_fault_mode(q->vm))
		return EXEC_MODE_LR;
	else
		return EXEC_MODE_DMA_FENCE;
}

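/*
 * Sketch of the expected call pattern around job submission (illustrative
 * only; the variable names are placeholders, not taken from a real caller):
 * resolve the execution mode for the queue, get the group in that mode,
 * submit, then put the group:
 *
 *	enum xe_hw_engine_group_execution_mode mode, previous_mode;
 *	int err;
 *
 *	mode = xe_hw_engine_group_find_exec_mode(q);
 *	err = xe_hw_engine_group_get_mode(group, mode, &previous_mode);
 *	if (err)
 *		return err;
 *
 *	... submit the job on q ...
 *
 *	xe_hw_engine_group_put(group);
 */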