/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * DOC: Overview
 *
 * The GPU scheduler provides entities which allow userspace to push jobs
 * into software queues which are then scheduled on a hardware run queue.
 * The software queues have a priority among them. The scheduler selects
 * entities from the run queue using a FIFO. The scheduler provides dependency
 * handling features among jobs. The driver is supposed to provide callback
 * functions for backend operations to the scheduler, like submitting a job to
 * the hardware run queue, returning the dependencies of a job, etc.
 *
 * The organisation of the scheduler is the following:
 *
 * 1. Each hw run queue has one scheduler
 * 2. Each scheduler has multiple run queues with different priorities
 *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
 * 3. Each scheduler run queue has a queue of entities to schedule
 * 4. Entities themselves maintain a queue of jobs that will be scheduled on
 *    the hardware.
 *
 * The jobs in an entity are always scheduled in the order in which they were
 * pushed.
 *
 * Note that once a job has been taken from the entity's queue and pushed to
 * the hardware, i.e. the pending queue, the entity must not be referenced any
 * more through the job's entity pointer.
 */
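
/*
 * Illustrative sketch (not part of this file): a driver typically wires its
 * backend callbacks into a &struct drm_sched_backend_ops instance roughly as
 * below. Only the callbacks actually invoked from this file (run_job,
 * timedout_job, free_job) are shown; the "my_*" names are hypothetical.
 *
 *	static const struct drm_sched_backend_ops my_sched_ops = {
 *		.run_job	= my_run_job,		// push the job to the HW ring, return the HW fence
 *		.timedout_job	= my_timedout_job,	// handle a job that hit sched->timeout
 *		.free_job	= my_free_job,		// release driver resources of a finished job
 *	};
 */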

/**
 * DOC: Flow Control
 *
 * The DRM GPU scheduler provides a flow control mechanism to regulate the rate
 * at which the jobs fetched from scheduler entities are executed.
 *
 * In this context the &drm_gpu_scheduler keeps track of a driver specified
 * credit limit representing the capacity of this scheduler and a credit count;
 * every &drm_sched_job carries a driver specified number of credits.
 *
 * Once a job is executed (but not yet finished), the job's credits contribute
 * to the scheduler's credit count until the job is finished. If by executing
 * one more job the scheduler's credit count would exceed the scheduler's
 * credit limit, the job won't be executed. Instead, the scheduler will wait
 * until the credit count has decreased enough not to exceed its credit limit.
 * This implies waiting for previously executed jobs.
 */
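
/*
 * Illustrative sketch (not part of this file): a driver opts into flow control
 * by setting a credit limit at scheduler init time and by sizing each job in
 * credits when it is initialized. The values and "my_*" names below are
 * hypothetical.
 *
 *	struct drm_sched_init_args args = {
 *		.ops		= &my_sched_ops,
 *		.credit_limit	= 16,	// capacity of the HW ring in credits
 *		...
 *	};
 *
 *	drm_sched_init(&my_sched, &args);
 *	...
 *	// a job consuming 4 of the 16 available credits
 *	drm_sched_job_init(&my_job->base, &my_entity, 4, my_file_priv);
 */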

#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
#include <linux/dma-resv.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>
#include <drm/drm_gem.h>
#include <drm/drm_syncobj.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#include "sched_internal.h"

#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"

#ifdef CONFIG_LOCKDEP
static struct lockdep_map drm_sched_lockdep_map = {
	.name = "drm_sched_lockdep_map"
};
#endif

int drm_sched_policy = DRM_SCHED_POLICY_FIFO;

/**
 * DOC: sched_policy (int)
 * Used to override the default entity scheduling policy in a run queue.
 */
MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
module_param_named(sched_policy, drm_sched_policy, int, 0444);

static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched)
{
	u32 credits;

	WARN_ON(check_sub_overflow(sched->credit_limit,
				   atomic_read(&sched->credit_count),
				   &credits));

	return credits;
}

/**
 * drm_sched_can_queue - Can we queue more to the hardware?
 * @sched: scheduler instance
 * @entity: the scheduler entity
 *
 * Return true if we can push at least one more job from @entity, false
 * otherwise.
 */
static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched,
				struct drm_sched_entity *entity)
{
	struct drm_sched_job *s_job;

	s_job = drm_sched_entity_queue_peek(entity);
	if (!s_job)
		return false;

	/* If a job exceeds the credit limit, truncate it to the credit limit
	 * itself to guarantee forward progress.
	 */
	if (s_job->credits > sched->credit_limit) {
		dev_WARN(sched->dev,
			 "Jobs may not exceed the credit limit, truncate.\n");
		s_job->credits = sched->credit_limit;
	}

	return drm_sched_available_credits(sched) >= s_job->credits;
}

static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a,
							    const struct rb_node *b)
{
	struct drm_sched_entity *ent_a = rb_entry((a), struct drm_sched_entity, rb_tree_node);
	struct drm_sched_entity *ent_b = rb_entry((b), struct drm_sched_entity, rb_tree_node);

	return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting);
}

static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity,
					    struct drm_sched_rq *rq)
{
	if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
		rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root);
		RB_CLEAR_NODE(&entity->rb_tree_node);
	}
}

void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
				     struct drm_sched_rq *rq,
				     ktime_t ts)
{
	/*
	 * Both locks need to be held: one protects against a concurrent
	 * drm_sched_entity_select_rq() changing the entity's rq, the other
	 * protects the rb tree structure.
	 */
	lockdep_assert_held(&entity->lock);
	lockdep_assert_held(&rq->lock);

	drm_sched_rq_remove_fifo_locked(entity, rq);

	entity->oldest_job_waiting = ts;

	rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
		      drm_sched_entity_compare_before);
}

/**
 * drm_sched_rq_init - initialize a given run queue struct
 *
 * @sched: scheduler instance to associate with this run queue
 * @rq: scheduler run queue
 *
 * Initializes a scheduler runqueue.
 */
static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
			      struct drm_sched_rq *rq)
{
	spin_lock_init(&rq->lock);
	INIT_LIST_HEAD(&rq->entities);
	rq->rb_tree_root = RB_ROOT_CACHED;
	rq->current_entity = NULL;
	rq->sched = sched;
}

/**
 * drm_sched_rq_add_entity - add an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Adds a scheduler entity to the run queue.
 */
void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
			     struct drm_sched_entity *entity)
{
	lockdep_assert_held(&entity->lock);
	lockdep_assert_held(&rq->lock);

	if (!list_empty(&entity->list))
		return;

	atomic_inc(rq->sched->score);
	list_add_tail(&entity->list, &rq->entities);
}

/**
 * drm_sched_rq_remove_entity - remove an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Removes a scheduler entity from the run queue.
 */
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
				struct drm_sched_entity *entity)
{
	lockdep_assert_held(&entity->lock);

	if (list_empty(&entity->list))
		return;

	spin_lock(&rq->lock);

	atomic_dec(rq->sched->score);
	list_del_init(&entity->list);

	if (rq->current_entity == entity)
		rq->current_entity = NULL;

	if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
		drm_sched_rq_remove_fifo_locked(entity, rq);

	spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
 *
 * @sched: the gpu scheduler
 * @rq: scheduler run queue to check.
 *
 * Try to find the next ready entity.
 *
 * Return an entity if one is found; return an error-pointer (!NULL) if an
 * entity was ready, but the scheduler had insufficient credits to accommodate
 * its job; return NULL if no ready entity was found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched,
			      struct drm_sched_rq *rq)
{
	struct drm_sched_entity *entity;

	spin_lock(&rq->lock);

	entity = rq->current_entity;
	if (entity) {
		list_for_each_entry_continue(entity, &rq->entities, list) {
			if (drm_sched_entity_is_ready(entity)) {
				/* If we can't queue yet, preserve the current
				 * entity in terms of fairness.
				 */
				if (!drm_sched_can_queue(sched, entity)) {
					spin_unlock(&rq->lock);
					return ERR_PTR(-ENOSPC);
				}

				rq->current_entity = entity;
				reinit_completion(&entity->entity_idle);
				spin_unlock(&rq->lock);
				return entity;
			}
		}
	}

	list_for_each_entry(entity, &rq->entities, list) {
		if (drm_sched_entity_is_ready(entity)) {
			/* If we can't queue yet, preserve the current entity in
			 * terms of fairness.
			 */
			if (!drm_sched_can_queue(sched, entity)) {
				spin_unlock(&rq->lock);
				return ERR_PTR(-ENOSPC);
			}

			rq->current_entity = entity;
			reinit_completion(&entity->entity_idle);
			spin_unlock(&rq->lock);
			return entity;
		}

		if (entity == rq->current_entity)
			break;
	}

	spin_unlock(&rq->lock);

	return NULL;
}

/**
 * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
 *
 * @sched: the gpu scheduler
 * @rq: scheduler run queue to check.
 *
 * Find the oldest waiting ready entity.
 *
 * Return an entity if one is found; return an error-pointer (!NULL) if an
 * entity was ready, but the scheduler had insufficient credits to accommodate
 * its job; return NULL if no ready entity was found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched,
				struct drm_sched_rq *rq)
{
	struct rb_node *rb;

	spin_lock(&rq->lock);
	for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) {
		struct drm_sched_entity *entity;

		entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
		if (drm_sched_entity_is_ready(entity)) {
			/* If we can't queue yet, preserve the current entity in
			 * terms of fairness.
			 */
			if (!drm_sched_can_queue(sched, entity)) {
				spin_unlock(&rq->lock);
				return ERR_PTR(-ENOSPC);
			}

			reinit_completion(&entity->entity_idle);
			break;
		}
	}
	spin_unlock(&rq->lock);

	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
}

/**
 * drm_sched_run_job_queue - enqueue run-job work
 * @sched: scheduler instance
 */
static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
{
	if (!READ_ONCE(sched->pause_submit))
		queue_work(sched->submit_wq, &sched->work_run_job);
}

/**
 * __drm_sched_run_free_queue - enqueue free-job work
 * @sched: scheduler instance
 */
static void __drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
{
	if (!READ_ONCE(sched->pause_submit))
		queue_work(sched->submit_wq, &sched->work_free_job);
}

/**
 * drm_sched_run_free_queue - enqueue free-job work if ready
 * @sched: scheduler instance
 */
static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *job;

	spin_lock(&sched->job_list_lock);
	job = list_first_entry_or_null(&sched->pending_list,
				       struct drm_sched_job, list);
	if (job && dma_fence_is_signaled(&job->s_fence->finished))
		__drm_sched_run_free_queue(sched);
	spin_unlock(&sched->job_list_lock);
}

/**
 * drm_sched_job_done - complete a job
 * @s_job: pointer to the job which is done
 * @result: the job's result, i.e. the error code to set on the finished fence
 *
 * Finish the job's fence and wake up the worker thread.
 */
static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
{
	struct drm_sched_fence *s_fence = s_job->s_fence;
	struct drm_gpu_scheduler *sched = s_fence->sched;

	atomic_sub(s_job->credits, &sched->credit_count);
	atomic_dec(sched->score);

	trace_drm_sched_process_job(s_fence);

	dma_fence_get(&s_fence->finished);
	drm_sched_fence_finished(s_fence, result);
	dma_fence_put(&s_fence->finished);
	__drm_sched_run_free_queue(sched);
}

/**
 * drm_sched_job_done_cb - the callback for a done job
 * @f: fence
 * @cb: fence callbacks
 */
static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);

	drm_sched_job_done(s_job, f->error);
}

/**
 * drm_sched_start_timeout - start timeout for reset worker
 *
 * @sched: scheduler instance to start the worker for
 *
 * Start the timeout for the given scheduler.
 */
static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
	lockdep_assert_held(&sched->job_list_lock);

	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !list_empty(&sched->pending_list))
		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
}

static void drm_sched_start_timeout_unlocked(struct drm_gpu_scheduler *sched)
{
	spin_lock(&sched->job_list_lock);
	drm_sched_start_timeout(sched);
	spin_unlock(&sched->job_list_lock);
}

/**
 * drm_sched_tdr_queue_imm - immediately start job timeout handler
 *
 * @sched: scheduler for which the timeout handling should be started.
 *
 * Start timeout handling immediately for the named scheduler.
 */
void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched)
{
	spin_lock(&sched->job_list_lock);
	sched->timeout = 0;
	drm_sched_start_timeout(sched);
	spin_unlock(&sched->job_list_lock);
}
EXPORT_SYMBOL(drm_sched_tdr_queue_imm);

/**
 * drm_sched_fault - immediately start timeout handler
 *
 * @sched: scheduler where the timeout handling should be started.
 *
 * Start timeout handling immediately when the driver detects a hardware fault.
 */
void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
	if (sched->timeout_wq)
		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0);
}
EXPORT_SYMBOL(drm_sched_fault);

/**
 * drm_sched_suspend_timeout - Suspend scheduler job timeout
 *
 * @sched: scheduler instance for which to suspend the timeout
 *
 * Suspend the delayed work timeout for the scheduler. This is done by
 * modifying the delayed work timeout to an arbitrary large value,
 * MAX_SCHEDULE_TIMEOUT in this case.
 *
 * Returns the timeout remaining
 *
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
	unsigned long sched_timeout, now = jiffies;

	sched_timeout = sched->work_tdr.timer.expires;

	/*
	 * Modify the timeout to an arbitrarily large value. This also prevents
	 * the timeout from being restarted when new submissions arrive.
	 */
	if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
			&& time_after(sched_timeout, now))
		return sched_timeout - now;
	else
		return sched->timeout;
}
EXPORT_SYMBOL(drm_sched_suspend_timeout);

/**
 * drm_sched_resume_timeout - Resume scheduler job timeout
 *
 * @sched: scheduler instance for which to resume the timeout
 * @remaining: remaining timeout
 *
 * Resume the delayed work timeout for the scheduler.
 */
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
			      unsigned long remaining)
{
	spin_lock(&sched->job_list_lock);

	if (list_empty(&sched->pending_list))
		cancel_delayed_work(&sched->work_tdr);
	else
		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining);

	spin_unlock(&sched->job_list_lock);
}
EXPORT_SYMBOL(drm_sched_resume_timeout);
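
/*
 * Illustrative sketch (not part of this file): the two helpers above are meant
 * to be used as a pair, e.g. around a driver-side operation during which job
 * timeouts must not fire. The "my_*" names are hypothetical.
 *
 *	unsigned long remaining;
 *
 *	remaining = drm_sched_suspend_timeout(&my_sched);
 *	my_do_something_slow();
 *	drm_sched_resume_timeout(&my_sched, remaining);
 */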

static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;

	spin_lock(&sched->job_list_lock);
	list_add_tail(&s_job->list, &sched->pending_list);
	drm_sched_start_timeout(sched);
	spin_unlock(&sched->job_list_lock);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
	struct drm_gpu_scheduler *sched;
	struct drm_sched_job *job;
	enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;

	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);

	/* Protects against concurrent deletion in drm_sched_get_finished_job */
	spin_lock(&sched->job_list_lock);
	job = list_first_entry_or_null(&sched->pending_list,
				       struct drm_sched_job, list);

	if (job) {
		/*
		 * Remove the bad job so it cannot be freed by a concurrent
		 * run of the free-job work item. It will be reinserted after
		 * the scheduler has been stopped, at which point it's safe.
		 */
		list_del_init(&job->list);
		spin_unlock(&sched->job_list_lock);

		status = job->sched->ops->timedout_job(job);

		/*
		 * The guilty job did complete and hence needs to be manually
		 * removed. See the drm_sched_stop() documentation.
		 */
		if (sched->free_guilty) {
			job->sched->ops->free_job(job);
			sched->free_guilty = false;
		}
	} else {
		spin_unlock(&sched->job_list_lock);
	}

	if (status != DRM_GPU_SCHED_STAT_ENODEV)
		drm_sched_start_timeout_unlocked(sched);
}

/**
 * drm_sched_stop - stop the scheduler
 *
 * @sched: scheduler instance
 * @bad: job which caused the time out
 *
 * Stop the scheduler, and also remove and free all completed jobs.
 * Note: the bad job will not be freed, as it might be used later; it is
 * therefore the caller's responsibility to release it manually if it is not
 * part of the pending list any more.
 *
 * This function is typically used for reset recovery (see the documentation of
 * drm_sched_backend_ops.timedout_job() for details). Do not call it for
 * scheduler teardown, i.e., before calling drm_sched_fini().
 */
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
	struct drm_sched_job *s_job, *tmp;

	drm_sched_wqueue_stop(sched);

	/*
	 * Reinsert the bad job here - now it's safe as
	 * drm_sched_get_finished_job cannot race against us and release the
	 * bad job at this point - we parked (waited for) any in progress
	 * (earlier) cleanups and drm_sched_get_finished_job will not be called
	 * now until the scheduler thread is unparked.
	 */
	if (bad && bad->sched == sched)
		/*
		 * Add at the head of the queue to reflect it was the earliest
		 * job extracted.
		 */
		list_add(&bad->list, &sched->pending_list);

	/*
	 * Iterate the job list from later to earlier one and either deactivate
	 * their HW callbacks or remove them from the pending list if they have
	 * already signaled.
	 * This iteration is thread safe as the sched thread is stopped.
	 */
	list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list,
					 list) {
		if (s_job->s_fence->parent &&
		    dma_fence_remove_callback(s_job->s_fence->parent,
					      &s_job->cb)) {
			dma_fence_put(s_job->s_fence->parent);
			s_job->s_fence->parent = NULL;
			atomic_sub(s_job->credits, &sched->credit_count);
		} else {
			/*
			 * Remove the job from pending_list.
			 * Locking here is for concurrent resume timeout.
			 */
			spin_lock(&sched->job_list_lock);
			list_del_init(&s_job->list);
			spin_unlock(&sched->job_list_lock);

			/*
			 * Wait for the job's HW fence callback to finish using
			 * s_job before releasing it.
			 *
			 * The job is still alive, so the fence refcount is at
			 * least 1.
			 */
			dma_fence_wait(&s_job->s_fence->finished, false);

			/*
			 * We must keep the bad job alive for later use during
			 * recovery by some of the drivers, but leave a hint
			 * that the guilty job must be released.
			 */
			if (bad != s_job)
				sched->ops->free_job(s_job);
			else
				sched->free_guilty = true;
		}
	}

	/*
	 * Stop the pending timer in flight, as we rearm it in drm_sched_start.
	 * This prevents the pending timeout work in progress from firing right
	 * away after this TDR finished and before the newly restarted jobs had
	 * a chance to complete.
	 */
	cancel_delayed_work(&sched->work_tdr);
}
EXPORT_SYMBOL(drm_sched_stop);

/**
 * drm_sched_start - recover jobs after a reset
 *
 * @sched: scheduler instance
 * @errno: error to set on the pending fences
 *
 * This function is typically used for reset recovery (see the documentation of
 * drm_sched_backend_ops.timedout_job() for details). Do not call it for
 * scheduler startup. The scheduler itself is fully operational after
 * drm_sched_init() succeeded.
 */
void drm_sched_start(struct drm_gpu_scheduler *sched, int errno)
{
	struct drm_sched_job *s_job, *tmp;

	/*
	 * Locking the list is not required here as the sched thread is parked
	 * so no new jobs are being inserted or removed. Also concurrent
	 * GPU recovers can't run in parallel.
	 */
	list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
		struct dma_fence *fence = s_job->s_fence->parent;

		atomic_add(s_job->credits, &sched->credit_count);

		if (!fence) {
			drm_sched_job_done(s_job, errno ?: -ECANCELED);
			continue;
		}

		if (dma_fence_add_callback(fence, &s_job->cb,
					   drm_sched_job_done_cb))
			drm_sched_job_done(s_job, fence->error ?: errno);
	}

	drm_sched_start_timeout_unlocked(sched);
	drm_sched_wqueue_start(sched);
}
EXPORT_SYMBOL(drm_sched_start);
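
/*
 * Illustrative sketch (not part of this file): drm_sched_stop() and
 * drm_sched_start() bracket a driver's reset sequence, typically invoked from
 * its &struct drm_sched_backend_ops.timedout_job callback. The "my_*" names
 * are hypothetical.
 *
 *	static enum drm_gpu_sched_stat my_timedout_job(struct drm_sched_job *bad)
 *	{
 *		struct drm_gpu_scheduler *sched = bad->sched;
 *
 *		drm_sched_stop(sched, bad);
 *		my_reset_hardware();
 *		drm_sched_start(sched, 0);
 *
 *		return DRM_GPU_SCHED_STAT_NOMINAL;
 *	}
 */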

/**
 * drm_sched_resubmit_jobs - Deprecated, don't use in new code!
 *
 * @sched: scheduler instance
 *
 * Re-submitting jobs was a concept AMD came up with as a cheap way to
 * implement recovery after a job timeout.
 *
 * This turned out to not work very well. First of all there are many
 * problems with the dma_fence implementation and requirements. Either the
 * implementation risks deadlocks with core memory management or violates
 * documented implementation details of the dma_fence object.
 *
 * Drivers can still save and restore their state for recovery operations, but
 * we shouldn't make this a general scheduler feature around the dma_fence
 * interface.
 */
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *s_job, *tmp;
	uint64_t guilty_context;
	bool found_guilty = false;
	struct dma_fence *fence;

	list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
		struct drm_sched_fence *s_fence = s_job->s_fence;

		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
			found_guilty = true;
			guilty_context = s_job->s_fence->scheduled.context;
		}

		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
			dma_fence_set_error(&s_fence->finished, -ECANCELED);

		fence = sched->ops->run_job(s_job);

		if (IS_ERR_OR_NULL(fence)) {
			if (IS_ERR(fence))
				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

			s_job->s_fence->parent = NULL;
		} else {

			s_job->s_fence->parent = dma_fence_get(fence);

			/* Drop for original kref_init */
			dma_fence_put(fence);
		}
	}
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs);

/**
 * drm_sched_job_init - init a scheduler job
 * @job: scheduler job to init
 * @entity: scheduler entity to use
 * @credits: the number of credits this job contributes to the scheduler's
 *           credit limit
 * @owner: job owner for debugging
 *
 * Refer to drm_sched_entity_push_job() documentation
 * for locking considerations.
 *
 * Drivers must make sure to call drm_sched_job_cleanup() if this function
 * returns successfully, even when @job is aborted before drm_sched_job_arm()
 * is called.
 *
 * Note that this function does not assign a valid value to each struct member
 * of struct drm_sched_job. Take a look at that struct's documentation to see
 * who sets which struct member with what lifetime.
 *
 * WARNING: amdgpu abuses &drm_sched.ready to signal when the hardware
 * has died, which can mean that there's no valid runqueue for an @entity.
 * This function returns -ENOENT in this case (which probably should be -EIO as
 * a more meaningful return value).
 *
 * Returns 0 for success, negative error code otherwise.
 */
int drm_sched_job_init(struct drm_sched_job *job,
		       struct drm_sched_entity *entity,
		       u32 credits, void *owner)
{
	if (!entity->rq) {
		/* This will most likely be followed by missing frames
		 * or worse--a blank screen--leave a trail in the
		 * logs, so this can be debugged easier.
		 */
		dev_err(job->sched->dev, "%s: entity has no rq!\n", __func__);
		return -ENOENT;
	}

	if (unlikely(!credits)) {
		pr_err("*ERROR* %s: credits cannot be 0!\n", __func__);
		return -EINVAL;
	}

	/*
	 * We don't know for sure how the user has allocated the job. Thus,
	 * zero the struct so that unallowed (i.e., too early) usage of
	 * pointers that this function does not set is guaranteed to lead to
	 * a NULL pointer exception instead of UB.
	 */
	memset(job, 0, sizeof(*job));

	job->entity = entity;
	job->credits = credits;
	job->s_fence = drm_sched_fence_alloc(entity, owner);
	if (!job->s_fence)
		return -ENOMEM;

	INIT_LIST_HEAD(&job->list);

	xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC);

	return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);
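
/*
 * Illustrative sketch (not part of this file): the usual job submission
 * sequence from a driver's point of view. Error handling is omitted and the
 * "my_*" names are hypothetical.
 *
 *	drm_sched_job_init(&my_job->base, &my_entity, 1, my_file_priv);
 *	// declare what the job has to wait for, e.g. fences from a reservation
 *	drm_sched_job_add_resv_dependencies(&my_job->base, my_bo->resv,
 *					    DMA_RESV_USAGE_WRITE);
 *	drm_sched_job_arm(&my_job->base);	// initializes the job's fences
 *	drm_sched_entity_push_job(&my_job->base);
 *
 * If submission is aborted after drm_sched_job_init() but before the job was
 * pushed, drm_sched_job_cleanup() must be called instead.
 */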

/**
 * drm_sched_job_arm - arm a scheduler job for execution
 * @job: scheduler job to arm
 *
 * This arms a scheduler job for execution. Specifically it initializes the
 * &drm_sched_job.s_fence of @job, so that it can be attached to struct dma_resv
 * or other places that need to track the completion of this job.
 *
 * Refer to drm_sched_entity_push_job() documentation for locking
 * considerations.
 *
 * This can only be called if drm_sched_job_init() succeeded.
 */
void drm_sched_job_arm(struct drm_sched_job *job)
{
	struct drm_gpu_scheduler *sched;
	struct drm_sched_entity *entity = job->entity;

	BUG_ON(!entity);
	drm_sched_entity_select_rq(entity);
	sched = entity->rq->sched;

	job->sched = sched;
	job->s_priority = entity->priority;
	job->id = atomic64_inc_return(&sched->job_id_count);

	drm_sched_fence_init(job->s_fence, job->entity);
}
EXPORT_SYMBOL(drm_sched_job_arm);

/**
 * drm_sched_job_add_dependency - adds the fence as a job dependency
 * @job: scheduler job to add the dependencies to
 * @fence: the dma_fence to add to the list of dependencies.
 *
 * Note that @fence is consumed in both the success and error cases.
 *
 * Returns:
 * 0 on success, or an error on failing to expand the array.
 */
int drm_sched_job_add_dependency(struct drm_sched_job *job,
				 struct dma_fence *fence)
{
	struct dma_fence *entry;
	unsigned long index;
	u32 id = 0;
	int ret;

	if (!fence)
		return 0;

	/* Deduplicate if we already depend on a fence from the same context.
	 * This lets the size of the array of deps scale with the number of
	 * engines involved, rather than the number of BOs.
	 */
	xa_for_each(&job->dependencies, index, entry) {
		if (entry->context != fence->context)
			continue;

		if (dma_fence_is_later(fence, entry)) {
			dma_fence_put(entry);
			xa_store(&job->dependencies, index, fence, GFP_KERNEL);
		} else {
			dma_fence_put(fence);
		}
		return 0;
	}

	ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL);
	if (ret != 0)
		dma_fence_put(fence);

	return ret;
}
EXPORT_SYMBOL(drm_sched_job_add_dependency);

/**
 * drm_sched_job_add_syncobj_dependency - adds a syncobj's fence as a job dependency
 * @job: scheduler job to add the dependencies to
 * @file: drm file private pointer
 * @handle: syncobj handle to lookup
 * @point: timeline point
 *
 * This adds the fence matching the given syncobj to @job.
 *
 * Returns:
 * 0 on success, or an error on failing to expand the array.
 */
int drm_sched_job_add_syncobj_dependency(struct drm_sched_job *job,
					 struct drm_file *file,
					 u32 handle,
					 u32 point)
{
	struct dma_fence *fence;
	int ret;

	ret = drm_syncobj_find_fence(file, handle, point, 0, &fence);
	if (ret)
		return ret;

	return drm_sched_job_add_dependency(job, fence);
}
EXPORT_SYMBOL(drm_sched_job_add_syncobj_dependency);

/**
 * drm_sched_job_add_resv_dependencies - add all fences from the resv to the job
 * @job: scheduler job to add the dependencies to
 * @resv: the dma_resv object to get the fences from
 * @usage: the dma_resv_usage to use to filter the fences
 *
 * This adds all fences matching the given usage from @resv to @job.
 * Must be called with the @resv lock held.
 *
 * Returns:
 * 0 on success, or an error on failing to expand the array.
 */
int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job,
					struct dma_resv *resv,
					enum dma_resv_usage usage)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	int ret;

	dma_resv_assert_held(resv);

	dma_resv_for_each_fence(&cursor, resv, usage, fence) {
		/* Make sure to grab an additional ref on the added fence */
		dma_fence_get(fence);
		ret = drm_sched_job_add_dependency(job, fence);
		if (ret) {
			dma_fence_put(fence);
			return ret;
		}
	}
	return 0;
}
EXPORT_SYMBOL(drm_sched_job_add_resv_dependencies);
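
/*
 * Illustrative sketch (not part of this file): since the helper above requires
 * the reservation lock, a minimal caller (ignoring drm_exec/ww-acquire
 * considerations and error handling) could look like this; the "my_*" names
 * are hypothetical.
 *
 *	dma_resv_lock(my_bo->resv, NULL);
 *	drm_sched_job_add_resv_dependencies(&my_job->base, my_bo->resv,
 *					    dma_resv_usage_rw(my_job_writes));
 *	dma_resv_unlock(my_bo->resv);
 */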

/**
 * drm_sched_job_add_implicit_dependencies - adds implicit dependencies as job
 *   dependencies
 * @job: scheduler job to add the dependencies to
 * @obj: the gem object to add new dependencies from.
 * @write: whether the job might write the object (so we need to depend on
 * shared fences in the reservation object).
 *
 * This should be called after drm_gem_lock_reservations() on your array of
 * GEM objects used in the job but before updating the reservations with your
 * own fences.
 *
 * Returns:
 * 0 on success, or an error on failing to expand the array.
 */
int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
					    struct drm_gem_object *obj,
					    bool write)
{
	return drm_sched_job_add_resv_dependencies(job, obj->resv,
						   dma_resv_usage_rw(write));
}
EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);

/**
 * drm_sched_job_has_dependency - check whether fence is the job's dependency
 * @job: scheduler job to check
 * @fence: fence to look for
 *
 * Returns:
 * True if @fence is found within the job's dependencies, or otherwise false.
 */
bool drm_sched_job_has_dependency(struct drm_sched_job *job,
				  struct dma_fence *fence)
{
	struct dma_fence *f;
	unsigned long index;

	xa_for_each(&job->dependencies, index, f) {
		if (f == fence)
			return true;
	}

	return false;
}
EXPORT_SYMBOL(drm_sched_job_has_dependency);

/**
 * drm_sched_job_cleanup - clean up scheduler job resources
 * @job: scheduler job to clean up
 *
 * Cleans up the resources allocated with drm_sched_job_init().
 *
 * Drivers should call this from their error unwind code if @job is aborted
 * before it was submitted to an entity with drm_sched_entity_push_job().
 *
 * Since calling drm_sched_job_arm() causes the job's fences to be initialized,
 * it is up to the driver to ensure that fences that were exposed to external
 * parties get signaled. drm_sched_job_cleanup() does not ensure this.
 *
 * This function must also be called in &struct drm_sched_backend_ops.free_job
 */
void drm_sched_job_cleanup(struct drm_sched_job *job)
{
	struct dma_fence *fence;
	unsigned long index;

	if (kref_read(&job->s_fence->finished.refcount)) {
		/* drm_sched_job_arm() has been called */
		dma_fence_put(&job->s_fence->finished);
	} else {
		/* aborted job before arming */
		drm_sched_fence_free(job->s_fence);
	}

	job->s_fence = NULL;

	xa_for_each(&job->dependencies, index, fence) {
		dma_fence_put(fence);
	}
	xa_destroy(&job->dependencies);
}
EXPORT_SYMBOL(drm_sched_job_cleanup);

/**
 * drm_sched_wakeup - Wake up the scheduler if it is ready to queue
 * @sched: scheduler instance
 *
 * Wake up the scheduler if we can queue jobs.
 */
void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
	drm_sched_run_job_queue(sched);
}

/**
 * drm_sched_select_entity - Select next entity to process
 *
 * @sched: scheduler instance
 *
 * Return an entity to process or NULL if none are found.
 *
 * Note that we break out of the for-loop when "entity" is non-null, which can
 * also be an error-pointer--this ensures we don't process lower priority
 * run-queues. See comments in the respectively called functions.
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_entity *entity;
	int i;

	/* Start with the highest priority.
	 */
	for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
		entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
			drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) :
			drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]);
		if (entity)
			break;
	}

	return IS_ERR(entity) ? NULL : entity;
}

/**
 * drm_sched_get_finished_job - fetch the next finished job to be destroyed
 *
 * @sched: scheduler instance
 *
 * Returns the next finished job from the pending list (if there is one)
 * ready to be destroyed.
 */
static struct drm_sched_job *
drm_sched_get_finished_job(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *job, *next;

	spin_lock(&sched->job_list_lock);

	job = list_first_entry_or_null(&sched->pending_list,
				       struct drm_sched_job, list);

	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
		/* remove job from pending_list */
		list_del_init(&job->list);

		/* cancel this job's TO timer */
		cancel_delayed_work(&sched->work_tdr);
		/* make the scheduled timestamp more accurate */
		next = list_first_entry_or_null(&sched->pending_list,
						typeof(*next), list);

		if (next) {
			if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
				     &next->s_fence->scheduled.flags))
				next->s_fence->scheduled.timestamp =
					dma_fence_timestamp(&job->s_fence->finished);
			/* start TO timer for next job */
			drm_sched_start_timeout(sched);
		}
	} else {
		job = NULL;
	}

	spin_unlock(&sched->job_list_lock);

	return job;
}

/**
 * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
 * @sched_list: list of drm_gpu_schedulers
 * @num_sched_list: number of drm_gpu_schedulers in the sched_list
 *
 * Returns a pointer to the scheduler with the least load, or NULL if none of
 * the drm_gpu_schedulers are ready.
 */
struct drm_gpu_scheduler *
drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
		    unsigned int num_sched_list)
{
	struct drm_gpu_scheduler *sched, *picked_sched = NULL;
	int i;
	unsigned int min_score = UINT_MAX, num_score;

	for (i = 0; i < num_sched_list; ++i) {
		sched = sched_list[i];

		if (!sched->ready) {
			DRM_WARN("scheduler %s is not ready, skipping",
				 sched->name);
			continue;
		}

		num_score = atomic_read(sched->score);
		if (num_score < min_score) {
			min_score = num_score;
			picked_sched = sched;
		}
	}

	return picked_sched;
}
EXPORT_SYMBOL(drm_sched_pick_best);
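
/*
 * Illustrative sketch (not part of this file): a driver with several hardware
 * rings of the same type can use the helper above for simple load balancing,
 * e.g. when deciding which scheduler a new context should use. The "my_*"
 * names are hypothetical.
 *
 *	struct drm_gpu_scheduler *sched;
 *
 *	sched = drm_sched_pick_best(my_sched_list, my_num_scheds);
 *	if (!sched)
 *		return -ENODEV;	// no scheduler is ready
 *	// create or bind the entity against "sched"
 */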

/**
 * drm_sched_free_job_work - worker to call free_job
 *
 * @w: free job work
 */
static void drm_sched_free_job_work(struct work_struct *w)
{
	struct drm_gpu_scheduler *sched =
		container_of(w, struct drm_gpu_scheduler, work_free_job);
	struct drm_sched_job *job;

	job = drm_sched_get_finished_job(sched);
	if (job)
		sched->ops->free_job(job);

	drm_sched_run_free_queue(sched);
	drm_sched_run_job_queue(sched);
}

/**
 * drm_sched_run_job_work - worker to call run_job
 *
 * @w: run job work
 */
static void drm_sched_run_job_work(struct work_struct *w)
{
	struct drm_gpu_scheduler *sched =
		container_of(w, struct drm_gpu_scheduler, work_run_job);
	struct drm_sched_entity *entity;
	struct dma_fence *fence;
	struct drm_sched_fence *s_fence;
	struct drm_sched_job *sched_job;
	int r;

	/* Find entity with a ready job */
	entity = drm_sched_select_entity(sched);
	if (!entity)
		return;	/* No more work */

	sched_job = drm_sched_entity_pop_job(entity);
	if (!sched_job) {
		complete_all(&entity->entity_idle);
		drm_sched_run_job_queue(sched);
		return;
	}

	s_fence = sched_job->s_fence;

	atomic_add(sched_job->credits, &sched->credit_count);
	drm_sched_job_begin(sched_job);

	trace_drm_run_job(sched_job, entity);
	fence = sched->ops->run_job(sched_job);
	complete_all(&entity->entity_idle);
	drm_sched_fence_scheduled(s_fence, fence);

	if (!IS_ERR_OR_NULL(fence)) {
		/* Drop for original kref_init of the fence */
		dma_fence_put(fence);

		r = dma_fence_add_callback(fence, &sched_job->cb,
					   drm_sched_job_done_cb);
		if (r == -ENOENT)
			drm_sched_job_done(sched_job, fence->error);
		else if (r)
			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r);
	} else {
		drm_sched_job_done(sched_job, IS_ERR(fence) ?
				   PTR_ERR(fence) : 0);
	}

	wake_up(&sched->job_scheduled);
	drm_sched_run_job_queue(sched);
}

/**
 * drm_sched_init - Init a gpu scheduler instance
 *
 * @sched: scheduler instance
 * @args: scheduler initialization arguments
 *
 * Return 0 on success, otherwise error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_args *args)
{
	int i;

	sched->ops = args->ops;
	sched->credit_limit = args->credit_limit;
	sched->name = args->name;
	sched->timeout = args->timeout;
	sched->hang_limit = args->hang_limit;
	sched->timeout_wq = args->timeout_wq ? args->timeout_wq : system_wq;
	sched->score = args->score ? args->score : &sched->_score;
	sched->dev = args->dev;

	if (args->num_rqs > DRM_SCHED_PRIORITY_COUNT) {
		/* This is a gross violation--tell drivers what the problem is.
		 */
		dev_err(sched->dev, "%s: num_rqs cannot be greater than DRM_SCHED_PRIORITY_COUNT\n",
			__func__);
		return -EINVAL;
	} else if (sched->sched_rq) {
		/* Not an error, but warn anyway so drivers can
		 * fine-tune their DRM calling order, and return all
		 * is good.
		 */
		dev_warn(sched->dev, "%s: scheduler already initialized!\n", __func__);
		return 0;
	}

	if (args->submit_wq) {
		sched->submit_wq = args->submit_wq;
		sched->own_submit_wq = false;
	} else {
#ifdef CONFIG_LOCKDEP
		sched->submit_wq = alloc_ordered_workqueue_lockdep_map(args->name,
								       WQ_MEM_RECLAIM,
								       &drm_sched_lockdep_map);
#else
		sched->submit_wq = alloc_ordered_workqueue(args->name, WQ_MEM_RECLAIM);
#endif
		if (!sched->submit_wq)
			return -ENOMEM;

		sched->own_submit_wq = true;
	}

	sched->sched_rq = kmalloc_array(args->num_rqs, sizeof(*sched->sched_rq),
					GFP_KERNEL | __GFP_ZERO);
	if (!sched->sched_rq)
		goto Out_check_own;
	sched->num_rqs = args->num_rqs;
	for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
		sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL);
		if (!sched->sched_rq[i])
			goto Out_unroll;
		drm_sched_rq_init(sched, sched->sched_rq[i]);
	}

	init_waitqueue_head(&sched->job_scheduled);
	INIT_LIST_HEAD(&sched->pending_list);
	spin_lock_init(&sched->job_list_lock);
	atomic_set(&sched->credit_count, 0);
	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
	INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
	INIT_WORK(&sched->work_free_job, drm_sched_free_job_work);
	atomic_set(&sched->_score, 0);
	atomic64_set(&sched->job_id_count, 0);
	sched->pause_submit = false;

	sched->ready = true;
	return 0;
Out_unroll:
	for (--i ; i >= DRM_SCHED_PRIORITY_KERNEL; i--)
		kfree(sched->sched_rq[i]);

	kfree(sched->sched_rq);
	sched->sched_rq = NULL;
Out_check_own:
	if (sched->own_submit_wq)
		destroy_workqueue(sched->submit_wq);
	dev_err(sched->dev, "%s: Failed to setup GPU scheduler--out of memory\n", __func__);
	return -ENOMEM;
}
EXPORT_SYMBOL(drm_sched_init);
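
/*
 * Illustrative sketch (not part of this file): a typical single-ring driver
 * initializes its scheduler with one argument struct per hardware ring. The
 * fields shown are the ones consumed above; all values and "my_*" names are
 * hypothetical.
 *
 *	const struct drm_sched_init_args args = {
 *		.ops		= &my_sched_ops,
 *		.submit_wq	= NULL,		// let the scheduler allocate its own ordered wq
 *		.num_rqs	= DRM_SCHED_PRIORITY_COUNT,
 *		.credit_limit	= 16,
 *		.hang_limit	= 0,
 *		.timeout	= msecs_to_jiffies(500),
 *		.timeout_wq	= NULL,		// fall back to system_wq
 *		.score		= NULL,		// use the scheduler's internal score
 *		.name		= "my-ring",
 *		.dev		= my_device,
 *	};
 *
 *	if (drm_sched_init(&my_ring->sched, &args))
 *		goto err;
 */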

/**
 * drm_sched_fini - Destroy a gpu scheduler
 *
 * @sched: scheduler instance
 *
 * Tears down and cleans up the scheduler.
 *
 * This stops submission of new jobs to the hardware through
 * drm_sched_backend_ops.run_job(). Consequently, drm_sched_backend_ops.free_job()
 * will not be called for all jobs still in drm_gpu_scheduler.pending_list.
 * There is no solution for this currently. Thus, it is up to the driver to make
 * sure that:
 *
 * a) drm_sched_fini() is only called after drm_sched_backend_ops.free_job() has
 *    been called for all submitted jobs, or
 * b) the jobs for which drm_sched_backend_ops.free_job() has not been called
 *    after drm_sched_fini() ran are freed manually.
 *
 * FIXME: Take care of the above problem and prevent this function from leaking
 * the jobs in drm_gpu_scheduler.pending_list under any circumstances.
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_entity *s_entity;
	int i;

	drm_sched_wqueue_stop(sched);

	for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
		struct drm_sched_rq *rq = sched->sched_rq[i];

		spin_lock(&rq->lock);
		list_for_each_entry(s_entity, &rq->entities, list)
			/*
			 * Prevents reinsertion and marks job_queue as idle;
			 * it will be removed from the rq in
			 * drm_sched_entity_fini() eventually.
			 */
			s_entity->stopped = true;
		spin_unlock(&rq->lock);
		kfree(sched->sched_rq[i]);
	}

	/* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */
	wake_up_all(&sched->job_scheduled);

	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&sched->work_tdr);

	if (sched->own_submit_wq)
		destroy_workqueue(sched->submit_wq);
	sched->ready = false;
	kfree(sched->sched_rq);
	sched->sched_rq = NULL;
}
EXPORT_SYMBOL(drm_sched_fini);

/**
 * drm_sched_increase_karma - Update sched_entity guilty flag
 *
 * @bad: The job guilty of time out
 *
 * Increment on every hang caused by the 'bad' job. If this exceeds the hang
 * limit of the scheduler then the respective sched entity is marked guilty and
 * jobs from it will not be scheduled further.
 */
void drm_sched_increase_karma(struct drm_sched_job *bad)
{
	int i;
	struct drm_sched_entity *tmp;
	struct drm_sched_entity *entity;
	struct drm_gpu_scheduler *sched = bad->sched;

	/* don't change @bad's karma if it's from the KERNEL RQ,
	 * because sometimes a GPU hang would cause kernel jobs (like VM
	 * updating jobs) to be corrupted, but keep in mind that kernel jobs
	 * are always considered good.
	 */
	if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
		atomic_inc(&bad->karma);

		for (i = DRM_SCHED_PRIORITY_HIGH; i < sched->num_rqs; i++) {
			struct drm_sched_rq *rq = sched->sched_rq[i];

			spin_lock(&rq->lock);
			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
				if (bad->s_fence->scheduled.context ==
				    entity->fence_context) {
					if (entity->guilty)
						atomic_set(entity->guilty, 1);
					break;
				}
			}
			spin_unlock(&rq->lock);
			if (&entity->list != &rq->entities)
				break;
		}
	}
}
EXPORT_SYMBOL(drm_sched_increase_karma);

/**
 * drm_sched_wqueue_ready - Is the scheduler ready for submission
 *
 * @sched: scheduler instance
 *
 * Returns true if submission is ready
 */
bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched)
{
	return sched->ready;
}
EXPORT_SYMBOL(drm_sched_wqueue_ready);

/**
 * drm_sched_wqueue_stop - stop scheduler submission
 * @sched: scheduler instance
 *
 * Stops the scheduler from pulling new jobs from entities. It also stops
 * freeing jobs automatically through drm_sched_backend_ops.free_job().
 */
void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched)
{
	WRITE_ONCE(sched->pause_submit, true);
	cancel_work_sync(&sched->work_run_job);
	cancel_work_sync(&sched->work_free_job);
}
EXPORT_SYMBOL(drm_sched_wqueue_stop);

/**
 * drm_sched_wqueue_start - start scheduler submission
 * @sched: scheduler instance
 *
 * Restarts the scheduler after drm_sched_wqueue_stop() has stopped it.
 *
 * This function is not necessary for 'conventional' startup. The scheduler is
 * fully operational after drm_sched_init() succeeded.
 */
void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched)
{
	WRITE_ONCE(sched->pause_submit, false);
	queue_work(sched->submit_wq, &sched->work_run_job);
	queue_work(sched->submit_wq, &sched->work_free_job);
}
EXPORT_SYMBOL(drm_sched_wqueue_start);