Lines matching: required-for-hardware-jobs
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
27 * The GPU scheduler provides entities which allow userspace to push jobs
28 * into software queues which are then scheduled on a hardware run queue.
31 * features among jobs. The driver is supposed to provide callback functions for
32 * backend operations to the scheduler like submitting a job to the hardware run queue,
41 * 4. Entities themselves maintain a queue of jobs that will be scheduled on
42 * the hardware.
44 * The jobs in an entity are always scheduled in the order in which they were pushed.
47 * hardware, i.e. the pending queue, the entity must not be referenced anymore
48 * through the job's entity pointer.
55 * in which the jobs fetched from scheduler entities are executed.
66 * This implies waiting for previously executed jobs.
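A minimal sketch of that submission flow as seen from a driver, assuming the current API in which drm_sched_job_init() takes a credit count; struct my_job, my_submit() and the owner pointer are hypothetical driver constructs, while the drm_sched_*() calls and struct members appear in this file:

#include <linux/dma-fence.h>
#include <drm/gpu_scheduler.h>

/* Hypothetical driver job wrapping the scheduler job. */
struct my_job {
	struct drm_sched_job base;
	struct dma_fence *done_fence;
};

static int my_submit(struct drm_sched_entity *entity, struct my_job *job,
		     u32 credits, void *owner)
{
	int ret;

	/* Bind the job to the entity's software queue. */
	ret = drm_sched_job_init(&job->base, entity, credits, owner);
	if (ret)
		return ret;

	/* Dependencies would be added here, before the job is armed. */

	drm_sched_job_arm(&job->base);

	/* From here on the finished fence may be handed to userspace. */
	job->done_fence = dma_fence_get(&job->base.s_fence->finished);

	/* Queue the job; the scheduler calls run_job() later. */
	drm_sched_entity_push_job(&job->base);

	return 0;
}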
72 #include <linux/dma-resv.h>
98 MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stri…
105 WARN_ON(check_sub_overflow(sched->credit_limit, in drm_sched_available_credits()
106 atomic_read(&sched->credit_count), in drm_sched_available_credits()
113 * drm_sched_can_queue -- Can we queue more to the hardware?
132 if (s_job->credits > sched->credit_limit) { in drm_sched_can_queue()
133 dev_WARN(sched->dev, in drm_sched_can_queue()
134 "Jobs may not exceed the credit limit, truncate.\n"); in drm_sched_can_queue()
135 s_job->credits = sched->credit_limit; in drm_sched_can_queue()
138 return drm_sched_available_credits(sched) >= s_job->credits; in drm_sched_can_queue()
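For illustration only, a standalone restatement of the credit check performed above (not scheduler code): a job fits while the unconsumed share of credit_limit still covers the job's own credit count.

#include <linux/types.h>

static inline bool example_can_queue(u32 credit_limit, u32 credits_in_flight,
				     u32 job_credits)
{
	/* Mirrors drm_sched_available_credits(): limit minus what is in flight. */
	u32 available = credit_limit - credits_in_flight;

	return available >= job_credits;
}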
147 return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting); in drm_sched_entity_compare_before()
153 if (!RB_EMPTY_NODE(&entity->rb_tree_node)) { in drm_sched_rq_remove_fifo_locked()
154 rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root); in drm_sched_rq_remove_fifo_locked()
155 RB_CLEAR_NODE(&entity->rb_tree_node); in drm_sched_rq_remove_fifo_locked()
164 * Both locks need to be grabbed, one to protect from entity->rq change in drm_sched_rq_update_fifo_locked()
165 * for entity from within concurrent drm_sched_entity_select_rq and the in drm_sched_rq_update_fifo_locked()
168 lockdep_assert_held(&entity->lock); in drm_sched_rq_update_fifo_locked()
169 lockdep_assert_held(&rq->lock); in drm_sched_rq_update_fifo_locked()
173 entity->oldest_job_waiting = ts; in drm_sched_rq_update_fifo_locked()
175 rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root, in drm_sched_rq_update_fifo_locked()
180 * drm_sched_rq_init - initialize a given run queue struct
190 spin_lock_init(&rq->lock); in drm_sched_rq_init()
191 INIT_LIST_HEAD(&rq->entities); in drm_sched_rq_init()
192 rq->rb_tree_root = RB_ROOT_CACHED; in drm_sched_rq_init()
193 rq->current_entity = NULL; in drm_sched_rq_init()
194 rq->sched = sched; in drm_sched_rq_init()
198 * drm_sched_rq_add_entity - add an entity
208 lockdep_assert_held(&entity->lock); in drm_sched_rq_add_entity()
209 lockdep_assert_held(&rq->lock); in drm_sched_rq_add_entity()
211 if (!list_empty(&entity->list)) in drm_sched_rq_add_entity()
214 atomic_inc(rq->sched->score); in drm_sched_rq_add_entity()
215 list_add_tail(&entity->list, &rq->entities); in drm_sched_rq_add_entity()
219 * drm_sched_rq_remove_entity - remove an entity
229 lockdep_assert_held(&entity->lock); in drm_sched_rq_remove_entity()
231 if (list_empty(&entity->list)) in drm_sched_rq_remove_entity()
234 spin_lock(&rq->lock); in drm_sched_rq_remove_entity()
236 atomic_dec(rq->sched->score); in drm_sched_rq_remove_entity()
237 list_del_init(&entity->list); in drm_sched_rq_remove_entity()
239 if (rq->current_entity == entity) in drm_sched_rq_remove_entity()
240 rq->current_entity = NULL; in drm_sched_rq_remove_entity()
245 spin_unlock(&rq->lock); in drm_sched_rq_remove_entity()
249 * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
256 * Return an entity if one is found; return an error-pointer (!NULL) if an
266 spin_lock(&rq->lock); in drm_sched_rq_select_entity_rr()
268 entity = rq->current_entity; in drm_sched_rq_select_entity_rr()
270 list_for_each_entry_continue(entity, &rq->entities, list) { in drm_sched_rq_select_entity_rr()
276 spin_unlock(&rq->lock); in drm_sched_rq_select_entity_rr()
277 return ERR_PTR(-ENOSPC); in drm_sched_rq_select_entity_rr()
280 rq->current_entity = entity; in drm_sched_rq_select_entity_rr()
281 reinit_completion(&entity->entity_idle); in drm_sched_rq_select_entity_rr()
282 spin_unlock(&rq->lock); in drm_sched_rq_select_entity_rr()
288 list_for_each_entry(entity, &rq->entities, list) { in drm_sched_rq_select_entity_rr()
294 spin_unlock(&rq->lock); in drm_sched_rq_select_entity_rr()
295 return ERR_PTR(-ENOSPC); in drm_sched_rq_select_entity_rr()
298 rq->current_entity = entity; in drm_sched_rq_select_entity_rr()
299 reinit_completion(&entity->entity_idle); in drm_sched_rq_select_entity_rr()
300 spin_unlock(&rq->lock); in drm_sched_rq_select_entity_rr()
304 if (entity == rq->current_entity) in drm_sched_rq_select_entity_rr()
308 spin_unlock(&rq->lock); in drm_sched_rq_select_entity_rr()
314 * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
321 * Return an entity if one is found; return an error-pointer (!NULL) if an
331 spin_lock(&rq->lock); in drm_sched_rq_select_entity_fifo()
332 for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) { in drm_sched_rq_select_entity_fifo()
341 spin_unlock(&rq->lock); in drm_sched_rq_select_entity_fifo()
342 return ERR_PTR(-ENOSPC); in drm_sched_rq_select_entity_fifo()
345 reinit_completion(&entity->entity_idle); in drm_sched_rq_select_entity_fifo()
349 spin_unlock(&rq->lock); in drm_sched_rq_select_entity_fifo()
355 * drm_sched_run_job_queue - enqueue run-job work
360 if (!READ_ONCE(sched->pause_submit)) in drm_sched_run_job_queue()
361 queue_work(sched->submit_wq, &sched->work_run_job); in drm_sched_run_job_queue()
365 * __drm_sched_run_free_queue - enqueue free-job work
370 if (!READ_ONCE(sched->pause_submit)) in __drm_sched_run_free_queue()
371 queue_work(sched->submit_wq, &sched->work_free_job); in __drm_sched_run_free_queue()
375 * drm_sched_run_free_queue - enqueue free-job work if ready
382 spin_lock(&sched->job_list_lock); in drm_sched_run_free_queue()
383 job = list_first_entry_or_null(&sched->pending_list, in drm_sched_run_free_queue()
385 if (job && dma_fence_is_signaled(&job->s_fence->finished)) in drm_sched_run_free_queue()
387 spin_unlock(&sched->job_list_lock); in drm_sched_run_free_queue()
391 * drm_sched_job_done - complete a job
398 struct drm_sched_fence *s_fence = s_job->s_fence; in drm_sched_job_done()
399 struct drm_gpu_scheduler *sched = s_fence->sched; in drm_sched_job_done()
401 atomic_sub(s_job->credits, &sched->credit_count); in drm_sched_job_done()
402 atomic_dec(sched->score); in drm_sched_job_done()
406 dma_fence_get(&s_fence->finished); in drm_sched_job_done()
408 dma_fence_put(&s_fence->finished); in drm_sched_job_done()
413 * drm_sched_job_done_cb - the callback for a done job
421 drm_sched_job_done(s_job, f->error); in drm_sched_job_done_cb()
425 * drm_sched_start_timeout - start timeout for reset worker
427 * @sched: scheduler instance to start the worker for
429 * Start the timeout for the given scheduler.
433 lockdep_assert_held(&sched->job_list_lock); in drm_sched_start_timeout()
435 if (sched->timeout != MAX_SCHEDULE_TIMEOUT && in drm_sched_start_timeout()
436 !list_empty(&sched->pending_list)) in drm_sched_start_timeout()
437 mod_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout); in drm_sched_start_timeout()
442 spin_lock(&sched->job_list_lock); in drm_sched_start_timeout_unlocked()
444 spin_unlock(&sched->job_list_lock); in drm_sched_start_timeout_unlocked()
448 * drm_sched_tdr_queue_imm - immediately start job timeout handler
450 * @sched: scheduler for which the timeout handling should be started.
452 * Start timeout handling immediately for the named scheduler.
456 spin_lock(&sched->job_list_lock); in drm_sched_tdr_queue_imm()
457 sched->timeout = 0; in drm_sched_tdr_queue_imm()
459 spin_unlock(&sched->job_list_lock); in drm_sched_tdr_queue_imm()
464 * drm_sched_fault - immediately start timeout handler
468 * Start timeout handling immediately when the driver detects a hardware fault.
472 if (sched->timeout_wq) in drm_sched_fault()
473 mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0); in drm_sched_fault()
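A hedged sketch of how a driver might use drm_sched_fault(): the scheduler pointer is assumed to have been registered as the IRQ cookie, and my_fault_irq() is hypothetical.

#include <linux/interrupt.h>
#include <drm/gpu_scheduler.h>

static irqreturn_t my_fault_irq(int irq, void *data)
{
	struct drm_gpu_scheduler *sched = data;

	/* The hardware reported a fault: run the timeout handler right away. */
	drm_sched_fault(sched);

	return IRQ_HANDLED;
}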
478 * drm_sched_suspend_timeout - Suspend scheduler job timeout
480 * @sched: scheduler instance for which to suspend the timeout
482 * Suspend the delayed work timeout for the scheduler. This is done by
493 sched_timeout = sched->work_tdr.timer.expires; in drm_sched_suspend_timeout()
499 if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT) in drm_sched_suspend_timeout()
501 return sched_timeout - now; in drm_sched_suspend_timeout()
503 return sched->timeout; in drm_sched_suspend_timeout()
508 * drm_sched_resume_timeout - Resume scheduler job timeout
510 * @sched: scheduler instance for which to resume the timeout
513 * Resume the delayed work timeout for the scheduler.
518 spin_lock(&sched->job_list_lock); in drm_sched_resume_timeout()
520 if (list_empty(&sched->pending_list)) in drm_sched_resume_timeout()
521 cancel_delayed_work(&sched->work_tdr); in drm_sched_resume_timeout()
523 mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining); in drm_sched_resume_timeout()
525 spin_unlock(&sched->job_list_lock); in drm_sched_resume_timeout()
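A hedged sketch of pairing the two helpers above: the timeout is parked across an operation during which the ring legitimately makes no progress, then re-armed with only the budget that was left; my_quiesce_without_tdr() is hypothetical.

#include <drm/gpu_scheduler.h>

static void my_quiesce_without_tdr(struct drm_gpu_scheduler *sched)
{
	unsigned long remaining;

	remaining = drm_sched_suspend_timeout(sched);

	/* ... driver-specific work while the hardware is quiescent ... */

	/* Give the pending job back only the time it still had. */
	drm_sched_resume_timeout(sched, remaining);
}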
531 struct drm_gpu_scheduler *sched = s_job->sched; in drm_sched_job_begin()
533 spin_lock(&sched->job_list_lock); in drm_sched_job_begin()
534 list_add_tail(&s_job->list, &sched->pending_list); in drm_sched_job_begin()
536 spin_unlock(&sched->job_list_lock); in drm_sched_job_begin()
548 spin_lock(&sched->job_list_lock); in drm_sched_job_timedout()
549 job = list_first_entry_or_null(&sched->pending_list, in drm_sched_job_timedout()
555 * drm_sched_cleanup_jobs. It will be reinserted back after sched->thread in drm_sched_job_timedout()
558 list_del_init(&job->list); in drm_sched_job_timedout()
559 spin_unlock(&sched->job_list_lock); in drm_sched_job_timedout()
561 status = job->sched->ops->timedout_job(job); in drm_sched_job_timedout()
567 if (sched->free_guilty) { in drm_sched_job_timedout()
568 job->sched->ops->free_job(job); in drm_sched_job_timedout()
569 sched->free_guilty = false; in drm_sched_job_timedout()
572 spin_unlock(&sched->job_list_lock); in drm_sched_job_timedout()
580 * drm_sched_stop - stop the scheduler
585 * Stop the scheduler and also remove and free all completed jobs.
590 * This function is typically used for reset recovery (see the docu of
591 * drm_sched_backend_ops.timedout_job() for details). Do not call it for
601 * Reinsert the bad job here - now it's safe as in drm_sched_stop()
603 * bad job at this point - we parked (waited for) any in progress in drm_sched_stop()
607 if (bad && bad->sched == sched) in drm_sched_stop()
612 list_add(&bad->list, &sched->pending_list); in drm_sched_stop()
620 list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list, in drm_sched_stop()
622 if (s_job->s_fence->parent && in drm_sched_stop()
623 dma_fence_remove_callback(s_job->s_fence->parent, in drm_sched_stop()
624 &s_job->cb)) { in drm_sched_stop()
625 dma_fence_put(s_job->s_fence->parent); in drm_sched_stop()
626 s_job->s_fence->parent = NULL; in drm_sched_stop()
627 atomic_sub(s_job->credits, &sched->credit_count); in drm_sched_stop()
631 * Locking here is for concurrent resume timeout in drm_sched_stop()
633 spin_lock(&sched->job_list_lock); in drm_sched_stop()
634 list_del_init(&s_job->list); in drm_sched_stop()
635 spin_unlock(&sched->job_list_lock); in drm_sched_stop()
638 * Wait for job's HW fence callback to finish using s_job in drm_sched_stop()
643 dma_fence_wait(&s_job->s_fence->finished, false); in drm_sched_stop()
646 * We must keep bad job alive for later use during in drm_sched_stop()
651 sched->ops->free_job(s_job); in drm_sched_stop()
653 sched->free_guilty = true; in drm_sched_stop()
660 * this TDR finished and before the newly restarted jobs had a in drm_sched_stop()
663 cancel_delayed_work(&sched->work_tdr); in drm_sched_stop()
668 * drm_sched_start - recover jobs after a reset
673 * This function is typically used for reset recovery (see the docu of
674 * drm_sched_backend_ops.timedout_job() for details). Do not call it for
683 * Locking the list is not required here as the sched thread is parked in drm_sched_start()
684 * so no new jobs are being inserted or removed. Also concurrent in drm_sched_start()
687 list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { in drm_sched_start()
688 struct dma_fence *fence = s_job->s_fence->parent; in drm_sched_start()
690 atomic_add(s_job->credits, &sched->credit_count); in drm_sched_start()
693 drm_sched_job_done(s_job, errno ?: -ECANCELED); in drm_sched_start()
697 if (dma_fence_add_callback(fence, &s_job->cb, in drm_sched_start()
699 drm_sched_job_done(s_job, fence->error ?: errno); in drm_sched_start()
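A hedged sketch of a drm_sched_backend_ops.timedout_job() implementation built on drm_sched_stop()/drm_sched_start(), as the documentation above suggests; the hardware reset step is left as a driver-specific placeholder and the return value assumes DRM_GPU_SCHED_STAT_NOMINAL is the appropriate status.

#include <drm/gpu_scheduler.h>

static enum drm_gpu_sched_stat my_timedout_job(struct drm_sched_job *bad)
{
	struct drm_gpu_scheduler *sched = bad->sched;

	/* Park submission and remove/free already completed jobs. */
	drm_sched_stop(sched, bad);

	/* ... driver-specific hardware reset goes here ... */

	/* Re-arm fence callbacks for surviving jobs and unpark submission. */
	drm_sched_start(sched, 0);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}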
708 * drm_sched_resubmit_jobs - Deprecated, don't use in new code!
712 * Re-submitting jobs was a concept AMD came up with as a cheap way to implement
720 * Drivers can still save and restore their state for recovery operations, but
731 list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { in drm_sched_resubmit_jobs()
732 struct drm_sched_fence *s_fence = s_job->s_fence; in drm_sched_resubmit_jobs()
734 if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) { in drm_sched_resubmit_jobs()
736 guilty_context = s_job->s_fence->scheduled.context; in drm_sched_resubmit_jobs()
739 if (found_guilty && s_job->s_fence->scheduled.context == guilty_context) in drm_sched_resubmit_jobs()
740 dma_fence_set_error(&s_fence->finished, -ECANCELED); in drm_sched_resubmit_jobs()
742 fence = sched->ops->run_job(s_job); in drm_sched_resubmit_jobs()
746 dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); in drm_sched_resubmit_jobs()
748 s_job->s_fence->parent = NULL; in drm_sched_resubmit_jobs()
751 s_job->s_fence->parent = dma_fence_get(fence); in drm_sched_resubmit_jobs()
753 /* Drop for original kref_init */ in drm_sched_resubmit_jobs()
761 * drm_sched_job_init - init a scheduler job
766 * @owner: job owner for debugging
769 * for locking considerations.
778 * WARNING: amdgpu abuses &drm_sched.ready to signal when the hardware
779 * has died, which can mean that there's no valid runqueue for a @entity.
780 * This function returns -ENOENT in this case (which probably should be -EIO as
783 * Returns 0 for success, negative error code otherwise.
789 if (!entity->rq) { in drm_sched_job_init()
791 * or worse--a blank screen--leave a trail in the in drm_sched_job_init()
794 dev_err(job->sched->dev, "%s: entity has no rq!\n", __func__); in drm_sched_job_init()
795 return -ENOENT; in drm_sched_job_init()
800 return -EINVAL; in drm_sched_job_init()
804 * We don't know for sure how the user has allocated. Thus, zero the in drm_sched_job_init()
811 job->entity = entity; in drm_sched_job_init()
812 job->credits = credits; in drm_sched_job_init()
813 job->s_fence = drm_sched_fence_alloc(entity, owner); in drm_sched_job_init()
814 if (!job->s_fence) in drm_sched_job_init()
815 return -ENOMEM; in drm_sched_job_init()
817 INIT_LIST_HEAD(&job->list); in drm_sched_job_init()
819 xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC); in drm_sched_job_init()
826 * drm_sched_job_arm - arm a scheduler job for execution
829 * This arms a scheduler job for execution. Specifically it initializes the
833 * Refer to drm_sched_entity_push_job() documentation for locking
841 struct drm_sched_entity *entity = job->entity; in drm_sched_job_arm()
845 sched = entity->rq->sched; in drm_sched_job_arm()
847 job->sched = sched; in drm_sched_job_arm()
848 job->s_priority = entity->priority; in drm_sched_job_arm()
849 job->id = atomic64_inc_return(&sched->job_id_count); in drm_sched_job_arm()
851 drm_sched_fence_init(job->s_fence, job->entity); in drm_sched_job_arm()
856 * drm_sched_job_add_dependency - adds the fence as a job dependency
880 xa_for_each(&job->dependencies, index, entry) { in drm_sched_job_add_dependency()
881 if (entry->context != fence->context) in drm_sched_job_add_dependency()
886 xa_store(&job->dependencies, index, fence, GFP_KERNEL); in drm_sched_job_add_dependency()
893 ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL); in drm_sched_job_add_dependency()
902 * drm_sched_job_add_syncobj_dependency - adds a syncobj's fence as a job dependency
930 * drm_sched_job_add_resv_dependencies - add all fences from the resv to the job
965 * drm_sched_job_add_implicit_dependencies - adds implicit dependencies as job
983 return drm_sched_job_add_resv_dependencies(job, obj->resv, in drm_sched_job_add_implicit_dependencies()
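A hedged sketch of typical dependency wiring between drm_sched_job_init() and drm_sched_job_arm(); in_fence and bo stand in for whatever a driver's submit path produced, and my_add_deps() is hypothetical.

#include <linux/dma-fence.h>
#include <drm/drm_gem.h>
#include <drm/gpu_scheduler.h>

static int my_add_deps(struct drm_sched_job *job, struct dma_fence *in_fence,
		       struct drm_gem_object *bo)
{
	int ret;

	/* drm_sched_job_add_dependency() consumes the reference grabbed here. */
	ret = drm_sched_job_add_dependency(job, dma_fence_get(in_fence));
	if (ret)
		return ret;

	/* Also wait for the implicit (reservation object) fences of the BO. */
	return drm_sched_job_add_implicit_dependencies(job, bo, true);
}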
989 * drm_sched_job_has_dependency - check whether fence is the job's dependency
991 * @fence: fence to look for
1002 xa_for_each(&job->dependencies, index, f) { in drm_sched_job_has_dependency()
1012 * drm_sched_job_cleanup - clean up scheduler job resources
1031 if (kref_read(&job->s_fence->finished.refcount)) { in drm_sched_job_cleanup()
1033 dma_fence_put(&job->s_fence->finished); in drm_sched_job_cleanup()
1036 drm_sched_fence_free(job->s_fence); in drm_sched_job_cleanup()
1039 job->s_fence = NULL; in drm_sched_job_cleanup()
1041 xa_for_each(&job->dependencies, index, fence) { in drm_sched_job_cleanup()
1044 xa_destroy(&job->dependencies); in drm_sched_job_cleanup()
1050 * drm_sched_wakeup - Wake up the scheduler if it is ready to queue
1053 * Wake up the scheduler if we can queue jobs.
1061 * drm_sched_select_entity - Select next entity to process
1067 * Note that we break out of the for-loop when "entity" is non-null, which can
1068 * also be an error-pointer--this ensures we don't process lower priority
1069 * run-queues. See comments in the respectively called functions.
1079 for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { in drm_sched_select_entity()
1081 drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) : in drm_sched_select_entity()
1082 drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]); in drm_sched_select_entity()
1091 * drm_sched_get_finished_job - fetch the next finished job to be destroyed
1096 * ready for it to be destroyed.
1103 spin_lock(&sched->job_list_lock); in drm_sched_get_finished_job()
1105 job = list_first_entry_or_null(&sched->pending_list, in drm_sched_get_finished_job()
1108 if (job && dma_fence_is_signaled(&job->s_fence->finished)) { in drm_sched_get_finished_job()
1110 list_del_init(&job->list); in drm_sched_get_finished_job()
1113 cancel_delayed_work(&sched->work_tdr); in drm_sched_get_finished_job()
1115 next = list_first_entry_or_null(&sched->pending_list, in drm_sched_get_finished_job()
1120 &next->s_fence->scheduled.flags)) in drm_sched_get_finished_job()
1121 next->s_fence->scheduled.timestamp = in drm_sched_get_finished_job()
1122 dma_fence_timestamp(&job->s_fence->finished); in drm_sched_get_finished_job()
1123 /* start TO timer for next job */ in drm_sched_get_finished_job()
1130 spin_unlock(&sched->job_list_lock); in drm_sched_get_finished_job()
1136 * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
1151 for (i = 0; i < num_sched_list; ++i) { in drm_sched_pick_best()
1154 if (!sched->ready) { in drm_sched_pick_best()
1156 sched->name); in drm_sched_pick_best()
1160 num_score = atomic_read(sched->score); in drm_sched_pick_best()
1172 * drm_sched_free_job_work - worker to call free_job
1184 sched->ops->free_job(job); in drm_sched_free_job_work()
1191 * drm_sched_run_job_work - worker to call run_job
1212 complete_all(&entity->entity_idle); in drm_sched_run_job_work()
1217 s_fence = sched_job->s_fence; in drm_sched_run_job_work()
1219 atomic_add(sched_job->credits, &sched->credit_count); in drm_sched_run_job_work()
1223 fence = sched->ops->run_job(sched_job); in drm_sched_run_job_work()
1224 complete_all(&entity->entity_idle); in drm_sched_run_job_work()
1228 /* Drop for original kref_init of the fence */ in drm_sched_run_job_work()
1231 r = dma_fence_add_callback(fence, &sched_job->cb, in drm_sched_run_job_work()
1233 if (r == -ENOENT) in drm_sched_run_job_work()
1234 drm_sched_job_done(sched_job, fence->error); in drm_sched_run_job_work()
1236 DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r); in drm_sched_run_job_work()
1242 wake_up(&sched->job_scheduled); in drm_sched_run_job_work()
1247 * drm_sched_init - Init a gpu scheduler instance
1258 sched->ops = args->ops; in drm_sched_init()
1259 sched->credit_limit = args->credit_limit; in drm_sched_init()
1260 sched->name = args->name; in drm_sched_init()
1261 sched->timeout = args->timeout; in drm_sched_init()
1262 sched->hang_limit = args->hang_limit; in drm_sched_init()
1263 sched->timeout_wq = args->timeout_wq ? args->timeout_wq : system_wq; in drm_sched_init()
1264 sched->score = args->score ? args->score : &sched->_score; in drm_sched_init()
1265 sched->dev = args->dev; in drm_sched_init()
1267 if (args->num_rqs > DRM_SCHED_PRIORITY_COUNT) { in drm_sched_init()
1268 /* This is a gross violation--tell drivers what the problem is. in drm_sched_init()
1270 dev_err(sched->dev, "%s: num_rqs cannot be greater than DRM_SCHED_PRIORITY_COUNT\n", in drm_sched_init()
1272 return -EINVAL; in drm_sched_init()
1273 } else if (sched->sched_rq) { in drm_sched_init()
1275 * fine-tune their DRM calling order, and return all in drm_sched_init()
1278 dev_warn(sched->dev, "%s: scheduler already initialized!\n", __func__); in drm_sched_init()
1282 if (args->submit_wq) { in drm_sched_init()
1283 sched->submit_wq = args->submit_wq; in drm_sched_init()
1284 sched->own_submit_wq = false; in drm_sched_init()
1287 sched->submit_wq = alloc_ordered_workqueue_lockdep_map(args->name, in drm_sched_init()
1291 sched->submit_wq = alloc_ordered_workqueue(args->name, WQ_MEM_RECLAIM); in drm_sched_init()
1293 if (!sched->submit_wq) in drm_sched_init()
1294 return -ENOMEM; in drm_sched_init()
1296 sched->own_submit_wq = true; in drm_sched_init()
1299 sched->sched_rq = kmalloc_array(args->num_rqs, sizeof(*sched->sched_rq), in drm_sched_init()
1301 if (!sched->sched_rq) in drm_sched_init()
1303 sched->num_rqs = args->num_rqs; in drm_sched_init()
1304 for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { in drm_sched_init()
1305 sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL); in drm_sched_init()
1306 if (!sched->sched_rq[i]) in drm_sched_init()
1308 drm_sched_rq_init(sched, sched->sched_rq[i]); in drm_sched_init()
1311 init_waitqueue_head(&sched->job_scheduled); in drm_sched_init()
1312 INIT_LIST_HEAD(&sched->pending_list); in drm_sched_init()
1313 spin_lock_init(&sched->job_list_lock); in drm_sched_init()
1314 atomic_set(&sched->credit_count, 0); in drm_sched_init()
1315 INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); in drm_sched_init()
1316 INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); in drm_sched_init()
1317 INIT_WORK(&sched->work_free_job, drm_sched_free_job_work); in drm_sched_init()
1318 atomic_set(&sched->_score, 0); in drm_sched_init()
1319 atomic64_set(&sched->job_id_count, 0); in drm_sched_init()
1320 sched->pause_submit = false; in drm_sched_init()
1322 sched->ready = true; in drm_sched_init()
1325 for (--i ; i >= DRM_SCHED_PRIORITY_KERNEL; i--) in drm_sched_init()
1326 kfree(sched->sched_rq[i]); in drm_sched_init()
1328 kfree(sched->sched_rq); in drm_sched_init()
1329 sched->sched_rq = NULL; in drm_sched_init()
1331 if (sched->own_submit_wq) in drm_sched_init()
1332 destroy_workqueue(sched->submit_wq); in drm_sched_init()
1333 dev_err(sched->dev, "%s: Failed to setup GPU scheduler--out of memory\n", __func__); in drm_sched_init()
1334 return -ENOMEM; in drm_sched_init()
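A hedged sketch of the driver side of this initialization, filling the same drm_sched_init_args fields that are consumed above; my_sched_ops (which would provide run_job, timedout_job and free_job), MY_RING_CREDITS and the 500 ms timeout are hypothetical placeholders.

#include <linux/device.h>
#include <linux/jiffies.h>
#include <drm/gpu_scheduler.h>

static const struct drm_sched_backend_ops my_sched_ops;	/* hypothetical callbacks */
#define MY_RING_CREDITS 1024					/* hypothetical ring capacity */

static int my_sched_init(struct drm_gpu_scheduler *sched, struct device *dev)
{
	const struct drm_sched_init_args args = {
		.ops = &my_sched_ops,
		.submit_wq = NULL,		/* let the scheduler allocate its own */
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = MY_RING_CREDITS,
		.hang_limit = 0,
		.timeout = msecs_to_jiffies(500),
		.timeout_wq = NULL,		/* falls back to system_wq */
		.score = NULL,
		.name = "my-ring",
		.dev = dev,
	};

	return drm_sched_init(sched, &args);
}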
1339 * drm_sched_fini - Destroy a gpu scheduler
1345 * This stops submission of new jobs to the hardware through
1347 * will not be called for all jobs still in drm_gpu_scheduler.pending_list.
1348 * There is no solution for this currently. Thus, it is up to the driver to make
1351 * a) drm_sched_fini() is only called after, for all submitted jobs,
1353 * b) the jobs for which drm_sched_backend_ops.free_job() has not been called
1357 * the jobs in drm_gpu_scheduler.pending_list under any circumstances.
1366 for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { in drm_sched_fini()
1367 struct drm_sched_rq *rq = sched->sched_rq[i]; in drm_sched_fini()
1369 spin_lock(&rq->lock); in drm_sched_fini()
1370 list_for_each_entry(s_entity, &rq->entities, list) in drm_sched_fini()
1376 s_entity->stopped = true; in drm_sched_fini()
1377 spin_unlock(&rq->lock); in drm_sched_fini()
1378 kfree(sched->sched_rq[i]); in drm_sched_fini()
1381 /* Wake up everyone stuck in drm_sched_entity_flush for this scheduler */ in drm_sched_fini()
1382 wake_up_all(&sched->job_scheduled); in drm_sched_fini()
1385 cancel_delayed_work_sync(&sched->work_tdr); in drm_sched_fini()
1387 if (sched->own_submit_wq) in drm_sched_fini()
1388 destroy_workqueue(sched->submit_wq); in drm_sched_fini()
1389 sched->ready = false; in drm_sched_fini()
1390 kfree(sched->sched_rq); in drm_sched_fini()
1391 sched->sched_rq = NULL; in drm_sched_fini()
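A hedged sketch of a teardown order meeting the requirement spelled out above: the entity is destroyed first so its queue is drained, the driver then waits (in some driver-specific way) until nothing is left on the pending list, and only then tears the scheduler down; my_sched_teardown() is hypothetical.

#include <drm/gpu_scheduler.h>

static void my_sched_teardown(struct drm_sched_entity *entity,
			      struct drm_gpu_scheduler *sched)
{
	/* Drain the entity's job queue and detach it from its run queue. */
	drm_sched_entity_destroy(entity);

	/* ... driver-specific wait until all pending jobs have been freed ... */

	drm_sched_fini(sched);
}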
1396 * drm_sched_increase_karma - Update sched_entity guilty flag
1402 * jobs from it will not be scheduled further
1409 struct drm_gpu_scheduler *sched = bad->sched; in drm_sched_increase_karma()
1412 * because sometimes a GPU hang can corrupt kernel jobs (like VM updating jobs), in drm_sched_increase_karma()
1413 * but keep in mind that kernel jobs are always considered good. in drm_sched_increase_karma()
1415 if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { in drm_sched_increase_karma()
1416 atomic_inc(&bad->karma); in drm_sched_increase_karma()
1418 for (i = DRM_SCHED_PRIORITY_HIGH; i < sched->num_rqs; i++) { in drm_sched_increase_karma()
1419 struct drm_sched_rq *rq = sched->sched_rq[i]; in drm_sched_increase_karma()
1421 spin_lock(&rq->lock); in drm_sched_increase_karma()
1422 list_for_each_entry_safe(entity, tmp, &rq->entities, list) { in drm_sched_increase_karma()
1423 if (bad->s_fence->scheduled.context == in drm_sched_increase_karma()
1424 entity->fence_context) { in drm_sched_increase_karma()
1425 if (entity->guilty) in drm_sched_increase_karma()
1426 atomic_set(entity->guilty, 1); in drm_sched_increase_karma()
1430 spin_unlock(&rq->lock); in drm_sched_increase_karma()
1431 if (&entity->list != &rq->entities) in drm_sched_increase_karma()
1439 * drm_sched_wqueue_ready - Is the scheduler ready for submission
1447 return sched->ready; in drm_sched_wqueue_ready()
1452 * drm_sched_wqueue_stop - stop scheduler submission
1455 * Stops the scheduler from pulling new jobs from entities. It also stops
1456 * freeing jobs automatically through drm_sched_backend_ops.free_job().
1460 WRITE_ONCE(sched->pause_submit, true); in drm_sched_wqueue_stop()
1461 cancel_work_sync(&sched->work_run_job); in drm_sched_wqueue_stop()
1462 cancel_work_sync(&sched->work_free_job); in drm_sched_wqueue_stop()
1467 * drm_sched_wqueue_start - start scheduler submission
1472 * This function is not necessary for 'conventional' startup. The scheduler is
1477 WRITE_ONCE(sched->pause_submit, false); in drm_sched_wqueue_start()
1478 queue_work(sched->submit_wq, &sched->work_run_job); in drm_sched_wqueue_start()
1479 queue_work(sched->submit_wq, &sched->work_free_job); in drm_sched_wqueue_start()
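A hedged sketch of using the two helpers above to pause submission around a driver operation that must not race with run_job()/free_job(); the paused operation is left as a placeholder and my_with_submission_paused() is hypothetical.

#include <drm/gpu_scheduler.h>

static void my_with_submission_paused(struct drm_gpu_scheduler *sched)
{
	drm_sched_wqueue_stop(sched);

	/* ... e.g. take a device state snapshot for debugging ... */

	drm_sched_wqueue_start(sched);
}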