Matching lines from the Broadcom V3D DRM GPU scheduler (v3d_sched.c):

1 // SPDX-License-Identifier: GPL-2.0+
7 * The shared DRM GPU scheduler is used to coordinate submitting jobs
8 * to the hardware. Each DRM fd (roughly a client process) gets its
9 * own scheduler entity, which will process jobs in order. The GPU
10 * scheduler will round-robin between clients to submit the next job.
11 *
12 * For simplicity, and in order to keep latency low for interactive
13 * jobs when bulk background jobs are queued up, we submit a new job
14 * to the HW only when it has completed the last one, instead of
15 * filling up the CT[01]Q FIFOs with jobs. Similarly, we use
16 * drm_sched_job_add_dependency() to manage the dependency between bin
17 * and render, instead of having the clients submit jobs using the HW's
18 * semaphores to interlock between them.
19 */
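
/* A minimal sketch of the client-side submission flow described above,
 * assuming one drm_sched_entity per DRM fd per queue. The function name,
 * its parameters and the "1" credit count are illustrative, not the
 * driver's exact code; the drm_sched_*() calls are the shared scheduler's
 * API as of kernels matching this file.
 */
static int v3d_push_job_sketch(struct drm_sched_job *sched_job,
			       struct drm_sched_entity *entity,
			       struct dma_fence *bin_done)
{
	int ret;

	/* Attach the job to this client's entity; jobs within an entity run in order. */
	ret = drm_sched_job_init(sched_job, entity, 1, NULL);
	if (ret)
		return ret;

	/* A render job waits for its bin job through a scheduler dependency
	 * rather than a HW semaphore, as the comment above explains.
	 * drm_sched_job_add_dependency() consumes the fence reference.
	 */
	if (bin_done) {
		ret = drm_sched_job_add_dependency(sched_job,
						   dma_fence_get(bin_done));
		if (ret) {
			drm_sched_job_cleanup(sched_job);
			return ret;
		}
	}

	drm_sched_job_arm(sched_job);
	drm_sched_entity_push_job(sched_job);
	return 0;
}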
80 if (query_info->queries) { in v3d_timestamp_query_info_free()
83 for (i = 0; i < count; i++) in v3d_timestamp_query_info_free()
84 drm_syncobj_put(query_info->queries[i].syncobj); in v3d_timestamp_query_info_free()
86 kvfree(query_info->queries); in v3d_timestamp_query_info_free()
94 if (query_info->queries) { in v3d_performance_query_info_free()
97 for (i = 0; i < count; i++) { in v3d_performance_query_info_free()
98 drm_syncobj_put(query_info->queries[i].syncobj); in v3d_performance_query_info_free()
99 kvfree(query_info->queries[i].kperfmon_ids); in v3d_performance_query_info_free()
102 kvfree(query_info->queries); in v3d_performance_query_info_free()
111 v3d_timestamp_query_info_free(&job->timestamp_query, in v3d_cpu_job_free()
112 job->timestamp_query.count); in v3d_cpu_job_free()
114 v3d_performance_query_info_free(&job->performance_query, in v3d_cpu_job_free()
115 job->performance_query.count); in v3d_cpu_job_free()
117 v3d_job_cleanup(&job->base); in v3d_cpu_job_free()
123 struct v3d_perfmon *perfmon = v3d->global_perfmon; in v3d_switch_perfmon()
126 perfmon = job->perfmon; in v3d_switch_perfmon()
128 if (perfmon == v3d->active_perfmon) in v3d_switch_perfmon()
131 if (perfmon != v3d->active_perfmon) in v3d_switch_perfmon()
132 v3d_perfmon_stop(v3d, v3d->active_perfmon, true); in v3d_switch_perfmon()
134 if (perfmon && v3d->active_perfmon != perfmon) in v3d_switch_perfmon()
141 struct v3d_dev *v3d = job->v3d; in v3d_job_start_stats()
142 struct v3d_file_priv *file = job->file->driver_priv; in v3d_job_start_stats()
143 struct v3d_stats *global_stats = &v3d->queue[queue].stats; in v3d_job_start_stats()
144 struct v3d_stats *local_stats = &file->stats[queue]; in v3d_job_start_stats()
149 * We only need to disable local interrupts to appease lockdep, which in v3d_job_start_stats()
150 * otherwise would think v3d_job_start_stats() vs v3d_stats_update() has an in v3d_job_start_stats()
151 * unsafe in-irq vs no-irq-off usage problem. This is a false positive in v3d_job_start_stats()
152 * because all the locks are per queue and stats type, and all jobs are in v3d_job_start_stats()
153 * effectively serialised one at a time per queue: in v3d_job_start_stats()
155 * 1. Locks for GPU queues are updated from interrupt handlers under a in v3d_job_start_stats()
156 *    spin lock and started here with preemption disabled. in v3d_job_start_stats()
158 * 2. Locks for CPU queues are updated from the worker with preemption in v3d_job_start_stats()
159 *    disabled and equally started here with preemption disabled. in v3d_job_start_stats()
163 * 3. Because the next job can only be queued after the previous one has in v3d_job_start_stats()
164 *    been signaled, and locks are per queue, there is also no scope for in v3d_job_start_stats()
165 *    the start part to race with the update part. in v3d_job_start_stats()
166 */ in v3d_job_start_stats()
172 write_seqcount_begin(&local_stats->lock); in v3d_job_start_stats()
173 local_stats->start_ns = now; in v3d_job_start_stats()
174 write_seqcount_end(&local_stats->lock); in v3d_job_start_stats()
176 write_seqcount_begin(&global_stats->lock); in v3d_job_start_stats()
177 global_stats->start_ns = now; in v3d_job_start_stats()
178 write_seqcount_end(&global_stats->lock); in v3d_job_start_stats()
189 write_seqcount_begin(&stats->lock); in v3d_stats_update()
190 stats->enabled_ns += now - stats->start_ns; in v3d_stats_update()
191 stats->jobs_completed++; in v3d_stats_update()
192 stats->start_ns = 0; in v3d_stats_update()
193 write_seqcount_end(&stats->lock); in v3d_stats_update()
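
/* A sketch of the matching seqcount read side, as a consumer such as the
 * driver's fdinfo code would use it; the helper name and the "now"
 * parameter are illustrative. The retry loop yields a consistent snapshot
 * against the two writers above without taking any sleeping lock.
 */
static void v3d_read_stats_sketch(struct v3d_stats *stats, u64 now,
				  u64 *active_runtime, u64 *jobs_completed)
{
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&stats->lock);
		*active_runtime = stats->enabled_ns;
		if (stats->start_ns)
			*active_runtime += now - stats->start_ns;
		*jobs_completed = stats->jobs_completed;
	} while (read_seqcount_retry(&stats->lock, seq));
}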
199 struct v3d_dev *v3d = job->v3d; in v3d_job_update_stats()
200 struct v3d_file_priv *file = job->file->driver_priv; in v3d_job_update_stats()
201 struct v3d_stats *global_stats = &v3d->queue[queue].stats; in v3d_job_update_stats()
202 struct v3d_stats *local_stats = &file->stats[queue]; in v3d_job_update_stats()
224 struct v3d_dev *v3d = job->base.v3d; in v3d_bin_job_run()
225 struct drm_device *dev = &v3d->drm; in v3d_bin_job_run()
229 if (unlikely(job->base.base.s_fence->finished.error)) { in v3d_bin_job_run()
230 spin_lock_irqsave(&v3d->job_lock, irqflags); in v3d_bin_job_run()
231 v3d->bin_job = NULL; in v3d_bin_job_run()
232 spin_unlock_irqrestore(&v3d->job_lock, irqflags); in v3d_bin_job_run()
236 /* Lock required around bin_job update vs in v3d_bin_job_run()
237  * v3d_overflow_mem_work(). in v3d_bin_job_run()
238  */ in v3d_bin_job_run()
239 spin_lock_irqsave(&v3d->job_lock, irqflags); in v3d_bin_job_run()
240 v3d->bin_job = job; in v3d_bin_job_run()
245 spin_unlock_irqrestore(&v3d->job_lock, irqflags); in v3d_bin_job_run()
253 if (job->base.irq_fence) in v3d_bin_job_run()
254 dma_fence_put(job->base.irq_fence); in v3d_bin_job_run()
255 job->base.irq_fence = dma_fence_get(fence); in v3d_bin_job_run()
257 trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno, in v3d_bin_job_run()
258 job->start, job->end); in v3d_bin_job_run()
260 v3d_job_start_stats(&job->base, V3D_BIN); in v3d_bin_job_run()
261 v3d_switch_perfmon(v3d, &job->base); in v3d_bin_job_run()
266 if (job->qma) { in v3d_bin_job_run()
267 V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma); in v3d_bin_job_run()
268 V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms); in v3d_bin_job_run()
270 if (job->qts) { in v3d_bin_job_run()
271 V3D_CORE_WRITE(0, V3D_CLE_CT0QTS, in v3d_bin_job_run()
272 	       V3D_CLE_CT0QTS_ENABLE | in v3d_bin_job_run()
273 	       job->qts); in v3d_bin_job_run()
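	/* Set the current and end address of the control list below; writing
	 * the end address register (CT0QEA) is what actually starts the job.
	 */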
275 V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start); in v3d_bin_job_run()
276 V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end); in v3d_bin_job_run()
284 struct v3d_dev *v3d = job->base.v3d; in v3d_render_job_run()
285 struct drm_device *dev = &v3d->drm; in v3d_render_job_run()
288 if (unlikely(job->base.base.s_fence->finished.error)) { in v3d_render_job_run()
289 v3d->render_job = NULL; in v3d_render_job_run()
293 v3d->render_job = job; in v3d_render_job_run()
295 /* Can we avoid this flush? We need to be careful of in v3d_render_job_run()
296  * scheduling, though -- imagine job0 rendering to texture and in v3d_render_job_run()
297  * job1 reading it, executed as bin0, bin1, render0, render1, in v3d_render_job_run()
298  * so that render1's flush at bin time wasn't enough. in v3d_render_job_run()
299  */ in v3d_render_job_run()
307 if (job->base.irq_fence) in v3d_render_job_run()
308 dma_fence_put(job->base.irq_fence); in v3d_render_job_run()
309 job->base.irq_fence = dma_fence_get(fence); in v3d_render_job_run()
311 trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno, in v3d_render_job_run()
312 job->start, job->end); in v3d_render_job_run()
314 v3d_job_start_stats(&job->base, V3D_RENDER); in v3d_render_job_run()
315 v3d_switch_perfmon(v3d, &job->base); in v3d_render_job_run()
322 V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start); in v3d_render_job_run()
323 V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end); in v3d_render_job_run()
332 struct v3d_dev *v3d = job->base.v3d; in v3d_tfu_job_run()
333 struct drm_device *dev = &v3d->drm; in v3d_tfu_job_run()
336 if (unlikely(job->base.base.s_fence->finished.error)) { in v3d_tfu_job_run()
337 v3d->tfu_job = NULL; in v3d_tfu_job_run()
341 v3d->tfu_job = job; in v3d_tfu_job_run()
347 if (job->base.irq_fence) in v3d_tfu_job_run()
348 dma_fence_put(job->base.irq_fence); in v3d_tfu_job_run()
349 job->base.irq_fence = dma_fence_get(fence); in v3d_tfu_job_run()
351 trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno); in v3d_tfu_job_run()
353 v3d_job_start_stats(&job->base, V3D_TFU); in v3d_tfu_job_run()
355 V3D_WRITE(V3D_TFU_IIA(v3d->ver), job->args.iia); in v3d_tfu_job_run()
356 V3D_WRITE(V3D_TFU_IIS(v3d->ver), job->args.iis); in v3d_tfu_job_run()
357 V3D_WRITE(V3D_TFU_ICA(v3d->ver), job->args.ica); in v3d_tfu_job_run()
358 V3D_WRITE(V3D_TFU_IUA(v3d->ver), job->args.iua); in v3d_tfu_job_run()
359 V3D_WRITE(V3D_TFU_IOA(v3d->ver), job->args.ioa); in v3d_tfu_job_run()
360 if (v3d->ver >= 71) in v3d_tfu_job_run()
361 V3D_WRITE(V3D_V7_TFU_IOC, job->args.v71.ioc); in v3d_tfu_job_run()
362 V3D_WRITE(V3D_TFU_IOS(v3d->ver), job->args.ios); in v3d_tfu_job_run()
363 V3D_WRITE(V3D_TFU_COEF0(v3d->ver), job->args.coef[0]); in v3d_tfu_job_run()
364 if (v3d->ver >= 71 || (job->args.coef[0] & V3D_TFU_COEF0_USECOEF)) { in v3d_tfu_job_run()
365 V3D_WRITE(V3D_TFU_COEF1(v3d->ver), job->args.coef[1]); in v3d_tfu_job_run()
366 V3D_WRITE(V3D_TFU_COEF2(v3d->ver), job->args.coef[2]); in v3d_tfu_job_run()
367 V3D_WRITE(V3D_TFU_COEF3(v3d->ver), job->args.coef[3]); in v3d_tfu_job_run()
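	/* Writing ICFG below is what queues the TFU job; the V3D_TFU_ICFG_IOC
	 * bit requests an interrupt on completion so the IRQ handler can
	 * signal the job's fence.
	 */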
370 V3D_WRITE(V3D_TFU_ICFG(v3d->ver), job->args.icfg | V3D_TFU_ICFG_IOC); in v3d_tfu_job_run()
379 struct v3d_dev *v3d = job->base.v3d; in v3d_csd_job_run()
380 struct drm_device *dev = &v3d->drm; in v3d_csd_job_run()
384 if (unlikely(job->base.base.s_fence->finished.error)) { in v3d_csd_job_run()
385 v3d->csd_job = NULL; in v3d_csd_job_run()
389 v3d->csd_job = job; in v3d_csd_job_run()
397 if (job->base.irq_fence) in v3d_csd_job_run()
398 dma_fence_put(job->base.irq_fence); in v3d_csd_job_run()
399 job->base.irq_fence = dma_fence_get(fence); in v3d_csd_job_run()
401 trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno); in v3d_csd_job_run()
403 v3d_job_start_stats(&job->base, V3D_CSD); in v3d_csd_job_run()
404 v3d_switch_perfmon(v3d, &job->base); in v3d_csd_job_run()
406 csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver); in v3d_csd_job_run()
407 for (i = 1; i <= 6; i++) in v3d_csd_job_run()
408 V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]); in v3d_csd_job_run()
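	/* CFG0 is deliberately written last, further below: the write to the
	 * QUEUED_CFG0 register is what actually kicks off the dispatch.
	 */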
415 if (v3d->ver >= 71) in v3d_csd_job_run()
419 V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]); in v3d_csd_job_run()
427 struct v3d_indirect_csd_info *indirect_csd = &job->indirect_csd; in v3d_rewrite_csd_job_wg_counts_from_indirect()
428 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); in v3d_rewrite_csd_job_wg_counts_from_indirect()
429 struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect); in v3d_rewrite_csd_job_wg_counts_from_indirect()
430 struct drm_v3d_submit_csd *args = &indirect_csd->job->args; in v3d_rewrite_csd_job_wg_counts_from_indirect()
431 struct v3d_dev *v3d = job->base.v3d; in v3d_rewrite_csd_job_wg_counts_from_indirect()
437 wg_counts = (uint32_t *)(bo->vaddr + indirect_csd->offset); in v3d_rewrite_csd_job_wg_counts_from_indirect()
442 args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; in v3d_rewrite_csd_job_wg_counts_from_indirect()
443 args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; in v3d_rewrite_csd_job_wg_counts_from_indirect()
444 args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; in v3d_rewrite_csd_job_wg_counts_from_indirect()
446 num_batches = DIV_ROUND_UP(indirect_csd->wg_size, 16) * in v3d_rewrite_csd_job_wg_counts_from_indirect()
447 	      (wg_counts[0] * wg_counts[1] * wg_counts[2]); in v3d_rewrite_csd_job_wg_counts_from_indirect()
450 if (v3d->ver < 71 || (v3d->ver == 71 && v3d->rev < 6)) in v3d_rewrite_csd_job_wg_counts_from_indirect()
451 args->cfg[4] = num_batches - 1; in v3d_rewrite_csd_job_wg_counts_from_indirect()
453 args->cfg[4] = num_batches; in v3d_rewrite_csd_job_wg_counts_from_indirect()
455 WARN_ON(args->cfg[4] == ~0); in v3d_rewrite_csd_job_wg_counts_from_indirect()
457 for (int i = 0; i < 3; i++) { in v3d_rewrite_csd_job_wg_counts_from_indirect()
459 if (indirect_csd->wg_uniform_offsets[i] != 0xffffffff) { in v3d_rewrite_csd_job_wg_counts_from_indirect()
460 u32 uniform_idx = indirect_csd->wg_uniform_offsets[i]; in v3d_rewrite_csd_job_wg_counts_from_indirect()
461 ((uint32_t *)indirect->vaddr)[uniform_idx] = wg_counts[i]; in v3d_rewrite_csd_job_wg_counts_from_indirect()
472 struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; in v3d_timestamp_query()
473 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); in v3d_timestamp_query()
478 for (int i = 0; i < timestamp_query->count; i++) { in v3d_timestamp_query()
479 value_addr = ((u8 *)bo->vaddr) + timestamp_query->queries[i].offset; in v3d_timestamp_query()
482 drm_syncobj_replace_fence(timestamp_query->queries[i].syncobj, in v3d_timestamp_query()
483 job->base.done_fence); in v3d_timestamp_query()
492 struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; in v3d_reset_timestamp_queries()
493 struct v3d_timestamp_query *queries = timestamp_query->queries; in v3d_reset_timestamp_queries()
494 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); in v3d_reset_timestamp_queries()
499 for (int i = 0; i < timestamp_query->count; i++) { in v3d_reset_timestamp_queries()
500 value_addr = ((u8 *)bo->vaddr) + queries[i].offset; in v3d_reset_timestamp_queries()
531 struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; in v3d_copy_query_results()
532 struct v3d_timestamp_query *queries = timestamp_query->queries; in v3d_copy_query_results()
533 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); in v3d_copy_query_results()
534 struct v3d_bo *timestamp = to_v3d_bo(job->base.bo[1]); in v3d_copy_query_results()
535 struct v3d_copy_query_results_info *copy = &job->copy; in v3d_copy_query_results()
545 data = ((u8 *)bo->vaddr) + copy->offset; in v3d_copy_query_results()
547 for (i = 0; i < timestamp_query->count; i++) { in v3d_copy_query_results()
551 write_result = available || copy->do_partial; in v3d_copy_query_results()
553 query_addr = ((u8 *)timestamp->vaddr) + queries[i].offset; in v3d_copy_query_results()
554 write_to_buffer(data, 0, copy->do_64bit, *((u64 *)query_addr)); in v3d_copy_query_results()
557 if (copy->availability_bit) in v3d_copy_query_results()
558 write_to_buffer(data, 1, copy->do_64bit, available ? 1u : 0u); in v3d_copy_query_results()
560 data += copy->stride; in v3d_copy_query_results()
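
/* write_to_buffer() itself is not among the lines above. A minimal sketch
 * of such a helper, assuming it only chooses between a 32-bit and a 64-bit
 * store at the given element index of the output buffer:
 */
static void write_to_buffer_sketch(void *dst, u32 idx, bool do_64bit, u64 value)
{
	if (do_64bit) {
		u64 *dst64 = dst;

		dst64[idx] = value;
	} else {
		u32 *dst32 = dst;

		dst32[idx] = (u32)value;
	}
}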
572 struct v3d_performance_query_info *performance_query = &job->performance_query; in v3d_reset_performance_queries()
573 struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; in v3d_reset_performance_queries()
574 struct v3d_dev *v3d = job->base.v3d; in v3d_reset_performance_queries()
577 for (int i = 0; i < performance_query->count; i++) { in v3d_reset_performance_queries()
578 for (int j = 0; j < performance_query->nperfmons; j++) { in v3d_reset_performance_queries()
579 perfmon = v3d_perfmon_find(v3d_priv, in v3d_reset_performance_queries()
580 			   performance_query->queries[i].kperfmon_ids[j]); in v3d_reset_performance_queries()
588 memset(perfmon->values, 0, perfmon->ncounters * sizeof(u64)); in v3d_reset_performance_queries()
593 drm_syncobj_replace_fence(performance_query->queries[i].syncobj, NULL); in v3d_reset_performance_queries()
601 struct v3d_performance_query_info *performance_query = in v3d_write_performance_query_result()
602 	&job->performance_query; in v3d_write_performance_query_result()
603 struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; in v3d_write_performance_query_result()
604 struct v3d_performance_query *perf_query = in v3d_write_performance_query_result()
605 	&performance_query->queries[query]; in v3d_write_performance_query_result()
606 struct v3d_dev *v3d = job->base.v3d; in v3d_write_performance_query_result()
609 for (i = 0, offset = 0; in v3d_write_performance_query_result()
610 i < performance_query->nperfmons; in v3d_write_performance_query_result()
614 perfmon = v3d_perfmon_find(v3d_priv, in v3d_write_performance_query_result()
615 			   perf_query->kperfmon_ids[i]); in v3d_write_performance_query_result()
623 if (job->copy.do_64bit) { in v3d_write_performance_query_result()
624 for (j = 0; j < perfmon->ncounters; j++) in v3d_write_performance_query_result()
626 perfmon->values[j]); in v3d_write_performance_query_result()
628 for (j = 0; j < perfmon->ncounters; j++) in v3d_write_performance_query_result()
630 perfmon->values[j]); in v3d_write_performance_query_result()
640 struct v3d_performance_query_info *performance_query = &job->performance_query; in v3d_copy_performance_query()
641 struct v3d_copy_query_results_info *copy = &job->copy; in v3d_copy_performance_query()
642 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); in v3d_copy_performance_query()
649 data = ((u8 *)bo->vaddr) + copy->offset; in v3d_copy_performance_query()
651 for (int i = 0; i < performance_query->count; i++) { in v3d_copy_performance_query()
652 fence = drm_syncobj_fence_get(performance_query->queries[i].syncobj); in v3d_copy_performance_query()
655 write_result = available || copy->do_partial; in v3d_copy_performance_query()
659 if (copy->availability_bit) in v3d_copy_performance_query()
660 write_to_buffer(data, performance_query->ncounters, in v3d_copy_performance_query()
661 copy->do_64bit, available ? 1u : 0u); in v3d_copy_performance_query()
663 data += copy->stride; in v3d_copy_performance_query()
684 struct v3d_dev *v3d = job->base.v3d; in v3d_cpu_job_run()
686 if (job->job_type >= ARRAY_SIZE(cpu_job_function)) { in v3d_cpu_job_run()
687 DRM_DEBUG_DRIVER("Unknown CPU job: %d\n", job->job_type); in v3d_cpu_job_run()
691 v3d_job_start_stats(&job->base, V3D_CPU); in v3d_cpu_job_run()
692 trace_v3d_cpu_job_begin(&v3d->drm, job->job_type); in v3d_cpu_job_run()
694 cpu_job_function[job->job_type](job); in v3d_cpu_job_run()
696 trace_v3d_cpu_job_end(&v3d->drm, job->job_type); in v3d_cpu_job_run()
697 v3d_job_update_stats(&job->base, V3D_CPU); in v3d_cpu_job_run()
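
/* v3d_cpu_job_run() above indexes a cpu_job_function[] table that the
 * lines above do not show. A sketch of such a dispatch table, assuming the
 * V3D_CPU_JOB_TYPE_* values from the drm/v3d uapi header map onto the CPU
 * job helpers listed earlier:
 */
typedef void (*v3d_cpu_job_fn)(struct v3d_cpu_job *);

static const v3d_cpu_job_fn cpu_job_function_sketch[] = {
	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect,
	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query,
	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries,
	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = v3d_copy_query_results,
	[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = v3d_reset_performance_queries,
	[V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = v3d_copy_performance_query,
};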
706 struct v3d_dev *v3d = job->v3d; in v3d_cache_clean_job_run()
722 mutex_lock(&v3d->reset_lock); in v3d_gpu_reset_for_timeout()
725 for (q = 0; q < V3D_MAX_QUEUES; q++) in v3d_gpu_reset_for_timeout()
726 drm_sched_stop(&v3d->queue[q].sched, sched_job); in v3d_gpu_reset_for_timeout()
734 for (q = 0; q < V3D_MAX_QUEUES; q++) in v3d_gpu_reset_for_timeout()
735 drm_sched_resubmit_jobs(&v3d->queue[q].sched); in v3d_gpu_reset_for_timeout()
737 /* Unblock schedulers and restart their jobs. */ in v3d_gpu_reset_for_timeout()
738 for (q = 0; q < V3D_MAX_QUEUES; q++) { in v3d_gpu_reset_for_timeout()
739 drm_sched_start(&v3d->queue[q].sched, 0); in v3d_gpu_reset_for_timeout()
742 mutex_unlock(&v3d->reset_lock); in v3d_gpu_reset_for_timeout()
750 struct drm_gpu_scheduler *sched = sched_job->sched; in v3d_sched_skip_reset()
752 spin_lock(&sched->job_list_lock); in v3d_sched_skip_reset()
753 list_add(&sched_job->list, &sched->pending_list); in v3d_sched_skip_reset()
754 spin_unlock(&sched->job_list_lock); in v3d_sched_skip_reset()
762 struct v3d_dev *v3d = job->v3d; in v3d_cl_job_timedout()
766 /* If the current address or return address have changed, then the GPU in v3d_cl_job_timedout()
767  * has probably made progress and we should delay the reset. This could in v3d_cl_job_timedout()
768  * fail if the GPU got in an infinite loop in the CL, but that in v3d_cl_job_timedout()
769  * is pretty unlikely outside of an i-g-t testcase. in v3d_cl_job_timedout()
770  */ in v3d_cl_job_timedout()
788 &job->timedout_ctca, &job->timedout_ctra); in v3d_bin_job_timedout()
797 &job->timedout_ctca, &job->timedout_ctra); in v3d_render_job_timedout()
805 return v3d_gpu_reset_for_timeout(job->v3d, sched_job); in v3d_generic_job_timedout()
812 struct v3d_dev *v3d = job->base.v3d; in v3d_csd_job_timedout()
813 u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver)); in v3d_csd_job_timedout()
818 if (job->timedout_batches != batches) { in v3d_csd_job_timedout()
819 job->timedout_batches = batches; in v3d_csd_job_timedout()
872 .dev = v3d->drm.dev, in v3d_queue_sched_init()
878 return drm_sched_init(&v3d->queue[queue].sched, &args); in v3d_queue_sched_init()
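
/* drm_sched_init() above receives its callbacks through the args struct.
 * A sketch of what a per-queue ops table looks like for the bin queue,
 * assuming the standard drm_sched_backend_ops callbacks; the free_job
 * helper named here is illustrative since it is not among the lines above.
 */
static const struct drm_sched_backend_ops v3d_bin_sched_ops_sketch = {
	.run_job = v3d_bin_job_run,		/* programs CT0Q, returns the HW fence */
	.timedout_job = v3d_bin_job_timedout,	/* resets the GPU, or skips if progress was made */
	.free_job = v3d_sched_job_free,		/* assumed common job cleanup helper */
};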
927 for (q = 0; q < V3D_MAX_QUEUES; q++) { in v3d_sched_fini()
928 if (v3d->queue[q].sched.ready) in v3d_sched_fini()
929 drm_sched_fini(&v3d->queue[q].sched); in v3d_sched_fini()