Lines Matching +full:cs +full:- +full:x

1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2019 HabanaLabs, Ltd.
25 struct hl_device *hdev = hw_sob->hdev; in hl_sob_reset()
27 hdev->asic_funcs->reset_sob(hdev, hw_sob); in hl_sob_reset()
34 struct hl_device *hdev = hw_sob->hdev; in hl_sob_reset_error()
36 dev_crit(hdev->dev, in hl_sob_reset_error()
38 hw_sob->q_idx, hw_sob->sob_id); in hl_sob_reset_error()
47 struct hl_device *hdev = hl_cs_cmpl->hdev; in hl_fence_release()
49 /* EBUSY means the CS was never submitted and hence we don't have in hl_fence_release()
52 if (fence->error == -EBUSY) in hl_fence_release()
55 if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || in hl_fence_release()
56 (hl_cs_cmpl->type == CS_TYPE_WAIT)) { in hl_fence_release()
58 dev_dbg(hdev->dev, in hl_fence_release()
59 "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n", in hl_fence_release()
60 hl_cs_cmpl->cs_seq, in hl_fence_release()
61 hl_cs_cmpl->type, in hl_fence_release()
62 hl_cs_cmpl->hw_sob->sob_id, in hl_fence_release()
63 hl_cs_cmpl->sob_val); in hl_fence_release()
66 * A signal CS can get completion while the corresponding wait in hl_fence_release()
67 * for signal CS is on its way to the PQ. The wait for signal CS in hl_fence_release()
68 * will get stuck if the signal CS incremented the SOB to its in hl_fence_release()
72 * 1. The wait for signal CS must get a ref for the signal CS as in hl_fence_release()
76 * 2. Signal/Wait for signal CS will decrement the SOB refcnt in hl_fence_release()
78 * These two measures guarantee that the wait for signal CS will in hl_fence_release()
79 * reset the SOB upon completion rather than the signal CS and in hl_fence_release()
82 kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); in hl_fence_release()
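The second measure in the comment above is a "last reference resets the resource" scheme built on struct kref (the first measure shows up later as the hl_fence_put(cs->signal_fence) call in cs_do_release()). A minimal sketch of that kref pattern, with illustrative names only; in the driver the release callback is hl_sob_reset(), which goes through asic_funcs->reset_sob():

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/printk.h>

/* Illustrative stand-in for the hardware sync object (SOB). */
struct demo_sob {
	struct kref refcount;	/* one reference per CS still using the SOB */
};

static void demo_sob_reset(struct kref *ref)
{
	struct demo_sob *sob = container_of(ref, struct demo_sob, refcount);

	/* Only the last user reaches this point, so the SOB is idle here;
	 * the driver performs the actual hardware reset at this spot.
	 */
	pr_info("demo: resetting SOB %p\n", sob);
}

/* Both the signal CS and the wait-for-signal CS drop their reference when
 * they complete; whichever of the two completes last ends up doing the
 * reset, which is exactly what the comment above requires.
 */
static void demo_cs_completed(struct demo_sob *sob)
{
	kref_put(&sob->refcount, demo_sob_reset);
}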
92 kref_put(&fence->refcount, hl_fence_release); in hl_fence_put()
98 kref_get(&fence->refcount); in hl_fence_get()
103 kref_init(&fence->refcount); in hl_fence_init()
104 fence->error = 0; in hl_fence_init()
105 init_completion(&fence->completion); in hl_fence_init()
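hl_fence_init(), hl_fence_get() and hl_fence_put() above touch only three fields, which already implies the shape of the fence object: a refcounted completion plus an error code that the release path fills in (-ETIMEDOUT, -EIO or -EBUSY, see cs_do_release() further down). A rough sketch of that shape; the real struct hl_fence in the driver header may carry additional fields:

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/kref.h>

/* Assumed shape, inferred from the fields initialised above. */
struct demo_fence {
	struct kref refcount;		/* hl_fence_get() / hl_fence_put() */
	struct completion completion;	/* signalled by complete_all() on CS release */
	int error;			/* 0 on success, or -ETIMEDOUT / -EIO / -EBUSY */
};

/* A waiter blocks on the completion and then reports the stored error,
 * mirroring what _hl_cs_wait_ioctl() does at the end of this file.
 */
static long demo_fence_wait(struct demo_fence *fence, unsigned long timeout)
{
	long rc;

	rc = wait_for_completion_interruptible_timeout(&fence->completion,
						       timeout);
	if (rc == 0)
		return -ETIMEDOUT;	/* wait itself timed out */
	if (rc < 0)
		return rc;		/* interrupted (-ERESTARTSYS) */

	return fence->error;		/* completed: report the CS outcome */
}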
108 static void cs_get(struct hl_cs *cs) in cs_get() argument
110 kref_get(&cs->refcount); in cs_get()
113 static int cs_get_unless_zero(struct hl_cs *cs) in cs_get_unless_zero() argument
115 return kref_get_unless_zero(&cs->refcount); in cs_get_unless_zero()
118 static void cs_put(struct hl_cs *cs) in cs_put() argument
120 kref_put(&cs->refcount, cs_do_release); in cs_put()
129 return (job->queue_type == QUEUE_TYPE_EXT || in is_cb_patched()
130 (job->queue_type == QUEUE_TYPE_HW && in is_cb_patched()
131 job->is_kernel_allocated_cb && in is_cb_patched()
132 !hdev->mmu_enable)); in is_cb_patched()
136 * cs_parser - parse the user command submission
148 struct hl_device *hdev = hpriv->hdev; in cs_parser()
152 parser.ctx_id = job->cs->ctx->asid; in cs_parser()
153 parser.cs_sequence = job->cs->sequence; in cs_parser()
154 parser.job_id = job->id; in cs_parser()
156 parser.hw_queue_id = job->hw_queue_id; in cs_parser()
157 parser.job_userptr_list = &job->userptr_list; in cs_parser()
159 parser.user_cb = job->user_cb; in cs_parser()
160 parser.user_cb_size = job->user_cb_size; in cs_parser()
161 parser.queue_type = job->queue_type; in cs_parser()
162 parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; in cs_parser()
163 job->patched_cb = NULL; in cs_parser()
165 rc = hdev->asic_funcs->cs_parser(hdev, &parser); in cs_parser()
169 job->patched_cb = parser.patched_cb; in cs_parser()
170 job->job_cb_size = parser.patched_cb_size; in cs_parser()
171 job->contains_dma_pkt = parser.contains_dma_pkt; in cs_parser()
173 spin_lock(&job->patched_cb->lock); in cs_parser()
174 job->patched_cb->cs_cnt++; in cs_parser()
175 spin_unlock(&job->patched_cb->lock); in cs_parser()
181 * won't be accessed again for this CS in cs_parser()
183 spin_lock(&job->user_cb->lock); in cs_parser()
184 job->user_cb->cs_cnt--; in cs_parser()
185 spin_unlock(&job->user_cb->lock); in cs_parser()
186 hl_cb_put(job->user_cb); in cs_parser()
187 job->user_cb = NULL; in cs_parser()
189 job->job_cb_size = job->user_cb_size; in cs_parser()
197 struct hl_cs *cs = job->cs; in free_job() local
200 hl_userptr_delete_list(hdev, &job->userptr_list); in free_job()
206 if (job->patched_cb) { in free_job()
207 spin_lock(&job->patched_cb->lock); in free_job()
208 job->patched_cb->cs_cnt--; in free_job()
209 spin_unlock(&job->patched_cb->lock); in free_job()
211 hl_cb_put(job->patched_cb); in free_job()
219 if (job->queue_type == QUEUE_TYPE_HW && in free_job()
220 job->is_kernel_allocated_cb && hdev->mmu_enable) { in free_job()
221 spin_lock(&job->user_cb->lock); in free_job()
222 job->user_cb->cs_cnt--; in free_job()
223 spin_unlock(&job->user_cb->lock); in free_job()
225 hl_cb_put(job->user_cb); in free_job()
232 spin_lock(&cs->job_lock); in free_job()
233 list_del(&job->cs_node); in free_job()
234 spin_unlock(&cs->job_lock); in free_job()
238 if (job->queue_type == QUEUE_TYPE_EXT || in free_job()
239 job->queue_type == QUEUE_TYPE_HW) in free_job()
240 cs_put(cs); in free_job()
247 hdev->aggregated_cs_counters.device_in_reset_drop_cnt += in cs_counters_aggregate()
248 ctx->cs_counters.device_in_reset_drop_cnt; in cs_counters_aggregate()
249 hdev->aggregated_cs_counters.out_of_mem_drop_cnt += in cs_counters_aggregate()
250 ctx->cs_counters.out_of_mem_drop_cnt; in cs_counters_aggregate()
251 hdev->aggregated_cs_counters.parsing_drop_cnt += in cs_counters_aggregate()
252 ctx->cs_counters.parsing_drop_cnt; in cs_counters_aggregate()
253 hdev->aggregated_cs_counters.queue_full_drop_cnt += in cs_counters_aggregate()
254 ctx->cs_counters.queue_full_drop_cnt; in cs_counters_aggregate()
255 hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt += in cs_counters_aggregate()
256 ctx->cs_counters.max_cs_in_flight_drop_cnt; in cs_counters_aggregate()
261 struct hl_cs *cs = container_of(ref, struct hl_cs, in cs_do_release() local
263 struct hl_device *hdev = cs->ctx->hdev; in cs_do_release()
266 cs->completed = true; in cs_do_release()
270 * finished, because each one of them took refcnt to CS, we still in cs_do_release()
272 * will have leaked memory and what's worse, the CS object (and in cs_do_release()
276 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in cs_do_release()
280 if (cs->submitted) { in cs_do_release()
281 hdev->asic_funcs->hw_queues_lock(hdev); in cs_do_release()
283 hdev->cs_active_cnt--; in cs_do_release()
284 if (!hdev->cs_active_cnt) { in cs_do_release()
287 ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++]; in cs_do_release()
288 ts->busy_to_idle_ts = ktime_get(); in cs_do_release()
290 if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE) in cs_do_release()
291 hdev->idle_busy_ts_idx = 0; in cs_do_release()
292 } else if (hdev->cs_active_cnt < 0) { in cs_do_release()
293 dev_crit(hdev->dev, "CS active cnt %d is negative\n", in cs_do_release()
294 hdev->cs_active_cnt); in cs_do_release()
297 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_do_release()
299 hl_int_hw_queue_update_ci(cs); in cs_do_release()
301 spin_lock(&hdev->hw_queues_mirror_lock); in cs_do_release()
302 /* remove CS from hw_queues mirror list */ in cs_do_release()
303 list_del_init(&cs->mirror_node); in cs_do_release()
304 spin_unlock(&hdev->hw_queues_mirror_lock); in cs_do_release()
307 * Don't cancel TDR in case this CS was timedout because we in cs_do_release()
310 if ((!cs->timedout) && in cs_do_release()
311 (hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) { in cs_do_release()
314 if (cs->tdr_active) in cs_do_release()
315 cancel_delayed_work_sync(&cs->work_tdr); in cs_do_release()
317 spin_lock(&hdev->hw_queues_mirror_lock); in cs_do_release()
319 /* queue TDR for next CS */ in cs_do_release()
321 &hdev->hw_queues_mirror_list, in cs_do_release()
324 if ((next) && (!next->tdr_active)) { in cs_do_release()
325 next->tdr_active = true; in cs_do_release()
326 schedule_delayed_work(&next->work_tdr, in cs_do_release()
327 hdev->timeout_jiffies); in cs_do_release()
330 spin_unlock(&hdev->hw_queues_mirror_lock); in cs_do_release()
332 } else if (cs->type == CS_TYPE_WAIT) { in cs_do_release()
334 * In case the wait for signal CS was submitted, the put occurs in cs_do_release()
337 hl_fence_put(cs->signal_fence); in cs_do_release()
344 hl_debugfs_remove_cs(cs); in cs_do_release()
346 hl_ctx_put(cs->ctx); in cs_do_release()
352 if (cs->timedout) in cs_do_release()
353 cs->fence->error = -ETIMEDOUT; in cs_do_release()
354 else if (cs->aborted) in cs_do_release()
355 cs->fence->error = -EIO; in cs_do_release()
356 else if (!cs->submitted) in cs_do_release()
357 cs->fence->error = -EBUSY; in cs_do_release()
359 complete_all(&cs->fence->completion); in cs_do_release()
360 hl_fence_put(cs->fence); in cs_do_release()
361 cs_counters_aggregate(hdev, cs->ctx); in cs_do_release()
363 kfree(cs->jobs_in_queue_cnt); in cs_do_release()
364 kfree(cs); in cs_do_release()
371 struct hl_cs *cs = container_of(work, struct hl_cs, in cs_timedout() local
373 rc = cs_get_unless_zero(cs); in cs_timedout()
377 if ((!cs->submitted) || (cs->completed)) { in cs_timedout()
378 cs_put(cs); in cs_timedout()
382 /* Mark the CS is timed out so we won't try to cancel its TDR */ in cs_timedout()
383 cs->timedout = true; in cs_timedout()
385 hdev = cs->ctx->hdev; in cs_timedout()
387 dev_err(hdev->dev, in cs_timedout()
389 cs->sequence); in cs_timedout()
391 cs_put(cs); in cs_timedout()
393 if (hdev->reset_on_lockup) in cs_timedout()
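cs_timedout() above opens with cs_get_unless_zero(): the delayed TDR work can race with the final cs_put(), so it only proceeds if it can still take a reference, and the release path is expected to cancel the work first (cancel_delayed_work_sync() in cs_do_release() above). A small sketch of that pattern with illustrative names:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_cs {
	struct kref refcount;
	struct delayed_work work_tdr;
	bool completed;
};

static void demo_cs_release(struct kref *ref)
{
	kfree(container_of(ref, struct demo_cs, refcount));
}

static void demo_tdr(struct work_struct *work)
{
	struct demo_cs *cs = container_of(work, struct demo_cs, work_tdr.work);

	/* If the refcount already hit zero, release is in flight and the
	 * object must not be used further; bail out without a reference.
	 */
	if (!kref_get_unless_zero(&cs->refcount))
		return;

	if (!cs->completed)
		pr_err("demo: command submission timed out\n");

	kref_put(&cs->refcount, demo_cs_release);
}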
402 struct hl_cs *cs; in allocate_cs() local
405 cs = kzalloc(sizeof(*cs), GFP_ATOMIC); in allocate_cs()
406 if (!cs) in allocate_cs()
407 return -ENOMEM; in allocate_cs()
409 cs->ctx = ctx; in allocate_cs()
410 cs->submitted = false; in allocate_cs()
411 cs->completed = false; in allocate_cs()
412 cs->type = cs_type; in allocate_cs()
413 INIT_LIST_HEAD(&cs->job_list); in allocate_cs()
414 INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); in allocate_cs()
415 kref_init(&cs->refcount); in allocate_cs()
416 spin_lock_init(&cs->job_lock); in allocate_cs()
420 rc = -ENOMEM; in allocate_cs()
424 cs_cmpl->hdev = hdev; in allocate_cs()
425 cs_cmpl->type = cs->type; in allocate_cs()
426 spin_lock_init(&cs_cmpl->lock); in allocate_cs()
427 cs->fence = &cs_cmpl->base_fence; in allocate_cs()
429 spin_lock(&ctx->cs_lock); in allocate_cs()
431 cs_cmpl->cs_seq = ctx->cs_sequence; in allocate_cs()
432 other = ctx->cs_pending[cs_cmpl->cs_seq & in allocate_cs()
433 (hdev->asic_prop.max_pending_cs - 1)]; in allocate_cs()
435 if (other && !completion_done(&other->completion)) { in allocate_cs()
436 dev_dbg_ratelimited(hdev->dev, in allocate_cs()
437 "Rejecting CS because of too many in-flights CS\n"); in allocate_cs()
438 ctx->cs_counters.max_cs_in_flight_drop_cnt++; in allocate_cs()
439 rc = -EAGAIN; in allocate_cs()
443 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, in allocate_cs()
444 sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); in allocate_cs()
445 if (!cs->jobs_in_queue_cnt) { in allocate_cs()
446 rc = -ENOMEM; in allocate_cs()
451 hl_fence_init(&cs_cmpl->base_fence); in allocate_cs()
453 cs->sequence = cs_cmpl->cs_seq; in allocate_cs()
455 ctx->cs_pending[cs_cmpl->cs_seq & in allocate_cs()
456 (hdev->asic_prop.max_pending_cs - 1)] = in allocate_cs()
457 &cs_cmpl->base_fence; in allocate_cs()
458 ctx->cs_sequence++; in allocate_cs()
460 hl_fence_get(&cs_cmpl->base_fence); in allocate_cs()
464 spin_unlock(&ctx->cs_lock); in allocate_cs()
466 *cs_new = cs; in allocate_cs()
471 spin_unlock(&ctx->cs_lock); in allocate_cs()
474 kfree(cs); in allocate_cs()
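allocate_cs() above throttles submissions by mapping each sequence number into ctx->cs_pending[] with a power-of-two mask (max_pending_cs - 1) and refusing to reuse a slot whose previous fence has not completed, which caps the number of CS in flight. A stripped-down sketch of that check, with assumed names:

#include <linux/completion.h>
#include <linux/types.h>

/*
 * pending[] has max_pending entries and max_pending is a power of two, so
 * "seq & (max_pending - 1)" walks the array as a ring. Sequence seq may
 * only be admitted once the slot's previous occupant has completed;
 * otherwise the caller rejects the CS with -EAGAIN, as allocate_cs() does.
 */
static bool demo_slot_available(struct completion **pending, u64 seq,
				unsigned int max_pending)
{
	struct completion *other = pending[seq & (max_pending - 1)];

	return !other || completion_done(other);
}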
478 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) in cs_rollback() argument
482 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in cs_rollback()
489 struct hl_cs *cs, *tmp; in hl_cs_rollback_all() local
492 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) in hl_cs_rollback_all()
493 flush_workqueue(hdev->cq_wq[i]); in hl_cs_rollback_all()
496 list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list, in hl_cs_rollback_all()
498 cs_get(cs); in hl_cs_rollback_all()
499 cs->aborted = true; in hl_cs_rollback_all()
500 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n", in hl_cs_rollback_all()
501 cs->ctx->asid, cs->sequence); in hl_cs_rollback_all()
502 cs_rollback(hdev, cs); in hl_cs_rollback_all()
503 cs_put(cs); in hl_cs_rollback_all()
511 struct hl_cs *cs = job->cs; in job_wq_completion() local
512 struct hl_device *hdev = cs->ctx->hdev; in job_wq_completion()
523 struct asic_fixed_properties *asic = &hdev->asic_prop; in validate_queue_index()
526 /* This must be checked here to prevent out-of-bounds access to in validate_queue_index()
529 if (chunk->queue_index >= asic->max_queues) { in validate_queue_index()
530 dev_err(hdev->dev, "Queue index %d is invalid\n", in validate_queue_index()
531 chunk->queue_index); in validate_queue_index()
532 return -EINVAL; in validate_queue_index()
535 hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; in validate_queue_index()
537 if (hw_queue_prop->type == QUEUE_TYPE_NA) { in validate_queue_index()
538 dev_err(hdev->dev, "Queue index %d is invalid\n", in validate_queue_index()
539 chunk->queue_index); in validate_queue_index()
540 return -EINVAL; in validate_queue_index()
543 if (hw_queue_prop->driver_only) { in validate_queue_index()
544 dev_err(hdev->dev, in validate_queue_index()
546 chunk->queue_index); in validate_queue_index()
547 return -EINVAL; in validate_queue_index()
550 *queue_type = hw_queue_prop->type; in validate_queue_index()
551 *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb; in validate_queue_index()
563 cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT); in get_cb_from_cs_chunk()
567 dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle); in get_cb_from_cs_chunk()
571 if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) { in get_cb_from_cs_chunk()
572 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size); in get_cb_from_cs_chunk()
576 spin_lock(&cb->lock); in get_cb_from_cs_chunk()
577 cb->cs_cnt++; in get_cb_from_cs_chunk()
578 spin_unlock(&cb->lock); in get_cb_from_cs_chunk()
596 job->queue_type = queue_type; in hl_cs_allocate_job()
597 job->is_kernel_allocated_cb = is_kernel_allocated_cb; in hl_cs_allocate_job()
600 INIT_LIST_HEAD(&job->userptr_list); in hl_cs_allocate_job()
602 if (job->queue_type == QUEUE_TYPE_EXT) in hl_cs_allocate_job()
603 INIT_WORK(&job->finish_work, job_wq_completion); in hl_cs_allocate_job()
611 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_default()
614 struct hl_cs *cs; in cs_ioctl_default() local
623 dev_err(hdev->dev, in cs_ioctl_default()
626 rc = -EINVAL; in cs_ioctl_default()
633 rc = -ENOMEM; in cs_ioctl_default()
639 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); in cs_ioctl_default()
640 rc = -EFAULT; in cs_ioctl_default()
645 hl_ctx_get(hdev, hpriv->ctx); in cs_ioctl_default()
647 rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs); in cs_ioctl_default()
649 hl_ctx_put(hpriv->ctx); in cs_ioctl_default()
653 *cs_seq = cs->sequence; in cs_ioctl_default()
655 hl_debugfs_add_cs(cs); in cs_ioctl_default()
657 /* Validate ALL the CS chunks before submitting the CS */ in cs_ioctl_default()
666 hpriv->ctx->cs_counters.parsing_drop_cnt++; in cs_ioctl_default()
671 cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); in cs_ioctl_default()
673 hpriv->ctx->cs_counters.parsing_drop_cnt++; in cs_ioctl_default()
674 rc = -EINVAL; in cs_ioctl_default()
678 cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; in cs_ioctl_default()
687 hpriv->ctx->cs_counters.out_of_mem_drop_cnt++; in cs_ioctl_default()
688 dev_err(hdev->dev, "Failed to allocate a new job\n"); in cs_ioctl_default()
689 rc = -ENOMEM; in cs_ioctl_default()
696 job->id = i + 1; in cs_ioctl_default()
697 job->cs = cs; in cs_ioctl_default()
698 job->user_cb = cb; in cs_ioctl_default()
699 job->user_cb_size = chunk->cb_size; in cs_ioctl_default()
700 job->hw_queue_id = chunk->queue_index; in cs_ioctl_default()
702 cs->jobs_in_queue_cnt[job->hw_queue_id]++; in cs_ioctl_default()
704 list_add_tail(&job->cs_node, &cs->job_list); in cs_ioctl_default()
707 * Increment CS reference. When CS reference is 0, CS is in cs_ioctl_default()
712 if (job->queue_type == QUEUE_TYPE_EXT || in cs_ioctl_default()
713 job->queue_type == QUEUE_TYPE_HW) in cs_ioctl_default()
714 cs_get(cs); in cs_ioctl_default()
720 hpriv->ctx->cs_counters.parsing_drop_cnt++; in cs_ioctl_default()
721 dev_err(hdev->dev, in cs_ioctl_default()
722 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", in cs_ioctl_default()
723 cs->ctx->asid, cs->sequence, job->id, rc); in cs_ioctl_default()
729 hpriv->ctx->cs_counters.parsing_drop_cnt++; in cs_ioctl_default()
730 dev_err(hdev->dev, in cs_ioctl_default()
731 "Reject CS %d.%llu because only internal queues jobs are present\n", in cs_ioctl_default()
732 cs->ctx->asid, cs->sequence); in cs_ioctl_default()
733 rc = -EINVAL; in cs_ioctl_default()
737 rc = hl_hw_queue_schedule_cs(cs); in cs_ioctl_default()
739 if (rc != -EAGAIN) in cs_ioctl_default()
740 dev_err(hdev->dev, in cs_ioctl_default()
741 "Failed to submit CS %d.%llu to H/W queues, error %d\n", in cs_ioctl_default()
742 cs->ctx->asid, cs->sequence, rc); in cs_ioctl_default()
750 spin_lock(&cb->lock); in cs_ioctl_default()
751 cb->cs_cnt--; in cs_ioctl_default()
752 spin_unlock(&cb->lock); in cs_ioctl_default()
755 cs_rollback(hdev, cs); in cs_ioctl_default()
759 /* We finished with the CS in this function, so put the ref */ in cs_ioctl_default()
760 cs_put(cs); in cs_ioctl_default()
771 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_signal_wait()
772 struct hl_ctx *ctx = hpriv->ctx; in cs_ioctl_signal_wait()
777 struct hl_cs *cs; in cs_ioctl_signal_wait() local
787 dev_err(hdev->dev, in cs_ioctl_signal_wait()
790 rc = -EINVAL; in cs_ioctl_signal_wait()
797 rc = -ENOMEM; in cs_ioctl_signal_wait()
803 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); in cs_ioctl_signal_wait()
804 rc = -EFAULT; in cs_ioctl_signal_wait()
811 if (chunk->queue_index >= hdev->asic_prop.max_queues) { in cs_ioctl_signal_wait()
812 dev_err(hdev->dev, "Queue index %d is invalid\n", in cs_ioctl_signal_wait()
813 chunk->queue_index); in cs_ioctl_signal_wait()
814 rc = -EINVAL; in cs_ioctl_signal_wait()
818 q_idx = chunk->queue_index; in cs_ioctl_signal_wait()
819 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; in cs_ioctl_signal_wait()
820 q_type = hw_queue_prop->type; in cs_ioctl_signal_wait()
822 if ((q_idx >= hdev->asic_prop.max_queues) || in cs_ioctl_signal_wait()
823 (!hw_queue_prop->supports_sync_stream)) { in cs_ioctl_signal_wait()
824 dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx); in cs_ioctl_signal_wait()
825 rc = -EINVAL; in cs_ioctl_signal_wait()
832 signal_seq_arr_len = chunk->num_signal_seq_arr; in cs_ioctl_signal_wait()
836 dev_err(hdev->dev, in cs_ioctl_signal_wait()
837 "Wait for signal CS supports only one signal CS seq\n"); in cs_ioctl_signal_wait()
838 rc = -EINVAL; in cs_ioctl_signal_wait()
846 rc = -ENOMEM; in cs_ioctl_signal_wait()
850 size_to_copy = chunk->num_signal_seq_arr * in cs_ioctl_signal_wait()
853 u64_to_user_ptr(chunk->signal_seq_arr), in cs_ioctl_signal_wait()
855 dev_err(hdev->dev, in cs_ioctl_signal_wait()
857 rc = -EFAULT; in cs_ioctl_signal_wait()
865 dev_err(hdev->dev, in cs_ioctl_signal_wait()
866 "Failed to get signal CS with seq 0x%llx\n", in cs_ioctl_signal_wait()
873 /* signal CS already finished */ in cs_ioctl_signal_wait()
881 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) { in cs_ioctl_signal_wait()
882 dev_err(hdev->dev, in cs_ioctl_signal_wait()
883 "CS seq 0x%llx is not of a signal CS\n", in cs_ioctl_signal_wait()
886 rc = -EINVAL; in cs_ioctl_signal_wait()
890 if (completion_done(&sig_fence->completion)) { in cs_ioctl_signal_wait()
891 /* signal CS already finished */ in cs_ioctl_signal_wait()
901 rc = allocate_cs(hdev, ctx, cs_type, &cs); in cs_ioctl_signal_wait()
910 * Save the signal CS fence for later initialization right before in cs_ioctl_signal_wait()
911 * hanging the wait CS on the queue. in cs_ioctl_signal_wait()
913 if (cs->type == CS_TYPE_WAIT) in cs_ioctl_signal_wait()
914 cs->signal_fence = sig_fence; in cs_ioctl_signal_wait()
916 hl_debugfs_add_cs(cs); in cs_ioctl_signal_wait()
918 *cs_seq = cs->sequence; in cs_ioctl_signal_wait()
922 ctx->cs_counters.out_of_mem_drop_cnt++; in cs_ioctl_signal_wait()
923 dev_err(hdev->dev, "Failed to allocate a new job\n"); in cs_ioctl_signal_wait()
924 rc = -ENOMEM; in cs_ioctl_signal_wait()
928 if (cs->type == CS_TYPE_WAIT) in cs_ioctl_signal_wait()
929 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); in cs_ioctl_signal_wait()
931 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); in cs_ioctl_signal_wait()
934 q_type == QUEUE_TYPE_HW && hdev->mmu_enable); in cs_ioctl_signal_wait()
936 ctx->cs_counters.out_of_mem_drop_cnt++; in cs_ioctl_signal_wait()
938 rc = -EFAULT; in cs_ioctl_signal_wait()
942 job->id = 0; in cs_ioctl_signal_wait()
943 job->cs = cs; in cs_ioctl_signal_wait()
944 job->user_cb = cb; in cs_ioctl_signal_wait()
945 job->user_cb->cs_cnt++; in cs_ioctl_signal_wait()
946 job->user_cb_size = cb_size; in cs_ioctl_signal_wait()
947 job->hw_queue_id = q_idx; in cs_ioctl_signal_wait()
951 * We call hl_cb_destroy() out of two reasons - we don't need the CB in in cs_ioctl_signal_wait()
955 job->patched_cb = job->user_cb; in cs_ioctl_signal_wait()
956 job->job_cb_size = job->user_cb_size; in cs_ioctl_signal_wait()
957 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); in cs_ioctl_signal_wait()
959 cs->jobs_in_queue_cnt[job->hw_queue_id]++; in cs_ioctl_signal_wait()
961 list_add_tail(&job->cs_node, &cs->job_list); in cs_ioctl_signal_wait()
964 cs_get(cs); in cs_ioctl_signal_wait()
968 rc = hl_hw_queue_schedule_cs(cs); in cs_ioctl_signal_wait()
970 if (rc != -EAGAIN) in cs_ioctl_signal_wait()
971 dev_err(hdev->dev, in cs_ioctl_signal_wait()
972 "Failed to submit CS %d.%llu to H/W queues, error %d\n", in cs_ioctl_signal_wait()
973 ctx->asid, cs->sequence, rc); in cs_ioctl_signal_wait()
981 cs_rollback(hdev, cs); in cs_ioctl_signal_wait()
985 /* We finished with the CS in this function, so put the ref */ in cs_ioctl_signal_wait()
986 cs_put(cs); in cs_ioctl_signal_wait()
998 struct hl_device *hdev = hpriv->hdev; in hl_cs_ioctl()
1000 struct hl_ctx *ctx = hpriv->ctx; in hl_cs_ioctl()
1009 dev_warn_ratelimited(hdev->dev, in hl_cs_ioctl()
1010 "Device is %s. Can't submit new CS\n", in hl_cs_ioctl()
1011 atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); in hl_cs_ioctl()
1012 rc = -EBUSY; in hl_cs_ioctl()
1016 sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT; in hl_cs_ioctl()
1019 dev_err(hdev->dev, in hl_cs_ioctl()
1020 "Signal and wait CS flags are mutually exclusive, context %d\n", in hl_cs_ioctl()
1021 ctx->asid); in hl_cs_ioctl()
1022 rc = -EINVAL; in hl_cs_ioctl()
1027 (!hdev->supports_sync_stream))) { in hl_cs_ioctl()
1028 dev_err(hdev->dev, "Sync stream CS is not supported\n"); in hl_cs_ioctl()
1029 rc = -EINVAL; in hl_cs_ioctl()
1033 if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL) in hl_cs_ioctl()
1035 else if (args->in.cs_flags & HL_CS_FLAGS_WAIT) in hl_cs_ioctl()
1040 chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute; in hl_cs_ioctl()
1041 num_chunks_execute = args->in.num_chunks_execute; in hl_cs_ioctl()
1045 dev_err(hdev->dev, in hl_cs_ioctl()
1046 "Got execute CS with 0 chunks, context %d\n", in hl_cs_ioctl()
1047 ctx->asid); in hl_cs_ioctl()
1048 rc = -EINVAL; in hl_cs_ioctl()
1052 dev_err(hdev->dev, in hl_cs_ioctl()
1053 "Sync stream CS mandates one chunk only, context %d\n", in hl_cs_ioctl()
1054 ctx->asid); in hl_cs_ioctl()
1055 rc = -EINVAL; in hl_cs_ioctl()
1059 do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); in hl_cs_ioctl()
1061 if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { in hl_cs_ioctl()
1065 (void __user *) (uintptr_t) args->in.chunks_restore; in hl_cs_ioctl()
1066 num_chunks_restore = args->in.num_chunks_restore; in hl_cs_ioctl()
1068 mutex_lock(&hpriv->restore_phase_mutex); in hl_cs_ioctl()
1071 rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); in hl_cs_ioctl()
1073 dev_err_ratelimited(hdev->dev, in hl_cs_ioctl()
1074 "Failed to switch to context %d, rejecting CS! %d\n", in hl_cs_ioctl()
1075 ctx->asid, rc); in hl_cs_ioctl()
1078 * while we want to do context-switch (-EBUSY), in hl_cs_ioctl()
1079 * we need to soft-reset because QMAN is in hl_cs_ioctl()
1085 if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) in hl_cs_ioctl()
1087 mutex_unlock(&hpriv->restore_phase_mutex); in hl_cs_ioctl()
1092 hdev->asic_funcs->restore_phase_topology(hdev); in hl_cs_ioctl()
1095 dev_dbg(hdev->dev, in hl_cs_ioctl()
1096 "Need to run restore phase but restore CS is empty\n"); in hl_cs_ioctl()
1103 mutex_unlock(&hpriv->restore_phase_mutex); in hl_cs_ioctl()
1106 dev_err(hdev->dev, in hl_cs_ioctl()
1107 "Failed to submit restore CS for context %d (%d)\n", in hl_cs_ioctl()
1108 ctx->asid, rc); in hl_cs_ioctl()
1115 jiffies_to_usecs(hdev->timeout_jiffies), in hl_cs_ioctl()
1118 dev_err(hdev->dev, in hl_cs_ioctl()
1119 "Restore CS for context %d failed to complete %ld\n", in hl_cs_ioctl()
1120 ctx->asid, ret); in hl_cs_ioctl()
1121 rc = -ENOEXEC; in hl_cs_ioctl()
1126 ctx->thread_ctx_switch_wait_token = 1; in hl_cs_ioctl()
1127 } else if (!ctx->thread_ctx_switch_wait_token) { in hl_cs_ioctl()
1131 &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), in hl_cs_ioctl()
1132 100, jiffies_to_usecs(hdev->timeout_jiffies), false); in hl_cs_ioctl()
1134 if (rc == -ETIMEDOUT) { in hl_cs_ioctl()
1135 dev_err(hdev->dev, in hl_cs_ioctl()
1149 if (rc != -EAGAIN) { in hl_cs_ioctl()
1151 args->out.status = rc; in hl_cs_ioctl()
1152 args->out.seq = cs_seq; in hl_cs_ioctl()
1155 if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset)) in hl_cs_ioctl()
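The do_ctx_switch path above uses two tokens: thread_ctx_switch_token is claimed with atomic_cmpxchg() so that exactly one submitter runs the context switch and the restore CS, and thread_ctx_switch_wait_token is what the other submitters poll (via hl_poll_timeout_memory(), with a timeout) until the winner is done. A condensed sketch of the scheme with illustrative names:

#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/types.h>

static void demo_maybe_ctx_switch(atomic_t *switch_token, u32 *wait_token)
{
	/* The token starts at 1: exactly one caller wins the cmpxchg. */
	if (atomic_cmpxchg(switch_token, 1, 0) == 1) {
		/* Winner: run the context switch / restore phase here,
		 * then let the other submitters through.
		 */
		WRITE_ONCE(*wait_token, 1);
		return;
	}

	/* Everyone else waits until the winner has published completion;
	 * the driver bounds this wait with a timeout.
	 */
	while (!READ_ONCE(*wait_token))
		usleep_range(100, 200);
}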
1178 if (rc == -EINVAL) in _hl_cs_wait_ioctl()
1179 dev_notice_ratelimited(hdev->dev, in _hl_cs_wait_ioctl()
1180 "Can't wait on CS %llu because current CS is at seq %llu\n", in _hl_cs_wait_ioctl()
1181 seq, ctx->cs_sequence); in _hl_cs_wait_ioctl()
1184 rc = completion_done(&fence->completion); in _hl_cs_wait_ioctl()
1187 &fence->completion, timeout); in _hl_cs_wait_ioctl()
1189 if (fence->error == -ETIMEDOUT) in _hl_cs_wait_ioctl()
1190 rc = -ETIMEDOUT; in _hl_cs_wait_ioctl()
1191 else if (fence->error == -EIO) in _hl_cs_wait_ioctl()
1192 rc = -EIO; in _hl_cs_wait_ioctl()
1196 dev_dbg(hdev->dev, in _hl_cs_wait_ioctl()
1197 "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", in _hl_cs_wait_ioctl()
1198 seq, ctx->cs_sequence); in _hl_cs_wait_ioctl()
1209 struct hl_device *hdev = hpriv->hdev; in hl_cs_wait_ioctl()
1211 u64 seq = args->in.seq; in hl_cs_wait_ioctl()
1214 rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq); in hl_cs_wait_ioctl()
1219 if (rc == -ERESTARTSYS) { in hl_cs_wait_ioctl()
1220 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
1221 "user process got signal while waiting for CS handle %llu\n", in hl_cs_wait_ioctl()
1223 args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED; in hl_cs_wait_ioctl()
1224 rc = -EINTR; in hl_cs_wait_ioctl()
1225 } else if (rc == -ETIMEDOUT) { in hl_cs_wait_ioctl()
1226 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
1227 "CS %llu has timed-out while user process is waiting for it\n", in hl_cs_wait_ioctl()
1229 args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT; in hl_cs_wait_ioctl()
1230 } else if (rc == -EIO) { in hl_cs_wait_ioctl()
1231 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
1232 "CS %llu has been aborted while user process is waiting for it\n", in hl_cs_wait_ioctl()
1234 args->out.status = HL_WAIT_CS_STATUS_ABORTED; in hl_cs_wait_ioctl()
1240 args->out.status = HL_WAIT_CS_STATUS_BUSY; in hl_cs_wait_ioctl()
1242 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; in hl_cs_wait_ioctl()
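For completeness, this is roughly how userspace drives the two ioctls shown above: submit a CS, read back out.seq, then wait on it and inspect out.status. The field names (chunks_execute, num_chunks_execute, seq, timeout_us, status, HL_WAIT_CS_STATUS_COMPLETED) come from the driver code above; the header path, the ioctl macros and the error handling are assumptions made for this sketch:

#include <sys/ioctl.h>
#include <linux/types.h>
#include <misc/habanalabs.h>	/* assumed uAPI header location */

static int demo_submit_and_wait(int fd, __u64 chunks_ptr, __u32 num_chunks)
{
	union hl_cs_args cs_args = {0};
	union hl_wait_cs_args wait_args = {0};

	/* Execution-phase chunk array, as consumed by cs_ioctl_default(). */
	cs_args.in.chunks_execute = chunks_ptr;
	cs_args.in.num_chunks_execute = num_chunks;

	if (ioctl(fd, HL_IOCTL_CS, &cs_args))		/* assumed ioctl macro */
		return -1;

	/* Wait for the sequence number handed back by the submit ioctl. */
	wait_args.in.seq = cs_args.out.seq;
	wait_args.in.timeout_us = 1000000;		/* 1 second */

	if (ioctl(fd, HL_IOCTL_WAIT_CS, &wait_args))	/* assumed ioctl macro */
		return -1;

	return wait_args.out.status == HL_WAIT_CS_STATUS_COMPLETED ? 0 : -1;
}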