Lines Matching +full:cs +full:- +full:0
1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2021 HabanaLabs, Ltd.
23 * enum hl_cs_wait_status - cs wait status
24 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
25 * @CS_WAIT_STATUS_COMPLETED: cs completed
26 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
65 * CS outcome store supports the following operations: in hl_push_cs_outcome()
66 * push outcome - store a recent CS outcome in the store in hl_push_cs_outcome()
67 * pop outcome - retrieve a SPECIFIC (by seq) CS outcome from the store in hl_push_cs_outcome()
69 * It has a pre-allocated amount of nodes, each node stores in hl_push_cs_outcome()
70 * a single CS outcome. in hl_push_cs_outcome()
84 spin_lock_irqsave(&outcome_store->db_lock, flags); in hl_push_cs_outcome()
86 if (list_empty(&outcome_store->free_list)) { in hl_push_cs_outcome()
87 node = list_last_entry(&outcome_store->used_list, in hl_push_cs_outcome()
89 hash_del(&node->map_link); in hl_push_cs_outcome()
90 dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq); in hl_push_cs_outcome()
92 node = list_last_entry(&outcome_store->free_list, in hl_push_cs_outcome()
96 list_del_init(&node->list_link); in hl_push_cs_outcome()
98 node->seq = seq; in hl_push_cs_outcome()
99 node->ts = ts; in hl_push_cs_outcome()
100 node->error = error; in hl_push_cs_outcome()
102 list_add(&node->list_link, &outcome_store->used_list); in hl_push_cs_outcome()
103 hash_add(outcome_store->outcome_map, &node->map_link, node->seq); in hl_push_cs_outcome()
105 spin_unlock_irqrestore(&outcome_store->db_lock, flags); in hl_push_cs_outcome()
114 spin_lock_irqsave(&outcome_store->db_lock, flags); in hl_pop_cs_outcome()
116 hash_for_each_possible(outcome_store->outcome_map, node, map_link, seq) in hl_pop_cs_outcome()
117 if (node->seq == seq) { in hl_pop_cs_outcome()
118 *ts = node->ts; in hl_pop_cs_outcome()
119 *error = node->error; in hl_pop_cs_outcome()
121 hash_del(&node->map_link); in hl_pop_cs_outcome()
122 list_del_init(&node->list_link); in hl_pop_cs_outcome()
123 list_add(&node->list_link, &outcome_store->free_list); in hl_pop_cs_outcome()
125 spin_unlock_irqrestore(&outcome_store->db_lock, flags); in hl_pop_cs_outcome()
130 spin_unlock_irqrestore(&outcome_store->db_lock, flags); in hl_pop_cs_outcome()
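
For reference, the push/pop scheme described in the comments above can be reduced to a standalone sketch built from generic kernel primitives: a hashtable keyed by sequence number plus a free list and an LRU-ordered used list. Everything below (outcome_cache, cache_push, cache_pop) is hypothetical illustration, not driver code; pre-allocation of the node pool and list/hashtable initialization are omitted.

#include <linux/hashtable.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/ktime.h>

struct outcome_node {
        struct list_head list_link;     /* member of free_list or used_list */
        struct hlist_node map_link;     /* member of the seq -> node hashtable */
        ktime_t ts;
        u64 seq;
        int error;
};

struct outcome_cache {
        DECLARE_HASHTABLE(map, 6);      /* 64 buckets */
        struct list_head free_list;     /* pre-allocated, currently unused nodes */
        struct list_head used_list;     /* most recently pushed outcome first */
        spinlock_t lock;
};

static void cache_push(struct outcome_cache *c, u64 seq, ktime_t ts, int error)
{
        struct outcome_node *node;
        unsigned long flags;

        spin_lock_irqsave(&c->lock, flags);

        if (list_empty(&c->free_list)) {
                /* pool exhausted: recycle the oldest entry, its outcome is lost */
                node = list_last_entry(&c->used_list, struct outcome_node, list_link);
                hash_del(&node->map_link);
        } else {
                node = list_last_entry(&c->free_list, struct outcome_node, list_link);
        }
        list_del_init(&node->list_link);

        node->seq = seq;
        node->ts = ts;
        node->error = error;

        list_add(&node->list_link, &c->used_list);
        hash_add(c->map, &node->map_link, node->seq);

        spin_unlock_irqrestore(&c->lock, flags);
}

static bool cache_pop(struct outcome_cache *c, u64 seq, ktime_t *ts, int *error)
{
        struct outcome_node *node;
        unsigned long flags;

        spin_lock_irqsave(&c->lock, flags);

        hash_for_each_possible(c->map, node, map_link, seq)
                if (node->seq == seq) {
                        *ts = node->ts;
                        *error = node->error;
                        hash_del(&node->map_link);
                        list_del_init(&node->list_link);
                        list_add(&node->list_link, &c->free_list);
                        spin_unlock_irqrestore(&c->lock, flags);
                        return true;
                }

        spin_unlock_irqrestore(&c->lock, flags);
        return false;
}
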
139 struct hl_device *hdev = hw_sob->hdev; in hl_sob_reset()
141 dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id); in hl_sob_reset()
143 hdev->asic_funcs->reset_sob(hdev, hw_sob); in hl_sob_reset()
145 hw_sob->need_reset = false; in hl_sob_reset()
152 struct hl_device *hdev = hw_sob->hdev; in hl_sob_reset_error()
154 dev_crit(hdev->dev, in hl_sob_reset_error()
156 hw_sob->q_idx, hw_sob->sob_id); in hl_sob_reset_error()
162 kref_put(&hw_sob->kref, hl_sob_reset); in hw_sob_put()
168 kref_put(&hw_sob->kref, hl_sob_reset_error); in hw_sob_put_err()
174 kref_get(&hw_sob->kref); in hw_sob_get()
178 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
183 * Return: 0 if given parameters are valid
189 if (sob_mask == 0) in hl_gen_sob_mask()
190 return -EINVAL; in hl_gen_sob_mask()
192 if (sob_mask == 0x1) { in hl_gen_sob_mask()
193 *mask = ~(1 << (sob_base & 0x7)); in hl_gen_sob_mask()
196 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--) in hl_gen_sob_mask()
200 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1)) in hl_gen_sob_mask()
201 return -EINVAL; in hl_gen_sob_mask()
206 return 0; in hl_gen_sob_mask()
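
A hedged usage sketch for hl_gen_sob_mask() above; sob_mask_examples() is hypothetical and only exercises the behavior visible in the matched lines (the single-SOB path and the validity checks). The concrete values assume HL_MAX_SOBS_PER_MONITOR covers the group of 8 SOBs that the BITS_PER_BYTE loop walks.

static void sob_mask_examples(void)
{
        u8 mask;
        int rc;

        /* an empty sob_mask is rejected outright */
        rc = hl_gen_sob_mask(10, 0, &mask);             /* rc == -EINVAL */

        /* single SOB: only the bit of sob_base within its group of 8 is
         * cleared; 10 & 0x7 == 2, so mask == (u8)~(1 << 2) == 0xfb
         */
        rc = hl_gen_sob_mask(10, 0x1, &mask);           /* rc == 0, mask == 0xfb */

        /* range check: sob_base 14 leaves room for bit positions 0..1 only
         * (HL_MAX_SOBS_PER_MONITOR - (14 & 0x7) - 1 == 1), so a mask whose
         * highest set bit is 3 is rejected
         */
        rc = hl_gen_sob_mask(14, 0x8, &mask);           /* rc == -EINVAL */

        (void)rc;
        (void)mask;
}
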
223 kref_put(&fence->refcount, hl_fence_release); in hl_fence_put()
230 for (i = 0; i < len; i++, fence++) in hl_fences_put()
237 kref_get(&fence->refcount); in hl_fence_get()
242 kref_init(&fence->refcount); in hl_fence_init()
243 fence->cs_sequence = sequence; in hl_fence_init()
244 fence->error = 0; in hl_fence_init()
245 fence->timestamp = ktime_set(0, 0); in hl_fence_init()
246 fence->mcs_handling_done = false; in hl_fence_init()
247 init_completion(&fence->completion); in hl_fence_init()
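
A short sketch of the reference-counting contract implied by hl_fence_init()/hl_fence_get()/hl_fence_put() above. fence_refcount_sketch() is hypothetical; it assumes the fence is embedded in an hl_cs_compl allocated by allocate_cs() (as in the driver), so the final put ends up in hl_fence_release().

static void fence_refcount_sketch(struct hl_cs_compl *cs_cmpl, u64 seq)
{
        struct hl_fence *fence = &cs_cmpl->base_fence;

        hl_fence_init(fence, seq);              /* refcount = 1, owned by the submitted CS */
        hl_fence_get(fence);                    /* extra reference for a waiter */

        complete_all(&fence->completion);       /* the CS has completed */

        hl_fence_put(fence);                    /* waiter side is done */
        hl_fence_put(fence);                    /* last reference: hl_fence_release() frees cs_cmpl */
}
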
250 void cs_get(struct hl_cs *cs) in cs_get() argument
252 kref_get(&cs->refcount); in cs_get()
255 static int cs_get_unless_zero(struct hl_cs *cs) in cs_get_unless_zero() argument
257 return kref_get_unless_zero(&cs->refcount); in cs_get_unless_zero()
260 static void cs_put(struct hl_cs *cs) in cs_put() argument
262 kref_put(&cs->refcount, cs_do_release); in cs_put()
274 kref_put(&job->refcount, cs_job_do_release); in hl_cs_job_put()
277 bool cs_needs_completion(struct hl_cs *cs) in cs_needs_completion() argument
279 /* In case this is a staged CS, only the last CS in the sequence should in cs_needs_completion()
280 * get a completion; any non-staged CS will always get a completion in cs_needs_completion()
282 if (cs->staged_cs && !cs->staged_last) in cs_needs_completion()
288 bool cs_needs_timeout(struct hl_cs *cs) in cs_needs_timeout() argument
290 /* In case this is a staged CS, only the first CS in the sequence should in cs_needs_timeout()
291 * get a timeout; any non-staged CS will always get a timeout in cs_needs_timeout()
293 if (cs->staged_cs && !cs->staged_first) in cs_needs_timeout()
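
To make the two predicates above concrete, here is a hypothetical three-part staged submission (the flag names come from the driver, the sketch itself does not; it assumes the flags were set the way cs_staged_submission() further down sets them):

static void staged_flags_example(struct hl_cs *first, struct hl_cs *middle,
                                 struct hl_cs *last)
{
        /* assume: all three have staged_cs set, `first` also has staged_first,
         * `last` also has staged_last
         */
        WARN_ON(cs_needs_completion(first));    /* false - no completion */
        WARN_ON(cs_needs_completion(middle));   /* false - no completion */
        WARN_ON(!cs_needs_completion(last));    /* true  - signals userspace */

        WARN_ON(!cs_needs_timeout(first));      /* true  - arms the TDR once */
        WARN_ON(cs_needs_timeout(middle));      /* false */
        WARN_ON(cs_needs_timeout(last));        /* false */
}
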
302 return (job->queue_type == QUEUE_TYPE_EXT); in is_cb_patched()
306 * cs_parser - parse the user command submission
318 struct hl_device *hdev = hpriv->hdev; in cs_parser()
322 parser.ctx_id = job->cs->ctx->asid; in cs_parser()
323 parser.cs_sequence = job->cs->sequence; in cs_parser()
324 parser.job_id = job->id; in cs_parser()
326 parser.hw_queue_id = job->hw_queue_id; in cs_parser()
327 parser.job_userptr_list = &job->userptr_list; in cs_parser()
329 parser.user_cb = job->user_cb; in cs_parser()
330 parser.user_cb_size = job->user_cb_size; in cs_parser()
331 parser.queue_type = job->queue_type; in cs_parser()
332 parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; in cs_parser()
333 job->patched_cb = NULL; in cs_parser()
334 parser.completion = cs_needs_completion(job->cs); in cs_parser()
336 rc = hdev->asic_funcs->cs_parser(hdev, &parser); in cs_parser()
340 job->patched_cb = parser.patched_cb; in cs_parser()
341 job->job_cb_size = parser.patched_cb_size; in cs_parser()
342 job->contains_dma_pkt = parser.contains_dma_pkt; in cs_parser()
343 atomic_inc(&job->patched_cb->cs_cnt); in cs_parser()
349 * won't be accessed again for this CS in cs_parser()
351 atomic_dec(&job->user_cb->cs_cnt); in cs_parser()
352 hl_cb_put(job->user_cb); in cs_parser()
353 job->user_cb = NULL; in cs_parser()
355 job->job_cb_size = job->user_cb_size; in cs_parser()
363 struct hl_cs *cs = job->cs; in hl_complete_job() local
366 hl_userptr_delete_list(hdev, &job->userptr_list); in hl_complete_job()
372 if (job->patched_cb) { in hl_complete_job()
373 atomic_dec(&job->patched_cb->cs_cnt); in hl_complete_job()
374 hl_cb_put(job->patched_cb); in hl_complete_job()
383 if (job->is_kernel_allocated_cb && in hl_complete_job()
384 (job->queue_type == QUEUE_TYPE_HW || job->queue_type == QUEUE_TYPE_INT)) { in hl_complete_job()
385 atomic_dec(&job->user_cb->cs_cnt); in hl_complete_job()
386 hl_cb_put(job->user_cb); in hl_complete_job()
393 spin_lock(&cs->job_lock); in hl_complete_job()
394 list_del(&job->cs_node); in hl_complete_job()
395 spin_unlock(&cs->job_lock); in hl_complete_job()
399 /* We decrement reference only for a CS that gets completion in hl_complete_job()
400 * because the reference was incremented only for this kind of CS in hl_complete_job()
403 * In staged submission, only the last CS marked as 'staged_last' in hl_complete_job()
405 * As for all the rest CS's in the staged submission which do not get in hl_complete_job()
406 * completion, their CS reference will be decremented by the in hl_complete_job()
407 * 'staged_last' CS during the CS release flow. in hl_complete_job()
408 * All relevant PQ CI counters will be incremented during the CS release in hl_complete_job()
411 if (cs_needs_completion(cs) && in hl_complete_job()
412 (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) { in hl_complete_job()
414 /* In CS based completions, the timestamp is already available, in hl_complete_job()
417 if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB) in hl_complete_job()
418 cs->completion_timestamp = job->timestamp; in hl_complete_job()
420 cs_put(cs); in hl_complete_job()
427 * hl_staged_cs_find_first - locate the first CS in this staged submission
432 * @note: This function must be called under 'hdev->cs_mirror_lock'
434 * Find and return a CS pointer with the given sequence
438 struct hl_cs *cs; in hl_staged_cs_find_first() local
440 list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node) in hl_staged_cs_find_first()
441 if (cs->staged_cs && cs->staged_first && in hl_staged_cs_find_first()
442 cs->sequence == cs_seq) in hl_staged_cs_find_first()
443 return cs; in hl_staged_cs_find_first()
449 * is_staged_cs_last_exists - returns true if the last CS in sequence exists
452 * @cs: staged submission member
455 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs) in is_staged_cs_last_exists() argument
459 last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs, in is_staged_cs_last_exists()
462 if (last_entry->staged_last) in is_staged_cs_last_exists()
469 * staged_cs_get - get CS reference if this CS is a part of a staged CS
472 * @cs: current CS
475 * Increment CS reference for every CS in this staged submission except for
476 * the CS which gets the completion.
478 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs) in staged_cs_get() argument
480 /* Only the last CS in this staged submission will get a completion. in staged_cs_get()
481 * We must increment the reference for all other CS's in this in staged_cs_get()
485 if (!cs->staged_last) in staged_cs_get()
486 cs_get(cs); in staged_cs_get()
490 * staged_cs_put - put a CS in case it is part of staged submission
493 * @cs: CS to put
495 * This function decrements a CS reference (for a non-completion CS)
497 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs) in staged_cs_put() argument
499 /* We release all CS's in a staged submission except the last in staged_cs_put()
500 * CS, whose reference we never incremented. in staged_cs_put()
502 if (!cs_needs_completion(cs)) in staged_cs_put()
503 cs_put(cs); in staged_cs_put()
506 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) in cs_handle_tdr() argument
510 if (!cs_needs_timeout(cs)) in cs_handle_tdr()
513 spin_lock(&hdev->cs_mirror_lock); in cs_handle_tdr()
516 * Hence, we choose the CS that reaches this function first, which is in cs_handle_tdr()
517 * the CS marked as 'staged_last'. in cs_handle_tdr()
518 * In case a single staged CS was submitted which has both first and last in cs_handle_tdr()
520 * removed the CS node from the list before getting here, in cs_handle_tdr()
521 * in such cases just continue with the CS to cancel its TDR work. in cs_handle_tdr()
523 if (cs->staged_cs && cs->staged_last) { in cs_handle_tdr()
524 first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence); in cs_handle_tdr()
526 cs = first_cs; in cs_handle_tdr()
529 spin_unlock(&hdev->cs_mirror_lock); in cs_handle_tdr()
531 /* Don't cancel TDR in case this CS was timedout because we might be in cs_handle_tdr()
534 if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT) in cs_handle_tdr()
537 if (cs->tdr_active) in cs_handle_tdr()
538 cancel_delayed_work_sync(&cs->work_tdr); in cs_handle_tdr()
540 spin_lock(&hdev->cs_mirror_lock); in cs_handle_tdr()
542 /* queue TDR for next CS */ in cs_handle_tdr()
543 list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node) in cs_handle_tdr()
549 if (next && !next->tdr_active) { in cs_handle_tdr()
550 next->tdr_active = true; in cs_handle_tdr()
551 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies); in cs_handle_tdr()
554 spin_unlock(&hdev->cs_mirror_lock); in cs_handle_tdr()
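
The cancel-then-rearm flow of cs_handle_tdr() reduces to the following standalone sketch; struct pending_entry, hand_off_watchdog() and every other name here are hypothetical, and the real driver additionally skips list entries that do not need a timeout.

struct pending_entry {
        struct list_head node;
        struct delayed_work work_tdr;
        bool tdr_active;
};

/* called when the entry owning the active watchdog completes: cancel its
 * delayed work (outside the lock, since the work may take it) and arm the
 * watchdog of the oldest entry still pending
 */
static void hand_off_watchdog(struct pending_entry *done, struct list_head *pending,
                              spinlock_t *lock, unsigned long timeout)
{
        struct pending_entry *next;

        if (done->tdr_active)
                cancel_delayed_work_sync(&done->work_tdr);

        spin_lock(lock);

        next = list_first_entry_or_null(pending, struct pending_entry, node);
        if (next && !next->tdr_active) {
                next->tdr_active = true;
                schedule_delayed_work(&next->work_tdr, timeout);
        }

        spin_unlock(lock);
}
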
558 * force_complete_multi_cs - complete all contexts that wait on multi-CS
566 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { in force_complete_multi_cs()
569 mcs_compl = &hdev->multi_cs_completion[i]; in force_complete_multi_cs()
571 spin_lock(&mcs_compl->lock); in force_complete_multi_cs()
573 if (!mcs_compl->used) { in force_complete_multi_cs()
574 spin_unlock(&mcs_compl->lock); in force_complete_multi_cs()
579 * multi-CS. in force_complete_multi_cs()
583 dev_err(hdev->dev, in force_complete_multi_cs()
584 "multi-CS completion context %d still waiting when calling force completion\n", in force_complete_multi_cs()
586 complete_all(&mcs_compl->completion); in force_complete_multi_cs()
587 spin_unlock(&mcs_compl->lock); in force_complete_multi_cs()
592 * complete_multi_cs - complete all waiting entities on multi-CS
595 * @cs: CS structure
597 * with the completed CS.
599 * - a completed CS worked on stream master QID 4, multi CS completion
602 * - a completed CS worked on stream master QID 4, multi CS completion
606 static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) in complete_multi_cs() argument
608 struct hl_fence *fence = cs->fence; in complete_multi_cs()
611 /* in case of multi CS check for completion only for the first CS */ in complete_multi_cs()
612 if (cs->staged_cs && !cs->staged_first) in complete_multi_cs()
615 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { in complete_multi_cs()
618 mcs_compl = &hdev->multi_cs_completion[i]; in complete_multi_cs()
619 if (!mcs_compl->used) in complete_multi_cs()
622 spin_lock(&mcs_compl->lock); in complete_multi_cs()
627 * 2. the completed CS has at least one overlapping stream in complete_multi_cs()
630 if (mcs_compl->used && in complete_multi_cs()
631 (fence->stream_master_qid_map & in complete_multi_cs()
632 mcs_compl->stream_master_qid_map)) { in complete_multi_cs()
633 /* extract the timestamp only of first completed CS */ in complete_multi_cs()
634 if (!mcs_compl->timestamp) in complete_multi_cs()
635 mcs_compl->timestamp = ktime_to_ns(fence->timestamp); in complete_multi_cs()
637 complete_all(&mcs_compl->completion); in complete_multi_cs()
643 * least one CS will be set as completed when polling in complete_multi_cs()
646 fence->mcs_handling_done = true; in complete_multi_cs()
649 spin_unlock(&mcs_compl->lock); in complete_multi_cs()
651 /* In case CS completed without mcs completion initialized */ in complete_multi_cs()
652 fence->mcs_handling_done = true; in complete_multi_cs()
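
The wake-up decision above boils down to a bitwise AND of two small bitmaps. A hypothetical illustration follows; bit positions stand for stream-master indices, and in the driver the index is derived from hdev->stream_master_qid_arr (see get_stream_master_qid_mask() further down).

static void mcs_overlap_example(void)
{
        u32 cs_map   = BIT(0) | BIT(1); /* completed CS used stream masters 0 and 1 */
        u32 waiter_a = BIT(1);          /* multi-CS waiter A cares about index 1 */
        u32 waiter_b = BIT(3);          /* multi-CS waiter B cares about index 3 */

        bool wake_a = cs_map & waiter_a;        /* true:  overlap, complete_all() is called */
        bool wake_b = cs_map & waiter_b;        /* false: this waiter keeps sleeping */

        (void)wake_a;
        (void)wake_b;
}
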
656 struct hl_cs *cs, in cs_release_sob_reset_handler() argument
659 /* Skip this handler if the cs wasn't submitted, to avoid putting in cs_release_sob_reset_handler()
663 if (!hl_cs_cmpl->hw_sob || !cs->submitted) in cs_release_sob_reset_handler()
666 spin_lock(&hl_cs_cmpl->lock); in cs_release_sob_reset_handler()
669 * we get refcount upon reservation of signals or signal/wait cs for the in cs_release_sob_reset_handler()
670 * hw_sob object, and need to put it when the first staged cs in cs_release_sob_reset_handler()
671 * (which contains the encaps signals) or cs signal/wait is completed. in cs_release_sob_reset_handler()
673 if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || in cs_release_sob_reset_handler()
674 (hl_cs_cmpl->type == CS_TYPE_WAIT) || in cs_release_sob_reset_handler()
675 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) || in cs_release_sob_reset_handler()
676 (!!hl_cs_cmpl->encaps_signals)) { in cs_release_sob_reset_handler()
677 dev_dbg(hdev->dev, in cs_release_sob_reset_handler()
678 "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n", in cs_release_sob_reset_handler()
679 hl_cs_cmpl->cs_seq, in cs_release_sob_reset_handler()
680 hl_cs_cmpl->type, in cs_release_sob_reset_handler()
681 hl_cs_cmpl->hw_sob->sob_id, in cs_release_sob_reset_handler()
682 hl_cs_cmpl->sob_val); in cs_release_sob_reset_handler()
684 hw_sob_put(hl_cs_cmpl->hw_sob); in cs_release_sob_reset_handler()
686 if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) in cs_release_sob_reset_handler()
687 hdev->asic_funcs->reset_sob_group(hdev, in cs_release_sob_reset_handler()
688 hl_cs_cmpl->sob_group); in cs_release_sob_reset_handler()
691 spin_unlock(&hl_cs_cmpl->lock); in cs_release_sob_reset_handler()
696 struct hl_cs *cs = container_of(ref, struct hl_cs, refcount); in cs_do_release() local
697 struct hl_device *hdev = cs->ctx->hdev; in cs_do_release()
700 container_of(cs->fence, struct hl_cs_compl, base_fence); in cs_do_release()
702 cs->completed = true; in cs_do_release()
706 * finished, because each one of them took refcnt to CS, we still in cs_do_release()
708 * will have leaked memory and what's worse, the CS object (and in cs_do_release()
712 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in cs_do_release()
715 if (!cs->submitted) { in cs_do_release()
717 * In case the wait for signal CS was submitted, the fence put in cs_do_release()
721 if (cs->type == CS_TYPE_WAIT || in cs_do_release()
722 cs->type == CS_TYPE_COLLECTIVE_WAIT) in cs_do_release()
723 hl_fence_put(cs->signal_fence); in cs_do_release()
729 hl_hw_queue_update_ci(cs); in cs_do_release()
731 /* remove CS from CS mirror list */ in cs_do_release()
732 spin_lock(&hdev->cs_mirror_lock); in cs_do_release()
733 list_del_init(&cs->mirror_node); in cs_do_release()
734 spin_unlock(&hdev->cs_mirror_lock); in cs_do_release()
736 cs_handle_tdr(hdev, cs); in cs_do_release()
738 if (cs->staged_cs) { in cs_do_release()
739 /* the completion CS decrements reference for the entire in cs_do_release()
742 if (cs->staged_last) { in cs_do_release()
746 &cs->staged_cs_node, staged_cs_node) in cs_do_release()
750 /* A staged CS will be a member in the list only after it in cs_do_release()
754 if (cs->submitted) { in cs_do_release()
755 spin_lock(&hdev->cs_mirror_lock); in cs_do_release()
756 list_del(&cs->staged_cs_node); in cs_do_release()
757 spin_unlock(&hdev->cs_mirror_lock); in cs_do_release()
760 /* decrement refcount to handle when first staged cs in cs_do_release()
763 if (hl_cs_cmpl->encaps_signals) in cs_do_release()
764 kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount, in cs_do_release()
768 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals) in cs_do_release()
769 kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); in cs_do_release()
775 hl_debugfs_remove_cs(cs); in cs_do_release()
777 hdev->shadow_cs_queue[cs->sequence & (hdev->asic_prop.max_pending_cs - 1)] = NULL; in cs_do_release()
783 if (cs->timedout) in cs_do_release()
784 cs->fence->error = -ETIMEDOUT; in cs_do_release()
785 else if (cs->aborted) in cs_do_release()
786 cs->fence->error = -EIO; in cs_do_release()
787 else if (!cs->submitted) in cs_do_release()
788 cs->fence->error = -EBUSY; in cs_do_release()
790 if (unlikely(cs->skip_reset_on_timeout)) { in cs_do_release()
791 dev_err(hdev->dev, in cs_do_release()
793 cs->sequence, in cs_do_release()
794 div_u64(jiffies - cs->submission_time_jiffies, HZ)); in cs_do_release()
797 if (cs->timestamp) { in cs_do_release()
798 cs->fence->timestamp = cs->completion_timestamp; in cs_do_release()
799 hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence, in cs_do_release()
800 cs->fence->timestamp, cs->fence->error); in cs_do_release()
803 hl_ctx_put(cs->ctx); in cs_do_release()
805 complete_all(&cs->fence->completion); in cs_do_release()
806 complete_multi_cs(hdev, cs); in cs_do_release()
808 cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl); in cs_do_release()
810 hl_fence_put(cs->fence); in cs_do_release()
812 kfree(cs->jobs_in_queue_cnt); in cs_do_release()
813 kfree(cs); in cs_do_release()
818 struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work); in cs_timedout() local
821 u64 event_mask = 0x0; in cs_timedout()
825 skip_reset_on_timeout = cs->skip_reset_on_timeout; in cs_timedout()
827 rc = cs_get_unless_zero(cs); in cs_timedout()
831 if ((!cs->submitted) || (cs->completed)) { in cs_timedout()
832 cs_put(cs); in cs_timedout()
836 hdev = cs->ctx->hdev; in cs_timedout()
839 if (hdev->reset_on_lockup) in cs_timedout()
842 hdev->reset_info.needs_reset = true; in cs_timedout()
844 /* Mark the CS as timed out so we won't try to cancel its TDR */ in cs_timedout()
845 cs->timedout = true; in cs_timedout()
848 /* Save only the first CS timeout parameters */ in cs_timedout()
849 rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0); in cs_timedout()
851 hdev->captured_err_info.cs_timeout.timestamp = ktime_get(); in cs_timedout()
852 hdev->captured_err_info.cs_timeout.seq = cs->sequence; in cs_timedout()
856 timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000; in cs_timedout()
858 switch (cs->type) { in cs_timedout()
860 dev_err(hdev->dev, in cs_timedout()
862 cs->sequence, timeout_sec); in cs_timedout()
866 dev_err(hdev->dev, in cs_timedout()
868 cs->sequence, timeout_sec); in cs_timedout()
872 dev_err(hdev->dev, in cs_timedout()
874 cs->sequence, timeout_sec); in cs_timedout()
878 dev_err(hdev->dev, in cs_timedout()
880 cs->sequence, timeout_sec); in cs_timedout()
886 dev_err(hdev->dev, "Error during system state dump %d\n", rc); in cs_timedout()
888 cs_put(cs); in cs_timedout()
905 struct hl_cs *cs; in allocate_cs() local
908 cntr = &hdev->aggregated_cs_counters; in allocate_cs()
910 cs = kzalloc(sizeof(*cs), GFP_ATOMIC); in allocate_cs()
911 if (!cs) in allocate_cs()
912 cs = kzalloc(sizeof(*cs), GFP_KERNEL); in allocate_cs()
914 if (!cs) { in allocate_cs()
915 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in allocate_cs()
916 atomic64_inc(&cntr->out_of_mem_drop_cnt); in allocate_cs()
917 return -ENOMEM; in allocate_cs()
923 cs->ctx = ctx; in allocate_cs()
924 cs->submitted = false; in allocate_cs()
925 cs->completed = false; in allocate_cs()
926 cs->type = cs_type; in allocate_cs()
927 cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); in allocate_cs()
928 cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); in allocate_cs()
929 cs->timeout_jiffies = timeout; in allocate_cs()
930 cs->skip_reset_on_timeout = in allocate_cs()
931 hdev->reset_info.skip_reset_on_timeout || in allocate_cs()
933 cs->submission_time_jiffies = jiffies; in allocate_cs()
934 INIT_LIST_HEAD(&cs->job_list); in allocate_cs()
935 INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); in allocate_cs()
936 kref_init(&cs->refcount); in allocate_cs()
937 spin_lock_init(&cs->job_lock); in allocate_cs()
944 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in allocate_cs()
945 atomic64_inc(&cntr->out_of_mem_drop_cnt); in allocate_cs()
946 rc = -ENOMEM; in allocate_cs()
950 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, in allocate_cs()
951 sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); in allocate_cs()
952 if (!cs->jobs_in_queue_cnt) in allocate_cs()
953 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, in allocate_cs()
954 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); in allocate_cs()
956 if (!cs->jobs_in_queue_cnt) { in allocate_cs()
957 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in allocate_cs()
958 atomic64_inc(&cntr->out_of_mem_drop_cnt); in allocate_cs()
959 rc = -ENOMEM; in allocate_cs()
963 cs_cmpl->hdev = hdev; in allocate_cs()
964 cs_cmpl->type = cs->type; in allocate_cs()
965 spin_lock_init(&cs_cmpl->lock); in allocate_cs()
966 cs->fence = &cs_cmpl->base_fence; in allocate_cs()
968 spin_lock(&ctx->cs_lock); in allocate_cs()
970 cs_cmpl->cs_seq = ctx->cs_sequence; in allocate_cs()
971 other = ctx->cs_pending[cs_cmpl->cs_seq & in allocate_cs()
972 (hdev->asic_prop.max_pending_cs - 1)]; in allocate_cs()
974 if (other && !completion_done(&other->completion)) { in allocate_cs()
979 * This causes a deadlock because this CS will never be in allocate_cs()
980 * completed as it depends on future CS's for completion. in allocate_cs()
982 if (other->cs_sequence == user_sequence) in allocate_cs()
983 dev_crit_ratelimited(hdev->dev, in allocate_cs()
984 "Staged CS %llu deadlock due to lack of resources", in allocate_cs()
987 dev_dbg_ratelimited(hdev->dev, in allocate_cs()
988 "Rejecting CS because of too many in-flights CS\n"); in allocate_cs()
989 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt); in allocate_cs()
990 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt); in allocate_cs()
991 rc = -EAGAIN; in allocate_cs()
996 hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq); in allocate_cs()
998 cs->sequence = cs_cmpl->cs_seq; in allocate_cs()
1000 ctx->cs_pending[cs_cmpl->cs_seq & in allocate_cs()
1001 (hdev->asic_prop.max_pending_cs - 1)] = in allocate_cs()
1002 &cs_cmpl->base_fence; in allocate_cs()
1003 ctx->cs_sequence++; in allocate_cs()
1005 hl_fence_get(&cs_cmpl->base_fence); in allocate_cs()
1009 spin_unlock(&ctx->cs_lock); in allocate_cs()
1011 *cs_new = cs; in allocate_cs()
1013 return 0; in allocate_cs()
1016 spin_unlock(&ctx->cs_lock); in allocate_cs()
1017 kfree(cs->jobs_in_queue_cnt); in allocate_cs()
1021 kfree(cs); in allocate_cs()
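
The cs_pending array in allocate_cs() is indexed with `seq & (max_pending_cs - 1)`, which only works when max_pending_cs is a power of two. A small hypothetical helper plus worked numbers:

static unsigned int pending_slot(u64 seq, u32 max_pending_cs)
{
        /* valid only when max_pending_cs is a power of two */
        return seq & (max_pending_cs - 1);
}

/* e.g. with max_pending_cs == 64, sequence 130 maps to slot 130 & 63 == 2;
 * a new CS may reuse that slot only once the fence stored there has
 * completed, otherwise allocate_cs() bails out with -EAGAIN
 */
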
1026 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) in cs_rollback() argument
1030 staged_cs_put(hdev, cs); in cs_rollback()
1032 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in cs_rollback()
1037 * release_reserved_encaps_signals() - release reserved encapsulated signals.
1040 * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with
1041 * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back.
1055 mgr = &ctx->sig_mgr; in release_reserved_encaps_signals()
1057 idr_for_each_entry(&mgr->handles, handle, id) in release_reserved_encaps_signals()
1058 if (handle->cs_seq == ULLONG_MAX) in release_reserved_encaps_signals()
1059 kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx); in release_reserved_encaps_signals()
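
The same walk-and-release pattern with a standalone IDR, for reference; struct my_handle, my_handle_release() and drop_unreserved() are hypothetical and simplified (the driver's release callback also takes care of removing the entry and dropping the context).

#include <linux/idr.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct my_handle {
        struct kref refcount;
        u64 cs_seq;             /* stays ULLONG_MAX until a CS is attached */
};

static void my_handle_release(struct kref *kref)
{
        kfree(container_of(kref, struct my_handle, refcount));
}

static void drop_unreserved(struct idr *handles)
{
        struct my_handle *h;
        int id;

        idr_for_each_entry(handles, h, id)
                if (h->cs_seq == ULLONG_MAX) {  /* reserved but never used by a CS */
                        idr_remove(handles, id);
                        kref_put(&h->refcount, my_handle_release);
                }
}
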
1067 struct hl_cs *cs, *tmp; in hl_cs_rollback_all() local
1070 flush_workqueue(hdev->ts_free_obj_wq); in hl_cs_rollback_all()
1072 /* flush all completions before iterating over the CS mirror list in in hl_cs_rollback_all()
1075 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) in hl_cs_rollback_all()
1076 flush_workqueue(hdev->cq_wq[i]); in hl_cs_rollback_all()
1078 flush_workqueue(hdev->cs_cmplt_wq); in hl_cs_rollback_all()
1081 /* Make sure we don't have leftovers in the CS mirror list */ in hl_cs_rollback_all()
1082 list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { in hl_cs_rollback_all()
1083 cs_get(cs); in hl_cs_rollback_all()
1084 cs->aborted = true; in hl_cs_rollback_all()
1085 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n", in hl_cs_rollback_all()
1086 cs->ctx->asid, cs->sequence); in hl_cs_rollback_all()
1087 cs_rollback(hdev, cs); in hl_cs_rollback_all()
1088 cs_put(cs); in hl_cs_rollback_all()
1102 spin_lock_irqsave(&interrupt->wait_list_lock, flags); in wake_pending_user_interrupt_threads()
1103 list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) { in wake_pending_user_interrupt_threads()
1104 pend->fence.error = -EIO; in wake_pending_user_interrupt_threads()
1105 complete_all(&pend->fence.completion); in wake_pending_user_interrupt_threads()
1107 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); in wake_pending_user_interrupt_threads()
1109 spin_lock_irqsave(&interrupt->ts_list_lock, flags); in wake_pending_user_interrupt_threads()
1110 list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) { in wake_pending_user_interrupt_threads()
1111 list_del(&pend->list_node); in wake_pending_user_interrupt_threads()
1112 hl_mmap_mem_buf_put(pend->ts_reg_info.buf); in wake_pending_user_interrupt_threads()
1113 hl_cb_put(pend->ts_reg_info.cq_cb); in wake_pending_user_interrupt_threads()
1115 spin_unlock_irqrestore(&interrupt->ts_list_lock, flags); in wake_pending_user_interrupt_threads()
1120 struct asic_fixed_properties *prop = &hdev->asic_prop; in hl_release_pending_user_interrupts()
1124 if (!prop->user_interrupt_count) in hl_release_pending_user_interrupts()
1134 for (i = 0 ; i < prop->user_interrupt_count ; i++) { in hl_release_pending_user_interrupts()
1135 interrupt = &hdev->user_interrupt[i]; in hl_release_pending_user_interrupts()
1139 interrupt = &hdev->common_user_cq_interrupt; in hl_release_pending_user_interrupts()
1142 interrupt = &hdev->common_decoder_interrupt; in hl_release_pending_user_interrupts()
1148 struct hl_cs *cs; in force_complete_cs() local
1150 spin_lock(&hdev->cs_mirror_lock); in force_complete_cs()
1152 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) { in force_complete_cs()
1153 cs->fence->error = -EIO; in force_complete_cs()
1154 complete_all(&cs->fence->completion); in force_complete_cs()
1157 spin_unlock(&hdev->cs_mirror_lock); in force_complete_cs()
1170 struct hl_cs *cs = job->cs; in job_wq_completion() local
1171 struct hl_device *hdev = cs->ctx->hdev; in job_wq_completion()
1179 struct hl_cs *cs = container_of(work, struct hl_cs, finish_work); in cs_completion() local
1180 struct hl_device *hdev = cs->ctx->hdev; in cs_completion()
1183 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in cs_completion()
1189 u32 active_cs_num = 0; in hl_get_active_cs_num()
1190 struct hl_cs *cs; in hl_get_active_cs_num() local
1192 spin_lock(&hdev->cs_mirror_lock); in hl_get_active_cs_num()
1194 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) in hl_get_active_cs_num()
1195 if (!cs->completed) in hl_get_active_cs_num()
1198 spin_unlock(&hdev->cs_mirror_lock); in hl_get_active_cs_num()
1208 struct asic_fixed_properties *asic = &hdev->asic_prop; in validate_queue_index()
1211 /* This must be checked here to prevent out-of-bounds access to in validate_queue_index()
1214 if (chunk->queue_index >= asic->max_queues) { in validate_queue_index()
1215 dev_err(hdev->dev, "Queue index %d is invalid\n", in validate_queue_index()
1216 chunk->queue_index); in validate_queue_index()
1217 return -EINVAL; in validate_queue_index()
1220 hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; in validate_queue_index()
1222 if (hw_queue_prop->type == QUEUE_TYPE_NA) { in validate_queue_index()
1223 dev_err(hdev->dev, "Queue index %d is not applicable\n", in validate_queue_index()
1224 chunk->queue_index); in validate_queue_index()
1225 return -EINVAL; in validate_queue_index()
1228 if (hw_queue_prop->binned) { in validate_queue_index()
1229 dev_err(hdev->dev, "Queue index %d is binned out\n", in validate_queue_index()
1230 chunk->queue_index); in validate_queue_index()
1231 return -EINVAL; in validate_queue_index()
1234 if (hw_queue_prop->driver_only) { in validate_queue_index()
1235 dev_err(hdev->dev, in validate_queue_index()
1237 chunk->queue_index); in validate_queue_index()
1238 return -EINVAL; in validate_queue_index()
1244 if (hw_queue_prop->type == QUEUE_TYPE_HW) { in validate_queue_index()
1245 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) { in validate_queue_index()
1246 if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) { in validate_queue_index()
1247 dev_err(hdev->dev, in validate_queue_index()
1249 chunk->queue_index); in validate_queue_index()
1250 return -EINVAL; in validate_queue_index()
1255 if (!(hw_queue_prop->cb_alloc_flags & in validate_queue_index()
1257 dev_err(hdev->dev, in validate_queue_index()
1259 chunk->queue_index); in validate_queue_index()
1260 return -EINVAL; in validate_queue_index()
1266 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags in validate_queue_index()
1270 *queue_type = hw_queue_prop->type; in validate_queue_index()
1271 return 0; in validate_queue_index()
1280 cb = hl_cb_get(mmg, chunk->cb_handle); in get_cb_from_cs_chunk()
1282 dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle); in get_cb_from_cs_chunk()
1286 if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) { in get_cb_from_cs_chunk()
1287 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size); in get_cb_from_cs_chunk()
1291 atomic_inc(&cb->cs_cnt); in get_cb_from_cs_chunk()
1312 kref_init(&job->refcount); in hl_cs_allocate_job()
1313 job->queue_type = queue_type; in hl_cs_allocate_job()
1314 job->is_kernel_allocated_cb = is_kernel_allocated_cb; in hl_cs_allocate_job()
1317 INIT_LIST_HEAD(&job->userptr_list); in hl_cs_allocate_job()
1319 if (job->queue_type == QUEUE_TYPE_EXT) in hl_cs_allocate_job()
1320 INIT_WORK(&job->finish_work, job_wq_completion); in hl_cs_allocate_job()
1349 struct hl_device *hdev = hpriv->hdev; in hl_cs_sanity_checks()
1350 struct hl_ctx *ctx = hpriv->ctx; in hl_cs_sanity_checks()
1357 for (i = 0 ; i < sizeof(args->in.pad) ; i++) in hl_cs_sanity_checks()
1358 if (args->in.pad[i]) { in hl_cs_sanity_checks()
1359 dev_dbg(hdev->dev, "Padding bytes must be 0\n"); in hl_cs_sanity_checks()
1360 return -EINVAL; in hl_cs_sanity_checks()
1364 return -EBUSY; in hl_cs_sanity_checks()
1367 if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) && in hl_cs_sanity_checks()
1368 !hdev->supports_staged_submission) { in hl_cs_sanity_checks()
1369 dev_err(hdev->dev, "staged submission not supported"); in hl_cs_sanity_checks()
1370 return -EPERM; in hl_cs_sanity_checks()
1373 cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK; in hl_cs_sanity_checks()
1376 dev_err(hdev->dev, in hl_cs_sanity_checks()
1377 "CS type flags are mutually exclusive, context %d\n", in hl_cs_sanity_checks()
1378 ctx->asid); in hl_cs_sanity_checks()
1379 return -EINVAL; in hl_cs_sanity_checks()
1383 num_chunks = args->in.num_chunks_execute; in hl_cs_sanity_checks()
1388 if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) { in hl_cs_sanity_checks()
1389 dev_err(hdev->dev, "Sync stream CS is not supported\n"); in hl_cs_sanity_checks()
1390 return -EINVAL; in hl_cs_sanity_checks()
1395 dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid); in hl_cs_sanity_checks()
1396 return -EINVAL; in hl_cs_sanity_checks()
1399 dev_err(hdev->dev, in hl_cs_sanity_checks()
1400 "Sync stream CS mandates one chunk only, context %d\n", in hl_cs_sanity_checks()
1401 ctx->asid); in hl_cs_sanity_checks()
1402 return -EINVAL; in hl_cs_sanity_checks()
1405 return 0; in hl_cs_sanity_checks()
1416 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1417 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1418 dev_err(hdev->dev, in hl_cs_copy_chunk_array()
1421 return -EINVAL; in hl_cs_copy_chunk_array()
1430 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in hl_cs_copy_chunk_array()
1431 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); in hl_cs_copy_chunk_array()
1432 return -ENOMEM; in hl_cs_copy_chunk_array()
1437 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1438 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1439 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); in hl_cs_copy_chunk_array()
1441 return -EFAULT; in hl_cs_copy_chunk_array()
1444 return 0; in hl_cs_copy_chunk_array()
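
The validate-allocate-copy pattern of hl_cs_copy_chunk_array() as a generic standalone sketch; copy_chunks() is hypothetical, and HL_MAX_JOBS_PER_CS is assumed to be the uapi per-CS chunk limit used by the size check (that check itself is not visible in the matched lines).

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

static int copy_chunks(struct hl_cs_chunk **out, void __user *uptr, u32 num)
{
        struct hl_cs_chunk *arr;
        size_t size;

        if (!num || num > HL_MAX_JOBS_PER_CS)   /* reject a bogus element count first */
                return -EINVAL;

        size = array_size(num, sizeof(*arr));

        arr = kmalloc(size, GFP_KERNEL);
        if (!arr)
                return -ENOMEM;

        if (copy_from_user(arr, uptr, size)) {
                kfree(arr);
                return -EFAULT;
        }

        *out = arr;
        return 0;
}
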
1447 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, in cs_staged_submission() argument
1452 return 0; in cs_staged_submission()
1454 cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST); in cs_staged_submission()
1455 cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST); in cs_staged_submission()
1457 if (cs->staged_first) { in cs_staged_submission()
1458 /* Staged CS sequence is the first CS sequence */ in cs_staged_submission()
1459 INIT_LIST_HEAD(&cs->staged_cs_node); in cs_staged_submission()
1460 cs->staged_sequence = cs->sequence; in cs_staged_submission()
1462 if (cs->encaps_signals) in cs_staged_submission()
1463 cs->encaps_sig_hdl_id = encaps_signal_handle; in cs_staged_submission()
1468 cs->staged_sequence = sequence; in cs_staged_submission()
1471 /* Increment CS reference if needed */ in cs_staged_submission()
1472 staged_cs_get(hdev, cs); in cs_staged_submission()
1474 cs->staged_cs = true; in cs_staged_submission()
1476 return 0; in cs_staged_submission()
1483 for (i = 0; i < hdev->stream_master_qid_arr_size; i++) in get_stream_master_qid_mask()
1484 if (qid == hdev->stream_master_qid_arr[i]) in get_stream_master_qid_mask()
1487 return 0; in get_stream_master_qid_mask()
1496 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_default()
1499 struct hl_ctx *ctx = hpriv->ctx; in cs_ioctl_default()
1501 struct hl_cs *cs; in cs_ioctl_default() local
1504 u8 stream_master_qid_map = 0; in cs_ioctl_default()
1507 cntr = &hdev->aggregated_cs_counters; in cs_ioctl_default()
1512 hpriv->ctx); in cs_ioctl_default()
1522 rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, in cs_ioctl_default()
1523 staged_mid ? user_sequence : ULLONG_MAX, &cs, flags, in cs_ioctl_default()
1528 *cs_seq = cs->sequence; in cs_ioctl_default()
1530 hl_debugfs_add_cs(cs); in cs_ioctl_default()
1532 rc = cs_staged_submission(hdev, cs, user_sequence, flags, in cs_ioctl_default()
1538 * rather than the internal CS sequence in cs_ioctl_default()
1540 if (cs->staged_cs) in cs_ioctl_default()
1541 *cs_seq = cs->staged_sequence; in cs_ioctl_default()
1543 /* Validate ALL the CS chunks before submitting the CS */ in cs_ioctl_default()
1544 for (i = 0 ; i < num_chunks ; i++) { in cs_ioctl_default()
1552 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_default()
1553 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_default()
1558 cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk); in cs_ioctl_default()
1561 &ctx->cs_counters.validation_drop_cnt); in cs_ioctl_default()
1562 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_default()
1563 rc = -EINVAL; in cs_ioctl_default()
1567 cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; in cs_ioctl_default()
1576 * queues of this CS in cs_ioctl_default()
1578 if (hdev->supports_wait_for_multi_cs) in cs_ioctl_default()
1581 chunk->queue_index); in cs_ioctl_default()
1590 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_default()
1591 atomic64_inc(&cntr->out_of_mem_drop_cnt); in cs_ioctl_default()
1592 dev_err(hdev->dev, "Failed to allocate a new job\n"); in cs_ioctl_default()
1593 rc = -ENOMEM; in cs_ioctl_default()
1600 job->id = i + 1; in cs_ioctl_default()
1601 job->cs = cs; in cs_ioctl_default()
1602 job->user_cb = cb; in cs_ioctl_default()
1603 job->user_cb_size = chunk->cb_size; in cs_ioctl_default()
1604 job->hw_queue_id = chunk->queue_index; in cs_ioctl_default()
1606 cs->jobs_in_queue_cnt[job->hw_queue_id]++; in cs_ioctl_default()
1607 cs->jobs_cnt++; in cs_ioctl_default()
1609 list_add_tail(&job->cs_node, &cs->job_list); in cs_ioctl_default()
1612 * Increment CS reference. When CS reference is 0, CS is in cs_ioctl_default()
1617 if (cs_needs_completion(cs) && in cs_ioctl_default()
1618 (job->queue_type == QUEUE_TYPE_EXT || in cs_ioctl_default()
1619 job->queue_type == QUEUE_TYPE_HW)) in cs_ioctl_default()
1620 cs_get(cs); in cs_ioctl_default()
1626 atomic64_inc(&ctx->cs_counters.parsing_drop_cnt); in cs_ioctl_default()
1627 atomic64_inc(&cntr->parsing_drop_cnt); in cs_ioctl_default()
1628 dev_err(hdev->dev, in cs_ioctl_default()
1629 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", in cs_ioctl_default()
1630 cs->ctx->asid, cs->sequence, job->id, rc); in cs_ioctl_default()
1635 /* We allow a CS with any queue type combination as long as it does in cs_ioctl_default()
1638 if (int_queues_only && cs_needs_completion(cs)) { in cs_ioctl_default()
1639 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_default()
1640 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_default()
1641 dev_err(hdev->dev, in cs_ioctl_default()
1642 "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n", in cs_ioctl_default()
1643 cs->ctx->asid, cs->sequence); in cs_ioctl_default()
1644 rc = -EINVAL; in cs_ioctl_default()
1649 INIT_WORK(&cs->finish_work, cs_completion); in cs_ioctl_default()
1652 * store the (external/HW queues) streams used by the CS in the in cs_ioctl_default()
1653 * fence object for multi-CS completion in cs_ioctl_default()
1655 if (hdev->supports_wait_for_multi_cs) in cs_ioctl_default()
1656 cs->fence->stream_master_qid_map = stream_master_qid_map; in cs_ioctl_default()
1658 rc = hl_hw_queue_schedule_cs(cs); in cs_ioctl_default()
1660 if (rc != -EAGAIN) in cs_ioctl_default()
1661 dev_err(hdev->dev, in cs_ioctl_default()
1662 "Failed to submit CS %d.%llu to H/W queues, error %d\n", in cs_ioctl_default()
1663 cs->ctx->asid, cs->sequence, rc); in cs_ioctl_default()
1667 *signal_initial_sob_count = cs->initial_sob_count; in cs_ioctl_default()
1673 atomic_dec(&cb->cs_cnt); in cs_ioctl_default()
1676 cs_rollback(hdev, cs); in cs_ioctl_default()
1680 /* We finished with the CS in this function, so put the ref */ in cs_ioctl_default()
1681 cs_put(cs); in cs_ioctl_default()
1691 struct hl_device *hdev = hpriv->hdev; in hl_cs_ctx_switch()
1692 struct hl_ctx *ctx = hpriv->ctx; in hl_cs_ctx_switch()
1694 int rc = 0, do_ctx_switch = 0; in hl_cs_ctx_switch()
1700 if (hdev->supports_ctx_switch) in hl_cs_ctx_switch()
1701 do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); in hl_cs_ctx_switch()
1703 if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { in hl_cs_ctx_switch()
1704 mutex_lock(&hpriv->restore_phase_mutex); in hl_cs_ctx_switch()
1707 rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); in hl_cs_ctx_switch()
1709 dev_err_ratelimited(hdev->dev, in hl_cs_ctx_switch()
1710 "Failed to switch to context %d, rejecting CS! %d\n", in hl_cs_ctx_switch()
1711 ctx->asid, rc); in hl_cs_ctx_switch()
1714 * while we want to do context-switch (-EBUSY), in hl_cs_ctx_switch()
1715 * we need to soft-reset because QMAN is in hl_cs_ctx_switch()
1721 if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) in hl_cs_ctx_switch()
1723 mutex_unlock(&hpriv->restore_phase_mutex); in hl_cs_ctx_switch()
1728 hdev->asic_funcs->restore_phase_topology(hdev); in hl_cs_ctx_switch()
1730 chunks = (void __user *) (uintptr_t) args->in.chunks_restore; in hl_cs_ctx_switch()
1731 num_chunks = args->in.num_chunks_restore; in hl_cs_ctx_switch()
1734 dev_dbg(hdev->dev, in hl_cs_ctx_switch()
1735 "Need to run restore phase but restore CS is empty\n"); in hl_cs_ctx_switch()
1736 rc = 0; in hl_cs_ctx_switch()
1739 cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count); in hl_cs_ctx_switch()
1742 mutex_unlock(&hpriv->restore_phase_mutex); in hl_cs_ctx_switch()
1745 dev_err(hdev->dev, in hl_cs_ctx_switch()
1746 "Failed to submit restore CS for context %d (%d)\n", in hl_cs_ctx_switch()
1747 ctx->asid, rc); in hl_cs_ctx_switch()
1756 jiffies_to_usecs(hdev->timeout_jiffies), in hl_cs_ctx_switch()
1759 dev_err(hdev->dev, in hl_cs_ctx_switch()
1760 "Restore CS for context %d failed to complete %d\n", in hl_cs_ctx_switch()
1761 ctx->asid, ret); in hl_cs_ctx_switch()
1762 rc = -ENOEXEC; in hl_cs_ctx_switch()
1767 if (hdev->supports_ctx_switch) in hl_cs_ctx_switch()
1768 ctx->thread_ctx_switch_wait_token = 1; in hl_cs_ctx_switch()
1770 } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) { in hl_cs_ctx_switch()
1772 &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), in hl_cs_ctx_switch()
1773 100, jiffies_to_usecs(hdev->timeout_jiffies), false); in hl_cs_ctx_switch()
1775 if (rc == -ETIMEDOUT) { in hl_cs_ctx_switch()
1776 dev_err(hdev->dev, in hl_cs_ctx_switch()
1783 if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset)) in hl_cs_ctx_switch()
1784 hl_device_reset(hdev, 0); in hl_cs_ctx_switch()
1795 * @hw_sob: the H/W SOB used in this signal CS.
1809 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in hl_cs_signal_sob_wraparound_handler()
1814 if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) { in hl_cs_signal_sob_wraparound_handler()
1827 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS; in hl_cs_signal_sob_wraparound_handler()
1828 other_sob = &prop->hw_sob[other_sob_offset]; in hl_cs_signal_sob_wraparound_handler()
1830 if (kref_read(&other_sob->kref) != 1) { in hl_cs_signal_sob_wraparound_handler()
1831 dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n", in hl_cs_signal_sob_wraparound_handler()
1833 return -EINVAL; in hl_cs_signal_sob_wraparound_handler()
1842 prop->next_sob_val = count + 1; in hl_cs_signal_sob_wraparound_handler()
1844 prop->next_sob_val = count; in hl_cs_signal_sob_wraparound_handler()
1847 prop->curr_sob_offset = other_sob_offset; in hl_cs_signal_sob_wraparound_handler()
1852 * for the reservation or the next signal cs. in hl_cs_signal_sob_wraparound_handler()
1853 * we do it here, and for both encaps and regular signal cs in hl_cs_signal_sob_wraparound_handler()
1857 * in addition, if we have combination of cs signal and in hl_cs_signal_sob_wraparound_handler()
1859 * no more reservations and only signal cs keep coming, in hl_cs_signal_sob_wraparound_handler()
1863 if (other_sob->need_reset) in hl_cs_signal_sob_wraparound_handler()
1868 sob->need_reset = true; in hl_cs_signal_sob_wraparound_handler()
1872 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", in hl_cs_signal_sob_wraparound_handler()
1873 prop->curr_sob_offset, q_idx); in hl_cs_signal_sob_wraparound_handler()
1875 prop->next_sob_val += count; in hl_cs_signal_sob_wraparound_handler()
1878 return 0; in hl_cs_signal_sob_wraparound_handler()
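
The wraparound arithmetic above, isolated into a hypothetical helper. max_sob_val stands for the hardware counter limit (HL_MAX_SOB_VAL in the driver), and whether counting restarts at count or count + 1 depends on whether the switch happens while reserving encapsulated signals, as the two assignments above suggest.

static u32 next_sob_value(u32 cur_next_val, u32 count, u32 max_sob_val,
                          bool encaps_reservation, bool *switch_sob)
{
        if (cur_next_val + count >= max_sob_val) {
                /* flip to the other reserved SOB and restart counting on it */
                *switch_sob = true;
                return encaps_reservation ? count + 1 : count;
        }

        *switch_sob = false;
        return cur_next_val + count;
}
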
1887 int rc = 0; in cs_ioctl_extract_signal_seq()
1890 *signal_seq = chunk->encaps_signal_seq; in cs_ioctl_extract_signal_seq()
1891 return 0; in cs_ioctl_extract_signal_seq()
1894 signal_seq_arr_len = chunk->num_signal_seq_arr; in cs_ioctl_extract_signal_seq()
1898 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1899 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1900 dev_err(hdev->dev, in cs_ioctl_extract_signal_seq()
1901 "Wait for signal CS supports only one signal CS seq\n"); in cs_ioctl_extract_signal_seq()
1902 return -EINVAL; in cs_ioctl_extract_signal_seq()
1913 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_extract_signal_seq()
1914 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); in cs_ioctl_extract_signal_seq()
1915 return -ENOMEM; in cs_ioctl_extract_signal_seq()
1920 u64_to_user_ptr(chunk->signal_seq_arr), in cs_ioctl_extract_signal_seq()
1922 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1923 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1924 dev_err(hdev->dev, in cs_ioctl_extract_signal_seq()
1926 rc = -EFAULT; in cs_ioctl_extract_signal_seq()
1931 *signal_seq = signal_seq_arr[0]; in cs_ioctl_extract_signal_seq()
1940 struct hl_ctx *ctx, struct hl_cs *cs, in cs_ioctl_signal_wait_create_jobs() argument
1948 cntr = &hdev->aggregated_cs_counters; in cs_ioctl_signal_wait_create_jobs()
1952 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1953 atomic64_inc(&cntr->out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1954 dev_err(hdev->dev, "Failed to allocate a new job\n"); in cs_ioctl_signal_wait_create_jobs()
1955 return -ENOMEM; in cs_ioctl_signal_wait_create_jobs()
1958 if (cs->type == CS_TYPE_WAIT) in cs_ioctl_signal_wait_create_jobs()
1959 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); in cs_ioctl_signal_wait_create_jobs()
1961 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); in cs_ioctl_signal_wait_create_jobs()
1965 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1966 atomic64_inc(&cntr->out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1968 return -EFAULT; in cs_ioctl_signal_wait_create_jobs()
1971 job->id = 0; in cs_ioctl_signal_wait_create_jobs()
1972 job->cs = cs; in cs_ioctl_signal_wait_create_jobs()
1973 job->user_cb = cb; in cs_ioctl_signal_wait_create_jobs()
1974 atomic_inc(&job->user_cb->cs_cnt); in cs_ioctl_signal_wait_create_jobs()
1975 job->user_cb_size = cb_size; in cs_ioctl_signal_wait_create_jobs()
1976 job->hw_queue_id = q_idx; in cs_ioctl_signal_wait_create_jobs()
1978 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) in cs_ioctl_signal_wait_create_jobs()
1979 && cs->encaps_signals) in cs_ioctl_signal_wait_create_jobs()
1980 job->encaps_sig_wait_offset = encaps_signal_offset; in cs_ioctl_signal_wait_create_jobs()
1983 * We call hl_cb_destroy() for two reasons - we don't need the CB in in cs_ioctl_signal_wait_create_jobs()
1987 job->patched_cb = job->user_cb; in cs_ioctl_signal_wait_create_jobs()
1988 job->job_cb_size = job->user_cb_size; in cs_ioctl_signal_wait_create_jobs()
1989 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); in cs_ioctl_signal_wait_create_jobs()
1992 cs_get(cs); in cs_ioctl_signal_wait_create_jobs()
1994 cs->jobs_in_queue_cnt[job->hw_queue_id]++; in cs_ioctl_signal_wait_create_jobs()
1995 cs->jobs_cnt++; in cs_ioctl_signal_wait_create_jobs()
1997 list_add_tail(&job->cs_node, &cs->job_list); in cs_ioctl_signal_wait_create_jobs()
2001 return 0; in cs_ioctl_signal_wait_create_jobs()
2011 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_reserve_signals()
2016 int rc = 0; in cs_ioctl_reserve_signals()
2019 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n", in cs_ioctl_reserve_signals()
2021 rc = -EINVAL; in cs_ioctl_reserve_signals()
2025 if (q_idx >= hdev->asic_prop.max_queues) { in cs_ioctl_reserve_signals()
2026 dev_err(hdev->dev, "Queue index %d is invalid\n", in cs_ioctl_reserve_signals()
2028 rc = -EINVAL; in cs_ioctl_reserve_signals()
2032 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; in cs_ioctl_reserve_signals()
2034 if (!hw_queue_prop->supports_sync_stream) { in cs_ioctl_reserve_signals()
2035 dev_err(hdev->dev, in cs_ioctl_reserve_signals()
2038 rc = -EINVAL; in cs_ioctl_reserve_signals()
2042 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in cs_ioctl_reserve_signals()
2046 rc = -ENOMEM; in cs_ioctl_reserve_signals()
2050 handle->count = count; in cs_ioctl_reserve_signals()
2052 hl_ctx_get(hpriv->ctx); in cs_ioctl_reserve_signals()
2053 handle->ctx = hpriv->ctx; in cs_ioctl_reserve_signals()
2054 mgr = &hpriv->ctx->sig_mgr; in cs_ioctl_reserve_signals()
2056 spin_lock(&mgr->lock); in cs_ioctl_reserve_signals()
2057 hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC); in cs_ioctl_reserve_signals()
2058 spin_unlock(&mgr->lock); in cs_ioctl_reserve_signals()
2060 if (hdl_id < 0) { in cs_ioctl_reserve_signals()
2061 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n"); in cs_ioctl_reserve_signals()
2062 rc = -EINVAL; in cs_ioctl_reserve_signals()
2066 handle->id = hdl_id; in cs_ioctl_reserve_signals()
2067 handle->q_idx = q_idx; in cs_ioctl_reserve_signals()
2068 handle->hdev = hdev; in cs_ioctl_reserve_signals()
2069 kref_init(&handle->refcount); in cs_ioctl_reserve_signals()
2071 hdev->asic_funcs->hw_queues_lock(hdev); in cs_ioctl_reserve_signals()
2073 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; in cs_ioctl_reserve_signals()
2084 dev_err(hdev->dev, "Failed to switch SOB\n"); in cs_ioctl_reserve_signals()
2085 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_reserve_signals()
2086 rc = -EINVAL; in cs_ioctl_reserve_signals()
2092 handle->hw_sob = hw_sob; in cs_ioctl_reserve_signals()
2097 handle->pre_sob_val = prop->next_sob_val - handle->count; in cs_ioctl_reserve_signals()
2099 handle->cs_seq = ULLONG_MAX; in cs_ioctl_reserve_signals()
2101 *signals_count = prop->next_sob_val; in cs_ioctl_reserve_signals()
2102 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_reserve_signals()
2104 *sob_addr = handle->hw_sob->sob_addr; in cs_ioctl_reserve_signals()
2107 dev_dbg(hdev->dev, in cs_ioctl_reserve_signals()
2108 "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n", in cs_ioctl_reserve_signals()
2109 hw_sob->sob_id, handle->hw_sob->sob_addr, in cs_ioctl_reserve_signals()
2110 prop->next_sob_val - 1, q_idx, hdl_id); in cs_ioctl_reserve_signals()
2114 spin_lock(&mgr->lock); in cs_ioctl_reserve_signals()
2115 idr_remove(&mgr->handles, hdl_id); in cs_ioctl_reserve_signals()
2116 spin_unlock(&mgr->lock); in cs_ioctl_reserve_signals()
2119 hl_ctx_put(handle->ctx); in cs_ioctl_reserve_signals()
2130 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_unreserve_signals()
2134 int rc = 0; in cs_ioctl_unreserve_signals()
2136 mgr = &hpriv->ctx->sig_mgr; in cs_ioctl_unreserve_signals()
2138 spin_lock(&mgr->lock); in cs_ioctl_unreserve_signals()
2139 encaps_sig_hdl = idr_find(&mgr->handles, handle_id); in cs_ioctl_unreserve_signals()
2141 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n", in cs_ioctl_unreserve_signals()
2142 handle_id, encaps_sig_hdl->hw_sob->sob_addr, in cs_ioctl_unreserve_signals()
2143 encaps_sig_hdl->count); in cs_ioctl_unreserve_signals()
2145 hdev->asic_funcs->hw_queues_lock(hdev); in cs_ioctl_unreserve_signals()
2147 q_idx = encaps_sig_hdl->q_idx; in cs_ioctl_unreserve_signals()
2148 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in cs_ioctl_unreserve_signals()
2149 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; in cs_ioctl_unreserve_signals()
2150 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id); in cs_ioctl_unreserve_signals()
2154 * between the reserve-unreserve calls or SOB switch in cs_ioctl_unreserve_signals()
2157 if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count in cs_ioctl_unreserve_signals()
2158 != prop->next_sob_val || in cs_ioctl_unreserve_signals()
2159 sob_addr != encaps_sig_hdl->hw_sob->sob_addr) { in cs_ioctl_unreserve_signals()
2160 …dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %… in cs_ioctl_unreserve_signals()
2161 encaps_sig_hdl->pre_sob_val, in cs_ioctl_unreserve_signals()
2162 (prop->next_sob_val - encaps_sig_hdl->count)); in cs_ioctl_unreserve_signals()
2164 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_unreserve_signals()
2165 rc = -EINVAL; in cs_ioctl_unreserve_signals()
2173 prop->next_sob_val -= encaps_sig_hdl->count; in cs_ioctl_unreserve_signals()
2175 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_unreserve_signals()
2180 idr_remove(&mgr->handles, handle_id); in cs_ioctl_unreserve_signals()
2183 spin_unlock(&mgr->lock); in cs_ioctl_unreserve_signals()
2184 hl_ctx_put(encaps_sig_hdl->ctx); in cs_ioctl_unreserve_signals()
2188 rc = -EINVAL; in cs_ioctl_unreserve_signals()
2189 dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n"); in cs_ioctl_unreserve_signals()
2193 spin_unlock(&mgr->lock); in cs_ioctl_unreserve_signals()
2211 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_signal_wait()
2213 u32 q_idx, collective_engine_id = 0; in cs_ioctl_signal_wait()
2216 struct hl_ctx *ctx = hpriv->ctx; in cs_ioctl_signal_wait()
2218 struct hl_cs *cs; in cs_ioctl_signal_wait() local
2222 cntr = &hdev->aggregated_cs_counters; in cs_ioctl_signal_wait()
2231 chunk = &cs_chunk_array[0]; in cs_ioctl_signal_wait()
2233 if (chunk->queue_index >= hdev->asic_prop.max_queues) { in cs_ioctl_signal_wait()
2234 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2235 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2236 dev_err(hdev->dev, "Queue index %d is invalid\n", in cs_ioctl_signal_wait()
2237 chunk->queue_index); in cs_ioctl_signal_wait()
2238 rc = -EINVAL; in cs_ioctl_signal_wait()
2242 q_idx = chunk->queue_index; in cs_ioctl_signal_wait()
2243 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; in cs_ioctl_signal_wait()
2244 q_type = hw_queue_prop->type; in cs_ioctl_signal_wait()
2246 if (!hw_queue_prop->supports_sync_stream) { in cs_ioctl_signal_wait()
2247 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2248 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2249 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2252 rc = -EINVAL; in cs_ioctl_signal_wait()
2257 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { in cs_ioctl_signal_wait()
2258 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2259 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2260 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2262 rc = -EINVAL; in cs_ioctl_signal_wait()
2266 if (!hdev->nic_ports_mask) { in cs_ioctl_signal_wait()
2267 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2268 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2269 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2271 rc = -EINVAL; in cs_ioctl_signal_wait()
2275 collective_engine_id = chunk->collective_engine_id; in cs_ioctl_signal_wait()
2290 /* check if cs sequence has encapsulated in cs_ioctl_signal_wait()
2296 spin_lock(&ctx->sig_mgr.lock); in cs_ioctl_signal_wait()
2297 idp = &ctx->sig_mgr.handles; in cs_ioctl_signal_wait()
2299 if (encaps_sig_hdl->cs_seq == signal_seq) { in cs_ioctl_signal_wait()
2301 * needed when multiple wait CSs are used with offset in cs_ioctl_signal_wait()
2305 * is 0 but it has yet to be removed from the list. In this in cs_ioctl_signal_wait()
2308 if (kref_get_unless_zero(&encaps_sig_hdl->refcount)) in cs_ioctl_signal_wait()
2313 spin_unlock(&ctx->sig_mgr.lock); in cs_ioctl_signal_wait()
2316 /* treat as signal CS already finished */ in cs_ioctl_signal_wait()
2317 dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n", in cs_ioctl_signal_wait()
2319 rc = 0; in cs_ioctl_signal_wait()
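A minimal sketch of the handle lookup used above, under assumed names (my_handle, my_handle_find): the IDR is walked while holding the manager's spinlock, and a reference is taken only through kref_get_unless_zero(), so a handle whose refcount has already dropped to zero but which has not yet been removed from the IDR is skipped rather than resurrected.

#include <linux/idr.h>
#include <linux/kref.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct my_handle {
        struct kref refcount;
        u64 cs_seq;
};

/* Walk @idp under @lock; return a referenced handle for @seq, or NULL. */
static struct my_handle *my_handle_find(struct idr *idp, spinlock_t *lock, u64 seq)
{
        struct my_handle *h, *found = NULL;
        int id;

        spin_lock(lock);
        idr_for_each_entry(idp, h, id) {
                if (h->cs_seq == seq && kref_get_unless_zero(&h->refcount)) {
                        found = h;
                        break;
                }
        }
        spin_unlock(lock);

        return found;   /* the caller must kref_put() the handle when done */
}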
2324 if (chunk->encaps_signal_offset > in cs_ioctl_signal_wait()
2325 encaps_sig_hdl->count) { in cs_ioctl_signal_wait()
2326 dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n", in cs_ioctl_signal_wait()
2327 chunk->encaps_signal_offset, in cs_ioctl_signal_wait()
2328 encaps_sig_hdl->count); in cs_ioctl_signal_wait()
2329 rc = -EINVAL; in cs_ioctl_signal_wait()
2336 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2337 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2338 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2339 "Failed to get signal CS with seq 0x%llx\n", in cs_ioctl_signal_wait()
2346 /* signal CS already finished */ in cs_ioctl_signal_wait()
2347 rc = 0; in cs_ioctl_signal_wait()
2355 (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT && in cs_ioctl_signal_wait()
2358 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL && in cs_ioctl_signal_wait()
2360 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2361 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2362 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2363 "CS seq 0x%llx is not of a signal/encaps-signal CS\n", in cs_ioctl_signal_wait()
2366 rc = -EINVAL; in cs_ioctl_signal_wait()
2370 if (completion_done(&sig_fence->completion)) { in cs_ioctl_signal_wait()
2371 /* signal CS already finished */ in cs_ioctl_signal_wait()
2373 rc = 0; in cs_ioctl_signal_wait()
2378 rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout); in cs_ioctl_signal_wait()
2387 * Save the signal CS fence for later initialization right before in cs_ioctl_signal_wait()
2388 * hanging the wait CS on the queue. in cs_ioctl_signal_wait()
2389 * for encaps signals case, we save the cs sequence and handle pointer in cs_ioctl_signal_wait()
2393 cs->signal_fence = sig_fence; in cs_ioctl_signal_wait()
2398 if (cs->encaps_signals) in cs_ioctl_signal_wait()
2399 cs->encaps_sig_hdl = encaps_sig_hdl; in cs_ioctl_signal_wait()
2402 hl_debugfs_add_cs(cs); in cs_ioctl_signal_wait()
2404 *cs_seq = cs->sequence; in cs_ioctl_signal_wait()
2407 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type, in cs_ioctl_signal_wait()
2408 q_idx, chunk->encaps_signal_offset); in cs_ioctl_signal_wait()
2410 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx, in cs_ioctl_signal_wait()
2411 cs, q_idx, collective_engine_id, in cs_ioctl_signal_wait()
2412 chunk->encaps_signal_offset); in cs_ioctl_signal_wait()
2414 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2415 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2416 rc = -EINVAL; in cs_ioctl_signal_wait()
2423 INIT_WORK(&cs->finish_work, cs_completion); in cs_ioctl_signal_wait()
2425 rc = hl_hw_queue_schedule_cs(cs); in cs_ioctl_signal_wait()
2427 /* In case wait cs failed here, it means the signal cs in cs_ioctl_signal_wait()
2432 rc = 0; in cs_ioctl_signal_wait()
2433 else if (rc != -EAGAIN) in cs_ioctl_signal_wait()
2434 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2435 "Failed to submit CS %d.%llu to H/W queues, error %d\n", in cs_ioctl_signal_wait()
2436 ctx->asid, cs->sequence, rc); in cs_ioctl_signal_wait()
2440 *signal_sob_addr_offset = cs->sob_addr_offset; in cs_ioctl_signal_wait()
2441 *signal_initial_sob_count = cs->initial_sob_count; in cs_ioctl_signal_wait()
2449 cs_rollback(hdev, cs); in cs_ioctl_signal_wait()
2453 /* We finished with the CS in this function, so put the ref */ in cs_ioctl_signal_wait()
2454 cs_put(cs); in cs_ioctl_signal_wait()
2457 kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); in cs_ioctl_signal_wait()
2466 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_engine_cores()
2471 if (!hdev->asic_prop.supports_engine_modes) in cs_ioctl_engine_cores()
2472 return -EPERM; in cs_ioctl_engine_cores()
2474 if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) { in cs_ioctl_engine_cores()
2475 dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores); in cs_ioctl_engine_cores()
2476 return -EINVAL; in cs_ioctl_engine_cores()
2480 dev_err(hdev->dev, "Engine core command is invalid\n"); in cs_ioctl_engine_cores()
2481 return -EINVAL; in cs_ioctl_engine_cores()
2487 return -ENOMEM; in cs_ioctl_engine_cores()
2490 dev_err(hdev->dev, "Failed to copy core-ids array from user\n"); in cs_ioctl_engine_cores()
2492 return -EFAULT; in cs_ioctl_engine_cores()
2495 rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command); in cs_ioctl_engine_cores()
2504 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_engines()
2509 if (!hdev->asic_prop.supports_engine_modes) in cs_ioctl_engines()
2510 return -EPERM; in cs_ioctl_engines()
2513 dev_err(hdev->dev, "Engine command is invalid\n"); in cs_ioctl_engines()
2514 return -EINVAL; in cs_ioctl_engines()
2517 max_num_of_engines = hdev->asic_prop.max_num_of_engines; in cs_ioctl_engines()
2519 max_num_of_engines = hdev->asic_prop.num_engine_cores; in cs_ioctl_engines()
2522 dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines); in cs_ioctl_engines()
2523 return -EINVAL; in cs_ioctl_engines()
2529 return -ENOMEM; in cs_ioctl_engines()
2532 dev_err(hdev->dev, "Failed to copy engine-ids array from user\n"); in cs_ioctl_engines()
2534 return -EFAULT; in cs_ioctl_engines()
2537 rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command); in cs_ioctl_engines()
2545 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_flush_pci_hbw_writes()
2546 struct asic_fixed_properties *prop = &hdev->asic_prop; in cs_ioctl_flush_pci_hbw_writes()
2548 if (!prop->hbw_flush_reg) { in cs_ioctl_flush_pci_hbw_writes()
2549 dev_dbg(hdev->dev, "HBW flush is not supported\n"); in cs_ioctl_flush_pci_hbw_writes()
2550 return -EOPNOTSUPP; in cs_ioctl_flush_pci_hbw_writes()
2553 RREG32(prop->hbw_flush_reg); in cs_ioctl_flush_pci_hbw_writes()
2555 return 0; in cs_ioctl_flush_pci_hbw_writes()
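The register read above is the standard way to flush posted writes: per PCI ordering rules, a read from the device cannot complete until earlier posted writes on the same path have reached it. A hedged, generic sketch of the idiom (my_flush_posted_writes and its parameters are illustrative, not driver names):

#include <linux/io.h>
#include <linux/types.h>

/* Read back any harmless register; the returned value is intentionally discarded. */
static void my_flush_posted_writes(void __iomem *regs, u32 flush_reg_offset)
{
        (void)readl(regs + flush_reg_offset);
}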
2560 struct hl_fpriv *hpriv = file_priv->driver_priv; in hl_cs_ioctl()
2562 enum hl_cs_type cs_type = 0; in hl_cs_ioctl()
2566 signals_count = 0, sob_addr = 0, handle_id = 0; in hl_cs_ioctl()
2567 u16 sob_initial_count = 0; in hl_cs_ioctl()
2578 cs_type = hl_cs_get_cs_type(args->in.cs_flags & in hl_cs_ioctl()
2580 chunks = (void __user *) (uintptr_t) args->in.chunks_execute; in hl_cs_ioctl()
2581 num_chunks = args->in.num_chunks_execute; in hl_cs_ioctl()
2582 flags = args->in.cs_flags; in hl_cs_ioctl()
2584 /* In case this is a staged CS, user should supply the CS sequence */ in hl_cs_ioctl()
2587 cs_seq = args->in.seq; in hl_cs_ioctl()
2590 ? msecs_to_jiffies(args->in.timeout * 1000) in hl_cs_ioctl()
2591 : hpriv->hdev->timeout_jiffies; in hl_cs_ioctl()
2598 &cs_seq, args->in.cs_flags, timeout, in hl_cs_ioctl()
2603 args->in.encaps_signals_q_idx, in hl_cs_ioctl()
2604 args->in.encaps_signals_count, in hl_cs_ioctl()
2609 args->in.encaps_sig_handle_id); in hl_cs_ioctl()
2612 rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, in hl_cs_ioctl()
2613 args->in.num_engine_cores, args->in.core_command); in hl_cs_ioctl()
2616 rc = cs_ioctl_engines(hpriv, args->in.engines, in hl_cs_ioctl()
2617 args->in.num_engines, args->in.engine_command); in hl_cs_ioctl()
2624 args->in.cs_flags, in hl_cs_ioctl()
2625 args->in.encaps_sig_handle_id, in hl_cs_ioctl()
2630 if (rc != -EAGAIN) { in hl_cs_ioctl()
2631 memset(args, 0, sizeof(*args)); in hl_cs_ioctl()
2635 args->out.handle_id = handle_id; in hl_cs_ioctl()
2636 args->out.sob_base_addr_offset = sob_addr; in hl_cs_ioctl()
2637 args->out.count = signals_count; in hl_cs_ioctl()
2640 args->out.sob_base_addr_offset = sob_addr; in hl_cs_ioctl()
2641 args->out.sob_count_before_submission = sob_initial_count; in hl_cs_ioctl()
2642 args->out.seq = cs_seq; in hl_cs_ioctl()
2645 args->out.sob_count_before_submission = sob_initial_count; in hl_cs_ioctl()
2646 args->out.seq = cs_seq; in hl_cs_ioctl()
2649 args->out.seq = cs_seq; in hl_cs_ioctl()
2653 args->out.status = rc; in hl_cs_ioctl()
2662 struct hl_device *hdev = ctx->hdev; in hl_wait_for_fence()
2665 int rc = 0, error; in hl_wait_for_fence()
2669 if (rc == -EINVAL) in hl_wait_for_fence()
2670 dev_notice_ratelimited(hdev->dev, in hl_wait_for_fence()
2671 "Can't wait on CS %llu because current CS is at seq %llu\n", in hl_wait_for_fence()
2672 seq, ctx->cs_sequence); in hl_wait_for_fence()
2677 if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, ×tamp_kt, &error)) { in hl_wait_for_fence()
2678 dev_dbg(hdev->dev, in hl_wait_for_fence()
2679 "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", in hl_wait_for_fence()
2680 seq, ctx->cs_sequence); in hl_wait_for_fence()
2682 return 0; in hl_wait_for_fence()
2690 completion_rc = completion_done(&fence->completion); in hl_wait_for_fence()
2698 &fence->completion, timeout); in hl_wait_for_fence()
2701 error = fence->error; in hl_wait_for_fence()
2702 timestamp_kt = fence->timestamp; in hl_wait_for_fence()
2705 if (completion_rc > 0) { in hl_wait_for_fence()
2713 if (completion_rc == -ERESTARTSYS) in hl_wait_for_fence()
2715 else if (error == -ETIMEDOUT || error == -EIO) in hl_wait_for_fence()
2722 * hl_cs_poll_fences - iterate CS fences to check for CS completion
2724 * @mcs_data: multi-CS internal data
2725 * @mcs_compl: multi-CS completion structure
2727 * Return: 0 on success, otherwise a non-zero error code
2729 * The function iterates over all CS sequences in the list and sets a bit in
2730 * completion_bitmap for each completed CS.
2733 * completion to the multi-CS context.
2738 struct hl_fence **fence_ptr = mcs_data->fence_arr; in hl_cs_poll_fences()
2739 struct hl_device *hdev = mcs_data->ctx->hdev; in hl_cs_poll_fences()
2740 int i, rc, arr_len = mcs_data->arr_len; in hl_cs_poll_fences()
2741 u64 *seq_arr = mcs_data->seq_arr; in hl_cs_poll_fences()
2745 memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *)); in hl_cs_poll_fences()
2748 rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len); in hl_cs_poll_fences()
2753 * re-initialize the completion here to handle 2 possible cases: in hl_cs_poll_fences()
2754 * 1. CS will complete the multi-CS prior to clearing the completion, in which in hl_cs_poll_fences()
2755 * case the fence iteration is guaranteed to catch the CS completion. in hl_cs_poll_fences()
2756 * 2. the completion will occur after re-init of the completion. in hl_cs_poll_fences()
2759 reinit_completion(&mcs_compl->completion); in hl_cs_poll_fences()
2763 * this value is maintained - no timestamp was updated in hl_cs_poll_fences()
2765 max_ktime = ktime_set(KTIME_SEC_MAX, 0); in hl_cs_poll_fences()
2768 for (i = 0; i < arr_len; i++, fence_ptr++) { in hl_cs_poll_fences()
2772 * In order to prevent the case where we wait until timeout even though a CS associated in hl_cs_poll_fences()
2773 * with the multi-CS has actually completed, we do things in the below order: in hl_cs_poll_fences()
2774 * 1. for each fence, set its QID map in the multi-CS completion QID map. This way in hl_cs_poll_fences()
2775 * any CS can, potentially, complete the multi CS for the specific QID (note in hl_cs_poll_fences()
2778 * 2. only after allowing multi-CS completion for the specific QID do we check whether in hl_cs_poll_fences()
2779 * the specific CS has already completed (and thus the wait for completion part will in hl_cs_poll_fences()
2780 * be skipped). If the CS has not completed, it is guaranteed that the completing CS will in hl_cs_poll_fences()
2784 mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map; in hl_cs_poll_fences()
2787 * function won't sleep as it is called with timeout 0 (i.e. in hl_cs_poll_fences()
2790 rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, &status, 0, NULL); in hl_cs_poll_fences()
2792 dev_err(hdev->dev, in hl_cs_poll_fences()
2793 "wait_for_fence error :%d for CS seq %llu\n", in hl_cs_poll_fences()
2800 /* CS did not finish, QID to wait on already stored */ in hl_cs_poll_fences()
2805 * returns to user indicating CS completed before it finished in hl_cs_poll_fences()
2811 if (fence && !fence->mcs_handling_done) { in hl_cs_poll_fences()
2813 * in case multi CS is completed but MCS handling not done in hl_cs_poll_fences()
2814 * we "complete" the multi CS to prevent it from waiting in hl_cs_poll_fences()
2815 * until time-out and the "multi-CS handling done" will have in hl_cs_poll_fences()
2818 complete_all(&mcs_compl->completion); in hl_cs_poll_fences()
2822 mcs_data->completion_bitmap |= BIT(i); in hl_cs_poll_fences()
2828 if (fence && mcs_data->update_ts && in hl_cs_poll_fences()
2829 (ktime_compare(fence->timestamp, first_cs_time) < 0)) in hl_cs_poll_fences()
2830 first_cs_time = fence->timestamp; in hl_cs_poll_fences()
2833 mcs_data->update_ts = false; in hl_cs_poll_fences()
2834 mcs_data->gone_cs = true; in hl_cs_poll_fences()
2838 * already gone. In this case, the CS is set as completed but in hl_cs_poll_fences()
2841 mcs_data->completion_bitmap |= BIT(i); in hl_cs_poll_fences()
2844 dev_err(hdev->dev, "Invalid fence status\n"); in hl_cs_poll_fences()
2845 rc = -EINVAL; in hl_cs_poll_fences()
2851 hl_fences_put(mcs_data->fence_arr, arr_len); in hl_cs_poll_fences()
2853 if (mcs_data->update_ts && in hl_cs_poll_fences()
2854 (ktime_compare(first_cs_time, max_ktime) != 0)) in hl_cs_poll_fences()
2855 mcs_data->timestamp = ktime_to_ns(first_cs_time); in hl_cs_poll_fences()
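A hedged sketch of the ordering described in the comments above, under invented names (my_event, my_event_arm_or_poll): the waiter re-initializes its completion, publishes its interest before checking whether the event already happened, and the completer publishes the event state before calling complete_all(). With this order a completion can at worst be reported twice, but never lost.

#include <linux/bits.h>
#include <linux/completion.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct my_event {
        spinlock_t lock;
        unsigned long waiter_map;       /* which waiter bits may be completed by this event */
        bool done;
        struct completion *waiter;      /* completion of the currently registered waiter */
};

/* Completer side: publish the state first, then wake the waiter. */
static void my_event_complete(struct my_event *ev)
{
        spin_lock(&ev->lock);
        ev->done = true;
        if (ev->waiter)
                complete_all(ev->waiter);
        spin_unlock(&ev->lock);
}

/* Waiter side: arm first, check second; returns true if no wait is needed. */
static bool my_event_arm_or_poll(struct my_event *ev, struct completion *comp, int bit)
{
        bool already_done;

        reinit_completion(comp);

        spin_lock(&ev->lock);
        ev->waiter = comp;
        ev->waiter_map |= BIT(bit);     /* 1. allow the completer to wake us up */
        already_done = ev->done;        /* 2. only now check if it already happened */
        spin_unlock(&ev->lock);

        return already_done;
}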
2864 int rc = 0; in _hl_cs_wait_ioctl()
2867 *timestamp = 0; in _hl_cs_wait_ioctl()
2896 * hl_wait_multi_cs_completion_init - init completion structure
2914 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { in hl_wait_multi_cs_completion_init()
2915 mcs_compl = &hdev->multi_cs_completion[i]; in hl_wait_multi_cs_completion_init()
2916 spin_lock(&mcs_compl->lock); in hl_wait_multi_cs_completion_init()
2917 if (!mcs_compl->used) { in hl_wait_multi_cs_completion_init()
2918 mcs_compl->used = 1; in hl_wait_multi_cs_completion_init()
2919 mcs_compl->timestamp = 0; in hl_wait_multi_cs_completion_init()
2921 * init QID map to 0 to avoid completion by CSs. The actual QID map in hl_wait_multi_cs_completion_init()
2922 * to multi-CS CSs will be set incrementally at a later stage in hl_wait_multi_cs_completion_init()
2924 mcs_compl->stream_master_qid_map = 0; in hl_wait_multi_cs_completion_init()
2925 spin_unlock(&mcs_compl->lock); in hl_wait_multi_cs_completion_init()
2928 spin_unlock(&mcs_compl->lock); in hl_wait_multi_cs_completion_init()
2932 dev_err(hdev->dev, "no available multi-CS completion structure\n"); in hl_wait_multi_cs_completion_init()
2933 return ERR_PTR(-ENOMEM); in hl_wait_multi_cs_completion_init()
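A minimal sketch of the slot-claiming loop above, under assumed names (my_slot, my_claim_slot, MY_MAX_SLOTS): every pre-allocated entry carries its own spinlock, the first entry whose used flag is clear is claimed and reset, and exhaustion is reported with ERR_PTR(-ENOMEM), mirroring the structure shown.

#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/types.h>

#define MY_MAX_SLOTS    8       /* illustrative pool size */

struct my_slot {
        spinlock_t lock;
        bool used;
        u64 timestamp;
};

static struct my_slot *my_claim_slot(struct my_slot *pool)
{
        struct my_slot *slot;
        int i;

        for (i = 0; i < MY_MAX_SLOTS; i++) {
                slot = &pool[i];

                spin_lock(&slot->lock);
                if (!slot->used) {
                        slot->used = true;
                        slot->timestamp = 0;
                        spin_unlock(&slot->lock);
                        return slot;
                }
                spin_unlock(&slot->lock);
        }

        return ERR_PTR(-ENOMEM);        /* no free slot in the pool */
}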
2939 * hl_wait_multi_cs_completion_fini - return completion structure and set as
2948 * free completion structure, do it under lock to be in sync with the in hl_wait_multi_cs_completion_fini()
2951 spin_lock(&mcs_compl->lock); in hl_wait_multi_cs_completion_fini()
2952 mcs_compl->used = 0; in hl_wait_multi_cs_completion_fini()
2953 spin_unlock(&mcs_compl->lock); in hl_wait_multi_cs_completion_fini()
2957 * hl_wait_multi_cs_completion - wait for first CS to complete
2959 * @mcs_data: multi-CS internal data
2961 * Return: 0 on success, otherwise a non-zero error code
2968 completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion, in hl_wait_multi_cs_completion()
2969 mcs_data->timeout_jiffies); in hl_wait_multi_cs_completion()
2972 if (completion_rc > 0) in hl_wait_multi_cs_completion()
2973 mcs_data->timestamp = mcs_compl->timestamp; in hl_wait_multi_cs_completion()
2975 if (completion_rc == -ERESTARTSYS) in hl_wait_multi_cs_completion()
2978 mcs_data->wait_status = completion_rc; in hl_wait_multi_cs_completion()
2980 return 0; in hl_wait_multi_cs_completion()
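wait_for_completion_interruptible_timeout() returns the remaining jiffies (a positive value) when the completion fired, 0 on timeout, and -ERESTARTSYS when a signal interrupted the wait. A hedged sketch of the three-way mapping used above (the status enum is illustrative):

#include <linux/completion.h>
#include <linux/errno.h>

enum my_wait_status { MY_WAIT_COMPLETED, MY_WAIT_TIMEDOUT, MY_WAIT_INTERRUPTED };

static enum my_wait_status my_wait(struct completion *comp, unsigned long timeout_jiffies)
{
        long rc = wait_for_completion_interruptible_timeout(comp, timeout_jiffies);

        if (rc > 0)
                return MY_WAIT_COMPLETED;       /* rc holds the remaining jiffies */
        if (rc == -ERESTARTSYS)
                return MY_WAIT_INTERRUPTED;     /* typically translated to -EINTR for user space */
        return MY_WAIT_TIMEDOUT;                /* rc == 0 */
}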
2984 * hl_multi_cs_completion_init - init array of multi-CS completion structures
2993 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { in hl_multi_cs_completion_init()
2994 mcs_cmpl = &hdev->multi_cs_completion[i]; in hl_multi_cs_completion_init()
2995 mcs_cmpl->used = 0; in hl_multi_cs_completion_init()
2996 spin_lock_init(&mcs_cmpl->lock); in hl_multi_cs_completion_init()
2997 init_completion(&mcs_cmpl->completion); in hl_multi_cs_completion_init()
3002 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
3005 * @data: pointer to multi-CS wait ioctl in/out args
3011 struct hl_device *hdev = hpriv->hdev; in hl_multi_cs_wait_ioctl()
3014 struct hl_ctx *ctx = hpriv->ctx; in hl_multi_cs_wait_ioctl()
3022 for (i = 0 ; i < sizeof(args->in.pad) ; i++) in hl_multi_cs_wait_ioctl()
3023 if (args->in.pad[i]) { in hl_multi_cs_wait_ioctl()
3024 dev_dbg(hdev->dev, "Padding bytes must be 0\n"); in hl_multi_cs_wait_ioctl()
3025 return -EINVAL; in hl_multi_cs_wait_ioctl()
3028 if (!hdev->supports_wait_for_multi_cs) { in hl_multi_cs_wait_ioctl()
3029 dev_err(hdev->dev, "Wait for multi CS is not supported\n"); in hl_multi_cs_wait_ioctl()
3030 return -EPERM; in hl_multi_cs_wait_ioctl()
3033 seq_arr_len = args->in.seq_arr_len; in hl_multi_cs_wait_ioctl()
3036 dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n", in hl_multi_cs_wait_ioctl()
3038 return -EINVAL; in hl_multi_cs_wait_ioctl()
3045 return -ENOMEM; in hl_multi_cs_wait_ioctl()
3047 /* copy CS sequence array from user */ in hl_multi_cs_wait_ioctl()
3048 seq_arr = (void __user *) (uintptr_t) args->in.seq; in hl_multi_cs_wait_ioctl()
3051 dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n"); in hl_multi_cs_wait_ioctl()
3052 rc = -EFAULT; in hl_multi_cs_wait_ioctl()
3059 rc = -ENOMEM; in hl_multi_cs_wait_ioctl()
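The lines elided around here allocate kernel-side arrays and copy the user's CS sequence array into one of them. A hedged sketch of that standard step with an assumed helper name (my_copy_seq_array); the length is expected to be bounded already, as the length check above does.

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>

/* Copy @len u64 sequence numbers from @user_ptr; the caller kfree()s the result. */
static u64 *my_copy_seq_array(void __user *user_ptr, u32 len)
{
        u64 *seq_arr;

        seq_arr = kcalloc(len, sizeof(*seq_arr), GFP_KERNEL);
        if (!seq_arr)
                return ERR_PTR(-ENOMEM);

        if (copy_from_user(seq_arr, user_ptr, len * sizeof(*seq_arr))) {
                kfree(seq_arr);
                return ERR_PTR(-EFAULT);
        }

        return seq_arr;
}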
3063 /* initialize the multi-CS internal data */ in hl_multi_cs_wait_ioctl()
3071 /* wait (with timeout) for the first CS to be completed */ in hl_multi_cs_wait_ioctl()
3072 mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us); in hl_multi_cs_wait_ioctl()
3079 /* poll all CS fences, extract timestamp */ in hl_multi_cs_wait_ioctl()
3083 * skip wait for CS completion when one of the below is true: in hl_multi_cs_wait_ioctl()
3084 * - an error on the poll function in hl_multi_cs_wait_ioctl()
3085 * - one or more CS in the list completed in hl_multi_cs_wait_ioctl()
3086 * - the user called ioctl with timeout 0 in hl_multi_cs_wait_ioctl()
3088 if (rc || mcs_data.completion_bitmap || !args->in.timeout_us) in hl_multi_cs_wait_ioctl()
3093 if (rc || (mcs_data.wait_status == 0)) in hl_multi_cs_wait_ioctl()
3097 * poll fences once again to update the CS map. in hl_multi_cs_wait_ioctl()
3108 * it got a completion) it either got completed by CS in the multi CS list in hl_multi_cs_wait_ioctl()
3110 * got completed by CS submitted to one of the shared stream master but in hl_multi_cs_wait_ioctl()
3111 * not in the multi CS list (in which case we should wait again but modify in hl_multi_cs_wait_ioctl()
3112 * the timeout and set timestamp as zero to let a CS related to the current in hl_multi_cs_wait_ioctl()
3113 * multi-CS set a new, relevant, timestamp) in hl_multi_cs_wait_ioctl()
3116 mcs_compl->timestamp = 0; in hl_multi_cs_wait_ioctl()
3129 if (rc == -ERESTARTSYS) { in hl_multi_cs_wait_ioctl()
3130 dev_err_ratelimited(hdev->dev, in hl_multi_cs_wait_ioctl()
3131 "user process got signal while waiting for Multi-CS\n"); in hl_multi_cs_wait_ioctl()
3132 rc = -EINTR; in hl_multi_cs_wait_ioctl()
3139 memset(args, 0, sizeof(*args)); in hl_multi_cs_wait_ioctl()
3142 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; in hl_multi_cs_wait_ioctl()
3143 args->out.cs_completion_map = mcs_data.completion_bitmap; in hl_multi_cs_wait_ioctl()
3145 /* if timestamp is not 0, it's valid */ in hl_multi_cs_wait_ioctl()
3147 args->out.timestamp_nsec = mcs_data.timestamp; in hl_multi_cs_wait_ioctl()
3148 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; in hl_multi_cs_wait_ioctl()
3151 /* update if some CS was gone */ in hl_multi_cs_wait_ioctl()
3153 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; in hl_multi_cs_wait_ioctl()
3155 args->out.status = HL_WAIT_CS_STATUS_BUSY; in hl_multi_cs_wait_ioctl()
3158 return 0; in hl_multi_cs_wait_ioctl()
3163 struct hl_device *hdev = hpriv->hdev; in hl_cs_wait_ioctl()
3166 u64 seq = args->in.seq; in hl_cs_wait_ioctl()
3170 rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, ×tamp); in hl_cs_wait_ioctl()
3172 if (rc == -ERESTARTSYS) { in hl_cs_wait_ioctl()
3173 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
3174 "user process got signal while waiting for CS handle %llu\n", in hl_cs_wait_ioctl()
3176 return -EINTR; in hl_cs_wait_ioctl()
3179 memset(args, 0, sizeof(*args)); in hl_cs_wait_ioctl()
3182 if (rc == -ETIMEDOUT) { in hl_cs_wait_ioctl()
3183 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
3184 "CS %llu has timed-out while user process is waiting for it\n", in hl_cs_wait_ioctl()
3186 args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT; in hl_cs_wait_ioctl()
3187 } else if (rc == -EIO) { in hl_cs_wait_ioctl()
3188 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
3189 "CS %llu has been aborted while user process is waiting for it\n", in hl_cs_wait_ioctl()
3191 args->out.status = HL_WAIT_CS_STATUS_ABORTED; in hl_cs_wait_ioctl()
3197 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; in hl_cs_wait_ioctl()
3198 args->out.timestamp_nsec = timestamp; in hl_cs_wait_ioctl()
3203 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; in hl_cs_wait_ioctl()
3206 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; in hl_cs_wait_ioctl()
3210 args->out.status = HL_WAIT_CS_STATUS_BUSY; in hl_cs_wait_ioctl()
3214 return 0; in hl_cs_wait_ioctl()
3220 record->ts_reg_info.cq_cb = cq_cb; in set_record_cq_info()
3221 record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset; in set_record_cq_info()
3222 record->cq_target_value = target_value; in set_record_cq_info()
3231 *req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + in validate_and_get_ts_record()
3233 ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + in validate_and_get_ts_record()
3234 (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)); in validate_and_get_ts_record()
3238 dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n", in validate_and_get_ts_record()
3240 return -EINVAL; in validate_and_get_ts_record()
3243 return 0; in validate_and_get_ts_record()
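A hedged sketch of the offset check performed above, under assumed names (my_buff, my_record): the requested record is computed from the kernel buffer base and must lie strictly before the last record that fits in the buffer, otherwise -EINVAL is returned.

#include <linux/errno.h>
#include <linux/types.h>

struct my_record {
        u64 timestamp;
};

struct my_buff {
        void *kernel_addr;
        u64 size_bytes;
};

static int my_validate_offset(struct my_buff *buff, u64 offset, struct my_record **out)
{
        struct my_record *base = buff->kernel_addr;
        struct my_record *last = base + buff->size_bytes / sizeof(struct my_record);
        struct my_record *req = base + offset;

        if (req >= last)
                return -EINVAL; /* offset points past the end of the buffer */

        *out = req;
        return 0;
}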
3249 struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt; in unregister_timestamp_node()
3254 spin_lock_irqsave(&interrupt->ts_list_lock, flags); in unregister_timestamp_node()
3256 if (record->ts_reg_info.in_use) { in unregister_timestamp_node()
3257 record->ts_reg_info.in_use = false; in unregister_timestamp_node()
3258 list_del(&record->list_node); in unregister_timestamp_node()
3263 spin_unlock_irqrestore(&interrupt->ts_list_lock, flags); in unregister_timestamp_node()
3267 hl_mmap_mem_buf_put(record->ts_reg_info.buf); in unregister_timestamp_node()
3268 hl_cb_put(record->ts_reg_info.cq_cb); in unregister_timestamp_node()
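A hedged sketch of the teardown order above, under assumed names (my_node, my_unregister_node): the node is unlinked and marked unused while the interrupt's spinlock is held with interrupts saved, and the reference it holds is dropped only after the lock is released, since the final put may do heavier work that should not run under a spinlock.

#include <linux/kref.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct my_node {
        struct list_head list_node;
        bool in_use;
        struct kref *buf_ref;   /* reference held while the node is registered */
};

static void my_buf_release(struct kref *kref)
{
        /* free the underlying buffer here */
}

static void my_unregister_node(struct my_node *node, spinlock_t *list_lock)
{
        unsigned long flags;
        bool was_in_use;

        spin_lock_irqsave(list_lock, flags);
        was_in_use = node->in_use;
        if (was_in_use) {
                node->in_use = false;
                list_del(&node->list_node);
        }
        spin_unlock_irqrestore(list_lock, flags);

        if (was_in_use)
                kref_put(node->buf_ref, my_buf_release);        /* outside the lock */
}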
3277 struct hl_ts_buff *ts_buff = data->buf->private; in ts_get_and_handle_kernel_record()
3281 rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset, in ts_get_and_handle_kernel_record()
3287 /* In case the node is already registered, unregister it first and then re-use it */ in ts_get_and_handle_kernel_record()
3287 if (req_offset_record->ts_reg_info.in_use) { in ts_get_and_handle_kernel_record()
3288 dev_dbg(data->buf->mmg->dev, in ts_get_and_handle_kernel_record()
3291 req_offset_record->ts_reg_info.interrupt->interrupt_id, in ts_get_and_handle_kernel_record()
3292 req_offset_record->ts_reg_info.timestamp_kernel_addr, in ts_get_and_handle_kernel_record()
3293 data->interrupt->interrupt_id); in ts_get_and_handle_kernel_record()
3299 if (data->interrupt->interrupt_id != in ts_get_and_handle_kernel_record()
3300 req_offset_record->ts_reg_info.interrupt->interrupt_id) { in ts_get_and_handle_kernel_record()
3303 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags); in ts_get_and_handle_kernel_record()
3309 spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags); in ts_get_and_handle_kernel_record()
3313 req_offset_record->ts_reg_info.in_use = true; in ts_get_and_handle_kernel_record()
3314 req_offset_record->ts_reg_info.buf = data->buf; in ts_get_and_handle_kernel_record()
3315 req_offset_record->ts_reg_info.timestamp_kernel_addr = in ts_get_and_handle_kernel_record()
3316 (u64 *) ts_buff->user_buff_address + data->ts_offset; in ts_get_and_handle_kernel_record()
3317 req_offset_record->ts_reg_info.interrupt = data->interrupt; in ts_get_and_handle_kernel_record()
3318 set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset, in ts_get_and_handle_kernel_record()
3319 data->target_value); in ts_get_and_handle_kernel_record()
3332 int rc = 0; in _hl_interrupt_ts_reg_ioctl()
3336 data->cq_cb = hl_cb_get(data->mmg, data->cq_handle); in _hl_interrupt_ts_reg_ioctl()
3337 if (!data->cq_cb) { in _hl_interrupt_ts_reg_ioctl()
3338 rc = -EINVAL; in _hl_interrupt_ts_reg_ioctl()
3343 if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >= in _hl_interrupt_ts_reg_ioctl()
3344 ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) { in _hl_interrupt_ts_reg_ioctl()
3345 rc = -EINVAL; in _hl_interrupt_ts_reg_ioctl()
3349 …dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_… in _hl_interrupt_ts_reg_ioctl()
3350 data->interrupt->interrupt_id, data->ts_handle, in _hl_interrupt_ts_reg_ioctl()
3351 data->ts_offset, data->cq_offset); in _hl_interrupt_ts_reg_ioctl()
3353 data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle); in _hl_interrupt_ts_reg_ioctl()
3354 if (!data->buf) { in _hl_interrupt_ts_reg_ioctl()
3355 rc = -EINVAL; in _hl_interrupt_ts_reg_ioctl()
3359 spin_lock_irqsave(&data->interrupt->ts_list_lock, flags); in _hl_interrupt_ts_reg_ioctl()
3364 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); in _hl_interrupt_ts_reg_ioctl()
3371 if (*pend->cq_kernel_addr >= data->target_value) { in _hl_interrupt_ts_reg_ioctl()
3372 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); in _hl_interrupt_ts_reg_ioctl()
3374 …dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interr… in _hl_interrupt_ts_reg_ioctl()
3375 pend, data->ts_offset, data->interrupt->interrupt_id); in _hl_interrupt_ts_reg_ioctl()
3377 pend->ts_reg_info.in_use = false; in _hl_interrupt_ts_reg_ioctl()
3379 *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); in _hl_interrupt_ts_reg_ioctl()
3384 list_add_tail(&pend->list_node, &data->interrupt->ts_list_head); in _hl_interrupt_ts_reg_ioctl()
3385 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); in _hl_interrupt_ts_reg_ioctl()
3394 hl_mmap_mem_buf_put(data->buf); in _hl_interrupt_ts_reg_ioctl()
3396 hl_cb_put(data->cq_cb); in _hl_interrupt_ts_reg_ioctl()
3410 int rc = 0; in _hl_interrupt_wait_ioctl()
3412 timeout = hl_usecs64_to_jiffies(data->intr_timeout_us); in _hl_interrupt_wait_ioctl()
3416 data->cq_cb = hl_cb_get(data->mmg, data->cq_handle); in _hl_interrupt_wait_ioctl()
3417 if (!data->cq_cb) { in _hl_interrupt_wait_ioctl()
3418 rc = -EINVAL; in _hl_interrupt_wait_ioctl()
3423 if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >= in _hl_interrupt_wait_ioctl()
3424 ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) { in _hl_interrupt_wait_ioctl()
3425 rc = -EINVAL; in _hl_interrupt_wait_ioctl()
3431 rc = -ENOMEM; in _hl_interrupt_wait_ioctl()
3435 hl_fence_init(&pend->fence, ULONG_MAX); in _hl_interrupt_wait_ioctl()
3436 pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset; in _hl_interrupt_wait_ioctl()
3437 pend->cq_target_value = data->target_value; in _hl_interrupt_wait_ioctl()
3438 spin_lock_irqsave(&data->interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
3444 if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) { in _hl_interrupt_wait_ioctl()
3445 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
3447 if (*pend->cq_kernel_addr >= data->target_value) in _hl_interrupt_wait_ioctl()
3452 pend->fence.timestamp = ktime_get(); in _hl_interrupt_wait_ioctl()
3462 list_add_tail(&pend->list_node, &data->interrupt->wait_list_head); in _hl_interrupt_wait_ioctl()
3463 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
3466 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, in _hl_interrupt_wait_ioctl()
3468 if (completion_rc > 0) { in _hl_interrupt_wait_ioctl()
3469 if (pend->fence.error == -EIO) { in _hl_interrupt_wait_ioctl()
3470 dev_err_ratelimited(hdev->dev, in _hl_interrupt_wait_ioctl()
3472 pend->fence.error); in _hl_interrupt_wait_ioctl()
3473 rc = -EIO; in _hl_interrupt_wait_ioctl()
3479 if (completion_rc == -ERESTARTSYS) { in _hl_interrupt_wait_ioctl()
3480 dev_err_ratelimited(hdev->dev, in _hl_interrupt_wait_ioctl()
3482 data->interrupt->interrupt_id); in _hl_interrupt_wait_ioctl()
3483 rc = -EINTR; in _hl_interrupt_wait_ioctl()
3486 /* The wait has timed out. We don't know anything beyond that in _hl_interrupt_wait_ioctl()
3491 rc = 0; in _hl_interrupt_wait_ioctl()
3502 spin_lock_irqsave(&data->interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
3503 list_del(&pend->list_node); in _hl_interrupt_wait_ioctl()
3504 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
3507 *timestamp = ktime_to_ns(pend->fence.timestamp); in _hl_interrupt_wait_ioctl()
3509 hl_cb_put(data->cq_cb); in _hl_interrupt_wait_ioctl()
3515 hl_cb_put(data->cq_cb); in _hl_interrupt_wait_ioctl()
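A hedged sketch of the flow above, under assumed names (my_waiter, my_queue_waiter): the pending node is prepared, and then, under the same spinlock the interrupt handler takes, the CQ counter is compared with the target; if the target is already reached, or the caller asked for a non-blocking check, nothing is queued and the observation time is recorded, otherwise the node is appended to the wait list and the caller goes on to sleep on its completion.

#include <linux/completion.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct my_waiter {
        struct list_head list_node;
        struct completion done;
        u64 *counter;           /* kernel mapping of the CQ counter */
        u64 target;
        ktime_t timestamp;
};

/* Returns true when no wait is needed (target reached or non-blocking call). */
static bool my_queue_waiter(struct my_waiter *w, struct list_head *wait_list,
                            spinlock_t *lock, bool nonblocking)
{
        unsigned long flags;

        init_completion(&w->done);

        spin_lock_irqsave(lock, flags);
        if (*w->counter >= w->target || nonblocking) {
                spin_unlock_irqrestore(lock, flags);
                w->timestamp = ktime_get();
                return true;
        }
        list_add_tail(&w->list_node, wait_list);
        spin_unlock_irqrestore(lock, flags);

        return false;   /* the caller now waits on &w->done with a timeout */
}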
3532 int rc = 0; in _hl_interrupt_wait_ioctl_user_addr()
3541 return -ENOMEM; in _hl_interrupt_wait_ioctl_user_addr()
3544 hl_fence_init(&pend->fence, ULONG_MAX); in _hl_interrupt_wait_ioctl_user_addr()
3549 spin_lock_irqsave(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl_user_addr()
3550 list_add_tail(&pend->list_node, &interrupt->wait_list_head); in _hl_interrupt_wait_ioctl_user_addr()
3551 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl_user_addr()
3557 dev_err(hdev->dev, "Failed to copy completion value from user\n"); in _hl_interrupt_wait_ioctl_user_addr()
3558 rc = -EFAULT; in _hl_interrupt_wait_ioctl_user_addr()
3565 pend->fence.timestamp = ktime_get(); in _hl_interrupt_wait_ioctl_user_addr()
3575 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, in _hl_interrupt_wait_ioctl_user_addr()
3581 if (completion_rc > 0) { in _hl_interrupt_wait_ioctl_user_addr()
3582 spin_lock_irqsave(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl_user_addr()
3588 reinit_completion(&pend->fence.completion); in _hl_interrupt_wait_ioctl_user_addr()
3589 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl_user_addr()
3592 dev_err(hdev->dev, "Failed to copy completion value from user\n"); in _hl_interrupt_wait_ioctl_user_addr()
3593 rc = -EFAULT; in _hl_interrupt_wait_ioctl_user_addr()
3600 } else if (pend->fence.error) { in _hl_interrupt_wait_ioctl_user_addr()
3601 dev_err_ratelimited(hdev->dev, in _hl_interrupt_wait_ioctl_user_addr()
3603 pend->fence.error); in _hl_interrupt_wait_ioctl_user_addr()
3610 } else if (completion_rc == -ERESTARTSYS) { in _hl_interrupt_wait_ioctl_user_addr()
3611 dev_err_ratelimited(hdev->dev, in _hl_interrupt_wait_ioctl_user_addr()
3613 interrupt->interrupt_id); in _hl_interrupt_wait_ioctl_user_addr()
3614 rc = -EINTR; in _hl_interrupt_wait_ioctl_user_addr()
3616 /* The wait has timed out. We don't know anything beyond that in _hl_interrupt_wait_ioctl_user_addr()
3621 rc = 0; in _hl_interrupt_wait_ioctl_user_addr()
3626 spin_lock_irqsave(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl_user_addr()
3627 list_del(&pend->list_node); in _hl_interrupt_wait_ioctl_user_addr()
3628 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl_user_addr()
3630 *timestamp = ktime_to_ns(pend->fence.timestamp); in _hl_interrupt_wait_ioctl_user_addr()
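A hedged sketch of the wake-up handling above, under assumed names and with simplified cleanup (my_wait_user_value): after a positive return from the interruptible wait, the completion is re-armed under the wait-list lock and the user-space value is re-read; if it still has not reached the target the loop waits again, so an early wake-up does not end the wait prematurely. For simplicity the full timeout is reused on every iteration, whereas a production implementation would typically continue with only the remaining time (the positive return value).

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/uaccess.h>

/*
 * Wait until the u64 at @user_addr reaches @target, re-arming after every
 * wake-up. Returns 0 on success, -ETIMEDOUT, -EINTR or -EFAULT otherwise.
 */
static int my_wait_user_value(struct completion *comp, spinlock_t *lock,
                              u64 __user *user_addr, u64 target,
                              unsigned long timeout_jiffies)
{
        u64 val;
        long rc;

        for (;;) {
                rc = wait_for_completion_interruptible_timeout(comp, timeout_jiffies);
                if (rc == -ERESTARTSYS)
                        return -EINTR;
                if (rc == 0)
                        return -ETIMEDOUT;

                /* Re-arm before checking, so a new update cannot be missed. */
                spin_lock(lock);
                reinit_completion(comp);
                spin_unlock(lock);

                if (copy_from_user(&val, user_addr, sizeof(val)))
                        return -EFAULT;
                if (val >= target)
                        return 0;
        }
}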
3641 struct hl_device *hdev = hpriv->hdev; in hl_interrupt_wait_ioctl()
3646 u64 timestamp = 0; in hl_interrupt_wait_ioctl()
3649 prop = &hdev->asic_prop; in hl_interrupt_wait_ioctl()
3651 if (!(prop->user_interrupt_count + prop->user_dec_intr_count)) { in hl_interrupt_wait_ioctl()
3652 dev_err(hdev->dev, "no user interrupts allowed"); in hl_interrupt_wait_ioctl()
3653 return -EPERM; in hl_interrupt_wait_ioctl()
3656 interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags); in hl_interrupt_wait_ioctl()
3658 first_interrupt = prop->first_available_user_interrupt; in hl_interrupt_wait_ioctl()
3659 last_interrupt = prop->first_available_user_interrupt + prop->user_interrupt_count - 1; in hl_interrupt_wait_ioctl()
3661 if (interrupt_id < prop->user_dec_intr_count) { in hl_interrupt_wait_ioctl()
3664 if (!(prop->decoder_enabled_mask & BIT(interrupt_id))) { in hl_interrupt_wait_ioctl()
3665 dev_err(hdev->dev, "interrupt on a disabled core(%u) not allowed", in hl_interrupt_wait_ioctl()
3667 return -EINVAL; in hl_interrupt_wait_ioctl()
3670 interrupt = &hdev->user_interrupt[interrupt_id]; in hl_interrupt_wait_ioctl()
3674 int_idx = interrupt_id - first_interrupt + prop->user_dec_intr_count; in hl_interrupt_wait_ioctl()
3675 interrupt = &hdev->user_interrupt[int_idx]; in hl_interrupt_wait_ioctl()
3678 interrupt = &hdev->common_user_cq_interrupt; in hl_interrupt_wait_ioctl()
3680 interrupt = &hdev->common_decoder_interrupt; in hl_interrupt_wait_ioctl()
3682 dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id); in hl_interrupt_wait_ioctl()
3683 return -EINVAL; in hl_interrupt_wait_ioctl()
3686 if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) { in hl_interrupt_wait_ioctl()
3687 struct wait_interrupt_data wait_intr_data = {0}; in hl_interrupt_wait_ioctl()
3690 wait_intr_data.mmg = &hpriv->mem_mgr; in hl_interrupt_wait_ioctl()
3691 wait_intr_data.cq_handle = args->in.cq_counters_handle; in hl_interrupt_wait_ioctl()
3692 wait_intr_data.cq_offset = args->in.cq_counters_offset; in hl_interrupt_wait_ioctl()
3693 wait_intr_data.ts_handle = args->in.timestamp_handle; in hl_interrupt_wait_ioctl()
3694 wait_intr_data.ts_offset = args->in.timestamp_offset; in hl_interrupt_wait_ioctl()
3695 wait_intr_data.target_value = args->in.target; in hl_interrupt_wait_ioctl()
3696 wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us; in hl_interrupt_wait_ioctl()
3698 if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) { in hl_interrupt_wait_ioctl()
3701 * issues while handling the flow of re-use of the same offset. in hl_interrupt_wait_ioctl()
3703 * re-use flow might request to move ts node to another interrupt list, in hl_interrupt_wait_ioctl()
3706 mutex_lock(&hpriv->ctx->ts_reg_lock); in hl_interrupt_wait_ioctl()
3708 rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data, in hl_interrupt_wait_ioctl()
3711 mutex_unlock(&hpriv->ctx->ts_reg_lock); in hl_interrupt_wait_ioctl()
3713 rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data, in hl_interrupt_wait_ioctl()
3716 rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, in hl_interrupt_wait_ioctl()
3717 args->in.interrupt_timeout_us, args->in.addr, in hl_interrupt_wait_ioctl()
3718 args->in.target, interrupt, &status, in hl_interrupt_wait_ioctl()
3725 memset(args, 0, sizeof(*args)); in hl_interrupt_wait_ioctl()
3726 args->out.status = status; in hl_interrupt_wait_ioctl()
3729 args->out.timestamp_nsec = timestamp; in hl_interrupt_wait_ioctl()
3730 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; in hl_interrupt_wait_ioctl()
3733 return 0; in hl_interrupt_wait_ioctl()
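A hedged sketch of the interrupt-id dispatch above, with an invented mask placement (MY_INTERRUPT_MASK) and illustrative bounds: the id is extracted from the ioctl flags with FIELD_GET() and then classified by range, decoder interrupts first, then the user-interrupt window, with anything else rejected.

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define MY_INTERRUPT_MASK       GENMASK(31, 24) /* illustrative field placement */

enum my_intr_class { MY_INTR_DECODER, MY_INTR_USER, MY_INTR_INVALID };

static enum my_intr_class my_classify_interrupt(u32 flags, u32 dec_count,
                                                u32 first_user, u32 user_count)
{
        u32 id = FIELD_GET(MY_INTERRUPT_MASK, flags);
        u32 last_user = first_user + user_count - 1;

        if (id < dec_count)
                return MY_INTR_DECODER;
        if (id >= first_user && id <= last_user)
                return MY_INTR_USER;
        return MY_INTR_INVALID;
}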
3738 struct hl_fpriv *hpriv = file_priv->driver_priv; in hl_wait_ioctl()
3739 struct hl_device *hdev = hpriv->hdev; in hl_wait_ioctl()
3741 u32 flags = args->in.flags; in hl_wait_ioctl()
3747 if (!hl_device_operational(hpriv->hdev, NULL) || hdev->reset_info.watchdog_active) in hl_wait_ioctl()
3748 return -EBUSY; in hl_wait_ioctl()