Lines Matching +full:cs +full:- +full:0

1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2019 HabanaLabs, Ltd.
13 * hl_hw_queue_add_ptr - add to pi or ci and check if it wraps around
23 ptr &= ((HL_QUEUE_LENGTH << 1) - 1); in hl_hw_queue_add_ptr()
28 return atomic_read(ci) & ((queue_len << 1) - 1); in queue_ci_get()
33 int delta = (q->pi - queue_ci_get(&q->ci, queue_len)); in queue_free_slots()
35 if (delta >= 0) in queue_free_slots()
36 return (queue_len - delta); in queue_free_slots()
38 return (abs(delta) - queue_len); in queue_free_slots()
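The arithmetic in queue_free_slots() only works because the producer and consumer indices run over twice the queue length, which lets a completely full ring be told apart from an empty one. A minimal standalone sketch of that trick, with illustrative names only (the queue length is assumed to be a power of two, as the driver's HL_QUEUE_LENGTH is):

#include <stdio.h>
#include <stdlib.h>

#define QUEUE_LEN 256                        /* must be a power of two */

/* Advance an index and wrap it at twice the queue length. */
static unsigned int add_ptr(unsigned int ptr, unsigned int val)
{
        ptr += val;
        ptr &= (QUEUE_LEN << 1) - 1;
        return ptr;
}

/* Same arithmetic as queue_free_slots() in the listing above. */
static unsigned int free_slots(unsigned int pi, unsigned int ci)
{
        int delta = (int)pi - (int)ci;

        if (delta >= 0)
                return QUEUE_LEN - delta;
        return abs(delta) - QUEUE_LEN;
}

int main(void)
{
        unsigned int pi = 0, ci = 0;

        printf("empty: %u free\n", free_slots(pi, ci));  /* 256 */
        pi = add_ptr(pi, QUEUE_LEN);                     /* producer fills the ring */
        printf("full:  %u free\n", free_slots(pi, ci));  /* 0 */
        ci = add_ptr(ci, 3);                             /* consumer retires 3 entries */
        printf("after: %u free\n", free_slots(pi, ci));  /* 3 */
        return 0;
}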
41 void hl_hw_queue_update_ci(struct hl_cs *cs) in hl_hw_queue_update_ci() argument
43 struct hl_device *hdev = cs->ctx->hdev; in hl_hw_queue_update_ci()
47 if (hdev->disabled) in hl_hw_queue_update_ci()
50 q = &hdev->kernel_queues[0]; in hl_hw_queue_update_ci()
53 if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW) in hl_hw_queue_update_ci()
58 * 1. All queues of a non-completion CS will never get a completion. in hl_hw_queue_update_ci()
61 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) { in hl_hw_queue_update_ci()
62 if (!cs_needs_completion(cs) || q->queue_type == QUEUE_TYPE_INT) in hl_hw_queue_update_ci()
63 atomic_add(cs->jobs_in_queue_cnt[i], &q->ci); in hl_hw_queue_update_ci()
68 * hl_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
88 bd = q->kernel_address; in hl_hw_queue_submit_bd()
89 bd += hl_pi_2_offset(q->pi); in hl_hw_queue_submit_bd()
90 bd->ctl = cpu_to_le32(ctl); in hl_hw_queue_submit_bd()
91 bd->len = cpu_to_le32(len); in hl_hw_queue_submit_bd()
92 bd->ptr = cpu_to_le64(ptr); in hl_hw_queue_submit_bd()
94 q->pi = hl_queue_inc_ptr(q->pi); in hl_hw_queue_submit_bd()
95 hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); in hl_hw_queue_submit_bd()
99 * ext_queue_sanity_checks - perform some sanity checks on external queue
109 * - Make sure we have enough space in the h/w queue
110 * - Make sure we have enough space in the completion queue
111 * - Reserve space in the completion queue (needs to be reversed if there
121 &hdev->completion_queue[q->cq_id].free_slots_cnt; in ext_queue_sanity_checks()
128 dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", in ext_queue_sanity_checks()
129 q->hw_queue_id, num_of_entries); in ext_queue_sanity_checks()
130 return -EAGAIN; in ext_queue_sanity_checks()
136 * Subtract num_of_entries from the counter unless it would go negative in ext_queue_sanity_checks()
139 * atomic_add_negative() returns true if the result is negative, i.e. no CQ room in ext_queue_sanity_checks()
141 if (atomic_add_negative(num_of_entries * -1, free_slots)) { in ext_queue_sanity_checks()
142 dev_dbg(hdev->dev, "No space for %d on CQ %d\n", in ext_queue_sanity_checks()
143 num_of_entries, q->hw_queue_id); in ext_queue_sanity_checks()
145 return -EAGAIN; in ext_queue_sanity_checks()
149 return 0; in ext_queue_sanity_checks()
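ext_queue_sanity_checks() reserves completion-queue room up front: it decrements a free-slot counter and bails out if the counter would go negative, and a later failure simply adds the entries back (the unroll_cq_resv path at the end of hl_hw_queue_schedule_cs() further down does exactly that). A userspace sketch of this reserve-or-fail pattern using C11 atomics, with illustrative names (the driver itself uses atomic_t and atomic_add_negative()):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int cq_free_slots = 4;         /* hypothetical CQ with 4 free slots */

/* Try to reserve @n completion-queue slots; false means no room (-EAGAIN). */
static bool reserve_cq_slots(int n)
{
        /* equivalent of atomic_add_negative(num_of_entries * -1, free_slots) */
        if (atomic_fetch_sub(&cq_free_slots, n) - n < 0) {
                atomic_fetch_add(&cq_free_slots, n);  /* roll the reservation back */
                return false;
        }
        return true;
}

int main(void)
{
        printf("reserve 3: %s\n", reserve_cq_slots(3) ? "ok" : "-EAGAIN");
        printf("reserve 2: %s\n", reserve_cq_slots(2) ? "ok" : "-EAGAIN");
        printf("slots left: %d\n", atomic_load(&cq_free_slots));
        return 0;
}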
153 * int_queue_sanity_checks - perform some sanity checks on internal queue
162 * - Make sure we have enough space in the h/w queue
171 if (num_of_entries > q->int_queue_len) { in int_queue_sanity_checks()
172 dev_err(hdev->dev, in int_queue_sanity_checks()
174 q->hw_queue_id, num_of_entries); in int_queue_sanity_checks()
175 return -ENOMEM; in int_queue_sanity_checks()
179 free_slots_cnt = queue_free_slots(q, q->int_queue_len); in int_queue_sanity_checks()
182 dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", in int_queue_sanity_checks()
183 q->hw_queue_id, num_of_entries); in int_queue_sanity_checks()
184 return -EAGAIN; in int_queue_sanity_checks()
187 return 0; in int_queue_sanity_checks()
191 * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
197 * more than once per CS for the same queue
209 dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", in hw_queue_sanity_checks()
210 q->hw_queue_id, num_of_entries); in hw_queue_sanity_checks()
211 return -EAGAIN; in hw_queue_sanity_checks()
214 return 0; in hw_queue_sanity_checks()
218 * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
231 struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; in hl_hw_queue_send_cb_no_cmpl()
232 int rc = 0; in hl_hw_queue_send_cb_no_cmpl()
234 hdev->asic_funcs->hw_queues_lock(hdev); in hl_hw_queue_send_cb_no_cmpl()
236 if (hdev->disabled) { in hl_hw_queue_send_cb_no_cmpl()
237 rc = -EPERM; in hl_hw_queue_send_cb_no_cmpl()
246 if (q->queue_type != QUEUE_TYPE_HW) { in hl_hw_queue_send_cb_no_cmpl()
252 hl_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); in hl_hw_queue_send_cb_no_cmpl()
255 hdev->asic_funcs->hw_queues_unlock(hdev); in hl_hw_queue_send_cb_no_cmpl()
261 * ext_queue_schedule_job - submit a JOB to an external queue
270 struct hl_device *hdev = job->cs->ctx->hdev; in ext_queue_schedule_job()
271 struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; in ext_queue_schedule_job()
284 ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK); in ext_queue_schedule_job()
286 cb = job->patched_cb; in ext_queue_schedule_job()
287 len = job->job_cb_size; in ext_queue_schedule_job()
288 ptr = cb->bus_address; in ext_queue_schedule_job()
290 /* Skip completion flow in case this is a non-completion CS */ in ext_queue_schedule_job()
291 if (!cs_needs_completion(job->cs)) in ext_queue_schedule_job()
295 ((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT) in ext_queue_schedule_job()
307 cq = &hdev->completion_queue[q->cq_id]; in ext_queue_schedule_job()
308 cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry); in ext_queue_schedule_job()
310 hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len, in ext_queue_schedule_job()
311 job->user_cb_size, in ext_queue_schedule_job()
314 q->msi_vec, in ext_queue_schedule_job()
315 job->contains_dma_pkt); in ext_queue_schedule_job()
317 q->shadow_queue[hl_pi_2_offset(q->pi)] = job; in ext_queue_schedule_job()
319 cq->pi = hl_cq_inc_ptr(cq->pi); in ext_queue_schedule_job()
326 * int_queue_schedule_job - submit a JOB to an internal queue
335 struct hl_device *hdev = job->cs->ctx->hdev; in int_queue_schedule_job()
336 struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; in int_queue_schedule_job()
340 bd.ctl = 0; in int_queue_schedule_job()
341 bd.len = cpu_to_le32(job->job_cb_size); in int_queue_schedule_job()
343 if (job->is_kernel_allocated_cb) in int_queue_schedule_job()
347 bd.ptr = cpu_to_le64(job->user_cb->bus_address); in int_queue_schedule_job()
349 bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb); in int_queue_schedule_job()
351 pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd); in int_queue_schedule_job()
353 q->pi++; in int_queue_schedule_job()
354 q->pi &= ((q->int_queue_len << 1) - 1); in int_queue_schedule_job()
356 hdev->asic_funcs->pqe_write(hdev, pi, &bd); in int_queue_schedule_job()
358 hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); in int_queue_schedule_job()
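int_queue_schedule_job() above masks the same producer index in two different ways: the low bits (int_queue_len - 1) pick the ring slot that gets written, while the increment wraps at twice the length so the occupancy arithmetic sketched earlier keeps working. A small illustrative helper (hypothetical names; the length is assumed to be a power of two):

#include <stdint.h>

/* Illustrative descriptor with the same shape as the driver's hl_bd. */
struct ring_bd {
        uint32_t ctl;
        uint32_t len;
        uint64_t ptr;
};

/* Write one descriptor and advance the producer index. */
static void push_bd(struct ring_bd *ring, unsigned int *pi,
                    unsigned int queue_len, const struct ring_bd *bd)
{
        /* low bits select the slot that is actually written */
        ring[*pi & (queue_len - 1)] = *bd;

        /* the index itself wraps at twice the length, for free-slot math */
        *pi = (*pi + 1) & ((queue_len << 1) - 1);
}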
362 * hw_queue_schedule_job - submit a JOB to a H/W queue
371 struct hl_device *hdev = job->cs->ctx->hdev; in hw_queue_schedule_job()
372 struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; in hw_queue_schedule_job()
382 offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1); in hw_queue_schedule_job()
384 ((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK); in hw_queue_schedule_job()
386 len = job->job_cb_size; in hw_queue_schedule_job()
394 if (job->patched_cb) in hw_queue_schedule_job()
395 ptr = job->patched_cb->bus_address; in hw_queue_schedule_job()
396 else if (job->is_kernel_allocated_cb) in hw_queue_schedule_job()
397 ptr = job->user_cb->bus_address; in hw_queue_schedule_job()
399 ptr = (u64) (uintptr_t) job->user_cb; in hw_queue_schedule_job()
410 int rc = 0; in init_signal_cs()
412 q_idx = job->hw_queue_id; in init_signal_cs()
413 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in init_signal_cs()
414 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; in init_signal_cs()
416 cs_cmpl->hw_sob = hw_sob; in init_signal_cs()
417 cs_cmpl->sob_val = prop->next_sob_val; in init_signal_cs()
419 dev_dbg(hdev->dev, in init_signal_cs()
421 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx, in init_signal_cs()
422 cs_cmpl->cs_seq); in init_signal_cs()
427 hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, in init_signal_cs()
428 cs_cmpl->hw_sob->sob_id, 0, true); in init_signal_cs()
433 job->cs->sob_addr_offset = hw_sob->sob_addr; in init_signal_cs()
434 job->cs->initial_sob_count = prop->next_sob_val - 1; in init_signal_cs()
440 struct hl_cs *cs, struct hl_cs_job *job, in hl_hw_queue_encaps_sig_set_sob_info() argument
443 struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl; in hl_hw_queue_encaps_sig_set_sob_info()
444 u32 offset = 0; in hl_hw_queue_encaps_sig_set_sob_info()
446 cs_cmpl->hw_sob = handle->hw_sob; in hl_hw_queue_encaps_sig_set_sob_info()
454 * if the user set the wait offset to 0, treat it as a legacy wait cs, in hl_hw_queue_encaps_sig_set_sob_info()
457 if (job->encaps_sig_wait_offset) in hl_hw_queue_encaps_sig_set_sob_info()
458 offset = job->encaps_sig_wait_offset - 1; in hl_hw_queue_encaps_sig_set_sob_info()
460 cs_cmpl->sob_val = handle->pre_sob_val + offset; in hl_hw_queue_encaps_sig_set_sob_info()
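The offset handling above boils down to one rule: a user wait offset of 0 is the legacy behavior and targets the same SOB value as offset 1, otherwise the 1-based offset is converted to 0-based and added to the SOB value captured when the encapsulated signals were reserved. A minimal sketch with a hypothetical helper name:

#include <stdint.h>

/* Hypothetical helper mirroring the listed logic; not part of the driver. */
static inline uint32_t encaps_wait_sob_val(uint32_t pre_sob_val,
                                           uint32_t wait_offset)
{
        /* offset 0 == legacy wait; offsets are otherwise 1-based */
        uint32_t offset = wait_offset ? wait_offset - 1 : 0;

        return pre_sob_val + offset;
}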
463 static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs, in init_wait_cs() argument
471 q_idx = job->hw_queue_id; in init_wait_cs()
472 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in init_wait_cs()
474 signal_cs_cmpl = container_of(cs->signal_fence, in init_wait_cs()
478 if (cs->encaps_signals) { in init_wait_cs()
483 hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl); in init_wait_cs()
485 …dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, of… in init_wait_cs()
486 cs->encaps_sig_hdl->q_idx, in init_wait_cs()
487 cs->encaps_sig_hdl->cs_seq, in init_wait_cs()
488 cs_cmpl->sob_val, in init_wait_cs()
489 job->encaps_sig_wait_offset); in init_wait_cs()
491 /* Copy the SOB id and value of the signal CS */ in init_wait_cs()
492 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; in init_wait_cs()
493 cs_cmpl->sob_val = signal_cs_cmpl->sob_val; in init_wait_cs()
496 /* check again if the signal cs already completed. in init_wait_cs()
497 * if yes then don't send any wait cs since the hw_sob in init_wait_cs()
500 * while wait cs is not submitted. in init_wait_cs()
508 spin_lock(&signal_cs_cmpl->lock); in init_wait_cs()
510 if (completion_done(&cs->signal_fence->completion)) { in init_wait_cs()
511 spin_unlock(&signal_cs_cmpl->lock); in init_wait_cs()
512 return -EINVAL; in init_wait_cs()
515 kref_get(&cs_cmpl->hw_sob->kref); in init_wait_cs()
517 spin_unlock(&signal_cs_cmpl->lock); in init_wait_cs()
519 dev_dbg(hdev->dev, in init_wait_cs()
520 "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n", in init_wait_cs()
521 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, in init_wait_cs()
522 prop->base_mon_id, q_idx, cs->sequence); in init_wait_cs()
524 wait_prop.data = (void *) job->patched_cb; in init_wait_cs()
525 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id; in init_wait_cs()
526 wait_prop.sob_mask = 0x1; in init_wait_cs()
527 wait_prop.sob_val = cs_cmpl->sob_val; in init_wait_cs()
528 wait_prop.mon_id = prop->base_mon_id; in init_wait_cs()
530 wait_prop.size = 0; in init_wait_cs()
532 hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop); in init_wait_cs()
535 hl_fence_put(cs->signal_fence); in init_wait_cs()
536 cs->signal_fence = NULL; in init_wait_cs()
538 return 0; in init_wait_cs()
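init_wait_cs() has to decide whether the signal it depends on already fired, and if not, pin the signal's sync object so it stays valid until the wait executes; both the check and the reference grab happen under the same lock so the signal cannot complete in between. A userspace sketch of that pattern with illustrative names (a pthread mutex and a plain counter stand in for the completion, spinlock and kref):

#include <pthread.h>
#include <stdbool.h>

struct signal_state {
        pthread_mutex_t lock;    /* stands in for signal_cs_cmpl->lock */
        bool completed;          /* stands in for completion_done()    */
        int sob_refcount;        /* stands in for the hw_sob kref      */
};

/* Returns true if a reference was taken; false maps to the -EINVAL path. */
static bool get_sob_ref_if_pending(struct signal_state *s)
{
        bool pending;

        pthread_mutex_lock(&s->lock);
        pending = !s->completed;
        if (pending)
                s->sob_refcount++;       /* safe: the lock is still held */
        pthread_mutex_unlock(&s->lock);

        return pending;
}

int main(void)
{
        static struct signal_state s = { PTHREAD_MUTEX_INITIALIZER, false, 0 };

        return get_sob_ref_if_pending(&s) ? 0 : 1;
}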
542 * init_signal_wait_cs - initialize a signal/wait CS
543 * @cs: pointer to the signal/wait CS
547 static int init_signal_wait_cs(struct hl_cs *cs) in init_signal_wait_cs() argument
549 struct hl_ctx *ctx = cs->ctx; in init_signal_wait_cs()
550 struct hl_device *hdev = ctx->hdev; in init_signal_wait_cs()
553 container_of(cs->fence, struct hl_cs_compl, base_fence); in init_signal_wait_cs()
554 int rc = 0; in init_signal_wait_cs()
556 /* There is only one job in a signal/wait CS */ in init_signal_wait_cs()
557 job = list_first_entry(&cs->job_list, struct hl_cs_job, in init_signal_wait_cs()
560 if (cs->type & CS_TYPE_SIGNAL) in init_signal_wait_cs()
562 else if (cs->type & CS_TYPE_WAIT) in init_signal_wait_cs()
563 rc = init_wait_cs(hdev, cs, job, cs_cmpl); in init_signal_wait_cs()
569 (struct hl_device *hdev, struct hl_cs *cs) in encaps_sig_first_staged_cs_handler() argument
572 container_of(cs->fence, in encaps_sig_first_staged_cs_handler()
576 int rc = 0; in encaps_sig_first_staged_cs_handler()
578 mgr = &cs->ctx->sig_mgr; in encaps_sig_first_staged_cs_handler()
580 spin_lock(&mgr->lock); in encaps_sig_first_staged_cs_handler()
581 encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id); in encaps_sig_first_staged_cs_handler()
584 * Set handler CS sequence, in encaps_sig_first_staged_cs_handler()
585 * the CS which contains the encapsulated signals. in encaps_sig_first_staged_cs_handler()
587 encaps_sig_hdl->cs_seq = cs->sequence; in encaps_sig_first_staged_cs_handler()
592 cs_cmpl->encaps_signals = true; in encaps_sig_first_staged_cs_handler()
593 cs_cmpl->encaps_sig_hdl = encaps_sig_hdl; in encaps_sig_first_staged_cs_handler()
599 cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob; in encaps_sig_first_staged_cs_handler()
600 cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val + in encaps_sig_first_staged_cs_handler()
601 encaps_sig_hdl->count; in encaps_sig_first_staged_cs_handler()
603 …dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob… in encaps_sig_first_staged_cs_handler()
604 cs->sequence, encaps_sig_hdl->id, in encaps_sig_first_staged_cs_handler()
605 encaps_sig_hdl->count, in encaps_sig_first_staged_cs_handler()
606 encaps_sig_hdl->q_idx, in encaps_sig_first_staged_cs_handler()
607 cs_cmpl->hw_sob->sob_id, in encaps_sig_first_staged_cs_handler()
608 cs_cmpl->sob_val); in encaps_sig_first_staged_cs_handler()
611 dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n", in encaps_sig_first_staged_cs_handler()
612 cs->encaps_sig_hdl_id); in encaps_sig_first_staged_cs_handler()
613 rc = -EINVAL; in encaps_sig_first_staged_cs_handler()
616 spin_unlock(&mgr->lock); in encaps_sig_first_staged_cs_handler()
622 * hl_hw_queue_schedule_cs - schedule a command submission
623 * @cs: pointer to the CS
625 int hl_hw_queue_schedule_cs(struct hl_cs *cs) in hl_hw_queue_schedule_cs() argument
629 struct hl_ctx *ctx = cs->ctx; in hl_hw_queue_schedule_cs()
630 struct hl_device *hdev = ctx->hdev; in hl_hw_queue_schedule_cs()
633 int rc = 0, i, cq_cnt; in hl_hw_queue_schedule_cs()
637 cntr = &hdev->aggregated_cs_counters; in hl_hw_queue_schedule_cs()
639 hdev->asic_funcs->hw_queues_lock(hdev); in hl_hw_queue_schedule_cs()
642 atomic64_inc(&cntr->device_in_reset_drop_cnt); in hl_hw_queue_schedule_cs()
643 atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt); in hl_hw_queue_schedule_cs()
644 dev_err(hdev->dev, in hl_hw_queue_schedule_cs()
645 "device is %s, CS rejected!\n", hdev->status[status]); in hl_hw_queue_schedule_cs()
646 rc = -EPERM; in hl_hw_queue_schedule_cs()
650 max_queues = hdev->asic_prop.max_queues; in hl_hw_queue_schedule_cs()
652 q = &hdev->kernel_queues[0]; in hl_hw_queue_schedule_cs()
653 for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) { in hl_hw_queue_schedule_cs()
654 if (cs->jobs_in_queue_cnt[i]) { in hl_hw_queue_schedule_cs()
655 switch (q->queue_type) { in hl_hw_queue_schedule_cs()
658 cs->jobs_in_queue_cnt[i], in hl_hw_queue_schedule_cs()
659 cs_needs_completion(cs) ? in hl_hw_queue_schedule_cs()
664 cs->jobs_in_queue_cnt[i]); in hl_hw_queue_schedule_cs()
668 cs->jobs_in_queue_cnt[i]); in hl_hw_queue_schedule_cs()
671 dev_err(hdev->dev, "Queue type %d is invalid\n", in hl_hw_queue_schedule_cs()
672 q->queue_type); in hl_hw_queue_schedule_cs()
673 rc = -EINVAL; in hl_hw_queue_schedule_cs()
679 &ctx->cs_counters.queue_full_drop_cnt); in hl_hw_queue_schedule_cs()
680 atomic64_inc(&cntr->queue_full_drop_cnt); in hl_hw_queue_schedule_cs()
684 if (q->queue_type == QUEUE_TYPE_EXT) in hl_hw_queue_schedule_cs()
689 if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) { in hl_hw_queue_schedule_cs()
690 rc = init_signal_wait_cs(cs); in hl_hw_queue_schedule_cs()
693 } else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) { in hl_hw_queue_schedule_cs()
694 rc = hdev->asic_funcs->collective_wait_init_cs(cs); in hl_hw_queue_schedule_cs()
699 rc = hdev->asic_funcs->pre_schedule_cs(cs); in hl_hw_queue_schedule_cs()
701 dev_err(hdev->dev, in hl_hw_queue_schedule_cs()
702 "Failed in pre-submission operations of CS %d.%llu\n", in hl_hw_queue_schedule_cs()
703 ctx->asid, cs->sequence); in hl_hw_queue_schedule_cs()
707 hdev->shadow_cs_queue[cs->sequence & in hl_hw_queue_schedule_cs()
708 (hdev->asic_prop.max_pending_cs - 1)] = cs; in hl_hw_queue_schedule_cs()
710 if (cs->encaps_signals && cs->staged_first) { in hl_hw_queue_schedule_cs()
711 rc = encaps_sig_first_staged_cs_handler(hdev, cs); in hl_hw_queue_schedule_cs()
716 spin_lock(&hdev->cs_mirror_lock); in hl_hw_queue_schedule_cs()
718 /* Verify staged CS exists and add to the staged list */ in hl_hw_queue_schedule_cs()
719 if (cs->staged_cs && !cs->staged_first) { in hl_hw_queue_schedule_cs()
722 staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence); in hl_hw_queue_schedule_cs()
724 dev_err(hdev->dev, in hl_hw_queue_schedule_cs()
726 cs->staged_sequence); in hl_hw_queue_schedule_cs()
727 rc = -EINVAL; in hl_hw_queue_schedule_cs()
732 dev_err(hdev->dev, in hl_hw_queue_schedule_cs()
734 cs->staged_sequence); in hl_hw_queue_schedule_cs()
735 rc = -EINVAL; in hl_hw_queue_schedule_cs()
739 list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node); in hl_hw_queue_schedule_cs()
741 /* update stream map of the first CS */ in hl_hw_queue_schedule_cs()
742 if (hdev->supports_wait_for_multi_cs) in hl_hw_queue_schedule_cs()
743 staged_cs->fence->stream_master_qid_map |= in hl_hw_queue_schedule_cs()
744 cs->fence->stream_master_qid_map; in hl_hw_queue_schedule_cs()
747 list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list); in hl_hw_queue_schedule_cs()
749 /* Queue TDR if the CS is the first entry and if timeout is wanted */ in hl_hw_queue_schedule_cs()
750 first_entry = list_first_entry(&hdev->cs_mirror_list, in hl_hw_queue_schedule_cs()
751 struct hl_cs, mirror_node) == cs; in hl_hw_queue_schedule_cs()
752 if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) && in hl_hw_queue_schedule_cs()
753 first_entry && cs_needs_timeout(cs)) { in hl_hw_queue_schedule_cs()
754 cs->tdr_active = true; in hl_hw_queue_schedule_cs()
755 schedule_delayed_work(&cs->work_tdr, cs->timeout_jiffies); in hl_hw_queue_schedule_cs()
759 spin_unlock(&hdev->cs_mirror_lock); in hl_hw_queue_schedule_cs()
761 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in hl_hw_queue_schedule_cs()
762 switch (job->queue_type) { in hl_hw_queue_schedule_cs()
776 cs->submitted = true; in hl_hw_queue_schedule_cs()
781 spin_unlock(&hdev->cs_mirror_lock); in hl_hw_queue_schedule_cs()
783 q = &hdev->kernel_queues[0]; in hl_hw_queue_schedule_cs()
784 for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) { in hl_hw_queue_schedule_cs()
785 if ((q->queue_type == QUEUE_TYPE_EXT) && in hl_hw_queue_schedule_cs()
786 (cs->jobs_in_queue_cnt[i])) { in hl_hw_queue_schedule_cs()
788 &hdev->completion_queue[i].free_slots_cnt; in hl_hw_queue_schedule_cs()
789 atomic_add(cs->jobs_in_queue_cnt[i], free_slots); in hl_hw_queue_schedule_cs()
790 cq_cnt--; in hl_hw_queue_schedule_cs()
795 hdev->asic_funcs->hw_queues_unlock(hdev); in hl_hw_queue_schedule_cs()
801 * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
808 struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; in hl_hw_queue_inc_ci_kernel()
810 atomic_inc(&q->ci); in hl_hw_queue_inc_ci_kernel()
820 p = hl_cpu_accessible_dma_pool_alloc(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address); in ext_and_cpu_queue_init()
822 p = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address, in ext_and_cpu_queue_init()
825 return -ENOMEM; in ext_and_cpu_queue_init()
827 q->kernel_address = p; in ext_and_cpu_queue_init()
829 q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL); in ext_and_cpu_queue_init()
830 if (!q->shadow_queue) { in ext_and_cpu_queue_init()
831 dev_err(hdev->dev, in ext_and_cpu_queue_init()
833 q->hw_queue_id); in ext_and_cpu_queue_init()
834 rc = -ENOMEM; in ext_and_cpu_queue_init()
839 atomic_set(&q->ci, 0); in ext_and_cpu_queue_init()
840 q->pi = 0; in ext_and_cpu_queue_init()
842 return 0; in ext_and_cpu_queue_init()
846 hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address); in ext_and_cpu_queue_init()
848 hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address, in ext_and_cpu_queue_init()
849 q->bus_address); in ext_and_cpu_queue_init()
858 p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id, in int_queue_init()
859 &q->bus_address, &q->int_queue_len); in int_queue_init()
861 dev_err(hdev->dev, in int_queue_init()
863 q->hw_queue_id); in int_queue_init()
864 return -EFAULT; in int_queue_init()
867 q->kernel_address = p; in int_queue_init()
868 q->pi = 0; in int_queue_init()
869 atomic_set(&q->ci, 0); in int_queue_init()
871 return 0; in int_queue_init()
888 p = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address, in hw_queue_init()
891 return -ENOMEM; in hw_queue_init()
893 q->kernel_address = p; in hw_queue_init()
896 atomic_set(&q->ci, 0); in hw_queue_init()
897 q->pi = 0; in hw_queue_init()
899 return 0; in hw_queue_init()
905 struct asic_fixed_properties *prop = &hdev->asic_prop; in sync_stream_queue_init()
909 sync_stream_prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in sync_stream_queue_init()
916 if (hdev->kernel_queues[q_idx].collective_mode == in sync_stream_queue_init()
918 reserved_mon_idx = hdev->collective_mon_idx; in sync_stream_queue_init()
921 sync_stream_prop->collective_mstr_mon_id[0] = in sync_stream_queue_init()
922 prop->collective_first_mon + reserved_mon_idx; in sync_stream_queue_init()
925 sync_stream_prop->collective_mstr_mon_id[1] = in sync_stream_queue_init()
926 prop->collective_first_mon + reserved_mon_idx + 1; in sync_stream_queue_init()
928 hdev->collective_mon_idx += HL_COLLECTIVE_RSVD_MSTR_MONS; in sync_stream_queue_init()
929 } else if (hdev->kernel_queues[q_idx].collective_mode == in sync_stream_queue_init()
931 reserved_mon_idx = hdev->collective_mon_idx++; in sync_stream_queue_init()
934 sync_stream_prop->collective_slave_mon_id = in sync_stream_queue_init()
935 prop->collective_first_mon + reserved_mon_idx; in sync_stream_queue_init()
938 if (!hdev->kernel_queues[q_idx].supports_sync_stream) in sync_stream_queue_init()
941 queue_idx = hdev->sync_stream_queue_idx++; in sync_stream_queue_init()
943 sync_stream_prop->base_sob_id = prop->sync_stream_first_sob + in sync_stream_queue_init()
945 sync_stream_prop->base_mon_id = prop->sync_stream_first_mon + in sync_stream_queue_init()
947 sync_stream_prop->next_sob_val = 1; in sync_stream_queue_init()
948 sync_stream_prop->curr_sob_offset = 0; in sync_stream_queue_init()
950 for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) { in sync_stream_queue_init()
951 hw_sob = &sync_stream_prop->hw_sob[sob]; in sync_stream_queue_init()
952 hw_sob->hdev = hdev; in sync_stream_queue_init()
953 hw_sob->sob_id = sync_stream_prop->base_sob_id + sob; in sync_stream_queue_init()
954 hw_sob->sob_addr = in sync_stream_queue_init()
955 hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id); in sync_stream_queue_init()
956 hw_sob->q_idx = q_idx; in sync_stream_queue_init()
957 kref_init(&hw_sob->kref); in sync_stream_queue_init()
964 &hdev->kernel_queues[q_idx].sync_stream_prop; in sync_stream_queue_reset()
967 * In case we got here due to a stuck CS, the refcnt might be bigger in sync_stream_queue_reset()
970 kref_init(&prop->hw_sob[prop->curr_sob_offset].kref); in sync_stream_queue_reset()
971 prop->curr_sob_offset = 0; in sync_stream_queue_reset()
972 prop->next_sob_val = 1; in sync_stream_queue_reset()
976 * queue_init - main initialization function for H/W queue object
982 * Allocate dma-able memory for the queue and initialize fields
983 * Returns 0 on success
990 q->hw_queue_id = hw_queue_id; in queue_init()
992 switch (q->queue_type) { in queue_init()
1006 q->valid = 0; in queue_init()
1007 return 0; in queue_init()
1009 dev_crit(hdev->dev, "wrong queue type %d during init\n", in queue_init()
1010 q->queue_type); in queue_init()
1011 rc = -EINVAL; in queue_init()
1015 sync_stream_queue_init(hdev, q->hw_queue_id); in queue_init()
1020 q->valid = 1; in queue_init()
1022 return 0; in queue_init()
1026 * queue_fini - destroy queue
1035 if (!q->valid) in queue_fini()
1056 if (q->queue_type == QUEUE_TYPE_INT) in queue_fini()
1059 kfree(q->shadow_queue); in queue_fini()
1061 if (q->queue_type == QUEUE_TYPE_CPU) in queue_fini()
1062 hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address); in queue_fini()
1064 hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address, in queue_fini()
1065 q->bus_address); in queue_fini()
1070 struct asic_fixed_properties *asic = &hdev->asic_prop; in hl_hw_queues_create()
1074 hdev->kernel_queues = kcalloc(asic->max_queues, in hl_hw_queues_create()
1075 sizeof(*hdev->kernel_queues), GFP_KERNEL); in hl_hw_queues_create()
1077 if (!hdev->kernel_queues) { in hl_hw_queues_create()
1078 dev_err(hdev->dev, "Not enough memory for H/W queues\n"); in hl_hw_queues_create()
1079 return -ENOMEM; in hl_hw_queues_create()
1083 for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues; in hl_hw_queues_create()
1084 i < asic->max_queues ; i++, q_ready_cnt++, q++) { in hl_hw_queues_create()
1086 q->queue_type = asic->hw_queues_props[i].type; in hl_hw_queues_create()
1087 q->supports_sync_stream = in hl_hw_queues_create()
1088 asic->hw_queues_props[i].supports_sync_stream; in hl_hw_queues_create()
1089 q->collective_mode = asic->hw_queues_props[i].collective_mode; in hl_hw_queues_create()
1092 dev_err(hdev->dev, in hl_hw_queues_create()
1098 return 0; in hl_hw_queues_create()
1101 for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++) in hl_hw_queues_create()
1104 kfree(hdev->kernel_queues); in hl_hw_queues_create()
1112 u32 max_queues = hdev->asic_prop.max_queues; in hl_hw_queues_destroy()
1115 for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) in hl_hw_queues_destroy()
1118 kfree(hdev->kernel_queues); in hl_hw_queues_destroy()
1124 u32 max_queues = hdev->asic_prop.max_queues; in hl_hw_queue_reset()
1127 for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) { in hl_hw_queue_reset()
1128 if ((!q->valid) || in hl_hw_queue_reset()
1129 ((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU))) in hl_hw_queue_reset()
1131 q->pi = 0; in hl_hw_queue_reset()
1132 atomic_set(&q->ci, 0); in hl_hw_queue_reset()
1134 if (q->supports_sync_stream) in hl_hw_queue_reset()
1135 sync_stream_queue_reset(hdev, q->hw_queue_id); in hl_hw_queue_reset()