Lines Matching +full:needs +full:- +full:reset +full:- +full:on +full:- +full:resume
1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2021 HabanaLabs, Ltd.
52 "Do device reset on lockup (0 = no, 1 = yes, default yes)");
113 * get_asic_type - translate device id to asic type
118 * In case of unidentified device, return -1
122 struct pci_dev *pdev = hdev->pdev; in get_asic_type()
125 switch (pdev->device) { in get_asic_type()
136 switch (pdev->revision) { in get_asic_type()
171 * hl_device_open() - open function for habanalabs device.
186 return -ENOMEM; in hl_device_open()
188 hpriv->hdev = hdev; in hl_device_open()
189 mutex_init(&hpriv->notifier_event.lock); in hl_device_open()
190 mutex_init(&hpriv->restore_phase_mutex); in hl_device_open()
191 mutex_init(&hpriv->ctx_lock); in hl_device_open()
192 kref_init(&hpriv->refcount); in hl_device_open()
194 hl_ctx_mgr_init(&hpriv->ctx_mgr); in hl_device_open()
195 hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr); in hl_device_open()
197 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); in hl_device_open()
199 mutex_lock(&hdev->fpriv_list_lock); in hl_device_open()
202 dev_dbg_ratelimited(hdev->dev, in hl_device_open()
204 dev_name(hdev->dev), hdev->status[status]); in hl_device_open()
208 rc = -EAGAIN; in hl_device_open()
210 rc = -EPERM; in hl_device_open()
215 if (hdev->is_in_dram_scrub) { in hl_device_open()
216 dev_dbg_ratelimited(hdev->dev, in hl_device_open()
218 dev_name(hdev->dev)); in hl_device_open()
219 rc = -EAGAIN; in hl_device_open()
223 if (hdev->compute_ctx_in_release) { in hl_device_open()
224 dev_dbg_ratelimited(hdev->dev, in hl_device_open()
226 dev_name(hdev->dev)); in hl_device_open()
227 rc = -EAGAIN; in hl_device_open()
231 if (hdev->is_compute_ctx_active) { in hl_device_open()
232 dev_dbg_ratelimited(hdev->dev, in hl_device_open()
233 "Can't open %s because another user is working on it\n", in hl_device_open()
234 dev_name(hdev->dev)); in hl_device_open()
235 rc = -EBUSY; in hl_device_open()
241 dev_err(hdev->dev, "Failed to create context %d\n", rc); in hl_device_open()
245 list_add(&hpriv->dev_node, &hdev->fpriv_list); in hl_device_open()
246 mutex_unlock(&hdev->fpriv_list_lock); in hl_device_open()
248 hdev->asic_funcs->send_device_activity(hdev, true); in hl_device_open()
252 hl_enable_err_info_capture(&hdev->captured_err_info); in hl_device_open()
254 hdev->open_counter++; in hl_device_open()
255 hdev->last_successful_open_jif = jiffies; in hl_device_open()
256 hdev->last_successful_open_ktime = ktime_get(); in hl_device_open()
258 file_priv->driver_priv = hpriv; in hl_device_open()
259 hpriv->file_priv = file_priv; in hl_device_open()
264 mutex_unlock(&hdev->fpriv_list_lock); in hl_device_open()
265 hl_mem_mgr_fini(&hpriv->mem_mgr, NULL); in hl_device_open()
266 hl_mem_mgr_idr_destroy(&hpriv->mem_mgr); in hl_device_open()
267 hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); in hl_device_open()
268 mutex_destroy(&hpriv->ctx_lock); in hl_device_open()
269 mutex_destroy(&hpriv->restore_phase_mutex); in hl_device_open()
270 mutex_destroy(&hpriv->notifier_event.lock); in hl_device_open()
271 put_pid(hpriv->taskpid); in hl_device_open()
291 return -ENXIO; in hl_device_open_ctrl()
296 return -ENOMEM; in hl_device_open_ctrl()
301 hpriv->hdev = hdev; in hl_device_open_ctrl()
302 filp->private_data = hpriv; in hl_device_open_ctrl()
306 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); in hl_device_open_ctrl()
308 mutex_lock(&hdev->fpriv_ctrl_list_lock); in hl_device_open_ctrl()
311 dev_dbg_ratelimited(hdev->dev_ctrl, in hl_device_open_ctrl()
313 dev_name(hdev->dev_ctrl)); in hl_device_open_ctrl()
314 rc = -EPERM; in hl_device_open_ctrl()
318 list_add(&hpriv->dev_node, &hdev->fpriv_ctrl_list); in hl_device_open_ctrl()
319 mutex_unlock(&hdev->fpriv_ctrl_list_lock); in hl_device_open_ctrl()
324 mutex_unlock(&hdev->fpriv_ctrl_list_lock); in hl_device_open_ctrl()
325 filp->private_data = NULL; in hl_device_open_ctrl()
326 put_pid(hpriv->taskpid); in hl_device_open_ctrl()
335 hdev->nic_ports_mask = 0; in set_driver_behavior_per_device()
336 hdev->fw_components = FW_TYPE_ALL_TYPES; in set_driver_behavior_per_device()
337 hdev->cpu_queues_enable = 1; in set_driver_behavior_per_device()
338 hdev->pldm = 0; in set_driver_behavior_per_device()
339 hdev->hard_reset_on_fw_events = 1; in set_driver_behavior_per_device()
340 hdev->bmc_enable = 1; in set_driver_behavior_per_device()
341 hdev->reset_on_preboot_fail = 1; in set_driver_behavior_per_device()
342 hdev->heartbeat = 1; in set_driver_behavior_per_device()
347 hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type); in copy_kernel_module_params_to_device()
349 hdev->major = hl_major; in copy_kernel_module_params_to_device()
350 hdev->memory_scrub = memory_scrub; in copy_kernel_module_params_to_device()
351 hdev->reset_on_lockup = reset_on_lockup; in copy_kernel_module_params_to_device()
352 hdev->boot_error_status_mask = boot_error_status_mask; in copy_kernel_module_params_to_device()
357 switch (hdev->asic_type) { in fixup_device_params_per_asic()
364 hdev->timeout_jiffies = secs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED); in fixup_device_params_per_asic()
366 hdev->reset_upon_device_release = 0; in fixup_device_params_per_asic()
370 hdev->reset_upon_device_release = 0; in fixup_device_params_per_asic()
374 hdev->reset_upon_device_release = 1; in fixup_device_params_per_asic()
385 hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; in fixup_device_params()
386 hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; in fixup_device_params()
389 hdev->timeout_jiffies = secs_to_jiffies(tmp_timeout); in fixup_device_params()
391 hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; in fixup_device_params()
393 hdev->stop_on_err = true; in fixup_device_params()
394 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; in fixup_device_params()
395 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; in fixup_device_params()
398 hdev->disabled = true; in fixup_device_params()
400 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) && in fixup_device_params()
401 (hdev->fw_components & ~FW_TYPE_PREBOOT_CPU)) { in fixup_device_params()
403 return -EINVAL; in fixup_device_params()
407 if (!hdev->cpu_queues_enable) in fixup_device_params()
408 hdev->heartbeat = 0; in fixup_device_params()
423 if (id == -ENOSPC) in allocate_device_id()
425 return -EBUSY; in allocate_device_id()
428 hdev->id = id; in allocate_device_id()
434 hdev->cdev_idx = hdev->id; in allocate_device_id()
440 * create_hdev - create habanalabs device instance
456 hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm); in create_hdev()
460 hdev->dev = hdev->drm.dev; in create_hdev()
463 hdev->pdev = pdev; in create_hdev()
466 strscpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX); in create_hdev()
467 strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX); in create_hdev()
468 strscpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX); in create_hdev()
469 strscpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); in create_hdev()
470 strscpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], in create_hdev()
472 strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE], in create_hdev()
473 "in reset after device release", HL_STR_MAX); in create_hdev()
479 hdev->asic_type = get_asic_type(hdev); in create_hdev()
480 if (hdev->asic_type == ASIC_INVALID) { in create_hdev()
481 dev_err(&pdev->dev, "Unsupported ASIC\n"); in create_hdev()
482 rc = -ENODEV; in create_hdev()
505 * destroy_hdev - destroy habanalabs device instance
514 idr_remove(&hl_devs_idr, hdev->id); in destroy_hdev()
537 pr_debug("Going to resume PCI device\n"); in hl_pmops_resume()
540 pr_err("device pointer is NULL in resume\n"); in hl_pmops_resume()
548 * hl_pci_probe - probe PCI habanalabs devices
562 dev_info(&pdev->dev, HL_NAME in hl_pci_probe()
564 (int)pdev->vendor, (int)pdev->device, (int)pdev->revision); in hl_pci_probe()
574 dev_err(&pdev->dev, "Fatal error during habanalabs device init\n"); in hl_pci_probe()
575 rc = -ENODEV; in hl_pci_probe()
589 * hl_pci_remove - remove PCI habanalabs devices
609 * hl_pci_err_detected - a PCI bus error detected on this device
614 * Called by the PCI subsystem whenever a non-correctable
625 dev_warn(hdev->dev, "PCI normal state error detected\n"); in hl_pci_err_detected()
629 dev_warn(hdev->dev, "PCI frozen state error detected\n"); in hl_pci_err_detected()
634 dev_warn(hdev->dev, "PCI failure state error detected\n"); in hl_pci_err_detected()
642 hdev->asic_funcs->halt_engines(hdev, true, false); in hl_pci_err_detected()
648 * hl_pci_err_resume - resume after a PCI slot reset
657 dev_warn(hdev->dev, "Resuming device after PCI slot reset\n"); in hl_pci_err_resume()
662 * hl_pci_err_slot_reset - a PCI slot reset has just happened
666 * Determine if the driver can recover from the PCI slot reset
672 dev_warn(hdev->dev, "PCI slot reset detected\n"); in hl_pci_err_slot_reset()
685 hdev->disabled = true; in hl_pci_reset_prepare()
698 * Schedule a thread to trigger hard reset. in hl_pci_reset_done()
701 * and resets the device. FW will go back preboot stage, so driver needs to perform in hl_pci_reset_done()
702 * hard reset in order to load FW fit again. in hl_pci_reset_done()
711 .resume = hl_pmops_resume,
717 .resume = hl_pci_err_resume,
737 * hl_init - Initialize the habanalabs kernel driver
770 * hl_exit - Release all resources of the habanalabs kernel driver