Lines matching "gfx-mem" in drivers/gpu/drm/amd/amdkfd/kfd_process.c
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
59 /* Ordered, single-threaded workqueue for restoring evicted
62 * their BOs and result in a live-lock situation where processes
115 pdd = workarea->pdd; in kfd_sdma_activity_worker()
118 dqm = pdd->dev->dqm; in kfd_sdma_activity_worker()
119 qpd = &pdd->qpd; in kfd_sdma_activity_worker()
126 * we loop over all SDMA queues and get their counts from user-space. in kfd_sdma_activity_worker()
132 * 1. Create a temporary list of SDMA queue nodes from the qpd->queues_list, in kfd_sdma_activity_worker()
138 * from the qpd->queues_list. in kfd_sdma_activity_worker()
139 * 3. Do a second pass over qpd->queues_list to check if any nodes got deleted. in kfd_sdma_activity_worker()
151 list_for_each_entry(q, &qpd->queues_list, list) { in kfd_sdma_activity_worker()
152 if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) && in kfd_sdma_activity_worker()
153 (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI)) in kfd_sdma_activity_worker()
162 INIT_LIST_HEAD(&sdma_q->list); in kfd_sdma_activity_worker()
163 sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr; in kfd_sdma_activity_worker()
164 sdma_q->queue_id = q->properties.queue_id; in kfd_sdma_activity_worker()
165 list_add_tail(&sdma_q->list, &sdma_q_list.list); in kfd_sdma_activity_worker()
170 * qpd->queues_list. Return the past activity count as the total sdma in kfd_sdma_activity_worker()
174 workarea->sdma_activity_counter = pdd->sdma_past_activity_counter; in kfd_sdma_activity_worker()
184 mm = get_task_mm(pdd->process->lead_thread); in kfd_sdma_activity_worker()
192 ret = read_sdma_queue_counter(sdma_q->rptr, &val); in kfd_sdma_activity_worker()
195 sdma_q->queue_id); in kfd_sdma_activity_worker()
197 sdma_q->sdma_val = val; in kfd_sdma_activity_worker()
198 workarea->sdma_activity_counter += val; in kfd_sdma_activity_worker()
211 workarea->sdma_activity_counter += pdd->sdma_past_activity_counter; in kfd_sdma_activity_worker()
213 list_for_each_entry(q, &qpd->queues_list, list) { in kfd_sdma_activity_worker()
217 if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) && in kfd_sdma_activity_worker()
218 (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI)) in kfd_sdma_activity_worker()
222 if (((uint64_t __user *)q->properties.read_ptr == sdma_q->rptr) && in kfd_sdma_activity_worker()
223 (sdma_q->queue_id == q->properties.queue_id)) { in kfd_sdma_activity_worker()
224 list_del(&sdma_q->list); in kfd_sdma_activity_worker()
235 * from qpd->queues_list during SDMA usage read. Subtract the SDMA in kfd_sdma_activity_worker()
239 workarea->sdma_activity_counter -= sdma_q->sdma_val; in kfd_sdma_activity_worker()
240 list_del(&sdma_q->list); in kfd_sdma_activity_worker()
248 list_del(&sdma_q->list); in kfd_sdma_activity_worker()
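
The three-step scheme described in the comment above (snapshot the SDMA queue list under the DQM lock, read the user-space read pointers with the lock dropped, then reconcile against queues deleted in the meantime) can be modeled in plain C. Below is a minimal userspace sketch of that pattern, with pthreads standing in for the DQM lock; struct queue, struct snap, and total_activity are illustrative names, not kernel APIs.

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

struct queue { int id; const uint64_t *rptr; struct queue *next; };
struct snap  { int id; const uint64_t *rptr; uint64_t val; struct snap *next; };

static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;

uint64_t total_activity(struct queue *queues, uint64_t past_total)
{
        uint64_t total = past_total;
        struct snap *list = NULL, *s, *tmp;
        struct queue *q;

        /* Pass 1: record (id, rptr) for every live queue under the lock. */
        pthread_mutex_lock(&qlock);
        for (q = queues; q; q = q->next) {
                s = calloc(1, sizeof(*s));
                if (!s)
                        continue;
                s->id = q->id;
                s->rptr = q->rptr;
                s->next = list;
                list = s;
        }
        pthread_mutex_unlock(&qlock);

        /* Pass 2: dereference the counters with the lock dropped; in the
         * kernel these are user-space reads that may fault and sleep.
         */
        for (s = list; s; s = s->next) {
                s->val = *s->rptr;
                total += s->val;
        }

        /* Pass 3: retake the lock; a snapshotted queue that vanished in
         * the meantime must not contribute, so subtract its value again.
         */
        pthread_mutex_lock(&qlock);
        for (s = list; s; s = s->next) {
                int alive = 0;

                for (q = queues; q; q = q->next)
                        if (q->id == s->id && q->rptr == s->rptr)
                                alive = 1;
                if (!alive)
                        total -= s->val;
        }
        pthread_mutex_unlock(&qlock);

        while (list) {                  /* free the snapshot list */
                tmp = list->next;
                free(list);
                list = tmp;
        }
        return total;
}

Reading the counters in pass 2 without the lock is the point of the exercise: a user-space access may fault and sleep, which is not allowed while the DQM lock is held.
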
254 * kfd_get_cu_occupancy - Collect number of waves in-flight on this device
275 dev = pdd->dev; in kfd_get_cu_occupancy()
276 if (dev->kfd2kgd->get_cu_occupancy == NULL) in kfd_get_cu_occupancy()
277 return -EINVAL; in kfd_get_cu_occupancy()
280 proc = pdd->process; in kfd_get_cu_occupancy()
281 if (pdd->qpd.queue_count == 0) { in kfd_get_cu_occupancy()
282 pr_debug("Gpu-Id: %d has no active queues for process %d\n", in kfd_get_cu_occupancy()
283 dev->id, proc->pasid); in kfd_get_cu_occupancy()
290 dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt, in kfd_get_cu_occupancy()
294 cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu; in kfd_get_cu_occupancy()
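
The expression at file line 294 is the standard round-up (ceiling) integer division; the kernel's DIV_ROUND_UP() macro expands to the same form. A self-contained check of the arithmetic:

#include <assert.h>

static unsigned int div_round_up(unsigned int n, unsigned int d)
{
        return (n + d - 1) / d; /* assumes d > 0 and n + d - 1 does not overflow */
}

int main(void)
{
        assert(div_round_up(65, 32) == 3);      /* 65 waves occupy 3 CUs */
        assert(div_round_up(64, 32) == 2);
        assert(div_round_up(0, 32) == 0);
        return 0;
}
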
301 if (strcmp(attr->name, "pasid") == 0) { in kfd_procfs_show()
305 return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid); in kfd_procfs_show()
306 } else if (strncmp(attr->name, "vram_", 5) == 0) { in kfd_procfs_show()
309 return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage)); in kfd_procfs_show()
310 } else if (strncmp(attr->name, "sdma_", 5) == 0) { in kfd_procfs_show()
330 return -EINVAL; in kfd_procfs_show()
359 &kfd_device->kobj, "proc"); in kfd_procfs_init()
381 if (!strcmp(attr->name, "size")) in kfd_procfs_queue_show()
383 q->properties.queue_size); in kfd_procfs_queue_show()
384 else if (!strcmp(attr->name, "type")) in kfd_procfs_queue_show()
385 return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type); in kfd_procfs_queue_show()
386 else if (!strcmp(attr->name, "gpuid")) in kfd_procfs_queue_show()
387 return snprintf(buffer, PAGE_SIZE, "%u", q->device->id); in kfd_procfs_queue_show()
397 if (strcmp(attr->name, "evicted_ms") == 0) { in kfd_procfs_stats_show()
403 evict_jiffies = atomic64_read(&pdd->evict_duration_counter); in kfd_procfs_stats_show()
411 } else if (strcmp(attr->name, "cu_occupancy") == 0) { in kfd_procfs_stats_show()
425 if (!strcmp(attr->name, "faults")) { in kfd_sysfs_counters_show()
428 return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->faults)); in kfd_sysfs_counters_show()
430 if (!strcmp(attr->name, "page_in")) { in kfd_sysfs_counters_show()
433 return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_in)); in kfd_sysfs_counters_show()
435 if (!strcmp(attr->name, "page_out")) { in kfd_sysfs_counters_show()
438 return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_out)); in kfd_sysfs_counters_show()
498 if (!q || !q->process) in kfd_procfs_add_queue()
499 return -EINVAL; in kfd_procfs_add_queue()
500 proc = q->process; in kfd_procfs_add_queue()
503 if (!proc->kobj_queues) in kfd_procfs_add_queue()
504 return -EFAULT; in kfd_procfs_add_queue()
505 ret = kobject_init_and_add(&q->kobj, &procfs_queue_type, in kfd_procfs_add_queue()
506 proc->kobj_queues, "%u", q->properties.queue_id); in kfd_procfs_add_queue()
509 q->properties.queue_id); in kfd_procfs_add_queue()
510 kobject_put(&q->kobj); in kfd_procfs_add_queue()
525 attr->name = name; in kfd_sysfs_create_file()
526 attr->mode = KFD_SYSFS_FILE_MODE; in kfd_sysfs_create_file()
531 pr_warn("Create sysfs %s/%s failed %d", kobj->name, name, ret); in kfd_sysfs_create_file()
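
kfd_sysfs_create_file() follows the bare struct attribute pattern: fill in name and mode, initialize the attribute for lockdep, then register it on a kobject. A minimal sketch of the same pattern, assuming an existing kobject and using illustrative demo_* names:

#include <linux/kobject.h>
#include <linux/sysfs.h>

static struct attribute demo_attr;

static int demo_add_file(struct kobject *kobj, const char *name)
{
        int ret;

        demo_attr.name = name;
        demo_attr.mode = 0444;          /* read-only, like KFD_SYSFS_FILE_MODE */
        sysfs_attr_init(&demo_attr);    /* lockdep key setup */

        ret = sysfs_create_file(kobj, &demo_attr);
        if (ret)
                pr_warn("Create sysfs %s/%s failed %d\n", kobj->name, name, ret);
        return ret;
}
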
540 if (!p || !p->kobj) in kfd_procfs_add_sysfs_stats()
545 * - proc/<pid>/stats_<gpuid>/ in kfd_procfs_add_sysfs_stats()
546 * - proc/<pid>/stats_<gpuid>/evicted_ms in kfd_procfs_add_sysfs_stats()
547 * - proc/<pid>/stats_<gpuid>/cu_occupancy in kfd_procfs_add_sysfs_stats()
549 for (i = 0; i < p->n_pdds; i++) { in kfd_procfs_add_sysfs_stats()
550 struct kfd_process_device *pdd = p->pdds[i]; in kfd_procfs_add_sysfs_stats()
553 "stats_%u", pdd->dev->id); in kfd_procfs_add_sysfs_stats()
554 pdd->kobj_stats = kfd_alloc_struct(pdd->kobj_stats); in kfd_procfs_add_sysfs_stats()
555 if (!pdd->kobj_stats) in kfd_procfs_add_sysfs_stats()
558 ret = kobject_init_and_add(pdd->kobj_stats, in kfd_procfs_add_sysfs_stats()
560 p->kobj, in kfd_procfs_add_sysfs_stats()
566 kobject_put(pdd->kobj_stats); in kfd_procfs_add_sysfs_stats()
567 pdd->kobj_stats = NULL; in kfd_procfs_add_sysfs_stats()
571 kfd_sysfs_create_file(pdd->kobj_stats, &pdd->attr_evict, in kfd_procfs_add_sysfs_stats()
574 if (pdd->dev->kfd2kgd->get_cu_occupancy) in kfd_procfs_add_sysfs_stats()
575 kfd_sysfs_create_file(pdd->kobj_stats, in kfd_procfs_add_sysfs_stats()
576 &pdd->attr_cu_occupancy, in kfd_procfs_add_sysfs_stats()
587 if (!p || !p->kobj) in kfd_procfs_add_sysfs_counters()
592 * - proc/<pid>/counters_<gpuid>/ in kfd_procfs_add_sysfs_counters()
593 * - proc/<pid>/counters_<gpuid>/faults in kfd_procfs_add_sysfs_counters()
594 * - proc/<pid>/counters_<gpuid>/page_in in kfd_procfs_add_sysfs_counters()
595 * - proc/<pid>/counters_<gpuid>/page_out in kfd_procfs_add_sysfs_counters()
597 for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) { in kfd_procfs_add_sysfs_counters()
598 struct kfd_process_device *pdd = p->pdds[i]; in kfd_procfs_add_sysfs_counters()
602 "counters_%u", pdd->dev->id); in kfd_procfs_add_sysfs_counters()
608 p->kobj, counters_dir_filename); in kfd_procfs_add_sysfs_counters()
616 pdd->kobj_counters = kobj_counters; in kfd_procfs_add_sysfs_counters()
617 kfd_sysfs_create_file(kobj_counters, &pdd->attr_faults, in kfd_procfs_add_sysfs_counters()
619 kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_in, in kfd_procfs_add_sysfs_counters()
621 kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_out, in kfd_procfs_add_sysfs_counters()
630 if (!p || !p->kobj) in kfd_procfs_add_sysfs_files()
635 * - proc/<pid>/vram_<gpuid> in kfd_procfs_add_sysfs_files()
636 * - proc/<pid>/sdma_<gpuid> in kfd_procfs_add_sysfs_files()
638 for (i = 0; i < p->n_pdds; i++) { in kfd_procfs_add_sysfs_files()
639 struct kfd_process_device *pdd = p->pdds[i]; in kfd_procfs_add_sysfs_files()
641 snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u", in kfd_procfs_add_sysfs_files()
642 pdd->dev->id); in kfd_procfs_add_sysfs_files()
643 kfd_sysfs_create_file(p->kobj, &pdd->attr_vram, in kfd_procfs_add_sysfs_files()
644 pdd->vram_filename); in kfd_procfs_add_sysfs_files()
646 snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u", in kfd_procfs_add_sysfs_files()
647 pdd->dev->id); in kfd_procfs_add_sysfs_files()
648 kfd_sysfs_create_file(p->kobj, &pdd->attr_sdma, in kfd_procfs_add_sysfs_files()
649 pdd->sdma_filename); in kfd_procfs_add_sysfs_files()
658 kobject_del(&q->kobj); in kfd_procfs_del_queue()
659 kobject_put(&q->kobj); in kfd_procfs_del_queue()
672 return -ENOMEM; in kfd_process_create_wq()
690 static void kfd_process_free_gpuvm(struct kgd_mem *mem, in kfd_process_free_gpuvm() argument
693 struct kfd_node *dev = pdd->dev; in kfd_process_free_gpuvm()
696 amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem); in kfd_process_free_gpuvm()
700 amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv); in kfd_process_free_gpuvm()
701 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, mem, pdd->drm_priv, in kfd_process_free_gpuvm()
705 /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
709 * not need to take p->mutex.
713 uint32_t flags, struct kgd_mem **mem, void **kptr) in kfd_process_alloc_gpuvm() argument
715 struct kfd_node *kdev = pdd->dev; in kfd_process_alloc_gpuvm()
718 err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size, in kfd_process_alloc_gpuvm()
719 pdd->drm_priv, mem, NULL, in kfd_process_alloc_gpuvm()
724 err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->adev, *mem, in kfd_process_alloc_gpuvm()
725 pdd->drm_priv); in kfd_process_alloc_gpuvm()
729 err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->adev, *mem, true); in kfd_process_alloc_gpuvm()
737 (struct kgd_mem *)*mem, kptr, NULL); in kfd_process_alloc_gpuvm()
747 amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->adev, *mem, pdd->drm_priv); in kfd_process_alloc_gpuvm()
750 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, *mem, pdd->drm_priv, in kfd_process_alloc_gpuvm()
753 *mem = NULL; in kfd_process_alloc_gpuvm()
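
The error path above (unmap, then free, then NULL the pointer) is the kernel's stacked goto-unwind idiom: each acquisition step gains a cleanup label, and a failure jumps to the label that undoes everything acquired so far, in reverse order. A userspace model with malloc standing in for the alloc/map/kmap steps; acquire_resources is an illustrative name:

#include <stdlib.h>

static int acquire_resources(void)
{
        char *buf = NULL, *map = NULL, *kptr = NULL;
        int err;

        buf = malloc(64);               /* step 1: "allocate" */
        if (!buf) {
                err = -1;
                goto err_alloc;
        }
        map = malloc(64);               /* step 2: "map" */
        if (!map) {
                err = -1;
                goto err_map;
        }
        kptr = malloc(64);              /* step 3: "kernel mapping" */
        if (!kptr) {
                err = -1;
                goto err_kptr;
        }
        /* success: caller owns all three (released here for the demo) */
        free(kptr); free(map); free(buf);
        return 0;

err_kptr:
        free(map);                      /* undo step 2 */
err_map:
        free(buf);                      /* undo step 1 */
err_alloc:
        return err;
}
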
758 /* kfd_process_device_reserve_ib_mem - Reserve memory inside the
766 struct qcm_process_device *qpd = &pdd->qpd; in kfd_process_device_reserve_ib_mem()
771 struct kgd_mem *mem; in kfd_process_device_reserve_ib_mem() local
775 if (qpd->ib_kaddr || !qpd->ib_base) in kfd_process_device_reserve_ib_mem()
779 ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags, in kfd_process_device_reserve_ib_mem()
780 &mem, &kaddr); in kfd_process_device_reserve_ib_mem()
784 qpd->ib_mem = mem; in kfd_process_device_reserve_ib_mem()
785 qpd->ib_kaddr = kaddr; in kfd_process_device_reserve_ib_mem()
792 struct qcm_process_device *qpd = &pdd->qpd; in kfd_process_device_destroy_ib_mem()
794 if (!qpd->ib_kaddr || !qpd->ib_base) in kfd_process_device_destroy_ib_mem()
797 kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr); in kfd_process_device_destroy_ib_mem()
805 if (!(thread->mm && mmget_not_zero(thread->mm))) in kfd_create_process()
806 return ERR_PTR(-EINVAL); in kfd_create_process()
809 if (thread->group_leader->mm != thread->mm) { in kfd_create_process()
810 mmput(thread->mm); in kfd_create_process()
811 return ERR_PTR(-EINVAL); in kfd_create_process()
824 return ERR_PTR(-EINVAL); in kfd_create_process()
839 process->kobj = kfd_alloc_struct(process->kobj); in kfd_create_process()
840 if (!process->kobj) { in kfd_create_process()
844 ret = kobject_init_and_add(process->kobj, &procfs_type, in kfd_create_process()
846 (int)process->lead_thread->pid); in kfd_create_process()
849 kobject_put(process->kobj); in kfd_create_process()
853 kfd_sysfs_create_file(process->kobj, &process->attr_pasid, in kfd_create_process()
856 process->kobj_queues = kobject_create_and_add("queues", in kfd_create_process()
857 process->kobj); in kfd_create_process()
858 if (!process->kobj_queues) in kfd_create_process()
865 init_waitqueue_head(&process->wait_irq_drain); in kfd_create_process()
869 kref_get(&process->ref); in kfd_create_process()
871 mmput(thread->mm); in kfd_create_process()
880 if (!thread->mm) in kfd_get_process()
881 return ERR_PTR(-EINVAL); in kfd_get_process()
884 if (thread->group_leader->mm != thread->mm) in kfd_get_process()
885 return ERR_PTR(-EINVAL); in kfd_get_process()
889 return ERR_PTR(-EINVAL); in kfd_get_process()
900 if (process->mm == mm) in find_process_by_mm()
913 p = find_process_by_mm(thread->mm); in find_process()
915 kref_get(&p->ref); in find_process()
923 kref_put(&p->ref, kfd_process_ref_release); in kfd_unref_process()
926 /* This increments the process->ref counter. */
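
The reference counting noted in these comments is the standard kref pattern: a lookup takes a reference with kref_get() while the object is still findable, and every user balances it with kref_put(), whose release callback runs on the final drop. A minimal sketch with an illustrative demo_proc type; the real release, kfd_process_ref_release(), queues work instead of freeing inline:

#include <linux/kref.h>
#include <linux/slab.h>

struct demo_proc {
        struct kref ref;
        /* ... process state ... */
};

static void demo_proc_release(struct kref *ref)
{
        struct demo_proc *p = container_of(ref, struct demo_proc, ref);

        kfree(p);       /* final reference dropped */
}

/* lookup side: take a reference while the object is still findable */
static struct demo_proc *demo_get(struct demo_proc *p)
{
        kref_get(&p->ref);
        return p;
}

/* every user balances its get with a put; release runs on the last one */
static void demo_put(struct demo_proc *p)
{
        kref_put(&p->ref, demo_proc_release);
}
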
949 struct kfd_process *p = pdd->process; in kfd_process_device_free_bos()
950 void *mem; in kfd_process_device_free_bos() local
958 idr_for_each_entry(&pdd->alloc_idr, mem, id) { in kfd_process_device_free_bos()
960 for (i = 0; i < p->n_pdds; i++) { in kfd_process_device_free_bos()
961 struct kfd_process_device *peer_pdd = p->pdds[i]; in kfd_process_device_free_bos()
963 if (!peer_pdd->drm_priv) in kfd_process_device_free_bos()
966 peer_pdd->dev->adev, mem, peer_pdd->drm_priv); in kfd_process_device_free_bos()
969 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem, in kfd_process_device_free_bos()
970 pdd->drm_priv, NULL); in kfd_process_device_free_bos()
983 void *mem; in kfd_process_kunmap_signal_bo() local
985 kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle)); in kfd_process_kunmap_signal_bo()
989 mutex_lock(&p->mutex); in kfd_process_kunmap_signal_bo()
995 mem = kfd_process_device_translate_handle( in kfd_process_kunmap_signal_bo()
996 pdd, GET_IDR_HANDLE(p->signal_handle)); in kfd_process_kunmap_signal_bo()
997 if (!mem) in kfd_process_kunmap_signal_bo()
1000 amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem); in kfd_process_kunmap_signal_bo()
1003 mutex_unlock(&p->mutex); in kfd_process_kunmap_signal_bo()
1010 for (i = 0; i < p->n_pdds; i++) in kfd_process_free_outstanding_kfd_bos()
1011 kfd_process_device_free_bos(p->pdds[i]); in kfd_process_free_outstanding_kfd_bos()
1018 for (i = 0; i < p->n_pdds; i++) { in kfd_process_destroy_pdds()
1019 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_destroy_pdds()
1022 pdd->dev->id, p->pasid); in kfd_process_destroy_pdds()
1027 if (pdd->drm_file) { in kfd_process_destroy_pdds()
1029 pdd->dev->adev, pdd->drm_priv); in kfd_process_destroy_pdds()
1030 fput(pdd->drm_file); in kfd_process_destroy_pdds()
1033 if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base) in kfd_process_destroy_pdds()
1034 free_pages((unsigned long)pdd->qpd.cwsr_kaddr, in kfd_process_destroy_pdds()
1037 idr_destroy(&pdd->alloc_idr); in kfd_process_destroy_pdds()
1039 kfd_free_process_doorbells(pdd->dev->kfd, pdd); in kfd_process_destroy_pdds()
1041 if (pdd->dev->kfd->shared_resources.enable_mes) in kfd_process_destroy_pdds()
1042 amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, in kfd_process_destroy_pdds()
1043 pdd->proc_ctx_bo); in kfd_process_destroy_pdds()
1048 if (pdd->runtime_inuse) { in kfd_process_destroy_pdds()
1049 pm_runtime_mark_last_busy(adev_to_drm(pdd->dev->adev)->dev); in kfd_process_destroy_pdds()
1050 pm_runtime_put_autosuspend(adev_to_drm(pdd->dev->adev)->dev); in kfd_process_destroy_pdds()
1051 pdd->runtime_inuse = false; in kfd_process_destroy_pdds()
1055 p->pdds[i] = NULL; in kfd_process_destroy_pdds()
1057 p->n_pdds = 0; in kfd_process_destroy_pdds()
1065 if (!p->kobj) in kfd_process_remove_sysfs()
1068 sysfs_remove_file(p->kobj, &p->attr_pasid); in kfd_process_remove_sysfs()
1069 kobject_del(p->kobj_queues); in kfd_process_remove_sysfs()
1070 kobject_put(p->kobj_queues); in kfd_process_remove_sysfs()
1071 p->kobj_queues = NULL; in kfd_process_remove_sysfs()
1073 for (i = 0; i < p->n_pdds; i++) { in kfd_process_remove_sysfs()
1074 pdd = p->pdds[i]; in kfd_process_remove_sysfs()
1076 sysfs_remove_file(p->kobj, &pdd->attr_vram); in kfd_process_remove_sysfs()
1077 sysfs_remove_file(p->kobj, &pdd->attr_sdma); in kfd_process_remove_sysfs()
1079 sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict); in kfd_process_remove_sysfs()
1080 if (pdd->dev->kfd2kgd->get_cu_occupancy) in kfd_process_remove_sysfs()
1081 sysfs_remove_file(pdd->kobj_stats, in kfd_process_remove_sysfs()
1082 &pdd->attr_cu_occupancy); in kfd_process_remove_sysfs()
1083 kobject_del(pdd->kobj_stats); in kfd_process_remove_sysfs()
1084 kobject_put(pdd->kobj_stats); in kfd_process_remove_sysfs()
1085 pdd->kobj_stats = NULL; in kfd_process_remove_sysfs()
1088 for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) { in kfd_process_remove_sysfs()
1089 pdd = p->pdds[i]; in kfd_process_remove_sysfs()
1091 sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults); in kfd_process_remove_sysfs()
1092 sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in); in kfd_process_remove_sysfs()
1093 sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_out); in kfd_process_remove_sysfs()
1094 kobject_del(pdd->kobj_counters); in kfd_process_remove_sysfs()
1095 kobject_put(pdd->kobj_counters); in kfd_process_remove_sysfs()
1096 pdd->kobj_counters = NULL; in kfd_process_remove_sysfs()
1099 kobject_del(p->kobj); in kfd_process_remove_sysfs()
1100 kobject_put(p->kobj); in kfd_process_remove_sysfs()
1101 p->kobj = NULL; in kfd_process_remove_sysfs()
1116 pqm_uninit(&p->pqm); in kfd_process_wq_release()
1123 ef = rcu_access_pointer(p->ef); in kfd_process_wq_release()
1137 kfd_pasid_free(p->pasid); in kfd_process_wq_release()
1138 mutex_destroy(&p->mutex); in kfd_process_wq_release()
1140 put_task_struct(p->lead_thread); in kfd_process_wq_release()
1149 INIT_WORK(&p->release_work, kfd_process_wq_release); in kfd_process_ref_release()
1150 queue_work(kfd_process_wq, &p->release_work); in kfd_process_ref_release()
1160 return p ? &p->mmu_notifier : ERR_PTR(-ESRCH); in kfd_process_alloc_notifier()
1172 cancel_delayed_work_sync(&p->eviction_work); in kfd_process_notifier_release_internal()
1173 cancel_delayed_work_sync(&p->restore_work); in kfd_process_notifier_release_internal()
1175 for (i = 0; i < p->n_pdds; i++) { in kfd_process_notifier_release_internal()
1176 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_notifier_release_internal()
1178 /* re-enable GFX OFF since runtime enable with ttmp setup disabled it. */ in kfd_process_notifier_release_internal()
1179 if (!kfd_dbg_is_rlc_restore_supported(pdd->dev) && p->runtime_info.ttmp_setup) in kfd_process_notifier_release_internal()
1180 amdgpu_gfx_off_ctrl(pdd->dev->adev, true); in kfd_process_notifier_release_internal()
1184 p->mm = NULL; in kfd_process_notifier_release_internal()
1187 if (atomic_read(&p->debugged_process_count) > 0) { in kfd_process_notifier_release_internal()
1193 if (target->debugger_process && target->debugger_process == p) { in kfd_process_notifier_release_internal()
1194 mutex_lock_nested(&target->mutex, 1); in kfd_process_notifier_release_internal()
1196 mutex_unlock(&target->mutex); in kfd_process_notifier_release_internal()
1197 if (atomic_read(&p->debugged_process_count) == 0) in kfd_process_notifier_release_internal()
1205 mmu_notifier_put(&p->mmu_notifier); in kfd_process_notifier_release_internal()
1218 if (WARN_ON(p->mm != mm)) in kfd_process_notifier_release()
1233 hash_del_rcu(&p->kfd_processes); in kfd_process_notifier_release()
1267 hash_del_rcu(&p->kfd_processes); in kfd_cleanup_processes()
1269 hlist_add_head(&p->kfd_processes, &cleanup_list); in kfd_cleanup_processes()
1288 if (p->has_cwsr) in kfd_process_init_cwsr_apu()
1291 for (i = 0; i < p->n_pdds; i++) { in kfd_process_init_cwsr_apu()
1292 struct kfd_node *dev = p->pdds[i]->dev; in kfd_process_init_cwsr_apu()
1293 struct qcm_process_device *qpd = &p->pdds[i]->qpd; in kfd_process_init_cwsr_apu()
1295 if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) in kfd_process_init_cwsr_apu()
1298 offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id); in kfd_process_init_cwsr_apu()
1299 qpd->tba_addr = (int64_t)vm_mmap(filep, 0, in kfd_process_init_cwsr_apu()
1303 if (IS_ERR_VALUE(qpd->tba_addr)) { in kfd_process_init_cwsr_apu()
1304 int err = qpd->tba_addr; in kfd_process_init_cwsr_apu()
1307 qpd->tba_addr = 0; in kfd_process_init_cwsr_apu()
1308 qpd->cwsr_kaddr = NULL; in kfd_process_init_cwsr_apu()
1312 memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size); in kfd_process_init_cwsr_apu()
1314 kfd_process_set_trap_debug_flag(qpd, p->debug_trap_enabled); in kfd_process_init_cwsr_apu()
1316 qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; in kfd_process_init_cwsr_apu()
1318 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); in kfd_process_init_cwsr_apu()
1321 p->has_cwsr = true; in kfd_process_init_cwsr_apu()
1328 struct kfd_node *dev = pdd->dev; in kfd_process_device_init_cwsr_dgpu()
1329 struct qcm_process_device *qpd = &pdd->qpd; in kfd_process_device_init_cwsr_dgpu()
1333 struct kgd_mem *mem; in kfd_process_device_init_cwsr_dgpu() local
1337 if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base) in kfd_process_device_init_cwsr_dgpu()
1341 ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base, in kfd_process_device_init_cwsr_dgpu()
1342 KFD_CWSR_TBA_TMA_SIZE, flags, &mem, &kaddr); in kfd_process_device_init_cwsr_dgpu()
1346 qpd->cwsr_mem = mem; in kfd_process_device_init_cwsr_dgpu()
1347 qpd->cwsr_kaddr = kaddr; in kfd_process_device_init_cwsr_dgpu()
1348 qpd->tba_addr = qpd->cwsr_base; in kfd_process_device_init_cwsr_dgpu()
1350 memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size); in kfd_process_device_init_cwsr_dgpu()
1352 kfd_process_set_trap_debug_flag(&pdd->qpd, in kfd_process_device_init_cwsr_dgpu()
1353 pdd->process->debug_trap_enabled); in kfd_process_device_init_cwsr_dgpu()
1355 qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; in kfd_process_device_init_cwsr_dgpu()
1357 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); in kfd_process_device_init_cwsr_dgpu()
1364 struct kfd_node *dev = pdd->dev; in kfd_process_device_destroy_cwsr_dgpu()
1365 struct qcm_process_device *qpd = &pdd->qpd; in kfd_process_device_destroy_cwsr_dgpu()
1367 if (!dev->kfd->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base) in kfd_process_device_destroy_cwsr_dgpu()
1370 kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr); in kfd_process_device_destroy_cwsr_dgpu()
1377 if (qpd->cwsr_kaddr) { in kfd_process_set_trap_handler()
1378 /* KFD trap handler is bound, record as second-level TBA/TMA in kfd_process_set_trap_handler()
1379 * in first-level TMA. First-level trap will jump to second. in kfd_process_set_trap_handler()
1382 (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); in kfd_process_set_trap_handler()
1386 /* No trap handler bound, bind as first-level TBA/TMA. */ in kfd_process_set_trap_handler()
1387 qpd->tba_addr = tba_addr; in kfd_process_set_trap_handler()
1388 qpd->tma_addr = tma_addr; in kfd_process_set_trap_handler()
1404 * built for XNACK-off. On GFXv9 it may perform slower. in kfd_process_xnack_mode()
1406 * Therefore applications built for XNACK-off can always be in kfd_process_xnack_mode()
1410 for (i = 0; i < p->n_pdds; i++) { in kfd_process_xnack_mode()
1411 struct kfd_node *dev = p->pdds[i]->dev; in kfd_process_xnack_mode()
1420 * per-process XNACK mode selection. But let the dev->noretry in kfd_process_xnack_mode()
1424 if (!amdgpu_sriov_xnack_support(dev->kfd->adev)) { in kfd_process_xnack_mode()
1433 * management and memory-manager-related preemptions or in kfd_process_xnack_mode()
1439 if (dev->kfd->noretry) in kfd_process_xnack_mode()
1449 if (qpd->cwsr_kaddr) { in kfd_process_set_trap_debug_flag()
1451 (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); in kfd_process_set_trap_debug_flag()
1464 int err = -ENOMEM; in create_process()
1470 kref_init(&process->ref); in create_process()
1471 mutex_init(&process->mutex); in create_process()
1472 process->mm = thread->mm; in create_process()
1473 process->lead_thread = thread->group_leader; in create_process()
1474 process->n_pdds = 0; in create_process()
1475 process->queues_paused = false; in create_process()
1476 INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); in create_process()
1477 INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); in create_process()
1478 process->last_restore_timestamp = get_jiffies_64(); in create_process()
1482 process->is_32bit_user_mode = in_compat_syscall(); in create_process()
1483 process->debug_trap_enabled = false; in create_process()
1484 process->debugger_process = NULL; in create_process()
1485 process->exception_enable_mask = 0; in create_process()
1486 atomic_set(&process->debugged_process_count, 0); in create_process()
1487 sema_init(&process->runtime_enable_sema, 0); in create_process()
1489 process->pasid = kfd_pasid_alloc(); in create_process()
1490 if (process->pasid == 0) { in create_process()
1491 err = -ENOSPC; in create_process()
1495 err = pqm_init(&process->pqm, process); in create_process()
1505 process->xnack_enabled = kfd_process_xnack_mode(process, false); in create_process()
1512 hash_add_rcu(kfd_processes_table, &process->kfd_processes, in create_process()
1513 (uintptr_t)process->mm); in create_process()
1518 kref_get(&process->ref); in create_process()
1525 mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm); in create_process()
1530 BUG_ON(mn != &process->mmu_notifier); in create_process()
1533 get_task_struct(process->lead_thread); in create_process()
1535 INIT_WORK(&process->debug_event_workarea, debug_event_write_work_handler); in create_process()
1540 hash_del_rcu(&process->kfd_processes); in create_process()
1546 pqm_uninit(&process->pqm); in create_process()
1548 kfd_pasid_free(process->pasid); in create_process()
1552 mutex_destroy(&process->mutex); in create_process()
1563 for (i = 0; i < p->n_pdds; i++) in kfd_get_process_device_data()
1564 if (p->pdds[i]->dev == dev) in kfd_get_process_device_data()
1565 return p->pdds[i]; in kfd_get_process_device_data()
1576 if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE)) in kfd_create_process_device_data()
1582 pdd->dev = dev; in kfd_create_process_device_data()
1583 INIT_LIST_HEAD(&pdd->qpd.queues_list); in kfd_create_process_device_data()
1584 INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); in kfd_create_process_device_data()
1585 pdd->qpd.dqm = dev->dqm; in kfd_create_process_device_data()
1586 pdd->qpd.pqm = &p->pqm; in kfd_create_process_device_data()
1587 pdd->qpd.evicted = 0; in kfd_create_process_device_data()
1588 pdd->qpd.mapped_gws_queue = false; in kfd_create_process_device_data()
1589 pdd->process = p; in kfd_create_process_device_data()
1590 pdd->bound = PDD_UNBOUND; in kfd_create_process_device_data()
1591 pdd->already_dequeued = false; in kfd_create_process_device_data()
1592 pdd->runtime_inuse = false; in kfd_create_process_device_data()
1593 pdd->vram_usage = 0; in kfd_create_process_device_data()
1594 pdd->sdma_past_activity_counter = 0; in kfd_create_process_device_data()
1595 pdd->user_gpu_id = dev->id; in kfd_create_process_device_data()
1596 atomic64_set(&pdd->evict_duration_counter, 0); in kfd_create_process_device_data()
1598 if (dev->kfd->shared_resources.enable_mes) { in kfd_create_process_device_data()
1599 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, in kfd_create_process_device_data()
1601 &pdd->proc_ctx_bo, in kfd_create_process_device_data()
1602 &pdd->proc_ctx_gpu_addr, in kfd_create_process_device_data()
1603 &pdd->proc_ctx_cpu_ptr, in kfd_create_process_device_data()
1609 memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); in kfd_create_process_device_data()
1612 p->pdds[p->n_pdds++] = pdd; in kfd_create_process_device_data()
1613 if (kfd_dbg_is_per_vmid_supported(pdd->dev)) in kfd_create_process_device_data()
1614 pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap( in kfd_create_process_device_data()
1615 pdd->dev->adev, in kfd_create_process_device_data()
1620 idr_init(&pdd->alloc_idr); in kfd_create_process_device_data()
1630 * kfd_process_device_init_vm - Initialize a VM for a process-device
1632 * @pdd: The process-device
1641 * Returns 0 on success, -errno on failure.
1654 return -EINVAL; in kfd_process_device_init_vm()
1656 if (pdd->drm_priv) in kfd_process_device_init_vm()
1657 return -EBUSY; in kfd_process_device_init_vm()
1662 avm = &drv_priv->vm; in kfd_process_device_init_vm()
1664 p = pdd->process; in kfd_process_device_init_vm()
1665 dev = pdd->dev; in kfd_process_device_init_vm()
1667 ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm, in kfd_process_device_init_vm()
1668 &p->kgd_process_info, in kfd_process_device_init_vm()
1674 RCU_INIT_POINTER(p->ef, ef); in kfd_process_device_init_vm()
1675 pdd->drm_priv = drm_file->private_data; in kfd_process_device_init_vm()
1684 ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, avm, p->pasid); in kfd_process_device_init_vm()
1688 pdd->drm_file = drm_file; in kfd_process_device_init_vm()
1697 pdd->drm_priv = NULL; in kfd_process_device_init_vm()
1698 amdgpu_amdkfd_gpuvm_destroy_cb(dev->adev, avm); in kfd_process_device_init_vm()
1704 * Direct the IOMMU to bind the process (specifically the pasid->mm)
1719 return ERR_PTR(-ENOMEM); in kfd_bind_process_to_device()
1722 if (!pdd->drm_priv) in kfd_bind_process_to_device()
1723 return ERR_PTR(-ENODEV); in kfd_bind_process_to_device()
1726 * signal runtime-pm system to auto resume and prevent in kfd_bind_process_to_device()
1730 if (!pdd->runtime_inuse) { in kfd_bind_process_to_device()
1731 err = pm_runtime_get_sync(adev_to_drm(dev->adev)->dev); in kfd_bind_process_to_device()
1733 pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev); in kfd_bind_process_to_device()
1742 pdd->runtime_inuse = true; in kfd_bind_process_to_device()
1747 /* Create specific handle mapped to mem from process local memory idr
1751 void *mem) in kfd_process_device_create_obj_handle() argument
1753 return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL); in kfd_process_device_create_obj_handle()
1765 return idr_find(&pdd->alloc_idr, handle); in kfd_process_device_translate_handle()
1775 idr_remove(&pdd->alloc_idr, handle); in kfd_process_device_remove_obj_handle()
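
These three helpers wrap a per-device idr: idr_alloc() hands out a small integer handle for a driver object, idr_find() translates a handle back to the object, and idr_remove() invalidates it. A sketch of the same pattern using an illustrative global idr (the real code uses the per-pdd alloc_idr):

#include <linux/idr.h>

static DEFINE_IDR(demo_idr);

static int demo_create_handle(void *obj)
{
        /* start = 0, end = 0 requests any available non-negative id */
        return idr_alloc(&demo_idr, obj, 0, 0, GFP_KERNEL);
}

static void *demo_translate_handle(int handle)
{
        return idr_find(&demo_idr, handle);     /* NULL if stale or invalid */
}

static void demo_remove_handle(int handle)
{
        idr_remove(&demo_idr, handle);
}
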
1778 /* This increments the process->ref counter. */
1787 if (p->pasid == pasid) { in kfd_lookup_process_by_pasid()
1788 kref_get(&p->ref); in kfd_lookup_process_by_pasid()
1799 /* This increments the process->ref counter. */
1808 kref_get(&p->ref); in kfd_lookup_process_by_mm()
1815 /* kfd_process_evict_queues - Evict all user queues of a process
1817 * Eviction is reference-counted per process-device. This means multiple
1826 for (i = 0; i < p->n_pdds; i++) { in kfd_process_evict_queues()
1827 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_evict_queues()
1829 kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid, in kfd_process_evict_queues()
1832 r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm, in kfd_process_evict_queues()
1833 &pdd->qpd); in kfd_process_evict_queues()
1834 /* evict returns -EIO if HWS hangs or the asic is resetting, in this case in kfd_process_evict_queues()
1838 if (r && r != -EIO) { in kfd_process_evict_queues()
1851 for (i = 0; i < p->n_pdds; i++) { in kfd_process_evict_queues()
1852 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_evict_queues()
1857 kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); in kfd_process_evict_queues()
1859 if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, in kfd_process_evict_queues()
1860 &pdd->qpd)) in kfd_process_evict_queues()
1863 n_evicted--; in kfd_process_evict_queues()
1869 /* kfd_process_restore_queues - Restore all user queues of a process */
1875 for (i = 0; i < p->n_pdds; i++) { in kfd_process_restore_queues()
1876 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_restore_queues()
1878 kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); in kfd_process_restore_queues()
1880 r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, in kfd_process_restore_queues()
1881 &pdd->qpd); in kfd_process_restore_queues()
1896 for (i = 0; i < p->n_pdds; i++) in kfd_process_gpuidx_from_gpuid()
1897 if (p->pdds[i] && gpu_id == p->pdds[i]->user_gpu_id) in kfd_process_gpuidx_from_gpuid()
1899 return -EINVAL; in kfd_process_gpuidx_from_gpuid()
1908 for (i = 0; i < p->n_pdds; i++) in kfd_process_gpuid_from_node()
1909 if (p->pdds[i] && p->pdds[i]->dev == node) { in kfd_process_gpuid_from_node()
1910 *gpuid = p->pdds[i]->user_gpu_id; in kfd_process_gpuid_from_node()
1914 return -EINVAL; in kfd_process_gpuid_from_node()
1923 ef = dma_fence_get_rcu_safe(&p->ef); in signal_eviction_fence()
1945 pr_debug("Started evicting pasid 0x%x\n", p->pasid); in evict_process_worker()
1953 queue_delayed_work(kfd_restore_wq, &p->restore_work, in evict_process_worker()
1958 pr_debug("Finished evicting pasid 0x%x\n", p->pasid); in evict_process_worker()
1960 pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid); in evict_process_worker()
1968 if (p->kgd_process_info) { in restore_process_helper()
1970 p->kgd_process_info, &p->ef); in restore_process_helper()
1977 pr_debug("Finished restoring pasid 0x%x\n", p->pasid); in restore_process_helper()
1979 pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid); in restore_process_helper()
1996 pr_debug("Started restoring pasid 0x%x\n", p->pasid); in restore_process_worker()
2004 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two in restore_process_worker()
2008 p->last_restore_timestamp = get_jiffies_64(); in restore_process_worker()
2013 p->pasid, PROCESS_BACK_OFF_TIME_MS); in restore_process_worker()
2014 ret = queue_delayed_work(kfd_restore_wq, &p->restore_work, in restore_process_worker()
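
The requeue above is the delayed-work back-off pattern: when restore fails, the worker schedules itself to run again after PROCESS_BACK_OFF_TIME_MS. A minimal sketch with illustrative demo_* names and a stub restore step; the real code queues onto its own kfd_restore_wq rather than system_wq:

#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>

#define DEMO_BACK_OFF_MS 100

static int try_restore(void)
{
        return -EAGAIN;         /* stand-in for the real restore step */
}

static void demo_restore_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(demo_restore_work, demo_restore_fn);

static void demo_restore_fn(struct work_struct *work)
{
        if (!try_restore())
                return;         /* restored successfully */
        /* failed: retry after a back-off delay, as restore_process_worker() does */
        queue_delayed_work(system_wq, &demo_restore_work,
                           msecs_to_jiffies(DEMO_BACK_OFF_MS));
}
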
2029 pr_err("Failed to suspend process 0x%x\n", p->pasid); in kfd_suspend_all_processes()
2044 p->pasid); in kfd_resume_all_processes()
2045 ret = -EFAULT; in kfd_resume_all_processes()
2058 if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) { in kfd_reserved_mem_mmap()
2060 return -EINVAL; in kfd_reserved_mem_mmap()
2065 return -EINVAL; in kfd_reserved_mem_mmap()
2066 qpd = &pdd->qpd; in kfd_reserved_mem_mmap()
2068 qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, in kfd_reserved_mem_mmap()
2070 if (!qpd->cwsr_kaddr) { in kfd_reserved_mem_mmap()
2072 return -ENOMEM; in kfd_reserved_mem_mmap()
2078 return remap_pfn_range(vma, vma->vm_start, in kfd_reserved_mem_mmap()
2079 PFN_DOWN(__pa(qpd->cwsr_kaddr)), in kfd_reserved_mem_mmap()
2080 KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); in kfd_reserved_mem_mmap()
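
kfd_reserved_mem_mmap() backs the fixed-size CWSR region with zeroed kernel pages and inserts them into the user VMA with remap_pfn_range(). A sketch of the same mmap-handler shape, assuming an illustrative DEMO_SIZE region:

#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/pfn.h>

#define DEMO_SIZE (2 * PAGE_SIZE)       /* KFD uses KFD_CWSR_TBA_TMA_SIZE */

static int demo_mmap(struct file *filp, struct vm_area_struct *vma)
{
        unsigned long kaddr;

        /* user space must map exactly the reserved region */
        if (vma->vm_end - vma->vm_start != DEMO_SIZE)
                return -EINVAL;

        kaddr = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                 get_order(DEMO_SIZE));
        if (!kaddr)
                return -ENOMEM;

        /* the real code stashes kaddr (qpd->cwsr_kaddr) to free it later */
        return remap_pfn_range(vma, vma->vm_start, PFN_DOWN(__pa(kaddr)),
                               DEMO_SIZE, vma->vm_page_prot);
}
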
2090 if (!KFD_IS_SOC15(pdd->dev)) in kfd_process_drain_interrupts()
2093 pdd->process->irq_drain_is_open = true; in kfd_process_drain_interrupts()
2098 irq_drain_fence[3] = pdd->process->pasid; in kfd_process_drain_interrupts()
2101 * For GFX 9.4.3, send the NodeId also in IH cookie DW[3] in kfd_process_drain_interrupts()
2103 if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3)) { in kfd_process_drain_interrupts()
2104 node_id = ffs(pdd->dev->interrupt_bitmap) - 1; in kfd_process_drain_interrupts()
2109 if (amdgpu_amdkfd_send_close_event_drain_irq(pdd->dev->adev, in kfd_process_drain_interrupts()
2111 pdd->process->irq_drain_is_open = false; in kfd_process_drain_interrupts()
2115 r = wait_event_interruptible(pdd->process->wait_irq_drain, in kfd_process_drain_interrupts()
2116 !READ_ONCE(pdd->process->irq_drain_is_open)); in kfd_process_drain_interrupts()
2118 pdd->process->irq_drain_is_open = false; in kfd_process_drain_interrupts()
2132 WRITE_ONCE(p->irq_drain_is_open, false); in kfd_process_close_interrupt_drain()
2133 wake_up_all(&p->wait_irq_drain); in kfd_process_close_interrupt_drain()
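
The drain sequence pairs a flag with a waitqueue: the draining thread opens irq_drain_is_open and sleeps interruptibly until the interrupt path observes the drain fence and closes it. A minimal sketch of that handshake with illustrative demo_* names:

#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_drain_wq);
static bool demo_drain_open;

static int demo_wait_for_drain(void)
{
        WRITE_ONCE(demo_drain_open, true);
        /* ... send the drain fence to the hardware here ... */
        return wait_event_interruptible(demo_drain_wq,
                                        !READ_ONCE(demo_drain_open));
}

/* called from the interrupt handling path when the fence comes back */
static void demo_drain_complete(void)
{
        WRITE_ONCE(demo_drain_open, false);
        wake_up_all(&demo_drain_wq);
}
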
2158 p = workarea->p; in send_exception_work_handler()
2160 mm = get_task_mm(p->lead_thread); in send_exception_work_handler()
2167 q = pqm_get_user_queue(&p->pqm, workarea->queue_id); in send_exception_work_handler()
2172 csa_header = (void __user *)q->properties.ctx_save_restore_area_address; in send_exception_work_handler()
2174 get_user(err_payload_ptr, (uint64_t __user **)&csa_header->err_payload_addr); in send_exception_work_handler()
2176 cur_err |= workarea->error_reason; in send_exception_work_handler()
2178 get_user(ev_id, &csa_header->err_event_id); in send_exception_work_handler()
2211 for (i = 0; i < p->n_pdds; i++) { in kfd_process_device_data_by_id()
2212 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_device_data_by_id()
2214 if (pdd->user_gpu_id == gpu_id) in kfd_process_device_data_by_id()
2228 for (i = 0; i < p->n_pdds; i++) { in kfd_process_get_user_gpu_id()
2229 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_get_user_gpu_id()
2231 if (pdd->dev->id == actual_gpu_id) in kfd_process_get_user_gpu_id()
2232 return pdd->user_gpu_id; in kfd_process_get_user_gpu_id()
2234 return -EINVAL; in kfd_process_get_user_gpu_id()
2249 p->lead_thread->tgid, p->pasid); in kfd_debugfs_mqds_by_process()
2251 mutex_lock(&p->mutex); in kfd_debugfs_mqds_by_process()
2252 r = pqm_debugfs_mqds(m, &p->pqm); in kfd_debugfs_mqds_by_process()
2253 mutex_unlock(&p->mutex); in kfd_debugfs_mqds_by_process()