Lines Matching +full:gpu +full:-id (drivers/gpu/drm/amd/amdkfd/kfd_topology.c)
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
45 /* topology_device_list - Master list of all topology devices */
59 if (top_dev->proximity_domain == proximity_domain) { in kfd_topology_device_by_proximity_domain_no_lock()
89 if (top_dev->gpu_id == gpu_id) { in kfd_topology_device_by_id()
107 return top_dev->gpu; in kfd_device_by_id()
118 if (top_dev->gpu && top_dev->gpu->adev->pdev == pdev) { in kfd_device_by_pci_dev()
119 device = top_dev->gpu; in kfd_device_by_pci_dev()
137 list_del(&dev->list); in kfd_release_topology_device()
139 while (dev->mem_props.next != &dev->mem_props) { in kfd_release_topology_device()
140 mem = container_of(dev->mem_props.next, in kfd_release_topology_device()
142 list_del(&mem->list); in kfd_release_topology_device()
146 while (dev->cache_props.next != &dev->cache_props) { in kfd_release_topology_device()
147 cache = container_of(dev->cache_props.next, in kfd_release_topology_device()
149 list_del(&cache->list); in kfd_release_topology_device()
153 while (dev->io_link_props.next != &dev->io_link_props) { in kfd_release_topology_device()
154 iolink = container_of(dev->io_link_props.next, in kfd_release_topology_device()
156 list_del(&iolink->list); in kfd_release_topology_device()
160 while (dev->p2p_link_props.next != &dev->p2p_link_props) { in kfd_release_topology_device()
161 p2plink = container_of(dev->p2p_link_props.next, in kfd_release_topology_device()
163 list_del(&p2plink->list); in kfd_release_topology_device()
167 while (dev->perf_props.next != &dev->perf_props) { in kfd_release_topology_device()
168 perf = container_of(dev->perf_props.next, in kfd_release_topology_device()
170 list_del(&perf->list); in kfd_release_topology_device()
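The while-loops above are the standard Linux pop-and-free idiom for draining a list_head whose entries must be freed individually. A minimal self-contained sketch of the pattern (generic names, not from this file):

    #include <linux/list.h>
    #include <linux/slab.h>

    struct item {
    	struct list_head list;
    	/* payload ... */
    };

    /* Pop entries one at a time until the head points back at itself
     * (i.e. the list is empty), freeing each entry as it is unlinked. */
    static void release_all(struct list_head *head)
    {
    	struct item *it;

    	while (head->next != head) {
    		it = container_of(head->next, struct item, list);
    		list_del(&it->list);
    		kfree(it);
    	}
    }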
205 INIT_LIST_HEAD(&dev->mem_props); in kfd_create_topology_device()
206 INIT_LIST_HEAD(&dev->cache_props); in kfd_create_topology_device()
207 INIT_LIST_HEAD(&dev->io_link_props); in kfd_create_topology_device()
208 INIT_LIST_HEAD(&dev->p2p_link_props); in kfd_create_topology_device()
209 INIT_LIST_HEAD(&dev->perf_props); in kfd_create_topology_device()
211 list_add_tail(&dev->list, device_list); in kfd_create_topology_device()
218 (offs += snprintf(buffer+offs, PAGE_SIZE-offs, \
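Line 218 above is the tail of the helper macro family behind every sysfs_show_*_prop() call in this file. A plausible reconstruction (the generic wrapper name sysfs_show_gen_prop is an assumption inferred from the call sites):

    #define sysfs_show_gen_prop(buffer, offs, fmt, ...) \
    	(offs += snprintf(buffer+offs, PAGE_SIZE-offs, \
    			  fmt, __VA_ARGS__))
    #define sysfs_show_32bit_prop(buffer, offs, name, value) \
    	sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value)
    #define sysfs_show_64bit_prop(buffer, offs, name, value) \
    	sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value)
    #define sysfs_show_32bit_val(buffer, offs, value) \
    	sysfs_show_gen_prop(buffer, offs, "%u\n", value)
    #define sysfs_show_str_val(buffer, offs, value) \
    	sysfs_show_gen_prop(buffer, offs, "%s\n", value)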
248 offs = -EINVAL; in sysprops_show()
278 if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu)) in iolink_show()
279 return -EPERM; in iolink_show()
280 sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type); in iolink_show()
281 sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj); in iolink_show()
282 sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min); in iolink_show()
283 sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from); in iolink_show()
284 sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to); in iolink_show()
285 sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight); in iolink_show()
286 sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency); in iolink_show()
287 sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency); in iolink_show()
289 iolink->min_bandwidth); in iolink_show()
291 iolink->max_bandwidth); in iolink_show()
293 iolink->rec_transfer_size); in iolink_show()
294 sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags); in iolink_show()
318 if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu)) in mem_show()
319 return -EPERM; in mem_show()
320 sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type); in mem_show()
322 mem->size_in_bytes); in mem_show()
323 sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags); in mem_show()
324 sysfs_show_32bit_prop(buffer, offs, "width", mem->width); in mem_show()
326 mem->mem_clk_max); in mem_show()
350 if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) in kfd_cache_show()
351 return -EPERM; in kfd_cache_show()
353 cache->processor_id_low); in kfd_cache_show()
354 sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level); in kfd_cache_show()
355 sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size); in kfd_cache_show()
357 cache->cacheline_size); in kfd_cache_show()
359 cache->cachelines_per_tag); in kfd_cache_show()
360 sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc); in kfd_cache_show()
361 sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency); in kfd_cache_show()
362 sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type); in kfd_cache_show()
364 offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map "); in kfd_cache_show()
365 for (i = 0; i < cache->sibling_map_size; i++) in kfd_cache_show()
366 for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) in kfd_cache_show()
368 offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,", in kfd_cache_show()
369 (cache->sibling_map[i] >> j) & 1); in kfd_cache_show()
372 buffer[offs-1] = '\n'; in kfd_cache_show()
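kfd_cache_show() expands each sibling_map byte into comma-separated bits, least-significant bit first, then overwrites the trailing comma with a newline. A user-space sketch of the same decode (hypothetical helper, not kernel code):

    #include <stdio.h>
    #include <stdint.h>

    /* Render a cache sibling_map exactly as kfd_cache_show() does:
     * one "0," or "1," per CU bit, then '\n' in place of the last ','. */
    static void print_sibling_map(const uint8_t *map, int nbytes)
    {
    	char buf[512];
    	int offs = 0;

    	offs += snprintf(buf + offs, sizeof(buf) - offs, "sibling_map ");
    	for (int i = 0; i < nbytes; i++)
    		for (int j = 0; j < 8; j++)
    			offs += snprintf(buf + offs, sizeof(buf) - offs,
    					 "%d,", (map[i] >> j) & 1);
    	buf[offs - 1] = '\n';
    	fwrite(buf, 1, offs, stdout);
    }

For example, a two-byte map {0x03, 0x01} prints as "sibling_map 1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0" with the final comma replaced by a newline.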
400 if (!attr->data) /* invalid data for PMC */ in perf_show()
403 return sysfs_show_32bit_val(buf, offs, attr->data); in perf_show()
429 if (strcmp(attr->name, "gpu_id") == 0) { in node_show()
432 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
433 return -EPERM; in node_show()
434 return sysfs_show_32bit_val(buffer, offs, dev->gpu_id); in node_show()
437 if (strcmp(attr->name, "name") == 0) { in node_show()
441 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
442 return -EPERM; in node_show()
443 return sysfs_show_str_val(buffer, offs, dev->node_props.name); in node_show()
448 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
449 return -EPERM; in node_show()
451 dev->node_props.cpu_cores_count); in node_show()
453 dev->gpu ? dev->node_props.simd_count : 0); in node_show()
455 dev->node_props.mem_banks_count); in node_show()
457 dev->node_props.caches_count); in node_show()
459 dev->node_props.io_links_count); in node_show()
461 dev->node_props.p2p_links_count); in node_show()
463 dev->node_props.cpu_core_id_base); in node_show()
465 dev->node_props.simd_id_base); in node_show()
467 dev->node_props.max_waves_per_simd); in node_show()
469 dev->node_props.lds_size_in_kb); in node_show()
471 dev->node_props.gds_size_in_kb); in node_show()
473 dev->node_props.num_gws); in node_show()
475 dev->node_props.wave_front_size); in node_show()
477 dev->gpu ? (dev->node_props.array_count * in node_show()
478 NUM_XCC(dev->gpu->xcc_mask)) : 0); in node_show()
480 dev->node_props.simd_arrays_per_engine); in node_show()
482 dev->node_props.cu_per_simd_array); in node_show()
484 dev->node_props.simd_per_cu); in node_show()
486 dev->node_props.max_slots_scratch_cu); in node_show()
488 dev->node_props.gfx_target_version); in node_show()
490 dev->node_props.vendor_id); in node_show()
492 dev->node_props.device_id); in node_show()
494 dev->node_props.location_id); in node_show()
496 dev->node_props.domain); in node_show()
498 dev->node_props.drm_render_minor); in node_show()
500 dev->node_props.hive_id); in node_show()
502 dev->node_props.num_sdma_engines); in node_show()
504 dev->node_props.num_sdma_xgmi_engines); in node_show()
506 dev->node_props.num_sdma_queues_per_engine); in node_show()
508 dev->node_props.num_cp_queues); in node_show()
510 if (dev->gpu) { in node_show()
512 __ilog2_u32(dev->gpu->kfd->device_info.num_of_watch_points); in node_show()
515 dev->node_props.capability |= in node_show()
518 dev->node_props.capability |= in node_show()
524 if (dev->gpu->adev->asic_type == CHIP_TONGA) in node_show()
525 dev->node_props.capability |= in node_show()
529 dev->node_props.max_engine_clk_fcompute); in node_show()
534 dev->gpu->kfd->mec_fw_version); in node_show()
536 dev->node_props.capability); in node_show()
538 dev->node_props.debug_prop); in node_show()
540 dev->gpu->kfd->sdma_fw_version); in node_show()
542 dev->gpu->adev->unique_id); in node_show()
544 NUM_XCC(dev->gpu->xcc_mask)); in node_show()
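Because node_show() emits one name/value pair per line, the properties file parses with a trivial scanner. A hedged user-space sketch (the sysfs path is the one KFD conventionally exposes; treat it as an assumption for your system):

    #include <stdio.h>
    #include <string.h>

    /* Scan a node's properties file for one key; the format is
     * "name value\n" per line, exactly as node_show() writes it.
     * Values are unsigned in the kernel; long long is fine for a sketch. */
    static long long read_node_prop(int node, const char *key)
    {
    	char path[128], name[64];
    	long long val = -1, v;
    	FILE *f;

    	snprintf(path, sizeof(path),
    		 "/sys/class/kfd/kfd/topology/nodes/%d/properties", node);
    	f = fopen(path, "r");
    	if (!f)
    		return -1;
    	while (fscanf(f, "%63s %lld", name, &v) == 2)
    		if (!strcmp(name, key))
    			val = v;
    	fclose(f);
    	return val;
    }

Usage: read_node_prop(0, "simd_count") returns 0 for CPU-only nodes, matching the dev->gpu ? ... : 0 logic above.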
575 if (dev->kobj_iolink) { in kfd_remove_sysfs_node_entry()
576 list_for_each_entry(iolink, &dev->io_link_props, list) in kfd_remove_sysfs_node_entry()
577 if (iolink->kobj) { in kfd_remove_sysfs_node_entry()
578 kfd_remove_sysfs_file(iolink->kobj, in kfd_remove_sysfs_node_entry()
579 &iolink->attr); in kfd_remove_sysfs_node_entry()
580 iolink->kobj = NULL; in kfd_remove_sysfs_node_entry()
582 kobject_del(dev->kobj_iolink); in kfd_remove_sysfs_node_entry()
583 kobject_put(dev->kobj_iolink); in kfd_remove_sysfs_node_entry()
584 dev->kobj_iolink = NULL; in kfd_remove_sysfs_node_entry()
587 if (dev->kobj_p2plink) { in kfd_remove_sysfs_node_entry()
588 list_for_each_entry(p2plink, &dev->p2p_link_props, list) in kfd_remove_sysfs_node_entry()
589 if (p2plink->kobj) { in kfd_remove_sysfs_node_entry()
590 kfd_remove_sysfs_file(p2plink->kobj, in kfd_remove_sysfs_node_entry()
591 &p2plink->attr); in kfd_remove_sysfs_node_entry()
592 p2plink->kobj = NULL; in kfd_remove_sysfs_node_entry()
594 kobject_del(dev->kobj_p2plink); in kfd_remove_sysfs_node_entry()
595 kobject_put(dev->kobj_p2plink); in kfd_remove_sysfs_node_entry()
596 dev->kobj_p2plink = NULL; in kfd_remove_sysfs_node_entry()
599 if (dev->kobj_cache) { in kfd_remove_sysfs_node_entry()
600 list_for_each_entry(cache, &dev->cache_props, list) in kfd_remove_sysfs_node_entry()
601 if (cache->kobj) { in kfd_remove_sysfs_node_entry()
602 kfd_remove_sysfs_file(cache->kobj, in kfd_remove_sysfs_node_entry()
603 &cache->attr); in kfd_remove_sysfs_node_entry()
604 cache->kobj = NULL; in kfd_remove_sysfs_node_entry()
606 kobject_del(dev->kobj_cache); in kfd_remove_sysfs_node_entry()
607 kobject_put(dev->kobj_cache); in kfd_remove_sysfs_node_entry()
608 dev->kobj_cache = NULL; in kfd_remove_sysfs_node_entry()
611 if (dev->kobj_mem) { in kfd_remove_sysfs_node_entry()
612 list_for_each_entry(mem, &dev->mem_props, list) in kfd_remove_sysfs_node_entry()
613 if (mem->kobj) { in kfd_remove_sysfs_node_entry()
614 kfd_remove_sysfs_file(mem->kobj, &mem->attr); in kfd_remove_sysfs_node_entry()
615 mem->kobj = NULL; in kfd_remove_sysfs_node_entry()
617 kobject_del(dev->kobj_mem); in kfd_remove_sysfs_node_entry()
618 kobject_put(dev->kobj_mem); in kfd_remove_sysfs_node_entry()
619 dev->kobj_mem = NULL; in kfd_remove_sysfs_node_entry()
622 if (dev->kobj_perf) { in kfd_remove_sysfs_node_entry()
623 list_for_each_entry(perf, &dev->perf_props, list) { in kfd_remove_sysfs_node_entry()
624 kfree(perf->attr_group); in kfd_remove_sysfs_node_entry()
625 perf->attr_group = NULL; in kfd_remove_sysfs_node_entry()
627 kobject_del(dev->kobj_perf); in kfd_remove_sysfs_node_entry()
628 kobject_put(dev->kobj_perf); in kfd_remove_sysfs_node_entry()
629 dev->kobj_perf = NULL; in kfd_remove_sysfs_node_entry()
632 if (dev->kobj_node) { in kfd_remove_sysfs_node_entry()
633 sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); in kfd_remove_sysfs_node_entry()
634 sysfs_remove_file(dev->kobj_node, &dev->attr_name); in kfd_remove_sysfs_node_entry()
635 sysfs_remove_file(dev->kobj_node, &dev->attr_props); in kfd_remove_sysfs_node_entry()
636 kobject_del(dev->kobj_node); in kfd_remove_sysfs_node_entry()
637 kobject_put(dev->kobj_node); in kfd_remove_sysfs_node_entry()
638 dev->kobj_node = NULL; in kfd_remove_sysfs_node_entry()
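kfd_remove_sysfs_file(), called for each per-entry kobject above, presumably mirrors the kobject_del()/kobject_put() teardown used for the container kobjects. A plausible reconstruction:

    static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
    {
    	/* Drop the attribute first, then unlink and release the kobject;
    	 * kobject_put() frees it via the ktype's release callback. */
    	sysfs_remove_file(kobj, attr);
    	kobject_del(kobj);
    	kobject_put(kobj);
    }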
643 uint32_t id) in kfd_build_sysfs_node_entry() argument
654 if (WARN_ON(dev->kobj_node)) in kfd_build_sysfs_node_entry()
655 return -EEXIST; in kfd_build_sysfs_node_entry()
660 dev->kobj_node = kfd_alloc_struct(dev->kobj_node); in kfd_build_sysfs_node_entry()
661 if (!dev->kobj_node) in kfd_build_sysfs_node_entry()
662 return -ENOMEM; in kfd_build_sysfs_node_entry()
664 ret = kobject_init_and_add(dev->kobj_node, &node_type, in kfd_build_sysfs_node_entry()
665 sys_props.kobj_nodes, "%d", id); in kfd_build_sysfs_node_entry()
667 kobject_put(dev->kobj_node); in kfd_build_sysfs_node_entry()
671 dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node); in kfd_build_sysfs_node_entry()
672 if (!dev->kobj_mem) in kfd_build_sysfs_node_entry()
673 return -ENOMEM; in kfd_build_sysfs_node_entry()
675 dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node); in kfd_build_sysfs_node_entry()
676 if (!dev->kobj_cache) in kfd_build_sysfs_node_entry()
677 return -ENOMEM; in kfd_build_sysfs_node_entry()
679 dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node); in kfd_build_sysfs_node_entry()
680 if (!dev->kobj_iolink) in kfd_build_sysfs_node_entry()
681 return -ENOMEM; in kfd_build_sysfs_node_entry()
683 dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node); in kfd_build_sysfs_node_entry()
684 if (!dev->kobj_p2plink) in kfd_build_sysfs_node_entry()
685 return -ENOMEM; in kfd_build_sysfs_node_entry()
687 dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node); in kfd_build_sysfs_node_entry()
688 if (!dev->kobj_perf) in kfd_build_sysfs_node_entry()
689 return -ENOMEM; in kfd_build_sysfs_node_entry()
694 dev->attr_gpuid.name = "gpu_id"; in kfd_build_sysfs_node_entry()
695 dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
696 sysfs_attr_init(&dev->attr_gpuid); in kfd_build_sysfs_node_entry()
697 dev->attr_name.name = "name"; in kfd_build_sysfs_node_entry()
698 dev->attr_name.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
699 sysfs_attr_init(&dev->attr_name); in kfd_build_sysfs_node_entry()
700 dev->attr_props.name = "properties"; in kfd_build_sysfs_node_entry()
701 dev->attr_props.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
702 sysfs_attr_init(&dev->attr_props); in kfd_build_sysfs_node_entry()
703 ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid); in kfd_build_sysfs_node_entry()
706 ret = sysfs_create_file(dev->kobj_node, &dev->attr_name); in kfd_build_sysfs_node_entry()
709 ret = sysfs_create_file(dev->kobj_node, &dev->attr_props); in kfd_build_sysfs_node_entry()
714 list_for_each_entry(mem, &dev->mem_props, list) { in kfd_build_sysfs_node_entry()
715 mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
716 if (!mem->kobj) in kfd_build_sysfs_node_entry()
717 return -ENOMEM; in kfd_build_sysfs_node_entry()
718 ret = kobject_init_and_add(mem->kobj, &mem_type, in kfd_build_sysfs_node_entry()
719 dev->kobj_mem, "%d", i); in kfd_build_sysfs_node_entry()
721 kobject_put(mem->kobj); in kfd_build_sysfs_node_entry()
725 mem->attr.name = "properties"; in kfd_build_sysfs_node_entry()
726 mem->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
727 sysfs_attr_init(&mem->attr); in kfd_build_sysfs_node_entry()
728 ret = sysfs_create_file(mem->kobj, &mem->attr); in kfd_build_sysfs_node_entry()
735 list_for_each_entry(cache, &dev->cache_props, list) { in kfd_build_sysfs_node_entry()
736 cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
737 if (!cache->kobj) in kfd_build_sysfs_node_entry()
738 return -ENOMEM; in kfd_build_sysfs_node_entry()
739 ret = kobject_init_and_add(cache->kobj, &cache_type, in kfd_build_sysfs_node_entry()
740 dev->kobj_cache, "%d", i); in kfd_build_sysfs_node_entry()
742 kobject_put(cache->kobj); in kfd_build_sysfs_node_entry()
746 cache->attr.name = "properties"; in kfd_build_sysfs_node_entry()
747 cache->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
748 sysfs_attr_init(&cache->attr); in kfd_build_sysfs_node_entry()
749 ret = sysfs_create_file(cache->kobj, &cache->attr); in kfd_build_sysfs_node_entry()
756 list_for_each_entry(iolink, &dev->io_link_props, list) { in kfd_build_sysfs_node_entry()
757 iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
758 if (!iolink->kobj) in kfd_build_sysfs_node_entry()
759 return -ENOMEM; in kfd_build_sysfs_node_entry()
760 ret = kobject_init_and_add(iolink->kobj, &iolink_type, in kfd_build_sysfs_node_entry()
761 dev->kobj_iolink, "%d", i); in kfd_build_sysfs_node_entry()
763 kobject_put(iolink->kobj); in kfd_build_sysfs_node_entry()
767 iolink->attr.name = "properties"; in kfd_build_sysfs_node_entry()
768 iolink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
769 sysfs_attr_init(&iolink->attr); in kfd_build_sysfs_node_entry()
770 ret = sysfs_create_file(iolink->kobj, &iolink->attr); in kfd_build_sysfs_node_entry()
777 list_for_each_entry(p2plink, &dev->p2p_link_props, list) { in kfd_build_sysfs_node_entry()
778 p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
779 if (!p2plink->kobj) in kfd_build_sysfs_node_entry()
780 return -ENOMEM; in kfd_build_sysfs_node_entry()
781 ret = kobject_init_and_add(p2plink->kobj, &iolink_type, in kfd_build_sysfs_node_entry()
782 dev->kobj_p2plink, "%d", i); in kfd_build_sysfs_node_entry()
784 kobject_put(p2plink->kobj); in kfd_build_sysfs_node_entry()
788 p2plink->attr.name = "properties"; in kfd_build_sysfs_node_entry()
789 p2plink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
790 sysfs_attr_init(&p2plink->attr); in kfd_build_sysfs_node_entry()
791 ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); in kfd_build_sysfs_node_entry()
799 list_for_each_entry(perf, &dev->perf_props, list) { in kfd_build_sysfs_node_entry()
800 perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) in kfd_build_sysfs_node_entry()
803 if (!perf->attr_group) in kfd_build_sysfs_node_entry()
804 return -ENOMEM; in kfd_build_sysfs_node_entry()
806 attrs = (struct attribute **)(perf->attr_group + 1); in kfd_build_sysfs_node_entry()
807 if (!strcmp(perf->block_name, "iommu")) { in kfd_build_sysfs_node_entry()
812 perf_attr_iommu[0].data = perf->max_concurrent; in kfd_build_sysfs_node_entry()
816 perf->attr_group->name = perf->block_name; in kfd_build_sysfs_node_entry()
817 perf->attr_group->attrs = attrs; in kfd_build_sysfs_node_entry()
818 ret = sysfs_create_group(dev->kobj_perf, perf->attr_group); in kfd_build_sysfs_node_entry()
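The mem/cache/iolink/p2p loops above repeat one idiom: kzalloc a bare kobject, kobject_init_and_add() it under the per-node directory, then attach a single "properties" attribute. A hypothetical factoring of that idiom (kfd_create_props_kobj does not exist in this file):

    static int kfd_create_props_kobj(struct kobject **out, struct kobject *parent,
    				 const struct kobj_type *ktype,
    				 struct attribute *attr, int idx)
    {
    	struct kobject *kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
    	int ret;

    	if (!kobj)
    		return -ENOMEM;
    	ret = kobject_init_and_add(kobj, ktype, parent, "%d", idx);
    	if (ret < 0) {
    		kobject_put(kobj);	/* also drops the ref taken by _add */
    		return ret;
    	}
    	attr->name = "properties";
    	attr->mode = KFD_SYSFS_FILE_MODE;
    	sysfs_attr_init(attr);
    	ret = sysfs_create_file(kobj, attr);
    	if (ret < 0)
    		return ret;
    	*out = kobj;
    	return 0;
    }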
860 return -ENOMEM; in kfd_topology_update_sysfs()
863 &sysprops_type, &kfd_device->kobj, in kfd_topology_update_sysfs()
873 return -ENOMEM; in kfd_topology_update_sysfs()
921 list_move_tail(temp_list->next, master_list); in kfd_topology_update_device_list()
935 if (dev->node_props.cpu_cores_count && in kfd_debug_print_topology()
936 dev->node_props.simd_count) { in kfd_debug_print_topology()
938 dev->node_props.device_id, in kfd_debug_print_topology()
939 dev->node_props.vendor_id); in kfd_debug_print_topology()
940 } else if (dev->node_props.cpu_cores_count) in kfd_debug_print_topology()
942 else if (dev->node_props.simd_count) in kfd_debug_print_topology()
944 dev->node_props.device_id, in kfd_debug_print_topology()
945 dev->node_props.vendor_id); in kfd_debug_print_topology()
962 (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK; in kfd_update_system_properties()
963 sys_props.platform_oem = *((uint64_t *)dev->oem_table_id); in kfd_update_system_properties()
964 sys_props.platform_rev = dev->oem_revision; in kfd_update_system_properties()
978 if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) { in find_system_memory()
981 list_for_each_entry(mem, &kdev->mem_props, list) { in find_system_memory()
983 mem->width = mem_width; in find_system_memory()
985 mem->mem_clk_max = mem_clock; in find_system_memory()
990 /* kfd_add_non_crat_information - Add information that is not currently
991 * available in the CRAT
992 * @dev - topology device to which additional info is added
997 if (!kdev->gpu) { in kfd_add_non_crat_information()
1001 /* TODO: For GPU node, rearrange code from kfd_topology_add_device */ in kfd_add_non_crat_information()
1014 /* topology_device_list - Master list of all topology devices in kfd_topology_init()
1015 * temp_topology_device_list - temporary list created while parsing CRAT in kfd_topology_init()
1057 topology_crat_proximity_domain = sys_props.num_devices-1; in kfd_topology_init()
1068 /* For nodes with GPU, this information gets added in kfd_topology_init()
1069 * when GPU is detected (kfd_topology_add_device). in kfd_topology_init()
1093 static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) in kfd_generate_gpu_id() argument
1100 if (!gpu) in kfd_generate_gpu_id()
1103 local_mem_size = gpu->local_mem_info.local_mem_size_private + in kfd_generate_gpu_id()
1104 gpu->local_mem_info.local_mem_size_public; in kfd_generate_gpu_id()
1105 buf[0] = gpu->adev->pdev->devfn; in kfd_generate_gpu_id()
1106 buf[1] = gpu->adev->pdev->subsystem_vendor | in kfd_generate_gpu_id()
1107 (gpu->adev->pdev->subsystem_device << 16); in kfd_generate_gpu_id()
1108 buf[2] = pci_domain_nr(gpu->adev->pdev->bus); in kfd_generate_gpu_id()
1109 buf[3] = gpu->adev->pdev->device; in kfd_generate_gpu_id()
1110 buf[4] = gpu->adev->pdev->bus->number; in kfd_generate_gpu_id()
1113 buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16); in kfd_generate_gpu_id()
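The matched lines stop just before the hash itself. In kernels of this vintage the 8-word identity buffer (devfn, subsystem IDs, PCI domain/device/bus, local memory size, XCC range) is folded by XOR-ing hash_32() values bounded by KFD_GPU_ID_HASH_WIDTH; later kernels switched to a CRC16 plus a uniqueness re-check. A sketch assuming the hash_32() variant:

    #include <linux/hash.h>

    	uint32_t hashout;
    	int i;

    	/* Fold the identity words into one small, stable gpu_id. */
    	for (i = 0, hashout = 0; i < 8; i++)
    		hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);

    	return hashout;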
1120 /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
1121 * the GPU device is not already present in the topology device
1122 * list then return NULL. This means a new topology device has to
1123 * be created for this GPU.
1125 static struct kfd_topology_device *kfd_assign_gpu(struct kfd_node *gpu) in kfd_assign_gpu() argument
1138 if (dev->node_props.cpu_cores_count) in kfd_assign_gpu()
1141 if (!dev->gpu && (dev->node_props.simd_count > 0)) { in kfd_assign_gpu()
1142 dev->gpu = gpu; in kfd_assign_gpu()
1145 list_for_each_entry(mem, &dev->mem_props, list) in kfd_assign_gpu()
1146 mem->gpu = dev->gpu; in kfd_assign_gpu()
1147 list_for_each_entry(cache, &dev->cache_props, list) in kfd_assign_gpu()
1148 cache->gpu = dev->gpu; in kfd_assign_gpu()
1149 list_for_each_entry(iolink, &dev->io_link_props, list) in kfd_assign_gpu()
1150 iolink->gpu = dev->gpu; in kfd_assign_gpu()
1151 list_for_each_entry(p2plink, &dev->p2p_link_props, list) in kfd_assign_gpu()
1152 p2plink->gpu = dev->gpu; in kfd_assign_gpu()
1163 * of the GPU in kfd_notify_gpu_change()
1167 /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
1180 * for dGPUs - VCRAT reports only one bank of Local Memory in kfd_fill_mem_clk_max_info()
1181 * for APUs - If CRAT from ACPI reports more than one bank, then in kfd_fill_mem_clk_max_info()
1182 * all the banks will report the same mem_clk_max information in kfd_fill_mem_clk_max_info()
1184 amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info, in kfd_fill_mem_clk_max_info()
1185 dev->gpu->xcp); in kfd_fill_mem_clk_max_info()
1187 list_for_each_entry(mem, &dev->mem_props, list) in kfd_fill_mem_clk_max_info()
1188 mem->mem_clk_max = local_mem_info.mem_clk_max; in kfd_fill_mem_clk_max_info()
1196 if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI) in kfd_set_iolink_no_atomics()
1203 pcie_capability_read_dword(target_gpu_dev->gpu->adev->pdev, in kfd_set_iolink_no_atomics()
1208 link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | in kfd_set_iolink_no_atomics()
1210 /* set gpu (dev) flags. */ in kfd_set_iolink_no_atomics()
1212 if (!dev->gpu->kfd->pci_atomic_requested || in kfd_set_iolink_no_atomics()
1213 dev->gpu->adev->asic_type == CHIP_HAWAII) in kfd_set_iolink_no_atomics()
1214 link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | in kfd_set_iolink_no_atomics()
1223 /* CPU -> GPU with PCIe */ in kfd_set_iolink_non_coherent()
1224 if (!to_dev->gpu && in kfd_set_iolink_non_coherent()
1225 inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) in kfd_set_iolink_non_coherent()
1226 inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1228 if (to_dev->gpu) { in kfd_set_iolink_non_coherent()
1229 /* GPU <-> GPU with PCIe and in kfd_set_iolink_non_coherent()
1230 * Vega20 with XGMI in kfd_set_iolink_non_coherent()
1232 if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS || in kfd_set_iolink_non_coherent()
1233 (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && in kfd_set_iolink_non_coherent()
1234 KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) { in kfd_set_iolink_non_coherent()
1235 outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1236 inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1246 if (!dev || !dev->gpu) in kfd_fill_iolink_non_crat_info()
1249 /* GPU only creates direct links so apply flags setting to all */ in kfd_fill_iolink_non_crat_info()
1250 list_for_each_entry(link, &dev->io_link_props, list) { in kfd_fill_iolink_non_crat_info()
1251 link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1254 link->node_to); in kfd_fill_iolink_non_crat_info()
1259 /* Include the CPU peer in GPU hive if connected over xGMI. */ in kfd_fill_iolink_non_crat_info()
1260 if (!peer_dev->gpu && in kfd_fill_iolink_non_crat_info()
1261 link->iolink_type == CRAT_IOLINK_TYPE_XGMI) { in kfd_fill_iolink_non_crat_info()
1263 * If the GPU is not part of a GPU hive, use its pci in kfd_fill_iolink_non_crat_info()
1264 * device location as the hive ID to bind with the CPU. in kfd_fill_iolink_non_crat_info()
1266 if (!dev->node_props.hive_id) in kfd_fill_iolink_non_crat_info()
1267 dev->node_props.hive_id = pci_dev_id(dev->gpu->adev->pdev); in kfd_fill_iolink_non_crat_info()
1268 peer_dev->node_props.hive_id = dev->node_props.hive_id; in kfd_fill_iolink_non_crat_info()
1271 list_for_each_entry(inbound_link, &peer_dev->io_link_props, in kfd_fill_iolink_non_crat_info()
1273 if (inbound_link->node_to != link->node_from) in kfd_fill_iolink_non_crat_info()
1276 inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1283 list_for_each_entry(link, &dev->p2p_link_props, list) { in kfd_fill_iolink_non_crat_info()
1284 link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1287 link->node_to); in kfd_fill_iolink_non_crat_info()
1292 list_for_each_entry(inbound_link, &peer_dev->p2p_link_props, in kfd_fill_iolink_non_crat_info()
1294 if (inbound_link->node_to != link->node_from) in kfd_fill_iolink_non_crat_info()
1297 inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1309 p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_p2p_node_entry()
1310 if (!p2plink->kobj) in kfd_build_p2p_node_entry()
1311 return -ENOMEM; in kfd_build_p2p_node_entry()
1313 ret = kobject_init_and_add(p2plink->kobj, &iolink_type, in kfd_build_p2p_node_entry()
1314 dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1); in kfd_build_p2p_node_entry()
1316 kobject_put(p2plink->kobj); in kfd_build_p2p_node_entry()
1320 p2plink->attr.name = "properties"; in kfd_build_p2p_node_entry()
1321 p2plink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_p2p_node_entry()
1322 sysfs_attr_init(&p2plink->attr); in kfd_build_p2p_node_entry()
1323 ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); in kfd_build_p2p_node_entry()
1340 if (cpu_dev->gpu) in kfd_create_indirect_link_prop()
1345 if (list_empty(&kdev->io_link_props)) in kfd_create_indirect_link_prop()
1346 return -ENODATA; in kfd_create_indirect_link_prop()
1348 gpu_link = list_first_entry(&kdev->io_link_props, in kfd_create_indirect_link_prop()
1352 /* CPU <--> GPU */ in kfd_create_indirect_link_prop()
1353 if (gpu_link->node_to == i) in kfd_create_indirect_link_prop()
1356 /* find CPU <--> CPU links */ in kfd_create_indirect_link_prop()
1361 &cpu_dev->io_link_props, list) { in kfd_create_indirect_link_prop()
1362 if (tmp_link->node_to == gpu_link->node_to) { in kfd_create_indirect_link_prop()
1370 return -ENOMEM; in kfd_create_indirect_link_prop()
1372 /* CPU <--> CPU <--> GPU, GPU node */ in kfd_create_indirect_link_prop()
1375 return -ENOMEM; in kfd_create_indirect_link_prop()
1378 props->weight = gpu_link->weight + cpu_link->weight; in kfd_create_indirect_link_prop()
1379 props->min_latency = gpu_link->min_latency + cpu_link->min_latency; in kfd_create_indirect_link_prop()
1380 props->max_latency = gpu_link->max_latency + cpu_link->max_latency; in kfd_create_indirect_link_prop()
1381 props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth); in kfd_create_indirect_link_prop()
1382 props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth); in kfd_create_indirect_link_prop()
1384 props->node_from = gpu_node; in kfd_create_indirect_link_prop()
1385 props->node_to = i; in kfd_create_indirect_link_prop()
1386 kdev->node_props.p2p_links_count++; in kfd_create_indirect_link_prop()
1387 list_add_tail(&props->list, &kdev->p2p_link_props); in kfd_create_indirect_link_prop()
1392 /* for small BAR, no CPU --> GPU indirect links */ in kfd_create_indirect_link_prop()
1393 if (kfd_dev_is_large_bar(kdev->gpu)) { in kfd_create_indirect_link_prop()
1394 /* CPU <--> CPU <--> GPU, CPU node */ in kfd_create_indirect_link_prop()
1397 return -ENOMEM; in kfd_create_indirect_link_prop()
1400 props2->node_from = i; in kfd_create_indirect_link_prop()
1401 props2->node_to = gpu_node; in kfd_create_indirect_link_prop()
1402 props2->kobj = NULL; in kfd_create_indirect_link_prop()
1403 cpu_dev->node_props.p2p_links_count++; in kfd_create_indirect_link_prop()
1404 list_add_tail(&props2->list, &cpu_dev->p2p_link_props); in kfd_create_indirect_link_prop()
1423 kdev->gpu->adev, in kfd_add_peer_prop()
1424 peer->gpu->adev)) in kfd_add_peer_prop()
1427 if (list_empty(&kdev->io_link_props)) in kfd_add_peer_prop()
1428 return -ENODATA; in kfd_add_peer_prop()
1430 iolink1 = list_first_entry(&kdev->io_link_props, in kfd_add_peer_prop()
1433 if (list_empty(&peer->io_link_props)) in kfd_add_peer_prop()
1434 return -ENODATA; in kfd_add_peer_prop()
1436 iolink2 = list_first_entry(&peer->io_link_props, in kfd_add_peer_prop()
1441 return -ENOMEM; in kfd_add_peer_prop()
1445 props->weight = iolink1->weight + iolink2->weight; in kfd_add_peer_prop()
1446 props->min_latency = iolink1->min_latency + iolink2->min_latency; in kfd_add_peer_prop()
1447 props->max_latency = iolink1->max_latency + iolink2->max_latency; in kfd_add_peer_prop()
1448 props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth); in kfd_add_peer_prop()
1449 props->max_bandwidth = min(iolink1->max_bandwidth, iolink2->max_bandwidth); in kfd_add_peer_prop()
1451 if (iolink1->node_to != iolink2->node_to) { in kfd_add_peer_prop()
1452 /* CPU->CPU link */ in kfd_add_peer_prop()
1453 cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to); in kfd_add_peer_prop()
1455 list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) { in kfd_add_peer_prop()
1456 if (iolink3->node_to != iolink2->node_to) in kfd_add_peer_prop()
1459 props->weight += iolink3->weight; in kfd_add_peer_prop()
1460 props->min_latency += iolink3->min_latency; in kfd_add_peer_prop()
1461 props->max_latency += iolink3->max_latency; in kfd_add_peer_prop()
1462 props->min_bandwidth = min(props->min_bandwidth, in kfd_add_peer_prop()
1463 iolink3->min_bandwidth); in kfd_add_peer_prop()
1464 props->max_bandwidth = min(props->max_bandwidth, in kfd_add_peer_prop()
1465 iolink3->max_bandwidth); in kfd_add_peer_prop()
1473 props->node_from = from; in kfd_add_peer_prop()
1474 props->node_to = to; in kfd_add_peer_prop()
1475 peer->node_props.p2p_links_count++; in kfd_add_peer_prop()
1476 list_add_tail(&props->list, &peer->p2p_link_props); in kfd_add_peer_prop()
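kfd_create_indirect_link_prop() and kfd_add_peer_prop() aggregate multi-hop paths with one rule: weights and latencies add per hop, while bandwidth is bottlenecked by the weakest hop (which is what motivates the min() argument fix on line 1449 above). A minimal sketch of the rule as a hypothetical helper:

    #include <linux/minmax.h>

    /* Combine two chained links into one indirect link: cost-like fields
     * (weight, latency) accumulate; bandwidth is limited by the weakest hop.
     * Field names follow struct kfd_iolink_properties. */
    static void combine_links(struct kfd_iolink_properties *out,
    			  const struct kfd_iolink_properties *a,
    			  const struct kfd_iolink_properties *b)
    {
    	out->weight = a->weight + b->weight;
    	out->min_latency = a->min_latency + b->min_latency;
    	out->max_latency = a->max_latency + b->max_latency;
    	out->min_bandwidth = min(a->min_bandwidth, b->min_bandwidth);
    	out->max_bandwidth = min(a->max_bandwidth, b->max_bandwidth);
    }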
1500 if (WARN_ON(!new_dev->gpu)) in kfd_dev_create_p2p_links()
1503 k--; in kfd_dev_create_p2p_links()
1505 /* create indirect links */ in kfd_dev_create_p2p_links()
1516 if (!dev->gpu || !dev->gpu->adev || in kfd_dev_create_p2p_links()
1517 (dev->gpu->kfd->hive_id && in kfd_dev_create_p2p_links()
1518 dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id)) in kfd_dev_create_p2p_links()
1521 /* check if nodes are peer-accessible in one or both directions */ in kfd_dev_create_p2p_links()
1551 cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l1_pcache()
1555 * CU, and in case of a non-shared cache check if the CU is inactive. If in fill_in_l1_pcache()
1561 return -ENOMEM; in fill_in_l1_pcache()
1564 pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); in fill_in_l1_pcache()
1565 pcache->cache_level = pcache_info[cache_type].cache_level; in fill_in_l1_pcache()
1566 pcache->cache_size = pcache_info[cache_type].cache_size; in fill_in_l1_pcache()
1569 pcache->cache_type |= HSA_CACHE_TYPE_DATA; in fill_in_l1_pcache()
1571 pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in fill_in_l1_pcache()
1573 pcache->cache_type |= HSA_CACHE_TYPE_CPU; in fill_in_l1_pcache()
1575 pcache->cache_type |= HSA_CACHE_TYPE_HSACU; in fill_in_l1_pcache()
1581 cu_sibling_map_mask >> (first_active_cu - 1); in fill_in_l1_pcache()
1583 pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); in fill_in_l1_pcache()
1584 pcache->sibling_map[1] = in fill_in_l1_pcache()
1586 pcache->sibling_map[2] = in fill_in_l1_pcache()
1588 pcache->sibling_map[3] = in fill_in_l1_pcache()
1591 pcache->sibling_map_size = 4; in fill_in_l1_pcache()
1610 int num_xcc = NUM_XCC(knode->xcc_mask); in fill_in_l2_l3_pcache()
1613 struct amdgpu_device *adev = knode->adev; in fill_in_l2_l3_pcache()
1615 start = ffs(knode->xcc_mask) - 1; in fill_in_l2_l3_pcache()
1617 cu_sibling_map_mask = cu_info->bitmap[start][0][0]; in fill_in_l2_l3_pcache()
1619 ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l2_l3_pcache()
1623 * CU, and in case of a non-shared cache check if the CU is inactive. If in fill_in_l2_l3_pcache()
1629 return -ENOMEM; in fill_in_l2_l3_pcache()
1632 pcache->processor_id_low = cu_processor_id in fill_in_l2_l3_pcache()
1633 + (first_active_cu - 1); in fill_in_l2_l3_pcache()
1634 pcache->cache_level = pcache_info[cache_type].cache_level; in fill_in_l2_l3_pcache()
1637 mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); in fill_in_l2_l3_pcache()
1641 pcache->cache_size = pcache_info[cache_type].cache_size; in fill_in_l2_l3_pcache()
1643 if (mode && pcache->cache_level == 3) in fill_in_l2_l3_pcache()
1644 pcache->cache_size /= mode; in fill_in_l2_l3_pcache()
1647 pcache->cache_type |= HSA_CACHE_TYPE_DATA; in fill_in_l2_l3_pcache()
1649 pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in fill_in_l2_l3_pcache()
1651 pcache->cache_type |= HSA_CACHE_TYPE_CPU; in fill_in_l2_l3_pcache()
1653 pcache->cache_type |= HSA_CACHE_TYPE_HSACU; in fill_in_l2_l3_pcache()
1658 cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); in fill_in_l2_l3_pcache()
1662 for (i = 0; i < gfx_info->max_shader_engines; i++) { in fill_in_l2_l3_pcache()
1663 for (j = 0; j < gfx_info->max_sh_per_se; j++) { in fill_in_l2_l3_pcache()
1664 pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); in fill_in_l2_l3_pcache()
1665 pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); in fill_in_l2_l3_pcache()
1666 pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); in fill_in_l2_l3_pcache()
1667 pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); in fill_in_l2_l3_pcache()
1670 cu_sibling_map_mask = cu_info->bitmap[xcc][i % 4][j + i / 4]; in fill_in_l2_l3_pcache()
1671 cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l2_l3_pcache()
1675 pcache->sibling_map_size = k; in fill_in_l2_l3_pcache()
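Size check for the loop above: k advances 4 bytes per (XCC, shader-engine, shader-array) step, so sibling_map_size = num_xcc * max_shader_engines * max_sh_per_se * 4 bytes. For example, 1 XCC with 4 shader engines and 1 array per engine yields k = 16 bytes, i.e. 128 bit positions in the sysfs sibling_map listing.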
1684 /* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
1695 struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info; in kfd_fill_cache_non_crat_info()
1696 struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config; in kfd_fill_cache_non_crat_info()
1704 gpu_processor_id = dev->node_props.simd_id_base; in kfd_fill_cache_non_crat_info()
1722 start = ffs(kdev->xcc_mask) - 1; in kfd_fill_cache_non_crat_info()
1723 end = start + NUM_XCC(kdev->xcc_mask); in kfd_fill_cache_non_crat_info()
1729 for (i = 0; i < gfx_info->max_shader_engines; i++) { in kfd_fill_cache_non_crat_info()
1730 for (j = 0; j < gfx_info->max_sh_per_se; j++) { in kfd_fill_cache_non_crat_info()
1731 for (k = 0; k < gfx_info->max_cu_per_sh; k += pcache_info[ct].num_cu_shared) { in kfd_fill_cache_non_crat_info()
1734 cu_info->bitmap[xcc][i % 4][j + i / 4], ct, in kfd_fill_cache_non_crat_info()
1742 list_add_tail(&props_ext->list, &dev->cache_props); in kfd_fill_cache_non_crat_info()
1747 gfx_info->max_cu_per_sh) ? in kfd_fill_cache_non_crat_info()
1749 (gfx_info->max_cu_per_sh - k); in kfd_fill_cache_non_crat_info()
1764 list_add_tail(&props_ext->list, &dev->cache_props); in kfd_fill_cache_non_crat_info()
1768 dev->node_props.caches_count += num_of_entries; in kfd_fill_cache_non_crat_info()
1769 pr_debug("Added [%d] GPU cache entries\n", num_of_entries); in kfd_fill_cache_non_crat_info()
1772 static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id, in kfd_topology_add_device_locked() argument
1782 COMPUTE_UNIT_GPU, gpu, in kfd_topology_add_device_locked()
1785 pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", in kfd_topology_add_device_locked()
1787 topology_crat_proximity_domain--; in kfd_topology_add_device_locked()
1797 pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", in kfd_topology_add_device_locked()
1799 topology_crat_proximity_domain--; in kfd_topology_add_device_locked()
1806 *dev = kfd_assign_gpu(gpu); in kfd_topology_add_device_locked()
1808 res = -ENODEV; in kfd_topology_add_device_locked()
1815 kfd_fill_cache_non_crat_info(*dev, gpu); in kfd_topology_add_device_locked()
1824 pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", in kfd_topology_add_device_locked()
1836 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) && in kfd_topology_set_dbg_firmware_support()
1837 KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) { in kfd_topology_set_dbg_firmware_support()
1838 uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version & in kfd_topology_set_dbg_firmware_support()
1841 uint32_t mes_rev = dev->gpu->adev->mes.sched_version & in kfd_topology_set_dbg_firmware_support()
1852 switch (KFD_GC_VERSION(dev->gpu)) { in kfd_topology_set_dbg_firmware_support()
1854 firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768; in kfd_topology_set_dbg_firmware_support()
1861 firmware_supported = dev->gpu->kfd->mec_fw_version >= 459; in kfd_topology_set_dbg_firmware_support()
1864 firmware_supported = dev->gpu->kfd->mec_fw_version >= 60; in kfd_topology_set_dbg_firmware_support()
1867 firmware_supported = dev->gpu->kfd->mec_fw_version >= 51; in kfd_topology_set_dbg_firmware_support()
1872 firmware_supported = dev->gpu->kfd->mec_fw_version >= 144; in kfd_topology_set_dbg_firmware_support()
1879 firmware_supported = dev->gpu->kfd->mec_fw_version >= 89; in kfd_topology_set_dbg_firmware_support()
1891 dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED; in kfd_topology_set_dbg_firmware_support()
1896 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << in kfd_topology_set_capabilities()
1900 dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_SUPPORT | in kfd_topology_set_capabilities()
1904 if (kfd_dbg_has_ttmps_always_setup(dev->gpu)) in kfd_topology_set_capabilities()
1905 dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; in kfd_topology_set_capabilities()
1907 if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { in kfd_topology_set_capabilities()
1908 if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3)) in kfd_topology_set_capabilities()
1909 dev->node_props.debug_prop |= in kfd_topology_set_capabilities()
1913 dev->node_props.debug_prop |= in kfd_topology_set_capabilities()
1917 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2)) in kfd_topology_set_capabilities()
1918 dev->node_props.capability |= in kfd_topology_set_capabilities()
1921 dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | in kfd_topology_set_capabilities()
1924 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0)) in kfd_topology_set_capabilities()
1925 dev->node_props.capability |= in kfd_topology_set_capabilities()
1932 int kfd_topology_add_device(struct kfd_node *gpu) in kfd_topology_add_device() argument
1938 const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; in kfd_topology_add_device()
1939 struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config; in kfd_topology_add_device()
1940 struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info; in kfd_topology_add_device()
1942 gpu_id = kfd_generate_gpu_id(gpu); in kfd_topology_add_device()
1943 if (gpu->xcp && !gpu->xcp->ddev) { in kfd_topology_add_device()
1944 dev_warn(gpu->adev->dev, in kfd_topology_add_device()
1945 "Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned.", in kfd_topology_add_device()
1949 pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); in kfd_topology_add_device()
1952 /* Check to see if this gpu device exists in the topology_device_list. in kfd_topology_add_device()
1953 * If so, assign the gpu to that device, in kfd_topology_add_device()
1954 * else create a Virtual CRAT for this gpu device and then parse that in kfd_topology_add_device()
1955 * CRAT to create a new topology device. Once created assign the gpu to in kfd_topology_add_device()
1956 * that topology device in kfd_topology_add_device()
1959 dev = kfd_assign_gpu(gpu); in kfd_topology_add_device()
1961 res = kfd_topology_add_device_locked(gpu, gpu_id, &dev); in kfd_topology_add_device()
1966 dev->gpu_id = gpu_id; in kfd_topology_add_device()
1967 gpu->id = gpu_id; in kfd_topology_add_device()
1975 /* Fill-in additional information that is not available in CRAT but in kfd_topology_add_device()
1976 * needed for the topology in kfd_topology_add_device()
1978 for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) { in kfd_topology_add_device()
1979 dev->node_props.name[i] = __tolower(asic_name[i]); in kfd_topology_add_device()
1983 dev->node_props.name[i] = '\0'; in kfd_topology_add_device()
1985 dev->node_props.simd_arrays_per_engine = in kfd_topology_add_device()
1986 gfx_info->max_sh_per_se; in kfd_topology_add_device()
1988 dev->node_props.gfx_target_version = in kfd_topology_add_device()
1989 gpu->kfd->device_info.gfx_target_version; in kfd_topology_add_device()
1990 dev->node_props.vendor_id = gpu->adev->pdev->vendor; in kfd_topology_add_device()
1991 dev->node_props.device_id = gpu->adev->pdev->device; in kfd_topology_add_device()
1992 dev->node_props.capability |= in kfd_topology_add_device()
1993 ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & in kfd_topology_add_device()
1996 dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); in kfd_topology_add_device()
1997 if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3)) in kfd_topology_add_device()
1998 dev->node_props.location_id |= dev->gpu->node_id; in kfd_topology_add_device()
2000 dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); in kfd_topology_add_device()
2001 dev->node_props.max_engine_clk_fcompute = in kfd_topology_add_device()
2002 amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); in kfd_topology_add_device()
2003 dev->node_props.max_engine_clk_ccompute = in kfd_topology_add_device()
2006 if (gpu->xcp) in kfd_topology_add_device()
2007 dev->node_props.drm_render_minor = gpu->xcp->ddev->render->index; in kfd_topology_add_device()
2009 dev->node_props.drm_render_minor = in kfd_topology_add_device()
2010 gpu->kfd->shared_resources.drm_render_minor; in kfd_topology_add_device()
2012 dev->node_props.hive_id = gpu->kfd->hive_id; in kfd_topology_add_device()
2013 dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu); in kfd_topology_add_device()
2014 dev->node_props.num_sdma_xgmi_engines = in kfd_topology_add_device()
2015 kfd_get_num_xgmi_sdma_engines(gpu); in kfd_topology_add_device()
2016 dev->node_props.num_sdma_queues_per_engine = in kfd_topology_add_device()
2017 gpu->kfd->device_info.num_sdma_queues_per_engine - in kfd_topology_add_device()
2018 gpu->kfd->device_info.num_reserved_sdma_queues_per_engine; in kfd_topology_add_device()
2019 dev->node_props.num_gws = (dev->gpu->gws && in kfd_topology_add_device()
2020 dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? in kfd_topology_add_device()
2021 dev->gpu->adev->gds.gws_size : 0; in kfd_topology_add_device()
2022 dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm); in kfd_topology_add_device()
2027 switch (dev->gpu->adev->asic_type) { in kfd_topology_add_device()
2031 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 << in kfd_topology_add_device()
2042 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << in kfd_topology_add_device()
2047 if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1)) in kfd_topology_add_device()
2049 dev->gpu->adev->asic_type); in kfd_topology_add_device()
2058 dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT; in kfd_topology_add_device()
2065 if (dev->gpu->adev->asic_type == CHIP_CARRIZO) { in kfd_topology_add_device()
2066 dev->node_props.simd_count = in kfd_topology_add_device()
2067 cu_info->simd_per_cu * cu_info->number; in kfd_topology_add_device()
2068 dev->node_props.max_waves_per_simd = 10; in kfd_topology_add_device()
2072 dev->node_props.capability |= in kfd_topology_add_device()
2073 ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? in kfd_topology_add_device()
2075 dev->node_props.capability |= in kfd_topology_add_device()
2076 ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? in kfd_topology_add_device()
2079 if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1)) in kfd_topology_add_device()
2080 dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ? in kfd_topology_add_device()
2083 if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev)) in kfd_topology_add_device()
2084 dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; in kfd_topology_add_device()
2086 if (dev->gpu->adev->gmc.is_app_apu || in kfd_topology_add_device()
2087 dev->gpu->adev->gmc.xgmi.connected_to_cpu) in kfd_topology_add_device()
2088 dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS; in kfd_topology_add_device()
2098 * kfd_topology_update_io_links() - Update IO links after device removal.
2122 if (dev->proximity_domain > proximity_domain) in kfd_topology_update_io_links()
2123 dev->proximity_domain--; in kfd_topology_update_io_links()
2125 list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) { in kfd_topology_update_io_links()
2130 if (iolink->node_to == proximity_domain) { in kfd_topology_update_io_links()
2131 list_del(&iolink->list); in kfd_topology_update_io_links()
2132 dev->node_props.io_links_count--; in kfd_topology_update_io_links()
2134 if (iolink->node_from > proximity_domain) in kfd_topology_update_io_links()
2135 iolink->node_from--; in kfd_topology_update_io_links()
2136 if (iolink->node_to > proximity_domain) in kfd_topology_update_io_links()
2137 iolink->node_to--; in kfd_topology_update_io_links()
2141 list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) { in kfd_topology_update_io_links()
2146 if (p2plink->node_to == proximity_domain) { in kfd_topology_update_io_links()
2147 list_del(&p2plink->list); in kfd_topology_update_io_links()
2148 dev->node_props.p2p_links_count--; in kfd_topology_update_io_links()
2150 if (p2plink->node_from > proximity_domain) in kfd_topology_update_io_links()
2151 p2plink->node_from--; in kfd_topology_update_io_links()
2152 if (p2plink->node_to > proximity_domain) in kfd_topology_update_io_links()
2153 p2plink->node_to--; in kfd_topology_update_io_links()
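Worked example of the renumbering above: with proximity domains {0,1,2,3}, removing domain 1 deletes every io/p2p link whose node_to is 1, shifts devices 2 and 3 down to 1 and 2, and decrements any surviving link endpoint greater than 1, leaving the topology densely numbered as {0,1,2}.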
2159 int kfd_topology_remove_device(struct kfd_node *gpu) in kfd_topology_remove_device() argument
2163 int res = -ENODEV; in kfd_topology_remove_device()
2169 if (dev->gpu == gpu) { in kfd_topology_remove_device()
2170 gpu_id = dev->gpu_id; in kfd_topology_remove_device()
2173 sys_props.num_devices--; in kfd_topology_remove_device()
2175 topology_crat_proximity_domain = sys_props.num_devices-1; in kfd_topology_remove_device()
2193 /* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
2194 * topology. If a GPU device is found at @idx, then a valid kfd_dev pointer is
2195 * returned through @kdev
2196 * Return - 0: On success (@kdev will be NULL for non-GPU nodes)
2197 * -1: If end of list
2210 *kdev = top_dev->gpu; in kfd_topology_enum_kfd_devices()
2220 return -1; in kfd_topology_enum_kfd_devices()
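A hedged usage sketch for the enumerator, assuming the signature int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev) implied by the fragment; @kdev comes back NULL for CPU-only nodes:

    	struct kfd_node *kdev;
    	uint8_t i = 0;

    	/* Walk every topology node until the enumerator signals end of list. */
    	while (kfd_topology_enum_kfd_devices(i, &kdev) == 0) {
    		if (kdev) {
    			/* GPU node: kdev is a valid struct kfd_node here. */
    		}
    		i++;
    	}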
2229 return -1; in kfd_cpumask_to_apic_id()
2232 return -1; in kfd_cpumask_to_apic_id()
2240 /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
2241 * of the given NUMA node (@numa_node_id)
2242 * Return -1 on failure
2246 if (numa_node_id == -1) { in kfd_numa_node_to_apic_id()
2264 if (!dev->gpu) { in kfd_debugfs_hqds_by_device()
2269 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); in kfd_debugfs_hqds_by_device()
2270 r = dqm_debugfs_hqds(m, dev->gpu->dqm); in kfd_debugfs_hqds_by_device()
2289 if (!dev->gpu) { in kfd_debugfs_rls_by_device()
2294 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); in kfd_debugfs_rls_by_device()
2295 r = pm_debugfs_runlist(m, &dev->gpu->dqm->packet_mgr); in kfd_debugfs_rls_by_device()