Lines Matching +full:gfx +full:- +full:mem
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
46 /* topology_device_list - Master list of all topology devices */
60 if (top_dev->proximity_domain == proximity_domain) { in kfd_topology_device_by_proximity_domain_no_lock()
90 if (top_dev->gpu_id == gpu_id) { in kfd_topology_device_by_id()
108 return top_dev->gpu; in kfd_device_by_id()
114 struct kfd_mem_properties *mem; in kfd_release_topology_device() local
120 list_del(&dev->list); in kfd_release_topology_device()
122 while (dev->mem_props.next != &dev->mem_props) { in kfd_release_topology_device()
123 mem = container_of(dev->mem_props.next, in kfd_release_topology_device()
125 list_del(&mem->list); in kfd_release_topology_device()
126 kfree(mem); in kfd_release_topology_device()
129 while (dev->cache_props.next != &dev->cache_props) { in kfd_release_topology_device()
130 cache = container_of(dev->cache_props.next, in kfd_release_topology_device()
132 list_del(&cache->list); in kfd_release_topology_device()
136 while (dev->io_link_props.next != &dev->io_link_props) { in kfd_release_topology_device()
137 iolink = container_of(dev->io_link_props.next, in kfd_release_topology_device()
139 list_del(&iolink->list); in kfd_release_topology_device()
143 while (dev->p2p_link_props.next != &dev->p2p_link_props) { in kfd_release_topology_device()
144 p2plink = container_of(dev->p2p_link_props.next, in kfd_release_topology_device()
146 list_del(&p2plink->list); in kfd_release_topology_device()
150 while (dev->perf_props.next != &dev->perf_props) { in kfd_release_topology_device()
151 perf = container_of(dev->perf_props.next, in kfd_release_topology_device()
153 list_del(&perf->list); in kfd_release_topology_device()
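
All five loops above are one drain idiom: dev->foo_props.next != &dev->foo_props is exactly !list_empty(&dev->foo_props). A minimal sketch of that idiom for the memory list, assuming standard <linux/list.h> semantics:

	/* Editor's sketch of the drain pattern; equivalent to the open-coded
	 * while loops in kfd_release_topology_device(). */
	while (!list_empty(&dev->mem_props)) {
		struct kfd_mem_properties *mem =
			list_first_entry(&dev->mem_props,
					 struct kfd_mem_properties, list);

		list_del(&mem->list);
		kfree(mem);
	}
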
188 INIT_LIST_HEAD(&dev->mem_props); in kfd_create_topology_device()
189 INIT_LIST_HEAD(&dev->cache_props); in kfd_create_topology_device()
190 INIT_LIST_HEAD(&dev->io_link_props); in kfd_create_topology_device()
191 INIT_LIST_HEAD(&dev->p2p_link_props); in kfd_create_topology_device()
192 INIT_LIST_HEAD(&dev->perf_props); in kfd_create_topology_device()
194 list_add_tail(&dev->list, device_list); in kfd_create_topology_device()
201 (offs += snprintf(buffer+offs, PAGE_SIZE-offs, \
231 offs = -EINVAL; in sysprops_show()
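
The macro fragment at line 201 is the standard sysfs show-buffer idiom: each property appends at the running offset, bounded by the space left in the page. A hand-expanded sketch (the field names and values here are hypothetical):

	offs += snprintf(buffer + offs, PAGE_SIZE - offs,
			 "%s %u\n", "weight", weight);
	offs += snprintf(buffer + offs, PAGE_SIZE - offs,
			 "%s %llu\n", "size_in_bytes", size_in_bytes);
	return offs;	/* show() handlers return bytes written */
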
261 if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu)) in iolink_show()
262 return -EPERM; in iolink_show()
263 sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type); in iolink_show()
264 sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj); in iolink_show()
265 sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min); in iolink_show()
266 sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from); in iolink_show()
267 sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to); in iolink_show()
268 sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight); in iolink_show()
269 sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency); in iolink_show()
270 sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency); in iolink_show()
272 iolink->min_bandwidth); in iolink_show()
274 iolink->max_bandwidth); in iolink_show()
276 iolink->rec_transfer_size); in iolink_show()
278 iolink->rec_sdma_eng_id_mask); in iolink_show()
279 sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags); in iolink_show()
297 struct kfd_mem_properties *mem; in mem_show() local
302 mem = container_of(attr, struct kfd_mem_properties, attr); in mem_show()
303 if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu)) in mem_show()
304 return -EPERM; in mem_show()
305 sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type); in mem_show()
307 mem->size_in_bytes); in mem_show()
308 sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags); in mem_show()
309 sysfs_show_32bit_prop(buffer, offs, "width", mem->width); in mem_show()
311 mem->mem_clk_max); in mem_show()
335 if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) in kfd_cache_show()
336 return -EPERM; in kfd_cache_show()
338 cache->processor_id_low); in kfd_cache_show()
339 sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level); in kfd_cache_show()
340 sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size); in kfd_cache_show()
342 cache->cacheline_size); in kfd_cache_show()
344 cache->cachelines_per_tag); in kfd_cache_show()
345 sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc); in kfd_cache_show()
346 sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency); in kfd_cache_show()
347 sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type); in kfd_cache_show()
349 offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map "); in kfd_cache_show()
350 for (i = 0; i < cache->sibling_map_size; i++) in kfd_cache_show()
351 for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) in kfd_cache_show()
353 offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,", in kfd_cache_show()
354 (cache->sibling_map[i] >> j) & 1); in kfd_cache_show()
357 buffer[offs-1] = '\n'; in kfd_cache_show()
385 if (!attr->data) /* invalid data for PMC */ in perf_show()
388 return sysfs_show_32bit_val(buf, offs, attr->data); in perf_show()
414 if (strcmp(attr->name, "gpu_id") == 0) { in node_show()
417 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
418 return -EPERM; in node_show()
419 return sysfs_show_32bit_val(buffer, offs, dev->gpu_id); in node_show()
422 if (strcmp(attr->name, "name") == 0) { in node_show()
426 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
427 return -EPERM; in node_show()
428 return sysfs_show_str_val(buffer, offs, dev->node_props.name); in node_show()
433 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
434 return -EPERM; in node_show()
436 dev->node_props.cpu_cores_count); in node_show()
438 dev->gpu ? dev->node_props.simd_count : 0); in node_show()
440 dev->node_props.mem_banks_count); in node_show()
442 dev->node_props.caches_count); in node_show()
444 dev->node_props.io_links_count); in node_show()
446 dev->node_props.p2p_links_count); in node_show()
448 dev->node_props.cpu_core_id_base); in node_show()
450 dev->node_props.simd_id_base); in node_show()
452 dev->node_props.max_waves_per_simd); in node_show()
454 dev->node_props.lds_size_in_kb); in node_show()
456 dev->node_props.gds_size_in_kb); in node_show()
458 dev->node_props.num_gws); in node_show()
460 dev->node_props.wave_front_size); in node_show()
462 dev->gpu ? (dev->node_props.array_count * in node_show()
463 NUM_XCC(dev->gpu->xcc_mask)) : 0); in node_show()
465 dev->node_props.simd_arrays_per_engine); in node_show()
467 dev->node_props.cu_per_simd_array); in node_show()
469 dev->node_props.simd_per_cu); in node_show()
471 dev->node_props.max_slots_scratch_cu); in node_show()
473 dev->node_props.gfx_target_version); in node_show()
475 dev->node_props.vendor_id); in node_show()
477 dev->node_props.device_id); in node_show()
479 dev->node_props.location_id); in node_show()
481 dev->node_props.domain); in node_show()
483 dev->node_props.drm_render_minor); in node_show()
485 dev->node_props.hive_id); in node_show()
487 dev->node_props.num_sdma_engines); in node_show()
489 dev->node_props.num_sdma_xgmi_engines); in node_show()
491 dev->node_props.num_sdma_queues_per_engine); in node_show()
493 dev->node_props.num_cp_queues); in node_show()
495 if (dev->gpu) { in node_show()
497 __ilog2_u32(dev->gpu->kfd->device_info.num_of_watch_points); in node_show()
500 dev->node_props.capability |= in node_show()
503 dev->node_props.capability |= in node_show()
509 if (dev->gpu->adev->asic_type == CHIP_TONGA) in node_show()
510 dev->node_props.capability |= in node_show()
514 dev->node_props.max_engine_clk_fcompute); in node_show()
519 dev->gpu->kfd->mec_fw_version); in node_show()
521 dev->node_props.capability); in node_show()
523 dev->node_props.capability2); in node_show()
525 dev->node_props.debug_prop); in node_show()
527 dev->gpu->kfd->sdma_fw_version); in node_show()
529 dev->gpu->adev->unique_id); in node_show()
531 NUM_XCC(dev->gpu->xcc_mask)); in node_show()
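
For reference, node_show() backs the per-node "properties" file; a small userspace sketch that dumps it (the node index 0 and the /sys/class/kfd path are assumptions about a standard KFD install):

	#include <stdio.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/sys/class/kfd/kfd/topology/nodes/0/properties", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);	/* e.g. "simd_count 256" */
		fclose(f);
		return 0;
	}
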
559 struct kfd_mem_properties *mem; in kfd_remove_sysfs_node_entry() local
562 if (dev->kobj_iolink) { in kfd_remove_sysfs_node_entry()
563 list_for_each_entry(iolink, &dev->io_link_props, list) in kfd_remove_sysfs_node_entry()
564 if (iolink->kobj) { in kfd_remove_sysfs_node_entry()
565 kfd_remove_sysfs_file(iolink->kobj, in kfd_remove_sysfs_node_entry()
566 &iolink->attr); in kfd_remove_sysfs_node_entry()
567 iolink->kobj = NULL; in kfd_remove_sysfs_node_entry()
569 kobject_del(dev->kobj_iolink); in kfd_remove_sysfs_node_entry()
570 kobject_put(dev->kobj_iolink); in kfd_remove_sysfs_node_entry()
571 dev->kobj_iolink = NULL; in kfd_remove_sysfs_node_entry()
574 if (dev->kobj_p2plink) { in kfd_remove_sysfs_node_entry()
575 list_for_each_entry(p2plink, &dev->p2p_link_props, list) in kfd_remove_sysfs_node_entry()
576 if (p2plink->kobj) { in kfd_remove_sysfs_node_entry()
577 kfd_remove_sysfs_file(p2plink->kobj, in kfd_remove_sysfs_node_entry()
578 &p2plink->attr); in kfd_remove_sysfs_node_entry()
579 p2plink->kobj = NULL; in kfd_remove_sysfs_node_entry()
581 kobject_del(dev->kobj_p2plink); in kfd_remove_sysfs_node_entry()
582 kobject_put(dev->kobj_p2plink); in kfd_remove_sysfs_node_entry()
583 dev->kobj_p2plink = NULL; in kfd_remove_sysfs_node_entry()
586 if (dev->kobj_cache) { in kfd_remove_sysfs_node_entry()
587 list_for_each_entry(cache, &dev->cache_props, list) in kfd_remove_sysfs_node_entry()
588 if (cache->kobj) { in kfd_remove_sysfs_node_entry()
589 kfd_remove_sysfs_file(cache->kobj, in kfd_remove_sysfs_node_entry()
590 &cache->attr); in kfd_remove_sysfs_node_entry()
591 cache->kobj = NULL; in kfd_remove_sysfs_node_entry()
593 kobject_del(dev->kobj_cache); in kfd_remove_sysfs_node_entry()
594 kobject_put(dev->kobj_cache); in kfd_remove_sysfs_node_entry()
595 dev->kobj_cache = NULL; in kfd_remove_sysfs_node_entry()
598 if (dev->kobj_mem) { in kfd_remove_sysfs_node_entry()
599 list_for_each_entry(mem, &dev->mem_props, list) in kfd_remove_sysfs_node_entry()
600 if (mem->kobj) { in kfd_remove_sysfs_node_entry()
601 kfd_remove_sysfs_file(mem->kobj, &mem->attr); in kfd_remove_sysfs_node_entry()
602 mem->kobj = NULL; in kfd_remove_sysfs_node_entry()
604 kobject_del(dev->kobj_mem); in kfd_remove_sysfs_node_entry()
605 kobject_put(dev->kobj_mem); in kfd_remove_sysfs_node_entry()
606 dev->kobj_mem = NULL; in kfd_remove_sysfs_node_entry()
609 if (dev->kobj_perf) { in kfd_remove_sysfs_node_entry()
610 list_for_each_entry(perf, &dev->perf_props, list) { in kfd_remove_sysfs_node_entry()
611 kfree(perf->attr_group); in kfd_remove_sysfs_node_entry()
612 perf->attr_group = NULL; in kfd_remove_sysfs_node_entry()
614 kobject_del(dev->kobj_perf); in kfd_remove_sysfs_node_entry()
615 kobject_put(dev->kobj_perf); in kfd_remove_sysfs_node_entry()
616 dev->kobj_perf = NULL; in kfd_remove_sysfs_node_entry()
619 if (dev->kobj_node) { in kfd_remove_sysfs_node_entry()
620 sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); in kfd_remove_sysfs_node_entry()
621 sysfs_remove_file(dev->kobj_node, &dev->attr_name); in kfd_remove_sysfs_node_entry()
622 sysfs_remove_file(dev->kobj_node, &dev->attr_props); in kfd_remove_sysfs_node_entry()
623 kobject_del(dev->kobj_node); in kfd_remove_sysfs_node_entry()
624 kobject_put(dev->kobj_node); in kfd_remove_sysfs_node_entry()
625 dev->kobj_node = NULL; in kfd_remove_sysfs_node_entry()
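
Each branch of kfd_remove_sysfs_node_entry() repeats one teardown order; sketched with a hypothetical kobj_example member:

	if (dev->kobj_example) {
		/* remove attribute files while the kobject is still live */
		kfd_remove_sysfs_file(dev->kobj_example, &dev->attr_example);
		/* unlink from sysfs, then drop the reference */
		kobject_del(dev->kobj_example);
		kobject_put(dev->kobj_example);
		/* clear the pointer so a repeated removal is a no-op */
		dev->kobj_example = NULL;
	}
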
635 struct kfd_mem_properties *mem; in kfd_build_sysfs_node_entry() local
641 if (WARN_ON(dev->kobj_node)) in kfd_build_sysfs_node_entry()
642 return -EEXIST; in kfd_build_sysfs_node_entry()
647 dev->kobj_node = kfd_alloc_struct(dev->kobj_node); in kfd_build_sysfs_node_entry()
648 if (!dev->kobj_node) in kfd_build_sysfs_node_entry()
649 return -ENOMEM; in kfd_build_sysfs_node_entry()
651 ret = kobject_init_and_add(dev->kobj_node, &node_type, in kfd_build_sysfs_node_entry()
654 kobject_put(dev->kobj_node); in kfd_build_sysfs_node_entry()
658 dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node); in kfd_build_sysfs_node_entry()
659 if (!dev->kobj_mem) in kfd_build_sysfs_node_entry()
660 return -ENOMEM; in kfd_build_sysfs_node_entry()
662 dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node); in kfd_build_sysfs_node_entry()
663 if (!dev->kobj_cache) in kfd_build_sysfs_node_entry()
664 return -ENOMEM; in kfd_build_sysfs_node_entry()
666 dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node); in kfd_build_sysfs_node_entry()
667 if (!dev->kobj_iolink) in kfd_build_sysfs_node_entry()
668 return -ENOMEM; in kfd_build_sysfs_node_entry()
670 dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node); in kfd_build_sysfs_node_entry()
671 if (!dev->kobj_p2plink) in kfd_build_sysfs_node_entry()
672 return -ENOMEM; in kfd_build_sysfs_node_entry()
674 dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node); in kfd_build_sysfs_node_entry()
675 if (!dev->kobj_perf) in kfd_build_sysfs_node_entry()
676 return -ENOMEM; in kfd_build_sysfs_node_entry()
681 dev->attr_gpuid.name = "gpu_id"; in kfd_build_sysfs_node_entry()
682 dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
683 sysfs_attr_init(&dev->attr_gpuid); in kfd_build_sysfs_node_entry()
684 dev->attr_name.name = "name"; in kfd_build_sysfs_node_entry()
685 dev->attr_name.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
686 sysfs_attr_init(&dev->attr_name); in kfd_build_sysfs_node_entry()
687 dev->attr_props.name = "properties"; in kfd_build_sysfs_node_entry()
688 dev->attr_props.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
689 sysfs_attr_init(&dev->attr_props); in kfd_build_sysfs_node_entry()
690 ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid); in kfd_build_sysfs_node_entry()
693 ret = sysfs_create_file(dev->kobj_node, &dev->attr_name); in kfd_build_sysfs_node_entry()
696 ret = sysfs_create_file(dev->kobj_node, &dev->attr_props); in kfd_build_sysfs_node_entry()
701 list_for_each_entry(mem, &dev->mem_props, list) { in kfd_build_sysfs_node_entry()
702 mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
703 if (!mem->kobj) in kfd_build_sysfs_node_entry()
704 return -ENOMEM; in kfd_build_sysfs_node_entry()
705 ret = kobject_init_and_add(mem->kobj, &mem_type, in kfd_build_sysfs_node_entry()
706 dev->kobj_mem, "%d", i); in kfd_build_sysfs_node_entry()
708 kobject_put(mem->kobj); in kfd_build_sysfs_node_entry()
712 mem->attr.name = "properties"; in kfd_build_sysfs_node_entry()
713 mem->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
714 sysfs_attr_init(&mem->attr); in kfd_build_sysfs_node_entry()
715 ret = sysfs_create_file(mem->kobj, &mem->attr); in kfd_build_sysfs_node_entry()
722 list_for_each_entry(cache, &dev->cache_props, list) { in kfd_build_sysfs_node_entry()
723 cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
724 if (!cache->kobj) in kfd_build_sysfs_node_entry()
725 return -ENOMEM; in kfd_build_sysfs_node_entry()
726 ret = kobject_init_and_add(cache->kobj, &cache_type, in kfd_build_sysfs_node_entry()
727 dev->kobj_cache, "%d", i); in kfd_build_sysfs_node_entry()
729 kobject_put(cache->kobj); in kfd_build_sysfs_node_entry()
733 cache->attr.name = "properties"; in kfd_build_sysfs_node_entry()
734 cache->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
735 sysfs_attr_init(&cache->attr); in kfd_build_sysfs_node_entry()
736 ret = sysfs_create_file(cache->kobj, &cache->attr); in kfd_build_sysfs_node_entry()
743 list_for_each_entry(iolink, &dev->io_link_props, list) { in kfd_build_sysfs_node_entry()
744 iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
745 if (!iolink->kobj) in kfd_build_sysfs_node_entry()
746 return -ENOMEM; in kfd_build_sysfs_node_entry()
747 ret = kobject_init_and_add(iolink->kobj, &iolink_type, in kfd_build_sysfs_node_entry()
748 dev->kobj_iolink, "%d", i); in kfd_build_sysfs_node_entry()
750 kobject_put(iolink->kobj); in kfd_build_sysfs_node_entry()
754 iolink->attr.name = "properties"; in kfd_build_sysfs_node_entry()
755 iolink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
756 sysfs_attr_init(&iolink->attr); in kfd_build_sysfs_node_entry()
757 ret = sysfs_create_file(iolink->kobj, &iolink->attr); in kfd_build_sysfs_node_entry()
764 list_for_each_entry(p2plink, &dev->p2p_link_props, list) { in kfd_build_sysfs_node_entry()
765 p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
766 if (!p2plink->kobj) in kfd_build_sysfs_node_entry()
767 return -ENOMEM; in kfd_build_sysfs_node_entry()
768 ret = kobject_init_and_add(p2plink->kobj, &iolink_type, in kfd_build_sysfs_node_entry()
769 dev->kobj_p2plink, "%d", i); in kfd_build_sysfs_node_entry()
771 kobject_put(p2plink->kobj); in kfd_build_sysfs_node_entry()
775 p2plink->attr.name = "properties"; in kfd_build_sysfs_node_entry()
776 p2plink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
777 sysfs_attr_init(&p2plink->attr); in kfd_build_sysfs_node_entry()
778 ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); in kfd_build_sysfs_node_entry()
786 list_for_each_entry(perf, &dev->perf_props, list) { in kfd_build_sysfs_node_entry()
787 perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) in kfd_build_sysfs_node_entry()
790 if (!perf->attr_group) in kfd_build_sysfs_node_entry()
791 return -ENOMEM; in kfd_build_sysfs_node_entry()
793 attrs = (struct attribute **)(perf->attr_group + 1); in kfd_build_sysfs_node_entry()
794 if (!strcmp(perf->block_name, "iommu")) { in kfd_build_sysfs_node_entry()
799 perf_attr_iommu[0].data = perf->max_concurrent; in kfd_build_sysfs_node_entry()
803 perf->attr_group->name = perf->block_name; in kfd_build_sysfs_node_entry()
804 perf->attr_group->attrs = attrs; in kfd_build_sysfs_node_entry()
805 ret = sysfs_create_group(dev->kobj_perf, perf->attr_group); in kfd_build_sysfs_node_entry()
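
The mem/cache/iolink/p2plink loops above all share one construction idiom; a sketch with hypothetical names (entry, entry_type, parent):

	entry->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
	if (!entry->kobj)
		return -ENOMEM;
	ret = kobject_init_and_add(entry->kobj, &entry_type, parent, "%d", i);
	if (ret < 0) {
		kobject_put(entry->kobj);	/* also frees via the ktype release() */
		return ret;
	}
	entry->attr.name = "properties";
	entry->attr.mode = KFD_SYSFS_FILE_MODE;
	sysfs_attr_init(&entry->attr);
	ret = sysfs_create_file(entry->kobj, &entry->attr);
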
847 return -ENOMEM; in kfd_topology_update_sysfs()
850 &sysprops_type, &kfd_device->kobj, in kfd_topology_update_sysfs()
860 return -ENOMEM; in kfd_topology_update_sysfs()
908 list_move_tail(temp_list->next, master_list); in kfd_topology_update_device_list()
922 if (dev->node_props.cpu_cores_count && in kfd_debug_print_topology()
923 dev->node_props.simd_count) { in kfd_debug_print_topology()
925 dev->node_props.device_id, in kfd_debug_print_topology()
926 dev->node_props.vendor_id); in kfd_debug_print_topology()
927 } else if (dev->node_props.cpu_cores_count) in kfd_debug_print_topology()
929 else if (dev->node_props.simd_count) in kfd_debug_print_topology()
931 dev->node_props.device_id, in kfd_debug_print_topology()
932 dev->node_props.vendor_id); in kfd_debug_print_topology()
948 sys_props.platform_id = dev->oem_id64; in kfd_update_system_properties()
949 sys_props.platform_oem = *((uint64_t *)dev->oem_table_id); in kfd_update_system_properties()
950 sys_props.platform_rev = dev->oem_revision; in kfd_update_system_properties()
958 struct kfd_mem_properties *mem; in find_system_memory() local
962 if (memdev->header.type != DMI_ENTRY_MEM_DEVICE) in find_system_memory()
964 if (memdev->header.length < sizeof(struct dmi_mem_device)) in find_system_memory()
967 list_for_each_entry(mem, &kdev->mem_props, list) { in find_system_memory()
968 if (memdev->total_width != 0xFFFF && memdev->total_width != 0) in find_system_memory()
969 mem->width = memdev->total_width; in find_system_memory()
970 if (memdev->speed != 0) in find_system_memory()
971 mem->mem_clk_max = memdev->speed; in find_system_memory()
975 /* kfd_add_non_crat_information - Add information that is not currently
977 * @dev - topology device to which additional info is added
982 if (!kdev->gpu) { in kfd_add_non_crat_information()
999 /* topology_device_list - Master list of all topology devices in kfd_topology_init()
1000 * temp_topology_device_list - temporary list created while parsing CRAT in kfd_topology_init()
1042 topology_crat_proximity_domain = sys_props.num_devices-1; in kfd_topology_init()
1091 local_mem_size = gpu->local_mem_info.local_mem_size_private + in kfd_generate_gpu_id()
1092 gpu->local_mem_info.local_mem_size_public; in kfd_generate_gpu_id()
1093 buf[0] = gpu->adev->pdev->devfn; in kfd_generate_gpu_id()
1094 buf[1] = gpu->adev->pdev->subsystem_vendor | in kfd_generate_gpu_id()
1095 (gpu->adev->pdev->subsystem_device << 16); in kfd_generate_gpu_id()
1096 buf[2] = pci_domain_nr(gpu->adev->pdev->bus); in kfd_generate_gpu_id()
1097 buf[3] = gpu->adev->pdev->device; in kfd_generate_gpu_id()
1098 buf[4] = gpu->adev->pdev->bus->number; in kfd_generate_gpu_id()
1101 buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16); in kfd_generate_gpu_id()
1104 ((1 << KFD_GPU_ID_HASH_WIDTH) - 1); in kfd_generate_gpu_id()
1108 * that the value could be 0 or non-unique. So, check if in kfd_generate_gpu_id()
1109 * it is unique and non-zero. If not unique, increment till in kfd_generate_gpu_id()
1119 if (dev->gpu && dev->gpu_id == gpu_id) { in kfd_generate_gpu_id()
1126 ((1 << KFD_GPU_ID_HASH_WIDTH) - 1); in kfd_generate_gpu_id()
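
Putting the gpu_id fragments together: device identity words (devfn, subsystem IDs, domain, bus, device ID, local memory size, XCC mask) are hashed, masked to KFD_GPU_ID_HASH_WIDTH bits, then probed until unique and non-zero. A sketch of that last step, with a hypothetical id_in_use() lookup over the topology list:

	gpu_id = hash & ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
	while (gpu_id == 0 || id_in_use(gpu_id))
		gpu_id = (gpu_id + 1) & ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
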
1132 /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
1141 struct kfd_mem_properties *mem; in kfd_assign_gpu() local
1150 if (dev->node_props.cpu_cores_count) in kfd_assign_gpu()
1153 if (!dev->gpu && (dev->node_props.simd_count > 0)) { in kfd_assign_gpu()
1154 dev->gpu = gpu; in kfd_assign_gpu()
1157 list_for_each_entry(mem, &dev->mem_props, list) in kfd_assign_gpu()
1158 mem->gpu = dev->gpu; in kfd_assign_gpu()
1159 list_for_each_entry(cache, &dev->cache_props, list) in kfd_assign_gpu()
1160 cache->gpu = dev->gpu; in kfd_assign_gpu()
1161 list_for_each_entry(iolink, &dev->io_link_props, list) in kfd_assign_gpu()
1162 iolink->gpu = dev->gpu; in kfd_assign_gpu()
1163 list_for_each_entry(p2plink, &dev->p2p_link_props, list) in kfd_assign_gpu()
1164 p2plink->gpu = dev->gpu; in kfd_assign_gpu()
1179 /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
1184 struct kfd_mem_properties *mem; in kfd_fill_mem_clk_max_info() local
1192 * for dGPUs - VCRAT reports only one bank of Local Memory in kfd_fill_mem_clk_max_info()
1193 * for APUs - if CRAT from ACPI reports more than one bank, then in kfd_fill_mem_clk_max_info()
1196 amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info, in kfd_fill_mem_clk_max_info()
1197 dev->gpu->xcp); in kfd_fill_mem_clk_max_info()
1199 list_for_each_entry(mem, &dev->mem_props, list) in kfd_fill_mem_clk_max_info()
1200 mem->mem_clk_max = local_mem_info.mem_clk_max; in kfd_fill_mem_clk_max_info()
1208 if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI) in kfd_set_iolink_no_atomics()
1215 pcie_capability_read_dword(target_gpu_dev->gpu->adev->pdev, in kfd_set_iolink_no_atomics()
1220 link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | in kfd_set_iolink_no_atomics()
1224 if (!dev->gpu->kfd->pci_atomic_requested || in kfd_set_iolink_no_atomics()
1225 dev->gpu->adev->asic_type == CHIP_HAWAII) in kfd_set_iolink_no_atomics()
1226 link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | in kfd_set_iolink_no_atomics()
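
The probe around line 1215 reads the target device's PCIe capabilities; a sketch of such an atomics check using the standard helper (only the capability read is visible in the matches, so the exact flag test is an assumption):

	u32 cap;

	pcie_capability_read_dword(target_pdev, PCI_EXP_DEVCAP2, &cap);
	if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
		     PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
		link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
			       CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
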
1235 /* CPU -> GPU with PCIe */ in kfd_set_iolink_non_coherent()
1236 if (!to_dev->gpu && in kfd_set_iolink_non_coherent()
1237 inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) in kfd_set_iolink_non_coherent()
1238 inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1240 if (to_dev->gpu) { in kfd_set_iolink_non_coherent()
1241 /* GPU <-> GPU with PCIe and in kfd_set_iolink_non_coherent()
1244 if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS || in kfd_set_iolink_non_coherent()
1245 (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && in kfd_set_iolink_non_coherent()
1246 KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) { in kfd_set_iolink_non_coherent()
1247 outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1248 inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1255 { -1, 14, 12, 2, 4, 8, 10, 6 },
1256 { 14, -1, 2, 10, 8, 4, 6, 12 },
1257 { 10, 2, -1, 12, 14, 6, 4, 8 },
1258 { 2, 12, 10, -1, 6, 14, 8, 4 },
1259 { 4, 8, 14, 6, -1, 10, 12, 2 },
1260 { 8, 4, 6, 14, 12, -1, 2, 10 },
1261 { 10, 6, 4, 8, 12, 2, -1, 14 },
1262 { 6, 12, 8, 4, 2, 10, 14, -1 }};
1268 struct kfd_node *gpu = outbound_link->gpu; in kfd_set_recommended_sdma_engines()
1269 struct amdgpu_device *adev = gpu->adev; in kfd_set_recommended_sdma_engines()
1270 int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; in kfd_set_recommended_sdma_engines()
1271 bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && in kfd_set_recommended_sdma_engines()
1272 adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 && in kfd_set_recommended_sdma_engines()
1274 (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); in kfd_set_recommended_sdma_engines()
1277 int src_socket_id = adev->gmc.xgmi.physical_node_id; in kfd_set_recommended_sdma_engines()
1278 int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; in kfd_set_recommended_sdma_engines()
1280 outbound_link->rec_sdma_eng_id_mask = in kfd_set_recommended_sdma_engines()
1282 inbound_link->rec_sdma_eng_id_mask = in kfd_set_recommended_sdma_engines()
1288 if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && in kfd_set_recommended_sdma_engines()
1289 kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) { in kfd_set_recommended_sdma_engines()
1295 outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); in kfd_set_recommended_sdma_engines()
1296 inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); in kfd_set_recommended_sdma_engines()
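
The 8x8 table above is indexed by source and destination xGMI socket ID; the diagonal is -1 since a socket has no link to itself. Per the assignments near lines 1277-1283, the selected entry feeds rec_sdma_eng_id_mask for both directions, with a fallback (lines 1288-1296) that ORs in every xGMI SDMA engine. A sketch of the table path, hedged because the table name and the right-hand side of the assignments are cropped from the matches:

	int src = adev->gmc.xgmi.physical_node_id;
	int dst = to_dev->gpu->adev->gmc.xgmi.physical_node_id;

	/* sdma_policy is a stand-in name; treating the entry as an
	 * engine ID to shift into a mask is an assumption */
	outbound_link->rec_sdma_eng_id_mask = 1 << sdma_policy[src][dst];
	inbound_link->rec_sdma_eng_id_mask = 1 << sdma_policy[dst][src];
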
1306 if (!dev || !dev->gpu) in kfd_fill_iolink_non_crat_info()
1310 list_for_each_entry(link, &dev->io_link_props, list) { in kfd_fill_iolink_non_crat_info()
1311 link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1314 link->node_to); in kfd_fill_iolink_non_crat_info()
1320 if (!peer_dev->gpu && in kfd_fill_iolink_non_crat_info()
1321 link->iolink_type == CRAT_IOLINK_TYPE_XGMI) { in kfd_fill_iolink_non_crat_info()
1326 if (!dev->node_props.hive_id) in kfd_fill_iolink_non_crat_info()
1327 dev->node_props.hive_id = pci_dev_id(dev->gpu->adev->pdev); in kfd_fill_iolink_non_crat_info()
1328 peer_dev->node_props.hive_id = dev->node_props.hive_id; in kfd_fill_iolink_non_crat_info()
1331 list_for_each_entry(inbound_link, &peer_dev->io_link_props, in kfd_fill_iolink_non_crat_info()
1333 if (inbound_link->node_to != link->node_from) in kfd_fill_iolink_non_crat_info()
1336 inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1344 list_for_each_entry(link, &dev->p2p_link_props, list) { in kfd_fill_iolink_non_crat_info()
1345 link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1348 link->node_to); in kfd_fill_iolink_non_crat_info()
1353 list_for_each_entry(inbound_link, &peer_dev->p2p_link_props, in kfd_fill_iolink_non_crat_info()
1355 if (inbound_link->node_to != link->node_from) in kfd_fill_iolink_non_crat_info()
1358 inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1370 p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_p2p_node_entry()
1371 if (!p2plink->kobj) in kfd_build_p2p_node_entry()
1372 return -ENOMEM; in kfd_build_p2p_node_entry()
1374 ret = kobject_init_and_add(p2plink->kobj, &iolink_type, in kfd_build_p2p_node_entry()
1375 dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1); in kfd_build_p2p_node_entry()
1377 kobject_put(p2plink->kobj); in kfd_build_p2p_node_entry()
1381 p2plink->attr.name = "properties"; in kfd_build_p2p_node_entry()
1382 p2plink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_p2p_node_entry()
1383 sysfs_attr_init(&p2plink->attr); in kfd_build_p2p_node_entry()
1384 ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); in kfd_build_p2p_node_entry()
1401 if (cpu_dev->gpu) in kfd_create_indirect_link_prop()
1406 if (list_empty(&kdev->io_link_props)) in kfd_create_indirect_link_prop()
1407 return -ENODATA; in kfd_create_indirect_link_prop()
1409 gpu_link = list_first_entry(&kdev->io_link_props, in kfd_create_indirect_link_prop()
1413 /* CPU <--> GPU */ in kfd_create_indirect_link_prop()
1414 if (gpu_link->node_to == i) in kfd_create_indirect_link_prop()
1417 /* find CPU <--> CPU links */ in kfd_create_indirect_link_prop()
1422 &cpu_dev->io_link_props, list) { in kfd_create_indirect_link_prop()
1423 if (tmp_link->node_to == gpu_link->node_to) { in kfd_create_indirect_link_prop()
1431 return -ENOMEM; in kfd_create_indirect_link_prop()
1433 /* CPU <--> CPU <--> GPU, GPU node */ in kfd_create_indirect_link_prop()
1436 return -ENOMEM; in kfd_create_indirect_link_prop()
1439 props->weight = gpu_link->weight + cpu_link->weight; in kfd_create_indirect_link_prop()
1440 props->min_latency = gpu_link->min_latency + cpu_link->min_latency; in kfd_create_indirect_link_prop()
1441 props->max_latency = gpu_link->max_latency + cpu_link->max_latency; in kfd_create_indirect_link_prop()
1442 props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth); in kfd_create_indirect_link_prop()
1443 props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth); in kfd_create_indirect_link_prop()
1445 props->node_from = gpu_node; in kfd_create_indirect_link_prop()
1446 props->node_to = i; in kfd_create_indirect_link_prop()
1447 kdev->node_props.p2p_links_count++; in kfd_create_indirect_link_prop()
1448 list_add_tail(&props->list, &kdev->p2p_link_props); in kfd_create_indirect_link_prop()
1453 /* for small BAR, no CPU --> GPU indirect links */ in kfd_create_indirect_link_prop()
1454 if (kfd_dev_is_large_bar(kdev->gpu)) { in kfd_create_indirect_link_prop()
1455 /* CPU <--> CPU <--> GPU, CPU node */ in kfd_create_indirect_link_prop()
1458 return -ENOMEM; in kfd_create_indirect_link_prop()
1461 props2->node_from = i; in kfd_create_indirect_link_prop()
1462 props2->node_to = gpu_node; in kfd_create_indirect_link_prop()
1463 props2->kobj = NULL; in kfd_create_indirect_link_prop()
1464 cpu_dev->node_props.p2p_links_count++; in kfd_create_indirect_link_prop()
1465 list_add_tail(&props2->list, &cpu_dev->p2p_link_props); in kfd_create_indirect_link_prop()
1484 kdev->gpu->adev, in kfd_add_peer_prop()
1485 peer->gpu->adev)) in kfd_add_peer_prop()
1488 if (list_empty(&kdev->io_link_props)) in kfd_add_peer_prop()
1489 return -ENODATA; in kfd_add_peer_prop()
1491 iolink1 = list_first_entry(&kdev->io_link_props, in kfd_add_peer_prop()
1494 if (list_empty(&peer->io_link_props)) in kfd_add_peer_prop()
1495 return -ENODATA; in kfd_add_peer_prop()
1497 iolink2 = list_first_entry(&peer->io_link_props, in kfd_add_peer_prop()
1502 return -ENOMEM; in kfd_add_peer_prop()
1506 props->weight = iolink1->weight + iolink2->weight; in kfd_add_peer_prop()
1507 props->min_latency = iolink1->min_latency + iolink2->min_latency; in kfd_add_peer_prop()
1508 props->max_latency = iolink1->max_latency + iolink2->max_latency; in kfd_add_peer_prop()
1509 props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth); in kfd_add_peer_prop()
1510 props->max_bandwidth = min(iolink1->max_bandwidth, iolink2->max_bandwidth); in kfd_add_peer_prop()
1512 if (iolink1->node_to != iolink2->node_to) { in kfd_add_peer_prop()
1513 /* CPU->CPU link */ in kfd_add_peer_prop()
1514 cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to); in kfd_add_peer_prop()
1516 list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) { in kfd_add_peer_prop()
1517 if (iolink3->node_to != iolink2->node_to) in kfd_add_peer_prop()
1520 props->weight += iolink3->weight; in kfd_add_peer_prop()
1521 props->min_latency += iolink3->min_latency; in kfd_add_peer_prop()
1522 props->max_latency += iolink3->max_latency; in kfd_add_peer_prop()
1523 props->min_bandwidth = min(props->min_bandwidth, in kfd_add_peer_prop()
1524 iolink3->min_bandwidth); in kfd_add_peer_prop()
1525 props->max_bandwidth = min(props->max_bandwidth, in kfd_add_peer_prop()
1526 iolink3->max_bandwidth); in kfd_add_peer_prop()
1534 props->node_from = from; in kfd_add_peer_prop()
1535 props->node_to = to; in kfd_add_peer_prop()
1536 peer->node_props.p2p_links_count++; in kfd_add_peer_prop()
1537 list_add_tail(&props->list, &peer->p2p_link_props); in kfd_add_peer_prop()
1561 if (WARN_ON(!new_dev->gpu)) in kfd_dev_create_p2p_links()
1564 k--; in kfd_dev_create_p2p_links()
1566 /* create indirect links */ in kfd_dev_create_p2p_links()
1577 if (!dev->gpu || !dev->gpu->adev || in kfd_dev_create_p2p_links()
1578 (dev->gpu->kfd->hive_id && in kfd_dev_create_p2p_links()
1579 dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id)) in kfd_dev_create_p2p_links()
1582 /* check if nodes are peer accessible in one direction or both */ in kfd_dev_create_p2p_links()
1612 cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l1_pcache()
1616 * CU, and in case of a non-shared cache check if the CU is inactive. If in fill_in_l1_pcache()
1622 return -ENOMEM; in fill_in_l1_pcache()
1625 pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); in fill_in_l1_pcache()
1626 pcache->cache_level = pcache_info[cache_type].cache_level; in fill_in_l1_pcache()
1627 pcache->cache_size = pcache_info[cache_type].cache_size; in fill_in_l1_pcache()
1628 pcache->cacheline_size = pcache_info[cache_type].cache_line_size; in fill_in_l1_pcache()
1631 pcache->cache_type |= HSA_CACHE_TYPE_DATA; in fill_in_l1_pcache()
1633 pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in fill_in_l1_pcache()
1635 pcache->cache_type |= HSA_CACHE_TYPE_CPU; in fill_in_l1_pcache()
1637 pcache->cache_type |= HSA_CACHE_TYPE_HSACU; in fill_in_l1_pcache()
1643 cu_sibling_map_mask >> (first_active_cu - 1); in fill_in_l1_pcache()
1645 pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); in fill_in_l1_pcache()
1646 pcache->sibling_map[1] = in fill_in_l1_pcache()
1648 pcache->sibling_map[2] = in fill_in_l1_pcache()
1650 pcache->sibling_map[3] = in fill_in_l1_pcache()
1653 pcache->sibling_map_size = 4; in fill_in_l1_pcache()
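
Lines 1643-1653 normalize the CU mask so bit 0 is the first active CU, then split it into the four bytes of the HSA sibling_map; a sketch of that packing (ffs() returns the 1-based index of the first set bit):

	first_active_cu = ffs(cu_sibling_map_mask);
	cu_sibling_map_mask >>= first_active_cu - 1;	/* bit 0 = first active CU */
	pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
	pcache->sibling_map[1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
	pcache->sibling_map[2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
	pcache->sibling_map[3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
	pcache->sibling_map_size = 4;
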
1672 int num_xcc = NUM_XCC(knode->xcc_mask); in fill_in_l2_l3_pcache()
1675 struct amdgpu_device *adev = knode->adev; in fill_in_l2_l3_pcache()
1678 start = ffs(knode->xcc_mask) - 1; in fill_in_l2_l3_pcache()
1685 for (i = 0; i < gfx_info->max_shader_engines && !found; i++) { in fill_in_l2_l3_pcache()
1686 for (j = 0; j < gfx_info->max_sh_per_se && !found; j++) { in fill_in_l2_l3_pcache()
1687 if (cu_info->bitmap[start][i % 4][j % 4]) { in fill_in_l2_l3_pcache()
1689 cu_info->bitmap[start][i % 4][j % 4]; in fill_in_l2_l3_pcache()
1696 ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l2_l3_pcache()
1700 * CU, and in case of a non-shared cache check if the CU is inactive. If in fill_in_l2_l3_pcache()
1706 return -ENOMEM; in fill_in_l2_l3_pcache()
1709 pcache->processor_id_low = cu_processor_id in fill_in_l2_l3_pcache()
1710 + (first_active_cu - 1); in fill_in_l2_l3_pcache()
1711 pcache->cache_level = pcache_info[cache_type].cache_level; in fill_in_l2_l3_pcache()
1712 pcache->cacheline_size = pcache_info[cache_type].cache_line_size; in fill_in_l2_l3_pcache()
1717 mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); in fill_in_l2_l3_pcache()
1721 pcache->cache_size = pcache_info[cache_type].cache_size; in fill_in_l2_l3_pcache()
1723 if (mode && pcache->cache_level == 3) in fill_in_l2_l3_pcache()
1724 pcache->cache_size /= mode; in fill_in_l2_l3_pcache()
1727 pcache->cache_type |= HSA_CACHE_TYPE_DATA; in fill_in_l2_l3_pcache()
1729 pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in fill_in_l2_l3_pcache()
1731 pcache->cache_type |= HSA_CACHE_TYPE_CPU; in fill_in_l2_l3_pcache()
1733 pcache->cache_type |= HSA_CACHE_TYPE_HSACU; in fill_in_l2_l3_pcache()
1738 cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); in fill_in_l2_l3_pcache()
1742 for (i = 0; i < gfx_info->max_shader_engines; i++) { in fill_in_l2_l3_pcache()
1743 for (j = 0; j < gfx_info->max_sh_per_se; j++) { in fill_in_l2_l3_pcache()
1744 pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); in fill_in_l2_l3_pcache()
1745 pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); in fill_in_l2_l3_pcache()
1746 pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); in fill_in_l2_l3_pcache()
1747 pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); in fill_in_l2_l3_pcache()
1750 cu_sibling_map_mask = cu_info->bitmap[xcc][i % 4][j + i / 4]; in fill_in_l2_l3_pcache()
1751 cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l2_l3_pcache()
1755 pcache->sibling_map_size = k; in fill_in_l2_l3_pcache()
1764 /* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
1775 struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info; in kfd_fill_cache_non_crat_info()
1776 struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config; in kfd_fill_cache_non_crat_info()
1784 gpu_processor_id = dev->node_props.simd_id_base; in kfd_fill_cache_non_crat_info()
1803 start = ffs(kdev->xcc_mask) - 1; in kfd_fill_cache_non_crat_info()
1804 end = start + NUM_XCC(kdev->xcc_mask); in kfd_fill_cache_non_crat_info()
1810 for (i = 0; i < gfx_info->max_shader_engines; i++) { in kfd_fill_cache_non_crat_info()
1811 for (j = 0; j < gfx_info->max_sh_per_se; j++) { in kfd_fill_cache_non_crat_info()
1812 for (k = 0; k < gfx_info->max_cu_per_sh; k += pcache_info[ct].num_cu_shared) { in kfd_fill_cache_non_crat_info()
1815 cu_info->bitmap[xcc][i % 4][j + i / 4], ct, in kfd_fill_cache_non_crat_info()
1823 list_add_tail(&props_ext->list, &dev->cache_props); in kfd_fill_cache_non_crat_info()
1828 gfx_info->max_cu_per_sh) ? in kfd_fill_cache_non_crat_info()
1830 (gfx_info->max_cu_per_sh - k); in kfd_fill_cache_non_crat_info()
1845 list_add_tail(&props_ext->list, &dev->cache_props); in kfd_fill_cache_non_crat_info()
1849 dev->node_props.caches_count += num_of_entries; in kfd_fill_cache_non_crat_info()
1866 dev_err(gpu->adev->dev, "Error creating VCRAT\n"); in kfd_topology_add_device_locked()
1867 topology_crat_proximity_domain--; in kfd_topology_add_device_locked()
1877 dev_err(gpu->adev->dev, "Error parsing VCRAT\n"); in kfd_topology_add_device_locked()
1878 topology_crat_proximity_domain--; in kfd_topology_add_device_locked()
1887 res = -ENODEV; in kfd_topology_add_device_locked()
1903 dev_err(gpu->adev->dev, "Failed to update GPU to sysfs topology. res=%d\n", in kfd_topology_add_device_locked()
1915 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) && in kfd_topology_set_dbg_firmware_support()
1916 KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) { in kfd_topology_set_dbg_firmware_support()
1917 uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version & in kfd_topology_set_dbg_firmware_support()
1920 uint32_t mes_rev = dev->gpu->adev->mes.sched_version & in kfd_topology_set_dbg_firmware_support()
1931 switch (KFD_GC_VERSION(dev->gpu)) { in kfd_topology_set_dbg_firmware_support()
1933 firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768; in kfd_topology_set_dbg_firmware_support()
1940 firmware_supported = dev->gpu->kfd->mec_fw_version >= 459; in kfd_topology_set_dbg_firmware_support()
1943 firmware_supported = dev->gpu->kfd->mec_fw_version >= 60; in kfd_topology_set_dbg_firmware_support()
1946 firmware_supported = dev->gpu->kfd->mec_fw_version >= 51; in kfd_topology_set_dbg_firmware_support()
1951 firmware_supported = dev->gpu->kfd->mec_fw_version >= 144; in kfd_topology_set_dbg_firmware_support()
1958 firmware_supported = dev->gpu->kfd->mec_fw_version >= 89; in kfd_topology_set_dbg_firmware_support()
1970 dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED; in kfd_topology_set_dbg_firmware_support()
1975 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << in kfd_topology_set_capabilities()
1979 dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_SUPPORT | in kfd_topology_set_capabilities()
1983 if (kfd_dbg_has_ttmps_always_setup(dev->gpu)) in kfd_topology_set_capabilities()
1984 dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; in kfd_topology_set_capabilities()
1986 if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { in kfd_topology_set_capabilities()
1987 if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3) || in kfd_topology_set_capabilities()
1988 KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 4)) in kfd_topology_set_capabilities()
1989 dev->node_props.debug_prop |= in kfd_topology_set_capabilities()
1993 dev->node_props.debug_prop |= in kfd_topology_set_capabilities()
1997 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2)) in kfd_topology_set_capabilities()
1998 dev->node_props.capability |= in kfd_topology_set_capabilities()
2001 if (!amdgpu_sriov_vf(dev->gpu->adev)) in kfd_topology_set_capabilities()
2002 dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; in kfd_topology_set_capabilities()
2004 if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE) in kfd_topology_set_capabilities()
2005 dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; in kfd_topology_set_capabilities()
2007 dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | in kfd_topology_set_capabilities()
2010 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0)) in kfd_topology_set_capabilities()
2011 dev->node_props.capability |= in kfd_topology_set_capabilities()
2024 const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; in kfd_topology_add_device()
2025 struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config; in kfd_topology_add_device()
2026 struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info; in kfd_topology_add_device()
2028 if (gpu->xcp && !gpu->xcp->ddev) { in kfd_topology_add_device()
2029 dev_warn(gpu->adev->dev, in kfd_topology_add_device()
2033 dev_dbg(gpu->adev->dev, "Adding new GPU to topology\n"); in kfd_topology_add_device()
2051 dev->gpu_id = gpu_id; in kfd_topology_add_device()
2052 gpu->id = gpu_id; in kfd_topology_add_device()
2060 /* Fill in additional information that is not available in CRAT but in kfd_topology_add_device()
2063 for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) { in kfd_topology_add_device()
2064 dev->node_props.name[i] = __tolower(asic_name[i]); in kfd_topology_add_device()
2068 dev->node_props.name[i] = '\0'; in kfd_topology_add_device()
2070 dev->node_props.simd_arrays_per_engine = in kfd_topology_add_device()
2071 gfx_info->max_sh_per_se; in kfd_topology_add_device()
2073 dev->node_props.gfx_target_version = in kfd_topology_add_device()
2074 gpu->kfd->device_info.gfx_target_version; in kfd_topology_add_device()
2075 dev->node_props.vendor_id = gpu->adev->pdev->vendor; in kfd_topology_add_device()
2076 dev->node_props.device_id = gpu->adev->pdev->device; in kfd_topology_add_device()
2077 dev->node_props.capability |= in kfd_topology_add_device()
2078 ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & in kfd_topology_add_device()
2081 dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); in kfd_topology_add_device()
2082 if (gpu->kfd->num_nodes > 1) in kfd_topology_add_device()
2083 dev->node_props.location_id |= dev->gpu->node_id; in kfd_topology_add_device()
2085 dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); in kfd_topology_add_device()
2086 dev->node_props.max_engine_clk_fcompute = in kfd_topology_add_device()
2087 amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); in kfd_topology_add_device()
2088 dev->node_props.max_engine_clk_ccompute = in kfd_topology_add_device()
2091 if (gpu->xcp) in kfd_topology_add_device()
2092 dev->node_props.drm_render_minor = gpu->xcp->ddev->render->index; in kfd_topology_add_device()
2094 dev->node_props.drm_render_minor = in kfd_topology_add_device()
2095 gpu->kfd->shared_resources.drm_render_minor; in kfd_topology_add_device()
2097 dev->node_props.hive_id = gpu->kfd->hive_id; in kfd_topology_add_device()
2098 dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu); in kfd_topology_add_device()
2099 dev->node_props.num_sdma_xgmi_engines = in kfd_topology_add_device()
2101 dev->node_props.num_sdma_queues_per_engine = in kfd_topology_add_device()
2102 gpu->kfd->device_info.num_sdma_queues_per_engine - in kfd_topology_add_device()
2103 gpu->kfd->device_info.num_reserved_sdma_queues_per_engine; in kfd_topology_add_device()
2104 dev->node_props.num_gws = (dev->gpu->gws && in kfd_topology_add_device()
2105 dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? in kfd_topology_add_device()
2106 dev->gpu->adev->gds.gws_size : 0; in kfd_topology_add_device()
2107 dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm); in kfd_topology_add_device()
2112 switch (dev->gpu->adev->asic_type) { in kfd_topology_add_device()
2116 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 << in kfd_topology_add_device()
2127 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << in kfd_topology_add_device()
2132 if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1)) in kfd_topology_add_device()
2134 dev->gpu->adev->asic_type); in kfd_topology_add_device()
2143 dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT; in kfd_topology_add_device()
2150 if (dev->gpu->adev->asic_type == CHIP_CARRIZO) { in kfd_topology_add_device()
2151 dev->node_props.simd_count = in kfd_topology_add_device()
2152 cu_info->simd_per_cu * cu_info->number; in kfd_topology_add_device()
2153 dev->node_props.max_waves_per_simd = 10; in kfd_topology_add_device()
2156 /* KFD only cares about SRAM ECC on GFX and HBM ECC on UMC */ in kfd_topology_add_device()
2157 dev->node_props.capability |= in kfd_topology_add_device()
2158 ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? in kfd_topology_add_device()
2160 dev->node_props.capability |= in kfd_topology_add_device()
2161 ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? in kfd_topology_add_device()
2164 if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1)) in kfd_topology_add_device()
2165 dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ? in kfd_topology_add_device()
2168 if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev)) in kfd_topology_add_device()
2169 dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; in kfd_topology_add_device()
2171 if (dev->gpu->adev->gmc.is_app_apu || in kfd_topology_add_device()
2172 dev->gpu->adev->gmc.xgmi.connected_to_cpu) in kfd_topology_add_device()
2173 dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS; in kfd_topology_add_device()
2185 * kfd_topology_update_io_links() - Update IO links after device removal.
2209 if (dev->proximity_domain > proximity_domain) in kfd_topology_update_io_links()
2210 dev->proximity_domain--; in kfd_topology_update_io_links()
2212 list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) { in kfd_topology_update_io_links()
2217 if (iolink->node_to == proximity_domain) { in kfd_topology_update_io_links()
2218 list_del(&iolink->list); in kfd_topology_update_io_links()
2219 dev->node_props.io_links_count--; in kfd_topology_update_io_links()
2221 if (iolink->node_from > proximity_domain) in kfd_topology_update_io_links()
2222 iolink->node_from--; in kfd_topology_update_io_links()
2223 if (iolink->node_to > proximity_domain) in kfd_topology_update_io_links()
2224 iolink->node_to--; in kfd_topology_update_io_links()
2228 list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) { in kfd_topology_update_io_links()
2233 if (p2plink->node_to == proximity_domain) { in kfd_topology_update_io_links()
2234 list_del(&p2plink->list); in kfd_topology_update_io_links()
2235 dev->node_props.p2p_links_count--; in kfd_topology_update_io_links()
2237 if (p2plink->node_from > proximity_domain) in kfd_topology_update_io_links()
2238 p2plink->node_from--; in kfd_topology_update_io_links()
2239 if (p2plink->node_to > proximity_domain) in kfd_topology_update_io_links()
2240 p2plink->node_to--; in kfd_topology_update_io_links()
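
Both loops above preserve one invariant after a node is removed: links into the dead proximity domain are dropped, and every surviving endpoint numbered above it shifts down by one so the domain space stays dense. Sketched for a single link:

	if (link->node_to == removed_domain) {
		list_del(&link->list);
		dev->node_props.io_links_count--;
	} else {
		if (link->node_from > removed_domain)
			link->node_from--;
		if (link->node_to > removed_domain)
			link->node_to--;
	}
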
2250 int res = -ENODEV; in kfd_topology_remove_device()
2256 if (dev->gpu == gpu) { in kfd_topology_remove_device()
2257 gpu_id = dev->gpu_id; in kfd_topology_remove_device()
2260 sys_props.num_devices--; in kfd_topology_remove_device()
2262 topology_crat_proximity_domain = sys_props.num_devices-1; in kfd_topology_remove_device()
2280 /* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
2283 * Return - 0: On success (@kdev will be NULL for non-GPU nodes) in kfd_topology_enum_kfd_devices()
2284 * -1: If end of list
2297 *kdev = top_dev->gpu; in kfd_topology_enum_kfd_devices()
2307 return -1; in kfd_topology_enum_kfd_devices()
2316 return -1; in kfd_cpumask_to_apic_id()
2319 return -1; in kfd_cpumask_to_apic_id()
2327 /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
2329 * Return -1 on failure
2333 if (numa_node_id == -1) { in kfd_numa_node_to_apic_id()
2351 if (!dev->gpu) { in kfd_debugfs_hqds_by_device()
2356 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); in kfd_debugfs_hqds_by_device()
2357 r = dqm_debugfs_hqds(m, dev->gpu->dqm); in kfd_debugfs_hqds_by_device()
2376 if (!dev->gpu) { in kfd_debugfs_rls_by_device()
2381 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); in kfd_debugfs_rls_by_device()
2382 r = pm_debugfs_runlist(m, &dev->gpu->dqm->packet_mgr); in kfd_debugfs_rls_by_device()