Lines Matching +full:gpu +full:-id

1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright 2015-2022 Advanced Micro Devices, Inc.
32 /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
33 * GPU processor IDs are expressed with Bit[31]=1.
34 * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
39 /* Return the next available gpu_processor_id and increment it for next GPU
40 * @total_cu_count - Total CUs present in the GPU including ones
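The lines above describe the dGPU processor-ID allocator. A minimal sketch of what those comments imply, reconstructed here rather than copied from kfd_crat.c (the authoritative body may differ): a file-scope counter seeded at 0x8000_0000 + 0x1000 is handed out per GPU and advanced by that GPU's total CU count, so Bit[31] stays set and IDs never collide across nodes.

static uint32_t gpu_processor_id_low = 0x80001000;	/* 0x8000_0000 + 0x1000 */

static unsigned int get_and_inc_gpu_processor_id(unsigned int total_cu_count)
{
	unsigned int current_id = gpu_processor_id_low;

	/* Reserve one ID per CU (including masked-off ones) so caches can
	 * later be mapped onto the SIMD IDs of this GPU.
	 */
	gpu_processor_id_low += total_cu_count;

	return current_id;
}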
162 /* L2 Data Cache per GPU (Total Tex Cache) */
201 /* L2 Data Cache per GPU (Total Tex Cache) */
240 /* L2 Data Cache per GPU (Total Tex Cache) */
279 /* L2 Data Cache per GPU (Total Tex Cache) */
318 /* L2 Data Cache per GPU (Total Tex Cache) */
357 /* L2 Data Cache per GPU (Total Tex Cache) */
405 /* L2 Data Cache per GPU (Total Tex Cache) */
453 /* L2 Data Cache per GPU (Total Tex Cache) */
501 /* L2 Data Cache per GPU (Total Tex Cache) */
549 /* L2 Data Cache per GPU (Total Tex Cache) */
558 /* L3 Data Cache per GPU */
606 /* L2 Data Cache per GPU (Total Tex Cache) */
615 /* L3 Data Cache per GPU */
663 /* L2 Data Cache per GPU (Total Tex Cache) */
672 /* L3 Data Cache per GPU */
720 /* L2 Data Cache per GPU (Total Tex Cache) */
729 /* L3 Data Cache per GPU */
777 /* L2 Data Cache per GPU (Total Tex Cache) */
825 /* L2 Data Cache per GPU (Total Tex Cache) */
873 /* L2 Data Cache per GPU (Total Tex Cache) */
921 /* L2 Data Cache per GPU (Total Tex Cache) */
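Each "L2 Data Cache per GPU (Total Tex Cache)" and "L3 Data Cache per GPU" hit above is a comment inside one of the static per-ASIC kfd_gpu_cache_info tables. An illustrative entry is sketched below; the field names follow kfd_gpu_cache_info, but the size and sharing values are placeholders, not taken from any real ASIC table.

static struct kfd_gpu_cache_info example_cache_info[] = {
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,	/* KiB, placeholder value */
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 16,	/* placeholder value */
	},
};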
934 dev->node_props.cpu_cores_count = cu->num_cpu_cores; in kfd_populated_cu_info_cpu()
935 dev->node_props.cpu_core_id_base = cu->processor_id_low; in kfd_populated_cu_info_cpu()
936 if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) in kfd_populated_cu_info_cpu()
937 dev->node_props.capability |= HSA_CAP_ATS_PRESENT; in kfd_populated_cu_info_cpu()
939 pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, in kfd_populated_cu_info_cpu()
940 cu->processor_id_low); in kfd_populated_cu_info_cpu()
946 dev->node_props.simd_id_base = cu->processor_id_low; in kfd_populated_cu_info_gpu()
947 dev->node_props.simd_count = cu->num_simd_cores; in kfd_populated_cu_info_gpu()
948 dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; in kfd_populated_cu_info_gpu()
949 dev->node_props.max_waves_per_simd = cu->max_waves_simd; in kfd_populated_cu_info_gpu()
950 dev->node_props.wave_front_size = cu->wave_front_size; in kfd_populated_cu_info_gpu()
951 dev->node_props.array_count = cu->array_count; in kfd_populated_cu_info_gpu()
952 dev->node_props.cu_per_simd_array = cu->num_cu_per_array; in kfd_populated_cu_info_gpu()
953 dev->node_props.simd_per_cu = cu->num_simd_per_cu; in kfd_populated_cu_info_gpu()
954 dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; in kfd_populated_cu_info_gpu()
955 if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) in kfd_populated_cu_info_gpu()
956 dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; in kfd_populated_cu_info_gpu()
957 pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low); in kfd_populated_cu_info_gpu()
960 /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
969 cu->proximity_domain, cu->hsa_capability); in kfd_parse_subtype_cu()
971 if (cu->proximity_domain == dev->proximity_domain) { in kfd_parse_subtype_cu()
972 if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) in kfd_parse_subtype_cu()
975 if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) in kfd_parse_subtype_cu()
990 list_for_each_entry(props, &dev->mem_props, list) { in find_subtype_mem()
991 if (props->heap_type == heap_type in find_subtype_mem()
992 && props->flags == flags in find_subtype_mem()
993 && props->width == width) in find_subtype_mem()
999 /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
1013 mem->proximity_domain); in kfd_parse_subtype_mem()
1015 if (mem->proximity_domain == dev->proximity_domain) { in kfd_parse_subtype_mem()
1016 /* We're on GPU node */ in kfd_parse_subtype_mem()
1017 if (dev->node_props.cpu_cores_count == 0) { in kfd_parse_subtype_mem()
1019 if (mem->visibility_type == 0) in kfd_parse_subtype_mem()
1024 heap_type = mem->visibility_type; in kfd_parse_subtype_mem()
1028 if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) in kfd_parse_subtype_mem()
1030 if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) in kfd_parse_subtype_mem()
1034 ((uint64_t)mem->length_high << 32) + in kfd_parse_subtype_mem()
1035 mem->length_low; in kfd_parse_subtype_mem()
1036 width = mem->width; in kfd_parse_subtype_mem()
1045 props->size_in_bytes += size_in_bytes; in kfd_parse_subtype_mem()
1051 return -ENOMEM; in kfd_parse_subtype_mem()
1053 props->heap_type = heap_type; in kfd_parse_subtype_mem()
1054 props->flags = flags; in kfd_parse_subtype_mem()
1055 props->size_in_bytes = size_in_bytes; in kfd_parse_subtype_mem()
1056 props->width = width; in kfd_parse_subtype_mem()
1058 dev->node_props.mem_banks_count++; in kfd_parse_subtype_mem()
1059 list_add_tail(&props->list, &dev->mem_props); in kfd_parse_subtype_mem()
1068 /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
1076 uint32_t id; in kfd_parse_subtype_cache() local
1079 id = cache->processor_id_low; in kfd_parse_subtype_cache()
1081 pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); in kfd_parse_subtype_cache()
1083 total_num_of_cu = (dev->node_props.array_count * in kfd_parse_subtype_cache()
1084 dev->node_props.cu_per_simd_array); in kfd_parse_subtype_cache()
1087 * information as it is associated with a CPU core or GPU in kfd_parse_subtype_cache()
1088 * Compute Unit. So map the cache using CPU core Id or SIMD in kfd_parse_subtype_cache()
1089 * (GPU) ID. in kfd_parse_subtype_cache()
1094 if ((id >= dev->node_props.cpu_core_id_base && in kfd_parse_subtype_cache()
1095 id <= dev->node_props.cpu_core_id_base + in kfd_parse_subtype_cache()
1096 dev->node_props.cpu_cores_count) || in kfd_parse_subtype_cache()
1097 (id >= dev->node_props.simd_id_base && in kfd_parse_subtype_cache()
1098 id < dev->node_props.simd_id_base + in kfd_parse_subtype_cache()
1102 return -ENOMEM; in kfd_parse_subtype_cache()
1104 props->processor_id_low = id; in kfd_parse_subtype_cache()
1105 props->cache_level = cache->cache_level; in kfd_parse_subtype_cache()
1106 props->cache_size = cache->cache_size; in kfd_parse_subtype_cache()
1107 props->cacheline_size = cache->cache_line_size; in kfd_parse_subtype_cache()
1108 props->cachelines_per_tag = cache->lines_per_tag; in kfd_parse_subtype_cache()
1109 props->cache_assoc = cache->associativity; in kfd_parse_subtype_cache()
1110 props->cache_latency = cache->cache_latency; in kfd_parse_subtype_cache()
1112 memcpy(props->sibling_map, cache->sibling_map, in kfd_parse_subtype_cache()
1116 props->sibling_map_size = CRAT_SIBLINGMAP_SIZE; in kfd_parse_subtype_cache()
1118 if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) in kfd_parse_subtype_cache()
1119 props->cache_type |= HSA_CACHE_TYPE_DATA; in kfd_parse_subtype_cache()
1120 if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) in kfd_parse_subtype_cache()
1121 props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in kfd_parse_subtype_cache()
1122 if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) in kfd_parse_subtype_cache()
1123 props->cache_type |= HSA_CACHE_TYPE_CPU; in kfd_parse_subtype_cache()
1124 if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) in kfd_parse_subtype_cache()
1125 props->cache_type |= HSA_CACHE_TYPE_HSACU; in kfd_parse_subtype_cache()
1127 dev->node_props.caches_count++; in kfd_parse_subtype_cache()
1128 list_add_tail(&props->list, &dev->cache_props); in kfd_parse_subtype_cache()
1137 /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
1148 id_from = iolink->proximity_domain_from; in kfd_parse_subtype_iolink()
1149 id_to = iolink->proximity_domain_to; in kfd_parse_subtype_iolink()
1154 if (id_from == dev->proximity_domain) { in kfd_parse_subtype_iolink()
1157 return -ENOMEM; in kfd_parse_subtype_iolink()
1159 props->node_from = id_from; in kfd_parse_subtype_iolink()
1160 props->node_to = id_to; in kfd_parse_subtype_iolink()
1161 props->ver_maj = iolink->version_major; in kfd_parse_subtype_iolink()
1162 props->ver_min = iolink->version_minor; in kfd_parse_subtype_iolink()
1163 props->iolink_type = iolink->io_interface_type; in kfd_parse_subtype_iolink()
1165 if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) in kfd_parse_subtype_iolink()
1166 props->weight = 20; in kfd_parse_subtype_iolink()
1167 else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) in kfd_parse_subtype_iolink()
1168 props->weight = iolink->weight_xgmi; in kfd_parse_subtype_iolink()
1170 props->weight = node_distance(id_from, id_to); in kfd_parse_subtype_iolink()
1172 props->min_latency = iolink->minimum_latency; in kfd_parse_subtype_iolink()
1173 props->max_latency = iolink->maximum_latency; in kfd_parse_subtype_iolink()
1174 props->min_bandwidth = iolink->minimum_bandwidth_mbs; in kfd_parse_subtype_iolink()
1175 props->max_bandwidth = iolink->maximum_bandwidth_mbs; in kfd_parse_subtype_iolink()
1176 props->rec_transfer_size = in kfd_parse_subtype_iolink()
1177 iolink->recommended_transfer_size; in kfd_parse_subtype_iolink()
1179 dev->node_props.io_links_count++; in kfd_parse_subtype_iolink()
1180 list_add_tail(&props->list, &dev->io_link_props); in kfd_parse_subtype_iolink()
1185 /* CPU topology is created before GPUs are detected, so CPU->GPU in kfd_parse_subtype_iolink()
1187 * means a GPU is detected and we are adding GPU->CPU to the topology. in kfd_parse_subtype_iolink()
1188 * At this time, also add the corresponding CPU->GPU link if GPU in kfd_parse_subtype_iolink()
1193 if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) { in kfd_parse_subtype_iolink()
1196 return -ENODEV; in kfd_parse_subtype_iolink()
1200 return -ENOMEM; in kfd_parse_subtype_iolink()
1202 props2->node_from = id_to; in kfd_parse_subtype_iolink()
1203 props2->node_to = id_from; in kfd_parse_subtype_iolink()
1204 props2->kobj = NULL; in kfd_parse_subtype_iolink()
1205 to_dev->node_props.io_links_count++; in kfd_parse_subtype_iolink()
1206 list_add_tail(&props2->list, &to_dev->io_link_props); in kfd_parse_subtype_iolink()
1212 /* kfd_parse_subtype - parse subtypes and attach it to correct topology device
1214 * @sub_type_hdr - subtype section of crat_image
1215 * @device_list - list of topology devices present in this crat_image
1226 switch (sub_type_hdr->type) { in kfd_parse_subtype()
1257 sub_type_hdr->type); in kfd_parse_subtype()
1263 /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
1266 * @crat_image - input image containing CRAT
1267 * @device_list - [OUT] list of kfd_topology_device generated after
1269 * @proximity_domain - Proximity domain of the first device in the table
1271 * Return - 0 if successful else -ve value
1285 return -EINVAL; in kfd_parse_crat_table()
1289 return -EINVAL; in kfd_parse_crat_table()
1292 num_nodes = crat_table->num_domains; in kfd_parse_crat_table()
1293 image_len = crat_table->length; in kfd_parse_crat_table()
1301 top_dev->proximity_domain = proximity_domain++; in kfd_parse_crat_table()
1305 ret = -ENOMEM; in kfd_parse_crat_table()
1309 memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); in kfd_parse_crat_table()
1310 memcpy(top_dev->oem_table_id, crat_table->oem_table_id, in kfd_parse_crat_table()
1312 top_dev->oem_revision = crat_table->oem_revision; in kfd_parse_crat_table()
1317 if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { in kfd_parse_crat_table()
1324 sub_type_hdr->length); in kfd_parse_crat_table()
1338 struct amdgpu_device *adev = kdev->adev; in kfd_fill_gpu_cache_info_from_gfx_config()
1342 if (adev->gfx.config.gc_tcp_l1_size) { in kfd_fill_gpu_cache_info_from_gfx_config()
1343 pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size; in kfd_fill_gpu_cache_info_from_gfx_config()
1348 pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; in kfd_fill_gpu_cache_info_from_gfx_config()
1352 if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config()
1354 adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config()
1359 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; in kfd_fill_gpu_cache_info_from_gfx_config()
1363 if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config()
1364 pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config()
1369 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; in kfd_fill_gpu_cache_info_from_gfx_config()
1373 if (adev->gfx.config.gc_gl1c_per_sa && in kfd_fill_gpu_cache_info_from_gfx_config()
1374 adev->gfx.config.gc_gl1c_size_per_instance) { in kfd_fill_gpu_cache_info_from_gfx_config()
1375 pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa * in kfd_fill_gpu_cache_info_from_gfx_config()
1376 adev->gfx.config.gc_gl1c_size_per_instance; in kfd_fill_gpu_cache_info_from_gfx_config()
1381 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config()
1384 /* L2 Data Cache per GPU (Total Tex Cache) */ in kfd_fill_gpu_cache_info_from_gfx_config()
1385 if (adev->gfx.config.gc_gl2c_per_gpu) { in kfd_fill_gpu_cache_info_from_gfx_config()
1386 pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu; in kfd_fill_gpu_cache_info_from_gfx_config()
1391 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config()
1394 /* L3 Data Cache per GPU */ in kfd_fill_gpu_cache_info_from_gfx_config()
1395 if (adev->gmc.mall_size) { in kfd_fill_gpu_cache_info_from_gfx_config()
1396 pcache_info[i].cache_size = adev->gmc.mall_size / 1024; in kfd_fill_gpu_cache_info_from_gfx_config()
1401 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config()
1410 struct amdgpu_device *adev = kdev->adev; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1414 if (adev->gfx.config.gc_tcp_size_per_cu) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1415 pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1424 if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1426 adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1431 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1435 if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1436 pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1441 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1444 /* L2 Data Cache per GPU (Total Tex Cache) */ in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1445 if (adev->gfx.config.gc_tcc_size) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1446 pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1451 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1454 /* L3 Data Cache per GPU */ in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1455 if (adev->gmc.mall_size) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1456 pcache_info[i].cache_size = adev->gmc.mall_size / 1024; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1461 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1471 switch (kdev->adev->asic_type) { in kfd_get_gpu_cache_info()
1529 kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd, in kfd_get_gpu_cache_info()
1591 kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); in kfd_get_gpu_cache_info()
1605 * following amount is allocated for GPU Virtual CRAT. This is
1611 /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
1613 * @numa_node_id: CPU NUMA node id
1617 * Return 0 if successful else return -ve value
1625 *avail_size -= sizeof(struct crat_subtype_computeunit); in kfd_fill_cu_for_cpu()
1627 return -ENOMEM; in kfd_fill_cu_for_cpu()
1632 sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; in kfd_fill_cu_for_cpu()
1633 sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); in kfd_fill_cu_for_cpu()
1634 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_cu_for_cpu()
1639 sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT; in kfd_fill_cu_for_cpu()
1640 sub_type_hdr->proximity_domain = proximity_domain; in kfd_fill_cu_for_cpu()
1641 sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id); in kfd_fill_cu_for_cpu()
1642 if (sub_type_hdr->processor_id_low == -1) in kfd_fill_cu_for_cpu()
1643 return -EINVAL; in kfd_fill_cu_for_cpu()
1645 sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask); in kfd_fill_cu_for_cpu()
1650 /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
1652 * @numa_node_id: CPU NUMA node id
1656 * Return 0 if successful else return -ve value
1666 *avail_size -= sizeof(struct crat_subtype_memory); in kfd_fill_mem_info_for_cpu()
1668 return -ENOMEM; in kfd_fill_mem_info_for_cpu()
1673 sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; in kfd_fill_mem_info_for_cpu()
1674 sub_type_hdr->length = sizeof(struct crat_subtype_memory); in kfd_fill_mem_info_for_cpu()
1675 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_mem_info_for_cpu()
1685 mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]); in kfd_fill_mem_info_for_cpu()
1688 sub_type_hdr->length_low = lower_32_bits(mem_in_bytes); in kfd_fill_mem_info_for_cpu()
1689 sub_type_hdr->length_high = upper_32_bits(mem_in_bytes); in kfd_fill_mem_info_for_cpu()
1690 sub_type_hdr->proximity_domain = proximity_domain; in kfd_fill_mem_info_for_cpu()
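The kfd_fill_mem_info_for_cpu() hits above show a per-zone accumulation and the 32-bit length split. A sketch of how those matched lines fit together, assuming the surrounding loop runs over the NUMA node's zones:

	uint64_t mem_in_bytes = 0;
	pg_data_t *pgdat = NODE_DATA(numa_node_id);
	int zone_type;

	/* Sum managed pages across all zones of this node, convert to
	 * bytes, then split into the 32-bit CRAT length fields.
	 */
	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
		mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
	mem_in_bytes <<= PAGE_SHIFT;

	sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
	sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);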
1704 if (c->x86_vendor == X86_VENDOR_AMD) in kfd_fill_iolink_info_for_cpu()
1716 *avail_size -= sizeof(struct crat_subtype_iolink); in kfd_fill_iolink_info_for_cpu()
1718 return -ENOMEM; in kfd_fill_iolink_info_for_cpu()
1723 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; in kfd_fill_iolink_info_for_cpu()
1724 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); in kfd_fill_iolink_info_for_cpu()
1725 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_iolink_info_for_cpu()
1728 sub_type_hdr->proximity_domain_from = numa_node_id; in kfd_fill_iolink_info_for_cpu()
1729 sub_type_hdr->proximity_domain_to = nid; in kfd_fill_iolink_info_for_cpu()
1730 sub_type_hdr->io_interface_type = link_type; in kfd_fill_iolink_info_for_cpu()
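The X86_VENDOR_AMD check in kfd_fill_iolink_info_for_cpu() decides which io_interface_type is written at the last matched line above. A hedged sketch of that selection, assuming the CRAT_IOLINK_TYPE_* constants defined in kfd_crat.h:

	struct cpuinfo_x86 *c = &cpu_data(0);
	int link_type;

	/* AMD CPUs interconnect over HyperTransport; anything else is
	 * reported as QPI in the virtual CRAT.
	 */
	if (c->x86_vendor == X86_VENDOR_AMD)
		link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT;
	else
		link_type = CRAT_IOLINK_TYPE_QPI_1_1;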
1740 /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
1760 return -EINVAL; in kfd_create_vcrat_image_cpu()
1765 avail_size -= sizeof(struct crat_header); in kfd_create_vcrat_image_cpu()
1767 return -ENOMEM; in kfd_create_vcrat_image_cpu()
1770 memcpy(&crat_table->signature, CRAT_SIGNATURE, in kfd_create_vcrat_image_cpu()
1771 sizeof(crat_table->signature)); in kfd_create_vcrat_image_cpu()
1772 crat_table->length = sizeof(struct crat_header); in kfd_create_vcrat_image_cpu()
1778 crat_table->oem_revision = acpi_table->revision; in kfd_create_vcrat_image_cpu()
1779 memcpy(crat_table->oem_id, acpi_table->oem_id, in kfd_create_vcrat_image_cpu()
1781 memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, in kfd_create_vcrat_image_cpu()
1785 crat_table->total_entries = 0; in kfd_create_vcrat_image_cpu()
1786 crat_table->num_domains = 0; in kfd_create_vcrat_image_cpu()
1791 if (kfd_numa_node_to_apic_id(numa_node_id) == -1) in kfd_create_vcrat_image_cpu()
1796 crat_table->num_domains, in kfd_create_vcrat_image_cpu()
1800 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_cpu()
1801 crat_table->total_entries++; in kfd_create_vcrat_image_cpu()
1804 sub_type_hdr->length); in kfd_create_vcrat_image_cpu()
1808 crat_table->num_domains, in kfd_create_vcrat_image_cpu()
1812 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_cpu()
1813 crat_table->total_entries++; in kfd_create_vcrat_image_cpu()
1816 sub_type_hdr->length); in kfd_create_vcrat_image_cpu()
1827 crat_table->length += (sub_type_hdr->length * entries); in kfd_create_vcrat_image_cpu()
1828 crat_table->total_entries += entries; in kfd_create_vcrat_image_cpu()
1831 sub_type_hdr->length * entries); in kfd_create_vcrat_image_cpu()
1837 crat_table->num_domains++; in kfd_create_vcrat_image_cpu()
1848 *size = crat_table->length; in kfd_create_vcrat_image_cpu()
1860 *avail_size -= sizeof(struct crat_subtype_memory); in kfd_fill_gpu_memory_affinity()
1862 return -ENOMEM; in kfd_fill_gpu_memory_affinity()
1865 sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; in kfd_fill_gpu_memory_affinity()
1866 sub_type_hdr->length = sizeof(struct crat_subtype_memory); in kfd_fill_gpu_memory_affinity()
1867 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_gpu_memory_affinity()
1869 sub_type_hdr->proximity_domain = proximity_domain; in kfd_fill_gpu_memory_affinity()
1871 pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n", in kfd_fill_gpu_memory_affinity()
1874 sub_type_hdr->length_low = lower_32_bits(size); in kfd_fill_gpu_memory_affinity()
1875 sub_type_hdr->length_high = upper_32_bits(size); in kfd_fill_gpu_memory_affinity()
1877 sub_type_hdr->width = local_mem_info->vram_width; in kfd_fill_gpu_memory_affinity()
1878 sub_type_hdr->visibility_type = type; in kfd_fill_gpu_memory_affinity()
1889 u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 | in kfd_find_numa_node_in_srat()
1890 pci_dev_id(kdev->adev->pdev); in kfd_find_numa_node_in_srat()
1894 struct acpi_srat_generic_affinity *gpu; in kfd_find_numa_node_in_srat() local
1910 table_end = (unsigned long)table_header + table_header->length; in kfd_find_numa_node_in_srat()
1916 subtable_len = sub_header->length; in kfd_find_numa_node_in_srat()
1928 switch (sub_header->type) { in kfd_find_numa_node_in_srat()
1931 pxm = *((u32 *)cpu->proximity_domain_hi) << 8 | in kfd_find_numa_node_in_srat()
1932 cpu->proximity_domain_lo; in kfd_find_numa_node_in_srat()
1937 gpu = (struct acpi_srat_generic_affinity *)sub_header; in kfd_find_numa_node_in_srat()
1938 bdf = *((u16 *)(&gpu->device_handle[0])) << 16 | in kfd_find_numa_node_in_srat()
1939 *((u16 *)(&gpu->device_handle[2])); in kfd_find_numa_node_in_srat()
1942 numa_node = pxm_to_node(gpu->proximity_domain); in kfd_find_numa_node_in_srat()
1954 subtable_len = sub_header->length; in kfd_find_numa_node_in_srat()
1959 /* Workaround bad cpu-gpu binding case */ in kfd_find_numa_node_in_srat()
1965 set_dev_node(&kdev->adev->pdev->dev, numa_node); in kfd_find_numa_node_in_srat()
1972 /* kfd_fill_gpu_direct_io_link_to_cpu - Fill in direct io link from GPU
1975 * @kdev - [IN] GPU device
1977 * @proximity_domain - proximity domain of the GPU node
1979 * Return 0 if successful else return -ve value
1986 *avail_size -= sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_direct_io_link_to_cpu()
1988 return -ENOMEM; in kfd_fill_gpu_direct_io_link_to_cpu()
1993 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; in kfd_fill_gpu_direct_io_link_to_cpu()
1994 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_direct_io_link_to_cpu()
1995 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_gpu_direct_io_link_to_cpu()
1997 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; in kfd_fill_gpu_direct_io_link_to_cpu()
2000 * TODO: Fill-in other fields of iolink subtype in kfd_fill_gpu_direct_io_link_to_cpu()
2002 if (kdev->adev->gmc.xgmi.connected_to_cpu || in kfd_fill_gpu_direct_io_link_to_cpu()
2004 kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) == in kfd_fill_gpu_direct_io_link_to_cpu()
2010 kdev->adev, NULL, true) : mem_bw; in kfd_fill_gpu_direct_io_link_to_cpu()
2013 * with host gpu xgmi link, host can access gpu memory whether in kfd_fill_gpu_direct_io_link_to_cpu()
2017 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; in kfd_fill_gpu_direct_io_link_to_cpu()
2018 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; in kfd_fill_gpu_direct_io_link_to_cpu()
2019 sub_type_hdr->weight_xgmi = weight; in kfd_fill_gpu_direct_io_link_to_cpu()
2020 sub_type_hdr->minimum_bandwidth_mbs = bandwidth; in kfd_fill_gpu_direct_io_link_to_cpu()
2021 sub_type_hdr->maximum_bandwidth_mbs = bandwidth; in kfd_fill_gpu_direct_io_link_to_cpu()
2023 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; in kfd_fill_gpu_direct_io_link_to_cpu()
2024 sub_type_hdr->minimum_bandwidth_mbs = in kfd_fill_gpu_direct_io_link_to_cpu()
2025 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true); in kfd_fill_gpu_direct_io_link_to_cpu()
2026 sub_type_hdr->maximum_bandwidth_mbs = in kfd_fill_gpu_direct_io_link_to_cpu()
2027 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false); in kfd_fill_gpu_direct_io_link_to_cpu()
2030 sub_type_hdr->proximity_domain_from = proximity_domain; in kfd_fill_gpu_direct_io_link_to_cpu()
2033 if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE && in kfd_fill_gpu_direct_io_link_to_cpu()
2038 if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE) in kfd_fill_gpu_direct_io_link_to_cpu()
2039 sub_type_hdr->proximity_domain_to = 0; in kfd_fill_gpu_direct_io_link_to_cpu()
2041 sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node; in kfd_fill_gpu_direct_io_link_to_cpu()
2043 sub_type_hdr->proximity_domain_to = 0; in kfd_fill_gpu_direct_io_link_to_cpu()
2055 bool use_ta_info = kdev->kfd->num_nodes == 1; in kfd_fill_gpu_xgmi_link_to_gpu()
2057 *avail_size -= sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_xgmi_link_to_gpu()
2059 return -ENOMEM; in kfd_fill_gpu_xgmi_link_to_gpu()
2063 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; in kfd_fill_gpu_xgmi_link_to_gpu()
2064 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_xgmi_link_to_gpu()
2065 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED | in kfd_fill_gpu_xgmi_link_to_gpu()
2068 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; in kfd_fill_gpu_xgmi_link_to_gpu()
2069 sub_type_hdr->proximity_domain_from = proximity_domain_from; in kfd_fill_gpu_xgmi_link_to_gpu()
2070 sub_type_hdr->proximity_domain_to = proximity_domain_to; in kfd_fill_gpu_xgmi_link_to_gpu()
2073 sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT * in kfd_fill_gpu_xgmi_link_to_gpu()
2074 amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); in kfd_fill_gpu_xgmi_link_to_gpu()
2075 sub_type_hdr->maximum_bandwidth_mbs = in kfd_fill_gpu_xgmi_link_to_gpu()
2076 amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, in kfd_fill_gpu_xgmi_link_to_gpu()
2077 peer_kdev->adev, false); in kfd_fill_gpu_xgmi_link_to_gpu()
2078 sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? in kfd_fill_gpu_xgmi_link_to_gpu()
2079 amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; in kfd_fill_gpu_xgmi_link_to_gpu()
2081 bool is_single_hop = kdev->kfd == peer_kdev->kfd; in kfd_fill_gpu_xgmi_link_to_gpu()
2086 sub_type_hdr->weight_xgmi = weight; in kfd_fill_gpu_xgmi_link_to_gpu()
2087 sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0; in kfd_fill_gpu_xgmi_link_to_gpu()
2088 sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0; in kfd_fill_gpu_xgmi_link_to_gpu()
2094 /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
2096 * @pcrat_image: Fill in VCRAT for GPU
2105 struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config; in kfd_create_vcrat_image_gpu()
2106 struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info; in kfd_create_vcrat_image_gpu()
2117 return -EINVAL; in kfd_create_vcrat_image_gpu()
2122 avail_size -= sizeof(struct crat_header); in kfd_create_vcrat_image_gpu()
2124 return -ENOMEM; in kfd_create_vcrat_image_gpu()
2128 memcpy(&crat_table->signature, CRAT_SIGNATURE, in kfd_create_vcrat_image_gpu()
2129 sizeof(crat_table->signature)); in kfd_create_vcrat_image_gpu()
2131 crat_table->length = sizeof(struct crat_header); in kfd_create_vcrat_image_gpu()
2132 crat_table->num_domains = 1; in kfd_create_vcrat_image_gpu()
2133 crat_table->total_entries = 0; in kfd_create_vcrat_image_gpu()
2138 avail_size -= sizeof(struct crat_subtype_computeunit); in kfd_create_vcrat_image_gpu()
2140 return -ENOMEM; in kfd_create_vcrat_image_gpu()
2145 sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; in kfd_create_vcrat_image_gpu()
2146 sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); in kfd_create_vcrat_image_gpu()
2147 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_create_vcrat_image_gpu()
2151 cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; in kfd_create_vcrat_image_gpu()
2152 cu->proximity_domain = proximity_domain; in kfd_create_vcrat_image_gpu()
2154 cu->num_simd_per_cu = cu_info->simd_per_cu; in kfd_create_vcrat_image_gpu()
2155 cu->num_simd_cores = cu_info->simd_per_cu * in kfd_create_vcrat_image_gpu()
2156 (cu_info->number / kdev->kfd->num_nodes); in kfd_create_vcrat_image_gpu()
2157 cu->max_waves_simd = cu_info->max_waves_per_simd; in kfd_create_vcrat_image_gpu()
2159 cu->wave_front_size = cu_info->wave_front_size; in kfd_create_vcrat_image_gpu()
2160 cu->array_count = gfx_info->max_sh_per_se * in kfd_create_vcrat_image_gpu()
2161 gfx_info->max_shader_engines; in kfd_create_vcrat_image_gpu()
2162 total_num_of_cu = (cu->array_count * gfx_info->max_cu_per_sh); in kfd_create_vcrat_image_gpu()
2163 cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu); in kfd_create_vcrat_image_gpu()
2164 cu->num_cu_per_array = gfx_info->max_cu_per_sh; in kfd_create_vcrat_image_gpu()
2165 cu->max_slots_scatch_cu = cu_info->max_scratch_slots_per_cu; in kfd_create_vcrat_image_gpu()
2166 cu->num_banks = gfx_info->max_shader_engines; in kfd_create_vcrat_image_gpu()
2167 cu->lds_size_in_kb = cu_info->lds_size; in kfd_create_vcrat_image_gpu()
2169 cu->hsa_capability = 0; in kfd_create_vcrat_image_gpu()
2171 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_gpu()
2172 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2179 local_mem_info = kdev->local_mem_info; in kfd_create_vcrat_image_gpu()
2181 sub_type_hdr->length); in kfd_create_vcrat_image_gpu()
2183 if (kdev->adev->debug_largebar) in kfd_create_vcrat_image_gpu()
2204 crat_table->length += sizeof(struct crat_subtype_memory); in kfd_create_vcrat_image_gpu()
2205 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2208 * Only direct links are added here which is Link from GPU to in kfd_create_vcrat_image_gpu()
2212 sub_type_hdr->length); in kfd_create_vcrat_image_gpu()
2219 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_gpu()
2220 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2224 * Direct links from GPU to other GPUs through xGMI. in kfd_create_vcrat_image_gpu()
2227 * hive id (from this GPU to other GPU). The reversed iolink in kfd_create_vcrat_image_gpu()
2228 * (from other GPU to this GPU) will be added in kfd_create_vcrat_image_gpu()
2231 if (kdev->kfd->hive_id) { in kfd_create_vcrat_image_gpu()
2234 if (!peer_dev->gpu) in kfd_create_vcrat_image_gpu()
2236 if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id) in kfd_create_vcrat_image_gpu()
2242 &avail_size, kdev, peer_dev->gpu, in kfd_create_vcrat_image_gpu()
2247 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_gpu()
2248 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2251 *size = crat_table->length; in kfd_create_vcrat_image_gpu()
2252 pr_info("Virtual CRAT table created for GPU\n"); in kfd_create_vcrat_image_gpu()
2257 /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
2265 * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device
2266 * COMPUTE_UNIT_GPU - Create VCRAT for GPU
2267 * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
2268 * -- this option is not currently implemented.
2272 * Return 0 if successful else return -ve value
2283 return -EINVAL; in kfd_create_crat_image_virtual()
2288 * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. in kfd_create_crat_image_virtual()
2298 (num_nodes - 1) * sizeof(struct crat_subtype_iolink)); in kfd_create_crat_image_virtual()
2301 return -ENOMEM; in kfd_create_crat_image_virtual()
2308 return -EINVAL; in kfd_create_crat_image_virtual()
2311 return -ENOMEM; in kfd_create_crat_image_virtual()
2318 ret = -EINVAL; in kfd_create_crat_image_virtual()
2322 ret = -EINVAL; in kfd_create_crat_image_virtual()
2336 * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
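Taken together, these matched functions form the virtual-CRAT lifecycle driven from kfd_topology.c: build an image for a CPU or GPU node, parse it into topology devices, then free it. A hedged caller-side sketch follows, assuming the prototypes exported by kfd_crat.h (parameter lists have changed across kernel versions):

	void *crat_image = NULL;
	size_t image_size = 0;
	uint32_t proximity_domain = 0;	/* first free domain, assumed */
	struct list_head temp_topology_device_list;
	int ret;

	INIT_LIST_HEAD(&temp_topology_device_list);

	/* COMPUTE_UNIT_CPU: no GPU device is involved, so kdev is NULL. */
	ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
					    COMPUTE_UNIT_CPU, NULL,
					    proximity_domain);
	if (!ret) {
		ret = kfd_parse_crat_table(crat_image,
					   &temp_topology_device_list,
					   proximity_domain);
		kfd_destroy_crat_image(crat_image);
	}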