Lines Matching +full:gpu +full:-id

1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright 2015-2022 Advanced Micro Devices, Inc.
32 /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
33 * GPU processor IDs are expressed with Bit[31]=1.
34 * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
39 /* Return the next available gpu_processor_id and increment it for next GPU
40 * @total_cu_count - Total CUs present in the GPU including ones
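The lines above describe the dGPU processor-ID allocator. A minimal sketch of what those comments imply, reconstructed here rather than copied from kfd_crat.c (the authoritative body may differ): a file-scope counter seeded at 0x8000_0000 + 0x1000 is handed out per GPU and advanced by that GPU's total CU count, so Bit[31] stays set and IDs never collide across nodes.

static uint32_t gpu_processor_id_low = 0x80001000;	/* 0x8000_0000 + 0x1000 */

static unsigned int get_and_inc_gpu_processor_id(unsigned int total_cu_count)
{
	unsigned int current_id = gpu_processor_id_low;

	/* Reserve one ID per CU (including masked-off ones) so caches can
	 * later be mapped onto the SIMD IDs of this GPU.
	 */
	gpu_processor_id_low += total_cu_count;

	return current_id;
}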
162 /* L2 Data Cache per GPU (Total Tex Cache) */
201 /* L2 Data Cache per GPU (Total Tex Cache) */
240 /* L2 Data Cache per GPU (Total Tex Cache) */
279 /* L2 Data Cache per GPU (Total Tex Cache) */
318 /* L2 Data Cache per GPU (Total Tex Cache) */
357 /* L2 Data Cache per GPU (Total Tex Cache) */
405 /* L2 Data Cache per GPU (Total Tex Cache) */
453 /* L2 Data Cache per GPU (Total Tex Cache) */
501 /* L2 Data Cache per GPU (Total Tex Cache) */
549 /* L2 Data Cache per GPU (Total Tex Cache) */
558 /* L3 Data Cache per GPU */
606 /* L2 Data Cache per GPU (Total Tex Cache) */
615 /* L3 Data Cache per GPU */
663 /* L2 Data Cache per GPU (Total Tex Cache) */
672 /* L3 Data Cache per GPU */
720 /* L2 Data Cache per GPU (Total Tex Cache) */
729 /* L3 Data Cache per GPU */
777 /* L2 Data Cache per GPU (Total Tex Cache) */
825 /* L2 Data Cache per GPU (Total Tex Cache) */
873 /* L2 Data Cache per GPU (Total Tex Cache) */
921 /* L2 Data Cache per GPU (Total Tex Cache) */
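Each "L2 Data Cache per GPU (Total Tex Cache)" and "L3 Data Cache per GPU" hit above is a comment inside one of the static per-ASIC kfd_gpu_cache_info tables. An illustrative entry is sketched below; the field names follow kfd_gpu_cache_info, but the size and sharing values are placeholders, not taken from any real ASIC table.

static struct kfd_gpu_cache_info example_cache_info[] = {
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,	/* KiB, placeholder value */
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 16,	/* placeholder value */
	},
};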
934 dev->node_props.cpu_cores_count = cu->num_cpu_cores; in kfd_populated_cu_info_cpu()
935 dev->node_props.cpu_core_id_base = cu->processor_id_low; in kfd_populated_cu_info_cpu()
936 if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) in kfd_populated_cu_info_cpu()
937 dev->node_props.capability |= HSA_CAP_ATS_PRESENT; in kfd_populated_cu_info_cpu()
939 pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, in kfd_populated_cu_info_cpu()
940 cu->processor_id_low); in kfd_populated_cu_info_cpu()
946 dev->node_props.simd_id_base = cu->processor_id_low; in kfd_populated_cu_info_gpu()
947 dev->node_props.simd_count = cu->num_simd_cores; in kfd_populated_cu_info_gpu()
948 dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; in kfd_populated_cu_info_gpu()
949 dev->node_props.max_waves_per_simd = cu->max_waves_simd; in kfd_populated_cu_info_gpu()
950 dev->node_props.wave_front_size = cu->wave_front_size; in kfd_populated_cu_info_gpu()
951 dev->node_props.array_count = cu->array_count; in kfd_populated_cu_info_gpu()
952 dev->node_props.cu_per_simd_array = cu->num_cu_per_array; in kfd_populated_cu_info_gpu()
953 dev->node_props.simd_per_cu = cu->num_simd_per_cu; in kfd_populated_cu_info_gpu()
954 dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; in kfd_populated_cu_info_gpu()
955 if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) in kfd_populated_cu_info_gpu()
956 dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; in kfd_populated_cu_info_gpu()
957 pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low); in kfd_populated_cu_info_gpu()
960 /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
969 cu->proximity_domain, cu->hsa_capability); in kfd_parse_subtype_cu()
971 if (cu->proximity_domain == dev->proximity_domain) { in kfd_parse_subtype_cu()
972 if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) in kfd_parse_subtype_cu()
975 if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) in kfd_parse_subtype_cu()
990 list_for_each_entry(props, &dev->mem_props, list) { in find_subtype_mem()
991 if (props->heap_type == heap_type in find_subtype_mem()
992 && props->flags == flags in find_subtype_mem()
993 && props->width == width) in find_subtype_mem()
999 /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
1013 mem->proximity_domain); in kfd_parse_subtype_mem()
1015 if (mem->proximity_domain == dev->proximity_domain) { in kfd_parse_subtype_mem()
1016 /* We're on GPU node */ in kfd_parse_subtype_mem()
1017 if (dev->node_props.cpu_cores_count == 0) { in kfd_parse_subtype_mem()
1019 if (mem->visibility_type == 0) in kfd_parse_subtype_mem()
1024 heap_type = mem->visibility_type; in kfd_parse_subtype_mem()
1028 if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) in kfd_parse_subtype_mem()
1030 if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) in kfd_parse_subtype_mem()
1034 ((uint64_t)mem->length_high << 32) + in kfd_parse_subtype_mem()
1035 mem->length_low; in kfd_parse_subtype_mem()
1036 width = mem->width; in kfd_parse_subtype_mem()
1045 props->size_in_bytes += size_in_bytes; in kfd_parse_subtype_mem()
1051 return -ENOMEM; in kfd_parse_subtype_mem()
1053 props->heap_type = heap_type; in kfd_parse_subtype_mem()
1054 props->flags = flags; in kfd_parse_subtype_mem()
1055 props->size_in_bytes = size_in_bytes; in kfd_parse_subtype_mem()
1056 props->width = width; in kfd_parse_subtype_mem()
1058 dev->node_props.mem_banks_count++; in kfd_parse_subtype_mem()
1059 list_add_tail(&props->list, &dev->mem_props); in kfd_parse_subtype_mem()
1068 /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
1076 uint32_t id; in kfd_parse_subtype_cache() local
1079 id = cache->processor_id_low; in kfd_parse_subtype_cache()
1081 pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); in kfd_parse_subtype_cache()
1083 total_num_of_cu = (dev->node_props.array_count * in kfd_parse_subtype_cache()
1084 dev->node_props.cu_per_simd_array); in kfd_parse_subtype_cache()
1087 * information as it is associated with a CPU core or GPU in kfd_parse_subtype_cache()
1088 * Compute Unit. So map the cache using CPU core Id or SIMD in kfd_parse_subtype_cache()
1089 * (GPU) ID. in kfd_parse_subtype_cache()
1094 if ((id >= dev->node_props.cpu_core_id_base && in kfd_parse_subtype_cache()
1095 id <= dev->node_props.cpu_core_id_base + in kfd_parse_subtype_cache()
1096 dev->node_props.cpu_cores_count) || in kfd_parse_subtype_cache()
1097 (id >= dev->node_props.simd_id_base && in kfd_parse_subtype_cache()
1098 id < dev->node_props.simd_id_base + in kfd_parse_subtype_cache()
1102 return -ENOMEM; in kfd_parse_subtype_cache()
1104 props->processor_id_low = id; in kfd_parse_subtype_cache()
1105 props->cache_level = cache->cache_level; in kfd_parse_subtype_cache()
1106 props->cache_size = cache->cache_size; in kfd_parse_subtype_cache()
1107 props->cacheline_size = cache->cache_line_size; in kfd_parse_subtype_cache()
1108 props->cachelines_per_tag = cache->lines_per_tag; in kfd_parse_subtype_cache()
1109 props->cache_assoc = cache->associativity; in kfd_parse_subtype_cache()
1110 props->cache_latency = cache->cache_latency; in kfd_parse_subtype_cache()
1112 memcpy(props->sibling_map, cache->sibling_map, in kfd_parse_subtype_cache()
1116 props->sibling_map_size = CRAT_SIBLINGMAP_SIZE; in kfd_parse_subtype_cache()
1118 if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) in kfd_parse_subtype_cache()
1119 props->cache_type |= HSA_CACHE_TYPE_DATA; in kfd_parse_subtype_cache()
1120 if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) in kfd_parse_subtype_cache()
1121 props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in kfd_parse_subtype_cache()
1122 if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) in kfd_parse_subtype_cache()
1123 props->cache_type |= HSA_CACHE_TYPE_CPU; in kfd_parse_subtype_cache()
1124 if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) in kfd_parse_subtype_cache()
1125 props->cache_type |= HSA_CACHE_TYPE_HSACU; in kfd_parse_subtype_cache()
1127 dev->node_props.caches_count++; in kfd_parse_subtype_cache()
1128 list_add_tail(&props->list, &dev->cache_props); in kfd_parse_subtype_cache()
1137 /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
1148 id_from = iolink->proximity_domain_from; in kfd_parse_subtype_iolink()
1149 id_to = iolink->proximity_domain_to; in kfd_parse_subtype_iolink()
1154 if (id_from == dev->proximity_domain) { in kfd_parse_subtype_iolink()
1157 return -ENOMEM; in kfd_parse_subtype_iolink()
1159 props->node_from = id_from; in kfd_parse_subtype_iolink()
1160 props->node_to = id_to; in kfd_parse_subtype_iolink()
1161 props->ver_maj = iolink->version_major; in kfd_parse_subtype_iolink()
1162 props->ver_min = iolink->version_minor; in kfd_parse_subtype_iolink()
1163 props->iolink_type = iolink->io_interface_type; in kfd_parse_subtype_iolink()
1165 if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) in kfd_parse_subtype_iolink()
1166 props->weight = 20; in kfd_parse_subtype_iolink()
1167 else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) in kfd_parse_subtype_iolink()
1168 props->weight = iolink->weight_xgmi; in kfd_parse_subtype_iolink()
1170 props->weight = node_distance(id_from, id_to); in kfd_parse_subtype_iolink()
1172 props->min_latency = iolink->minimum_latency; in kfd_parse_subtype_iolink()
1173 props->max_latency = iolink->maximum_latency; in kfd_parse_subtype_iolink()
1174 props->min_bandwidth = iolink->minimum_bandwidth_mbs; in kfd_parse_subtype_iolink()
1175 props->max_bandwidth = iolink->maximum_bandwidth_mbs; in kfd_parse_subtype_iolink()
1176 props->rec_transfer_size = in kfd_parse_subtype_iolink()
1177 iolink->recommended_transfer_size; in kfd_parse_subtype_iolink()
1179 dev->node_props.io_links_count++; in kfd_parse_subtype_iolink()
1180 list_add_tail(&props->list, &dev->io_link_props); in kfd_parse_subtype_iolink()
1185 /* CPU topology is created before GPUs are detected, so CPU->GPU in kfd_parse_subtype_iolink()
1187 * means a GPU is detected and we are adding GPU->CPU to the topology. in kfd_parse_subtype_iolink()
1188 * At this time, also add the corresponding CPU->GPU link if GPU in kfd_parse_subtype_iolink()
1193 if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) { in kfd_parse_subtype_iolink()
1196 return -ENODEV; in kfd_parse_subtype_iolink()
1200 return -ENOMEM; in kfd_parse_subtype_iolink()
1202 props2->node_from = id_to; in kfd_parse_subtype_iolink()
1203 props2->node_to = id_from; in kfd_parse_subtype_iolink()
1204 props2->kobj = NULL; in kfd_parse_subtype_iolink()
1205 to_dev->node_props.io_links_count++; in kfd_parse_subtype_iolink()
1206 list_add_tail(&props2->list, &to_dev->io_link_props); in kfd_parse_subtype_iolink()
1212 /* kfd_parse_subtype - parse subtypes and attach it to correct topology device
1214 * @sub_type_hdr - subtype section of crat_image
1215 * @device_list - list of topology devices present in this crat_image
1226 switch (sub_type_hdr->type) { in kfd_parse_subtype()
1257 sub_type_hdr->type); in kfd_parse_subtype()
1263 /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
1266 * @crat_image - input image containing CRAT
1267 * @device_list - [OUT] list of kfd_topology_device generated after
1269 * @proximity_domain - Proximity domain of the first device in the table
1271 * Return - 0 if successful else -ve value
1285 return -EINVAL; in kfd_parse_crat_table()
1289 return -EINVAL; in kfd_parse_crat_table()
1292 num_nodes = crat_table->num_domains; in kfd_parse_crat_table()
1293 image_len = crat_table->length; in kfd_parse_crat_table()
1301 top_dev->proximity_domain = proximity_domain++; in kfd_parse_crat_table()
1305 ret = -ENOMEM; in kfd_parse_crat_table()
1309 memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); in kfd_parse_crat_table()
1310 memcpy(top_dev->oem_table_id, crat_table->oem_table_id, in kfd_parse_crat_table()
1312 top_dev->oem_revision = crat_table->oem_revision; in kfd_parse_crat_table()
1317 if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { in kfd_parse_crat_table()
1324 sub_type_hdr->length); in kfd_parse_crat_table()
1338 struct amdgpu_device *adev = kdev->adev; in kfd_fill_gpu_cache_info_from_gfx_config()
1342 if (adev->gfx.config.gc_tcp_l1_size) { in kfd_fill_gpu_cache_info_from_gfx_config()
1343 pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size; in kfd_fill_gpu_cache_info_from_gfx_config()
1348 pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; in kfd_fill_gpu_cache_info_from_gfx_config()
1352 if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config()
1354 adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config()
1359 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; in kfd_fill_gpu_cache_info_from_gfx_config()
1363 if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config()
1364 pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config()
1369 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; in kfd_fill_gpu_cache_info_from_gfx_config()
1373 if (adev->gfx.config.gc_gl1c_per_sa && in kfd_fill_gpu_cache_info_from_gfx_config()
1374 adev->gfx.config.gc_gl1c_size_per_instance) { in kfd_fill_gpu_cache_info_from_gfx_config()
1375 pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa * in kfd_fill_gpu_cache_info_from_gfx_config()
1376 adev->gfx.config.gc_gl1c_size_per_instance; in kfd_fill_gpu_cache_info_from_gfx_config()
1381 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config()
1384 /* L2 Data Cache per GPU (Total Tex Cache) */ in kfd_fill_gpu_cache_info_from_gfx_config()
1385 if (adev->gfx.config.gc_gl2c_per_gpu) { in kfd_fill_gpu_cache_info_from_gfx_config()
1386 pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu; in kfd_fill_gpu_cache_info_from_gfx_config()
1391 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config()
1394 /* L3 Data Cache per GPU */ in kfd_fill_gpu_cache_info_from_gfx_config()
1395 if (adev->gmc.mall_size) { in kfd_fill_gpu_cache_info_from_gfx_config()
1396 pcache_info[i].cache_size = adev->gmc.mall_size / 1024; in kfd_fill_gpu_cache_info_from_gfx_config()
1401 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config()
1410 struct amdgpu_device *adev = kdev->adev; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1414 if (adev->gfx.config.gc_tcp_size_per_cu) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1415 pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1424 if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1426 adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1431 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1435 if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1436 pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1441 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1444 /* L2 Data Cache per GPU (Total Tex Cache) */ in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1445 if (adev->gfx.config.gc_tcc_size) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1446 pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1451 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1454 /* L3 Data Cache per GPU */ in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1455 if (adev->gmc.mall_size) { in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1456 pcache_info[i].cache_size = adev->gmc.mall_size / 1024; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1461 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; in kfd_fill_gpu_cache_info_from_gfx_config_v2()
1471 switch (kdev->adev->asic_type) { in kfd_get_gpu_cache_info()
1529 kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd, in kfd_get_gpu_cache_info()
1591 kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); in kfd_get_gpu_cache_info()
1605 * following amount is allocated for GPU Virtual CRAT. This is
1611 /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
1613 * @numa_node_id: CPU NUMA node id
1617 * Return 0 if successful else return -ve value
1625 *avail_size -= sizeof(struct crat_subtype_computeunit); in kfd_fill_cu_for_cpu()
1627 return -ENOMEM; in kfd_fill_cu_for_cpu()
1632 sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; in kfd_fill_cu_for_cpu()
1633 sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); in kfd_fill_cu_for_cpu()
1634 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_cu_for_cpu()
1639 sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT; in kfd_fill_cu_for_cpu()
1640 sub_type_hdr->proximity_domain = proximity_domain; in kfd_fill_cu_for_cpu()
1641 sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id); in kfd_fill_cu_for_cpu()
1642 if (sub_type_hdr->processor_id_low == -1) in kfd_fill_cu_for_cpu()
1643 return -EINVAL; in kfd_fill_cu_for_cpu()
1645 sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask); in kfd_fill_cu_for_cpu()
1650 /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
1652 * @numa_node_id: CPU NUMA node id
1656 * Return 0 if successful else return -ve value
1666 *avail_size -= sizeof(struct crat_subtype_memory); in kfd_fill_mem_info_for_cpu()
1668 return -ENOMEM; in kfd_fill_mem_info_for_cpu()
1673 sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; in kfd_fill_mem_info_for_cpu()
1674 sub_type_hdr->length = sizeof(struct crat_subtype_memory); in kfd_fill_mem_info_for_cpu()
1675 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_mem_info_for_cpu()
1685 mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]); in kfd_fill_mem_info_for_cpu()
1688 sub_type_hdr->length_low = lower_32_bits(mem_in_bytes); in kfd_fill_mem_info_for_cpu()
1689 sub_type_hdr->length_high = upper_32_bits(mem_in_bytes); in kfd_fill_mem_info_for_cpu()
1690 sub_type_hdr->proximity_domain = proximity_domain; in kfd_fill_mem_info_for_cpu()
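The kfd_fill_mem_info_for_cpu() hits above show a per-zone accumulation and the 32-bit length split. A sketch of how those matched lines fit together, assuming the surrounding loop runs over the NUMA node's zones:

	uint64_t mem_in_bytes = 0;
	pg_data_t *pgdat = NODE_DATA(numa_node_id);
	int zone_type;

	/* Sum managed pages across all zones of this node, convert to
	 * bytes, then split into the 32-bit CRAT length fields.
	 */
	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
		mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
	mem_in_bytes <<= PAGE_SHIFT;

	sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
	sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);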
1704 if (c->x86_vendor == X86_VENDOR_AMD) in kfd_fill_iolink_info_for_cpu()
1716 *avail_size -= sizeof(struct crat_subtype_iolink); in kfd_fill_iolink_info_for_cpu()
1718 return -ENOMEM; in kfd_fill_iolink_info_for_cpu()
1723 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; in kfd_fill_iolink_info_for_cpu()
1724 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); in kfd_fill_iolink_info_for_cpu()
1725 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_iolink_info_for_cpu()
1728 sub_type_hdr->proximity_domain_from = numa_node_id; in kfd_fill_iolink_info_for_cpu()
1729 sub_type_hdr->proximity_domain_to = nid; in kfd_fill_iolink_info_for_cpu()
1730 sub_type_hdr->io_interface_type = link_type; in kfd_fill_iolink_info_for_cpu()
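The X86_VENDOR_AMD check in kfd_fill_iolink_info_for_cpu() decides which io_interface_type is written at the last matched line above. A hedged sketch of that selection, assuming the CRAT_IOLINK_TYPE_* constants defined in kfd_crat.h:

	struct cpuinfo_x86 *c = &cpu_data(0);
	int link_type;

	/* AMD CPUs interconnect over HyperTransport; anything else is
	 * reported as QPI in the virtual CRAT.
	 */
	if (c->x86_vendor == X86_VENDOR_AMD)
		link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT;
	else
		link_type = CRAT_IOLINK_TYPE_QPI_1_1;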
1740 /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
1760 return -EINVAL; in kfd_create_vcrat_image_cpu()
1765 avail_size -= sizeof(struct crat_header); in kfd_create_vcrat_image_cpu()
1767 return -ENOMEM; in kfd_create_vcrat_image_cpu()
1770 memcpy(&crat_table->signature, CRAT_SIGNATURE, in kfd_create_vcrat_image_cpu()
1771 sizeof(crat_table->signature)); in kfd_create_vcrat_image_cpu()
1772 crat_table->length = sizeof(struct crat_header); in kfd_create_vcrat_image_cpu()
1778 crat_table->oem_revision = acpi_table->revision; in kfd_create_vcrat_image_cpu()
1779 memcpy(crat_table->oem_id, acpi_table->oem_id, in kfd_create_vcrat_image_cpu()
1781 memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, in kfd_create_vcrat_image_cpu()
1785 crat_table->total_entries = 0; in kfd_create_vcrat_image_cpu()
1786 crat_table->num_domains = 0; in kfd_create_vcrat_image_cpu()
1791 if (kfd_numa_node_to_apic_id(numa_node_id) == -1) in kfd_create_vcrat_image_cpu()
1796 crat_table->num_domains, in kfd_create_vcrat_image_cpu()
1800 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_cpu()
1801 crat_table->total_entries++; in kfd_create_vcrat_image_cpu()
1804 sub_type_hdr->length); in kfd_create_vcrat_image_cpu()
1808 crat_table->num_domains, in kfd_create_vcrat_image_cpu()
1812 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_cpu()
1813 crat_table->total_entries++; in kfd_create_vcrat_image_cpu()
1816 sub_type_hdr->length); in kfd_create_vcrat_image_cpu()
1827 crat_table->length += (sub_type_hdr->length * entries); in kfd_create_vcrat_image_cpu()
1828 crat_table->total_entries += entries; in kfd_create_vcrat_image_cpu()
1831 sub_type_hdr->length * entries); in kfd_create_vcrat_image_cpu()
1837 crat_table->num_domains++; in kfd_create_vcrat_image_cpu()
1848 *size = crat_table->length; in kfd_create_vcrat_image_cpu()
1860 *avail_size -= sizeof(struct crat_subtype_memory); in kfd_fill_gpu_memory_affinity()
1862 return -ENOMEM; in kfd_fill_gpu_memory_affinity()
1865 sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; in kfd_fill_gpu_memory_affinity()
1866 sub_type_hdr->length = sizeof(struct crat_subtype_memory); in kfd_fill_gpu_memory_affinity()
1867 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_gpu_memory_affinity()
1869 sub_type_hdr->proximity_domain = proximity_domain; in kfd_fill_gpu_memory_affinity()
1871 pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n", in kfd_fill_gpu_memory_affinity()
1874 sub_type_hdr->length_low = lower_32_bits(size); in kfd_fill_gpu_memory_affinity()
1875 sub_type_hdr->length_high = upper_32_bits(size); in kfd_fill_gpu_memory_affinity()
1877 sub_type_hdr->width = local_mem_info->vram_width; in kfd_fill_gpu_memory_affinity()
1878 sub_type_hdr->visibility_type = type; in kfd_fill_gpu_memory_affinity()
1889 u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 | in kfd_find_numa_node_in_srat()
1890 pci_dev_id(kdev->adev->pdev); in kfd_find_numa_node_in_srat()
1894 struct acpi_srat_generic_affinity *gpu; in kfd_find_numa_node_in_srat() local
1910 table_end = (unsigned long)table_header + table_header->length; in kfd_find_numa_node_in_srat()
1916 subtable_len = sub_header->length; in kfd_find_numa_node_in_srat()
1928 switch (sub_header->type) { in kfd_find_numa_node_in_srat()
1931 pxm = *((u32 *)cpu->proximity_domain_hi) << 8 | in kfd_find_numa_node_in_srat()
1932 cpu->proximity_domain_lo; in kfd_find_numa_node_in_srat()
1937 gpu = (struct acpi_srat_generic_affinity *)sub_header; in kfd_find_numa_node_in_srat()
1938 bdf = *((u16 *)(&gpu->device_handle[0])) << 16 | in kfd_find_numa_node_in_srat()
1939 *((u16 *)(&gpu->device_handle[2])); in kfd_find_numa_node_in_srat()
1942 numa_node = pxm_to_node(gpu->proximity_domain); in kfd_find_numa_node_in_srat()
1954 subtable_len = sub_header->length; in kfd_find_numa_node_in_srat()
1959 /* Workaround bad cpu-gpu binding case */ in kfd_find_numa_node_in_srat()
1965 set_dev_node(&kdev->adev->pdev->dev, numa_node); in kfd_find_numa_node_in_srat()
1972 /* kfd_fill_gpu_direct_io_link_to_cpu - Fill in direct io link from GPU
1975 * @kdev - [IN] GPU device
1977 * @proximity_domain - proximity domain of the GPU node
1979 * Return 0 if successful else return -ve value
1986 *avail_size -= sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_direct_io_link_to_cpu()
1988 return -ENOMEM; in kfd_fill_gpu_direct_io_link_to_cpu()
1993 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; in kfd_fill_gpu_direct_io_link_to_cpu()
1994 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_direct_io_link_to_cpu()
1995 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_fill_gpu_direct_io_link_to_cpu()
1997 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; in kfd_fill_gpu_direct_io_link_to_cpu()
2000 * TODO: Fill-in other fields of iolink subtype in kfd_fill_gpu_direct_io_link_to_cpu()
2002 if (kdev->adev->gmc.xgmi.connected_to_cpu || in kfd_fill_gpu_direct_io_link_to_cpu()
2004 kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) == in kfd_fill_gpu_direct_io_link_to_cpu()
2010 kdev->adev, NULL, true) : mem_bw; in kfd_fill_gpu_direct_io_link_to_cpu()
2013 * with host gpu xgmi link, host can access gpu memory whether in kfd_fill_gpu_direct_io_link_to_cpu()
2017 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; in kfd_fill_gpu_direct_io_link_to_cpu()
2018 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; in kfd_fill_gpu_direct_io_link_to_cpu()
2019 sub_type_hdr->weight_xgmi = weight; in kfd_fill_gpu_direct_io_link_to_cpu()
2020 sub_type_hdr->minimum_bandwidth_mbs = bandwidth; in kfd_fill_gpu_direct_io_link_to_cpu()
2021 sub_type_hdr->maximum_bandwidth_mbs = bandwidth; in kfd_fill_gpu_direct_io_link_to_cpu()
2023 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; in kfd_fill_gpu_direct_io_link_to_cpu()
2024 sub_type_hdr->minimum_bandwidth_mbs = in kfd_fill_gpu_direct_io_link_to_cpu()
2025 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true); in kfd_fill_gpu_direct_io_link_to_cpu()
2026 sub_type_hdr->maximum_bandwidth_mbs = in kfd_fill_gpu_direct_io_link_to_cpu()
2027 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false); in kfd_fill_gpu_direct_io_link_to_cpu()
2030 sub_type_hdr->proximity_domain_from = proximity_domain; in kfd_fill_gpu_direct_io_link_to_cpu()
2033 if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE && in kfd_fill_gpu_direct_io_link_to_cpu()
2038 if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE) in kfd_fill_gpu_direct_io_link_to_cpu()
2039 sub_type_hdr->proximity_domain_to = 0; in kfd_fill_gpu_direct_io_link_to_cpu()
2041 sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node; in kfd_fill_gpu_direct_io_link_to_cpu()
2043 sub_type_hdr->proximity_domain_to = 0; in kfd_fill_gpu_direct_io_link_to_cpu()
2055 bool use_ta_info = kdev->kfd->num_nodes == 1; in kfd_fill_gpu_xgmi_link_to_gpu()
2057 *avail_size -= sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_xgmi_link_to_gpu()
2059 return -ENOMEM; in kfd_fill_gpu_xgmi_link_to_gpu()
2063 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; in kfd_fill_gpu_xgmi_link_to_gpu()
2064 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); in kfd_fill_gpu_xgmi_link_to_gpu()
2065 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED | in kfd_fill_gpu_xgmi_link_to_gpu()
2068 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; in kfd_fill_gpu_xgmi_link_to_gpu()
2069 sub_type_hdr->proximity_domain_from = proximity_domain_from; in kfd_fill_gpu_xgmi_link_to_gpu()
2070 sub_type_hdr->proximity_domain_to = proximity_domain_to; in kfd_fill_gpu_xgmi_link_to_gpu()
2073 sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT * in kfd_fill_gpu_xgmi_link_to_gpu()
2074 amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); in kfd_fill_gpu_xgmi_link_to_gpu()
2075 sub_type_hdr->maximum_bandwidth_mbs = in kfd_fill_gpu_xgmi_link_to_gpu()
2076 amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, in kfd_fill_gpu_xgmi_link_to_gpu()
2077 peer_kdev->adev, false); in kfd_fill_gpu_xgmi_link_to_gpu()
2078 sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? in kfd_fill_gpu_xgmi_link_to_gpu()
2079 amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; in kfd_fill_gpu_xgmi_link_to_gpu()
2081 bool is_single_hop = kdev->kfd == peer_kdev->kfd; in kfd_fill_gpu_xgmi_link_to_gpu()
2086 sub_type_hdr->weight_xgmi = weight; in kfd_fill_gpu_xgmi_link_to_gpu()
2087 sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0; in kfd_fill_gpu_xgmi_link_to_gpu()
2088 sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0; in kfd_fill_gpu_xgmi_link_to_gpu()
2094 /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
2096 * @pcrat_image: Fill in VCRAT for GPU
2105 struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config; in kfd_create_vcrat_image_gpu()
2106 struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info; in kfd_create_vcrat_image_gpu()
2117 return -EINVAL; in kfd_create_vcrat_image_gpu()
2122 avail_size -= sizeof(struct crat_header); in kfd_create_vcrat_image_gpu()
2124 return -ENOMEM; in kfd_create_vcrat_image_gpu()
2128 memcpy(&crat_table->signature, CRAT_SIGNATURE, in kfd_create_vcrat_image_gpu()
2129 sizeof(crat_table->signature)); in kfd_create_vcrat_image_gpu()
2131 crat_table->length = sizeof(struct crat_header); in kfd_create_vcrat_image_gpu()
2132 crat_table->num_domains = 1; in kfd_create_vcrat_image_gpu()
2133 crat_table->total_entries = 0; in kfd_create_vcrat_image_gpu()
2138 avail_size -= sizeof(struct crat_subtype_computeunit); in kfd_create_vcrat_image_gpu()
2140 return -ENOMEM; in kfd_create_vcrat_image_gpu()
2145 sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; in kfd_create_vcrat_image_gpu()
2146 sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); in kfd_create_vcrat_image_gpu()
2147 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; in kfd_create_vcrat_image_gpu()
2151 cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; in kfd_create_vcrat_image_gpu()
2152 cu->proximity_domain = proximity_domain; in kfd_create_vcrat_image_gpu()
2154 cu->num_simd_per_cu = cu_info->simd_per_cu; in kfd_create_vcrat_image_gpu()
2155 cu->num_simd_cores = cu_info->simd_per_cu * in kfd_create_vcrat_image_gpu()
2156 (cu_info->number / kdev->kfd->num_nodes); in kfd_create_vcrat_image_gpu()
2157 cu->max_waves_simd = cu_info->max_waves_per_simd; in kfd_create_vcrat_image_gpu()
2159 cu->wave_front_size = cu_info->wave_front_size; in kfd_create_vcrat_image_gpu()
2160 cu->array_count = gfx_info->max_sh_per_se * in kfd_create_vcrat_image_gpu()
2161 gfx_info->max_shader_engines; in kfd_create_vcrat_image_gpu()
2162 total_num_of_cu = (cu->array_count * gfx_info->max_cu_per_sh); in kfd_create_vcrat_image_gpu()
2163 cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu); in kfd_create_vcrat_image_gpu()
2164 cu->num_cu_per_array = gfx_info->max_cu_per_sh; in kfd_create_vcrat_image_gpu()
2165 cu->max_slots_scatch_cu = cu_info->max_scratch_slots_per_cu; in kfd_create_vcrat_image_gpu()
2166 cu->num_banks = gfx_info->max_shader_engines; in kfd_create_vcrat_image_gpu()
2167 cu->lds_size_in_kb = cu_info->lds_size; in kfd_create_vcrat_image_gpu()
2169 cu->hsa_capability = 0; in kfd_create_vcrat_image_gpu()
2171 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_gpu()
2172 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2179 local_mem_info = kdev->local_mem_info; in kfd_create_vcrat_image_gpu()
2181 sub_type_hdr->length); in kfd_create_vcrat_image_gpu()
2183 if (kdev->adev->debug_largebar) in kfd_create_vcrat_image_gpu()
2204 crat_table->length += sizeof(struct crat_subtype_memory); in kfd_create_vcrat_image_gpu()
2205 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2208 * Only direct links are added here which is Link from GPU to in kfd_create_vcrat_image_gpu()
2212 sub_type_hdr->length); in kfd_create_vcrat_image_gpu()
2219 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_gpu()
2220 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2224 * Direct links from GPU to other GPUs through xGMI. in kfd_create_vcrat_image_gpu()
2227 * hive id (from this GPU to other GPU). The reversed iolink in kfd_create_vcrat_image_gpu()
2228 * (from other GPU to this GPU) will be added in kfd_create_vcrat_image_gpu()
2231 if (kdev->kfd->hive_id) { in kfd_create_vcrat_image_gpu()
2234 if (!peer_dev->gpu) in kfd_create_vcrat_image_gpu()
2236 if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id) in kfd_create_vcrat_image_gpu()
2242 &avail_size, kdev, peer_dev->gpu, in kfd_create_vcrat_image_gpu()
2247 crat_table->length += sub_type_hdr->length; in kfd_create_vcrat_image_gpu()
2248 crat_table->total_entries++; in kfd_create_vcrat_image_gpu()
2251 *size = crat_table->length; in kfd_create_vcrat_image_gpu()
2252 pr_info("Virtual CRAT table created for GPU\n"); in kfd_create_vcrat_image_gpu()
2257 /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
2265 * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device
2266 * COMPUTE_UNIT_GPU - Create VCRAT for GPU
2267 * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
2268 * -- this option is not currently implemented.
2272 * Return 0 if successful else return -ve value
2283 return -EINVAL; in kfd_create_crat_image_virtual()
2288 * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. in kfd_create_crat_image_virtual()
2298 (num_nodes - 1) * sizeof(struct crat_subtype_iolink)); in kfd_create_crat_image_virtual()
2301 return -ENOMEM; in kfd_create_crat_image_virtual()
2308 return -EINVAL; in kfd_create_crat_image_virtual()
2311 return -ENOMEM; in kfd_create_crat_image_virtual()
2318 ret = -EINVAL; in kfd_create_crat_image_virtual()
2322 ret = -EINVAL; in kfd_create_crat_image_virtual()
2336 * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
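Taken together, these matched functions form the virtual-CRAT lifecycle driven from kfd_topology.c: build an image for a CPU or GPU node, parse it into topology devices, then free it. A hedged caller-side sketch follows, assuming the prototypes exported by kfd_crat.h (parameter lists have changed across kernel versions):

	void *crat_image = NULL;
	size_t image_size = 0;
	uint32_t proximity_domain = 0;	/* first free domain, assumed */
	struct list_head temp_topology_device_list;
	int ret;

	INIT_LIST_HEAD(&temp_topology_device_list);

	/* COMPUTE_UNIT_CPU: no GPU device is involved, so kdev is NULL. */
	ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
					    COMPUTE_UNIT_CPU, NULL,
					    proximity_domain);
	if (!ret) {
		ret = kfd_parse_crat_table(crat_image,
					   &temp_topology_device_list,
					   proximity_domain);
		kfd_destroy_crat_image(crat_image);
	}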