xref: /linux/drivers/gpu/drm/amd/amdkfd/kfd_crat.c (revision 4f9786035f9e519db41375818e1d0b5f20da2f10)
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2015-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
233a87177eSHarish Kasiviswanathan 
243a87177eSHarish Kasiviswanathan #include <linux/pci.h>
25174de876SFelix Kuehling #include <linux/acpi.h>
26174de876SFelix Kuehling #include "kfd_crat.h"
27520b8fb7SFelix Kuehling #include "kfd_priv.h"
28174de876SFelix Kuehling #include "kfd_topology.h"
29d34184e3SRajneesh Bhardwaj #include "amdgpu.h"
305b87245fSAmber Lin #include "amdgpu_amdkfd.h"
31e46738a5SJonathan Kim #include "amdgpu_xgmi.h"
32174de876SFelix Kuehling 
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
 * GPU processor IDs are expressed with Bit[31]=1.
 * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
 * used in the CRAT.
 */
static uint32_t gpu_processor_id_low = 0x80001000;

/* Return the next available gpu_processor_id and advance the counter by
 * @total_cu_count, so each call hands out a range that does not overlap
 * the one returned by the previous call.
 *	@total_cu_count - Total CUs present in the GPU including ones
 *			  masked off
 *
 * Returns the first processor ID of the newly reserved range.
 *
 * NOTE(review): no locking is done here; callers are presumably
 * serialized elsewhere - confirm before calling from concurrent paths.
 */
static inline unsigned int get_and_inc_gpu_processor_id(
				unsigned int total_cu_count)
{
	/* Keep the ID unsigned: bit 31 is set, so it does not fit in an int */
	uint32_t current_id = gpu_processor_id_low;

	gpu_processor_id_low += total_cu_count;
	return current_id;
}
523a87177eSHarish Kasiviswanathan 
533a87177eSHarish Kasiviswanathan 
543a87177eSHarish Kasiviswanathan static struct kfd_gpu_cache_info kaveri_cache_info[] = {
553a87177eSHarish Kasiviswanathan 	{
563a87177eSHarish Kasiviswanathan 		/* TCP L1 Cache per CU */
573a87177eSHarish Kasiviswanathan 		.cache_size = 16,
583a87177eSHarish Kasiviswanathan 		.cache_level = 1,
595a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
603a87177eSHarish Kasiviswanathan 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
613a87177eSHarish Kasiviswanathan 				CRAT_CACHE_FLAGS_DATA_CACHE |
623a87177eSHarish Kasiviswanathan 				CRAT_CACHE_FLAGS_SIMD_CACHE),
633a87177eSHarish Kasiviswanathan 		.num_cu_shared = 1,
643a87177eSHarish Kasiviswanathan 	},
653a87177eSHarish Kasiviswanathan 	{
663a87177eSHarish Kasiviswanathan 		/* Scalar L1 Instruction Cache (in SQC module) per bank */
673a87177eSHarish Kasiviswanathan 		.cache_size = 16,
683a87177eSHarish Kasiviswanathan 		.cache_level = 1,
695a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
703a87177eSHarish Kasiviswanathan 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
713a87177eSHarish Kasiviswanathan 				CRAT_CACHE_FLAGS_INST_CACHE |
723a87177eSHarish Kasiviswanathan 				CRAT_CACHE_FLAGS_SIMD_CACHE),
733a87177eSHarish Kasiviswanathan 		.num_cu_shared = 2,
743a87177eSHarish Kasiviswanathan 	},
753a87177eSHarish Kasiviswanathan 	{
763a87177eSHarish Kasiviswanathan 		/* Scalar L1 Data Cache (in SQC module) per bank */
773a87177eSHarish Kasiviswanathan 		.cache_size = 8,
783a87177eSHarish Kasiviswanathan 		.cache_level = 1,
795a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
803a87177eSHarish Kasiviswanathan 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
813a87177eSHarish Kasiviswanathan 				CRAT_CACHE_FLAGS_DATA_CACHE |
823a87177eSHarish Kasiviswanathan 				CRAT_CACHE_FLAGS_SIMD_CACHE),
833a87177eSHarish Kasiviswanathan 		.num_cu_shared = 2,
843a87177eSHarish Kasiviswanathan 	},
853a87177eSHarish Kasiviswanathan 
863a87177eSHarish Kasiviswanathan 	/* TODO: Add L2 Cache information */
873a87177eSHarish Kasiviswanathan };
883a87177eSHarish Kasiviswanathan 
893a87177eSHarish Kasiviswanathan 
903a87177eSHarish Kasiviswanathan static struct kfd_gpu_cache_info carrizo_cache_info[] = {
913a87177eSHarish Kasiviswanathan 	{
923a87177eSHarish Kasiviswanathan 		/* TCP L1 Cache per CU */
933a87177eSHarish Kasiviswanathan 		.cache_size = 16,
943a87177eSHarish Kasiviswanathan 		.cache_level = 1,
955a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
963a87177eSHarish Kasiviswanathan 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
973a87177eSHarish Kasiviswanathan 				CRAT_CACHE_FLAGS_DATA_CACHE |
983a87177eSHarish Kasiviswanathan 				CRAT_CACHE_FLAGS_SIMD_CACHE),
993a87177eSHarish Kasiviswanathan 		.num_cu_shared = 1,
1003a87177eSHarish Kasiviswanathan 	},
1013a87177eSHarish Kasiviswanathan 	{
1023a87177eSHarish Kasiviswanathan 		/* Scalar L1 Instruction Cache (in SQC module) per bank */
1035a2df8ecSJoseph Greathouse 		.cache_size = 32,
1043a87177eSHarish Kasiviswanathan 		.cache_level = 1,
1055a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
1063a87177eSHarish Kasiviswanathan 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
1073a87177eSHarish Kasiviswanathan 				CRAT_CACHE_FLAGS_INST_CACHE |
1083a87177eSHarish Kasiviswanathan 				CRAT_CACHE_FLAGS_SIMD_CACHE),
1093a87177eSHarish Kasiviswanathan 		.num_cu_shared = 4,
1103a87177eSHarish Kasiviswanathan 	},
1113a87177eSHarish Kasiviswanathan 	{
1123a87177eSHarish Kasiviswanathan 		/* Scalar L1 Data Cache (in SQC module) per bank. */
1135a2df8ecSJoseph Greathouse 		.cache_size = 16,
1143a87177eSHarish Kasiviswanathan 		.cache_level = 1,
1155a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
1163a87177eSHarish Kasiviswanathan 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
1173a87177eSHarish Kasiviswanathan 				CRAT_CACHE_FLAGS_DATA_CACHE |
1183a87177eSHarish Kasiviswanathan 				CRAT_CACHE_FLAGS_SIMD_CACHE),
1193a87177eSHarish Kasiviswanathan 		.num_cu_shared = 4,
1203a87177eSHarish Kasiviswanathan 	},
1213a87177eSHarish Kasiviswanathan 
1223a87177eSHarish Kasiviswanathan 	/* TODO: Add L2 Cache information */
1233a87177eSHarish Kasiviswanathan };
1243a87177eSHarish Kasiviswanathan 
/* Aliases: these ASIC names resolve to the Kaveri or Carrizo table
 * instead of carrying a table of their own.
 */
#define hawaii_cache_info kaveri_cache_info
#define tonga_cache_info carrizo_cache_info
#define fiji_cache_info carrizo_cache_info
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
#define vegam_cache_info carrizo_cache_info
13274abbdedSMike Li 
/* NOTE: L1 cache information has been updated and L2/L3
 * cache information has been added for Vega10 and
 * newer ASICs. The unit for cache_size is KiB.
 * In future, cache details must be checked and updated
 * for every new ASIC.
 */
13974abbdedSMike Li 
14074abbdedSMike Li static struct kfd_gpu_cache_info vega10_cache_info[] = {
14174abbdedSMike Li 	{
14274abbdedSMike Li 		/* TCP L1 Cache per CU */
14374abbdedSMike Li 		.cache_size = 16,
14474abbdedSMike Li 		.cache_level = 1,
1455a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
14674abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
14774abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
14874abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
14974abbdedSMike Li 		.num_cu_shared = 1,
15074abbdedSMike Li 	},
15174abbdedSMike Li 	{
15274abbdedSMike Li 		/* Scalar L1 Instruction Cache per SQC */
15374abbdedSMike Li 		.cache_size = 32,
15474abbdedSMike Li 		.cache_level = 1,
1555a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
15674abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
15774abbdedSMike Li 				CRAT_CACHE_FLAGS_INST_CACHE |
15874abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
15974abbdedSMike Li 		.num_cu_shared = 3,
16074abbdedSMike Li 	},
16174abbdedSMike Li 	{
16274abbdedSMike Li 		/* Scalar L1 Data Cache per SQC */
16374abbdedSMike Li 		.cache_size = 16,
16474abbdedSMike Li 		.cache_level = 1,
1655a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
16674abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
16774abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
16874abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
16974abbdedSMike Li 		.num_cu_shared = 3,
17074abbdedSMike Li 	},
17174abbdedSMike Li 	{
17274abbdedSMike Li 		/* L2 Data Cache per GPU (Total Tex Cache) */
17374abbdedSMike Li 		.cache_size = 4096,
17474abbdedSMike Li 		.cache_level = 2,
1755a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
17674abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
17774abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
17874abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
17974abbdedSMike Li 		.num_cu_shared = 16,
18074abbdedSMike Li 	},
18174abbdedSMike Li };
18274abbdedSMike Li 
18374abbdedSMike Li static struct kfd_gpu_cache_info raven_cache_info[] = {
18474abbdedSMike Li 	{
18574abbdedSMike Li 		/* TCP L1 Cache per CU */
18674abbdedSMike Li 		.cache_size = 16,
18774abbdedSMike Li 		.cache_level = 1,
1885a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
18974abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
19074abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
19174abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
19274abbdedSMike Li 		.num_cu_shared = 1,
19374abbdedSMike Li 	},
19474abbdedSMike Li 	{
19574abbdedSMike Li 		/* Scalar L1 Instruction Cache per SQC */
19674abbdedSMike Li 		.cache_size = 32,
19774abbdedSMike Li 		.cache_level = 1,
1985a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
19974abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
20074abbdedSMike Li 				CRAT_CACHE_FLAGS_INST_CACHE |
20174abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
20274abbdedSMike Li 		.num_cu_shared = 3,
20374abbdedSMike Li 	},
20474abbdedSMike Li 	{
20574abbdedSMike Li 		/* Scalar L1 Data Cache per SQC */
20674abbdedSMike Li 		.cache_size = 16,
20774abbdedSMike Li 		.cache_level = 1,
2085a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
20974abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
21074abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
21174abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
21274abbdedSMike Li 		.num_cu_shared = 3,
21374abbdedSMike Li 	},
21474abbdedSMike Li 	{
21574abbdedSMike Li 		/* L2 Data Cache per GPU (Total Tex Cache) */
21674abbdedSMike Li 		.cache_size = 1024,
21774abbdedSMike Li 		.cache_level = 2,
2185a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
21974abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
22074abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
22174abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
22274abbdedSMike Li 		.num_cu_shared = 11,
22374abbdedSMike Li 	},
22474abbdedSMike Li };
22574abbdedSMike Li 
22674abbdedSMike Li static struct kfd_gpu_cache_info renoir_cache_info[] = {
22774abbdedSMike Li 	{
22874abbdedSMike Li 		/* TCP L1 Cache per CU */
22974abbdedSMike Li 		.cache_size = 16,
23074abbdedSMike Li 		.cache_level = 1,
2315a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
23274abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
23374abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
23474abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
23574abbdedSMike Li 		.num_cu_shared = 1,
23674abbdedSMike Li 	},
23774abbdedSMike Li 	{
23874abbdedSMike Li 		/* Scalar L1 Instruction Cache per SQC */
23974abbdedSMike Li 		.cache_size = 32,
24074abbdedSMike Li 		.cache_level = 1,
2415a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
24274abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
24374abbdedSMike Li 				CRAT_CACHE_FLAGS_INST_CACHE |
24474abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
24574abbdedSMike Li 		.num_cu_shared = 3,
24674abbdedSMike Li 	},
24774abbdedSMike Li 	{
24874abbdedSMike Li 		/* Scalar L1 Data Cache per SQC */
24974abbdedSMike Li 		.cache_size = 16,
25074abbdedSMike Li 		.cache_level = 1,
2515a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
25274abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
25374abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
25474abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
25574abbdedSMike Li 		.num_cu_shared = 3,
25674abbdedSMike Li 	},
25774abbdedSMike Li 	{
25874abbdedSMike Li 		/* L2 Data Cache per GPU (Total Tex Cache) */
25974abbdedSMike Li 		.cache_size = 1024,
26074abbdedSMike Li 		.cache_level = 2,
2615a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
26274abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
26374abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
26474abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
26574abbdedSMike Li 		.num_cu_shared = 8,
26674abbdedSMike Li 	},
26774abbdedSMike Li };
26874abbdedSMike Li 
26974abbdedSMike Li static struct kfd_gpu_cache_info vega12_cache_info[] = {
27074abbdedSMike Li 	{
27174abbdedSMike Li 		/* TCP L1 Cache per CU */
27274abbdedSMike Li 		.cache_size = 16,
27374abbdedSMike Li 		.cache_level = 1,
2745a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
27574abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
27674abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
27774abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
27874abbdedSMike Li 		.num_cu_shared = 1,
27974abbdedSMike Li 	},
28074abbdedSMike Li 	{
28174abbdedSMike Li 		/* Scalar L1 Instruction Cache per SQC */
28274abbdedSMike Li 		.cache_size = 32,
28374abbdedSMike Li 		.cache_level = 1,
2845a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
28574abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
28674abbdedSMike Li 				CRAT_CACHE_FLAGS_INST_CACHE |
28774abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
28874abbdedSMike Li 		.num_cu_shared = 3,
28974abbdedSMike Li 	},
29074abbdedSMike Li 	{
29174abbdedSMike Li 		/* Scalar L1 Data Cache per SQC */
29274abbdedSMike Li 		.cache_size = 16,
29374abbdedSMike Li 		.cache_level = 1,
2945a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
29574abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
29674abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
29774abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
29874abbdedSMike Li 		.num_cu_shared = 3,
29974abbdedSMike Li 	},
30074abbdedSMike Li 	{
30174abbdedSMike Li 		/* L2 Data Cache per GPU (Total Tex Cache) */
30274abbdedSMike Li 		.cache_size = 2048,
30374abbdedSMike Li 		.cache_level = 2,
3045a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
30574abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
30674abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
30774abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
30874abbdedSMike Li 		.num_cu_shared = 5,
30974abbdedSMike Li 	},
31074abbdedSMike Li };
31174abbdedSMike Li 
31274abbdedSMike Li static struct kfd_gpu_cache_info vega20_cache_info[] = {
31374abbdedSMike Li 	{
31474abbdedSMike Li 		/* TCP L1 Cache per CU */
31574abbdedSMike Li 		.cache_size = 16,
31674abbdedSMike Li 		.cache_level = 1,
3175a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
31874abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
31974abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
32074abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
32174abbdedSMike Li 		.num_cu_shared = 1,
32274abbdedSMike Li 	},
32374abbdedSMike Li 	{
32474abbdedSMike Li 		/* Scalar L1 Instruction Cache per SQC */
32574abbdedSMike Li 		.cache_size = 32,
32674abbdedSMike Li 		.cache_level = 1,
3275a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
32874abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
32974abbdedSMike Li 				CRAT_CACHE_FLAGS_INST_CACHE |
33074abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
33174abbdedSMike Li 		.num_cu_shared = 3,
33274abbdedSMike Li 	},
33374abbdedSMike Li 	{
33474abbdedSMike Li 		/* Scalar L1 Data Cache per SQC */
33574abbdedSMike Li 		.cache_size = 16,
33674abbdedSMike Li 		.cache_level = 1,
3375a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
33874abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
33974abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
34074abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
34174abbdedSMike Li 		.num_cu_shared = 3,
34274abbdedSMike Li 	},
34374abbdedSMike Li 	{
34474abbdedSMike Li 		/* L2 Data Cache per GPU (Total Tex Cache) */
34574abbdedSMike Li 		.cache_size = 8192,
34674abbdedSMike Li 		.cache_level = 2,
3475a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
34874abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
34974abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
35074abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
35174abbdedSMike Li 		.num_cu_shared = 16,
35274abbdedSMike Li 	},
35374abbdedSMike Li };
35474abbdedSMike Li 
35574abbdedSMike Li static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
35674abbdedSMike Li 	{
35774abbdedSMike Li 		/* TCP L1 Cache per CU */
35874abbdedSMike Li 		.cache_size = 16,
35974abbdedSMike Li 		.cache_level = 1,
3605a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
36174abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
36274abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
36374abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
36474abbdedSMike Li 		.num_cu_shared = 1,
36574abbdedSMike Li 	},
36674abbdedSMike Li 	{
36774abbdedSMike Li 		/* Scalar L1 Instruction Cache per SQC */
36874abbdedSMike Li 		.cache_size = 32,
36974abbdedSMike Li 		.cache_level = 1,
3705a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
37174abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
37274abbdedSMike Li 				CRAT_CACHE_FLAGS_INST_CACHE |
37374abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
37474abbdedSMike Li 		.num_cu_shared = 2,
37574abbdedSMike Li 	},
37674abbdedSMike Li 	{
37774abbdedSMike Li 		/* Scalar L1 Data Cache per SQC */
37874abbdedSMike Li 		.cache_size = 16,
37974abbdedSMike Li 		.cache_level = 1,
3805a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
38174abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
38274abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
38374abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
38474abbdedSMike Li 		.num_cu_shared = 2,
38574abbdedSMike Li 	},
38674abbdedSMike Li 	{
38774abbdedSMike Li 		/* L2 Data Cache per GPU (Total Tex Cache) */
38874abbdedSMike Li 		.cache_size = 8192,
38974abbdedSMike Li 		.cache_level = 2,
3905a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
39174abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
39274abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
39374abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
39474abbdedSMike Li 		.num_cu_shared = 14,
39574abbdedSMike Li 	},
39674abbdedSMike Li };
39774abbdedSMike Li 
39874abbdedSMike Li static struct kfd_gpu_cache_info navi10_cache_info[] = {
39974abbdedSMike Li 	{
40074abbdedSMike Li 		/* TCP L1 Cache per CU */
40174abbdedSMike Li 		.cache_size = 16,
40274abbdedSMike Li 		.cache_level = 1,
4035a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
40474abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
40574abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
40674abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
40774abbdedSMike Li 		.num_cu_shared = 1,
40874abbdedSMike Li 	},
40974abbdedSMike Li 	{
41074abbdedSMike Li 		/* Scalar L1 Instruction Cache per SQC */
41174abbdedSMike Li 		.cache_size = 32,
41274abbdedSMike Li 		.cache_level = 1,
4135a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
41474abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
41574abbdedSMike Li 				CRAT_CACHE_FLAGS_INST_CACHE |
41674abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
41774abbdedSMike Li 		.num_cu_shared = 2,
41874abbdedSMike Li 	},
41974abbdedSMike Li 	{
42074abbdedSMike Li 		/* Scalar L1 Data Cache per SQC */
42174abbdedSMike Li 		.cache_size = 16,
42274abbdedSMike Li 		.cache_level = 1,
4235a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
42474abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
42574abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
42674abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
42774abbdedSMike Li 		.num_cu_shared = 2,
42874abbdedSMike Li 	},
42974abbdedSMike Li 	{
43074abbdedSMike Li 		/* GL1 Data Cache per SA */
43174abbdedSMike Li 		.cache_size = 128,
43274abbdedSMike Li 		.cache_level = 1,
4335a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
43474abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
43574abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
43674abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
43774abbdedSMike Li 		.num_cu_shared = 10,
43874abbdedSMike Li 	},
43974abbdedSMike Li 	{
44074abbdedSMike Li 		/* L2 Data Cache per GPU (Total Tex Cache) */
44174abbdedSMike Li 		.cache_size = 4096,
44274abbdedSMike Li 		.cache_level = 2,
4435a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
44474abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
44574abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
44674abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
44774abbdedSMike Li 		.num_cu_shared = 10,
44874abbdedSMike Li 	},
44974abbdedSMike Li };
45074abbdedSMike Li 
45174abbdedSMike Li static struct kfd_gpu_cache_info vangogh_cache_info[] = {
45274abbdedSMike Li 	{
45374abbdedSMike Li 		/* TCP L1 Cache per CU */
45474abbdedSMike Li 		.cache_size = 16,
45574abbdedSMike Li 		.cache_level = 1,
4565a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
45774abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
45874abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
45974abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
46074abbdedSMike Li 		.num_cu_shared = 1,
46174abbdedSMike Li 	},
46274abbdedSMike Li 	{
46374abbdedSMike Li 		/* Scalar L1 Instruction Cache per SQC */
46474abbdedSMike Li 		.cache_size = 32,
46574abbdedSMike Li 		.cache_level = 1,
4665a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
46774abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
46874abbdedSMike Li 				CRAT_CACHE_FLAGS_INST_CACHE |
46974abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
47074abbdedSMike Li 		.num_cu_shared = 2,
47174abbdedSMike Li 	},
47274abbdedSMike Li 	{
47374abbdedSMike Li 		/* Scalar L1 Data Cache per SQC */
47474abbdedSMike Li 		.cache_size = 16,
47574abbdedSMike Li 		.cache_level = 1,
4765a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
47774abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
47874abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
47974abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
48074abbdedSMike Li 		.num_cu_shared = 2,
48174abbdedSMike Li 	},
48274abbdedSMike Li 	{
48374abbdedSMike Li 		/* GL1 Data Cache per SA */
48474abbdedSMike Li 		.cache_size = 128,
48574abbdedSMike Li 		.cache_level = 1,
4865a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
48774abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
48874abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
48974abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
49074abbdedSMike Li 		.num_cu_shared = 8,
49174abbdedSMike Li 	},
49274abbdedSMike Li 	{
49374abbdedSMike Li 		/* L2 Data Cache per GPU (Total Tex Cache) */
49474abbdedSMike Li 		.cache_size = 1024,
49574abbdedSMike Li 		.cache_level = 2,
4965a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
49774abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
49874abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
49974abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
50074abbdedSMike Li 		.num_cu_shared = 8,
50174abbdedSMike Li 	},
50274abbdedSMike Li };
50374abbdedSMike Li 
50474abbdedSMike Li static struct kfd_gpu_cache_info navi14_cache_info[] = {
50574abbdedSMike Li 	{
50674abbdedSMike Li 		/* TCP L1 Cache per CU */
50774abbdedSMike Li 		.cache_size = 16,
50874abbdedSMike Li 		.cache_level = 1,
5095a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
51074abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
51174abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
51274abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
51374abbdedSMike Li 		.num_cu_shared = 1,
51474abbdedSMike Li 	},
51574abbdedSMike Li 	{
51674abbdedSMike Li 		/* Scalar L1 Instruction Cache per SQC */
51774abbdedSMike Li 		.cache_size = 32,
51874abbdedSMike Li 		.cache_level = 1,
5195a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
52074abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
52174abbdedSMike Li 				CRAT_CACHE_FLAGS_INST_CACHE |
52274abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
52374abbdedSMike Li 		.num_cu_shared = 2,
52474abbdedSMike Li 	},
52574abbdedSMike Li 	{
52674abbdedSMike Li 		/* Scalar L1 Data Cache per SQC */
52774abbdedSMike Li 		.cache_size = 16,
52874abbdedSMike Li 		.cache_level = 1,
5295a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
53074abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
53174abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
53274abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
53374abbdedSMike Li 		.num_cu_shared = 2,
53474abbdedSMike Li 	},
53574abbdedSMike Li 	{
53674abbdedSMike Li 		/* GL1 Data Cache per SA */
53774abbdedSMike Li 		.cache_size = 128,
53874abbdedSMike Li 		.cache_level = 1,
5395a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
54074abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
54174abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
54274abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
54374abbdedSMike Li 		.num_cu_shared = 12,
54474abbdedSMike Li 	},
54574abbdedSMike Li 	{
54674abbdedSMike Li 		/* L2 Data Cache per GPU (Total Tex Cache) */
54774abbdedSMike Li 		.cache_size = 2048,
54874abbdedSMike Li 		.cache_level = 2,
5495a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
55074abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
55174abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
55274abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
55374abbdedSMike Li 		.num_cu_shared = 12,
55474abbdedSMike Li 	},
55574abbdedSMike Li };
55674abbdedSMike Li 
55774abbdedSMike Li static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
55874abbdedSMike Li 	{
55974abbdedSMike Li 		/* TCP L1 Cache per CU */
56074abbdedSMike Li 		.cache_size = 16,
56174abbdedSMike Li 		.cache_level = 1,
5625a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
56374abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
56474abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
56574abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
56674abbdedSMike Li 		.num_cu_shared = 1,
56774abbdedSMike Li 	},
56874abbdedSMike Li 	{
56974abbdedSMike Li 		/* Scalar L1 Instruction Cache per SQC */
57074abbdedSMike Li 		.cache_size = 32,
57174abbdedSMike Li 		.cache_level = 1,
5725a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
57374abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
57474abbdedSMike Li 				CRAT_CACHE_FLAGS_INST_CACHE |
57574abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
57674abbdedSMike Li 		.num_cu_shared = 2,
57774abbdedSMike Li 	},
57874abbdedSMike Li 	{
57974abbdedSMike Li 		/* Scalar L1 Data Cache per SQC */
58074abbdedSMike Li 		.cache_size = 16,
58174abbdedSMike Li 		.cache_level = 1,
5825a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
58374abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
58474abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
58574abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
58674abbdedSMike Li 		.num_cu_shared = 2,
58774abbdedSMike Li 	},
58874abbdedSMike Li 	{
58974abbdedSMike Li 		/* GL1 Data Cache per SA */
59074abbdedSMike Li 		.cache_size = 128,
59174abbdedSMike Li 		.cache_level = 1,
5925a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
59374abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
59474abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
59574abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
59674abbdedSMike Li 		.num_cu_shared = 10,
59774abbdedSMike Li 	},
59874abbdedSMike Li 	{
59974abbdedSMike Li 		/* L2 Data Cache per GPU (Total Tex Cache) */
60074abbdedSMike Li 		.cache_size = 4096,
60174abbdedSMike Li 		.cache_level = 2,
6025a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
60374abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
60474abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
60574abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
60674abbdedSMike Li 		.num_cu_shared = 10,
60774abbdedSMike Li 	},
60874abbdedSMike Li 	{
60974abbdedSMike Li 		/* L3 Data Cache per GPU */
61074abbdedSMike Li 		.cache_size = 128*1024,
61174abbdedSMike Li 		.cache_level = 3,
6125a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
61374abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
61474abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
61574abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
61674abbdedSMike Li 		.num_cu_shared = 10,
61774abbdedSMike Li 	},
61874abbdedSMike Li };
61974abbdedSMike Li 
62074abbdedSMike Li static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
62174abbdedSMike Li 	{
62274abbdedSMike Li 		/* TCP L1 Cache per CU */
62374abbdedSMike Li 		.cache_size = 16,
62474abbdedSMike Li 		.cache_level = 1,
6255a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
62674abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
62774abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
62874abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
62974abbdedSMike Li 		.num_cu_shared = 1,
63074abbdedSMike Li 	},
63174abbdedSMike Li 	{
63274abbdedSMike Li 		/* Scalar L1 Instruction Cache per SQC */
63374abbdedSMike Li 		.cache_size = 32,
63474abbdedSMike Li 		.cache_level = 1,
6355a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
63674abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
63774abbdedSMike Li 				CRAT_CACHE_FLAGS_INST_CACHE |
63874abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
63974abbdedSMike Li 		.num_cu_shared = 2,
64074abbdedSMike Li 	},
64174abbdedSMike Li 	{
64274abbdedSMike Li 		/* Scalar L1 Data Cache per SQC */
64374abbdedSMike Li 		.cache_size = 16,
64474abbdedSMike Li 		.cache_level = 1,
6455a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
64674abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
64774abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
64874abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
64974abbdedSMike Li 		.num_cu_shared = 2,
65074abbdedSMike Li 	},
65174abbdedSMike Li 	{
65274abbdedSMike Li 		/* GL1 Data Cache per SA */
65374abbdedSMike Li 		.cache_size = 128,
65474abbdedSMike Li 		.cache_level = 1,
6555a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
65674abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
65774abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
65874abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
65974abbdedSMike Li 		.num_cu_shared = 10,
66074abbdedSMike Li 	},
66174abbdedSMike Li 	{
66274abbdedSMike Li 		/* L2 Data Cache per GPU (Total Tex Cache) */
66374abbdedSMike Li 		.cache_size = 3072,
66474abbdedSMike Li 		.cache_level = 2,
6655a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
66674abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
66774abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
66874abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
66974abbdedSMike Li 		.num_cu_shared = 10,
67074abbdedSMike Li 	},
67174abbdedSMike Li 	{
67274abbdedSMike Li 		/* L3 Data Cache per GPU */
67374abbdedSMike Li 		.cache_size = 96*1024,
67474abbdedSMike Li 		.cache_level = 3,
6755a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
67674abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
67774abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
67874abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
67974abbdedSMike Li 		.num_cu_shared = 10,
68074abbdedSMike Li 	},
68174abbdedSMike Li };
68274abbdedSMike Li 
68374abbdedSMike Li static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
68474abbdedSMike Li 	{
68574abbdedSMike Li 		/* TCP L1 Cache per CU */
68674abbdedSMike Li 		.cache_size = 16,
68774abbdedSMike Li 		.cache_level = 1,
6885a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
68974abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
69074abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
69174abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
69274abbdedSMike Li 		.num_cu_shared = 1,
69374abbdedSMike Li 	},
69474abbdedSMike Li 	{
69574abbdedSMike Li 		/* Scalar L1 Instruction Cache per SQC */
69674abbdedSMike Li 		.cache_size = 32,
69774abbdedSMike Li 		.cache_level = 1,
6985a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
69974abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
70074abbdedSMike Li 				CRAT_CACHE_FLAGS_INST_CACHE |
70174abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
70274abbdedSMike Li 		.num_cu_shared = 2,
70374abbdedSMike Li 	},
70474abbdedSMike Li 	{
70574abbdedSMike Li 		/* Scalar L1 Data Cache per SQC */
70674abbdedSMike Li 		.cache_size = 16,
70774abbdedSMike Li 		.cache_level = 1,
7085a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
70974abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
71074abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
71174abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
71274abbdedSMike Li 		.num_cu_shared = 2,
71374abbdedSMike Li 	},
71474abbdedSMike Li 	{
71574abbdedSMike Li 		/* GL1 Data Cache per SA */
71674abbdedSMike Li 		.cache_size = 128,
71774abbdedSMike Li 		.cache_level = 1,
7185a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
71974abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
72074abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
72174abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
72274abbdedSMike Li 		.num_cu_shared = 8,
72374abbdedSMike Li 	},
72474abbdedSMike Li 	{
72574abbdedSMike Li 		/* L2 Data Cache per GPU (Total Tex Cache) */
72674abbdedSMike Li 		.cache_size = 2048,
72774abbdedSMike Li 		.cache_level = 2,
7285a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
72974abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
73074abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
73174abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
73274abbdedSMike Li 		.num_cu_shared = 8,
73374abbdedSMike Li 	},
73474abbdedSMike Li 	{
73574abbdedSMike Li 		/* L3 Data Cache per GPU */
73674abbdedSMike Li 		.cache_size = 32*1024,
73774abbdedSMike Li 		.cache_level = 3,
7385a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
73974abbdedSMike Li 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
74074abbdedSMike Li 				CRAT_CACHE_FLAGS_DATA_CACHE |
74174abbdedSMike Li 				CRAT_CACHE_FLAGS_SIMD_CACHE),
74274abbdedSMike Li 		.num_cu_shared = 8,
74374abbdedSMike Li 	},
74474abbdedSMike Li };
7453a87177eSHarish Kasiviswanathan 
7465cf607ccSChengming Gui static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
7475cf607ccSChengming Gui 	{
7485cf607ccSChengming Gui 		/* TCP L1 Cache per CU */
7495cf607ccSChengming Gui 		.cache_size = 16,
7505cf607ccSChengming Gui 		.cache_level = 1,
7515a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
7525cf607ccSChengming Gui 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
7535cf607ccSChengming Gui 				CRAT_CACHE_FLAGS_DATA_CACHE |
7545cf607ccSChengming Gui 				CRAT_CACHE_FLAGS_SIMD_CACHE),
7555cf607ccSChengming Gui 		.num_cu_shared = 1,
7565cf607ccSChengming Gui 	},
7575cf607ccSChengming Gui 	{
7585cf607ccSChengming Gui 		/* Scalar L1 Instruction Cache per SQC */
7595cf607ccSChengming Gui 		.cache_size = 32,
7605cf607ccSChengming Gui 		.cache_level = 1,
7615a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
7625cf607ccSChengming Gui 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
7635cf607ccSChengming Gui 				CRAT_CACHE_FLAGS_INST_CACHE |
7645cf607ccSChengming Gui 				CRAT_CACHE_FLAGS_SIMD_CACHE),
7655cf607ccSChengming Gui 		.num_cu_shared = 2,
7665cf607ccSChengming Gui 	},
7675cf607ccSChengming Gui 	{
7685cf607ccSChengming Gui 		/* Scalar L1 Data Cache per SQC */
7695cf607ccSChengming Gui 		.cache_size = 16,
7705cf607ccSChengming Gui 		.cache_level = 1,
7715a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
7725cf607ccSChengming Gui 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
7735cf607ccSChengming Gui 				CRAT_CACHE_FLAGS_DATA_CACHE |
7745cf607ccSChengming Gui 				CRAT_CACHE_FLAGS_SIMD_CACHE),
7755cf607ccSChengming Gui 		.num_cu_shared = 2,
7765cf607ccSChengming Gui 	},
7775cf607ccSChengming Gui 	{
7785cf607ccSChengming Gui 		/* GL1 Data Cache per SA */
7795cf607ccSChengming Gui 		.cache_size = 128,
7805cf607ccSChengming Gui 		.cache_level = 1,
7815a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
7825cf607ccSChengming Gui 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
7835cf607ccSChengming Gui 				CRAT_CACHE_FLAGS_DATA_CACHE |
7845cf607ccSChengming Gui 				CRAT_CACHE_FLAGS_SIMD_CACHE),
7855cf607ccSChengming Gui 		.num_cu_shared = 8,
7865cf607ccSChengming Gui 	},
7875cf607ccSChengming Gui 	{
7885cf607ccSChengming Gui 		/* L2 Data Cache per GPU (Total Tex Cache) */
7895cf607ccSChengming Gui 		.cache_size = 1024,
7905cf607ccSChengming Gui 		.cache_level = 2,
7915a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
7925cf607ccSChengming Gui 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
7935cf607ccSChengming Gui 				CRAT_CACHE_FLAGS_DATA_CACHE |
7945cf607ccSChengming Gui 				CRAT_CACHE_FLAGS_SIMD_CACHE),
7955cf607ccSChengming Gui 		.num_cu_shared = 8,
7965cf607ccSChengming Gui 	},
7975cf607ccSChengming Gui 	{
7985cf607ccSChengming Gui 		/* L3 Data Cache per GPU */
7995cf607ccSChengming Gui 		.cache_size = 16*1024,
8005cf607ccSChengming Gui 		.cache_level = 3,
8015a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
8025cf607ccSChengming Gui 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
8035cf607ccSChengming Gui 				CRAT_CACHE_FLAGS_DATA_CACHE |
8045cf607ccSChengming Gui 				CRAT_CACHE_FLAGS_SIMD_CACHE),
8055cf607ccSChengming Gui 		.num_cu_shared = 8,
8065cf607ccSChengming Gui 	},
8075cf607ccSChengming Gui };
8085cf607ccSChengming Gui 
809bf9d4e88SAaron Liu static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
810bf9d4e88SAaron Liu 	{
811bf9d4e88SAaron Liu 		/* TCP L1 Cache per CU */
812bf9d4e88SAaron Liu 		.cache_size = 16,
813bf9d4e88SAaron Liu 		.cache_level = 1,
8145a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
815bf9d4e88SAaron Liu 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
816bf9d4e88SAaron Liu 				CRAT_CACHE_FLAGS_DATA_CACHE |
817bf9d4e88SAaron Liu 				CRAT_CACHE_FLAGS_SIMD_CACHE),
818bf9d4e88SAaron Liu 		.num_cu_shared = 1,
819bf9d4e88SAaron Liu 	},
820bf9d4e88SAaron Liu 	{
821bf9d4e88SAaron Liu 		/* Scalar L1 Instruction Cache per SQC */
822bf9d4e88SAaron Liu 		.cache_size = 32,
823bf9d4e88SAaron Liu 		.cache_level = 1,
8245a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
825bf9d4e88SAaron Liu 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
826bf9d4e88SAaron Liu 				CRAT_CACHE_FLAGS_INST_CACHE |
827bf9d4e88SAaron Liu 				CRAT_CACHE_FLAGS_SIMD_CACHE),
828bf9d4e88SAaron Liu 		.num_cu_shared = 2,
829bf9d4e88SAaron Liu 	},
830bf9d4e88SAaron Liu 	{
831bf9d4e88SAaron Liu 		/* Scalar L1 Data Cache per SQC */
832bf9d4e88SAaron Liu 		.cache_size = 16,
833bf9d4e88SAaron Liu 		.cache_level = 1,
8345a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
835bf9d4e88SAaron Liu 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
836bf9d4e88SAaron Liu 				CRAT_CACHE_FLAGS_DATA_CACHE |
837bf9d4e88SAaron Liu 				CRAT_CACHE_FLAGS_SIMD_CACHE),
838bf9d4e88SAaron Liu 		.num_cu_shared = 2,
839bf9d4e88SAaron Liu 	},
840bf9d4e88SAaron Liu 	{
841bf9d4e88SAaron Liu 		/* GL1 Data Cache per SA */
842bf9d4e88SAaron Liu 		.cache_size = 128,
843bf9d4e88SAaron Liu 		.cache_level = 1,
8445a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
845bf9d4e88SAaron Liu 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
846bf9d4e88SAaron Liu 				CRAT_CACHE_FLAGS_DATA_CACHE |
847bf9d4e88SAaron Liu 				CRAT_CACHE_FLAGS_SIMD_CACHE),
848bf9d4e88SAaron Liu 		.num_cu_shared = 6,
849bf9d4e88SAaron Liu 	},
850bf9d4e88SAaron Liu 	{
851bf9d4e88SAaron Liu 		/* L2 Data Cache per GPU (Total Tex Cache) */
852bf9d4e88SAaron Liu 		.cache_size = 2048,
853bf9d4e88SAaron Liu 		.cache_level = 2,
8545a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
855bf9d4e88SAaron Liu 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
856bf9d4e88SAaron Liu 				CRAT_CACHE_FLAGS_DATA_CACHE |
857bf9d4e88SAaron Liu 				CRAT_CACHE_FLAGS_SIMD_CACHE),
858bf9d4e88SAaron Liu 		.num_cu_shared = 6,
859bf9d4e88SAaron Liu 	},
860bf9d4e88SAaron Liu };
861bf9d4e88SAaron Liu 
862a9232b06SPrike Liang static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
863a9232b06SPrike Liang 	{
864a9232b06SPrike Liang 		/* TCP L1 Cache per CU */
865a9232b06SPrike Liang 		.cache_size = 16,
866a9232b06SPrike Liang 		.cache_level = 1,
8675a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
868a9232b06SPrike Liang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
869a9232b06SPrike Liang 				CRAT_CACHE_FLAGS_DATA_CACHE |
870a9232b06SPrike Liang 				CRAT_CACHE_FLAGS_SIMD_CACHE),
871a9232b06SPrike Liang 		.num_cu_shared = 1,
872a9232b06SPrike Liang 	},
873a9232b06SPrike Liang 	{
874a9232b06SPrike Liang 		/* Scalar L1 Instruction Cache per SQC */
875a9232b06SPrike Liang 		.cache_size = 32,
876a9232b06SPrike Liang 		.cache_level = 1,
8775a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
878a9232b06SPrike Liang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
879a9232b06SPrike Liang 				CRAT_CACHE_FLAGS_INST_CACHE |
880a9232b06SPrike Liang 				CRAT_CACHE_FLAGS_SIMD_CACHE),
881a9232b06SPrike Liang 		.num_cu_shared = 2,
882a9232b06SPrike Liang 	},
883a9232b06SPrike Liang 	{
884a9232b06SPrike Liang 		/* Scalar L1 Data Cache per SQC */
885a9232b06SPrike Liang 		.cache_size = 16,
886a9232b06SPrike Liang 		.cache_level = 1,
8875a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
888a9232b06SPrike Liang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
889a9232b06SPrike Liang 				CRAT_CACHE_FLAGS_DATA_CACHE |
890a9232b06SPrike Liang 				CRAT_CACHE_FLAGS_SIMD_CACHE),
891a9232b06SPrike Liang 		.num_cu_shared = 2,
892a9232b06SPrike Liang 	},
893a9232b06SPrike Liang 	{
894a9232b06SPrike Liang 		/* GL1 Data Cache per SA */
895a9232b06SPrike Liang 		.cache_size = 128,
896a9232b06SPrike Liang 		.cache_level = 1,
8975a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
898a9232b06SPrike Liang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
899a9232b06SPrike Liang 				CRAT_CACHE_FLAGS_DATA_CACHE |
900a9232b06SPrike Liang 				CRAT_CACHE_FLAGS_SIMD_CACHE),
901a9232b06SPrike Liang 		.num_cu_shared = 2,
902a9232b06SPrike Liang 	},
903a9232b06SPrike Liang 	{
904a9232b06SPrike Liang 		/* L2 Data Cache per GPU (Total Tex Cache) */
905a9232b06SPrike Liang 		.cache_size = 256,
906a9232b06SPrike Liang 		.cache_level = 2,
9075a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
908a9232b06SPrike Liang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
909a9232b06SPrike Liang 				CRAT_CACHE_FLAGS_DATA_CACHE |
910a9232b06SPrike Liang 				CRAT_CACHE_FLAGS_SIMD_CACHE),
911a9232b06SPrike Liang 		.num_cu_shared = 2,
912a9232b06SPrike Liang 	},
913a9232b06SPrike Liang };
914a9232b06SPrike Liang 
915d62eaddbSJesse Zhang static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
916d62eaddbSJesse Zhang 	{
917d62eaddbSJesse Zhang 		/* TCP L1 Cache per CU */
918d62eaddbSJesse Zhang 		.cache_size = 16,
919d62eaddbSJesse Zhang 		.cache_level = 1,
9205a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
921d62eaddbSJesse Zhang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
922d62eaddbSJesse Zhang 			  CRAT_CACHE_FLAGS_DATA_CACHE |
923d62eaddbSJesse Zhang 			  CRAT_CACHE_FLAGS_SIMD_CACHE),
924d62eaddbSJesse Zhang 		.num_cu_shared = 1,
925d62eaddbSJesse Zhang 	},
926d62eaddbSJesse Zhang 	{
927d62eaddbSJesse Zhang 		/* Scalar L1 Instruction Cache per SQC */
928d62eaddbSJesse Zhang 		.cache_size = 32,
929d62eaddbSJesse Zhang 		.cache_level = 1,
9305a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
931d62eaddbSJesse Zhang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
932d62eaddbSJesse Zhang 			  CRAT_CACHE_FLAGS_INST_CACHE |
933d62eaddbSJesse Zhang 			  CRAT_CACHE_FLAGS_SIMD_CACHE),
934d62eaddbSJesse Zhang 		.num_cu_shared = 2,
935d62eaddbSJesse Zhang 	},
936d62eaddbSJesse Zhang 	{
937d62eaddbSJesse Zhang 		/* Scalar L1 Data Cache per SQC */
938d62eaddbSJesse Zhang 		.cache_size = 16,
939d62eaddbSJesse Zhang 		.cache_level = 1,
9405a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
941d62eaddbSJesse Zhang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
942d62eaddbSJesse Zhang 			  CRAT_CACHE_FLAGS_DATA_CACHE |
943d62eaddbSJesse Zhang 			  CRAT_CACHE_FLAGS_SIMD_CACHE),
944d62eaddbSJesse Zhang 		.num_cu_shared = 2,
945d62eaddbSJesse Zhang 	},
946d62eaddbSJesse Zhang 	{
947d62eaddbSJesse Zhang 		/* GL1 Data Cache per SA */
948d62eaddbSJesse Zhang 		.cache_size = 128,
949d62eaddbSJesse Zhang 		.cache_level = 1,
9505a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
951d62eaddbSJesse Zhang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
952d62eaddbSJesse Zhang 			  CRAT_CACHE_FLAGS_DATA_CACHE |
953d62eaddbSJesse Zhang 			  CRAT_CACHE_FLAGS_SIMD_CACHE),
954d62eaddbSJesse Zhang 		.num_cu_shared = 2,
955d62eaddbSJesse Zhang 	},
956d62eaddbSJesse Zhang 	{
957d62eaddbSJesse Zhang 		/* L2 Data Cache per GPU (Total Tex Cache) */
958d62eaddbSJesse Zhang 		.cache_size = 256,
959d62eaddbSJesse Zhang 		.cache_level = 2,
9605a2df8ecSJoseph Greathouse 		.cache_line_size = 128,
961d62eaddbSJesse Zhang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
962d62eaddbSJesse Zhang 			  CRAT_CACHE_FLAGS_DATA_CACHE |
963d62eaddbSJesse Zhang 			  CRAT_CACHE_FLAGS_SIMD_CACHE),
964d62eaddbSJesse Zhang 		.num_cu_shared = 2,
965d62eaddbSJesse Zhang 	},
966d62eaddbSJesse Zhang };
967d62eaddbSJesse Zhang 
968fd72e2cbSPrike Liang static struct kfd_gpu_cache_info dummy_cache_info[] = {
969fd72e2cbSPrike Liang 	{
970fd72e2cbSPrike Liang 		/* TCP L1 Cache per CU */
971fd72e2cbSPrike Liang 		.cache_size = 16,
972fd72e2cbSPrike Liang 		.cache_level = 1,
9735a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
974fd72e2cbSPrike Liang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
975fd72e2cbSPrike Liang 				CRAT_CACHE_FLAGS_DATA_CACHE |
976fd72e2cbSPrike Liang 				CRAT_CACHE_FLAGS_SIMD_CACHE),
977fd72e2cbSPrike Liang 		.num_cu_shared = 1,
978fd72e2cbSPrike Liang 	},
979fd72e2cbSPrike Liang 	{
980fd72e2cbSPrike Liang 		/* Scalar L1 Instruction Cache per SQC */
981fd72e2cbSPrike Liang 		.cache_size = 32,
982fd72e2cbSPrike Liang 		.cache_level = 1,
9835a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
984fd72e2cbSPrike Liang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
985fd72e2cbSPrike Liang 				CRAT_CACHE_FLAGS_INST_CACHE |
986fd72e2cbSPrike Liang 				CRAT_CACHE_FLAGS_SIMD_CACHE),
987fd72e2cbSPrike Liang 		.num_cu_shared = 2,
988fd72e2cbSPrike Liang 	},
989fd72e2cbSPrike Liang 	{
990fd72e2cbSPrike Liang 		/* Scalar L1 Data Cache per SQC */
991fd72e2cbSPrike Liang 		.cache_size = 16,
992fd72e2cbSPrike Liang 		.cache_level = 1,
9935a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
994fd72e2cbSPrike Liang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
995fd72e2cbSPrike Liang 				CRAT_CACHE_FLAGS_DATA_CACHE |
996fd72e2cbSPrike Liang 				CRAT_CACHE_FLAGS_SIMD_CACHE),
997fd72e2cbSPrike Liang 		.num_cu_shared = 2,
998fd72e2cbSPrike Liang 	},
999fd72e2cbSPrike Liang 	{
1000fd72e2cbSPrike Liang 		/* GL1 Data Cache per SA */
1001fd72e2cbSPrike Liang 		.cache_size = 128,
1002fd72e2cbSPrike Liang 		.cache_level = 1,
10035a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
1004fd72e2cbSPrike Liang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
1005fd72e2cbSPrike Liang 				CRAT_CACHE_FLAGS_DATA_CACHE |
1006fd72e2cbSPrike Liang 				CRAT_CACHE_FLAGS_SIMD_CACHE),
1007fd72e2cbSPrike Liang 		.num_cu_shared = 6,
1008fd72e2cbSPrike Liang 	},
1009fd72e2cbSPrike Liang 	{
1010fd72e2cbSPrike Liang 		/* L2 Data Cache per GPU (Total Tex Cache) */
1011fd72e2cbSPrike Liang 		.cache_size = 2048,
1012fd72e2cbSPrike Liang 		.cache_level = 2,
10135a2df8ecSJoseph Greathouse 		.cache_line_size = 64,
1014fd72e2cbSPrike Liang 		.flags = (CRAT_CACHE_FLAGS_ENABLED |
1015fd72e2cbSPrike Liang 				CRAT_CACHE_FLAGS_DATA_CACHE |
1016fd72e2cbSPrike Liang 				CRAT_CACHE_FLAGS_SIMD_CACHE),
1017fd72e2cbSPrike Liang 		.num_cu_shared = 6,
1018fd72e2cbSPrike Liang 	},
1019fd72e2cbSPrike Liang };
1020fd72e2cbSPrike Liang 
kfd_populated_cu_info_cpu(struct kfd_topology_device * dev,struct crat_subtype_computeunit * cu)1021174de876SFelix Kuehling static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
1022174de876SFelix Kuehling 		struct crat_subtype_computeunit *cu)
1023174de876SFelix Kuehling {
1024174de876SFelix Kuehling 	dev->node_props.cpu_cores_count = cu->num_cpu_cores;
1025174de876SFelix Kuehling 	dev->node_props.cpu_core_id_base = cu->processor_id_low;
1026174de876SFelix Kuehling 	if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
1027174de876SFelix Kuehling 		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
1028174de876SFelix Kuehling 
102942aa8793SFelix Kuehling 	pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
1030174de876SFelix Kuehling 			cu->processor_id_low);
1031174de876SFelix Kuehling }
1032174de876SFelix Kuehling 
kfd_populated_cu_info_gpu(struct kfd_topology_device * dev,struct crat_subtype_computeunit * cu)1033174de876SFelix Kuehling static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
1034174de876SFelix Kuehling 		struct crat_subtype_computeunit *cu)
1035174de876SFelix Kuehling {
1036174de876SFelix Kuehling 	dev->node_props.simd_id_base = cu->processor_id_low;
1037174de876SFelix Kuehling 	dev->node_props.simd_count = cu->num_simd_cores;
1038174de876SFelix Kuehling 	dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
1039174de876SFelix Kuehling 	dev->node_props.max_waves_per_simd = cu->max_waves_simd;
1040174de876SFelix Kuehling 	dev->node_props.wave_front_size = cu->wave_front_size;
10413a87177eSHarish Kasiviswanathan 	dev->node_props.array_count = cu->array_count;
1042174de876SFelix Kuehling 	dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
1043174de876SFelix Kuehling 	dev->node_props.simd_per_cu = cu->num_simd_per_cu;
1044174de876SFelix Kuehling 	dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
1045174de876SFelix Kuehling 	if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
1046174de876SFelix Kuehling 		dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
104742aa8793SFelix Kuehling 	pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low);
1048174de876SFelix Kuehling }
1049174de876SFelix Kuehling 
10504f449311SHarish Kasiviswanathan /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
10514f449311SHarish Kasiviswanathan  * topology device present in the device_list
10524f449311SHarish Kasiviswanathan  */
kfd_parse_subtype_cu(struct crat_subtype_computeunit * cu,struct list_head * device_list)10534f449311SHarish Kasiviswanathan static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
10544f449311SHarish Kasiviswanathan 				struct list_head *device_list)
1055174de876SFelix Kuehling {
1056174de876SFelix Kuehling 	struct kfd_topology_device *dev;
1057174de876SFelix Kuehling 
105842aa8793SFelix Kuehling 	pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
1059174de876SFelix Kuehling 			cu->proximity_domain, cu->hsa_capability);
10604f449311SHarish Kasiviswanathan 	list_for_each_entry(dev, device_list, list) {
10614f449311SHarish Kasiviswanathan 		if (cu->proximity_domain == dev->proximity_domain) {
1062174de876SFelix Kuehling 			if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
1063174de876SFelix Kuehling 				kfd_populated_cu_info_cpu(dev, cu);
1064174de876SFelix Kuehling 
1065174de876SFelix Kuehling 			if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
1066174de876SFelix Kuehling 				kfd_populated_cu_info_gpu(dev, cu);
1067174de876SFelix Kuehling 			break;
1068174de876SFelix Kuehling 		}
1069174de876SFelix Kuehling 	}
1070174de876SFelix Kuehling 
1071174de876SFelix Kuehling 	return 0;
1072174de876SFelix Kuehling }
1073174de876SFelix Kuehling 
1074f3ed5df8SYong Zhao static struct kfd_mem_properties *
find_subtype_mem(uint32_t heap_type,uint32_t flags,uint32_t width,struct kfd_topology_device * dev)1075f3ed5df8SYong Zhao find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width,
1076f3ed5df8SYong Zhao 		struct kfd_topology_device *dev)
1077f3ed5df8SYong Zhao {
1078f3ed5df8SYong Zhao 	struct kfd_mem_properties *props;
1079f3ed5df8SYong Zhao 
1080f3ed5df8SYong Zhao 	list_for_each_entry(props, &dev->mem_props, list) {
1081f3ed5df8SYong Zhao 		if (props->heap_type == heap_type
1082f3ed5df8SYong Zhao 				&& props->flags == flags
1083f3ed5df8SYong Zhao 				&& props->width == width)
1084f3ed5df8SYong Zhao 			return props;
1085f3ed5df8SYong Zhao 	}
1086f3ed5df8SYong Zhao 
1087f3ed5df8SYong Zhao 	return NULL;
1088f3ed5df8SYong Zhao }
10894f449311SHarish Kasiviswanathan /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
10904f449311SHarish Kasiviswanathan  * topology device present in the device_list
1091174de876SFelix Kuehling  */
kfd_parse_subtype_mem(struct crat_subtype_memory * mem,struct list_head * device_list)10924f449311SHarish Kasiviswanathan static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
10934f449311SHarish Kasiviswanathan 				struct list_head *device_list)
1094174de876SFelix Kuehling {
1095174de876SFelix Kuehling 	struct kfd_mem_properties *props;
1096174de876SFelix Kuehling 	struct kfd_topology_device *dev;
1097f3ed5df8SYong Zhao 	uint32_t heap_type;
1098f3ed5df8SYong Zhao 	uint64_t size_in_bytes;
1099f3ed5df8SYong Zhao 	uint32_t flags = 0;
1100f3ed5df8SYong Zhao 	uint32_t width;
1101174de876SFelix Kuehling 
110242aa8793SFelix Kuehling 	pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
1103174de876SFelix Kuehling 			mem->proximity_domain);
11044f449311SHarish Kasiviswanathan 	list_for_each_entry(dev, device_list, list) {
11054f449311SHarish Kasiviswanathan 		if (mem->proximity_domain == dev->proximity_domain) {
11063a87177eSHarish Kasiviswanathan 			/* We're on GPU node */
11073a87177eSHarish Kasiviswanathan 			if (dev->node_props.cpu_cores_count == 0) {
11083a87177eSHarish Kasiviswanathan 				/* APU */
11093a87177eSHarish Kasiviswanathan 				if (mem->visibility_type == 0)
1110f3ed5df8SYong Zhao 					heap_type =
11113a87177eSHarish Kasiviswanathan 						HSA_MEM_HEAP_TYPE_FB_PRIVATE;
11123a87177eSHarish Kasiviswanathan 				/* dGPU */
1113174de876SFelix Kuehling 				else
1114f3ed5df8SYong Zhao 					heap_type = mem->visibility_type;
11153a87177eSHarish Kasiviswanathan 			} else
1116f3ed5df8SYong Zhao 				heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
1117174de876SFelix Kuehling 
1118174de876SFelix Kuehling 			if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
1119f3ed5df8SYong Zhao 				flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
1120174de876SFelix Kuehling 			if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
1121f3ed5df8SYong Zhao 				flags |= HSA_MEM_FLAGS_NON_VOLATILE;
1122174de876SFelix Kuehling 
1123f3ed5df8SYong Zhao 			size_in_bytes =
1124174de876SFelix Kuehling 				((uint64_t)mem->length_high << 32) +
1125174de876SFelix Kuehling 							mem->length_low;
1126f3ed5df8SYong Zhao 			width = mem->width;
1127f3ed5df8SYong Zhao 
1128f3ed5df8SYong Zhao 			/* Multiple banks of the same type are aggregated into
1129f3ed5df8SYong Zhao 			 * one. User mode doesn't care about multiple physical
1130f3ed5df8SYong Zhao 			 * memory segments. It's managed as a single virtual
1131f3ed5df8SYong Zhao 			 * heap for user mode.
1132f3ed5df8SYong Zhao 			 */
1133f3ed5df8SYong Zhao 			props = find_subtype_mem(heap_type, flags, width, dev);
1134f3ed5df8SYong Zhao 			if (props) {
1135f3ed5df8SYong Zhao 				props->size_in_bytes += size_in_bytes;
1136f3ed5df8SYong Zhao 				break;
1137f3ed5df8SYong Zhao 			}
1138f3ed5df8SYong Zhao 
1139f3ed5df8SYong Zhao 			props = kfd_alloc_struct(props);
1140f3ed5df8SYong Zhao 			if (!props)
1141f3ed5df8SYong Zhao 				return -ENOMEM;
1142f3ed5df8SYong Zhao 
1143f3ed5df8SYong Zhao 			props->heap_type = heap_type;
1144f3ed5df8SYong Zhao 			props->flags = flags;
1145f3ed5df8SYong Zhao 			props->size_in_bytes = size_in_bytes;
1146f3ed5df8SYong Zhao 			props->width = width;
1147174de876SFelix Kuehling 
1148175b9263SFelix Kuehling 			dev->node_props.mem_banks_count++;
1149174de876SFelix Kuehling 			list_add_tail(&props->list, &dev->mem_props);
1150174de876SFelix Kuehling 
1151174de876SFelix Kuehling 			break;
1152174de876SFelix Kuehling 		}
1153174de876SFelix Kuehling 	}
1154174de876SFelix Kuehling 
1155174de876SFelix Kuehling 	return 0;
1156174de876SFelix Kuehling }
1157174de876SFelix Kuehling 
11584f449311SHarish Kasiviswanathan /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
11594f449311SHarish Kasiviswanathan  * topology device present in the device_list
1160174de876SFelix Kuehling  */
kfd_parse_subtype_cache(struct crat_subtype_cache * cache,struct list_head * device_list)11614f449311SHarish Kasiviswanathan static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
11624f449311SHarish Kasiviswanathan 			struct list_head *device_list)
1163174de876SFelix Kuehling {
1164174de876SFelix Kuehling 	struct kfd_cache_properties *props;
1165174de876SFelix Kuehling 	struct kfd_topology_device *dev;
1166174de876SFelix Kuehling 	uint32_t id;
11673a87177eSHarish Kasiviswanathan 	uint32_t total_num_of_cu;
1168174de876SFelix Kuehling 
1169174de876SFelix Kuehling 	id = cache->processor_id_low;
1170174de876SFelix Kuehling 
117142aa8793SFelix Kuehling 	pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id);
11723a87177eSHarish Kasiviswanathan 	list_for_each_entry(dev, device_list, list) {
11733a87177eSHarish Kasiviswanathan 		total_num_of_cu = (dev->node_props.array_count *
11743a87177eSHarish Kasiviswanathan 					dev->node_props.cu_per_simd_array);
11753a87177eSHarish Kasiviswanathan 
11763a87177eSHarish Kasiviswanathan 		/* Cache infomration in CRAT doesn't have proximity_domain
11773a87177eSHarish Kasiviswanathan 		 * information as it is associated with a CPU core or GPU
11783a87177eSHarish Kasiviswanathan 		 * Compute Unit. So map the cache using CPU core Id or SIMD
11793a87177eSHarish Kasiviswanathan 		 * (GPU) ID.
11803a87177eSHarish Kasiviswanathan 		 * TODO: This works because currently we can safely assume that
11813a87177eSHarish Kasiviswanathan 		 *  Compute Units are parsed before caches are parsed. In
11823a87177eSHarish Kasiviswanathan 		 *  future, remove this dependency
11833a87177eSHarish Kasiviswanathan 		 */
11843a87177eSHarish Kasiviswanathan 		if ((id >= dev->node_props.cpu_core_id_base &&
11853a87177eSHarish Kasiviswanathan 			id <= dev->node_props.cpu_core_id_base +
11863a87177eSHarish Kasiviswanathan 				dev->node_props.cpu_cores_count) ||
11873a87177eSHarish Kasiviswanathan 			(id >= dev->node_props.simd_id_base &&
11883a87177eSHarish Kasiviswanathan 			id < dev->node_props.simd_id_base +
11893a87177eSHarish Kasiviswanathan 				total_num_of_cu)) {
1190174de876SFelix Kuehling 			props = kfd_alloc_struct(props);
1191174de876SFelix Kuehling 			if (!props)
1192174de876SFelix Kuehling 				return -ENOMEM;
1193174de876SFelix Kuehling 
1194174de876SFelix Kuehling 			props->processor_id_low = id;
1195174de876SFelix Kuehling 			props->cache_level = cache->cache_level;
1196174de876SFelix Kuehling 			props->cache_size = cache->cache_size;
1197174de876SFelix Kuehling 			props->cacheline_size = cache->cache_line_size;
1198174de876SFelix Kuehling 			props->cachelines_per_tag = cache->lines_per_tag;
1199174de876SFelix Kuehling 			props->cache_assoc = cache->associativity;
1200174de876SFelix Kuehling 			props->cache_latency = cache->cache_latency;
1201c0cc999fSMa Jun 
12023a87177eSHarish Kasiviswanathan 			memcpy(props->sibling_map, cache->sibling_map,
12034cc16d64SMa Jun 					CRAT_SIBLINGMAP_SIZE);
1204174de876SFelix Kuehling 
1205c0cc999fSMa Jun 			/* set the sibling_map_size as 32 for CRAT from ACPI */
1206c0cc999fSMa Jun 			props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;
1207c0cc999fSMa Jun 
1208174de876SFelix Kuehling 			if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
1209174de876SFelix Kuehling 				props->cache_type |= HSA_CACHE_TYPE_DATA;
1210174de876SFelix Kuehling 			if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
1211174de876SFelix Kuehling 				props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
1212174de876SFelix Kuehling 			if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
1213174de876SFelix Kuehling 				props->cache_type |= HSA_CACHE_TYPE_CPU;
1214174de876SFelix Kuehling 			if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
1215174de876SFelix Kuehling 				props->cache_type |= HSA_CACHE_TYPE_HSACU;
1216174de876SFelix Kuehling 
1217174de876SFelix Kuehling 			dev->node_props.caches_count++;
1218174de876SFelix Kuehling 			list_add_tail(&props->list, &dev->cache_props);
1219174de876SFelix Kuehling 
1220174de876SFelix Kuehling 			break;
1221174de876SFelix Kuehling 		}
12223a87177eSHarish Kasiviswanathan 	}
1223174de876SFelix Kuehling 
1224174de876SFelix Kuehling 	return 0;
1225174de876SFelix Kuehling }
1226174de876SFelix Kuehling 
12274f449311SHarish Kasiviswanathan /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
12284f449311SHarish Kasiviswanathan  * topology device present in the device_list
1229174de876SFelix Kuehling  */
kfd_parse_subtype_iolink(struct crat_subtype_iolink * iolink,struct list_head * device_list)12304f449311SHarish Kasiviswanathan static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
12314f449311SHarish Kasiviswanathan 					struct list_head *device_list)
1232174de876SFelix Kuehling {
12333a87177eSHarish Kasiviswanathan 	struct kfd_iolink_properties *props = NULL, *props2;
1234ae9a25aeSShaoyun Liu 	struct kfd_topology_device *dev, *to_dev;
1235174de876SFelix Kuehling 	uint32_t id_from;
1236174de876SFelix Kuehling 	uint32_t id_to;
1237174de876SFelix Kuehling 
1238174de876SFelix Kuehling 	id_from = iolink->proximity_domain_from;
1239174de876SFelix Kuehling 	id_to = iolink->proximity_domain_to;
1240174de876SFelix Kuehling 
124167f7cf9fSshaoyunl 	pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n",
124267f7cf9fSshaoyunl 			id_from, id_to);
12434f449311SHarish Kasiviswanathan 	list_for_each_entry(dev, device_list, list) {
12444f449311SHarish Kasiviswanathan 		if (id_from == dev->proximity_domain) {
1245174de876SFelix Kuehling 			props = kfd_alloc_struct(props);
1246174de876SFelix Kuehling 			if (!props)
1247174de876SFelix Kuehling 				return -ENOMEM;
1248174de876SFelix Kuehling 
1249174de876SFelix Kuehling 			props->node_from = id_from;
1250174de876SFelix Kuehling 			props->node_to = id_to;
1251174de876SFelix Kuehling 			props->ver_maj = iolink->version_major;
1252174de876SFelix Kuehling 			props->ver_min = iolink->version_minor;
12533a87177eSHarish Kasiviswanathan 			props->iolink_type = iolink->io_interface_type;
1254174de876SFelix Kuehling 
12553a87177eSHarish Kasiviswanathan 			if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
12563a87177eSHarish Kasiviswanathan 				props->weight = 20;
1257ae9a25aeSShaoyun Liu 			else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
125892085240SJonathan Kim 				props->weight = iolink->weight_xgmi;
12593a87177eSHarish Kasiviswanathan 			else
12603a87177eSHarish Kasiviswanathan 				props->weight = node_distance(id_from, id_to);
1261174de876SFelix Kuehling 
1262174de876SFelix Kuehling 			props->min_latency = iolink->minimum_latency;
1263174de876SFelix Kuehling 			props->max_latency = iolink->maximum_latency;
1264174de876SFelix Kuehling 			props->min_bandwidth = iolink->minimum_bandwidth_mbs;
1265174de876SFelix Kuehling 			props->max_bandwidth = iolink->maximum_bandwidth_mbs;
1266174de876SFelix Kuehling 			props->rec_transfer_size =
1267174de876SFelix Kuehling 					iolink->recommended_transfer_size;
1268174de876SFelix Kuehling 
1269174de876SFelix Kuehling 			dev->node_props.io_links_count++;
1270174de876SFelix Kuehling 			list_add_tail(&props->list, &dev->io_link_props);
1271174de876SFelix Kuehling 			break;
1272174de876SFelix Kuehling 		}
1273174de876SFelix Kuehling 	}
1274174de876SFelix Kuehling 
12753a87177eSHarish Kasiviswanathan 	/* CPU topology is created before GPUs are detected, so CPU->GPU
12763a87177eSHarish Kasiviswanathan 	 * links are not built at that time. If a PCIe type is discovered, it
12773a87177eSHarish Kasiviswanathan 	 * means a GPU is detected and we are adding GPU->CPU to the topology.
127867f7cf9fSshaoyunl 	 * At this time, also add the corresponded CPU->GPU link if GPU
127967f7cf9fSshaoyunl 	 * is large bar.
1280ae9a25aeSShaoyun Liu 	 * For xGMI, we only added the link with one direction in the crat
1281ae9a25aeSShaoyun Liu 	 * table, add corresponded reversed direction link now.
12823a87177eSHarish Kasiviswanathan 	 */
128367f7cf9fSshaoyunl 	if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
128446d18d51SMukul Joshi 		to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
1285ae9a25aeSShaoyun Liu 		if (!to_dev)
12863a87177eSHarish Kasiviswanathan 			return -ENODEV;
12873a87177eSHarish Kasiviswanathan 		/* same everything but the other direction */
12883a87177eSHarish Kasiviswanathan 		props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
1289abfaf0eeSJiasheng Jiang 		if (!props2)
1290abfaf0eeSJiasheng Jiang 			return -ENOMEM;
1291abfaf0eeSJiasheng Jiang 
12923a87177eSHarish Kasiviswanathan 		props2->node_from = id_to;
12933a87177eSHarish Kasiviswanathan 		props2->node_to = id_from;
12943a87177eSHarish Kasiviswanathan 		props2->kobj = NULL;
1295ae9a25aeSShaoyun Liu 		to_dev->node_props.io_links_count++;
1296ae9a25aeSShaoyun Liu 		list_add_tail(&props2->list, &to_dev->io_link_props);
12973a87177eSHarish Kasiviswanathan 	}
12983a87177eSHarish Kasiviswanathan 
1299174de876SFelix Kuehling 	return 0;
1300174de876SFelix Kuehling }
1301174de876SFelix Kuehling 
13024f449311SHarish Kasiviswanathan /* kfd_parse_subtype - parse subtypes and attach it to correct topology device
13034f449311SHarish Kasiviswanathan  * present in the device_list
13044f449311SHarish Kasiviswanathan  *	@sub_type_hdr - subtype section of crat_image
13054f449311SHarish Kasiviswanathan  *	@device_list - list of topology devices present in this crat_image
13064f449311SHarish Kasiviswanathan  */
kfd_parse_subtype(struct crat_subtype_generic * sub_type_hdr,struct list_head * device_list)13074f449311SHarish Kasiviswanathan static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
13084f449311SHarish Kasiviswanathan 				struct list_head *device_list)
1309174de876SFelix Kuehling {
1310174de876SFelix Kuehling 	struct crat_subtype_computeunit *cu;
1311174de876SFelix Kuehling 	struct crat_subtype_memory *mem;
1312174de876SFelix Kuehling 	struct crat_subtype_cache *cache;
1313174de876SFelix Kuehling 	struct crat_subtype_iolink *iolink;
1314174de876SFelix Kuehling 	int ret = 0;
1315174de876SFelix Kuehling 
1316174de876SFelix Kuehling 	switch (sub_type_hdr->type) {
1317174de876SFelix Kuehling 	case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
1318174de876SFelix Kuehling 		cu = (struct crat_subtype_computeunit *)sub_type_hdr;
13194f449311SHarish Kasiviswanathan 		ret = kfd_parse_subtype_cu(cu, device_list);
1320174de876SFelix Kuehling 		break;
1321174de876SFelix Kuehling 	case CRAT_SUBTYPE_MEMORY_AFFINITY:
1322174de876SFelix Kuehling 		mem = (struct crat_subtype_memory *)sub_type_hdr;
13234f449311SHarish Kasiviswanathan 		ret = kfd_parse_subtype_mem(mem, device_list);
1324174de876SFelix Kuehling 		break;
1325174de876SFelix Kuehling 	case CRAT_SUBTYPE_CACHE_AFFINITY:
1326174de876SFelix Kuehling 		cache = (struct crat_subtype_cache *)sub_type_hdr;
13274f449311SHarish Kasiviswanathan 		ret = kfd_parse_subtype_cache(cache, device_list);
1328174de876SFelix Kuehling 		break;
1329174de876SFelix Kuehling 	case CRAT_SUBTYPE_TLB_AFFINITY:
1330174de876SFelix Kuehling 		/*
1331174de876SFelix Kuehling 		 * For now, nothing to do here
1332174de876SFelix Kuehling 		 */
133342aa8793SFelix Kuehling 		pr_debug("Found TLB entry in CRAT table (not processing)\n");
1334174de876SFelix Kuehling 		break;
1335174de876SFelix Kuehling 	case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
1336174de876SFelix Kuehling 		/*
1337174de876SFelix Kuehling 		 * For now, nothing to do here
1338174de876SFelix Kuehling 		 */
133942aa8793SFelix Kuehling 		pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n");
1340174de876SFelix Kuehling 		break;
1341174de876SFelix Kuehling 	case CRAT_SUBTYPE_IOLINK_AFFINITY:
1342174de876SFelix Kuehling 		iolink = (struct crat_subtype_iolink *)sub_type_hdr;
13434f449311SHarish Kasiviswanathan 		ret = kfd_parse_subtype_iolink(iolink, device_list);
1344174de876SFelix Kuehling 		break;
1345174de876SFelix Kuehling 	default:
1346174de876SFelix Kuehling 		pr_warn("Unknown subtype %d in CRAT\n",
1347174de876SFelix Kuehling 				sub_type_hdr->type);
1348174de876SFelix Kuehling 	}
1349174de876SFelix Kuehling 
1350174de876SFelix Kuehling 	return ret;
1351174de876SFelix Kuehling }
1352174de876SFelix Kuehling 
13534f449311SHarish Kasiviswanathan /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
13544f449311SHarish Kasiviswanathan  * create a kfd_topology_device and add in to device_list. Also parse
13554f449311SHarish Kasiviswanathan  * CRAT subtypes and attach it to appropriate kfd_topology_device
13564f449311SHarish Kasiviswanathan  *	@crat_image - input image containing CRAT
13574f449311SHarish Kasiviswanathan  *	@device_list - [OUT] list of kfd_topology_device generated after
13584f449311SHarish Kasiviswanathan  *		       parsing crat_image
13594f449311SHarish Kasiviswanathan  *	@proximity_domain - Proximity domain of the first device in the table
13604f449311SHarish Kasiviswanathan  *
13614f449311SHarish Kasiviswanathan  *	Return - 0 if successful else -ve value
13624f449311SHarish Kasiviswanathan  */
kfd_parse_crat_table(void * crat_image,struct list_head * device_list,uint32_t proximity_domain)13634f449311SHarish Kasiviswanathan int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
13644f449311SHarish Kasiviswanathan 			 uint32_t proximity_domain)
1365174de876SFelix Kuehling {
1366520b8fb7SFelix Kuehling 	struct kfd_topology_device *top_dev = NULL;
1367174de876SFelix Kuehling 	struct crat_subtype_generic *sub_type_hdr;
1368174de876SFelix Kuehling 	uint16_t node_id;
13694f449311SHarish Kasiviswanathan 	int ret = 0;
1370174de876SFelix Kuehling 	struct crat_header *crat_table = (struct crat_header *)crat_image;
1371174de876SFelix Kuehling 	uint16_t num_nodes;
1372174de876SFelix Kuehling 	uint32_t image_len;
1373174de876SFelix Kuehling 
1374174de876SFelix Kuehling 	if (!crat_image)
1375174de876SFelix Kuehling 		return -EINVAL;
1376174de876SFelix Kuehling 
13774f449311SHarish Kasiviswanathan 	if (!list_empty(device_list)) {
13784f449311SHarish Kasiviswanathan 		pr_warn("Error device list should be empty\n");
13794f449311SHarish Kasiviswanathan 		return -EINVAL;
13804f449311SHarish Kasiviswanathan 	}
13814f449311SHarish Kasiviswanathan 
1382174de876SFelix Kuehling 	num_nodes = crat_table->num_domains;
1383174de876SFelix Kuehling 	image_len = crat_table->length;
1384174de876SFelix Kuehling 
1385de430916SYong Zhao 	pr_debug("Parsing CRAT table with %d nodes\n", num_nodes);
1386174de876SFelix Kuehling 
1387174de876SFelix Kuehling 	for (node_id = 0; node_id < num_nodes; node_id++) {
13884f449311SHarish Kasiviswanathan 		top_dev = kfd_create_topology_device(device_list);
13894f449311SHarish Kasiviswanathan 		if (!top_dev)
13904f449311SHarish Kasiviswanathan 			break;
13914f449311SHarish Kasiviswanathan 		top_dev->proximity_domain = proximity_domain++;
1392174de876SFelix Kuehling 	}
13934f449311SHarish Kasiviswanathan 
13944f449311SHarish Kasiviswanathan 	if (!top_dev) {
13954f449311SHarish Kasiviswanathan 		ret = -ENOMEM;
13964f449311SHarish Kasiviswanathan 		goto err;
1397174de876SFelix Kuehling 	}
1398174de876SFelix Kuehling 
1399520b8fb7SFelix Kuehling 	memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
1400520b8fb7SFelix Kuehling 	memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
1401520b8fb7SFelix Kuehling 			CRAT_OEMTABLEID_LENGTH);
1402520b8fb7SFelix Kuehling 	top_dev->oem_revision = crat_table->oem_revision;
1403174de876SFelix Kuehling 
1404174de876SFelix Kuehling 	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
1405174de876SFelix Kuehling 	while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
1406174de876SFelix Kuehling 			((char *)crat_image) + image_len) {
1407174de876SFelix Kuehling 		if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
14084f449311SHarish Kasiviswanathan 			ret = kfd_parse_subtype(sub_type_hdr, device_list);
14094f449311SHarish Kasiviswanathan 			if (ret)
14104f449311SHarish Kasiviswanathan 				break;
1411174de876SFelix Kuehling 		}
1412174de876SFelix Kuehling 
1413174de876SFelix Kuehling 		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1414174de876SFelix Kuehling 				sub_type_hdr->length);
1415174de876SFelix Kuehling 	}
1416174de876SFelix Kuehling 
14174f449311SHarish Kasiviswanathan err:
14184f449311SHarish Kasiviswanathan 	if (ret)
14194f449311SHarish Kasiviswanathan 		kfd_release_topology_device_list(device_list);
1420174de876SFelix Kuehling 
14214f449311SHarish Kasiviswanathan 	return ret;
1422174de876SFelix Kuehling }
1423174de876SFelix Kuehling 
1424cc009e61SMukul Joshi 
kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev * kdev,bool cache_line_size_missing,struct kfd_gpu_cache_info * pcache_info)14253b9186faSAlex Deucher static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
1426321048c4SHarish Kasiviswanathan 						   bool cache_line_size_missing,
14273b9186faSAlex Deucher 						   struct kfd_gpu_cache_info *pcache_info)
14283b9186faSAlex Deucher {
14293b9186faSAlex Deucher 	struct amdgpu_device *adev = kdev->adev;
14303b9186faSAlex Deucher 	int i = 0;
14313b9186faSAlex Deucher 
14323b9186faSAlex Deucher 	/* TCP L1 Cache per CU */
14333b9186faSAlex Deucher 	if (adev->gfx.config.gc_tcp_l1_size) {
14343b9186faSAlex Deucher 		pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
14353b9186faSAlex Deucher 		pcache_info[i].cache_level = 1;
14363b9186faSAlex Deucher 		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
14373b9186faSAlex Deucher 					CRAT_CACHE_FLAGS_DATA_CACHE |
14383b9186faSAlex Deucher 					CRAT_CACHE_FLAGS_SIMD_CACHE);
14394e9fadacSDavid Belanger 		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
14404e9fadacSDavid Belanger 		pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
1441321048c4SHarish Kasiviswanathan 		if (cache_line_size_missing && !pcache_info[i].cache_line_size)
1442321048c4SHarish Kasiviswanathan 			pcache_info[i].cache_line_size = 128;
14433b9186faSAlex Deucher 		i++;
14443b9186faSAlex Deucher 	}
14453b9186faSAlex Deucher 	/* Scalar L1 Instruction Cache per SQC */
14463b9186faSAlex Deucher 	if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
14473b9186faSAlex Deucher 		pcache_info[i].cache_size =
14483b9186faSAlex Deucher 			adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
14493b9186faSAlex Deucher 		pcache_info[i].cache_level = 1;
14503b9186faSAlex Deucher 		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
14513b9186faSAlex Deucher 					CRAT_CACHE_FLAGS_INST_CACHE |
14523b9186faSAlex Deucher 					CRAT_CACHE_FLAGS_SIMD_CACHE);
14533b9186faSAlex Deucher 		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
14544e9fadacSDavid Belanger 		pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
1455321048c4SHarish Kasiviswanathan 		if (cache_line_size_missing && !pcache_info[i].cache_line_size)
1456321048c4SHarish Kasiviswanathan 			pcache_info[i].cache_line_size = 128;
14573b9186faSAlex Deucher 		i++;
14583b9186faSAlex Deucher 	}
14593b9186faSAlex Deucher 	/* Scalar L1 Data Cache per SQC */
14603b9186faSAlex Deucher 	if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
14613b9186faSAlex Deucher 		pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
14623b9186faSAlex Deucher 		pcache_info[i].cache_level = 1;
14633b9186faSAlex Deucher 		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
14643b9186faSAlex Deucher 					CRAT_CACHE_FLAGS_DATA_CACHE |
14653b9186faSAlex Deucher 					CRAT_CACHE_FLAGS_SIMD_CACHE);
14663b9186faSAlex Deucher 		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
14674e9fadacSDavid Belanger 		pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
1468321048c4SHarish Kasiviswanathan 		if (cache_line_size_missing && !pcache_info[i].cache_line_size)
1469321048c4SHarish Kasiviswanathan 			pcache_info[i].cache_line_size = 64;
14703b9186faSAlex Deucher 		i++;
14713b9186faSAlex Deucher 	}
14723b9186faSAlex Deucher 	/* GL1 Data Cache per SA */
14733b9186faSAlex Deucher 	if (adev->gfx.config.gc_gl1c_per_sa &&
14743b9186faSAlex Deucher 	    adev->gfx.config.gc_gl1c_size_per_instance) {
14753b9186faSAlex Deucher 		pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
14763b9186faSAlex Deucher 			adev->gfx.config.gc_gl1c_size_per_instance;
14773b9186faSAlex Deucher 		pcache_info[i].cache_level = 1;
14783b9186faSAlex Deucher 		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
14793b9186faSAlex Deucher 					CRAT_CACHE_FLAGS_DATA_CACHE |
14803b9186faSAlex Deucher 					CRAT_CACHE_FLAGS_SIMD_CACHE);
14813b9186faSAlex Deucher 		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1482321048c4SHarish Kasiviswanathan 		if (cache_line_size_missing)
1483321048c4SHarish Kasiviswanathan 			pcache_info[i].cache_line_size = 128;
14843b9186faSAlex Deucher 		i++;
14853b9186faSAlex Deucher 	}
14863b9186faSAlex Deucher 	/* L2 Data Cache per GPU (Total Tex Cache) */
14873b9186faSAlex Deucher 	if (adev->gfx.config.gc_gl2c_per_gpu) {
14883b9186faSAlex Deucher 		pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
14893b9186faSAlex Deucher 		pcache_info[i].cache_level = 2;
14903b9186faSAlex Deucher 		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
14913b9186faSAlex Deucher 					CRAT_CACHE_FLAGS_DATA_CACHE |
14923b9186faSAlex Deucher 					CRAT_CACHE_FLAGS_SIMD_CACHE);
14933b9186faSAlex Deucher 		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
14944e9fadacSDavid Belanger 		pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
1495321048c4SHarish Kasiviswanathan 		if (cache_line_size_missing && !pcache_info[i].cache_line_size)
1496321048c4SHarish Kasiviswanathan 			pcache_info[i].cache_line_size = 128;
14973b9186faSAlex Deucher 		i++;
14983b9186faSAlex Deucher 	}
14993b9186faSAlex Deucher 	/* L3 Data Cache per GPU */
15003b9186faSAlex Deucher 	if (adev->gmc.mall_size) {
15013b9186faSAlex Deucher 		pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
15023b9186faSAlex Deucher 		pcache_info[i].cache_level = 3;
15033b9186faSAlex Deucher 		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
15043b9186faSAlex Deucher 					CRAT_CACHE_FLAGS_DATA_CACHE |
15053b9186faSAlex Deucher 					CRAT_CACHE_FLAGS_SIMD_CACHE);
15063b9186faSAlex Deucher 		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
1507d50bf3f0SHarish Kasiviswanathan 		pcache_info[i].cache_line_size = 64;
15083b9186faSAlex Deucher 		i++;
15093b9186faSAlex Deucher 	}
15103b9186faSAlex Deucher 	return i;
15113b9186faSAlex Deucher }
15123b9186faSAlex Deucher 
kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev * kdev,struct kfd_gpu_cache_info * pcache_info)15130ce8edaeSMukul Joshi static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
15140ce8edaeSMukul Joshi 						   struct kfd_gpu_cache_info *pcache_info)
15150ce8edaeSMukul Joshi {
15160ce8edaeSMukul Joshi 	struct amdgpu_device *adev = kdev->adev;
15170ce8edaeSMukul Joshi 	int i = 0;
15180ce8edaeSMukul Joshi 
15190ce8edaeSMukul Joshi 	/* TCP L1 Cache per CU */
15200ce8edaeSMukul Joshi 	if (adev->gfx.config.gc_tcp_size_per_cu) {
15210ce8edaeSMukul Joshi 		pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu;
15220ce8edaeSMukul Joshi 		pcache_info[i].cache_level = 1;
152355ed120dSDavid Yat Sin 		/* Cacheline size not available in IP discovery for gc943,gc944 */
152455ed120dSDavid Yat Sin 		pcache_info[i].cache_line_size = 128;
15250ce8edaeSMukul Joshi 		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
15260ce8edaeSMukul Joshi 					CRAT_CACHE_FLAGS_DATA_CACHE |
15270ce8edaeSMukul Joshi 					CRAT_CACHE_FLAGS_SIMD_CACHE);
15280ce8edaeSMukul Joshi 		pcache_info[i].num_cu_shared = 1;
15290ce8edaeSMukul Joshi 		i++;
15300ce8edaeSMukul Joshi 	}
15310ce8edaeSMukul Joshi 	/* Scalar L1 Instruction Cache per SQC */
15320ce8edaeSMukul Joshi 	if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
15330ce8edaeSMukul Joshi 		pcache_info[i].cache_size =
15340ce8edaeSMukul Joshi 			adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
15350ce8edaeSMukul Joshi 		pcache_info[i].cache_level = 1;
153655ed120dSDavid Yat Sin 		pcache_info[i].cache_line_size = 64;
15370ce8edaeSMukul Joshi 		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
15380ce8edaeSMukul Joshi 					CRAT_CACHE_FLAGS_INST_CACHE |
15390ce8edaeSMukul Joshi 					CRAT_CACHE_FLAGS_SIMD_CACHE);
15400ce8edaeSMukul Joshi 		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc;
15410ce8edaeSMukul Joshi 		i++;
15420ce8edaeSMukul Joshi 	}
15430ce8edaeSMukul Joshi 	/* Scalar L1 Data Cache per SQC */
15440ce8edaeSMukul Joshi 	if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
15450ce8edaeSMukul Joshi 		pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
15460ce8edaeSMukul Joshi 		pcache_info[i].cache_level = 1;
154755ed120dSDavid Yat Sin 		pcache_info[i].cache_line_size = 64;
15480ce8edaeSMukul Joshi 		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
15490ce8edaeSMukul Joshi 					CRAT_CACHE_FLAGS_DATA_CACHE |
15500ce8edaeSMukul Joshi 					CRAT_CACHE_FLAGS_SIMD_CACHE);
15510ce8edaeSMukul Joshi 		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc;
15520ce8edaeSMukul Joshi 		i++;
15530ce8edaeSMukul Joshi 	}
15540ce8edaeSMukul Joshi 	/* L2 Data Cache per GPU (Total Tex Cache) */
15550ce8edaeSMukul Joshi 	if (adev->gfx.config.gc_tcc_size) {
15560ce8edaeSMukul Joshi 		pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size;
15570ce8edaeSMukul Joshi 		pcache_info[i].cache_level = 2;
155855ed120dSDavid Yat Sin 		pcache_info[i].cache_line_size = 128;
15590ce8edaeSMukul Joshi 		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
15600ce8edaeSMukul Joshi 					CRAT_CACHE_FLAGS_DATA_CACHE |
15610ce8edaeSMukul Joshi 					CRAT_CACHE_FLAGS_SIMD_CACHE);
15620ce8edaeSMukul Joshi 		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
15630ce8edaeSMukul Joshi 		i++;
15640ce8edaeSMukul Joshi 	}
15650ce8edaeSMukul Joshi 	/* L3 Data Cache per GPU */
15660ce8edaeSMukul Joshi 	if (adev->gmc.mall_size) {
15670ce8edaeSMukul Joshi 		pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
15680ce8edaeSMukul Joshi 		pcache_info[i].cache_level = 3;
156955ed120dSDavid Yat Sin 		pcache_info[i].cache_line_size = 64;
15700ce8edaeSMukul Joshi 		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
15710ce8edaeSMukul Joshi 					CRAT_CACHE_FLAGS_DATA_CACHE |
15720ce8edaeSMukul Joshi 					CRAT_CACHE_FLAGS_SIMD_CACHE);
15730ce8edaeSMukul Joshi 		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
15740ce8edaeSMukul Joshi 		i++;
15750ce8edaeSMukul Joshi 	}
15760ce8edaeSMukul Joshi 	return i;
15770ce8edaeSMukul Joshi }
15780ce8edaeSMukul Joshi 
kfd_get_gpu_cache_info(struct kfd_node * kdev,struct kfd_gpu_cache_info ** pcache_info)15798dc1db31SMukul Joshi int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
15803a87177eSHarish Kasiviswanathan {
15813a87177eSHarish Kasiviswanathan 	int num_of_cache_types = 0;
1582321048c4SHarish Kasiviswanathan 	bool cache_line_size_missing = false;
15833a87177eSHarish Kasiviswanathan 
15847eb0502aSGraham Sider 	switch (kdev->adev->asic_type) {
15853a87177eSHarish Kasiviswanathan 	case CHIP_KAVERI:
1586c0cc999fSMa Jun 		*pcache_info = kaveri_cache_info;
15873a87177eSHarish Kasiviswanathan 		num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
15883a87177eSHarish Kasiviswanathan 		break;
15893a87177eSHarish Kasiviswanathan 	case CHIP_HAWAII:
1590c0cc999fSMa Jun 		*pcache_info = hawaii_cache_info;
15913a87177eSHarish Kasiviswanathan 		num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
15923a87177eSHarish Kasiviswanathan 		break;
15933a87177eSHarish Kasiviswanathan 	case CHIP_CARRIZO:
1594c0cc999fSMa Jun 		*pcache_info = carrizo_cache_info;
15953a87177eSHarish Kasiviswanathan 		num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
15963a87177eSHarish Kasiviswanathan 		break;
15973a87177eSHarish Kasiviswanathan 	case CHIP_TONGA:
1598c0cc999fSMa Jun 		*pcache_info = tonga_cache_info;
15993a87177eSHarish Kasiviswanathan 		num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
16003a87177eSHarish Kasiviswanathan 		break;
16013a87177eSHarish Kasiviswanathan 	case CHIP_FIJI:
1602c0cc999fSMa Jun 		*pcache_info = fiji_cache_info;
16033a87177eSHarish Kasiviswanathan 		num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
16043a87177eSHarish Kasiviswanathan 		break;
16053a87177eSHarish Kasiviswanathan 	case CHIP_POLARIS10:
1606c0cc999fSMa Jun 		*pcache_info = polaris10_cache_info;
16073a87177eSHarish Kasiviswanathan 		num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
16083a87177eSHarish Kasiviswanathan 		break;
16093a87177eSHarish Kasiviswanathan 	case CHIP_POLARIS11:
1610c0cc999fSMa Jun 		*pcache_info = polaris11_cache_info;
16113a87177eSHarish Kasiviswanathan 		num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
16123a87177eSHarish Kasiviswanathan 		break;
1613846a44d7SGang Ba 	case CHIP_POLARIS12:
1614c0cc999fSMa Jun 		*pcache_info = polaris12_cache_info;
1615846a44d7SGang Ba 		num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
1616846a44d7SGang Ba 		break;
1617ed81cd6eSKent Russell 	case CHIP_VEGAM:
1618c0cc999fSMa Jun 		*pcache_info = vegam_cache_info;
1619ed81cd6eSKent Russell 		num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
1620ed81cd6eSKent Russell 		break;
1621e4804a39SGraham Sider 	default:
1622e4804a39SGraham Sider 		switch (KFD_GC_VERSION(kdev)) {
1623e4804a39SGraham Sider 		case IP_VERSION(9, 0, 1):
1624c0cc999fSMa Jun 			*pcache_info = vega10_cache_info;
1625389056e5SFelix Kuehling 			num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
1626389056e5SFelix Kuehling 			break;
1627e4804a39SGraham Sider 		case IP_VERSION(9, 2, 1):
1628c0cc999fSMa Jun 			*pcache_info = vega12_cache_info;
162974abbdedSMike Li 			num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
163074abbdedSMike Li 			break;
1631e4804a39SGraham Sider 		case IP_VERSION(9, 4, 0):
1632e4804a39SGraham Sider 		case IP_VERSION(9, 4, 1):
1633c0cc999fSMa Jun 			*pcache_info = vega20_cache_info;
163474abbdedSMike Li 			num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
163574abbdedSMike Li 			break;
1636e4804a39SGraham Sider 		case IP_VERSION(9, 4, 2):
1637c0cc999fSMa Jun 			*pcache_info = aldebaran_cache_info;
163874abbdedSMike Li 			num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
163974abbdedSMike Li 			break;
16400ce8edaeSMukul Joshi 		case IP_VERSION(9, 4, 3):
16415f571c61SHawking Zhang 		case IP_VERSION(9, 4, 4):
164271985559SAlex Sierra 		case IP_VERSION(9, 5, 0):
16430ce8edaeSMukul Joshi 			num_of_cache_types =
16440ce8edaeSMukul Joshi 				kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
16450ce8edaeSMukul Joshi 									*pcache_info);
16460ce8edaeSMukul Joshi 			break;
1647e4804a39SGraham Sider 		case IP_VERSION(9, 1, 0):
1648e4804a39SGraham Sider 		case IP_VERSION(9, 2, 2):
1649c0cc999fSMa Jun 			*pcache_info = raven_cache_info;
1650389056e5SFelix Kuehling 			num_of_cache_types = ARRAY_SIZE(raven_cache_info);
1651737298d1SGustavo A. R. Silva 			break;
1652e4804a39SGraham Sider 		case IP_VERSION(9, 3, 0):
1653c0cc999fSMa Jun 			*pcache_info = renoir_cache_info;
1654a8d42f17SHuang Rui 			num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
1655a8d42f17SHuang Rui 			break;
1656e4804a39SGraham Sider 		case IP_VERSION(10, 1, 10):
1657e4804a39SGraham Sider 		case IP_VERSION(10, 1, 2):
1658e4804a39SGraham Sider 		case IP_VERSION(10, 1, 3):
1659f9ed188dSLang Yu 		case IP_VERSION(10, 1, 4):
1660c0cc999fSMa Jun 			*pcache_info = navi10_cache_info;
166114328aa5SPhilip Cox 			num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
1662389056e5SFelix Kuehling 			break;
1663e4804a39SGraham Sider 		case IP_VERSION(10, 1, 1):
1664c0cc999fSMa Jun 			*pcache_info = navi14_cache_info;
166574abbdedSMike Li 			num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
166674abbdedSMike Li 			break;
1667e4804a39SGraham Sider 		case IP_VERSION(10, 3, 0):
1668c0cc999fSMa Jun 			*pcache_info = sienna_cichlid_cache_info;
166974abbdedSMike Li 			num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
167074abbdedSMike Li 			break;
1671e4804a39SGraham Sider 		case IP_VERSION(10, 3, 2):
1672c0cc999fSMa Jun 			*pcache_info = navy_flounder_cache_info;
167374abbdedSMike Li 			num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
167474abbdedSMike Li 			break;
1675e4804a39SGraham Sider 		case IP_VERSION(10, 3, 4):
1676c0cc999fSMa Jun 			*pcache_info = dimgrey_cavefish_cache_info;
167774abbdedSMike Li 			num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
167874abbdedSMike Li 			break;
1679e4804a39SGraham Sider 		case IP_VERSION(10, 3, 1):
1680c0cc999fSMa Jun 			*pcache_info = vangogh_cache_info;
16813a5e715dSHuang Rui 			num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
16823a5e715dSHuang Rui 			break;
1683e4804a39SGraham Sider 		case IP_VERSION(10, 3, 5):
1684c0cc999fSMa Jun 			*pcache_info = beige_goby_cache_info;
16855cf607ccSChengming Gui 			num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
16865cf607ccSChengming Gui 			break;
1687e4804a39SGraham Sider 		case IP_VERSION(10, 3, 3):
1688c0cc999fSMa Jun 			*pcache_info = yellow_carp_cache_info;
1689bf9d4e88SAaron Liu 			num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
1690bf9d4e88SAaron Liu 			break;
1691d62eaddbSJesse Zhang 		case IP_VERSION(10, 3, 6):
1692c0cc999fSMa Jun 			*pcache_info = gc_10_3_6_cache_info;
1693d62eaddbSJesse Zhang 			num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
1694d62eaddbSJesse Zhang 			break;
1695a9232b06SPrike Liang 		case IP_VERSION(10, 3, 7):
1696c0cc999fSMa Jun 			*pcache_info = gfx1037_cache_info;
1697a9232b06SPrike Liang 			num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
1698a9232b06SPrike Liang 			break;
1699cc009e61SMukul Joshi 		case IP_VERSION(11, 0, 0):
170026776a70SHuang Rui 		case IP_VERSION(11, 0, 1):
1701ec661f1cSEric Huang 		case IP_VERSION(11, 0, 2):
17025ddb5fe9SDavid Belanger 		case IP_VERSION(11, 0, 3):
170388c21c2bSYifan Zhang 		case IP_VERSION(11, 0, 4):
1704afac198cSLang Yu 		case IP_VERSION(11, 5, 0):
1705f5f83441SYifan Zhang 		case IP_VERSION(11, 5, 1):
170653c3a374STim Huang 		case IP_VERSION(11, 5, 2):
1707b784faebSTim Huang 		case IP_VERSION(11, 5, 3):
1708321048c4SHarish Kasiviswanathan 			/* Cacheline size not available in IP discovery for gc11.
1709321048c4SHarish Kasiviswanathan 			 * kfd_fill_gpu_cache_info_from_gfx_config to hard code it
1710321048c4SHarish Kasiviswanathan 			 */
1711321048c4SHarish Kasiviswanathan 			cache_line_size_missing = true;
1712321048c4SHarish Kasiviswanathan 			fallthrough;
1713592a5d7dSDavid Belanger 		case IP_VERSION(12, 0, 0):
1714592a5d7dSDavid Belanger 		case IP_VERSION(12, 0, 1):
1715cc009e61SMukul Joshi 			num_of_cache_types =
1716321048c4SHarish Kasiviswanathan 				kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd,
1717321048c4SHarish Kasiviswanathan 									cache_line_size_missing,
1718321048c4SHarish Kasiviswanathan 									*pcache_info);
1719cc009e61SMukul Joshi 			break;
17203a87177eSHarish Kasiviswanathan 		default:
1721c0cc999fSMa Jun 			*pcache_info = dummy_cache_info;
1722fd72e2cbSPrike Liang 			num_of_cache_types = ARRAY_SIZE(dummy_cache_info);
1723fd72e2cbSPrike Liang 			pr_warn("dummy cache info is used temporarily and real cache info need update later.\n");
1724fd72e2cbSPrike Liang 			break;
17253a87177eSHarish Kasiviswanathan 		}
1726e4804a39SGraham Sider 	}
1727c0cc999fSMa Jun 	return num_of_cache_types;
17283a87177eSHarish Kasiviswanathan }
17293a87177eSHarish Kasiviswanathan 
/* Size of the memory allocated for a GPU Virtual CRAT.
 * There is no easy way to predict how much memory a Virtual CRAT requires,
 * so a fixed amount that is expected to cover all known conditions is
 * allocated. To be safe, an additional check is put in the code to ensure
 * we don't write past it.
 */
#define VCRAT_SIZE_FOR_GPU	(4 * PAGE_SIZE)
1737520b8fb7SFelix Kuehling 
1738520b8fb7SFelix Kuehling /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
1739520b8fb7SFelix Kuehling  *
1740520b8fb7SFelix Kuehling  *	@numa_node_id: CPU NUMA node id
1741520b8fb7SFelix Kuehling  *	@avail_size: Available size in the memory
1742520b8fb7SFelix Kuehling  *	@sub_type_hdr: Memory into which compute info will be filled in
1743520b8fb7SFelix Kuehling  *
1744520b8fb7SFelix Kuehling  *	Return 0 if successful else return -ve value
1745520b8fb7SFelix Kuehling  */
kfd_fill_cu_for_cpu(int numa_node_id,int * avail_size,int proximity_domain,struct crat_subtype_computeunit * sub_type_hdr)1746520b8fb7SFelix Kuehling static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
1747520b8fb7SFelix Kuehling 				int proximity_domain,
1748520b8fb7SFelix Kuehling 				struct crat_subtype_computeunit *sub_type_hdr)
1749520b8fb7SFelix Kuehling {
1750520b8fb7SFelix Kuehling 	const struct cpumask *cpumask;
1751520b8fb7SFelix Kuehling 
1752520b8fb7SFelix Kuehling 	*avail_size -= sizeof(struct crat_subtype_computeunit);
1753520b8fb7SFelix Kuehling 	if (*avail_size < 0)
1754520b8fb7SFelix Kuehling 		return -ENOMEM;
1755520b8fb7SFelix Kuehling 
1756520b8fb7SFelix Kuehling 	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
1757520b8fb7SFelix Kuehling 
1758520b8fb7SFelix Kuehling 	/* Fill in subtype header data */
1759520b8fb7SFelix Kuehling 	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
1760520b8fb7SFelix Kuehling 	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
1761520b8fb7SFelix Kuehling 	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
1762520b8fb7SFelix Kuehling 
1763520b8fb7SFelix Kuehling 	cpumask = cpumask_of_node(numa_node_id);
1764520b8fb7SFelix Kuehling 
1765520b8fb7SFelix Kuehling 	/* Fill in CU data */
1766520b8fb7SFelix Kuehling 	sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
1767520b8fb7SFelix Kuehling 	sub_type_hdr->proximity_domain = proximity_domain;
1768520b8fb7SFelix Kuehling 	sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
1769520b8fb7SFelix Kuehling 	if (sub_type_hdr->processor_id_low == -1)
1770520b8fb7SFelix Kuehling 		return -EINVAL;
1771520b8fb7SFelix Kuehling 
1772520b8fb7SFelix Kuehling 	sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);
1773520b8fb7SFelix Kuehling 
1774520b8fb7SFelix Kuehling 	return 0;
1775520b8fb7SFelix Kuehling }
1776520b8fb7SFelix Kuehling 
1777520b8fb7SFelix Kuehling /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
1778520b8fb7SFelix Kuehling  *
1779520b8fb7SFelix Kuehling  *	@numa_node_id: CPU NUMA node id
1780520b8fb7SFelix Kuehling  *	@avail_size: Available size in the memory
1781520b8fb7SFelix Kuehling  *	@sub_type_hdr: Memory into which compute info will be filled in
1782520b8fb7SFelix Kuehling  *
1783520b8fb7SFelix Kuehling  *	Return 0 if successful else return -ve value
1784520b8fb7SFelix Kuehling  */
kfd_fill_mem_info_for_cpu(int numa_node_id,int * avail_size,int proximity_domain,struct crat_subtype_memory * sub_type_hdr)1785520b8fb7SFelix Kuehling static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
1786520b8fb7SFelix Kuehling 			int proximity_domain,
1787520b8fb7SFelix Kuehling 			struct crat_subtype_memory *sub_type_hdr)
1788520b8fb7SFelix Kuehling {
1789520b8fb7SFelix Kuehling 	uint64_t mem_in_bytes = 0;
1790520b8fb7SFelix Kuehling 	pg_data_t *pgdat;
1791520b8fb7SFelix Kuehling 	int zone_type;
1792520b8fb7SFelix Kuehling 
1793520b8fb7SFelix Kuehling 	*avail_size -= sizeof(struct crat_subtype_memory);
1794520b8fb7SFelix Kuehling 	if (*avail_size < 0)
1795520b8fb7SFelix Kuehling 		return -ENOMEM;
1796520b8fb7SFelix Kuehling 
1797520b8fb7SFelix Kuehling 	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
1798520b8fb7SFelix Kuehling 
1799520b8fb7SFelix Kuehling 	/* Fill in subtype header data */
1800520b8fb7SFelix Kuehling 	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
1801520b8fb7SFelix Kuehling 	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
1802520b8fb7SFelix Kuehling 	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
1803520b8fb7SFelix Kuehling 
1804520b8fb7SFelix Kuehling 	/* Fill in Memory Subunit data */
1805520b8fb7SFelix Kuehling 
1806520b8fb7SFelix Kuehling 	/* Unlike si_meminfo, si_meminfo_node is not exported. So
1807520b8fb7SFelix Kuehling 	 * the following lines are duplicated from si_meminfo_node
1808520b8fb7SFelix Kuehling 	 * function
1809520b8fb7SFelix Kuehling 	 */
1810520b8fb7SFelix Kuehling 	pgdat = NODE_DATA(numa_node_id);
1811520b8fb7SFelix Kuehling 	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
18129705bea5SArun KS 		mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
1813520b8fb7SFelix Kuehling 	mem_in_bytes <<= PAGE_SHIFT;
1814520b8fb7SFelix Kuehling 
1815520b8fb7SFelix Kuehling 	sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
1816520b8fb7SFelix Kuehling 	sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
1817520b8fb7SFelix Kuehling 	sub_type_hdr->proximity_domain = proximity_domain;
1818520b8fb7SFelix Kuehling 
1819520b8fb7SFelix Kuehling 	return 0;
1820520b8fb7SFelix Kuehling }
1821520b8fb7SFelix Kuehling 
#ifdef CONFIG_X86_64
/* kfd_fill_iolink_info_for_cpu - Fill in IO links from a CPU NUMA node to
 * every other online node
 *
 *	@numa_node_id: CPU NUMA node id the links start from
 *	@avail_size: Available size in the memory; reduced by one iolink
 *		entry per link attempted
 *	@num_entries: [OUT] number of iolink entries written
 *	@sub_type_hdr: Memory into which the iolink entries are written
 *		back to back
 *
 *	Return 0 if successful, -ENOMEM if an entry does not fit. On -ENOMEM
 *	@num_entries holds the number of entries written before the failure.
 */
static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
				uint32_t *num_entries,
				struct crat_subtype_iolink *sub_type_hdr)
{
	struct crat_subtype_iolink *link = sub_type_hdr;
	struct cpuinfo_x86 *cpu0 = &cpu_data(0);
	uint8_t link_type;
	int peer;

	/* The interface type is chosen from the vendor of CPU 0 */
	link_type = (cpu0->x86_vendor == X86_VENDOR_AMD) ?
			CRAT_IOLINK_TYPE_HYPERTRANSPORT :
			CRAT_IOLINK_TYPE_QPI_1_1;

	*num_entries = 0;

	for_each_online_node(peer) {
		/* No link from the node to itself */
		if (peer == numa_node_id)
			continue;

		*avail_size -= sizeof(*link);
		if (*avail_size < 0)
			return -ENOMEM;

		memset(link, 0, sizeof(*link));

		/* Subtype header */
		link->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
		link->length = sizeof(*link);
		link->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

		/* Link endpoints are recorded as NUMA node ids */
		link->proximity_domain_from = numa_node_id;
		link->proximity_domain_to = peer;
		link->io_interface_type = link_type;

		*num_entries += 1;
		link++;
	}

	return 0;
}
#endif
1866520b8fb7SFelix Kuehling 
1867520b8fb7SFelix Kuehling /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
1868520b8fb7SFelix Kuehling  *
1869520b8fb7SFelix Kuehling  *	@pcrat_image: Fill in VCRAT for CPU
1870520b8fb7SFelix Kuehling  *	@size:	[IN] allocated size of crat_image.
1871520b8fb7SFelix Kuehling  *		[OUT] actual size of data filled in crat_image
1872520b8fb7SFelix Kuehling  */
kfd_create_vcrat_image_cpu(void * pcrat_image,size_t * size)1873520b8fb7SFelix Kuehling static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
1874520b8fb7SFelix Kuehling {
1875520b8fb7SFelix Kuehling 	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
1876520b8fb7SFelix Kuehling 	struct acpi_table_header *acpi_table;
1877520b8fb7SFelix Kuehling 	acpi_status status;
1878520b8fb7SFelix Kuehling 	struct crat_subtype_generic *sub_type_hdr;
1879520b8fb7SFelix Kuehling 	int avail_size = *size;
1880520b8fb7SFelix Kuehling 	int numa_node_id;
1881d1c234e2SFelix Kuehling #ifdef CONFIG_X86_64
1882520b8fb7SFelix Kuehling 	uint32_t entries = 0;
1883d1c234e2SFelix Kuehling #endif
1884520b8fb7SFelix Kuehling 	int ret = 0;
1885520b8fb7SFelix Kuehling 
1886b7b6c385SKent Russell 	if (!pcrat_image)
1887520b8fb7SFelix Kuehling 		return -EINVAL;
1888520b8fb7SFelix Kuehling 
1889520b8fb7SFelix Kuehling 	/* Fill in CRAT Header.
1890520b8fb7SFelix Kuehling 	 * Modify length and total_entries as subunits are added.
1891520b8fb7SFelix Kuehling 	 */
1892520b8fb7SFelix Kuehling 	avail_size -= sizeof(struct crat_header);
1893520b8fb7SFelix Kuehling 	if (avail_size < 0)
1894520b8fb7SFelix Kuehling 		return -ENOMEM;
1895520b8fb7SFelix Kuehling 
1896520b8fb7SFelix Kuehling 	memset(crat_table, 0, sizeof(struct crat_header));
1897520b8fb7SFelix Kuehling 	memcpy(&crat_table->signature, CRAT_SIGNATURE,
1898520b8fb7SFelix Kuehling 			sizeof(crat_table->signature));
1899520b8fb7SFelix Kuehling 	crat_table->length = sizeof(struct crat_header);
1900520b8fb7SFelix Kuehling 
1901520b8fb7SFelix Kuehling 	status = acpi_get_table("DSDT", 0, &acpi_table);
190248a44387SArnd Bergmann 	if (status != AE_OK)
1903520b8fb7SFelix Kuehling 		pr_warn("DSDT table not found for OEM information\n");
1904520b8fb7SFelix Kuehling 	else {
1905520b8fb7SFelix Kuehling 		crat_table->oem_revision = acpi_table->revision;
1906520b8fb7SFelix Kuehling 		memcpy(crat_table->oem_id, acpi_table->oem_id,
1907520b8fb7SFelix Kuehling 				CRAT_OEMID_LENGTH);
1908520b8fb7SFelix Kuehling 		memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
1909520b8fb7SFelix Kuehling 				CRAT_OEMTABLEID_LENGTH);
1910c4cb773cSHanjun Guo 		acpi_put_table(acpi_table);
1911520b8fb7SFelix Kuehling 	}
1912520b8fb7SFelix Kuehling 	crat_table->total_entries = 0;
1913520b8fb7SFelix Kuehling 	crat_table->num_domains = 0;
1914520b8fb7SFelix Kuehling 
1915520b8fb7SFelix Kuehling 	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
1916520b8fb7SFelix Kuehling 
1917520b8fb7SFelix Kuehling 	for_each_online_node(numa_node_id) {
1918520b8fb7SFelix Kuehling 		if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
1919520b8fb7SFelix Kuehling 			continue;
1920520b8fb7SFelix Kuehling 
1921520b8fb7SFelix Kuehling 		/* Fill in Subtype: Compute Unit */
1922520b8fb7SFelix Kuehling 		ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
1923520b8fb7SFelix Kuehling 			crat_table->num_domains,
1924520b8fb7SFelix Kuehling 			(struct crat_subtype_computeunit *)sub_type_hdr);
1925520b8fb7SFelix Kuehling 		if (ret < 0)
1926520b8fb7SFelix Kuehling 			return ret;
1927520b8fb7SFelix Kuehling 		crat_table->length += sub_type_hdr->length;
1928520b8fb7SFelix Kuehling 		crat_table->total_entries++;
1929520b8fb7SFelix Kuehling 
1930520b8fb7SFelix Kuehling 		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1931520b8fb7SFelix Kuehling 			sub_type_hdr->length);
1932520b8fb7SFelix Kuehling 
1933520b8fb7SFelix Kuehling 		/* Fill in Subtype: Memory */
1934520b8fb7SFelix Kuehling 		ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
1935520b8fb7SFelix Kuehling 			crat_table->num_domains,
1936520b8fb7SFelix Kuehling 			(struct crat_subtype_memory *)sub_type_hdr);
1937520b8fb7SFelix Kuehling 		if (ret < 0)
1938520b8fb7SFelix Kuehling 			return ret;
1939520b8fb7SFelix Kuehling 		crat_table->length += sub_type_hdr->length;
1940520b8fb7SFelix Kuehling 		crat_table->total_entries++;
1941520b8fb7SFelix Kuehling 
1942520b8fb7SFelix Kuehling 		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1943520b8fb7SFelix Kuehling 			sub_type_hdr->length);
1944520b8fb7SFelix Kuehling 
1945520b8fb7SFelix Kuehling 		/* Fill in Subtype: IO Link */
1946d1c234e2SFelix Kuehling #ifdef CONFIG_X86_64
1947520b8fb7SFelix Kuehling 		ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
1948520b8fb7SFelix Kuehling 				&entries,
1949520b8fb7SFelix Kuehling 				(struct crat_subtype_iolink *)sub_type_hdr);
1950520b8fb7SFelix Kuehling 		if (ret < 0)
1951520b8fb7SFelix Kuehling 			return ret;
19520257b464SJeremy Cline 
19530257b464SJeremy Cline 		if (entries) {
1954520b8fb7SFelix Kuehling 			crat_table->length += (sub_type_hdr->length * entries);
1955520b8fb7SFelix Kuehling 			crat_table->total_entries += entries;
1956520b8fb7SFelix Kuehling 
1957520b8fb7SFelix Kuehling 			sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1958520b8fb7SFelix Kuehling 					sub_type_hdr->length * entries);
19590257b464SJeremy Cline 		}
1960d1c234e2SFelix Kuehling #else
1961d1c234e2SFelix Kuehling 		pr_info("IO link not available for non x86 platforms\n");
1962d1c234e2SFelix Kuehling #endif
1963520b8fb7SFelix Kuehling 
1964520b8fb7SFelix Kuehling 		crat_table->num_domains++;
1965520b8fb7SFelix Kuehling 	}
1966520b8fb7SFelix Kuehling 
1967520b8fb7SFelix Kuehling 	/* TODO: Add cache Subtype for CPU.
1968520b8fb7SFelix Kuehling 	 * Currently, CPU cache information is available in function
1969520b8fb7SFelix Kuehling 	 * detect_cache_attributes(cpu) defined in the file
1970520b8fb7SFelix Kuehling 	 * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not
1971520b8fb7SFelix Kuehling 	 * exported and to get the same information the code needs to be
1972520b8fb7SFelix Kuehling 	 * duplicated.
1973520b8fb7SFelix Kuehling 	 */
1974520b8fb7SFelix Kuehling 
1975520b8fb7SFelix Kuehling 	*size = crat_table->length;
1976520b8fb7SFelix Kuehling 	pr_info("Virtual CRAT table created for CPU\n");
1977520b8fb7SFelix Kuehling 
1978520b8fb7SFelix Kuehling 	return 0;
1979520b8fb7SFelix Kuehling }
1980520b8fb7SFelix Kuehling 
/* kfd_fill_gpu_memory_affinity - Fill in a memory entry for a GPU heap
 *
 *	@avail_size: Available size in the memory; reduced by the size of
 *		one memory entry
 *	@kdev: GPU node (not read by this function)
 *	@type: value stored as the entry's visibility_type
 *	@size: heap size, stored split into length_low/length_high
 *	@sub_type_hdr: Memory into which memory info will be filled in
 *	@proximity_domain: value stored as the entry's proximity domain
 *	@local_mem_info: source of the vram_width stored as the entry's width
 *
 *	Return 0 if successful, -ENOMEM if the entry does not fit
 */
static int kfd_fill_gpu_memory_affinity(int *avail_size,
		struct kfd_node *kdev, uint8_t type, uint64_t size,
		struct crat_subtype_memory *sub_type_hdr,
		uint32_t proximity_domain,
		const struct kfd_local_mem_info *local_mem_info)
{
	*avail_size -= sizeof(*sub_type_hdr);
	if (*avail_size < 0)
		return -ENOMEM;

	memset(sub_type_hdr, 0, sizeof(*sub_type_hdr));

	/* Subtype header */
	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
	sub_type_hdr->length = sizeof(*sub_type_hdr);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
			type, size);

	/* Memory payload */
	sub_type_hdr->proximity_domain = proximity_domain;
	sub_type_hdr->length_low = lower_32_bits(size);
	sub_type_hdr->length_high = upper_32_bits(size);
	sub_type_hdr->width = local_mem_info->vram_width;
	sub_type_hdr->visibility_type = type;

	return 0;
}
20093a87177eSHarish Kasiviswanathan 
#ifdef CONFIG_ACPI_NUMA
/* kfd_find_numa_node_in_srat - Look up the GPU's NUMA node in the ACPI SRAT
 *
 *	@kdev: GPU node whose PCI device is matched against the SRAT
 *
 *	Walks the SRAT subtables. CPU affinity entries are used to track the
 *	highest proximity domain seen so far. The first generic affinity
 *	entry whose device handle equals the GPU's PCI segment/device id
 *	supplies the NUMA node, which is then set on the PCI device. The
 *	device is left untouched when no entry matches or the SRAT cannot
 *	be fetched.
 */
static void kfd_find_numa_node_in_srat(struct kfd_node *kdev)
{
	struct acpi_table_header *table_header = NULL;
	struct acpi_subtable_header *sub_header = NULL;
	unsigned long table_end, entry, subtable_len;
	u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
			pci_dev_id(kdev->adev->pdev);
	u32 bdf;
	acpi_status status;
	struct acpi_srat_cpu_affinity *cpu;
	struct acpi_srat_generic_affinity *gpu;
	int pxm = 0, max_pxm = 0;
	int numa_node = NUMA_NO_NODE;
	bool found = false;

	/* Fetch the SRAT table from ACPI */
	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
	if (status == AE_NOT_FOUND) {
		pr_warn("SRAT table not found\n");
		return;
	} else if (ACPI_FAILURE(status)) {
		const char *err = acpi_format_exception(status);
		pr_err("SRAT table error: %s\n", err);
		return;
	}

	table_end = (unsigned long)table_header + table_header->length;
	entry = (unsigned long)table_header + sizeof(struct acpi_table_srat);

	/* Parse all entries looking for a match. A subtable header is only
	 * read once it is known to lie completely inside the table.
	 */
	while (entry + sizeof(*sub_header) <= table_end) {
		sub_header = (struct acpi_subtable_header *)entry;
		subtable_len = sub_header->length;

		/*
		 * If length is 0, break from this loop to avoid
		 * infinite loop.
		 */
		if (subtable_len == 0) {
			pr_err("SRAT invalid zero length\n");
			break;
		}

		/* A subtable may end exactly at the end of the table (the
		 * last one does), but must not run past it.
		 */
		if (entry + subtable_len > table_end)
			break;

		switch (sub_header->type) {
		case ACPI_SRAT_TYPE_CPU_AFFINITY:
			/* Ignore entries too short to hold the structure */
			if (subtable_len < sizeof(*cpu))
				break;
			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
			/* Proximity domain: low byte plus three high bytes,
			 * assembled byte by byte so that nothing beyond the
			 * 3-byte array is read and no unaligned 32-bit load
			 * is needed.
			 */
			pxm = (u32)cpu->proximity_domain_hi[2] << 24 |
			      (u32)cpu->proximity_domain_hi[1] << 16 |
			      (u32)cpu->proximity_domain_hi[0] << 8 |
			      cpu->proximity_domain_lo;
			if (pxm > max_pxm)
				max_pxm = pxm;
			break;
		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
			/* Ignore entries too short to hold the structure */
			if (subtable_len < sizeof(*gpu))
				break;
			gpu = (struct acpi_srat_generic_affinity *)sub_header;
			/* Bytes 0-1 of the device handle go to the upper 16
			 * bits and bytes 2-3 to the lower 16 bits, each read
			 * as a little-endian 16-bit value, mirroring the
			 * layout of pci_id above.
			 */
			bdf = (u32)(gpu->device_handle[0] |
				    gpu->device_handle[1] << 8) << 16 |
			      (u32)(gpu->device_handle[2] |
				    gpu->device_handle[3] << 8);
			if (bdf == pci_id) {
				found = true;
				numa_node = pxm_to_node(gpu->proximity_domain);
			}
			break;
		default:
			break;
		}

		if (found)
			break;

		entry += subtable_len;
	}

	acpi_put_table(table_header);

	/* Workaround bad cpu-gpu binding case */
	if (found && (numa_node < 0 ||
			numa_node > pxm_to_node(max_pxm)))
		numa_node = 0;

	if (numa_node != NUMA_NO_NODE)
		set_dev_node(&kdev->adev->pdev->dev, numa_node);
}
#endif
2095ddec8d3bSEric Huang 
/* Link weights stored in the weight_xgmi field of XGMI iolink entries */
#define KFD_CRAT_INTRA_SOCKET_WEIGHT	13
#define KFD_CRAT_XGMI_WEIGHT		15
209892085240SJonathan Kim 
20993a87177eSHarish Kasiviswanathan /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
21003a87177eSHarish Kasiviswanathan  * to its NUMA node
21013a87177eSHarish Kasiviswanathan  *	@avail_size: Available size in the memory
21023a87177eSHarish Kasiviswanathan  *	@kdev - [IN] GPU device
21033a87177eSHarish Kasiviswanathan  *	@sub_type_hdr: Memory into which io link info will be filled in
21043a87177eSHarish Kasiviswanathan  *	@proximity_domain - proximity domain of the GPU node
21053a87177eSHarish Kasiviswanathan  *
21063a87177eSHarish Kasiviswanathan  *	Return 0 if successful else return -ve value
21073a87177eSHarish Kasiviswanathan  */
kfd_fill_gpu_direct_io_link_to_cpu(int * avail_size,struct kfd_node * kdev,struct crat_subtype_iolink * sub_type_hdr,uint32_t proximity_domain)2108ae9a25aeSShaoyun Liu static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
21098dc1db31SMukul Joshi 			struct kfd_node *kdev,
21103a87177eSHarish Kasiviswanathan 			struct crat_subtype_iolink *sub_type_hdr,
21113a87177eSHarish Kasiviswanathan 			uint32_t proximity_domain)
21123a87177eSHarish Kasiviswanathan {
21133a87177eSHarish Kasiviswanathan 	*avail_size -= sizeof(struct crat_subtype_iolink);
21143a87177eSHarish Kasiviswanathan 	if (*avail_size < 0)
21153a87177eSHarish Kasiviswanathan 		return -ENOMEM;
21163a87177eSHarish Kasiviswanathan 
21173a87177eSHarish Kasiviswanathan 	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
21183a87177eSHarish Kasiviswanathan 
21193a87177eSHarish Kasiviswanathan 	/* Fill in subtype header data */
21203a87177eSHarish Kasiviswanathan 	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
21213a87177eSHarish Kasiviswanathan 	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
21223a87177eSHarish Kasiviswanathan 	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
212367f7cf9fSshaoyunl 	if (kfd_dev_is_large_bar(kdev))
212467f7cf9fSshaoyunl 		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
21253a87177eSHarish Kasiviswanathan 
21263a87177eSHarish Kasiviswanathan 	/* Fill in IOLINK subtype.
21273a87177eSHarish Kasiviswanathan 	 * TODO: Fill-in other fields of iolink subtype
21283a87177eSHarish Kasiviswanathan 	 */
2129b2ef2fdfSRajneesh Bhardwaj 	if (kdev->adev->gmc.xgmi.connected_to_cpu ||
2130b2ef2fdfSRajneesh Bhardwaj 	    (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) &&
2131b2ef2fdfSRajneesh Bhardwaj 	     kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) ==
2132b2ef2fdfSRajneesh Bhardwaj 	     AMDGPU_PKG_TYPE_APU)) {
213392085240SJonathan Kim 		bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
213492085240SJonathan Kim 		int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
213592085240SJonathan Kim 							KFD_CRAT_INTRA_SOCKET_WEIGHT;
2136d34184e3SRajneesh Bhardwaj 		/*
2137d34184e3SRajneesh Bhardwaj 		 * with host gpu xgmi link, host can access gpu memory whether
2138d34184e3SRajneesh Bhardwaj 		 * or not pcie bar type is large, so always create bidirectional
2139d34184e3SRajneesh Bhardwaj 		 * io link.
2140d34184e3SRajneesh Bhardwaj 		 */
2141d34184e3SRajneesh Bhardwaj 		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
2142d34184e3SRajneesh Bhardwaj 		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
214392085240SJonathan Kim 		sub_type_hdr->weight_xgmi = weight;
2144*9424a5bfSJonathan Kim 		if (ext_cpu) {
2145*9424a5bfSJonathan Kim 			amdgpu_xgmi_get_bandwidth(kdev->adev, NULL,
2146*9424a5bfSJonathan Kim 						  AMDGPU_XGMI_BW_MODE_PER_LINK,
2147*9424a5bfSJonathan Kim 						  AMDGPU_XGMI_BW_UNIT_MBYTES,
2148*9424a5bfSJonathan Kim 						  &sub_type_hdr->minimum_bandwidth_mbs,
2149*9424a5bfSJonathan Kim 						  &sub_type_hdr->maximum_bandwidth_mbs);
2150*9424a5bfSJonathan Kim 		} else {
2151*9424a5bfSJonathan Kim 			sub_type_hdr->minimum_bandwidth_mbs = mem_bw;
2152*9424a5bfSJonathan Kim 			sub_type_hdr->maximum_bandwidth_mbs = mem_bw;
2153*9424a5bfSJonathan Kim 		}
2154d34184e3SRajneesh Bhardwaj 	} else {
21553a87177eSHarish Kasiviswanathan 		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
215693304810SJonathan Kim 		sub_type_hdr->minimum_bandwidth_mbs =
2157574c4183SGraham Sider 				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
215893304810SJonathan Kim 		sub_type_hdr->maximum_bandwidth_mbs =
2159574c4183SGraham Sider 				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
2160d34184e3SRajneesh Bhardwaj 	}
2161d34184e3SRajneesh Bhardwaj 
21623a87177eSHarish Kasiviswanathan 	sub_type_hdr->proximity_domain_from = proximity_domain;
2163ddec8d3bSEric Huang 
2164ddec8d3bSEric Huang #ifdef CONFIG_ACPI_NUMA
2165bbcc3514SMario Limonciello 	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE &&
2166bbcc3514SMario Limonciello 	    num_possible_nodes() > 1)
2167ddec8d3bSEric Huang 		kfd_find_numa_node_in_srat(kdev);
2168ddec8d3bSEric Huang #endif
21693a87177eSHarish Kasiviswanathan #ifdef CONFIG_NUMA
2170d69a3b76SMukul Joshi 	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
21713a87177eSHarish Kasiviswanathan 		sub_type_hdr->proximity_domain_to = 0;
21723a87177eSHarish Kasiviswanathan 	else
2173d69a3b76SMukul Joshi 		sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node;
21743a87177eSHarish Kasiviswanathan #else
21753a87177eSHarish Kasiviswanathan 	sub_type_hdr->proximity_domain_to = 0;
21763a87177eSHarish Kasiviswanathan #endif
21773a87177eSHarish Kasiviswanathan 	return 0;
21783a87177eSHarish Kasiviswanathan }
21793a87177eSHarish Kasiviswanathan 
/* kfd_fill_gpu_xgmi_link_to_gpu - Fill in an XGMI io link between two GPU nodes
 *
 *	@avail_size: Available size in the memory; reduced by the size of
 *		one iolink entry
 *	@kdev - [IN] GPU node the link starts from
 *	@peer_kdev - [IN] GPU node at the other end of the link
 *	@sub_type_hdr: Memory into which io link info will be filled in
 *	@proximity_domain_from - value stored as the link's source domain
 *	@proximity_domain_to - value stored as the link's target domain
 *
 *	Return 0 if successful, -ENOMEM if the entry does not fit
 */
static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
			struct kfd_node *kdev,
			struct kfd_node *peer_kdev,
			struct crat_subtype_iolink *sub_type_hdr,
			uint32_t proximity_domain_from,
			uint32_t proximity_domain_to)
{
	/* Hop count and bandwidth are queried from the XGMI code only when
	 * the device consists of a single node.
	 */
	const bool single_node_dev = kdev->kfd->num_nodes == 1;

	*avail_size -= sizeof(*sub_type_hdr);
	if (*avail_size < 0)
		return -ENOMEM;

	memset(sub_type_hdr, 0, sizeof(*sub_type_hdr));

	/* Subtype header: enabled, always bidirectional */
	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
	sub_type_hdr->length = sizeof(*sub_type_hdr);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED |
			      CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;

	sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
	sub_type_hdr->proximity_domain_from = proximity_domain_from;
	sub_type_hdr->proximity_domain_to = proximity_domain_to;

	if (single_node_dev) {
		/* Weight scales with the number of hops between the GPUs */
		sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
			amdgpu_xgmi_get_hops_count(kdev->adev, peer_kdev->adev);
		amdgpu_xgmi_get_bandwidth(kdev->adev, peer_kdev->adev,
					  AMDGPU_XGMI_BW_MODE_PER_PEER,
					  AMDGPU_XGMI_BW_UNIT_MBYTES,
					  &sub_type_hdr->minimum_bandwidth_mbs,
					  &sub_type_hdr->maximum_bandwidth_mbs);
	} else if (kdev->kfd == peer_kdev->kfd) {
		/* Both nodes belong to the same device: intra-socket
		 * weight and a fixed bandwidth figure.
		 */
		const int mem_bw = 819200;

		sub_type_hdr->weight_xgmi = KFD_CRAT_INTRA_SOCKET_WEIGHT;
		sub_type_hdr->maximum_bandwidth_mbs = mem_bw;
		sub_type_hdr->minimum_bandwidth_mbs = mem_bw;
	} else {
		/* Nodes of different devices: two intra-socket weights
		 * plus one XGMI weight, bandwidth reported as 0.
		 */
		sub_type_hdr->weight_xgmi =
			(2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT;
		sub_type_hdr->maximum_bandwidth_mbs = 0;
		sub_type_hdr->minimum_bandwidth_mbs = 0;
	}

	return 0;
}
2225ae9a25aeSShaoyun Liu 
22263a87177eSHarish Kasiviswanathan /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for CPU
22273a87177eSHarish Kasiviswanathan  *
22283a87177eSHarish Kasiviswanathan  *	@pcrat_image: Fill in VCRAT for GPU
22293a87177eSHarish Kasiviswanathan  *	@size:	[IN] allocated size of crat_image.
22303a87177eSHarish Kasiviswanathan  *		[OUT] actual size of data filled in crat_image
22313a87177eSHarish Kasiviswanathan  */
kfd_create_vcrat_image_gpu(void * pcrat_image,size_t * size,struct kfd_node * kdev,uint32_t proximity_domain)22323a87177eSHarish Kasiviswanathan static int kfd_create_vcrat_image_gpu(void *pcrat_image,
22338dc1db31SMukul Joshi 				      size_t *size, struct kfd_node *kdev,
22343a87177eSHarish Kasiviswanathan 				      uint32_t proximity_domain)
22353a87177eSHarish Kasiviswanathan {
22363a87177eSHarish Kasiviswanathan 	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
22370021d70aSAlex Deucher 	struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
22380021d70aSAlex Deucher 	struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
22393a87177eSHarish Kasiviswanathan 	struct crat_subtype_generic *sub_type_hdr;
2240ae9a25aeSShaoyun Liu 	struct kfd_local_mem_info local_mem_info;
2241ae9a25aeSShaoyun Liu 	struct kfd_topology_device *peer_dev;
22423a87177eSHarish Kasiviswanathan 	struct crat_subtype_computeunit *cu;
22433a87177eSHarish Kasiviswanathan 	int avail_size = *size;
22443a87177eSHarish Kasiviswanathan 	uint32_t total_num_of_cu;
2245ae9a25aeSShaoyun Liu 	uint32_t nid = 0;
22463a87177eSHarish Kasiviswanathan 	int ret = 0;
22473a87177eSHarish Kasiviswanathan 
22483a87177eSHarish Kasiviswanathan 	if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
22493a87177eSHarish Kasiviswanathan 		return -EINVAL;
22503a87177eSHarish Kasiviswanathan 
22513a87177eSHarish Kasiviswanathan 	/* Fill the CRAT Header.
22523a87177eSHarish Kasiviswanathan 	 * Modify length and total_entries as subunits are added.
22533a87177eSHarish Kasiviswanathan 	 */
22543a87177eSHarish Kasiviswanathan 	avail_size -= sizeof(struct crat_header);
22553a87177eSHarish Kasiviswanathan 	memset(crat_table, 0, sizeof(struct crat_header));
22563a87177eSHarish Kasiviswanathan 
22573a87177eSHarish Kasiviswanathan 	memcpy(&crat_table->signature, CRAT_SIGNATURE,
22583a87177eSHarish Kasiviswanathan 			sizeof(crat_table->signature));
22593a87177eSHarish Kasiviswanathan 	/* Change length as we add more subtypes*/
22603a87177eSHarish Kasiviswanathan 	crat_table->length = sizeof(struct crat_header);
22613a87177eSHarish Kasiviswanathan 	crat_table->num_domains = 1;
22623a87177eSHarish Kasiviswanathan 	crat_table->total_entries = 0;
22633a87177eSHarish Kasiviswanathan 
22643a87177eSHarish Kasiviswanathan 	/* Fill in Subtype: Compute Unit
22653a87177eSHarish Kasiviswanathan 	 * First fill in the sub type header and then sub type data
22663a87177eSHarish Kasiviswanathan 	 */
22673a87177eSHarish Kasiviswanathan 	avail_size -= sizeof(struct crat_subtype_computeunit);
22683a87177eSHarish Kasiviswanathan 	sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
22693a87177eSHarish Kasiviswanathan 	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
22703a87177eSHarish Kasiviswanathan 
22713a87177eSHarish Kasiviswanathan 	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
22723a87177eSHarish Kasiviswanathan 	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
22733a87177eSHarish Kasiviswanathan 	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
22743a87177eSHarish Kasiviswanathan 
22753a87177eSHarish Kasiviswanathan 	/* Fill CU subtype data */
22763a87177eSHarish Kasiviswanathan 	cu = (struct crat_subtype_computeunit *)sub_type_hdr;
22773a87177eSHarish Kasiviswanathan 	cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
22783a87177eSHarish Kasiviswanathan 	cu->proximity_domain = proximity_domain;
22793a87177eSHarish Kasiviswanathan 
22800021d70aSAlex Deucher 	cu->num_simd_per_cu = cu_info->simd_per_cu;
22810021d70aSAlex Deucher 	cu->num_simd_cores = cu_info->simd_per_cu *
22820021d70aSAlex Deucher 			(cu_info->number / kdev->kfd->num_nodes);
22830021d70aSAlex Deucher 	cu->max_waves_simd = cu_info->max_waves_per_simd;
22843a87177eSHarish Kasiviswanathan 
22850021d70aSAlex Deucher 	cu->wave_front_size = cu_info->wave_front_size;
22860021d70aSAlex Deucher 	cu->array_count = gfx_info->max_sh_per_se *
22870021d70aSAlex Deucher 		gfx_info->max_shader_engines;
22880021d70aSAlex Deucher 	total_num_of_cu = (cu->array_count * gfx_info->max_cu_per_sh);
22893a87177eSHarish Kasiviswanathan 	cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
22900021d70aSAlex Deucher 	cu->num_cu_per_array = gfx_info->max_cu_per_sh;
22910021d70aSAlex Deucher 	cu->max_slots_scatch_cu = cu_info->max_scratch_slots_per_cu;
22920021d70aSAlex Deucher 	cu->num_banks = gfx_info->max_shader_engines;
22930021d70aSAlex Deucher 	cu->lds_size_in_kb = cu_info->lds_size;
22943a87177eSHarish Kasiviswanathan 
22953a87177eSHarish Kasiviswanathan 	cu->hsa_capability = 0;
22963a87177eSHarish Kasiviswanathan 
22973a87177eSHarish Kasiviswanathan 	crat_table->length += sub_type_hdr->length;
22983a87177eSHarish Kasiviswanathan 	crat_table->total_entries++;
22993a87177eSHarish Kasiviswanathan 
23003a87177eSHarish Kasiviswanathan 	/* Fill in Subtype: Memory. Only on systems with large BAR (no
23013a87177eSHarish Kasiviswanathan 	 * private FB), report memory as public. On other systems
23023a87177eSHarish Kasiviswanathan 	 * report the total FB size (public+private) as a single
23033a87177eSHarish Kasiviswanathan 	 * private heap.
23043a87177eSHarish Kasiviswanathan 	 */
2305315e29ecSMukul Joshi 	local_mem_info = kdev->local_mem_info;
23063a87177eSHarish Kasiviswanathan 	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
23073a87177eSHarish Kasiviswanathan 			sub_type_hdr->length);
23083a87177eSHarish Kasiviswanathan 
2309887db1e4SAndré Almeida 	if (kdev->adev->debug_largebar)
2310374200b1SFelix Kuehling 		local_mem_info.local_mem_size_private = 0;
2311374200b1SFelix Kuehling 
23123a87177eSHarish Kasiviswanathan 	if (local_mem_info.local_mem_size_private == 0)
23133a87177eSHarish Kasiviswanathan 		ret = kfd_fill_gpu_memory_affinity(&avail_size,
23143a87177eSHarish Kasiviswanathan 				kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
23153a87177eSHarish Kasiviswanathan 				local_mem_info.local_mem_size_public,
23163a87177eSHarish Kasiviswanathan 				(struct crat_subtype_memory *)sub_type_hdr,
23173a87177eSHarish Kasiviswanathan 				proximity_domain,
23183a87177eSHarish Kasiviswanathan 				&local_mem_info);
23193a87177eSHarish Kasiviswanathan 	else
23203a87177eSHarish Kasiviswanathan 		ret = kfd_fill_gpu_memory_affinity(&avail_size,
23213a87177eSHarish Kasiviswanathan 				kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE,
23223a87177eSHarish Kasiviswanathan 				local_mem_info.local_mem_size_public +
23233a87177eSHarish Kasiviswanathan 				local_mem_info.local_mem_size_private,
23243a87177eSHarish Kasiviswanathan 				(struct crat_subtype_memory *)sub_type_hdr,
23253a87177eSHarish Kasiviswanathan 				proximity_domain,
23263a87177eSHarish Kasiviswanathan 				&local_mem_info);
23273a87177eSHarish Kasiviswanathan 	if (ret < 0)
23283a87177eSHarish Kasiviswanathan 		return ret;
23293a87177eSHarish Kasiviswanathan 
23303a87177eSHarish Kasiviswanathan 	crat_table->length += sizeof(struct crat_subtype_memory);
23313a87177eSHarish Kasiviswanathan 	crat_table->total_entries++;
23323a87177eSHarish Kasiviswanathan 
23333a87177eSHarish Kasiviswanathan 	/* Fill in Subtype: IO_LINKS
23343a87177eSHarish Kasiviswanathan 	 *  Only direct links are added here which is Link from GPU to
23357a3f8b7cSwangjianli 	 *  its NUMA node. Indirect links are added by userspace.
23363a87177eSHarish Kasiviswanathan 	 */
23373a87177eSHarish Kasiviswanathan 	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
2338c0cc999fSMa Jun 		sub_type_hdr->length);
2339ae9a25aeSShaoyun Liu 	ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
23403a87177eSHarish Kasiviswanathan 		(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
23413a87177eSHarish Kasiviswanathan 
23423a87177eSHarish Kasiviswanathan 	if (ret < 0)
23433a87177eSHarish Kasiviswanathan 		return ret;
23443a87177eSHarish Kasiviswanathan 
23453a87177eSHarish Kasiviswanathan 	crat_table->length += sub_type_hdr->length;
23463a87177eSHarish Kasiviswanathan 	crat_table->total_entries++;
23473a87177eSHarish Kasiviswanathan 
2348ae9a25aeSShaoyun Liu 
2349ae9a25aeSShaoyun Liu 	/* Fill in Subtype: IO_LINKS
2350ae9a25aeSShaoyun Liu 	 * Direct links from GPU to other GPUs through xGMI.
2351ae9a25aeSShaoyun Liu 	 * We loop over the GPUs that have already been processed (with lower
2352ae9a25aeSShaoyun Liu 	 * value of proximity_domain) and add a link for each GPU with the same
2353ae9a25aeSShaoyun Liu 	 * hive id (from this GPU to the other GPU). The reversed iolink
2354ae9a25aeSShaoyun Liu 	 * (from the other GPU to this GPU) will be added
2355ae9a25aeSShaoyun Liu 	 * in kfd_parse_subtype_iolink.
2356ae9a25aeSShaoyun Liu 	 */
23578dc1db31SMukul Joshi 	if (kdev->kfd->hive_id) {
2358ae9a25aeSShaoyun Liu 		for (nid = 0; nid < proximity_domain; ++nid) {
235946d18d51SMukul Joshi 			peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
2360ae9a25aeSShaoyun Liu 			if (!peer_dev->gpu)
2361ae9a25aeSShaoyun Liu 				continue;
23628dc1db31SMukul Joshi 			if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
2363ae9a25aeSShaoyun Liu 				continue;
2364e46738a5SJonathan Kim 			if (!amdgpu_xgmi_get_is_sharing_enabled(kdev->adev, peer_dev->gpu->adev))
2365e46738a5SJonathan Kim 				continue;
2366ae9a25aeSShaoyun Liu 			sub_type_hdr = (typeof(sub_type_hdr))(
2367ae9a25aeSShaoyun Liu 				(char *)sub_type_hdr +
2368ae9a25aeSShaoyun Liu 				sizeof(struct crat_subtype_iolink));
2369ae9a25aeSShaoyun Liu 			ret = kfd_fill_gpu_xgmi_link_to_gpu(
23700fb0df03Sshaoyunl 				&avail_size, kdev, peer_dev->gpu,
2371ae9a25aeSShaoyun Liu 				(struct crat_subtype_iolink *)sub_type_hdr,
2372ae9a25aeSShaoyun Liu 				proximity_domain, nid);
2373ae9a25aeSShaoyun Liu 			if (ret < 0)
2374ae9a25aeSShaoyun Liu 				return ret;
2375ae9a25aeSShaoyun Liu 			crat_table->length += sub_type_hdr->length;
2376ae9a25aeSShaoyun Liu 			crat_table->total_entries++;
2377ae9a25aeSShaoyun Liu 		}
2378ae9a25aeSShaoyun Liu 	}
23793a87177eSHarish Kasiviswanathan 	*size = crat_table->length;
23803a87177eSHarish Kasiviswanathan 	pr_info("Virtual CRAT table created for GPU\n");
23813a87177eSHarish Kasiviswanathan 
23823a87177eSHarish Kasiviswanathan 	return ret;
23833a87177eSHarish Kasiviswanathan }
23843a87177eSHarish Kasiviswanathan 
2385520b8fb7SFelix Kuehling /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
2386520b8fb7SFelix Kuehling  *		creates a Virtual CRAT (VCRAT) image
2387520b8fb7SFelix Kuehling  *
2388520b8fb7SFelix Kuehling  * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
2389520b8fb7SFelix Kuehling  *
2390520b8fb7SFelix Kuehling  *	@crat_image: VCRAT image created because ACPI does not have a
2391520b8fb7SFelix Kuehling  *		     CRAT for this device
2392520b8fb7SFelix Kuehling  *	@size: [OUT] size of virtual crat_image
2393520b8fb7SFelix Kuehling  *	@flags:	COMPUTE_UNIT_CPU - Create VCRAT for CPU device
2394520b8fb7SFelix Kuehling  *		COMPUTE_UNIT_GPU - Create VCRAT for GPU
2395520b8fb7SFelix Kuehling  *		(COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
2396520b8fb7SFelix Kuehling  *			-- this option is not currently implemented.
2397520b8fb7SFelix Kuehling  *			The assumption is that all AMD APUs will have CRAT
23988dc1db31SMukul Joshi  *	@kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU
2399520b8fb7SFelix Kuehling  *
2400520b8fb7SFelix Kuehling  *	Return 0 if successful else return -ve value
2401520b8fb7SFelix Kuehling  */
kfd_create_crat_image_virtual(void ** crat_image,size_t * size,int flags,struct kfd_node * kdev,uint32_t proximity_domain)2402520b8fb7SFelix Kuehling int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
24038dc1db31SMukul Joshi 				  int flags, struct kfd_node *kdev,
2404520b8fb7SFelix Kuehling 				  uint32_t proximity_domain)
2405520b8fb7SFelix Kuehling {
2406520b8fb7SFelix Kuehling 	void *pcrat_image = NULL;
2407b7b6c385SKent Russell 	int ret = 0, num_nodes;
2408b7b6c385SKent Russell 	size_t dyn_size;
2409520b8fb7SFelix Kuehling 
2410520b8fb7SFelix Kuehling 	if (!crat_image)
2411520b8fb7SFelix Kuehling 		return -EINVAL;
2412520b8fb7SFelix Kuehling 
2413520b8fb7SFelix Kuehling 	*crat_image = NULL;
2414520b8fb7SFelix Kuehling 
2415b7b6c385SKent Russell 	/* Allocate the CPU Virtual CRAT size based on the number of online
2416b7b6c385SKent Russell 	 * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image.
2417b7b6c385SKent Russell 	 * This should cover all the current conditions. A check is put not
2418b7b6c385SKent Russell 	 * to overwrite beyond allocated size for GPUs
2419520b8fb7SFelix Kuehling 	 */
2420520b8fb7SFelix Kuehling 	switch (flags) {
2421520b8fb7SFelix Kuehling 	case COMPUTE_UNIT_CPU:
2422b7b6c385SKent Russell 		num_nodes = num_online_nodes();
2423b7b6c385SKent Russell 		dyn_size = sizeof(struct crat_header) +
2424b7b6c385SKent Russell 			num_nodes * (sizeof(struct crat_subtype_computeunit) +
2425b7b6c385SKent Russell 			sizeof(struct crat_subtype_memory) +
2426b7b6c385SKent Russell 			(num_nodes - 1) * sizeof(struct crat_subtype_iolink));
2427d0e63b34SKent Russell 		pcrat_image = kvmalloc(dyn_size, GFP_KERNEL);
2428520b8fb7SFelix Kuehling 		if (!pcrat_image)
2429520b8fb7SFelix Kuehling 			return -ENOMEM;
2430b7b6c385SKent Russell 		*size = dyn_size;
2431b7b6c385SKent Russell 		pr_debug("CRAT size is %ld", dyn_size);
2432520b8fb7SFelix Kuehling 		ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
2433520b8fb7SFelix Kuehling 		break;
2434520b8fb7SFelix Kuehling 	case COMPUTE_UNIT_GPU:
24353a87177eSHarish Kasiviswanathan 		if (!kdev)
24363a87177eSHarish Kasiviswanathan 			return -EINVAL;
2437d0e63b34SKent Russell 		pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
24383a87177eSHarish Kasiviswanathan 		if (!pcrat_image)
24393a87177eSHarish Kasiviswanathan 			return -ENOMEM;
24403a87177eSHarish Kasiviswanathan 		*size = VCRAT_SIZE_FOR_GPU;
24413a87177eSHarish Kasiviswanathan 		ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev,
24423a87177eSHarish Kasiviswanathan 						 proximity_domain);
2443520b8fb7SFelix Kuehling 		break;
2444520b8fb7SFelix Kuehling 	case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
2445520b8fb7SFelix Kuehling 		/* TODO: */
2446520b8fb7SFelix Kuehling 		ret = -EINVAL;
2447520b8fb7SFelix Kuehling 		pr_err("VCRAT not implemented for APU\n");
2448520b8fb7SFelix Kuehling 		break;
2449520b8fb7SFelix Kuehling 	default:
2450520b8fb7SFelix Kuehling 		ret = -EINVAL;
2451520b8fb7SFelix Kuehling 	}
2452520b8fb7SFelix Kuehling 
2453520b8fb7SFelix Kuehling 	if (!ret)
2454520b8fb7SFelix Kuehling 		*crat_image = pcrat_image;
2455520b8fb7SFelix Kuehling 	else
2456d0e63b34SKent Russell 		kvfree(pcrat_image);
2457520b8fb7SFelix Kuehling 
2458520b8fb7SFelix Kuehling 	return ret;
2459520b8fb7SFelix Kuehling }
2460520b8fb7SFelix Kuehling 
2461520b8fb7SFelix Kuehling 
/* kfd_destroy_crat_image - Release the memory backing a CRAT image
 *
 *	@crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
 *
 * The pointer is passed straight to kvfree() and must not be used by the
 * caller afterwards.
 */
void kfd_destroy_crat_image(void *crat_image)
{
	kvfree(crat_image);
}
2471