// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology (RDT)
 * - Cache Allocation code.
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Authors:
 *    Fenghua Yu <fenghua.yu@intel.com>
 *    Tony Luck <tony.luck@intel.com>
 *    Vikas Shivappa <vikas.shivappa@intel.com>
 *
 * More information about RDT can be found in the Intel(R) x86 Architecture
 * Software Developer Manual, June 2016, volume 3, section 17.17.
 */

#define pr_fmt(fmt)	"resctrl: " fmt

#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/cpuhotplug.h>

#include <asm/cpu_device_id.h>
#include <asm/resctrl.h>
#include "internal.h"

/*
 * rdt_domain structures are kfree()d when their last CPU goes offline,
 * and allocated when the first CPU in a new domain comes online.
 * The rdt_resource's domain list is updated when this happens. Readers of
 * the domain list must either take cpus_read_lock(), or rely on an RCU
 * read-side critical section, to avoid observing concurrent modification.
 * All writers take this mutex:
 */
static DEFINE_MUTEX(domain_list_lock);
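
/*
 * Illustrative reader-side pattern for the domain lists described above
 * (an editorial sketch, not code used elsewhere in this file): either hold
 * cpus_read_lock() or use an RCU read-side critical section while walking
 * a list, e.g.:
 *
 *	rcu_read_lock();
 *	list_for_each_entry_rcu(d, &r->ctrl_domains, hdr.list)
 *		inspect_domain(d);	(hypothetical helper)
 *	rcu_read_unlock();
 */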

/*
 * The cached resctrl_pqr_state is strictly per CPU and can never be
 * updated from a remote CPU. Functions which modify the state
 * are called with interrupts disabled and no preemption, which
 * is sufficient for the protection.
 */
DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);

/*
 * Global boolean which is true if any allocation-capable
 * resource has been detected.
 */
bool rdt_alloc_capable;

static void mba_wrmsr_intel(struct msr_param *m);
static void cat_wrmsr(struct msr_param *m);
static void mba_wrmsr_amd(struct msr_param *m);

#define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains)
#define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains)

struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
	[RDT_RESOURCE_L3] =
	{
		.r_resctrl = {
			.rid			= RDT_RESOURCE_L3,
			.name			= "L3",
			.ctrl_scope		= RESCTRL_L3_CACHE,
			.mon_scope		= RESCTRL_L3_CACHE,
			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_L3),
			.mon_domains		= mon_domain_init(RDT_RESOURCE_L3),
			.schema_fmt		= RESCTRL_SCHEMA_BITMAP,
		},
		.msr_base		= MSR_IA32_L3_CBM_BASE,
		.msr_update		= cat_wrmsr,
	},
	[RDT_RESOURCE_L2] =
	{
		.r_resctrl = {
			.rid			= RDT_RESOURCE_L2,
			.name			= "L2",
			.ctrl_scope		= RESCTRL_L2_CACHE,
			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_L2),
			.schema_fmt		= RESCTRL_SCHEMA_BITMAP,
		},
		.msr_base		= MSR_IA32_L2_CBM_BASE,
		.msr_update		= cat_wrmsr,
	},
	[RDT_RESOURCE_MBA] =
	{
		.r_resctrl = {
			.rid			= RDT_RESOURCE_MBA,
			.name			= "MB",
			.ctrl_scope		= RESCTRL_L3_CACHE,
			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_MBA),
			.schema_fmt		= RESCTRL_SCHEMA_RANGE,
		},
	},
	[RDT_RESOURCE_SMBA] =
	{
		.r_resctrl = {
			.rid			= RDT_RESOURCE_SMBA,
			.name			= "SMBA",
			.ctrl_scope		= RESCTRL_L3_CACHE,
			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_SMBA),
			.schema_fmt		= RESCTRL_SCHEMA_RANGE,
		},
	},
};

u32 resctrl_arch_system_num_rmid_idx(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

	/* RMIDs are independent numbers for x86. num_rmid_idx == num_rmid */
	return r->num_rmid;
}

struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
{
	if (l >= RDT_NUM_RESOURCES)
		return NULL;

	return &rdt_resources_all[l].r_resctrl;
}

/*
 * cache_alloc_hsw_probe() - Probe for Intel Haswell server CPUs, as they
 * do not have CPUID enumeration support for Cache allocation.
 * The check for Vendor/Family/Model is not enough to guarantee that
 * the MSRs won't #GP fault because only the following SKUs support
 * CAT:
 *	Intel(R) Xeon(R)  CPU E5-2658  v3  @  2.20GHz
 *	Intel(R) Xeon(R)  CPU E5-2648L v3  @  1.80GHz
 *	Intel(R) Xeon(R)  CPU E5-2628L v3  @  2.00GHz
 *	Intel(R) Xeon(R)  CPU E5-2618L v3  @  2.30GHz
 *	Intel(R) Xeon(R)  CPU E5-2608L v3  @  2.00GHz
 *	Intel(R) Xeon(R)  CPU E5-2658A v3  @  2.20GHz
 *
 * Probe by trying to write the first of the L3 cache mask registers
 * and checking that the bits stick. Max CLOSids is always 4 and max cbm length
 * is always 20 on hsw server parts. The minimum cache bitmask length
 * allowed for HSW server is always 2 bits. Hardcode all of them.
 */
static inline void cache_alloc_hsw_probe(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
	struct rdt_resource *r  = &hw_res->r_resctrl;
	u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0;

	if (wrmsrl_safe(MSR_IA32_L3_CBM_BASE, max_cbm))
		return;

	rdmsrl(MSR_IA32_L3_CBM_BASE, l3_cbm_0);

	/* Bail out unless all the bits stuck in the MSR */
	if (l3_cbm_0 != max_cbm)
		return;

	hw_res->num_closid = 4;
	r->cache.cbm_len = 20;
	r->cache.shareable_bits = 0xc0000;
	r->cache.min_cbm_bits = 2;
	r->cache.arch_has_sparse_bitmasks = false;
	r->alloc_capable = true;

	rdt_alloc_capable = true;
}
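
/*
 * Editorial note: the probe above leaves CLOS0's CBM at the full 20-bit
 * mask (0xfffff), which also matches the expected power-on default of the
 * CBM MSRs, so there is nothing to restore on the success path.
 */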

bool is_mba_sc(struct rdt_resource *r)
{
	if (!r)
		r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);

	/*
	 * The software controller support is only applicable to the MBA
	 * resource. Make sure to check the resource type.
	 */
	if (r->rid != RDT_RESOURCE_MBA)
		return false;

	return r->membw.mba_sc;
}

/*
 * rdt_get_mb_table() - get a mapping between the bandwidth (b/w) percentage
 * values exposed to the user interface and the h/w understandable delay
 * values.
 *
 * The non-linear delay values have a power-of-two granularity and the h/w
 * does not guarantee a curve of configured delay values vs. actual b/w
 * enforced.
 * Hence we need a pre-calibrated mapping so the user can express the
 * memory b/w as a percentage value.
 */
static inline bool rdt_get_mb_table(struct rdt_resource *r)
{
	/*
	 * There are no Intel SKUs as of now which support non-linear delay.
	 */
	pr_info("MBA b/w map not implemented for family:%d, model:%d\n",
		boot_cpu_data.x86, boot_cpu_data.x86_model);

	return false;
}

static __init bool __get_mem_config_intel(struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	union cpuid_0x10_3_eax eax;
	union cpuid_0x10_x_edx edx;
	u32 ebx, ecx, max_delay;

	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
	hw_res->num_closid = edx.split.cos_max + 1;
	max_delay = eax.split.max_delay + 1;
	r->membw.max_bw = MAX_MBA_BW;
	r->membw.arch_needs_linear = true;
	if (ecx & MBA_IS_LINEAR) {
		r->membw.delay_linear = true;
		r->membw.min_bw = MAX_MBA_BW - max_delay;
		r->membw.bw_gran = MAX_MBA_BW - max_delay;
	} else {
		if (!rdt_get_mb_table(r))
			return false;
		r->membw.arch_needs_linear = false;
	}

	if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
	else
		r->membw.throttle_mode = THREAD_THROTTLE_MAX;

	r->alloc_capable = true;

	return true;
}
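
/*
 * Worked example for the linear case above (illustrative numbers only):
 * if CPUID.0x10.3:EAX reports a max_delay field of 89, then max_delay
 * becomes 90 and, with MAX_MBA_BW of 100, both min_bw and bw_gran are
 * 100 - 90 = 10, i.e. bandwidth can be requested in 10% steps down to a
 * 10% minimum.
 */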

static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	u32 eax, ebx, ecx, edx, subleaf;

	/*
	 * Query CPUID_Fn80000020_EDX_x01 for MBA and
	 * CPUID_Fn80000020_EDX_x02 for SMBA
	 */
	subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1;

	cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
	hw_res->num_closid = edx + 1;
	r->membw.max_bw = 1 << eax;

	/* AMD does not use delay */
	r->membw.delay_linear = false;
	r->membw.arch_needs_linear = false;

	/*
	 * AMD does not use memory delay throttle model to control
	 * the allocation like Intel does.
	 */
	r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
	r->membw.min_bw = 0;
	r->membw.bw_gran = 1;

	r->alloc_capable = true;

	return true;
}

static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	union cpuid_0x10_1_eax eax;
	union cpuid_0x10_x_ecx ecx;
	union cpuid_0x10_x_edx edx;
	u32 ebx, default_ctrl;

	cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full);
	hw_res->num_closid = edx.split.cos_max + 1;
	r->cache.cbm_len = eax.split.cbm_len + 1;
	default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
	r->cache.shareable_bits = ebx & default_ctrl;
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		r->cache.arch_has_sparse_bitmasks = ecx.split.noncont;
	r->alloc_capable = true;
}
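
/*
 * Example (illustrative numbers only): a CPUID cbm_len field of 19 yields
 * r->cache.cbm_len = 20 and a full-mask default control value of 0xfffff;
 * shareable_bits then keeps only those bits that EBX reports as shared
 * with other agents.
 */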

static void rdt_get_cdp_config(int level)
{
	/*
	 * By default, CDP is disabled. CDP can be enabled by mount parameter
	 * "cdp" during resctrl file system mount time.
	 */
	rdt_resources_all[level].cdp_enabled = false;
	rdt_resources_all[level].r_resctrl.cdp_capable = true;
}

static void rdt_get_cdp_l3_config(void)
{
	rdt_get_cdp_config(RDT_RESOURCE_L3);
}

static void rdt_get_cdp_l2_config(void)
{
	rdt_get_cdp_config(RDT_RESOURCE_L2);
}

static void mba_wrmsr_amd(struct msr_param *m)
{
	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	unsigned int i;

	for (i = m->low; i < m->high; i++)
		wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}

/*
 * Map the memory b/w percentage value to delay values
 * that can be written to QOS_MSRs.
 * There are currently no SKUs which support non-linear delay values.
 */
static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
{
	if (r->membw.delay_linear)
		return MAX_MBA_BW - bw;

	pr_warn_once("Non Linear delay-bw map not supported but queried\n");
	return MAX_MBA_BW;
}
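
/*
 * Example (linear delay, assuming MAX_MBA_BW of 100): a requested
 * bandwidth of 70% is programmed into the throttle MSR as a delay
 * value of 100 - 70 = 30.
 */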

static void mba_wrmsr_intel(struct msr_param *m)
{
	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	unsigned int i;

	/* Write the delay values for MBA. */
	for (i = m->low; i < m->high; i++)
		wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res));
}

static void cat_wrmsr(struct msr_param *m)
{
	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	unsigned int i;

	for (i = m->low; i < m->high; i++)
		wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}

u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
{
	return resctrl_to_arch_res(r)->num_closid;
}

void rdt_ctrl_update(void *arg)
{
	struct rdt_hw_resource *hw_res;
	struct msr_param *m = arg;

	hw_res = resctrl_to_arch_res(m->res);
	hw_res->msr_update(m);
}

static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	int i;

	/*
	 * Initialize the Control MSRs to have no control.
	 * For Cache Allocation: set all bits in the CBM.
	 * For Memory Allocation: set the requested b/w to 100%.
	 */
	for (i = 0; i < hw_res->num_closid; i++, dc++)
		*dc = resctrl_get_default_ctrl(r);
}

static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
{
	kfree(hw_dom->ctrl_val);
	kfree(hw_dom);
}

static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom)
{
	kfree(hw_dom->arch_mbm_total);
	kfree(hw_dom->arch_mbm_local);
	kfree(hw_dom);
}

static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	struct msr_param m;
	u32 *dc;

	dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val),
			   GFP_KERNEL);
	if (!dc)
		return -ENOMEM;

	hw_dom->ctrl_val = dc;
	setup_default_ctrlval(r, dc);

	m.res = r;
	m.dom = d;
	m.low = 0;
	m.high = hw_res->num_closid;
	hw_res->msr_update(&m);
	return 0;
}

/**
 * arch_domain_mbm_alloc() - Allocate arch private storage for the MBM counters
 * @num_rmid:	The size of the MBM counter array
 * @hw_dom:	The domain that owns the allocated arrays
 */
static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
{
	size_t tsize;

	if (resctrl_arch_is_mbm_total_enabled()) {
		tsize = sizeof(*hw_dom->arch_mbm_total);
		hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
		if (!hw_dom->arch_mbm_total)
			return -ENOMEM;
	}
	if (resctrl_arch_is_mbm_local_enabled()) {
		tsize = sizeof(*hw_dom->arch_mbm_local);
		hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
		if (!hw_dom->arch_mbm_local) {
			kfree(hw_dom->arch_mbm_total);
			hw_dom->arch_mbm_total = NULL;
			return -ENOMEM;
		}
	}

	return 0;
}

static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
{
	switch (scope) {
	case RESCTRL_L2_CACHE:
	case RESCTRL_L3_CACHE:
		return get_cpu_cacheinfo_id(cpu, scope);
	case RESCTRL_L3_NODE:
		return cpu_to_node(cpu);
	default:
		break;
	}

	return -EINVAL;
}

static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
	struct rdt_hw_ctrl_domain *hw_dom;
	struct list_head *add_pos = NULL;
	struct rdt_domain_hdr *hdr;
	struct rdt_ctrl_domain *d;
	int err;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->ctrl_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->ctrl_domains, id, &add_pos);
	if (hdr) {
		if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
			return;
		d = container_of(hdr, struct rdt_ctrl_domain, hdr);

		cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
		if (r->cache.arch_has_per_cpu_cfg)
			rdt_domain_reconfigure_cdp(r);
		return;
	}

	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
	if (!hw_dom)
		return;

	d = &hw_dom->d_resctrl;
	d->hdr.id = id;
	d->hdr.type = RESCTRL_CTRL_DOMAIN;
	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

	rdt_domain_reconfigure_cdp(r);

	if (domain_setup_ctrlval(r, d)) {
		ctrl_domain_free(hw_dom);
		return;
	}

	list_add_tail_rcu(&d->hdr.list, add_pos);

	err = resctrl_online_ctrl_domain(r, d);
	if (err) {
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		ctrl_domain_free(hw_dom);
	}
}

static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->mon_scope);
	struct list_head *add_pos = NULL;
	struct rdt_hw_mon_domain *hw_dom;
	struct rdt_domain_hdr *hdr;
	struct rdt_mon_domain *d;
	int err;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->mon_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos);
	if (hdr) {
		if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
			return;
		d = container_of(hdr, struct rdt_mon_domain, hdr);

		cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
		return;
	}

	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
	if (!hw_dom)
		return;

	d = &hw_dom->d_resctrl;
	d->hdr.id = id;
	d->hdr.type = RESCTRL_MON_DOMAIN;
	d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
	if (!d->ci) {
		pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
		mon_domain_free(hw_dom);
		return;
	}
	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

	arch_mon_domain_online(r, d);

	if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
		mon_domain_free(hw_dom);
		return;
	}

	list_add_tail_rcu(&d->hdr.list, add_pos);

	err = resctrl_online_mon_domain(r, d);
	if (err) {
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		mon_domain_free(hw_dom);
	}
}

static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
	if (r->alloc_capable)
		domain_add_cpu_ctrl(cpu, r);
	if (r->mon_capable)
		domain_add_cpu_mon(cpu, r);
}

static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
	struct rdt_hw_ctrl_domain *hw_dom;
	struct rdt_domain_hdr *hdr;
	struct rdt_ctrl_domain *d;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->ctrl_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->ctrl_domains, id, NULL);
	if (!hdr) {
		pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n",
			id, cpu, r->name);
		return;
	}

	if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
		return;

	d = container_of(hdr, struct rdt_ctrl_domain, hdr);
	hw_dom = resctrl_to_arch_ctrl_dom(d);

	cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
	if (cpumask_empty(&d->hdr.cpu_mask)) {
		resctrl_offline_ctrl_domain(r, d);
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();

		/*
		 * rdt_ctrl_domain "d" is going to be freed below, so clear
		 * its pointer from pseudo_lock_region struct.
		 */
		if (d->plr)
			d->plr->d = NULL;
		ctrl_domain_free(hw_dom);

		return;
	}
}

static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->mon_scope);
	struct rdt_hw_mon_domain *hw_dom;
	struct rdt_domain_hdr *hdr;
	struct rdt_mon_domain *d;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->mon_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->mon_domains, id, NULL);
	if (!hdr) {
		pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n",
			id, cpu, r->name);
		return;
	}

	if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
		return;

	d = container_of(hdr, struct rdt_mon_domain, hdr);
	hw_dom = resctrl_to_arch_mon_dom(d);

	cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
	if (cpumask_empty(&d->hdr.cpu_mask)) {
		resctrl_offline_mon_domain(r, d);
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		mon_domain_free(hw_dom);

		return;
	}
}

static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
	if (r->alloc_capable)
		domain_remove_cpu_ctrl(cpu, r);
	if (r->mon_capable)
		domain_remove_cpu_mon(cpu, r);
}

static void clear_closid_rmid(int cpu)
{
	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);

	state->default_closid = RESCTRL_RESERVED_CLOSID;
	state->default_rmid = RESCTRL_RESERVED_RMID;
	state->cur_closid = RESCTRL_RESERVED_CLOSID;
	state->cur_rmid = RESCTRL_RESERVED_RMID;
	wrmsr(MSR_IA32_PQR_ASSOC, RESCTRL_RESERVED_RMID,
	      RESCTRL_RESERVED_CLOSID);
}
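
/*
 * Note on the wrmsr() above: wrmsr(msr, lo, hi) places 'lo' in EAX and
 * 'hi' in EDX, so PQR_ASSOC ends up with the reserved RMID in its low
 * half and the reserved CLOSID in its high half, which matches the
 * architectural layout of that MSR.
 */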

static int resctrl_arch_online_cpu(unsigned int cpu)
{
	struct rdt_resource *r;

	mutex_lock(&domain_list_lock);
	for_each_capable_rdt_resource(r)
		domain_add_cpu(cpu, r);
	mutex_unlock(&domain_list_lock);

	clear_closid_rmid(cpu);
	resctrl_online_cpu(cpu);

	return 0;
}

static int resctrl_arch_offline_cpu(unsigned int cpu)
{
	struct rdt_resource *r;

	resctrl_offline_cpu(cpu);

	mutex_lock(&domain_list_lock);
	for_each_capable_rdt_resource(r)
		domain_remove_cpu(cpu, r);
	mutex_unlock(&domain_list_lock);

	clear_closid_rmid(cpu);

	return 0;
}

enum {
	RDT_FLAG_CMT,
	RDT_FLAG_MBM_TOTAL,
	RDT_FLAG_MBM_LOCAL,
	RDT_FLAG_L3_CAT,
	RDT_FLAG_L3_CDP,
	RDT_FLAG_L2_CAT,
	RDT_FLAG_L2_CDP,
	RDT_FLAG_MBA,
	RDT_FLAG_SMBA,
	RDT_FLAG_BMEC,
};

#define RDT_OPT(idx, n, f)	\
[idx] = {			\
	.name = n,		\
	.flag = f		\
}

struct rdt_options {
	char	*name;
	int	flag;
	bool	force_off, force_on;
};

static struct rdt_options rdt_options[] __initdata = {
	RDT_OPT(RDT_FLAG_CMT,	    "cmt",	X86_FEATURE_CQM_OCCUP_LLC),
	RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL),
	RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL),
	RDT_OPT(RDT_FLAG_L3_CAT,    "l3cat",	X86_FEATURE_CAT_L3),
	RDT_OPT(RDT_FLAG_L3_CDP,    "l3cdp",	X86_FEATURE_CDP_L3),
	RDT_OPT(RDT_FLAG_L2_CAT,    "l2cat",	X86_FEATURE_CAT_L2),
	RDT_OPT(RDT_FLAG_L2_CDP,    "l2cdp",	X86_FEATURE_CDP_L2),
	RDT_OPT(RDT_FLAG_MBA,	    "mba",	X86_FEATURE_MBA),
	RDT_OPT(RDT_FLAG_SMBA,	    "smba",	X86_FEATURE_SMBA),
	RDT_OPT(RDT_FLAG_BMEC,	    "bmec",	X86_FEATURE_BMEC),
};
#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)

static int __init set_rdt_options(char *str)
{
	struct rdt_options *o;
	bool force_off;
	char *tok;

	if (*str == '=')
		str++;
	while ((tok = strsep(&str, ",")) != NULL) {
		force_off = *tok == '!';
		if (force_off)
			tok++;
		for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
			if (strcmp(tok, o->name) == 0) {
				if (force_off)
					o->force_off = true;
				else
					o->force_on = true;
				break;
			}
		}
	}
	return 1;
}
__setup("rdt", set_rdt_options);
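
/*
 * Usage example (hypothetical command line): booting with
 *
 *	rdt=cmt,!l3cat
 *
 * force-enables "cmt" and force-disables "l3cat". Tokens are comma
 * separated and a leading '!' marks a feature as force-off.
 */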

bool __init rdt_cpu_has(int flag)
{
	bool ret = boot_cpu_has(flag);
	struct rdt_options *o;

	if (!ret)
		return ret;

	for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
		if (flag == o->flag) {
			if (o->force_off)
				ret = false;
			if (o->force_on)
				ret = true;
			break;
		}
	}
	return ret;
}

__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
{
	if (!rdt_cpu_has(X86_FEATURE_BMEC))
		return false;

	switch (evt) {
	case QOS_L3_MBM_TOTAL_EVENT_ID:
		return rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL);
	case QOS_L3_MBM_LOCAL_EVENT_ID:
		return rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL);
	default:
		return false;
	}
}

static __init bool get_mem_config(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];

	if (!rdt_cpu_has(X86_FEATURE_MBA))
		return false;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		return __get_mem_config_intel(&hw_res->r_resctrl);
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

	return false;
}

static __init bool get_slow_mem_config(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_SMBA];

	if (!rdt_cpu_has(X86_FEATURE_SMBA))
		return false;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

	return false;
}

static __init bool get_rdt_alloc_resources(void)
{
	struct rdt_resource *r;
	bool ret = false;

	if (rdt_alloc_capable)
		return true;

	if (!boot_cpu_has(X86_FEATURE_RDT_A))
		return false;

	if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
		rdt_get_cache_alloc_cfg(1, r);
		if (rdt_cpu_has(X86_FEATURE_CDP_L3))
			rdt_get_cdp_l3_config();
		ret = true;
	}
	if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
		/* CPUID 0x10.2 fields have the same format as 0x10.1 */
		r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl;
		rdt_get_cache_alloc_cfg(2, r);
		if (rdt_cpu_has(X86_FEATURE_CDP_L2))
			rdt_get_cdp_l2_config();
		ret = true;
	}

	if (get_mem_config())
		ret = true;

	if (get_slow_mem_config())
		ret = true;

	return ret;
}

static __init bool get_rdt_mon_resources(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
		rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
		rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);

	if (!rdt_mon_features)
		return false;

	return !rdt_get_mon_l3_config(r);
}

static __init void __check_quirks_intel(void)
{
	switch (boot_cpu_data.x86_vfm) {
	case INTEL_HASWELL_X:
		if (!rdt_options[RDT_FLAG_L3_CAT].force_off)
			cache_alloc_hsw_probe();
		break;
	case INTEL_SKYLAKE_X:
		if (boot_cpu_data.x86_stepping <= 4)
			set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
		else
			set_rdt_options("!l3cat");
		fallthrough;
	case INTEL_BROADWELL_X:
		intel_rdt_mbm_apply_quirk();
		break;
	}
}

static __init void check_quirks(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		__check_quirks_intel();
}

static __init bool get_rdt_resources(void)
{
	rdt_alloc_capable = get_rdt_alloc_resources();
	rdt_mon_capable = get_rdt_mon_resources();

	return (rdt_mon_capable || rdt_alloc_capable);
}

static __init void rdt_init_res_defs_intel(void)
{
	struct rdt_hw_resource *hw_res;
	struct rdt_resource *r;

	for_each_rdt_resource(r) {
		hw_res = resctrl_to_arch_res(r);

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_per_cpu_cfg = false;
			r->cache.min_cbm_bits = 1;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
			hw_res->msr_update = mba_wrmsr_intel;
		}
	}
}

static __init void rdt_init_res_defs_amd(void)
{
	struct rdt_hw_resource *hw_res;
	struct rdt_resource *r;

	for_each_rdt_resource(r) {
		hw_res = resctrl_to_arch_res(r);

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_sparse_bitmasks = true;
			r->cache.arch_has_per_cpu_cfg = true;
			r->cache.min_cbm_bits = 0;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
			hw_res->msr_update = mba_wrmsr_amd;
		} else if (r->rid == RDT_RESOURCE_SMBA) {
			hw_res->msr_base = MSR_IA32_SMBA_BW_BASE;
			hw_res->msr_update = mba_wrmsr_amd;
		}
	}
}

static __init void rdt_init_res_defs(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		rdt_init_res_defs_intel();
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		rdt_init_res_defs_amd();
}

static enum cpuhp_state rdt_online;

/* Runs once on the BSP during boot. */
void resctrl_cpu_detect(struct cpuinfo_x86 *c)
{
	if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
		c->x86_cache_max_rmid  = -1;
		c->x86_cache_occ_scale = -1;
		c->x86_cache_mbm_width_offset = -1;
		return;
	}

	/* will be overridden if occupancy monitoring exists */
	c->x86_cache_max_rmid = cpuid_ebx(0xf);

	if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
	    cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
	    cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
		u32 eax, ebx, ecx, edx;

		/* QoS sub-leaf, EAX=0Fh, ECX=1 */
		cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);

		c->x86_cache_max_rmid  = ecx;
		c->x86_cache_occ_scale = ebx;
		c->x86_cache_mbm_width_offset = eax & 0xff;

		if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset)
			c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
	}
}
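
/*
 * Example for the CPUID.0xF.1 read above (illustrative values): a part
 * reporting ECX=255 and EBX=64 provides RMIDs 0-255 and an occupancy
 * upscaling factor of 64 bytes per reported count.
 */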

static int __init resctrl_arch_late_init(void)
{
	struct rdt_resource *r;
	int state, ret;

	/*
	 * Initialize functions (or definitions) that differ
	 * between vendors here.
	 */
	rdt_init_res_defs();

	check_quirks();

	if (!get_rdt_resources())
		return -ENODEV;

	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
				  "x86/resctrl/cat:online:",
				  resctrl_arch_online_cpu,
				  resctrl_arch_offline_cpu);
	if (state < 0)
		return state;

	ret = resctrl_init();
	if (ret) {
		cpuhp_remove_state(state);
		return ret;
	}
	rdt_online = state;

	for_each_alloc_capable_rdt_resource(r)
		pr_info("%s allocation detected\n", r->name);

	for_each_mon_capable_rdt_resource(r)
		pr_info("%s monitoring detected\n", r->name);

	return 0;
}

late_initcall(resctrl_arch_late_init);

static void __exit resctrl_arch_exit(void)
{
	cpuhp_remove_state(rdt_online);

	resctrl_exit();
}

__exitcall(resctrl_arch_exit);