// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology (RDT)
 * - Cache Allocation code.
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Authors:
 *    Fenghua Yu <fenghua.yu@intel.com>
 *    Tony Luck <tony.luck@intel.com>
 *    Vikas Shivappa <vikas.shivappa@intel.com>
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual June 2016, volume 3, section 17.17.
 */

#define pr_fmt(fmt)	"resctrl: " fmt

#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/cpuhotplug.h>

#include <asm/cpu_device_id.h>
#include <asm/resctrl.h>
#include "internal.h"

/*
 * rdt_domain structures are kfree()d when their last CPU goes offline,
 * and allocated when the first CPU in a new domain comes online.
 * The rdt_resource's domain list is updated when this happens. Readers of
 * the domain list must either take cpus_read_lock(), or rely on an RCU
 * read-side critical section, to avoid observing concurrent modification.
 * All writers take this mutex:
 */
static DEFINE_MUTEX(domain_list_lock);

/*
 * The cached resctrl_pqr_state is strictly per CPU and can never be
 * updated from a remote CPU. Functions which modify the state
 * are called with interrupts disabled and no preemption, which
 * is sufficient for the protection.
 */
DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);

/*
 * Global boolean for rdt_alloc which is true if any
 * resource allocation is enabled.
 */
bool rdt_alloc_capable;

static void mba_wrmsr_intel(struct msr_param *m);
static void cat_wrmsr(struct msr_param *m);
static void mba_wrmsr_amd(struct msr_param *m);

#define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains)
#define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains)

struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
	[RDT_RESOURCE_L3] =
	{
		.r_resctrl = {
			.rid = RDT_RESOURCE_L3,
			.name = "L3",
			.ctrl_scope = RESCTRL_L3_CACHE,
			.mon_scope = RESCTRL_L3_CACHE,
			.ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L3),
			.mon_domains = mon_domain_init(RDT_RESOURCE_L3),
			.schema_fmt = RESCTRL_SCHEMA_BITMAP,
		},
		.msr_base = MSR_IA32_L3_CBM_BASE,
		.msr_update = cat_wrmsr,
	},
	[RDT_RESOURCE_L2] =
	{
		.r_resctrl = {
			.rid = RDT_RESOURCE_L2,
			.name = "L2",
			.ctrl_scope = RESCTRL_L2_CACHE,
			.ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2),
			.schema_fmt = RESCTRL_SCHEMA_BITMAP,
		},
		.msr_base = MSR_IA32_L2_CBM_BASE,
		.msr_update = cat_wrmsr,
	},
	[RDT_RESOURCE_MBA] =
	{
		.r_resctrl = {
			.rid = RDT_RESOURCE_MBA,
			.name = "MB",
			.ctrl_scope = RESCTRL_L3_CACHE,
			.ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA),
			.schema_fmt = RESCTRL_SCHEMA_RANGE,
		},
	},
	[RDT_RESOURCE_SMBA] =
	{
		.r_resctrl = {
			.rid = RDT_RESOURCE_SMBA,
			.name = "SMBA",
			.ctrl_scope = RESCTRL_L3_CACHE,
			.ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA),
			.schema_fmt = RESCTRL_SCHEMA_RANGE,
		},
	},
};

u32 resctrl_arch_system_num_rmid_idx(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

	/* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
	return r->num_rmid;
}

struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
{
	if (l >= RDT_NUM_RESOURCES)
		return NULL;

	return &rdt_resources_all[l].r_resctrl;
}

/*
 * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
 * as they do not have CPUID enumeration support for Cache allocation.
 * The check for Vendor/Family/Model is not enough to guarantee that
 * the MSRs won't #GP fault because only the following SKUs support
 * CAT:
 *	Intel(R) Xeon(R)  CPU E5-2658  v3  @  2.20GHz
 *	Intel(R) Xeon(R)  CPU E5-2648L v3  @  1.80GHz
 *	Intel(R) Xeon(R)  CPU E5-2628L v3  @  2.00GHz
 *	Intel(R) Xeon(R)  CPU E5-2618L v3  @  2.30GHz
 *	Intel(R) Xeon(R)  CPU E5-2608L v3  @  2.00GHz
 *	Intel(R) Xeon(R)  CPU E5-2658A v3  @  2.20GHz
 *
 * Probe by trying to write the first of the L3 cache mask registers
 * and checking that the bits stick. Max CLOSids is always 4 and max cbm length
 * is always 20 on hsw server parts. The minimum cache bitmask length
 * allowed for HSW server is always 2 bits. Hardcode all of them.
 */
static inline void cache_alloc_hsw_probe(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
	struct rdt_resource *r = &hw_res->r_resctrl;
	u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0;

	if (wrmsrl_safe(MSR_IA32_L3_CBM_BASE, max_cbm))
		return;

	rdmsrl(MSR_IA32_L3_CBM_BASE, l3_cbm_0);

	/* If all the bits were set in MSR, return success */
	if (l3_cbm_0 != max_cbm)
		return;

	hw_res->num_closid = 4;
	r->cache.cbm_len = 20;
	r->cache.shareable_bits = 0xc0000;
	r->cache.min_cbm_bits = 2;
	r->cache.arch_has_sparse_bitmasks = false;
	r->alloc_capable = true;

	rdt_alloc_capable = true;
}

bool is_mba_sc(struct rdt_resource *r)
{
	if (!r)
		r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);

	/*
	 * The software controller support is only applicable to MBA resource.
	 * Make sure to check for resource type.
	 */
	if (r->rid != RDT_RESOURCE_MBA)
		return false;

	return r->membw.mba_sc;
}

/*
 * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
 * exposed to user interface and the h/w understandable delay values.
 *
 * The non-linear delay values have the granularity of power of two
 * and also the h/w does not guarantee a curve for configured delay
 * values vs. actual b/w enforced.
 * Hence we need a mapping that is pre calibrated so the user can
 * express the memory b/w as a percentage value.
 */
static inline bool rdt_get_mb_table(struct rdt_resource *r)
{
	/*
	 * There are no Intel SKUs as of now to support non-linear delay.
	 */
	pr_info("MBA b/w map not implemented for cpu:%d, model:%d",
		boot_cpu_data.x86, boot_cpu_data.x86_model);

	return false;
}

static __init bool __get_mem_config_intel(struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	union cpuid_0x10_3_eax eax;
	union cpuid_0x10_x_edx edx;
	u32 ebx, ecx, max_delay;

	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
	hw_res->num_closid = edx.split.cos_max + 1;
	max_delay = eax.split.max_delay + 1;
	r->membw.max_bw = MAX_MBA_BW;
	r->membw.arch_needs_linear = true;
	if (ecx & MBA_IS_LINEAR) {
		r->membw.delay_linear = true;
		r->membw.min_bw = MAX_MBA_BW - max_delay;
		r->membw.bw_gran = MAX_MBA_BW - max_delay;
	} else {
		if (!rdt_get_mb_table(r))
			return false;
		r->membw.arch_needs_linear = false;
	}

	if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
	else
		r->membw.throttle_mode = THREAD_THROTTLE_MAX;

	r->alloc_capable = true;

	return true;
}

static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	u32 eax, ebx, ecx, edx, subleaf;

	/*
	 * Query CPUID_Fn80000020_EDX_x01 for MBA and
	 * CPUID_Fn80000020_EDX_x02 for SMBA
	 */
	subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1;

	cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
	hw_res->num_closid = edx + 1;
	r->membw.max_bw = 1 << eax;

	/* AMD does not use delay */
	r->membw.delay_linear = false;
	r->membw.arch_needs_linear = false;

	/*
	 * AMD does not use memory delay throttle model to control
	 * the allocation like Intel does.
	 */
	r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
	r->membw.min_bw = 0;
	r->membw.bw_gran = 1;

	r->alloc_capable = true;

	return true;
}

static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	union cpuid_0x10_1_eax eax;
	union cpuid_0x10_x_ecx ecx;
	union cpuid_0x10_x_edx edx;
	u32 ebx, default_ctrl;

	cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full);
	hw_res->num_closid = edx.split.cos_max + 1;
	r->cache.cbm_len = eax.split.cbm_len + 1;
	default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
	r->cache.shareable_bits = ebx & default_ctrl;
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		r->cache.arch_has_sparse_bitmasks = ecx.split.noncont;
	r->alloc_capable = true;
}

static void rdt_get_cdp_config(int level)
{
	/*
	 * By default, CDP is disabled. CDP can be enabled by mount parameter
	 * "cdp" during resctrl file system mount time.
	 */
	rdt_resources_all[level].cdp_enabled = false;
	rdt_resources_all[level].r_resctrl.cdp_capable = true;
}

static void rdt_get_cdp_l3_config(void)
{
	rdt_get_cdp_config(RDT_RESOURCE_L3);
}

static void rdt_get_cdp_l2_config(void)
{
	rdt_get_cdp_config(RDT_RESOURCE_L2);
}
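/*
 * Write the bandwidth control values for the CLOSIDs in [m->low, m->high)
 * of this control domain to the AMD bandwidth MSRs. AMD hardware takes
 * the bandwidth value directly, so no delay mapping is applied.
 */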
static void mba_wrmsr_amd(struct msr_param *m)
{
	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	unsigned int i;

	for (i = m->low; i < m->high; i++)
		wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}

/*
 * Map the memory b/w percentage value to delay values
 * that can be written to QOS_MSRs.
 * There are currently no SKUs which support non linear delay values.
 */
static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
{
	if (r->membw.delay_linear)
		return MAX_MBA_BW - bw;

	pr_warn_once("Non Linear delay-bw map not supported but queried\n");
	return MAX_MBA_BW;
}

static void mba_wrmsr_intel(struct msr_param *m)
{
	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	unsigned int i;

	/* Write the delay values for mba. */
	for (i = m->low; i < m->high; i++)
		wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res));
}
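/*
 * Write the cache bitmask control values for the CLOSIDs in
 * [m->low, m->high) of this control domain to the CBM MSRs.
 */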
static void cat_wrmsr(struct msr_param *m)
{
	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	unsigned int i;

	for (i = m->low; i < m->high; i++)
		wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}

u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
{
	return resctrl_to_arch_res(r)->num_closid;
}
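/*
 * Apply the control MSR updates described by @arg (a struct msr_param)
 * on the CPU executing this function, using the resource's vendor
 * specific msr_update() helper.
 */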
void rdt_ctrl_update(void *arg)
{
	struct rdt_hw_resource *hw_res;
	struct msr_param *m = arg;

	hw_res = resctrl_to_arch_res(m->res);
	hw_res->msr_update(m);
}

static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	int i;

	/*
	 * Initialize the Control MSRs to having no control.
	 * For Cache Allocation: Set all bits in cbm
	 * For Memory Allocation: Set b/w requested to 100%
	 */
	for (i = 0; i < hw_res->num_closid; i++, dc++)
		*dc = resctrl_get_default_ctrl(r);
}

static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
{
	kfree(hw_dom->ctrl_val);
	kfree(hw_dom);
}

static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom)
{
	kfree(hw_dom->arch_mbm_total);
	kfree(hw_dom->arch_mbm_local);
	kfree(hw_dom);
}
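/*
 * Allocate the array of per-CLOSID control values for a new control
 * domain, fill it with the default (no restriction) value and program
 * the corresponding hardware MSRs.
 */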
static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	struct msr_param m;
	u32 *dc;

	dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val),
			   GFP_KERNEL);
	if (!dc)
		return -ENOMEM;

	hw_dom->ctrl_val = dc;
	setup_default_ctrlval(r, dc);

	m.res = r;
	m.dom = d;
	m.low = 0;
	m.high = hw_res->num_closid;
	hw_res->msr_update(&m);
	return 0;
}

/**
 * arch_domain_mbm_alloc() - Allocate arch private storage for the MBM counters
 * @num_rmid:	The size of the MBM counter array
 * @hw_dom:	The domain that owns the allocated arrays
 */
static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
{
	size_t tsize;

	if (resctrl_arch_is_mbm_total_enabled()) {
		tsize = sizeof(*hw_dom->arch_mbm_total);
		hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
		if (!hw_dom->arch_mbm_total)
			return -ENOMEM;
	}
	if (resctrl_arch_is_mbm_local_enabled()) {
		tsize = sizeof(*hw_dom->arch_mbm_local);
		hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
		if (!hw_dom->arch_mbm_local) {
			kfree(hw_dom->arch_mbm_total);
			hw_dom->arch_mbm_total = NULL;
			return -ENOMEM;
		}
	}

	return 0;
}
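/*
 * Return the domain id for @cpu under the given scope: the cache instance
 * id for cache scoped resources, the NUMA node id for node scoped ones,
 * or -EINVAL for an unknown scope.
 */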
static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
{
	switch (scope) {
	case RESCTRL_L2_CACHE:
	case RESCTRL_L3_CACHE:
		return get_cpu_cacheinfo_id(cpu, scope);
	case RESCTRL_L3_NODE:
		return cpu_to_node(cpu);
	default:
		break;
	}

	return -EINVAL;
}
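/*
 * Add @cpu to the control domain matching its domain id. If no such
 * domain exists yet, allocate one, program its default control values
 * and insert it into the resource's control domain list.
 */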
static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
	struct rdt_hw_ctrl_domain *hw_dom;
	struct list_head *add_pos = NULL;
	struct rdt_domain_hdr *hdr;
	struct rdt_ctrl_domain *d;
	int err;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->ctrl_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->ctrl_domains, id, &add_pos);
	if (hdr) {
		if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
			return;
		d = container_of(hdr, struct rdt_ctrl_domain, hdr);

		cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
		if (r->cache.arch_has_per_cpu_cfg)
			rdt_domain_reconfigure_cdp(r);
		return;
	}

	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
	if (!hw_dom)
		return;

	d = &hw_dom->d_resctrl;
	d->hdr.id = id;
	d->hdr.type = RESCTRL_CTRL_DOMAIN;
	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

	rdt_domain_reconfigure_cdp(r);

	if (domain_setup_ctrlval(r, d)) {
		ctrl_domain_free(hw_dom);
		return;
	}

	list_add_tail_rcu(&d->hdr.list, add_pos);

	err = resctrl_online_ctrl_domain(r, d);
	if (err) {
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		ctrl_domain_free(hw_dom);
	}
}
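/*
 * Add @cpu to the monitor domain matching its domain id. If no such
 * domain exists yet, allocate one along with its MBM counter state and
 * insert it into the resource's monitor domain list.
 */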
static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->mon_scope);
	struct list_head *add_pos = NULL;
	struct rdt_hw_mon_domain *hw_dom;
	struct rdt_domain_hdr *hdr;
	struct rdt_mon_domain *d;
	int err;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->mon_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos);
	if (hdr) {
		if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
			return;
		d = container_of(hdr, struct rdt_mon_domain, hdr);

		cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
		return;
	}

	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
	if (!hw_dom)
		return;

	d = &hw_dom->d_resctrl;
	d->hdr.id = id;
	d->hdr.type = RESCTRL_MON_DOMAIN;
	d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
	if (!d->ci) {
		pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
		mon_domain_free(hw_dom);
		return;
	}
	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

	arch_mon_domain_online(r, d);

	if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
		mon_domain_free(hw_dom);
		return;
	}

	list_add_tail_rcu(&d->hdr.list, add_pos);

	err = resctrl_online_mon_domain(r, d);
	if (err) {
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		mon_domain_free(hw_dom);
	}
}
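/*
 * Add @cpu to the relevant domain list(s) of @r, depending on whether
 * the resource is allocation and/or monitor capable.
 */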
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
	if (r->alloc_capable)
		domain_add_cpu_ctrl(cpu, r);
	if (r->mon_capable)
		domain_add_cpu_mon(cpu, r);
}
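/*
 * Remove @cpu from its control domain. When the last CPU leaves, take
 * the domain offline, unlink it from the domain list and free it.
 */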
static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
	struct rdt_hw_ctrl_domain *hw_dom;
	struct rdt_domain_hdr *hdr;
	struct rdt_ctrl_domain *d;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->ctrl_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->ctrl_domains, id, NULL);
	if (!hdr) {
		pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n",
			id, cpu, r->name);
		return;
	}

	if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
		return;

	d = container_of(hdr, struct rdt_ctrl_domain, hdr);
	hw_dom = resctrl_to_arch_ctrl_dom(d);

	cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
	if (cpumask_empty(&d->hdr.cpu_mask)) {
		resctrl_offline_ctrl_domain(r, d);
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();

		/*
		 * rdt_ctrl_domain "d" is going to be freed below, so clear
		 * its pointer from pseudo_lock_region struct.
		 */
		if (d->plr)
			d->plr->d = NULL;
		ctrl_domain_free(hw_dom);

		return;
	}
}

static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->mon_scope);
	struct rdt_hw_mon_domain *hw_dom;
	struct rdt_domain_hdr *hdr;
	struct rdt_mon_domain *d;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->mon_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->mon_domains, id, NULL);
	if (!hdr) {
		pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n",
			id, cpu, r->name);
		return;
	}

	if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
		return;

	d = container_of(hdr, struct rdt_mon_domain, hdr);
	hw_dom = resctrl_to_arch_mon_dom(d);

	cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
	if (cpumask_empty(&d->hdr.cpu_mask)) {
		resctrl_offline_mon_domain(r, d);
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		mon_domain_free(hw_dom);

		return;
	}
}

static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
	if (r->alloc_capable)
		domain_remove_cpu_ctrl(cpu, r);
	if (r->mon_capable)
		domain_remove_cpu_mon(cpu, r);
}
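/*
 * Reset the cached CLOSID/RMID of this CPU and the PQR_ASSOC MSR to the
 * reserved defaults.
 */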
static void clear_closid_rmid(int cpu)
{
	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);

	state->default_closid = RESCTRL_RESERVED_CLOSID;
	state->default_rmid = RESCTRL_RESERVED_RMID;
	state->cur_closid = RESCTRL_RESERVED_CLOSID;
	state->cur_rmid = RESCTRL_RESERVED_RMID;
	wrmsr(MSR_IA32_PQR_ASSOC, RESCTRL_RESERVED_RMID,
	      RESCTRL_RESERVED_CLOSID);
}
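/*
 * CPU hotplug online callback: add the CPU to the domains of all capable
 * resources, reset its CLOSID/RMID state and notify the core resctrl code.
 */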
static int resctrl_arch_online_cpu(unsigned int cpu)
{
	struct rdt_resource *r;

	mutex_lock(&domain_list_lock);
	for_each_capable_rdt_resource(r)
		domain_add_cpu(cpu, r);
	mutex_unlock(&domain_list_lock);

	clear_closid_rmid(cpu);
	resctrl_online_cpu(cpu);

	return 0;
}

static int resctrl_arch_offline_cpu(unsigned int cpu)
{
	struct rdt_resource *r;

	resctrl_offline_cpu(cpu);

	mutex_lock(&domain_list_lock);
	for_each_capable_rdt_resource(r)
		domain_remove_cpu(cpu, r);
	mutex_unlock(&domain_list_lock);

	clear_closid_rmid(cpu);

	return 0;
}

enum {
	RDT_FLAG_CMT,
	RDT_FLAG_MBM_TOTAL,
	RDT_FLAG_MBM_LOCAL,
	RDT_FLAG_L3_CAT,
	RDT_FLAG_L3_CDP,
	RDT_FLAG_L2_CAT,
	RDT_FLAG_L2_CDP,
	RDT_FLAG_MBA,
	RDT_FLAG_SMBA,
	RDT_FLAG_BMEC,
};

#define RDT_OPT(idx, n, f)	\
[idx] = {			\
	.name = n,		\
	.flag = f		\
}

struct rdt_options {
	char	*name;
	int	flag;
	bool	force_off, force_on;
};

static struct rdt_options rdt_options[]  __initdata = {
	RDT_OPT(RDT_FLAG_CMT,	    "cmt",	X86_FEATURE_CQM_OCCUP_LLC),
	RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL),
	RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL),
	RDT_OPT(RDT_FLAG_L3_CAT,    "l3cat",	X86_FEATURE_CAT_L3),
	RDT_OPT(RDT_FLAG_L3_CDP,    "l3cdp",	X86_FEATURE_CDP_L3),
	RDT_OPT(RDT_FLAG_L2_CAT,    "l2cat",	X86_FEATURE_CAT_L2),
	RDT_OPT(RDT_FLAG_L2_CDP,    "l2cdp",	X86_FEATURE_CDP_L2),
	RDT_OPT(RDT_FLAG_MBA,	    "mba",	X86_FEATURE_MBA),
	RDT_OPT(RDT_FLAG_SMBA,	    "smba",	X86_FEATURE_SMBA),
	RDT_OPT(RDT_FLAG_BMEC,	    "bmec",	X86_FEATURE_BMEC),
};
#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
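/*
 * Set or unset RDT features from the "rdt=" boot parameter. @str is a
 * comma separated list of option names; a '!' prefix forces the feature
 * off, otherwise it is forced on. E.g. "rdt=cmt,!mba" forces cmt on and
 * mba off, overriding the CPUID enumeration used for all other features.
 */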
static int __init set_rdt_options(char *str)
{
	struct rdt_options *o;
	bool force_off;
	char *tok;

	if (*str == '=')
		str++;
	while ((tok = strsep(&str, ",")) != NULL) {
		force_off = *tok == '!';
		if (force_off)
			tok++;
		for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
			if (strcmp(tok, o->name) == 0) {
				if (force_off)
					o->force_off = true;
				else
					o->force_on = true;
				break;
			}
		}
	}
	return 1;
}
__setup("rdt", set_rdt_options);
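/*
 * Like boot_cpu_has(), but honour any force on/off requests made via the
 * "rdt=" boot parameter.
 */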
bool __init rdt_cpu_has(int flag)
{
	bool ret = boot_cpu_has(flag);
	struct rdt_options *o;

	if (!ret)
		return ret;

	for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
		if (flag == o->flag) {
			if (o->force_off)
				ret = false;
			if (o->force_on)
				ret = true;
			break;
		}
	}
	return ret;
}

__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
{
	if (!rdt_cpu_has(X86_FEATURE_BMEC))
		return false;

	switch (evt) {
	case QOS_L3_MBM_TOTAL_EVENT_ID:
		return rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL);
	case QOS_L3_MBM_LOCAL_EVENT_ID:
		return rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL);
	default:
		return false;
	}
}

static __init bool get_mem_config(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];

	if (!rdt_cpu_has(X86_FEATURE_MBA))
		return false;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		return __get_mem_config_intel(&hw_res->r_resctrl);
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

	return false;
}

static __init bool get_slow_mem_config(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_SMBA];

	if (!rdt_cpu_has(X86_FEATURE_SMBA))
		return false;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

	return false;
}

static __init bool get_rdt_alloc_resources(void)
{
	struct rdt_resource *r;
	bool ret = false;

	if (rdt_alloc_capable)
		return true;

	if (!boot_cpu_has(X86_FEATURE_RDT_A))
		return false;

	if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
		rdt_get_cache_alloc_cfg(1, r);
		if (rdt_cpu_has(X86_FEATURE_CDP_L3))
			rdt_get_cdp_l3_config();
		ret = true;
	}
	if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
		/* CPUID 0x10.2 fields are same format as 0x10.1 */
		r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl;
		rdt_get_cache_alloc_cfg(2, r);
		if (rdt_cpu_has(X86_FEATURE_CDP_L2))
			rdt_get_cdp_l2_config();
		ret = true;
	}

	if (get_mem_config())
		ret = true;

	if (get_slow_mem_config())
		ret = true;

	return ret;
}

static __init bool get_rdt_mon_resources(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
		rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
		rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);

	if (!rdt_mon_features)
		return false;

	return !rdt_get_mon_l3_config(r);
}

static __init void __check_quirks_intel(void)
{
	switch (boot_cpu_data.x86_vfm) {
	case INTEL_HASWELL_X:
		if (!rdt_options[RDT_FLAG_L3_CAT].force_off)
			cache_alloc_hsw_probe();
		break;
	case INTEL_SKYLAKE_X:
		if (boot_cpu_data.x86_stepping <= 4)
			set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
		else
			set_rdt_options("!l3cat");
		fallthrough;
	case INTEL_BROADWELL_X:
		intel_rdt_mbm_apply_quirk();
		break;
	}
}

static __init void check_quirks(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		__check_quirks_intel();
}

static __init bool get_rdt_resources(void)
{
	rdt_alloc_capable = get_rdt_alloc_resources();
	rdt_mon_capable = get_rdt_mon_resources();

	return (rdt_mon_capable || rdt_alloc_capable);
}
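/*
 * Set the Intel specific resource defaults: cache allocation properties
 * and the MBA MSR base and update helper.
 */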
static __init void rdt_init_res_defs_intel(void)
{
	struct rdt_hw_resource *hw_res;
	struct rdt_resource *r;

	for_each_rdt_resource(r) {
		hw_res = resctrl_to_arch_res(r);

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_per_cpu_cfg = false;
			r->cache.min_cbm_bits = 1;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
			hw_res->msr_update = mba_wrmsr_intel;
		}
	}
}

static __init void rdt_init_res_defs_amd(void)
{
	struct rdt_hw_resource *hw_res;
	struct rdt_resource *r;

	for_each_rdt_resource(r) {
		hw_res = resctrl_to_arch_res(r);

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_sparse_bitmasks = true;
			r->cache.arch_has_per_cpu_cfg = true;
			r->cache.min_cbm_bits = 0;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
			hw_res->msr_update = mba_wrmsr_amd;
		} else if (r->rid == RDT_RESOURCE_SMBA) {
			hw_res->msr_base = MSR_IA32_SMBA_BW_BASE;
			hw_res->msr_update = mba_wrmsr_amd;
		}
	}
}

static __init void rdt_init_res_defs(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		rdt_init_res_defs_intel();
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		rdt_init_res_defs_amd();
}

static enum cpuhp_state rdt_online;

/* Runs once on the BSP during boot. */
void resctrl_cpu_detect(struct cpuinfo_x86 *c)
{
	if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
		c->x86_cache_max_rmid = -1;
		c->x86_cache_occ_scale = -1;
		c->x86_cache_mbm_width_offset = -1;
		return;
	}

	/* will be overridden if occupancy monitoring exists */
	c->x86_cache_max_rmid = cpuid_ebx(0xf);

	if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
	    cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
	    cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
		u32 eax, ebx, ecx, edx;

		/* QoS sub-leaf, EAX=0Fh, ECX=1 */
		cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);

		c->x86_cache_max_rmid = ecx;
		c->x86_cache_occ_scale = ebx;
		c->x86_cache_mbm_width_offset = eax & 0xff;

		if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset)
			c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
	}
}

static int __init resctrl_arch_late_init(void)
{
	struct rdt_resource *r;
	int state, ret;

	/*
	 * Initialize functions (or definitions) that are different
	 * between vendors here.
	 */
	rdt_init_res_defs();

	check_quirks();

	if (!get_rdt_resources())
		return -ENODEV;

	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
				  "x86/resctrl/cat:online:",
				  resctrl_arch_online_cpu,
				  resctrl_arch_offline_cpu);
	if (state < 0)
		return state;

	ret = resctrl_init();
	if (ret) {
		cpuhp_remove_state(state);
		return ret;
	}
	rdt_online = state;

	for_each_alloc_capable_rdt_resource(r)
		pr_info("%s allocation detected\n", r->name);

	for_each_mon_capable_rdt_resource(r)
		pr_info("%s monitoring detected\n", r->name);

	return 0;
}

late_initcall(resctrl_arch_late_init);

static void __exit resctrl_arch_exit(void)
{
	cpuhp_remove_state(rdt_online);

	resctrl_exit();
}

__exitcall(resctrl_arch_exit);