1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (C) 2025 Arm Ltd.
3
4 #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
5
6 #include <linux/arm_mpam.h>
7 #include <linux/cacheinfo.h>
8 #include <linux/cpu.h>
9 #include <linux/cpumask.h>
10 #include <linux/errno.h>
11 #include <linux/limits.h>
12 #include <linux/list.h>
13 #include <linux/math.h>
14 #include <linux/printk.h>
15 #include <linux/rculist.h>
16 #include <linux/resctrl.h>
17 #include <linux/slab.h>
18 #include <linux/types.h>
19 #include <linux/wait.h>
20
21 #include <asm/mpam.h>
22
23 #include "mpam_internal.h"
24
/* Woken whenever a monitor context is freed, see resctrl_arch_mon_ctx_alloc(). */
DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters);

/*
 * The classes we've picked to map to resctrl resources, wrapped
 * in with their resctrl structure. Indexed by resctrl's resource id.
 * Class pointer may be NULL.
 */
static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES];

/* Walk every possible resctrl resource, including those with a NULL class. */
#define for_each_mpam_resctrl_control(res, rid) \
	for (rid = 0, res = &mpam_resctrl_controls[rid]; \
	     rid < RDT_NUM_RESOURCES; \
	     rid++, res = &mpam_resctrl_controls[rid])

/*
 * The classes we've picked to map to resctrl events.
 * Resctrl believes all the world's a Xeon, and these are all on the L3. This
 * array lets us find the actual class backing the event counters. e.g.
 * the only memory bandwidth counters may be on the memory controller, but to
 * make use of them, we pretend they are on L3. Restrict the events considered
 * to those supported by MPAM.
 * Class pointer may be NULL.
 */
#define MPAM_MAX_EVENT QOS_L3_MBM_TOTAL_EVENT_ID
static struct mpam_resctrl_mon mpam_resctrl_counters[MPAM_MAX_EVENT + 1];

/* Walk the supported events, QOS_FIRST_EVENT to MPAM_MAX_EVENT inclusive. */
#define for_each_mpam_resctrl_mon(mon, eventid) \
	for (eventid = QOS_FIRST_EVENT, mon = &mpam_resctrl_counters[eventid]; \
	     eventid <= MPAM_MAX_EVENT; \
	     eventid++, mon = &mpam_resctrl_counters[eventid])

/* The lock for modifying resctrl's domain lists from cpuhp callbacks. */
static DEFINE_MUTEX(domain_list_lock);

/*
 * MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM0_EL1.
 * This applies globally to all traffic the CPU generates.
 */
static bool cdp_enabled;

/*
 * We use cacheinfo to discover the size of the caches and their id. cacheinfo
 * populates this from a device_initcall(). mpam_resctrl_setup() must wait.
 */
static bool cacheinfo_ready;
static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready);

/*
 * If resctrl_init() succeeded, resctrl_exit() can be used to remove support
 * for the filesystem in the event of an error.
 */
static bool resctrl_enabled;
77
resctrl_arch_alloc_capable(void)78 bool resctrl_arch_alloc_capable(void)
79 {
80 struct mpam_resctrl_res *res;
81 enum resctrl_res_level rid;
82
83 for_each_mpam_resctrl_control(res, rid) {
84 if (res->resctrl_res.alloc_capable)
85 return true;
86 }
87
88 return false;
89 }
90
resctrl_arch_mon_capable(void)91 bool resctrl_arch_mon_capable(void)
92 {
93 struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
94 struct rdt_resource *l3 = &res->resctrl_res;
95
96 /* All monitors are presented as being on the L3 cache */
97 return l3->mon_capable;
98 }
99
/*
 * The following hooks are part of the resctrl arch interface but are not
 * implemented for MPAM: event configuration (BMEC) and assignable
 * counters (ABMC) are stubbed out and report unsupported.
 */
bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
{
	/* No events are configurable. */
	return false;
}

/* BMEC-style event configuration is not supported - nothing to read. */
void resctrl_arch_mon_event_config_read(void *info)
{
}

/* BMEC-style event configuration is not supported - nothing to write. */
void resctrl_arch_mon_event_config_write(void *info)
{
}

/* Nothing cached per-rmid that would need resetting. */
void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
{
}

/* Nothing cached per-rmid that would need resetting. */
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
			     u32 closid, u32 rmid, enum resctrl_event_id eventid)
{
}

/* ABMC-style assignable counters are not supported. */
void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
			     u32 closid, u32 rmid, int cntr_id,
			     enum resctrl_event_id eventid)
{
}

/* ABMC-style assignable counters are not supported. */
void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
			      enum resctrl_event_id evtid, u32 rmid, u32 closid,
			      u32 cntr_id, bool assign)
{
}

/* ABMC-style assignable counters are not supported. */
int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
			   u32 unused, u32 rmid, int cntr_id,
			   enum resctrl_event_id eventid, u64 *val)
{
	return -EOPNOTSUPP;
}

/* Counter assignment mode is never enabled. */
bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r)
{
	return false;
}

/* Counter assignment mode cannot be enabled. */
int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable)
{
	return -EINVAL;
}

/* I/O allocation ("SDCIAE") is not supported. */
int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable)
{
	return -EOPNOTSUPP;
}

/* I/O allocation ("SDCIAE") is not supported. */
bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r)
{
	return false;
}

/* No arch work needed before the resctrl filesystem is mounted. */
void resctrl_arch_pre_mount(void)
{
}
164
resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid)165 bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid)
166 {
167 return mpam_resctrl_controls[rid].cdp_enabled;
168 }
169
/**
 * resctrl_reset_task_closids() - Reset the PARTID/PMG values for all tasks.
 *
 * At boot, all existing tasks use partid zero for D and I.
 * To enable/disable CDP emulation, all these tasks need relabelling.
 */
static void resctrl_reset_task_closids(void)
{
	struct task_struct *p, *t;

	/* tasklist_lock stops tasks appearing or dying while we walk. */
	read_lock(&tasklist_lock);
	for_each_process_thread(p, t) {
		resctrl_arch_set_closid_rmid(t, RESCTRL_RESERVED_CLOSID,
					     RESCTRL_RESERVED_RMID);
	}
	read_unlock(&tasklist_lock);
}
187
/*
 * Enable or disable MPAM's emulation of resctrl's Code/Data-Prioritisation.
 * Relabels all tasks and CPU defaults with the reserved closid, using the
 * even/odd data/code partid pair when enabling.
 */
int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable)
{
	u32 partid_i = RESCTRL_RESERVED_CLOSID, partid_d = RESCTRL_RESERVED_CLOSID;
	struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
	struct rdt_resource *l3 = &res->resctrl_res;
	int cpu;

	if (!IS_ENABLED(CONFIG_EXPERT) && enable) {
		/*
		 * If the resctrl fs is mounted more than once, sequentially,
		 * then CDP can lead to the use of out of range PARTIDs.
		 */
		pr_warn("CDP not supported\n");
		return -EOPNOTSUPP;
	}

	if (enable)
		pr_warn("CDP is an expert feature and may cause MPAM to malfunction.\n");

	/*
	 * resctrl_arch_set_cdp_enabled() is only called with enable set to
	 * false on error and unmount.
	 */
	cdp_enabled = enable;
	mpam_resctrl_controls[rid].cdp_enabled = enable;

	/* CDP halves the closid space, and with it the number of rmid. */
	if (enable)
		l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx() / 2;
	else
		l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx();

	/* The mbw_max feature can't hide cdp as it's a per-partid maximum. */
	if (cdp_enabled && !mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled)
		mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = false;

	if (mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled &&
	    mpam_resctrl_controls[RDT_RESOURCE_MBA].class)
		mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = true;

	if (enable) {
		/* Splitting into code/data pairs needs at least two partid. */
		if (mpam_partid_max < 1)
			return -EINVAL;

		partid_d = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_DATA);
		partid_i = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_CODE);
	}

	/* The global default regval is derived from current's new labels. */
	mpam_set_task_partid_pmg(current, partid_d, partid_i, 0, 0);
	WRITE_ONCE(arm64_mpam_global_default, mpam_get_regval(current));

	resctrl_reset_task_closids();

	/* Update every CPU's defaults, then make each CPU reload its state. */
	for_each_possible_cpu(cpu)
		mpam_set_cpu_defaults(cpu, partid_d, partid_i, 0, 0);
	on_each_cpu(resctrl_arch_sync_cpu_closid_rmid, NULL, 1);

	return 0;
}
246
mpam_resctrl_hide_cdp(enum resctrl_res_level rid)247 static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid)
248 {
249 return cdp_enabled && !resctrl_arch_get_cdp_enabled(rid);
250 }
251
/*
 * MSC may raise an error interrupt if it sees an out of range partid/pmg,
 * and go on to truncate the value. Regardless of what the hardware supports,
 * only the system wide safe value is safe to use.
 */
u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored)
{
	/* Closid maps directly to partid; the same limit covers all resources. */
	return mpam_partid_max + 1;
}
261
/* Every (partid, pmg) combination is a unique monitoring index. */
u32 resctrl_arch_system_num_rmid_idx(void)
{
	return (mpam_pmg_max + 1) * (mpam_partid_max + 1);
}
266
/* Pack a (closid, rmid) pair into a flat monitoring index. */
u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
{
	u32 num_pmg = mpam_pmg_max + 1;

	return closid * num_pmg + rmid;
}
271
/* Split a flat monitoring index back into its (closid, rmid) pair. */
void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
{
	u32 num_pmg = mpam_pmg_max + 1;

	*closid = idx / num_pmg;
	*rmid = idx - (*closid * num_pmg);
}
277
/* Called on context switch with preemption disabled. */
void resctrl_arch_sched_in(struct task_struct *tsk)
{
	lockdep_assert_preemption_disabled();

	/* mpam_thread_switch() applies tsk's MPAM labels to this CPU. */
	mpam_thread_switch(tsk);
}
284
void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid)
{
	u32 partid_d, partid_i;

	WARN_ON_ONCE(closid > U16_MAX);
	WARN_ON_ONCE(rmid > U8_MAX);

	if (!cdp_enabled) {
		mpam_set_cpu_defaults(cpu, closid, closid, rmid, rmid);
		return;
	}

	/*
	 * When CDP is enabled, resctrl halves the closid range and we
	 * use odd/even partid for one closid.
	 */
	partid_d = resctrl_get_config_index(closid, CDP_DATA);
	partid_i = resctrl_get_config_index(closid, CDP_CODE);
	mpam_set_cpu_defaults(cpu, partid_d, partid_i, rmid, rmid);
}
303
/* IPI callback: update this CPU's defaults and re-apply current's labels. */
void resctrl_arch_sync_cpu_closid_rmid(void *info)
{
	struct resctrl_cpu_defaults *r = info;

	lockdep_assert_preemption_disabled();

	/* info is optional; when present it carries new CPU defaults. */
	if (r) {
		resctrl_arch_set_cpu_default_closid_rmid(smp_processor_id(),
							 r->closid, r->rmid);
	}

	/* Reload current so any changed defaults take effect immediately. */
	resctrl_arch_sched_in(current);
}
317
void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
{
	u32 partid_d, partid_i;

	WARN_ON_ONCE(closid > U16_MAX);
	WARN_ON_ONCE(rmid > U8_MAX);

	if (!cdp_enabled) {
		mpam_set_task_partid_pmg(tsk, closid, closid, rmid, rmid);
		return;
	}

	/* With CDP, each closid maps to an odd/even data/code partid pair. */
	partid_d = resctrl_get_config_index(closid, CDP_DATA);
	partid_i = resctrl_get_config_index(closid, CDP_CODE);
	mpam_set_task_partid_pmg(tsk, partid_d, partid_i, rmid, rmid);
}
332
resctrl_arch_match_closid(struct task_struct * tsk,u32 closid)333 bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid)
334 {
335 u64 regval = mpam_get_regval(tsk);
336 u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval);
337
338 if (cdp_enabled)
339 tsk_closid >>= 1;
340
341 return tsk_closid == closid;
342 }
343
344 /* The task's pmg is not unique, the partid must be considered too */
resctrl_arch_match_rmid(struct task_struct * tsk,u32 closid,u32 rmid)345 bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
346 {
347 u64 regval = mpam_get_regval(tsk);
348 u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval);
349 u32 tsk_rmid = FIELD_GET(MPAM0_EL1_PMG_D, regval);
350
351 if (cdp_enabled)
352 tsk_closid >>= 1;
353
354 return (tsk_closid == closid) && (tsk_rmid == rmid);
355 }
356
resctrl_arch_get_resource(enum resctrl_res_level l)357 struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
358 {
359 if (l >= RDT_NUM_RESOURCES)
360 return NULL;
361
362 return &mpam_resctrl_controls[l].resctrl_res;
363 }
364
/*
 * Try to allocate the hardware monitor backing @evtid without sleeping.
 * Returns a monitor index, USE_PRE_ALLOCATED for events that need no
 * dynamic monitor, or a negative error. -ENOSPC means the caller may
 * retry once another user frees a monitor.
 */
static int resctrl_arch_mon_ctx_alloc_no_wait(enum resctrl_event_id evtid)
{
	struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid];

	if (!mpam_is_enabled())
		return -EINVAL;

	if (!mon->class)
		return -EINVAL;

	switch (evtid) {
	case QOS_L3_OCCUP_EVENT_ID:
		/* With CDP, one monitor gets used for both code/data reads */
		return mpam_alloc_csu_mon(mon->class);
	case QOS_L3_MBM_LOCAL_EVENT_ID:
	case QOS_L3_MBM_TOTAL_EVENT_ID:
		/* MBWU monitors are pre-allocated - nothing to hand out. */
		return USE_PRE_ALLOCATED;
	default:
		return -EOPNOTSUPP;
	}
}
386
/*
 * Allocate a monitor context for @evtid, sleeping until a monitor becomes
 * free. Returns a kmalloc()ed int holding the monitor index - freed by
 * resctrl_arch_mon_ctx_free() - or ERR_PTR(-ENOMEM).
 *
 * NOTE(review): if a signal arrives while the monitor pool is exhausted,
 * the returned context still holds -ENOSPC - presumably callers check the
 * stored value; confirm.
 */
void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r,
				 enum resctrl_event_id evtid)
{
	DEFINE_WAIT(wait);
	int *ret;

	ret = kmalloc_obj(*ret);
	if (!ret)
		return ERR_PTR(-ENOMEM);

	/* Sleep until woken by resctrl_arch_mon_ctx_free_no_wait(). */
	do {
		prepare_to_wait(&resctrl_mon_ctx_waiters, &wait,
				TASK_INTERRUPTIBLE);
		*ret = resctrl_arch_mon_ctx_alloc_no_wait(evtid);
		if (*ret == -ENOSPC)
			schedule();
	} while (*ret == -ENOSPC && !signal_pending(current));
	finish_wait(&resctrl_mon_ctx_waiters, &wait);

	return ret;
}
408
/*
 * Release the monitor behind @evtid/@mon_idx and wake anyone sleeping in
 * resctrl_arch_mon_ctx_alloc().
 */
static void resctrl_arch_mon_ctx_free_no_wait(enum resctrl_event_id evtid,
					      u32 mon_idx)
{
	struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid];

	if (!mpam_is_enabled())
		return;

	if (!mon->class)
		return;

	/* Only CSU monitors are dynamically allocated, see the alloc path. */
	if (evtid == QOS_L3_OCCUP_EVENT_ID)
		mpam_free_csu_mon(mon->class, mon_idx);

	wake_up(&resctrl_mon_ctx_waiters);
}
425
resctrl_arch_mon_ctx_free(struct rdt_resource * r,enum resctrl_event_id evtid,void * arch_mon_ctx)426 void resctrl_arch_mon_ctx_free(struct rdt_resource *r,
427 enum resctrl_event_id evtid, void *arch_mon_ctx)
428 {
429 u32 mon_idx = *(u32 *)arch_mon_ctx;
430
431 kfree(arch_mon_ctx);
432
433 resctrl_arch_mon_ctx_free_no_wait(evtid, mon_idx);
434 }
435
/*
 * Read one monitor for one partid/pmg pair.
 * @cdp_type selects how @closid maps to a partid, see
 * resctrl_get_config_index().
 */
static int __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
		      enum mpam_device_features mon_type,
		      int mon_idx,
		      enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, u64 *val)
{
	struct mon_cfg cfg;

	if (!mpam_is_enabled())
		return -EINVAL;

	/* Shift closid to account for CDP */
	closid = resctrl_get_config_index(closid, cdp_type);

	if (irqs_disabled()) {
		/*
		 * Check if we can access this domain without an IPI.
		 * NOTE(review): currently this always fails with interrupts
		 * disabled - looks like a placeholder for a locality check;
		 * confirm.
		 */
		return -EIO;
	}

	cfg = (struct mon_cfg) {
		.mon = mon_idx,
		.match_pmg = true,
		.partid = closid,
		.pmg = rmid,
	};

	return mpam_msmon_read(mon_comp, &cfg, mon_type, val);
}
463
/*
 * Read a monitor, summing the separate code and data counters when CDP
 * emulation is in use.
 *
 * NOTE(review): the CDP path accumulates with '*val +=', so it relies on
 * the caller zero-initialising *val - confirm this matches the non-CDP
 * path, which hands *val straight to __read_mon().
 */
static int read_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
			     enum mpam_device_features mon_type,
			     int mon_idx, u32 closid, u32 rmid, u64 *val)
{
	if (cdp_enabled) {
		u64 code_val = 0, data_val = 0;
		int err;

		err = __read_mon(mon, mon_comp, mon_type, mon_idx,
				 CDP_CODE, closid, rmid, &code_val);
		if (err)
			return err;

		err = __read_mon(mon, mon_comp, mon_type, mon_idx,
				 CDP_DATA, closid, rmid, &data_val);
		if (err)
			return err;

		*val += code_val + data_val;
		return 0;
	}

	return __read_mon(mon, mon_comp, mon_type, mon_idx,
			  CDP_NONE, closid, rmid, val);
}
489
/* MBWU when not in ABMC mode (not supported), and CSU counters. */
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr,
			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
			   void *arch_priv, u64 *val, void *arch_mon_ctx)
{
	struct mpam_resctrl_dom *l3_dom;
	struct mpam_component *mon_comp;
	u32 mon_idx = *(u32 *)arch_mon_ctx;
	enum mpam_device_features mon_type;
	struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid];

	resctrl_arch_rmid_read_context_check();

	if (!mpam_is_enabled())
		return -EINVAL;

	if (eventid >= QOS_NUM_EVENTS || !mon->class)
		return -EINVAL;

	/* The component backing this event may not be the L3 itself. */
	l3_dom = container_of(hdr, struct mpam_resctrl_dom, resctrl_mon_dom.hdr);
	mon_comp = l3_dom->mon_comp[eventid];

	/* Only the CSU-backed llc_occupancy event is supported here. */
	if (eventid != QOS_L3_OCCUP_EVENT_ID)
		return -EINVAL;

	mon_type = mpam_feat_msmon_csu;

	return read_mon_cdp_safe(mon, mon_comp, mon_type, mon_idx,
				 closid, rmid, val);
}
520
/*
 * The rmid realloc threshold should be for the smallest cache exposed to
 * resctrl.
 */
static int update_rmid_limits(struct mpam_class *class)
{
	u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx();
	struct mpam_props *cprops = &class->props;
	struct cacheinfo *ci;

	lockdep_assert_cpus_held();

	/* The limits only matter for classes with CSU counters. */
	if (!mpam_has_feature(mpam_feat_msmon_csu, cprops))
		return 0;

	/*
	 * Assume cache levels are the same size for all CPUs...
	 * The check just requires any online CPU and it can't go offline as we
	 * hold the cpu lock.
	 */
	ci = get_cpu_cacheinfo_level(raw_smp_processor_id(), class->level);
	if (!ci || ci->size == 0) {
		pr_debug("Could not read cache size for class %u\n",
			 class->level);
		return -EINVAL;
	}

	/* Keep the values derived from the smallest cache seen so far. */
	if (!resctrl_rmid_realloc_limit ||
	    ci->size < resctrl_rmid_realloc_limit) {
		resctrl_rmid_realloc_limit = ci->size;
		resctrl_rmid_realloc_threshold = ci->size / num_unique_pmg;
	}

	return 0;
}
556
cache_has_usable_cpor(struct mpam_class * class)557 static bool cache_has_usable_cpor(struct mpam_class *class)
558 {
559 struct mpam_props *cprops = &class->props;
560
561 if (!mpam_has_feature(mpam_feat_cpor_part, cprops))
562 return false;
563
564 /* resctrl uses u32 for all bitmap configurations */
565 return class->props.cpbm_wd <= 32;
566 }
567
mba_class_use_mbw_max(struct mpam_props * cprops)568 static bool mba_class_use_mbw_max(struct mpam_props *cprops)
569 {
570 return (mpam_has_feature(mpam_feat_mbw_max, cprops) &&
571 cprops->bwa_wd);
572 }
573
/* A class can back MBA when it has a usable bandwidth-maximum control. */
static bool class_has_usable_mba(struct mpam_props *cprops)
{
	return mba_class_use_mbw_max(cprops);
}
578
cache_has_usable_csu(struct mpam_class * class)579 static bool cache_has_usable_csu(struct mpam_class *class)
580 {
581 struct mpam_props *cprops;
582
583 if (!class)
584 return false;
585
586 cprops = &class->props;
587
588 if (!mpam_has_feature(mpam_feat_msmon_csu, cprops))
589 return false;
590
591 /*
592 * CSU counters settle on the value, so we can get away with
593 * having only one.
594 */
595 if (!cprops->num_csu_mon)
596 return false;
597
598 return true;
599 }
600
/*
 * Calculate the worst-case percentage change from each implemented step
 * in the control. Returns 0 when the class has no usable bandwidth control.
 */
static u32 get_mba_granularity(struct mpam_props *cprops)
{
	if (!mba_class_use_mbw_max(cprops))
		return 0;

	/*
	 * bwa_wd is the number of bits implemented in the 0.xxx
	 * fixed point fraction. 1 bit is 50%, 2 is 25% etc.
	 */
	return DIV_ROUND_UP(MAX_MBA_BW, 1 << cprops->bwa_wd);
}
616
/*
 * Each fixed-point hardware value architecturally represents a range
 * of values: the full range 0% - 100% is split contiguously into
 * (1 << cprops->bwa_wd) equal bands.
 *
 * Although the bwa_wd fields have 6 bits the maximum valid value is 16
 * as it reports the width of fields that are at most 16 bits. When
 * fewer than 16 bits are valid the least significant bits are
 * ignored. The implied binary point is kept between bits 15 and 16 and
 * so the valid bits are leftmost.
 *
 * See ARM IHI0099B.a "MPAM system component specification", Section 9.3,
 * "The fixed-point fractional format" for more information.
 *
 * Find the nearest percentage value to the upper bound of the selected band:
 */
static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops)
{
	u32 val = mbw_max;

	/* Discard the unimplemented low bits to recover the band number. */
	val >>= 16 - cprops->bwa_wd;
	/* +1 selects the band's upper bound rather than its lower. */
	val += 1;
	val *= MAX_MBA_BW;
	val = DIV_ROUND_CLOSEST(val, 1 << cprops->bwa_wd);

	return val;
}
644
/*
 * Find the band whose upper bound is closest to the specified percentage.
 *
 * A round-to-nearest policy is followed here as a balanced compromise
 * between unexpected under-commit of the resource (where the total of
 * a set of resource allocations after conversion is less than the
 * expected total, due to rounding of the individual converted
 * percentages) and over-commit (where the total of the converted
 * allocations is greater than expected).
 */
static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops)
{
	u32 val = pc;

	val <<= cprops->bwa_wd;
	val = DIV_ROUND_CLOSEST(val, MAX_MBA_BW);
	/* -1 converts an upper bound back to a band number; clamp at band 0. */
	val = max(val, 1) - 1;
	/* Move the band number into the leftmost implemented bits. */
	val <<= 16 - cprops->bwa_wd;

	return val;
}
666
get_mba_min(struct mpam_props * cprops)667 static u32 get_mba_min(struct mpam_props *cprops)
668 {
669 if (!mba_class_use_mbw_max(cprops)) {
670 WARN_ON_ONCE(1);
671 return 0;
672 }
673
674 return mbw_max_to_percent(0, cprops);
675 }
676
/* Find the L3 cache that has affinity with this CPU */
static int find_l3_equivalent_bitmask(int cpu, cpumask_var_t tmp_cpumask)
{
	u32 cache_id = get_cpu_cacheinfo_id(cpu, 3);

	lockdep_assert_cpus_held();

	/* Fills @tmp_cpumask with the CPUs that share @cpu's L3. */
	return mpam_get_cpumask_from_cache_id(cache_id, 3, tmp_cpumask);
}
686
/*
 * topology_matches_l3() - Is the provided class the same shape as L3
 * @victim: The class we'd like to pretend is L3.
 *
 * resctrl expects all the world's a Xeon, and all counters are on the
 * L3. We allow some mapping counters on other classes. This requires
 * that the CPU->domain mapping is the same kind of shape.
 *
 * Using cacheinfo directly would make this work even if resctrl can't
 * use the L3 - but cacheinfo can't tell us anything about offline CPUs.
 * Using the L3 resctrl domain list also depends on CPUs being online.
 * Using the mpam_class we picked for L3 so we can use its domain list
 * assumes that there are MPAM controls on the L3.
 * Instead, this path eventually uses the mpam_get_cpumask_from_cache_id()
 * helper which can tell us about offline CPUs ... but getting the cache_id
 * to start with relies on at least one CPU per L3 cache being online at
 * boot.
 *
 * Walk the victim component list and compare the affinity mask with the
 * corresponding L3. The topology matches if each victim:component's affinity
 * mask is the same as the CPU's corresponding L3's. These lists/masks are
 * computed from firmware tables so don't change at runtime.
 */
static bool topology_matches_l3(struct mpam_class *victim)
{
	int cpu, err;
	struct mpam_component *victim_iter;

	lockdep_assert_cpus_held();

	/* Scratch mask, freed automatically on every return path. */
	cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL;
	if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL))
		return false;

	guard(srcu)(&mpam_srcu);
	list_for_each_entry_srcu(victim_iter, &victim->components, class_list,
				 srcu_read_lock_held(&mpam_srcu)) {
		if (cpumask_empty(&victim_iter->affinity)) {
			pr_debug("class %u has CPU-less component %u - can't match L3!\n",
				 victim->level, victim_iter->comp_id);
			return false;
		}

		/* Any online CPU of the component can identify its L3. */
		cpu = cpumask_any_and(&victim_iter->affinity, cpu_online_mask);
		if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
			return false;

		cpumask_clear(tmp_cpumask);
		err = find_l3_equivalent_bitmask(cpu, tmp_cpumask);
		if (err) {
			pr_debug("Failed to find L3's equivalent component to class %u component %u\n",
				 victim->level, victim_iter->comp_id);
			return false;
		}

		/* Any differing bits in the affinity mask? */
		if (!cpumask_equal(tmp_cpumask, &victim_iter->affinity)) {
			pr_debug("class %u component %u has Mismatched CPU mask with L3 equivalent\n"
				 "L3:%*pbl != victim:%*pbl\n",
				 victim->level, victim_iter->comp_id,
				 cpumask_pr_args(tmp_cpumask),
				 cpumask_pr_args(&victim_iter->affinity));

			return false;
		}
	}

	return true;
}
756
/*
 * Test if the traffic for a class matches that at egress from the L3. For
 * MSC at memory controllers this is only possible if there is a single L3
 * as otherwise the counters at the memory can include bandwidth from the
 * non-local L3.
 */
static bool traffic_matches_l3(struct mpam_class *class)
{
	int err, cpu;

	lockdep_assert_cpus_held();

	/* The L3 itself trivially matches. */
	if (class->type == MPAM_CLASS_CACHE && class->level == 3)
		return true;

	if (class->type == MPAM_CLASS_CACHE && class->level != 3) {
		pr_debug("class %u is a different cache from L3\n", class->level);
		return false;
	}

	if (class->type != MPAM_CLASS_MEMORY) {
		pr_debug("class %u is neither of type cache or memory\n", class->level);
		return false;
	}

	cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL;
	if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) {
		pr_debug("cpumask allocation failed\n");
		return false;
	}

	/*
	 * NOTE(review): cpu is not range-checked here; callers such as
	 * mpam_resctrl_pick_mba() have already checked class->affinity
	 * covers all possible CPUs, so an online CPU is always found.
	 */
	cpu = cpumask_any_and(&class->affinity, cpu_online_mask);
	err = find_l3_equivalent_bitmask(cpu, tmp_cpumask);
	if (err) {
		pr_debug("Failed to find L3 downstream to cpu %d\n", cpu);
		return false;
	}

	/* A single L3 must cover every possible CPU. */
	if (!cpumask_equal(tmp_cpumask, cpu_possible_mask)) {
		pr_debug("There is more than one L3\n");
		return false;
	}

	/* Be strict; the traffic might stop in the intermediate cache. */
	if (get_cpu_cacheinfo_id(cpu, 4) != -1) {
		pr_debug("L3 isn't the last level of cache\n");
		return false;
	}

	if (num_possible_nodes() > 1) {
		pr_debug("There is more than one numa node\n");
		return false;
	}

#ifdef CONFIG_HMEM_REPORTING
	if (node_devices[cpu_to_node(cpu)]->cache_dev) {
		pr_debug("There is a memory side cache\n");
		return false;
	}
#endif

	return true;
}
820
/* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */
static void mpam_resctrl_pick_caches(void)
{
	struct mpam_class *class;
	struct mpam_resctrl_res *res;

	lockdep_assert_cpus_held();

	guard(srcu)(&mpam_srcu);
	list_for_each_entry_srcu(class, &mpam_classes, classes_list,
				 srcu_read_lock_held(&mpam_srcu)) {
		if (class->type != MPAM_CLASS_CACHE) {
			pr_debug("class %u is not a cache\n", class->level);
			continue;
		}

		if (class->level != 2 && class->level != 3) {
			pr_debug("class %u is not L2 or L3\n", class->level);
			continue;
		}

		/* resctrl's cache schemata need a usable portion bitmap. */
		if (!cache_has_usable_cpor(class)) {
			pr_debug("class %u cache misses CPOR\n", class->level);
			continue;
		}

		/* Every possible CPU must be covered, even offline ones. */
		if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
			pr_debug("class %u has missing CPUs, mask %*pb != %*pb\n", class->level,
				 cpumask_pr_args(&class->affinity),
				 cpumask_pr_args(cpu_possible_mask));
			continue;
		}

		if (class->level == 2)
			res = &mpam_resctrl_controls[RDT_RESOURCE_L2];
		else
			res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
		res->class = class;
	}
}
861
/* Pick the class that will back the MBA (memory bandwidth) resource. */
static void mpam_resctrl_pick_mba(void)
{
	struct mpam_class *class, *candidate_class = NULL;
	struct mpam_resctrl_res *res;

	lockdep_assert_cpus_held();

	guard(srcu)(&mpam_srcu);
	list_for_each_entry_srcu(class, &mpam_classes, classes_list,
				 srcu_read_lock_held(&mpam_srcu)) {
		struct mpam_props *cprops = &class->props;

		if (class->level != 3 && class->type == MPAM_CLASS_CACHE) {
			pr_debug("class %u is a cache but not the L3\n", class->level);
			continue;
		}

		if (!class_has_usable_mba(cprops)) {
			pr_debug("class %u has no bandwidth control\n",
				 class->level);
			continue;
		}

		/* Every possible CPU must be covered, even offline ones. */
		if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
			pr_debug("class %u has missing CPUs\n", class->level);
			continue;
		}

		if (!topology_matches_l3(class)) {
			pr_debug("class %u topology doesn't match L3\n",
				 class->level);
			continue;
		}

		if (!traffic_matches_l3(class)) {
			pr_debug("class %u traffic doesn't match L3 egress\n",
				 class->level);
			continue;
		}

		/*
		 * Pick a resource to be MBA that as close as possible to
		 * the L3. mbm_total counts the bandwidth leaving the L3
		 * cache and MBA should correspond as closely as possible
		 * for proper operation of mba_sc.
		 */
		if (!candidate_class || class->level < candidate_class->level)
			candidate_class = class;
	}

	if (candidate_class) {
		pr_debug("selected class %u to back MBA\n",
			 candidate_class->level);
		res = &mpam_resctrl_controls[RDT_RESOURCE_MBA];
		res->class = candidate_class;
	}
}
919
counter_update_class(enum resctrl_event_id evt_id,struct mpam_class * class)920 static void counter_update_class(enum resctrl_event_id evt_id,
921 struct mpam_class *class)
922 {
923 struct mpam_class *existing_class = mpam_resctrl_counters[evt_id].class;
924
925 if (existing_class) {
926 if (class->level == 3) {
927 pr_debug("Existing class is L3 - L3 wins\n");
928 return;
929 }
930
931 if (existing_class->level < class->level) {
932 pr_debug("Existing class is closer to L3, %u versus %u - closer is better\n",
933 existing_class->level, class->level);
934 return;
935 }
936 }
937
938 mpam_resctrl_counters[evt_id].class = class;
939 }
940
mpam_resctrl_pick_counters(void)941 static void mpam_resctrl_pick_counters(void)
942 {
943 struct mpam_class *class;
944
945 lockdep_assert_cpus_held();
946
947 guard(srcu)(&mpam_srcu);
948 list_for_each_entry_srcu(class, &mpam_classes, classes_list,
949 srcu_read_lock_held(&mpam_srcu)) {
950 /* The name of the resource is L3... */
951 if (class->type == MPAM_CLASS_CACHE && class->level != 3) {
952 pr_debug("class %u is a cache but not the L3", class->level);
953 continue;
954 }
955
956 if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
957 pr_debug("class %u does not cover all CPUs",
958 class->level);
959 continue;
960 }
961
962 if (cache_has_usable_csu(class)) {
963 pr_debug("class %u has usable CSU",
964 class->level);
965
966 /* CSU counters only make sense on a cache. */
967 switch (class->type) {
968 case MPAM_CLASS_CACHE:
969 if (update_rmid_limits(class))
970 break;
971
972 counter_update_class(QOS_L3_OCCUP_EVENT_ID, class);
973 break;
974 default:
975 break;
976 }
977 }
978 }
979 }
980
mpam_resctrl_control_init(struct mpam_resctrl_res * res)981 static int mpam_resctrl_control_init(struct mpam_resctrl_res *res)
982 {
983 struct mpam_class *class = res->class;
984 struct mpam_props *cprops = &class->props;
985 struct rdt_resource *r = &res->resctrl_res;
986
987 switch (r->rid) {
988 case RDT_RESOURCE_L2:
989 case RDT_RESOURCE_L3:
990 r->schema_fmt = RESCTRL_SCHEMA_BITMAP;
991 r->cache.arch_has_sparse_bitmasks = true;
992
993 r->cache.cbm_len = class->props.cpbm_wd;
994 /* mpam_devices will reject empty bitmaps */
995 r->cache.min_cbm_bits = 1;
996
997 if (r->rid == RDT_RESOURCE_L2) {
998 r->name = "L2";
999 r->ctrl_scope = RESCTRL_L2_CACHE;
1000 r->cdp_capable = true;
1001 } else {
1002 r->name = "L3";
1003 r->ctrl_scope = RESCTRL_L3_CACHE;
1004 r->cdp_capable = true;
1005 }
1006
1007 /*
1008 * Which bits are shared with other ...things... Unknown
1009 * devices use partid-0 which uses all the bitmap fields. Until
1010 * we have configured the SMMU and GIC not to do this 'all the
1011 * bits' is the correct answer here.
1012 */
1013 r->cache.shareable_bits = resctrl_get_default_ctrl(r);
1014 r->alloc_capable = true;
1015 break;
1016 case RDT_RESOURCE_MBA:
1017 r->schema_fmt = RESCTRL_SCHEMA_RANGE;
1018 r->ctrl_scope = RESCTRL_L3_CACHE;
1019
1020 r->membw.delay_linear = true;
1021 r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
1022 r->membw.min_bw = get_mba_min(cprops);
1023 r->membw.max_bw = MAX_MBA_BW;
1024 r->membw.bw_gran = get_mba_granularity(cprops);
1025
1026 r->name = "MB";
1027 r->alloc_capable = true;
1028 break;
1029 default:
1030 return -EINVAL;
1031 }
1032
1033 return 0;
1034 }
1035
mpam_resctrl_pick_domain_id(int cpu,struct mpam_component * comp)1036 static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp)
1037 {
1038 struct mpam_class *class = comp->class;
1039
1040 if (class->type == MPAM_CLASS_CACHE)
1041 return comp->comp_id;
1042
1043 if (topology_matches_l3(class)) {
1044 /* Use the corresponding L3 component ID as the domain ID */
1045 int id = get_cpu_cacheinfo_id(cpu, 3);
1046
1047 /* Implies topology_matches_l3() made a mistake */
1048 if (WARN_ON_ONCE(id == -1))
1049 return comp->comp_id;
1050
1051 return id;
1052 }
1053
1054 /* Otherwise, expose the ID used by the firmware table code. */
1055 return comp->comp_id;
1056 }
1057
/*
 * Expose event @type on resctrl's L3 resource.
 *
 * NOTE(review): @mon is currently unused; the backing class is read from
 * mpam_resctrl_counters[type] instead - confirm whether @mon should be
 * used directly.
 *
 * Returns 0, including when there is no L3 cache and the event cannot be
 * exposed (that is not treated as an error).
 */
static int mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon,
				     enum resctrl_event_id type)
{
	struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
	struct rdt_resource *l3 = &res->resctrl_res;

	lockdep_assert_cpus_held();

	/*
	 * There also needs to be an L3 cache present.
	 * The check just requires any online CPU and it can't go offline as we
	 * hold the cpu lock.
	 */
	if (get_cpu_cacheinfo_id(raw_smp_processor_id(), 3) == -1)
		return 0;

	/*
	 * If there are no MPAM resources on L3, force it into existence.
	 * topology_matches_l3() already ensures this looks like the L3.
	 * The domain-ids will be fixed up by mpam_resctrl_domain_hdr_init().
	 * Side effect: the counter's class is aliased into the L3 control
	 * slot, so the same class may now appear in both arrays.
	 */
	if (!res->class) {
		pr_warn_once("Faking L3 MSC to enable counters.\n");
		res->class = mpam_resctrl_counters[type].class;
	}

	/*
	 * Called multiple times!, once per event type that has a
	 * monitoring class.
	 * Setting name is necessary on monitor only platforms.
	 */
	l3->name = "L3";
	l3->mon_scope = RESCTRL_L3_CACHE;

	/*
	 * num-rmid is the upper bound for the number of monitoring groups that
	 * can exist simultaneously, including the default monitoring group for
	 * each control group. Hence, advertise the whole rmid_idx space even
	 * though each control group has its own pmg/rmid space. Unfortunately,
	 * this does mean userspace needs to know the architecture to correctly
	 * interpret this value.
	 */
	l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx();

	/*
	 * NOTE(review): assumes a non-zero return from
	 * resctrl_enable_mon_event() means the event was accepted - confirm
	 * against that function's contract.
	 */
	if (resctrl_enable_mon_event(type, false, 0, NULL))
		l3->mon_capable = true;

	return 0;
}
1107
/*
 * Read back the current configuration for @closid on domain @d of
 * resource @r. Returns the resource's default control value whenever
 * MPAM is disabled, the resource has no controls, or no explicit
 * configuration has been applied for this partid.
 */
u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
			    u32 closid, enum resctrl_conf_type type)
{
	struct mpam_resctrl_res *res;
	struct mpam_resctrl_dom *dom;
	struct mpam_props *cprops;
	struct mpam_config *cfg;
	u32 partid;

	lockdep_assert_cpus_held();

	if (!mpam_is_enabled())
		return resctrl_get_default_ctrl(r);

	res = container_of(r, struct mpam_resctrl_res, resctrl_res);
	dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom);
	cprops = &res->class->props;

	/*
	 * When CDP is enabled, but the resource doesn't support it,
	 * the control is cloned across both partids.
	 * Pick one at random to read:
	 */
	if (mpam_resctrl_hide_cdp(r->rid))
		type = CDP_DATA;

	partid = resctrl_get_config_index(closid, type);
	cfg = &dom->ctrl_comp->cfg[partid];

	if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r))
		return resctrl_get_default_ctrl(r);

	switch (r->rid) {
	case RDT_RESOURCE_L2:
	case RDT_RESOURCE_L3:
		if (mpam_has_feature(mpam_feat_cpor_part, cfg))
			return cfg->cpbm;
		break;
	case RDT_RESOURCE_MBA:
		if (mpam_has_feature(mpam_feat_mbw_max, cprops) &&
		    mpam_has_feature(mpam_feat_mbw_max, cfg))
			return mbw_max_to_percent(cfg->mbw_max, cprops);
		break;
	default:
		break;
	}

	return resctrl_get_default_ctrl(r);
}
1166
/*
 * Apply one staged control value @cfg_val for @closid/@t to domain @d of
 * resource @r.
 *
 * Returns 0 on success, -EINVAL if MPAM is disabled, the resource has no
 * controls, the partid is out of range, or the resource type is not
 * supported; otherwise the error from mpam_apply_config().
 */
int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
			    u32 closid, enum resctrl_conf_type t, u32 cfg_val)
{
	int err;
	u32 partid;
	struct mpam_config cfg;
	struct mpam_props *cprops;
	struct mpam_resctrl_res *res;
	struct mpam_resctrl_dom *dom;

	lockdep_assert_cpus_held();
	lockdep_assert_irqs_enabled();

	if (!mpam_is_enabled())
		return -EINVAL;

	/*
	 * No need to check the CPU as mpam_apply_config() doesn't care, and
	 * resctrl_arch_update_domains() relies on this.
	 */
	res = container_of(r, struct mpam_resctrl_res, resctrl_res);
	dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom);
	cprops = &res->class->props;

	/* Resource can't do CDP: write the same value to both partids below. */
	if (mpam_resctrl_hide_cdp(r->rid))
		t = CDP_DATA;

	partid = resctrl_get_config_index(closid, t);
	if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r)) {
		pr_debug("Not alloc capable or computed PARTID out of range\n");
		return -EINVAL;
	}

	/*
	 * Copy the current config to avoid clearing other resources when the
	 * same component is exposed multiple times through resctrl.
	 */
	cfg = dom->ctrl_comp->cfg[partid];

	switch (r->rid) {
	case RDT_RESOURCE_L2:
	case RDT_RESOURCE_L3:
		cfg.cpbm = cfg_val;
		mpam_set_feature(mpam_feat_cpor_part, &cfg);
		break;
	case RDT_RESOURCE_MBA:
		if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
			cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops);
			mpam_set_feature(mpam_feat_mbw_max, &cfg);
			break;
		}
		fallthrough;
	default:
		return -EINVAL;
	}

	/*
	 * When CDP is enabled, but the resource doesn't support it, we need to
	 * apply the same configuration to the other partid.
	 */
	if (mpam_resctrl_hide_cdp(r->rid)) {
		partid = resctrl_get_config_index(closid, CDP_CODE);
		err = mpam_apply_config(dom->ctrl_comp, partid, &cfg);
		if (err)
			return err;

		partid = resctrl_get_config_index(closid, CDP_DATA);
		return mpam_apply_config(dom->ctrl_comp, partid, &cfg);
	}

	return mpam_apply_config(dom->ctrl_comp, partid, &cfg);
}
1239
/*
 * Push every staged configuration for @closid out to hardware, across all
 * control domains of @r. Stops at the first error.
 */
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
{
	struct rdt_ctrl_domain *d;
	enum resctrl_conf_type t;
	int err;

	lockdep_assert_cpus_held();
	lockdep_assert_irqs_enabled();

	if (!mpam_is_enabled())
		return -EINVAL;

	list_for_each_entry_rcu(d, &r->ctrl_domains, hdr.list) {
		for (t = 0; t < CDP_NUM_TYPES; t++) {
			struct resctrl_staged_config *staged = &d->staged_config[t];

			if (!staged->have_new_ctrl)
				continue;

			err = resctrl_arch_update_one(r, d, closid, t,
						      staged->new_ctrl);
			if (err)
				return err;
		}
	}

	return 0;
}
1267
resctrl_arch_reset_all_ctrls(struct rdt_resource * r)1268 void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
1269 {
1270 struct mpam_resctrl_res *res;
1271
1272 lockdep_assert_cpus_held();
1273
1274 if (!mpam_is_enabled())
1275 return;
1276
1277 res = container_of(r, struct mpam_resctrl_res, resctrl_res);
1278 mpam_reset_class_locked(res->class);
1279 }
1280
mpam_resctrl_domain_hdr_init(int cpu,struct mpam_component * comp,enum resctrl_res_level rid,struct rdt_domain_hdr * hdr)1281 static void mpam_resctrl_domain_hdr_init(int cpu, struct mpam_component *comp,
1282 enum resctrl_res_level rid,
1283 struct rdt_domain_hdr *hdr)
1284 {
1285 lockdep_assert_cpus_held();
1286
1287 INIT_LIST_HEAD(&hdr->list);
1288 hdr->id = mpam_resctrl_pick_domain_id(cpu, comp);
1289 hdr->rid = rid;
1290 cpumask_set_cpu(cpu, &hdr->cpu_mask);
1291 }
1292
/* Add @cpu to the CPU mask of an already-existing domain header. */
static void mpam_resctrl_online_domain_hdr(unsigned int cpu,
					   struct rdt_domain_hdr *hdr)
{
	lockdep_assert_cpus_held();

	cpumask_set_cpu(cpu, &hdr->cpu_mask);
}
1300
1301 /**
1302 * mpam_resctrl_offline_domain_hdr() - Update the domain header to remove a CPU.
1303 * @cpu: The CPU to remove from the domain.
1304 * @hdr: The domain's header.
1305 *
1306 * Removes @cpu from the header mask. If this was the last CPU in the domain,
1307 * the domain header is removed from its parent list and true is returned,
1308 * indicating the parent structure can be freed.
1309 * If there are other CPUs in the domain, returns false.
1310 */
mpam_resctrl_offline_domain_hdr(unsigned int cpu,struct rdt_domain_hdr * hdr)1311 static bool mpam_resctrl_offline_domain_hdr(unsigned int cpu,
1312 struct rdt_domain_hdr *hdr)
1313 {
1314 lockdep_assert_held(&domain_list_lock);
1315
1316 cpumask_clear_cpu(cpu, &hdr->cpu_mask);
1317 if (cpumask_empty(&hdr->cpu_mask)) {
1318 list_del_rcu(&hdr->list);
1319 synchronize_rcu();
1320 return true;
1321 }
1322
1323 return false;
1324 }
1325
mpam_resctrl_domain_insert(struct list_head * list,struct rdt_domain_hdr * new)1326 static void mpam_resctrl_domain_insert(struct list_head *list,
1327 struct rdt_domain_hdr *new)
1328 {
1329 struct rdt_domain_hdr *err;
1330 struct list_head *pos = NULL;
1331
1332 lockdep_assert_held(&domain_list_lock);
1333
1334 err = resctrl_find_domain(list, new->id, &pos);
1335 if (WARN_ON_ONCE(err))
1336 return;
1337
1338 list_add_tail_rcu(&new->list, pos);
1339 }
1340
find_component(struct mpam_class * class,int cpu)1341 static struct mpam_component *find_component(struct mpam_class *class, int cpu)
1342 {
1343 struct mpam_component *comp;
1344
1345 guard(srcu)(&mpam_srcu);
1346 list_for_each_entry_srcu(comp, &class->components, class_list,
1347 srcu_read_lock_held(&mpam_srcu)) {
1348 if (cpumask_test_cpu(cpu, &comp->affinity))
1349 return comp;
1350 }
1351
1352 return NULL;
1353 }
1354
1355 static struct mpam_resctrl_dom *
mpam_resctrl_alloc_domain(unsigned int cpu,struct mpam_resctrl_res * res)1356 mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res)
1357 {
1358 int err;
1359 struct mpam_resctrl_dom *dom;
1360 struct rdt_l3_mon_domain *mon_d;
1361 struct rdt_ctrl_domain *ctrl_d;
1362 struct mpam_class *class = res->class;
1363 struct mpam_component *comp_iter, *ctrl_comp;
1364 struct rdt_resource *r = &res->resctrl_res;
1365
1366 lockdep_assert_held(&domain_list_lock);
1367
1368 ctrl_comp = NULL;
1369 guard(srcu)(&mpam_srcu);
1370 list_for_each_entry_srcu(comp_iter, &class->components, class_list,
1371 srcu_read_lock_held(&mpam_srcu)) {
1372 if (cpumask_test_cpu(cpu, &comp_iter->affinity)) {
1373 ctrl_comp = comp_iter;
1374 break;
1375 }
1376 }
1377
1378 /* class has no component for this CPU */
1379 if (WARN_ON_ONCE(!ctrl_comp))
1380 return ERR_PTR(-EINVAL);
1381
1382 dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu));
1383 if (!dom)
1384 return ERR_PTR(-ENOMEM);
1385
1386 if (r->alloc_capable) {
1387 dom->ctrl_comp = ctrl_comp;
1388
1389 ctrl_d = &dom->resctrl_ctrl_dom;
1390 mpam_resctrl_domain_hdr_init(cpu, ctrl_comp, r->rid, &ctrl_d->hdr);
1391 ctrl_d->hdr.type = RESCTRL_CTRL_DOMAIN;
1392 err = resctrl_online_ctrl_domain(r, ctrl_d);
1393 if (err)
1394 goto free_domain;
1395
1396 mpam_resctrl_domain_insert(&r->ctrl_domains, &ctrl_d->hdr);
1397 } else {
1398 pr_debug("Skipped control domain online - no controls\n");
1399 }
1400
1401 if (r->mon_capable) {
1402 struct mpam_component *any_mon_comp;
1403 struct mpam_resctrl_mon *mon;
1404 enum resctrl_event_id eventid;
1405
1406 /*
1407 * Even if the monitor domain is backed by a different
1408 * component, the L3 component IDs need to be used... only
1409 * there may be no ctrl_comp for the L3.
1410 * Search each event's class list for a component with
1411 * overlapping CPUs and set up the dom->mon_comp array.
1412 */
1413
1414 for_each_mpam_resctrl_mon(mon, eventid) {
1415 struct mpam_component *mon_comp;
1416
1417 if (!mon->class)
1418 continue; // dummy resource
1419
1420 mon_comp = find_component(mon->class, cpu);
1421 dom->mon_comp[eventid] = mon_comp;
1422 if (mon_comp)
1423 any_mon_comp = mon_comp;
1424 }
1425 if (!any_mon_comp) {
1426 WARN_ON_ONCE(0);
1427 err = -EFAULT;
1428 goto offline_ctrl_domain;
1429 }
1430
1431 mon_d = &dom->resctrl_mon_dom;
1432 mpam_resctrl_domain_hdr_init(cpu, any_mon_comp, r->rid, &mon_d->hdr);
1433 mon_d->hdr.type = RESCTRL_MON_DOMAIN;
1434 err = resctrl_online_mon_domain(r, &mon_d->hdr);
1435 if (err)
1436 goto offline_ctrl_domain;
1437
1438 mpam_resctrl_domain_insert(&r->mon_domains, &mon_d->hdr);
1439 } else {
1440 pr_debug("Skipped monitor domain online - no monitors\n");
1441 }
1442
1443 return dom;
1444
1445 offline_ctrl_domain:
1446 if (r->alloc_capable) {
1447 mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr);
1448 resctrl_offline_ctrl_domain(r, ctrl_d);
1449 }
1450 free_domain:
1451 kfree(dom);
1452 dom = ERR_PTR(err);
1453
1454 return dom;
1455 }
1456
1457 /*
1458 * We know all the monitors are associated with the L3, even if there are no
1459 * controls and therefore no control component. Find the cache-id for the CPU
1460 * and use that to search for existing resctrl domains.
1461 * This relies on mpam_resctrl_pick_domain_id() using the L3 cache-id
1462 * for anything that is not a cache.
1463 */
mpam_resctrl_get_mon_domain_from_cpu(int cpu)1464 static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu)
1465 {
1466 int cache_id;
1467 struct mpam_resctrl_dom *dom;
1468 struct mpam_resctrl_res *l3 = &mpam_resctrl_controls[RDT_RESOURCE_L3];
1469
1470 lockdep_assert_cpus_held();
1471
1472 if (!l3->class)
1473 return NULL;
1474 cache_id = get_cpu_cacheinfo_id(cpu, 3);
1475 if (cache_id < 0)
1476 return NULL;
1477
1478 list_for_each_entry_rcu(dom, &l3->resctrl_res.mon_domains, resctrl_mon_dom.hdr.list) {
1479 if (dom->resctrl_mon_dom.hdr.id == cache_id)
1480 return dom;
1481 }
1482
1483 return NULL;
1484 }
1485
1486 static struct mpam_resctrl_dom *
mpam_resctrl_get_domain_from_cpu(int cpu,struct mpam_resctrl_res * res)1487 mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res)
1488 {
1489 struct mpam_resctrl_dom *dom;
1490 struct rdt_resource *r = &res->resctrl_res;
1491
1492 lockdep_assert_cpus_held();
1493
1494 list_for_each_entry_rcu(dom, &r->ctrl_domains, resctrl_ctrl_dom.hdr.list) {
1495 if (cpumask_test_cpu(cpu, &dom->ctrl_comp->affinity))
1496 return dom;
1497 }
1498
1499 if (r->rid != RDT_RESOURCE_L3)
1500 return NULL;
1501
1502 /* Search the mon domain list too - needed on monitor only platforms. */
1503 return mpam_resctrl_get_mon_domain_from_cpu(cpu);
1504 }
1505
mpam_resctrl_online_cpu(unsigned int cpu)1506 int mpam_resctrl_online_cpu(unsigned int cpu)
1507 {
1508 struct mpam_resctrl_res *res;
1509 enum resctrl_res_level rid;
1510
1511 guard(mutex)(&domain_list_lock);
1512 for_each_mpam_resctrl_control(res, rid) {
1513 struct mpam_resctrl_dom *dom;
1514 struct rdt_resource *r = &res->resctrl_res;
1515
1516 if (!res->class)
1517 continue; // dummy_resource;
1518
1519 dom = mpam_resctrl_get_domain_from_cpu(cpu, res);
1520 if (!dom) {
1521 dom = mpam_resctrl_alloc_domain(cpu, res);
1522 if (IS_ERR(dom))
1523 return PTR_ERR(dom);
1524 } else {
1525 if (r->alloc_capable) {
1526 struct rdt_ctrl_domain *ctrl_d = &dom->resctrl_ctrl_dom;
1527
1528 mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr);
1529 }
1530 if (r->mon_capable) {
1531 struct rdt_l3_mon_domain *mon_d = &dom->resctrl_mon_dom;
1532
1533 mpam_resctrl_online_domain_hdr(cpu, &mon_d->hdr);
1534 }
1535 }
1536 }
1537
1538 resctrl_online_cpu(cpu);
1539
1540 return 0;
1541 }
1542
mpam_resctrl_offline_cpu(unsigned int cpu)1543 void mpam_resctrl_offline_cpu(unsigned int cpu)
1544 {
1545 struct mpam_resctrl_res *res;
1546 enum resctrl_res_level rid;
1547
1548 resctrl_offline_cpu(cpu);
1549
1550 guard(mutex)(&domain_list_lock);
1551 for_each_mpam_resctrl_control(res, rid) {
1552 struct mpam_resctrl_dom *dom;
1553 struct rdt_l3_mon_domain *mon_d;
1554 struct rdt_ctrl_domain *ctrl_d;
1555 bool ctrl_dom_empty, mon_dom_empty;
1556 struct rdt_resource *r = &res->resctrl_res;
1557
1558 if (!res->class)
1559 continue; // dummy resource
1560
1561 dom = mpam_resctrl_get_domain_from_cpu(cpu, res);
1562 if (WARN_ON_ONCE(!dom))
1563 continue;
1564
1565 if (r->alloc_capable) {
1566 ctrl_d = &dom->resctrl_ctrl_dom;
1567 ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr);
1568 if (ctrl_dom_empty)
1569 resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d);
1570 } else {
1571 ctrl_dom_empty = true;
1572 }
1573
1574 if (r->mon_capable) {
1575 mon_d = &dom->resctrl_mon_dom;
1576 mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &mon_d->hdr);
1577 if (mon_dom_empty)
1578 resctrl_offline_mon_domain(&res->resctrl_res, &mon_d->hdr);
1579 } else {
1580 mon_dom_empty = true;
1581 }
1582
1583 if (ctrl_dom_empty && mon_dom_empty)
1584 kfree(dom);
1585 }
1586 }
1587
mpam_resctrl_setup(void)1588 int mpam_resctrl_setup(void)
1589 {
1590 int err = 0;
1591 struct mpam_resctrl_res *res;
1592 enum resctrl_res_level rid;
1593 struct mpam_resctrl_mon *mon;
1594 enum resctrl_event_id eventid;
1595
1596 wait_event(wait_cacheinfo_ready, cacheinfo_ready);
1597
1598 cpus_read_lock();
1599 for_each_mpam_resctrl_control(res, rid) {
1600 INIT_LIST_HEAD_RCU(&res->resctrl_res.ctrl_domains);
1601 INIT_LIST_HEAD_RCU(&res->resctrl_res.mon_domains);
1602 res->resctrl_res.rid = rid;
1603 }
1604
1605 /* Find some classes to use for controls */
1606 mpam_resctrl_pick_caches();
1607 mpam_resctrl_pick_mba();
1608
1609 /* Initialise the resctrl structures from the classes */
1610 for_each_mpam_resctrl_control(res, rid) {
1611 if (!res->class)
1612 continue; // dummy resource
1613
1614 err = mpam_resctrl_control_init(res);
1615 if (err) {
1616 pr_debug("Failed to initialise rid %u\n", rid);
1617 goto internal_error;
1618 }
1619 }
1620
1621 /* Find some classes to use for monitors */
1622 mpam_resctrl_pick_counters();
1623
1624 for_each_mpam_resctrl_mon(mon, eventid) {
1625 if (!mon->class)
1626 continue; // dummy resource
1627
1628 err = mpam_resctrl_monitor_init(mon, eventid);
1629 if (err) {
1630 pr_debug("Failed to initialise event %u\n", eventid);
1631 goto internal_error;
1632 }
1633 }
1634
1635 cpus_read_unlock();
1636
1637 if (!resctrl_arch_alloc_capable() && !resctrl_arch_mon_capable()) {
1638 pr_debug("No alloc(%u) or monitor(%u) found - resctrl not supported\n",
1639 resctrl_arch_alloc_capable(), resctrl_arch_mon_capable());
1640 return -EOPNOTSUPP;
1641 }
1642
1643 err = resctrl_init();
1644 if (err)
1645 return err;
1646
1647 WRITE_ONCE(resctrl_enabled, true);
1648
1649 return 0;
1650
1651 internal_error:
1652 cpus_read_unlock();
1653 pr_debug("Internal error %d - resctrl not supported\n", err);
1654 return err;
1655 }
1656
mpam_resctrl_exit(void)1657 void mpam_resctrl_exit(void)
1658 {
1659 if (!READ_ONCE(resctrl_enabled))
1660 return;
1661
1662 WRITE_ONCE(resctrl_enabled, false);
1663 resctrl_exit();
1664 }
1665
1666 /*
1667 * The driver is detaching an MSC from this class, if resctrl was using it,
1668 * pull on resctrl_exit().
1669 */
mpam_resctrl_teardown_class(struct mpam_class * class)1670 void mpam_resctrl_teardown_class(struct mpam_class *class)
1671 {
1672 struct mpam_resctrl_res *res;
1673 enum resctrl_res_level rid;
1674 struct mpam_resctrl_mon *mon;
1675 enum resctrl_event_id eventid;
1676
1677 might_sleep();
1678
1679 for_each_mpam_resctrl_control(res, rid) {
1680 if (res->class == class) {
1681 res->class = NULL;
1682 break;
1683 }
1684 }
1685 for_each_mpam_resctrl_mon(mon, eventid) {
1686 if (mon->class == class) {
1687 mon->class = NULL;
1688 break;
1689 }
1690 }
1691 }
1692
/* Wake mpam_resctrl_setup(), which waits for cacheinfo to be populated. */
static int __init __cacheinfo_ready(void)
{
	cacheinfo_ready = true;
	wake_up(&wait_cacheinfo_ready);

	return 0;
}
/*
 * NOTE(review): _sync level presumably chosen so this runs after cacheinfo
 * is set up for all online CPUs - confirm against cacheinfo's initcall
 * ordering.
 */
device_initcall_sync(__cacheinfo_ready);
1701
1702 #ifdef CONFIG_MPAM_KUNIT_TEST
1703 #include "test_mpam_resctrl.c"
1704 #endif
1705