xref: /linux/drivers/resctrl/mpam_resctrl.c (revision c43267e6794a36013fd495a4d81bf7f748fe4615)
1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (C) 2025 Arm Ltd.
3 
4 #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
5 
6 #include <linux/arm_mpam.h>
7 #include <linux/cacheinfo.h>
8 #include <linux/cpu.h>
9 #include <linux/cpumask.h>
10 #include <linux/errno.h>
11 #include <linux/limits.h>
12 #include <linux/list.h>
13 #include <linux/math.h>
14 #include <linux/printk.h>
15 #include <linux/rculist.h>
16 #include <linux/resctrl.h>
17 #include <linux/slab.h>
18 #include <linux/types.h>
19 #include <linux/wait.h>
20 
21 #include <asm/mpam.h>
22 
23 #include "mpam_internal.h"
24 
25 DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters);
26 
27 /*
28  * The classes we've picked to map to resctrl resources, wrapped
29  * up together with their resctrl structure.
30  * Class pointer may be NULL.
31  */
32 static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES];
33 
34 #define for_each_mpam_resctrl_control(res, rid)					\
35 	for (rid = 0, res = &mpam_resctrl_controls[rid];			\
36 	     rid < RDT_NUM_RESOURCES;						\
37 	     rid++, res = &mpam_resctrl_controls[rid])
38 
39 /*
40  * The classes we've picked to map to resctrl events.
41  * Resctrl believes all the world's a Xeon, and these are all on the L3. This
42  * array lets us find the actual class backing the event counters, e.g.
43  * the only memory bandwidth counters may be on the memory controller, but to
44  * make use of them, we pretend they are on L3. Restrict the events considered
45  * to those supported by MPAM.
46  * Class pointer may be NULL.
47  */
48 #define MPAM_MAX_EVENT QOS_L3_MBM_TOTAL_EVENT_ID
49 static struct mpam_resctrl_mon mpam_resctrl_counters[MPAM_MAX_EVENT + 1];
50 
51 #define for_each_mpam_resctrl_mon(mon, eventid)					\
52 	for (eventid = QOS_FIRST_EVENT, mon = &mpam_resctrl_counters[eventid];	\
53 	     eventid <= MPAM_MAX_EVENT;						\
54 	     eventid++, mon = &mpam_resctrl_counters[eventid])
55 
56 /* The lock for modifying resctrl's domain lists from cpuhp callbacks. */
57 static DEFINE_MUTEX(domain_list_lock);
58 
59 /*
60  * MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM0_EL1.
61  * This applies globally to all traffic the CPU generates.
62  */
63 static bool cdp_enabled;
64 
65 /*
66  * We use cacheinfo to discover the size of the caches and their id. cacheinfo
67  * populates this from a device_initcall(). mpam_resctrl_setup() must wait.
68  */
69 static bool cacheinfo_ready;
70 static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready);
71 
72 /*
73  * If resctrl_init() succeeded, resctrl_exit() can be used to remove support
74  * for the filesystem in the event of an error.
75  */
76 static bool resctrl_enabled;
77 
78 bool resctrl_arch_alloc_capable(void)
79 {
80 	struct mpam_resctrl_res *res;
81 	enum resctrl_res_level rid;
82 
83 	for_each_mpam_resctrl_control(res, rid) {
84 		if (res->resctrl_res.alloc_capable)
85 			return true;
86 	}
87 
88 	return false;
89 }
90 
91 bool resctrl_arch_mon_capable(void)
92 {
93 	struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
94 	struct rdt_resource *l3 = &res->resctrl_res;
95 
96 	/* All monitors are presented as being on the L3 cache */
97 	return l3->mon_capable;
98 }
99 
100 bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
101 {
102 	return false;
103 }
104 
105 void resctrl_arch_mon_event_config_read(void *info)
106 {
107 }
108 
109 void resctrl_arch_mon_event_config_write(void *info)
110 {
111 }
112 
113 void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
114 {
115 }
116 
117 void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
118 			     u32 closid, u32 rmid, enum resctrl_event_id eventid)
119 {
120 }
121 
122 void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
123 			     u32 closid, u32 rmid, int cntr_id,
124 			     enum resctrl_event_id eventid)
125 {
126 }
127 
128 void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
129 			      enum resctrl_event_id evtid, u32 rmid, u32 closid,
130 			      u32 cntr_id, bool assign)
131 {
132 }
133 
134 int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
135 			   u32 unused, u32 rmid, int cntr_id,
136 			   enum resctrl_event_id eventid, u64 *val)
137 {
138 	return -EOPNOTSUPP;
139 }
140 
141 bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r)
142 {
143 	return false;
144 }
145 
146 int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable)
147 {
148 	return -EINVAL;
149 }
150 
151 int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable)
152 {
153 	return -EOPNOTSUPP;
154 }
155 
156 bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r)
157 {
158 	return false;
159 }
160 
161 void resctrl_arch_pre_mount(void)
162 {
163 }
164 
165 bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid)
166 {
167 	return mpam_resctrl_controls[rid].cdp_enabled;
168 }
169 
170 /**
171  * resctrl_reset_task_closids() - Reset the PARTID/PMG values for all tasks.
172  *
173  * At boot, all existing tasks use partid zero for D and I.
174  * To enable/disable CDP emulation, all these tasks need relabelling.
175  */
176 static void resctrl_reset_task_closids(void)
177 {
178 	struct task_struct *p, *t;
179 
180 	read_lock(&tasklist_lock);
181 	for_each_process_thread(p, t) {
182 		resctrl_arch_set_closid_rmid(t, RESCTRL_RESERVED_CLOSID,
183 					     RESCTRL_RESERVED_RMID);
184 	}
185 	read_unlock(&tasklist_lock);
186 }
187 
188 int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable)
189 {
190 	u32 partid_i = RESCTRL_RESERVED_CLOSID, partid_d = RESCTRL_RESERVED_CLOSID;
191 	struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
192 	struct rdt_resource *l3 = &res->resctrl_res;
193 	int cpu;
194 
195 	if (!IS_ENABLED(CONFIG_EXPERT) && enable) {
196 		/*
197 		 * If the resctrl fs is mounted more than once, sequentially,
198 		 * then CDP can lead to the use of out of range PARTIDs.
199 		 */
200 		pr_warn("CDP not supported\n");
201 		return -EOPNOTSUPP;
202 	}
203 
204 	if (enable)
205 		pr_warn("CDP is an expert feature and may cause MPAM to malfunction.\n");
206 
207 	/*
208 	 * resctrl_arch_set_cdp_enabled() is only called with enable set to
209 	 * false on error and unmount.
210 	 */
211 	cdp_enabled = enable;
212 	mpam_resctrl_controls[rid].cdp_enabled = enable;
213 
214 	if (enable)
215 		l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx() / 2;
216 	else
217 		l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx();
218 
219 	/* The mbw_max feature can't hide cdp as it's a per-partid maximum. */
220 	if (cdp_enabled && !mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled)
221 		mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = false;
222 
223 	if (mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled &&
224 	    mpam_resctrl_controls[RDT_RESOURCE_MBA].class)
225 		mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = true;
226 
227 	if (enable) {
228 		if (mpam_partid_max < 1)
229 			return -EINVAL;
230 
231 		partid_d = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_DATA);
232 		partid_i = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_CODE);
233 	}
234 
235 	mpam_set_task_partid_pmg(current, partid_d, partid_i, 0, 0);
236 	WRITE_ONCE(arm64_mpam_global_default, mpam_get_regval(current));
237 
238 	resctrl_reset_task_closids();
239 
240 	for_each_possible_cpu(cpu)
241 		mpam_set_cpu_defaults(cpu, partid_d, partid_i, 0, 0);
242 	on_each_cpu(resctrl_arch_sync_cpu_closid_rmid, NULL, 1);
243 
244 	return 0;
245 }
246 
247 static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid)
248 {
249 	return cdp_enabled && !resctrl_arch_get_cdp_enabled(rid);
250 }
251 
252 /*
253  * An MSC may raise an error interrupt if it sees an out of range partid/pmg,
254  * and go on to truncate the value. Regardless of what the hardware supports,
255  * only the system-wide safe value can be used.
256  */
257 u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored)
258 {
259 	return mpam_partid_max + 1;
260 }
261 
262 u32 resctrl_arch_system_num_rmid_idx(void)
263 {
264 	return (mpam_pmg_max + 1) * (mpam_partid_max + 1);
265 }
266 
267 u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
268 {
269 	return closid * (mpam_pmg_max + 1) + rmid;
270 }
271 
272 void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
273 {
274 	*closid = idx / (mpam_pmg_max + 1);
275 	*rmid = idx % (mpam_pmg_max + 1);
276 }
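/*
 * A worked example of the index scheme above, with illustrative values
 * (assuming mpam_partid_max = 63 and mpam_pmg_max = 3, i.e. four PMG
 * values per PARTID):
 *
 *   resctrl_arch_system_num_rmid_idx()    = 64 * 4    = 256
 *   resctrl_arch_rmid_idx_encode(5, 2)    = 5 * 4 + 2 = 22
 *   resctrl_arch_rmid_idx_decode(22, ...) -> *closid = 5, *rmid = 2
 */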
277 
278 void resctrl_arch_sched_in(struct task_struct *tsk)
279 {
280 	lockdep_assert_preemption_disabled();
281 
282 	mpam_thread_switch(tsk);
283 }
284 
285 void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid)
286 {
287 	WARN_ON_ONCE(closid > U16_MAX);
288 	WARN_ON_ONCE(rmid > U8_MAX);
289 
290 	if (!cdp_enabled) {
291 		mpam_set_cpu_defaults(cpu, closid, closid, rmid, rmid);
292 	} else {
293 		/*
294 		 * When CDP is enabled, resctrl halves the closid range and we
295 		 * use odd/even partid for one closid.
296 		 */
297 		u32 partid_d = resctrl_get_config_index(closid, CDP_DATA);
298 		u32 partid_i = resctrl_get_config_index(closid, CDP_CODE);
299 
300 		mpam_set_cpu_defaults(cpu, partid_d, partid_i, rmid, rmid);
301 	}
302 }
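/*
 * Example of the odd/even mapping above, assuming resctrl's
 * resctrl_get_config_index() places data partids on even values and
 * code partids on odd values: with CDP enabled, closid 3 is backed by
 * partid 6 for data accesses and partid 7 for instruction fetches.
 * With CDP disabled, closid 3 maps straight to partid 3 for both.
 */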
303 
304 void resctrl_arch_sync_cpu_closid_rmid(void *info)
305 {
306 	struct resctrl_cpu_defaults *r = info;
307 
308 	lockdep_assert_preemption_disabled();
309 
310 	if (r) {
311 		resctrl_arch_set_cpu_default_closid_rmid(smp_processor_id(),
312 							 r->closid, r->rmid);
313 	}
314 
315 	resctrl_arch_sched_in(current);
316 }
317 
318 void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
319 {
320 	WARN_ON_ONCE(closid > U16_MAX);
321 	WARN_ON_ONCE(rmid > U8_MAX);
322 
323 	if (!cdp_enabled) {
324 		mpam_set_task_partid_pmg(tsk, closid, closid, rmid, rmid);
325 	} else {
326 		u32 partid_d = resctrl_get_config_index(closid, CDP_DATA);
327 		u32 partid_i = resctrl_get_config_index(closid, CDP_CODE);
328 
329 		mpam_set_task_partid_pmg(tsk, partid_d, partid_i, rmid, rmid);
330 	}
331 }
332 
333 bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid)
334 {
335 	u64 regval = mpam_get_regval(tsk);
336 	u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval);
337 
338 	if (cdp_enabled)
339 		tsk_closid >>= 1;
340 
341 	return tsk_closid == closid;
342 }
343 
344 /* The task's pmg is not unique; the partid must be considered too */
345 bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
346 {
347 	u64 regval = mpam_get_regval(tsk);
348 	u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval);
349 	u32 tsk_rmid = FIELD_GET(MPAM0_EL1_PMG_D, regval);
350 
351 	if (cdp_enabled)
352 		tsk_closid >>= 1;
353 
354 	return (tsk_closid == closid) && (tsk_rmid == rmid);
355 }
356 
357 struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
358 {
359 	if (l >= RDT_NUM_RESOURCES)
360 		return NULL;
361 
362 	return &mpam_resctrl_controls[l].resctrl_res;
363 }
364 
365 static int resctrl_arch_mon_ctx_alloc_no_wait(enum resctrl_event_id evtid)
366 {
367 	struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid];
368 
369 	if (!mpam_is_enabled())
370 		return -EINVAL;
371 
372 	if (!mon->class)
373 		return -EINVAL;
374 
375 	switch (evtid) {
376 	case QOS_L3_OCCUP_EVENT_ID:
377 		/* With CDP, one monitor gets used for both code/data reads */
378 		return mpam_alloc_csu_mon(mon->class);
379 	case QOS_L3_MBM_LOCAL_EVENT_ID:
380 	case QOS_L3_MBM_TOTAL_EVENT_ID:
381 		return USE_PRE_ALLOCATED;
382 	default:
383 		return -EOPNOTSUPP;
384 	}
385 }
386 
387 void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r,
388 				 enum resctrl_event_id evtid)
389 {
390 	DEFINE_WAIT(wait);
391 	int *ret;
392 
393 	ret = kmalloc_obj(*ret);
394 	if (!ret)
395 		return ERR_PTR(-ENOMEM);
396 
397 	do {
398 		prepare_to_wait(&resctrl_mon_ctx_waiters, &wait,
399 				TASK_INTERRUPTIBLE);
400 		*ret = resctrl_arch_mon_ctx_alloc_no_wait(evtid);
401 		if (*ret == -ENOSPC)
402 			schedule();
403 	} while (*ret == -ENOSPC && !signal_pending(current));
404 	finish_wait(&resctrl_mon_ctx_waiters, &wait);
405 
406 	return ret;
407 }
408 
409 static void resctrl_arch_mon_ctx_free_no_wait(enum resctrl_event_id evtid,
410 					      u32 mon_idx)
411 {
412 	struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid];
413 
414 	if (!mpam_is_enabled())
415 		return;
416 
417 	if (!mon->class)
418 		return;
419 
420 	if (evtid == QOS_L3_OCCUP_EVENT_ID)
421 		mpam_free_csu_mon(mon->class, mon_idx);
422 
423 	wake_up(&resctrl_mon_ctx_waiters);
424 }
425 
426 void resctrl_arch_mon_ctx_free(struct rdt_resource *r,
427 			       enum resctrl_event_id evtid, void *arch_mon_ctx)
428 {
429 	u32 mon_idx = *(u32 *)arch_mon_ctx;
430 
431 	kfree(arch_mon_ctx);
432 
433 	resctrl_arch_mon_ctx_free_no_wait(evtid, mon_idx);
434 }
435 
436 static int __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
437 		      enum mpam_device_features mon_type,
438 		      int mon_idx,
439 		      enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, u64 *val)
440 {
441 	struct mon_cfg cfg;
442 
443 	if (!mpam_is_enabled())
444 		return -EINVAL;
445 
446 	/* Shift closid to account for CDP */
447 	closid = resctrl_get_config_index(closid, cdp_type);
448 
449 	if (irqs_disabled()) {
450 		/* Check if we can access this domain without an IPI */
451 		return -EIO;
452 	}
453 
454 	cfg = (struct mon_cfg) {
455 		.mon = mon_idx,
456 		.match_pmg = true,
457 		.partid = closid,
458 		.pmg = rmid,
459 	};
460 
461 	return mpam_msmon_read(mon_comp, &cfg, mon_type, val);
462 }
463 
464 static int read_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
465 			     enum mpam_device_features mon_type,
466 			     int mon_idx, u32 closid, u32 rmid, u64 *val)
467 {
468 	if (cdp_enabled) {
469 		u64 code_val = 0, data_val = 0;
470 		int err;
471 
472 		err = __read_mon(mon, mon_comp, mon_type, mon_idx,
473 				 CDP_CODE, closid, rmid, &code_val);
474 		if (err)
475 			return err;
476 
477 		err = __read_mon(mon, mon_comp, mon_type, mon_idx,
478 				 CDP_DATA, closid, rmid, &data_val);
479 		if (err)
480 			return err;
481 
482 		*val += code_val + data_val;
483 		return 0;
484 	}
485 
486 	return __read_mon(mon, mon_comp, mon_type, mon_idx,
487 			  CDP_NONE, closid, rmid, val);
488 }
489 
490 /* MBWU when not in ABMC mode (not supported), and CSU counters. */
491 int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr,
492 			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
493 			   void *arch_priv, u64 *val, void *arch_mon_ctx)
494 {
495 	struct mpam_resctrl_dom *l3_dom;
496 	struct mpam_component *mon_comp;
497 	u32 mon_idx = *(u32 *)arch_mon_ctx;
498 	enum mpam_device_features mon_type;
499 	struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid];
500 
501 	resctrl_arch_rmid_read_context_check();
502 
503 	if (!mpam_is_enabled())
504 		return -EINVAL;
505 
506 	if (eventid >= QOS_NUM_EVENTS || !mon->class)
507 		return -EINVAL;
508 
509 	l3_dom = container_of(hdr, struct mpam_resctrl_dom, resctrl_mon_dom.hdr);
510 	mon_comp = l3_dom->mon_comp[eventid];
511 
512 	if (eventid != QOS_L3_OCCUP_EVENT_ID)
513 		return -EINVAL;
514 
515 	mon_type = mpam_feat_msmon_csu;
516 
517 	return read_mon_cdp_safe(mon, mon_comp, mon_type, mon_idx,
518 				 closid, rmid, val);
519 }
520 
521 /*
522  * The rmid realloc threshold should be for the smallest cache exposed to
523  * resctrl.
524  */
525 static int update_rmid_limits(struct mpam_class *class)
526 {
527 	u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx();
528 	struct mpam_props *cprops = &class->props;
529 	struct cacheinfo *ci;
530 
531 	lockdep_assert_cpus_held();
532 
533 	if (!mpam_has_feature(mpam_feat_msmon_csu, cprops))
534 		return 0;
535 
536 	/*
537 	 * Assume cache levels are the same size for all CPUs...
538 	 * The check just requires any online CPU and it can't go offline as we
539 	 * hold the cpu lock.
540 	 */
541 	ci = get_cpu_cacheinfo_level(raw_smp_processor_id(), class->level);
542 	if (!ci || ci->size == 0) {
543 		pr_debug("Could not read cache size for class %u\n",
544 			 class->level);
545 		return -EINVAL;
546 	}
547 
548 	if (!resctrl_rmid_realloc_limit ||
549 	    ci->size < resctrl_rmid_realloc_limit) {
550 		resctrl_rmid_realloc_limit = ci->size;
551 		resctrl_rmid_realloc_threshold = ci->size / num_unique_pmg;
552 	}
553 
554 	return 0;
555 }
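/*
 * Illustrative numbers for the threshold above: a 32MB L3 shared by
 * 256 unique partid/pmg combinations gives a realloc threshold of
 * 32MB / 256 = 128KB. resctrl only reuses an RMID once its occupancy
 * counter has dropped below this share of the cache.
 */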
556 
557 static bool cache_has_usable_cpor(struct mpam_class *class)
558 {
559 	struct mpam_props *cprops = &class->props;
560 
561 	if (!mpam_has_feature(mpam_feat_cpor_part, cprops))
562 		return false;
563 
564 	/* resctrl uses u32 for all bitmap configurations */
565 	return class->props.cpbm_wd <= 32;
566 }
567 
568 static bool mba_class_use_mbw_max(struct mpam_props *cprops)
569 {
570 	return (mpam_has_feature(mpam_feat_mbw_max, cprops) &&
571 		cprops->bwa_wd);
572 }
573 
574 static bool class_has_usable_mba(struct mpam_props *cprops)
575 {
576 	return mba_class_use_mbw_max(cprops);
577 }
578 
579 static bool cache_has_usable_csu(struct mpam_class *class)
580 {
581 	struct mpam_props *cprops;
582 
583 	if (!class)
584 		return false;
585 
586 	cprops = &class->props;
587 
588 	if (!mpam_has_feature(mpam_feat_msmon_csu, cprops))
589 		return false;
590 
591 	/*
592 	 * CSU counters settle on the value, so we can get away with
593 	 * having only one.
594 	 */
595 	if (!cprops->num_csu_mon)
596 		return false;
597 
598 	return true;
599 }
600 
601 /*
602  * Calculate the worst-case percentage change from each implemented step
603  * in the control.
604  */
605 static u32 get_mba_granularity(struct mpam_props *cprops)
606 {
607 	if (!mba_class_use_mbw_max(cprops))
608 		return 0;
609 
610 	/*
611 	 * bwa_wd is the number of bits implemented in the 0.xxx
612 	 * fixed point fraction. 1 bit is 50%, 2 is 25% etc.
613 	 */
614 	return DIV_ROUND_UP(MAX_MBA_BW, 1 << cprops->bwa_wd);
615 }
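/*
 * e.g. bwa_wd = 2 implements four steps, so the granularity is
 * DIV_ROUND_UP(100, 1 << 2) = 25%; bwa_wd = 8 gives 256 steps and a
 * granularity of 1%.
 */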
616 
617 /*
618  * Each fixed-point hardware value architecturally represents a range
619  * of values: the full range 0% - 100% is split contiguously into
620  * (1 << cprops->bwa_wd) equal bands.
621  *
622  * Although the bwa_wd fields have 6 bits, the maximum valid value is 16
623  * as it reports the width of fields that are at most 16 bits. When
624  * fewer than 16 bits are valid the least significant bits are
625  * ignored. The implied binary point is kept between bits 15 and 16 and
626  * so the valid bits are leftmost.
627  *
628  * See ARM IHI0099B.a "MPAM system component specification", Section 9.3,
629  * "The fixed-point fractional format" for more information.
630  *
631  * Find the nearest percentage value to the upper bound of the selected band:
632  */
633 static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops)
634 {
635 	u32 val = mbw_max;
636 
637 	val >>= 16 - cprops->bwa_wd;
638 	val += 1;
639 	val *= MAX_MBA_BW;
640 	val = DIV_ROUND_CLOSEST(val, 1 << cprops->bwa_wd);
641 
642 	return val;
643 }
644 
645 /*
646  * Find the band whose upper bound is closest to the specified percentage.
647  *
648  * A round-to-nearest policy is followed here as a balanced compromise
649  * between unexpected under-commit of the resource (where the total of
650  * a set of resource allocations after conversion is less than the
651  * expected total, due to rounding of the individual converted
652  * percentages) and over-commit (where the total of the converted
653  * allocations is greater than expected).
654  */
655 static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops)
656 {
657 	u32 val = pc;
658 
659 	val <<= cprops->bwa_wd;
660 	val = DIV_ROUND_CLOSEST(val, MAX_MBA_BW);
661 	val = max(val, 1) - 1;
662 	val <<= 16 - cprops->bwa_wd;
663 
664 	return val;
665 }
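/*
 * A worked round-trip of the two conversions above, assuming
 * bwa_wd = 2 (four bands) and MAX_MBA_BW = 100:
 *
 *   percent_to_mbw_max(50): (50 << 2) / 100 = 2, minus one selects
 *   band 1, which shifted left by (16 - 2) bits encodes as 0x4000.
 *
 *   mbw_max_to_percent(0x4000): (0x4000 >> 14) + 1 = 2 bands, and
 *   2 * 100 / 4 rounds to 50%.
 */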
666 
667 static u32 get_mba_min(struct mpam_props *cprops)
668 {
669 	if (!mba_class_use_mbw_max(cprops)) {
670 		WARN_ON_ONCE(1);
671 		return 0;
672 	}
673 
674 	return mbw_max_to_percent(0, cprops);
675 }
676 
677 /* Find the L3 cache that has affinity with this CPU */
678 static int find_l3_equivalent_bitmask(int cpu, cpumask_var_t tmp_cpumask)
679 {
680 	u32 cache_id = get_cpu_cacheinfo_id(cpu, 3);
681 
682 	lockdep_assert_cpus_held();
683 
684 	return mpam_get_cpumask_from_cache_id(cache_id, 3, tmp_cpumask);
685 }
686 
687 /*
688  * topology_matches_l3() - Is the provided class the same shape as L3
689  * @victim:		The class we'd like to pretend is L3.
690  *
691  * resctrl expects all the world's a Xeon, and all counters are on the
692  * L3. We allow mapping some counters on other classes. This requires
693  * that the CPU->domain mapping is the same kind of shape.
694  *
695  * Using cacheinfo directly would make this work even if resctrl can't
696  * use the L3 - but cacheinfo can't tell us anything about offline CPUs.
697  * Using the L3 resctrl domain list also depends on CPUs being online.
698  * Using the mpam_class we picked for L3 so we can use its domain list
699  * assumes that there are MPAM controls on the L3.
700  * Instead, this path eventually uses the mpam_get_cpumask_from_cache_id()
701  * helper which can tell us about offline CPUs ... but getting the cache_id
702  * to start with relies on at least one CPU per L3 cache being online at
703  * boot.
704  *
705  * Walk the victim component list and compare the affinity mask with the
706  * corresponding L3. The topology matches if each victim:component's affinity
707  * mask is the same as the CPU's corresponding L3's. These lists/masks are
708  * computed from firmware tables so don't change at runtime.
709  */
710 static bool topology_matches_l3(struct mpam_class *victim)
711 {
712 	int cpu, err;
713 	struct mpam_component *victim_iter;
714 
715 	lockdep_assert_cpus_held();
716 
717 	cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL;
718 	if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL))
719 		return false;
720 
721 	guard(srcu)(&mpam_srcu);
722 	list_for_each_entry_srcu(victim_iter, &victim->components, class_list,
723 				 srcu_read_lock_held(&mpam_srcu)) {
724 		if (cpumask_empty(&victim_iter->affinity)) {
725 			pr_debug("class %u has CPU-less component %u - can't match L3!\n",
726 				 victim->level, victim_iter->comp_id);
727 			return false;
728 		}
729 
730 		cpu = cpumask_any_and(&victim_iter->affinity, cpu_online_mask);
731 		if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
732 			return false;
733 
734 		cpumask_clear(tmp_cpumask);
735 		err = find_l3_equivalent_bitmask(cpu, tmp_cpumask);
736 		if (err) {
737 			pr_debug("Failed to find L3's equivalent component to class %u component %u\n",
738 				 victim->level, victim_iter->comp_id);
739 			return false;
740 		}
741 
742 		/* Any differing bits in the affinity mask? */
743 		if (!cpumask_equal(tmp_cpumask, &victim_iter->affinity)) {
744 			pr_debug("class %u component %u has mismatched CPU mask with L3 equivalent\n"
745 				 "L3:%*pbl != victim:%*pbl\n",
746 				 victim->level, victim_iter->comp_id,
747 				 cpumask_pr_args(tmp_cpumask),
748 				 cpumask_pr_args(&victim_iter->affinity));
749 
750 			return false;
751 		}
752 	}
753 
754 	return true;
755 }
756 
757 /*
758  * Test if the traffic for a class matches that at egress from the L3. For
759  * MSC at memory controllers this is only possible if there is a single L3
760  * as otherwise the counters at the memory can include bandwidth from the
761  * non-local L3.
762  */
763 static bool traffic_matches_l3(struct mpam_class *class)
764 {
765 	int err, cpu;
766 
767 	lockdep_assert_cpus_held();
768 
769 	if (class->type == MPAM_CLASS_CACHE && class->level == 3)
770 		return true;
771 
772 	if (class->type == MPAM_CLASS_CACHE && class->level != 3) {
773 		pr_debug("class %u is a different cache from L3\n", class->level);
774 		return false;
775 	}
776 
777 	if (class->type != MPAM_CLASS_MEMORY) {
778 		pr_debug("class %u is neither a cache nor a memory class\n", class->level);
779 		return false;
780 	}
781 
782 	cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL;
783 	if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) {
784 		pr_debug("cpumask allocation failed\n");
785 		return false;
786 	}
787 
788 	cpu = cpumask_any_and(&class->affinity, cpu_online_mask);
789 	err = find_l3_equivalent_bitmask(cpu, tmp_cpumask);
790 	if (err) {
791 		pr_debug("Failed to find L3 downstream to cpu %d\n", cpu);
792 		return false;
793 	}
794 
795 	if (!cpumask_equal(tmp_cpumask, cpu_possible_mask)) {
796 		pr_debug("There is more than one L3\n");
797 		return false;
798 	}
799 
800 	/* Be strict; the traffic might stop in the intermediate cache. */
801 	if (get_cpu_cacheinfo_id(cpu, 4) != -1) {
802 		pr_debug("L3 isn't the last level of cache\n");
803 		return false;
804 	}
805 
806 	if (num_possible_nodes() > 1) {
807 		pr_debug("There is more than one numa node\n");
808 		return false;
809 	}
810 
811 #ifdef CONFIG_HMEM_REPORTING
812 	if (node_devices[cpu_to_node(cpu)]->cache_dev) {
813 		pr_debug("There is a memory side cache\n");
814 		return false;
815 	}
816 #endif
817 
818 	return true;
819 }
820 
821 /* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */
822 static void mpam_resctrl_pick_caches(void)
823 {
824 	struct mpam_class *class;
825 	struct mpam_resctrl_res *res;
826 
827 	lockdep_assert_cpus_held();
828 
829 	guard(srcu)(&mpam_srcu);
830 	list_for_each_entry_srcu(class, &mpam_classes, classes_list,
831 				 srcu_read_lock_held(&mpam_srcu)) {
832 		if (class->type != MPAM_CLASS_CACHE) {
833 			pr_debug("class %u is not a cache\n", class->level);
834 			continue;
835 		}
836 
837 		if (class->level != 2 && class->level != 3) {
838 			pr_debug("class %u is not L2 or L3\n", class->level);
839 			continue;
840 		}
841 
842 		if (!cache_has_usable_cpor(class)) {
843 			pr_debug("class %u cache misses CPOR\n", class->level);
844 			continue;
845 		}
846 
847 		if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
848 			pr_debug("class %u has missing CPUs, mask %*pb != %*pb\n", class->level,
849 				 cpumask_pr_args(&class->affinity),
850 				 cpumask_pr_args(cpu_possible_mask));
851 			continue;
852 		}
853 
854 		if (class->level == 2)
855 			res = &mpam_resctrl_controls[RDT_RESOURCE_L2];
856 		else
857 			res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
858 		res->class = class;
859 	}
860 }
861 
862 static void mpam_resctrl_pick_mba(void)
863 {
864 	struct mpam_class *class, *candidate_class = NULL;
865 	struct mpam_resctrl_res *res;
866 
867 	lockdep_assert_cpus_held();
868 
869 	guard(srcu)(&mpam_srcu);
870 	list_for_each_entry_srcu(class, &mpam_classes, classes_list,
871 				 srcu_read_lock_held(&mpam_srcu)) {
872 		struct mpam_props *cprops = &class->props;
873 
874 		if (class->level != 3 && class->type == MPAM_CLASS_CACHE) {
875 			pr_debug("class %u is a cache but not the L3\n", class->level);
876 			continue;
877 		}
878 
879 		if (!class_has_usable_mba(cprops)) {
880 			pr_debug("class %u has no bandwidth control\n",
881 				 class->level);
882 			continue;
883 		}
884 
885 		if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
886 			pr_debug("class %u has missing CPUs\n", class->level);
887 			continue;
888 		}
889 
890 		if (!topology_matches_l3(class)) {
891 			pr_debug("class %u topology doesn't match L3\n",
892 				 class->level);
893 			continue;
894 		}
895 
896 		if (!traffic_matches_l3(class)) {
897 			pr_debug("class %u traffic doesn't match L3 egress\n",
898 				 class->level);
899 			continue;
900 		}
901 
902 		/*
903 		 * Pick a resource to be MBA that is as close as possible to
904 		 * the L3. mbm_total counts the bandwidth leaving the L3
905 		 * cache and MBA should correspond as closely as possible
906 		 * for proper operation of mba_sc.
907 		 */
908 		if (!candidate_class || class->level < candidate_class->level)
909 			candidate_class = class;
910 	}
911 
912 	if (candidate_class) {
913 		pr_debug("selected class %u to back MBA\n",
914 			 candidate_class->level);
915 		res = &mpam_resctrl_controls[RDT_RESOURCE_MBA];
916 		res->class = candidate_class;
917 	}
918 }
919 
920 static void counter_update_class(enum resctrl_event_id evt_id,
921 				 struct mpam_class *class)
922 {
923 	struct mpam_class *existing_class = mpam_resctrl_counters[evt_id].class;
924 
925 	if (existing_class) {
926 		if (class->level == 3) {
927 			pr_debug("Existing class is L3 - L3 wins\n");
928 			return;
929 		}
930 
931 		if (existing_class->level < class->level) {
932 			pr_debug("Existing class is closer to L3, %u versus %u - closer is better\n",
933 				 existing_class->level, class->level);
934 			return;
935 		}
936 	}
937 
938 	mpam_resctrl_counters[evt_id].class = class;
939 }
940 
941 static void mpam_resctrl_pick_counters(void)
942 {
943 	struct mpam_class *class;
944 
945 	lockdep_assert_cpus_held();
946 
947 	guard(srcu)(&mpam_srcu);
948 	list_for_each_entry_srcu(class, &mpam_classes, classes_list,
949 				 srcu_read_lock_held(&mpam_srcu)) {
950 		/* The name of the resource is L3... */
951 		if (class->type == MPAM_CLASS_CACHE && class->level != 3) {
952 			pr_debug("class %u is a cache but not the L3\n", class->level);
953 			continue;
954 		}
955 
956 		if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
957 			pr_debug("class %u does not cover all CPUs\n",
958 				 class->level);
959 			continue;
960 		}
961 
962 		if (cache_has_usable_csu(class)) {
963 			pr_debug("class %u has usable CSU\n",
964 				 class->level);
965 
966 			/* CSU counters only make sense on a cache. */
967 			switch (class->type) {
968 			case MPAM_CLASS_CACHE:
969 				if (update_rmid_limits(class))
970 					break;
971 
972 				counter_update_class(QOS_L3_OCCUP_EVENT_ID, class);
973 				break;
974 			default:
975 				break;
976 			}
977 		}
978 	}
979 }
980 
981 static int mpam_resctrl_control_init(struct mpam_resctrl_res *res)
982 {
983 	struct mpam_class *class = res->class;
984 	struct mpam_props *cprops = &class->props;
985 	struct rdt_resource *r = &res->resctrl_res;
986 
987 	switch (r->rid) {
988 	case RDT_RESOURCE_L2:
989 	case RDT_RESOURCE_L3:
990 		r->schema_fmt = RESCTRL_SCHEMA_BITMAP;
991 		r->cache.arch_has_sparse_bitmasks = true;
992 
993 		r->cache.cbm_len = class->props.cpbm_wd;
994 		/* mpam_devices will reject empty bitmaps */
995 		r->cache.min_cbm_bits = 1;
996 
997 		if (r->rid == RDT_RESOURCE_L2) {
998 			r->name = "L2";
999 			r->ctrl_scope = RESCTRL_L2_CACHE;
1000 			r->cdp_capable = true;
1001 		} else {
1002 			r->name = "L3";
1003 			r->ctrl_scope = RESCTRL_L3_CACHE;
1004 			r->cdp_capable = true;
1005 		}
1006 
1007 		/*
1008 		 * Which bits are shared with other ...things...  Unknown
1009 		 * devices use partid-0 which uses all the bitmap fields. Until
1010 		 * we have configured the SMMU and GIC not to do this 'all the
1011 		 * bits' is the correct answer here.
1012 		 */
1013 		r->cache.shareable_bits = resctrl_get_default_ctrl(r);
1014 		r->alloc_capable = true;
1015 		break;
1016 	case RDT_RESOURCE_MBA:
1017 		r->schema_fmt = RESCTRL_SCHEMA_RANGE;
1018 		r->ctrl_scope = RESCTRL_L3_CACHE;
1019 
1020 		r->membw.delay_linear = true;
1021 		r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
1022 		r->membw.min_bw = get_mba_min(cprops);
1023 		r->membw.max_bw = MAX_MBA_BW;
1024 		r->membw.bw_gran = get_mba_granularity(cprops);
1025 
1026 		r->name = "MB";
1027 		r->alloc_capable = true;
1028 		break;
1029 	default:
1030 		return -EINVAL;
1031 	}
1032 
1033 	return 0;
1034 }
1035 
1036 static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp)
1037 {
1038 	struct mpam_class *class = comp->class;
1039 
1040 	if (class->type == MPAM_CLASS_CACHE)
1041 		return comp->comp_id;
1042 
1043 	if (topology_matches_l3(class)) {
1044 		/* Use the corresponding L3 component ID as the domain ID */
1045 		int id = get_cpu_cacheinfo_id(cpu, 3);
1046 
1047 		/* Implies topology_matches_l3() made a mistake */
1048 		if (WARN_ON_ONCE(id == -1))
1049 			return comp->comp_id;
1050 
1051 		return id;
1052 	}
1053 
1054 	/* Otherwise, expose the ID used by the firmware table code. */
1055 	return comp->comp_id;
1056 }
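/*
 * e.g. an MSC on a memory controller whose component covers the same
 * CPUs as the L3 with cache-id 42 is exposed with domain id 42, so
 * its resctrl domain lines up with the corresponding L3 domain.
 */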
1057 
1058 static int mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon,
1059 				     enum resctrl_event_id type)
1060 {
1061 	struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
1062 	struct rdt_resource *l3 = &res->resctrl_res;
1063 
1064 	lockdep_assert_cpus_held();
1065 
1066 	/*
1067 	 * There also needs to be an L3 cache present.
1068 	 * The check just requires any online CPU and it can't go offline as we
1069 	 * hold the cpu lock.
1070 	 */
1071 	if (get_cpu_cacheinfo_id(raw_smp_processor_id(), 3) == -1)
1072 		return 0;
1073 
1074 	/*
1075 	 * If there are no MPAM resources on L3, force it into existence.
1076 	 * topology_matches_l3() already ensures this looks like the L3.
1077 	 * The domain-ids will be fixed up by mpam_resctrl_domain_hdr_init().
1078 	 */
1079 	if (!res->class) {
1080 		pr_warn_once("Faking L3 MSC to enable counters.\n");
1081 		res->class = mpam_resctrl_counters[type].class;
1082 	}
1083 
1084 	/*
1085 	 * Called multiple times, once per event type that has a
1086 	 * monitoring class.
1087 	 * Setting name is necessary on monitor only platforms.
1088 	 */
1089 	l3->name = "L3";
1090 	l3->mon_scope = RESCTRL_L3_CACHE;
1091 
1092 	/*
1093 	 * num-rmid is the upper bound for the number of monitoring groups that
1094 	 * can exist simultaneously, including the default monitoring group for
1095 	 * each control group. Hence, advertise the whole rmid_idx space even
1096 	 * though each control group has its own pmg/rmid space. Unfortunately,
1097 	 * this does mean userspace needs to know the architecture to correctly
1098 	 * interpret this value.
1099 	 */
1100 	l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx();
1101 
1102 	if (resctrl_enable_mon_event(type, false, 0, NULL))
1103 		l3->mon_capable = true;
1104 
1105 	return 0;
1106 }
1107 
1108 u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
1109 			    u32 closid, enum resctrl_conf_type type)
1110 {
1111 	u32 partid;
1112 	struct mpam_config *cfg;
1113 	struct mpam_props *cprops;
1114 	struct mpam_resctrl_res *res;
1115 	struct mpam_resctrl_dom *dom;
1116 	enum mpam_device_features configured_by;
1117 
1118 	lockdep_assert_cpus_held();
1119 
1120 	if (!mpam_is_enabled())
1121 		return resctrl_get_default_ctrl(r);
1122 
1123 	res = container_of(r, struct mpam_resctrl_res, resctrl_res);
1124 	dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom);
1125 	cprops = &res->class->props;
1126 
1127 	/*
1128 	 * When CDP is enabled, but the resource doesn't support it,
1129 	 * the control is cloned across both partids.
1130 	 * Pick one at random to read:
1131 	 */
1132 	if (mpam_resctrl_hide_cdp(r->rid))
1133 		type = CDP_DATA;
1134 
1135 	partid = resctrl_get_config_index(closid, type);
1136 	cfg = &dom->ctrl_comp->cfg[partid];
1137 
1138 	switch (r->rid) {
1139 	case RDT_RESOURCE_L2:
1140 	case RDT_RESOURCE_L3:
1141 		configured_by = mpam_feat_cpor_part;
1142 		break;
1143 	case RDT_RESOURCE_MBA:
1144 		if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
1145 			configured_by = mpam_feat_mbw_max;
1146 			break;
1147 		}
1148 		fallthrough;
1149 	default:
1150 		return resctrl_get_default_ctrl(r);
1151 	}
1152 
1153 	if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r) ||
1154 	    !mpam_has_feature(configured_by, cfg))
1155 		return resctrl_get_default_ctrl(r);
1156 
1157 	switch (configured_by) {
1158 	case mpam_feat_cpor_part:
1159 		return cfg->cpbm;
1160 	case mpam_feat_mbw_max:
1161 		return mbw_max_to_percent(cfg->mbw_max, cprops);
1162 	default:
1163 		return resctrl_get_default_ctrl(r);
1164 	}
1165 }
1166 
1167 int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
1168 			    u32 closid, enum resctrl_conf_type t, u32 cfg_val)
1169 {
1170 	int err;
1171 	u32 partid;
1172 	struct mpam_config cfg;
1173 	struct mpam_props *cprops;
1174 	struct mpam_resctrl_res *res;
1175 	struct mpam_resctrl_dom *dom;
1176 
1177 	lockdep_assert_cpus_held();
1178 	lockdep_assert_irqs_enabled();
1179 
1180 	if (!mpam_is_enabled())
1181 		return -EINVAL;
1182 
1183 	/*
1184 	 * No need to check the CPU as mpam_apply_config() doesn't care, and
1185 	 * resctrl_arch_update_domains() relies on this.
1186 	 */
1187 	res = container_of(r, struct mpam_resctrl_res, resctrl_res);
1188 	dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom);
1189 	cprops = &res->class->props;
1190 
1191 	if (mpam_resctrl_hide_cdp(r->rid))
1192 		t = CDP_DATA;
1193 
1194 	partid = resctrl_get_config_index(closid, t);
1195 	if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r)) {
1196 		pr_debug("Not alloc capable or computed PARTID out of range\n");
1197 		return -EINVAL;
1198 	}
1199 
1200 	/*
1201 	 * Copy the current config to avoid clearing other resources when the
1202 	 * same component is exposed multiple times through resctrl.
1203 	 */
1204 	cfg = dom->ctrl_comp->cfg[partid];
1205 
1206 	switch (r->rid) {
1207 	case RDT_RESOURCE_L2:
1208 	case RDT_RESOURCE_L3:
1209 		cfg.cpbm = cfg_val;
1210 		mpam_set_feature(mpam_feat_cpor_part, &cfg);
1211 		break;
1212 	case RDT_RESOURCE_MBA:
1213 		if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
1214 			cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops);
1215 			mpam_set_feature(mpam_feat_mbw_max, &cfg);
1216 			break;
1217 		}
1218 		fallthrough;
1219 	default:
1220 		return -EINVAL;
1221 	}
1222 
1223 	/*
1224 	 * When CDP is enabled, but the resource doesn't support it, we need to
1225 	 * apply the same configuration to the other partid.
1226 	 */
1227 	if (mpam_resctrl_hide_cdp(r->rid)) {
1228 		partid = resctrl_get_config_index(closid, CDP_CODE);
1229 		err = mpam_apply_config(dom->ctrl_comp, partid, &cfg);
1230 		if (err)
1231 			return err;
1232 
1233 		partid = resctrl_get_config_index(closid, CDP_DATA);
1234 		return mpam_apply_config(dom->ctrl_comp, partid, &cfg);
1235 	}
1236 
1237 	return mpam_apply_config(dom->ctrl_comp, partid, &cfg);
1238 }
1239 
1240 int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
1241 {
1242 	int err;
1243 	struct rdt_ctrl_domain *d;
1244 
1245 	lockdep_assert_cpus_held();
1246 	lockdep_assert_irqs_enabled();
1247 
1248 	if (!mpam_is_enabled())
1249 		return -EINVAL;
1250 
1251 	list_for_each_entry_rcu(d, &r->ctrl_domains, hdr.list) {
1252 		for (enum resctrl_conf_type t = 0; t < CDP_NUM_TYPES; t++) {
1253 			struct resctrl_staged_config *cfg = &d->staged_config[t];
1254 
1255 			if (!cfg->have_new_ctrl)
1256 				continue;
1257 
1258 			err = resctrl_arch_update_one(r, d, closid, t,
1259 						      cfg->new_ctrl);
1260 			if (err)
1261 				return err;
1262 		}
1263 	}
1264 
1265 	return 0;
1266 }
1267 
1268 void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
1269 {
1270 	struct mpam_resctrl_res *res;
1271 
1272 	lockdep_assert_cpus_held();
1273 
1274 	if (!mpam_is_enabled())
1275 		return;
1276 
1277 	res = container_of(r, struct mpam_resctrl_res, resctrl_res);
1278 	mpam_reset_class_locked(res->class);
1279 }
1280 
1281 static void mpam_resctrl_domain_hdr_init(int cpu, struct mpam_component *comp,
1282 					 enum resctrl_res_level rid,
1283 					 struct rdt_domain_hdr *hdr)
1284 {
1285 	lockdep_assert_cpus_held();
1286 
1287 	INIT_LIST_HEAD(&hdr->list);
1288 	hdr->id = mpam_resctrl_pick_domain_id(cpu, comp);
1289 	hdr->rid = rid;
1290 	cpumask_set_cpu(cpu, &hdr->cpu_mask);
1291 }
1292 
1293 static void mpam_resctrl_online_domain_hdr(unsigned int cpu,
1294 					   struct rdt_domain_hdr *hdr)
1295 {
1296 	lockdep_assert_cpus_held();
1297 
1298 	cpumask_set_cpu(cpu, &hdr->cpu_mask);
1299 }
1300 
1301 /**
1302  * mpam_resctrl_offline_domain_hdr() - Update the domain header to remove a CPU.
1303  * @cpu:	The CPU to remove from the domain.
1304  * @hdr:	The domain's header.
1305  *
1306  * Removes @cpu from the header mask. If this was the last CPU in the domain,
1307  * the domain header is removed from its parent list and true is returned,
1308  * indicating the parent structure can be freed.
1309  * If there are other CPUs in the domain, returns false.
1310  */
1311 static bool mpam_resctrl_offline_domain_hdr(unsigned int cpu,
1312 					    struct rdt_domain_hdr *hdr)
1313 {
1314 	lockdep_assert_held(&domain_list_lock);
1315 
1316 	cpumask_clear_cpu(cpu, &hdr->cpu_mask);
1317 	if (cpumask_empty(&hdr->cpu_mask)) {
1318 		list_del_rcu(&hdr->list);
1319 		synchronize_rcu();
1320 		return true;
1321 	}
1322 
1323 	return false;
1324 }
1325 
1326 static void mpam_resctrl_domain_insert(struct list_head *list,
1327 				       struct rdt_domain_hdr *new)
1328 {
1329 	struct rdt_domain_hdr *err;
1330 	struct list_head *pos = NULL;
1331 
1332 	lockdep_assert_held(&domain_list_lock);
1333 
1334 	err = resctrl_find_domain(list, new->id, &pos);
1335 	if (WARN_ON_ONCE(err))
1336 		return;
1337 
1338 	list_add_tail_rcu(&new->list, pos);
1339 }
1340 
1341 static struct mpam_component *find_component(struct mpam_class *class, int cpu)
1342 {
1343 	struct mpam_component *comp;
1344 
1345 	guard(srcu)(&mpam_srcu);
1346 	list_for_each_entry_srcu(comp, &class->components, class_list,
1347 				 srcu_read_lock_held(&mpam_srcu)) {
1348 		if (cpumask_test_cpu(cpu, &comp->affinity))
1349 			return comp;
1350 	}
1351 
1352 	return NULL;
1353 }
1354 
1355 static struct mpam_resctrl_dom *
1356 mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res)
1357 {
1358 	int err;
1359 	struct mpam_resctrl_dom *dom;
1360 	struct rdt_l3_mon_domain *mon_d;
1361 	struct rdt_ctrl_domain *ctrl_d;
1362 	struct mpam_class *class = res->class;
1363 	struct mpam_component *comp_iter, *ctrl_comp;
1364 	struct rdt_resource *r = &res->resctrl_res;
1365 
1366 	lockdep_assert_held(&domain_list_lock);
1367 
1368 	ctrl_comp = NULL;
1369 	guard(srcu)(&mpam_srcu);
1370 	list_for_each_entry_srcu(comp_iter, &class->components, class_list,
1371 				 srcu_read_lock_held(&mpam_srcu)) {
1372 		if (cpumask_test_cpu(cpu, &comp_iter->affinity)) {
1373 			ctrl_comp = comp_iter;
1374 			break;
1375 		}
1376 	}
1377 
1378 	/* class has no component for this CPU */
1379 	if (WARN_ON_ONCE(!ctrl_comp))
1380 		return ERR_PTR(-EINVAL);
1381 
1382 	dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu));
1383 	if (!dom)
1384 		return ERR_PTR(-ENOMEM);
1385 
1386 	if (r->alloc_capable) {
1387 		dom->ctrl_comp = ctrl_comp;
1388 
1389 		ctrl_d = &dom->resctrl_ctrl_dom;
1390 		mpam_resctrl_domain_hdr_init(cpu, ctrl_comp, r->rid, &ctrl_d->hdr);
1391 		ctrl_d->hdr.type = RESCTRL_CTRL_DOMAIN;
1392 		err = resctrl_online_ctrl_domain(r, ctrl_d);
1393 		if (err)
1394 			goto free_domain;
1395 
1396 		mpam_resctrl_domain_insert(&r->ctrl_domains, &ctrl_d->hdr);
1397 	} else {
1398 		pr_debug("Skipped control domain online - no controls\n");
1399 	}
1400 
1401 	if (r->mon_capable) {
1402 		struct mpam_component *any_mon_comp = NULL;
1403 		struct mpam_resctrl_mon *mon;
1404 		enum resctrl_event_id eventid;
1405 
1406 		/*
1407 		 * Even if the monitor domain is backed by a different
1408 		 * component, the L3 component IDs need to be used... only
1409 		 * there may be no ctrl_comp for the L3.
1410 		 * Search each event's class list for a component with
1411 		 * overlapping CPUs and set up the dom->mon_comp array.
1412 		 */
1413 
1414 		for_each_mpam_resctrl_mon(mon, eventid) {
1415 			struct mpam_component *mon_comp;
1416 
1417 			if (!mon->class)
1418 				continue;       // dummy resource
1419 
1420 			mon_comp = find_component(mon->class, cpu);
1421 			dom->mon_comp[eventid] = mon_comp;
1422 			if (mon_comp)
1423 				any_mon_comp = mon_comp;
1424 		}
1425 		if (!any_mon_comp) {
1426 			WARN_ON_ONCE(1);
1427 			err = -EFAULT;
1428 			goto offline_ctrl_domain;
1429 		}
1430 
1431 		mon_d = &dom->resctrl_mon_dom;
1432 		mpam_resctrl_domain_hdr_init(cpu, any_mon_comp, r->rid, &mon_d->hdr);
1433 		mon_d->hdr.type = RESCTRL_MON_DOMAIN;
1434 		err = resctrl_online_mon_domain(r, &mon_d->hdr);
1435 		if (err)
1436 			goto offline_ctrl_domain;
1437 
1438 		mpam_resctrl_domain_insert(&r->mon_domains, &mon_d->hdr);
1439 	} else {
1440 		pr_debug("Skipped monitor domain online - no monitors\n");
1441 	}
1442 
1443 	return dom;
1444 
1445 offline_ctrl_domain:
1446 	if (r->alloc_capable) {
1447 		mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr);
1448 		resctrl_offline_ctrl_domain(r, ctrl_d);
1449 	}
1450 free_domain:
1451 	kfree(dom);
1452 	dom = ERR_PTR(err);
1453 
1454 	return dom;
1455 }
1456 
1457 /*
1458  * We know all the monitors are associated with the L3, even if there are no
1459  * controls and therefore no control component. Find the cache-id for the CPU
1460  * and use that to search for existing resctrl domains.
1461  * This relies on mpam_resctrl_pick_domain_id() using the L3 cache-id
1462  * for anything that is not a cache.
1463  */
1464 static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu)
1465 {
1466 	int cache_id;
1467 	struct mpam_resctrl_dom *dom;
1468 	struct mpam_resctrl_res *l3 = &mpam_resctrl_controls[RDT_RESOURCE_L3];
1469 
1470 	lockdep_assert_cpus_held();
1471 
1472 	if (!l3->class)
1473 		return NULL;
1474 	cache_id = get_cpu_cacheinfo_id(cpu, 3);
1475 	if (cache_id < 0)
1476 		return NULL;
1477 
1478 	list_for_each_entry_rcu(dom, &l3->resctrl_res.mon_domains, resctrl_mon_dom.hdr.list) {
1479 		if (dom->resctrl_mon_dom.hdr.id == cache_id)
1480 			return dom;
1481 	}
1482 
1483 	return NULL;
1484 }
1485 
1486 static struct mpam_resctrl_dom *
1487 mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res)
1488 {
1489 	struct mpam_resctrl_dom *dom;
1490 	struct rdt_resource *r = &res->resctrl_res;
1491 
1492 	lockdep_assert_cpus_held();
1493 
1494 	list_for_each_entry_rcu(dom, &r->ctrl_domains, resctrl_ctrl_dom.hdr.list) {
1495 		if (cpumask_test_cpu(cpu, &dom->ctrl_comp->affinity))
1496 			return dom;
1497 	}
1498 
1499 	if (r->rid != RDT_RESOURCE_L3)
1500 		return NULL;
1501 
1502 	/* Search the mon domain list too - needed on monitor only platforms. */
1503 	return mpam_resctrl_get_mon_domain_from_cpu(cpu);
1504 }
1505 
1506 int mpam_resctrl_online_cpu(unsigned int cpu)
1507 {
1508 	struct mpam_resctrl_res *res;
1509 	enum resctrl_res_level rid;
1510 
1511 	guard(mutex)(&domain_list_lock);
1512 	for_each_mpam_resctrl_control(res, rid) {
1513 		struct mpam_resctrl_dom *dom;
1514 		struct rdt_resource *r = &res->resctrl_res;
1515 
1516 		if (!res->class)
1517 			continue;	// dummy resource
1518 
1519 		dom = mpam_resctrl_get_domain_from_cpu(cpu, res);
1520 		if (!dom) {
1521 			dom = mpam_resctrl_alloc_domain(cpu, res);
1522 			if (IS_ERR(dom))
1523 				return PTR_ERR(dom);
1524 		} else {
1525 			if (r->alloc_capable) {
1526 				struct rdt_ctrl_domain *ctrl_d = &dom->resctrl_ctrl_dom;
1527 
1528 				mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr);
1529 			}
1530 			if (r->mon_capable) {
1531 				struct rdt_l3_mon_domain *mon_d = &dom->resctrl_mon_dom;
1532 
1533 				mpam_resctrl_online_domain_hdr(cpu, &mon_d->hdr);
1534 			}
1535 		}
1536 	}
1537 
1538 	resctrl_online_cpu(cpu);
1539 
1540 	return 0;
1541 }
1542 
1543 void mpam_resctrl_offline_cpu(unsigned int cpu)
1544 {
1545 	struct mpam_resctrl_res *res;
1546 	enum resctrl_res_level rid;
1547 
1548 	resctrl_offline_cpu(cpu);
1549 
1550 	guard(mutex)(&domain_list_lock);
1551 	for_each_mpam_resctrl_control(res, rid) {
1552 		struct mpam_resctrl_dom *dom;
1553 		struct rdt_l3_mon_domain *mon_d;
1554 		struct rdt_ctrl_domain *ctrl_d;
1555 		bool ctrl_dom_empty, mon_dom_empty;
1556 		struct rdt_resource *r = &res->resctrl_res;
1557 
1558 		if (!res->class)
1559 			continue;	// dummy resource
1560 
1561 		dom = mpam_resctrl_get_domain_from_cpu(cpu, res);
1562 		if (WARN_ON_ONCE(!dom))
1563 			continue;
1564 
1565 		if (r->alloc_capable) {
1566 			ctrl_d = &dom->resctrl_ctrl_dom;
1567 			ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr);
1568 			if (ctrl_dom_empty)
1569 				resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d);
1570 		} else {
1571 			ctrl_dom_empty = true;
1572 		}
1573 
1574 		if (r->mon_capable) {
1575 			mon_d = &dom->resctrl_mon_dom;
1576 			mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &mon_d->hdr);
1577 			if (mon_dom_empty)
1578 				resctrl_offline_mon_domain(&res->resctrl_res, &mon_d->hdr);
1579 		} else {
1580 			mon_dom_empty = true;
1581 		}
1582 
1583 		if (ctrl_dom_empty && mon_dom_empty)
1584 			kfree(dom);
1585 	}
1586 }
1587 
1588 int mpam_resctrl_setup(void)
1589 {
1590 	int err = 0;
1591 	struct mpam_resctrl_res *res;
1592 	enum resctrl_res_level rid;
1593 	struct mpam_resctrl_mon *mon;
1594 	enum resctrl_event_id eventid;
1595 
1596 	wait_event(wait_cacheinfo_ready, cacheinfo_ready);
1597 
1598 	cpus_read_lock();
1599 	for_each_mpam_resctrl_control(res, rid) {
1600 		INIT_LIST_HEAD_RCU(&res->resctrl_res.ctrl_domains);
1601 		INIT_LIST_HEAD_RCU(&res->resctrl_res.mon_domains);
1602 		res->resctrl_res.rid = rid;
1603 	}
1604 
1605 	/* Find some classes to use for controls */
1606 	mpam_resctrl_pick_caches();
1607 	mpam_resctrl_pick_mba();
1608 
1609 	/* Initialise the resctrl structures from the classes */
1610 	for_each_mpam_resctrl_control(res, rid) {
1611 		if (!res->class)
1612 			continue;	// dummy resource
1613 
1614 		err = mpam_resctrl_control_init(res);
1615 		if (err) {
1616 			pr_debug("Failed to initialise rid %u\n", rid);
1617 			goto internal_error;
1618 		}
1619 	}
1620 
1621 	/* Find some classes to use for monitors */
1622 	mpam_resctrl_pick_counters();
1623 
1624 	for_each_mpam_resctrl_mon(mon, eventid) {
1625 		if (!mon->class)
1626 			continue;	// dummy resource
1627 
1628 		err = mpam_resctrl_monitor_init(mon, eventid);
1629 		if (err) {
1630 			pr_debug("Failed to initialise event %u\n", eventid);
1631 			goto internal_error;
1632 		}
1633 	}
1634 
1635 	cpus_read_unlock();
1636 
1637 	if (!resctrl_arch_alloc_capable() && !resctrl_arch_mon_capable()) {
1638 		pr_debug("No alloc(%u) or monitor(%u) found - resctrl not supported\n",
1639 			 resctrl_arch_alloc_capable(), resctrl_arch_mon_capable());
1640 		return -EOPNOTSUPP;
1641 	}
1642 
1643 	err = resctrl_init();
1644 	if (err)
1645 		return err;
1646 
1647 	WRITE_ONCE(resctrl_enabled, true);
1648 
1649 	return 0;
1650 
1651 internal_error:
1652 	cpus_read_unlock();
1653 	pr_debug("Internal error %d - resctrl not supported\n", err);
1654 	return err;
1655 }
1656 
1657 void mpam_resctrl_exit(void)
1658 {
1659 	if (!READ_ONCE(resctrl_enabled))
1660 		return;
1661 
1662 	WRITE_ONCE(resctrl_enabled, false);
1663 	resctrl_exit();
1664 }
1665 
1666 /*
1667  * The driver is detaching an MSC from this class. If resctrl was using it,
1668  * pull on resctrl_exit().
1669  */
1670 void mpam_resctrl_teardown_class(struct mpam_class *class)
1671 {
1672 	struct mpam_resctrl_res *res;
1673 	enum resctrl_res_level rid;
1674 	struct mpam_resctrl_mon *mon;
1675 	enum resctrl_event_id eventid;
1676 
1677 	might_sleep();
1678 
1679 	for_each_mpam_resctrl_control(res, rid) {
1680 		if (res->class == class) {
1681 			res->class = NULL;
1682 			break;
1683 		}
1684 	}
1685 	for_each_mpam_resctrl_mon(mon, eventid) {
1686 		if (mon->class == class) {
1687 			mon->class = NULL;
1688 			break;
1689 		}
1690 	}
1691 }
1692 
1693 static int __init __cacheinfo_ready(void)
1694 {
1695 	cacheinfo_ready = true;
1696 	wake_up(&wait_cacheinfo_ready);
1697 
1698 	return 0;
1699 }
1700 device_initcall_sync(__cacheinfo_ready);
1701 
1702 #ifdef CONFIG_MPAM_KUNIT_TEST
1703 #include "test_mpam_resctrl.c"
1704 #endif
1705