1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * User interface for Resource Allocation in Resource Director Technology (RDT)
4  *
5  * Copyright (C) 2016 Intel Corporation
6  *
7  * Author: Fenghua Yu <fenghua.yu@intel.com>
8  *
9  * More information about RDT can be found in the Intel (R) x86 Architecture
10  * Software Developer Manual.
11  */
12 
13 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
14 
15 #include <linux/cpu.h>
16 #include <linux/debugfs.h>
17 #include <linux/fs.h>
18 #include <linux/fs_parser.h>
19 #include <linux/sysfs.h>
20 #include <linux/kernfs.h>
21 #include <linux/seq_buf.h>
22 #include <linux/seq_file.h>
23 #include <linux/sched/signal.h>
24 #include <linux/sched/task.h>
25 #include <linux/slab.h>
26 #include <linux/task_work.h>
27 #include <linux/user_namespace.h>
28 
29 #include <uapi/linux/magic.h>
30 
31 #include <asm/resctrl.h>
32 #include "internal.h"
33 
34 DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
35 DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
36 DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
37 
38 /* Mutex to protect rdtgroup access. */
39 DEFINE_MUTEX(rdtgroup_mutex);
40 
41 static struct kernfs_root *rdt_root;
42 struct rdtgroup rdtgroup_default;
43 LIST_HEAD(rdt_all_groups);
44 
45 /* list of entries for the schemata file */
46 LIST_HEAD(resctrl_schema_all);
47 
48 /* The filesystem can only be mounted once. */
49 bool resctrl_mounted;
50 
51 /* Kernel fs node for "info" directory under root */
52 static struct kernfs_node *kn_info;
53 
54 /* Kernel fs node for "mon_groups" directory under root */
55 static struct kernfs_node *kn_mongrp;
56 
57 /* Kernel fs node for "mon_data" directory under root */
58 static struct kernfs_node *kn_mondata;
59 
60 /*
61  * Used to store the max resource name width to display the schemata names in
62  * a tabular format.
63  */
64 int max_name_width;
65 
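/*
 * Buffer behind the "last_cmd_status" info file. The rdt_last_cmd_*()
 * helpers below append diagnostic text to it (under rdtgroup_mutex) so the
 * outcome of the most recent user command can be read back from
 * info/last_cmd_status.
 */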
66 static struct seq_buf last_cmd_status;
67 static char last_cmd_status_buf[512];
68 
69 static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
70 static void rdtgroup_destroy_root(void);
71 
72 struct dentry *debugfs_resctrl;
73 
74 /*
75  * Memory bandwidth monitoring event to use for the default CTRL_MON group
76  * and each new CTRL_MON group created by the user.  Only relevant when
77  * the filesystem is mounted with the "mba_MBps" option so it does not
78  * matter that it remains uninitialized on systems that do not support
79  * the "mba_MBps" option.
80  */
81 enum resctrl_event_id mba_mbps_default_event;
82 
83 static bool resctrl_debug;
84 
85 void rdt_last_cmd_clear(void)
86 {
87 	lockdep_assert_held(&rdtgroup_mutex);
88 	seq_buf_clear(&last_cmd_status);
89 }
90 
91 void rdt_last_cmd_puts(const char *s)
92 {
93 	lockdep_assert_held(&rdtgroup_mutex);
94 	seq_buf_puts(&last_cmd_status, s);
95 }
96 
97 void rdt_last_cmd_printf(const char *fmt, ...)
98 {
99 	va_list ap;
100 
101 	va_start(ap, fmt);
102 	lockdep_assert_held(&rdtgroup_mutex);
103 	seq_buf_vprintf(&last_cmd_status, fmt, ap);
104 	va_end(ap);
105 }
106 
107 void rdt_staged_configs_clear(void)
108 {
109 	struct rdt_ctrl_domain *dom;
110 	struct rdt_resource *r;
111 
112 	lockdep_assert_held(&rdtgroup_mutex);
113 
114 	for_each_alloc_capable_rdt_resource(r) {
115 		list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
116 			memset(dom->staged_config, 0, sizeof(dom->staged_config));
117 	}
118 }
119 
120 static bool resctrl_is_mbm_enabled(void)
121 {
122 	return (resctrl_arch_is_mbm_total_enabled() ||
123 		resctrl_arch_is_mbm_local_enabled());
124 }
125 
126 static bool resctrl_is_mbm_event(int e)
127 {
128 	return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
129 		e <= QOS_L3_MBM_LOCAL_EVENT_ID);
130 }
131 
132 /*
133  * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
134  * we can keep a bitmap of free CLOSIDs in a single integer.
135  *
136  * Using a global CLOSID across all resources has some advantages and
137  * some drawbacks:
138  * + We can simply set current's closid to assign a task to a resource
139  *   group.
140  * + Context switch code can avoid extra memory references deciding which
141  *   CLOSID to load into the PQR_ASSOC MSR
142  * - We give up some options in configuring resource groups across multi-socket
143  *   systems.
144  * - Our choices on how to configure each resource become progressively more
145  *   limited as the number of resources grows.
146  */
147 static unsigned long closid_free_map;
148 static int closid_free_map_len;
149 
150 int closids_supported(void)
151 {
152 	return closid_free_map_len;
153 }
154 
155 static void closid_init(void)
156 {
157 	struct resctrl_schema *s;
158 	u32 rdt_min_closid = 32;
159 
160 	/* Compute rdt_min_closid across all resources */
161 	list_for_each_entry(s, &resctrl_schema_all, list)
162 		rdt_min_closid = min(rdt_min_closid, s->num_closid);
163 
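	/*
	 * Set one bit for each usable CLOSID, e.g. rdt_min_closid == 4 gives
	 * BIT_MASK(4) - 1 == 0xf, i.e. bits for CLOSIDs 0-3; CLOSID 0 is then
	 * reserved for the default group below.
	 */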
164 	closid_free_map = BIT_MASK(rdt_min_closid) - 1;
165 
166 	/* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
167 	__clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map);
168 	closid_free_map_len = rdt_min_closid;
169 }
170 
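/*
 * Pick a free CLOSID. When RMIDs depend on CLOSIDs and llc_occupancy
 * monitoring is enabled, defer to resctrl_find_cleanest_closid() so the
 * CLOSID with the least dirty cache is reused; otherwise take the lowest
 * free bit. Returns a negative error code if no CLOSID is available.
 */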
171 static int closid_alloc(void)
172 {
173 	int cleanest_closid;
174 	u32 closid;
175 
176 	lockdep_assert_held(&rdtgroup_mutex);
177 
178 	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
179 	    resctrl_arch_is_llc_occupancy_enabled()) {
180 		cleanest_closid = resctrl_find_cleanest_closid();
181 		if (cleanest_closid < 0)
182 			return cleanest_closid;
183 		closid = cleanest_closid;
184 	} else {
185 		closid = ffs(closid_free_map);
186 		if (closid == 0)
187 			return -ENOSPC;
188 		closid--;
189 	}
190 	__clear_bit(closid, &closid_free_map);
191 
192 	return closid;
193 }
194 
195 void closid_free(int closid)
196 {
197 	lockdep_assert_held(&rdtgroup_mutex);
198 
199 	__set_bit(closid, &closid_free_map);
200 }
201 
202 /**
203  * closid_allocated - test if provided closid is in use
204  * @closid: closid to be tested
205  *
206  * Return: true if @closid is currently associated with a resource group,
207  * false if @closid is free
208  */
209 bool closid_allocated(unsigned int closid)
210 {
211 	lockdep_assert_held(&rdtgroup_mutex);
212 
213 	return !test_bit(closid, &closid_free_map);
214 }
215 
216 /**
217  * rdtgroup_mode_by_closid - Return mode of resource group with closid
218  * @closid: closid of the resource group
219  *
220  * Each resource group is associated with a @closid. Here the mode
221  * of a resource group can be queried by searching for it using its closid.
222  *
223  * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
224  */
225 enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
226 {
227 	struct rdtgroup *rdtgrp;
228 
229 	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
230 		if (rdtgrp->closid == closid)
231 			return rdtgrp->mode;
232 	}
233 
234 	return RDT_NUM_MODES;
235 }
236 
237 static const char * const rdt_mode_str[] = {
238 	[RDT_MODE_SHAREABLE]		= "shareable",
239 	[RDT_MODE_EXCLUSIVE]		= "exclusive",
240 	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
241 	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
242 };
243 
244 /**
245  * rdtgroup_mode_str - Return the string representation of mode
246  * @mode: the resource group mode as &enum rdtgroup_mode
247  *
248  * Return: string representation of valid mode, "unknown" otherwise
249  */
250 static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
251 {
252 	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
253 		return "unknown";
254 
255 	return rdt_mode_str[mode];
256 }
257 
258 /* set uid and gid of rdtgroup dirs and files to that of the creator */
259 static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
260 {
261 	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
262 				.ia_uid = current_fsuid(),
263 				.ia_gid = current_fsgid(), };
264 
265 	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
266 	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
267 		return 0;
268 
269 	return kernfs_setattr(kn, &iattr);
270 }
271 
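/*
 * Create a kernfs file for @rft under @parent_kn and hand ownership to the
 * creating user (root-owned files are left as-is by rdtgroup_kn_set_ugid()).
 */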
272 static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
273 {
274 	struct kernfs_node *kn;
275 	int ret;
276 
277 	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
278 				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
279 				  0, rft->kf_ops, rft, NULL, NULL);
280 	if (IS_ERR(kn))
281 		return PTR_ERR(kn);
282 
283 	ret = rdtgroup_kn_set_ugid(kn);
284 	if (ret) {
285 		kernfs_remove(kn);
286 		return ret;
287 	}
288 
289 	return 0;
290 }
291 
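/*
 * The kernfs callbacks below simply dispatch to the per-file rftype
 * handlers stashed in kn->priv when the file was created.
 */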
292 static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
293 {
294 	struct kernfs_open_file *of = m->private;
295 	struct rftype *rft = of->kn->priv;
296 
297 	if (rft->seq_show)
298 		return rft->seq_show(of, m, arg);
299 	return 0;
300 }
301 
302 static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
303 				   size_t nbytes, loff_t off)
304 {
305 	struct rftype *rft = of->kn->priv;
306 
307 	if (rft->write)
308 		return rft->write(of, buf, nbytes, off);
309 
310 	return -EINVAL;
311 }
312 
313 static const struct kernfs_ops rdtgroup_kf_single_ops = {
314 	.atomic_write_len	= PAGE_SIZE,
315 	.write			= rdtgroup_file_write,
316 	.seq_show		= rdtgroup_seqfile_show,
317 };
318 
319 static const struct kernfs_ops kf_mondata_ops = {
320 	.atomic_write_len	= PAGE_SIZE,
321 	.seq_show		= rdtgroup_mondata_show,
322 };
323 
324 static bool is_cpu_list(struct kernfs_open_file *of)
325 {
326 	struct rftype *rft = of->kn->priv;
327 
328 	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
329 }
330 
331 static int rdtgroup_cpus_show(struct kernfs_open_file *of,
332 			      struct seq_file *s, void *v)
333 {
334 	struct rdtgroup *rdtgrp;
335 	struct cpumask *mask;
336 	int ret = 0;
337 
338 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
339 
340 	if (rdtgrp) {
341 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
342 			if (!rdtgrp->plr->d) {
343 				rdt_last_cmd_clear();
344 				rdt_last_cmd_puts("Cache domain offline\n");
345 				ret = -ENODEV;
346 			} else {
347 				mask = &rdtgrp->plr->d->hdr.cpu_mask;
348 				seq_printf(s, is_cpu_list(of) ?
349 					   "%*pbl\n" : "%*pb\n",
350 					   cpumask_pr_args(mask));
351 			}
352 		} else {
353 			seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
354 				   cpumask_pr_args(&rdtgrp->cpu_mask));
355 		}
356 	} else {
357 		ret = -ENOENT;
358 	}
359 	rdtgroup_kn_unlock(of->kn);
360 
361 	return ret;
362 }
363 
364 /*
365  * This is safe against resctrl_sched_in() called from __switch_to()
366  * because __switch_to() is executed with interrupts disabled. A local call
367  * from update_closid_rmid() is protected against __switch_to() because
368  * preemption is disabled.
369  */
370 void resctrl_arch_sync_cpu_closid_rmid(void *info)
371 {
372 	struct resctrl_cpu_defaults *r = info;
373 
374 	if (r) {
375 		this_cpu_write(pqr_state.default_closid, r->closid);
376 		this_cpu_write(pqr_state.default_rmid, r->rmid);
377 	}
378 
379 	/*
380 	 * We cannot unconditionally write the MSR because the current
381 	 * executing task might have its own closid selected. Just reuse
382 	 * the context switch code.
383 	 */
384 	resctrl_sched_in(current);
385 }
386 
387 /*
388  * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
389  *
390  * Per task closids/rmids must have been set up before calling this function.
391  * @r may be NULL.
392  */
393 static void
394 update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
395 {
396 	struct resctrl_cpu_defaults defaults, *p = NULL;
397 
398 	if (r) {
399 		defaults.closid = r->closid;
400 		defaults.rmid = r->mon.rmid;
401 		p = &defaults;
402 	}
403 
404 	on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_closid_rmid, p, 1);
405 }
406 
407 static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
408 			  cpumask_var_t tmpmask)
409 {
410 	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
411 	struct list_head *head;
412 
413 	/* Check whether cpus belong to parent ctrl group */
414 	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
415 	if (!cpumask_empty(tmpmask)) {
416 		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
417 		return -EINVAL;
418 	}
419 
420 	/* Check whether cpus are dropped from this group */
421 	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
422 	if (!cpumask_empty(tmpmask)) {
423 		/* Give any dropped cpus to parent rdtgroup */
424 		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
425 		update_closid_rmid(tmpmask, prgrp);
426 	}
427 
428 	/*
429 	 * If we added cpus, remove them from previous group that owned them
430 	 * and update per-cpu rmid
431 	 */
432 	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
433 	if (!cpumask_empty(tmpmask)) {
434 		head = &prgrp->mon.crdtgrp_list;
435 		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
436 			if (crgrp == rdtgrp)
437 				continue;
438 			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
439 				       tmpmask);
440 		}
441 		update_closid_rmid(tmpmask, rdtgrp);
442 	}
443 
444 	/* Done pushing/pulling - update this group with new mask */
445 	cpumask_copy(&rdtgrp->cpu_mask, newmask);
446 
447 	return 0;
448 }
449 
450 static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
451 {
452 	struct rdtgroup *crgrp;
453 
454 	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
455 	/* update the child mon group masks as well */
456 	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
457 		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
458 }
459 
460 static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
461 			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
462 {
463 	struct rdtgroup *r, *crgrp;
464 	struct list_head *head;
465 
466 	/* Check whether cpus are dropped from this group */
467 	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
468 	if (!cpumask_empty(tmpmask)) {
469 		/* Can't drop from default group */
470 		if (rdtgrp == &rdtgroup_default) {
471 			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
472 			return -EINVAL;
473 		}
474 
475 		/* Give any dropped cpus to rdtgroup_default */
476 		cpumask_or(&rdtgroup_default.cpu_mask,
477 			   &rdtgroup_default.cpu_mask, tmpmask);
478 		update_closid_rmid(tmpmask, &rdtgroup_default);
479 	}
480 
481 	/*
482 	 * If we added cpus, remove them from previous group and
483 	 * the prev group's child groups that owned them
484 	 * and update per-cpu closid/rmid.
485 	 */
486 	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
487 	if (!cpumask_empty(tmpmask)) {
488 		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
489 			if (r == rdtgrp)
490 				continue;
491 			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
492 			if (!cpumask_empty(tmpmask1))
493 				cpumask_rdtgrp_clear(r, tmpmask1);
494 		}
495 		update_closid_rmid(tmpmask, rdtgrp);
496 	}
497 
498 	/* Done pushing/pulling - update this group with new mask */
499 	cpumask_copy(&rdtgrp->cpu_mask, newmask);
500 
501 	/*
502 	 * Clear child mon group masks since there is a new parent mask
503 	 * now and update the rmid for the cpus the child lost.
504 	 */
505 	head = &rdtgrp->mon.crdtgrp_list;
506 	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
507 		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
508 		update_closid_rmid(tmpmask, rdtgrp);
509 		cpumask_clear(&crgrp->cpu_mask);
510 	}
511 
512 	return 0;
513 }
514 
515 static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
516 				   char *buf, size_t nbytes, loff_t off)
517 {
518 	cpumask_var_t tmpmask, newmask, tmpmask1;
519 	struct rdtgroup *rdtgrp;
520 	int ret;
521 
522 	if (!buf)
523 		return -EINVAL;
524 
525 	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
526 		return -ENOMEM;
527 	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
528 		free_cpumask_var(tmpmask);
529 		return -ENOMEM;
530 	}
531 	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
532 		free_cpumask_var(tmpmask);
533 		free_cpumask_var(newmask);
534 		return -ENOMEM;
535 	}
536 
537 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
538 	if (!rdtgrp) {
539 		ret = -ENOENT;
540 		goto unlock;
541 	}
542 
543 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
544 	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
545 		ret = -EINVAL;
546 		rdt_last_cmd_puts("Pseudo-locking in progress\n");
547 		goto unlock;
548 	}
549 
550 	if (is_cpu_list(of))
551 		ret = cpulist_parse(buf, newmask);
552 	else
553 		ret = cpumask_parse(buf, newmask);
554 
555 	if (ret) {
556 		rdt_last_cmd_puts("Bad CPU list/mask\n");
557 		goto unlock;
558 	}
559 
560 	/* check that user didn't specify any offline cpus */
561 	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
562 	if (!cpumask_empty(tmpmask)) {
563 		ret = -EINVAL;
564 		rdt_last_cmd_puts("Can only assign online CPUs\n");
565 		goto unlock;
566 	}
567 
568 	if (rdtgrp->type == RDTCTRL_GROUP)
569 		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
570 	else if (rdtgrp->type == RDTMON_GROUP)
571 		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
572 	else
573 		ret = -EINVAL;
574 
575 unlock:
576 	rdtgroup_kn_unlock(of->kn);
577 	free_cpumask_var(tmpmask);
578 	free_cpumask_var(newmask);
579 	free_cpumask_var(tmpmask1);
580 
581 	return ret ?: nbytes;
582 }
583 
584 /**
585  * rdtgroup_remove - the helper to remove resource group safely
586  * @rdtgrp: resource group to remove
587  *
588  * On resource group creation via a mkdir, an extra kernfs_node reference is
589  * taken to ensure that the rdtgroup structure remains accessible for the
590  * rdtgroup_kn_unlock() calls where it is removed.
591  *
592  * Drop the extra reference here, then free the rdtgroup structure.
593  *
594  * Return: void
595  */
596 static void rdtgroup_remove(struct rdtgroup *rdtgrp)
597 {
598 	kernfs_put(rdtgrp->kn);
599 	kfree(rdtgrp);
600 }
601 
602 static void _update_task_closid_rmid(void *task)
603 {
604 	/*
605 	 * If the task is still current on this CPU, update PQR_ASSOC MSR.
606 	 * Otherwise, the MSR is updated when the task is scheduled in.
607 	 */
608 	if (task == current)
609 		resctrl_sched_in(task);
610 }
611 
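/*
 * If @t is currently running, kick its CPU so PQR_ASSOC is reloaded right
 * away; otherwise the new closid/rmid simply take effect at the task's next
 * context switch.
 */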
612 static void update_task_closid_rmid(struct task_struct *t)
613 {
614 	if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
615 		smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
616 	else
617 		_update_task_closid_rmid(t);
618 }
619 
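/*
 * Does @tsk already carry the IDs implied by membership of @rdtgrp: the
 * group's own closid and rmid for a control group, or the parent's closid
 * plus the group's rmid for a monitor group?
 */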
620 static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp)
621 {
622 	u32 closid, rmid = rdtgrp->mon.rmid;
623 
624 	if (rdtgrp->type == RDTCTRL_GROUP)
625 		closid = rdtgrp->closid;
626 	else if (rdtgrp->type == RDTMON_GROUP)
627 		closid = rdtgrp->mon.parent->closid;
628 	else
629 		return false;
630 
631 	return resctrl_arch_match_closid(tsk, closid) &&
632 	       resctrl_arch_match_rmid(tsk, closid, rmid);
633 }
634 
635 static int __rdtgroup_move_task(struct task_struct *tsk,
636 				struct rdtgroup *rdtgrp)
637 {
638 	/* If the task is already in rdtgrp, no need to move the task. */
639 	if (task_in_rdtgroup(tsk, rdtgrp))
640 		return 0;
641 
642 	/*
643 	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
644 	 * updated by them.
645 	 *
646 	 * For ctrl_mon groups, move both closid and rmid.
647 	 * For monitor groups, tasks can only be moved from
648 	 * their parent CTRL group.
649 	 */
650 	if (rdtgrp->type == RDTMON_GROUP &&
651 	    !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) {
652 		rdt_last_cmd_puts("Can't move task to different control group\n");
653 		return -EINVAL;
654 	}
655 
656 	if (rdtgrp->type == RDTMON_GROUP)
657 		resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid,
658 					     rdtgrp->mon.rmid);
659 	else
660 		resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid,
661 					     rdtgrp->mon.rmid);
662 
663 	/*
664 	 * Ensure the task's closid and rmid are written before determining if
665 	 * the task is current that will decide if it will be interrupted.
666 	 * This pairs with the full barrier between the rq->curr update and
667 	 * resctrl_sched_in() during context switch.
668 	 */
669 	smp_mb();
670 
671 	/*
672 	 * By now, the task's closid and rmid are set. If the task is current
673 	 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
674 	 * group go into effect. If the task is not current, the MSR will be
675 	 * updated when the task is scheduled in.
676 	 */
677 	update_task_closid_rmid(tsk);
678 
679 	return 0;
680 }
681 
682 static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
683 {
684 	return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) &&
685 		resctrl_arch_match_closid(t, r->closid));
686 }
687 
688 static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
689 {
690 	return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) &&
691 		resctrl_arch_match_rmid(t, r->mon.parent->closid,
692 					r->mon.rmid));
693 }
694 
695 /**
696  * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
697  * @r: Resource group
698  *
699  * Return: 1 if tasks have been assigned to @r, 0 otherwise
700  */
701 int rdtgroup_tasks_assigned(struct rdtgroup *r)
702 {
703 	struct task_struct *p, *t;
704 	int ret = 0;
705 
706 	lockdep_assert_held(&rdtgroup_mutex);
707 
708 	rcu_read_lock();
709 	for_each_process_thread(p, t) {
710 		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
711 			ret = 1;
712 			break;
713 		}
714 	}
715 	rcu_read_unlock();
716 
717 	return ret;
718 }
719 
720 static int rdtgroup_task_write_permission(struct task_struct *task,
721 					  struct kernfs_open_file *of)
722 {
723 	const struct cred *tcred = get_task_cred(task);
724 	const struct cred *cred = current_cred();
725 	int ret = 0;
726 
727 	/*
728 	 * Even if we're attaching all tasks in the thread group, we only
729 	 * need to check permissions on one of them.
730 	 */
731 	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
732 	    !uid_eq(cred->euid, tcred->uid) &&
733 	    !uid_eq(cred->euid, tcred->suid)) {
734 		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
735 		ret = -EPERM;
736 	}
737 
738 	put_cred(tcred);
739 	return ret;
740 }
741 
742 static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
743 			      struct kernfs_open_file *of)
744 {
745 	struct task_struct *tsk;
746 	int ret;
747 
748 	rcu_read_lock();
749 	if (pid) {
750 		tsk = find_task_by_vpid(pid);
751 		if (!tsk) {
752 			rcu_read_unlock();
753 			rdt_last_cmd_printf("No task %d\n", pid);
754 			return -ESRCH;
755 		}
756 	} else {
757 		tsk = current;
758 	}
759 
760 	get_task_struct(tsk);
761 	rcu_read_unlock();
762 
763 	ret = rdtgroup_task_write_permission(tsk, of);
764 	if (!ret)
765 		ret = __rdtgroup_move_task(tsk, rdtgrp);
766 
767 	put_task_struct(tsk);
768 	return ret;
769 }
770 
771 static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
772 				    char *buf, size_t nbytes, loff_t off)
773 {
774 	struct rdtgroup *rdtgrp;
775 	char *pid_str;
776 	int ret = 0;
777 	pid_t pid;
778 
779 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
780 	if (!rdtgrp) {
781 		rdtgroup_kn_unlock(of->kn);
782 		return -ENOENT;
783 	}
784 	rdt_last_cmd_clear();
785 
786 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
787 	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
788 		ret = -EINVAL;
789 		rdt_last_cmd_puts("Pseudo-locking in progress\n");
790 		goto unlock;
791 	}
792 
793 	while (buf && buf[0] != '\0' && buf[0] != '\n') {
794 		pid_str = strim(strsep(&buf, ","));
795 
796 		if (kstrtoint(pid_str, 0, &pid)) {
797 			rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str);
798 			ret = -EINVAL;
799 			break;
800 		}
801 
802 		if (pid < 0) {
803 			rdt_last_cmd_printf("Invalid pid %d\n", pid);
804 			ret = -EINVAL;
805 			break;
806 		}
807 
808 		ret = rdtgroup_move_task(pid, rdtgrp, of);
809 		if (ret) {
810 			rdt_last_cmd_printf("Error while processing task %d\n", pid);
811 			break;
812 		}
813 	}
814 
815 unlock:
816 	rdtgroup_kn_unlock(of->kn);
817 
818 	return ret ?: nbytes;
819 }
820 
821 static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
822 {
823 	struct task_struct *p, *t;
824 	pid_t pid;
825 
826 	rcu_read_lock();
827 	for_each_process_thread(p, t) {
828 		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
829 			pid = task_pid_vnr(t);
830 			if (pid)
831 				seq_printf(s, "%d\n", pid);
832 		}
833 	}
834 	rcu_read_unlock();
835 }
836 
837 static int rdtgroup_tasks_show(struct kernfs_open_file *of,
838 			       struct seq_file *s, void *v)
839 {
840 	struct rdtgroup *rdtgrp;
841 	int ret = 0;
842 
843 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
844 	if (rdtgrp)
845 		show_rdt_tasks(rdtgrp, s);
846 	else
847 		ret = -ENOENT;
848 	rdtgroup_kn_unlock(of->kn);
849 
850 	return ret;
851 }
852 
853 static int rdtgroup_closid_show(struct kernfs_open_file *of,
854 				struct seq_file *s, void *v)
855 {
856 	struct rdtgroup *rdtgrp;
857 	int ret = 0;
858 
859 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
860 	if (rdtgrp)
861 		seq_printf(s, "%u\n", rdtgrp->closid);
862 	else
863 		ret = -ENOENT;
864 	rdtgroup_kn_unlock(of->kn);
865 
866 	return ret;
867 }
868 
869 static int rdtgroup_rmid_show(struct kernfs_open_file *of,
870 			      struct seq_file *s, void *v)
871 {
872 	struct rdtgroup *rdtgrp;
873 	int ret = 0;
874 
875 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
876 	if (rdtgrp)
877 		seq_printf(s, "%u\n", rdtgrp->mon.rmid);
878 	else
879 		ret = -ENOENT;
880 	rdtgroup_kn_unlock(of->kn);
881 
882 	return ret;
883 }
884 
885 #ifdef CONFIG_PROC_CPU_RESCTRL
886 
887 /*
888  * A task can only be part of one resctrl control group and of one monitor
889  * group which is associated to that control group.
890  *
891  * 1)   res:
892  *      mon:
893  *
894  *    resctrl is not available.
895  *
896  * 2)   res:/
897  *      mon:
898  *
899  *    Task is part of the root resctrl control group, and it is not associated
900  *    to any monitor group.
901  *
902  * 3)  res:/
903  *     mon:mon0
904  *
905  *    Task is part of the root resctrl control group and monitor group mon0.
906  *
907  * 4)  res:group0
908  *     mon:
909  *
910  *    Task is part of resctrl control group group0, and it is not associated
911  *    to any monitor group.
912  *
913  * 5) res:group0
914  *    mon:mon1
915  *
916  *    Task is part of resctrl control group group0 and monitor group mon1.
917  */
918 int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
919 		      struct pid *pid, struct task_struct *tsk)
920 {
921 	struct rdtgroup *rdtg;
922 	int ret = 0;
923 
924 	mutex_lock(&rdtgroup_mutex);
925 
926 	/* Return empty if resctrl has not been mounted. */
927 	if (!resctrl_mounted) {
928 		seq_puts(s, "res:\nmon:\n");
929 		goto unlock;
930 	}
931 
932 	list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
933 		struct rdtgroup *crg;
934 
935 		/*
936 		 * Task information is only relevant for shareable
937 		 * and exclusive groups.
938 		 */
939 		if (rdtg->mode != RDT_MODE_SHAREABLE &&
940 		    rdtg->mode != RDT_MODE_EXCLUSIVE)
941 			continue;
942 
943 		if (!resctrl_arch_match_closid(tsk, rdtg->closid))
944 			continue;
945 
946 		seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
947 			   rdt_kn_name(rdtg->kn));
948 		seq_puts(s, "mon:");
949 		list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
950 				    mon.crdtgrp_list) {
951 			if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid,
952 						     crg->mon.rmid))
953 				continue;
954 			seq_printf(s, "%s", rdt_kn_name(crg->kn));
955 			break;
956 		}
957 		seq_putc(s, '\n');
958 		goto unlock;
959 	}
960 	/*
961 	 * The above search should succeed. Otherwise return
962 	 * with an error.
963 	 */
964 	ret = -ENOENT;
965 unlock:
966 	mutex_unlock(&rdtgroup_mutex);
967 
968 	return ret;
969 }
970 #endif
971 
972 static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
973 				    struct seq_file *seq, void *v)
974 {
975 	int len;
976 
977 	mutex_lock(&rdtgroup_mutex);
978 	len = seq_buf_used(&last_cmd_status);
979 	if (len)
980 		seq_printf(seq, "%.*s", len, last_cmd_status_buf);
981 	else
982 		seq_puts(seq, "ok\n");
983 	mutex_unlock(&rdtgroup_mutex);
984 	return 0;
985 }
986 
987 static void *rdt_kn_parent_priv(struct kernfs_node *kn)
988 {
989 	/*
990 	 * The parent pointer is only valid within RCU section since it can be
991 	 * replaced.
992 	 */
993 	guard(rcu)();
994 	return rcu_dereference(kn->__parent)->priv;
995 }
996 
997 static int rdt_num_closids_show(struct kernfs_open_file *of,
998 				struct seq_file *seq, void *v)
999 {
1000 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1001 
1002 	seq_printf(seq, "%u\n", s->num_closid);
1003 	return 0;
1004 }
1005 
1006 static int rdt_default_ctrl_show(struct kernfs_open_file *of,
1007 			     struct seq_file *seq, void *v)
1008 {
1009 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1010 	struct rdt_resource *r = s->res;
1011 
1012 	seq_printf(seq, "%x\n", resctrl_get_default_ctrl(r));
1013 	return 0;
1014 }
1015 
1016 static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
1017 			     struct seq_file *seq, void *v)
1018 {
1019 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1020 	struct rdt_resource *r = s->res;
1021 
1022 	seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
1023 	return 0;
1024 }
1025 
1026 static int rdt_shareable_bits_show(struct kernfs_open_file *of,
1027 				   struct seq_file *seq, void *v)
1028 {
1029 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1030 	struct rdt_resource *r = s->res;
1031 
1032 	seq_printf(seq, "%x\n", r->cache.shareable_bits);
1033 	return 0;
1034 }
1035 
1036 /*
1037  * rdt_bit_usage_show - Display current usage of resources
1038  *
1039  * A domain is a shared resource that can now be allocated differently. Here
1040  * we display the current regions of the domain as an annotated bitmask.
1041  * For each domain of this resource its allocation bitmask
1042  * is annotated as below to indicate the current usage of the corresponding bit:
1043  *   0 - currently unused
1044  *   X - currently available for sharing and used by software and hardware
1045  *   H - currently used by hardware only but available for software use
1046  *   S - currently used and shareable by software only
1047  *   E - currently used exclusively by one resource group
1048  *   P - currently pseudo-locked by one resource group
1049  */
1050 static int rdt_bit_usage_show(struct kernfs_open_file *of,
1051 			      struct seq_file *seq, void *v)
1052 {
1053 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1054 	/*
1055 	 * Use unsigned long even though only 32 bits are used to ensure
1056 	 * test_bit() is used safely.
1057 	 */
1058 	unsigned long sw_shareable = 0, hw_shareable = 0;
1059 	unsigned long exclusive = 0, pseudo_locked = 0;
1060 	struct rdt_resource *r = s->res;
1061 	struct rdt_ctrl_domain *dom;
1062 	int i, hwb, swb, excl, psl;
1063 	enum rdtgrp_mode mode;
1064 	bool sep = false;
1065 	u32 ctrl_val;
1066 
1067 	cpus_read_lock();
1068 	mutex_lock(&rdtgroup_mutex);
1069 	hw_shareable = r->cache.shareable_bits;
1070 	list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
1071 		if (sep)
1072 			seq_putc(seq, ';');
1073 		sw_shareable = 0;
1074 		exclusive = 0;
1075 		seq_printf(seq, "%d=", dom->hdr.id);
1076 		for (i = 0; i < closids_supported(); i++) {
1077 			if (!closid_allocated(i))
1078 				continue;
1079 			ctrl_val = resctrl_arch_get_config(r, dom, i,
1080 							   s->conf_type);
1081 			mode = rdtgroup_mode_by_closid(i);
1082 			switch (mode) {
1083 			case RDT_MODE_SHAREABLE:
1084 				sw_shareable |= ctrl_val;
1085 				break;
1086 			case RDT_MODE_EXCLUSIVE:
1087 				exclusive |= ctrl_val;
1088 				break;
1089 			case RDT_MODE_PSEUDO_LOCKSETUP:
1090 			/*
1091 			 * RDT_MODE_PSEUDO_LOCKSETUP is possible
1092 			 * here but not included since the CBM
1093 			 * associated with this CLOSID in this mode
1094 			 * is not initialized and no task or cpu can be
1095 			 * assigned this CLOSID.
1096 			 */
1097 				break;
1098 			case RDT_MODE_PSEUDO_LOCKED:
1099 			case RDT_NUM_MODES:
1100 				WARN(1,
1101 				     "invalid mode for closid %d\n", i);
1102 				break;
1103 			}
1104 		}
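		/* Walk the CBM MSB-first and print one legend character per bit. */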
1105 		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
1106 			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
1107 			hwb = test_bit(i, &hw_shareable);
1108 			swb = test_bit(i, &sw_shareable);
1109 			excl = test_bit(i, &exclusive);
1110 			psl = test_bit(i, &pseudo_locked);
1111 			if (hwb && swb)
1112 				seq_putc(seq, 'X');
1113 			else if (hwb && !swb)
1114 				seq_putc(seq, 'H');
1115 			else if (!hwb && swb)
1116 				seq_putc(seq, 'S');
1117 			else if (excl)
1118 				seq_putc(seq, 'E');
1119 			else if (psl)
1120 				seq_putc(seq, 'P');
1121 			else /* Unused bits remain */
1122 				seq_putc(seq, '0');
1123 		}
1124 		sep = true;
1125 	}
1126 	seq_putc(seq, '\n');
1127 	mutex_unlock(&rdtgroup_mutex);
1128 	cpus_read_unlock();
1129 	return 0;
1130 }
1131 
1132 static int rdt_min_bw_show(struct kernfs_open_file *of,
1133 			     struct seq_file *seq, void *v)
1134 {
1135 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1136 	struct rdt_resource *r = s->res;
1137 
1138 	seq_printf(seq, "%u\n", r->membw.min_bw);
1139 	return 0;
1140 }
1141 
1142 static int rdt_num_rmids_show(struct kernfs_open_file *of,
1143 			      struct seq_file *seq, void *v)
1144 {
1145 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1146 
1147 	seq_printf(seq, "%d\n", r->num_rmid);
1148 
1149 	return 0;
1150 }
1151 
1152 static int rdt_mon_features_show(struct kernfs_open_file *of,
1153 				 struct seq_file *seq, void *v)
1154 {
1155 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1156 	struct mon_evt *mevt;
1157 
1158 	list_for_each_entry(mevt, &r->evt_list, list) {
1159 		seq_printf(seq, "%s\n", mevt->name);
1160 		if (mevt->configurable)
1161 			seq_printf(seq, "%s_config\n", mevt->name);
1162 	}
1163 
1164 	return 0;
1165 }
1166 
1167 static int rdt_bw_gran_show(struct kernfs_open_file *of,
1168 			     struct seq_file *seq, void *v)
1169 {
1170 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1171 	struct rdt_resource *r = s->res;
1172 
1173 	seq_printf(seq, "%u\n", r->membw.bw_gran);
1174 	return 0;
1175 }
1176 
1177 static int rdt_delay_linear_show(struct kernfs_open_file *of,
1178 			     struct seq_file *seq, void *v)
1179 {
1180 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1181 	struct rdt_resource *r = s->res;
1182 
1183 	seq_printf(seq, "%u\n", r->membw.delay_linear);
1184 	return 0;
1185 }
1186 
1187 static int max_threshold_occ_show(struct kernfs_open_file *of,
1188 				  struct seq_file *seq, void *v)
1189 {
1190 	seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);
1191 
1192 	return 0;
1193 }
1194 
1195 static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
1196 					 struct seq_file *seq, void *v)
1197 {
1198 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1199 	struct rdt_resource *r = s->res;
1200 
1201 	switch (r->membw.throttle_mode) {
1202 	case THREAD_THROTTLE_PER_THREAD:
1203 		seq_puts(seq, "per-thread\n");
1204 		return 0;
1205 	case THREAD_THROTTLE_MAX:
1206 		seq_puts(seq, "max\n");
1207 		return 0;
1208 	case THREAD_THROTTLE_UNDEFINED:
1209 		seq_puts(seq, "undefined\n");
1210 		return 0;
1211 	}
1212 
1213 	WARN_ON_ONCE(1);
1214 
1215 	return 0;
1216 }
1217 
1218 static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
1219 				       char *buf, size_t nbytes, loff_t off)
1220 {
1221 	unsigned int bytes;
1222 	int ret;
1223 
1224 	ret = kstrtouint(buf, 0, &bytes);
1225 	if (ret)
1226 		return ret;
1227 
1228 	if (bytes > resctrl_rmid_realloc_limit)
1229 		return -EINVAL;
1230 
1231 	resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);
1232 
1233 	return nbytes;
1234 }
1235 
1236 /*
1237  * rdtgroup_mode_show - Display mode of this resource group
1238  */
1239 static int rdtgroup_mode_show(struct kernfs_open_file *of,
1240 			      struct seq_file *s, void *v)
1241 {
1242 	struct rdtgroup *rdtgrp;
1243 
1244 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1245 	if (!rdtgrp) {
1246 		rdtgroup_kn_unlock(of->kn);
1247 		return -ENOENT;
1248 	}
1249 
1250 	seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
1251 
1252 	rdtgroup_kn_unlock(of->kn);
1253 	return 0;
1254 }
1255 
1256 static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
1257 {
1258 	switch (my_type) {
1259 	case CDP_CODE:
1260 		return CDP_DATA;
1261 	case CDP_DATA:
1262 		return CDP_CODE;
1263 	default:
1264 	case CDP_NONE:
1265 		return CDP_NONE;
1266 	}
1267 }
1268 
1269 static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
1270 					struct seq_file *seq, void *v)
1271 {
1272 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1273 	struct rdt_resource *r = s->res;
1274 
1275 	seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks);
1276 
1277 	return 0;
1278 }
1279 
1280 /**
1281  * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
1282  * @r: Resource to which domain instance @d belongs.
1283  * @d: The domain instance for which @closid is being tested.
1284  * @cbm: Capacity bitmask being tested.
1285  * @closid: Intended closid for @cbm.
1286  * @type: CDP type of @r.
1287  * @exclusive: Only check if overlaps with exclusive resource groups
1288  *
1289  * Checks if provided @cbm intended to be used for @closid on domain
1290  * @d overlaps with any other closids or other hardware usage associated
1291  * with this domain. If @exclusive is true then only overlaps with
1292  * resource groups in exclusive mode will be considered. If @exclusive
1293  * is false then overlaps with any resource group or hardware entities
1294  * will be considered.
1295  *
1296  * @cbm is unsigned long, even if only 32 bits are used, to make the
1297  * bitmap functions work correctly.
1298  *
1299  * Return: false if CBM does not overlap, true if it does.
1300  */
1301 static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d,
1302 				    unsigned long cbm, int closid,
1303 				    enum resctrl_conf_type type, bool exclusive)
1304 {
1305 	enum rdtgrp_mode mode;
1306 	unsigned long ctrl_b;
1307 	int i;
1308 
1309 	/* Check for any overlap with regions used by hardware directly */
1310 	if (!exclusive) {
1311 		ctrl_b = r->cache.shareable_bits;
1312 		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
1313 			return true;
1314 	}
1315 
1316 	/* Check for overlap with other resource groups */
1317 	for (i = 0; i < closids_supported(); i++) {
1318 		ctrl_b = resctrl_arch_get_config(r, d, i, type);
1319 		mode = rdtgroup_mode_by_closid(i);
1320 		if (closid_allocated(i) && i != closid &&
1321 		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
1322 			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
1323 				if (exclusive) {
1324 					if (mode == RDT_MODE_EXCLUSIVE)
1325 						return true;
1326 					continue;
1327 				}
1328 				return true;
1329 			}
1330 		}
1331 	}
1332 
1333 	return false;
1334 }
1335 
1336 /**
1337  * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
1338  * @s: Schema for the resource to which domain instance @d belongs.
1339  * @d: The domain instance for which @closid is being tested.
1340  * @cbm: Capacity bitmask being tested.
1341  * @closid: Intended closid for @cbm.
1342  * @exclusive: Only check if overlaps with exclusive resource groups
1343  *
1344  * Resources that can be allocated using a CBM can use the CBM to control
1345  * the overlap of these allocations. rdtgroup_cbm_overlaps() is the test
1346  * for overlap. Overlap test is not limited to the specific resource for
1347  * which the CBM is intended though - when dealing with CDP resources that
1348  * share the underlying hardware the overlap check should be performed on
1349  * the CDP resource sharing the hardware also.
1350  *
1351  * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
1352  * overlap test.
1353  *
1354  * Return: true if CBM overlap detected, false if there is no overlap
1355  */
1356 bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
1357 			   unsigned long cbm, int closid, bool exclusive)
1358 {
1359 	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
1360 	struct rdt_resource *r = s->res;
1361 
1362 	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
1363 				    exclusive))
1364 		return true;
1365 
1366 	if (!resctrl_arch_get_cdp_enabled(r->rid))
1367 		return false;
1368 	return  __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
1369 }
1370 
1371 /**
1372  * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
1373  * @rdtgrp: Resource group identified through its closid.
1374  *
1375  * An exclusive resource group implies that there should be no sharing of
1376  * its allocated resources. At the time this group is considered to be
1377  * exclusive this test can determine if its current schemata supports this
1378  * setting by testing for overlap with all other resource groups.
1379  *
1380  * Return: true if resource group can be exclusive, false if there is overlap
1381  * with allocations of other resource groups and thus this resource group
1382  * cannot be exclusive.
1383  */
1384 static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1385 {
1386 	int closid = rdtgrp->closid;
1387 	struct rdt_ctrl_domain *d;
1388 	struct resctrl_schema *s;
1389 	struct rdt_resource *r;
1390 	bool has_cache = false;
1391 	u32 ctrl;
1392 
1393 	/* Walking r->domains, ensure it can't race with cpuhp */
1394 	lockdep_assert_cpus_held();
1395 
1396 	list_for_each_entry(s, &resctrl_schema_all, list) {
1397 		r = s->res;
1398 		if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
1399 			continue;
1400 		has_cache = true;
1401 		list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
1402 			ctrl = resctrl_arch_get_config(r, d, closid,
1403 						       s->conf_type);
1404 			if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
1405 				rdt_last_cmd_puts("Schemata overlaps\n");
1406 				return false;
1407 			}
1408 		}
1409 	}
1410 
1411 	if (!has_cache) {
1412 		rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
1413 		return false;
1414 	}
1415 
1416 	return true;
1417 }
1418 
1419 /*
1420  * rdtgroup_mode_write - Modify the resource group's mode
1421  */
1422 static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1423 				   char *buf, size_t nbytes, loff_t off)
1424 {
1425 	struct rdtgroup *rdtgrp;
1426 	enum rdtgrp_mode mode;
1427 	int ret = 0;
1428 
1429 	/* Valid input requires a trailing newline */
1430 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1431 		return -EINVAL;
1432 	buf[nbytes - 1] = '\0';
1433 
1434 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1435 	if (!rdtgrp) {
1436 		rdtgroup_kn_unlock(of->kn);
1437 		return -ENOENT;
1438 	}
1439 
1440 	rdt_last_cmd_clear();
1441 
1442 	mode = rdtgrp->mode;
1443 
1444 	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
1445 	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
1446 	    (!strcmp(buf, "pseudo-locksetup") &&
1447 	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
1448 	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1449 		goto out;
1450 
1451 	if (mode == RDT_MODE_PSEUDO_LOCKED) {
1452 		rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
1453 		ret = -EINVAL;
1454 		goto out;
1455 	}
1456 
1457 	if (!strcmp(buf, "shareable")) {
1458 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1459 			ret = rdtgroup_locksetup_exit(rdtgrp);
1460 			if (ret)
1461 				goto out;
1462 		}
1463 		rdtgrp->mode = RDT_MODE_SHAREABLE;
1464 	} else if (!strcmp(buf, "exclusive")) {
1465 		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1466 			ret = -EINVAL;
1467 			goto out;
1468 		}
1469 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1470 			ret = rdtgroup_locksetup_exit(rdtgrp);
1471 			if (ret)
1472 				goto out;
1473 		}
1474 		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1475 	} else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) &&
1476 		   !strcmp(buf, "pseudo-locksetup")) {
1477 		ret = rdtgroup_locksetup_enter(rdtgrp);
1478 		if (ret)
1479 			goto out;
1480 		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1481 	} else {
1482 		rdt_last_cmd_puts("Unknown or unsupported mode\n");
1483 		ret = -EINVAL;
1484 	}
1485 
1486 out:
1487 	rdtgroup_kn_unlock(of->kn);
1488 	return ret ?: nbytes;
1489 }
1490 
1491 /**
1492  * rdtgroup_cbm_to_size - Translate CBM to size in bytes
1493  * @r: RDT resource to which @d belongs.
1494  * @d: RDT domain instance.
1495  * @cbm: bitmask for which the size should be computed.
1496  *
1497  * The bitmask provided associated with the RDT domain instance @d will be
1498  * translated into how many bytes it represents. The size in bytes is
1499  * computed by first dividing the total cache size by the CBM length to
1500  * determine how many bytes each bit in the bitmask represents. The result
1501  * is multiplied with the number of bits set in the bitmask.
1502  *
1503  * @cbm is unsigned long, even if only 32 bits are used to make the
1504  * bitmap functions work correctly.
1505  */
1506 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1507 				  struct rdt_ctrl_domain *d, unsigned long cbm)
1508 {
1509 	unsigned int size = 0;
1510 	struct cacheinfo *ci;
1511 	int num_b;
1512 
1513 	if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
1514 		return size;
1515 
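	/*
	 * Example: a 32MB cache with a 16-bit CBM gives 2MB per bit, so a
	 * bitmask with 4 bits set maps to 8MB.
	 */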
1516 	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
1517 	ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope);
1518 	if (ci)
1519 		size = ci->size / r->cache.cbm_len * num_b;
1520 
1521 	return size;
1522 }
1523 
1524 /*
1525  * rdtgroup_size_show - Display size in bytes of allocated regions
1526  *
1527  * The "size" file mirrors the layout of the "schemata" file, printing the
1528  * size in bytes of each region instead of the capacity bitmask.
1529  */
1530 static int rdtgroup_size_show(struct kernfs_open_file *of,
1531 			      struct seq_file *s, void *v)
1532 {
1533 	struct resctrl_schema *schema;
1534 	enum resctrl_conf_type type;
1535 	struct rdt_ctrl_domain *d;
1536 	struct rdtgroup *rdtgrp;
1537 	struct rdt_resource *r;
1538 	unsigned int size;
1539 	int ret = 0;
1540 	u32 closid;
1541 	bool sep;
1542 	u32 ctrl;
1543 
1544 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1545 	if (!rdtgrp) {
1546 		rdtgroup_kn_unlock(of->kn);
1547 		return -ENOENT;
1548 	}
1549 
1550 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1551 		if (!rdtgrp->plr->d) {
1552 			rdt_last_cmd_clear();
1553 			rdt_last_cmd_puts("Cache domain offline\n");
1554 			ret = -ENODEV;
1555 		} else {
1556 			seq_printf(s, "%*s:", max_name_width,
1557 				   rdtgrp->plr->s->name);
1558 			size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
1559 						    rdtgrp->plr->d,
1560 						    rdtgrp->plr->cbm);
1561 			seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
1562 		}
1563 		goto out;
1564 	}
1565 
1566 	closid = rdtgrp->closid;
1567 
1568 	list_for_each_entry(schema, &resctrl_schema_all, list) {
1569 		r = schema->res;
1570 		type = schema->conf_type;
1571 		sep = false;
1572 		seq_printf(s, "%*s:", max_name_width, schema->name);
1573 		list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
1574 			if (sep)
1575 				seq_putc(s, ';');
1576 			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1577 				size = 0;
1578 			} else {
1579 				if (is_mba_sc(r))
1580 					ctrl = d->mbps_val[closid];
1581 				else
1582 					ctrl = resctrl_arch_get_config(r, d,
1583 								       closid,
1584 								       type);
1585 				if (r->rid == RDT_RESOURCE_MBA ||
1586 				    r->rid == RDT_RESOURCE_SMBA)
1587 					size = ctrl;
1588 				else
1589 					size = rdtgroup_cbm_to_size(r, d, ctrl);
1590 			}
1591 			seq_printf(s, "%d=%u", d->hdr.id, size);
1592 			sep = true;
1593 		}
1594 		seq_putc(s, '\n');
1595 	}
1596 
1597 out:
1598 	rdtgroup_kn_unlock(of->kn);
1599 
1600 	return ret;
1601 }
1602 
1603 #define INVALID_CONFIG_INDEX   UINT_MAX
1604 
1605 /**
1606  * mon_event_config_index_get - get the hardware index for the
1607  *                              configurable event
1608  * @evtid: event id.
1609  *
1610  * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
1611  *         1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
1612  *         INVALID_CONFIG_INDEX for invalid evtid
1613  */
1614 static inline unsigned int mon_event_config_index_get(u32 evtid)
1615 {
1616 	switch (evtid) {
1617 	case QOS_L3_MBM_TOTAL_EVENT_ID:
1618 		return 0;
1619 	case QOS_L3_MBM_LOCAL_EVENT_ID:
1620 		return 1;
1621 	default:
1622 		/* Should never reach here */
1623 		return INVALID_CONFIG_INDEX;
1624 	}
1625 }
1626 
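/*
 * Read the event configuration MSR for @_config_info->evtid on the local
 * CPU. Called via smp_call_function_any() so it runs on a CPU belonging to
 * the domain being queried.
 */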
1627 void resctrl_arch_mon_event_config_read(void *_config_info)
1628 {
1629 	struct resctrl_mon_config_info *config_info = _config_info;
1630 	unsigned int index;
1631 	u64 msrval;
1632 
1633 	index = mon_event_config_index_get(config_info->evtid);
1634 	if (index == INVALID_CONFIG_INDEX) {
1635 		pr_warn_once("Invalid event id %d\n", config_info->evtid);
1636 		return;
1637 	}
1638 	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
1639 
1640 	/* Report only the valid event configuration bits */
1641 	config_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
1642 }
1643 
1644 static void mondata_config_read(struct resctrl_mon_config_info *mon_info)
1645 {
1646 	smp_call_function_any(&mon_info->d->hdr.cpu_mask,
1647 			      resctrl_arch_mon_event_config_read, mon_info, 1);
1648 }
1649 
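/*
 * Print the current configuration of @evtid as "<domain id>=0x<config>"
 * pairs, one per monitoring domain, separated by ';'.
 */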
1650 static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
1651 {
1652 	struct resctrl_mon_config_info mon_info;
1653 	struct rdt_mon_domain *dom;
1654 	bool sep = false;
1655 
1656 	cpus_read_lock();
1657 	mutex_lock(&rdtgroup_mutex);
1658 
1659 	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1660 		if (sep)
1661 			seq_puts(s, ";");
1662 
1663 		memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info));
1664 		mon_info.r = r;
1665 		mon_info.d = dom;
1666 		mon_info.evtid = evtid;
1667 		mondata_config_read(&mon_info);
1668 
1669 		seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
1670 		sep = true;
1671 	}
1672 	seq_puts(s, "\n");
1673 
1674 	mutex_unlock(&rdtgroup_mutex);
1675 	cpus_read_unlock();
1676 
1677 	return 0;
1678 }
1679 
1680 static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
1681 				       struct seq_file *seq, void *v)
1682 {
1683 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1684 
1685 	mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID);
1686 
1687 	return 0;
1688 }
1689 
1690 static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
1691 				       struct seq_file *seq, void *v)
1692 {
1693 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1694 
1695 	mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID);
1696 
1697 	return 0;
1698 }
1699 
1700 void resctrl_arch_mon_event_config_write(void *_config_info)
1701 {
1702 	struct resctrl_mon_config_info *config_info = _config_info;
1703 	unsigned int index;
1704 
1705 	index = mon_event_config_index_get(config_info->evtid);
1706 	if (index == INVALID_CONFIG_INDEX) {
1707 		pr_warn_once("Invalid event id %d\n", config_info->evtid);
1708 		return;
1709 	}
1710 	wrmsr(MSR_IA32_EVT_CFG_BASE + index, config_info->mon_config, 0);
1711 }
1712 
1713 static void mbm_config_write_domain(struct rdt_resource *r,
1714 				    struct rdt_mon_domain *d, u32 evtid, u32 val)
1715 {
1716 	struct resctrl_mon_config_info mon_info = {0};
1717 
1718 	/*
1719 	 * Read the current config value first. If both are the same then
1720 	 * no need to write it again.
1721 	 */
1722 	mon_info.r = r;
1723 	mon_info.d = d;
1724 	mon_info.evtid = evtid;
1725 	mondata_config_read(&mon_info);
1726 	if (mon_info.mon_config == val)
1727 		return;
1728 
1729 	mon_info.mon_config = val;
1730 
1731 	/*
1732 	 * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the
1733 	 * domain. The MSRs offset from MSR_IA32_EVT_CFG_BASE
1734 	 * are scoped at the domain level. Writing any of these MSRs
1735 	 * on one CPU is observed by all the CPUs in the domain.
1736 	 */
1737 	smp_call_function_any(&d->hdr.cpu_mask, resctrl_arch_mon_event_config_write,
1738 			      &mon_info, 1);
1739 
1740 	/*
1741 	 * When an Event Configuration is changed, the bandwidth counters
1742 	 * for all RMIDs and events will be cleared by the hardware. The
1743 	 * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) on the
1744 	 * next read to any event for every RMID. Subsequent reads will
1745 	 * have MSR_IA32_QM_CTR.Unavailable (bit 62) cleared while the
1746 	 * RMID is tracked by the hardware. Clear the mbm_local and
1747 	 * mbm_total counts for all the RMIDs.
1748 	 */
1749 	resctrl_arch_reset_rmid_all(r, d);
1750 }
1751 
1752 static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
1753 {
1754 	char *dom_str = NULL, *id_str;
1755 	unsigned long dom_id, val;
1756 	struct rdt_mon_domain *d;
1757 
1758 	/* Walking r->domains, ensure it can't race with cpuhp */
1759 	lockdep_assert_cpus_held();
1760 
1761 next:
1762 	if (!tok || tok[0] == '\0')
1763 		return 0;
1764 
1765 	/* Start processing the strings for each domain */
1766 	dom_str = strim(strsep(&tok, ";"));
1767 	id_str = strsep(&dom_str, "=");
1768 
1769 	if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
1770 		rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n");
1771 		return -EINVAL;
1772 	}
1773 
1774 	if (!dom_str || kstrtoul(dom_str, 16, &val)) {
1775 		rdt_last_cmd_puts("Non-numeric event configuration value\n");
1776 		return -EINVAL;
1777 	}
1778 
1779 	/* Value from user cannot be more than the supported set of events */
1780 	if ((val & r->mbm_cfg_mask) != val) {
1781 		rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
1782 				    r->mbm_cfg_mask);
1783 		return -EINVAL;
1784 	}
1785 
1786 	list_for_each_entry(d, &r->mon_domains, hdr.list) {
1787 		if (d->hdr.id == dom_id) {
1788 			mbm_config_write_domain(r, d, evtid, val);
1789 			goto next;
1790 		}
1791 	}
1792 
1793 	return -EINVAL;
1794 }
1795 
1796 static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
1797 					    char *buf, size_t nbytes,
1798 					    loff_t off)
1799 {
1800 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1801 	int ret;
1802 
1803 	/* Valid input requires a trailing newline */
1804 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1805 		return -EINVAL;
1806 
1807 	cpus_read_lock();
1808 	mutex_lock(&rdtgroup_mutex);
1809 
1810 	rdt_last_cmd_clear();
1811 
1812 	buf[nbytes - 1] = '\0';
1813 
1814 	ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
1815 
1816 	mutex_unlock(&rdtgroup_mutex);
1817 	cpus_read_unlock();
1818 
1819 	return ret ?: nbytes;
1820 }
1821 
1822 static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
1823 					    char *buf, size_t nbytes,
1824 					    loff_t off)
1825 {
1826 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1827 	int ret;
1828 
1829 	/* Valid input requires a trailing newline */
1830 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1831 		return -EINVAL;
1832 
1833 	cpus_read_lock();
1834 	mutex_lock(&rdtgroup_mutex);
1835 
1836 	rdt_last_cmd_clear();
1837 
1838 	buf[nbytes - 1] = '\0';
1839 
1840 	ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
1841 
1842 	mutex_unlock(&rdtgroup_mutex);
1843 	cpus_read_unlock();
1844 
1845 	return ret ?: nbytes;
1846 }
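
/*
 * Illustrative write, matching the "<domain id>=<hex value>[;...]" format
 * parsed by mon_config_write() (domain ids and values are made up):
 *
 *   # echo "0=0x33;1=0x33" > /sys/fs/resctrl/info/L3_MON/mbm_local_bytes_config
 *
 * Each value must be a subset of r->mbm_cfg_mask, and changing a domain's
 * configuration clears the MBM counters for all RMIDs in that domain.
 */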
1847 
1848 /* rdtgroup information files for one cache resource. */
1849 static struct rftype res_common_files[] = {
1850 	{
1851 		.name		= "last_cmd_status",
1852 		.mode		= 0444,
1853 		.kf_ops		= &rdtgroup_kf_single_ops,
1854 		.seq_show	= rdt_last_cmd_status_show,
1855 		.fflags		= RFTYPE_TOP_INFO,
1856 	},
1857 	{
1858 		.name		= "num_closids",
1859 		.mode		= 0444,
1860 		.kf_ops		= &rdtgroup_kf_single_ops,
1861 		.seq_show	= rdt_num_closids_show,
1862 		.fflags		= RFTYPE_CTRL_INFO,
1863 	},
1864 	{
1865 		.name		= "mon_features",
1866 		.mode		= 0444,
1867 		.kf_ops		= &rdtgroup_kf_single_ops,
1868 		.seq_show	= rdt_mon_features_show,
1869 		.fflags		= RFTYPE_MON_INFO,
1870 	},
1871 	{
1872 		.name		= "num_rmids",
1873 		.mode		= 0444,
1874 		.kf_ops		= &rdtgroup_kf_single_ops,
1875 		.seq_show	= rdt_num_rmids_show,
1876 		.fflags		= RFTYPE_MON_INFO,
1877 	},
1878 	{
1879 		.name		= "cbm_mask",
1880 		.mode		= 0444,
1881 		.kf_ops		= &rdtgroup_kf_single_ops,
1882 		.seq_show	= rdt_default_ctrl_show,
1883 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1884 	},
1885 	{
1886 		.name		= "min_cbm_bits",
1887 		.mode		= 0444,
1888 		.kf_ops		= &rdtgroup_kf_single_ops,
1889 		.seq_show	= rdt_min_cbm_bits_show,
1890 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1891 	},
1892 	{
1893 		.name		= "shareable_bits",
1894 		.mode		= 0444,
1895 		.kf_ops		= &rdtgroup_kf_single_ops,
1896 		.seq_show	= rdt_shareable_bits_show,
1897 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1898 	},
1899 	{
1900 		.name		= "bit_usage",
1901 		.mode		= 0444,
1902 		.kf_ops		= &rdtgroup_kf_single_ops,
1903 		.seq_show	= rdt_bit_usage_show,
1904 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1905 	},
1906 	{
1907 		.name		= "min_bandwidth",
1908 		.mode		= 0444,
1909 		.kf_ops		= &rdtgroup_kf_single_ops,
1910 		.seq_show	= rdt_min_bw_show,
1911 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1912 	},
1913 	{
1914 		.name		= "bandwidth_gran",
1915 		.mode		= 0444,
1916 		.kf_ops		= &rdtgroup_kf_single_ops,
1917 		.seq_show	= rdt_bw_gran_show,
1918 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1919 	},
1920 	{
1921 		.name		= "delay_linear",
1922 		.mode		= 0444,
1923 		.kf_ops		= &rdtgroup_kf_single_ops,
1924 		.seq_show	= rdt_delay_linear_show,
1925 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1926 	},
1927 	/*
1928 	 * It is platform specific which (if any) capabilities are provided
1929 	 * by thread_throttle_mode. Defer "fflags" initialization to
1930 	 * platform discovery.
1931 	 */
1932 	{
1933 		.name		= "thread_throttle_mode",
1934 		.mode		= 0444,
1935 		.kf_ops		= &rdtgroup_kf_single_ops,
1936 		.seq_show	= rdt_thread_throttle_mode_show,
1937 	},
1938 	{
1939 		.name		= "max_threshold_occupancy",
1940 		.mode		= 0644,
1941 		.kf_ops		= &rdtgroup_kf_single_ops,
1942 		.write		= max_threshold_occ_write,
1943 		.seq_show	= max_threshold_occ_show,
1944 		.fflags		= RFTYPE_MON_INFO | RFTYPE_RES_CACHE,
1945 	},
1946 	{
1947 		.name		= "mbm_total_bytes_config",
1948 		.mode		= 0644,
1949 		.kf_ops		= &rdtgroup_kf_single_ops,
1950 		.seq_show	= mbm_total_bytes_config_show,
1951 		.write		= mbm_total_bytes_config_write,
1952 	},
1953 	{
1954 		.name		= "mbm_local_bytes_config",
1955 		.mode		= 0644,
1956 		.kf_ops		= &rdtgroup_kf_single_ops,
1957 		.seq_show	= mbm_local_bytes_config_show,
1958 		.write		= mbm_local_bytes_config_write,
1959 	},
1960 	{
1961 		.name		= "cpus",
1962 		.mode		= 0644,
1963 		.kf_ops		= &rdtgroup_kf_single_ops,
1964 		.write		= rdtgroup_cpus_write,
1965 		.seq_show	= rdtgroup_cpus_show,
1966 		.fflags		= RFTYPE_BASE,
1967 	},
1968 	{
1969 		.name		= "cpus_list",
1970 		.mode		= 0644,
1971 		.kf_ops		= &rdtgroup_kf_single_ops,
1972 		.write		= rdtgroup_cpus_write,
1973 		.seq_show	= rdtgroup_cpus_show,
1974 		.flags		= RFTYPE_FLAGS_CPUS_LIST,
1975 		.fflags		= RFTYPE_BASE,
1976 	},
1977 	{
1978 		.name		= "tasks",
1979 		.mode		= 0644,
1980 		.kf_ops		= &rdtgroup_kf_single_ops,
1981 		.write		= rdtgroup_tasks_write,
1982 		.seq_show	= rdtgroup_tasks_show,
1983 		.fflags		= RFTYPE_BASE,
1984 	},
1985 	{
1986 		.name		= "mon_hw_id",
1987 		.mode		= 0444,
1988 		.kf_ops		= &rdtgroup_kf_single_ops,
1989 		.seq_show	= rdtgroup_rmid_show,
1990 		.fflags		= RFTYPE_MON_BASE | RFTYPE_DEBUG,
1991 	},
1992 	{
1993 		.name		= "schemata",
1994 		.mode		= 0644,
1995 		.kf_ops		= &rdtgroup_kf_single_ops,
1996 		.write		= rdtgroup_schemata_write,
1997 		.seq_show	= rdtgroup_schemata_show,
1998 		.fflags		= RFTYPE_CTRL_BASE,
1999 	},
2000 	{
2001 		.name		= "mba_MBps_event",
2002 		.mode		= 0644,
2003 		.kf_ops		= &rdtgroup_kf_single_ops,
2004 		.write		= rdtgroup_mba_mbps_event_write,
2005 		.seq_show	= rdtgroup_mba_mbps_event_show,
2006 	},
2007 	{
2008 		.name		= "mode",
2009 		.mode		= 0644,
2010 		.kf_ops		= &rdtgroup_kf_single_ops,
2011 		.write		= rdtgroup_mode_write,
2012 		.seq_show	= rdtgroup_mode_show,
2013 		.fflags		= RFTYPE_CTRL_BASE,
2014 	},
2015 	{
2016 		.name		= "size",
2017 		.mode		= 0444,
2018 		.kf_ops		= &rdtgroup_kf_single_ops,
2019 		.seq_show	= rdtgroup_size_show,
2020 		.fflags		= RFTYPE_CTRL_BASE,
2021 	},
2022 	{
2023 		.name		= "sparse_masks",
2024 		.mode		= 0444,
2025 		.kf_ops		= &rdtgroup_kf_single_ops,
2026 		.seq_show	= rdt_has_sparse_bitmasks_show,
2027 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
2028 	},
2029 	{
2030 		.name		= "ctrl_hw_id",
2031 		.mode		= 0444,
2032 		.kf_ops		= &rdtgroup_kf_single_ops,
2033 		.seq_show	= rdtgroup_closid_show,
2034 		.fflags		= RFTYPE_CTRL_BASE | RFTYPE_DEBUG,
2035 	},
2036 
2037 };
2038 
2039 static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
2040 {
2041 	struct rftype *rfts, *rft;
2042 	int ret, len;
2043 
2044 	rfts = res_common_files;
2045 	len = ARRAY_SIZE(res_common_files);
2046 
2047 	lockdep_assert_held(&rdtgroup_mutex);
2048 
2049 	if (resctrl_debug)
2050 		fflags |= RFTYPE_DEBUG;
2051 
2052 	for (rft = rfts; rft < rfts + len; rft++) {
2053 		if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
2054 			ret = rdtgroup_add_file(kn, rft);
2055 			if (ret)
2056 				goto error;
2057 		}
2058 	}
2059 
2060 	return 0;
2061 error:
2062 	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
2063 	while (--rft >= rfts) {
2064 		if ((fflags & rft->fflags) == rft->fflags)
2065 			kernfs_remove_by_name(kn, rft->name);
2066 	}
2067 	return ret;
2068 }
2069 
2070 static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
2071 {
2072 	struct rftype *rfts, *rft;
2073 	int len;
2074 
2075 	rfts = res_common_files;
2076 	len = ARRAY_SIZE(res_common_files);
2077 
2078 	for (rft = rfts; rft < rfts + len; rft++) {
2079 		if (!strcmp(rft->name, name))
2080 			return rft;
2081 	}
2082 
2083 	return NULL;
2084 }
2085 
2086 static void thread_throttle_mode_init(void)
2087 {
2088 	enum membw_throttle_mode throttle_mode = THREAD_THROTTLE_UNDEFINED;
2089 	struct rdt_resource *r_mba, *r_smba;
2090 
2091 	r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
2092 	if (r_mba->alloc_capable &&
2093 	    r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
2094 		throttle_mode = r_mba->membw.throttle_mode;
2095 
2096 	r_smba = resctrl_arch_get_resource(RDT_RESOURCE_SMBA);
2097 	if (r_smba->alloc_capable &&
2098 	    r_smba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
2099 		throttle_mode = r_smba->membw.throttle_mode;
2100 
2101 	if (throttle_mode == THREAD_THROTTLE_UNDEFINED)
2102 		return;
2103 
2104 	resctrl_file_fflags_init("thread_throttle_mode",
2105 				 RFTYPE_CTRL_INFO | RFTYPE_RES_MB);
2106 }
2107 
2108 void resctrl_file_fflags_init(const char *config, unsigned long fflags)
2109 {
2110 	struct rftype *rft;
2111 
2112 	rft = rdtgroup_get_rftype_by_name(config);
2113 	if (rft)
2114 		rft->fflags = fflags;
2115 }
2116 
2117 /**
2118  * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
2119  * @r: The resource group with which the file is associated.
2120  * @name: Name of the file
2121  *
2122  * The permissions of the named resctrl file, directory, or link are
2123  * modified so that no user may read, write, or execute it.
2124  *
2125  * WARNING: This function is intended to communicate to the user that the
2126  * resctrl file has been locked down - that it is not relevant to the
2127  * particular state the system finds itself in. It should not be relied
2128  * on to protect from user access because after the file's permissions
2129  * are restricted the user can still change the permissions using chmod
2130  * from the command line.
2131  *
2132  * Return: 0 on success, <0 on failure.
2133  */
2134 int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
2135 {
2136 	struct iattr iattr = {.ia_valid = ATTR_MODE,};
2137 	struct kernfs_node *kn;
2138 	int ret = 0;
2139 
2140 	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
2141 	if (!kn)
2142 		return -ENOENT;
2143 
2144 	switch (kernfs_type(kn)) {
2145 	case KERNFS_DIR:
2146 		iattr.ia_mode = S_IFDIR;
2147 		break;
2148 	case KERNFS_FILE:
2149 		iattr.ia_mode = S_IFREG;
2150 		break;
2151 	case KERNFS_LINK:
2152 		iattr.ia_mode = S_IFLNK;
2153 		break;
2154 	}
2155 
2156 	ret = kernfs_setattr(kn, &iattr);
2157 	kernfs_put(kn);
2158 	return ret;
2159 }
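
/*
 * Illustrative effect of rdtgroup_kn_mode_restrict() on a regular file:
 * the mode is reduced to bare S_IFREG with no permission bits, so the
 * file shows up as (path and group name are hypothetical):
 *
 *   # ls -l /sys/fs/resctrl/grp0/<restricted file>
 *   ---------- 1 root root 0 ... <restricted file>
 */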
2160 
2161 /**
2162  * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
2163  * @r: The resource group with which the file is associated.
2164  * @name: Name of the file
2165  * @mask: Mask of permissions that should be restored
2166  *
2167  * Restore the permissions of the named file. If @name is a directory the
2168  * permissions of its parent will be used.
2169  *
2170  * Return: 0 on success, <0 on failure.
2171  */
2172 int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
2173 			     umode_t mask)
2174 {
2175 	struct iattr iattr = {.ia_valid = ATTR_MODE,};
2176 	struct kernfs_node *kn, *parent;
2177 	struct rftype *rfts, *rft;
2178 	int ret, len;
2179 
2180 	rfts = res_common_files;
2181 	len = ARRAY_SIZE(res_common_files);
2182 
2183 	for (rft = rfts; rft < rfts + len; rft++) {
2184 		if (!strcmp(rft->name, name))
2185 			iattr.ia_mode = rft->mode & mask;
2186 	}
2187 
2188 	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
2189 	if (!kn)
2190 		return -ENOENT;
2191 
2192 	switch (kernfs_type(kn)) {
2193 	case KERNFS_DIR:
2194 		parent = kernfs_get_parent(kn);
2195 		if (parent) {
2196 			iattr.ia_mode |= parent->mode;
2197 			kernfs_put(parent);
2198 		}
2199 		iattr.ia_mode |= S_IFDIR;
2200 		break;
2201 	case KERNFS_FILE:
2202 		iattr.ia_mode |= S_IFREG;
2203 		break;
2204 	case KERNFS_LINK:
2205 		iattr.ia_mode |= S_IFLNK;
2206 		break;
2207 	}
2208 
2209 	ret = kernfs_setattr(kn, &iattr);
2210 	kernfs_put(kn);
2211 	return ret;
2212 }
2213 
2214 static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
2215 				      unsigned long fflags)
2216 {
2217 	struct kernfs_node *kn_subdir;
2218 	int ret;
2219 
2220 	kn_subdir = kernfs_create_dir(kn_info, name,
2221 				      kn_info->mode, priv);
2222 	if (IS_ERR(kn_subdir))
2223 		return PTR_ERR(kn_subdir);
2224 
2225 	ret = rdtgroup_kn_set_ugid(kn_subdir);
2226 	if (ret)
2227 		return ret;
2228 
2229 	ret = rdtgroup_add_files(kn_subdir, fflags);
2230 	if (!ret)
2231 		kernfs_activate(kn_subdir);
2232 
2233 	return ret;
2234 }
2235 
2236 static unsigned long fflags_from_resource(struct rdt_resource *r)
2237 {
2238 	switch (r->rid) {
2239 	case RDT_RESOURCE_L3:
2240 	case RDT_RESOURCE_L2:
2241 		return RFTYPE_RES_CACHE;
2242 	case RDT_RESOURCE_MBA:
2243 	case RDT_RESOURCE_SMBA:
2244 		return RFTYPE_RES_MB;
2245 	}
2246 
2247 	return WARN_ON_ONCE(1);
2248 }
2249 
2250 static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
2251 {
2252 	struct resctrl_schema *s;
2253 	struct rdt_resource *r;
2254 	unsigned long fflags;
2255 	char name[32];
2256 	int ret;
2257 
2258 	/* create the directory */
2259 	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
2260 	if (IS_ERR(kn_info))
2261 		return PTR_ERR(kn_info);
2262 
2263 	ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO);
2264 	if (ret)
2265 		goto out_destroy;
2266 
2267 	/* loop over enabled controls, these are all alloc_capable */
2268 	list_for_each_entry(s, &resctrl_schema_all, list) {
2269 		r = s->res;
2270 		fflags = fflags_from_resource(r) | RFTYPE_CTRL_INFO;
2271 		ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
2272 		if (ret)
2273 			goto out_destroy;
2274 	}
2275 
2276 	for_each_mon_capable_rdt_resource(r) {
2277 		fflags = fflags_from_resource(r) | RFTYPE_MON_INFO;
2278 		sprintf(name, "%s_MON", r->name);
2279 		ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
2280 		if (ret)
2281 			goto out_destroy;
2282 	}
2283 
2284 	ret = rdtgroup_kn_set_ugid(kn_info);
2285 	if (ret)
2286 		goto out_destroy;
2287 
2288 	kernfs_activate(kn_info);
2289 
2290 	return 0;
2291 
2292 out_destroy:
2293 	kernfs_remove(kn_info);
2294 	return ret;
2295 }
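
/*
 * Example "info" hierarchy created above, assuming L3 allocation, MB
 * allocation and L3 monitoring are enumerated (actual entries depend on
 * the platform):
 *
 *   /sys/fs/resctrl/info/
 *       last_cmd_status
 *       L3/       - RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE files
 *       MB/       - RFTYPE_CTRL_INFO | RFTYPE_RES_MB files
 *       L3_MON/   - RFTYPE_MON_INFO files, named "<resource>_MON"
 */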
2296 
2297 static int
2298 mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
2299 		    char *name, struct kernfs_node **dest_kn)
2300 {
2301 	struct kernfs_node *kn;
2302 	int ret;
2303 
2304 	/* create the directory */
2305 	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
2306 	if (IS_ERR(kn))
2307 		return PTR_ERR(kn);
2308 
2309 	if (dest_kn)
2310 		*dest_kn = kn;
2311 
2312 	ret = rdtgroup_kn_set_ugid(kn);
2313 	if (ret)
2314 		goto out_destroy;
2315 
2316 	kernfs_activate(kn);
2317 
2318 	return 0;
2319 
2320 out_destroy:
2321 	kernfs_remove(kn);
2322 	return ret;
2323 }
2324 
2325 static void l3_qos_cfg_update(void *arg)
2326 {
2327 	bool *enable = arg;
2328 
2329 	wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
2330 }
2331 
2332 static void l2_qos_cfg_update(void *arg)
2333 {
2334 	bool *enable = arg;
2335 
2336 	wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
2337 }
2338 
2339 static inline bool is_mba_linear(void)
2340 {
2341 	return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear;
2342 }
2343 
2344 static int set_cache_qos_cfg(int level, bool enable)
2345 {
2346 	void (*update)(void *arg);
2347 	struct rdt_ctrl_domain *d;
2348 	struct rdt_resource *r_l;
2349 	cpumask_var_t cpu_mask;
2350 	int cpu;
2351 
2352 	/* Walking r->domains, ensure it can't race with cpuhp */
2353 	lockdep_assert_cpus_held();
2354 
2355 	if (level == RDT_RESOURCE_L3)
2356 		update = l3_qos_cfg_update;
2357 	else if (level == RDT_RESOURCE_L2)
2358 		update = l2_qos_cfg_update;
2359 	else
2360 		return -EINVAL;
2361 
2362 	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
2363 		return -ENOMEM;
2364 
2365 	r_l = &rdt_resources_all[level].r_resctrl;
2366 	list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) {
2367 		if (r_l->cache.arch_has_per_cpu_cfg)
2368 			/* Pick all the CPUs in the domain instance */
2369 			for_each_cpu(cpu, &d->hdr.cpu_mask)
2370 				cpumask_set_cpu(cpu, cpu_mask);
2371 		else
2372 			/* Pick one CPU from each domain instance to update MSR */
2373 			cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask);
2374 	}
2375 
2376 	/* Update QOS_CFG MSR on all the CPUs in cpu_mask */
2377 	on_each_cpu_mask(cpu_mask, update, &enable, 1);
2378 
2379 	free_cpumask_var(cpu_mask);
2380 
2381 	return 0;
2382 }
2383 
2384 /* Restore the qos cfg state when a domain comes online */
2385 void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
2386 {
2387 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
2388 
2389 	if (!r->cdp_capable)
2390 		return;
2391 
2392 	if (r->rid == RDT_RESOURCE_L2)
2393 		l2_qos_cfg_update(&hw_res->cdp_enabled);
2394 
2395 	if (r->rid == RDT_RESOURCE_L3)
2396 		l3_qos_cfg_update(&hw_res->cdp_enabled);
2397 }
2398 
2399 static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d)
2400 {
2401 	u32 num_closid = resctrl_arch_get_num_closid(r);
2402 	int cpu = cpumask_any(&d->hdr.cpu_mask);
2403 	int i;
2404 
2405 	d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
2406 				   GFP_KERNEL, cpu_to_node(cpu));
2407 	if (!d->mbps_val)
2408 		return -ENOMEM;
2409 
2410 	for (i = 0; i < num_closid; i++)
2411 		d->mbps_val[i] = MBA_MAX_MBPS;
2412 
2413 	return 0;
2414 }
2415 
2416 static void mba_sc_domain_destroy(struct rdt_resource *r,
2417 				  struct rdt_ctrl_domain *d)
2418 {
2419 	kfree(d->mbps_val);
2420 	d->mbps_val = NULL;
2421 }
2422 
2423 /*
2424  * The MBA software controller is supported only if
2425  * MBM is supported, MBA is in linear scale,
2426  * and the MBM monitor scope is the same as the MBA
2427  * control scope.
2428  */
2429 static bool supports_mba_mbps(void)
2430 {
2431 	struct rdt_resource *rmbm = resctrl_arch_get_resource(RDT_RESOURCE_L3);
2432 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
2433 
2434 	return (resctrl_is_mbm_enabled() &&
2435 		r->alloc_capable && is_mba_linear() &&
2436 		r->ctrl_scope == rmbm->mon_scope);
2437 }
2438 
2439 /*
2440  * Enable or disable the MBA software controller
2441  * which helps user specify bandwidth in MBps.
2442  */
2443 static int set_mba_sc(bool mba_sc)
2444 {
2445 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
2446 	u32 num_closid = resctrl_arch_get_num_closid(r);
2447 	struct rdt_ctrl_domain *d;
2448 	unsigned long fflags;
2449 	int i;
2450 
2451 	if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
2452 		return -EINVAL;
2453 
2454 	r->membw.mba_sc = mba_sc;
2455 
2456 	rdtgroup_default.mba_mbps_event = mba_mbps_default_event;
2457 
2458 	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
2459 		for (i = 0; i < num_closid; i++)
2460 			d->mbps_val[i] = MBA_MAX_MBPS;
2461 	}
2462 
2463 	fflags = mba_sc ? RFTYPE_CTRL_BASE | RFTYPE_MON_BASE : 0;
2464 	resctrl_file_fflags_init("mba_MBps_event", fflags);
2465 
2466 	return 0;
2467 }
2468 
2469 static int cdp_enable(int level)
2470 {
2471 	struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl;
2472 	int ret;
2473 
2474 	if (!r_l->alloc_capable)
2475 		return -EINVAL;
2476 
2477 	ret = set_cache_qos_cfg(level, true);
2478 	if (!ret)
2479 		rdt_resources_all[level].cdp_enabled = true;
2480 
2481 	return ret;
2482 }
2483 
2484 static void cdp_disable(int level)
2485 {
2486 	struct rdt_hw_resource *r_hw = &rdt_resources_all[level];
2487 
2488 	if (r_hw->cdp_enabled) {
2489 		set_cache_qos_cfg(level, false);
2490 		r_hw->cdp_enabled = false;
2491 	}
2492 }
2493 
2494 int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable)
2495 {
2496 	struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
2497 
2498 	if (!hw_res->r_resctrl.cdp_capable)
2499 		return -EINVAL;
2500 
2501 	if (enable)
2502 		return cdp_enable(l);
2503 
2504 	cdp_disable(l);
2505 
2506 	return 0;
2507 }
2508 
2509 /*
2510  * We don't allow rdtgroup directories to be created anywhere
2511  * except the root directory. Thus when looking for the rdtgroup
2512  * structure for a kernfs node we are either looking at a directory,
2513  * in which case the rdtgroup structure is pointed at by the "priv"
2514  * field, or at a file, in which case we need only look to the parent
2515  * to find the rdtgroup.
2516  */
2517 static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
2518 {
2519 	if (kernfs_type(kn) == KERNFS_DIR) {
2520 		/*
2521 		 * All the resource directories use "kn->priv"
2522 		 * to point to the "struct rdtgroup" for the
2523 		 * resource. "info" and its subdirectories don't
2524 		 * have rdtgroup structures, so return NULL here.
2525 		 */
2526 		if (kn == kn_info ||
2527 		    rcu_access_pointer(kn->__parent) == kn_info)
2528 			return NULL;
2529 		else
2530 			return kn->priv;
2531 	} else {
2532 		return rdt_kn_parent_priv(kn);
2533 	}
2534 }
2535 
2536 static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
2537 {
2538 	atomic_inc(&rdtgrp->waitcount);
2539 	kernfs_break_active_protection(kn);
2540 }
2541 
2542 static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
2543 {
2544 	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
2545 	    (rdtgrp->flags & RDT_DELETED)) {
2546 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2547 		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2548 			rdtgroup_pseudo_lock_remove(rdtgrp);
2549 		kernfs_unbreak_active_protection(kn);
2550 		rdtgroup_remove(rdtgrp);
2551 	} else {
2552 		kernfs_unbreak_active_protection(kn);
2553 	}
2554 }
2555 
2556 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
2557 {
2558 	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2559 
2560 	if (!rdtgrp)
2561 		return NULL;
2562 
2563 	rdtgroup_kn_get(rdtgrp, kn);
2564 
2565 	cpus_read_lock();
2566 	mutex_lock(&rdtgroup_mutex);
2567 
2568 	/* Was this group deleted while we waited? */
2569 	if (rdtgrp->flags & RDT_DELETED)
2570 		return NULL;
2571 
2572 	return rdtgrp;
2573 }
2574 
2575 void rdtgroup_kn_unlock(struct kernfs_node *kn)
2576 {
2577 	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2578 
2579 	if (!rdtgrp)
2580 		return;
2581 
2582 	mutex_unlock(&rdtgroup_mutex);
2583 	cpus_read_unlock();
2584 
2585 	rdtgroup_kn_put(rdtgrp, kn);
2586 }
2587 
2588 static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2589 			     struct rdtgroup *prgrp,
2590 			     struct kernfs_node **mon_data_kn);
2591 
2592 static void rdt_disable_ctx(void)
2593 {
2594 	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
2595 	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
2596 	set_mba_sc(false);
2597 
2598 	resctrl_debug = false;
2599 }
2600 
2601 static int rdt_enable_ctx(struct rdt_fs_context *ctx)
2602 {
2603 	int ret = 0;
2604 
2605 	if (ctx->enable_cdpl2) {
2606 		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
2607 		if (ret)
2608 			goto out_done;
2609 	}
2610 
2611 	if (ctx->enable_cdpl3) {
2612 		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
2613 		if (ret)
2614 			goto out_cdpl2;
2615 	}
2616 
2617 	if (ctx->enable_mba_mbps) {
2618 		ret = set_mba_sc(true);
2619 		if (ret)
2620 			goto out_cdpl3;
2621 	}
2622 
2623 	if (ctx->enable_debug)
2624 		resctrl_debug = true;
2625 
2626 	return 0;
2627 
2628 out_cdpl3:
2629 	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
2630 out_cdpl2:
2631 	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
2632 out_done:
2633 	return ret;
2634 }
2635 
2636 static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
2637 {
2638 	struct resctrl_schema *s;
2639 	const char *suffix = "";
2640 	int ret, cl;
2641 
2642 	s = kzalloc(sizeof(*s), GFP_KERNEL);
2643 	if (!s)
2644 		return -ENOMEM;
2645 
2646 	s->res = r;
2647 	s->num_closid = resctrl_arch_get_num_closid(r);
2648 	if (resctrl_arch_get_cdp_enabled(r->rid))
2649 		s->num_closid /= 2;
2650 
2651 	s->conf_type = type;
2652 	switch (type) {
2653 	case CDP_CODE:
2654 		suffix = "CODE";
2655 		break;
2656 	case CDP_DATA:
2657 		suffix = "DATA";
2658 		break;
2659 	case CDP_NONE:
2660 		suffix = "";
2661 		break;
2662 	}
2663 
2664 	ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
2665 	if (ret >= sizeof(s->name)) {
2666 		kfree(s);
2667 		return -EINVAL;
2668 	}
2669 
2670 	cl = strlen(s->name);
2671 
2672 	/*
2673 	 * If CDP is supported by this resource, but not enabled,
2674 	 * include the suffix. This ensures the tabular format of the
2675 	 * schemata file does not change between mounts of the filesystem.
2676 	 */
2677 	if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
2678 		cl += 4;
2679 
2680 	if (cl > max_name_width)
2681 		max_name_width = cl;
2682 
2683 	switch (r->schema_fmt) {
2684 	case RESCTRL_SCHEMA_BITMAP:
2685 		s->fmt_str = "%d=%x";
2686 		break;
2687 	case RESCTRL_SCHEMA_RANGE:
2688 		s->fmt_str = "%d=%u";
2689 		break;
2690 	}
2691 
2692 	if (WARN_ON_ONCE(!s->fmt_str)) {
2693 		kfree(s);
2694 		return -EINVAL;
2695 	}
2696 
2697 	INIT_LIST_HEAD(&s->list);
2698 	list_add(&s->list, &resctrl_schema_all);
2699 
2700 	return 0;
2701 }
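
/*
 * Illustrative schemata naming from schemata_list_add(): with CDP enabled
 * on L3 two entries, "L3CODE" and "L3DATA", are created and a schemata
 * line uses the "%d=%x" bitmap format per domain (CBM values made up):
 *
 *   L3CODE:0=fffff;1=fffff
 *   L3DATA:0=00fff;1=00fff
 *
 * With CDP disabled a single "L3" entry is created, but max_name_width
 * still reserves room for the four character suffix so the tabular
 * layout does not change between mounts.
 */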
2702 
2703 static int schemata_list_create(void)
2704 {
2705 	struct rdt_resource *r;
2706 	int ret = 0;
2707 
2708 	for_each_alloc_capable_rdt_resource(r) {
2709 		if (resctrl_arch_get_cdp_enabled(r->rid)) {
2710 			ret = schemata_list_add(r, CDP_CODE);
2711 			if (ret)
2712 				break;
2713 
2714 			ret = schemata_list_add(r, CDP_DATA);
2715 		} else {
2716 			ret = schemata_list_add(r, CDP_NONE);
2717 		}
2718 
2719 		if (ret)
2720 			break;
2721 	}
2722 
2723 	return ret;
2724 }
2725 
2726 static void schemata_list_destroy(void)
2727 {
2728 	struct resctrl_schema *s, *tmp;
2729 
2730 	list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
2731 		list_del(&s->list);
2732 		kfree(s);
2733 	}
2734 }
2735 
2736 static int rdt_get_tree(struct fs_context *fc)
2737 {
2738 	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2739 	unsigned long flags = RFTYPE_CTRL_BASE;
2740 	struct rdt_mon_domain *dom;
2741 	struct rdt_resource *r;
2742 	int ret;
2743 
2744 	cpus_read_lock();
2745 	mutex_lock(&rdtgroup_mutex);
2746 	/*
2747 	 * The resctrl filesystem can only be mounted once.
2748 	 */
2749 	if (resctrl_mounted) {
2750 		ret = -EBUSY;
2751 		goto out;
2752 	}
2753 
2754 	ret = rdtgroup_setup_root(ctx);
2755 	if (ret)
2756 		goto out;
2757 
2758 	ret = rdt_enable_ctx(ctx);
2759 	if (ret)
2760 		goto out_root;
2761 
2762 	ret = schemata_list_create();
2763 	if (ret) {
2764 		schemata_list_destroy();
2765 		goto out_ctx;
2766 	}
2767 
2768 	closid_init();
2769 
2770 	if (resctrl_arch_mon_capable())
2771 		flags |= RFTYPE_MON;
2772 
2773 	ret = rdtgroup_add_files(rdtgroup_default.kn, flags);
2774 	if (ret)
2775 		goto out_schemata_free;
2776 
2777 	kernfs_activate(rdtgroup_default.kn);
2778 
2779 	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
2780 	if (ret < 0)
2781 		goto out_schemata_free;
2782 
2783 	if (resctrl_arch_mon_capable()) {
2784 		ret = mongroup_create_dir(rdtgroup_default.kn,
2785 					  &rdtgroup_default, "mon_groups",
2786 					  &kn_mongrp);
2787 		if (ret < 0)
2788 			goto out_info;
2789 
2790 		ret = mkdir_mondata_all(rdtgroup_default.kn,
2791 					&rdtgroup_default, &kn_mondata);
2792 		if (ret < 0)
2793 			goto out_mongrp;
2794 		rdtgroup_default.mon.mon_data_kn = kn_mondata;
2795 	}
2796 
2797 	ret = rdt_pseudo_lock_init();
2798 	if (ret)
2799 		goto out_mondata;
2800 
2801 	ret = kernfs_get_tree(fc);
2802 	if (ret < 0)
2803 		goto out_psl;
2804 
2805 	if (resctrl_arch_alloc_capable())
2806 		resctrl_arch_enable_alloc();
2807 	if (resctrl_arch_mon_capable())
2808 		resctrl_arch_enable_mon();
2809 
2810 	if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
2811 		resctrl_mounted = true;
2812 
2813 	if (resctrl_is_mbm_enabled()) {
2814 		r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
2815 		list_for_each_entry(dom, &r->mon_domains, hdr.list)
2816 			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
2817 						   RESCTRL_PICK_ANY_CPU);
2818 	}
2819 
2820 	goto out;
2821 
2822 out_psl:
2823 	rdt_pseudo_lock_release();
2824 out_mondata:
2825 	if (resctrl_arch_mon_capable())
2826 		kernfs_remove(kn_mondata);
2827 out_mongrp:
2828 	if (resctrl_arch_mon_capable())
2829 		kernfs_remove(kn_mongrp);
2830 out_info:
2831 	kernfs_remove(kn_info);
2832 out_schemata_free:
2833 	schemata_list_destroy();
2834 out_ctx:
2835 	rdt_disable_ctx();
2836 out_root:
2837 	rdtgroup_destroy_root();
2838 out:
2839 	rdt_last_cmd_clear();
2840 	mutex_unlock(&rdtgroup_mutex);
2841 	cpus_read_unlock();
2842 	return ret;
2843 }
2844 
2845 enum rdt_param {
2846 	Opt_cdp,
2847 	Opt_cdpl2,
2848 	Opt_mba_mbps,
2849 	Opt_debug,
2850 	nr__rdt_params
2851 };
2852 
2853 static const struct fs_parameter_spec rdt_fs_parameters[] = {
2854 	fsparam_flag("cdp",		Opt_cdp),
2855 	fsparam_flag("cdpl2",		Opt_cdpl2),
2856 	fsparam_flag("mba_MBps",	Opt_mba_mbps),
2857 	fsparam_flag("debug",		Opt_debug),
2858 	{}
2859 };
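
/*
 * Example mount invocations using the parameters above (illustrative,
 * assuming the conventional /sys/fs/resctrl mount point):
 *
 *   # mount -t resctrl resctrl /sys/fs/resctrl
 *   # mount -t resctrl resctrl -o cdp,mba_MBps /sys/fs/resctrl
 *
 * "cdp"/"cdpl2" enable code/data prioritization on L3/L2, "mba_MBps"
 * enables the MBA software controller, and "debug" makes the
 * RFTYPE_DEBUG files visible.
 */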
2860 
2861 static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
2862 {
2863 	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2864 	struct fs_parse_result result;
2865 	const char *msg;
2866 	int opt;
2867 
2868 	opt = fs_parse(fc, rdt_fs_parameters, param, &result);
2869 	if (opt < 0)
2870 		return opt;
2871 
2872 	switch (opt) {
2873 	case Opt_cdp:
2874 		ctx->enable_cdpl3 = true;
2875 		return 0;
2876 	case Opt_cdpl2:
2877 		ctx->enable_cdpl2 = true;
2878 		return 0;
2879 	case Opt_mba_mbps:
2880 		msg = "mba_MBps requires MBM and linear scale MBA at L3 scope";
2881 		if (!supports_mba_mbps())
2882 			return invalfc(fc, msg);
2883 		ctx->enable_mba_mbps = true;
2884 		return 0;
2885 	case Opt_debug:
2886 		ctx->enable_debug = true;
2887 		return 0;
2888 	}
2889 
2890 	return -EINVAL;
2891 }
2892 
2893 static void rdt_fs_context_free(struct fs_context *fc)
2894 {
2895 	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2896 
2897 	kernfs_free_fs_context(fc);
2898 	kfree(ctx);
2899 }
2900 
2901 static const struct fs_context_operations rdt_fs_context_ops = {
2902 	.free		= rdt_fs_context_free,
2903 	.parse_param	= rdt_parse_param,
2904 	.get_tree	= rdt_get_tree,
2905 };
2906 
2907 static int rdt_init_fs_context(struct fs_context *fc)
2908 {
2909 	struct rdt_fs_context *ctx;
2910 
2911 	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
2912 	if (!ctx)
2913 		return -ENOMEM;
2914 
2915 	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
2916 	fc->fs_private = &ctx->kfc;
2917 	fc->ops = &rdt_fs_context_ops;
2918 	put_user_ns(fc->user_ns);
2919 	fc->user_ns = get_user_ns(&init_user_ns);
2920 	fc->global = true;
2921 	return 0;
2922 }
2923 
2924 void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
2925 {
2926 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
2927 	struct rdt_hw_ctrl_domain *hw_dom;
2928 	struct msr_param msr_param;
2929 	struct rdt_ctrl_domain *d;
2930 	int i;
2931 
2932 	/* Walking r->domains, ensure it can't race with cpuhp */
2933 	lockdep_assert_cpus_held();
2934 
2935 	msr_param.res = r;
2936 	msr_param.low = 0;
2937 	msr_param.high = hw_res->num_closid;
2938 
2939 	/*
2940 	 * Disable resource control for this resource by setting all
2941 	 * CBMs in all ctrl_domains to the maximum mask value. Pick one CPU
2942 	 * from each domain to update the MSRs below.
2943 	 */
2944 	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
2945 		hw_dom = resctrl_to_arch_ctrl_dom(d);
2946 
2947 		for (i = 0; i < hw_res->num_closid; i++)
2948 			hw_dom->ctrl_val[i] = resctrl_get_default_ctrl(r);
2949 		msr_param.dom = d;
2950 		smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
2951 	}
2952 
2953 	return;
2954 }
2955 
2956 /*
2957  * Move tasks from one group to the other. If @from is NULL, then all tasks
2958  * in the system are moved unconditionally (used for teardown).
2959  *
2960  * If @mask is not NULL, the CPUs on which moved tasks are running are set
2961  * in that mask so that the update SMP function call is restricted to the
2962  * affected CPUs.
2963  */
2964 static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
2965 				 struct cpumask *mask)
2966 {
2967 	struct task_struct *p, *t;
2968 
2969 	read_lock(&tasklist_lock);
2970 	for_each_process_thread(p, t) {
2971 		if (!from || is_closid_match(t, from) ||
2972 		    is_rmid_match(t, from)) {
2973 			resctrl_arch_set_closid_rmid(t, to->closid,
2974 						     to->mon.rmid);
2975 
2976 			/*
2977 			 * Order the closid/rmid stores above before the loads
2978 			 * in task_curr(). This pairs with the full barrier
2979 			 * between the rq->curr update and resctrl_sched_in()
2980 			 * during context switch.
2981 			 */
2982 			smp_mb();
2983 
2984 			/*
2985 			 * If the task is on a CPU, set the CPU in the mask.
2986 			 * The detection is inaccurate as tasks might move or
2987 			 * schedule before the smp function call takes place.
2988 			 * In such a case the function call is pointless, but
2989 			 * there is no other side effect.
2990 			 */
2991 			if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
2992 				cpumask_set_cpu(task_cpu(t), mask);
2993 		}
2994 	}
2995 	read_unlock(&tasklist_lock);
2996 }
2997 
2998 static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
2999 {
3000 	struct rdtgroup *sentry, *stmp;
3001 	struct list_head *head;
3002 
3003 	head = &rdtgrp->mon.crdtgrp_list;
3004 	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
3005 		free_rmid(sentry->closid, sentry->mon.rmid);
3006 		list_del(&sentry->mon.crdtgrp_list);
3007 
3008 		if (atomic_read(&sentry->waitcount) != 0)
3009 			sentry->flags = RDT_DELETED;
3010 		else
3011 			rdtgroup_remove(sentry);
3012 	}
3013 }
3014 
3015 /*
3016  * Forcibly remove all subdirectories under the root.
3017  */
3018 static void rmdir_all_sub(void)
3019 {
3020 	struct rdtgroup *rdtgrp, *tmp;
3021 
3022 	/* Move all tasks to the default resource group */
3023 	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
3024 
3025 	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
3026 		/* Free any child rmids */
3027 		free_all_child_rdtgrp(rdtgrp);
3028 
3029 		/* Remove each rdtgroup other than root */
3030 		if (rdtgrp == &rdtgroup_default)
3031 			continue;
3032 
3033 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3034 		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
3035 			rdtgroup_pseudo_lock_remove(rdtgrp);
3036 
3037 		/*
3038 		 * Give any CPUs back to the default group. We cannot copy
3039 		 * cpu_online_mask because a CPU might have executed the
3040 		 * offline callback already, but is still marked online.
3041 		 */
3042 		cpumask_or(&rdtgroup_default.cpu_mask,
3043 			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
3044 
3045 		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3046 
3047 		kernfs_remove(rdtgrp->kn);
3048 		list_del(&rdtgrp->rdtgroup_list);
3049 
3050 		if (atomic_read(&rdtgrp->waitcount) != 0)
3051 			rdtgrp->flags = RDT_DELETED;
3052 		else
3053 			rdtgroup_remove(rdtgrp);
3054 	}
3055 	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
3056 	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
3057 
3058 	kernfs_remove(kn_info);
3059 	kernfs_remove(kn_mongrp);
3060 	kernfs_remove(kn_mondata);
3061 }
3062 
3063 static void rdt_kill_sb(struct super_block *sb)
3064 {
3065 	struct rdt_resource *r;
3066 
3067 	cpus_read_lock();
3068 	mutex_lock(&rdtgroup_mutex);
3069 
3070 	rdt_disable_ctx();
3071 
3072 	/* Put everything back to default values. */
3073 	for_each_alloc_capable_rdt_resource(r)
3074 		resctrl_arch_reset_all_ctrls(r);
3075 
3076 	rmdir_all_sub();
3077 	rdt_pseudo_lock_release();
3078 	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
3079 	schemata_list_destroy();
3080 	rdtgroup_destroy_root();
3081 	if (resctrl_arch_alloc_capable())
3082 		resctrl_arch_disable_alloc();
3083 	if (resctrl_arch_mon_capable())
3084 		resctrl_arch_disable_mon();
3085 	resctrl_mounted = false;
3086 	kernfs_kill_sb(sb);
3087 	mutex_unlock(&rdtgroup_mutex);
3088 	cpus_read_unlock();
3089 }
3090 
3091 static struct file_system_type rdt_fs_type = {
3092 	.name			= "resctrl",
3093 	.init_fs_context	= rdt_init_fs_context,
3094 	.parameters		= rdt_fs_parameters,
3095 	.kill_sb		= rdt_kill_sb,
3096 };
3097 
3098 static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
3099 		       void *priv)
3100 {
3101 	struct kernfs_node *kn;
3102 	int ret = 0;
3103 
3104 	kn = __kernfs_create_file(parent_kn, name, 0444,
3105 				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
3106 				  &kf_mondata_ops, priv, NULL, NULL);
3107 	if (IS_ERR(kn))
3108 		return PTR_ERR(kn);
3109 
3110 	ret = rdtgroup_kn_set_ugid(kn);
3111 	if (ret) {
3112 		kernfs_remove(kn);
3113 		return ret;
3114 	}
3115 
3116 	return ret;
3117 }
3118 
3119 static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname)
3120 {
3121 	struct kernfs_node *kn;
3122 
3123 	kn = kernfs_find_and_get(pkn, name);
3124 	if (!kn)
3125 		return;
3126 	kernfs_put(kn);
3127 
3128 	if (kn->dir.subdirs <= 1)
3129 		kernfs_remove(kn);
3130 	else
3131 		kernfs_remove_by_name(kn, subname);
3132 }
3133 
3134 /*
3135  * Remove all subdirectories of mon_data of ctrl_mon groups
3136  * and monitor groups for the given domain.
3137  * Also remove the files and directories containing the "sum" of domain
3138  * data when the last domain being summed is removed.
3139  */
3140 static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
3141 					   struct rdt_mon_domain *d)
3142 {
3143 	struct rdtgroup *prgrp, *crgrp;
3144 	char subname[32];
3145 	bool snc_mode;
3146 	char name[32];
3147 
3148 	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3149 	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3150 	if (snc_mode)
3151 		sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
3152 
3153 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
3154 		mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname);
3155 
3156 		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
3157 			mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname);
3158 	}
3159 }
3160 
3161 static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
3162 			     struct rdt_resource *r, struct rdtgroup *prgrp,
3163 			     bool do_sum)
3164 {
3165 	struct rmid_read rr = {0};
3166 	union mon_data_bits priv;
3167 	struct mon_evt *mevt;
3168 	int ret;
3169 
3170 	if (WARN_ON(list_empty(&r->evt_list)))
3171 		return -EPERM;
3172 
3173 	priv.u.rid = r->rid;
3174 	priv.u.domid = do_sum ? d->ci->id : d->hdr.id;
3175 	priv.u.sum = do_sum;
3176 	list_for_each_entry(mevt, &r->evt_list, list) {
3177 		priv.u.evtid = mevt->evtid;
3178 		ret = mon_addfile(kn, mevt->name, priv.priv);
3179 		if (ret)
3180 			return ret;
3181 
3182 		if (!do_sum && resctrl_is_mbm_event(mevt->evtid))
3183 			mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
3184 	}
3185 
3186 	return 0;
3187 }
3188 
3189 static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
3190 				struct rdt_mon_domain *d,
3191 				struct rdt_resource *r, struct rdtgroup *prgrp)
3192 {
3193 	struct kernfs_node *kn, *ckn;
3194 	char name[32];
3195 	bool snc_mode;
3196 	int ret = 0;
3197 
3198 	lockdep_assert_held(&rdtgroup_mutex);
3199 
3200 	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3201 	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3202 	kn = kernfs_find_and_get(parent_kn, name);
3203 	if (kn) {
3204 		/*
3205 		 * rdtgroup_mutex will prevent this directory from being
3206 		 * removed. No need to keep this hold.
3207 		 */
3208 		kernfs_put(kn);
3209 	} else {
3210 		kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
3211 		if (IS_ERR(kn))
3212 			return PTR_ERR(kn);
3213 
3214 		ret = rdtgroup_kn_set_ugid(kn);
3215 		if (ret)
3216 			goto out_destroy;
3217 		ret = mon_add_all_files(kn, d, r, prgrp, snc_mode);
3218 		if (ret)
3219 			goto out_destroy;
3220 	}
3221 
3222 	if (snc_mode) {
3223 		sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id);
3224 		ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp);
3225 		if (IS_ERR(ckn)) {
3226 			ret = -EINVAL;
3227 			goto out_destroy;
3228 		}
3229 
3230 		ret = rdtgroup_kn_set_ugid(ckn);
3231 		if (ret)
3232 			goto out_destroy;
3233 
3234 		ret = mon_add_all_files(ckn, d, r, prgrp, false);
3235 		if (ret)
3236 			goto out_destroy;
3237 	}
3238 
3239 	kernfs_activate(kn);
3240 	return 0;
3241 
3242 out_destroy:
3243 	kernfs_remove(kn);
3244 	return ret;
3245 }
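
/*
 * Illustrative mon_data layout created by mkdir_mondata_subdir() when SNC
 * is enabled (r->mon_scope == RESCTRL_L3_NODE) and one L3 cache spans two
 * nodes (ids are made up):
 *
 *   mon_data/mon_L3_00/                  - per L3 cache, "sum" files
 *   mon_data/mon_L3_00/mon_sub_L3_00/    - per SNC node files
 *   mon_data/mon_L3_00/mon_sub_L3_01/
 *
 * Without SNC only the flat mon_L3_<domain id> directories are created.
 */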
3246 
3247 /*
3248  * Add all subdirectories of mon_data for "ctrl_mon" groups
3249  * and "monitor" groups with given domain id.
3250  */
3251 static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
3252 					   struct rdt_mon_domain *d)
3253 {
3254 	struct kernfs_node *parent_kn;
3255 	struct rdtgroup *prgrp, *crgrp;
3256 	struct list_head *head;
3257 
3258 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
3259 		parent_kn = prgrp->mon.mon_data_kn;
3260 		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
3261 
3262 		head = &prgrp->mon.crdtgrp_list;
3263 		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
3264 			parent_kn = crgrp->mon.mon_data_kn;
3265 			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
3266 		}
3267 	}
3268 }
3269 
3270 static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
3271 				       struct rdt_resource *r,
3272 				       struct rdtgroup *prgrp)
3273 {
3274 	struct rdt_mon_domain *dom;
3275 	int ret;
3276 
3277 	/* Walking r->domains, ensure it can't race with cpuhp */
3278 	lockdep_assert_cpus_held();
3279 
3280 	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
3281 		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
3282 		if (ret)
3283 			return ret;
3284 	}
3285 
3286 	return 0;
3287 }
3288 
3289 /*
3290  * This creates a directory mon_data which contains the monitored data.
3291  *
3292  * mon_data has one directory for each domain which are named
3293  * in the format mon_<domain_name>_<domain_id>. For example, a mon_data
3294  * directory with L3 domains looks as below:
3295  * ./mon_data:
3296  * mon_L3_00
3297  * mon_L3_01
3298  * mon_L3_02
3299  * ...
3300  *
3301  * Each domain directory has one file per event:
3302  * ./mon_L3_00/:
3303  * llc_occupancy
3304  *
3305  */
3306 static int mkdir_mondata_all(struct kernfs_node *parent_kn,
3307 			     struct rdtgroup *prgrp,
3308 			     struct kernfs_node **dest_kn)
3309 {
3310 	struct rdt_resource *r;
3311 	struct kernfs_node *kn;
3312 	int ret;
3313 
3314 	/*
3315 	 * Create the mon_data directory first.
3316 	 */
3317 	ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
3318 	if (ret)
3319 		return ret;
3320 
3321 	if (dest_kn)
3322 		*dest_kn = kn;
3323 
3324 	/*
3325 	 * Create the subdirectories for each domain. Note that all events
3326 	 * in a domain like L3 are grouped into a resource whose domain is L3.
3327 	 */
3328 	for_each_mon_capable_rdt_resource(r) {
3329 		ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
3330 		if (ret)
3331 			goto out_destroy;
3332 	}
3333 
3334 	return 0;
3335 
3336 out_destroy:
3337 	kernfs_remove(kn);
3338 	return ret;
3339 }
3340 
3341 /**
3342  * cbm_ensure_valid - Enforce validity on provided CBM
3343  * @_val:	Candidate CBM
3344  * @r:		RDT resource to which the CBM belongs
3345  *
3346  * The provided CBM represents all cache portions available for use. This
3347  * may be represented by a bitmap that does not consist of contiguous ones
3348  * and thus be an invalid CBM.
3349  * Here the provided CBM is forced to be a valid CBM by only considering
3350  * the first set of contiguous bits as valid and clearing all other bits.
3351  * The intention here is to provide a valid default CBM with which a new
3352  * resource group is initialized. The user can follow this with a
3353  * modification to the CBM if the default does not satisfy the
3354  * requirements.
3355  */
3356 static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
3357 {
3358 	unsigned int cbm_len = r->cache.cbm_len;
3359 	unsigned long first_bit, zero_bit;
3360 	unsigned long val = _val;
3361 
3362 	if (!val)
3363 		return 0;
3364 
3365 	first_bit = find_first_bit(&val, cbm_len);
3366 	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
3367 
3368 	/* Clear any remaining bits to ensure contiguous region */
3369 	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
3370 	return (u32)val;
3371 }
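
/*
 * Worked example of cbm_ensure_valid() with cbm_len = 8:
 *
 *   0xf5 (1111 0101) -> first_bit = 0, zero_bit = 1 -> 0x01
 *   0x3c (0011 1100) -> first_bit = 2, zero_bit = 6 -> 0x3c (unchanged)
 *
 * Only the lowest run of contiguous set bits survives.
 */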
3372 
3373 /*
3374  * Initialize cache resources per RDT domain
3375  *
3376  * Set the RDT domain up to start off with all usable allocations. That is,
3377  * all shareable and unused bits. All-zero CBM is invalid.
3378  */
3379 static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s,
3380 				 u32 closid)
3381 {
3382 	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
3383 	enum resctrl_conf_type t = s->conf_type;
3384 	struct resctrl_staged_config *cfg;
3385 	struct rdt_resource *r = s->res;
3386 	u32 used_b = 0, unused_b = 0;
3387 	unsigned long tmp_cbm;
3388 	enum rdtgrp_mode mode;
3389 	u32 peer_ctl, ctrl_val;
3390 	int i;
3391 
3392 	cfg = &d->staged_config[t];
3393 	cfg->have_new_ctrl = false;
3394 	cfg->new_ctrl = r->cache.shareable_bits;
3395 	used_b = r->cache.shareable_bits;
3396 	for (i = 0; i < closids_supported(); i++) {
3397 		if (closid_allocated(i) && i != closid) {
3398 			mode = rdtgroup_mode_by_closid(i);
3399 			if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
3400 				/*
3401 				 * ctrl values for locksetup aren't relevant
3402 				 * until the schemata is written, and the mode
3403 				 * becomes RDT_MODE_PSEUDO_LOCKED.
3404 				 */
3405 				continue;
3406 			/*
3407 			 * If CDP is active include peer domain's
3408 			 * usage to ensure there is no overlap
3409 			 * with an exclusive group.
3410 			 */
3411 			if (resctrl_arch_get_cdp_enabled(r->rid))
3412 				peer_ctl = resctrl_arch_get_config(r, d, i,
3413 								   peer_type);
3414 			else
3415 				peer_ctl = 0;
3416 			ctrl_val = resctrl_arch_get_config(r, d, i,
3417 							   s->conf_type);
3418 			used_b |= ctrl_val | peer_ctl;
3419 			if (mode == RDT_MODE_SHAREABLE)
3420 				cfg->new_ctrl |= ctrl_val | peer_ctl;
3421 		}
3422 	}
3423 	if (d->plr && d->plr->cbm > 0)
3424 		used_b |= d->plr->cbm;
3425 	unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
3426 	unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
3427 	cfg->new_ctrl |= unused_b;
3428 	/*
3429 	 * Force the initial CBM to be valid, user can
3430 	 * modify the CBM based on system availability.
3431 	 */
3432 	cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
3433 	/*
3434 	 * Assign the u32 CBM to an unsigned long to ensure that
3435 	 * bitmap_weight() does not access out-of-bound memory.
3436 	 */
3437 	tmp_cbm = cfg->new_ctrl;
3438 	if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
3439 		rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id);
3440 		return -ENOSPC;
3441 	}
3442 	cfg->have_new_ctrl = true;
3443 
3444 	return 0;
3445 }
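
/*
 * Worked example of __init_one_rdt_domain() with made up numbers:
 * cbm_len = 8, shareable_bits = 0x00, CDP disabled, and one other
 * allocated CLOSID holding an exclusive allocation of 0xf0:
 *
 *   used_b   = 0xf0   (not added to new_ctrl: the mode is not shareable)
 *   unused_b = 0xf0 ^ 0xff = 0x0f
 *   new_ctrl = 0x0f -> cbm_ensure_valid() -> 0x0f
 *
 * The new group starts with only the bits no other group is using.
 */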
3446 
3447 /*
3448  * Initialize cache resources with default values.
3449  *
3450  * A new RDT group is being created on an allocation capable (CAT)
3451  * supporting system. Set this group up to start off with all usable
3452  * allocations.
3453  *
3454  * If there are no more shareable bits available on any domain then
3455  * the entire allocation will fail.
3456  */
3457 static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
3458 {
3459 	struct rdt_ctrl_domain *d;
3460 	int ret;
3461 
3462 	list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
3463 		ret = __init_one_rdt_domain(d, s, closid);
3464 		if (ret < 0)
3465 			return ret;
3466 	}
3467 
3468 	return 0;
3469 }
3470 
3471 /* Initialize MBA resource with default values. */
3472 static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
3473 {
3474 	struct resctrl_staged_config *cfg;
3475 	struct rdt_ctrl_domain *d;
3476 
3477 	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
3478 		if (is_mba_sc(r)) {
3479 			d->mbps_val[closid] = MBA_MAX_MBPS;
3480 			continue;
3481 		}
3482 
3483 		cfg = &d->staged_config[CDP_NONE];
3484 		cfg->new_ctrl = resctrl_get_default_ctrl(r);
3485 		cfg->have_new_ctrl = true;
3486 	}
3487 }
3488 
3489 /* Initialize the RDT group's allocations. */
3490 static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
3491 {
3492 	struct resctrl_schema *s;
3493 	struct rdt_resource *r;
3494 	int ret = 0;
3495 
3496 	rdt_staged_configs_clear();
3497 
3498 	list_for_each_entry(s, &resctrl_schema_all, list) {
3499 		r = s->res;
3500 		if (r->rid == RDT_RESOURCE_MBA ||
3501 		    r->rid == RDT_RESOURCE_SMBA) {
3502 			rdtgroup_init_mba(r, rdtgrp->closid);
3503 			if (is_mba_sc(r))
3504 				continue;
3505 		} else {
3506 			ret = rdtgroup_init_cat(s, rdtgrp->closid);
3507 			if (ret < 0)
3508 				goto out;
3509 		}
3510 
3511 		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
3512 		if (ret < 0) {
3513 			rdt_last_cmd_puts("Failed to initialize allocations\n");
3514 			goto out;
3515 		}
3516 
3517 	}
3518 
3519 	rdtgrp->mode = RDT_MODE_SHAREABLE;
3520 
3521 out:
3522 	rdt_staged_configs_clear();
3523 	return ret;
3524 }
3525 
3526 static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
3527 {
3528 	int ret;
3529 
3530 	if (!resctrl_arch_mon_capable())
3531 		return 0;
3532 
3533 	ret = alloc_rmid(rdtgrp->closid);
3534 	if (ret < 0) {
3535 		rdt_last_cmd_puts("Out of RMIDs\n");
3536 		return ret;
3537 	}
3538 	rdtgrp->mon.rmid = ret;
3539 
3540 	ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
3541 	if (ret) {
3542 		rdt_last_cmd_puts("kernfs subdir error\n");
3543 		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3544 		return ret;
3545 	}
3546 
3547 	return 0;
3548 }
3549 
3550 static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
3551 {
3552 	if (resctrl_arch_mon_capable())
3553 		free_rmid(rgrp->closid, rgrp->mon.rmid);
3554 }
3555 
3556 /*
3557  * We allow creating mon groups only within a directory called "mon_groups",
3558  * which is present in every ctrl_mon group. Check if this is a valid
3559  * "mon_groups" directory.
3560  *
3561  * 1. The directory should be named "mon_groups".
3562  * 2. The mon group itself should "not" be named "mon_groups".
3563  *   This makes sure the "mon_groups" directory always has a ctrl_mon
3564  *   group as its parent.
3565  */
3566 static bool is_mon_groups(struct kernfs_node *kn, const char *name)
3567 {
3568 	return (!strcmp(rdt_kn_name(kn), "mon_groups") &&
3569 		strcmp(name, "mon_groups"));
3570 }
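
/*
 * Illustrative paths as seen by is_mon_groups() (group names are made up,
 * filesystem mounted at /sys/fs/resctrl):
 *
 *   /sys/fs/resctrl/grp0/mon_groups/mon0        - allowed
 *   /sys/fs/resctrl/mon_groups/mon0             - allowed (default group)
 *   /sys/fs/resctrl/grp0/mon0                   - rejected, parent is not "mon_groups"
 *   /sys/fs/resctrl/grp0/mon_groups/mon_groups  - rejected, name is "mon_groups"
 */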
3571 
3572 static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
3573 			     const char *name, umode_t mode,
3574 			     enum rdt_group_type rtype, struct rdtgroup **r)
3575 {
3576 	struct rdtgroup *prdtgrp, *rdtgrp;
3577 	unsigned long files = 0;
3578 	struct kernfs_node *kn;
3579 	int ret;
3580 
3581 	prdtgrp = rdtgroup_kn_lock_live(parent_kn);
3582 	if (!prdtgrp) {
3583 		ret = -ENODEV;
3584 		goto out_unlock;
3585 	}
3586 
3587 	/*
3588 	 * Check that the parent directory for a monitor group is a "mon_groups"
3589 	 * directory.
3590 	 */
3591 	if (rtype == RDTMON_GROUP && !is_mon_groups(parent_kn, name)) {
3592 		ret = -EPERM;
3593 		goto out_unlock;
3594 	}
3595 
3596 	if (rtype == RDTMON_GROUP &&
3597 	    (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3598 	     prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
3599 		ret = -EINVAL;
3600 		rdt_last_cmd_puts("Pseudo-locking in progress\n");
3601 		goto out_unlock;
3602 	}
3603 
3604 	/* allocate the rdtgroup. */
3605 	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
3606 	if (!rdtgrp) {
3607 		ret = -ENOSPC;
3608 		rdt_last_cmd_puts("Kernel out of memory\n");
3609 		goto out_unlock;
3610 	}
3611 	*r = rdtgrp;
3612 	rdtgrp->mon.parent = prdtgrp;
3613 	rdtgrp->type = rtype;
3614 	INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
3615 
3616 	/* kernfs creates the directory for rdtgrp */
3617 	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
3618 	if (IS_ERR(kn)) {
3619 		ret = PTR_ERR(kn);
3620 		rdt_last_cmd_puts("kernfs create error\n");
3621 		goto out_free_rgrp;
3622 	}
3623 	rdtgrp->kn = kn;
3624 
3625 	/*
3626 	 * kernfs_remove() will drop the reference count on "kn" which
3627 	 * will free it. But we still need it to stick around for the
3628 	 * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
3629 	 * which will be dropped by kernfs_put() in rdtgroup_remove().
3630 	 */
3631 	kernfs_get(kn);
3632 
3633 	ret = rdtgroup_kn_set_ugid(kn);
3634 	if (ret) {
3635 		rdt_last_cmd_puts("kernfs perm error\n");
3636 		goto out_destroy;
3637 	}
3638 
3639 	if (rtype == RDTCTRL_GROUP) {
3640 		files = RFTYPE_BASE | RFTYPE_CTRL;
3641 		if (resctrl_arch_mon_capable())
3642 			files |= RFTYPE_MON;
3643 	} else {
3644 		files = RFTYPE_BASE | RFTYPE_MON;
3645 	}
3646 
3647 	ret = rdtgroup_add_files(kn, files);
3648 	if (ret) {
3649 		rdt_last_cmd_puts("kernfs fill error\n");
3650 		goto out_destroy;
3651 	}
3652 
3653 	/*
3654 	 * The caller unlocks the parent_kn upon success.
3655 	 */
3656 	return 0;
3657 
3658 out_destroy:
3659 	kernfs_put(rdtgrp->kn);
3660 	kernfs_remove(rdtgrp->kn);
3661 out_free_rgrp:
3662 	kfree(rdtgrp);
3663 out_unlock:
3664 	rdtgroup_kn_unlock(parent_kn);
3665 	return ret;
3666 }
3667 
3668 static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
3669 {
3670 	kernfs_remove(rgrp->kn);
3671 	rdtgroup_remove(rgrp);
3672 }
3673 
3674 /*
3675  * Create a monitor group under the "mon_groups" directory of a control
3676  * and monitor group (ctrl_mon). This is a resource group
3677  * to monitor a subset of tasks and CPUs in its parent ctrl_mon group.
3678  */
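/*
 * A sketch of typical use from user space (paths and names illustrative):
 *
 *   mkdir /sys/fs/resctrl/grp1/mon_groups/m1
 *   echo <pid> > /sys/fs/resctrl/grp1/mon_groups/m1/tasks
 *   cat /sys/fs/resctrl/grp1/mon_groups/m1/mon_data/mon_L3_00/llc_occupancy
 */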
3679 static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
3680 			      const char *name, umode_t mode)
3681 {
3682 	struct rdtgroup *rdtgrp, *prgrp;
3683 	int ret;
3684 
3685 	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
3686 	if (ret)
3687 		return ret;
3688 
3689 	prgrp = rdtgrp->mon.parent;
3690 	rdtgrp->closid = prgrp->closid;
3691 
3692 	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
3693 	if (ret) {
3694 		mkdir_rdt_prepare_clean(rdtgrp);
3695 		goto out_unlock;
3696 	}
3697 
3698 	kernfs_activate(rdtgrp->kn);
3699 
3700 	/*
3701 	 * Add the rdtgrp to the list of rdtgrps the parent
3702 	 * ctrl_mon group has to track.
3703 	 */
3704 	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
3705 
3706 out_unlock:
3707 	rdtgroup_kn_unlock(parent_kn);
3708 	return ret;
3709 }
3710 
3711 /*
3712  * These are rdtgroups created under the root directory. They can be
3713  * used to allocate and monitor resources.
3714  */
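/*
 * A sketch of typical use from user space (paths and values illustrative):
 *
 *   mkdir /sys/fs/resctrl/p1
 *   echo "L3:0=ff" > /sys/fs/resctrl/p1/schemata
 *   echo <pid> > /sys/fs/resctrl/p1/tasks
 */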
3715 static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
3716 				   const char *name, umode_t mode)
3717 {
3718 	struct rdtgroup *rdtgrp;
3719 	struct kernfs_node *kn;
3720 	u32 closid;
3721 	int ret;
3722 
3723 	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
3724 	if (ret)
3725 		return ret;
3726 
3727 	kn = rdtgrp->kn;
3728 	ret = closid_alloc();
3729 	if (ret < 0) {
3730 		rdt_last_cmd_puts("Out of CLOSIDs\n");
3731 		goto out_common_fail;
3732 	}
3733 	closid = ret;
3734 	ret = 0;
3735 
3736 	rdtgrp->closid = closid;
3737 
3738 	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
3739 	if (ret)
3740 		goto out_closid_free;
3741 
3742 	kernfs_activate(rdtgrp->kn);
3743 
3744 	ret = rdtgroup_init_alloc(rdtgrp);
3745 	if (ret < 0)
3746 		goto out_rmid_free;
3747 
3748 	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
3749 
3750 	if (resctrl_arch_mon_capable()) {
3751 		/*
3752 		 * Create an empty mon_groups directory to hold the subset
3753 		 * of tasks and cpus to monitor.
3754 		 */
3755 		ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
3756 		if (ret) {
3757 			rdt_last_cmd_puts("kernfs subdir error\n");
3758 			goto out_del_list;
3759 		}
3760 		if (is_mba_sc(NULL))
3761 			rdtgrp->mba_mbps_event = mba_mbps_default_event;
3762 	}
3763 
3764 	goto out_unlock;
3765 
3766 out_del_list:
3767 	list_del(&rdtgrp->rdtgroup_list);
3768 out_rmid_free:
3769 	mkdir_rdt_prepare_rmid_free(rdtgrp);
3770 out_closid_free:
3771 	closid_free(closid);
3772 out_common_fail:
3773 	mkdir_rdt_prepare_clean(rdtgrp);
3774 out_unlock:
3775 	rdtgroup_kn_unlock(parent_kn);
3776 	return ret;
3777 }
3778 
3779 static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
3780 			  umode_t mode)
3781 {
3782 	/* Do not accept '\n' to avoid an unparsable situation. */
3783 	if (strchr(name, '\n'))
3784 		return -EINVAL;
3785 
3786 	/*
3787 	 * If the parent directory is the root directory and RDT
3788 	 * allocation is supported, add a control and monitoring
3789 	 * subdirectory.
3790 	 */
3791 	if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn)
3792 		return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
3793 
3794 	/* Else, attempt to add a monitoring subdirectory. */
3795 	if (resctrl_arch_mon_capable())
3796 		return rdtgroup_mkdir_mon(parent_kn, name, mode);
3797 
3798 	return -EPERM;
3799 }
3800 
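/*
 * Remove a MON group: return its tasks and CPUs to the parent CTRL_MON
 * group, update the per-CPU defaults, free the group's RMID and unlink
 * it from the parent's list of monitor groups.
 */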
3801 static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3802 {
3803 	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3804 	u32 closid, rmid;
3805 	int cpu;
3806 
3807 	/* Give any tasks back to the parent group */
3808 	rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
3809 
3810 	/*
3811 	 * Update per cpu closid/rmid of the moved CPUs first.
3812 	 * Note: the closid will not change, but the arch code still needs it.
3813 	 */
3814 	closid = prdtgrp->closid;
3815 	rmid = prdtgrp->mon.rmid;
3816 	for_each_cpu(cpu, &rdtgrp->cpu_mask)
3817 		resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
3818 
3819 	/*
3820 	 * Update the MSR on the moved CPUs and on CPUs which have a
3821 	 * moved task running on them.
3822 	 */
3823 	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3824 	update_closid_rmid(tmpmask, NULL);
3825 
3826 	rdtgrp->flags = RDT_DELETED;
3827 	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3828 
3829 	/*
3830 	 * Remove the rdtgrp from the parent ctrl_mon group's list
3831 	 */
3832 	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3833 	list_del(&rdtgrp->mon.crdtgrp_list);
3834 
3835 	kernfs_remove(rdtgrp->kn);
3836 
3837 	return 0;
3838 }
3839 
3840 static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
3841 {
3842 	rdtgrp->flags = RDT_DELETED;
3843 	list_del(&rdtgrp->rdtgroup_list);
3844 
3845 	kernfs_remove(rdtgrp->kn);
3846 	return 0;
3847 }
3848 
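/*
 * Remove a CTRL_MON group: return its tasks and CPUs to the default
 * group, free its RMID and CLOSID, and free the RMIDs of all of its
 * child MON groups.
 */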
3849 static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3850 {
3851 	u32 closid, rmid;
3852 	int cpu;
3853 
3854 	/* Give any tasks back to the default group */
3855 	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
3856 
3857 	/* Give any CPUs back to the default group */
3858 	cpumask_or(&rdtgroup_default.cpu_mask,
3859 		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
3860 
3861 	/* Update per cpu closid and rmid of the moved CPUs first */
3862 	closid = rdtgroup_default.closid;
3863 	rmid = rdtgroup_default.mon.rmid;
3864 	for_each_cpu(cpu, &rdtgrp->cpu_mask)
3865 		resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
3866 
3867 	/*
3868 	 * Update the MSR on the moved CPUs and on CPUs which have a
3869 	 * moved task running on them.
3870 	 */
3871 	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3872 	update_closid_rmid(tmpmask, NULL);
3873 
3874 	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3875 	closid_free(rdtgrp->closid);
3876 
3877 	rdtgroup_ctrl_remove(rdtgrp);
3878 
3879 	/*
3880 	 * Free all the child monitor group rmids.
3881 	 */
3882 	free_all_child_rdtgrp(rdtgrp);
3883 
3884 	return 0;
3885 }
3886 
3887 static struct kernfs_node *rdt_kn_parent(struct kernfs_node *kn)
3888 {
3889 	/*
3890 	 * Valid within the RCU section in which it was obtained, or while
3891 	 * rdtgroup_mutex is held.
3892 	 */
3893 	return rcu_dereference_check(kn->__parent, lockdep_is_held(&rdtgroup_mutex));
3894 }
3895 
3896 static int rdtgroup_rmdir(struct kernfs_node *kn)
3897 {
3898 	struct kernfs_node *parent_kn;
3899 	struct rdtgroup *rdtgrp;
3900 	cpumask_var_t tmpmask;
3901 	int ret = 0;
3902 
3903 	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
3904 		return -ENOMEM;
3905 
3906 	rdtgrp = rdtgroup_kn_lock_live(kn);
3907 	if (!rdtgrp) {
3908 		ret = -EPERM;
3909 		goto out;
3910 	}
3911 	parent_kn = rdt_kn_parent(kn);
3912 
3913 	/*
3914 	 * If the rdtgroup is a ctrl_mon group and its parent directory
3915 	 * is the root directory, remove the ctrl_mon group.
3916 	 *
3917 	 * If the rdtgroup is a mon group and its parent directory
3918 	 * is a valid "mon_groups" directory, remove the mon group.
3919 	 */
3920 	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
3921 	    rdtgrp != &rdtgroup_default) {
3922 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3923 		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
3924 			ret = rdtgroup_ctrl_remove(rdtgrp);
3925 		} else {
3926 			ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
3927 		}
3928 	} else if (rdtgrp->type == RDTMON_GROUP &&
3929 		 is_mon_groups(parent_kn, rdt_kn_name(kn))) {
3930 		ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
3931 	} else {
3932 		ret = -EPERM;
3933 	}
3934 
3935 out:
3936 	rdtgroup_kn_unlock(kn);
3937 	free_cpumask_var(tmpmask);
3938 	return ret;
3939 }
3940 
3941 /**
3942  * mongrp_reparent() - replace parent CTRL_MON group of a MON group
3943  * @rdtgrp:		the MON group whose parent should be replaced
3944  * @new_prdtgrp:	replacement parent CTRL_MON group for @rdtgrp
3945  * @cpus:		cpumask provided by the caller for use during this call
3946  *
3947  * Replaces the parent CTRL_MON group for a MON group, resulting in all member
3948  * tasks' CLOSID immediately changing to that of the new parent group.
3949  * Monitoring data for the group is unaffected by this operation.
3950  */
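/*
 * Reparenting is driven by rename(2) on the mounted filesystem, e.g.
 * (illustrative):
 *
 *   mv /sys/fs/resctrl/p1/mon_groups/m1 /sys/fs/resctrl/p2/mon_groups/
 *
 * moves m1 under p2 so its tasks pick up p2's CLOSID while keeping
 * their RMID, and therefore their monitoring data.
 */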
3951 static void mongrp_reparent(struct rdtgroup *rdtgrp,
3952 			    struct rdtgroup *new_prdtgrp,
3953 			    cpumask_var_t cpus)
3954 {
3955 	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3956 
3957 	WARN_ON(rdtgrp->type != RDTMON_GROUP);
3958 	WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP);
3959 
3960 	/* Nothing to do when simply renaming a MON group. */
3961 	if (prdtgrp == new_prdtgrp)
3962 		return;
3963 
3964 	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3965 	list_move_tail(&rdtgrp->mon.crdtgrp_list,
3966 		       &new_prdtgrp->mon.crdtgrp_list);
3967 
3968 	rdtgrp->mon.parent = new_prdtgrp;
3969 	rdtgrp->closid = new_prdtgrp->closid;
3970 
3971 	/* Propagate updated closid to all tasks in this group. */
3972 	rdt_move_group_tasks(rdtgrp, rdtgrp, cpus);
3973 
3974 	update_closid_rmid(cpus, NULL);
3975 }
3976 
3977 static int rdtgroup_rename(struct kernfs_node *kn,
3978 			   struct kernfs_node *new_parent, const char *new_name)
3979 {
3980 	struct kernfs_node *kn_parent;
3981 	struct rdtgroup *new_prdtgrp;
3982 	struct rdtgroup *rdtgrp;
3983 	cpumask_var_t tmpmask;
3984 	int ret;
3985 
3986 	rdtgrp = kernfs_to_rdtgroup(kn);
3987 	new_prdtgrp = kernfs_to_rdtgroup(new_parent);
3988 	if (!rdtgrp || !new_prdtgrp)
3989 		return -ENOENT;
3990 
3991 	/* Release both kernfs active_refs before obtaining rdtgroup mutex. */
3992 	rdtgroup_kn_get(rdtgrp, kn);
3993 	rdtgroup_kn_get(new_prdtgrp, new_parent);
3994 
3995 	mutex_lock(&rdtgroup_mutex);
3996 
3997 	rdt_last_cmd_clear();
3998 
3999 	/*
4000 	 * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if
4001 	 * either kernfs_node is a file.
4002 	 */
4003 	if (kernfs_type(kn) != KERNFS_DIR ||
4004 	    kernfs_type(new_parent) != KERNFS_DIR) {
4005 		rdt_last_cmd_puts("Source and destination must be directories\n");
4006 		ret = -EPERM;
4007 		goto out;
4008 	}
4009 
4010 	if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
4011 		ret = -ENOENT;
4012 		goto out;
4013 	}
4014 
4015 	kn_parent = rdt_kn_parent(kn);
4016 	if (rdtgrp->type != RDTMON_GROUP || !kn_parent ||
4017 	    !is_mon_groups(kn_parent, rdt_kn_name(kn))) {
4018 		rdt_last_cmd_puts("Source must be a MON group\n");
4019 		ret = -EPERM;
4020 		goto out;
4021 	}
4022 
4023 	if (!is_mon_groups(new_parent, new_name)) {
4024 		rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n");
4025 		ret = -EPERM;
4026 		goto out;
4027 	}
4028 
4029 	/*
4030 	 * If the MON group is monitoring CPUs, the CPUs must be assigned to the
4031 	 * current parent CTRL_MON group and therefore cannot be assigned to
4032 	 * the new parent, making the move illegal.
4033 	 */
4034 	if (!cpumask_empty(&rdtgrp->cpu_mask) &&
4035 	    rdtgrp->mon.parent != new_prdtgrp) {
4036 		rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n");
4037 		ret = -EPERM;
4038 		goto out;
4039 	}
4040 
4041 	/*
4042 	 * Allocate the cpumask for use in mongrp_reparent() to avoid the
4043 	 * possibility of failing to allocate it after kernfs_rename() has
4044 	 * succeeded.
4045 	 */
4046 	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) {
4047 		ret = -ENOMEM;
4048 		goto out;
4049 	}
4050 
4051 	/*
4052 	 * Perform all input validation and allocations needed to ensure
4053 	 * mongrp_reparent() will succeed before calling kernfs_rename(),
4054 	 * otherwise it would be necessary to revert this call if
4055 	 * mongrp_reparent() failed.
4056 	 */
4057 	ret = kernfs_rename(kn, new_parent, new_name);
4058 	if (!ret)
4059 		mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask);
4060 
4061 	free_cpumask_var(tmpmask);
4062 
4063 out:
4064 	mutex_unlock(&rdtgroup_mutex);
4065 	rdtgroup_kn_put(rdtgrp, kn);
4066 	rdtgroup_kn_put(new_prdtgrp, new_parent);
4067 	return ret;
4068 }
4069 
4070 static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
4071 {
4072 	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
4073 		seq_puts(seq, ",cdp");
4074 
4075 	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
4076 		seq_puts(seq, ",cdpl2");
4077 
4078 	if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA)))
4079 		seq_puts(seq, ",mba_MBps");
4080 
4081 	if (resctrl_debug)
4082 		seq_puts(seq, ",debug");
4083 
4084 	return 0;
4085 }
4086 
4087 static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
4088 	.mkdir		= rdtgroup_mkdir,
4089 	.rmdir		= rdtgroup_rmdir,
4090 	.rename		= rdtgroup_rename,
4091 	.show_options	= rdtgroup_show_options,
4092 };
4093 
4094 static int rdtgroup_setup_root(struct rdt_fs_context *ctx)
4095 {
4096 	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
4097 				      KERNFS_ROOT_CREATE_DEACTIVATED |
4098 				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
4099 				      &rdtgroup_default);
4100 	if (IS_ERR(rdt_root))
4101 		return PTR_ERR(rdt_root);
4102 
4103 	ctx->kfc.root = rdt_root;
4104 	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
4105 
4106 	return 0;
4107 }
4108 
4109 static void rdtgroup_destroy_root(void)
4110 {
4111 	kernfs_destroy_root(rdt_root);
4112 	rdtgroup_default.kn = NULL;
4113 }
4114 
4115 static void __init rdtgroup_setup_default(void)
4116 {
4117 	mutex_lock(&rdtgroup_mutex);
4118 
4119 	rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
4120 	rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
4121 	rdtgroup_default.type = RDTCTRL_GROUP;
4122 	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
4123 
4124 	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
4125 
4126 	mutex_unlock(&rdtgroup_mutex);
4127 }
4128 
4129 static void domain_destroy_mon_state(struct rdt_mon_domain *d)
4130 {
4131 	bitmap_free(d->rmid_busy_llc);
4132 	kfree(d->mbm_total);
4133 	kfree(d->mbm_local);
4134 }
4135 
4136 void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
4137 {
4138 	mutex_lock(&rdtgroup_mutex);
4139 
4140 	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
4141 		mba_sc_domain_destroy(r, d);
4142 
4143 	mutex_unlock(&rdtgroup_mutex);
4144 }
4145 
4146 void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
4147 {
4148 	mutex_lock(&rdtgroup_mutex);
4149 
4150 	/*
4151 	 * If resctrl is mounted, remove all the
4152 	 * per domain monitor data directories.
4153 	 */
4154 	if (resctrl_mounted && resctrl_arch_mon_capable())
4155 		rmdir_mondata_subdir_allrdtgrp(r, d);
4156 
4157 	if (resctrl_is_mbm_enabled())
4158 		cancel_delayed_work(&d->mbm_over);
4159 	if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) {
4160 		/*
4161 		 * When a package is going down, forcefully
4162 		 * decrement rmid->ebusy. There is no way to know
4163 		 * that the L3 was flushed and hence may lead to
4164 		 * incorrect counts in rare scenarios, but leaving
4165 		 * the RMID as busy creates RMID leaks if the
4166 		 * package never comes back.
4167 		 */
4168 		__check_limbo(d, true);
4169 		cancel_delayed_work(&d->cqm_limbo);
4170 	}
4171 
4172 	domain_destroy_mon_state(d);
4173 
4174 	mutex_unlock(&rdtgroup_mutex);
4175 }
4176 
4177 /**
4178  * domain_setup_mon_state() - Initialise domain monitoring structures.
4179  * @r:	The resource for the newly online domain.
4180  * @d:	The newly online domain.
4181  *
4182  * Allocate monitor resources that belong to this domain.
4183  * Called when the first CPU of a domain comes online, regardless of whether
4184  * the filesystem is mounted.
4185  * During boot this may be called before global allocations have been made by
4186  * resctrl_mon_resource_init().
4187  *
4188  * Returns 0 for success, or -ENOMEM.
4189  */
4190 static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
4191 {
4192 	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
4193 	size_t tsize;
4194 
4195 	if (resctrl_arch_is_llc_occupancy_enabled()) {
4196 		d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
4197 		if (!d->rmid_busy_llc)
4198 			return -ENOMEM;
4199 	}
4200 	if (resctrl_arch_is_mbm_total_enabled()) {
4201 		tsize = sizeof(*d->mbm_total);
4202 		d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
4203 		if (!d->mbm_total) {
4204 			bitmap_free(d->rmid_busy_llc);
4205 			return -ENOMEM;
4206 		}
4207 	}
4208 	if (resctrl_arch_is_mbm_local_enabled()) {
4209 		tsize = sizeof(*d->mbm_local);
4210 		d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
4211 		if (!d->mbm_local) {
4212 			bitmap_free(d->rmid_busy_llc);
4213 			kfree(d->mbm_total);
4214 			return -ENOMEM;
4215 		}
4216 	}
4217 
4218 	return 0;
4219 }
4220 
4221 int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
4222 {
4223 	int err = 0;
4224 
4225 	mutex_lock(&rdtgroup_mutex);
4226 
4227 	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
4228 		/* RDT_RESOURCE_MBA is never mon_capable */
4229 		err = mba_sc_domain_allocate(r, d);
4230 	}
4231 
4232 	mutex_unlock(&rdtgroup_mutex);
4233 
4234 	return err;
4235 }
4236 
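/*
 * Called when the first CPU of a monitoring domain comes online:
 * allocate the domain's monitoring state, arm the MBM overflow worker,
 * initialise the limbo worker and, if resctrl is already mounted,
 * create the per-domain monitor data directories.
 */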
4237 int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
4238 {
4239 	int err;
4240 
4241 	mutex_lock(&rdtgroup_mutex);
4242 
4243 	err = domain_setup_mon_state(r, d);
4244 	if (err)
4245 		goto out_unlock;
4246 
4247 	if (resctrl_is_mbm_enabled()) {
4248 		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
4249 		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
4250 					   RESCTRL_PICK_ANY_CPU);
4251 	}
4252 
4253 	if (resctrl_arch_is_llc_occupancy_enabled())
4254 		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
4255 
4256 	/*
4257 	 * If the filesystem is not mounted then only the default resource group
4258 	 * exists. Creation of its directories is deferred until mount time
4259 	 * by rdt_get_tree() calling mkdir_mondata_all().
4260 	 * If resctrl is mounted, add per domain monitor data directories.
4261 	 */
4262 	if (resctrl_mounted && resctrl_arch_mon_capable())
4263 		mkdir_mondata_subdir_allrdtgrp(r, d);
4264 
4265 out_unlock:
4266 	mutex_unlock(&rdtgroup_mutex);
4267 
4268 	return err;
4269 }
4270 
4271 void resctrl_online_cpu(unsigned int cpu)
4272 {
4273 	mutex_lock(&rdtgroup_mutex);
4274 	/* A newly onlined CPU is placed in the default rdtgroup. */
4275 	cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
4276 	mutex_unlock(&rdtgroup_mutex);
4277 }
4278 
4279 static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
4280 {
4281 	struct rdtgroup *cr;
4282 
4283 	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
4284 		if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
4285 			break;
4286 	}
4287 }
4288 
4289 static struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu,
4290 						      struct rdt_resource *r)
4291 {
4292 	struct rdt_mon_domain *d;
4293 
4294 	lockdep_assert_cpus_held();
4295 
4296 	list_for_each_entry(d, &r->mon_domains, hdr.list) {
4297 		/* Find the domain that contains this CPU */
4298 		if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
4299 			return d;
4300 	}
4301 
4302 	return NULL;
4303 }
4304 
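/*
 * Remove the CPU from the resource group that owns it. If the CPU was
 * running the MBM overflow or limbo worker for its L3 monitoring domain,
 * move that work to another CPU in the same domain.
 */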
4305 void resctrl_offline_cpu(unsigned int cpu)
4306 {
4307 	struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3);
4308 	struct rdt_mon_domain *d;
4309 	struct rdtgroup *rdtgrp;
4310 
4311 	mutex_lock(&rdtgroup_mutex);
4312 	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
4313 		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
4314 			clear_childcpus(rdtgrp, cpu);
4315 			break;
4316 		}
4317 	}
4318 
4319 	if (!l3->mon_capable)
4320 		goto out_unlock;
4321 
4322 	d = get_mon_domain_from_cpu(cpu, l3);
4323 	if (d) {
4324 		if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) {
4325 			cancel_delayed_work(&d->mbm_over);
4326 			mbm_setup_overflow_handler(d, 0, cpu);
4327 		}
4328 		if (resctrl_arch_is_llc_occupancy_enabled() &&
4329 		    cpu == d->cqm_work_cpu && has_busy_rmid(d)) {
4330 			cancel_delayed_work(&d->cqm_limbo);
4331 			cqm_setup_limbo_handler(d, 0, cpu);
4332 		}
4333 	}
4334 
4335 out_unlock:
4336 	mutex_unlock(&rdtgroup_mutex);
4337 }
4338 
4339 /*
4340  * resctrl_init - resctrl filesystem initialization
4341  *
4342  * Set up the resctrl filesystem: set up the root, create the mount point,
4343  * register the resctrl filesystem and initialize files under the root directory.
4344  *
4345  * Return: 0 on success or -errno
4346  */
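/*
 * Once registered, the filesystem is mounted from user space, e.g.
 * (illustrative):
 *
 *   mount -t resctrl resctrl /sys/fs/resctrl
 *
 * optionally with mount options such as "cdp", "cdpl2", "mba_MBps" or
 * "debug" (see rdtgroup_show_options() above).
 */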
4347 int __init resctrl_init(void)
4348 {
4349 	int ret = 0;
4350 
4351 	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
4352 		     sizeof(last_cmd_status_buf));
4353 
4354 	rdtgroup_setup_default();
4355 
4356 	thread_throttle_mode_init();
4357 
4358 	ret = resctrl_mon_resource_init();
4359 	if (ret)
4360 		return ret;
4361 
4362 	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
4363 	if (ret) {
4364 		resctrl_mon_resource_exit();
4365 		return ret;
4366 	}
4367 
4368 	ret = register_filesystem(&rdt_fs_type);
4369 	if (ret)
4370 		goto cleanup_mountpoint;
4371 
4372 	/*
4373 	 * Adding the resctrl debugfs directory here may not be ideal since
4374 	 * it would let the resctrl debugfs directory appear on the debugfs
4375 	 * filesystem before the resctrl filesystem is mounted.
4376 	 * It may also be ok since that would enable debugging of RDT before
4377 	 * resctrl is mounted.
4378 	 * The reason why the debugfs directory is created here and not in
4379 	 * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
4380 	 * during the debugfs directory creation also &sb->s_type->i_mutex_key
4381 	 * (the lockdep class of inode->i_rwsem). Other filesystem
4382 	 * interactions (e.g. SyS_getdents) have the lock ordering:
4383 	 * &sb->s_type->i_mutex_key --> &mm->mmap_lock
4384 	 * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
4385 	 * is taken, thus creating dependency:
4386 	 * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
4387 	 * issues considering the other two lock dependencies.
4388 	 * By creating the debugfs directory here we avoid a dependency
4389 	 * that may cause deadlock (even though file operations cannot
4390 	 * occur until the filesystem is mounted, it is not clear how to
4391 	 * tell lockdep that).
4392 	 */
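	/*
	 * In short, creating the directory under rdtgroup_mutex would add the
	 * dependency rdtgroup_mutex -> i_mutex_key, closing the cycle
	 * i_mutex_key -> mmap_lock -> rdtgroup_mutex described above.
	 */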
4393 	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
4394 
4395 	return 0;
4396 
4397 cleanup_mountpoint:
4398 	sysfs_remove_mount_point(fs_kobj, "resctrl");
4399 	resctrl_mon_resource_exit();
4400 
4401 	return ret;
4402 }
4403 
4404 void __exit resctrl_exit(void)
4405 {
4406 	debugfs_remove_recursive(debugfs_resctrl);
4407 	unregister_filesystem(&rdt_fs_type);
4408 	sysfs_remove_mount_point(fs_kobj, "resctrl");
4409 
4410 	resctrl_mon_resource_exit();
4411 }
4412