1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * User interface for Resource Allocation in Resource Director Technology (RDT)
4 *
5 * Copyright (C) 2016 Intel Corporation
6 *
7 * Author: Fenghua Yu <fenghua.yu@intel.com>
8 *
9 * More information about RDT can be found in the Intel (R) x86 Architecture
10 * Software Developer Manual.
11 */
12
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15 #include <linux/cpu.h>
16 #include <linux/debugfs.h>
17 #include <linux/fs.h>
18 #include <linux/fs_parser.h>
19 #include <linux/sysfs.h>
20 #include <linux/kernfs.h>
21 #include <linux/seq_buf.h>
22 #include <linux/seq_file.h>
23 #include <linux/sched/signal.h>
24 #include <linux/sched/task.h>
25 #include <linux/slab.h>
26 #include <linux/task_work.h>
27 #include <linux/user_namespace.h>
28
29 #include <uapi/linux/magic.h>
30
31 #include <asm/resctrl.h>
32 #include "internal.h"
33
34 DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
35 DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
36 DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
37
38 /* Mutex to protect rdtgroup access. */
39 DEFINE_MUTEX(rdtgroup_mutex);
40
41 static struct kernfs_root *rdt_root;
42 struct rdtgroup rdtgroup_default;
43 LIST_HEAD(rdt_all_groups);
44
45 /* list of entries for the schemata file */
46 LIST_HEAD(resctrl_schema_all);
47
48 /* The filesystem can only be mounted once. */
49 bool resctrl_mounted;
50
51 /* Kernel fs node for "info" directory under root */
52 static struct kernfs_node *kn_info;
53
54 /* Kernel fs node for "mon_groups" directory under root */
55 static struct kernfs_node *kn_mongrp;
56
57 /* Kernel fs node for "mon_data" directory under root */
58 static struct kernfs_node *kn_mondata;
59
60 /*
61 * Used to store the max resource name width to display the schemata names in
62 * a tabular format.
63 */
64 int max_name_width;
65
66 static struct seq_buf last_cmd_status;
67 static char last_cmd_status_buf[512];
68
69 static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
70 static void rdtgroup_destroy_root(void);
71
72 struct dentry *debugfs_resctrl;
73
74 /*
75 * Memory bandwidth monitoring event to use for the default CTRL_MON group
76 * and each new CTRL_MON group created by the user. Only relevant when
77 * the filesystem is mounted with the "mba_MBps" option so it does not
78 * matter that it remains uninitialized on systems that do not support
79 * the "mba_MBps" option.
80 */
81 enum resctrl_event_id mba_mbps_default_event;
82
83 static bool resctrl_debug;
84
85 void rdt_last_cmd_clear(void)
86 {
87 lockdep_assert_held(&rdtgroup_mutex);
88 seq_buf_clear(&last_cmd_status);
89 }
90
91 void rdt_last_cmd_puts(const char *s)
92 {
93 lockdep_assert_held(&rdtgroup_mutex);
94 seq_buf_puts(&last_cmd_status, s);
95 }
96
97 void rdt_last_cmd_printf(const char *fmt, ...)
98 {
99 va_list ap;
100
101 va_start(ap, fmt);
102 lockdep_assert_held(&rdtgroup_mutex);
103 seq_buf_vprintf(&last_cmd_status, fmt, ap);
104 va_end(ap);
105 }
106
107 void rdt_staged_configs_clear(void)
108 {
109 struct rdt_ctrl_domain *dom;
110 struct rdt_resource *r;
111
112 lockdep_assert_held(&rdtgroup_mutex);
113
114 for_each_alloc_capable_rdt_resource(r) {
115 list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
116 memset(dom->staged_config, 0, sizeof(dom->staged_config));
117 }
118 }
119
120 static bool resctrl_is_mbm_enabled(void)
121 {
122 return (resctrl_arch_is_mbm_total_enabled() ||
123 resctrl_arch_is_mbm_local_enabled());
124 }
125
126 static bool resctrl_is_mbm_event(int e)
127 {
128 return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
129 e <= QOS_L3_MBM_LOCAL_EVENT_ID);
130 }
131
132 /*
133 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
134 * we can keep a bitmap of free CLOSIDs in a single integer.
135 *
136 * Using a global CLOSID across all resources has some advantages and
137 * some drawbacks:
138 * + We can simply set current's closid to assign a task to a resource
139 * group.
140 * + Context switch code can avoid extra memory references deciding which
141 * CLOSID to load into the PQR_ASSOC MSR
142 * - We give up some options in configuring resource groups across multi-socket
143 * systems.
144 * - Our choices on how to configure each resource become progressively more
145 * limited as the number of resources grows.
146 */
147 static unsigned long closid_free_map;
148 static int closid_free_map_len;
149
150 int closids_supported(void)
151 {
152 return closid_free_map_len;
153 }
154
155 static void closid_init(void)
156 {
157 struct resctrl_schema *s;
158 u32 rdt_min_closid = 32;
159
160 /* Compute rdt_min_closid across all resources */
161 list_for_each_entry(s, &resctrl_schema_all, list)
162 rdt_min_closid = min(rdt_min_closid, s->num_closid);
163
164 closid_free_map = BIT_MASK(rdt_min_closid) - 1;
165
166 /* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
167 __clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map);
168 closid_free_map_len = rdt_min_closid;
169 }
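
/*
 * Illustrative example (not in the original source): if every schema
 * reports at least four CLOSIDs, closid_init() above sets
 * closid_free_map = 0b1111 and then clears bit RESCTRL_RESERVED_CLOSID
 * (bit 0) for the default group, leaving 0b1110. closid_alloc() below
 * then hands out CLOSID 1 first (ffs() - 1), and closid_free() simply
 * sets the corresponding bit again.
 */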
170
171 static int closid_alloc(void)
172 {
173 int cleanest_closid;
174 u32 closid;
175
176 lockdep_assert_held(&rdtgroup_mutex);
177
178 if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
179 resctrl_arch_is_llc_occupancy_enabled()) {
180 cleanest_closid = resctrl_find_cleanest_closid();
181 if (cleanest_closid < 0)
182 return cleanest_closid;
183 closid = cleanest_closid;
184 } else {
185 closid = ffs(closid_free_map);
186 if (closid == 0)
187 return -ENOSPC;
188 closid--;
189 }
190 __clear_bit(closid, &closid_free_map);
191
192 return closid;
193 }
194
195 void closid_free(int closid)
196 {
197 lockdep_assert_held(&rdtgroup_mutex);
198
199 __set_bit(closid, &closid_free_map);
200 }
201
202 /**
203 * closid_allocated - test if provided closid is in use
204 * @closid: closid to be tested
205 *
206 * Return: true if @closid is currently associated with a resource group,
207 * false if @closid is free
208 */
209 bool closid_allocated(unsigned int closid)
210 {
211 lockdep_assert_held(&rdtgroup_mutex);
212
213 return !test_bit(closid, &closid_free_map);
214 }
215
216 /**
217 * rdtgroup_mode_by_closid - Return mode of resource group with closid
218 * @closid: closid of the resource group
219 *
220 * Each resource group is associated with a @closid. Here the mode
221 * of a resource group can be queried by searching for it using its closid.
222 *
223 * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
224 */
225 enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
226 {
227 struct rdtgroup *rdtgrp;
228
229 list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
230 if (rdtgrp->closid == closid)
231 return rdtgrp->mode;
232 }
233
234 return RDT_NUM_MODES;
235 }
236
237 static const char * const rdt_mode_str[] = {
238 [RDT_MODE_SHAREABLE] = "shareable",
239 [RDT_MODE_EXCLUSIVE] = "exclusive",
240 [RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup",
241 [RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked",
242 };
243
244 /**
245 * rdtgroup_mode_str - Return the string representation of mode
246 * @mode: the resource group mode as &enum rdtgroup_mode
247 *
248 * Return: string representation of valid mode, "unknown" otherwise
249 */
250 static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
251 {
252 if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
253 return "unknown";
254
255 return rdt_mode_str[mode];
256 }
257
258 /* set uid and gid of rdtgroup dirs and files to that of the creator */
259 static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
260 {
261 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
262 .ia_uid = current_fsuid(),
263 .ia_gid = current_fsgid(), };
264
265 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
266 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
267 return 0;
268
269 return kernfs_setattr(kn, &iattr);
270 }
271
272 static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
273 {
274 struct kernfs_node *kn;
275 int ret;
276
277 kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
278 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
279 0, rft->kf_ops, rft, NULL, NULL);
280 if (IS_ERR(kn))
281 return PTR_ERR(kn);
282
283 ret = rdtgroup_kn_set_ugid(kn);
284 if (ret) {
285 kernfs_remove(kn);
286 return ret;
287 }
288
289 return 0;
290 }
291
292 static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
293 {
294 struct kernfs_open_file *of = m->private;
295 struct rftype *rft = of->kn->priv;
296
297 if (rft->seq_show)
298 return rft->seq_show(of, m, arg);
299 return 0;
300 }
301
302 static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
303 size_t nbytes, loff_t off)
304 {
305 struct rftype *rft = of->kn->priv;
306
307 if (rft->write)
308 return rft->write(of, buf, nbytes, off);
309
310 return -EINVAL;
311 }
312
313 static const struct kernfs_ops rdtgroup_kf_single_ops = {
314 .atomic_write_len = PAGE_SIZE,
315 .write = rdtgroup_file_write,
316 .seq_show = rdtgroup_seqfile_show,
317 };
318
319 static const struct kernfs_ops kf_mondata_ops = {
320 .atomic_write_len = PAGE_SIZE,
321 .seq_show = rdtgroup_mondata_show,
322 };
323
324 static bool is_cpu_list(struct kernfs_open_file *of)
325 {
326 struct rftype *rft = of->kn->priv;
327
328 return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
329 }
330
331 static int rdtgroup_cpus_show(struct kernfs_open_file *of,
332 struct seq_file *s, void *v)
333 {
334 struct rdtgroup *rdtgrp;
335 struct cpumask *mask;
336 int ret = 0;
337
338 rdtgrp = rdtgroup_kn_lock_live(of->kn);
339
340 if (rdtgrp) {
341 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
342 if (!rdtgrp->plr->d) {
343 rdt_last_cmd_clear();
344 rdt_last_cmd_puts("Cache domain offline\n");
345 ret = -ENODEV;
346 } else {
347 mask = &rdtgrp->plr->d->hdr.cpu_mask;
348 seq_printf(s, is_cpu_list(of) ?
349 "%*pbl\n" : "%*pb\n",
350 cpumask_pr_args(mask));
351 }
352 } else {
353 seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
354 cpumask_pr_args(&rdtgrp->cpu_mask));
355 }
356 } else {
357 ret = -ENOENT;
358 }
359 rdtgroup_kn_unlock(of->kn);
360
361 return ret;
362 }
363
364 /*
365 * This is safe against resctrl_sched_in() called from __switch_to()
366 * because __switch_to() is executed with interrupts disabled. A local call
367 * from update_closid_rmid() is protected against __switch_to() because
368 * preemption is disabled.
369 */
370 void resctrl_arch_sync_cpu_closid_rmid(void *info)
371 {
372 struct resctrl_cpu_defaults *r = info;
373
374 if (r) {
375 this_cpu_write(pqr_state.default_closid, r->closid);
376 this_cpu_write(pqr_state.default_rmid, r->rmid);
377 }
378
379 /*
380 * We cannot unconditionally write the MSR because the current
381 * executing task might have its own closid selected. Just reuse
382 * the context switch code.
383 */
384 resctrl_sched_in(current);
385 }
386
387 /*
388 * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
389 *
390 * Per task closids/rmids must have been set up before calling this function.
391 * @r may be NULL.
392 */
393 static void
394 update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
395 {
396 struct resctrl_cpu_defaults defaults, *p = NULL;
397
398 if (r) {
399 defaults.closid = r->closid;
400 defaults.rmid = r->mon.rmid;
401 p = &defaults;
402 }
403
404 on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_closid_rmid, p, 1);
405 }
406
407 static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
408 cpumask_var_t tmpmask)
409 {
410 struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
411 struct list_head *head;
412
413 /* Check whether cpus belong to parent ctrl group */
414 cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
415 if (!cpumask_empty(tmpmask)) {
416 rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
417 return -EINVAL;
418 }
419
420 /* Check whether cpus are dropped from this group */
421 cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
422 if (!cpumask_empty(tmpmask)) {
423 /* Give any dropped cpus to parent rdtgroup */
424 cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
425 update_closid_rmid(tmpmask, prgrp);
426 }
427
428 /*
429 * If we added cpus, remove them from previous group that owned them
430 * and update per-cpu rmid
431 */
432 cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
433 if (!cpumask_empty(tmpmask)) {
434 head = &prgrp->mon.crdtgrp_list;
435 list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
436 if (crgrp == rdtgrp)
437 continue;
438 cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
439 tmpmask);
440 }
441 update_closid_rmid(tmpmask, rdtgrp);
442 }
443
444 /* Done pushing/pulling - update this group with new mask */
445 cpumask_copy(&rdtgrp->cpu_mask, newmask);
446
447 return 0;
448 }
449
450 static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
451 {
452 struct rdtgroup *crgrp;
453
454 cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
455 /* Update the child mon group masks as well */
456 list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
457 cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
458 }
459
460 static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
461 cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
462 {
463 struct rdtgroup *r, *crgrp;
464 struct list_head *head;
465
466 /* Check whether cpus are dropped from this group */
467 cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
468 if (!cpumask_empty(tmpmask)) {
469 /* Can't drop from default group */
470 if (rdtgrp == &rdtgroup_default) {
471 rdt_last_cmd_puts("Can't drop CPUs from default group\n");
472 return -EINVAL;
473 }
474
475 /* Give any dropped cpus to rdtgroup_default */
476 cpumask_or(&rdtgroup_default.cpu_mask,
477 &rdtgroup_default.cpu_mask, tmpmask);
478 update_closid_rmid(tmpmask, &rdtgroup_default);
479 }
480
481 /*
482 * If we added cpus, remove them from previous group and
483 * the prev group's child groups that owned them
484 * and update per-cpu closid/rmid.
485 */
486 cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
487 if (!cpumask_empty(tmpmask)) {
488 list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
489 if (r == rdtgrp)
490 continue;
491 cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
492 if (!cpumask_empty(tmpmask1))
493 cpumask_rdtgrp_clear(r, tmpmask1);
494 }
495 update_closid_rmid(tmpmask, rdtgrp);
496 }
497
498 /* Done pushing/pulling - update this group with new mask */
499 cpumask_copy(&rdtgrp->cpu_mask, newmask);
500
501 /*
502 * Clear child mon group masks since there is a new parent mask
503 * now and update the rmid for the cpus the child lost.
504 */
505 head = &rdtgrp->mon.crdtgrp_list;
506 list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
507 cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
508 update_closid_rmid(tmpmask, rdtgrp);
509 cpumask_clear(&crgrp->cpu_mask);
510 }
511
512 return 0;
513 }
514
515 static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
516 char *buf, size_t nbytes, loff_t off)
517 {
518 cpumask_var_t tmpmask, newmask, tmpmask1;
519 struct rdtgroup *rdtgrp;
520 int ret;
521
522 if (!buf)
523 return -EINVAL;
524
525 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
526 return -ENOMEM;
527 if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
528 free_cpumask_var(tmpmask);
529 return -ENOMEM;
530 }
531 if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
532 free_cpumask_var(tmpmask);
533 free_cpumask_var(newmask);
534 return -ENOMEM;
535 }
536
537 rdtgrp = rdtgroup_kn_lock_live(of->kn);
538 if (!rdtgrp) {
539 ret = -ENOENT;
540 goto unlock;
541 }
542
543 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
544 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
545 ret = -EINVAL;
546 rdt_last_cmd_puts("Pseudo-locking in progress\n");
547 goto unlock;
548 }
549
550 if (is_cpu_list(of))
551 ret = cpulist_parse(buf, newmask);
552 else
553 ret = cpumask_parse(buf, newmask);
554
555 if (ret) {
556 rdt_last_cmd_puts("Bad CPU list/mask\n");
557 goto unlock;
558 }
559
560 /* check that user didn't specify any offline cpus */
561 cpumask_andnot(tmpmask, newmask, cpu_online_mask);
562 if (!cpumask_empty(tmpmask)) {
563 ret = -EINVAL;
564 rdt_last_cmd_puts("Can only assign online CPUs\n");
565 goto unlock;
566 }
567
568 if (rdtgrp->type == RDTCTRL_GROUP)
569 ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
570 else if (rdtgrp->type == RDTMON_GROUP)
571 ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
572 else
573 ret = -EINVAL;
574
575 unlock:
576 rdtgroup_kn_unlock(of->kn);
577 free_cpumask_var(tmpmask);
578 free_cpumask_var(newmask);
579 free_cpumask_var(tmpmask1);
580
581 return ret ?: nbytes;
582 }
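
/*
 * Usage sketch (illustrative only; "grp0" is a hypothetical group): the
 * "cpus_list" file takes cpulist_parse() syntax, e.g.
 *   echo "0-3,8" > /sys/fs/resctrl/grp0/cpus_list
 * while the "cpus" file takes the cpumask_parse() hex format, e.g.
 *   echo "0000010f" > /sys/fs/resctrl/grp0/cpus
 * Either write fails with "Can only assign online CPUs" if the mask
 * names an offline CPU.
 */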
583
584 /**
585 * rdtgroup_remove - the helper to remove resource group safely
586 * @rdtgrp: resource group to remove
587 *
588 * On resource group creation via a mkdir, an extra kernfs_node reference is
589 * taken to ensure that the rdtgroup structure remains accessible for the
590 * rdtgroup_kn_unlock() calls where it is removed.
591 *
592 * Drop the extra reference here, then free the rdtgroup structure.
593 *
594 * Return: void
595 */
596 static void rdtgroup_remove(struct rdtgroup *rdtgrp)
597 {
598 kernfs_put(rdtgrp->kn);
599 kfree(rdtgrp);
600 }
601
602 static void _update_task_closid_rmid(void *task)
603 {
604 /*
605 * If the task is still current on this CPU, update PQR_ASSOC MSR.
606 * Otherwise, the MSR is updated when the task is scheduled in.
607 */
608 if (task == current)
609 resctrl_sched_in(task);
610 }
611
612 static void update_task_closid_rmid(struct task_struct *t)
613 {
614 if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
615 smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
616 else
617 _update_task_closid_rmid(t);
618 }
619
620 static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp)
621 {
622 u32 closid, rmid = rdtgrp->mon.rmid;
623
624 if (rdtgrp->type == RDTCTRL_GROUP)
625 closid = rdtgrp->closid;
626 else if (rdtgrp->type == RDTMON_GROUP)
627 closid = rdtgrp->mon.parent->closid;
628 else
629 return false;
630
631 return resctrl_arch_match_closid(tsk, closid) &&
632 resctrl_arch_match_rmid(tsk, closid, rmid);
633 }
634
635 static int __rdtgroup_move_task(struct task_struct *tsk,
636 struct rdtgroup *rdtgrp)
637 {
638 /* If the task is already in rdtgrp, no need to move the task. */
639 if (task_in_rdtgroup(tsk, rdtgrp))
640 return 0;
641
642 /*
643 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
644 * updated with them.
645 *
646 * For ctrl_mon groups, move both closid and rmid.
647 * For monitor groups, tasks can only be moved from
648 * their parent CTRL group.
649 */
650 if (rdtgrp->type == RDTMON_GROUP &&
651 !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) {
652 rdt_last_cmd_puts("Can't move task to different control group\n");
653 return -EINVAL;
654 }
655
656 if (rdtgrp->type == RDTMON_GROUP)
657 resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid,
658 rdtgrp->mon.rmid);
659 else
660 resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid,
661 rdtgrp->mon.rmid);
662
663 /*
664 * Ensure the task's closid and rmid are written before determining if
665 * the task is current, which decides whether it will be interrupted.
666 * This pairs with the full barrier between the rq->curr update and
667 * resctrl_sched_in() during context switch.
668 */
669 smp_mb();
670
671 /*
672 * By now, the task's closid and rmid are set. If the task is current
673 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
674 * group go into effect. If the task is not current, the MSR will be
675 * updated when the task is scheduled in.
676 */
677 update_task_closid_rmid(tsk);
678
679 return 0;
680 }
681
682 static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
683 {
684 return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) &&
685 resctrl_arch_match_closid(t, r->closid));
686 }
687
688 static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
689 {
690 return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) &&
691 resctrl_arch_match_rmid(t, r->mon.parent->closid,
692 r->mon.rmid));
693 }
694
695 /**
696 * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
697 * @r: Resource group
698 *
699 * Return: 1 if tasks have been assigned to @r, 0 otherwise
700 */
701 int rdtgroup_tasks_assigned(struct rdtgroup *r)
702 {
703 struct task_struct *p, *t;
704 int ret = 0;
705
706 lockdep_assert_held(&rdtgroup_mutex);
707
708 rcu_read_lock();
709 for_each_process_thread(p, t) {
710 if (is_closid_match(t, r) || is_rmid_match(t, r)) {
711 ret = 1;
712 break;
713 }
714 }
715 rcu_read_unlock();
716
717 return ret;
718 }
719
720 static int rdtgroup_task_write_permission(struct task_struct *task,
721 struct kernfs_open_file *of)
722 {
723 const struct cred *tcred = get_task_cred(task);
724 const struct cred *cred = current_cred();
725 int ret = 0;
726
727 /*
728 * Even if we're attaching all tasks in the thread group, we only
729 * need to check permissions on one of them.
730 */
731 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
732 !uid_eq(cred->euid, tcred->uid) &&
733 !uid_eq(cred->euid, tcred->suid)) {
734 rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
735 ret = -EPERM;
736 }
737
738 put_cred(tcred);
739 return ret;
740 }
741
742 static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
743 struct kernfs_open_file *of)
744 {
745 struct task_struct *tsk;
746 int ret;
747
748 rcu_read_lock();
749 if (pid) {
750 tsk = find_task_by_vpid(pid);
751 if (!tsk) {
752 rcu_read_unlock();
753 rdt_last_cmd_printf("No task %d\n", pid);
754 return -ESRCH;
755 }
756 } else {
757 tsk = current;
758 }
759
760 get_task_struct(tsk);
761 rcu_read_unlock();
762
763 ret = rdtgroup_task_write_permission(tsk, of);
764 if (!ret)
765 ret = __rdtgroup_move_task(tsk, rdtgrp);
766
767 put_task_struct(tsk);
768 return ret;
769 }
770
771 static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
772 char *buf, size_t nbytes, loff_t off)
773 {
774 struct rdtgroup *rdtgrp;
775 char *pid_str;
776 int ret = 0;
777 pid_t pid;
778
779 rdtgrp = rdtgroup_kn_lock_live(of->kn);
780 if (!rdtgrp) {
781 rdtgroup_kn_unlock(of->kn);
782 return -ENOENT;
783 }
784 rdt_last_cmd_clear();
785
786 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
787 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
788 ret = -EINVAL;
789 rdt_last_cmd_puts("Pseudo-locking in progress\n");
790 goto unlock;
791 }
792
793 while (buf && buf[0] != '\0' && buf[0] != '\n') {
794 pid_str = strim(strsep(&buf, ","));
795
796 if (kstrtoint(pid_str, 0, &pid)) {
797 rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str);
798 ret = -EINVAL;
799 break;
800 }
801
802 if (pid < 0) {
803 rdt_last_cmd_printf("Invalid pid %d\n", pid);
804 ret = -EINVAL;
805 break;
806 }
807
808 ret = rdtgroup_move_task(pid, rdtgrp, of);
809 if (ret) {
810 rdt_last_cmd_printf("Error while processing task %d\n", pid);
811 break;
812 }
813 }
814
815 unlock:
816 rdtgroup_kn_unlock(of->kn);
817
818 return ret ?: nbytes;
819 }
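
/*
 * Usage sketch (illustrative only; "grp0" is a hypothetical group): the
 * "tasks" file accepts one or more PIDs separated by commas, e.g.
 *   echo "1234,5678" > /sys/fs/resctrl/grp0/tasks
 * Parsing stops at the first PID that cannot be moved and the reason is
 * reported via last_cmd_status.
 */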
820
821 static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
822 {
823 struct task_struct *p, *t;
824 pid_t pid;
825
826 rcu_read_lock();
827 for_each_process_thread(p, t) {
828 if (is_closid_match(t, r) || is_rmid_match(t, r)) {
829 pid = task_pid_vnr(t);
830 if (pid)
831 seq_printf(s, "%d\n", pid);
832 }
833 }
834 rcu_read_unlock();
835 }
836
837 static int rdtgroup_tasks_show(struct kernfs_open_file *of,
838 struct seq_file *s, void *v)
839 {
840 struct rdtgroup *rdtgrp;
841 int ret = 0;
842
843 rdtgrp = rdtgroup_kn_lock_live(of->kn);
844 if (rdtgrp)
845 show_rdt_tasks(rdtgrp, s);
846 else
847 ret = -ENOENT;
848 rdtgroup_kn_unlock(of->kn);
849
850 return ret;
851 }
852
853 static int rdtgroup_closid_show(struct kernfs_open_file *of,
854 struct seq_file *s, void *v)
855 {
856 struct rdtgroup *rdtgrp;
857 int ret = 0;
858
859 rdtgrp = rdtgroup_kn_lock_live(of->kn);
860 if (rdtgrp)
861 seq_printf(s, "%u\n", rdtgrp->closid);
862 else
863 ret = -ENOENT;
864 rdtgroup_kn_unlock(of->kn);
865
866 return ret;
867 }
868
869 static int rdtgroup_rmid_show(struct kernfs_open_file *of,
870 struct seq_file *s, void *v)
871 {
872 struct rdtgroup *rdtgrp;
873 int ret = 0;
874
875 rdtgrp = rdtgroup_kn_lock_live(of->kn);
876 if (rdtgrp)
877 seq_printf(s, "%u\n", rdtgrp->mon.rmid);
878 else
879 ret = -ENOENT;
880 rdtgroup_kn_unlock(of->kn);
881
882 return ret;
883 }
884
885 #ifdef CONFIG_PROC_CPU_RESCTRL
886
887 /*
888 * A task can only be part of one resctrl control group and of one monitor
889 * group which is associated to that control group.
890 *
891 * 1) res:
892 * mon:
893 *
894 * resctrl is not available.
895 *
896 * 2) res:/
897 * mon:
898 *
899 * Task is part of the root resctrl control group, and it is not associated
900 * to any monitor group.
901 *
902 * 3) res:/
903 * mon:mon0
904 *
905 * Task is part of the root resctrl control group and monitor group mon0.
906 *
907 * 4) res:group0
908 * mon:
909 *
910 * Task is part of resctrl control group group0, and it is not associated
911 * to any monitor group.
912 *
913 * 5) res:group0
914 * mon:mon1
915 *
916 * Task is part of resctrl control group group0 and monitor group mon1.
917 */
918 int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
919 struct pid *pid, struct task_struct *tsk)
920 {
921 struct rdtgroup *rdtg;
922 int ret = 0;
923
924 mutex_lock(&rdtgroup_mutex);
925
926 /* Return empty if resctrl has not been mounted. */
927 if (!resctrl_mounted) {
928 seq_puts(s, "res:\nmon:\n");
929 goto unlock;
930 }
931
932 list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
933 struct rdtgroup *crg;
934
935 /*
936 * Task information is only relevant for shareable
937 * and exclusive groups.
938 */
939 if (rdtg->mode != RDT_MODE_SHAREABLE &&
940 rdtg->mode != RDT_MODE_EXCLUSIVE)
941 continue;
942
943 if (!resctrl_arch_match_closid(tsk, rdtg->closid))
944 continue;
945
946 seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
947 rdt_kn_name(rdtg->kn));
948 seq_puts(s, "mon:");
949 list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
950 mon.crdtgrp_list) {
951 if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid,
952 crg->mon.rmid))
953 continue;
954 seq_printf(s, "%s", rdt_kn_name(crg->kn));
955 break;
956 }
957 seq_putc(s, '\n');
958 goto unlock;
959 }
960 /*
961 * The above search should succeed. Otherwise return
962 * with an error.
963 */
964 ret = -ENOENT;
965 unlock:
966 mutex_unlock(&rdtgroup_mutex);
967
968 return ret;
969 }
970 #endif
971
972 static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
973 struct seq_file *seq, void *v)
974 {
975 int len;
976
977 mutex_lock(&rdtgroup_mutex);
978 len = seq_buf_used(&last_cmd_status);
979 if (len)
980 seq_printf(seq, "%.*s", len, last_cmd_status_buf);
981 else
982 seq_puts(seq, "ok\n");
983 mutex_unlock(&rdtgroup_mutex);
984 return 0;
985 }
986
987 static void *rdt_kn_parent_priv(struct kernfs_node *kn)
988 {
989 /*
990 * The parent pointer is only valid within RCU section since it can be
991 * replaced.
992 */
993 guard(rcu)();
994 return rcu_dereference(kn->__parent)->priv;
995 }
996
997 static int rdt_num_closids_show(struct kernfs_open_file *of,
998 struct seq_file *seq, void *v)
999 {
1000 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1001
1002 seq_printf(seq, "%u\n", s->num_closid);
1003 return 0;
1004 }
1005
1006 static int rdt_default_ctrl_show(struct kernfs_open_file *of,
1007 struct seq_file *seq, void *v)
1008 {
1009 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1010 struct rdt_resource *r = s->res;
1011
1012 seq_printf(seq, "%x\n", resctrl_get_default_ctrl(r));
1013 return 0;
1014 }
1015
1016 static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
1017 struct seq_file *seq, void *v)
1018 {
1019 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1020 struct rdt_resource *r = s->res;
1021
1022 seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
1023 return 0;
1024 }
1025
1026 static int rdt_shareable_bits_show(struct kernfs_open_file *of,
1027 struct seq_file *seq, void *v)
1028 {
1029 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1030 struct rdt_resource *r = s->res;
1031
1032 seq_printf(seq, "%x\n", r->cache.shareable_bits);
1033 return 0;
1034 }
1035
1036 /*
1037 * rdt_bit_usage_show - Display current usage of resources
1038 *
1039 * A domain is a shared resource that can now be allocated differently. Here
1040 * we display the current regions of the domain as an annotated bitmask.
1041 * For each domain of this resource its allocation bitmask
1042 * is annotated as below to indicate the current usage of the corresponding bit:
1043 * 0 - currently unused
1044 * X - currently available for sharing and used by software and hardware
1045 * H - currently used by hardware only but available for software use
1046 * S - currently used and shareable by software only
1047 * E - currently used exclusively by one resource group
1048 * P - currently pseudo-locked by one resource group
1049 */
 */
1050 static int rdt_bit_usage_show(struct kernfs_open_file *of,
1051 struct seq_file *seq, void *v)
1052 {
1053 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1054 /*
1055 * Use unsigned long even though only 32 bits are used to ensure
1056 * test_bit() is used safely.
1057 */
1058 unsigned long sw_shareable = 0, hw_shareable = 0;
1059 unsigned long exclusive = 0, pseudo_locked = 0;
1060 struct rdt_resource *r = s->res;
1061 struct rdt_ctrl_domain *dom;
1062 int i, hwb, swb, excl, psl;
1063 enum rdtgrp_mode mode;
1064 bool sep = false;
1065 u32 ctrl_val;
1066
1067 cpus_read_lock();
1068 mutex_lock(&rdtgroup_mutex);
1069 hw_shareable = r->cache.shareable_bits;
1070 list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
1071 if (sep)
1072 seq_putc(seq, ';');
1073 sw_shareable = 0;
1074 exclusive = 0;
1075 seq_printf(seq, "%d=", dom->hdr.id);
1076 for (i = 0; i < closids_supported(); i++) {
1077 if (!closid_allocated(i))
1078 continue;
1079 ctrl_val = resctrl_arch_get_config(r, dom, i,
1080 s->conf_type);
1081 mode = rdtgroup_mode_by_closid(i);
1082 switch (mode) {
1083 case RDT_MODE_SHAREABLE:
1084 sw_shareable |= ctrl_val;
1085 break;
1086 case RDT_MODE_EXCLUSIVE:
1087 exclusive |= ctrl_val;
1088 break;
1089 case RDT_MODE_PSEUDO_LOCKSETUP:
1090 /*
1091 * RDT_MODE_PSEUDO_LOCKSETUP is possible
1092 * here but not included since the CBM
1093 * associated with this CLOSID in this mode
1094 * is not initialized and no task or cpu can be
1095 * assigned this CLOSID.
1096 */
1097 break;
1098 case RDT_MODE_PSEUDO_LOCKED:
1099 case RDT_NUM_MODES:
1100 WARN(1,
1101 "invalid mode for closid %d\n", i);
1102 break;
1103 }
1104 }
1105 for (i = r->cache.cbm_len - 1; i >= 0; i--) {
1106 pseudo_locked = dom->plr ? dom->plr->cbm : 0;
1107 hwb = test_bit(i, &hw_shareable);
1108 swb = test_bit(i, &sw_shareable);
1109 excl = test_bit(i, &exclusive);
1110 psl = test_bit(i, &pseudo_locked);
1111 if (hwb && swb)
1112 seq_putc(seq, 'X');
1113 else if (hwb && !swb)
1114 seq_putc(seq, 'H');
1115 else if (!hwb && swb)
1116 seq_putc(seq, 'S');
1117 else if (excl)
1118 seq_putc(seq, 'E');
1119 else if (psl)
1120 seq_putc(seq, 'P');
1121 else /* Unused bits remain */
1122 seq_putc(seq, '0');
1123 }
1124 sep = true;
1125 }
1126 seq_putc(seq, '\n');
1127 mutex_unlock(&rdtgroup_mutex);
1128 cpus_read_unlock();
1129 return 0;
1130 }
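
/*
 * Illustrative output (not from the original source): for an L3 resource
 * with an 8-bit CBM and two domains, "bit_usage" might read
 *   0=SSSSHHXX;1=EEEE0000
 * i.e. one "<domain id>=<per-bit annotation>" field per domain, separated
 * by ';', using the legend documented above rdt_bit_usage_show().
 */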
1131
1132 static int rdt_min_bw_show(struct kernfs_open_file *of,
1133 struct seq_file *seq, void *v)
1134 {
1135 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1136 struct rdt_resource *r = s->res;
1137
1138 seq_printf(seq, "%u\n", r->membw.min_bw);
1139 return 0;
1140 }
1141
1142 static int rdt_num_rmids_show(struct kernfs_open_file *of,
1143 struct seq_file *seq, void *v)
1144 {
1145 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1146
1147 seq_printf(seq, "%d\n", r->num_rmid);
1148
1149 return 0;
1150 }
1151
1152 static int rdt_mon_features_show(struct kernfs_open_file *of,
1153 struct seq_file *seq, void *v)
1154 {
1155 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1156 struct mon_evt *mevt;
1157
1158 list_for_each_entry(mevt, &r->evt_list, list) {
1159 seq_printf(seq, "%s\n", mevt->name);
1160 if (mevt->configurable)
1161 seq_printf(seq, "%s_config\n", mevt->name);
1162 }
1163
1164 return 0;
1165 }
1166
1167 static int rdt_bw_gran_show(struct kernfs_open_file *of,
1168 struct seq_file *seq, void *v)
1169 {
1170 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1171 struct rdt_resource *r = s->res;
1172
1173 seq_printf(seq, "%u\n", r->membw.bw_gran);
1174 return 0;
1175 }
1176
1177 static int rdt_delay_linear_show(struct kernfs_open_file *of,
1178 struct seq_file *seq, void *v)
1179 {
1180 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1181 struct rdt_resource *r = s->res;
1182
1183 seq_printf(seq, "%u\n", r->membw.delay_linear);
1184 return 0;
1185 }
1186
1187 static int max_threshold_occ_show(struct kernfs_open_file *of,
1188 struct seq_file *seq, void *v)
1189 {
1190 seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);
1191
1192 return 0;
1193 }
1194
1195 static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
1196 struct seq_file *seq, void *v)
1197 {
1198 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1199 struct rdt_resource *r = s->res;
1200
1201 switch (r->membw.throttle_mode) {
1202 case THREAD_THROTTLE_PER_THREAD:
1203 seq_puts(seq, "per-thread\n");
1204 return 0;
1205 case THREAD_THROTTLE_MAX:
1206 seq_puts(seq, "max\n");
1207 return 0;
1208 case THREAD_THROTTLE_UNDEFINED:
1209 seq_puts(seq, "undefined\n");
1210 return 0;
1211 }
1212
1213 WARN_ON_ONCE(1);
1214
1215 return 0;
1216 }
1217
1218 static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
1219 char *buf, size_t nbytes, loff_t off)
1220 {
1221 unsigned int bytes;
1222 int ret;
1223
1224 ret = kstrtouint(buf, 0, &bytes);
1225 if (ret)
1226 return ret;
1227
1228 if (bytes > resctrl_rmid_realloc_limit)
1229 return -EINVAL;
1230
1231 resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);
1232
1233 return nbytes;
1234 }
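
/*
 * Usage sketch (illustrative only): the threshold is written in bytes and
 * is rounded by resctrl_arch_round_mon_val(), e.g.
 *   echo 16384 > /sys/fs/resctrl/info/L3_MON/max_threshold_occupancy
 * Values above resctrl_rmid_realloc_limit are rejected with -EINVAL.
 */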
1235
1236 /*
1237 * rdtgroup_mode_show - Display mode of this resource group
1238 */
1239 static int rdtgroup_mode_show(struct kernfs_open_file *of,
1240 struct seq_file *s, void *v)
1241 {
1242 struct rdtgroup *rdtgrp;
1243
1244 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1245 if (!rdtgrp) {
1246 rdtgroup_kn_unlock(of->kn);
1247 return -ENOENT;
1248 }
1249
1250 seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
1251
1252 rdtgroup_kn_unlock(of->kn);
1253 return 0;
1254 }
1255
1256 static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
1257 {
1258 switch (my_type) {
1259 case CDP_CODE:
1260 return CDP_DATA;
1261 case CDP_DATA:
1262 return CDP_CODE;
1263 default:
1264 case CDP_NONE:
1265 return CDP_NONE;
1266 }
1267 }
1268
1269 static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
1270 struct seq_file *seq, void *v)
1271 {
1272 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1273 struct rdt_resource *r = s->res;
1274
1275 seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks);
1276
1277 return 0;
1278 }
1279
1280 /**
1281 * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
1282 * @r: Resource to which domain instance @d belongs.
1283 * @d: The domain instance for which @closid is being tested.
1284 * @cbm: Capacity bitmask being tested.
1285 * @closid: Intended closid for @cbm.
1286 * @type: CDP type of @r.
1287 * @exclusive: Only check if overlaps with exclusive resource groups
1288 *
1289 * Checks if provided @cbm intended to be used for @closid on domain
1290 * @d overlaps with any other closids or other hardware usage associated
1291 * with this domain. If @exclusive is true then only overlaps with
1292 * resource groups in exclusive mode will be considered. If @exclusive
1293 * is false then overlaps with any resource group or hardware entities
1294 * will be considered.
1295 *
1296 * @cbm is unsigned long, even if only 32 bits are used, to make the
1297 * bitmap functions work correctly.
1298 *
1299 * Return: false if CBM does not overlap, true if it does.
1300 */
1301 static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d,
1302 unsigned long cbm, int closid,
1303 enum resctrl_conf_type type, bool exclusive)
1304 {
1305 enum rdtgrp_mode mode;
1306 unsigned long ctrl_b;
1307 int i;
1308
1309 /* Check for any overlap with regions used by hardware directly */
1310 if (!exclusive) {
1311 ctrl_b = r->cache.shareable_bits;
1312 if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
1313 return true;
1314 }
1315
1316 /* Check for overlap with other resource groups */
1317 for (i = 0; i < closids_supported(); i++) {
1318 ctrl_b = resctrl_arch_get_config(r, d, i, type);
1319 mode = rdtgroup_mode_by_closid(i);
1320 if (closid_allocated(i) && i != closid &&
1321 mode != RDT_MODE_PSEUDO_LOCKSETUP) {
1322 if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
1323 if (exclusive) {
1324 if (mode == RDT_MODE_EXCLUSIVE)
1325 return true;
1326 continue;
1327 }
1328 return true;
1329 }
1330 }
1331 }
1332
1333 return false;
1334 }
1335
1336 /**
1337 * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
1338 * @s: Schema for the resource to which domain instance @d belongs.
1339 * @d: The domain instance for which @closid is being tested.
1340 * @cbm: Capacity bitmask being tested.
1341 * @closid: Intended closid for @cbm.
1342 * @exclusive: Only check if overlaps with exclusive resource groups
1343 *
1344 * Resources that can be allocated using a CBM can use the CBM to control
1345 * the overlap of these allocations. rdtgroup_cbm_overlaps() is the test
1346 * for overlap. Overlap test is not limited to the specific resource for
1347 * which the CBM is intended though - when dealing with CDP resources that
1348 * share the underlying hardware the overlap check should be performed on
1349 * the CDP resource sharing the hardware also.
1350 *
1351 * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
1352 * overlap test.
1353 *
1354 * Return: true if CBM overlap detected, false if there is no overlap
1355 */
1356 bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
1357 unsigned long cbm, int closid, bool exclusive)
1358 {
1359 enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
1360 struct rdt_resource *r = s->res;
1361
1362 if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
1363 exclusive))
1364 return true;
1365
1366 if (!resctrl_arch_get_cdp_enabled(r->rid))
1367 return false;
1368 return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
1369 }
1370
1371 /**
1372 * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
1373 * @rdtgrp: Resource group identified through its closid.
1374 *
1375 * An exclusive resource group implies that there should be no sharing of
1376 * its allocated resources. At the time this group is considered to be
1377 * exclusive this test can determine if its current schemata supports this
1378 * setting by testing for overlap with all other resource groups.
1379 *
1380 * Return: true if resource group can be exclusive, false if there is overlap
1381 * with allocations of other resource groups and thus this resource group
1382 * cannot be exclusive.
1383 */
1384 static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1385 {
1386 int closid = rdtgrp->closid;
1387 struct rdt_ctrl_domain *d;
1388 struct resctrl_schema *s;
1389 struct rdt_resource *r;
1390 bool has_cache = false;
1391 u32 ctrl;
1392
1393 /* Walking r->domains, ensure it can't race with cpuhp */
1394 lockdep_assert_cpus_held();
1395
1396 list_for_each_entry(s, &resctrl_schema_all, list) {
1397 r = s->res;
1398 if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
1399 continue;
1400 has_cache = true;
1401 list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
1402 ctrl = resctrl_arch_get_config(r, d, closid,
1403 s->conf_type);
1404 if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
1405 rdt_last_cmd_puts("Schemata overlaps\n");
1406 return false;
1407 }
1408 }
1409 }
1410
1411 if (!has_cache) {
1412 rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
1413 return false;
1414 }
1415
1416 return true;
1417 }
1418
1419 /*
1420 * rdtgroup_mode_write - Modify the resource group's mode
1421 */
1422 static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1423 char *buf, size_t nbytes, loff_t off)
1424 {
1425 struct rdtgroup *rdtgrp;
1426 enum rdtgrp_mode mode;
1427 int ret = 0;
1428
1429 /* Valid input requires a trailing newline */
1430 if (nbytes == 0 || buf[nbytes - 1] != '\n')
1431 return -EINVAL;
1432 buf[nbytes - 1] = '\0';
1433
1434 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1435 if (!rdtgrp) {
1436 rdtgroup_kn_unlock(of->kn);
1437 return -ENOENT;
1438 }
1439
1440 rdt_last_cmd_clear();
1441
1442 mode = rdtgrp->mode;
1443
1444 if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
1445 (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
1446 (!strcmp(buf, "pseudo-locksetup") &&
1447 mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
1448 (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1449 goto out;
1450
1451 if (mode == RDT_MODE_PSEUDO_LOCKED) {
1452 rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
1453 ret = -EINVAL;
1454 goto out;
1455 }
1456
1457 if (!strcmp(buf, "shareable")) {
1458 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1459 ret = rdtgroup_locksetup_exit(rdtgrp);
1460 if (ret)
1461 goto out;
1462 }
1463 rdtgrp->mode = RDT_MODE_SHAREABLE;
1464 } else if (!strcmp(buf, "exclusive")) {
1465 if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1466 ret = -EINVAL;
1467 goto out;
1468 }
1469 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1470 ret = rdtgroup_locksetup_exit(rdtgrp);
1471 if (ret)
1472 goto out;
1473 }
1474 rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1475 } else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) &&
1476 !strcmp(buf, "pseudo-locksetup")) {
1477 ret = rdtgroup_locksetup_enter(rdtgrp);
1478 if (ret)
1479 goto out;
1480 rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1481 } else {
1482 rdt_last_cmd_puts("Unknown or unsupported mode\n");
1483 ret = -EINVAL;
1484 }
1485
1486 out:
1487 rdtgroup_kn_unlock(of->kn);
1488 return ret ?: nbytes;
1489 }
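
/*
 * Usage sketch (illustrative only; "grp0" is a hypothetical group):
 *   echo exclusive > /sys/fs/resctrl/grp0/mode
 * succeeds only if rdtgroup_mode_test_exclusive() finds no schemata
 * overlap with other resource groups.
 */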
1490
1491 /**
1492 * rdtgroup_cbm_to_size - Translate CBM to size in bytes
1493 * @r: RDT resource to which @d belongs.
1494 * @d: RDT domain instance.
1495 * @cbm: bitmask for which the size should be computed.
1496 *
1497 * The bitmask provided associated with the RDT domain instance @d will be
1498 * translated into how many bytes it represents. The size in bytes is
1499 * computed by first dividing the total cache size by the CBM length to
1500 * determine how many bytes each bit in the bitmask represents. The result
1501 * is multiplied with the number of bits set in the bitmask.
1502 *
1503 * @cbm is unsigned long, even if only 32 bits are used to make the
1504 * bitmap functions work correctly.
1505 */
1506 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1507 struct rdt_ctrl_domain *d, unsigned long cbm)
1508 {
1509 unsigned int size = 0;
1510 struct cacheinfo *ci;
1511 int num_b;
1512
1513 if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
1514 return size;
1515
1516 num_b = bitmap_weight(&cbm, r->cache.cbm_len);
1517 ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope);
1518 if (ci)
1519 size = ci->size / r->cache.cbm_len * num_b;
1520
1521 return size;
1522 }
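
/*
 * Worked example for rdtgroup_cbm_to_size() (illustrative numbers): with a
 * 32 MB L3 cache and cbm_len = 16, each CBM bit represents 2 MB, so a CBM
 * of 0x00f0 (4 bits set) translates to 4 * (32 MB / 16) = 8 MB.
 */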
1523
1524 /*
1525 * rdtgroup_size_show - Display size in bytes of allocated regions
1526 *
1527 * The "size" file mirrors the layout of the "schemata" file, printing the
1528 * size in bytes of each region instead of the capacity bitmask.
1529 */
1530 static int rdtgroup_size_show(struct kernfs_open_file *of,
1531 struct seq_file *s, void *v)
1532 {
1533 struct resctrl_schema *schema;
1534 enum resctrl_conf_type type;
1535 struct rdt_ctrl_domain *d;
1536 struct rdtgroup *rdtgrp;
1537 struct rdt_resource *r;
1538 unsigned int size;
1539 int ret = 0;
1540 u32 closid;
1541 bool sep;
1542 u32 ctrl;
1543
1544 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1545 if (!rdtgrp) {
1546 rdtgroup_kn_unlock(of->kn);
1547 return -ENOENT;
1548 }
1549
1550 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1551 if (!rdtgrp->plr->d) {
1552 rdt_last_cmd_clear();
1553 rdt_last_cmd_puts("Cache domain offline\n");
1554 ret = -ENODEV;
1555 } else {
1556 seq_printf(s, "%*s:", max_name_width,
1557 rdtgrp->plr->s->name);
1558 size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
1559 rdtgrp->plr->d,
1560 rdtgrp->plr->cbm);
1561 seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
1562 }
1563 goto out;
1564 }
1565
1566 closid = rdtgrp->closid;
1567
1568 list_for_each_entry(schema, &resctrl_schema_all, list) {
1569 r = schema->res;
1570 type = schema->conf_type;
1571 sep = false;
1572 seq_printf(s, "%*s:", max_name_width, schema->name);
1573 list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
1574 if (sep)
1575 seq_putc(s, ';');
1576 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1577 size = 0;
1578 } else {
1579 if (is_mba_sc(r))
1580 ctrl = d->mbps_val[closid];
1581 else
1582 ctrl = resctrl_arch_get_config(r, d,
1583 closid,
1584 type);
1585 if (r->rid == RDT_RESOURCE_MBA ||
1586 r->rid == RDT_RESOURCE_SMBA)
1587 size = ctrl;
1588 else
1589 size = rdtgroup_cbm_to_size(r, d, ctrl);
1590 }
1591 seq_printf(s, "%d=%u", d->hdr.id, size);
1592 sep = true;
1593 }
1594 seq_putc(s, '\n');
1595 }
1596
1597 out:
1598 rdtgroup_kn_unlock(of->kn);
1599
1600 return ret;
1601 }
1602
1603 #define INVALID_CONFIG_INDEX UINT_MAX
1604
1605 /**
1606 * mon_event_config_index_get - get the hardware index for the
1607 * configurable event
1608 * @evtid: event id.
1609 *
1610 * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
1611 * 1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
1612 * INVALID_CONFIG_INDEX for invalid evtid
1613 */
1614 static inline unsigned int mon_event_config_index_get(u32 evtid)
1615 {
1616 switch (evtid) {
1617 case QOS_L3_MBM_TOTAL_EVENT_ID:
1618 return 0;
1619 case QOS_L3_MBM_LOCAL_EVENT_ID:
1620 return 1;
1621 default:
1622 /* Should never reach here */
1623 return INVALID_CONFIG_INDEX;
1624 }
1625 }
1626
1627 void resctrl_arch_mon_event_config_read(void *_config_info)
1628 {
1629 struct resctrl_mon_config_info *config_info = _config_info;
1630 unsigned int index;
1631 u64 msrval;
1632
1633 index = mon_event_config_index_get(config_info->evtid);
1634 if (index == INVALID_CONFIG_INDEX) {
1635 pr_warn_once("Invalid event id %d\n", config_info->evtid);
1636 return;
1637 }
1638 rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
1639
1640 /* Report only the valid event configuration bits */
1641 config_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
1642 }
1643
1644 static void mondata_config_read(struct resctrl_mon_config_info *mon_info)
1645 {
1646 smp_call_function_any(&mon_info->d->hdr.cpu_mask,
1647 resctrl_arch_mon_event_config_read, mon_info, 1);
1648 }
1649
1650 static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
1651 {
1652 struct resctrl_mon_config_info mon_info;
1653 struct rdt_mon_domain *dom;
1654 bool sep = false;
1655
1656 cpus_read_lock();
1657 mutex_lock(&rdtgroup_mutex);
1658
1659 list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1660 if (sep)
1661 seq_puts(s, ";");
1662
1663 memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info));
1664 mon_info.r = r;
1665 mon_info.d = dom;
1666 mon_info.evtid = evtid;
1667 mondata_config_read(&mon_info);
1668
1669 seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
1670 sep = true;
1671 }
1672 seq_puts(s, "\n");
1673
1674 mutex_unlock(&rdtgroup_mutex);
1675 cpus_read_unlock();
1676
1677 return 0;
1678 }
1679
1680 static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
1681 struct seq_file *seq, void *v)
1682 {
1683 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1684
1685 mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID);
1686
1687 return 0;
1688 }
1689
1690 static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
1691 struct seq_file *seq, void *v)
1692 {
1693 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1694
1695 mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID);
1696
1697 return 0;
1698 }
1699
1700 void resctrl_arch_mon_event_config_write(void *_config_info)
1701 {
1702 struct resctrl_mon_config_info *config_info = _config_info;
1703 unsigned int index;
1704
1705 index = mon_event_config_index_get(config_info->evtid);
1706 if (index == INVALID_CONFIG_INDEX) {
1707 pr_warn_once("Invalid event id %d\n", config_info->evtid);
1708 return;
1709 }
1710 wrmsr(MSR_IA32_EVT_CFG_BASE + index, config_info->mon_config, 0);
1711 }
1712
1713 static void mbm_config_write_domain(struct rdt_resource *r,
1714 struct rdt_mon_domain *d, u32 evtid, u32 val)
1715 {
1716 struct resctrl_mon_config_info mon_info = {0};
1717
1718 /*
1719 * Read the current config value first. If both are the same then
1720 * no need to write it again.
1721 */
1722 mon_info.r = r;
1723 mon_info.d = d;
1724 mon_info.evtid = evtid;
1725 mondata_config_read(&mon_info);
1726 if (mon_info.mon_config == val)
1727 return;
1728
1729 mon_info.mon_config = val;
1730
1731 /*
1732 * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the
1733 * domain. The MSRs offset from MSR_IA32_EVT_CFG_BASE
1734 * are scoped at the domain level. Writing any of these MSRs
1735 * on one CPU is observed by all the CPUs in the domain.
1736 */
1737 smp_call_function_any(&d->hdr.cpu_mask, resctrl_arch_mon_event_config_write,
1738 &mon_info, 1);
1739
1740 /*
1741 * When an Event Configuration is changed, the bandwidth counters
1742 * for all RMIDs and Events will be cleared by the hardware. The
1743 * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for
1744 * every RMID on the next read to any event.
1745 * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62)
1746 * cleared while it is tracked by the hardware. Clear the
1747 * mbm_local and mbm_total counts for all the RMIDs.
1748 */
1749 resctrl_arch_reset_rmid_all(r, d);
1750 }
1751
1752 static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
1753 {
1754 char *dom_str = NULL, *id_str;
1755 unsigned long dom_id, val;
1756 struct rdt_mon_domain *d;
1757
1758 /* Walking r->domains, ensure it can't race with cpuhp */
1759 lockdep_assert_cpus_held();
1760
1761 next:
1762 if (!tok || tok[0] == '\0')
1763 return 0;
1764
1765 /* Start processing the strings for each domain */
1766 dom_str = strim(strsep(&tok, ";"));
1767 id_str = strsep(&dom_str, "=");
1768
1769 if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
1770 rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n");
1771 return -EINVAL;
1772 }
1773
1774 if (!dom_str || kstrtoul(dom_str, 16, &val)) {
1775 rdt_last_cmd_puts("Non-numeric event configuration value\n");
1776 return -EINVAL;
1777 }
1778
1779 /* Value from user cannot be more than the supported set of events */
1780 if ((val & r->mbm_cfg_mask) != val) {
1781 rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
1782 r->mbm_cfg_mask);
1783 return -EINVAL;
1784 }
1785
1786 list_for_each_entry(d, &r->mon_domains, hdr.list) {
1787 if (d->hdr.id == dom_id) {
1788 mbm_config_write_domain(r, d, evtid, val);
1789 goto next;
1790 }
1791 }
1792
1793 return -EINVAL;
1794 }
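
/*
 * Usage sketch (illustrative only): mbm_total_bytes_config and
 * mbm_local_bytes_config take "<domain id>=<hex value>" pairs separated
 * by ';', e.g.
 *   echo "0=0x30;1=0x30" > /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config
 * Each value must stay within r->mbm_cfg_mask for the resource.
 */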
1795
1796 static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
1797 char *buf, size_t nbytes,
1798 loff_t off)
1799 {
1800 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1801 int ret;
1802
1803 /* Valid input requires a trailing newline */
1804 if (nbytes == 0 || buf[nbytes - 1] != '\n')
1805 return -EINVAL;
1806
1807 cpus_read_lock();
1808 mutex_lock(&rdtgroup_mutex);
1809
1810 rdt_last_cmd_clear();
1811
1812 buf[nbytes - 1] = '\0';
1813
1814 ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
1815
1816 mutex_unlock(&rdtgroup_mutex);
1817 cpus_read_unlock();
1818
1819 return ret ?: nbytes;
1820 }
1821
1822 static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
1823 char *buf, size_t nbytes,
1824 loff_t off)
1825 {
1826 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1827 int ret;
1828
1829 /* Valid input requires a trailing newline */
1830 if (nbytes == 0 || buf[nbytes - 1] != '\n')
1831 return -EINVAL;
1832
1833 cpus_read_lock();
1834 mutex_lock(&rdtgroup_mutex);
1835
1836 rdt_last_cmd_clear();
1837
1838 buf[nbytes - 1] = '\0';
1839
1840 ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
1841
1842 mutex_unlock(&rdtgroup_mutex);
1843 cpus_read_unlock();
1844
1845 return ret ?: nbytes;
1846 }
1847
1848 /* resctrl "info" and resource group files. */
1849 static struct rftype res_common_files[] = {
1850 {
1851 .name = "last_cmd_status",
1852 .mode = 0444,
1853 .kf_ops = &rdtgroup_kf_single_ops,
1854 .seq_show = rdt_last_cmd_status_show,
1855 .fflags = RFTYPE_TOP_INFO,
1856 },
1857 {
1858 .name = "num_closids",
1859 .mode = 0444,
1860 .kf_ops = &rdtgroup_kf_single_ops,
1861 .seq_show = rdt_num_closids_show,
1862 .fflags = RFTYPE_CTRL_INFO,
1863 },
1864 {
1865 .name = "mon_features",
1866 .mode = 0444,
1867 .kf_ops = &rdtgroup_kf_single_ops,
1868 .seq_show = rdt_mon_features_show,
1869 .fflags = RFTYPE_MON_INFO,
1870 },
1871 {
1872 .name = "num_rmids",
1873 .mode = 0444,
1874 .kf_ops = &rdtgroup_kf_single_ops,
1875 .seq_show = rdt_num_rmids_show,
1876 .fflags = RFTYPE_MON_INFO,
1877 },
1878 {
1879 .name = "cbm_mask",
1880 .mode = 0444,
1881 .kf_ops = &rdtgroup_kf_single_ops,
1882 .seq_show = rdt_default_ctrl_show,
1883 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1884 },
1885 {
1886 .name = "min_cbm_bits",
1887 .mode = 0444,
1888 .kf_ops = &rdtgroup_kf_single_ops,
1889 .seq_show = rdt_min_cbm_bits_show,
1890 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1891 },
1892 {
1893 .name = "shareable_bits",
1894 .mode = 0444,
1895 .kf_ops = &rdtgroup_kf_single_ops,
1896 .seq_show = rdt_shareable_bits_show,
1897 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1898 },
1899 {
1900 .name = "bit_usage",
1901 .mode = 0444,
1902 .kf_ops = &rdtgroup_kf_single_ops,
1903 .seq_show = rdt_bit_usage_show,
1904 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1905 },
1906 {
1907 .name = "min_bandwidth",
1908 .mode = 0444,
1909 .kf_ops = &rdtgroup_kf_single_ops,
1910 .seq_show = rdt_min_bw_show,
1911 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1912 },
1913 {
1914 .name = "bandwidth_gran",
1915 .mode = 0444,
1916 .kf_ops = &rdtgroup_kf_single_ops,
1917 .seq_show = rdt_bw_gran_show,
1918 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1919 },
1920 {
1921 .name = "delay_linear",
1922 .mode = 0444,
1923 .kf_ops = &rdtgroup_kf_single_ops,
1924 .seq_show = rdt_delay_linear_show,
1925 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1926 },
1927 /*
1928 * Which (if any) capabilities are provided by thread_throttle_mode
1929 * is platform specific. Defer "fflags" initialization to platform
1930 * discovery.
1931 */
1932 {
1933 .name = "thread_throttle_mode",
1934 .mode = 0444,
1935 .kf_ops = &rdtgroup_kf_single_ops,
1936 .seq_show = rdt_thread_throttle_mode_show,
1937 },
1938 {
1939 .name = "max_threshold_occupancy",
1940 .mode = 0644,
1941 .kf_ops = &rdtgroup_kf_single_ops,
1942 .write = max_threshold_occ_write,
1943 .seq_show = max_threshold_occ_show,
1944 .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE,
1945 },
1946 {
1947 .name = "mbm_total_bytes_config",
1948 .mode = 0644,
1949 .kf_ops = &rdtgroup_kf_single_ops,
1950 .seq_show = mbm_total_bytes_config_show,
1951 .write = mbm_total_bytes_config_write,
1952 },
1953 {
1954 .name = "mbm_local_bytes_config",
1955 .mode = 0644,
1956 .kf_ops = &rdtgroup_kf_single_ops,
1957 .seq_show = mbm_local_bytes_config_show,
1958 .write = mbm_local_bytes_config_write,
1959 },
1960 {
1961 .name = "cpus",
1962 .mode = 0644,
1963 .kf_ops = &rdtgroup_kf_single_ops,
1964 .write = rdtgroup_cpus_write,
1965 .seq_show = rdtgroup_cpus_show,
1966 .fflags = RFTYPE_BASE,
1967 },
1968 {
1969 .name = "cpus_list",
1970 .mode = 0644,
1971 .kf_ops = &rdtgroup_kf_single_ops,
1972 .write = rdtgroup_cpus_write,
1973 .seq_show = rdtgroup_cpus_show,
1974 .flags = RFTYPE_FLAGS_CPUS_LIST,
1975 .fflags = RFTYPE_BASE,
1976 },
1977 {
1978 .name = "tasks",
1979 .mode = 0644,
1980 .kf_ops = &rdtgroup_kf_single_ops,
1981 .write = rdtgroup_tasks_write,
1982 .seq_show = rdtgroup_tasks_show,
1983 .fflags = RFTYPE_BASE,
1984 },
1985 {
1986 .name = "mon_hw_id",
1987 .mode = 0444,
1988 .kf_ops = &rdtgroup_kf_single_ops,
1989 .seq_show = rdtgroup_rmid_show,
1990 .fflags = RFTYPE_MON_BASE | RFTYPE_DEBUG,
1991 },
1992 {
1993 .name = "schemata",
1994 .mode = 0644,
1995 .kf_ops = &rdtgroup_kf_single_ops,
1996 .write = rdtgroup_schemata_write,
1997 .seq_show = rdtgroup_schemata_show,
1998 .fflags = RFTYPE_CTRL_BASE,
1999 },
2000 {
2001 .name = "mba_MBps_event",
2002 .mode = 0644,
2003 .kf_ops = &rdtgroup_kf_single_ops,
2004 .write = rdtgroup_mba_mbps_event_write,
2005 .seq_show = rdtgroup_mba_mbps_event_show,
2006 },
2007 {
2008 .name = "mode",
2009 .mode = 0644,
2010 .kf_ops = &rdtgroup_kf_single_ops,
2011 .write = rdtgroup_mode_write,
2012 .seq_show = rdtgroup_mode_show,
2013 .fflags = RFTYPE_CTRL_BASE,
2014 },
2015 {
2016 .name = "size",
2017 .mode = 0444,
2018 .kf_ops = &rdtgroup_kf_single_ops,
2019 .seq_show = rdtgroup_size_show,
2020 .fflags = RFTYPE_CTRL_BASE,
2021 },
2022 {
2023 .name = "sparse_masks",
2024 .mode = 0444,
2025 .kf_ops = &rdtgroup_kf_single_ops,
2026 .seq_show = rdt_has_sparse_bitmasks_show,
2027 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
2028 },
2029 {
2030 .name = "ctrl_hw_id",
2031 .mode = 0444,
2032 .kf_ops = &rdtgroup_kf_single_ops,
2033 .seq_show = rdtgroup_closid_show,
2034 .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG,
2035 },
2036
2037 };
2038
2039 static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
2040 {
2041 struct rftype *rfts, *rft;
2042 int ret, len;
2043
2044 rfts = res_common_files;
2045 len = ARRAY_SIZE(res_common_files);
2046
2047 lockdep_assert_held(&rdtgroup_mutex);
2048
2049 if (resctrl_debug)
2050 fflags |= RFTYPE_DEBUG;
2051
2052 for (rft = rfts; rft < rfts + len; rft++) {
2053 if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
2054 ret = rdtgroup_add_file(kn, rft);
2055 if (ret)
2056 goto error;
2057 }
2058 }
2059
2060 return 0;
2061 error:
2062 pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
2063 while (--rft >= rfts) {
2064 if ((fflags & rft->fflags) == rft->fflags)
2065 kernfs_remove_by_name(kn, rft->name);
2066 }
2067 return ret;
2068 }
2069
2070 static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
2071 {
2072 struct rftype *rfts, *rft;
2073 int len;
2074
2075 rfts = res_common_files;
2076 len = ARRAY_SIZE(res_common_files);
2077
2078 for (rft = rfts; rft < rfts + len; rft++) {
2079 if (!strcmp(rft->name, name))
2080 return rft;
2081 }
2082
2083 return NULL;
2084 }
2085
2086 static void thread_throttle_mode_init(void)
2087 {
2088 enum membw_throttle_mode throttle_mode = THREAD_THROTTLE_UNDEFINED;
2089 struct rdt_resource *r_mba, *r_smba;
2090
2091 r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
2092 if (r_mba->alloc_capable &&
2093 r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
2094 throttle_mode = r_mba->membw.throttle_mode;
2095
2096 r_smba = resctrl_arch_get_resource(RDT_RESOURCE_SMBA);
2097 if (r_smba->alloc_capable &&
2098 r_smba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
2099 throttle_mode = r_smba->membw.throttle_mode;
2100
2101 if (throttle_mode == THREAD_THROTTLE_UNDEFINED)
2102 return;
2103
2104 resctrl_file_fflags_init("thread_throttle_mode",
2105 RFTYPE_CTRL_INFO | RFTYPE_RES_MB);
2106 }
2107
2108 void resctrl_file_fflags_init(const char *config, unsigned long fflags)
2109 {
2110 struct rftype *rft;
2111
2112 rft = rdtgroup_get_rftype_by_name(config);
2113 if (rft)
2114 rft->fflags = fflags;
2115 }
2116
2117 /**
2118 * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
2119 * @r: The resource group with which the file is associated.
2120 * @name: Name of the file
2121 *
2122 * The permissions of the named resctrl file, directory, or link are modified
2123 * to not allow read, write, or execute by any user.
2124 *
2125 * WARNING: This function is intended to communicate to the user that the
2126 * resctrl file has been locked down - that it is not relevant to the
2127 * particular state the system finds itself in. It should not be relied
2128 * on to protect from user access because after the file's permissions
2129 * are restricted the user can still change the permissions using chmod
2130 * from the command line.
2131 *
2132 * Return: 0 on success, <0 on failure.
2133 */
2134 int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
2135 {
2136 struct iattr iattr = {.ia_valid = ATTR_MODE,};
2137 struct kernfs_node *kn;
2138 int ret = 0;
2139
2140 kn = kernfs_find_and_get_ns(r->kn, name, NULL);
2141 if (!kn)
2142 return -ENOENT;
2143
2144 switch (kernfs_type(kn)) {
2145 case KERNFS_DIR:
2146 iattr.ia_mode = S_IFDIR;
2147 break;
2148 case KERNFS_FILE:
2149 iattr.ia_mode = S_IFREG;
2150 break;
2151 case KERNFS_LINK:
2152 iattr.ia_mode = S_IFLNK;
2153 break;
2154 }
2155
2156 ret = kernfs_setattr(kn, &iattr);
2157 kernfs_put(kn);
2158 return ret;
2159 }
2160
2161 /**
2162 * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
2163 * @r: The resource group with which the file is associated.
2164 * @name: Name of the file
2165 * @mask: Mask of permissions that should be restored
2166 *
2167 * Restore the permissions of the named file. If @name is a directory the
2168 * permissions of its parent will be used.
2169 *
2170 * Return: 0 on success, <0 on failure.
2171 */
2172 int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
2173 umode_t mask)
2174 {
2175 struct iattr iattr = {.ia_valid = ATTR_MODE,};
2176 struct kernfs_node *kn, *parent;
2177 struct rftype *rfts, *rft;
2178 int ret, len;
2179
2180 rfts = res_common_files;
2181 len = ARRAY_SIZE(res_common_files);
2182
2183 for (rft = rfts; rft < rfts + len; rft++) {
2184 if (!strcmp(rft->name, name))
2185 iattr.ia_mode = rft->mode & mask;
2186 }
2187
2188 kn = kernfs_find_and_get_ns(r->kn, name, NULL);
2189 if (!kn)
2190 return -ENOENT;
2191
2192 switch (kernfs_type(kn)) {
2193 case KERNFS_DIR:
2194 parent = kernfs_get_parent(kn);
2195 if (parent) {
2196 iattr.ia_mode |= parent->mode;
2197 kernfs_put(parent);
2198 }
2199 iattr.ia_mode |= S_IFDIR;
2200 break;
2201 case KERNFS_FILE:
2202 iattr.ia_mode |= S_IFREG;
2203 break;
2204 case KERNFS_LINK:
2205 iattr.ia_mode |= S_IFLNK;
2206 break;
2207 }
2208
2209 ret = kernfs_setattr(kn, &iattr);
2210 kernfs_put(kn);
2211 return ret;
2212 }
2213
2214 static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
2215 unsigned long fflags)
2216 {
2217 struct kernfs_node *kn_subdir;
2218 int ret;
2219
2220 kn_subdir = kernfs_create_dir(kn_info, name,
2221 kn_info->mode, priv);
2222 if (IS_ERR(kn_subdir))
2223 return PTR_ERR(kn_subdir);
2224
2225 ret = rdtgroup_kn_set_ugid(kn_subdir);
2226 if (ret)
2227 return ret;
2228
2229 ret = rdtgroup_add_files(kn_subdir, fflags);
2230 if (!ret)
2231 kernfs_activate(kn_subdir);
2232
2233 return ret;
2234 }
2235
2236 static unsigned long fflags_from_resource(struct rdt_resource *r)
2237 {
2238 switch (r->rid) {
2239 case RDT_RESOURCE_L3:
2240 case RDT_RESOURCE_L2:
2241 return RFTYPE_RES_CACHE;
2242 case RDT_RESOURCE_MBA:
2243 case RDT_RESOURCE_SMBA:
2244 return RFTYPE_RES_MB;
2245 }
2246
2247 return WARN_ON_ONCE(1);
2248 }
2249
2250 static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
2251 {
2252 struct resctrl_schema *s;
2253 struct rdt_resource *r;
2254 unsigned long fflags;
2255 char name[32];
2256 int ret;
2257
2258 /* create the directory */
2259 kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
2260 if (IS_ERR(kn_info))
2261 return PTR_ERR(kn_info);
2262
2263 ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO);
2264 if (ret)
2265 goto out_destroy;
2266
2267 /* loop over enabled controls, these are all alloc_capable */
2268 list_for_each_entry(s, &resctrl_schema_all, list) {
2269 r = s->res;
2270 fflags = fflags_from_resource(r) | RFTYPE_CTRL_INFO;
2271 ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
2272 if (ret)
2273 goto out_destroy;
2274 }
2275
2276 for_each_mon_capable_rdt_resource(r) {
2277 fflags = fflags_from_resource(r) | RFTYPE_MON_INFO;
2278 sprintf(name, "%s_MON", r->name);
2279 ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
2280 if (ret)
2281 goto out_destroy;
2282 }
2283
2284 ret = rdtgroup_kn_set_ugid(kn_info);
2285 if (ret)
2286 goto out_destroy;
2287
2288 kernfs_activate(kn_info);
2289
2290 return 0;
2291
2292 out_destroy:
2293 kernfs_remove(kn_info);
2294 return ret;
2295 }
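
/*
 * Resulting "info" layout sketch (illustrative; the exact directory names
 * depend on the enabled resources, "L3", "MB" and "L3_MON" being typical):
 *
 *   info/last_cmd_status
 *   info/L3/      num_closids, cbm_mask, min_cbm_bits, shareable_bits, ...
 *   info/MB/      min_bandwidth, bandwidth_gran, delay_linear, ...
 *   info/L3_MON/  num_rmids, mon_features, max_threshold_occupancy, ...
 */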
2296
2297 static int
2298 mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
2299 char *name, struct kernfs_node **dest_kn)
2300 {
2301 struct kernfs_node *kn;
2302 int ret;
2303
2304 /* create the directory */
2305 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
2306 if (IS_ERR(kn))
2307 return PTR_ERR(kn);
2308
2309 if (dest_kn)
2310 *dest_kn = kn;
2311
2312 ret = rdtgroup_kn_set_ugid(kn);
2313 if (ret)
2314 goto out_destroy;
2315
2316 kernfs_activate(kn);
2317
2318 return 0;
2319
2320 out_destroy:
2321 kernfs_remove(kn);
2322 return ret;
2323 }
2324
2325 static void l3_qos_cfg_update(void *arg)
2326 {
2327 bool *enable = arg;
2328
2329 wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
2330 }
2331
2332 static void l2_qos_cfg_update(void *arg)
2333 {
2334 bool *enable = arg;
2335
2336 wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
2337 }
2338
2339 static inline bool is_mba_linear(void)
2340 {
2341 return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear;
2342 }
2343
2344 static int set_cache_qos_cfg(int level, bool enable)
2345 {
2346 void (*update)(void *arg);
2347 struct rdt_ctrl_domain *d;
2348 struct rdt_resource *r_l;
2349 cpumask_var_t cpu_mask;
2350 int cpu;
2351
2352 /* Walking r->domains, ensure it can't race with cpuhp */
2353 lockdep_assert_cpus_held();
2354
2355 if (level == RDT_RESOURCE_L3)
2356 update = l3_qos_cfg_update;
2357 else if (level == RDT_RESOURCE_L2)
2358 update = l2_qos_cfg_update;
2359 else
2360 return -EINVAL;
2361
2362 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
2363 return -ENOMEM;
2364
2365 r_l = &rdt_resources_all[level].r_resctrl;
2366 list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) {
2367 if (r_l->cache.arch_has_per_cpu_cfg)
2368 /* Pick all the CPUs in the domain instance */
2369 for_each_cpu(cpu, &d->hdr.cpu_mask)
2370 cpumask_set_cpu(cpu, cpu_mask);
2371 else
2372 /* Pick one CPU from each domain instance to update MSR */
2373 cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask);
2374 }
2375
2376 /* Update QOS_CFG MSR on all the CPUs in cpu_mask */
2377 on_each_cpu_mask(cpu_mask, update, &enable, 1);
2378
2379 free_cpumask_var(cpu_mask);
2380
2381 return 0;
2382 }
2383
2384 /* Restore the qos cfg state when a domain comes online */
2385 void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
2386 {
2387 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
2388
2389 if (!r->cdp_capable)
2390 return;
2391
2392 if (r->rid == RDT_RESOURCE_L2)
2393 l2_qos_cfg_update(&hw_res->cdp_enabled);
2394
2395 if (r->rid == RDT_RESOURCE_L3)
2396 l3_qos_cfg_update(&hw_res->cdp_enabled);
2397 }
2398
2399 static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d)
2400 {
2401 u32 num_closid = resctrl_arch_get_num_closid(r);
2402 int cpu = cpumask_any(&d->hdr.cpu_mask);
2403 int i;
2404
2405 d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
2406 GFP_KERNEL, cpu_to_node(cpu));
2407 if (!d->mbps_val)
2408 return -ENOMEM;
2409
2410 for (i = 0; i < num_closid; i++)
2411 d->mbps_val[i] = MBA_MAX_MBPS;
2412
2413 return 0;
2414 }
2415
2416 static void mba_sc_domain_destroy(struct rdt_resource *r,
2417 struct rdt_ctrl_domain *d)
2418 {
2419 kfree(d->mbps_val);
2420 d->mbps_val = NULL;
2421 }
2422
2423 /*
2424 * MBA software controller is supported only if
2425 * MBM is supported and MBA is in linear scale,
2426 * and the MBM monitor scope is the same as MBA
2427 * control scope.
2428 */
2429 static bool supports_mba_mbps(void)
2430 {
2431 struct rdt_resource *rmbm = resctrl_arch_get_resource(RDT_RESOURCE_L3);
2432 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
2433
2434 return (resctrl_is_mbm_enabled() &&
2435 r->alloc_capable && is_mba_linear() &&
2436 r->ctrl_scope == rmbm->mon_scope);
2437 }
2438
2439 /*
2440 * Enable or disable the MBA software controller
2441 * which helps user specify bandwidth in MBps.
2442 */
2443 static int set_mba_sc(bool mba_sc)
2444 {
2445 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
2446 u32 num_closid = resctrl_arch_get_num_closid(r);
2447 struct rdt_ctrl_domain *d;
2448 unsigned long fflags;
2449 int i;
2450
2451 if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
2452 return -EINVAL;
2453
2454 r->membw.mba_sc = mba_sc;
2455
2456 rdtgroup_default.mba_mbps_event = mba_mbps_default_event;
2457
2458 list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
2459 for (i = 0; i < num_closid; i++)
2460 d->mbps_val[i] = MBA_MAX_MBPS;
2461 }
2462
2463 fflags = mba_sc ? RFTYPE_CTRL_BASE | RFTYPE_MON_BASE : 0;
2464 resctrl_file_fflags_init("mba_MBps_event", fflags);
2465
2466 return 0;
2467 }
2468
2469 static int cdp_enable(int level)
2470 {
2471 struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl;
2472 int ret;
2473
2474 if (!r_l->alloc_capable)
2475 return -EINVAL;
2476
2477 ret = set_cache_qos_cfg(level, true);
2478 if (!ret)
2479 rdt_resources_all[level].cdp_enabled = true;
2480
2481 return ret;
2482 }
2483
2484 static void cdp_disable(int level)
2485 {
2486 struct rdt_hw_resource *r_hw = &rdt_resources_all[level];
2487
2488 if (r_hw->cdp_enabled) {
2489 set_cache_qos_cfg(level, false);
2490 r_hw->cdp_enabled = false;
2491 }
2492 }
2493
2494 int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable)
2495 {
2496 struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
2497
2498 if (!hw_res->r_resctrl.cdp_capable)
2499 return -EINVAL;
2500
2501 if (enable)
2502 return cdp_enable(l);
2503
2504 cdp_disable(l);
2505
2506 return 0;
2507 }
2508
2509 /*
2510 * We don't allow rdtgroup directories to be created anywhere
2511 * except the root directory. Thus when looking for the rdtgroup
2512 * structure for a kernfs node we are looking either at a directory,
2513 * in which case the rdtgroup structure is pointed at by the "priv"
2514 * field, or at a file, in which case we need only look to the parent
2515 * to find the rdtgroup.
2516 */
2517 static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
2518 {
2519 if (kernfs_type(kn) == KERNFS_DIR) {
2520 /*
2521 * All the resource directories use "kn->priv"
2522 * to point to the "struct rdtgroup" for the
2523 * resource. "info" and its subdirectories don't
2524 * have rdtgroup structures, so return NULL here.
2525 */
2526 if (kn == kn_info ||
2527 rcu_access_pointer(kn->__parent) == kn_info)
2528 return NULL;
2529 else
2530 return kn->priv;
2531 } else {
2532 return rdt_kn_parent_priv(kn);
2533 }
2534 }
2535
2536 static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
2537 {
2538 atomic_inc(&rdtgrp->waitcount);
2539 kernfs_break_active_protection(kn);
2540 }
2541
2542 static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
2543 {
2544 if (atomic_dec_and_test(&rdtgrp->waitcount) &&
2545 (rdtgrp->flags & RDT_DELETED)) {
2546 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2547 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2548 rdtgroup_pseudo_lock_remove(rdtgrp);
2549 kernfs_unbreak_active_protection(kn);
2550 rdtgroup_remove(rdtgrp);
2551 } else {
2552 kernfs_unbreak_active_protection(kn);
2553 }
2554 }
2555
2556 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
2557 {
2558 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2559
2560 if (!rdtgrp)
2561 return NULL;
2562
2563 rdtgroup_kn_get(rdtgrp, kn);
2564
2565 cpus_read_lock();
2566 mutex_lock(&rdtgroup_mutex);
2567
2568 /* Was this group deleted while we waited? */
2569 if (rdtgrp->flags & RDT_DELETED)
2570 return NULL;
2571
2572 return rdtgrp;
2573 }
2574
2575 void rdtgroup_kn_unlock(struct kernfs_node *kn)
2576 {
2577 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2578
2579 if (!rdtgrp)
2580 return;
2581
2582 mutex_unlock(&rdtgroup_mutex);
2583 cpus_read_unlock();
2584
2585 rdtgroup_kn_put(rdtgrp, kn);
2586 }
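
/*
 * Usage sketch (assumption, mirroring the callers elsewhere in this file):
 * rdtgroup_kn_lock_live() and rdtgroup_kn_unlock() are always paired on the
 * same kernfs node, and unlock is called even when lock_live() returned NULL:
 *
 *	rdtgrp = rdtgroup_kn_lock_live(kn);
 *	if (!rdtgrp) {
 *		ret = -ENOENT;
 *		goto out_unlock;
 *	}
 *	... operate on rdtgrp with rdtgroup_mutex and cpus_read_lock() held ...
 * out_unlock:
 *	rdtgroup_kn_unlock(kn);
 */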
2587
2588 static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2589 struct rdtgroup *prgrp,
2590 struct kernfs_node **mon_data_kn);
2591
2592 static void rdt_disable_ctx(void)
2593 {
2594 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
2595 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
2596 set_mba_sc(false);
2597
2598 resctrl_debug = false;
2599 }
2600
2601 static int rdt_enable_ctx(struct rdt_fs_context *ctx)
2602 {
2603 int ret = 0;
2604
2605 if (ctx->enable_cdpl2) {
2606 ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
2607 if (ret)
2608 goto out_done;
2609 }
2610
2611 if (ctx->enable_cdpl3) {
2612 ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
2613 if (ret)
2614 goto out_cdpl2;
2615 }
2616
2617 if (ctx->enable_mba_mbps) {
2618 ret = set_mba_sc(true);
2619 if (ret)
2620 goto out_cdpl3;
2621 }
2622
2623 if (ctx->enable_debug)
2624 resctrl_debug = true;
2625
2626 return 0;
2627
2628 out_cdpl3:
2629 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
2630 out_cdpl2:
2631 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
2632 out_done:
2633 return ret;
2634 }
2635
2636 static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
2637 {
2638 struct resctrl_schema *s;
2639 const char *suffix = "";
2640 int ret, cl;
2641
2642 s = kzalloc(sizeof(*s), GFP_KERNEL);
2643 if (!s)
2644 return -ENOMEM;
2645
2646 s->res = r;
2647 s->num_closid = resctrl_arch_get_num_closid(r);
2648 if (resctrl_arch_get_cdp_enabled(r->rid))
2649 s->num_closid /= 2;
2650
2651 s->conf_type = type;
2652 switch (type) {
2653 case CDP_CODE:
2654 suffix = "CODE";
2655 break;
2656 case CDP_DATA:
2657 suffix = "DATA";
2658 break;
2659 case CDP_NONE:
2660 suffix = "";
2661 break;
2662 }
2663
2664 ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
2665 if (ret >= sizeof(s->name)) {
2666 kfree(s);
2667 return -EINVAL;
2668 }
2669
2670 cl = strlen(s->name);
2671
2672 /*
2673 * If CDP is supported by this resource, but not enabled,
2674 * reserve width for the suffix anyway. This ensures the tabular format
2675 * of the schemata file does not change between mounts of the filesystem.
2676 */
2677 if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
2678 cl += 4;
2679
2680 if (cl > max_name_width)
2681 max_name_width = cl;
2682
2683 switch (r->schema_fmt) {
2684 case RESCTRL_SCHEMA_BITMAP:
2685 s->fmt_str = "%d=%x";
2686 break;
2687 case RESCTRL_SCHEMA_RANGE:
2688 s->fmt_str = "%d=%u";
2689 break;
2690 }
2691
2692 if (WARN_ON_ONCE(!s->fmt_str)) {
2693 kfree(s);
2694 return -EINVAL;
2695 }
2696
2697 INIT_LIST_HEAD(&s->list);
2698 list_add(&s->list, &resctrl_schema_all);
2699
2700 return 0;
2701 }
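
/*
 * Illustrative outcome (assuming the usual "L3" resource name): with CDP
 * enabled this is called twice and adds schemata entries named "L3CODE" and
 * "L3DATA", each advertising half of the hardware CLOSIDs; with CDP disabled
 * a single "L3" entry is added. For a bitmap resource the fmt_str "%d=%x"
 * renders one domain of the schemata line as, e.g., "0=7ff".
 */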
2702
2703 static int schemata_list_create(void)
2704 {
2705 struct rdt_resource *r;
2706 int ret = 0;
2707
2708 for_each_alloc_capable_rdt_resource(r) {
2709 if (resctrl_arch_get_cdp_enabled(r->rid)) {
2710 ret = schemata_list_add(r, CDP_CODE);
2711 if (ret)
2712 break;
2713
2714 ret = schemata_list_add(r, CDP_DATA);
2715 } else {
2716 ret = schemata_list_add(r, CDP_NONE);
2717 }
2718
2719 if (ret)
2720 break;
2721 }
2722
2723 return ret;
2724 }
2725
2726 static void schemata_list_destroy(void)
2727 {
2728 struct resctrl_schema *s, *tmp;
2729
2730 list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
2731 list_del(&s->list);
2732 kfree(s);
2733 }
2734 }
2735
2736 static int rdt_get_tree(struct fs_context *fc)
2737 {
2738 struct rdt_fs_context *ctx = rdt_fc2context(fc);
2739 unsigned long flags = RFTYPE_CTRL_BASE;
2740 struct rdt_mon_domain *dom;
2741 struct rdt_resource *r;
2742 int ret;
2743
2744 cpus_read_lock();
2745 mutex_lock(&rdtgroup_mutex);
2746 /*
2747 * resctrl file system can only be mounted once.
2748 */
2749 if (resctrl_mounted) {
2750 ret = -EBUSY;
2751 goto out;
2752 }
2753
2754 ret = rdtgroup_setup_root(ctx);
2755 if (ret)
2756 goto out;
2757
2758 ret = rdt_enable_ctx(ctx);
2759 if (ret)
2760 goto out_root;
2761
2762 ret = schemata_list_create();
2763 if (ret) {
2764 schemata_list_destroy();
2765 goto out_ctx;
2766 }
2767
2768 closid_init();
2769
2770 if (resctrl_arch_mon_capable())
2771 flags |= RFTYPE_MON;
2772
2773 ret = rdtgroup_add_files(rdtgroup_default.kn, flags);
2774 if (ret)
2775 goto out_schemata_free;
2776
2777 kernfs_activate(rdtgroup_default.kn);
2778
2779 ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
2780 if (ret < 0)
2781 goto out_schemata_free;
2782
2783 if (resctrl_arch_mon_capable()) {
2784 ret = mongroup_create_dir(rdtgroup_default.kn,
2785 &rdtgroup_default, "mon_groups",
2786 &kn_mongrp);
2787 if (ret < 0)
2788 goto out_info;
2789
2790 ret = mkdir_mondata_all(rdtgroup_default.kn,
2791 &rdtgroup_default, &kn_mondata);
2792 if (ret < 0)
2793 goto out_mongrp;
2794 rdtgroup_default.mon.mon_data_kn = kn_mondata;
2795 }
2796
2797 ret = rdt_pseudo_lock_init();
2798 if (ret)
2799 goto out_mondata;
2800
2801 ret = kernfs_get_tree(fc);
2802 if (ret < 0)
2803 goto out_psl;
2804
2805 if (resctrl_arch_alloc_capable())
2806 resctrl_arch_enable_alloc();
2807 if (resctrl_arch_mon_capable())
2808 resctrl_arch_enable_mon();
2809
2810 if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
2811 resctrl_mounted = true;
2812
2813 if (resctrl_is_mbm_enabled()) {
2814 r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
2815 list_for_each_entry(dom, &r->mon_domains, hdr.list)
2816 mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
2817 RESCTRL_PICK_ANY_CPU);
2818 }
2819
2820 goto out;
2821
2822 out_psl:
2823 rdt_pseudo_lock_release();
2824 out_mondata:
2825 if (resctrl_arch_mon_capable())
2826 kernfs_remove(kn_mondata);
2827 out_mongrp:
2828 if (resctrl_arch_mon_capable())
2829 kernfs_remove(kn_mongrp);
2830 out_info:
2831 kernfs_remove(kn_info);
2832 out_schemata_free:
2833 schemata_list_destroy();
2834 out_ctx:
2835 rdt_disable_ctx();
2836 out_root:
2837 rdtgroup_destroy_root();
2838 out:
2839 rdt_last_cmd_clear();
2840 mutex_unlock(&rdtgroup_mutex);
2841 cpus_read_unlock();
2842 return ret;
2843 }
2844
2845 enum rdt_param {
2846 Opt_cdp,
2847 Opt_cdpl2,
2848 Opt_mba_mbps,
2849 Opt_debug,
2850 nr__rdt_params
2851 };
2852
2853 static const struct fs_parameter_spec rdt_fs_parameters[] = {
2854 fsparam_flag("cdp", Opt_cdp),
2855 fsparam_flag("cdpl2", Opt_cdpl2),
2856 fsparam_flag("mba_MBps", Opt_mba_mbps),
2857 fsparam_flag("debug", Opt_debug),
2858 {}
2859 };
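
/*
 * Example mount invocations exercising these options (illustrative;
 * /sys/fs/resctrl is the conventional mount point):
 *
 *   # mount -t resctrl resctrl /sys/fs/resctrl
 *   # mount -t resctrl resctrl -o cdp,mba_MBps /sys/fs/resctrl
 *   # mount -t resctrl resctrl -o cdpl2,debug /sys/fs/resctrl
 */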
2860
2861 static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
2862 {
2863 struct rdt_fs_context *ctx = rdt_fc2context(fc);
2864 struct fs_parse_result result;
2865 const char *msg;
2866 int opt;
2867
2868 opt = fs_parse(fc, rdt_fs_parameters, param, &result);
2869 if (opt < 0)
2870 return opt;
2871
2872 switch (opt) {
2873 case Opt_cdp:
2874 ctx->enable_cdpl3 = true;
2875 return 0;
2876 case Opt_cdpl2:
2877 ctx->enable_cdpl2 = true;
2878 return 0;
2879 case Opt_mba_mbps:
2880 msg = "mba_MBps requires MBM and linear scale MBA at L3 scope";
2881 if (!supports_mba_mbps())
2882 return invalfc(fc, msg);
2883 ctx->enable_mba_mbps = true;
2884 return 0;
2885 case Opt_debug:
2886 ctx->enable_debug = true;
2887 return 0;
2888 }
2889
2890 return -EINVAL;
2891 }
2892
2893 static void rdt_fs_context_free(struct fs_context *fc)
2894 {
2895 struct rdt_fs_context *ctx = rdt_fc2context(fc);
2896
2897 kernfs_free_fs_context(fc);
2898 kfree(ctx);
2899 }
2900
2901 static const struct fs_context_operations rdt_fs_context_ops = {
2902 .free = rdt_fs_context_free,
2903 .parse_param = rdt_parse_param,
2904 .get_tree = rdt_get_tree,
2905 };
2906
2907 static int rdt_init_fs_context(struct fs_context *fc)
2908 {
2909 struct rdt_fs_context *ctx;
2910
2911 ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
2912 if (!ctx)
2913 return -ENOMEM;
2914
2915 ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
2916 fc->fs_private = &ctx->kfc;
2917 fc->ops = &rdt_fs_context_ops;
2918 put_user_ns(fc->user_ns);
2919 fc->user_ns = get_user_ns(&init_user_ns);
2920 fc->global = true;
2921 return 0;
2922 }
2923
2924 void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
2925 {
2926 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
2927 struct rdt_hw_ctrl_domain *hw_dom;
2928 struct msr_param msr_param;
2929 struct rdt_ctrl_domain *d;
2930 int i;
2931
2932 /* Walking r->domains, ensure it can't race with cpuhp */
2933 lockdep_assert_cpus_held();
2934
2935 msr_param.res = r;
2936 msr_param.low = 0;
2937 msr_param.high = hw_res->num_closid;
2938
2939 /*
2940 * Disable resource control for this resource by resetting all
2941 * control values in all ctrl_domains to their defaults. Pick one CPU
2942 * from each domain to update the MSRs below.
2943 */
2944 list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
2945 hw_dom = resctrl_to_arch_ctrl_dom(d);
2946
2947 for (i = 0; i < hw_res->num_closid; i++)
2948 hw_dom->ctrl_val[i] = resctrl_get_default_ctrl(r);
2949 msr_param.dom = d;
2950 smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
2951 }
2952
2953 return;
2954 }
2955
2956 /*
2957 * Move tasks from one group to the other. If @from is NULL, then all tasks
2958 * in the system are moved unconditionally (used for teardown).
2959 *
2960 * If @mask is not NULL the cpus on which moved tasks are running are set
2961 * in that mask so the update smp function call is restricted to affected
2962 * cpus.
2963 */
2964 static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
2965 struct cpumask *mask)
2966 {
2967 struct task_struct *p, *t;
2968
2969 read_lock(&tasklist_lock);
2970 for_each_process_thread(p, t) {
2971 if (!from || is_closid_match(t, from) ||
2972 is_rmid_match(t, from)) {
2973 resctrl_arch_set_closid_rmid(t, to->closid,
2974 to->mon.rmid);
2975
2976 /*
2977 * Order the closid/rmid stores above before the loads
2978 * in task_curr(). This pairs with the full barrier
2979 * between the rq->curr update and resctrl_sched_in()
2980 * during context switch.
2981 */
2982 smp_mb();
2983
2984 /*
2985 * If the task is on a CPU, set the CPU in the mask.
2986 * The detection is inaccurate as tasks might move or
2987 * schedule before the smp function call takes place.
2988 * In such a case the function call is pointless, but
2989 * there is no other side effect.
2990 */
2991 if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
2992 cpumask_set_cpu(task_cpu(t), mask);
2993 }
2994 }
2995 read_unlock(&tasklist_lock);
2996 }
2997
2998 static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
2999 {
3000 struct rdtgroup *sentry, *stmp;
3001 struct list_head *head;
3002
3003 head = &rdtgrp->mon.crdtgrp_list;
3004 list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
3005 free_rmid(sentry->closid, sentry->mon.rmid);
3006 list_del(&sentry->mon.crdtgrp_list);
3007
3008 if (atomic_read(&sentry->waitcount) != 0)
3009 sentry->flags = RDT_DELETED;
3010 else
3011 rdtgroup_remove(sentry);
3012 }
3013 }
3014
3015 /*
3016 * Forcibly remove all subdirectories under the root directory.
3017 */
3018 static void rmdir_all_sub(void)
3019 {
3020 struct rdtgroup *rdtgrp, *tmp;
3021
3022 /* Move all tasks to the default resource group */
3023 rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
3024
3025 list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
3026 /* Free any child rmids */
3027 free_all_child_rdtgrp(rdtgrp);
3028
3029 /* Remove each rdtgroup other than root */
3030 if (rdtgrp == &rdtgroup_default)
3031 continue;
3032
3033 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3034 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
3035 rdtgroup_pseudo_lock_remove(rdtgrp);
3036
3037 /*
3038 * Give any CPUs back to the default group. We cannot copy
3039 * cpu_online_mask because a CPU might have executed the
3040 * offline callback already, but is still marked online.
3041 */
3042 cpumask_or(&rdtgroup_default.cpu_mask,
3043 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
3044
3045 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3046
3047 kernfs_remove(rdtgrp->kn);
3048 list_del(&rdtgrp->rdtgroup_list);
3049
3050 if (atomic_read(&rdtgrp->waitcount) != 0)
3051 rdtgrp->flags = RDT_DELETED;
3052 else
3053 rdtgroup_remove(rdtgrp);
3054 }
3055 /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
3056 update_closid_rmid(cpu_online_mask, &rdtgroup_default);
3057
3058 kernfs_remove(kn_info);
3059 kernfs_remove(kn_mongrp);
3060 kernfs_remove(kn_mondata);
3061 }
3062
3063 static void rdt_kill_sb(struct super_block *sb)
3064 {
3065 struct rdt_resource *r;
3066
3067 cpus_read_lock();
3068 mutex_lock(&rdtgroup_mutex);
3069
3070 rdt_disable_ctx();
3071
3072 /* Put everything back to default values. */
3073 for_each_alloc_capable_rdt_resource(r)
3074 resctrl_arch_reset_all_ctrls(r);
3075
3076 rmdir_all_sub();
3077 rdt_pseudo_lock_release();
3078 rdtgroup_default.mode = RDT_MODE_SHAREABLE;
3079 schemata_list_destroy();
3080 rdtgroup_destroy_root();
3081 if (resctrl_arch_alloc_capable())
3082 resctrl_arch_disable_alloc();
3083 if (resctrl_arch_mon_capable())
3084 resctrl_arch_disable_mon();
3085 resctrl_mounted = false;
3086 kernfs_kill_sb(sb);
3087 mutex_unlock(&rdtgroup_mutex);
3088 cpus_read_unlock();
3089 }
3090
3091 static struct file_system_type rdt_fs_type = {
3092 .name = "resctrl",
3093 .init_fs_context = rdt_init_fs_context,
3094 .parameters = rdt_fs_parameters,
3095 .kill_sb = rdt_kill_sb,
3096 };
3097
3098 static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
3099 void *priv)
3100 {
3101 struct kernfs_node *kn;
3102 int ret = 0;
3103
3104 kn = __kernfs_create_file(parent_kn, name, 0444,
3105 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
3106 &kf_mondata_ops, priv, NULL, NULL);
3107 if (IS_ERR(kn))
3108 return PTR_ERR(kn);
3109
3110 ret = rdtgroup_kn_set_ugid(kn);
3111 if (ret) {
3112 kernfs_remove(kn);
3113 return ret;
3114 }
3115
3116 return ret;
3117 }
3118
3119 static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname)
3120 {
3121 struct kernfs_node *kn;
3122
3123 kn = kernfs_find_and_get(pkn, name);
3124 if (!kn)
3125 return;
3126 kernfs_put(kn);
3127
3128 if (kn->dir.subdirs <= 1)
3129 kernfs_remove(kn);
3130 else
3131 kernfs_remove_by_name(kn, subname);
3132 }
3133
3134 /*
3135 * Remove all subdirectories of mon_data of ctrl_mon groups
3136 * and monitor groups for the given domain.
3137 * Remove files and directories containing "sum" of domain data
3138 * when the last domain being summed is removed.
3139 */
3140 static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
3141 struct rdt_mon_domain *d)
3142 {
3143 struct rdtgroup *prgrp, *crgrp;
3144 char subname[32];
3145 bool snc_mode;
3146 char name[32];
3147
3148 snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3149 sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3150 if (snc_mode)
3151 sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
3152
3153 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
3154 mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname);
3155
3156 list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
3157 mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname);
3158 }
3159 }
3160
3161 static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
3162 struct rdt_resource *r, struct rdtgroup *prgrp,
3163 bool do_sum)
3164 {
3165 struct rmid_read rr = {0};
3166 union mon_data_bits priv;
3167 struct mon_evt *mevt;
3168 int ret;
3169
3170 if (WARN_ON(list_empty(&r->evt_list)))
3171 return -EPERM;
3172
3173 priv.u.rid = r->rid;
3174 priv.u.domid = do_sum ? d->ci->id : d->hdr.id;
3175 priv.u.sum = do_sum;
3176 list_for_each_entry(mevt, &r->evt_list, list) {
3177 priv.u.evtid = mevt->evtid;
3178 ret = mon_addfile(kn, mevt->name, priv.priv);
3179 if (ret)
3180 return ret;
3181
3182 if (!do_sum && resctrl_is_mbm_event(mevt->evtid))
3183 mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
3184 }
3185
3186 return 0;
3187 }
3188
3189 static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
3190 struct rdt_mon_domain *d,
3191 struct rdt_resource *r, struct rdtgroup *prgrp)
3192 {
3193 struct kernfs_node *kn, *ckn;
3194 char name[32];
3195 bool snc_mode;
3196 int ret = 0;
3197
3198 lockdep_assert_held(&rdtgroup_mutex);
3199
3200 snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3201 sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3202 kn = kernfs_find_and_get(parent_kn, name);
3203 if (kn) {
3204 /*
3205 * rdtgroup_mutex will prevent this directory from being
3206 * removed. No need to keep this hold.
3207 */
3208 kernfs_put(kn);
3209 } else {
3210 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
3211 if (IS_ERR(kn))
3212 return PTR_ERR(kn);
3213
3214 ret = rdtgroup_kn_set_ugid(kn);
3215 if (ret)
3216 goto out_destroy;
3217 ret = mon_add_all_files(kn, d, r, prgrp, snc_mode);
3218 if (ret)
3219 goto out_destroy;
3220 }
3221
3222 if (snc_mode) {
3223 sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id);
3224 ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp);
3225 if (IS_ERR(ckn)) {
3226 ret = -EINVAL;
3227 goto out_destroy;
3228 }
3229
3230 ret = rdtgroup_kn_set_ugid(ckn);
3231 if (ret)
3232 goto out_destroy;
3233
3234 ret = mon_add_all_files(ckn, d, r, prgrp, false);
3235 if (ret)
3236 goto out_destroy;
3237 }
3238
3239 kernfs_activate(kn);
3240 return 0;
3241
3242 out_destroy:
3243 kernfs_remove(kn);
3244 return ret;
3245 }
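
/*
 * Layout sketch (illustrative names) for the SNC case handled above, where
 * mon_scope == RESCTRL_L3_NODE: the cache-scoped directory carries the
 * summed event files plus one subdirectory per SNC node with the raw counts:
 *
 *   mon_data/mon_L3_00/                 <- files created with do_sum = true
 *   mon_data/mon_L3_00/mon_sub_L3_00/   <- per-node files, do_sum = false
 *   mon_data/mon_L3_00/mon_sub_L3_01/
 */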
3246
3247 /*
3248 * Add all subdirectories of mon_data for "ctrl_mon" groups
3249 * and "monitor" groups with the given domain id.
3250 */
3251 static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
3252 struct rdt_mon_domain *d)
3253 {
3254 struct kernfs_node *parent_kn;
3255 struct rdtgroup *prgrp, *crgrp;
3256 struct list_head *head;
3257
3258 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
3259 parent_kn = prgrp->mon.mon_data_kn;
3260 mkdir_mondata_subdir(parent_kn, d, r, prgrp);
3261
3262 head = &prgrp->mon.crdtgrp_list;
3263 list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
3264 parent_kn = crgrp->mon.mon_data_kn;
3265 mkdir_mondata_subdir(parent_kn, d, r, crgrp);
3266 }
3267 }
3268 }
3269
3270 static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
3271 struct rdt_resource *r,
3272 struct rdtgroup *prgrp)
3273 {
3274 struct rdt_mon_domain *dom;
3275 int ret;
3276
3277 /* Walking r->domains, ensure it can't race with cpuhp */
3278 lockdep_assert_cpus_held();
3279
3280 list_for_each_entry(dom, &r->mon_domains, hdr.list) {
3281 ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
3282 if (ret)
3283 return ret;
3284 }
3285
3286 return 0;
3287 }
3288
3289 /*
3290 * This creates a directory mon_data which contains the monitored data.
3291 *
3292 * mon_data has one directory for each domain, named
3293 * in the format mon_<domain_name>_<domain_id>. For example, mon_data
3294 * with an L3 domain looks as below:
3295 * ./mon_data:
3296 * mon_L3_00
3297 * mon_L3_01
3298 * mon_L3_02
3299 * ...
3300 *
3301 * Each domain directory has one file per event:
3302 * ./mon_L3_00/:
3303 * llc_occupancy
3304 *
3305 */
3306 static int mkdir_mondata_all(struct kernfs_node *parent_kn,
3307 struct rdtgroup *prgrp,
3308 struct kernfs_node **dest_kn)
3309 {
3310 struct rdt_resource *r;
3311 struct kernfs_node *kn;
3312 int ret;
3313
3314 /*
3315 * Create the mon_data directory first.
3316 */
3317 ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
3318 if (ret)
3319 return ret;
3320
3321 if (dest_kn)
3322 *dest_kn = kn;
3323
3324 /*
3325 * Create the subdirectories for each domain. Note that all events
3326 * in a domain like L3 are grouped into a resource whose domain is L3
3327 */
3328 for_each_mon_capable_rdt_resource(r) {
3329 ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
3330 if (ret)
3331 goto out_destroy;
3332 }
3333
3334 return 0;
3335
3336 out_destroy:
3337 kernfs_remove(kn);
3338 return ret;
3339 }
3340
3341 /**
3342 * cbm_ensure_valid - Enforce validity on provided CBM
3343 * @_val: Candidate CBM
3344 * @r: RDT resource to which the CBM belongs
3345 *
3346 * The provided CBM represents all cache portions available for use. This
3347 * may be represented by a bitmap that does not consist of contiguous ones
3348 * and thus be an invalid CBM.
3349 * Here the provided CBM is forced to be a valid CBM by only considering
3350 * the first set of contiguous bits as valid and clearing all other bits.
3351 * The intention here is to provide a valid default CBM with which a new
3352 * resource group is initialized. The user can follow this with a
3353 * modification to the CBM if the default does not satisfy the
3354 * requirements.
3355 */
3356 static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
3357 {
3358 unsigned int cbm_len = r->cache.cbm_len;
3359 unsigned long first_bit, zero_bit;
3360 unsigned long val = _val;
3361
3362 if (!val)
3363 return 0;
3364
3365 first_bit = find_first_bit(&val, cbm_len);
3366 zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
3367
3368 /* Clear any remaining bits to ensure contiguous region */
3369 bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
3370 return (u32)val;
3371 }
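
/*
 * Worked example (illustrative values): with cbm_len = 8 and _val = 0x6c
 * (0b01101100), first_bit = 2 and zero_bit = 4, so bits 4..7 are cleared
 * and the function returns 0x0c (0b00001100): only the lowest contiguous
 * run of set bits survives.
 */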
3372
3373 /*
3374 * Initialize cache resources per RDT domain
3375 *
3376 * Set the RDT domain up to start off with all usable allocations. That is,
3377 * all shareable and unused bits. All-zero CBM is invalid.
3378 */
3379 static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s,
3380 u32 closid)
3381 {
3382 enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
3383 enum resctrl_conf_type t = s->conf_type;
3384 struct resctrl_staged_config *cfg;
3385 struct rdt_resource *r = s->res;
3386 u32 used_b = 0, unused_b = 0;
3387 unsigned long tmp_cbm;
3388 enum rdtgrp_mode mode;
3389 u32 peer_ctl, ctrl_val;
3390 int i;
3391
3392 cfg = &d->staged_config[t];
3393 cfg->have_new_ctrl = false;
3394 cfg->new_ctrl = r->cache.shareable_bits;
3395 used_b = r->cache.shareable_bits;
3396 for (i = 0; i < closids_supported(); i++) {
3397 if (closid_allocated(i) && i != closid) {
3398 mode = rdtgroup_mode_by_closid(i);
3399 if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
3400 /*
3401 * ctrl values for locksetup aren't relevant
3402 * until the schemata is written, and the mode
3403 * becomes RDT_MODE_PSEUDO_LOCKED.
3404 */
3405 continue;
3406 /*
3407 * If CDP is active include peer domain's
3408 * usage to ensure there is no overlap
3409 * with an exclusive group.
3410 */
3411 if (resctrl_arch_get_cdp_enabled(r->rid))
3412 peer_ctl = resctrl_arch_get_config(r, d, i,
3413 peer_type);
3414 else
3415 peer_ctl = 0;
3416 ctrl_val = resctrl_arch_get_config(r, d, i,
3417 s->conf_type);
3418 used_b |= ctrl_val | peer_ctl;
3419 if (mode == RDT_MODE_SHAREABLE)
3420 cfg->new_ctrl |= ctrl_val | peer_ctl;
3421 }
3422 }
3423 if (d->plr && d->plr->cbm > 0)
3424 used_b |= d->plr->cbm;
3425 unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
3426 unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
3427 cfg->new_ctrl |= unused_b;
3428 /*
3429 * Force the initial CBM to be valid; the user can
3430 * modify the CBM based on system availability.
3431 */
3432 cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
3433 /*
3434 * Assign the u32 CBM to an unsigned long to ensure that
3435 * bitmap_weight() does not access out-of-bound memory.
3436 */
3437 tmp_cbm = cfg->new_ctrl;
3438 if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
3439 rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id);
3440 return -ENOSPC;
3441 }
3442 cfg->have_new_ctrl = true;
3443
3444 return 0;
3445 }
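
/*
 * Worked example (illustrative values, CDP disabled): with cbm_len = 8,
 * shareable_bits = 0x03, one other shareable CLOSID using 0x30 and one
 * exclusive CLOSID using 0xc0: used_b = 0x03 | 0x30 | 0xc0 = 0xf3,
 * new_ctrl = 0x03 | 0x30 = 0x33, unused_b = 0x0c, so the new group is
 * staged with new_ctrl = 0x33 | 0x0c = 0x3f, which cbm_ensure_valid()
 * leaves unchanged because it is already contiguous.
 */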
3446
3447 /*
3448 * Initialize cache resources with default values.
3449 *
3450 * A new RDT group is being created on an allocation capable (CAT)
3451 * supporting system. Set this group up to start off with all usable
3452 * allocations.
3453 *
3454 * If there are no more shareable bits available on any domain then
3455 * the entire allocation will fail.
3456 */
3457 static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
3458 {
3459 struct rdt_ctrl_domain *d;
3460 int ret;
3461
3462 list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
3463 ret = __init_one_rdt_domain(d, s, closid);
3464 if (ret < 0)
3465 return ret;
3466 }
3467
3468 return 0;
3469 }
3470
3471 /* Initialize MBA resource with default values. */
3472 static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
3473 {
3474 struct resctrl_staged_config *cfg;
3475 struct rdt_ctrl_domain *d;
3476
3477 list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
3478 if (is_mba_sc(r)) {
3479 d->mbps_val[closid] = MBA_MAX_MBPS;
3480 continue;
3481 }
3482
3483 cfg = &d->staged_config[CDP_NONE];
3484 cfg->new_ctrl = resctrl_get_default_ctrl(r);
3485 cfg->have_new_ctrl = true;
3486 }
3487 }
3488
3489 /* Initialize the RDT group's allocations. */
3490 static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
3491 {
3492 struct resctrl_schema *s;
3493 struct rdt_resource *r;
3494 int ret = 0;
3495
3496 rdt_staged_configs_clear();
3497
3498 list_for_each_entry(s, &resctrl_schema_all, list) {
3499 r = s->res;
3500 if (r->rid == RDT_RESOURCE_MBA ||
3501 r->rid == RDT_RESOURCE_SMBA) {
3502 rdtgroup_init_mba(r, rdtgrp->closid);
3503 if (is_mba_sc(r))
3504 continue;
3505 } else {
3506 ret = rdtgroup_init_cat(s, rdtgrp->closid);
3507 if (ret < 0)
3508 goto out;
3509 }
3510
3511 ret = resctrl_arch_update_domains(r, rdtgrp->closid);
3512 if (ret < 0) {
3513 rdt_last_cmd_puts("Failed to initialize allocations\n");
3514 goto out;
3515 }
3516
3517 }
3518
3519 rdtgrp->mode = RDT_MODE_SHAREABLE;
3520
3521 out:
3522 rdt_staged_configs_clear();
3523 return ret;
3524 }
3525
3526 static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
3527 {
3528 int ret;
3529
3530 if (!resctrl_arch_mon_capable())
3531 return 0;
3532
3533 ret = alloc_rmid(rdtgrp->closid);
3534 if (ret < 0) {
3535 rdt_last_cmd_puts("Out of RMIDs\n");
3536 return ret;
3537 }
3538 rdtgrp->mon.rmid = ret;
3539
3540 ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
3541 if (ret) {
3542 rdt_last_cmd_puts("kernfs subdir error\n");
3543 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3544 return ret;
3545 }
3546
3547 return 0;
3548 }
3549
3550 static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
3551 {
3552 if (resctrl_arch_mon_capable())
3553 free_rmid(rgrp->closid, rgrp->mon.rmid);
3554 }
3555
3556 /*
3557 * We allow creating mon groups only within a directory called "mon_groups"
3558 * which is present in every ctrl_mon group. Check if this is a valid
3559 * "mon_groups" directory.
3560 *
3561 * 1. The directory should be named "mon_groups".
3562 * 2. The mon group itself should "not" be named "mon_groups".
3563 * This makes sure "mon_groups" directory always has a ctrl_mon group
3564 * as parent.
3565 */
3566 static bool is_mon_groups(struct kernfs_node *kn, const char *name)
3567 {
3568 return (!strcmp(rdt_kn_name(kn), "mon_groups") &&
3569 strcmp(name, "mon_groups"));
3570 }
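
/*
 * Example (illustrative paths): "mkdir /sys/fs/resctrl/grp0/mon_groups/m0"
 * satisfies this check, while
 * "mkdir /sys/fs/resctrl/grp0/mon_groups/mon_groups" does not.
 */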
3571
3572 static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
3573 const char *name, umode_t mode,
3574 enum rdt_group_type rtype, struct rdtgroup **r)
3575 {
3576 struct rdtgroup *prdtgrp, *rdtgrp;
3577 unsigned long files = 0;
3578 struct kernfs_node *kn;
3579 int ret;
3580
3581 prdtgrp = rdtgroup_kn_lock_live(parent_kn);
3582 if (!prdtgrp) {
3583 ret = -ENODEV;
3584 goto out_unlock;
3585 }
3586
3587 /*
3588 * Check that the parent directory for a monitor group is a "mon_groups"
3589 * directory.
3590 */
3591 if (rtype == RDTMON_GROUP && !is_mon_groups(parent_kn, name)) {
3592 ret = -EPERM;
3593 goto out_unlock;
3594 }
3595
3596 if (rtype == RDTMON_GROUP &&
3597 (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3598 prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
3599 ret = -EINVAL;
3600 rdt_last_cmd_puts("Pseudo-locking in progress\n");
3601 goto out_unlock;
3602 }
3603
3604 /* allocate the rdtgroup. */
3605 rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
3606 if (!rdtgrp) {
3607 ret = -ENOSPC;
3608 rdt_last_cmd_puts("Kernel out of memory\n");
3609 goto out_unlock;
3610 }
3611 *r = rdtgrp;
3612 rdtgrp->mon.parent = prdtgrp;
3613 rdtgrp->type = rtype;
3614 INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
3615
3616 /* kernfs creates the directory for rdtgrp */
3617 kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
3618 if (IS_ERR(kn)) {
3619 ret = PTR_ERR(kn);
3620 rdt_last_cmd_puts("kernfs create error\n");
3621 goto out_free_rgrp;
3622 }
3623 rdtgrp->kn = kn;
3624
3625 /*
3626 * kernfs_remove() will drop the reference count on "kn" which
3627 * will free it. But we still need it to stick around for the
3628 * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
3629 * which will be dropped by kernfs_put() in rdtgroup_remove().
3630 */
3631 kernfs_get(kn);
3632
3633 ret = rdtgroup_kn_set_ugid(kn);
3634 if (ret) {
3635 rdt_last_cmd_puts("kernfs perm error\n");
3636 goto out_destroy;
3637 }
3638
3639 if (rtype == RDTCTRL_GROUP) {
3640 files = RFTYPE_BASE | RFTYPE_CTRL;
3641 if (resctrl_arch_mon_capable())
3642 files |= RFTYPE_MON;
3643 } else {
3644 files = RFTYPE_BASE | RFTYPE_MON;
3645 }
3646
3647 ret = rdtgroup_add_files(kn, files);
3648 if (ret) {
3649 rdt_last_cmd_puts("kernfs fill error\n");
3650 goto out_destroy;
3651 }
3652
3653 /*
3654 * The caller unlocks the parent_kn upon success.
3655 */
3656 return 0;
3657
3658 out_destroy:
3659 kernfs_put(rdtgrp->kn);
3660 kernfs_remove(rdtgrp->kn);
3661 out_free_rgrp:
3662 kfree(rdtgrp);
3663 out_unlock:
3664 rdtgroup_kn_unlock(parent_kn);
3665 return ret;
3666 }
3667
3668 static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
3669 {
3670 kernfs_remove(rgrp->kn);
3671 rdtgroup_remove(rgrp);
3672 }
3673
3674 /*
3675 * Create a monitor group under "mon_groups" directory of a control
3676 * and monitor group (ctrl_mon). This is a resource group
3677 * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
3678 */
3679 static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
3680 const char *name, umode_t mode)
3681 {
3682 struct rdtgroup *rdtgrp, *prgrp;
3683 int ret;
3684
3685 ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
3686 if (ret)
3687 return ret;
3688
3689 prgrp = rdtgrp->mon.parent;
3690 rdtgrp->closid = prgrp->closid;
3691
3692 ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
3693 if (ret) {
3694 mkdir_rdt_prepare_clean(rdtgrp);
3695 goto out_unlock;
3696 }
3697
3698 kernfs_activate(rdtgrp->kn);
3699
3700 /*
3701 * Add the rdtgrp to the list of rdtgrps the parent
3702 * ctrl_mon group has to track.
3703 */
3704 list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
3705
3706 out_unlock:
3707 rdtgroup_kn_unlock(parent_kn);
3708 return ret;
3709 }
3710
3711 /*
3712 * These are rdtgroups created under the root directory. Can be used
3713 * to allocate and monitor resources.
3714 */
3715 static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
3716 const char *name, umode_t mode)
3717 {
3718 struct rdtgroup *rdtgrp;
3719 struct kernfs_node *kn;
3720 u32 closid;
3721 int ret;
3722
3723 ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
3724 if (ret)
3725 return ret;
3726
3727 kn = rdtgrp->kn;
3728 ret = closid_alloc();
3729 if (ret < 0) {
3730 rdt_last_cmd_puts("Out of CLOSIDs\n");
3731 goto out_common_fail;
3732 }
3733 closid = ret;
3734 ret = 0;
3735
3736 rdtgrp->closid = closid;
3737
3738 ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
3739 if (ret)
3740 goto out_closid_free;
3741
3742 kernfs_activate(rdtgrp->kn);
3743
3744 ret = rdtgroup_init_alloc(rdtgrp);
3745 if (ret < 0)
3746 goto out_rmid_free;
3747
3748 list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
3749
3750 if (resctrl_arch_mon_capable()) {
3751 /*
3752 * Create an empty mon_groups directory to hold the subset
3753 * of tasks and cpus to monitor.
3754 */
3755 ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
3756 if (ret) {
3757 rdt_last_cmd_puts("kernfs subdir error\n");
3758 goto out_del_list;
3759 }
3760 if (is_mba_sc(NULL))
3761 rdtgrp->mba_mbps_event = mba_mbps_default_event;
3762 }
3763
3764 goto out_unlock;
3765
3766 out_del_list:
3767 list_del(&rdtgrp->rdtgroup_list);
3768 out_rmid_free:
3769 mkdir_rdt_prepare_rmid_free(rdtgrp);
3770 out_closid_free:
3771 closid_free(closid);
3772 out_common_fail:
3773 mkdir_rdt_prepare_clean(rdtgrp);
3774 out_unlock:
3775 rdtgroup_kn_unlock(parent_kn);
3776 return ret;
3777 }
3778
3779 static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
3780 umode_t mode)
3781 {
3782 /* Do not accept '\n' in the name to avoid an unparsable situation. */
3783 if (strchr(name, '\n'))
3784 return -EINVAL;
3785
3786 /*
3787 * If the parent directory is the root directory and RDT
3788 * allocation is supported, add a control and monitoring
3789 * subdirectory.
3790 */
3791 if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn)
3792 return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
3793
3794 /* Else, attempt to add a monitoring subdirectory. */
3795 if (resctrl_arch_mon_capable())
3796 return rdtgroup_mkdir_mon(parent_kn, name, mode);
3797
3798 return -EPERM;
3799 }
3800
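/*
 * Remove a MON group: return its tasks and CPUs to the parent CTRL_MON
 * group, update the per-CPU defaults on the affected CPUs, free the group's
 * RMID and remove its directory. RDT_DELETED marks the group for later
 * freeing once the remaining references are dropped.
 */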
3801 static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3802 {
3803 struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3804 u32 closid, rmid;
3805 int cpu;
3806
3807 /* Give any tasks back to the parent group */
3808 rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
3809
3810 /*
3811 * Update per cpu closid/rmid of the moved CPUs first.
3812 * Note: the closid will not change, but the arch code still needs it.
3813 */
3814 closid = prdtgrp->closid;
3815 rmid = prdtgrp->mon.rmid;
3816 for_each_cpu(cpu, &rdtgrp->cpu_mask)
3817 resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
3818
3819 /*
3820 * Update the MSR on moved CPUs and on CPUs which have a moved
3821 * task running on them.
3822 */
3823 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3824 update_closid_rmid(tmpmask, NULL);
3825
3826 rdtgrp->flags = RDT_DELETED;
3827 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3828
3829 /*
3830 * Remove the rdtgrp from the parent ctrl_mon group's list
3831 */
3832 WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3833 list_del(&rdtgrp->mon.crdtgrp_list);
3834
3835 kernfs_remove(rdtgrp->kn);
3836
3837 return 0;
3838 }
3839
3840 static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
3841 {
3842 rdtgrp->flags = RDT_DELETED;
3843 list_del(&rdtgrp->rdtgroup_list);
3844
3845 kernfs_remove(rdtgrp->kn);
3846 return 0;
3847 }
3848
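/*
 * Remove a CTRL_MON group: return its tasks and CPUs to the default group,
 * update the per-CPU defaults, free the group's RMID and CLOSID, and free
 * the RMIDs of all of its child MON groups.
 */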
3849 static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3850 {
3851 u32 closid, rmid;
3852 int cpu;
3853
3854 /* Give any tasks back to the default group */
3855 rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
3856
3857 /* Give any CPUs back to the default group */
3858 cpumask_or(&rdtgroup_default.cpu_mask,
3859 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
3860
3861 /* Update per cpu closid and rmid of the moved CPUs first */
3862 closid = rdtgroup_default.closid;
3863 rmid = rdtgroup_default.mon.rmid;
3864 for_each_cpu(cpu, &rdtgrp->cpu_mask)
3865 resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
3866
3867 /*
3868 * Update the MSR on moved CPUs and on CPUs which have a moved
3869 * task running on them.
3870 */
3871 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3872 update_closid_rmid(tmpmask, NULL);
3873
3874 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3875 closid_free(rdtgrp->closid);
3876
3877 rdtgroup_ctrl_remove(rdtgrp);
3878
3879 /*
3880 * Free all the child monitor group rmids.
3881 */
3882 free_all_child_rdtgrp(rdtgrp);
3883
3884 return 0;
3885 }
3886
3887 static struct kernfs_node *rdt_kn_parent(struct kernfs_node *kn)
3888 {
3889 /*
3890 * The returned pointer is valid within the RCU read-side section in which
3891 * it was obtained, or while rdtgroup_mutex is held.
3892 */
3893 return rcu_dereference_check(kn->__parent, lockdep_is_held(&rdtgroup_mutex));
3894 }
3895
3896 static int rdtgroup_rmdir(struct kernfs_node *kn)
3897 {
3898 struct kernfs_node *parent_kn;
3899 struct rdtgroup *rdtgrp;
3900 cpumask_var_t tmpmask;
3901 int ret = 0;
3902
3903 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
3904 return -ENOMEM;
3905
3906 rdtgrp = rdtgroup_kn_lock_live(kn);
3907 if (!rdtgrp) {
3908 ret = -EPERM;
3909 goto out;
3910 }
3911 parent_kn = rdt_kn_parent(kn);
3912
3913 /*
3914 * If the rdtgroup is a ctrl_mon group and parent directory
3915 * is the root directory, remove the ctrl_mon group.
3916 *
3917 * If the rdtgroup is a mon group and parent directory
3918 * is a valid "mon_groups" directory, remove the mon group.
3919 */
3920 if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
3921 rdtgrp != &rdtgroup_default) {
3922 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3923 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
3924 ret = rdtgroup_ctrl_remove(rdtgrp);
3925 } else {
3926 ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
3927 }
3928 } else if (rdtgrp->type == RDTMON_GROUP &&
3929 is_mon_groups(parent_kn, rdt_kn_name(kn))) {
3930 ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
3931 } else {
3932 ret = -EPERM;
3933 }
3934
3935 out:
3936 rdtgroup_kn_unlock(kn);
3937 free_cpumask_var(tmpmask);
3938 return ret;
3939 }
3940
3941 /**
3942 * mongrp_reparent() - replace parent CTRL_MON group of a MON group
3943 * @rdtgrp: the MON group whose parent should be replaced
3944 * @new_prdtgrp: replacement parent CTRL_MON group for @rdtgrp
3945 * @cpus: cpumask provided by the caller for use during this call
3946 *
3947 * Replaces the parent CTRL_MON group for a MON group, resulting in all member
3948 * tasks' CLOSID immediately changing to that of the new parent group.
3949 * Monitoring data for the group is unaffected by this operation.
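 *
 * This backs the rename of a MON group directory from user space, e.g.
 * (illustrative names, assuming resctrl is mounted at /sys/fs/resctrl):
 *
 *	mv /sys/fs/resctrl/g1/mon_groups/m1 /sys/fs/resctrl/g2/mon_groups/m1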
3950 */
3951 static void mongrp_reparent(struct rdtgroup *rdtgrp,
3952 struct rdtgroup *new_prdtgrp,
3953 cpumask_var_t cpus)
3954 {
3955 struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3956
3957 WARN_ON(rdtgrp->type != RDTMON_GROUP);
3958 WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP);
3959
3960 /* Nothing to do when simply renaming a MON group. */
3961 if (prdtgrp == new_prdtgrp)
3962 return;
3963
3964 WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3965 list_move_tail(&rdtgrp->mon.crdtgrp_list,
3966 &new_prdtgrp->mon.crdtgrp_list);
3967
3968 rdtgrp->mon.parent = new_prdtgrp;
3969 rdtgrp->closid = new_prdtgrp->closid;
3970
3971 /* Propagate updated closid to all tasks in this group. */
3972 rdt_move_group_tasks(rdtgrp, rdtgrp, cpus);
3973
3974 update_closid_rmid(cpus, NULL);
3975 }
3976
3977 static int rdtgroup_rename(struct kernfs_node *kn,
3978 struct kernfs_node *new_parent, const char *new_name)
3979 {
3980 struct kernfs_node *kn_parent;
3981 struct rdtgroup *new_prdtgrp;
3982 struct rdtgroup *rdtgrp;
3983 cpumask_var_t tmpmask;
3984 int ret;
3985
3986 rdtgrp = kernfs_to_rdtgroup(kn);
3987 new_prdtgrp = kernfs_to_rdtgroup(new_parent);
3988 if (!rdtgrp || !new_prdtgrp)
3989 return -ENOENT;
3990
3991 /* Release both kernfs active_refs before obtaining rdtgroup mutex. */
3992 rdtgroup_kn_get(rdtgrp, kn);
3993 rdtgroup_kn_get(new_prdtgrp, new_parent);
3994
3995 mutex_lock(&rdtgroup_mutex);
3996
3997 rdt_last_cmd_clear();
3998
3999 /*
4000 * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if
4001 * either kernfs_node is a file.
4002 */
4003 if (kernfs_type(kn) != KERNFS_DIR ||
4004 kernfs_type(new_parent) != KERNFS_DIR) {
4005 rdt_last_cmd_puts("Source and destination must be directories\n");
4006 ret = -EPERM;
4007 goto out;
4008 }
4009
4010 if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
4011 ret = -ENOENT;
4012 goto out;
4013 }
4014
4015 kn_parent = rdt_kn_parent(kn);
4016 if (rdtgrp->type != RDTMON_GROUP || !kn_parent ||
4017 !is_mon_groups(kn_parent, rdt_kn_name(kn))) {
4018 rdt_last_cmd_puts("Source must be a MON group\n");
4019 ret = -EPERM;
4020 goto out;
4021 }
4022
4023 if (!is_mon_groups(new_parent, new_name)) {
4024 rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n");
4025 ret = -EPERM;
4026 goto out;
4027 }
4028
4029 /*
4030 * If the MON group is monitoring CPUs, the CPUs must be assigned to the
4031 * current parent CTRL_MON group and therefore cannot be assigned to
4032 * the new parent, making the move illegal.
4033 */
4034 if (!cpumask_empty(&rdtgrp->cpu_mask) &&
4035 rdtgrp->mon.parent != new_prdtgrp) {
4036 rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n");
4037 ret = -EPERM;
4038 goto out;
4039 }
4040
4041 /*
4042 * Allocate the cpumask for use in mongrp_reparent() to avoid the
4043 * possibility of failing to allocate it after kernfs_rename() has
4044 * succeeded.
4045 */
4046 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) {
4047 ret = -ENOMEM;
4048 goto out;
4049 }
4050
4051 /*
4052 * Perform all input validation and allocations needed to ensure
4053 * mongrp_reparent() will succeed before calling kernfs_rename(),
4054 * otherwise it would be necessary to revert this call if
4055 * mongrp_reparent() failed.
4056 */
4057 ret = kernfs_rename(kn, new_parent, new_name);
4058 if (!ret)
4059 mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask);
4060
4061 free_cpumask_var(tmpmask);
4062
4063 out:
4064 mutex_unlock(&rdtgroup_mutex);
4065 rdtgroup_kn_put(rdtgrp, kn);
4066 rdtgroup_kn_put(new_prdtgrp, new_parent);
4067 return ret;
4068 }
4069
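/*
 * Report the mount options that are in effect, e.g. in the options field of
 * /proc/mounts. A line such as (illustrative):
 *
 *	resctrl /sys/fs/resctrl resctrl rw,cdp 0 0
 *
 * indicates that resctrl was mounted with the "cdp" option.
 */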
4070 static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
4071 {
4072 if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
4073 seq_puts(seq, ",cdp");
4074
4075 if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
4076 seq_puts(seq, ",cdpl2");
4077
4078 if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA)))
4079 seq_puts(seq, ",mba_MBps");
4080
4081 if (resctrl_debug)
4082 seq_puts(seq, ",debug");
4083
4084 return 0;
4085 }
4086
4087 static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
4088 .mkdir = rdtgroup_mkdir,
4089 .rmdir = rdtgroup_rmdir,
4090 .rename = rdtgroup_rename,
4091 .show_options = rdtgroup_show_options,
4092 };
4093
4094 static int rdtgroup_setup_root(struct rdt_fs_context *ctx)
4095 {
4096 rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
4097 KERNFS_ROOT_CREATE_DEACTIVATED |
4098 KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
4099 &rdtgroup_default);
4100 if (IS_ERR(rdt_root))
4101 return PTR_ERR(rdt_root);
4102
4103 ctx->kfc.root = rdt_root;
4104 rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
4105
4106 return 0;
4107 }
4108
4109 static void rdtgroup_destroy_root(void)
4110 {
4111 kernfs_destroy_root(rdt_root);
4112 rdtgroup_default.kn = NULL;
4113 }
4114
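/*
 * Initialise the default resource group. It uses the reserved CLOSID and
 * RMID, exists for the lifetime of the system and cannot be removed via
 * rmdir (rdtgroup_rmdir() refuses to remove &rdtgroup_default).
 */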
4115 static void __init rdtgroup_setup_default(void)
4116 {
4117 mutex_lock(&rdtgroup_mutex);
4118
4119 rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
4120 rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
4121 rdtgroup_default.type = RDTCTRL_GROUP;
4122 INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
4123
4124 list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
4125
4126 mutex_unlock(&rdtgroup_mutex);
4127 }
4128
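/*
 * Free the per-domain monitoring state that was allocated by
 * domain_setup_mon_state().
 */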
4129 static void domain_destroy_mon_state(struct rdt_mon_domain *d)
4130 {
4131 bitmap_free(d->rmid_busy_llc);
4132 kfree(d->mbm_total);
4133 kfree(d->mbm_local);
4134 }
4135
4136 void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
4137 {
4138 mutex_lock(&rdtgroup_mutex);
4139
4140 if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
4141 mba_sc_domain_destroy(r, d);
4142
4143 mutex_unlock(&rdtgroup_mutex);
4144 }
4145
4146 void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
4147 {
4148 mutex_lock(&rdtgroup_mutex);
4149
4150 /*
4151 * If resctrl is mounted, remove all the
4152 * per-domain monitor data directories.
4153 */
4154 if (resctrl_mounted && resctrl_arch_mon_capable())
4155 rmdir_mondata_subdir_allrdtgrp(r, d);
4156
4157 if (resctrl_is_mbm_enabled())
4158 cancel_delayed_work(&d->mbm_over);
4159 if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) {
4160 /*
4161 * When a package is going down, forcefully
4162 * decrement rmid->ebusy. There is no way to know
4163 * that the L3 was flushed and hence may lead to
4164 * incorrect counts in rare scenarios, but leaving
4165 * the RMID as busy creates RMID leaks if the
4166 * package never comes back.
4167 */
4168 __check_limbo(d, true);
4169 cancel_delayed_work(&d->cqm_limbo);
4170 }
4171
4172 domain_destroy_mon_state(d);
4173
4174 mutex_unlock(&rdtgroup_mutex);
4175 }
4176
4177 /**
4178 * domain_setup_mon_state() - Initialise domain monitoring structures.
4179 * @r: The resource for the newly online domain.
4180 * @d: The newly online domain.
4181 *
4182 * Allocate monitor resources that belong to this domain.
4183 * Called when the first CPU of a domain comes online, regardless of whether
4184 * the filesystem is mounted.
4185 * During boot this may be called before global allocations have been made by
4186 * resctrl_mon_resource_init().
4187 *
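 * On failure, any allocations already made by this function are freed before
 * returning, so the caller does not need to unwind them.
 *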
4188 * Returns 0 for success, or -ENOMEM.
4189 */
4190 static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
4191 {
4192 u32 idx_limit = resctrl_arch_system_num_rmid_idx();
4193 size_t tsize;
4194
4195 if (resctrl_arch_is_llc_occupancy_enabled()) {
4196 d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
4197 if (!d->rmid_busy_llc)
4198 return -ENOMEM;
4199 }
4200 if (resctrl_arch_is_mbm_total_enabled()) {
4201 tsize = sizeof(*d->mbm_total);
4202 d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
4203 if (!d->mbm_total) {
4204 bitmap_free(d->rmid_busy_llc);
4205 return -ENOMEM;
4206 }
4207 }
4208 if (resctrl_arch_is_mbm_local_enabled()) {
4209 tsize = sizeof(*d->mbm_local);
4210 d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
4211 if (!d->mbm_local) {
4212 bitmap_free(d->rmid_busy_llc);
4213 kfree(d->mbm_total);
4214 return -ENOMEM;
4215 }
4216 }
4217
4218 return 0;
4219 }
4220
4221 int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
4222 {
4223 int err = 0;
4224
4225 mutex_lock(&rdtgroup_mutex);
4226
4227 if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
4228 /* RDT_RESOURCE_MBA is never mon_capable */
4229 err = mba_sc_domain_allocate(r, d);
4230 }
4231
4232 mutex_unlock(&rdtgroup_mutex);
4233
4234 return err;
4235 }
4236
4237 int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
4238 {
4239 int err;
4240
4241 mutex_lock(&rdtgroup_mutex);
4242
4243 err = domain_setup_mon_state(r, d);
4244 if (err)
4245 goto out_unlock;
4246
4247 if (resctrl_is_mbm_enabled()) {
4248 INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
4249 mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
4250 RESCTRL_PICK_ANY_CPU);
4251 }
4252
4253 if (resctrl_arch_is_llc_occupancy_enabled())
4254 INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
4255
4256 /*
4257 * If the filesystem is not mounted then only the default resource group
4258 * exists. Creation of its directories is deferred until mount time
4259 * by rdt_get_tree() calling mkdir_mondata_all().
4260 * If resctrl is mounted, add per-domain monitor data directories.
4261 */
4262 if (resctrl_mounted && resctrl_arch_mon_capable())
4263 mkdir_mondata_subdir_allrdtgrp(r, d);
4264
4265 out_unlock:
4266 mutex_unlock(&rdtgroup_mutex);
4267
4268 return err;
4269 }
4270
4271 void resctrl_online_cpu(unsigned int cpu)
4272 {
4273 mutex_lock(&rdtgroup_mutex);
4274 /* A newly onlined CPU is added to the default rdtgroup. */
4275 cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
4276 mutex_unlock(&rdtgroup_mutex);
4277 }
4278
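/*
 * A CPU can be a member of at most one MON group of a CTRL_MON group, so the
 * search of @r's child list stops once the CPU has been cleared.
 */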
4279 static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
4280 {
4281 struct rdtgroup *cr;
4282
4283 list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
4284 if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
4285 break;
4286 }
4287 }
4288
4289 static struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu,
4290 struct rdt_resource *r)
4291 {
4292 struct rdt_mon_domain *d;
4293
4294 lockdep_assert_cpus_held();
4295
4296 list_for_each_entry(d, &r->mon_domains, hdr.list) {
4297 /* Find the domain that contains this CPU */
4298 if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
4299 return d;
4300 }
4301
4302 return NULL;
4303 }
4304
4305 void resctrl_offline_cpu(unsigned int cpu)
4306 {
4307 struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3);
4308 struct rdt_mon_domain *d;
4309 struct rdtgroup *rdtgrp;
4310
4311 mutex_lock(&rdtgroup_mutex);
4312 list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
4313 if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
4314 clear_childcpus(rdtgrp, cpu);
4315 break;
4316 }
4317 }
4318
4319 if (!l3->mon_capable)
4320 goto out_unlock;
4321
4322 d = get_mon_domain_from_cpu(cpu, l3);
4323 if (d) {
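/*
 * If this CPU was running the MBM overflow or limbo handler for the
 * domain, move that work to another online CPU of the domain so it
 * keeps running after this CPU goes offline.
 */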
4324 if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) {
4325 cancel_delayed_work(&d->mbm_over);
4326 mbm_setup_overflow_handler(d, 0, cpu);
4327 }
4328 if (resctrl_arch_is_llc_occupancy_enabled() &&
4329 cpu == d->cqm_work_cpu && has_busy_rmid(d)) {
4330 cancel_delayed_work(&d->cqm_limbo);
4331 cqm_setup_limbo_handler(d, 0, cpu);
4332 }
4333 }
4334
4335 out_unlock:
4336 mutex_unlock(&rdtgroup_mutex);
4337 }
4338
4339 /*
4340 * resctrl_init - resctrl filesystem initialization
4341 *
4342 * Set up the resctrl filesystem: initialize the default resource group,
4343 * create the mount point, and register the resctrl filesystem type.
4344 *
4345 * Return: 0 on success or -errno
4346 */
4347 int __init resctrl_init(void)
4348 {
4349 int ret = 0;
4350
4351 seq_buf_init(&last_cmd_status, last_cmd_status_buf,
4352 sizeof(last_cmd_status_buf));
4353
4354 rdtgroup_setup_default();
4355
4356 thread_throttle_mode_init();
4357
4358 ret = resctrl_mon_resource_init();
4359 if (ret)
4360 return ret;
4361
4362 ret = sysfs_create_mount_point(fs_kobj, "resctrl");
4363 if (ret) {
4364 resctrl_mon_resource_exit();
4365 return ret;
4366 }
4367
4368 ret = register_filesystem(&rdt_fs_type);
4369 if (ret)
4370 goto cleanup_mountpoint;
4371
4372 /*
4373 * Adding the resctrl debugfs directory here may not be ideal since it
4374 * makes the resctrl debugfs directory appear on the debugfs filesystem
4375 * before the resctrl filesystem is mounted.
4376 * On the other hand, it allows RDT to be debugged before resctrl is
4377 * mounted.
4378 * The reason why the debugfs directory is created here and not in
4379 * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
4380 * during the debugfs directory creation also &sb->s_type->i_mutex_key
4381 * (the lockdep class of inode->i_rwsem). Other filesystem
4382 * interactions (eg. SyS_getdents) have the lock ordering:
4383 * &sb->s_type->i_mutex_key --> &mm->mmap_lock
4384 * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
4385 * is taken, thus creating dependency:
4386 * &mm->mmap_lock --> rdtgroup_mutex, which together with the other
4387 * two dependencies can cause lock-ordering issues.
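 * Taken together the dependencies would form the cycle:
 * rdtgroup_mutex --> &sb->s_type->i_mutex_key --> &mm->mmap_lock
 * --> rdtgroup_mutex.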
4388 * By creating the debugfs directory here we avoid a dependency that
4389 * may cause deadlock. File operations cannot actually occur until the
4390 * filesystem is mounted, but there is no way to express that to
4391 * lockdep.
4392 */
4393 debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
4394
4395 return 0;
4396
4397 cleanup_mountpoint:
4398 sysfs_remove_mount_point(fs_kobj, "resctrl");
4399 resctrl_mon_resource_exit();
4400
4401 return ret;
4402 }
4403
4404 void __exit resctrl_exit(void)
4405 {
4406 debugfs_remove_recursive(debugfs_resctrl);
4407 unregister_filesystem(&rdt_fs_type);
4408 sysfs_remove_mount_point(fs_kobj, "resctrl");
4409
4410 resctrl_mon_resource_exit();
4411 }
4412