1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Resource Director Technology(RDT)
4 * - Monitoring code
5 *
6 * Copyright (C) 2017 Intel Corporation
7 *
8 * Author:
9 * Vikas Shivappa <vikas.shivappa@intel.com>
10 *
11 * This replaces the cqm.c based on perf but we reuse a lot of
12 * code and datastructures originally from Peter Zijlstra and Matt Fleming.
13 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
15 * Software Developer Manual June 2016, volume 3, section 17.17.
16 */
17
18 #define pr_fmt(fmt) "resctrl: " fmt
19
20 #include <linux/cpu.h>
21 #include <linux/resctrl.h>
22 #include <linux/sizes.h>
23 #include <linux/slab.h>
24
25 #include "internal.h"
26
27 #define CREATE_TRACE_POINTS
28
29 #include "monitor_trace.h"
30
31 /**
32 * struct rmid_entry - dirty tracking for all RMID.
33 * @closid: The CLOSID for this entry.
34 * @rmid: The RMID for this entry.
35 * @busy: The number of domains with cached data using this RMID.
36 * @list: Member of the rmid_free_lru list when busy == 0.
37 *
38 * Depending on the architecture the correct monitor is accessed using
39 * both @closid and @rmid, or @rmid only.
40 *
41 * Take the rdtgroup_mutex when accessing.
42 */
43 struct rmid_entry {
44 u32 closid;
45 u32 rmid;
46 int busy;
47 struct list_head list;
48 };
49
50 /*
51 * @rmid_free_lru - A least recently used list of free RMIDs
52 * These RMIDs are guaranteed to have an occupancy less than the
53 * threshold occupancy
54 */
55 static LIST_HEAD(rmid_free_lru);
56
57 /*
58 * @closid_num_dirty_rmid The number of dirty RMID each CLOSID has.
59 * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
60 * Indexed by CLOSID. Protected by rdtgroup_mutex.
61 */
62 static u32 *closid_num_dirty_rmid;
63
64 /*
65 * @rmid_limbo_count - count of currently unused but (potentially)
66 * dirty RMIDs.
67 * This counts RMIDs that no one is currently using but that
68 * may have a occupancy value > resctrl_rmid_realloc_threshold. User can
69 * change the threshold occupancy value.
70 */
71 static unsigned int rmid_limbo_count;
72
73 /*
74 * @rmid_entry - The entry in the limbo and free lists.
75 */
76 static struct rmid_entry *rmid_ptrs;
77
78 /*
79 * This is the threshold cache occupancy in bytes at which we will consider an
80 * RMID available for re-allocation.
81 */
82 unsigned int resctrl_rmid_realloc_threshold;
83
84 /*
85 * This is the maximum value for the reallocation threshold, in bytes.
86 */
87 unsigned int resctrl_rmid_realloc_limit;
88
89 /*
90 * x86 and arm64 differ in their handling of monitoring.
91 * x86's RMID are independent numbers, there is only one source of traffic
92 * with an RMID value of '1'.
93 * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of
94 * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
95 * value is no longer unique.
96 * To account for this, resctrl uses an index. On x86 this is just the RMID,
97 * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
98 *
99 * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code
100 * must accept an attempt to read every index.
101 */
/*
 * __rmid_entry() - Return the rmid_ptrs[] slot for RMID index @idx.
 *
 * Warns once if the CLOSID or RMID stored in the slot does not match what
 * the architecture decodes from @idx. The slot is returned either way.
 */
static inline struct rmid_entry *__rmid_entry(u32 idx)
{
	struct rmid_entry *slot = &rmid_ptrs[idx];
	u32 want_closid, want_rmid;

	resctrl_arch_rmid_idx_decode(idx, &want_closid, &want_rmid);

	WARN_ON_ONCE(want_closid != slot->closid);
	WARN_ON_ONCE(want_rmid != slot->rmid);

	return slot;
}
115
limbo_release_entry(struct rmid_entry * entry)116 static void limbo_release_entry(struct rmid_entry *entry)
117 {
118 lockdep_assert_held(&rdtgroup_mutex);
119
120 rmid_limbo_count--;
121 list_add_tail(&entry->list, &rmid_free_lru);
122
123 if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
124 closid_num_dirty_rmid[entry->closid]--;
125 }
126
127 /*
128 * Check the RMIDs that are marked as busy for this domain. If the
129 * reported LLC occupancy is below the threshold clear the busy bit and
130 * decrement the count. If the busy count gets to zero on an RMID, we
131 * free the RMID
132 */
__check_limbo(struct rdt_l3_mon_domain * d,bool force_free)133 void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free)
134 {
135 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
136 u32 idx_limit = resctrl_arch_system_num_rmid_idx();
137 struct rmid_entry *entry;
138 u32 idx, cur_idx = 1;
139 void *arch_mon_ctx;
140 void *arch_priv;
141 bool rmid_dirty;
142 u64 val = 0;
143
144 arch_priv = mon_event_all[QOS_L3_OCCUP_EVENT_ID].arch_priv;
145 arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
146 if (IS_ERR(arch_mon_ctx)) {
147 pr_warn_ratelimited("Failed to allocate monitor context: %ld",
148 PTR_ERR(arch_mon_ctx));
149 return;
150 }
151
152 /*
153 * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
154 * are marked as busy for occupancy < threshold. If the occupancy
155 * is less than the threshold decrement the busy counter of the
156 * RMID and move it to the free list when the counter reaches 0.
157 */
158 for (;;) {
159 idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
160 if (idx >= idx_limit)
161 break;
162
163 entry = __rmid_entry(idx);
164 if (resctrl_arch_rmid_read(r, &d->hdr, entry->closid, entry->rmid,
165 QOS_L3_OCCUP_EVENT_ID, arch_priv, &val,
166 arch_mon_ctx)) {
167 rmid_dirty = true;
168 } else {
169 rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
170
171 /*
172 * x86's CLOSID and RMID are independent numbers, so the entry's
173 * CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the
174 * RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't
175 * used to select the configuration. It is thus necessary to track both
176 * CLOSID and RMID because there may be dependencies between them
177 * on some architectures.
178 */
179 trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->hdr.id, val);
180 }
181
182 if (force_free || !rmid_dirty) {
183 clear_bit(idx, d->rmid_busy_llc);
184 if (!--entry->busy)
185 limbo_release_entry(entry);
186 }
187 cur_idx = idx + 1;
188 }
189
190 resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
191 }
192
has_busy_rmid(struct rdt_l3_mon_domain * d)193 bool has_busy_rmid(struct rdt_l3_mon_domain *d)
194 {
195 u32 idx_limit = resctrl_arch_system_num_rmid_idx();
196
197 return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
198 }
199
/*
 * resctrl_find_free_rmid() - Find a free RMID entry usable with @closid.
 *
 * Return: The first suitable entry on rmid_free_lru. The entry is left on
 * the list. ERR_PTR(-EBUSY) if the free list is empty but RMIDs are in
 * limbo, ERR_PTR(-ENOSPC) if the free list is empty with nothing in limbo
 * or if no free entry matches @closid.
 */
static struct rmid_entry *resctrl_find_free_rmid(u32 closid)
{
	struct rmid_entry *candidate;
	u32 own_idx, wanted_idx;

	if (list_empty(&rmid_free_lru)) {
		if (rmid_limbo_count)
			return ERR_PTR(-EBUSY);
		return ERR_PTR(-ENOSPC);
	}

	list_for_each_entry(candidate, &rmid_free_lru, list) {
		/*
		 * Compare the index this free RMID has with the index it
		 * would need when paired with the requested CLOSID. On
		 * architectures where the CLOSID is irrelevant both values
		 * are always equal, so the very first entry is returned.
		 */
		own_idx = resctrl_arch_rmid_idx_encode(candidate->closid,
						       candidate->rmid);
		wanted_idx = resctrl_arch_rmid_idx_encode(closid, candidate->rmid);

		if (own_idx != wanted_idx)
			continue;

		return candidate;
	}

	return ERR_PTR(-ENOSPC);
}
225
226 /**
227 * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated
228 * RMID are clean, or the CLOSID that has
229 * the most clean RMID.
230 *
231 * MPAM's equivalent of RMID are per-CLOSID, meaning a freshly allocated CLOSID
232 * may not be able to allocate clean RMID. To avoid this the allocator will
233 * choose the CLOSID with the most clean RMID.
234 *
235 * When the CLOSID and RMID are independent numbers, the first free CLOSID will
236 * be returned.
237 *
238 * Return: Free CLOSID on success, < 0 on failure.
239 */
resctrl_find_cleanest_closid(void)240 int resctrl_find_cleanest_closid(void)
241 {
242 u32 cleanest_closid = ~0;
243 int i = 0;
244
245 lockdep_assert_held(&rdtgroup_mutex);
246
247 if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
248 return -EIO;
249
250 for (i = 0; i < closids_supported(); i++) {
251 int num_dirty;
252
253 if (closid_allocated(i))
254 continue;
255
256 num_dirty = closid_num_dirty_rmid[i];
257 if (num_dirty == 0)
258 return i;
259
260 if (cleanest_closid == ~0)
261 cleanest_closid = i;
262
263 if (num_dirty < closid_num_dirty_rmid[cleanest_closid])
264 cleanest_closid = i;
265 }
266
267 if (cleanest_closid == ~0)
268 return -ENOSPC;
269
270 return cleanest_closid;
271 }
272
273 /*
274 * For MPAM the RMID value is not unique, and has to be considered with
275 * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which
276 * allows all domains to be managed by a single free list.
277 * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler.
278 */
alloc_rmid(u32 closid)279 int alloc_rmid(u32 closid)
280 {
281 struct rmid_entry *entry;
282
283 lockdep_assert_held(&rdtgroup_mutex);
284
285 entry = resctrl_find_free_rmid(closid);
286 if (IS_ERR(entry))
287 return PTR_ERR(entry);
288
289 list_del(&entry->list);
290 return entry->rmid;
291 }
292
add_rmid_to_limbo(struct rmid_entry * entry)293 static void add_rmid_to_limbo(struct rmid_entry *entry)
294 {
295 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
296 struct rdt_l3_mon_domain *d;
297 u32 idx;
298
299 lockdep_assert_held(&rdtgroup_mutex);
300
301 /* Walking r->domains, ensure it can't race with cpuhp */
302 lockdep_assert_cpus_held();
303
304 idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
305
306 entry->busy = 0;
307 list_for_each_entry(d, &r->mon_domains, hdr.list) {
308 /*
309 * For the first limbo RMID in the domain,
310 * setup up the limbo worker.
311 */
312 if (!has_busy_rmid(d))
313 cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
314 RESCTRL_PICK_ANY_CPU);
315 set_bit(idx, d->rmid_busy_llc);
316 entry->busy++;
317 }
318
319 rmid_limbo_count++;
320 if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
321 closid_num_dirty_rmid[entry->closid]++;
322 }
323
/*
 * free_rmid() - Give back the RMID identified by (@closid, @rmid).
 *
 * The RMID goes into limbo when LLC occupancy monitoring is enabled and
 * straight onto the tail of the free list otherwise. Nothing is done when
 * monitoring is not supported or when the reserved default RMID is passed.
 *
 * Caller must hold rdtgroup_mutex.
 */
void free_rmid(u32 closid, u32 rmid)
{
	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
	struct rmid_entry *entry;

	lockdep_assert_held(&rdtgroup_mutex);

	if (!resctrl_arch_mon_capable())
		return;

	/*
	 * The default RMID is never freed. The comparison is made on the
	 * encoded index so that architectures which ignore the closid
	 * parameter do not need an extra check.
	 */
	if (idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
						RESCTRL_RESERVED_RMID))
		return;

	entry = __rmid_entry(idx);

	if (!resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) {
		list_add_tail(&entry->list, &rmid_free_lru);
		return;
	}

	add_rmid_to_limbo(entry);
}
348
/*
 * get_mbm_state() - Look up the cached MBM state for (@closid, @rmid) and
 * event @evtid in domain @d.
 *
 * Return: Pointer to the state, or NULL if @evtid is not an MBM event or
 * the domain has no state array for it.
 */
static struct mbm_state *get_mbm_state(struct rdt_l3_mon_domain *d, u32 closid,
				       u32 rmid, enum resctrl_event_id evtid)
{
	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
	struct mbm_state *states;

	if (!resctrl_is_mbm_event(evtid))
		return NULL;

	states = d->mbm_states[MBM_STATE_IDX(evtid)];
	if (!states)
		return NULL;

	return &states[idx];
}
362
363 /*
364 * mbm_cntr_get() - Return the counter ID for the matching @evtid and @rdtgrp.
365 *
366 * Return:
367 * Valid counter ID on success, or -ENOENT on failure.
368 */
mbm_cntr_get(struct rdt_resource * r,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,enum resctrl_event_id evtid)369 static int mbm_cntr_get(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
370 struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
371 {
372 int cntr_id;
373
374 if (!r->mon.mbm_cntr_assignable)
375 return -ENOENT;
376
377 if (!resctrl_is_mbm_event(evtid))
378 return -ENOENT;
379
380 for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
381 if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
382 d->cntr_cfg[cntr_id].evtid == evtid)
383 return cntr_id;
384 }
385
386 return -ENOENT;
387 }
388
389 /*
390 * mbm_cntr_alloc() - Initialize and return a new counter ID in the domain @d.
391 * Caller must ensure that the specified event is not assigned already.
392 *
393 * Return:
394 * Valid counter ID on success, or -ENOSPC on failure.
395 */
mbm_cntr_alloc(struct rdt_resource * r,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,enum resctrl_event_id evtid)396 static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
397 struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
398 {
399 int cntr_id;
400
401 for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
402 if (!d->cntr_cfg[cntr_id].rdtgrp) {
403 d->cntr_cfg[cntr_id].rdtgrp = rdtgrp;
404 d->cntr_cfg[cntr_id].evtid = evtid;
405 return cntr_id;
406 }
407 }
408
409 return -ENOSPC;
410 }
411
412 /*
413 * mbm_cntr_free() - Clear the counter ID configuration details in the domain @d.
414 */
mbm_cntr_free(struct rdt_l3_mon_domain * d,int cntr_id)415 static void mbm_cntr_free(struct rdt_l3_mon_domain *d, int cntr_id)
416 {
417 memset(&d->cntr_cfg[cntr_id], 0, sizeof(*d->cntr_cfg));
418 }
419
__l3_mon_event_count(struct rdtgroup * rdtgrp,struct rmid_read * rr)420 static int __l3_mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
421 {
422 int cpu = smp_processor_id();
423 u32 closid = rdtgrp->closid;
424 u32 rmid = rdtgrp->mon.rmid;
425 struct rdt_l3_mon_domain *d;
426 int cntr_id = -ENOENT;
427 struct mbm_state *m;
428 u64 tval = 0;
429
430 if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) {
431 rr->err = -EIO;
432 return -EINVAL;
433 }
434 d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr);
435
436 if (rr->is_mbm_cntr) {
437 cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evt->evtid);
438 if (cntr_id < 0) {
439 rr->err = -ENOENT;
440 return -EINVAL;
441 }
442 }
443
444 if (rr->first) {
445 if (rr->is_mbm_cntr)
446 resctrl_arch_reset_cntr(rr->r, d, closid, rmid, cntr_id, rr->evt->evtid);
447 else
448 resctrl_arch_reset_rmid(rr->r, d, closid, rmid, rr->evt->evtid);
449 m = get_mbm_state(d, closid, rmid, rr->evt->evtid);
450 if (m)
451 memset(m, 0, sizeof(struct mbm_state));
452 return 0;
453 }
454
455 /* Reading a single domain, must be on a CPU in that domain. */
456 if (!cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
457 return -EINVAL;
458 if (rr->is_mbm_cntr)
459 rr->err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id,
460 rr->evt->evtid, &tval);
461 else
462 rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, closid, rmid,
463 rr->evt->evtid, rr->evt->arch_priv,
464 &tval, rr->arch_mon_ctx);
465 if (rr->err)
466 return rr->err;
467
468 rr->val += tval;
469
470 return 0;
471 }
472
__l3_mon_event_count_sum(struct rdtgroup * rdtgrp,struct rmid_read * rr)473 static int __l3_mon_event_count_sum(struct rdtgroup *rdtgrp, struct rmid_read *rr)
474 {
475 int cpu = smp_processor_id();
476 u32 closid = rdtgrp->closid;
477 u32 rmid = rdtgrp->mon.rmid;
478 struct rdt_l3_mon_domain *d;
479 u64 tval = 0;
480 int err, ret;
481
482 /*
483 * Summing across domains is only done for systems that implement
484 * Sub-NUMA Cluster. There is no overlap with systems that support
485 * assignable counters.
486 */
487 if (rr->is_mbm_cntr) {
488 pr_warn_once("Summing domains using assignable counters is not supported\n");
489 rr->err = -EINVAL;
490 return -EINVAL;
491 }
492
493 /* Summing domains that share a cache, must be on a CPU for that cache. */
494 if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
495 return -EINVAL;
496
497 /*
498 * Legacy files must report the sum of an event across all
499 * domains that share the same L3 cache instance.
500 * Report success if a read from any domain succeeds, -EINVAL
501 * (translated to "Unavailable" for user space) if reading from
502 * all domains fail for any reason.
503 */
504 ret = -EINVAL;
505 list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
506 if (d->ci_id != rr->ci->id)
507 continue;
508 err = resctrl_arch_rmid_read(rr->r, &d->hdr, closid, rmid,
509 rr->evt->evtid, rr->evt->arch_priv,
510 &tval, rr->arch_mon_ctx);
511 if (!err) {
512 rr->val += tval;
513 ret = 0;
514 }
515 }
516
517 if (ret)
518 rr->err = ret;
519
520 return ret;
521 }
522
__mon_event_count(struct rdtgroup * rdtgrp,struct rmid_read * rr)523 static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
524 {
525 switch (rr->r->rid) {
526 case RDT_RESOURCE_L3:
527 WARN_ON_ONCE(rr->evt->any_cpu);
528 if (rr->hdr)
529 return __l3_mon_event_count(rdtgrp, rr);
530 else
531 return __l3_mon_event_count_sum(rdtgrp, rr);
532 case RDT_RESOURCE_PERF_PKG: {
533 u64 tval = 0;
534
535 rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, rdtgrp->closid,
536 rdtgrp->mon.rmid, rr->evt->evtid,
537 rr->evt->arch_priv,
538 &tval, rr->arch_mon_ctx);
539 if (rr->err)
540 return rr->err;
541
542 rr->val += tval;
543
544 return 0;
545 }
546 default:
547 rr->err = -EINVAL;
548 return -EINVAL;
549 }
550 }
551
552 /*
553 * mbm_bw_count() - Update bw count from values previously read by
554 * __mon_event_count().
555 * @rdtgrp: resctrl group associated with the CLOSID and RMID to identify
556 * the cached mbm_state.
557 * @rr: The struct rmid_read populated by __mon_event_count().
558 *
559 * Supporting function to calculate the memory bandwidth
560 * and delta bandwidth in MBps. The chunks value previously read by
561 * __mon_event_count() is compared with the chunks value from the previous
562 * invocation. This must be called once per second to maintain values in MBps.
563 */
mbm_bw_count(struct rdtgroup * rdtgrp,struct rmid_read * rr)564 static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
565 {
566 u64 cur_bw, bytes, cur_bytes;
567 u32 closid = rdtgrp->closid;
568 u32 rmid = rdtgrp->mon.rmid;
569 struct rdt_l3_mon_domain *d;
570 struct mbm_state *m;
571
572 if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
573 return;
574 d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr);
575 m = get_mbm_state(d, closid, rmid, rr->evt->evtid);
576 if (WARN_ON_ONCE(!m))
577 return;
578
579 cur_bytes = rr->val;
580 bytes = cur_bytes - m->prev_bw_bytes;
581 m->prev_bw_bytes = cur_bytes;
582
583 cur_bw = bytes / SZ_1M;
584
585 m->prev_bw = cur_bw;
586 }
587
588 /*
589 * This is scheduled by mon_event_read() to read the CQM/MBM counters
590 * on a domain.
591 */
mon_event_count(void * info)592 void mon_event_count(void *info)
593 {
594 struct rdtgroup *rdtgrp, *entry;
595 struct rmid_read *rr = info;
596 struct list_head *head;
597 int ret;
598
599 rdtgrp = rr->rgrp;
600
601 ret = __mon_event_count(rdtgrp, rr);
602
603 /*
604 * For Ctrl groups read data from child monitor groups and
605 * add them together. Count events which are read successfully.
606 * Discard the rmid_read's reporting errors.
607 */
608 head = &rdtgrp->mon.crdtgrp_list;
609
610 if (rdtgrp->type == RDTCTRL_GROUP) {
611 list_for_each_entry(entry, head, mon.crdtgrp_list) {
612 if (__mon_event_count(entry, rr) == 0)
613 ret = 0;
614 }
615 }
616
617 /*
618 * __mon_event_count() calls for newly created monitor groups may
619 * report -EINVAL/Unavailable if the monitor hasn't seen any traffic.
620 * Discard error if any of the monitor event reads succeeded.
621 */
622 if (ret == 0)
623 rr->err = 0;
624 }
625
get_ctrl_domain_from_cpu(int cpu,struct rdt_resource * r)626 static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu,
627 struct rdt_resource *r)
628 {
629 struct rdt_ctrl_domain *d;
630
631 lockdep_assert_cpus_held();
632
633 list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
634 /* Find the domain that contains this CPU */
635 if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
636 return d;
637 }
638
639 return NULL;
640 }
641
642 /*
643 * Feedback loop for MBA software controller (mba_sc)
644 *
645 * mba_sc is a feedback loop where we periodically read MBM counters and
646 * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
647 * that:
648 *
649 * current bandwidth(cur_bw) < user specified bandwidth(user_bw)
650 *
651 * This uses the MBM counters to measure the bandwidth and MBA throttle
652 * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
653 * fact that resctrl rdtgroups have both monitoring and control.
654 *
655 * The frequency of the checks is 1s and we just tag along the MBM overflow
656 * timer. Having 1s interval makes the calculation of bandwidth simpler.
657 *
658 * Although MBA's goal is to restrict the bandwidth to a maximum, there may
659 * be a need to increase the bandwidth to avoid unnecessarily restricting
660 * the L2 <-> L3 traffic.
661 *
662 * Since MBA controls the L2 external bandwidth where as MBM measures the
663 * L3 external bandwidth the following sequence could lead to such a
664 * situation.
665 *
666 * Consider an rdtgroup which had high L3 <-> memory traffic in initial
667 * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but
668 * after some time rdtgroup has mostly L2 <-> L3 traffic.
669 *
670 * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
671 * throttle MSRs already have low percentage values. To avoid
672 * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
673 */
update_mba_bw(struct rdtgroup * rgrp,struct rdt_l3_mon_domain * dom_mbm)674 static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_l3_mon_domain *dom_mbm)
675 {
676 u32 closid, rmid, cur_msr_val, new_msr_val;
677 struct mbm_state *pmbm_data, *cmbm_data;
678 struct rdt_ctrl_domain *dom_mba;
679 enum resctrl_event_id evt_id;
680 struct rdt_resource *r_mba;
681 struct list_head *head;
682 struct rdtgroup *entry;
683 u32 cur_bw, user_bw;
684
685 r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
686 evt_id = rgrp->mba_mbps_event;
687
688 closid = rgrp->closid;
689 rmid = rgrp->mon.rmid;
690 pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id);
691 if (WARN_ON_ONCE(!pmbm_data))
692 return;
693
694 dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
695 if (!dom_mba) {
696 pr_warn_once("Failure to get domain for MBA update\n");
697 return;
698 }
699
700 cur_bw = pmbm_data->prev_bw;
701 user_bw = dom_mba->mbps_val[closid];
702
703 /* MBA resource doesn't support CDP */
704 cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
705
706 /*
707 * For Ctrl groups read data from child monitor groups.
708 */
709 head = &rgrp->mon.crdtgrp_list;
710 list_for_each_entry(entry, head, mon.crdtgrp_list) {
711 cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id);
712 if (WARN_ON_ONCE(!cmbm_data))
713 return;
714 cur_bw += cmbm_data->prev_bw;
715 }
716
717 /*
718 * Scale up/down the bandwidth linearly for the ctrl group. The
719 * bandwidth step is the bandwidth granularity specified by the
720 * hardware.
721 * Always increase throttling if current bandwidth is above the
722 * target set by user.
723 * But avoid thrashing up and down on every poll by checking
724 * whether a decrease in throttling is likely to push the group
725 * back over target. E.g. if currently throttling to 30% of bandwidth
726 * on a system with 10% granularity steps, check whether moving to
727 * 40% would go past the limit by multiplying current bandwidth by
728 * "(30 + 10) / 30".
729 */
730 if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
731 new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
732 } else if (cur_msr_val < MAX_MBA_BW &&
733 (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
734 new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
735 } else {
736 return;
737 }
738
739 resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
740 }
741
mbm_update_one_event(struct rdt_resource * r,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,enum resctrl_event_id evtid)742 static void mbm_update_one_event(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
743 struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
744 {
745 struct rmid_read rr = {0};
746
747 rr.r = r;
748 rr.hdr = &d->hdr;
749 rr.evt = &mon_event_all[evtid];
750 if (resctrl_arch_mbm_cntr_assign_enabled(r)) {
751 rr.is_mbm_cntr = true;
752 } else {
753 rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, evtid);
754 if (IS_ERR(rr.arch_mon_ctx)) {
755 pr_warn_ratelimited("Failed to allocate monitor context: %ld",
756 PTR_ERR(rr.arch_mon_ctx));
757 return;
758 }
759 }
760
761 __mon_event_count(rdtgrp, &rr);
762
763 /*
764 * If the software controller is enabled, compute the
765 * bandwidth for this event id.
766 */
767 if (is_mba_sc(NULL))
768 mbm_bw_count(rdtgrp, &rr);
769
770 if (rr.arch_mon_ctx)
771 resctrl_arch_mon_ctx_free(rr.r, evtid, rr.arch_mon_ctx);
772 }
773
mbm_update(struct rdt_resource * r,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp)774 static void mbm_update(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
775 struct rdtgroup *rdtgrp)
776 {
777 /*
778 * This is protected from concurrent reads from user as both
779 * the user and overflow handler hold the global mutex.
780 */
781 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
782 mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID);
783
784 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
785 mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID);
786 }
787
788 /*
789 * Handler to scan the limbo list and move the RMIDs
790 * to free list whose occupancy < threshold_occupancy.
791 */
cqm_handle_limbo(struct work_struct * work)792 void cqm_handle_limbo(struct work_struct *work)
793 {
794 unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
795 struct rdt_l3_mon_domain *d;
796
797 cpus_read_lock();
798 mutex_lock(&rdtgroup_mutex);
799
800 d = container_of(work, struct rdt_l3_mon_domain, cqm_limbo.work);
801
802 __check_limbo(d, false);
803
804 if (has_busy_rmid(d)) {
805 d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
806 RESCTRL_PICK_ANY_CPU);
807 schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
808 delay);
809 }
810
811 mutex_unlock(&rdtgroup_mutex);
812 cpus_read_unlock();
813 }
814
815 /**
816 * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
817 * domain.
818 * @dom: The domain the limbo handler should run for.
819 * @delay_ms: How far in the future the handler should run.
820 * @exclude_cpu: Which CPU the handler should not run on,
821 * RESCTRL_PICK_ANY_CPU to pick any CPU.
822 */
cqm_setup_limbo_handler(struct rdt_l3_mon_domain * dom,unsigned long delay_ms,int exclude_cpu)823 void cqm_setup_limbo_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms,
824 int exclude_cpu)
825 {
826 unsigned long delay = msecs_to_jiffies(delay_ms);
827 int cpu;
828
829 cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
830 dom->cqm_work_cpu = cpu;
831
832 if (cpu < nr_cpu_ids)
833 schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
834 }
835
mbm_handle_overflow(struct work_struct * work)836 void mbm_handle_overflow(struct work_struct *work)
837 {
838 unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
839 struct rdtgroup *prgrp, *crgrp;
840 struct rdt_l3_mon_domain *d;
841 struct list_head *head;
842 struct rdt_resource *r;
843
844 cpus_read_lock();
845 mutex_lock(&rdtgroup_mutex);
846
847 /*
848 * If the filesystem has been unmounted this work no longer needs to
849 * run.
850 */
851 if (!resctrl_mounted || !resctrl_arch_mon_capable())
852 goto out_unlock;
853
854 r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
855 d = container_of(work, struct rdt_l3_mon_domain, mbm_over.work);
856
857 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
858 mbm_update(r, d, prgrp);
859
860 head = &prgrp->mon.crdtgrp_list;
861 list_for_each_entry(crgrp, head, mon.crdtgrp_list)
862 mbm_update(r, d, crgrp);
863
864 if (is_mba_sc(NULL))
865 update_mba_bw(prgrp, d);
866 }
867
868 /*
869 * Re-check for housekeeping CPUs. This allows the overflow handler to
870 * move off a nohz_full CPU quickly.
871 */
872 d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
873 RESCTRL_PICK_ANY_CPU);
874 schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
875
876 out_unlock:
877 mutex_unlock(&rdtgroup_mutex);
878 cpus_read_unlock();
879 }
880
881 /**
882 * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
883 * domain.
884 * @dom: The domain the overflow handler should run for.
885 * @delay_ms: How far in the future the handler should run.
886 * @exclude_cpu: Which CPU the handler should not run on,
887 * RESCTRL_PICK_ANY_CPU to pick any CPU.
888 */
mbm_setup_overflow_handler(struct rdt_l3_mon_domain * dom,unsigned long delay_ms,int exclude_cpu)889 void mbm_setup_overflow_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms,
890 int exclude_cpu)
891 {
892 unsigned long delay = msecs_to_jiffies(delay_ms);
893 int cpu;
894
895 /*
896 * When a domain comes online there is no guarantee the filesystem is
897 * mounted. If not, there is no need to catch counter overflow.
898 */
899 if (!resctrl_mounted || !resctrl_arch_mon_capable())
900 return;
901 cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
902 dom->mbm_work_cpu = cpu;
903
904 if (cpu < nr_cpu_ids)
905 schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
906 }
907
setup_rmid_lru_list(void)908 int setup_rmid_lru_list(void)
909 {
910 struct rmid_entry *entry = NULL;
911 u32 idx_limit;
912 u32 idx;
913 int i;
914
915 if (!resctrl_arch_mon_capable())
916 return 0;
917
918 /*
919 * Called on every mount, but the number of RMIDs cannot change
920 * after the first mount, so keep using the same set of rmid_ptrs[]
921 * until resctrl_exit(). Note that the limbo handler continues to
922 * access rmid_ptrs[] after resctrl is unmounted.
923 */
924 if (rmid_ptrs)
925 return 0;
926
927 idx_limit = resctrl_arch_system_num_rmid_idx();
928 rmid_ptrs = kzalloc_objs(struct rmid_entry, idx_limit);
929 if (!rmid_ptrs)
930 return -ENOMEM;
931
932 for (i = 0; i < idx_limit; i++) {
933 entry = &rmid_ptrs[i];
934 INIT_LIST_HEAD(&entry->list);
935
936 resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
937 list_add_tail(&entry->list, &rmid_free_lru);
938 }
939
940 /*
941 * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
942 * are always allocated. These are used for the rdtgroup_default
943 * control group, which was setup earlier in rdtgroup_setup_default().
944 */
945 idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
946 RESCTRL_RESERVED_RMID);
947 entry = __rmid_entry(idx);
948 list_del(&entry->list);
949
950 return 0;
951 }
952
free_rmid_lru_list(void)953 void free_rmid_lru_list(void)
954 {
955 if (!resctrl_arch_mon_capable())
956 return;
957
958 mutex_lock(&rdtgroup_mutex);
959 kfree(rmid_ptrs);
960 rmid_ptrs = NULL;
961 mutex_unlock(&rdtgroup_mutex);
962 }
963
964 #define MON_EVENT(_eventid, _name, _res, _fp) \
965 [_eventid] = { \
966 .name = _name, \
967 .evtid = _eventid, \
968 .rid = _res, \
969 .is_floating_point = _fp, \
970 }
971
/*
 * All available events, indexed by event id. Architecture code marks the
 * ones that are supported by a system using resctrl_enable_mon_event()
 * to set .enabled.
 *
 * Each entry records the event's file name, its event id, the resource it
 * belongs to and whether the event is a floating point value.
 */
struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
	MON_EVENT(QOS_L3_OCCUP_EVENT_ID,		"llc_occupancy",	RDT_RESOURCE_L3,	false),
	MON_EVENT(QOS_L3_MBM_TOTAL_EVENT_ID,		"mbm_total_bytes",	RDT_RESOURCE_L3,	false),
	MON_EVENT(QOS_L3_MBM_LOCAL_EVENT_ID,		"mbm_local_bytes",	RDT_RESOURCE_L3,	false),
	MON_EVENT(PMT_EVENT_ENERGY,			"core_energy",		RDT_RESOURCE_PERF_PKG,	true),
	MON_EVENT(PMT_EVENT_ACTIVITY,			"activity",		RDT_RESOURCE_PERF_PKG,	true),
	MON_EVENT(PMT_EVENT_STALLS_LLC_HIT,		"stalls_llc_hit",	RDT_RESOURCE_PERF_PKG,	false),
	MON_EVENT(PMT_EVENT_C1_RES,			"c1_res",		RDT_RESOURCE_PERF_PKG,	false),
	MON_EVENT(PMT_EVENT_UNHALTED_CORE_CYCLES,	"unhalted_core_cycles",	RDT_RESOURCE_PERF_PKG,	false),
	MON_EVENT(PMT_EVENT_STALLS_LLC_MISS,		"stalls_llc_miss",	RDT_RESOURCE_PERF_PKG,	false),
	MON_EVENT(PMT_EVENT_AUTO_C6_RES,		"c6_res",		RDT_RESOURCE_PERF_PKG,	false),
	MON_EVENT(PMT_EVENT_UNHALTED_REF_CYCLES,	"unhalted_ref_cycles",	RDT_RESOURCE_PERF_PKG,	false),
	MON_EVENT(PMT_EVENT_UOPS_RETIRED,		"uops_retired",		RDT_RESOURCE_PERF_PKG,	false),
};
991
/*
 * resctrl_enable_mon_event() - Mark an event in mon_event_all[] as enabled.
 * @eventid:	Event to enable.
 * @any_cpu:	Stored in the event's .any_cpu field.
 * @binary_bits: Stored in the event's .binary_bits field. Must not exceed
 *		MAX_BINARY_BITS and must be zero unless the event is flagged
 *		as floating point.
 * @arch_priv:	Stored in the event's .arch_priv field.
 *
 * Return: true if the event was enabled by this call. false (with a warning)
 * if an argument is out of range, the event is already enabled, or
 * @binary_bits is non-zero for an event that is not floating point.
 */
bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
			      unsigned int binary_bits, void *arch_priv)
{
	struct mon_evt *evt;

	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS ||
			 binary_bits > MAX_BINARY_BITS))
		return false;

	/* Only index the table once @eventid is known to be in range. */
	evt = &mon_event_all[eventid];

	if (evt->enabled) {
		pr_warn("Duplicate enable for event %d\n", eventid);
		return false;
	}

	if (binary_bits && !evt->is_floating_point) {
		pr_warn("Event %d may not be floating point\n", eventid);
		return false;
	}

	evt->any_cpu = any_cpu;
	evt->binary_bits = binary_bits;
	evt->arch_priv = arch_priv;
	evt->enabled = true;

	return true;
}
1014
resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)1015 bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)
1016 {
1017 return eventid >= QOS_FIRST_EVENT && eventid < QOS_NUM_EVENTS &&
1018 mon_event_all[eventid].enabled;
1019 }
1020
resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid)1021 u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid)
1022 {
1023 return mon_event_all[evtid].evt_cfg;
1024 }
1025
/**
 * struct mbm_transaction - Memory transaction an MBM event can be configured with.
 * @name:	Name of memory transaction (read, write ...). Held in a fixed
 *		32 byte buffer, so names must fit including the terminating
 *		NUL.
 * @val:	The bit (eg. READS_TO_LOCAL_MEM or READS_TO_REMOTE_MEM) used to
 *		represent the memory transaction within an event's configuration.
 */
struct mbm_transaction {
	char	name[32];
	u32	val;
};
1036
1037 /* Decoded values for each type of memory transaction. */
1038 static struct mbm_transaction mbm_transactions[NUM_MBM_TRANSACTIONS] = {
1039 {"local_reads", READS_TO_LOCAL_MEM},
1040 {"remote_reads", READS_TO_REMOTE_MEM},
1041 {"local_non_temporal_writes", NON_TEMP_WRITE_TO_LOCAL_MEM},
1042 {"remote_non_temporal_writes", NON_TEMP_WRITE_TO_REMOTE_MEM},
1043 {"local_reads_slow_memory", READS_TO_LOCAL_S_MEM},
1044 {"remote_reads_slow_memory", READS_TO_REMOTE_S_MEM},
1045 {"dirty_victim_writes_all", DIRTY_VICTIMS_TO_ALL_MEM},
1046 };
1047
event_filter_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)1048 int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
1049 {
1050 struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
1051 struct rdt_resource *r;
1052 bool sep = false;
1053 int ret = 0, i;
1054
1055 mutex_lock(&rdtgroup_mutex);
1056 rdt_last_cmd_clear();
1057
1058 r = resctrl_arch_get_resource(mevt->rid);
1059 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1060 rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1061 ret = -EINVAL;
1062 goto out_unlock;
1063 }
1064
1065 for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
1066 if (mevt->evt_cfg & mbm_transactions[i].val) {
1067 if (sep)
1068 seq_putc(seq, ',');
1069 seq_printf(seq, "%s", mbm_transactions[i].name);
1070 sep = true;
1071 }
1072 }
1073 seq_putc(seq, '\n');
1074
1075 out_unlock:
1076 mutex_unlock(&rdtgroup_mutex);
1077
1078 return ret;
1079 }
1080
resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file * of,struct seq_file * s,void * v)1081 int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of, struct seq_file *s,
1082 void *v)
1083 {
1084 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1085 int ret = 0;
1086
1087 mutex_lock(&rdtgroup_mutex);
1088 rdt_last_cmd_clear();
1089
1090 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1091 rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1092 ret = -EINVAL;
1093 goto out_unlock;
1094 }
1095
1096 seq_printf(s, "%u\n", r->mon.mbm_assign_on_mkdir);
1097
1098 out_unlock:
1099 mutex_unlock(&rdtgroup_mutex);
1100
1101 return ret;
1102 }
1103
resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)1104 ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf,
1105 size_t nbytes, loff_t off)
1106 {
1107 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1108 bool value;
1109 int ret;
1110
1111 ret = kstrtobool(buf, &value);
1112 if (ret)
1113 return ret;
1114
1115 mutex_lock(&rdtgroup_mutex);
1116 rdt_last_cmd_clear();
1117
1118 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1119 rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1120 ret = -EINVAL;
1121 goto out_unlock;
1122 }
1123
1124 r->mon.mbm_assign_on_mkdir = value;
1125
1126 out_unlock:
1127 mutex_unlock(&rdtgroup_mutex);
1128
1129 return ret ?: nbytes;
1130 }
1131
1132 /*
1133 * mbm_cntr_free_all() - Clear all the counter ID configuration details in the
1134 * domain @d. Called when mbm_assign_mode is changed.
1135 */
mbm_cntr_free_all(struct rdt_resource * r,struct rdt_l3_mon_domain * d)1136 static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
1137 {
1138 memset(d->cntr_cfg, 0, sizeof(*d->cntr_cfg) * r->mon.num_mbm_cntrs);
1139 }
1140
1141 /*
1142 * resctrl_reset_rmid_all() - Reset all non-architecture states for all the
1143 * supported RMIDs.
1144 */
resctrl_reset_rmid_all(struct rdt_resource * r,struct rdt_l3_mon_domain * d)1145 static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
1146 {
1147 u32 idx_limit = resctrl_arch_system_num_rmid_idx();
1148 enum resctrl_event_id evt;
1149 int idx;
1150
1151 for_each_mbm_event_id(evt) {
1152 if (!resctrl_is_mon_event_enabled(evt))
1153 continue;
1154 idx = MBM_STATE_IDX(evt);
1155 memset(d->mbm_states[idx], 0, sizeof(*d->mbm_states[0]) * idx_limit);
1156 }
1157 }
1158
1159 /*
1160 * rdtgroup_assign_cntr() - Assign/unassign the counter ID for the event, RMID
1161 * pair in the domain.
1162 *
1163 * Assign the counter if @assign is true else unassign the counter. Reset the
1164 * associated non-architectural state.
1165 */
rdtgroup_assign_cntr(struct rdt_resource * r,struct rdt_l3_mon_domain * d,enum resctrl_event_id evtid,u32 rmid,u32 closid,u32 cntr_id,bool assign)1166 static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
1167 enum resctrl_event_id evtid, u32 rmid, u32 closid,
1168 u32 cntr_id, bool assign)
1169 {
1170 struct mbm_state *m;
1171
1172 resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign);
1173
1174 m = get_mbm_state(d, closid, rmid, evtid);
1175 if (m)
1176 memset(m, 0, sizeof(*m));
1177 }
1178
1179 /*
1180 * rdtgroup_alloc_assign_cntr() - Allocate a counter ID and assign it to the event
1181 * pointed to by @mevt and the resctrl group @rdtgrp within the domain @d.
1182 *
1183 * Return:
1184 * 0 on success, < 0 on failure.
1185 */
rdtgroup_alloc_assign_cntr(struct rdt_resource * r,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,struct mon_evt * mevt)1186 static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
1187 struct rdtgroup *rdtgrp, struct mon_evt *mevt)
1188 {
1189 int cntr_id;
1190
1191 /* No action required if the counter is assigned already. */
1192 cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
1193 if (cntr_id >= 0)
1194 return 0;
1195
1196 cntr_id = mbm_cntr_alloc(r, d, rdtgrp, mevt->evtid);
1197 if (cntr_id < 0) {
1198 rdt_last_cmd_printf("Failed to allocate counter for %s in domain %d\n",
1199 mevt->name, d->hdr.id);
1200 return cntr_id;
1201 }
1202
1203 rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, true);
1204
1205 return 0;
1206 }
1207
1208 /*
1209 * rdtgroup_assign_cntr_event() - Assign a hardware counter for the event in
1210 * @mevt to the resctrl group @rdtgrp. Assign counters to all domains if @d is
1211 * NULL; otherwise, assign the counter to the specified domain @d.
1212 *
1213 * If all counters in a domain are already in use, rdtgroup_alloc_assign_cntr()
1214 * will fail. The assignment process will abort at the first failure encountered
1215 * during domain traversal, which may result in the event being only partially
1216 * assigned.
1217 *
1218 * Return:
1219 * 0 on success, < 0 on failure.
1220 */
rdtgroup_assign_cntr_event(struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,struct mon_evt * mevt)1221 static int rdtgroup_assign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp,
1222 struct mon_evt *mevt)
1223 {
1224 struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
1225 int ret = 0;
1226
1227 if (!d) {
1228 list_for_each_entry(d, &r->mon_domains, hdr.list) {
1229 ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
1230 if (ret)
1231 return ret;
1232 }
1233 } else {
1234 ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
1235 }
1236
1237 return ret;
1238 }
1239
1240 /*
1241 * rdtgroup_assign_cntrs() - Assign counters to MBM events. Called when
1242 * a new group is created.
1243 *
1244 * Each group can accommodate two counters per domain: one for the total
1245 * event and one for the local event. Assignments may fail due to the limited
1246 * number of counters. However, it is not necessary to fail the group creation
1247 * and thus no failure is returned. Users have the option to modify the
1248 * counter assignments after the group has been created.
1249 */
rdtgroup_assign_cntrs(struct rdtgroup * rdtgrp)1250 void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp)
1251 {
1252 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1253
1254 if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r) ||
1255 !r->mon.mbm_assign_on_mkdir)
1256 return;
1257
1258 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1259 rdtgroup_assign_cntr_event(NULL, rdtgrp,
1260 &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
1261
1262 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1263 rdtgroup_assign_cntr_event(NULL, rdtgrp,
1264 &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
1265 }
1266
1267 /*
1268 * rdtgroup_free_unassign_cntr() - Unassign and reset the counter ID configuration
1269 * for the event pointed to by @mevt within the domain @d and resctrl group @rdtgrp.
1270 */
rdtgroup_free_unassign_cntr(struct rdt_resource * r,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,struct mon_evt * mevt)1271 static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
1272 struct rdtgroup *rdtgrp, struct mon_evt *mevt)
1273 {
1274 int cntr_id;
1275
1276 cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
1277
1278 /* If there is no cntr_id assigned, nothing to do */
1279 if (cntr_id < 0)
1280 return;
1281
1282 rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, false);
1283
1284 mbm_cntr_free(d, cntr_id);
1285 }
1286
1287 /*
1288 * rdtgroup_unassign_cntr_event() - Unassign a hardware counter associated with
1289 * the event structure @mevt from the domain @d and the group @rdtgrp. Unassign
1290 * the counters from all the domains if @d is NULL else unassign from @d.
1291 */
rdtgroup_unassign_cntr_event(struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,struct mon_evt * mevt)1292 static void rdtgroup_unassign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp,
1293 struct mon_evt *mevt)
1294 {
1295 struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
1296
1297 if (!d) {
1298 list_for_each_entry(d, &r->mon_domains, hdr.list)
1299 rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
1300 } else {
1301 rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
1302 }
1303 }
1304
1305 /*
1306 * rdtgroup_unassign_cntrs() - Unassign the counters associated with MBM events.
1307 * Called when a group is deleted.
1308 */
rdtgroup_unassign_cntrs(struct rdtgroup * rdtgrp)1309 void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp)
1310 {
1311 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1312
1313 if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r))
1314 return;
1315
1316 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1317 rdtgroup_unassign_cntr_event(NULL, rdtgrp,
1318 &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
1319
1320 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1321 rdtgroup_unassign_cntr_event(NULL, rdtgrp,
1322 &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
1323 }
1324
/*
 * resctrl_parse_mem_transactions() - Convert a comma separated list of
 * memory transaction names into a configuration bitmask.
 * @tok:	List to parse; modified in place. NULL or an empty string
 *		yields a mask of 0.
 * @val:	Receives the OR of the mbm_transactions[] bits of all names.
 *		Only written on success.
 *
 * Each name has surrounding whitespace removed with strim() and must match
 * an mbm_transactions[] name exactly.
 *
 * Return: 0 on success, -EINVAL on the first unknown name.
 */
static int resctrl_parse_mem_transactions(char *tok, u32 *val)
{
	u32 cfg = 0;
	char *name;
	int i;

	while (tok && tok[0] != '\0') {
		name = strim(strsep(&tok, ","));

		for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
			if (!strcmp(mbm_transactions[i].name, name))
				break;
		}

		/* The loop ran to completion: no table entry matched. */
		if (i == NUM_MBM_TRANSACTIONS) {
			rdt_last_cmd_printf("Invalid memory transaction type %s\n", name);
			return -EINVAL;
		}

		cfg |= mbm_transactions[i].val;
	}

	*val = cfg;

	return 0;
}
1356
1357 /*
1358 * rdtgroup_update_cntr_event - Update the counter assignments for the event
1359 * in a group.
1360 * @r: Resource to which update needs to be done.
1361 * @rdtgrp: Resctrl group.
1362 * @evtid: MBM monitor event.
1363 */
rdtgroup_update_cntr_event(struct rdt_resource * r,struct rdtgroup * rdtgrp,enum resctrl_event_id evtid)1364 static void rdtgroup_update_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
1365 enum resctrl_event_id evtid)
1366 {
1367 struct rdt_l3_mon_domain *d;
1368 int cntr_id;
1369
1370 list_for_each_entry(d, &r->mon_domains, hdr.list) {
1371 cntr_id = mbm_cntr_get(r, d, rdtgrp, evtid);
1372 if (cntr_id >= 0)
1373 rdtgroup_assign_cntr(r, d, evtid, rdtgrp->mon.rmid,
1374 rdtgrp->closid, cntr_id, true);
1375 }
1376 }
1377
1378 /*
1379 * resctrl_update_cntr_allrdtgrp - Update the counter assignments for the event
1380 * for all the groups.
1381 * @mevt MBM Monitor event.
1382 */
resctrl_update_cntr_allrdtgrp(struct mon_evt * mevt)1383 static void resctrl_update_cntr_allrdtgrp(struct mon_evt *mevt)
1384 {
1385 struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
1386 struct rdtgroup *prgrp, *crgrp;
1387
1388 /*
1389 * Find all the groups where the event is assigned and update the
1390 * configuration of existing assignments.
1391 */
1392 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
1393 rdtgroup_update_cntr_event(r, prgrp, mevt->evtid);
1394
1395 list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
1396 rdtgroup_update_cntr_event(r, crgrp, mevt->evtid);
1397 }
1398 }
1399
event_filter_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)1400 ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
1401 loff_t off)
1402 {
1403 struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
1404 struct rdt_resource *r;
1405 u32 evt_cfg = 0;
1406 int ret = 0;
1407
1408 /* Valid input requires a trailing newline */
1409 if (nbytes == 0 || buf[nbytes - 1] != '\n')
1410 return -EINVAL;
1411
1412 buf[nbytes - 1] = '\0';
1413
1414 cpus_read_lock();
1415 mutex_lock(&rdtgroup_mutex);
1416
1417 rdt_last_cmd_clear();
1418
1419 r = resctrl_arch_get_resource(mevt->rid);
1420 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1421 rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1422 ret = -EINVAL;
1423 goto out_unlock;
1424 }
1425
1426 ret = resctrl_parse_mem_transactions(buf, &evt_cfg);
1427 if (!ret && mevt->evt_cfg != evt_cfg) {
1428 mevt->evt_cfg = evt_cfg;
1429 resctrl_update_cntr_allrdtgrp(mevt);
1430 }
1431
1432 out_unlock:
1433 mutex_unlock(&rdtgroup_mutex);
1434 cpus_read_unlock();
1435
1436 return ret ?: nbytes;
1437 }
1438
resctrl_mbm_assign_mode_show(struct kernfs_open_file * of,struct seq_file * s,void * v)1439 int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of,
1440 struct seq_file *s, void *v)
1441 {
1442 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1443 bool enabled;
1444
1445 mutex_lock(&rdtgroup_mutex);
1446 enabled = resctrl_arch_mbm_cntr_assign_enabled(r);
1447
1448 if (r->mon.mbm_cntr_assignable) {
1449 if (enabled)
1450 seq_puts(s, "[mbm_event]\n");
1451 else
1452 seq_puts(s, "[default]\n");
1453
1454 if (!IS_ENABLED(CONFIG_RESCTRL_ASSIGN_FIXED)) {
1455 if (enabled)
1456 seq_puts(s, "default\n");
1457 else
1458 seq_puts(s, "mbm_event\n");
1459 }
1460 } else {
1461 seq_puts(s, "[default]\n");
1462 }
1463
1464 mutex_unlock(&rdtgroup_mutex);
1465
1466 return 0;
1467 }
1468
resctrl_mbm_assign_mode_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)1469 ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf,
1470 size_t nbytes, loff_t off)
1471 {
1472 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1473 struct rdt_l3_mon_domain *d;
1474 int ret = 0;
1475 bool enable;
1476
1477 /* Valid input requires a trailing newline */
1478 if (nbytes == 0 || buf[nbytes - 1] != '\n')
1479 return -EINVAL;
1480
1481 buf[nbytes - 1] = '\0';
1482
1483 cpus_read_lock();
1484 mutex_lock(&rdtgroup_mutex);
1485
1486 rdt_last_cmd_clear();
1487
1488 if (!strcmp(buf, "default")) {
1489 enable = 0;
1490 } else if (!strcmp(buf, "mbm_event")) {
1491 if (r->mon.mbm_cntr_assignable) {
1492 enable = 1;
1493 } else {
1494 ret = -EINVAL;
1495 rdt_last_cmd_puts("mbm_event mode is not supported\n");
1496 goto out_unlock;
1497 }
1498 } else {
1499 ret = -EINVAL;
1500 rdt_last_cmd_puts("Unsupported assign mode\n");
1501 goto out_unlock;
1502 }
1503
1504 if (enable != resctrl_arch_mbm_cntr_assign_enabled(r)) {
1505 ret = resctrl_arch_mbm_cntr_assign_set(r, enable);
1506 if (ret)
1507 goto out_unlock;
1508
1509 /* Update the visibility of BMEC related files */
1510 resctrl_bmec_files_show(r, NULL, !enable);
1511
1512 /*
1513 * Initialize the default memory transaction values for
1514 * total and local events.
1515 */
1516 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1517 mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
1518 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1519 mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
1520 (READS_TO_LOCAL_MEM |
1521 READS_TO_LOCAL_S_MEM |
1522 NON_TEMP_WRITE_TO_LOCAL_MEM);
1523 /* Enable auto assignment when switching to "mbm_event" mode */
1524 if (enable)
1525 r->mon.mbm_assign_on_mkdir = true;
1526 /*
1527 * Reset all the non-achitectural RMID state and assignable counters.
1528 */
1529 list_for_each_entry(d, &r->mon_domains, hdr.list) {
1530 mbm_cntr_free_all(r, d);
1531 resctrl_reset_rmid_all(r, d);
1532 }
1533 }
1534
1535 out_unlock:
1536 mutex_unlock(&rdtgroup_mutex);
1537 cpus_read_unlock();
1538
1539 return ret ?: nbytes;
1540 }
1541
resctrl_num_mbm_cntrs_show(struct kernfs_open_file * of,struct seq_file * s,void * v)1542 int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of,
1543 struct seq_file *s, void *v)
1544 {
1545 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1546 struct rdt_l3_mon_domain *dom;
1547 bool sep = false;
1548
1549 cpus_read_lock();
1550 mutex_lock(&rdtgroup_mutex);
1551
1552 list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1553 if (sep)
1554 seq_putc(s, ';');
1555
1556 seq_printf(s, "%d=%d", dom->hdr.id, r->mon.num_mbm_cntrs);
1557 sep = true;
1558 }
1559 seq_putc(s, '\n');
1560
1561 mutex_unlock(&rdtgroup_mutex);
1562 cpus_read_unlock();
1563 return 0;
1564 }
1565
resctrl_available_mbm_cntrs_show(struct kernfs_open_file * of,struct seq_file * s,void * v)1566 int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of,
1567 struct seq_file *s, void *v)
1568 {
1569 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1570 struct rdt_l3_mon_domain *dom;
1571 bool sep = false;
1572 u32 cntrs, i;
1573 int ret = 0;
1574
1575 cpus_read_lock();
1576 mutex_lock(&rdtgroup_mutex);
1577
1578 rdt_last_cmd_clear();
1579
1580 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1581 rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1582 ret = -EINVAL;
1583 goto out_unlock;
1584 }
1585
1586 list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1587 if (sep)
1588 seq_putc(s, ';');
1589
1590 cntrs = 0;
1591 for (i = 0; i < r->mon.num_mbm_cntrs; i++) {
1592 if (!dom->cntr_cfg[i].rdtgrp)
1593 cntrs++;
1594 }
1595
1596 seq_printf(s, "%d=%u", dom->hdr.id, cntrs);
1597 sep = true;
1598 }
1599 seq_putc(s, '\n');
1600
1601 out_unlock:
1602 mutex_unlock(&rdtgroup_mutex);
1603 cpus_read_unlock();
1604
1605 return ret;
1606 }
1607
mbm_L3_assignments_show(struct kernfs_open_file * of,struct seq_file * s,void * v)1608 int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v)
1609 {
1610 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1611 struct rdt_l3_mon_domain *d;
1612 struct rdtgroup *rdtgrp;
1613 struct mon_evt *mevt;
1614 int ret = 0;
1615 bool sep;
1616
1617 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1618 if (!rdtgrp) {
1619 ret = -ENOENT;
1620 goto out_unlock;
1621 }
1622
1623 rdt_last_cmd_clear();
1624 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1625 rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1626 ret = -EINVAL;
1627 goto out_unlock;
1628 }
1629
1630 for_each_mon_event(mevt) {
1631 if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid))
1632 continue;
1633
1634 sep = false;
1635 seq_printf(s, "%s:", mevt->name);
1636 list_for_each_entry(d, &r->mon_domains, hdr.list) {
1637 if (sep)
1638 seq_putc(s, ';');
1639
1640 if (mbm_cntr_get(r, d, rdtgrp, mevt->evtid) < 0)
1641 seq_printf(s, "%d=_", d->hdr.id);
1642 else
1643 seq_printf(s, "%d=e", d->hdr.id);
1644
1645 sep = true;
1646 }
1647 seq_putc(s, '\n');
1648 }
1649
1650 out_unlock:
1651 rdtgroup_kn_unlock(of->kn);
1652
1653 return ret;
1654 }
1655
1656 /*
1657 * mbm_get_mon_event_by_name() - Return the mon_evt entry for the matching
1658 * event name.
1659 */
mbm_get_mon_event_by_name(struct rdt_resource * r,char * name)1660 static struct mon_evt *mbm_get_mon_event_by_name(struct rdt_resource *r, char *name)
1661 {
1662 struct mon_evt *mevt;
1663
1664 for_each_mon_event(mevt) {
1665 if (mevt->rid == r->rid && mevt->enabled &&
1666 resctrl_is_mbm_event(mevt->evtid) &&
1667 !strcmp(mevt->name, name))
1668 return mevt;
1669 }
1670
1671 return NULL;
1672 }
1673
rdtgroup_modify_assign_state(char * assign,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,struct mon_evt * mevt)1674 static int rdtgroup_modify_assign_state(char *assign, struct rdt_l3_mon_domain *d,
1675 struct rdtgroup *rdtgrp, struct mon_evt *mevt)
1676 {
1677 int ret = 0;
1678
1679 if (!assign || strlen(assign) != 1)
1680 return -EINVAL;
1681
1682 switch (*assign) {
1683 case 'e':
1684 ret = rdtgroup_assign_cntr_event(d, rdtgrp, mevt);
1685 break;
1686 case '_':
1687 rdtgroup_unassign_cntr_event(d, rdtgrp, mevt);
1688 break;
1689 default:
1690 ret = -EINVAL;
1691 break;
1692 }
1693
1694 return ret;
1695 }
1696
resctrl_parse_mbm_assignment(struct rdt_resource * r,struct rdtgroup * rdtgrp,char * event,char * tok)1697 static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup *rdtgrp,
1698 char *event, char *tok)
1699 {
1700 struct rdt_l3_mon_domain *d;
1701 unsigned long dom_id = 0;
1702 char *dom_str, *id_str;
1703 struct mon_evt *mevt;
1704 int ret;
1705
1706 mevt = mbm_get_mon_event_by_name(r, event);
1707 if (!mevt) {
1708 rdt_last_cmd_printf("Invalid event %s\n", event);
1709 return -ENOENT;
1710 }
1711
1712 next:
1713 if (!tok || tok[0] == '\0')
1714 return 0;
1715
1716 /* Start processing the strings for each domain */
1717 dom_str = strim(strsep(&tok, ";"));
1718
1719 id_str = strsep(&dom_str, "=");
1720
1721 /* Check for domain id '*' which means all domains */
1722 if (id_str && *id_str == '*') {
1723 ret = rdtgroup_modify_assign_state(dom_str, NULL, rdtgrp, mevt);
1724 if (ret)
1725 rdt_last_cmd_printf("Assign operation '%s:*=%s' failed\n",
1726 event, dom_str);
1727 return ret;
1728 } else if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
1729 rdt_last_cmd_puts("Missing domain id\n");
1730 return -EINVAL;
1731 }
1732
1733 /* Verify if the dom_id is valid */
1734 list_for_each_entry(d, &r->mon_domains, hdr.list) {
1735 if (d->hdr.id == dom_id) {
1736 ret = rdtgroup_modify_assign_state(dom_str, d, rdtgrp, mevt);
1737 if (ret) {
1738 rdt_last_cmd_printf("Assign operation '%s:%ld=%s' failed\n",
1739 event, dom_id, dom_str);
1740 return ret;
1741 }
1742 goto next;
1743 }
1744 }
1745
1746 rdt_last_cmd_printf("Invalid domain id %ld\n", dom_id);
1747 return -EINVAL;
1748 }
1749
mbm_L3_assignments_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)1750 ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf,
1751 size_t nbytes, loff_t off)
1752 {
1753 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1754 struct rdtgroup *rdtgrp;
1755 char *token, *event;
1756 int ret = 0;
1757
1758 /* Valid input requires a trailing newline */
1759 if (nbytes == 0 || buf[nbytes - 1] != '\n')
1760 return -EINVAL;
1761
1762 buf[nbytes - 1] = '\0';
1763
1764 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1765 if (!rdtgrp) {
1766 rdtgroup_kn_unlock(of->kn);
1767 return -ENOENT;
1768 }
1769 rdt_last_cmd_clear();
1770
1771 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1772 rdt_last_cmd_puts("mbm_event mode is not enabled\n");
1773 rdtgroup_kn_unlock(of->kn);
1774 return -EINVAL;
1775 }
1776
1777 while ((token = strsep(&buf, "\n")) != NULL) {
1778 /*
1779 * The write command follows the following format:
1780 * "<Event>:<Domain ID>=<Assignment state>"
1781 * Extract the event name first.
1782 */
1783 event = strsep(&token, ":");
1784
1785 ret = resctrl_parse_mbm_assignment(r, rdtgrp, event, token);
1786 if (ret)
1787 break;
1788 }
1789
1790 rdtgroup_kn_unlock(of->kn);
1791
1792 return ret ?: nbytes;
1793 }
1794
closid_num_dirty_rmid_alloc(struct rdt_resource * r)1795 static int closid_num_dirty_rmid_alloc(struct rdt_resource *r)
1796 {
1797 if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
1798 u32 num_closid = resctrl_arch_get_num_closid(r);
1799 u32 *tmp;
1800
1801 /* For ARM memory ordering access to closid_num_dirty_rmid */
1802 mutex_lock(&rdtgroup_mutex);
1803
1804 /*
1805 * If the architecture hasn't provided a sanitised value here,
1806 * this may result in larger arrays than necessary. Resctrl will
1807 * use a smaller system wide value based on the resources in
1808 * use.
1809 */
1810 tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
1811 if (!tmp) {
1812 mutex_unlock(&rdtgroup_mutex);
1813 return -ENOMEM;
1814 }
1815
1816 closid_num_dirty_rmid = tmp;
1817
1818 mutex_unlock(&rdtgroup_mutex);
1819 }
1820
1821 return 0;
1822 }
1823
closid_num_dirty_rmid_free(void)1824 static void closid_num_dirty_rmid_free(void)
1825 {
1826 if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
1827 mutex_lock(&rdtgroup_mutex);
1828 kfree(closid_num_dirty_rmid);
1829 closid_num_dirty_rmid = NULL;
1830 mutex_unlock(&rdtgroup_mutex);
1831 }
1832 }
1833
1834 /**
1835 * resctrl_l3_mon_resource_init() - Initialise global monitoring structures.
1836 *
1837 * Allocate and initialise global monitor resources that do not belong to a
1838 * specific domain. i.e. the closid_num_dirty_rmid[] used to find the CLOSID
1839 * with the cleanest set of RMIDs.
1840 * Called once during boot after the struct rdt_resource's have been configured
1841 * but before the filesystem is mounted.
1842 * Resctrl's cpuhp callbacks may be called before this point to bring a domain
1843 * online.
1844 *
1845 * Return: 0 for success, or -ENOMEM.
1846 */
resctrl_l3_mon_resource_init(void)1847 int resctrl_l3_mon_resource_init(void)
1848 {
1849 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1850 int ret;
1851
1852 if (!r->mon_capable)
1853 return 0;
1854
1855 ret = closid_num_dirty_rmid_alloc(r);
1856 if (ret)
1857 return ret;
1858
1859 if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
1860 mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true;
1861 resctrl_file_fflags_init("mbm_total_bytes_config",
1862 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1863 }
1864 if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
1865 mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true;
1866 resctrl_file_fflags_init("mbm_local_bytes_config",
1867 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1868 }
1869
1870 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1871 mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
1872 else if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1873 mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;
1874
1875 if (r->mon.mbm_cntr_assignable) {
1876 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1877 mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
1878 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1879 mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
1880 (READS_TO_LOCAL_MEM |
1881 READS_TO_LOCAL_S_MEM |
1882 NON_TEMP_WRITE_TO_LOCAL_MEM);
1883 r->mon.mbm_assign_on_mkdir = true;
1884 resctrl_file_fflags_init("num_mbm_cntrs",
1885 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1886 resctrl_file_fflags_init("available_mbm_cntrs",
1887 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1888 resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG);
1889 resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO |
1890 RFTYPE_RES_CACHE);
1891 resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE);
1892 }
1893
1894 return 0;
1895 }
1896
resctrl_l3_mon_resource_exit(void)1897 void resctrl_l3_mon_resource_exit(void)
1898 {
1899 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1900
1901 if (!r->mon_capable)
1902 return;
1903
1904 closid_num_dirty_rmid_free();
1905 }
1906