// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology (RDT)
 * - Cache Allocation code.
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Authors:
 *    Fenghua Yu <fenghua.yu@intel.com>
 *    Tony Luck <tony.luck@intel.com>
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual June 2016, volume 3, section 17.17.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/cpu.h>
#include <linux/kernfs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/tick.h>

#include "internal.h"

struct rdt_parse_data {
	struct rdtgroup		*rdtgrp;
	char			*buf;
};

typedef int (ctrlval_parser_t)(struct rdt_parse_data *data,
			       struct resctrl_schema *s,
			       struct rdt_ctrl_domain *d);

/*
 * Check whether an MBA bandwidth percentage value is valid. The value is
 * checked against the minimum and maximum bandwidth values specified by
 * the hardware. The allocated bandwidth percentage is rounded up to the
 * next control step available on the hardware.
 */
static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r)
{
	int ret;
	u32 bw;

	/*
	 * Only linear delay values are supported on current Intel SKUs.
	 */
	if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
		rdt_last_cmd_puts("No support for non-linear MB domains\n");
		return false;
	}

	ret = kstrtou32(buf, 10, &bw);
	if (ret) {
		rdt_last_cmd_printf("Invalid MB value %s\n", buf);
		return false;
	}

	/* Nothing else to do if software controller is enabled. */
	if (is_mba_sc(r)) {
		*data = bw;
		return true;
	}

	if (bw < r->membw.min_bw || bw > r->membw.max_bw) {
		rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n",
				    bw, r->membw.min_bw, r->membw.max_bw);
		return false;
	}

	*data = roundup(bw, (unsigned long)r->membw.bw_gran);
	return true;
}

static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
		    struct rdt_ctrl_domain *d)
{
	struct resctrl_staged_config *cfg;
	u32 closid = data->rdtgrp->closid;
	struct rdt_resource *r = s->res;
	u32 bw_val;

	cfg = &d->staged_config[s->conf_type];
	if (cfg->have_new_ctrl) {
		rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
		return -EINVAL;
	}

	if (!bw_validate(data->buf, &bw_val, r))
		return -EINVAL;

	if (is_mba_sc(r)) {
		d->mbps_val[closid] = bw_val;
		return 0;
	}

	cfg->new_ctrl = bw_val;
	cfg->have_new_ctrl = true;

	return 0;
}
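/*
 * Worked example (illustrative values, not from any specific SKU):
 * with bw_gran = 10, min_bw = 10 and max_bw = 100, writing "42" for an
 * MB domain passes the range check and is rounded up by bw_validate()
 * to the next control step, so 50 is staged as the new control value.
 */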
/*
 * Check whether a cache bit mask is valid.
 * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID:
 *   - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1
 *   - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1
 *
 * Haswell does not support a non-contiguous 1s value and additionally
 * requires at least two bits set.
 * AMD allows non-contiguous bitmasks.
 */
static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
{
	u32 supported_bits = BIT_MASK(r->cache.cbm_len) - 1;
	unsigned int cbm_len = r->cache.cbm_len;
	unsigned long first_bit, zero_bit, val;
	int ret;

	ret = kstrtoul(buf, 16, &val);
	if (ret) {
		rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
		return false;
	}

	if ((r->cache.min_cbm_bits > 0 && val == 0) || val > supported_bits) {
		rdt_last_cmd_puts("Mask out of range\n");
		return false;
	}

	first_bit = find_first_bit(&val, cbm_len);
	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);

	/* Are non-contiguous bitmasks allowed? */
	if (!r->cache.arch_has_sparse_bitmasks &&
	    (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
		rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
		return false;
	}

	if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
		rdt_last_cmd_printf("Need at least %d bits in the mask\n",
				    r->cache.min_cbm_bits);
		return false;
	}

	*data = val;
	return true;
}

/*
 * Read one cache bit mask (hex). Check that it is valid for the current
 * resource type.
 */
static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
		     struct rdt_ctrl_domain *d)
{
	struct rdtgroup *rdtgrp = data->rdtgrp;
	struct resctrl_staged_config *cfg;
	struct rdt_resource *r = s->res;
	u32 cbm_val;

	cfg = &d->staged_config[s->conf_type];
	if (cfg->have_new_ctrl) {
		rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
		return -EINVAL;
	}

	/*
	 * Cannot set up more than one pseudo-locked region in a cache
	 * hierarchy.
	 */
	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
	    rdtgroup_pseudo_locked_in_hierarchy(d)) {
		rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n");
		return -EINVAL;
	}

	if (!cbm_validate(data->buf, &cbm_val, r))
		return -EINVAL;

	if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
	     rdtgrp->mode == RDT_MODE_SHAREABLE) &&
	    rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) {
		rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n");
		return -EINVAL;
	}

	/*
	 * The CBM may not overlap with the CBM of another closid if
	 * either is exclusive.
	 */
	if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) {
		rdt_last_cmd_puts("Overlaps with exclusive group\n");
		return -EINVAL;
	}

	if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) {
		if (rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
			rdt_last_cmd_puts("Overlaps with other group\n");
			return -EINVAL;
		}
	}

	cfg->new_ctrl = cbm_val;
	cfg->have_new_ctrl = true;

	return 0;
}
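/*
 * Worked example (illustrative, assuming cbm_len = 12 and
 * min_cbm_bits = 1): "ff0" is accepted by cbm_validate() because its
 * set bits are contiguous; "f0f" is rejected unless the architecture
 * sets arch_has_sparse_bitmasks; "1000" is rejected as out of range
 * because bit 12 lies beyond the supported mask width.
 */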
/*
 * For each domain in this resource we expect to find a series of:
 *	id=mask
 * separated by ";". The "id" is in decimal, and must match one of
 * the "id"s for this resource.
 */
static int parse_line(char *line, struct resctrl_schema *s,
		      struct rdtgroup *rdtgrp)
{
	enum resctrl_conf_type t = s->conf_type;
	ctrlval_parser_t *parse_ctrlval = NULL;
	struct resctrl_staged_config *cfg;
	struct rdt_resource *r = s->res;
	struct rdt_parse_data data;
	struct rdt_ctrl_domain *d;
	char *dom = NULL, *id;
	unsigned long dom_id;

	/* Walking r->domains, ensure it can't race with cpuhp */
	lockdep_assert_cpus_held();

	switch (r->schema_fmt) {
	case RESCTRL_SCHEMA_BITMAP:
		parse_ctrlval = &parse_cbm;
		break;
	case RESCTRL_SCHEMA_RANGE:
		parse_ctrlval = &parse_bw;
		break;
	}

	if (WARN_ON_ONCE(!parse_ctrlval))
		return -EINVAL;

	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
	    (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) {
		rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
		return -EINVAL;
	}

next:
	if (!line || line[0] == '\0')
		return 0;
	dom = strsep(&line, ";");
	id = strsep(&dom, "=");
	if (!dom || kstrtoul(id, 10, &dom_id)) {
		rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
		return -EINVAL;
	}
	dom = strim(dom);
	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
		if (d->hdr.id == dom_id) {
			data.buf = dom;
			data.rdtgrp = rdtgrp;
			if (parse_ctrlval(&data, s, d))
				return -EINVAL;
			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
				cfg = &d->staged_config[t];
				/*
				 * We are in pseudo-locking setup mode and
				 * have just parsed a valid CBM that should
				 * be pseudo-locked. Only one locked region
				 * is allowed per resource group and domain,
				 * so do the required initialization for
				 * this single region and return.
				 */
				rdtgrp->plr->s = s;
				rdtgrp->plr->d = d;
				rdtgrp->plr->cbm = cfg->new_ctrl;
				d->plr = rdtgrp->plr;
				return 0;
			}
			goto next;
		}
	}
	return -EINVAL;
}

static int rdtgroup_parse_resource(char *resname, char *tok,
				   struct rdtgroup *rdtgrp)
{
	struct resctrl_schema *s;

	list_for_each_entry(s, &resctrl_schema_all, list) {
		if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid)
			return parse_line(tok, s, rdtgrp);
	}
	rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname);
	return -EINVAL;
}
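/*
 * Illustrative write (assuming the conventional /sys/fs/resctrl mount
 * point, a group named "grp1" and an L3 resource with two cache
 * domains):
 *
 *	# echo "L3:0=ffff;1=00ff" > /sys/fs/resctrl/grp1/schemata
 *
 * rdtgroup_schemata_write() below splits the line at ':' and hands
 * "0=ffff;1=00ff" to parse_line() via rdtgroup_parse_resource(), which
 * stages one CBM per listed domain through parse_cbm().
 */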
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off)
{
	struct resctrl_schema *s;
	struct rdtgroup *rdtgrp;
	struct rdt_resource *r;
	char *tok, *resname;
	int ret = 0;

	/* Valid input requires a trailing newline */
	if (nbytes == 0 || buf[nbytes - 1] != '\n')
		return -EINVAL;
	buf[nbytes - 1] = '\0';

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		rdtgroup_kn_unlock(of->kn);
		return -ENOENT;
	}
	rdt_last_cmd_clear();

	/*
	 * No changes to a pseudo-locked region are allowed. It has to be
	 * removed and re-created instead.
	 */
	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
		ret = -EINVAL;
		rdt_last_cmd_puts("Resource group is pseudo-locked\n");
		goto out;
	}

	rdt_staged_configs_clear();

	while ((tok = strsep(&buf, "\n")) != NULL) {
		resname = strim(strsep(&tok, ":"));
		if (!tok) {
			rdt_last_cmd_puts("Missing ':'\n");
			ret = -EINVAL;
			goto out;
		}
		if (tok[0] == '\0') {
			rdt_last_cmd_printf("Missing '%s' value\n", resname);
			ret = -EINVAL;
			goto out;
		}
		ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
		if (ret)
			goto out;
	}

	list_for_each_entry(s, &resctrl_schema_all, list) {
		r = s->res;

		/*
		 * Writes to mba_sc resources update the software controller,
		 * not the control MSR.
		 */
		if (is_mba_sc(r))
			continue;

		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
		if (ret)
			goto out;
	}

	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
		/*
		 * If pseudo-locking fails we keep the resource group in
		 * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service
		 * active and updated for just the domain the pseudo-locked
		 * region was requested for.
		 */
		ret = rdtgroup_pseudo_lock_create(rdtgrp);
	}

out:
	rdt_staged_configs_clear();
	rdtgroup_kn_unlock(of->kn);
	return ret ?: nbytes;
}

static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid)
{
	struct rdt_resource *r = schema->res;
	struct rdt_ctrl_domain *dom;
	bool sep = false;
	u32 ctrl_val;

	/* Walking r->domains, ensure it can't race with cpuhp */
	lockdep_assert_cpus_held();

	seq_printf(s, "%*s:", max_name_width, schema->name);
	list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
		if (sep)
			seq_puts(s, ";");

		if (is_mba_sc(r))
			ctrl_val = dom->mbps_val[closid];
		else
			ctrl_val = resctrl_arch_get_config(r, dom, closid,
							   schema->conf_type);

		seq_printf(s, schema->fmt_str, dom->hdr.id, ctrl_val);
		sep = true;
	}
	seq_puts(s, "\n");
}

int rdtgroup_schemata_show(struct kernfs_open_file *of,
			   struct seq_file *s, void *v)
{
	struct resctrl_schema *schema;
	struct rdtgroup *rdtgrp;
	int ret = 0;
	u32 closid;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (rdtgrp) {
		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
			list_for_each_entry(schema, &resctrl_schema_all, list) {
				seq_printf(s, "%s:uninitialized\n", schema->name);
			}
		} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
			if (!rdtgrp->plr->d) {
				rdt_last_cmd_clear();
				rdt_last_cmd_puts("Cache domain offline\n");
				ret = -ENODEV;
			} else {
				seq_printf(s, "%s:%d=%x\n",
					   rdtgrp->plr->s->res->name,
					   rdtgrp->plr->d->hdr.id,
					   rdtgrp->plr->cbm);
			}
		} else {
			closid = rdtgrp->closid;
			list_for_each_entry(schema, &resctrl_schema_all, list) {
				if (closid < schema->num_closid)
					show_doms(s, schema, closid);
			}
		}
	} else {
		ret = -ENOENT;
	}
	rdtgroup_kn_unlock(of->kn);
	return ret;
}

static int smp_mon_event_count(void *arg)
{
	mon_event_count(arg);

	return 0;
}
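/*
 * Illustrative event selection for the software memory bandwidth
 * controller (assuming the conventional /sys/fs/resctrl mount point,
 * a group named "grp1" and the mba_MBps mount option):
 *
 *	# echo "mbm_total_bytes" > /sys/fs/resctrl/grp1/mba_MBps_event
 *
 * The write handler below accepts only the MBM events that are enabled
 * on this system; anything else is rejected with -EINVAL.
 */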
ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
				      char *buf, size_t nbytes, loff_t off)
{
	struct rdtgroup *rdtgrp;
	int ret = 0;

	/* Valid input requires a trailing newline */
	if (nbytes == 0 || buf[nbytes - 1] != '\n')
		return -EINVAL;
	buf[nbytes - 1] = '\0';

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		rdtgroup_kn_unlock(of->kn);
		return -ENOENT;
	}
	rdt_last_cmd_clear();

	if (!strcmp(buf, "mbm_local_bytes")) {
		if (resctrl_arch_is_mbm_local_enabled())
			rdtgrp->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
		else
			ret = -EINVAL;
	} else if (!strcmp(buf, "mbm_total_bytes")) {
		if (resctrl_arch_is_mbm_total_enabled())
			rdtgrp->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
		else
			ret = -EINVAL;
	} else {
		ret = -EINVAL;
	}

	if (ret)
		rdt_last_cmd_printf("Unsupported event id '%s'\n", buf);

	rdtgroup_kn_unlock(of->kn);

	return ret ?: nbytes;
}

int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
				 struct seq_file *s, void *v)
{
	struct rdtgroup *rdtgrp;
	int ret = 0;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);

	if (rdtgrp) {
		switch (rdtgrp->mba_mbps_event) {
		case QOS_L3_MBM_LOCAL_EVENT_ID:
			seq_puts(s, "mbm_local_bytes\n");
			break;
		case QOS_L3_MBM_TOTAL_EVENT_ID:
			seq_puts(s, "mbm_total_bytes\n");
			break;
		default:
			pr_warn_once("Bad event %d\n", rdtgrp->mba_mbps_event);
			ret = -EINVAL;
			break;
		}
	} else {
		ret = -ENOENT;
	}

	rdtgroup_kn_unlock(of->kn);

	return ret;
}
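/*
 * Usage sketch for the lookup helper below (illustrative, "new" is a
 * hypothetical domain being brought online): domain lists are kept
 * sorted by id, so callers adding a domain typically do
 *
 *	hdr = resctrl_find_domain(&r->mon_domains, id, &pos);
 *	if (!hdr)
 *		list_add_tail(&new->hdr.list, pos);
 *
 * i.e. "pos" names the list position that keeps the ordering intact
 * when "id" is not yet present.
 */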
struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
					   struct list_head **pos)
{
	struct rdt_domain_hdr *d;
	struct list_head *l;

	list_for_each(l, h) {
		d = list_entry(l, struct rdt_domain_hdr, list);
		/* When id is found, return its domain. */
		if (id == d->id)
			return d;
		/* Stop searching when finding id's position in sorted list. */
		if (id < d->id)
			break;
	}

	if (pos)
		*pos = l;

	return NULL;
}

void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
		    struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
		    cpumask_t *cpumask, int evtid, int first)
{
	int cpu;

	/* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */
	lockdep_assert_cpus_held();

	/*
	 * Set up the parameters to pass to mon_event_count() to read the data.
	 */
	rr->rgrp = rdtgrp;
	rr->evtid = evtid;
	rr->r = r;
	rr->d = d;
	rr->first = first;
	rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid);
	if (IS_ERR(rr->arch_mon_ctx)) {
		rr->err = -EINVAL;
		return;
	}

	cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU);

	/*
	 * cpumask_any_housekeeping() prefers housekeeping CPUs, but
	 * are all the CPUs nohz_full? If yes, pick a CPU to IPI.
	 * MPAM's resctrl_arch_rmid_read() is unable to read the
	 * counters on some platforms if it's called in IRQ context.
	 */
	if (tick_nohz_full_cpu(cpu))
		smp_call_function_any(cpumask, mon_event_count, rr, 1);
	else
		smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);

	resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx);
}

int rdtgroup_mondata_show(struct seq_file *m, void *arg)
{
	struct kernfs_open_file *of = m->private;
	enum resctrl_res_level resid;
	enum resctrl_event_id evtid;
	struct rdt_domain_hdr *hdr;
	struct rmid_read rr = {0};
	struct rdt_mon_domain *d;
	struct rdtgroup *rdtgrp;
	struct rdt_resource *r;
	struct mon_data *md;
	int domid, ret = 0;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		ret = -ENOENT;
		goto out;
	}

	md = of->kn->priv;
	if (WARN_ON_ONCE(!md)) {
		ret = -EIO;
		goto out;
	}

	resid = md->rid;
	domid = md->domid;
	evtid = md->evtid;
	r = resctrl_arch_get_resource(resid);

	if (md->sum) {
		/*
		 * This file requires summing across all domains that share
		 * the L3 cache id that was provided in the "domid" field of
		 * struct mon_data. Search all domains in the resource for
		 * one that matches this cache id.
		 */
		list_for_each_entry(d, &r->mon_domains, hdr.list) {
			if (d->ci->id == domid) {
				rr.ci = d->ci;
				mon_event_read(&rr, r, NULL, rdtgrp,
					       &d->ci->shared_cpu_map, evtid, false);
				goto checkresult;
			}
		}
		ret = -ENOENT;
		goto out;
	} else {
		/*
		 * This file provides data from a single domain. Search
		 * the resource to find the domain with "domid".
		 */
		hdr = resctrl_find_domain(&r->mon_domains, domid, NULL);
		if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) {
			ret = -ENOENT;
			goto out;
		}
		d = container_of(hdr, struct rdt_mon_domain, hdr);
		mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false);
	}

checkresult:

	if (rr.err == -EIO)
		seq_puts(m, "Error\n");
	else if (rr.err == -EINVAL)
		seq_puts(m, "Unavailable\n");
	else
		seq_printf(m, "%llu\n", rr.val);

out:
	rdtgroup_kn_unlock(of->kn);
	return ret;
}
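/*
 * Illustrative read path (assuming the conventional /sys/fs/resctrl
 * mount point and a group named "grp1"): reading a per-domain file
 * such as
 *
 *	/sys/fs/resctrl/grp1/mon_data/mon_L3_00/llc_occupancy
 *
 * lands in rdtgroup_mondata_show() above, which looks up L3 monitor
 * domain 0 and uses mon_event_read() to run the counter read on a CPU
 * belonging to that domain.
 */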