// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2007, 2011
 */

#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/cpufeature.h>
#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/uaccess.h>
#include <linux/sysctl.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/hiperdispatch.h>
#include <asm/sysinfo.h>
#include <asm/asm.h>

#define PTF_HORIZONTAL	(0UL)
#define PTF_VERTICAL	(1UL)
#define PTF_CHECK	(2UL)

enum {
	TOPOLOGY_MODE_HW,
	TOPOLOGY_MODE_SINGLE,
	TOPOLOGY_MODE_PACKAGE,
	TOPOLOGY_MODE_UNINITIALIZED
};

struct mask_info {
	struct mask_info *next;
	unsigned char id;
	cpumask_t mask;
};

static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
static int cpu_management;

static DECLARE_WORK(topology_work, topology_work_fn);

/*
 * Socket/Book linked lists and cpu_topology updates are
 * protected by "sched_domains_mutex".
 */
static struct mask_info socket_info;
static struct mask_info book_info;
static struct mask_info drawer_info;

struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu)
{
	static cpumask_t mask;

	cpumask_clear(&mask);
	if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
		goto out;
	cpumask_set_cpu(cpu, &mask);
	switch (topology_mode) {
	case TOPOLOGY_MODE_HW:
		while (info) {
			if (cpumask_test_cpu(cpu, &info->mask)) {
				cpumask_copy(&mask, &info->mask);
				break;
			}
			info = info->next;
		}
		break;
	case TOPOLOGY_MODE_PACKAGE:
		cpumask_copy(&mask, cpu_present_mask);
		break;
	default:
		fallthrough;
	case TOPOLOGY_MODE_SINGLE:
		break;
	}
	cpumask_and(&mask, &mask, &cpu_setup_mask);
out:
	cpumask_copy(dst, &mask);
}

static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
{
	static cpumask_t mask;
	unsigned int max_cpu;

	cpumask_clear(&mask);
	if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
		goto out;
	cpumask_set_cpu(cpu, &mask);
	if (topology_mode != TOPOLOGY_MODE_HW)
		goto out;
	cpu -= cpu % (smp_cpu_mtid + 1);
	max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
	for (; cpu <= max_cpu; cpu++) {
		if (cpumask_test_cpu(cpu, &cpu_setup_mask))
			cpumask_set_cpu(cpu, &mask);
	}
out:
	cpumask_copy(dst, &mask);
}

#define TOPOLOGY_CORE_BITS	64

static void add_cpus_to_mask(struct topology_core *tl_core,
			     struct mask_info *drawer,
			     struct mask_info *book,
			     struct mask_info *socket)
{
	struct cpu_topology_s390 *topo;
	unsigned int core;

	for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
		unsigned int max_cpu, rcore;
		int cpu;

		rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
		cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
		if (cpu < 0)
			continue;
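		/*
		 * The core entry addresses a physical core; fan the
		 * container IDs and polarization out to every hardware
		 * thread (logical CPU) belonging to it.
		 */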
		max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
		for (; cpu <= max_cpu; cpu++) {
			topo = &cpu_topology[cpu];
			topo->drawer_id = drawer->id;
			topo->book_id = book->id;
			topo->socket_id = socket->id;
			topo->core_id = rcore;
			topo->thread_id = cpu;
			topo->dedicated = tl_core->d;
			cpumask_set_cpu(cpu, &drawer->mask);
			cpumask_set_cpu(cpu, &book->mask);
			cpumask_set_cpu(cpu, &socket->mask);
			smp_cpu_set_polarization(cpu, tl_core->pp);
			smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
		}
	}
}

static void clear_masks(void)
{
	struct mask_info *info;

	info = &socket_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &book_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &drawer_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
}

static union topology_entry *next_tle(union topology_entry *tle)
{
	if (!tle->nl)
		return (union topology_entry *)((struct topology_core *)tle + 1);
	return (union topology_entry *)((struct topology_container *)tle + 1);
}

static void tl_to_masks(struct sysinfo_15_1_x *info)
{
	struct mask_info *socket = &socket_info;
	struct mask_info *book = &book_info;
	struct mask_info *drawer = &drawer_info;
	union topology_entry *tle, *end;

	clear_masks();
	tle = info->tle;
	end = (union topology_entry *)((unsigned long)info + info->length);
	while (tle < end) {
		switch (tle->nl) {
		case 3:
			drawer = drawer->next;
			drawer->id = tle->container.id;
			break;
		case 2:
			book = book->next;
			book->id = tle->container.id;
			break;
		case 1:
			socket = socket->next;
			socket->id = tle->container.id;
			break;
		case 0:
			add_cpus_to_mask(&tle->cpu, drawer, book, socket);
			break;
		default:
			clear_masks();
			return;
		}
		tle = next_tle(tle);
	}
}

static void topology_update_polarization_simple(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
}

static int ptf(unsigned long fc)
{
	int cc;

	asm volatile(
		"	.insn	rre,0xb9a20000,%[fc],%[fc]\n"
		CC_IPM(cc)
		: CC_OUT(cc, cc)
		: [fc] "d" (fc)
		: CC_CLOBBER);
	return CC_TRANSFORM(cc);
}

int topology_set_cpu_management(int fc)
{
	int cpu, rc;

	if (!cpu_has_topology())
		return -EOPNOTSUPP;
	if (fc)
		rc = ptf(PTF_VERTICAL);
	else
		rc = ptf(PTF_HORIZONTAL);
	if (rc)
		return -EBUSY;
	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
	return rc;
}

void update_cpu_masks(void)
{
	struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
	int cpu, sibling, pkg_first, smt_first, id;

	for_each_possible_cpu(cpu) {
		topo = &cpu_topology[cpu];
		cpu_thread_map(&topo->thread_mask, cpu);
		cpu_group_map(&topo->core_mask, &socket_info, cpu);
		cpu_group_map(&topo->book_mask, &book_info, cpu);
		cpu_group_map(&topo->drawer_mask, &drawer_info, cpu);
		topo->booted_cores = 0;
		if (topology_mode != TOPOLOGY_MODE_HW) {
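			/*
			 * Fake topology when hardware info is not used:
			 * PACKAGE mode puts all CPUs into one package,
			 * SINGLE mode gives each CPU its own package.
			 */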
			id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
			topo->thread_id = cpu;
			topo->core_id = cpu;
			topo->socket_id = id;
			topo->book_id = id;
			topo->drawer_id = id;
		}
	}
	hd_reset_state();
	for_each_online_cpu(cpu) {
		topo = &cpu_topology[cpu];
		pkg_first = cpumask_first(&topo->core_mask);
		topo_package = &cpu_topology[pkg_first];
		if (cpu == pkg_first) {
			for_each_cpu(sibling, &topo->core_mask) {
				topo_sibling = &cpu_topology[sibling];
				smt_first = cpumask_first(&topo_sibling->thread_mask);
				if (sibling == smt_first) {
					topo_package->booted_cores++;
					hd_add_core(sibling);
				}
			}
		} else {
			topo->booted_cores = topo_package->booted_cores;
		}
	}
}

void store_topology(struct sysinfo_15_1_x *info)
{
	stsi(info, 15, 1, topology_mnest_limit());
}

static void __arch_update_dedicated_flag(void *arg)
{
	if (topology_cpu_dedicated(smp_processor_id()))
		set_cpu_flag(CIF_DEDICATED_CPU);
	else
		clear_cpu_flag(CIF_DEDICATED_CPU);
}

static int __arch_update_cpu_topology(void)
{
	struct sysinfo_15_1_x *info = tl_info;
	int rc, hd_status;

	hd_status = 0;
	rc = 0;
	mutex_lock(&smp_cpu_state_mutex);
	if (cpu_has_topology()) {
		rc = 1;
		store_topology(info);
		tl_to_masks(info);
	}
	update_cpu_masks();
	if (!cpu_has_topology())
		topology_update_polarization_simple();
	if (cpu_management == 1)
		hd_status = hd_enable_hiperdispatch();
	mutex_unlock(&smp_cpu_state_mutex);
	if (hd_status == 0)
		hd_disable_hiperdispatch();
	return rc;
}

int arch_update_cpu_topology(void)
{
	int rc;

	rc = __arch_update_cpu_topology();
	on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
	return rc;
}

static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}

void topology_schedule_update(void)
{
	schedule_work(&topology_work);
}

static void topology_flush_work(void)
{
	flush_work(&topology_work);
}

static void topology_timer_fn(struct timer_list *unused)
{
	if (ptf(PTF_CHECK))
		topology_schedule_update();
	set_topology_timer();
}

static struct timer_list topology_timer;

static atomic_t topology_poll = ATOMIC_INIT(0);
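
/*
 * Poll the topology state every 100 msec while changes are expected
 * (topology_poll counts down to zero), every 60 seconds otherwise.
 */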
static void set_topology_timer(void)
{
	if (atomic_add_unless(&topology_poll, -1, 0))
		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
	else
		mod_timer(&topology_timer, jiffies + secs_to_jiffies(60));
}

void topology_expect_change(void)
{
	if (!cpu_has_topology())
		return;
	/* This is racy, but it doesn't matter since it is just a heuristic.
	 * Worst case is that we poll in a higher frequency for a bit longer.
	 */
	if (atomic_read(&topology_poll) > 60)
		return;
	atomic_add(60, &topology_poll);
	set_topology_timer();
}

static int set_polarization(int polarization)
{
	int rc = 0;

	cpus_read_lock();
	mutex_lock(&smp_cpu_state_mutex);
	if (cpu_management == polarization)
		goto out;
	rc = topology_set_cpu_management(polarization);
	if (rc)
		goto out;
	cpu_management = polarization;
	topology_expect_change();
out:
	mutex_unlock(&smp_cpu_state_mutex);
	cpus_read_unlock();
	return rc;
}

static ssize_t dispatching_show(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sysfs_emit(buf, "%d\n", cpu_management);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

static ssize_t dispatching_store(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf,
				 size_t count)
{
	int val, rc;
	char delim;

	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;
	rc = set_polarization(val);
	return rc ? rc : count;
}
static DEVICE_ATTR_RW(dispatching);

static ssize_t cpu_polarization_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	switch (smp_cpu_get_polarization(cpu)) {
	case POLARIZATION_HRZ:
		count = sysfs_emit(buf, "horizontal\n");
		break;
	case POLARIZATION_VL:
		count = sysfs_emit(buf, "vertical:low\n");
		break;
	case POLARIZATION_VM:
		count = sysfs_emit(buf, "vertical:medium\n");
		break;
	case POLARIZATION_VH:
		count = sysfs_emit(buf, "vertical:high\n");
		break;
	default:
		count = sysfs_emit(buf, "unknown\n");
		break;
	}
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);

static struct attribute *topology_cpu_attrs[] = {
	&dev_attr_polarization.attr,
	NULL,
};

static struct attribute_group topology_cpu_attr_group = {
	.attrs = topology_cpu_attrs,
};

static ssize_t cpu_dedicated_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sysfs_emit(buf, "%d\n", topology_cpu_dedicated(cpu));
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL);

static struct attribute *topology_extra_cpu_attrs[] = {
	&dev_attr_dedicated.attr,
	NULL,
};

static struct attribute_group topology_extra_cpu_attr_group = {
	.attrs = topology_extra_cpu_attrs,
};

int topology_cpu_init(struct cpu *cpu)
{
	int rc;

	rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
	if (rc || !cpu_has_topology())
		return rc;
	rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
	if (rc)
		sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group);
	return rc;
}
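
/*
 * cpumask accessors for the scheduler topology levels defined in
 * s390_topology[] below.
 */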
static const struct cpumask *cpu_thread_mask(int cpu)
{
	return &cpu_topology[cpu].thread_mask;
}

const struct cpumask *cpu_coregroup_mask(int cpu)
{
	return &cpu_topology[cpu].core_mask;
}

static const struct cpumask *cpu_book_mask(int cpu)
{
	return &cpu_topology[cpu].book_mask;
}

static const struct cpumask *cpu_drawer_mask(int cpu)
{
	return &cpu_topology[cpu].drawer_mask;
}

static struct sched_domain_topology_level s390_topology[] = {
	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
	{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
	{ cpu_cpu_mask, SD_INIT_NAME(PKG) },
	{ NULL, },
};

static void __init alloc_masks(struct sysinfo_15_1_x *info,
			       struct mask_info *mask, int offset)
{
	int i, nr_masks;

	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
	for (i = 0; i < info->mnest - offset; i++)
		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
	nr_masks = max(nr_masks, 1);
	for (i = 0; i < nr_masks; i++) {
		mask->next = memblock_alloc_or_panic(sizeof(*mask->next), 8);
		mask = mask->next;
	}
}

static int __init detect_polarization(union topology_entry *tle)
{
	struct topology_core *tl_core;

	while (tle->nl)
		tle = next_tle(tle);
	tl_core = (struct topology_core *)tle;
	return tl_core->pp != POLARIZATION_HRZ;
}
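
/*
 * Early boot setup: select the topology mode, fetch SYSIB 15.1.x from
 * the machine if available, and allocate the socket/book/drawer mask
 * lists before the first scheduler domain build.
 */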
void __init topology_init_early(void)
{
	struct sysinfo_15_1_x *info;

	set_sched_topology(s390_topology);
	if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
		if (cpu_has_topology())
			topology_mode = TOPOLOGY_MODE_HW;
		else
			topology_mode = TOPOLOGY_MODE_SINGLE;
	}
	if (!cpu_has_topology())
		goto out;
	tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
	info = tl_info;
	store_topology(info);
	cpu_management = detect_polarization(info->tle);
	pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
		info->mag[0], info->mag[1], info->mag[2], info->mag[3],
		info->mag[4], info->mag[5], info->mnest);
	alloc_masks(info, &socket_info, 1);
	alloc_masks(info, &book_info, 2);
	alloc_masks(info, &drawer_info, 3);
out:
	cpumask_set_cpu(0, &cpu_setup_mask);
	__arch_update_cpu_topology();
	__arch_update_dedicated_flag(NULL);
}

static inline int topology_get_mode(int enabled)
{
	if (!enabled)
		return TOPOLOGY_MODE_SINGLE;
	return cpu_has_topology() ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
}

static inline int topology_is_enabled(void)
{
	return topology_mode != TOPOLOGY_MODE_SINGLE;
}

static int __init topology_setup(char *str)
{
	bool enabled;
	int rc;

	rc = kstrtobool(str, &enabled);
	if (rc)
		return rc;
	topology_mode = topology_get_mode(enabled);
	return 0;
}
early_param("topology", topology_setup);

static int topology_ctl_handler(const struct ctl_table *ctl, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int enabled = topology_is_enabled();
	int new_mode;
	int rc;
	struct ctl_table ctl_entry = {
		.procname	= ctl->procname,
		.data		= &enabled,
		.maxlen		= sizeof(int),
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	};

	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
	if (rc < 0 || !write)
		return rc;

	mutex_lock(&smp_cpu_state_mutex);
	new_mode = topology_get_mode(enabled);
	if (topology_mode != new_mode) {
		topology_mode = new_mode;
		topology_schedule_update();
	}
	mutex_unlock(&smp_cpu_state_mutex);
	topology_flush_work();

	return rc;
}

static int polarization_ctl_handler(const struct ctl_table *ctl, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	int polarization;
	int rc;
	struct ctl_table ctl_entry = {
		.procname	= ctl->procname,
		.data		= &polarization,
		.maxlen		= sizeof(int),
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	};

	polarization = cpu_management;
	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
	if (rc < 0 || !write)
		return rc;
	return set_polarization(polarization);
}

static const struct ctl_table topology_ctl_table[] = {
	{
		.procname	= "topology",
		.mode		= 0644,
		.proc_handler	= topology_ctl_handler,
	},
	{
		.procname	= "polarization",
		.mode		= 0644,
		.proc_handler	= polarization_ctl_handler,
	},
};

static int __init topology_init(void)
{
	struct device *dev_root;
	int rc = 0;

	timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
	if (cpu_has_topology())
		set_topology_timer();
	else
		topology_update_polarization_simple();
	if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY_VERTICAL))
		set_polarization(1);
	register_sysctl("s390", topology_ctl_table);

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (dev_root) {
		rc = device_create_file(dev_root, &dev_attr_dispatching);
		put_device(dev_root);
	}
	return rc;
}
device_initcall(topology_init);