// SPDX-License-Identifier: GPL-2.0
/*
 * Energy Model of devices
 *
 * Copyright (c) 2018-2021, Arm ltd.
 * Written by: Quentin Perret, Arm ltd.
 * Improvements provided by: Lukasz Luba, Arm ltd.
 */

#define pr_fmt(fmt) "energy_model: " fmt

#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/debugfs.h>
#include <linux/energy_model.h>
#include <linux/sched/topology.h>
#include <linux/slab.h>

/*
 * Mutex serializing the registrations of performance domains and letting
 * callbacks defined by drivers sleep.
 */
static DEFINE_MUTEX(em_pd_mutex);

static void em_cpufreq_update_efficiencies(struct device *dev,
					   struct em_perf_state *table);
static void em_check_capacity_update(void);
static void em_update_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(em_update_work, em_update_workfn);

static bool _is_cpu_device(struct device *dev)
{
	return (dev->bus == &cpu_subsys);
}

#ifdef CONFIG_DEBUG_FS
static struct dentry *rootdir;

struct em_dbg_info {
	struct em_perf_domain *pd;
	int ps_id;
};

#define DEFINE_EM_DBG_SHOW(name, fname) \
static int em_debug_##fname##_show(struct seq_file *s, void *unused) \
{ \
	struct em_dbg_info *em_dbg = s->private; \
	struct em_perf_state *table; \
	unsigned long val; \
\
	rcu_read_lock(); \
	table = em_perf_state_from_pd(em_dbg->pd); \
	val = table[em_dbg->ps_id].name; \
	rcu_read_unlock(); \
\
	seq_printf(s, "%lu\n", val); \
	return 0; \
} \
DEFINE_SHOW_ATTRIBUTE(em_debug_##fname)

DEFINE_EM_DBG_SHOW(frequency, frequency);
DEFINE_EM_DBG_SHOW(power, power);
DEFINE_EM_DBG_SHOW(cost, cost);
DEFINE_EM_DBG_SHOW(performance, performance);
DEFINE_EM_DBG_SHOW(flags, inefficiency);

static void em_debug_create_ps(struct em_perf_domain *em_pd,
			       struct em_dbg_info *em_dbg, int i,
			       struct dentry *pd)
{
	struct em_perf_state *table;
	unsigned long freq;
	struct dentry *d;
	char name[24];

	em_dbg[i].pd = em_pd;
	em_dbg[i].ps_id = i;

	rcu_read_lock();
	table = em_perf_state_from_pd(em_pd);
	freq = table[i].frequency;
	rcu_read_unlock();

	snprintf(name, sizeof(name), "ps:%lu", freq);

	/* Create per-ps directory */
	d = debugfs_create_dir(name, pd);
	debugfs_create_file("frequency", 0444, d, &em_dbg[i],
			    &em_debug_frequency_fops);
	debugfs_create_file("power", 0444, d, &em_dbg[i],
			    &em_debug_power_fops);
	debugfs_create_file("cost", 0444, d, &em_dbg[i],
			    &em_debug_cost_fops);
	debugfs_create_file("performance", 0444, d, &em_dbg[i],
			    &em_debug_performance_fops);
	debugfs_create_file("inefficient", 0444, d, &em_dbg[i],
			    &em_debug_inefficiency_fops);
}

static int em_debug_cpus_show(struct seq_file *s, void *unused)
{
	seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);

static int em_debug_flags_show(struct seq_file *s, void *unused)
{
	struct em_perf_domain *pd = s->private;

	seq_printf(s, "%#lx\n", pd->flags);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(em_debug_flags);

static void em_debug_create_pd(struct device *dev)
{
	struct em_dbg_info *em_dbg;
	struct dentry *d;
	int i;

	/* Create the directory of the performance domain */
	d = debugfs_create_dir(dev_name(dev), rootdir);

	if (_is_cpu_device(dev))
		debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus,
				    &em_debug_cpus_fops);

	debugfs_create_file("flags", 0444, d, dev->em_pd,
			    &em_debug_flags_fops);

	em_dbg = devm_kcalloc(dev, dev->em_pd->nr_perf_states,
			      sizeof(*em_dbg), GFP_KERNEL);
	if (!em_dbg)
		return;

	/* Create a sub-directory for each performance state */
	for (i = 0; i < dev->em_pd->nr_perf_states; i++)
		em_debug_create_ps(dev->em_pd, em_dbg, i, d);
}

static void em_debug_remove_pd(struct device *dev)
{
	debugfs_lookup_and_remove(dev_name(dev), rootdir);
}

static int __init em_debug_init(void)
{
	/* Create /sys/kernel/debug/energy_model directory */
	rootdir = debugfs_create_dir("energy_model", NULL);

	return 0;
}
fs_initcall(em_debug_init);
#else /* CONFIG_DEBUG_FS */
static void em_debug_create_pd(struct device *dev) {}
static void em_debug_remove_pd(struct device *dev) {}
#endif
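/*
 * For illustration (hypothetical device and frequencies): a CPU performance
 * domain registered for cpu0 with two OPPs at 500 MHz and 1 GHz would be
 * exposed by the debugfs helpers above roughly as:
 *
 *	/sys/kernel/debug/energy_model/cpu0/cpus
 *	/sys/kernel/debug/energy_model/cpu0/flags
 *	/sys/kernel/debug/energy_model/cpu0/ps:500000/
 *		{frequency,power,cost,performance,inefficient}
 *	/sys/kernel/debug/energy_model/cpu0/ps:1000000/
 *		{frequency,power,cost,performance,inefficient}
 */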
static void em_release_table_kref(struct kref *kref)
{
	/* It was the last owner of this table so we can free */
	kfree_rcu(container_of(kref, struct em_perf_table, kref), rcu);
}

/**
 * em_table_free() - Handles safe free of the EM table when needed
 * @table : EM table which is going to be freed
 *
 * No return values.
 */
void em_table_free(struct em_perf_table *table)
{
	kref_put(&table->kref, em_release_table_kref);
}

/**
 * em_table_alloc() - Allocate a new EM table
 * @pd : EM performance domain for which this must be done
 *
 * Allocate a new EM table and initialize its kref to indicate that it
 * has a user.
 * Returns allocated table or NULL.
 */
struct em_perf_table *em_table_alloc(struct em_perf_domain *pd)
{
	struct em_perf_table *table;
	int table_size;

	table_size = sizeof(struct em_perf_state) * pd->nr_perf_states;

	table = kzalloc(sizeof(*table) + table_size, GFP_KERNEL);
	if (!table)
		return NULL;

	kref_init(&table->kref);

	return table;
}

static void em_init_performance(struct device *dev, struct em_perf_domain *pd,
				struct em_perf_state *table, int nr_states)
{
	u64 fmax, max_cap;
	int i, cpu;

	/* This is needed only for CPUs and EAS - skip other devices */
	if (!_is_cpu_device(dev))
		return;

	cpu = cpumask_first(em_span_cpus(pd));

	/*
	 * Calculate the performance value for each frequency using a linear
	 * relationship. The final CPU capacity might not be ready at boot
	 * time, but the EM will be updated a bit later with the correct one.
	 */
	fmax = (u64) table[nr_states - 1].frequency;
	max_cap = (u64) arch_scale_cpu_capacity(cpu);
	for (i = 0; i < nr_states; i++)
		table[i].performance = div64_u64(max_cap * table[i].frequency,
						 fmax);
}
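/*
 * Worked example with hypothetical numbers: with
 * arch_scale_cpu_capacity() == 1024 and a highest OPP of 2000000 kHz, a
 * 1000000 kHz state gets performance = 1024 * 1000000 / 2000000 = 512,
 * i.e. half of the maximum capacity, while the top state always maps to
 * the full capacity.
 */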
static int em_compute_costs(struct device *dev, struct em_perf_state *table,
			    const struct em_data_callback *cb, int nr_states,
			    unsigned long flags)
{
	unsigned long prev_cost = ULONG_MAX;
	int i, ret;

	/* This is needed only for CPUs and EAS - skip other devices */
	if (!_is_cpu_device(dev))
		return 0;

	/* Compute the cost of each performance state. */
	for (i = nr_states - 1; i >= 0; i--) {
		unsigned long power_res, cost;

		if ((flags & EM_PERF_DOMAIN_ARTIFICIAL) && cb->get_cost) {
			ret = cb->get_cost(dev, table[i].frequency, &cost);
			if (ret || !cost || cost > EM_MAX_POWER) {
				dev_err(dev, "EM: invalid cost %lu %d\n",
					cost, ret);
				return -EINVAL;
			}
		} else {
			/* increase the resolution of the 'cost' calculation */
			power_res = table[i].power * 10;
			cost = power_res / table[i].performance;
		}

		table[i].cost = cost;

		if (table[i].cost >= prev_cost) {
			table[i].flags = EM_PERF_STATE_INEFFICIENT;
			dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
				table[i].frequency);
		} else {
			prev_cost = table[i].cost;
		}
	}

	return 0;
}

/**
 * em_dev_compute_costs() - Calculate cost values for new runtime EM table
 * @dev : Device for which the EM table is to be updated
 * @table : The new EM table that is going to get the costs calculated
 * @nr_states : Number of performance states
 *
 * Calculate the em_perf_state::cost values for a new runtime EM table. The
 * values are used by EAS during task placement. This also calculates and
 * sets the efficiency flag for each performance state. When the function
 * finishes successfully, the EM table is ready to be updated and used by EAS.
 *
 * Return 0 on success or a proper error in case of failure.
 */
int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
			 int nr_states)
{
	return em_compute_costs(dev, table, NULL, nr_states, 0);
}
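/*
 * Worked example with hypothetical values: three states at 500000, 1000000
 * and 1500000 kHz with power 50000, 120000 and 300000 uW and performance
 * 341, 682 and 1024 get cost = power * 10 / performance = 1466, 1759 and
 * 2929. Walking the table from the highest state down, each state must be
 * strictly cheaper than the last efficient one above it; a state whose cost
 * is not lower gets flagged EM_PERF_STATE_INEFFICIENT and can later be
 * skipped when EM_PERF_DOMAIN_SKIP_INEFFICIENCIES is set.
 */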
/**
 * em_dev_update_perf_domain() - Update runtime EM table for a device
 * @dev : Device for which the EM is to be updated
 * @new_table : The new EM table that is going to be used from now on
 *
 * Update the EM runtime modifiable table for the @dev using the provided
 * @new_table.
 *
 * This function uses a mutex to serialize writers, so it must not be called
 * from a non-sleeping context.
 *
 * Return 0 on success or an error code on failure.
 */
int em_dev_update_perf_domain(struct device *dev,
			      struct em_perf_table *new_table)
{
	struct em_perf_table *old_table;
	struct em_perf_domain *pd;

	if (!dev)
		return -EINVAL;

	/* Serialize update/unregister or concurrent updates */
	mutex_lock(&em_pd_mutex);

	if (!dev->em_pd) {
		mutex_unlock(&em_pd_mutex);
		return -EINVAL;
	}
	pd = dev->em_pd;

	kref_get(&new_table->kref);

	old_table = rcu_dereference_protected(pd->em_table,
					      lockdep_is_held(&em_pd_mutex));
	rcu_assign_pointer(pd->em_table, new_table);

	em_cpufreq_update_efficiencies(dev, new_table->state);

	em_table_free(old_table);

	mutex_unlock(&em_pd_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(em_dev_update_perf_domain);
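/*
 * A minimal update sketch from a driver's perspective, assuming @dev already
 * has a registered EM and that new_power[] (hypothetical) holds refreshed
 * power values in the same unit as the current table:
 *
 *	struct em_perf_domain *pd = em_pd_get(dev);
 *	struct em_perf_table *table;
 *	int i, ret;
 *
 *	table = em_table_alloc(pd);
 *	if (!table)
 *		return -ENOMEM;
 *
 *	rcu_read_lock();
 *	memcpy(table->state, em_perf_state_from_pd(pd),
 *	       sizeof(struct em_perf_state) * pd->nr_perf_states);
 *	rcu_read_unlock();
 *
 *	for (i = 0; i < pd->nr_perf_states; i++)
 *		table->state[i].power = new_power[i];
 *
 *	ret = em_dev_compute_costs(dev, table->state, pd->nr_perf_states);
 *	if (!ret)
 *		ret = em_dev_update_perf_domain(dev, table);
 *
 *	em_table_free(table);	(drop the updater's own reference)
 */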
static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
				struct em_perf_state *table,
				const struct em_data_callback *cb,
				unsigned long flags)
{
	unsigned long power, freq, prev_freq = 0;
	int nr_states = pd->nr_perf_states;
	int i, ret;

	/* Build the list of performance states for this performance domain */
	for (i = 0, freq = 0; i < nr_states; i++, freq++) {
		/*
		 * active_power() is a driver callback which ceils 'freq' to
		 * the lowest performance state of 'dev' above 'freq' and
		 * updates 'power' and 'freq' accordingly.
		 */
		ret = cb->active_power(dev, &power, &freq);
		if (ret) {
			dev_err(dev, "EM: invalid perf. state: %d\n",
				ret);
			return -EINVAL;
		}

		/*
		 * We expect the driver callback to increase the frequency for
		 * higher performance states.
		 */
		if (freq <= prev_freq) {
			dev_err(dev, "EM: non-increasing freq: %lu\n",
				freq);
			return -EINVAL;
		}

		/*
		 * The power returned by active_power() is expected to be
		 * positive and within range.
		 */
		if (!power || power > EM_MAX_POWER) {
			dev_err(dev, "EM: invalid power: %lu\n",
				power);
			return -EINVAL;
		}

		table[i].power = power;
		table[i].frequency = prev_freq = freq;
	}

	em_init_performance(dev, pd, table, nr_states);

	ret = em_compute_costs(dev, table, cb, nr_states, flags);
	if (ret)
		return -EINVAL;

	return 0;
}

static int em_create_pd(struct device *dev, int nr_states,
			const struct em_data_callback *cb,
			const cpumask_t *cpus,
			unsigned long flags)
{
	struct em_perf_table *em_table;
	struct em_perf_domain *pd;
	struct device *cpu_dev;
	int cpu, ret, num_cpus;

	if (_is_cpu_device(dev)) {
		num_cpus = cpumask_weight(cpus);

		/* Prevent the max possible energy calculation from overflowing */
		if (num_cpus > EM_MAX_NUM_CPUS) {
			dev_err(dev, "EM: too many CPUs, overflow possible\n");
			return -EINVAL;
		}

		pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL);
		if (!pd)
			return -ENOMEM;

		cpumask_copy(em_span_cpus(pd), cpus);
	} else {
		pd = kzalloc(sizeof(*pd), GFP_KERNEL);
		if (!pd)
			return -ENOMEM;
	}

	pd->nr_perf_states = nr_states;

	em_table = em_table_alloc(pd);
	if (!em_table)
		goto free_pd;

	ret = em_create_perf_table(dev, pd, em_table->state, cb, flags);
	if (ret)
		goto free_pd_table;

	rcu_assign_pointer(pd->em_table, em_table);

	if (_is_cpu_device(dev))
		for_each_cpu(cpu, cpus) {
			cpu_dev = get_cpu_device(cpu);
			cpu_dev->em_pd = pd;
		}

	dev->em_pd = pd;

	return 0;

free_pd_table:
	kfree(em_table);
free_pd:
	kfree(pd);
	return -EINVAL;
}

static void
em_cpufreq_update_efficiencies(struct device *dev, struct em_perf_state *table)
{
	struct em_perf_domain *pd = dev->em_pd;
	struct cpufreq_policy *policy;
	int found = 0;
	int i, cpu;

	if (!_is_cpu_device(dev))
		return;

	/* Try to get a CPU which is active and in this PD */
	cpu = cpumask_first_and(em_span_cpus(pd), cpu_active_mask);
	if (cpu >= nr_cpu_ids) {
		dev_warn(dev, "EM: No online CPU for CPUFreq policy\n");
		return;
	}

	policy = cpufreq_cpu_get(cpu);
	if (!policy) {
		dev_warn(dev, "EM: Access to CPUFreq policy failed\n");
		return;
	}

	for (i = 0; i < pd->nr_perf_states; i++) {
		if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT))
			continue;

		if (!cpufreq_table_set_inefficient(policy, table[i].frequency))
			found++;
	}

	cpufreq_cpu_put(policy);

	if (!found)
		return;

	/*
	 * Inefficiencies have been installed in CPUFreq, so the inefficient
	 * frequencies will be skipped there. The EM can do the same.
	 */
	pd->flags |= EM_PERF_DOMAIN_SKIP_INEFFICIENCIES;
}

/**
 * em_pd_get() - Return the performance domain for a device
 * @dev : Device to find the performance domain for
 *
 * Returns the performance domain to which @dev belongs, or NULL if it doesn't
 * exist.
 */
struct em_perf_domain *em_pd_get(struct device *dev)
{
	if (IS_ERR_OR_NULL(dev))
		return NULL;

	return dev->em_pd;
}
EXPORT_SYMBOL_GPL(em_pd_get);

/**
 * em_cpu_get() - Return the performance domain for a CPU
 * @cpu : CPU to find the performance domain for
 *
 * Returns the performance domain to which @cpu belongs, or NULL if it doesn't
 * exist.
 */
struct em_perf_domain *em_cpu_get(int cpu)
{
	struct device *cpu_dev;

	cpu_dev = get_cpu_device(cpu);
	if (!cpu_dev)
		return NULL;

	return em_pd_get(cpu_dev);
}
EXPORT_SYMBOL_GPL(em_cpu_get);
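/*
 * A minimal reader sketch, assuming the caller only needs the table within
 * the RCU read-side critical section (the same pattern as the debugfs
 * helpers above); 'cpu' and the variable names are hypothetical:
 *
 *	struct em_perf_domain *pd = em_cpu_get(cpu);
 *	struct em_perf_state *table;
 *	unsigned long max_state_cost;
 *
 *	if (!pd)
 *		return;
 *	rcu_read_lock();
 *	table = em_perf_state_from_pd(pd);
 *	max_state_cost = table[pd->nr_perf_states - 1].cost;
 *	rcu_read_unlock();
 */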
/**
 * em_dev_register_perf_domain() - Register the Energy Model (EM) for a device
 * @dev : Device for which the EM is to be registered
 * @nr_states : Number of performance states to register
 * @cb : Callback functions providing the data of the Energy Model
 * @cpus : Pointer to cpumask_t, which is obligatory for CPU devices. It can
 *		be taken from e.g. 'policy->cpus'. For other types of devices
 *		it should be set to NULL.
 * @microwatts : Flag indicating whether the power values are in micro-Watts
 *		or in some other scale. It must be set properly.
 *
 * Create Energy Model tables for a performance domain using the callbacks
 * defined in @cb.
 *
 * It is important to set @microwatts correctly. Some kernel sub-systems
 * might rely on this flag and check whether all devices in the EM use the
 * same scale.
 *
 * If multiple clients register the same performance domain, all but the first
 * registration will be ignored.
 *
 * Return 0 on success.
 */
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
				const struct em_data_callback *cb,
				const cpumask_t *cpus, bool microwatts)
{
	struct em_perf_table *em_table;
	unsigned long cap, prev_cap = 0;
	unsigned long flags = 0;
	int cpu, ret;

	if (!dev || !nr_states || !cb)
		return -EINVAL;

	/*
	 * Use a mutex to serialize the registration of performance domains and
	 * let the driver-defined callback functions sleep.
	 */
	mutex_lock(&em_pd_mutex);

	if (dev->em_pd) {
		ret = -EEXIST;
		goto unlock;
	}

	if (_is_cpu_device(dev)) {
		if (!cpus) {
			dev_err(dev, "EM: invalid CPU mask\n");
			ret = -EINVAL;
			goto unlock;
		}

		for_each_cpu(cpu, cpus) {
			if (em_cpu_get(cpu)) {
				dev_err(dev, "EM: exists for CPU%d\n", cpu);
				ret = -EEXIST;
				goto unlock;
			}
			/*
			 * All CPUs of a domain must have the same
			 * micro-architecture since they all share the same
			 * table.
			 */
			cap = arch_scale_cpu_capacity(cpu);
			if (prev_cap && prev_cap != cap) {
				dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n",
					cpumask_pr_args(cpus));

				ret = -EINVAL;
				goto unlock;
			}
			prev_cap = cap;
		}
	}

	if (microwatts)
		flags |= EM_PERF_DOMAIN_MICROWATTS;
	else if (cb->get_cost)
		flags |= EM_PERF_DOMAIN_ARTIFICIAL;

	/*
	 * The EM only supports power values in uW (the artificial EM is the
	 * exception). Therefore, check and force the drivers to provide
	 * power in uW.
	 */
	if (!microwatts && !(flags & EM_PERF_DOMAIN_ARTIFICIAL)) {
		dev_err(dev, "EM: only supports uW power values\n");
		ret = -EINVAL;
		goto unlock;
	}

	ret = em_create_pd(dev, nr_states, cb, cpus, flags);
	if (ret)
		goto unlock;

	dev->em_pd->flags |= flags;
	dev->em_pd->min_perf_state = 0;
	dev->em_pd->max_perf_state = nr_states - 1;

	em_table = rcu_dereference_protected(dev->em_pd->em_table,
					     lockdep_is_held(&em_pd_mutex));
	em_cpufreq_update_efficiencies(dev, em_table->state);

	em_debug_create_pd(dev);
	dev_info(dev, "EM: created perf domain\n");

unlock:
	mutex_unlock(&em_pd_mutex);

	if (_is_cpu_device(dev))
		em_check_capacity_update();

	return ret;
}
EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);
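/*
 * A minimal registration sketch for a cpufreq driver, with hypothetical
 * callback and variable names (bar_active_power, cpu_dev, nr_opps); power
 * must be reported in uW when 'microwatts' is true:
 *
 *	static int bar_active_power(struct device *dev, unsigned long *power,
 *				    unsigned long *freq)
 *	{
 *		(round *freq up to a supported OPP, then report that OPP's
 *		 frequency in *freq and its power in uW in *power)
 *		return 0;
 *	}
 *
 *	static const struct em_data_callback em_cb = {
 *		.active_power = bar_active_power,
 *	};
 *
 *	ret = em_dev_register_perf_domain(cpu_dev, nr_opps, &em_cb,
 *					  policy->cpus, true);
 */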
/**
 * em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device
 * @dev : Device for which the EM is registered
 *
 * Unregister the EM for the specified @dev (but not a CPU device).
 */
void em_dev_unregister_perf_domain(struct device *dev)
{
	if (IS_ERR_OR_NULL(dev) || !dev->em_pd)
		return;

	if (_is_cpu_device(dev))
		return;

	/*
	 * The mutex separates all register/unregister requests and protects
	 * from potential clean-up/setup issues in the debugfs directories.
	 * The debugfs directory name is the same as the device's name.
	 */
	mutex_lock(&em_pd_mutex);
	em_debug_remove_pd(dev);

	em_table_free(rcu_dereference_protected(dev->em_pd->em_table,
						lockdep_is_held(&em_pd_mutex)));

	kfree(dev->em_pd);
	dev->em_pd = NULL;
	mutex_unlock(&em_pd_mutex);
}
EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);

static struct em_perf_table *em_table_dup(struct em_perf_domain *pd)
{
	struct em_perf_table *em_table;
	struct em_perf_state *ps, *new_ps;
	int ps_size;

	em_table = em_table_alloc(pd);
	if (!em_table)
		return NULL;

	new_ps = em_table->state;

	rcu_read_lock();
	ps = em_perf_state_from_pd(pd);
	/* Initialize data based on the old table */
	ps_size = sizeof(struct em_perf_state) * pd->nr_perf_states;
	memcpy(new_ps, ps, ps_size);

	rcu_read_unlock();

	return em_table;
}

static int em_recalc_and_update(struct device *dev, struct em_perf_domain *pd,
				struct em_perf_table *em_table)
{
	int ret;

	if (!em_is_artificial(pd)) {
		ret = em_compute_costs(dev, em_table->state, NULL,
				       pd->nr_perf_states, pd->flags);
		if (ret)
			goto free_em_table;
	}

	ret = em_dev_update_perf_domain(dev, em_table);
	if (ret)
		goto free_em_table;

	/*
	 * This is a one-time update, so give up the ownership in this
	 * updater. The EM framework has incremented the usage counter and
	 * will keep the reference from now on (and free the memory when it
	 * is no longer needed).
	 */
free_em_table:
	em_table_free(em_table);
	return ret;
}

/*
 * Adjustment of CPU performance values after boot, when all CPUs' capacities
 * are correctly calculated.
 */
static void em_adjust_new_capacity(unsigned int cpu, struct device *dev,
				   struct em_perf_domain *pd)
{
	unsigned long cpu_capacity = arch_scale_cpu_capacity(cpu);
	struct em_perf_table *em_table;
	struct em_perf_state *table;
	unsigned long em_max_perf;

	rcu_read_lock();
	table = em_perf_state_from_pd(pd);
	em_max_perf = table[pd->nr_perf_states - 1].performance;
	rcu_read_unlock();

	if (em_max_perf == cpu_capacity)
		return;

	pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n", cpu,
		 cpu_capacity, em_max_perf);

	em_table = em_table_dup(pd);
	if (!em_table) {
		dev_warn(dev, "EM: allocation failed\n");
		return;
	}

	em_init_performance(dev, pd, em_table->state, pd->nr_perf_states);

	em_recalc_and_update(dev, pd, em_table);
}

/**
 * em_adjust_cpu_capacity() - Adjust the EM for a CPU after a capacity update.
 * @cpu: Target CPU.
 *
 * Adjust the existing EM for @cpu after a capacity update under the assumption
 * that the capacity has been updated in the same way for all of the CPUs in
 * the same perf domain.
 */
void em_adjust_cpu_capacity(unsigned int cpu)
{
	struct device *dev = get_cpu_device(cpu);
	struct em_perf_domain *pd;

	pd = em_pd_get(dev);
	if (pd)
		em_adjust_new_capacity(cpu, dev, pd);
}

static void em_check_capacity_update(void)
{
	cpumask_var_t cpu_done_mask;
	int cpu;

	if (!zalloc_cpumask_var(&cpu_done_mask, GFP_KERNEL)) {
		pr_warn("no free memory\n");
		return;
	}

	/* Check if the CPUs' capacity has changed; if so, update the EM */
	for_each_possible_cpu(cpu) {
		struct cpufreq_policy *policy;
		struct em_perf_domain *pd;
		struct device *dev;

		if (cpumask_test_cpu(cpu, cpu_done_mask))
			continue;

		policy = cpufreq_cpu_get(cpu);
		if (!policy) {
			pr_debug("Accessing cpu%d policy failed\n", cpu);
			schedule_delayed_work(&em_update_work,
					      msecs_to_jiffies(1000));
			break;
		}
		cpufreq_cpu_put(policy);

		dev = get_cpu_device(cpu);
		pd = em_pd_get(dev);
		if (!pd || em_is_artificial(pd))
			continue;

		cpumask_or(cpu_done_mask, cpu_done_mask,
			   em_span_cpus(pd));

		em_adjust_new_capacity(cpu, dev, pd);
	}

	free_cpumask_var(cpu_done_mask);
}

static void em_update_workfn(struct work_struct *work)
{
	em_check_capacity_update();
}
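/*
 * A minimal usage sketch (the caller is hypothetical): code that rescales
 * CPU capacities after boot, e.g. arch topology code, is expected to call
 * em_adjust_cpu_capacity() for the affected CPUs once
 * arch_scale_cpu_capacity() returns the final value:
 *
 *	for_each_online_cpu(cpu)
 *		em_adjust_cpu_capacity(cpu);
 */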
/**
 * em_dev_update_chip_binning() - Update Energy Model after the new voltage
 *				information is present in the OPPs.
 * @dev : Device for which the Energy Model has to be updated.
 *
 * This function makes it easy to update the EM with new values available in
 * the OPP framework and DT. It can be used after the chip has been properly
 * verified by device drivers and the voltages adjusted for the 'chip binning'.
 *
 * Return 0 on success or an error code on failure.
 */
int em_dev_update_chip_binning(struct device *dev)
{
	struct em_perf_table *em_table;
	struct em_perf_domain *pd;
	int i, ret;

	if (IS_ERR_OR_NULL(dev))
		return -EINVAL;

	pd = em_pd_get(dev);
	if (!pd) {
		dev_warn(dev, "Couldn't find Energy Model\n");
		return -EINVAL;
	}

	em_table = em_table_dup(pd);
	if (!em_table) {
		dev_warn(dev, "EM: allocation failed\n");
		return -ENOMEM;
	}

	/* Update power values which might change due to new voltage in OPPs */
	for (i = 0; i < pd->nr_perf_states; i++) {
		unsigned long freq = em_table->state[i].frequency;
		unsigned long power;

		ret = dev_pm_opp_calc_power(dev, &power, &freq);
		if (ret) {
			em_table_free(em_table);
			return ret;
		}

		em_table->state[i].power = power;
	}

	return em_recalc_and_update(dev, pd, em_table);
}
EXPORT_SYMBOL_GPL(em_dev_update_chip_binning);

/**
 * em_update_performance_limits() - Update Energy Model with performance
 *				limits information.
 * @pd : Performance Domain with EM that has to be updated.
 * @freq_min_khz : New minimum allowed frequency for this device.
 * @freq_max_khz : New maximum allowed frequency for this device.
 *
 * This function updates the EM with information about the available
 * performance levels. It takes the minimum and maximum frequency in kHz
 * and does the internal translation to performance levels.
 * Returns 0 on success or -EINVAL on failure.
 */
int em_update_performance_limits(struct em_perf_domain *pd,
		unsigned long freq_min_khz, unsigned long freq_max_khz)
{
	struct em_perf_state *table;
	int min_ps = -1;
	int max_ps = -1;
	int i;

	if (!pd)
		return -EINVAL;

	rcu_read_lock();
	table = em_perf_state_from_pd(pd);

	for (i = 0; i < pd->nr_perf_states; i++) {
		if (freq_min_khz == table[i].frequency)
			min_ps = i;
		if (freq_max_khz == table[i].frequency)
			max_ps = i;
	}
	rcu_read_unlock();

	/* Only update when both are found and sane */
	if (min_ps < 0 || max_ps < 0 || max_ps < min_ps)
		return -EINVAL;

	/* Guard simultaneous updates and make them atomic */
	mutex_lock(&em_pd_mutex);
	pd->min_perf_state = min_ps;
	pd->max_perf_state = max_ps;
	mutex_unlock(&em_pd_mutex);

	return 0;
}
EXPORT_SYMBOL_GPL(em_update_performance_limits);

static void rebuild_sd_workfn(struct work_struct *work)
{
	rebuild_sched_domains_energy();
}

void em_rebuild_sched_domains(void)
{
	static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);

	/*
	 * When called from the cpufreq_register_driver() path, the
	 * cpu_hotplug_lock is already held, so use a work item to
	 * avoid nested locking in rebuild_sched_domains().
	 */
	schedule_work(&rebuild_sd_work);
}
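/*
 * A minimal sketch of a performance-limits update, e.g. from a thermal or
 * platform driver (the caller and the frequencies are hypothetical); both
 * values must match existing EM frequencies in kHz:
 *
 *	struct em_perf_domain *pd = em_cpu_get(cpu);
 *
 *	if (pd)
 *		em_update_performance_limits(pd, 500000, 1500000);
 */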