1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * intel_idle.c - native hardware idle loop for modern Intel processors
4 *
5 * Copyright (c) 2013 - 2020, Intel Corporation.
6 * Len Brown <len.brown@intel.com>
7 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
8 */
9
10 /*
11 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
12 * in lieu of the legacy ACPI processor_idle driver. The intent is to
13 * make Linux more efficient on these processors, as intel_idle knows
14 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
15 */
16
17 /*
18 * Design Assumptions
19 *
20 * All CPUs have same idle states as boot CPU
21 *
22 * Chipset BM_STS (bus master status) bit is a NOP
23 * for preventing entry into deep C-states
24 *
25 * CPU will flush caches as needed when entering a C-state via MWAIT
26 * (in contrast to entering ACPI C3, in which case the WBINVD
27 * instruction needs to be executed to flush the caches)
28 */
29
30 /*
31 * Known limitations
32 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
34 * to avoid complications with the lapic timer workaround.
35 * Have not seen issues with suspend, but may need same workaround here.
36 *
37 */
38
39 /* un-comment DEBUG to enable pr_debug() statements */
40 /* #define DEBUG */
41
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <linux/time64.h>
49 #include <trace/events/power.h>
50 #include <linux/sched.h>
51 #include <linux/sched/smt.h>
52 #include <linux/mutex.h>
53 #include <linux/notifier.h>
54 #include <linux/cpu.h>
55 #include <linux/moduleparam.h>
56 #include <linux/sysfs.h>
57 #include <asm/cpuid/api.h>
58 #include <asm/cpu_device_id.h>
59 #include <asm/intel-family.h>
60 #include <asm/mwait.h>
61 #include <asm/spec-ctrl.h>
62 #include <asm/msr.h>
63 #include <asm/tsc.h>
64 #include <asm/fpu/api.h>
65 #include <asm/smp.h>
66
/* The cpuidle driver object registered with the cpuidle core. */
static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/* Mask of idle-state indices to keep disabled (presumably a module param; consumer not in this chunk). */
static unsigned int disabled_states_mask __read_mostly;
/* Force interrupt-enabled idle entry (NOTE(review): consumer not visible in this chunk). */
static bool force_irq_on __read_mostly;
/* Opt out of IBRS-toggling idle entry (NOTE(review): consumer not visible in this chunk). */
static bool ibrs_off __read_mostly;

/* The maximum allowed length for the 'table' module parameter */
#define MAX_CMDLINE_TABLE_LEN 256
/* Maximum allowed C-state latency */
#define MAX_CMDLINE_LATENCY_US (5 * USEC_PER_MSEC)
/* Maximum allowed C-state target residency */
#define MAX_CMDLINE_RESIDENCY_US (100 * USEC_PER_MSEC)

/* Raw value of the 'intel_idle.table' parameter (parsed elsewhere). */
static char cmdline_table_str[MAX_CMDLINE_TABLE_LEN] __read_mostly;

/* Per-CPU cpuidle device objects. */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;

/* Auto-demotion enable bits to clear; cf. struct idle_cpu::auto_demotion_disable_flags. */
static unsigned long auto_demotion_disable_flags;

/* Requested handling of the hardware C1E-promotion setting. */
static enum {
	C1E_PROMOTION_PRESERVE,
	C1E_PROMOTION_ENABLE,
	C1E_PROMOTION_DISABLE
} c1e_promotion = C1E_PROMOTION_PRESERVE;
95
/* Per-CPU-model description: default C-state table plus model quirks. */
struct idle_cpu {
	/* Default C-state table for this CPU model. */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	/* Request disabling C1E promotion on this model. */
	bool disable_promotion_to_c1e;
	/* Model supports C1-demotion control (NOTE(review): consumers not in this chunk). */
	bool c1_demotion_supported;
	/* Also consult ACPI (_CST) — TODO confirm exact semantics from the init code. */
	bool use_acpi;
};

/* Whether the running CPU supports C1-demotion control. */
static bool c1_demotion_supported;
/* Serializes C1-demotion related updates. */
static DEFINE_MUTEX(c1_demotion_mutex);

/* sysfs root device used during initialization. */
static struct device *sysfs_root __initdata;

/* Matched model entry for the boot CPU and the C-state table chosen from it. */
static const struct idle_cpu *icpu __initdata;
static struct cpuidle_state *cpuidle_state_table __initdata;

/* C-states data from the 'intel_idle.table' cmdline parameter */
static struct cpuidle_state cmdline_states[CPUIDLE_STATE_MAX] __initdata;

/* MWAIT sub-state availability — presumably from the CPUID MWAIT leaf; set elsewhere. */
static unsigned int mwait_substates __initdata;
121
/*
 * Driver-private cpuidle state flags. BIT(14)..BIT(18) are chosen to stay
 * clear of the generic CPUIDLE_FLAG_* bits defined by the cpuidle core.
 */

/*
 * Enable interrupts before entering the C-state. On some platforms and for
 * some C-states, this may measurably decrease interrupt latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)

/*
 * Enable this state by default even if the ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)

/*
 * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
 * above.
 */
#define CPUIDLE_FLAG_IBRS		BIT(16)

/*
 * Initialize large xstate for the C6-state entrance.
 */
#define CPUIDLE_FLAG_INIT_XSTATE	BIT(17)

/*
 * Ignore the sub-state when matching mwait hints between the ACPI _CST and
 * custom tables.
 */
#define CPUIDLE_FLAG_PARTIAL_HINT_MATCH	BIT(18)

/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
/* Parenthesize the argument so compound expressions (e.g. a | b) expand safely. */
#define MWAIT2flg(eax) ((((eax)) & 0xFF) << 24)
159
__intel_idle(struct cpuidle_device * dev,struct cpuidle_driver * drv,int index,bool irqoff)160 static __always_inline int __intel_idle(struct cpuidle_device *dev,
161 struct cpuidle_driver *drv,
162 int index, bool irqoff)
163 {
164 struct cpuidle_state *state = &drv->states[index];
165 unsigned int eax = flg2MWAIT(state->flags);
166 unsigned int ecx = 1*irqoff; /* break on interrupt flag */
167
168 mwait_idle_with_hints(eax, ecx);
169
170 return index;
171 }
172
/**
 * intel_idle - Ask the processor to enter the given idle state.
 * @dev: cpuidle device of the target CPU.
 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 * @index: Target idle state index.
 *
 * Use the MWAIT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * If the local APIC timer is not known to be reliable in the target idle state,
 * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
 *
 * Must be called under local_irq_disable().
 *
 * Return: @index (the state that was requested).
 */
static __cpuidle int intel_idle(struct cpuidle_device *dev,
				struct cpuidle_driver *drv, int index)
{
	return __intel_idle(dev, drv, index, true);
}
192
/*
 * intel_idle_irq - idle entry without the "break on interrupt" ECX hint.
 *
 * Same as intel_idle() except __intel_idle() runs with irqoff == false, so
 * MWAIT's ECX bit 0 is not set.
 */
static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
				    struct cpuidle_driver *drv, int index)
{
	return __intel_idle(dev, drv, index, false);
}
198
/*
 * intel_idle_ibrs - idle entry for states flagged CPUIDLE_FLAG_IBRS.
 *
 * When SMT is active, write 0 to SPEC_CTRL (disabling IBRS) before entering
 * idle and restore the saved value afterwards. NOTE(review): the rationale
 * (benefit the sibling thread while this one idles) is inferred — confirm.
 */
static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
				     struct cpuidle_driver *drv, int index)
{
	bool smt_active = sched_smt_active();
	u64 spec_ctrl = spec_ctrl_current();	/* saved before clearing */
	int ret;

	if (smt_active)
		__update_spec_ctrl(0);

	ret = __intel_idle(dev, drv, index, true);

	/* Restore the pre-idle SPEC_CTRL value on wakeup. */
	if (smt_active)
		__update_spec_ctrl(spec_ctrl);

	return ret;
}
216
/*
 * intel_idle_xstate - idle entry for states flagged CPUIDLE_FLAG_INIT_XSTATE.
 *
 * Prepares the FPU registers via fpu_idle_fpregs() before the normal MWAIT
 * entry (cf. the "large xstate" comment on CPUIDLE_FLAG_INIT_XSTATE).
 */
static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
				       struct cpuidle_driver *drv, int index)
{
	fpu_idle_fpregs();
	return __intel_idle(dev, drv, index, true);
}
223
/**
 * intel_idle_s2idle - Ask the processor to enter the given idle state.
 * @dev: cpuidle device of the target CPU.
 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 * @index: Target idle state index.
 *
 * Use the MWAIT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
 * scheduler tick and suspended scheduler clock on the target CPU.
 *
 * Return: 0.
 */
static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
				       struct cpuidle_driver *drv, int index)
{
	struct cpuidle_state *state = &drv->states[index];
	unsigned int eax = flg2MWAIT(state->flags);
	unsigned int ecx = 1; /* break on interrupt flag */

	/* Some states require the FPU/xstate to be set up before entry. */
	if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
		fpu_idle_fpregs();

	mwait_idle_with_hints(eax, ecx);

	return 0;
}
250
/*
 * intel_idle_enter_dead - enter idle via the state's MWAIT hint using
 * mwait_play_dead(). NOTE(review): the naming suggests this is the
 * CPU-offline ("play dead") path — confirm against the cpuidle core.
 */
static void intel_idle_enter_dead(struct cpuidle_device *dev, int index)
{
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	struct cpuidle_state *state = &drv->states[index];
	unsigned long eax = flg2MWAIT(state->flags);

	mwait_play_dead(eax);
}
259
/*
 * States are indexed by the cstate number,
 * which is also the index into the MWAIT hint array.
 * Thus C0 is a dummy.
 */
/* Nehalem-class table; latency/residency in microseconds (cpuidle convention). */
static struct cpuidle_state nehalem_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 3,
		.target_residency = 6,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 20,
		.target_residency = 80,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* .enter == NULL terminates the table */
};
301
/* Sandy Bridge C-state table. */
static struct cpuidle_state snb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 211,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 104,
		.target_residency = 345,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 109,
		.target_residency = 345,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
346
/* Bay Trail table; note the non-contiguous module-C-state hints (0x58, 0x52, 0x64). */
static struct cpuidle_state byt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 275,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 500,
		.target_residency = 560,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
391
/* Cherry Trail table; same hints as Bay Trail, lower C6 exit latencies. */
static struct cpuidle_state cht_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 275,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 560,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
436
/* Ivy Bridge (client) C-state table. */
static struct cpuidle_state ivb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 300,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 87,
		.target_residency = 300,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
481
/* Ivy Town (Ivy Bridge Xeon) base table; no C7, C1E residency raised vs client. */
static struct cpuidle_state ivt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 80,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 82,
		.target_residency = 300,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
518
/* Ivy Town variant — per the "_4s" suffix, presumably for 4-socket systems; longer residencies. */
static struct cpuidle_state ivt_cstates_4s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 250,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 300,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 84,
		.target_residency = 400,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
555
/* Ivy Town variant — per the "_8s" suffix, presumably for 8-socket systems; longest residencies. */
static struct cpuidle_state ivt_cstates_8s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 500,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 600,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 88,
		.target_residency = 700,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
592
/* Haswell table; adds the deep package states C7s/C8/C9/C10. */
static struct cpuidle_state hsw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 33,
		.target_residency = 100,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* Broadwell table; identical to Haswell except the C3 exit latency (40 vs 33). */
static struct cpuidle_state bdw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 40,
		.target_residency = 100,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
729
/* Skylake (client) table; deep states carry CPUIDLE_FLAG_IBRS. */
static struct cpuidle_state skl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 70,
		.target_residency = 100,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 85,
		.target_residency = 200,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x33",
		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 124,
		.target_residency = 800,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 480,
		.target_residency = 5000,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 890,
		.target_residency = 5000,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
798
/* Skylake-X (server) table; C1 enters with the IRQ_ENABLE flag, C6 with IBRS. */
static struct cpuidle_state skx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
		.exit_latency = 2,
		.target_residency = 2,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 133,
		.target_residency = 600,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
827
/* Ice Lake-X (server) table. */
static struct cpuidle_state icx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 4,
		.target_residency = 4,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 170,
		.target_residency = 600,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
856
/*
 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
 * But in this case there is effectively no C1, because C1 requests are
 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
 * and C1E requests end up with C1, so there is effectively no C1E.
 *
 * By default we enable C1E and disable C1 by marking it with
 * 'CPUIDLE_FLAG_UNUSABLE'.
 */
static struct cpuidle_state adl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 220,
		.target_residency = 600,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 280,
		.target_residency = 800,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 680,
		.target_residency = 2000,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
911
/* Alder Lake-L table; same C1/C1E arrangement as adl_cstates above. */
static struct cpuidle_state adl_l_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 170,
		.target_residency = 500,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 600,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 230,
		.target_residency = 700,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
956
/* Meteor Lake-L table; no separate C1 entry — C1E is the shallowest state. */
static struct cpuidle_state mtl_l_cstates[] __initdata = {
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 420,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 310,
		.target_residency = 930,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
985
986 static struct cpuidle_state ptl_cstates[] __initdata = {
987 {
988 .name = "C1",
989 .desc = "MWAIT 0x00",
990 .flags = MWAIT2flg(0x00),
991 .exit_latency = 1,
992 .target_residency = 1,
993 .enter = &intel_idle,
994 .enter_s2idle = intel_idle_s2idle, },
995 {
996 .name = "C1E",
997 .desc = "MWAIT 0x01",
998 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
999 .exit_latency = 10,
1000 .target_residency = 10,
1001 .enter = &intel_idle,
1002 .enter_s2idle = intel_idle_s2idle, },
1003 {
1004 .name = "C6S",
1005 .desc = "MWAIT 0x21",
1006 .flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED,
1007 .exit_latency = 300,
1008 .target_residency = 300,
1009 .enter = &intel_idle,
1010 .enter_s2idle = intel_idle_s2idle, },
1011 {
1012 .name = "C10",
1013 .desc = "MWAIT 0x60",
1014 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1015 .exit_latency = 370,
1016 .target_residency = 2500,
1017 .enter = &intel_idle,
1018 .enter_s2idle = intel_idle_s2idle, },
1019 {
1020 .enter = NULL }
1021 };
1022
/* Grand Ridge / "gmt" table; same C1-unusable/C1E-enabled arrangement as Alder Lake. */
static struct cpuidle_state gmt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 195,
		.target_residency = 585,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 260,
		.target_residency = 1040,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 660,
		.target_residency = 1980,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
1067
/* Sapphire Rapids table; C6 requires xstate init (CPUIDLE_FLAG_INIT_XSTATE). */
static struct cpuidle_state spr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
					   CPUIDLE_FLAG_INIT_XSTATE,
		.exit_latency = 290,
		.target_residency = 800,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
1097
/*
 * Granite Rapids table; C6/C6P share the 0x2x hint family and are matched
 * against ACPI _CST ignoring the sub-state (CPUIDLE_FLAG_PARTIAL_HINT_MATCH).
 */
static struct cpuidle_state gnr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 4,
		.target_residency = 4,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
					   CPUIDLE_FLAG_INIT_XSTATE |
					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
		.exit_latency = 170,
		.target_residency = 650,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6P",
		.desc = "MWAIT 0x21",
		.flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED |
					   CPUIDLE_FLAG_INIT_XSTATE |
					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
		.exit_latency = 210,
		.target_residency = 1000,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
1138
/*
 * Granite Rapids D C-states: same layout as gnr_cstates, with latency and
 * residency numbers tuned for the D variant.
 */
static struct cpuidle_state gnrd_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 4,
		.target_residency = 4,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
					   CPUIDLE_FLAG_INIT_XSTATE |
					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
		.exit_latency = 220,
		.target_residency = 650,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6P",
		.desc = "MWAIT 0x21",
		.flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED |
					   CPUIDLE_FLAG_INIT_XSTATE |
					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
		.exit_latency = 240,
		.target_residency = 750,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
1179
/*
 * Bonnell/Saltwell-era Atom C-states. Note: MWAIT hint 0x00 is labeled C1E
 * on these parts, and the deepest state uses the non-contiguous hint 0x52.
 */
static struct cpuidle_state atom_cstates[] __initdata = {
	{
		.name = "C1E",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C2",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10),
		.exit_latency = 20,
		.target_residency = 80,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* Tangier SoC C-states (matched via INTEL_ATOM_SILVERMONT_MID below). */
static struct cpuidle_state tangier_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 4,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* Silvermont-based server Atom C-states (INTEL_ATOM_SILVERMONT_D). */
static struct cpuidle_state avn_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x51",
		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 15,
		.target_residency = 45,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* Xeon Phi (Knights Landing/Mill) C-states. */
static struct cpuidle_state knl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 2,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.name = "C6",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 120,
		.target_residency = 500,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.enter = NULL }
};
1300
/*
 * Broxton/Goldmont C-states. The exit_latency/target_residency values of
 * entries [2]..[6] (C6 through C10) are placeholders that get overwritten
 * from the IRTL MSRs by bxt_idle_state_table_update().
 */
static struct cpuidle_state bxt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 133,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x31",
		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 155,
		.target_residency = 155,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1000,
		.target_residency = 1000,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2000,
		.target_residency = 2000,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 10000,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
1361
/* Denverton (Goldmont-D) C-states. */
static struct cpuidle_state dnv_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
1390
1391 /*
1392 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
1393 * C6, and this is indicated in the CPUID mwait leaf.
1394 */
static struct cpuidle_state snr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 15,
		.target_residency = 25,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		/* May be absent on some HW/FW revisions, see comment above. */
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 130,
		.target_residency = 500,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
1423
/* Crestmont-based Atom C-states (INTEL_ATOM_CRESTMONT). */
static struct cpuidle_state grr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 10,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x22",
		.flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 500,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
1452
/* Crestmont-X/Darkmont-X server Atom C-states. */
static struct cpuidle_state srf_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 10,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x22",
		.flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED |
					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
		.exit_latency = 270,
		.target_residency = 700,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6SP",
		.desc = "MWAIT 0x23",
		.flags = MWAIT2flg(0x23) | CPUIDLE_FLAG_TLB_FLUSHED |
					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
		.exit_latency = 310,
		.target_residency = 900,
		.enter = intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
1491
/*
 * Per-CPU-model driver configuration.  Each idle_cpu instance pairs a
 * C-state table with model-specific tuning: C1E-promotion disabling,
 * auto-demotion disable bits, and whether ACPI _CST should also be consulted
 * (.use_acpi, typically set on the Xeon variants).
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_nhx __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_atom __initconst = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft __initconst = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_snx __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt __initconst = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_cht __initconst = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivb __initconst = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt __initconst = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsx __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdx __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl __initconst = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx __initconst = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_icx __initconst = {
	.state_table = icx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_adl __initconst = {
	.state_table = adl_cstates,
};

static const struct idle_cpu idle_cpu_adl_l __initconst = {
	.state_table = adl_l_cstates,
};

static const struct idle_cpu idle_cpu_mtl_l __initconst = {
	.state_table = mtl_l_cstates,
};

static const struct idle_cpu idle_cpu_ptl __initconst = {
	.state_table = ptl_cstates,
};

static const struct idle_cpu idle_cpu_gmt __initconst = {
	.state_table = gmt_cstates,
};

static const struct idle_cpu idle_cpu_spr __initconst = {
	.state_table = spr_cstates,
	.disable_promotion_to_c1e = true,
	.c1_demotion_supported = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_gnr __initconst = {
	.state_table = gnr_cstates,
	.disable_promotion_to_c1e = true,
	.c1_demotion_supported = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_gnrd __initconst = {
	.state_table = gnrd_cstates,
	.disable_promotion_to_c1e = true,
	.c1_demotion_supported = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn __initconst = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl __initconst = {
	.state_table = knl_cstates,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt __initconst = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv __initconst = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

/*
 * Tremont has no native state table here; presumably the idle states come
 * from ACPI _CST in that case -- see intel_idle_init_cstates_acpi().
 */
static const struct idle_cpu idle_cpu_tmt __initconst = {
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_snr __initconst = {
	.state_table = snr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_grr __initconst = {
	.state_table = grr_cstates,
	.disable_promotion_to_c1e = true,
	.c1_demotion_supported = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_srf __initconst = {
	.state_table = srf_cstates,
	.disable_promotion_to_c1e = true,
	.c1_demotion_supported = true,
	.use_acpi = true,
};
1675
/* Map each supported CPU model to its idle_cpu configuration. */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	X86_MATCH_VFM(INTEL_NEHALEM_EP,		&idle_cpu_nhx),
	X86_MATCH_VFM(INTEL_NEHALEM,		&idle_cpu_nehalem),
	X86_MATCH_VFM(INTEL_NEHALEM_G,		&idle_cpu_nehalem),
	X86_MATCH_VFM(INTEL_WESTMERE,		&idle_cpu_nehalem),
	X86_MATCH_VFM(INTEL_WESTMERE_EP,	&idle_cpu_nhx),
	X86_MATCH_VFM(INTEL_NEHALEM_EX,		&idle_cpu_nhx),
	X86_MATCH_VFM(INTEL_ATOM_BONNELL,	&idle_cpu_atom),
	X86_MATCH_VFM(INTEL_ATOM_BONNELL_MID,	&idle_cpu_lincroft),
	X86_MATCH_VFM(INTEL_WESTMERE_EX,	&idle_cpu_nhx),
	X86_MATCH_VFM(INTEL_SANDYBRIDGE,	&idle_cpu_snb),
	X86_MATCH_VFM(INTEL_SANDYBRIDGE_X,	&idle_cpu_snx),
	X86_MATCH_VFM(INTEL_ATOM_SALTWELL,	&idle_cpu_atom),
	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT,	&idle_cpu_byt),
	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &idle_cpu_tangier),
	X86_MATCH_VFM(INTEL_ATOM_AIRMONT,	&idle_cpu_cht),
	X86_MATCH_VFM(INTEL_IVYBRIDGE,		&idle_cpu_ivb),
	X86_MATCH_VFM(INTEL_IVYBRIDGE_X,	&idle_cpu_ivt),
	X86_MATCH_VFM(INTEL_HASWELL,		&idle_cpu_hsw),
	X86_MATCH_VFM(INTEL_HASWELL_X,		&idle_cpu_hsx),
	X86_MATCH_VFM(INTEL_HASWELL_L,		&idle_cpu_hsw),
	X86_MATCH_VFM(INTEL_HASWELL_G,		&idle_cpu_hsw),
	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_D,	&idle_cpu_avn),
	X86_MATCH_VFM(INTEL_BROADWELL,		&idle_cpu_bdw),
	X86_MATCH_VFM(INTEL_BROADWELL_G,	&idle_cpu_bdw),
	X86_MATCH_VFM(INTEL_BROADWELL_X,	&idle_cpu_bdx),
	X86_MATCH_VFM(INTEL_BROADWELL_D,	&idle_cpu_bdx),
	X86_MATCH_VFM(INTEL_SKYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_VFM(INTEL_SKYLAKE,		&idle_cpu_skl),
	X86_MATCH_VFM(INTEL_KABYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_VFM(INTEL_KABYLAKE,		&idle_cpu_skl),
	X86_MATCH_VFM(INTEL_SKYLAKE_X,		&idle_cpu_skx),
	X86_MATCH_VFM(INTEL_ICELAKE_X,		&idle_cpu_icx),
	X86_MATCH_VFM(INTEL_ICELAKE_D,		&idle_cpu_icx),
	X86_MATCH_VFM(INTEL_ALDERLAKE,		&idle_cpu_adl),
	X86_MATCH_VFM(INTEL_ALDERLAKE_L,	&idle_cpu_adl_l),
	X86_MATCH_VFM(INTEL_METEORLAKE_L,	&idle_cpu_mtl_l),
	X86_MATCH_VFM(INTEL_PANTHERLAKE_L,	&idle_cpu_ptl),
	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT,	&idle_cpu_gmt),
	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X,	&idle_cpu_spr),
	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X,	&idle_cpu_spr),
	X86_MATCH_VFM(INTEL_GRANITERAPIDS_X,	&idle_cpu_gnr),
	X86_MATCH_VFM(INTEL_GRANITERAPIDS_D,	&idle_cpu_gnrd),
	X86_MATCH_VFM(INTEL_XEON_PHI_KNL,	&idle_cpu_knl),
	X86_MATCH_VFM(INTEL_XEON_PHI_KNM,	&idle_cpu_knl),
	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT,	&idle_cpu_bxt),
	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D,	&idle_cpu_dnv),
	X86_MATCH_VFM(INTEL_ATOM_TREMONT,	&idle_cpu_tmt),
	X86_MATCH_VFM(INTEL_ATOM_TREMONT_L,	&idle_cpu_tmt),
	X86_MATCH_VFM(INTEL_ATOM_TREMONT_D,	&idle_cpu_snr),
	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT,	&idle_cpu_grr),
	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X,	&idle_cpu_srf),
	X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X,	&idle_cpu_srf),
	{}
};
1732
/* Fallback match: any Intel CPU advertising the MWAIT feature. */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, X86_FAMILY_ANY, X86_FEATURE_MWAIT, NULL),
	{}
};
1737
intel_idle_max_cstate_reached(int cstate)1738 static bool __init intel_idle_max_cstate_reached(int cstate)
1739 {
1740 if (cstate + 1 > max_cstate) {
1741 pr_info("max_cstate %d reached\n", max_cstate);
1742 return true;
1743 }
1744 return false;
1745 }
1746
intel_idle_state_needs_timer_stop(struct cpuidle_state * state)1747 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1748 {
1749 unsigned long eax = flg2MWAIT(state->flags);
1750
1751 if (boot_cpu_has(X86_FEATURE_ARAT))
1752 return false;
1753
1754 /*
1755 * Switch over to one-shot tick broadcast if the target C-state
1756 * is deeper than C1.
1757 */
1758 return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
1759 }
1760
#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
#include <acpi/processor.h>

/* "no_acpi": never consult ACPI _CST when building the idle states list. */
static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

/* "use_acpi": use _CST even for models that do not set .use_acpi themselves. */
static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

/* "no_native": skip the native tables in favor of _CST (see ignore_native()). */
static bool no_native __read_mostly; /* No effect if no_acpi is set. */
module_param_named(no_native, no_native, bool, 0444);
MODULE_PARM_DESC(no_native, "Ignore cpu specific (native) idle states in lieu of ACPI idle states");

/* _CST data captured at boot; only used during driver initialization. */
static struct acpi_processor_power acpi_state_table __initdata;
1777
1778 /**
1779 * intel_idle_cst_usable - Check if the _CST information can be used.
1780 *
1781 * Check if all of the C-states listed by _CST in the max_cstate range are
1782 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1783 */
intel_idle_cst_usable(void)1784 static bool __init intel_idle_cst_usable(void)
1785 {
1786 int cstate, limit;
1787
1788 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1789 acpi_state_table.count);
1790
1791 for (cstate = 1; cstate < limit; cstate++) {
1792 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1793
1794 if (cx->entry_method != ACPI_CSTATE_FFH)
1795 return false;
1796 }
1797
1798 return true;
1799 }
1800
/*
 * Populate acpi_state_table from the first possible CPU whose _CST data is
 * usable by this driver, and claim _CST control from the platform.
 * Returns 'true' on success; on failure the table count is reset to 0.
 */
static bool __init intel_idle_acpi_cst_extract(void)
{
	unsigned int cpu;

	if (no_acpi) {
		pr_debug("Not allowed to use ACPI _CST\n");
		return false;
	}

	for_each_possible_cpu(cpu) {
		struct acpi_processor *pr = per_cpu(processors, cpu);

		if (!pr)
			continue;

		/* Try the next CPU if this one's _CST cannot be evaluated. */
		if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
			continue;

		/*
		 * NOTE(review): the increment appears to account for the
		 * table entry at index 0 that _CST does not populate --
		 * confirm against acpi_processor_evaluate_cst().
		 */
		acpi_state_table.count++;

		if (!intel_idle_cst_usable())
			continue;

		/* Usable _CST found, but control could not be claimed: give up. */
		if (!acpi_processor_claim_cst_control())
			break;

		return true;
	}

	acpi_state_table.count = 0;
	pr_debug("ACPI _CST not found or not usable\n");
	return false;
}
1834
/*
 * Build the cpuidle driver's state list from the previously extracted
 * ACPI _CST data (used when no native table applies, or native tables are
 * ignored).  Appends to drv->states starting at drv->state_count.
 */
static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
{
	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);

	/*
	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
	 * the interesting states are ACPI_CSTATE_FFH.
	 */
	for (cstate = 1; cstate < limit; cstate++) {
		struct acpi_processor_cx *cx;
		struct cpuidle_state *state;

		/* Honor the max_cstate module cutoff. */
		if (intel_idle_max_cstate_reached(cstate - 1))
			break;

		cx = &acpi_state_table.states[cstate];

		state = &drv->states[drv->state_count++];

		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
		strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
		state->exit_latency = cx->latency;
		/*
		 * For C1-type C-states use the same number for both the exit
		 * latency and target residency, because that is the case for
		 * C1 in the majority of the static C-states tables above.
		 * For the other types of C-states, however, set the target
		 * residency to 3 times the exit latency which should lead to
		 * a reasonable balance between energy-efficiency and
		 * performance in the majority of interesting cases.
		 */
		state->target_residency = cx->latency;
		if (cx->type > ACPI_STATE_C1)
			state->target_residency *= 3;

		/* _CST's FFH address field holds the MWAIT hint. */
		state->flags = MWAIT2flg(cx->address);
		if (cx->type > ACPI_STATE_C2)
			state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;

		/* Honor the "states_off" module parameter bitmask. */
		if (disabled_states_mask & BIT(cstate))
			state->flags |= CPUIDLE_FLAG_OFF;

		if (intel_idle_state_needs_timer_stop(state))
			state->flags |= CPUIDLE_FLAG_TIMER_STOP;

		if (cx->type > ACPI_STATE_C1 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			mark_tsc_unstable("TSC halts in idle");

		state->enter = intel_idle;
		state->enter_dead = intel_idle_enter_dead;
		state->enter_s2idle = intel_idle_s2idle;
	}
}
1888
/*
 * Return 'true' if a native idle state with the given MWAIT hint should
 * start disabled: _CST data exists, but no _CST entry matches the hint
 * (exactly, or with sub-state bits masked off for states carrying
 * CPUIDLE_FLAG_PARTIAL_HINT_MATCH).
 */
static bool __init intel_idle_off_by_default(unsigned int flags, u32 mwait_hint)
{
	int cstate, limit;

	/*
	 * If there are no _CST C-states, do not disable any C-states by
	 * default.
	 */
	if (!acpi_state_table.count)
		return false;

	limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
	/*
	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
	 * the interesting states are ACPI_CSTATE_FFH.
	 */
	for (cstate = 1; cstate < limit; cstate++) {
		u32 acpi_hint = acpi_state_table.states[cstate].address;
		u32 table_hint = mwait_hint;

		/* Compare only the major C-state bits for partial matching. */
		if (flags & CPUIDLE_FLAG_PARTIAL_HINT_MATCH) {
			acpi_hint &= ~MWAIT_SUBSTATE_MASK;
			table_hint &= ~MWAIT_SUBSTATE_MASK;
		}

		if (acpi_hint == table_hint)
			return false;
	}
	return true;
}
1919
ignore_native(void)1920 static inline bool ignore_native(void)
1921 {
1922 return no_native && !no_acpi;
1923 }
#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
#define force_use_acpi	(false)

/* Stubs: without ACPI processor C-state support, never consult ACPI _CST. */
static inline bool intel_idle_acpi_cst_extract(void) { return false; }
static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(unsigned int flags, u32 mwait_hint)
{
	return false;
}
static inline bool ignore_native(void) { return false; }
#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1935
1936 /**
1937 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1938 *
1939 * Tune IVT multi-socket targets.
1940 * Assumption: num_sockets == (max_package_num + 1).
1941 */
ivt_idle_state_table_update(void)1942 static void __init ivt_idle_state_table_update(void)
1943 {
1944 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1945 int cpu, package_num, num_sockets = 1;
1946
1947 for_each_online_cpu(cpu) {
1948 package_num = topology_physical_package_id(cpu);
1949 if (package_num + 1 > num_sockets) {
1950 num_sockets = package_num + 1;
1951
1952 if (num_sockets > 4) {
1953 cpuidle_state_table = ivt_cstates_8s;
1954 return;
1955 }
1956 }
1957 }
1958
1959 if (num_sockets > 2)
1960 cpuidle_state_table = ivt_cstates_4s;
1961
1962 /* else, 1 and 2 socket systems use default ivt_cstates */
1963 }
1964
1965 /**
1966 * irtl_2_usec - IRTL to microseconds conversion.
1967 * @irtl: IRTL MSR value.
1968 *
1969 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1970 */
irtl_2_usec(unsigned long long irtl)1971 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1972 {
1973 static const unsigned int irtl_ns_units[] __initconst = {
1974 1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1975 };
1976 unsigned long long ns;
1977
1978 if (!irtl)
1979 return 0;
1980
1981 ns = irtl_ns_units[(irtl >> 10) & 0x7];
1982
1983 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1984 }
1985
1986 /**
1987 * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1988 *
1989 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1990 * definitive maximum latency and use the same value for target_residency.
1991 */
bxt_idle_state_table_update(void)1992 static void __init bxt_idle_state_table_update(void)
1993 {
1994 unsigned long long msr;
1995 unsigned int usec;
1996
1997 rdmsrq(MSR_PKGC6_IRTL, msr);
1998 usec = irtl_2_usec(msr);
1999 if (usec) {
2000 bxt_cstates[2].exit_latency = usec;
2001 bxt_cstates[2].target_residency = usec;
2002 }
2003
2004 rdmsrq(MSR_PKGC7_IRTL, msr);
2005 usec = irtl_2_usec(msr);
2006 if (usec) {
2007 bxt_cstates[3].exit_latency = usec;
2008 bxt_cstates[3].target_residency = usec;
2009 }
2010
2011 rdmsrq(MSR_PKGC8_IRTL, msr);
2012 usec = irtl_2_usec(msr);
2013 if (usec) {
2014 bxt_cstates[4].exit_latency = usec;
2015 bxt_cstates[4].target_residency = usec;
2016 }
2017
2018 rdmsrq(MSR_PKGC9_IRTL, msr);
2019 usec = irtl_2_usec(msr);
2020 if (usec) {
2021 bxt_cstates[5].exit_latency = usec;
2022 bxt_cstates[5].target_residency = usec;
2023 }
2024
2025 rdmsrq(MSR_PKGC10_IRTL, msr);
2026 usec = irtl_2_usec(msr);
2027 if (usec) {
2028 bxt_cstates[6].exit_latency = usec;
2029 bxt_cstates[6].target_residency = usec;
2030 }
2031
2032 }
2033
2034 /**
2035 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
2036 *
2037 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
2038 */
static void __init sklh_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int eax, ebx, ecx, edx;


	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
	if (max_cstate <= 7)
		return;

	/* if PC10 not present in CPUID.MWAIT.EDX */
	if ((mwait_substates & (0xF << 28)) == 0)
		return;

	rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* PC10 is not enabled in PKG C-state limit */
	if ((msr & 0xF) != 8)
		return;

	ecx = 0;
	cpuid(7, &eax, &ebx, &ecx, &edx);

	/* if SGX is present (CPUID leaf 7, EBX bit 2) */
	if (ebx & (1 << 2)) {

		rdmsrq(MSR_IA32_FEAT_CTL, msr);

		/* if SGX is enabled (FEAT_CTL bit 18) */
		if (msr & (1 << 18))
			return;
	}

	/* PC10 enabled and SGX off: the intermediate C8/C9 states are dropped. */
	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
}
2075
2076 /**
2077 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
2078 * idle states table.
2079 */
static void __init skx_idle_state_table_update(void)
{
	unsigned long long msr;

	rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/*
	 * PKG C-state limit encoding (low 3 bits):
	 * 000b: C0/C1 (no package C-state support)
	 * 001b: C2
	 * 010b: C6 (non-retention)
	 * 011b: C6 (retention)
	 * 111b: No Package C state limits.
	 */
	if ((msr & 0x7) < 2) {
		/*
		 * Uses the CC6 + PC0 latency and 3 times of
		 * latency for target_residency if the PC6
		 * is disabled in BIOS. This is consistent
		 * with how intel_idle driver uses _CST
		 * to set the target_residency.
		 */
		skx_cstates[2].exit_latency = 92;
		skx_cstates[2].target_residency = 276;
	}
}
2105
2106 /**
2107 * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
2108 */
static void __init spr_idle_state_table_update(void)
{
	unsigned long long msr;

	/*
	 * By default, the C6 state assumes the worst-case scenario of package
	 * C6. However, if PC6 is disabled, we update the numbers to match
	 * core C6.
	 */
	rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* Limit value 2 and above allow for PC6. */
	if ((msr & 0x7) < 2) {
		/* spr_cstates[2] is the "C6" entry. */
		spr_cstates[2].exit_latency = 190;
		spr_cstates[2].target_residency = 600;
	}
}
2126
2127 /**
2128 * byt_cht_auto_demotion_disable - Disable Bay/Cherry Trail auto-demotion.
2129 */
static void __init byt_cht_auto_demotion_disable(void)
{
	/* Zero both demotion policy MSRs -- presumably disabling core C6 and
	 * module C6 auto-demotion; confirm against BYT/CHT MSR documentation. */
	wrmsrq(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
	wrmsrq(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
}
2135
/*
 * Return 'true' if the C-state encoded by @mwait_hint is actually supported
 * per the CPUID MWAIT sub-state counts cached in mwait_substates.
 */
static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
{
	/* Major C-state index from the hint (the +1/mask maps hint 0xF back to C0). */
	unsigned int mwait_cstate = (MWAIT_HINT2CSTATE(mwait_hint) + 1) &
					MWAIT_CSTATE_MASK;
	/* mwait_substates holds a 4-bit sub-state count per C-state. */
	unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
					MWAIT_SUBSTATE_MASK;

	/* Ignore the C-state if there are NO sub-states in CPUID for it. */
	if (num_substates == 0)
		return false;

	if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
		mark_tsc_unstable("TSC halts in idle states deeper than C2");

	return true;
}
2152
/*
 * state_update_enter_method - Select the ->enter() callback for an idle state.
 * @state: Idle state whose entry method is to be set up.
 * @cstate: Index of the state in the table (used for logging only).
 *
 * Choose between the xstate-, IBRS-, IRQ-enabled and default idle entry
 * routines based on the state's flags and kernel mitigation settings.
 */
static void state_update_enter_method(struct cpuidle_state *state, int cstate)
{
	if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
		/*
		 * Combining XSTATE with the IBRS or IRQ_ENABLE flags
		 * is not currently supported by this driver.
		 */
		WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
		WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
		state->enter = intel_idle_xstate;
		return;
	}

	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
	    ((state->flags & CPUIDLE_FLAG_IBRS) || ibrs_off)) {
		/*
		 * IBRS mitigation requires that C-states are entered
		 * with interrupts disabled.
		 */
		if (ibrs_off && (state->flags & CPUIDLE_FLAG_IRQ_ENABLE))
			state->flags &= ~CPUIDLE_FLAG_IRQ_ENABLE;
		WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
		state->enter = intel_idle_ibrs;
		return;
	}

	if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
		state->enter = intel_idle_irq;
		return;
	}

	/* Debug override: force interrupt-enabled idle entry for all states. */
	if (force_irq_on) {
		pr_info("forced intel_idle_irq for state %d\n", cstate);
		state->enter = intel_idle_irq;
	}
}
2189
/*
 * intel_idle_init_cstates_icpu - Populate the driver with native C-states.
 * @drv: cpuidle driver structure to fill in.
 *
 * Apply model-specific fixups to the selected C-states table, then copy the
 * usable entries into @drv, setting up entry methods and flags along the way.
 */
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
	int cstate;

	/* Model-specific table adjustments must run before the table scan. */
	switch (boot_cpu_data.x86_vfm) {
	case INTEL_IVYBRIDGE_X:
		ivt_idle_state_table_update();
		break;
	case INTEL_ATOM_GOLDMONT:
	case INTEL_ATOM_GOLDMONT_PLUS:
		bxt_idle_state_table_update();
		break;
	case INTEL_SKYLAKE:
		sklh_idle_state_table_update();
		break;
	case INTEL_SKYLAKE_X:
		skx_idle_state_table_update();
		break;
	case INTEL_SAPPHIRERAPIDS_X:
	case INTEL_EMERALDRAPIDS_X:
		spr_idle_state_table_update();
		break;
	case INTEL_ATOM_SILVERMONT:
	case INTEL_ATOM_AIRMONT:
		byt_cht_auto_demotion_disable();
		break;
	}

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		struct cpuidle_state *state;
		unsigned int mwait_hint;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		/* An entry with no callbacks terminates the table. */
		if (!cpuidle_state_table[cstate].enter &&
		    !cpuidle_state_table[cstate].enter_s2idle)
			break;

		if (!cpuidle_state_table[cstate].enter_dead)
			cpuidle_state_table[cstate].enter_dead = intel_idle_enter_dead;

		/* If marked as unusable, skip this state. */
		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}

		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		if (!intel_idle_verify_cstate(mwait_hint))
			continue;

		/* Structure copy. */
		drv->states[drv->state_count] = cpuidle_state_table[cstate];
		state = &drv->states[drv->state_count];

		state_update_enter_method(state, cstate);

		/*
		 * Disable the state if requested via the states_off mask, or
		 * if ACPI data says it should be off by default (unless the
		 * state is flagged as always enabled).
		 */
		if ((disabled_states_mask & BIT(drv->state_count)) ||
		    ((icpu->use_acpi || force_use_acpi) &&
		     intel_idle_off_by_default(state->flags, mwait_hint) &&
		     !(state->flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
			state->flags |= CPUIDLE_FLAG_OFF;

		if (intel_idle_state_needs_timer_stop(state))
			state->flags |= CPUIDLE_FLAG_TIMER_STOP;

		drv->state_count++;
	}
}
2262
2263 /**
2264 * intel_idle_cpuidle_driver_init - Create the list of available idle states.
2265 * @drv: cpuidle driver structure to initialize.
2266 */
intel_idle_cpuidle_driver_init(struct cpuidle_driver * drv)2267 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
2268 {
2269 cpuidle_poll_state_init(drv);
2270
2271 if (disabled_states_mask & BIT(0))
2272 drv->states[0].flags |= CPUIDLE_FLAG_OFF;
2273
2274 drv->state_count = 1;
2275
2276 if (icpu && icpu->state_table)
2277 intel_idle_init_cstates_icpu(drv);
2278 else
2279 intel_idle_init_cstates_acpi(drv);
2280 }
2281
auto_demotion_disable(void)2282 static void auto_demotion_disable(void)
2283 {
2284 unsigned long long msr_bits;
2285
2286 rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
2287 msr_bits &= ~auto_demotion_disable_flags;
2288 wrmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
2289 }
2290
c1e_promotion_enable(void)2291 static void c1e_promotion_enable(void)
2292 {
2293 unsigned long long msr_bits;
2294
2295 rdmsrq(MSR_IA32_POWER_CTL, msr_bits);
2296 msr_bits |= 0x2;
2297 wrmsrq(MSR_IA32_POWER_CTL, msr_bits);
2298 }
2299
c1e_promotion_disable(void)2300 static void c1e_promotion_disable(void)
2301 {
2302 unsigned long long msr_bits;
2303
2304 rdmsrq(MSR_IA32_POWER_CTL, msr_bits);
2305 msr_bits &= ~0x2;
2306 wrmsrq(MSR_IA32_POWER_CTL, msr_bits);
2307 }
2308
2309 /**
2310 * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
2311 * @cpu: CPU to initialize.
2312 *
2313 * Register a cpuidle device object for @cpu and update its MSRs in accordance
2314 * with the processor model flags.
2315 */
intel_idle_cpu_init(unsigned int cpu)2316 static int intel_idle_cpu_init(unsigned int cpu)
2317 {
2318 struct cpuidle_device *dev;
2319
2320 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
2321 dev->cpu = cpu;
2322
2323 if (cpuidle_register_device(dev)) {
2324 pr_debug("cpuidle_register_device %d failed!\n", cpu);
2325 return -EIO;
2326 }
2327
2328 if (auto_demotion_disable_flags)
2329 auto_demotion_disable();
2330
2331 if (c1e_promotion == C1E_PROMOTION_ENABLE)
2332 c1e_promotion_enable();
2333 else if (c1e_promotion == C1E_PROMOTION_DISABLE)
2334 c1e_promotion_disable();
2335
2336 return 0;
2337 }
2338
intel_idle_cpu_online(unsigned int cpu)2339 static int intel_idle_cpu_online(unsigned int cpu)
2340 {
2341 struct cpuidle_device *dev;
2342
2343 if (!boot_cpu_has(X86_FEATURE_ARAT))
2344 tick_broadcast_enable();
2345
2346 /*
2347 * Some systems can hotplug a cpu at runtime after
2348 * the kernel has booted, we have to initialize the
2349 * driver in this case
2350 */
2351 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
2352 if (!dev->registered)
2353 return intel_idle_cpu_init(cpu);
2354
2355 return 0;
2356 }
2357
2358 /**
2359 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
2360 */
intel_idle_cpuidle_devices_uninit(void)2361 static void __init intel_idle_cpuidle_devices_uninit(void)
2362 {
2363 int i;
2364
2365 for_each_online_cpu(i)
2366 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
2367 }
2368
intel_c1_demotion_toggle(void * enable)2369 static void intel_c1_demotion_toggle(void *enable)
2370 {
2371 unsigned long long msr_val;
2372
2373 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2374 /*
2375 * Enable/disable C1 undemotion along with C1 demotion, as this is the
2376 * most sensible configuration in general.
2377 */
2378 if (enable)
2379 msr_val |= NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE;
2380 else
2381 msr_val &= ~(NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE);
2382 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2383 }
2384
intel_c1_demotion_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)2385 static ssize_t intel_c1_demotion_store(struct device *dev,
2386 struct device_attribute *attr,
2387 const char *buf, size_t count)
2388 {
2389 bool enable;
2390 int err;
2391
2392 err = kstrtobool(buf, &enable);
2393 if (err)
2394 return err;
2395
2396 mutex_lock(&c1_demotion_mutex);
2397 /* Enable/disable C1 demotion on all CPUs */
2398 on_each_cpu(intel_c1_demotion_toggle, (void *)enable, 1);
2399 mutex_unlock(&c1_demotion_mutex);
2400
2401 return count;
2402 }
2403
intel_c1_demotion_show(struct device * dev,struct device_attribute * attr,char * buf)2404 static ssize_t intel_c1_demotion_show(struct device *dev,
2405 struct device_attribute *attr, char *buf)
2406 {
2407 unsigned long long msr_val;
2408
2409 /*
2410 * Read the MSR value for a CPU and assume it is the same for all CPUs. Any other
2411 * configuration would be a BIOS bug.
2412 */
2413 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2414 return sysfs_emit(buf, "%d\n", !!(msr_val & NHM_C1_AUTO_DEMOTE));
2415 }
2416 static DEVICE_ATTR_RW(intel_c1_demotion);
2417
intel_idle_sysfs_init(void)2418 static int __init intel_idle_sysfs_init(void)
2419 {
2420 int err;
2421
2422 if (!c1_demotion_supported)
2423 return 0;
2424
2425 sysfs_root = bus_get_dev_root(&cpu_subsys);
2426 if (!sysfs_root)
2427 return 0;
2428
2429 err = sysfs_add_file_to_group(&sysfs_root->kobj,
2430 &dev_attr_intel_c1_demotion.attr,
2431 "cpuidle");
2432 if (err) {
2433 put_device(sysfs_root);
2434 return err;
2435 }
2436
2437 return 0;
2438 }
2439
intel_idle_sysfs_uninit(void)2440 static void __init intel_idle_sysfs_uninit(void)
2441 {
2442 if (!sysfs_root)
2443 return;
2444
2445 sysfs_remove_file_from_group(&sysfs_root->kobj,
2446 &dev_attr_intel_c1_demotion.attr,
2447 "cpuidle");
2448 put_device(sysfs_root);
2449 }
2450
/**
 * get_cmdline_field - Get the current field from a cmdline string.
 * @args: The cmdline string to get the current field from.
 * @field: Pointer to the current field upon return.
 * @sep: The fields separator character.
 *
 * Examples:
 *   Input: args="C1:1:1,C1E:2:10", sep=':'
 *   Output: field="C1", return "1:1,C1E:2:10"
 *   Input: args="C1:1:1,C1E:2:10", sep=','
 *   Output: field="C1:1:1", return "C1E:2:10"
 *   Input: args="::", sep=':'
 *   Output: field="", return ":"
 *
 * Return: The continuation of the cmdline string after the field or NULL.
 */
static char *get_cmdline_field(char *args, char **field, char sep)
{
	unsigned int pos = 0;

	/* Scan up to the separator, a whitespace character or the end. */
	while (args[pos] && !isspace(args[pos]) && args[pos] != sep)
		pos++;

	*field = args;

	if (args[pos] != sep)
		return NULL;

	/* Terminate the field in place and return the remainder. */
	args[pos] = '\0';
	return args + pos + 1;
}
2484
2485 /**
2486 * validate_cmdline_cstate - Validate a C-state from cmdline.
2487 * @state: The C-state to validate.
2488 * @prev_state: The previous C-state in the table or NULL.
2489 *
2490 * Return: 0 if the C-state is valid or -EINVAL otherwise.
2491 */
validate_cmdline_cstate(struct cpuidle_state * state,struct cpuidle_state * prev_state)2492 static int validate_cmdline_cstate(struct cpuidle_state *state,
2493 struct cpuidle_state *prev_state)
2494 {
2495 if (state->exit_latency == 0)
2496 /* Exit latency 0 can only be used for the POLL state */
2497 return -EINVAL;
2498
2499 if (state->exit_latency > MAX_CMDLINE_LATENCY_US)
2500 return -EINVAL;
2501
2502 if (state->target_residency > MAX_CMDLINE_RESIDENCY_US)
2503 return -EINVAL;
2504
2505 if (state->target_residency < state->exit_latency)
2506 return -EINVAL;
2507
2508 if (!prev_state)
2509 return 0;
2510
2511 if (state->exit_latency <= prev_state->exit_latency)
2512 return -EINVAL;
2513
2514 if (state->target_residency <= prev_state->target_residency)
2515 return -EINVAL;
2516
2517 return 0;
2518 }
2519
/**
 * cmdline_table_adjust - Adjust the C-states table with data from cmdline.
 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 *
 * Adjust the C-states table with data from the 'intel_idle.table' module
 * parameter (if specified).  The whole table copy is validated before any
 * change is applied, so on error the driver's table is left untouched.
 */
static void __init cmdline_table_adjust(struct cpuidle_driver *drv)
{
	char *args = cmdline_table_str;
	struct cpuidle_state *state;
	int i;

	if (args[0] == '\0')
		/* The 'intel_idle.table' module parameter was not specified */
		return;

	/* Create a copy of the C-states table */
	for (i = 0; i < drv->state_count; i++)
		cmdline_states[i] = drv->states[i];

	/*
	 * Adjust the C-states table copy with data from the 'intel_idle.table'
	 * module parameter.
	 */
	while (args) {
		char *fields, *name, *val;

		/*
		 * Get the next C-state definition, which is expected to be
		 * '<name>:<latency_us>:<target_residency_us>'. Treat "empty"
		 * fields as unchanged. For example,
		 * '<name>::<target_residency_us>' leaves the latency unchanged.
		 */
		args = get_cmdline_field(args, &fields, ',');

		/* name */
		fields = get_cmdline_field(fields, &name, ':');
		if (!fields)
			goto error;

		if (!strcmp(name, "POLL")) {
			pr_err("Cannot adjust POLL\n");
			continue;
		}

		/* Find the C-state by its name */
		state = NULL;
		for (i = 0; i < drv->state_count; i++) {
			if (!strcmp(name, drv->states[i].name)) {
				state = &cmdline_states[i];
				break;
			}
		}

		if (!state) {
			pr_err("C-state '%s' was not found\n", name);
			continue;
		}

		/* Latency (an empty field leaves the value unchanged) */
		fields = get_cmdline_field(fields, &val, ':');
		if (!fields)
			goto error;

		if (*val) {
			if (kstrtouint(val, 0, &state->exit_latency))
				goto error;
		}

		/* Target residency (an empty field leaves the value unchanged) */
		fields = get_cmdline_field(fields, &val, ':');

		if (*val) {
			if (kstrtouint(val, 0, &state->target_residency))
				goto error;
		}

		/*
		 * Allow for 3 more fields, but ignore them. Helps to make
		 * possible future extensions of the cmdline format backward
		 * compatible.
		 */
		for (i = 0; fields && i < 3; i++) {
			fields = get_cmdline_field(fields, &val, ':');
			if (!fields)
				break;
		}

		if (fields) {
			pr_err("Too many fields for C-state '%s'\n", state->name);
			goto error;
		}

		pr_info("C-state from cmdline: name=%s, latency=%u, residency=%u\n",
			state->name, state->exit_latency, state->target_residency);
	}

	/* Validate the adjusted C-states, start with index 1 to skip POLL */
	for (i = 1; i < drv->state_count; i++) {
		struct cpuidle_state *prev_state;

		state = &cmdline_states[i];
		prev_state = &cmdline_states[i - 1];

		if (validate_cmdline_cstate(state, prev_state)) {
			pr_err("C-state '%s' validation failed\n", state->name);
			goto error;
		}
	}

	/* Copy the adjusted C-states table back */
	for (i = 1; i < drv->state_count; i++)
		drv->states[i] = cmdline_states[i];

	pr_info("Adjusted C-states with data from 'intel_idle.table'\n");
	return;

error:
	pr_info("Failed to adjust C-states with data from 'intel_idle.table'\n");
}
2641
intel_idle_init(void)2642 static int __init intel_idle_init(void)
2643 {
2644 const struct x86_cpu_id *id;
2645 unsigned int eax, ebx, ecx;
2646 int retval;
2647
2648 /* Do not load intel_idle at all for now if idle= is passed */
2649 if (boot_option_idle_override != IDLE_NO_OVERRIDE)
2650 return -ENODEV;
2651
2652 if (max_cstate == 0) {
2653 pr_debug("disabled\n");
2654 return -EPERM;
2655 }
2656
2657 id = x86_match_cpu(intel_idle_ids);
2658 if (id) {
2659 if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
2660 pr_debug("Please enable MWAIT in BIOS SETUP\n");
2661 return -ENODEV;
2662 }
2663 } else {
2664 id = x86_match_cpu(intel_mwait_ids);
2665 if (!id)
2666 return -ENODEV;
2667 }
2668
2669 cpuid(CPUID_LEAF_MWAIT, &eax, &ebx, &ecx, &mwait_substates);
2670
2671 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
2672 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
2673 !mwait_substates)
2674 return -ENODEV;
2675
2676 pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
2677
2678 icpu = (const struct idle_cpu *)id->driver_data;
2679 if (icpu && ignore_native()) {
2680 pr_debug("ignoring native CPU idle states\n");
2681 icpu = NULL;
2682 }
2683 if (icpu) {
2684 if (icpu->state_table)
2685 cpuidle_state_table = icpu->state_table;
2686 else if (!intel_idle_acpi_cst_extract())
2687 return -ENODEV;
2688
2689 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
2690 if (icpu->disable_promotion_to_c1e)
2691 c1e_promotion = C1E_PROMOTION_DISABLE;
2692 if (icpu->c1_demotion_supported)
2693 c1_demotion_supported = true;
2694 if (icpu->use_acpi || force_use_acpi)
2695 intel_idle_acpi_cst_extract();
2696 } else if (!intel_idle_acpi_cst_extract()) {
2697 return -ENODEV;
2698 }
2699
2700 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
2701 if (!intel_idle_cpuidle_devices)
2702 return -ENOMEM;
2703
2704 intel_idle_cpuidle_driver_init(&intel_idle_driver);
2705 cmdline_table_adjust(&intel_idle_driver);
2706
2707 retval = intel_idle_sysfs_init();
2708 if (retval)
2709 pr_warn("failed to initialized sysfs");
2710
2711 retval = cpuidle_register_driver(&intel_idle_driver);
2712 if (retval) {
2713 struct cpuidle_driver *drv = cpuidle_get_driver();
2714 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
2715 drv ? drv->name : "none");
2716 goto init_driver_fail;
2717 }
2718
2719 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
2720 intel_idle_cpu_online, NULL);
2721 if (retval < 0)
2722 goto hp_setup_fail;
2723
2724 pr_debug("Local APIC timer is reliable in %s\n",
2725 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
2726
2727 arch_cpu_rescan_dead_smt_siblings();
2728
2729 return 0;
2730
2731 hp_setup_fail:
2732 intel_idle_cpuidle_devices_uninit();
2733 cpuidle_unregister_driver(&intel_idle_driver);
2734 init_driver_fail:
2735 intel_idle_sysfs_uninit();
2736 free_percpu(intel_idle_cpuidle_devices);
2737 return retval;
2738
2739 }
2740 subsys_initcall_sync(intel_idle_init);
2741
/*
 * We are not really modular, but we used to support that. Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
/*
 * Debugging option that forces the driver to enter all C-states with
 * interrupts enabled. Does not apply to C-states with
 * 'CPUIDLE_FLAG_INIT_XSTATE' and 'CPUIDLE_FLAG_IBRS' flags.
 */
module_param(force_irq_on, bool, 0444);
/*
 * Force the disabling of IBRS when X86_FEATURE_KERNEL_IBRS is on and
 * CPUIDLE_FLAG_IRQ_ENABLE isn't set.
 */
module_param(ibrs_off, bool, 0444);
MODULE_PARM_DESC(ibrs_off, "Disable IBRS when idle");

/*
 * Define the C-states table from a user input string. Expected format is
 * 'name:latency:residency', where:
 * - name: The C-state name.
 * - latency: The C-state exit latency in us.
 * - residency: The C-state target residency in us.
 *
 * Multiple C-states can be defined by separating them with commas:
 * 'name1:latency1:residency1,name2:latency2:residency2'
 *
 * Example: intel_idle.table=C1:1:1,C1E:5:10,C6:100:600
 *
 * To leave latency or residency unchanged, use an empty field, for example:
 * 'C1:1:1,C1E::10' - leaves C1E latency unchanged.
 *
 * The string is parsed and validated by cmdline_table_adjust() at init time.
 */
module_param_string(table, cmdline_table_str, MAX_CMDLINE_TABLE_LEN, 0444);
MODULE_PARM_DESC(table, "Build the C-states table from a user input string");
2787