xref: /linux/arch/x86/kernel/apic/apic.c (revision ab93e0dd72c37d378dd936f031ffb83ff2bd87ce)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *	Local APIC handling, local APIC timers
4  *
5  *	(c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
6  *
7  *	Fixes
8  *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
9  *					thanks to Eric Gilmore
10  *					and Rolf G. Tews
11  *					for testing these extensively.
12  *	Maciej W. Rozycki	:	Various updates and fixes.
13  *	Mikael Pettersson	:	Power Management for UP-APIC.
14  *	Pavel Machek and
15  *	Mikael Pettersson	:	PM converted to driver model.
16  */
17 
18 #include <linux/perf_event.h>
19 #include <linux/kernel_stat.h>
20 #include <linux/mc146818rtc.h>
21 #include <linux/acpi_pmtmr.h>
22 #include <linux/bitmap.h>
23 #include <linux/clockchips.h>
24 #include <linux/interrupt.h>
25 #include <linux/memblock.h>
26 #include <linux/ftrace.h>
27 #include <linux/ioport.h>
28 #include <linux/export.h>
29 #include <linux/syscore_ops.h>
30 #include <linux/delay.h>
31 #include <linux/timex.h>
32 #include <linux/i8253.h>
33 #include <linux/dmar.h>
34 #include <linux/init.h>
35 #include <linux/cpu.h>
36 #include <linux/dmi.h>
37 #include <linux/smp.h>
38 #include <linux/mm.h>
39 
40 #include <xen/xen.h>
41 
42 #include <asm/trace/irq_vectors.h>
43 #include <asm/irq_remapping.h>
44 #include <asm/pc-conf-reg.h>
45 #include <asm/perf_event.h>
46 #include <asm/x86_init.h>
47 #include <linux/atomic.h>
48 #include <asm/barrier.h>
49 #include <asm/mpspec.h>
50 #include <asm/i8259.h>
51 #include <asm/proto.h>
52 #include <asm/traps.h>
53 #include <asm/apic.h>
54 #include <asm/acpi.h>
55 #include <asm/io_apic.h>
56 #include <asm/desc.h>
57 #include <asm/hpet.h>
58 #include <asm/mtrr.h>
59 #include <asm/time.h>
60 #include <asm/smp.h>
61 #include <asm/mce.h>
62 #include <asm/msr.h>
63 #include <asm/tsc.h>
64 #include <asm/hypervisor.h>
65 #include <asm/cpu_device_id.h>
66 #include <asm/intel-family.h>
67 #include <asm/irq_regs.h>
68 #include <asm/cpu.h>
69 
70 #include "local.h"
71 
/* Processor that is doing the boot up; BAD_APICID until it has been set */
u32 boot_cpu_physical_apicid __ro_after_init = BAD_APICID;
EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);

/* NOTE(review): presumably the local APIC version of the boot CPU - confirm */
u8 boot_cpu_apic_version __ro_after_init;

/*
 * This variable controls which CPUs receive external NMIs.  By default,
 * external NMIs are delivered only to the BSP.
 */
static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;

/*
 * Hypervisor supports 15 bits of APIC ID in MSI Extended Destination ID
 */
static bool virt_ext_dest_id __ro_after_init;

/* For parallel bootup. A non-zero value is checked by apic_accessible(). */
unsigned long apic_mmio_base __ro_after_init;
91 
apic_accessible(void)92 static inline bool apic_accessible(void)
93 {
94 	return x2apic_mode || apic_mmio_base;
95 }
96 
97 #ifdef CONFIG_X86_32
98 /* Local APIC was disabled by the BIOS and enabled by the kernel */
99 static int enabled_via_apicbase __ro_after_init;
100 
101 /*
102  * Handle interrupt mode configuration register (IMCR).
103  * This register controls whether the interrupt signals
104  * that reach the BSP come from the master PIC or from the
105  * local APIC. Before entering Symmetric I/O Mode, either
106  * the BIOS or the operating system must switch out of
107  * PIC Mode by changing the IMCR.
108  */
imcr_pic_to_apic(void)109 static inline void imcr_pic_to_apic(void)
110 {
111 	/* NMI and 8259 INTR go through APIC */
112 	pc_conf_set(PC_CONF_MPS_IMCR, 0x01);
113 }
114 
imcr_apic_to_pic(void)115 static inline void imcr_apic_to_pic(void)
116 {
117 	/* NMI and 8259 INTR go directly to BSP */
118 	pc_conf_set(PC_CONF_MPS_IMCR, 0x00);
119 }
120 #endif
121 
122 /*
123  * Knob to control our willingness to enable the local APIC.
124  *
125  * +1=force-enable
126  */
127 static int force_enable_local_apic __initdata;
128 
129 /*
130  * APIC command line parameters
131  */
parse_lapic(char * arg)132 static int __init parse_lapic(char *arg)
133 {
134 	if (IS_ENABLED(CONFIG_X86_32) && !arg)
135 		force_enable_local_apic = 1;
136 	else if (arg && !strncmp(arg, "notscdeadline", 13))
137 		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
138 	return 0;
139 }
140 early_param("lapic", parse_lapic);
141 
142 #ifdef CONFIG_X86_64
/* Set to 1 by the "apicpmtimer" boot parameter */
static int apic_calibrate_pmtmr __initdata;

/*
 * Handler for the "apicpmtimer" boot parameter: sets apic_calibrate_pmtmr
 * and then calls notsc_setup(NULL). The argument string @s is not used.
 *
 * Returns 1 unconditionally.
 */
static __init int setup_apicpmtimer(char *s)
{
	apic_calibrate_pmtmr = 1;
	notsc_setup(NULL);
	return 1;
}
__setup("apicpmtimer", setup_apicpmtimer);
151 #endif
152 
/* NOTE(review): presumably the physical address of the local APIC - confirm */
static unsigned long mp_lapic_addr __ro_after_init;
/* Non-zero when the local APIC is disabled; consulted by apic_needs_pit() */
bool apic_is_disabled __ro_after_init;
/* Disable local APIC timer from the kernel commandline or via dmi quirk */
static int disable_apic_timer __initdata;
/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok __ro_after_init;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);

/*
 * Debug level, exported for io_apic.c
 */
int apic_verbosity __ro_after_init;

/* NOTE(review): presumably set when the system runs in PIC mode - confirm */
int pic_mode __ro_after_init;

/* Have we found an MP table */
int smp_found_config __ro_after_init;

/* Resource descriptor for the local APIC: a busy memory region */
static struct resource lapic_resource = {
	.name = "Local APIC",
	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};

/*
 * Calibrated local APIC timer period. Zero means "not calibrated":
 * lapic_init_clockevent() refuses to run and apic_needs_pit() asks for
 * the PIT in that case. calibrate_APIC_clock() fills it in as
 * (counter delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS.
 */
unsigned int lapic_timer_period = 0;

/* Forward declaration; the definition is further down in the file */
static void apic_pm_activate(void);
179 
180 /*
181  * Get the LAPIC version
182  */
lapic_get_version(void)183 static inline int lapic_get_version(void)
184 {
185 	return GET_APIC_VERSION(apic_read(APIC_LVR));
186 }
187 
/*
 * Tell whether the APIC is integrated into the CPU or a separate chip.
 * Returns non-zero for an integrated APIC.
 */
static inline int lapic_is_integrated(void)
{
	/*
	 * Keep the version read inside the macro argument: whether it is
	 * evaluated at all is up to APIC_INTEGRATED().
	 */
	int integrated = APIC_INTEGRATED(lapic_get_version());

	return integrated;
}
195 
196 /*
197  * Check, whether this is a modern or a first generation APIC
198  */
modern_apic(void)199 static int modern_apic(void)
200 {
201 	/* AMD systems use old APIC versions, so check the CPU */
202 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
203 	    boot_cpu_data.x86 >= 0xf)
204 		return 1;
205 
206 	/* Hygon systems use modern APIC */
207 	if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
208 		return 1;
209 
210 	return lapic_get_version() >= 0x14;
211 }
212 
213 /*
214  * right after this call apic become NOOP driven
215  * so apic->write/read doesn't do anything
216  */
apic_disable(void)217 static void __init apic_disable(void)
218 {
219 	apic_install_driver(&apic_noop);
220 }
221 
/*
 * Write the interrupt command register pair.
 *
 * @low: value for APIC_ICR
 * @id:  destination, placed into the destination field of APIC_ICR2
 *
 * APIC_ICR2 is written before APIC_ICR, and both writes happen with local
 * interrupts disabled.
 */
void native_apic_icr_write(u32 low, u32 id)
{
	const u32 dest = SET_XAPIC_DEST_FIELD(id);
	unsigned long irqflags;

	local_irq_save(irqflags);
	apic_write(APIC_ICR2, dest);
	apic_write(APIC_ICR, low);
	local_irq_restore(irqflags);
}
231 
native_apic_icr_read(void)232 u64 native_apic_icr_read(void)
233 {
234 	u32 icr1, icr2;
235 
236 	icr2 = apic_read(APIC_ICR2);
237 	icr1 = apic_read(APIC_ICR);
238 
239 	return icr1 | ((u64)icr2 << 32);
240 }
241 
242 /**
243  * lapic_get_maxlvt - get the maximum number of local vector table entries
244  */
lapic_get_maxlvt(void)245 int lapic_get_maxlvt(void)
246 {
247 	/*
248 	 * - we always have APIC integrated on 64bit mode
249 	 * - 82489DXs do not report # of LVT entries
250 	 */
251 	return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
252 }
253 
/*
 * Local APIC timer
 */

/*
 * Clock divisors: __setup_APIC_LVTT() programs a divide-by-16 and scales
 * the initial count by APIC_DIVISOR; lapic_next_deadline() multiplies the
 * requested delta by TSC_DIVISOR to get TSC cycles.
 */
#define APIC_DIVISOR 16
#define TSC_DIVISOR  8

/* i82489DX specific: base divider value OR-ed into the LVTT register */
#define		I82489DX_BASE_DIVIDER		(((0x2) << 18))
264 
265 /*
266  * This function sets up the local APIC timer, with a timeout of
267  * 'clocks' APIC bus clock. During calibration we actually call
268  * this function twice on the boot CPU, once with a bogus timeout
269  * value, second time for real. The other (noncalibrating) CPUs
270  * call this function only once, with the real, calibrated value.
271  *
272  * We do reads before writes even if unnecessary, to get around the
273  * P5 APIC double write bug.
274  */
__setup_APIC_LVTT(unsigned int clocks,int oneshot,int irqen)275 static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
276 {
277 	unsigned int lvtt_value, tmp_value;
278 
279 	lvtt_value = LOCAL_TIMER_VECTOR;
280 	if (!oneshot)
281 		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
282 	else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
283 		lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
284 
285 	/*
286 	 * The i82489DX APIC uses bit 18 and 19 for the base divider.  This
287 	 * overlaps with bit 18 on integrated APICs, but is not documented
288 	 * in the SDM. No problem though. i82489DX equipped systems do not
289 	 * have TSC deadline timer.
290 	 */
291 	if (!lapic_is_integrated())
292 		lvtt_value |= I82489DX_BASE_DIVIDER;
293 
294 	if (!irqen)
295 		lvtt_value |= APIC_LVT_MASKED;
296 
297 	apic_write(APIC_LVTT, lvtt_value);
298 
299 	if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
300 		/*
301 		 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
302 		 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
303 		 * According to Intel, MFENCE can do the serialization here.
304 		 */
305 		asm volatile("mfence" : : : "memory");
306 		return;
307 	}
308 
309 	/*
310 	 * Divide PICLK by 16
311 	 */
312 	tmp_value = apic_read(APIC_TDCR);
313 	apic_write(APIC_TDCR,
314 		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
315 		APIC_TDR_DIV_16);
316 
317 	if (!oneshot)
318 		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
319 }
320 
/*
 * Setup extended LVT, AMD specific
 *
 * Software should use the LVT offsets the BIOS provides.  The offsets
 * are determined by the subsystems using them, like those for MCE
 * threshold or IBS.  On K8 only offset 0 (APIC500) and MCE interrupts
 * are supported. Beginning with family 10h at least 4 offsets are
 * available.
 *
 * Since the offsets must be consistent for all cores, we keep track
 * of the LVT offsets in software and reserve the offset for the same
 * vector also to be used on other cores. An offset is freed by
 * setting the entry to APIC_EILVT_MASKED.
 *
 * If the BIOS is right, there should be no conflicts. Otherwise a
 * "[Firmware Bug]: ..." error message is generated. However, if
 * software does not properly determine the offsets, it is not
 * necessarily a BIOS bug.
 */

/* Software bookkeeping of the reserved entry for each LVT offset */
static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
342 
eilvt_entry_is_changeable(unsigned int old,unsigned int new)343 static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
344 {
345 	return (old & APIC_EILVT_MASKED)
346 		|| (new == APIC_EILVT_MASKED)
347 		|| ((new & ~APIC_EILVT_MASKED) == old);
348 }
349 
reserve_eilvt_offset(int offset,unsigned int new)350 static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
351 {
352 	unsigned int rsvd, vector;
353 
354 	if (offset >= APIC_EILVT_NR_MAX)
355 		return ~0;
356 
357 	rsvd = atomic_read(&eilvt_offsets[offset]);
358 	do {
359 		vector = rsvd & ~APIC_EILVT_MASKED;	/* 0: unassigned */
360 		if (vector && !eilvt_entry_is_changeable(vector, new))
361 			/* may not change if vectors are different */
362 			return rsvd;
363 	} while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new));
364 
365 	rsvd = new & ~APIC_EILVT_MASKED;
366 	if (rsvd && rsvd != vector)
367 		pr_info("LVT offset %d assigned for vector 0x%02x\n",
368 			offset, rsvd);
369 
370 	return new;
371 }
372 
373 /*
374  * If mask=1, the LVT entry does not generate interrupts while mask=0
375  * enables the vector. See also the BKDGs. Must be called with
376  * preemption disabled.
377  */
378 
setup_APIC_eilvt(u8 offset,u8 vector,u8 msg_type,u8 mask)379 int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
380 {
381 	unsigned long reg = APIC_EILVTn(offset);
382 	unsigned int new, old, reserved;
383 
384 	new = (mask << 16) | (msg_type << 8) | vector;
385 	old = apic_read(reg);
386 	reserved = reserve_eilvt_offset(offset, new);
387 
388 	if (reserved != new) {
389 		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
390 		       "vector 0x%x, but the register is already in use for "
391 		       "vector 0x%x on another cpu\n",
392 		       smp_processor_id(), reg, offset, new, reserved);
393 		return -EINVAL;
394 	}
395 
396 	if (!eilvt_entry_is_changeable(old, new)) {
397 		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
398 		       "vector 0x%x, but the register is already in use for "
399 		       "vector 0x%x on this cpu\n",
400 		       smp_processor_id(), reg, offset, new, old);
401 		return -EBUSY;
402 	}
403 
404 	apic_write(reg, new);
405 
406 	return 0;
407 }
408 EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
409 
410 /*
411  * Program the next event, relative to now
412  */
lapic_next_event(unsigned long delta,struct clock_event_device * evt)413 static int lapic_next_event(unsigned long delta,
414 			    struct clock_event_device *evt)
415 {
416 	apic_write(APIC_TMICT, delta);
417 	return 0;
418 }
419 
lapic_next_deadline(unsigned long delta,struct clock_event_device * evt)420 static int lapic_next_deadline(unsigned long delta,
421 			       struct clock_event_device *evt)
422 {
423 	u64 tsc;
424 
425 	/* This MSR is special and need a special fence: */
426 	weak_wrmsr_fence();
427 
428 	tsc = rdtsc();
429 	wrmsrq(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
430 	return 0;
431 }
432 
lapic_timer_shutdown(struct clock_event_device * evt)433 static int lapic_timer_shutdown(struct clock_event_device *evt)
434 {
435 	unsigned int v;
436 
437 	/* Lapic used as dummy for broadcast ? */
438 	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
439 		return 0;
440 
441 	v = apic_read(APIC_LVTT);
442 	v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
443 	apic_write(APIC_LVTT, v);
444 
445 	/*
446 	 * Setting APIC_LVT_MASKED (above) should be enough to tell
447 	 * the hardware that this timer will never fire. But AMD
448 	 * erratum 411 and some Intel CPU behavior circa 2024 say
449 	 * otherwise.  Time for belt and suspenders programming: mask
450 	 * the timer _and_ zero the counter registers:
451 	 */
452 	if (v & APIC_LVT_TIMER_TSCDEADLINE)
453 		wrmsrq(MSR_IA32_TSC_DEADLINE, 0);
454 	else
455 		apic_write(APIC_TMICT, 0);
456 
457 	return 0;
458 }
459 
460 static inline int
lapic_timer_set_periodic_oneshot(struct clock_event_device * evt,bool oneshot)461 lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
462 {
463 	/* Lapic used as dummy for broadcast ? */
464 	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
465 		return 0;
466 
467 	__setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
468 	return 0;
469 }
470 
lapic_timer_set_periodic(struct clock_event_device * evt)471 static int lapic_timer_set_periodic(struct clock_event_device *evt)
472 {
473 	return lapic_timer_set_periodic_oneshot(evt, false);
474 }
475 
lapic_timer_set_oneshot(struct clock_event_device * evt)476 static int lapic_timer_set_oneshot(struct clock_event_device *evt)
477 {
478 	return lapic_timer_set_periodic_oneshot(evt, true);
479 }
480 
/*
 * Local APIC timer broadcast function
 *
 * With CONFIG_SMP, sends LOCAL_TIMER_VECTOR as an IPI to the CPUs in
 * @mask. Without CONFIG_SMP the function body is empty.
 */
static void lapic_timer_broadcast(const struct cpumask *mask)
{
#ifdef CONFIG_SMP
	__apic_send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
#endif
}
490 
491 
492 /*
493  * The local apic timer can be used for any function which is CPU local.
494  */
495 static struct clock_event_device lapic_clockevent = {
496 	.name				= "lapic",
497 	.features			= CLOCK_EVT_FEAT_PERIODIC |
498 					  CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
499 					  | CLOCK_EVT_FEAT_DUMMY,
500 	.shift				= 32,
501 	.set_state_shutdown		= lapic_timer_shutdown,
502 	.set_state_periodic		= lapic_timer_set_periodic,
503 	.set_state_oneshot		= lapic_timer_set_oneshot,
504 	.set_state_oneshot_stopped	= lapic_timer_shutdown,
505 	.set_next_event			= lapic_next_event,
506 	.broadcast			= lapic_timer_broadcast,
507 	.rating				= 100,
508 	.irq				= -1,
509 };
510 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
511 
/*
 * CPU models consulted by apic_validate_deadline_timer(). The last macro
 * argument ends up in ->driver_data and is compared there against the
 * running microcode revision: a CPU matching an entry keeps the TSC
 * deadline timer only if its microcode is at least that revision.
 *
 * NOTE(review): the two middle arguments of X86_MATCH_VFM_STEPS() look like
 * an inclusive stepping range - confirm against the macro definition.
 */
static const struct x86_cpu_id deadline_match[] __initconst = {
	X86_MATCH_VFM_STEPS(INTEL_HASWELL_X,   0x2, 0x2, 0x3a), /* EP */
	X86_MATCH_VFM_STEPS(INTEL_HASWELL_X,   0x4, 0x4, 0x0f), /* EX */

	X86_MATCH_VFM(INTEL_BROADWELL_X,	0x0b000020),

	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x2, 0x2, 0x00000011),
	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x3, 0x3, 0x0700000e),
	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x4, 0x4, 0x0f00000c),
	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x5, 0x5, 0x0e000003),

	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X,   0x3, 0x3, 0x01000136),
	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X,   0x4, 0x4, 0x02000014),
	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X,   0x5, 0xf, 0),

	X86_MATCH_VFM(INTEL_HASWELL,		0x22),
	X86_MATCH_VFM(INTEL_HASWELL_L,		0x20),
	X86_MATCH_VFM(INTEL_HASWELL_G,		0x17),

	X86_MATCH_VFM(INTEL_BROADWELL,		0x25),
	X86_MATCH_VFM(INTEL_BROADWELL_G,	0x17),

	X86_MATCH_VFM(INTEL_SKYLAKE_L,		0xb2),
	X86_MATCH_VFM(INTEL_SKYLAKE,		0xb2),

	X86_MATCH_VFM(INTEL_KABYLAKE_L,		0x52),
	X86_MATCH_VFM(INTEL_KABYLAKE,		0x52),

	/* Terminating empty entry */
	{},
};
542 
apic_validate_deadline_timer(void)543 static __init bool apic_validate_deadline_timer(void)
544 {
545 	const struct x86_cpu_id *m;
546 	u32 rev;
547 
548 	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
549 		return false;
550 	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
551 		return true;
552 
553 	m = x86_match_cpu(deadline_match);
554 	if (!m)
555 		return true;
556 
557 	rev = (u32)m->driver_data;
558 
559 	if (boot_cpu_data.microcode >= rev)
560 		return true;
561 
562 	setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
563 	pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
564 	       "please update microcode to version: 0x%x (or later)\n", rev);
565 	return false;
566 }
567 
568 /*
569  * Setup the local APIC timer for this CPU. Copy the initialized values
570  * of the boot CPU and register the clock event in the framework.
571  */
setup_APIC_timer(void)572 static void setup_APIC_timer(void)
573 {
574 	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
575 
576 	if (this_cpu_has(X86_FEATURE_ARAT)) {
577 		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
578 		/* Make LAPIC timer preferable over percpu HPET */
579 		lapic_clockevent.rating = 150;
580 	}
581 
582 	memcpy(levt, &lapic_clockevent, sizeof(*levt));
583 	levt->cpumask = cpumask_of(smp_processor_id());
584 
585 	if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
586 		levt->name = "lapic-deadline";
587 		levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
588 				    CLOCK_EVT_FEAT_DUMMY);
589 		levt->set_next_event = lapic_next_deadline;
590 		clockevents_config_and_register(levt,
591 						tsc_khz * (1000 / TSC_DIVISOR),
592 						0xF, ~0UL);
593 	} else
594 		clockevents_register_device(levt);
595 }
596 
597 /*
598  * Install the updated TSC frequency from recalibration at the TSC
599  * deadline clockevent devices.
600  */
__lapic_update_tsc_freq(void * info)601 static void __lapic_update_tsc_freq(void *info)
602 {
603 	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
604 
605 	if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
606 		return;
607 
608 	clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
609 }
610 
lapic_update_tsc_freq(void)611 void lapic_update_tsc_freq(void)
612 {
613 	/*
614 	 * The clockevent device's ->mult and ->shift can both be
615 	 * changed. In order to avoid races, schedule the frequency
616 	 * update code on each CPU.
617 	 */
618 	on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
619 }
620 
/*
 * In these functions we calibrate APIC bus clocks to the external timer.
 *
 * We want to do the calibration only once since we want to have local timer
 * irqs synchronous. CPUs connected by the same APIC bus have the very same bus
 * frequency.
 *
 * This was previously done by reading the PIT/HPET and waiting for a wrap
 * around to find out that a tick has elapsed. I have a box where the PIT
 * readout is broken, so it never gets out of the wait loop again. This was
 * also reported by others.
 *
 * Monitoring the jiffies value is inaccurate and the clockevents
 * infrastructure allows us to do a simple substitution of the interrupt
 * handler.
 *
 * The calibration routine also uses the pm_timer when possible, as the PIT
 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
 * back to normal later in the boot process).
 */

/* Number of ticks between the two calibration snapshots */
#define LAPIC_CAL_LOOPS		(HZ/10)

/*
 * Calibration state filled in by lapic_cal_handler(). The loop counter
 * starts at -1; snapshot 1 is taken when it reads 0 and snapshot 2 when it
 * reads LAPIC_CAL_LOOPS.
 */
static __initdata int lapic_cal_loops = -1;
/* APIC timer current count at snapshot 1 / 2 */
static __initdata long lapic_cal_t1, lapic_cal_t2;
/* TSC value at snapshot 1 / 2 (0 when the CPU has no TSC) */
static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
/* ACPI PM timer value at snapshot 1 / 2 */
static __initdata u32 lapic_cal_pm1, lapic_cal_pm2;
/* jiffies at snapshot 1 / 2 */
static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
649 
650 /*
651  * Temporary interrupt handler and polled calibration function.
652  */
lapic_cal_handler(struct clock_event_device * dev)653 static void __init lapic_cal_handler(struct clock_event_device *dev)
654 {
655 	unsigned long long tsc = 0;
656 	long tapic = apic_read(APIC_TMCCT);
657 	u32 pm = acpi_pm_read_early();
658 
659 	if (boot_cpu_has(X86_FEATURE_TSC))
660 		tsc = rdtsc();
661 
662 	switch (lapic_cal_loops++) {
663 	case 0:
664 		lapic_cal_t1 = tapic;
665 		lapic_cal_tsc1 = tsc;
666 		lapic_cal_pm1 = pm;
667 		lapic_cal_j1 = jiffies;
668 		break;
669 
670 	case LAPIC_CAL_LOOPS:
671 		lapic_cal_t2 = tapic;
672 		lapic_cal_tsc2 = tsc;
673 		if (pm < lapic_cal_pm1)
674 			pm += ACPI_PM_OVRRUN;
675 		lapic_cal_pm2 = pm;
676 		lapic_cal_j2 = jiffies;
677 		break;
678 	}
679 }
680 
681 static int __init
calibrate_by_pmtimer(u32 deltapm,long * delta,long * deltatsc)682 calibrate_by_pmtimer(u32 deltapm, long *delta, long *deltatsc)
683 {
684 	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
685 	const long pm_thresh = pm_100ms / 100;
686 	unsigned long mult;
687 	u64 res;
688 
689 #ifndef CONFIG_X86_PM_TIMER
690 	return -1;
691 #endif
692 
693 	apic_pr_verbose("... PM-Timer delta = %u\n", deltapm);
694 
695 	/* Check, if the PM timer is available */
696 	if (!deltapm)
697 		return -1;
698 
699 	mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
700 
701 	if (deltapm > (pm_100ms - pm_thresh) &&
702 	    deltapm < (pm_100ms + pm_thresh)) {
703 		apic_pr_verbose("... PM-Timer result ok\n");
704 		return 0;
705 	}
706 
707 	res = (((u64)deltapm) *  mult) >> 22;
708 	do_div(res, 1000000);
709 	pr_warn("APIC calibration not consistent with PM-Timer: %ldms instead of 100ms\n",
710 		(long)res);
711 
712 	/* Correct the lapic counter value */
713 	res = (((u64)(*delta)) * pm_100ms);
714 	do_div(res, deltapm);
715 	pr_info("APIC delta adjusted to PM-Timer: "
716 		"%lu (%ld)\n", (unsigned long)res, *delta);
717 	*delta = (long)res;
718 
719 	/* Correct the tsc counter value */
720 	if (boot_cpu_has(X86_FEATURE_TSC)) {
721 		res = (((u64)(*deltatsc)) * pm_100ms);
722 		do_div(res, deltapm);
723 		apic_pr_verbose("TSC delta adjusted to PM-Timer: %lu (%ld)\n",
724 				(unsigned long)res, *deltatsc);
725 		*deltatsc = (long)res;
726 	}
727 
728 	return 0;
729 }
730 
lapic_init_clockevent(void)731 static int __init lapic_init_clockevent(void)
732 {
733 	if (!lapic_timer_period)
734 		return -1;
735 
736 	/* Calculate the scaled math multiplication factor */
737 	lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
738 					TICK_NSEC, lapic_clockevent.shift);
739 	lapic_clockevent.max_delta_ns =
740 		clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
741 	lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
742 	lapic_clockevent.min_delta_ns =
743 		clockevent_delta2ns(0xF, &lapic_clockevent);
744 	lapic_clockevent.min_delta_ticks = 0xF;
745 
746 	return 0;
747 }
748 
apic_needs_pit(void)749 bool __init apic_needs_pit(void)
750 {
751 	/*
752 	 * If the frequencies are not known, PIT is required for both TSC
753 	 * and apic timer calibration.
754 	 */
755 	if (!tsc_khz || !cpu_khz)
756 		return true;
757 
758 	/* Is there an APIC at all or is it disabled? */
759 	if (!boot_cpu_has(X86_FEATURE_APIC) || apic_is_disabled)
760 		return true;
761 
762 	/*
763 	 * If interrupt delivery mode is legacy PIC or virtual wire without
764 	 * configuration, the local APIC timer won't be set up. Make sure
765 	 * that the PIT is initialized.
766 	 */
767 	if (apic_intr_mode == APIC_PIC ||
768 	    apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
769 		return true;
770 
771 	/* Virt guests may lack ARAT, but still have DEADLINE */
772 	if (!boot_cpu_has(X86_FEATURE_ARAT))
773 		return true;
774 
775 	/* Deadline timer is based on TSC so no further PIT action required */
776 	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
777 		return false;
778 
779 	/* APIC timer disabled? */
780 	if (disable_apic_timer)
781 		return true;
782 	/*
783 	 * The APIC timer frequency is known already, no PIT calibration
784 	 * required. If unknown, let the PIT be initialized.
785 	 */
786 	return lapic_timer_period == 0;
787 }
788 
calibrate_APIC_clock(void)789 static int __init calibrate_APIC_clock(void)
790 {
791 	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
792 	u64 tsc_perj = 0, tsc_start = 0;
793 	unsigned long jif_start;
794 	unsigned long deltaj;
795 	long delta, deltatsc;
796 	int pm_referenced = 0;
797 
798 	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
799 		return 0;
800 
801 	/*
802 	 * Check if lapic timer has already been calibrated by platform
803 	 * specific routine, such as tsc calibration code. If so just fill
804 	 * in the clockevent structure and return.
805 	 */
806 	if (!lapic_init_clockevent()) {
807 		apic_pr_verbose("lapic timer already calibrated %d\n", lapic_timer_period);
808 		/*
809 		 * Direct calibration methods must have an always running
810 		 * local APIC timer, no need for broadcast timer.
811 		 */
812 		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
813 		return 0;
814 	}
815 
816 	apic_pr_verbose("Using local APIC timer interrupts. Calibrating APIC timer ...\n");
817 
818 	/*
819 	 * There are platforms w/o global clockevent devices. Instead of
820 	 * making the calibration conditional on that, use a polling based
821 	 * approach everywhere.
822 	 */
823 	local_irq_disable();
824 
825 	/*
826 	 * Setup the APIC counter to maximum. There is no way the lapic
827 	 * can underflow in the 100ms detection time frame
828 	 */
829 	__setup_APIC_LVTT(0xffffffff, 0, 0);
830 
831 	/*
832 	 * Methods to terminate the calibration loop:
833 	 *  1) Global clockevent if available (jiffies)
834 	 *  2) TSC if available and frequency is known
835 	 */
836 	jif_start = READ_ONCE(jiffies);
837 
838 	if (tsc_khz) {
839 		tsc_start = rdtsc();
840 		tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
841 	}
842 
843 	/*
844 	 * Enable interrupts so the tick can fire, if a global
845 	 * clockevent device is available
846 	 */
847 	local_irq_enable();
848 
849 	while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
850 		/* Wait for a tick to elapse */
851 		while (1) {
852 			if (tsc_khz) {
853 				u64 tsc_now = rdtsc();
854 				if ((tsc_now - tsc_start) >= tsc_perj) {
855 					tsc_start += tsc_perj;
856 					break;
857 				}
858 			} else {
859 				unsigned long jif_now = READ_ONCE(jiffies);
860 
861 				if (time_after(jif_now, jif_start)) {
862 					jif_start = jif_now;
863 					break;
864 				}
865 			}
866 			cpu_relax();
867 		}
868 
869 		/* Invoke the calibration routine */
870 		local_irq_disable();
871 		lapic_cal_handler(NULL);
872 		local_irq_enable();
873 	}
874 
875 	local_irq_disable();
876 
877 	/* Build delta t1-t2 as apic timer counts down */
878 	delta = lapic_cal_t1 - lapic_cal_t2;
879 	apic_pr_verbose("... lapic delta = %ld\n", delta);
880 
881 	deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
882 
883 	/* we trust the PM based calibration if possible */
884 	pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
885 					&delta, &deltatsc);
886 
887 	lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
888 	lapic_init_clockevent();
889 
890 	apic_pr_verbose("..... delta %ld\n", delta);
891 	apic_pr_verbose("..... mult: %u\n", lapic_clockevent.mult);
892 	apic_pr_verbose("..... calibration result: %u\n", lapic_timer_period);
893 
894 	if (boot_cpu_has(X86_FEATURE_TSC)) {
895 		apic_pr_verbose("..... CPU clock speed is %ld.%04ld MHz.\n",
896 				(deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
897 				(deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
898 	}
899 
900 	apic_pr_verbose("..... host bus clock speed is %u.%04u MHz.\n",
901 			lapic_timer_period / (1000000 / HZ),
902 			lapic_timer_period % (1000000 / HZ));
903 
904 	/*
905 	 * Do a sanity check on the APIC calibration result
906 	 */
907 	if (lapic_timer_period < (1000000 / HZ)) {
908 		local_irq_enable();
909 		pr_warn("APIC frequency too slow, disabling apic timer\n");
910 		return -1;
911 	}
912 
913 	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
914 
915 	/*
916 	 * PM timer calibration failed or not turned on so lets try APIC
917 	 * timer based calibration, if a global clockevent device is
918 	 * available.
919 	 */
920 	if (!pm_referenced && global_clock_event) {
921 		apic_pr_verbose("... verify APIC timer\n");
922 
923 		/*
924 		 * Setup the apic timer manually
925 		 */
926 		levt->event_handler = lapic_cal_handler;
927 		lapic_timer_set_periodic(levt);
928 		lapic_cal_loops = -1;
929 
930 		/* Let the interrupts run */
931 		local_irq_enable();
932 
933 		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
934 			cpu_relax();
935 
936 		/* Stop the lapic timer */
937 		local_irq_disable();
938 		lapic_timer_shutdown(levt);
939 
940 		/* Jiffies delta */
941 		deltaj = lapic_cal_j2 - lapic_cal_j1;
942 		apic_pr_verbose("... jiffies delta = %lu\n", deltaj);
943 
944 		/* Check, if the jiffies result is consistent */
945 		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
946 			apic_pr_verbose("... jiffies result ok\n");
947 		else
948 			levt->features |= CLOCK_EVT_FEAT_DUMMY;
949 	}
950 	local_irq_enable();
951 
952 	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
953 		pr_warn("APIC timer disabled due to verification failure\n");
954 		return -1;
955 	}
956 
957 	return 0;
958 }
959 
960 /*
961  * Setup the boot APIC
962  *
963  * Calibrate and verify the result.
964  */
setup_boot_APIC_clock(void)965 void __init setup_boot_APIC_clock(void)
966 {
967 	/*
968 	 * The local apic timer can be disabled via the kernel
969 	 * commandline or from the CPU detection code. Register the lapic
970 	 * timer as a dummy clock event source on SMP systems, so the
971 	 * broadcast mechanism is used. On UP systems simply ignore it.
972 	 */
973 	if (disable_apic_timer) {
974 		pr_info("Disabling APIC timer\n");
975 		/* No broadcast on UP ! */
976 		if (num_possible_cpus() > 1) {
977 			lapic_clockevent.mult = 1;
978 			setup_APIC_timer();
979 		}
980 		return;
981 	}
982 
983 	if (calibrate_APIC_clock()) {
984 		/* No broadcast on UP ! */
985 		if (num_possible_cpus() > 1)
986 			setup_APIC_timer();
987 		return;
988 	}
989 
990 	/*
991 	 * If nmi_watchdog is set to IO_APIC, we need the
992 	 * PIT/HPET going.  Otherwise register lapic as a dummy
993 	 * device.
994 	 */
995 	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
996 
997 	/* Setup the lapic or request the broadcast */
998 	setup_APIC_timer();
999 	amd_e400_c1e_apic_setup();
1000 }
1001 
/*
 * Register the local APIC timer of the calling CPU via setup_APIC_timer()
 * and then call amd_e400_c1e_apic_setup().
 *
 * NOTE(review): going by the name this is meant for the non-boot CPUs;
 * the callers are not visible here - confirm.
 */
void setup_secondary_APIC_clock(void)
{
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}
1007 
1008 /*
1009  * The guts of the apic timer interrupt
1010  */
local_apic_timer_interrupt(void)1011 static void local_apic_timer_interrupt(void)
1012 {
1013 	struct clock_event_device *evt = this_cpu_ptr(&lapic_events);
1014 
1015 	/*
1016 	 * Normally we should not be here till LAPIC has been initialized but
1017 	 * in some cases like kdump, its possible that there is a pending LAPIC
1018 	 * timer interrupt from previous kernel's context and is delivered in
1019 	 * new kernel the moment interrupts are enabled.
1020 	 *
1021 	 * Interrupts are enabled early and LAPIC is setup much later, hence
1022 	 * its possible that when we get here evt->event_handler is NULL.
1023 	 * Check for event_handler being NULL and discard the interrupt as
1024 	 * spurious.
1025 	 */
1026 	if (!evt->event_handler) {
1027 		pr_warn("Spurious LAPIC timer interrupt on cpu %d\n",
1028 			smp_processor_id());
1029 		/* Switch it off */
1030 		lapic_timer_shutdown(evt);
1031 		return;
1032 	}
1033 
1034 	/*
1035 	 * the NMI deadlock-detector uses this.
1036 	 */
1037 	inc_irq_stat(apic_timer_irqs);
1038 
1039 	evt->event_handler(evt);
1040 }
1041 
1042 /*
1043  * Local APIC timer interrupt. This is the most natural way for doing
1044  * local interrupts, but local timer interrupts can be emulated by
1045  * broadcast interrupts too. [in case the hw doesn't support APIC timers]
1046  *
1047  * [ if a single-CPU system runs an SMP kernel then we call the local
1048  *   interrupt as well. Thus we cannot inline the local irq ... ]
1049  */
DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)1050 DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
1051 {
1052 	struct pt_regs *old_regs = set_irq_regs(regs);
1053 
1054 	apic_eoi();
1055 	trace_local_timer_entry(LOCAL_TIMER_VECTOR);
1056 	local_apic_timer_interrupt();
1057 	trace_local_timer_exit(LOCAL_TIMER_VECTOR);
1058 
1059 	set_irq_regs(old_regs);
1060 }
1061 
1062 /*
1063  * Local APIC start and shutdown
1064  */
1065 
1066 /**
1067  * clear_local_APIC - shutdown the local APIC
1068  *
1069  * This is called, when a CPU is disabled and before rebooting, so the state of
1070  * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
1071  * leftovers during boot.
1072  */
clear_local_APIC(void)1073 void clear_local_APIC(void)
1074 {
1075 	int maxlvt;
1076 	u32 v;
1077 
1078 	if (!apic_accessible())
1079 		return;
1080 
1081 	maxlvt = lapic_get_maxlvt();
1082 	/*
1083 	 * Masking an LVT entry can trigger a local APIC error
1084 	 * if the vector is zero. Mask LVTERR first to prevent this.
1085 	 */
1086 	if (maxlvt >= 3) {
1087 		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
1088 		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
1089 	}
1090 	/*
1091 	 * Careful: we have to set masks only first to deassert
1092 	 * any level-triggered sources.
1093 	 */
1094 	v = apic_read(APIC_LVTT);
1095 	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
1096 	v = apic_read(APIC_LVT0);
1097 	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
1098 	v = apic_read(APIC_LVT1);
1099 	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
1100 	if (maxlvt >= 4) {
1101 		v = apic_read(APIC_LVTPC);
1102 		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
1103 	}
1104 
1105 	/* lets not touch this if we didn't frob it */
1106 #ifdef CONFIG_X86_THERMAL_VECTOR
1107 	if (maxlvt >= 5) {
1108 		v = apic_read(APIC_LVTTHMR);
1109 		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
1110 	}
1111 #endif
1112 #ifdef CONFIG_X86_MCE_INTEL
1113 	if (maxlvt >= 6) {
1114 		v = apic_read(APIC_LVTCMCI);
1115 		if (!(v & APIC_LVT_MASKED))
1116 			apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
1117 	}
1118 #endif
1119 
1120 	/*
1121 	 * Clean APIC state for other OSs:
1122 	 */
1123 	apic_write(APIC_LVTT, APIC_LVT_MASKED);
1124 	apic_write(APIC_LVT0, APIC_LVT_MASKED);
1125 	apic_write(APIC_LVT1, APIC_LVT_MASKED);
1126 	if (maxlvt >= 3)
1127 		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
1128 	if (maxlvt >= 4)
1129 		apic_write(APIC_LVTPC, APIC_LVT_MASKED);
1130 
1131 	/* Integrated APIC (!82489DX) ? */
1132 	if (lapic_is_integrated()) {
1133 		if (maxlvt > 3)
1134 			/* Clear ESR due to Pentium errata 3AP and 11AP */
1135 			apic_write(APIC_ESR, 0);
1136 		apic_read(APIC_ESR);
1137 	}
1138 }
1139 
1140 /**
1141  * apic_soft_disable - Clears and software disables the local APIC on hotplug
1142  *
1143  * Contrary to disable_local_APIC() this does not touch the enable bit in
1144  * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
1145  * bus would require a hardware reset as the APIC would lose track of bus
1146  * arbitration. On systems with FSB delivery APICBASE could be disabled,
1147  * but it has to be guaranteed that no interrupt is sent to the APIC while
1148  * in that state and it's not clear from the SDM whether it still responds
1149  * to INIT/SIPI messages. Stay on the safe side and use software disable.
1150  */
apic_soft_disable(void)1151 void apic_soft_disable(void)
1152 {
1153 	u32 value;
1154 
1155 	clear_local_APIC();
1156 
1157 	/* Soft disable APIC (implies clearing of registers for 82489DX!). */
1158 	value = apic_read(APIC_SPIV);
1159 	value &= ~APIC_SPIV_APIC_ENABLED;
1160 	apic_write(APIC_SPIV, value);
1161 }
1162 
/**
 * disable_local_APIC - clear and disable the local APIC
 *
 * Does nothing when the APIC is not accessible. Otherwise the APIC is
 * soft disabled and, on 32-bit, the enable bit in MSR_IA32_APICBASE is
 * cleared again if the kernel was the one that set it.
 */
void disable_local_APIC(void)
{
	if (!apic_accessible())
		return;

	apic_soft_disable();

#ifdef CONFIG_X86_32
	/*
	 * The LAPIC was disabled by the BIOS and enabled by the kernel:
	 * restore the disabled state.
	 */
	if (enabled_via_apicbase) {
		unsigned int lo, hi;

		rdmsr(MSR_IA32_APICBASE, lo, hi);
		lo &= ~MSR_IA32_APICBASE_ENABLE;
		wrmsr(MSR_IA32_APICBASE, lo, hi);
	}
#endif
}
1187 
1188 /*
1189  * If Linux enabled the LAPIC against the BIOS default disable it down before
1190  * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
1191  * not power-off.  Additionally clear all LVT entries before disable_local_APIC
1192  * for the case where Linux didn't enable the LAPIC.
1193  */
lapic_shutdown(void)1194 void lapic_shutdown(void)
1195 {
1196 	unsigned long flags;
1197 
1198 	if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
1199 		return;
1200 
1201 	local_irq_save(flags);
1202 
1203 #ifdef CONFIG_X86_32
1204 	if (!enabled_via_apicbase)
1205 		clear_local_APIC();
1206 	else
1207 #endif
1208 		disable_local_APIC();
1209 
1210 
1211 	local_irq_restore(flags);
1212 }
1213 
1214 /**
1215  * sync_Arb_IDs - synchronize APIC bus arbitration IDs
1216  */
sync_Arb_IDs(void)1217 void __init sync_Arb_IDs(void)
1218 {
1219 	/*
1220 	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
1221 	 * needed on AMD.
1222 	 */
1223 	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
1224 		return;
1225 
1226 	/*
1227 	 * Wait for idle.
1228 	 */
1229 	apic_wait_icr_idle();
1230 
1231 	apic_pr_debug("Synchronizing Arb IDs.\n");
1232 	apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
1233 }
1234 
1235 enum apic_intr_mode_id apic_intr_mode __ro_after_init;
1236 
__apic_intr_mode_select(void)1237 static int __init __apic_intr_mode_select(void)
1238 {
1239 	/* Check kernel option */
1240 	if (apic_is_disabled) {
1241 		pr_info("APIC disabled via kernel command line\n");
1242 		return APIC_PIC;
1243 	}
1244 
1245 	/* Check BIOS */
1246 #ifdef CONFIG_X86_64
1247 	/* On 64-bit, the APIC must be integrated, Check local APIC only */
1248 	if (!boot_cpu_has(X86_FEATURE_APIC)) {
1249 		apic_is_disabled = true;
1250 		pr_info("APIC disabled by BIOS\n");
1251 		return APIC_PIC;
1252 	}
1253 #else
1254 	/* On 32-bit, the APIC may be integrated APIC or 82489DX */
1255 
1256 	/* Neither 82489DX nor integrated APIC ? */
1257 	if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
1258 		apic_is_disabled = true;
1259 		return APIC_PIC;
1260 	}
1261 
1262 	/* If the BIOS pretends there is an integrated APIC ? */
1263 	if (!boot_cpu_has(X86_FEATURE_APIC) &&
1264 		APIC_INTEGRATED(boot_cpu_apic_version)) {
1265 		apic_is_disabled = true;
1266 		pr_err(FW_BUG "Local APIC not detected, force emulation\n");
1267 		return APIC_PIC;
1268 	}
1269 #endif
1270 
1271 	/* Check MP table or ACPI MADT configuration */
1272 	if (!smp_found_config) {
1273 		disable_ioapic_support();
1274 		if (!acpi_lapic) {
1275 			pr_info("APIC: ACPI MADT or MP tables are not detected\n");
1276 			return APIC_VIRTUAL_WIRE_NO_CONFIG;
1277 		}
1278 		return APIC_VIRTUAL_WIRE;
1279 	}
1280 
1281 #ifdef CONFIG_SMP
1282 	/* If SMP should be disabled, then really disable it! */
1283 	if (!setup_max_cpus) {
1284 		pr_info("APIC: SMP mode deactivated\n");
1285 		return APIC_SYMMETRIC_IO_NO_ROUTING;
1286 	}
1287 #endif
1288 
1289 	return APIC_SYMMETRIC_IO;
1290 }
1291 
1292 /* Select the interrupt delivery mode for the BSP */
apic_intr_mode_select(void)1293 void __init apic_intr_mode_select(void)
1294 {
1295 	apic_intr_mode = __apic_intr_mode_select();
1296 }
1297 
1298 /*
1299  * An initial setup of the virtual wire mode.
1300  */
init_bsp_APIC(void)1301 void __init init_bsp_APIC(void)
1302 {
1303 	unsigned int value;
1304 
1305 	/*
1306 	 * Don't do the setup now if we have a SMP BIOS as the
1307 	 * through-I/O-APIC virtual wire mode might be active.
1308 	 */
1309 	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
1310 		return;
1311 
1312 	/*
1313 	 * Do not trust the local APIC being empty at bootup.
1314 	 */
1315 	clear_local_APIC();
1316 
1317 	/*
1318 	 * Enable APIC.
1319 	 */
1320 	value = apic_read(APIC_SPIV);
1321 	value &= ~APIC_VECTOR_MASK;
1322 	value |= APIC_SPIV_APIC_ENABLED;
1323 
1324 #ifdef CONFIG_X86_32
1325 	/* This bit is reserved on P4/Xeon and should be cleared */
1326 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
1327 	    (boot_cpu_data.x86 == 15))
1328 		value &= ~APIC_SPIV_FOCUS_DISABLED;
1329 	else
1330 #endif
1331 		value |= APIC_SPIV_FOCUS_DISABLED;
1332 	value |= SPURIOUS_APIC_VECTOR;
1333 	apic_write(APIC_SPIV, value);
1334 
1335 	/*
1336 	 * Set up the virtual wire mode.
1337 	 */
1338 	apic_write(APIC_LVT0, APIC_DM_EXTINT);
1339 	value = APIC_DM_NMI;
1340 	if (!lapic_is_integrated())		/* 82489DX */
1341 		value |= APIC_LVT_LEVEL_TRIGGER;
1342 	if (apic_extnmi == APIC_EXTNMI_NONE)
1343 		value |= APIC_LVT_MASKED;
1344 	apic_write(APIC_LVT1, value);
1345 }
1346 
1347 static void __init apic_bsp_setup(bool upmode);
1348 
1349 /* Init the interrupt delivery mode for the BSP */
apic_intr_mode_init(void)1350 void __init apic_intr_mode_init(void)
1351 {
1352 	bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
1353 
1354 	switch (apic_intr_mode) {
1355 	case APIC_PIC:
1356 		pr_info("APIC: Keep in PIC mode(8259)\n");
1357 		return;
1358 	case APIC_VIRTUAL_WIRE:
1359 		pr_info("APIC: Switch to virtual wire mode setup\n");
1360 		break;
1361 	case APIC_VIRTUAL_WIRE_NO_CONFIG:
1362 		pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
1363 		upmode = true;
1364 		break;
1365 	case APIC_SYMMETRIC_IO:
1366 		pr_info("APIC: Switch to symmetric I/O mode setup\n");
1367 		break;
1368 	case APIC_SYMMETRIC_IO_NO_ROUTING:
1369 		pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
1370 		break;
1371 	}
1372 
1373 	x86_64_probe_apic();
1374 
1375 	if (x86_platform.apic_post_init)
1376 		x86_platform.apic_post_init();
1377 
1378 	apic_bsp_setup(upmode);
1379 }
1380 
lapic_setup_esr(void)1381 static void lapic_setup_esr(void)
1382 {
1383 	unsigned int oldvalue, value, maxlvt;
1384 
1385 	if (!lapic_is_integrated()) {
1386 		pr_info("No ESR for 82489DX.\n");
1387 		return;
1388 	}
1389 
1390 	if (apic->disable_esr) {
1391 		/*
1392 		 * Something untraceable is creating bad interrupts on
1393 		 * secondary quads ... for the moment, just leave the
1394 		 * ESR disabled - we can't do anything useful with the
1395 		 * errors anyway - mbligh
1396 		 */
1397 		pr_info("Leaving ESR disabled.\n");
1398 		return;
1399 	}
1400 
1401 	maxlvt = lapic_get_maxlvt();
1402 	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
1403 		apic_write(APIC_ESR, 0);
1404 	oldvalue = apic_read(APIC_ESR);
1405 
1406 	/* enables sending errors */
1407 	value = ERROR_APIC_VECTOR;
1408 	apic_write(APIC_LVTERR, value);
1409 
1410 	/*
1411 	 * spec says clear errors after enabling vector.
1412 	 */
1413 	if (maxlvt > 3)
1414 		apic_write(APIC_ESR, 0);
1415 	value = apic_read(APIC_ESR);
1416 	if (value != oldvalue) {
1417 		apic_pr_verbose("ESR value before enabling vector: 0x%08x  after: 0x%08x\n",
1418 				oldvalue, value);
1419 	}
1420 }
1421 
1422 #define APIC_IR_REGS		APIC_ISR_NR
1423 #define APIC_IR_BITS		(APIC_IR_REGS * 32)
1424 #define APIC_IR_MAPSIZE		(APIC_IR_BITS / BITS_PER_LONG)
1425 
1426 union apic_ir {
1427 	unsigned long	map[APIC_IR_MAPSIZE];
1428 	u32		regs[APIC_IR_REGS];
1429 };
1430 
apic_check_and_ack(union apic_ir * irr,union apic_ir * isr)1431 static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
1432 {
1433 	int i, bit;
1434 
1435 	/* Read the IRRs */
1436 	for (i = 0; i < APIC_IR_REGS; i++)
1437 		irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
1438 
1439 	/* Read the ISRs */
1440 	for (i = 0; i < APIC_IR_REGS; i++)
1441 		isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
1442 
1443 	/*
1444 	 * If the ISR map is not empty. ACK the APIC and run another round
1445 	 * to verify whether a pending IRR has been unblocked and turned
1446 	 * into a ISR.
1447 	 */
1448 	if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
1449 		/*
1450 		 * There can be multiple ISR bits set when a high priority
1451 		 * interrupt preempted a lower priority one. Issue an ACK
1452 		 * per set bit.
1453 		 */
1454 		for_each_set_bit(bit, isr->map, APIC_IR_BITS)
1455 			apic_eoi();
1456 		return true;
1457 	}
1458 
1459 	return !bitmap_empty(irr->map, APIC_IR_BITS);
1460 }
1461 
1462 /*
1463  * After a crash, we no longer service the interrupts and a pending
1464  * interrupt from previous kernel might still have ISR bit set.
1465  *
1466  * Most probably by now the CPU has serviced that pending interrupt and it
1467  * might not have done the apic_eoi() because it thought, interrupt
1468  * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
1469  * the ISR bit and cpu thinks it has already serviced the interrupt. Hence
1470  * a vector might get locked. It was noticed for timer irq (vector
1471  * 0x31). Issue an extra EOI to clear ISR.
1472  *
1473  * If there are pending IRR bits they turn into ISR bits after a higher
1474  * priority ISR bit has been acked.
1475  */
apic_pending_intr_clear(void)1476 static void apic_pending_intr_clear(void)
1477 {
1478 	union apic_ir irr, isr;
1479 	unsigned int i;
1480 
1481 	/* 512 loops are way oversized and give the APIC a chance to obey. */
1482 	for (i = 0; i < 512; i++) {
1483 		if (!apic_check_and_ack(&irr, &isr))
1484 			return;
1485 	}
1486 	/* Dump the IRR/ISR content if that failed */
1487 	pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
1488 }
1489 
1490 /**
1491  * setup_local_APIC - setup the local APIC
1492  *
1493  * Used to setup local APIC while initializing BSP or bringing up APs.
1494  * Always called with preemption disabled.
1495  */
setup_local_APIC(void)1496 static void setup_local_APIC(void)
1497 {
1498 	int cpu = smp_processor_id();
1499 	unsigned int value;
1500 
1501 	if (apic_is_disabled) {
1502 		disable_ioapic_support();
1503 		return;
1504 	}
1505 
1506 	/*
1507 	 * If this comes from kexec/kcrash the APIC might be enabled in
1508 	 * SPIV. Soft disable it before doing further initialization.
1509 	 */
1510 	value = apic_read(APIC_SPIV);
1511 	value &= ~APIC_SPIV_APIC_ENABLED;
1512 	apic_write(APIC_SPIV, value);
1513 
1514 #ifdef CONFIG_X86_32
1515 	/* Pound the ESR really hard over the head with a big hammer - mbligh */
1516 	if (lapic_is_integrated() && apic->disable_esr) {
1517 		apic_write(APIC_ESR, 0);
1518 		apic_write(APIC_ESR, 0);
1519 		apic_write(APIC_ESR, 0);
1520 		apic_write(APIC_ESR, 0);
1521 	}
1522 #endif
1523 	/*
1524 	 * Intel recommends to set DFR, LDR and TPR before enabling
1525 	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
1526 	 * document number 292116).
1527 	 *
1528 	 * Except for APICs which operate in physical destination mode.
1529 	 */
1530 	if (apic->init_apic_ldr)
1531 		apic->init_apic_ldr();
1532 
1533 	/*
1534 	 * Set Task Priority to 'accept all except vectors 0-31'.  An APIC
1535 	 * vector in the 16-31 range could be delivered if TPR == 0, but we
1536 	 * would think it's an exception and terrible things will happen.  We
1537 	 * never change this later on.
1538 	 */
1539 	value = apic_read(APIC_TASKPRI);
1540 	value &= ~APIC_TPRI_MASK;
1541 	value |= 0x10;
1542 	apic_write(APIC_TASKPRI, value);
1543 
1544 	/* Clear eventually stale ISR/IRR bits */
1545 	apic_pending_intr_clear();
1546 
1547 	/*
1548 	 * Now that we are all set up, enable the APIC
1549 	 */
1550 	value = apic_read(APIC_SPIV);
1551 	value &= ~APIC_VECTOR_MASK;
1552 	/*
1553 	 * Enable APIC
1554 	 */
1555 	value |= APIC_SPIV_APIC_ENABLED;
1556 
1557 #ifdef CONFIG_X86_32
1558 	/*
1559 	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
1560 	 * certain networking cards. If high frequency interrupts are
1561 	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
1562 	 * entry is masked/unmasked at a high rate as well then sooner or
1563 	 * later IOAPIC line gets 'stuck', no more interrupts are received
1564 	 * from the device. If focus CPU is disabled then the hang goes
1565 	 * away, oh well :-(
1566 	 *
1567 	 * [ This bug can be reproduced easily with a level-triggered
1568 	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
1569 	 *   BX chipset. ]
1570 	 */
1571 	/*
1572 	 * Actually disabling the focus CPU check just makes the hang less
1573 	 * frequent as it makes the interrupt distribution model be more
1574 	 * like LRU than MRU (the short-term load is more even across CPUs).
1575 	 */
1576 
1577 	/*
1578 	 * - enable focus processor (bit==0)
1579 	 * - 64bit mode always use processor focus
1580 	 *   so no need to set it
1581 	 */
1582 	value &= ~APIC_SPIV_FOCUS_DISABLED;
1583 #endif
1584 
1585 	/*
1586 	 * Set spurious IRQ vector
1587 	 */
1588 	value |= SPURIOUS_APIC_VECTOR;
1589 	apic_write(APIC_SPIV, value);
1590 
1591 	perf_events_lapic_init();
1592 
1593 	/*
1594 	 * Set up LVT0, LVT1:
1595 	 *
1596 	 * set up through-local-APIC on the boot CPU's LINT0. This is not
1597 	 * strictly necessary in pure symmetric-IO mode, but sometimes
1598 	 * we delegate interrupts to the 8259A.
1599 	 */
1600 	/*
1601 	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
1602 	 */
1603 	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
1604 	if (!cpu && (pic_mode || !value || ioapic_is_disabled)) {
1605 		value = APIC_DM_EXTINT;
1606 		apic_pr_verbose("Enabled ExtINT on CPU#%d\n", cpu);
1607 	} else {
1608 		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
1609 		apic_pr_verbose("Masked ExtINT on CPU#%d\n", cpu);
1610 	}
1611 	apic_write(APIC_LVT0, value);
1612 
1613 	/*
1614 	 * Only the BSP sees the LINT1 NMI signal by default. This can be
1615 	 * modified by apic_extnmi= boot option.
1616 	 */
1617 	if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
1618 	    apic_extnmi == APIC_EXTNMI_ALL)
1619 		value = APIC_DM_NMI;
1620 	else
1621 		value = APIC_DM_NMI | APIC_LVT_MASKED;
1622 
1623 	/* Is 82489DX ? */
1624 	if (!lapic_is_integrated())
1625 		value |= APIC_LVT_LEVEL_TRIGGER;
1626 	apic_write(APIC_LVT1, value);
1627 
1628 #ifdef CONFIG_X86_MCE_INTEL
1629 	/* Recheck CMCI information after local APIC is up on CPU #0 */
1630 	if (!cpu)
1631 		cmci_recheck();
1632 #endif
1633 }
1634 
/*
 * Final steps of the local APIC setup: enable error reporting, mask the
 * APIC timer on 32-bit and activate APIC power management.
 */
static void end_local_APIC_setup(void)
{
	lapic_setup_esr();

#ifdef CONFIG_X86_32
	{
		/* Disable the local apic timer */
		unsigned int lvtt = apic_read(APIC_LVTT);

		lvtt |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
		apic_write(APIC_LVTT, lvtt);
	}
#endif

	apic_pm_activate();
}
1651 
/*
 * APIC setup function for application processors. Called from smpboot.c.
 * Runs the main local APIC setup followed by the final setup steps.
 */
void apic_ap_setup(void)
{
	setup_local_APIC();
	end_local_APIC_setup();
}
1660 
apic_read_boot_cpu_id(bool x2apic)1661 static __init void apic_read_boot_cpu_id(bool x2apic)
1662 {
1663 	/*
1664 	 * This can be invoked from check_x2apic() before the APIC has been
1665 	 * selected. But that code knows for sure that the BIOS enabled
1666 	 * X2APIC.
1667 	 */
1668 	if (x2apic) {
1669 		boot_cpu_physical_apicid = native_apic_msr_read(APIC_ID);
1670 		boot_cpu_apic_version = GET_APIC_VERSION(native_apic_msr_read(APIC_LVR));
1671 	} else {
1672 		boot_cpu_physical_apicid = read_apic_id();
1673 		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
1674 	}
1675 	topology_register_boot_apic(boot_cpu_physical_apicid);
1676 }
1677 
1678 #ifdef CONFIG_X86_X2APIC
/* Set to 1 while x2APIC mode is in use, 0 otherwise */
int x2apic_mode;
EXPORT_SYMBOL_GPL(x2apic_mode);

/*
 * x2APIC state tracking. The order is significant: code compares
 * against X2APIC_ON to find out whether x2APIC is enabled.
 */
enum {
	X2APIC_OFF,
	X2APIC_DISABLED,
	/* All states below here have X2APIC enabled */
	X2APIC_ON,
	X2APIC_ON_LOCKED
};
static int x2apic_state;
1690 
x2apic_hw_locked(void)1691 static bool x2apic_hw_locked(void)
1692 {
1693 	u64 x86_arch_cap_msr;
1694 	u64 msr;
1695 
1696 	x86_arch_cap_msr = x86_read_arch_cap_msr();
1697 	if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
1698 		rdmsrq(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
1699 		return (msr & LEGACY_XAPIC_DISABLED);
1700 	}
1701 	return false;
1702 }
1703 
__x2apic_disable(void)1704 static void __x2apic_disable(void)
1705 {
1706 	u64 msr;
1707 
1708 	if (!boot_cpu_has(X86_FEATURE_APIC))
1709 		return;
1710 
1711 	rdmsrq(MSR_IA32_APICBASE, msr);
1712 	if (!(msr & X2APIC_ENABLE))
1713 		return;
1714 	/* Disable xapic and x2apic first and then reenable xapic mode */
1715 	wrmsrq(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
1716 	wrmsrq(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
1717 	printk_once(KERN_INFO "x2apic disabled\n");
1718 }
1719 
__x2apic_enable(void)1720 static void __x2apic_enable(void)
1721 {
1722 	u64 msr;
1723 
1724 	rdmsrq(MSR_IA32_APICBASE, msr);
1725 	if (msr & X2APIC_ENABLE)
1726 		return;
1727 	wrmsrq(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
1728 	printk_once(KERN_INFO "x2apic enabled\n");
1729 }
1730 
setup_nox2apic(char * str)1731 static int __init setup_nox2apic(char *str)
1732 {
1733 	if (x2apic_enabled()) {
1734 		u32 apicid = native_apic_msr_read(APIC_ID);
1735 
1736 		if (apicid >= 255) {
1737 			pr_warn("Apicid: %08x, cannot enforce nox2apic\n",
1738 				apicid);
1739 			return 0;
1740 		}
1741 		if (x2apic_hw_locked()) {
1742 			pr_warn("APIC locked in x2apic mode, can't disable\n");
1743 			return 0;
1744 		}
1745 		pr_warn("x2apic already enabled.\n");
1746 		__x2apic_disable();
1747 	}
1748 	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
1749 	x2apic_state = X2APIC_DISABLED;
1750 	x2apic_mode = 0;
1751 	return 0;
1752 }
1753 early_param("nox2apic", setup_nox2apic);
1754 
1755 /* Called from cpu_init() to enable x2apic on (secondary) cpus */
x2apic_setup(void)1756 void x2apic_setup(void)
1757 {
1758 	/*
1759 	 * Try to make the AP's APIC state match that of the BSP,  but if the
1760 	 * BSP is unlocked and the AP is locked then there is a state mismatch.
1761 	 * Warn about the mismatch in case a GP fault occurs due to a locked AP
1762 	 * trying to be turned off.
1763 	 */
1764 	if (x2apic_state != X2APIC_ON_LOCKED && x2apic_hw_locked())
1765 		pr_warn("x2apic lock mismatch between BSP and AP.\n");
1766 	/*
1767 	 * If x2apic is not in ON or LOCKED state, disable it if already enabled
1768 	 * from BIOS.
1769 	 */
1770 	if (x2apic_state < X2APIC_ON) {
1771 		__x2apic_disable();
1772 		return;
1773 	}
1774 	__x2apic_enable();
1775 }
1776 
1777 static __init void apic_set_fixmap(bool read_apic);
1778 
x2apic_disable(void)1779 static __init void x2apic_disable(void)
1780 {
1781 	u32 x2apic_id;
1782 
1783 	if (x2apic_state < X2APIC_ON)
1784 		return;
1785 
1786 	x2apic_id = read_apic_id();
1787 	if (x2apic_id >= 255)
1788 		panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
1789 
1790 	if (x2apic_hw_locked()) {
1791 		pr_warn("Cannot disable locked x2apic, id: %08x\n", x2apic_id);
1792 		return;
1793 	}
1794 
1795 	__x2apic_disable();
1796 
1797 	x2apic_mode = 0;
1798 	x2apic_state = X2APIC_DISABLED;
1799 
1800 	/*
1801 	 * Don't reread the APIC ID as it was already done from
1802 	 * check_x2apic() and the APIC driver still is a x2APIC variant,
1803 	 * which fails to do the read after x2APIC was disabled.
1804 	 */
1805 	apic_set_fixmap(false);
1806 }
1807 
x2apic_enable(void)1808 static __init void x2apic_enable(void)
1809 {
1810 	if (x2apic_state != X2APIC_OFF)
1811 		return;
1812 
1813 	x2apic_mode = 1;
1814 	x2apic_state = X2APIC_ON;
1815 	__x2apic_enable();
1816 }
1817 
try_to_enable_x2apic(int remap_mode)1818 static __init void try_to_enable_x2apic(int remap_mode)
1819 {
1820 	if (x2apic_state == X2APIC_DISABLED)
1821 		return;
1822 
1823 	if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
1824 		u32 apic_limit = 255;
1825 
1826 		/*
1827 		 * Using X2APIC without IR is not architecturally supported
1828 		 * on bare metal but may be supported in guests.
1829 		 */
1830 		if (!x86_init.hyper.x2apic_available()) {
1831 			pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
1832 			x2apic_disable();
1833 			return;
1834 		}
1835 
1836 		/*
1837 		 * If the hypervisor supports extended destination ID in
1838 		 * MSI, that increases the maximum APIC ID that can be
1839 		 * used for non-remapped IRQ domains.
1840 		 */
1841 		if (x86_init.hyper.msi_ext_dest_id()) {
1842 			virt_ext_dest_id = 1;
1843 			apic_limit = 32767;
1844 		}
1845 
1846 		/*
1847 		 * Without IR, all CPUs can be addressed by IOAPIC/MSI only
1848 		 * in physical mode, and CPUs with an APIC ID that cannot
1849 		 * be addressed must not be brought online.
1850 		 */
1851 		x2apic_set_max_apicid(apic_limit);
1852 		x2apic_phys = 1;
1853 	}
1854 	x2apic_enable();
1855 }
1856 
check_x2apic(void)1857 void __init check_x2apic(void)
1858 {
1859 	if (x2apic_enabled()) {
1860 		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
1861 		x2apic_mode = 1;
1862 		if (x2apic_hw_locked())
1863 			x2apic_state = X2APIC_ON_LOCKED;
1864 		else
1865 			x2apic_state = X2APIC_ON;
1866 		apic_read_boot_cpu_id(true);
1867 	} else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
1868 		x2apic_state = X2APIC_DISABLED;
1869 	}
1870 }
1871 #else /* CONFIG_X86_X2APIC */
check_x2apic(void)1872 void __init check_x2apic(void)
1873 {
1874 	if (!apic_is_x2apic_enabled())
1875 		return;
1876 	/*
1877 	 * Checkme: Can we simply turn off x2APIC here instead of disabling the APIC?
1878 	 */
1879 	pr_err("Kernel does not support x2APIC, please recompile with CONFIG_X86_X2APIC.\n");
1880 	pr_err("Disabling APIC, expect reduced performance and functionality.\n");
1881 
1882 	apic_is_disabled = true;
1883 	setup_clear_cpu_cap(X86_FEATURE_APIC);
1884 }
1885 
/* No-op stand-ins used when x2APIC support is compiled out */
static inline void try_to_enable_x2apic(int remap_mode)
{
}

static inline void __x2apic_enable(void)
{
}
1888 #endif /* !CONFIG_X86_X2APIC */
1889 
enable_IR_x2apic(void)1890 void __init enable_IR_x2apic(void)
1891 {
1892 	unsigned long flags;
1893 	int ret, ir_stat;
1894 
1895 	if (ioapic_is_disabled) {
1896 		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
1897 		return;
1898 	}
1899 
1900 	ir_stat = irq_remapping_prepare();
1901 	if (ir_stat < 0 && !x2apic_supported())
1902 		return;
1903 
1904 	ret = save_ioapic_entries();
1905 	if (ret) {
1906 		pr_info("Saving IO-APIC state failed: %d\n", ret);
1907 		return;
1908 	}
1909 
1910 	local_irq_save(flags);
1911 	legacy_pic->mask_all();
1912 	mask_ioapic_entries();
1913 
1914 	/* If irq_remapping_prepare() succeeded, try to enable it */
1915 	if (ir_stat >= 0)
1916 		ir_stat = irq_remapping_enable();
1917 	/* ir_stat contains the remap mode or an error code */
1918 	try_to_enable_x2apic(ir_stat);
1919 
1920 	if (ir_stat < 0)
1921 		restore_ioapic_entries();
1922 	legacy_pic->restore_mask();
1923 	local_irq_restore(flags);
1924 }
1925 
1926 #ifdef CONFIG_X86_64
1927 /*
1928  * Detect and enable local APICs on non-SMP boards.
1929  * Original code written by Keir Fraser.
1930  * On AMD64 we trust the BIOS - if it says no APIC it is likely
1931  * not correctly set up (usually the APIC timer won't work etc.)
1932  */
detect_init_APIC(void)1933 static bool __init detect_init_APIC(void)
1934 {
1935 	if (!boot_cpu_has(X86_FEATURE_APIC)) {
1936 		pr_info("No local APIC present\n");
1937 		return false;
1938 	}
1939 
1940 	register_lapic_address(APIC_DEFAULT_PHYS_BASE);
1941 	return true;
1942 }
1943 #else
1944 
apic_verify(unsigned long addr)1945 static bool __init apic_verify(unsigned long addr)
1946 {
1947 	u32 features, h, l;
1948 
1949 	/*
1950 	 * The APIC feature bit should now be enabled
1951 	 * in `cpuid'
1952 	 */
1953 	features = cpuid_edx(1);
1954 	if (!(features & (1 << X86_FEATURE_APIC))) {
1955 		pr_warn("Could not enable APIC!\n");
1956 		return false;
1957 	}
1958 	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1959 
1960 	/* The BIOS may have set up the APIC at some other address */
1961 	if (boot_cpu_data.x86 >= 6) {
1962 		rdmsr(MSR_IA32_APICBASE, l, h);
1963 		if (l & MSR_IA32_APICBASE_ENABLE)
1964 			addr = l & MSR_IA32_APICBASE_BASE;
1965 	}
1966 
1967 	register_lapic_address(addr);
1968 	pr_info("Found and enabled local APIC!\n");
1969 	return true;
1970 }
1971 
apic_force_enable(unsigned long addr)1972 bool __init apic_force_enable(unsigned long addr)
1973 {
1974 	u32 h, l;
1975 
1976 	if (apic_is_disabled)
1977 		return false;
1978 
1979 	/*
1980 	 * Some BIOSes disable the local APIC in the APIC_BASE
1981 	 * MSR. This can only be done in software for Intel P6 or later
1982 	 * and AMD K7 (Model > 1) or later.
1983 	 */
1984 	if (boot_cpu_data.x86 >= 6) {
1985 		rdmsr(MSR_IA32_APICBASE, l, h);
1986 		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1987 			pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1988 			l &= ~MSR_IA32_APICBASE_BASE;
1989 			l |= MSR_IA32_APICBASE_ENABLE | addr;
1990 			wrmsr(MSR_IA32_APICBASE, l, h);
1991 			enabled_via_apicbase = 1;
1992 		}
1993 	}
1994 	return apic_verify(addr);
1995 }
1996 
1997 /*
1998  * Detect and initialize APIC
1999  */
detect_init_APIC(void)2000 static bool __init detect_init_APIC(void)
2001 {
2002 	/* Disabled by kernel option? */
2003 	if (apic_is_disabled)
2004 		return false;
2005 
2006 	switch (boot_cpu_data.x86_vendor) {
2007 	case X86_VENDOR_AMD:
2008 		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
2009 		    (boot_cpu_data.x86 >= 15))
2010 			break;
2011 		goto no_apic;
2012 	case X86_VENDOR_HYGON:
2013 		break;
2014 	case X86_VENDOR_INTEL:
2015 		if ((boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)) ||
2016 		    boot_cpu_data.x86_vfm >= INTEL_PENTIUM_PRO)
2017 			break;
2018 		goto no_apic;
2019 	default:
2020 		goto no_apic;
2021 	}
2022 
2023 	if (!boot_cpu_has(X86_FEATURE_APIC)) {
2024 		/*
2025 		 * Over-ride BIOS and try to enable the local APIC only if
2026 		 * "lapic" specified.
2027 		 */
2028 		if (!force_enable_local_apic) {
2029 			pr_info("Local APIC disabled by BIOS -- "
2030 				"you can enable it with \"lapic\"\n");
2031 			return false;
2032 		}
2033 		if (!apic_force_enable(APIC_DEFAULT_PHYS_BASE))
2034 			return false;
2035 	} else {
2036 		if (!apic_verify(APIC_DEFAULT_PHYS_BASE))
2037 			return false;
2038 	}
2039 
2040 	apic_pm_activate();
2041 
2042 	return true;
2043 
2044 no_apic:
2045 	pr_info("No local APIC present or hardware disabled\n");
2046 	return false;
2047 }
2048 #endif
2049 
2050 /**
2051  * init_apic_mappings - initialize APIC mappings
2052  */
init_apic_mappings(void)2053 void __init init_apic_mappings(void)
2054 {
2055 	if (apic_validate_deadline_timer())
2056 		pr_info("TSC deadline timer available\n");
2057 
2058 	if (x2apic_mode)
2059 		return;
2060 
2061 	if (!smp_found_config) {
2062 		if (!detect_init_APIC()) {
2063 			pr_info("APIC: disable apic facility\n");
2064 			apic_disable();
2065 		}
2066 	}
2067 }
2068 
apic_set_fixmap(bool read_apic)2069 static __init void apic_set_fixmap(bool read_apic)
2070 {
2071 	set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
2072 	apic_mmio_base = APIC_BASE;
2073 	apic_pr_verbose("Mapped APIC to %16lx (%16lx)\n", apic_mmio_base, mp_lapic_addr);
2074 	if (read_apic)
2075 		apic_read_boot_cpu_id(false);
2076 }
2077 
register_lapic_address(unsigned long address)2078 void __init register_lapic_address(unsigned long address)
2079 {
2080 	/* This should only happen once */
2081 	WARN_ON_ONCE(mp_lapic_addr);
2082 	mp_lapic_addr = address;
2083 
2084 	if (!x2apic_mode)
2085 		apic_set_fixmap(true);
2086 }
2087 
2088 /*
2089  * Local APIC interrupts
2090  */
2091 
2092 /*
2093  * Common handling code for spurious_interrupt and spurious_vector entry
2094  * points below. No point in allowing the compiler to inline it twice.
2095  */
handle_spurious_interrupt(u8 vector)2096 static noinline void handle_spurious_interrupt(u8 vector)
2097 {
2098 	u32 v;
2099 
2100 	trace_spurious_apic_entry(vector);
2101 
2102 	inc_irq_stat(irq_spurious_count);
2103 
2104 	/*
2105 	 * If this is a spurious interrupt then do not acknowledge
2106 	 */
2107 	if (vector == SPURIOUS_APIC_VECTOR) {
2108 		/* See SDM vol 3 */
2109 		pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
2110 			smp_processor_id());
2111 		goto out;
2112 	}
2113 
2114 	/*
2115 	 * If it is a vectored one, verify it's set in the ISR. If set,
2116 	 * acknowledge it.
2117 	 */
2118 	v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
2119 	if (v & (1 << (vector & 0x1f))) {
2120 		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
2121 			vector, smp_processor_id());
2122 		apic_eoi();
2123 	} else {
2124 		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
2125 			vector, smp_processor_id());
2126 	}
2127 out:
2128 	trace_spurious_apic_exit(vector);
2129 }
2130 
2131 /**
2132  * spurious_interrupt - Catch all for interrupts raised on unused vectors
2133  * @regs:	Pointer to pt_regs on stack
2134  * @vector:	The vector number
2135  *
2136  * This is invoked from ASM entry code to catch all interrupts which
2137  * trigger on an entry which is routed to the common_spurious idtentry
2138  * point.
2139  */
DEFINE_IDTENTRY_IRQ(spurious_interrupt)2140 DEFINE_IDTENTRY_IRQ(spurious_interrupt)
2141 {
2142 	handle_spurious_interrupt(vector);
2143 }
2144 
DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt)2145 DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt)
2146 {
2147 	handle_spurious_interrupt(SPURIOUS_APIC_VECTOR);
2148 }
2149 
2150 /*
2151  * This interrupt should never happen with our APIC/SMP architecture
2152  */
DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)2153 DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)
2154 {
2155 	static const char * const error_interrupt_reason[] = {
2156 		"Send CS error",		/* APIC Error Bit 0 */
2157 		"Receive CS error",		/* APIC Error Bit 1 */
2158 		"Send accept error",		/* APIC Error Bit 2 */
2159 		"Receive accept error",		/* APIC Error Bit 3 */
2160 		"Redirectable IPI",		/* APIC Error Bit 4 */
2161 		"Send illegal vector",		/* APIC Error Bit 5 */
2162 		"Received illegal vector",	/* APIC Error Bit 6 */
2163 		"Illegal register address",	/* APIC Error Bit 7 */
2164 	};
2165 	u32 v, i = 0;
2166 
2167 	trace_error_apic_entry(ERROR_APIC_VECTOR);
2168 
2169 	/* First tickle the hardware, only then report what went on. -- REW */
2170 	if (lapic_get_maxlvt() > 3)	/* Due to the Pentium erratum 3AP. */
2171 		apic_write(APIC_ESR, 0);
2172 	v = apic_read(APIC_ESR);
2173 	apic_eoi();
2174 	atomic_inc(&irq_err_count);
2175 
2176 	apic_pr_debug("APIC error on CPU%d: %02x", smp_processor_id(), v);
2177 
2178 	v &= 0xff;
2179 	while (v) {
2180 		if (v & 0x1)
2181 			apic_pr_debug_cont(" : %s", error_interrupt_reason[i]);
2182 		i++;
2183 		v >>= 1;
2184 	}
2185 
2186 	apic_pr_debug_cont("\n");
2187 
2188 	trace_error_apic_exit(ERROR_APIC_VECTOR);
2189 }
2190 
2191 /**
2192  * connect_bsp_APIC - attach the APIC to the interrupt system
2193  */
connect_bsp_APIC(void)2194 static void __init connect_bsp_APIC(void)
2195 {
2196 #ifdef CONFIG_X86_32
2197 	if (pic_mode) {
2198 		/*
2199 		 * Do not trust the local APIC being empty at bootup.
2200 		 */
2201 		clear_local_APIC();
2202 		/*
2203 		 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
2204 		 * local APIC to INT and NMI lines.
2205 		 */
2206 		apic_pr_verbose("Leaving PIC mode, enabling APIC mode.\n");
2207 		imcr_pic_to_apic();
2208 	}
2209 #endif
2210 }
2211 
2212 /**
2213  * disconnect_bsp_APIC - detach the APIC from the interrupt system
2214  * @virt_wire_setup:	indicates, whether virtual wire mode is selected
2215  *
2216  * Virtual wire mode is necessary to deliver legacy interrupts even when the
2217  * APIC is disabled.
2218  */
disconnect_bsp_APIC(int virt_wire_setup)2219 void disconnect_bsp_APIC(int virt_wire_setup)
2220 {
2221 	unsigned int value;
2222 
2223 #ifdef CONFIG_X86_32
2224 	if (pic_mode) {
2225 		/*
2226 		 * Put the board back into PIC mode (has an effect only on
2227 		 * certain older boards).  Note that APIC interrupts, including
2228 		 * IPIs, won't work beyond this point!  The only exception are
2229 		 * INIT IPIs.
2230 		 */
2231 		apic_pr_verbose("Disabling APIC mode, entering PIC mode.\n");
2232 		imcr_apic_to_pic();
2233 		return;
2234 	}
2235 #endif
2236 
2237 	/* Go back to Virtual Wire compatibility mode */
2238 
2239 	/* For the spurious interrupt use vector F, and enable it */
2240 	value = apic_read(APIC_SPIV);
2241 	value &= ~APIC_VECTOR_MASK;
2242 	value |= APIC_SPIV_APIC_ENABLED;
2243 	value |= 0xf;
2244 	apic_write(APIC_SPIV, value);
2245 
2246 	if (!virt_wire_setup) {
2247 		/*
2248 		 * For LVT0 make it edge triggered, active high,
2249 		 * external and enabled
2250 		 */
2251 		value = apic_read(APIC_LVT0);
2252 		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
2253 			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
2254 			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
2255 		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
2256 		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
2257 		apic_write(APIC_LVT0, value);
2258 	} else {
2259 		/* Disable LVT0 */
2260 		apic_write(APIC_LVT0, APIC_LVT_MASKED);
2261 	}
2262 
2263 	/*
2264 	 * For LVT1 make it edge triggered, active high,
2265 	 * nmi and enabled
2266 	 */
2267 	value = apic_read(APIC_LVT1);
2268 	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
2269 			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
2270 			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
2271 	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
2272 	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
2273 	apic_write(APIC_LVT1, value);
2274 }
2275 
__irq_msi_compose_msg(struct irq_cfg * cfg,struct msi_msg * msg,bool dmar)2276 void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg,
2277 			   bool dmar)
2278 {
2279 	memset(msg, 0, sizeof(*msg));
2280 
2281 	msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
2282 	msg->arch_addr_lo.dest_mode_logical = apic->dest_mode_logical;
2283 	msg->arch_addr_lo.destid_0_7 = cfg->dest_apicid & 0xFF;
2284 
2285 	msg->arch_data.delivery_mode = APIC_DELIVERY_MODE_FIXED;
2286 	msg->arch_data.vector = cfg->vector;
2287 
2288 	msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
2289 	/*
2290 	 * Only the IOMMU itself can use the trick of putting destination
2291 	 * APIC ID into the high bits of the address. Anything else would
2292 	 * just be writing to memory if it tried that, and needs IR to
2293 	 * address APICs which can't be addressed in the normal 32-bit
2294 	 * address range at 0xFFExxxxx. That is typically just 8 bits, but
2295 	 * some hypervisors allow the extended destination ID field in bits
2296 	 * 5-11 to be used, giving support for 15 bits of APIC IDs in total.
2297 	 */
2298 	if (dmar)
2299 		msg->arch_addr_hi.destid_8_31 = cfg->dest_apicid >> 8;
2300 	else if (virt_ext_dest_id && cfg->dest_apicid < 0x8000)
2301 		msg->arch_addr_lo.virt_destid_8_14 = cfg->dest_apicid >> 8;
2302 	else
2303 		WARN_ON_ONCE(cfg->dest_apicid > 0xFF);
2304 }
2305 
x86_msi_msg_get_destid(struct msi_msg * msg,bool extid)2306 u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid)
2307 {
2308 	u32 dest = msg->arch_addr_lo.destid_0_7;
2309 
2310 	if (extid)
2311 		dest |= msg->arch_addr_hi.destid_8_31 << 8;
2312 	return dest;
2313 }
2314 EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid);
2315 
apic_bsp_up_setup(void)2316 static void __init apic_bsp_up_setup(void)
2317 {
2318 	reset_phys_cpu_present_map(boot_cpu_physical_apicid);
2319 }
2320 
2321 /**
2322  * apic_bsp_setup - Setup function for local apic and io-apic
2323  * @upmode:		Force UP mode (for APIC_init_uniprocessor)
2324  */
apic_bsp_setup(bool upmode)2325 static void __init apic_bsp_setup(bool upmode)
2326 {
2327 	connect_bsp_APIC();
2328 	if (upmode)
2329 		apic_bsp_up_setup();
2330 	setup_local_APIC();
2331 
2332 	enable_IO_APIC();
2333 	end_local_APIC_setup();
2334 	irq_remap_enable_fault_handling();
2335 	setup_IO_APIC();
2336 	lapic_update_legacy_vectors();
2337 }
2338 
2339 #ifdef CONFIG_UP_LATE_INIT
up_late_init(void)2340 void __init up_late_init(void)
2341 {
2342 	if (apic_intr_mode == APIC_PIC)
2343 		return;
2344 
2345 	/* Setup local timer */
2346 	x86_init.timers.setup_percpu_clockev();
2347 }
2348 #endif
2349 
2350 /*
2351  * Power management
2352  */
2353 #ifdef CONFIG_PM
2354 
2355 static struct {
2356 	/*
2357 	 * 'active' is true if the local APIC was enabled by us and
2358 	 * not the BIOS; this signifies that we are also responsible
2359 	 * for disabling it before entering apm/acpi suspend
2360 	 */
2361 	int active;
2362 	/* r/w apic fields */
2363 	u32 apic_id;
2364 	unsigned int apic_taskpri;
2365 	unsigned int apic_ldr;
2366 	unsigned int apic_dfr;
2367 	unsigned int apic_spiv;
2368 	unsigned int apic_lvtt;
2369 	unsigned int apic_lvtpc;
2370 	unsigned int apic_lvt0;
2371 	unsigned int apic_lvt1;
2372 	unsigned int apic_lvterr;
2373 	unsigned int apic_tmict;
2374 	unsigned int apic_tdcr;
2375 	unsigned int apic_thmr;
2376 	unsigned int apic_cmci;
2377 } apic_pm_state;
2378 
lapic_suspend(void)2379 static int lapic_suspend(void)
2380 {
2381 	unsigned long flags;
2382 	int maxlvt;
2383 
2384 	if (!apic_pm_state.active)
2385 		return 0;
2386 
2387 	maxlvt = lapic_get_maxlvt();
2388 
2389 	apic_pm_state.apic_id = apic_read(APIC_ID);
2390 	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
2391 	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
2392 	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
2393 	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
2394 	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
2395 	if (maxlvt >= 4)
2396 		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
2397 	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
2398 	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
2399 	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
2400 	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
2401 	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
2402 #ifdef CONFIG_X86_THERMAL_VECTOR
2403 	if (maxlvt >= 5)
2404 		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
2405 #endif
2406 #ifdef CONFIG_X86_MCE_INTEL
2407 	if (maxlvt >= 6)
2408 		apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI);
2409 #endif
2410 
2411 	local_irq_save(flags);
2412 
2413 	/*
2414 	 * Mask IOAPIC before disabling the local APIC to prevent stale IRR
2415 	 * entries on some implementations.
2416 	 */
2417 	mask_ioapic_entries();
2418 
2419 	disable_local_APIC();
2420 
2421 	irq_remapping_disable();
2422 
2423 	local_irq_restore(flags);
2424 	return 0;
2425 }
2426 
lapic_resume(void)2427 static void lapic_resume(void)
2428 {
2429 	unsigned int l, h;
2430 	unsigned long flags;
2431 	int maxlvt;
2432 
2433 	if (!apic_pm_state.active)
2434 		return;
2435 
2436 	local_irq_save(flags);
2437 
2438 	/*
2439 	 * IO-APIC and PIC have their own resume routines.
2440 	 * We just mask them here to make sure the interrupt
2441 	 * subsystem is completely quiet while we enable x2apic
2442 	 * and interrupt-remapping.
2443 	 */
2444 	mask_ioapic_entries();
2445 	legacy_pic->mask_all();
2446 
2447 	if (x2apic_mode) {
2448 		__x2apic_enable();
2449 	} else {
2450 		/*
2451 		 * Make sure the APICBASE points to the right address
2452 		 *
2453 		 * FIXME! This will be wrong if we ever support suspend on
2454 		 * SMP! We'll need to do this as part of the CPU restore!
2455 		 */
2456 		if (boot_cpu_data.x86 >= 6) {
2457 			rdmsr(MSR_IA32_APICBASE, l, h);
2458 			l &= ~MSR_IA32_APICBASE_BASE;
2459 			l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
2460 			wrmsr(MSR_IA32_APICBASE, l, h);
2461 		}
2462 	}
2463 
2464 	maxlvt = lapic_get_maxlvt();
2465 	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
2466 	apic_write(APIC_ID, apic_pm_state.apic_id);
2467 	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
2468 	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
2469 	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
2470 	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
2471 	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
2472 	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
2473 #ifdef CONFIG_X86_THERMAL_VECTOR
2474 	if (maxlvt >= 5)
2475 		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
2476 #endif
2477 #ifdef CONFIG_X86_MCE_INTEL
2478 	if (maxlvt >= 6)
2479 		apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci);
2480 #endif
2481 	if (maxlvt >= 4)
2482 		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
2483 	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
2484 	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
2485 	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
2486 	apic_write(APIC_ESR, 0);
2487 	apic_read(APIC_ESR);
2488 	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
2489 	apic_write(APIC_ESR, 0);
2490 	apic_read(APIC_ESR);
2491 
2492 	irq_remapping_reenable(x2apic_mode);
2493 
2494 	local_irq_restore(flags);
2495 }
2496 
2497 /*
2498  * This device has no shutdown method - fully functioning local APICs
2499  * are needed on every CPU up until machine_halt/restart/poweroff.
2500  */
2501 
2502 static struct syscore_ops lapic_syscore_ops = {
2503 	.resume		= lapic_resume,
2504 	.suspend	= lapic_suspend,
2505 };
2506 
apic_pm_activate(void)2507 static void apic_pm_activate(void)
2508 {
2509 	apic_pm_state.active = 1;
2510 }
2511 
init_lapic_sysfs(void)2512 static int __init init_lapic_sysfs(void)
2513 {
2514 	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
2515 	if (boot_cpu_has(X86_FEATURE_APIC))
2516 		register_syscore_ops(&lapic_syscore_ops);
2517 
2518 	return 0;
2519 }
2520 
2521 /* local apic needs to resume before other devices access its registers. */
2522 core_initcall(init_lapic_sysfs);
2523 
2524 #else	/* CONFIG_PM */
2525 
/* Without CONFIG_PM there is no suspend/resume state to activate. */
static void apic_pm_activate(void)
{
}
2527 
2528 #endif	/* CONFIG_PM */
2529 
2530 #ifdef CONFIG_X86_64
2531 
/* Non-zero once dmi_check_multi() has scanned the DMI table. */
static int multi_checked;
/* Set to 1 by set_multi() when a multi_dmi_table entry matches. */
static int multi;
2534 
set_multi(const struct dmi_system_id * d)2535 static int set_multi(const struct dmi_system_id *d)
2536 {
2537 	if (multi)
2538 		return 0;
2539 	pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
2540 	multi = 1;
2541 	return 0;
2542 }
2543 
2544 static const struct dmi_system_id multi_dmi_table[] = {
2545 	{
2546 		.callback = set_multi,
2547 		.ident = "IBM System Summit2",
2548 		.matches = {
2549 			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
2550 			DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
2551 		},
2552 	},
2553 	{}
2554 };
2555 
dmi_check_multi(void)2556 static void dmi_check_multi(void)
2557 {
2558 	if (multi_checked)
2559 		return;
2560 
2561 	dmi_check_system(multi_dmi_table);
2562 	multi_checked = 1;
2563 }
2564 
2565 /*
2566  * apic_is_clustered_box() -- Check if we can expect good TSC
2567  *
2568  * Thus far, the major user of this is IBM's Summit2 series:
2569  * Clustered boxes may have unsynced TSC problems if they are
2570  * multi-chassis.
2571  * Use DMI to check them
2572  */
apic_is_clustered_box(void)2573 int apic_is_clustered_box(void)
2574 {
2575 	dmi_check_multi();
2576 	return multi;
2577 }
2578 #endif
2579 
2580 /*
2581  * APIC command line parameters
2582  */
setup_nolapic(char * arg)2583 static int __init setup_nolapic(char *arg)
2584 {
2585 	apic_is_disabled = true;
2586 	setup_clear_cpu_cap(X86_FEATURE_APIC);
2587 	return 0;
2588 }
2589 early_param("nolapic", setup_nolapic);
2590 
parse_lapic_timer_c2_ok(char * arg)2591 static int __init parse_lapic_timer_c2_ok(char *arg)
2592 {
2593 	local_apic_timer_c2_ok = 1;
2594 	return 0;
2595 }
2596 early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
2597 
parse_disable_apic_timer(char * arg)2598 static int __init parse_disable_apic_timer(char *arg)
2599 {
2600 	disable_apic_timer = 1;
2601 	return 0;
2602 }
2603 early_param("noapictimer", parse_disable_apic_timer);
2604 
parse_nolapic_timer(char * arg)2605 static int __init parse_nolapic_timer(char *arg)
2606 {
2607 	disable_apic_timer = 1;
2608 	return 0;
2609 }
2610 early_param("nolapic_timer", parse_nolapic_timer);
2611 
apic_set_verbosity(char * arg)2612 static int __init apic_set_verbosity(char *arg)
2613 {
2614 	if (!arg)  {
2615 		if (IS_ENABLED(CONFIG_X86_32))
2616 			return -EINVAL;
2617 
2618 		ioapic_is_disabled = false;
2619 		return 0;
2620 	}
2621 
2622 	if (strcmp("debug", arg) == 0)
2623 		apic_verbosity = APIC_DEBUG;
2624 	else if (strcmp("verbose", arg) == 0)
2625 		apic_verbosity = APIC_VERBOSE;
2626 #ifdef CONFIG_X86_64
2627 	else {
2628 		pr_warn("APIC Verbosity level %s not recognised"
2629 			" use apic=verbose or apic=debug\n", arg);
2630 		return -EINVAL;
2631 	}
2632 #endif
2633 
2634 	return 0;
2635 }
2636 early_param("apic", apic_set_verbosity);
2637 
lapic_insert_resource(void)2638 static int __init lapic_insert_resource(void)
2639 {
2640 	if (!apic_mmio_base)
2641 		return -1;
2642 
2643 	/* Put local APIC into the resource map. */
2644 	lapic_resource.start = apic_mmio_base;
2645 	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
2646 	insert_resource(&iomem_resource, &lapic_resource);
2647 
2648 	return 0;
2649 }
2650 
2651 /*
2652  * need call insert after e820__reserve_resources()
2653  * that is using request_resource
2654  */
2655 late_initcall(lapic_insert_resource);
2656 
apic_set_extnmi(char * arg)2657 static int __init apic_set_extnmi(char *arg)
2658 {
2659 	if (!arg)
2660 		return -EINVAL;
2661 
2662 	if (!strncmp("all", arg, 3))
2663 		apic_extnmi = APIC_EXTNMI_ALL;
2664 	else if (!strncmp("none", arg, 4))
2665 		apic_extnmi = APIC_EXTNMI_NONE;
2666 	else if (!strncmp("bsp", arg, 3))
2667 		apic_extnmi = APIC_EXTNMI_BSP;
2668 	else {
2669 		pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg);
2670 		return -EINVAL;
2671 	}
2672 
2673 	return 0;
2674 }
2675 early_param("apic_extnmi", apic_set_extnmi);
2676