// SPDX-License-Identifier: GPL-2.0-only
/*
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 *  Copyright (C) 2011 Don Zickus Red Hat, Inc.
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * Handle hardware traps and faults.
 */
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/sched/debug.h>
#include <linux/nmi.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/atomic.h>
#include <linux/sched/clock.h>

#include <asm/cpu_entry_area.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
#include <asm/cache.h>
#include <asm/nospec-branch.h>
#include <asm/microcode.h>
#include <asm/sev.h>
#include <asm/fred.h>

#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>

/*
 * An emergency handler can be set in any context, including NMI.
 */
struct nmi_desc {
	raw_spinlock_t lock;
	nmi_handler_t emerg_handler;
	struct list_head head;
};

static struct nmi_desc nmi_desc[NMI_MAX] =
{
	{
		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
		.head = LIST_HEAD_INIT(nmi_desc[0].head),
	},
	{
		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
		.head = LIST_HEAD_INIT(nmi_desc[1].head),
	},
	{
		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
		.head = LIST_HEAD_INIT(nmi_desc[2].head),
	},
	{
		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
		.head = LIST_HEAD_INIT(nmi_desc[3].head),
	},
};

struct nmi_stats {
	unsigned int normal;
	unsigned int unknown;
	unsigned int external;
	unsigned int swallow;
	unsigned long recv_jiffies;
	unsigned long idt_seq;
	unsigned long idt_nmi_seq;
	unsigned long idt_ignored;
	atomic_long_t idt_calls;
	unsigned long idt_seq_snap;
	unsigned long idt_nmi_seq_snap;
	unsigned long idt_ignored_snap;
	long idt_calls_snap;
};

static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);

static int ignore_nmis __read_mostly;

int unknown_nmi_panic;
/*
 * Serialize access to the NMI reason port (0x61); this lock may only be
 * taken from NMI context.
 */
static DEFINE_RAW_SPINLOCK(nmi_reason_lock);

static int __init setup_unknown_nmi_panic(char *str)
{
	unknown_nmi_panic = 1;
	return 1;
}
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);

#define nmi_to_desc(type) (&nmi_desc[type])

static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;

static int __init nmi_warning_debugfs(void)
{
	debugfs_create_u64("nmi_longest_ns", 0644,
			arch_debugfs_dir, &nmi_longest_ns);
	return 0;
}
fs_initcall(nmi_warning_debugfs);
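
/*
 * Note: the nmi_longest_ns threshold above is tunable at runtime through the
 * debugfs file created here. A minimal sketch, assuming debugfs is mounted at
 * its usual /sys/kernel/debug location and arch_debugfs_dir is the "x86"
 * subdirectory (the exact path is an assumption, not stated in this file):
 *
 *	# warn only about NMI handlers that run longer than 2 ms
 *	echo 2000000 > /sys/kernel/debug/x86/nmi_longest_ns
 */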

static void nmi_check_duration(struct nmiaction *action, u64 duration)
{
	int remainder_ns, decimal_msecs;

	if (duration < nmi_longest_ns || duration < action->max_duration)
		return;

	action->max_duration = duration;

	remainder_ns = do_div(duration, (1000 * 1000));
	decimal_msecs = remainder_ns / 1000;

	printk_ratelimited(KERN_INFO
		"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
		action->handler, duration, decimal_msecs);
}

static int nmi_handle(unsigned int type, struct pt_regs *regs)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	nmi_handler_t ehandler;
	struct nmiaction *a;
	int handled = 0;

	/*
	 * Call the emergency handler, if set.
	 *
	 * When the emergency handler is crash_nmi_callback(), it returns on
	 * the crashing CPU so that the CPU can complete the remaining crash
	 * actions as quickly as possible. The other handlers in the linked
	 * list do not need to run.
	 */
	ehandler = desc->emerg_handler;
	if (ehandler)
		return ehandler(type, regs);

	rcu_read_lock();

	/*
	 * NMIs are edge-triggered, which means if you have enough
	 * of them concurrently, you can lose some because only one
	 * can be latched at any given time. Walk the whole list
	 * to handle those situations.
	 */
	list_for_each_entry_rcu(a, &desc->head, list) {
		int thishandled;
		u64 delta;

		delta = sched_clock();
		thishandled = a->handler(type, regs);
		handled += thishandled;
		delta = sched_clock() - delta;
		trace_nmi_handler(a->handler, (int)delta, thishandled);

		nmi_check_duration(a, delta);
	}

	rcu_read_unlock();

	/* return total number of NMI events handled */
	return handled;
}
NOKPROBE_SYMBOL(nmi_handle);

int __register_nmi_handler(unsigned int type, struct nmiaction *action)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	unsigned long flags;

	if (WARN_ON_ONCE(!action->handler || !list_empty(&action->list)))
		return -EINVAL;

	raw_spin_lock_irqsave(&desc->lock, flags);

	/*
	 * Indicate if there are multiple registrations on the
	 * internal NMI handler call chains (SERR and IO_CHECK).
	 */
	WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
	WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));

	/*
	 * Some handlers need to be executed first, otherwise a fake
	 * event confuses the other handlers (kdump uses this flag).
	 */
	if (action->flags & NMI_FLAG_FIRST)
		list_add_rcu(&action->list, &desc->head);
	else
		list_add_tail_rcu(&action->list, &desc->head);

	raw_spin_unlock_irqrestore(&desc->lock, flags);
	return 0;
}
EXPORT_SYMBOL(__register_nmi_handler);

void unregister_nmi_handler(unsigned int type, const char *name)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *n, *found = NULL;
	unsigned long flags;

	raw_spin_lock_irqsave(&desc->lock, flags);

	list_for_each_entry_rcu(n, &desc->head, list) {
		/*
		 * The name passed in to describe the nmi handler
		 * is used as the lookup key.
		 */
		if (!strcmp(n->name, name)) {
			WARN(in_nmi(),
				"Trying to free NMI (%s) from NMI context!\n", n->name);
			list_del_rcu(&n->list);
			found = n;
			break;
		}
	}

	raw_spin_unlock_irqrestore(&desc->lock, flags);
	if (found) {
		synchronize_rcu();
		INIT_LIST_HEAD(&found->list);
	}
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
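
/*
 * Illustrative sketch of how a client hooks into these chains via the
 * register_nmi_handler() wrapper from <asm/nmi.h>; the handler, helpers and
 * device name below are hypothetical, not part of this file:
 *
 *	static int my_dev_nmi_handler(unsigned int type, struct pt_regs *regs)
 *	{
 *		if (!my_dev_raised_nmi())	// hypothetical helper
 *			return NMI_DONE;	// not ours, keep walking the chain
 *		my_dev_ack_nmi();		// hypothetical helper
 *		return NMI_HANDLED;		// count this event as handled
 *	}
 *
 *	// e.g. from module init / exit:
 *	register_nmi_handler(NMI_LOCAL, my_dev_nmi_handler, 0, "my_dev");
 *	...
 *	unregister_nmi_handler(NMI_LOCAL, "my_dev");
 */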

/**
 * set_emergency_nmi_handler - Set emergency handler
 * @type:    NMI type
 * @handler: the emergency handler to be stored
 *
 * Set an emergency NMI handler which, if set, will preempt all the other
 * handlers in the linked list. Passing a NULL handler clears it. Concurrent
 * callers are expected not to exist, or the system is screwed beyond repair
 * anyway.
 */
void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler)
{
	struct nmi_desc *desc = nmi_to_desc(type);

	if (WARN_ON_ONCE(desc->emerg_handler == handler))
		return;
	desc->emerg_handler = handler;

	/*
	 * Ensure the emergency handler is visible to other CPUs before
	 * function return.
	 */
	smp_wmb();
}
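
/*
 * A minimal usage sketch for the crash shutdown case described in
 * nmi_handle() above (crash_nmi_callback() is named in this file; the exact
 * call site is an assumption):
 *
 *	// install the crash callback ahead of every registered NMI_LOCAL
 *	// handler before sending shutdown IPIs ...
 *	set_emergency_nmi_handler(NMI_LOCAL, crash_nmi_callback);
 *
 *	// ... and clear it again by passing NULL once it is no longer needed
 *	set_emergency_nmi_handler(NMI_LOCAL, NULL);
 */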

static void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
	/* check to see if anyone registered against these types of errors */
	if (nmi_handle(NMI_SERR, regs))
		return;

	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	if (panic_on_unrecovered_nmi)
		nmi_panic(regs, "NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");

	/* Clear and disable the PCI SERR error line. */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);
}
NOKPROBE_SYMBOL(pci_serr_error);

static void
io_check_error(unsigned char reason, struct pt_regs *regs)
{
	unsigned long i;

	/* check to see if anyone registered against these types of errors */
	if (nmi_handle(NMI_IO_CHECK, regs))
		return;

	pr_emerg(
	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());
	show_regs(regs);

	if (panic_on_io_nmi) {
		nmi_panic(regs, "NMI IOCK error: Not continuing");

		/*
		 * If we end up here, it means we have received an NMI while
		 * processing panic(). Simply return without delaying and
		 * re-enabling NMIs.
		 */
		return;
	}

	/* Re-enable the IOCK line, wait for a few seconds */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);

	i = 20000;
	while (--i) {
		touch_nmi_watchdog();
		udelay(100);
	}

	reason &= ~NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);
}
NOKPROBE_SYMBOL(io_check_error);

static void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
	int handled;

	/*
	 * Use 'false' as back-to-back NMIs are dealt with one level up.
	 * Of course this makes having multiple 'unknown' handlers useless
	 * as only the first one is ever run (unless it can actually determine
	 * if it caused the NMI).
	 */
	handled = nmi_handle(NMI_UNKNOWN, regs);
	if (handled) {
		__this_cpu_add(nmi_stats.unknown, handled);
		return;
	}

	__this_cpu_add(nmi_stats.unknown, 1);

	pr_emerg_ratelimited("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
			     reason, smp_processor_id());

	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
		nmi_panic(regs, "NMI: Not continuing");

	pr_emerg_ratelimited("Dazed and confused, but trying to continue\n");
}
NOKPROBE_SYMBOL(unknown_nmi_error);

static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);

static noinstr void default_do_nmi(struct pt_regs *regs)
{
	unsigned char reason = 0;
	int handled;
	bool b2b = false;

	/*
	 * CPU-specific NMI must be processed before non-CPU-specific
	 * NMI, otherwise we may lose it, because the CPU-specific
	 * NMI can not be detected/processed on other CPUs.
	 */

	/*
	 * Back-to-back NMIs are interesting because they can either
	 * be two NMIs or more than two NMIs (anything over two is dropped
	 * due to NMI being edge-triggered). If this is the second half
	 * of the back-to-back NMI, assume we dropped things and process
	 * more handlers. Otherwise, reset the 'swallow' NMI behaviour.
	 */
	if (regs->ip == __this_cpu_read(last_nmi_rip))
		b2b = true;
	else
		__this_cpu_write(swallow_nmi, false);

	__this_cpu_write(last_nmi_rip, regs->ip);

	instrumentation_begin();

	if (microcode_nmi_handler_enabled() && microcode_nmi_handler())
		goto out;

	handled = nmi_handle(NMI_LOCAL, regs);
	__this_cpu_add(nmi_stats.normal, handled);
	if (handled) {
		/*
		 * There are cases when a NMI handler handles multiple
		 * events in the current NMI. One of these events may
		 * be queued for the next NMI. Because the event is
		 * already handled, the next NMI will result in an unknown
		 * NMI. Instead lets flag this for a potential NMI to
		 * swallow.
		 */
		if (handled > 1)
			__this_cpu_write(swallow_nmi, true);
		goto out;
	}

	/*
	 * Non-CPU-specific NMI: NMI sources can be processed on any CPU.
	 *
	 * Another CPU may be processing panic routines while holding
	 * nmi_reason_lock. Check if the CPU issued the IPI for crash dumping,
	 * and if so, call its callback directly. If there is no CPU preparing
	 * a crash dump, we simply loop here.
	 */
	while (!raw_spin_trylock(&nmi_reason_lock)) {
		run_crash_ipi_callback(regs);
		cpu_relax();
	}

	reason = x86_platform.get_nmi_reason();

	if (reason & NMI_REASON_MASK) {
		if (reason & NMI_REASON_SERR)
			pci_serr_error(reason, regs);
		else if (reason & NMI_REASON_IOCHK)
			io_check_error(reason, regs);
#ifdef CONFIG_X86_32
		/*
		 * Reassert NMI in case it became active
		 * meanwhile as it's edge-triggered:
		 */
		reassert_nmi();
#endif
		__this_cpu_add(nmi_stats.external, 1);
		raw_spin_unlock(&nmi_reason_lock);
		goto out;
	}
	raw_spin_unlock(&nmi_reason_lock);

	/*
	 * Only one NMI can be latched at a time. To handle
	 * this we may process multiple nmi handlers at once to
	 * cover the case where an NMI is dropped. The downside
	 * to this approach is we may process an NMI prematurely,
	 * while its real NMI is sitting latched. This will cause
	 * an unknown NMI on the next run of the NMI processing.
	 *
	 * We tried to flag that condition above, by setting the
	 * swallow_nmi flag when we process more than one event.
	 * This condition is also only present on the second half
	 * of a back-to-back NMI, so we flag that condition too.
	 *
	 * If both are true, we assume we already processed this
	 * NMI previously and we swallow it. Otherwise we reset
	 * the logic.
	 *
	 * There are scenarios where we may accidentally swallow
	 * a 'real' unknown NMI. For example, while processing
	 * a perf NMI another perf NMI comes in along with a
	 * 'real' unknown NMI. These two NMIs get combined into
	 * one (as described above). When the next NMI gets
	 * processed, it will be flagged by perf as handled, but
	 * no one will know that there was a 'real' unknown NMI sent
	 * also. As a result it gets swallowed. Or if the first
	 * perf NMI returns two events handled then the second
	 * NMI will get eaten by the logic below, again losing a
	 * 'real' unknown NMI. But this is the best we can do
	 * for now.
	 */
	if (b2b && __this_cpu_read(swallow_nmi))
		__this_cpu_add(nmi_stats.swallow, 1);
	else
		unknown_nmi_error(reason, regs);

out:
	instrumentation_end();
}

/*
 * NMIs can page fault or hit breakpoints, which will cause them to lose
 * their NMI context with the CPU when the breakpoint or page fault does an
 * IRET.
 *
 * As a result, NMIs can nest if NMIs get unmasked due to an IRET during
 * NMI processing. On x86_64, the asm glue protects us from nested NMIs
 * if the outer NMI came from kernel mode, but we can still nest if the
 * outer NMI came from user mode.
 *
 * To handle these nested NMIs, we have three states:
 *
 *  1) not running
 *  2) executing
 *  3) latched
 *
 * When no NMI is in progress, it is in the "not running" state.
 * When an NMI comes in, it goes into the "executing" state.
 * Normally, if another NMI is triggered, it does not interrupt
 * the running NMI and the HW will simply latch it so that when
 * the first NMI finishes, it will restart the second NMI.
 * (Note, the latch is binary, thus multiple NMIs triggering,
 *  when one is running, are ignored. Only one NMI is restarted.)
 *
 * If an NMI executes an iret, another NMI can preempt it. We do not
 * want to allow this new NMI to run, but we want to execute it when the
 * first one finishes. We set the state to "latched", and the exit of
 * the first NMI will perform a dec_return; if the result is zero
 * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the
 * dec_return would have set the state to NMI_EXECUTING (what we want it
 * to be when we are running). In this case, we simply jump back to
 * rerun the NMI handler again, and restart the 'latched' NMI.
 *
 * No trap (breakpoint or page fault) should be hit before nmi_restart,
 * thus there is no race between the first check of state for NOT_RUNNING
 * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
 * at this point.
 *
 * In case the NMI takes a page fault, we need to save off the CR2
 * because the NMI could have preempted another page fault and corrupted
 * the CR2 that is about to be read. As nested NMIs must be restarted
 * and they can not take breakpoints or page faults, the update of the
 * CR2 must be done before converting the nmi state back to NOT_RUNNING.
 * Otherwise, there would be a race of another nested NMI coming in
 * after setting state to NOT_RUNNING but before updating the nmi_cr2.
 */
enum nmi_states {
	NMI_NOT_RUNNING = 0,
	NMI_EXECUTING,
	NMI_LATCHED,
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
static DEFINE_PER_CPU(unsigned long, nmi_cr2);
static DEFINE_PER_CPU(unsigned long, nmi_dr7);

DEFINE_IDTENTRY_RAW(exc_nmi)
{
	irqentry_state_t irq_state;
	struct nmi_stats *nsp = this_cpu_ptr(&nmi_stats);

	/*
	 * Re-enable NMIs right here when running as an SEV-ES guest. This might
	 * cause nested NMIs, but those can be handled safely.
	 */
	sev_es_nmi_complete();
	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
		raw_atomic_long_inc(&nsp->idt_calls);

	if (arch_cpu_is_offline(smp_processor_id())) {
		if (microcode_nmi_handler_enabled())
			microcode_offline_nmi_handler();
		return;
	}

	if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
		this_cpu_write(nmi_state, NMI_LATCHED);
		return;
	}
	this_cpu_write(nmi_state, NMI_EXECUTING);
	this_cpu_write(nmi_cr2, read_cr2());

nmi_restart:
	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
		WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
		WARN_ON_ONCE(!(nsp->idt_seq & 0x1));
		WRITE_ONCE(nsp->recv_jiffies, jiffies);
	}

	/*
	 * Needs to happen before DR7 is accessed, because the hypervisor can
	 * intercept DR7 reads/writes, turning those into #VC exceptions.
	 */
	sev_es_ist_enter(regs);

	this_cpu_write(nmi_dr7, local_db_save());

	irq_state = irqentry_nmi_enter(regs);

	inc_irq_stat(__nmi_count);

	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) {
		WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1);
	} else if (!ignore_nmis) {
		if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
			WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
			WARN_ON_ONCE(!(nsp->idt_nmi_seq & 0x1));
		}
		default_do_nmi(regs);
		if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
			WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
			WARN_ON_ONCE(nsp->idt_nmi_seq & 0x1);
		}
	}

	irqentry_nmi_exit(regs, irq_state);

	local_db_restore(this_cpu_read(nmi_dr7));

	sev_es_ist_exit();

	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
		write_cr2(this_cpu_read(nmi_cr2));
	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
		WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
		WARN_ON_ONCE(nsp->idt_seq & 0x1);
		WRITE_ONCE(nsp->recv_jiffies, jiffies);
	}
	if (this_cpu_dec_return(nmi_state))
		goto nmi_restart;
}

#if IS_ENABLED(CONFIG_KVM_INTEL)
DEFINE_IDTENTRY_RAW(exc_nmi_kvm_vmx)
{
	exc_nmi(regs);
}
#if IS_MODULE(CONFIG_KVM_INTEL)
EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx);
#endif
#endif

#ifdef CONFIG_NMI_CHECK_CPU

static char *nmi_check_stall_msg[] = {
/*                                                    */
/* +--------- nmi_seq & 0x1: CPU is currently in NMI handler. */
/* | +------ cpu_is_offline(cpu) */
/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */
/* | | |	NMI handler has been invoked. */
/* | | | */
/* V V V */
/* 0 0 0 */ "NMIs are not reaching exc_nmi() handler",
/* 0 0 1 */ "exc_nmi() handler is ignoring NMIs",
/* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi() handler",
/* 0 1 1 */ "CPU is offline and exc_nmi() handler is legitimately ignoring NMIs",
/* 1 0 0 */ "CPU is in exc_nmi() handler and no further NMIs are reaching handler",
/* 1 0 1 */ "CPU is in exc_nmi() handler which is legitimately ignoring NMIs",
/* 1 1 0 */ "CPU is offline in exc_nmi() handler and no more NMIs are reaching exc_nmi() handler",
/* 1 1 1 */ "CPU is offline in exc_nmi() handler which is legitimately ignoring NMIs",
};

void nmi_backtrace_stall_snap(const struct cpumask *btp)
{
	int cpu;
	struct nmi_stats *nsp;

	for_each_cpu(cpu, btp) {
		nsp = per_cpu_ptr(&nmi_stats, cpu);
		nsp->idt_seq_snap = READ_ONCE(nsp->idt_seq);
		nsp->idt_nmi_seq_snap = READ_ONCE(nsp->idt_nmi_seq);
		nsp->idt_ignored_snap = READ_ONCE(nsp->idt_ignored);
		nsp->idt_calls_snap = atomic_long_read(&nsp->idt_calls);
	}
}

void nmi_backtrace_stall_check(const struct cpumask *btp)
{
	int cpu;
	int idx;
	unsigned long nmi_seq;
	unsigned long j = jiffies;
	char *modp;
	char *msgp;
	char *msghp;
	struct nmi_stats *nsp;

	for_each_cpu(cpu, btp) {
		nsp = per_cpu_ptr(&nmi_stats, cpu);
		modp = "";
		msghp = "";
		nmi_seq = READ_ONCE(nsp->idt_nmi_seq);
		if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) {
			msgp = "CPU entered NMI handler function, but has not exited";
		} else if (nsp->idt_nmi_seq_snap == nmi_seq ||
			   nsp->idt_nmi_seq_snap + 1 == nmi_seq) {
			idx = ((nmi_seq & 0x1) << 2) |
			      (cpu_is_offline(cpu) << 1) |
			      (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls));
			msgp = nmi_check_stall_msg[idx];
			if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
				modp = ", but OK because ignore_nmis was set";
			if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
				msghp = " (CPU exited one NMI handler function)";
			else if (nmi_seq & 0x1)
				msghp = " (CPU currently in NMI handler function)";
			else
				msghp = " (CPU was never in an NMI handler function)";
		} else {
			msgp = "CPU is handling NMIs";
		}
		pr_alert("%s: CPU %d: %s%s%s\n", __func__, cpu, msgp, modp, msghp);
		pr_alert("%s: last activity: %lu jiffies ago.\n",
			 __func__, j - READ_ONCE(nsp->recv_jiffies));
	}
}

#endif

#ifdef CONFIG_X86_FRED
/*
 * With FRED, CR2/DR6 is pushed to the #PF/#DB stack frame during FRED
 * event delivery, i.e., there is no problem of transient states.
 * And NMI unblocking only happens when the stack frame indicates
 * that it should happen.
 *
 * Thus, the NMI entry stub for FRED is really straightforward and
 * as simple as most exception handlers. As such, #DB is allowed
 * during NMI handling.
 */
DEFINE_FREDENTRY_NMI(exc_nmi)
{
	irqentry_state_t irq_state;

	if (arch_cpu_is_offline(smp_processor_id())) {
		if (microcode_nmi_handler_enabled())
			microcode_offline_nmi_handler();
		return;
	}

	/*
	 * Save CR2 for eventual restore to cover the case where the NMI
	 * hits the VMENTER/VMEXIT region where guest CR2 is live. This
	 * prevents guest state corruption in case the NMI handler takes
	 * a page fault.
	 */
	this_cpu_write(nmi_cr2, read_cr2());

	irq_state = irqentry_nmi_enter(regs);

	inc_irq_stat(__nmi_count);
	default_do_nmi(regs);

	irqentry_nmi_exit(regs, irq_state);

	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
		write_cr2(this_cpu_read(nmi_cr2));
}
#endif

void stop_nmi(void)
{
	ignore_nmis++;
}

void restart_nmi(void)
{
	ignore_nmis--;
}

/* Reset the back-to-back NMI logic. */
void local_touch_nmi(void)
{
	__this_cpu_write(last_nmi_rip, 0);
}
EXPORT_SYMBOL_GPL(local_touch_nmi);