1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
4 *
5 * This file contains spurious interrupt handling.
6 */
7
8 #include <linux/jiffies.h>
9 #include <linux/irq.h>
10 #include <linux/module.h>
11 #include <linux/interrupt.h>
12 #include <linux/moduleparam.h>
13 #include <linux/timer.h>
14
15 #include "internals.h"
16
/*
 * Misrouted interrupt recovery mode. Zero means recovery is off; the
 * "irqfixup" boot option sets it to 1 and the "irqpoll" boot option sets
 * it to 2. It is also exposed as a writable module parameter.
 */
static int irqfixup __read_mostly;

/* Offset in jiffies used whenever the spurious interrupt poll timer is armed */
#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
static void poll_spurious_irqs(struct timer_list *unused);
static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs);
/* Set to the executing CPU by whichever poller starts a scan of the descriptors */
int irq_poll_cpu;
/*
 * Incremented on entry to and decremented on exit from the pollers. Only
 * the caller which raises it to exactly 1 scans the descriptors.
 */
static atomic_t irq_poll_active;
24
25 /*
26 * Recovery handler for misrouted interrupts.
27 */
try_one_irq(struct irq_desc * desc,bool force)28 static bool try_one_irq(struct irq_desc *desc, bool force)
29 {
30 struct irqaction *action;
31 bool ret = false;
32
33 guard(raw_spinlock)(&desc->lock);
34
35 /*
36 * PER_CPU, nested thread interrupts and interrupts explicitly
37 * marked polled are excluded from polling.
38 */
39 if (irq_settings_is_per_cpu(desc) || irq_settings_is_nested_thread(desc) ||
40 irq_settings_is_polled(desc))
41 return false;
42
43 /*
44 * Do not poll disabled interrupts unless the spurious
45 * disabled poller asks explicitly.
46 */
47 if (irqd_irq_disabled(&desc->irq_data) && !force)
48 return false;
49
50 /*
51 * All handlers must agree on IRQF_SHARED, so we test just the
52 * first.
53 */
54 action = desc->action;
55 if (!action || !(action->flags & IRQF_SHARED) || (action->flags & __IRQF_TIMER))
56 return false;
57
58 /* Already running on another processor */
59 if (irqd_irq_inprogress(&desc->irq_data)) {
60 /*
61 * Already running: If it is shared get the other
62 * CPU to go looking for our mystery interrupt too
63 */
64 desc->istate |= IRQS_PENDING;
65 return false;
66 }
67
68 /* Mark it poll in progress */
69 desc->istate |= IRQS_POLL_INPROGRESS;
70 do {
71 if (handle_irq_event(desc) == IRQ_HANDLED)
72 ret = true;
73 /* Make sure that there is still a valid action */
74 action = desc->action;
75 } while ((desc->istate & IRQS_PENDING) && action);
76 desc->istate &= ~IRQS_POLL_INPROGRESS;
77 return ret;
78 }
79
misrouted_irq(int irq)80 static int misrouted_irq(int irq)
81 {
82 struct irq_desc *desc;
83 int i, ok = 0;
84
85 if (atomic_inc_return(&irq_poll_active) != 1)
86 goto out;
87
88 irq_poll_cpu = smp_processor_id();
89
90 for_each_irq_desc(i, desc) {
91 if (!i)
92 continue;
93
94 if (i == irq) /* Already tried */
95 continue;
96
97 if (try_one_irq(desc, false))
98 ok = 1;
99 }
100 out:
101 atomic_dec(&irq_poll_active);
102 /* So the caller can adjust the irq error counts */
103 return ok;
104 }
105
poll_spurious_irqs(struct timer_list * unused)106 static void poll_spurious_irqs(struct timer_list *unused)
107 {
108 struct irq_desc *desc;
109 int i;
110
111 if (atomic_inc_return(&irq_poll_active) != 1)
112 goto out;
113 irq_poll_cpu = smp_processor_id();
114
115 for_each_irq_desc(i, desc) {
116 unsigned int state;
117
118 if (!i)
119 continue;
120
121 /* Racy but it doesn't matter */
122 state = READ_ONCE(desc->istate);
123 if (!(state & IRQS_SPURIOUS_DISABLED))
124 continue;
125
126 local_irq_disable();
127 try_one_irq(desc, true);
128 local_irq_enable();
129 }
130 out:
131 atomic_dec(&irq_poll_active);
132 mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
133 }
134
/*
 * Sanity check the return value of an interrupt handler.
 *
 * Returns 0 when @action_ret, taken as an unsigned value, does not exceed
 * IRQ_HANDLED | IRQ_WAKE_THREAD, and 1 for anything larger.
 */
static inline int bad_action_ret(irqreturn_t action_ret)
{
	unsigned int val = action_ret;

	return likely(val <= (IRQ_HANDLED | IRQ_WAKE_THREAD)) ? 0 : 1;
}
143
144 /*
145 * If 99,900 of the previous 100,000 interrupts have not been handled
146 * then assume that the IRQ is stuck in some manner. Drop a diagnostic
147 * and try to turn the IRQ off.
148 *
149 * (The other 100-of-100,000 interrupts may have been a correctly
150 * functioning device sharing an IRQ with the failing one)
151 */
__report_bad_irq(struct irq_desc * desc,irqreturn_t action_ret)152 static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
153 {
154 unsigned int irq = irq_desc_get_irq(desc);
155 struct irqaction *action;
156
157 if (bad_action_ret(action_ret))
158 pr_err("irq event %d: bogus return value %x\n", irq, action_ret);
159 else
160 pr_err("irq %d: nobody cared (try booting with the \"irqpoll\" option)\n", irq);
161 dump_stack();
162 pr_err("handlers:\n");
163
164 /*
165 * We need to take desc->lock here. note_interrupt() is called
166 * w/o desc->lock held, but IRQ_PROGRESS set. We might race
167 * with something else removing an action. It's ok to take
168 * desc->lock here. See synchronize_irq().
169 */
170 guard(raw_spinlock_irqsave)(&desc->lock);
171 for_each_action_of_desc(desc, action) {
172 pr_err("[<%p>] %ps", action->handler, action->handler);
173 if (action->thread_fn)
174 pr_cont(" threaded [<%p>] %ps", action->thread_fn, action->thread_fn);
175 pr_cont("\n");
176 }
177 }
178
/*
 * Rate limited front end of __report_bad_irq(): only the first 100 calls
 * produce a report, all later ones are silently dropped.
 */
static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
{
	static int remaining = 100;

	if (remaining <= 0)
		return;

	remaining--;
	__report_bad_irq(desc, action_ret);
}
188
try_misrouted_irq(unsigned int irq,struct irq_desc * desc,irqreturn_t action_ret)189 static inline bool try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
190 irqreturn_t action_ret)
191 {
192 struct irqaction *action;
193
194 if (!irqfixup)
195 return false;
196
197 /* We didn't actually handle the IRQ - see if it was misrouted? */
198 if (action_ret == IRQ_NONE)
199 return true;
200
201 /*
202 * But for 'irqfixup == 2' we also do it for handled interrupts if
203 * they are marked as IRQF_IRQPOLL (or for irq zero, which is the
204 * traditional PC timer interrupt.. Legacy)
205 */
206 if (irqfixup < 2)
207 return false;
208
209 if (!irq)
210 return true;
211
212 /*
213 * Since we don't get the descriptor lock, "action" can
214 * change under us.
215 */
216 action = READ_ONCE(desc->action);
217 return action && (action->flags & IRQF_IRQPOLL);
218 }
219
/*
 * Bit 31 of desc->threads_handled_last: set while the spurious detection
 * for a woken handler thread is deferred to the next hardware interrupt.
 */
#define SPURIOUS_DEFERRED 0x80000000

/*
 * Feed the outcome of one hardware interrupt into the spurious interrupt
 * detector.
 *
 * @desc:	descriptor of the interrupt which just ran its handlers
 * @action_ret:	combined return value of those handlers
 *
 * Nothing is accounted while the descriptor is being polled or is marked
 * polled. A bogus return value is reported and otherwise ignored. For
 * everything else the function maintains desc->irqs_unhandled and
 * desc->irq_count, optionally polls the other interrupt lines for a
 * misrouted interrupt, and after every 100,000 counted interrupts checks
 * whether more than 99,900 of them went unhandled. If so, the interrupt is
 * reported, marked IRQS_SPURIOUS_DISABLED, disabled, and the poll timer is
 * armed.
 */
void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
{
	unsigned int irq;

	if (desc->istate & IRQS_POLL_INPROGRESS || irq_settings_is_polled(desc))
		return;

	if (bad_action_ret(action_ret)) {
		report_bad_irq(desc, action_ret);
		return;
	}

	/*
	 * We cannot call note_interrupt from the threaded handler
	 * because we need to look at the compound of all handlers
	 * (primary and threaded). Aside from that, in the threaded
	 * shared case we have no serialization against an incoming
	 * hardware interrupt while we are dealing with a threaded
	 * result.
	 *
	 * So in case a thread is woken, we just note the fact and
	 * defer the analysis to the next hardware interrupt.
	 *
	 * The threaded handlers store whether they successfully
	 * handled an interrupt and we check whether that number
	 * changed versus the last invocation.
	 *
	 * We could handle all interrupts with the delayed by one
	 * mechanism, but for the non forced threaded case we'd just
	 * add pointless overhead to the straight hardirq interrupts
	 * for the sake of a few lines less code.
	 */
	if (action_ret & IRQ_WAKE_THREAD) {
		/*
		 * There is a thread woken. Check whether one of the
		 * shared primary handlers returned IRQ_HANDLED. If
		 * not we defer the spurious detection to the next
		 * interrupt.
		 */
		if (action_ret == IRQ_WAKE_THREAD) {
			int handled;
			/*
			 * We use bit 31 of threads_handled_last to
			 * denote the deferred spurious detection
			 * active. No locking necessary as
			 * threads_handled_last is only accessed here
			 * and we have the guarantee that hard
			 * interrupts are not reentrant.
			 */
			if (!(desc->threads_handled_last & SPURIOUS_DEFERRED)) {
				desc->threads_handled_last |= SPURIOUS_DEFERRED;
				return;
			}
			/*
			 * Check whether one of the threaded handlers
			 * returned IRQ_HANDLED since the last
			 * interrupt happened.
			 *
			 * For simplicity we just set bit 31, as it is
			 * set in threads_handled_last as well. So we
			 * avoid extra masking. And we really do not
			 * care about the high bits of the handled
			 * count. We just care about the count being
			 * different than the one we saw before.
			 */
			handled = atomic_read(&desc->threads_handled);
			handled |= SPURIOUS_DEFERRED;
			if (handled != desc->threads_handled_last) {
				action_ret = IRQ_HANDLED;
				/*
				 * Note: We keep the SPURIOUS_DEFERRED
				 * bit set. We are handling the
				 * previous invocation right now.
				 * Keep it for the current one, so the
				 * next hardware interrupt will
				 * account for it.
				 */
				desc->threads_handled_last = handled;
			} else {
				/*
				 * None of the threaded handlers felt
				 * responsible for the last interrupt.
				 *
				 * We keep the SPURIOUS_DEFERRED bit
				 * set in threads_handled_last as we
				 * need to account for the current
				 * interrupt as well.
				 */
				action_ret = IRQ_NONE;
			}
		} else {
			/*
			 * One of the primary handlers returned
			 * IRQ_HANDLED. So we don't care about the
			 * threaded handlers on the same line. Clear
			 * the deferred detection bit.
			 *
			 * In theory we could/should check whether the
			 * deferred bit is set and take the result of
			 * the previous run into account here as
			 * well. But it's really not worth the
			 * trouble. If every other interrupt is
			 * handled we never trigger the spurious
			 * detector. And if this is just the one out
			 * of 100k unhandled ones which is handled
			 * then we merely delay the spurious detection
			 * by one hard interrupt. Not a real problem.
			 */
			desc->threads_handled_last &= ~SPURIOUS_DEFERRED;
		}
	}

	if (unlikely(action_ret == IRQ_NONE)) {
		/*
		 * If we are seeing only the odd spurious IRQ caused by
		 * bus asynchronicity then don't eventually trigger an error,
		 * otherwise the counter becomes a doomsday timer for otherwise
		 * working systems.
		 *
		 * The count restarts at 1 when the previous unhandled
		 * interrupt is more than HZ/10 jiffies in the past.
		 */
		if (time_after(jiffies, desc->last_unhandled + HZ/10))
			desc->irqs_unhandled = 1;
		else
			desc->irqs_unhandled++;
		desc->last_unhandled = jiffies;
	}

	irq = irq_desc_get_irq(desc);
	if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
		int ok = misrouted_irq(irq);
		/* Polling found a taker: do not count this one as unhandled */
		if (action_ret == IRQ_NONE)
			desc->irqs_unhandled -= ok;
	}

	/* Interrupts are only counted while unhandled ones are outstanding */
	if (likely(!desc->irqs_unhandled))
		return;

	/* Now getting into unhandled irq detection */
	desc->irq_count++;
	if (likely(desc->irq_count < 100000))
		return;

	/* A window of 100,000 counted interrupts is complete: evaluate it */
	desc->irq_count = 0;
	if (unlikely(desc->irqs_unhandled > 99900)) {
		/*
		 * The interrupt is stuck
		 */
		__report_bad_irq(desc, action_ret);
		/*
		 * Now kill the IRQ
		 */
		pr_emerg("Disabling IRQ #%d\n", irq);
		desc->istate |= IRQS_SPURIOUS_DISABLED;
		desc->depth++;
		irq_disable(desc);

		/* From now on the line is serviced by the poll timer */
		mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
	}
	desc->irqs_unhandled = 0;
}
381
382 bool noirqdebug __read_mostly;
383
noirqdebug_setup(char * str)384 int noirqdebug_setup(char *str)
385 {
386 noirqdebug = 1;
387 pr_info("IRQ lockup detection disabled\n");
388 return 1;
389 }
390 __setup("noirqdebug", noirqdebug_setup);
391 module_param(noirqdebug, bool, 0644);
392 MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
393
irqfixup_setup(char * str)394 static int __init irqfixup_setup(char *str)
395 {
396 if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
397 pr_warn("irqfixup boot option not supported with PREEMPT_RT\n");
398 return 1;
399 }
400 irqfixup = 1;
401 pr_warn("Misrouted IRQ fixup support enabled.\n");
402 pr_warn("This may impact system performance.\n");
403 return 1;
404 }
405 __setup("irqfixup", irqfixup_setup);
406 module_param(irqfixup, int, 0644);
407
irqpoll_setup(char * str)408 static int __init irqpoll_setup(char *str)
409 {
410 if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
411 pr_warn("irqpoll boot option not supported with PREEMPT_RT\n");
412 return 1;
413 }
414 irqfixup = 2;
415 pr_warn("Misrouted IRQ fixup and polling support enabled\n");
416 pr_warn("This may significantly impact system performance\n");
417 return 1;
418 }
419 __setup("irqpoll", irqpoll_setup);
420