// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
 *
 * This file contains spurious interrupt handling.
 */

#include <linux/jiffies.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/moduleparam.h>
#include <linux/timer.h>

#include "internals.h"

static int irqfixup __read_mostly;

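/*
 * Interval at which the stuck-IRQ poller re-arms itself: HZ/10 jiffies,
 * i.e. roughly every 100ms regardless of the configured HZ.
 */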
#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
static void poll_spurious_irqs(struct timer_list *unused);
static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs);
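/*
 * irq_poll_cpu records the CPU currently running the poll loop;
 * irq_poll_active ensures that only one poller is active at a time.
 */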
int irq_poll_cpu;
static atomic_t irq_poll_active;

/*
 * Recovery handler for misrouted interrupts.
 */
static bool try_one_irq(struct irq_desc *desc, bool force)
{
	struct irqaction *action;
	bool ret = false;

	guard(raw_spinlock)(&desc->lock);

	/*
	 * PER_CPU, nested thread interrupts and interrupts explicitly
	 * marked polled are excluded from polling.
	 */
	if (irq_settings_is_per_cpu(desc) || irq_settings_is_nested_thread(desc) ||
	    irq_settings_is_polled(desc))
		return false;

	/*
	 * Do not poll disabled interrupts unless the spurious
	 * disabled poller asks explicitly.
	 */
	if (irqd_irq_disabled(&desc->irq_data) && !force)
		return false;

	/*
	 * All handlers must agree on IRQF_SHARED, so we test just the
	 * first.
	 */
	action = desc->action;
	if (!action || !(action->flags & IRQF_SHARED) || (action->flags & __IRQF_TIMER))
		return false;

	/* Already running on another processor */
	if (irqd_irq_inprogress(&desc->irq_data)) {
		/*
		 * Already running: If it is shared, get the other
		 * CPU to go looking for our mystery interrupt too.
		 */
		desc->istate |= IRQS_PENDING;
		return false;
	}

	/* Mark it poll in progress */
	desc->istate |= IRQS_POLL_INPROGRESS;
	do {
		if (handle_irq_event(desc) == IRQ_HANDLED)
			ret = true;
		/* Make sure that there is still a valid action */
		action = desc->action;
	} while ((desc->istate & IRQS_PENDING) && action);
	desc->istate &= ~IRQS_POLL_INPROGRESS;
	return ret;
}

static int misrouted_irq(int irq)
{
	struct irq_desc *desc;
	int i, ok = 0;

	if (atomic_inc_return(&irq_poll_active) != 1)
		goto out;

	irq_poll_cpu = smp_processor_id();

	for_each_irq_desc(i, desc) {
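		/* Never poll IRQ 0, traditionally the PC timer interrupt */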
		if (!i)
			continue;

		if (i == irq)	/* Already tried */
			continue;

		if (try_one_irq(desc, false))
			ok = 1;
	}
out:
	atomic_dec(&irq_poll_active);
	/* So the caller can adjust the irq error counts */
	return ok;
}

static void poll_spurious_irqs(struct timer_list *unused)
{
	struct irq_desc *desc;
	int i;

	if (atomic_inc_return(&irq_poll_active) != 1)
		goto out;
	irq_poll_cpu = smp_processor_id();

	for_each_irq_desc(i, desc) {
		unsigned int state;

		if (!i)
			continue;

		/* Racy but it doesn't matter */
		state = READ_ONCE(desc->istate);
		if (!(state & IRQS_SPURIOUS_DISABLED))
			continue;

		local_irq_disable();
		try_one_irq(desc, true);
		local_irq_enable();
	}
out:
	atomic_dec(&irq_poll_active);
	mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}

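/*
 * The only legitimate handler return values are IRQ_NONE, IRQ_HANDLED,
 * IRQ_WAKE_THREAD and the OR of the latter two, so anything above
 * (IRQ_HANDLED | IRQ_WAKE_THREAD) is a bogus return value.
 */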
static inline int bad_action_ret(irqreturn_t action_ret)
{
	unsigned int r = action_ret;

	if (likely(r <= (IRQ_HANDLED | IRQ_WAKE_THREAD)))
		return 0;
	return 1;
}

/*
 * If 99,900 of the previous 100,000 interrupts have not been handled
 * then assume that the IRQ is stuck in some manner. Drop a diagnostic
 * and try to turn the IRQ off.
 *
 * (The other 100-of-100,000 interrupts may have been a correctly
 *  functioning device sharing an IRQ with the failing one)
 */
static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
{
	unsigned int irq = irq_desc_get_irq(desc);
	struct irqaction *action;

	if (bad_action_ret(action_ret))
		pr_err("irq event %d: bogus return value %x\n", irq, action_ret);
	else
		pr_err("irq %d: nobody cared (try booting with the \"irqpoll\" option)\n", irq);
	dump_stack();
	pr_err("handlers:\n");

	/*
	 * We need to take desc->lock here. note_interrupt() is called
	 * w/o desc->lock held, but with the interrupt marked in
	 * progress. We might race with something else removing an
	 * action. It's ok to take desc->lock here. See synchronize_irq().
	 */
	guard(raw_spinlock_irqsave)(&desc->lock);
	for_each_action_of_desc(desc, action) {
		pr_err("[<%p>] %ps", action->handler, action->handler);
		if (action->thread_fn)
			pr_cont(" threaded [<%p>] %ps", action->thread_fn, action->thread_fn);
		pr_cont("\n");
	}
}

static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
{
	static int count = 100;

	if (count > 0) {
		count--;
		__report_bad_irq(desc, action_ret);
	}
}

static inline bool try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
				     irqreturn_t action_ret)
{
	struct irqaction *action;

	if (!irqfixup)
		return false;

	/* We didn't actually handle the IRQ - see if it was misrouted? */
	if (action_ret == IRQ_NONE)
		return true;

	/*
	 * But for 'irqfixup == 2' we also do it for handled interrupts if
	 * they are marked as IRQF_IRQPOLL (or for irq zero, which is the
	 * traditional PC timer interrupt - a legacy special case).
	 */
	if (irqfixup < 2)
		return false;

	if (!irq)
		return true;

	/*
	 * Since we don't get the descriptor lock, "action" can
	 * change under us.
	 */
	action = READ_ONCE(desc->action);
	return action && (action->flags & IRQF_IRQPOLL);
}

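/*
 * Bit 31 of threads_handled_last flags that spurious detection has been
 * deferred to the next hardware interrupt; see the IRQ_WAKE_THREAD
 * handling in note_interrupt() below.
 */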
#define SPURIOUS_DEFERRED	0x80000000

void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
{
	unsigned int irq;

	if (desc->istate & IRQS_POLL_INPROGRESS || irq_settings_is_polled(desc))
		return;

	if (bad_action_ret(action_ret)) {
		report_bad_irq(desc, action_ret);
		return;
	}

	/*
	 * We cannot call note_interrupt from the threaded handler
	 * because we need to look at the combined result of all
	 * handlers (primary and threaded). Aside from that, in the
	 * threaded shared case we have no serialization against an
	 * incoming hardware interrupt while we are dealing with a
	 * threaded result.
	 *
	 * So in case a thread is woken, we just note the fact and
	 * defer the analysis to the next hardware interrupt.
	 *
	 * The threaded handlers store whether they successfully
	 * handled an interrupt and we check whether that number
	 * changed versus the last invocation.
	 *
	 * We could handle all interrupts with this delayed-by-one
	 * mechanism, but for the non-forced threaded case we'd just
	 * add pointless overhead to the straight hardirq interrupts
	 * just to save a few lines of code.
	 */
	if (action_ret & IRQ_WAKE_THREAD) {
		/*
		 * There is a thread woken. Check whether one of the
		 * shared primary handlers returned IRQ_HANDLED. If
		 * not we defer the spurious detection to the next
		 * interrupt.
		 */
		if (action_ret == IRQ_WAKE_THREAD) {
			int handled;
			/*
			 * We use bit 31 of threads_handled_last to
			 * denote that deferred spurious detection is
			 * active. No locking necessary as
			 * threads_handled_last is only accessed here
			 * and we have the guarantee that hard
			 * interrupts are not reentrant.
			 */
			if (!(desc->threads_handled_last & SPURIOUS_DEFERRED)) {
				desc->threads_handled_last |= SPURIOUS_DEFERRED;
				return;
			}
			/*
			 * Check whether one of the threaded handlers
			 * returned IRQ_HANDLED since the last
			 * interrupt happened.
			 *
			 * For simplicity we just set bit 31, as it is
			 * set in threads_handled_last as well. So we
			 * avoid extra masking. And we really do not
			 * care about the high bits of the handled
			 * count. We just care about the count being
			 * different from the one we saw before.
			 */
			handled = atomic_read(&desc->threads_handled);
			handled |= SPURIOUS_DEFERRED;
			if (handled != desc->threads_handled_last) {
				action_ret = IRQ_HANDLED;
				/*
				 * Note: We keep the SPURIOUS_DEFERRED
				 * bit set. We are handling the
				 * previous invocation right now.
				 * Keep it for the current one, so the
				 * next hardware interrupt will
				 * account for it.
				 */
				desc->threads_handled_last = handled;
			} else {
				/*
				 * None of the threaded handlers felt
				 * responsible for the last interrupt.
				 *
				 * We keep the SPURIOUS_DEFERRED bit
				 * set in threads_handled_last as we
				 * need to account for the current
				 * interrupt as well.
				 */
				action_ret = IRQ_NONE;
			}
		} else {
			/*
			 * One of the primary handlers returned
			 * IRQ_HANDLED. So we don't care about the
			 * threaded handlers on the same line. Clear
			 * the deferred detection bit.
			 *
			 * In theory we could/should check whether the
			 * deferred bit is set and take the result of
			 * the previous run into account here as
			 * well. But it's really not worth the
			 * trouble. If every other interrupt is
			 * handled we never trigger the spurious
			 * detector. And if this is just the one out
			 * of 100k unhandled ones which is handled
			 * then we merely delay the spurious detection
			 * by one hard interrupt. Not a real problem.
			 */
			desc->threads_handled_last &= ~SPURIOUS_DEFERRED;
		}
	}

	if (unlikely(action_ret == IRQ_NONE)) {
		/*
		 * If we are seeing only the odd spurious IRQ caused by
		 * bus asynchronicity, then don't eventually trigger an
		 * error; otherwise the counter becomes a doomsday timer
		 * for otherwise working systems.
		 */
		if (time_after(jiffies, desc->last_unhandled + HZ/10))
			desc->irqs_unhandled = 1;
		else
			desc->irqs_unhandled++;
		desc->last_unhandled = jiffies;
	}

	irq = irq_desc_get_irq(desc);
	if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
		int ok = misrouted_irq(irq);
		if (action_ret == IRQ_NONE)
			desc->irqs_unhandled -= ok;
	}

	if (likely(!desc->irqs_unhandled))
		return;

	/* Now getting into unhandled irq detection */
	desc->irq_count++;
	if (likely(desc->irq_count < 100000))
		return;

	desc->irq_count = 0;
	if (unlikely(desc->irqs_unhandled > 99900)) {
		/*
		 * The interrupt is stuck
		 */
		__report_bad_irq(desc, action_ret);
		/*
		 * Now kill the IRQ
		 */
		pr_emerg("Disabling IRQ #%d\n", irq);
		desc->istate |= IRQS_SPURIOUS_DISABLED;
		desc->depth++;
		irq_disable(desc);

		mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
	}
	desc->irqs_unhandled = 0;
}

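/*
 * Boot-time controls: "noirqdebug" disables the spurious-IRQ detector,
 * "irqfixup" enables the misrouted-IRQ recovery above, and "irqpoll"
 * (irqfixup == 2) additionally polls on handled interrupts that are
 * marked IRQF_IRQPOLL (or on the legacy timer, IRQ 0).
 */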
bool noirqdebug __read_mostly;

int noirqdebug_setup(char *str)
{
	noirqdebug = 1;
	pr_info("IRQ lockup detection disabled\n");
	return 1;
}
__setup("noirqdebug", noirqdebug_setup);
module_param(noirqdebug, bool, 0644);
MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");

static int __init irqfixup_setup(char *str)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
		pr_warn("irqfixup boot option not supported with PREEMPT_RT\n");
		return 1;
	}
	irqfixup = 1;
	pr_warn("Misrouted IRQ fixup support enabled.\n");
	pr_warn("This may impact system performance.\n");
	return 1;
}
__setup("irqfixup", irqfixup_setup);
module_param(irqfixup, int, 0644);

static int __init irqpoll_setup(char *str)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
		pr_warn("irqpoll boot option not supported with PREEMPT_RT\n");
		return 1;
	}
	irqfixup = 2;
	pr_warn("Misrouted IRQ fixup and polling support enabled\n");
	pr_warn("This may significantly impact system performance\n");
	return 1;
}
__setup("irqpoll", irqpoll_setup);