1 /*
2  * eeh.c
3  * Copyright IBM Corporation 2001, 2005, 2006
4  * Copyright Dave Engebretsen & Todd Inglett 2001
5  * Copyright Linas Vepstas 2005, 2006
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
20  *
21  * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/sched.h>	/* for init_mm */
26 #include <linux/init.h>
27 #include <linux/list.h>
28 #include <linux/pci.h>
29 #include <linux/proc_fs.h>
30 #include <linux/rbtree.h>
31 #include <linux/seq_file.h>
32 #include <linux/spinlock.h>
33 #include <linux/export.h>
34 #include <linux/of.h>
35 
36 #include <linux/atomic.h>
37 #include <asm/eeh.h>
38 #include <asm/eeh_event.h>
39 #include <asm/io.h>
40 #include <asm/machdep.h>
41 #include <asm/ppc-pci.h>
42 #include <asm/rtas.h>
43 
44 
45 /** Overview:
46  *  EEH, or "Extended Error Handling" is a PCI bridge technology for
47  *  dealing with PCI bus errors that can't be dealt with within the
48  *  usual PCI framework, except by check-stopping the CPU.  Systems
49  *  that are designed for high-availability/reliability cannot afford
50  *  to crash due to a "mere" PCI error, thus the need for EEH.
51  *  An EEH-capable bridge operates by converting a detected error
52  *  into a "slot freeze", taking the PCI adapter off-line, making
53  *  the slot behave, from the OS'es point of view, as if the slot
54  *  were "empty": all reads return 0xff's and all writes are silently
55  *  ignored.  EEH slot isolation events can be triggered by parity
56  *  errors on the address or data busses (e.g. during posted writes),
57  *  which in turn might be caused by low voltage on the bus, dust,
58  *  vibration, humidity, radioactivity or plain-old failed hardware.
59  *
60  *  Note, however, that one of the leading causes of EEH slot
61  *  freeze events is buggy device drivers, buggy device microcode,
62  *  or buggy device hardware.  This is because any attempt by the
63  *  device to bus-master data to a memory address that is not
64  *  assigned to the device will trigger a slot freeze.   (The idea
65  *  is to prevent devices-gone-wild from corrupting system memory).
66  *  Buggy hardware/drivers will have a miserable time co-existing
67  *  with EEH.
68  *
69  *  Ideally, a PCI device driver that suspects an isolation event
70  *  has occurred (e.g. because it reads all 0xff's) will ask EEH
71  *  whether this is the case and, if so, take appropriate steps to
72  *  reset the PCI slot and the PCI device, and then resume operations.
73  *  However, until that day, the checking is done here, with the
74  *  eeh_check_failure() routine embedded in the MMIO macros.  If
75  *  the slot is found to be isolated, an "EEH Event" is synthesized
76  *  and sent out for processing.
77  */
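
/* As a rough illustration only (the real accessors live in asm/eeh.h
 * and asm/io.h, and differ in detail), an EEH-aware MMIO read is
 * conceptually:
 *
 *	val = in_le32(addr);
 *	if (val == (u32)~0)
 *		val = eeh_check_failure(addr, val);
 *	return val;
 *
 * That is, an all-1s result triggers the (expensive) firmware query,
 * and eeh_check_failure() queues a recovery event if the slot really
 * is frozen.
 */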
78 
79 /* If a device driver keeps reading an MMIO register in an interrupt
80  * handler after a slot isolation event, it might be broken.
81  * This sets the threshold for how many read attempts we allow
82  * before printing an error message.
83  */
84 #define EEH_MAX_FAILS	2100000
85 
86 /* Time to wait for a PCI slot to report status, in milliseconds */
87 #define PCI_BUS_RESET_WAIT_MSEC (60*1000)
88 
89 /* RTAS tokens */
90 static int ibm_set_eeh_option;
91 static int ibm_set_slot_reset;
92 static int ibm_read_slot_reset_state;
93 static int ibm_read_slot_reset_state2;
94 static int ibm_slot_error_detail;
95 static int ibm_get_config_addr_info;
96 static int ibm_get_config_addr_info2;
97 static int ibm_configure_bridge;
98 static int ibm_configure_pe;
99 
100 int eeh_subsystem_enabled;
101 EXPORT_SYMBOL(eeh_subsystem_enabled);
102 
103 /* Lock to avoid races due to multiple reports of an error */
104 static DEFINE_RAW_SPINLOCK(confirm_error_lock);
105 
106 /* Buffer for reporting slot-error-detail rtas calls. It's here
107  * in BSS, and not dynamically allocated, so that it ends up in
108  * the RMO where RTAS can access it.
109  */
110 static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
111 static DEFINE_SPINLOCK(slot_errbuf_lock);
112 static int eeh_error_buf_size;
113 
114 /* Buffer for reporting pci register dumps. It's here in BSS, and
115  * not dynamically allocated, so that it ends up in the RMO where RTAS
116  * can access it.
117  */
118 #define EEH_PCI_REGS_LOG_LEN 4096
119 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
120 
121 /* System monitoring statistics */
122 static unsigned long no_device;
123 static unsigned long no_dn;
124 static unsigned long no_cfg_addr;
125 static unsigned long ignored_check;
126 static unsigned long total_mmio_ffs;
127 static unsigned long false_positives;
128 static unsigned long slot_resets;
129 
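/* Note: class_code here is the 24-bit PCI class word, with the base
 * class in bits 23:16, sub-class in bits 15:8 and prog-if in bits 7:0.
 */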
130 #define IS_BRIDGE(class_code) (((class_code)>>16) == PCI_BASE_CLASS_BRIDGE)
131 
132 /* --------------------------------------------------------------- */
133 /* Below lies the EEH event infrastructure */
134 
135 static void rtas_slot_error_detail(struct pci_dn *pdn, int severity,
136                                    char *driver_log, size_t loglen)
137 {
138 	int config_addr;
139 	unsigned long flags;
140 	int rc;
141 
142 	/* Log the error with the rtas logger */
143 	spin_lock_irqsave(&slot_errbuf_lock, flags);
144 	memset(slot_errbuf, 0, eeh_error_buf_size);
145 
146 	/* Use PE configuration address, if present */
147 	config_addr = pdn->eeh_config_addr;
148 	if (pdn->eeh_pe_config_addr)
149 		config_addr = pdn->eeh_pe_config_addr;
150 
151 	rc = rtas_call(ibm_slot_error_detail,
152 	               8, 1, NULL, config_addr,
153 	               BUID_HI(pdn->phb->buid),
154 	               BUID_LO(pdn->phb->buid),
155 	               virt_to_phys(driver_log), loglen,
156 	               virt_to_phys(slot_errbuf),
157 	               eeh_error_buf_size,
158 	               severity);
159 
160 	if (rc == 0)
161 		log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
162 	spin_unlock_irqrestore(&slot_errbuf_lock, flags);
163 }
164 
165 /**
166  * gather_pci_data - copy assorted PCI config space registers to buf
167  * @pdn: device to report data for
168  * @buf: pointer to the buffer in which to log
169  * @len: amount of room in buffer
170  *
171  * This routine captures assorted PCI configuration space data,
172  * and puts them into a buffer for RTAS error logging.
173  */
174 static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
175 {
176 	struct pci_dev *dev = pdn->pcidev;
177 	u32 cfg;
178 	int cap, i;
179 	int n = 0;
180 
181 	n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name);
182 	printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name);
183 
184 	rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
185 	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
186 	printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
187 
188 	rtas_read_config(pdn, PCI_COMMAND, 4, &cfg);
189 	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
190 	printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
191 
192 	if (!dev) {
193 		printk(KERN_WARNING "EEH: no PCI device for this of node\n");
194 		return n;
195 	}
196 
197 	/* Gather bridge-specific registers */
198 	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
199 		rtas_read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
200 		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
201 		printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
202 
203 		rtas_read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
204 		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
205 		printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
206 	}
207 
208 	/* Dump out the PCI-X command and status regs */
209 	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
210 	if (cap) {
211 		rtas_read_config(pdn, cap, 4, &cfg);
212 		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
213 		printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
214 
215 		rtas_read_config(pdn, cap+4, 4, &cfg);
216 		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
217 		printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
218 	}
219 
220 	/* If PCI-E capable, dump PCI-E cap 10, and the AER */
221 	cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
222 	if (cap) {
223 		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
224 		printk(KERN_WARNING
225 		       "EEH: PCI-E capabilities and status follow:\n");
226 
227 		for (i=0; i<=8; i++) {
228 			rtas_read_config(pdn, cap+4*i, 4, &cfg);
229 			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
230 			printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
231 		}
232 
233 		cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
234 		if (cap) {
235 			n += scnprintf(buf+n, len-n, "pci-e AER:\n");
236 			printk(KERN_WARNING
237 			       "EEH: PCI-E AER capability register set follows:\n");
238 
239 			for (i=0; i<14; i++) {
240 				rtas_read_config(pdn, cap+4*i, 4, &cfg);
241 				n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
242 				printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
243 			}
244 		}
245 	}
246 
247 	/* Gather status on devices under the bridge */
248 	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
249 		struct device_node *dn;
250 
251 		for_each_child_of_node(pdn->node, dn) {
252 			pdn = PCI_DN(dn);
253 			if (pdn)
254 				n += gather_pci_data(pdn, buf+n, len-n);
255 		}
256 	}
257 
258 	return n;
259 }
260 
261 void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
262 {
263 	size_t loglen = 0;
264 	pci_regs_buf[0] = 0;
265 
266 	rtas_pci_enable(pdn, EEH_THAW_MMIO);
267 	rtas_configure_bridge(pdn);
268 	eeh_restore_bars(pdn);
269 	loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
270 
271 	rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
272 }
273 
274 /**
275  * read_slot_reset_state - Read the reset state of a device node's slot
276  * @pdn: pci device node to read
277  * @rets: array to return results in
278  */
279 static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
280 {
281 	int token, outputs;
282 	int config_addr;
283 
284 	if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
285 		token = ibm_read_slot_reset_state2;
286 		outputs = 4;
287 	} else {
288 		token = ibm_read_slot_reset_state;
289 		rets[2] = 0; /* fake PE Unavailable info */
290 		outputs = 3;
291 	}
292 
293 	/* Use PE configuration address, if present */
294 	config_addr = pdn->eeh_config_addr;
295 	if (pdn->eeh_pe_config_addr)
296 		config_addr = pdn->eeh_pe_config_addr;
297 
298 	return rtas_call(token, 3, outputs, rets, config_addr,
299 			 BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
300 }
301 
302 /**
303  * eeh_wait_for_slot_status - returns error status of slot
304  * @pdn: pci device node
305  * @max_wait_msecs: maximum number of milliseconds to wait
306  *
307  * Return a negative value on a permanent error, else return the
308  * Partition Endpoint (PE) status value.
309  *
310  * If @max_wait_msecs is positive, then this routine will
311  * sleep until a valid status can be obtained, or until
312  * the max allowed wait time is exceeded, in which case
313  * a -2 is returned.
314  */
315 int
316 eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
317 {
318 	int rc;
319 	int rets[3];
320 	int mwait;
321 
322 	while (1) {
323 		rc = read_slot_reset_state(pdn, rets);
324 		if (rc) return rc;
325 		if (rets[1] == 0) return -1;  /* EEH is not supported */
326 
327 		if (rets[0] != 5) return rets[0]; /* return actual status */
328 
329 		if (rets[2] == 0) return -1; /* permanently unavailable */
330 
331 		if (max_wait_msecs <= 0) break;
332 
333 		mwait = rets[2];
334 		if (mwait <= 0) {
335 			printk (KERN_WARNING
336 			        "EEH: Firmware returned bad wait value=%d\n", mwait);
337 			mwait = 1000;
338 		} else if (mwait > 300*1000) {
339 			printk (KERN_WARNING
340 			        "EEH: Firmware is taking too long, time=%d\n", mwait);
341 			mwait = 300*1000;
342 		}
343 		max_wait_msecs -= mwait;
344 		msleep (mwait);
345 	}
346 
347 	printk(KERN_WARNING "EEH: Timed out waiting for slot status\n");
348 	return -2;
349 }
350 
351 /**
352  * eeh_token_to_phys - convert EEH address token to phys address
353  * @token: i/o token, should be an address in the form 0xA....
354  */
355 static inline unsigned long eeh_token_to_phys(unsigned long token)
356 {
357 	pte_t *ptep;
358 	unsigned long pa;
359 
360 	ptep = find_linux_pte(init_mm.pgd, token);
361 	if (!ptep)
362 		return token;
363 	pa = pte_pfn(*ptep) << PAGE_SHIFT;
364 
365 	return pa | (token & (PAGE_SIZE-1));
366 }
367 
368 /**
369  * find_device_pe - return the "partitionable endpoint" (PE) under which this device lies
370  */
371 struct device_node * find_device_pe(struct device_node *dn)
372 {
373 	while ((dn->parent) && PCI_DN(dn->parent) &&
374 	      (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
375 		dn = dn->parent;
376 	}
377 	return dn;
378 }
379 
380 /** Mark all devices that are children of this device as failed.
381  *  Mark the device driver too, so that it can see the failure
382  *  immediately; this is critical, since some drivers poll
383  *  status registers in their interrupt handlers.  If a driver is
384  *  polling while the slot is frozen, it can deadlock in interrupt
385  *  context, which is bad.
386  */
387 
388 static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
389 {
390 	struct device_node *dn;
391 
392 	for_each_child_of_node(parent, dn) {
393 		if (PCI_DN(dn)) {
394 			/* Mark the pci device driver too */
395 			struct pci_dev *dev = PCI_DN(dn)->pcidev;
396 
397 			PCI_DN(dn)->eeh_mode |= mode_flag;
398 
399 			if (dev && dev->driver)
400 				dev->error_state = pci_channel_io_frozen;
401 
402 			__eeh_mark_slot(dn, mode_flag);
403 		}
404 	}
405 }
406 
407 void eeh_mark_slot (struct device_node *dn, int mode_flag)
408 {
409 	struct pci_dev *dev;
410 	dn = find_device_pe (dn);
411 
412 	/* Back up one, since config addrs might be shared */
413 	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
414 		dn = dn->parent;
415 
416 	PCI_DN(dn)->eeh_mode |= mode_flag;
417 
418 	/* Mark the pci device too */
419 	dev = PCI_DN(dn)->pcidev;
420 	if (dev)
421 		dev->error_state = pci_channel_io_frozen;
422 
423 	__eeh_mark_slot(dn, mode_flag);
424 }
425 
426 static void __eeh_clear_slot(struct device_node *parent, int mode_flag)
427 {
428 	struct device_node *dn;
429 
430 	for_each_child_of_node(parent, dn) {
431 		if (PCI_DN(dn)) {
432 			PCI_DN(dn)->eeh_mode &= ~mode_flag;
433 			PCI_DN(dn)->eeh_check_count = 0;
434 			__eeh_clear_slot(dn, mode_flag);
435 		}
436 	}
437 }
438 
439 void eeh_clear_slot (struct device_node *dn, int mode_flag)
440 {
441 	unsigned long flags;
442 	raw_spin_lock_irqsave(&confirm_error_lock, flags);
443 
444 	dn = find_device_pe (dn);
445 
446 	/* Back up one, since config addrs might be shared */
447 	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
448 		dn = dn->parent;
449 
450 	PCI_DN(dn)->eeh_mode &= ~mode_flag;
451 	PCI_DN(dn)->eeh_check_count = 0;
452 	__eeh_clear_slot(dn, mode_flag);
453 	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
454 }
455 
456 void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
457 {
458 	struct device_node *dn;
459 
460 	for_each_child_of_node(parent, dn) {
461 		if (PCI_DN(dn)) {
462 
463 			struct pci_dev *dev = PCI_DN(dn)->pcidev;
464 
465 			if (dev && dev->driver)
466 				*freset |= dev->needs_freset;
467 
468 			__eeh_set_pe_freset(dn, freset);
469 		}
470 	}
471 }
472 
473 void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
474 {
475 	struct pci_dev *dev;
476 	dn = find_device_pe(dn);
477 
478 	/* Back up one, since config addrs might be shared */
479 	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
480 		dn = dn->parent;
481 
482 	dev = PCI_DN(dn)->pcidev;
483 	if (dev)
484 		*freset |= dev->needs_freset;
485 
486 	__eeh_set_pe_freset(dn, freset);
487 }
488 
489 /**
490  * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
491  * @dn: device node
492  * @dev: pci device, if known
493  *
494  * Check for an EEH failure for the given device node.  Call this
495  * routine if the result of a read was all 0xff's and you want to
496  * find out if this is due to an EEH slot freeze.  This routine
497  * will query firmware for the EEH status.
498  *
499  * Returns 0 if there has not been an EEH error; otherwise returns
500  * a non-zero value and queues up a slot isolation event notification.
501  *
502  * It is safe to call this routine in an interrupt context.
503  */
504 int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
505 {
506 	int ret;
507 	int rets[3];
508 	unsigned long flags;
509 	struct pci_dn *pdn;
510 	int rc = 0;
511 	const char *location;
512 
513 	total_mmio_ffs++;
514 
515 	if (!eeh_subsystem_enabled)
516 		return 0;
517 
518 	if (!dn) {
519 		no_dn++;
520 		return 0;
521 	}
522 	dn = find_device_pe(dn);
523 	pdn = PCI_DN(dn);
524 
525 	/* Access to IO BARs might get this far and still not want checking. */
526 	if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
527 	    pdn->eeh_mode & EEH_MODE_NOCHECK) {
528 		ignored_check++;
529 		pr_debug("EEH: Ignored check (%x) for %s %s\n",
530 			 pdn->eeh_mode, eeh_pci_name(dev), dn->full_name);
531 		return 0;
532 	}
533 
534 	if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) {
535 		no_cfg_addr++;
536 		return 0;
537 	}
538 
539 	/* If we already have a pending isolation event for this
540 	 * slot, we know it's bad already, so we don't need to check.
541 	 * Do this checking under a lock, since multiple PCI devices
542 	 * in one slot might report errors simultaneously and we
543 	 * only want one error recovery routine running.
544 	 */
545 	raw_spin_lock_irqsave(&confirm_error_lock, flags);
546 	rc = 1;
547 	if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
548 		pdn->eeh_check_count ++;
549 		if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) {
550 			location = of_get_property(dn, "ibm,loc-code", NULL);
551 			printk (KERN_ERR "EEH: %d reads ignored for recovering device at "
552 				"location=%s driver=%s pci addr=%s\n",
553 				pdn->eeh_check_count, location,
554 				eeh_driver_name(dev), eeh_pci_name(dev));
555 			printk (KERN_ERR "EEH: Might be infinite loop in %s driver\n",
556 				eeh_driver_name(dev));
557 			dump_stack();
558 		}
559 		goto dn_unlock;
560 	}
561 
562 	/*
563 	 * Now test for an EEH failure.  This is VERY expensive.
564 	 * Note that the eeh_config_addr may be a parent device
565 	 * in the case of a device behind a bridge, or it may be
566 	 * function zero of a multi-function device.
567 	 * In any case they must share a common PHB.
568 	 */
569 	ret = read_slot_reset_state(pdn, rets);
570 
571 	/* If the call to firmware failed, punt */
572 	if (ret != 0) {
573 		printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
574 		       ret, dn->full_name);
575 		false_positives++;
576 		pdn->eeh_false_positives ++;
577 		rc = 0;
578 		goto dn_unlock;
579 	}
580 
581 	/* Note that config-io to empty slots may fail;
582 	 * they are empty when they don't have children. */
583 	if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) {
584 		false_positives++;
585 		pdn->eeh_false_positives ++;
586 		rc = 0;
587 		goto dn_unlock;
588 	}
589 
590 	/* If EEH is not supported on this device, punt. */
591 	if (rets[1] != 1) {
592 		printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
593 		       ret, dn->full_name);
594 		false_positives++;
595 		pdn->eeh_false_positives ++;
596 		rc = 0;
597 		goto dn_unlock;
598 	}
599 
600 	/* If not the kind of error we know about, punt. */
601 	if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
602 		false_positives++;
603 		pdn->eeh_false_positives ++;
604 		rc = 0;
605 		goto dn_unlock;
606 	}
607 
608 	slot_resets++;
609 
610 	/* Avoid repeated reports of this failure, including problems
611 	 * with other functions on this device, and functions under
612 	 * bridges. */
613 	eeh_mark_slot (dn, EEH_MODE_ISOLATED);
614 	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
615 
616 	eeh_send_failure_event (dn, dev);
617 
618 	/* Most EEH events are due to device driver bugs.  Having
619 	 * a stack trace will help the device-driver authors figure
620 	 * out what happened.  So print that out. */
621 	dump_stack();
622 	return 1;
623 
624 dn_unlock:
625 	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
626 	return rc;
627 }
628 
629 EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
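
/* Purely hypothetical driver-side sketch (not taken from any real
 * driver): a driver that suspects a slot freeze after reading all-1s
 * could confirm it explicitly, e.g.
 *
 *	if (readl(regs + MY_STATUS_REG) == ~0U &&
 *	    eeh_dn_check_failure(pci_device_to_OF_node(pdev), pdev))
 *		return -EIO;
 *
 * at which point a recovery event has already been queued; "regs",
 * "MY_STATUS_REG" and "pdev" are stand-ins for the driver's own state.
 */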
630 
631 /**
632  * eeh_check_failure - check if all 1's data is due to EEH slot freeze
633  * @token: i/o token, should be an address in the form 0xA....
634  * @val: value, should be all 1's (XXX why do we need this arg??)
635  *
636  * Check for an EEH failure at the given token address.  Call this
637  * routine if the result of a read was all 0xff's and you want to
638  * find out if this is due to an EEH slot freeze event.  This routine
639  * will query firmware for the EEH status.
640  *
641  * Note this routine is safe to call in an interrupt context.
642  */
643 unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
644 {
645 	unsigned long addr;
646 	struct pci_dev *dev;
647 	struct device_node *dn;
648 
649 	/* Finding the phys addr + pci device; this is pretty quick. */
650 	addr = eeh_token_to_phys((unsigned long __force) token);
651 	dev = pci_get_device_by_addr(addr);
652 	if (!dev) {
653 		no_device++;
654 		return val;
655 	}
656 
657 	dn = pci_device_to_OF_node(dev);
658 	eeh_dn_check_failure (dn, dev);
659 
660 	pci_dev_put(dev);
661 	return val;
662 }
663 
664 EXPORT_SYMBOL(eeh_check_failure);
665 
666 /* ------------------------------------------------------------- */
667 /* The code below deals with error recovery */
668 
669 /**
670  * rtas_pci_enable - enable MMIO or DMA transfers for this slot
671  * @pdn: pci device node
672  */
673 
674 int
675 rtas_pci_enable(struct pci_dn *pdn, int function)
676 {
677 	int config_addr;
678 	int rc;
679 
680 	/* Use PE configuration address, if present */
681 	config_addr = pdn->eeh_config_addr;
682 	if (pdn->eeh_pe_config_addr)
683 		config_addr = pdn->eeh_pe_config_addr;
684 
685 	rc = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
686 	               config_addr,
687 	               BUID_HI(pdn->phb->buid),
688 	               BUID_LO(pdn->phb->buid),
689 		            function);
690 
691 	if (rc)
692 		printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
693 		        function, rc, pdn->node->full_name);
694 
695 	rc = eeh_wait_for_slot_status (pdn, PCI_BUS_RESET_WAIT_MSEC);
696 	if ((rc == 4) && (function == EEH_THAW_MMIO))
697 		return 0;
698 
699 	return rc;
700 }
701 
702 /**
703  * rtas_pci_slot_reset - raises/lowers the pci #RST line
704  * @pdn: pci device node
705  * @state: 1/0 to raise/lower the #RST
706  *
707  * Clear the EEH-frozen condition on a slot.  This routine
708  * asserts the PCI #RST line if the 'state' argument is '1',
709  * and drops the #RST line if 'state' is '0'.  This routine is
710  * safe to call in an interrupt context.
711  *
712  */
713 
714 static void
715 rtas_pci_slot_reset(struct pci_dn *pdn, int state)
716 {
717 	int config_addr;
718 	int rc;
719 
720 	BUG_ON (pdn==NULL);
721 
722 	if (!pdn->phb) {
723 		printk (KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
724 		        pdn->node->full_name);
725 		return;
726 	}
727 
728 	/* Use PE configuration address, if present */
729 	config_addr = pdn->eeh_config_addr;
730 	if (pdn->eeh_pe_config_addr)
731 		config_addr = pdn->eeh_pe_config_addr;
732 
733 	rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
734 	               config_addr,
735 	               BUID_HI(pdn->phb->buid),
736 	               BUID_LO(pdn->phb->buid),
737 	               state);
738 
739 	/* Fundamental-reset not supported on this PE, try hot-reset */
740 	if (rc == -8 && state == 3) {
741 		rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
742 			       config_addr,
743 			       BUID_HI(pdn->phb->buid),
744 			       BUID_LO(pdn->phb->buid), 1);
745 		if (rc)
746 			printk(KERN_WARNING
747 				"EEH: Unable to reset the failed slot,"
748 				" #RST=%d dn=%s\n",
749 				rc, pdn->node->full_name);
750 	}
751 }
752 
753 /**
754  * pcibios_set_pcie_reset_state - Set PCI-E reset state
755  * @dev:	pci device struct
756  * @state:	reset state to enter
757  *
758  * Return value:
759  * 	0 on success, -EINVAL if @state is not supported
760  **/
761 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
762 {
763 	struct device_node *dn = pci_device_to_OF_node(dev);
764 	struct pci_dn *pdn = PCI_DN(dn);
765 
766 	switch (state) {
767 	case pcie_deassert_reset:
768 		rtas_pci_slot_reset(pdn, 0);
769 		break;
770 	case pcie_hot_reset:
771 		rtas_pci_slot_reset(pdn, 1);
772 		break;
773 	case pcie_warm_reset:
774 		rtas_pci_slot_reset(pdn, 3);
775 		break;
776 	default:
777 		return -EINVAL;
778 	}
779 
780 	return 0;
781 }
782 
783 /**
784  * rtas_set_slot_reset - assert the pci #RST line for 1/4 second
785  * @pdn: pci device node to be reset.
786  */
787 
788 static void __rtas_set_slot_reset(struct pci_dn *pdn)
789 {
790 	unsigned int freset = 0;
791 
792 	/* Determine the type of EEH reset required for the
793 	 * Partitionable Endpoint: a hot-reset (1)
794 	 * or a fundamental reset (3).
795 	 * A fundamental reset required by any device under the
796 	 * Partitionable Endpoint trumps a hot-reset.
797 	 */
798 	eeh_set_pe_freset(pdn->node, &freset);
799 
800 	if (freset)
801 		rtas_pci_slot_reset(pdn, 3);
802 	else
803 		rtas_pci_slot_reset(pdn, 1);
804 
805 	/* The PCI bus requires that the reset be held high for at least
806 	 * 100 milliseconds. We wait a bit longer, just in case.  */
807 
808 #define PCI_BUS_RST_HOLD_TIME_MSEC 250
809 	msleep (PCI_BUS_RST_HOLD_TIME_MSEC);
810 
811 	/* We might get hit with another EEH freeze as soon as the
812 	 * pci slot reset line is dropped. Make sure we don't miss
813 	 * these, and clear the flag now. */
814 	eeh_clear_slot (pdn->node, EEH_MODE_ISOLATED);
815 
816 	rtas_pci_slot_reset (pdn, 0);
817 
818 	/* After a PCI slot has been reset, the PCI Express spec requires
819 	 * a 1.5 second idle time for the bus to stabilize, before starting
820 	 * up traffic. */
821 #define PCI_BUS_SETTLE_TIME_MSEC 1800
822 	msleep (PCI_BUS_SETTLE_TIME_MSEC);
823 }
824 
825 int rtas_set_slot_reset(struct pci_dn *pdn)
826 {
827 	int i, rc;
828 
829 	/* Take three shots at resetting the bus */
830 	for (i=0; i<3; i++) {
831 		__rtas_set_slot_reset(pdn);
832 
833 		rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
834 		if (rc == 0)
835 			return 0;
836 
837 		if (rc < 0) {
838 			printk(KERN_ERR "EEH: unrecoverable slot failure %s\n",
839 			       pdn->node->full_name);
840 			return -1;
841 		}
842 		printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n",
843 		       i+1, pdn->node->full_name, rc);
844 	}
845 
846 	return -1;
847 }
848 
849 /* ------------------------------------------------------- */
850 /** Save and restore of PCI BARs
851  *
852  * Although firmware will set up BARs during boot, it doesn't
853  * set up device BARs after a device reset, though it will,
854  * if requested, set up bridge configuration. Thus, we need to
855  * configure the PCI devices ourselves.
856  */
857 
858 /**
859  * __restore_bars - Restore the Base Address Registers
860  * @pdn: pci device node
861  *
862  * Loads the PCI configuration space base address registers,
863  * the expansion ROM base address, the latency timer, etc.,
864  * from the saved values in the device node.
865  */
866 static inline void __restore_bars (struct pci_dn *pdn)
867 {
868 	int i;
869 	u32 cmd;
870 
871 	if (NULL==pdn->phb) return;
872 	for (i=4; i<10; i++) {
873 		rtas_write_config(pdn, i*4, 4, pdn->config_space[i]);
874 	}
875 
876 	/* 12 == Expansion ROM Address */
877 	rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]);
878 
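	/* The config_space[] words were saved with 4-byte config reads and
	 * are kept in host (big-endian) order, so fetching an individual
	 * saved config byte means reversing the byte order within each
	 * 32-bit word; that is what BYTE_SWAP/SAVED_BYTE below do.
	 */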
879 #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
880 #define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)])
881 
882 	rtas_write_config (pdn, PCI_CACHE_LINE_SIZE, 1,
883 	            SAVED_BYTE(PCI_CACHE_LINE_SIZE));
884 
885 	rtas_write_config (pdn, PCI_LATENCY_TIMER, 1,
886 	            SAVED_BYTE(PCI_LATENCY_TIMER));
887 
888 	/* max latency, min grant, interrupt pin and line */
889 	rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]);
890 
891 	/* Restore PERR & SERR bits; some devices require it.
892 	   Don't touch the other command bits. */
893 	rtas_read_config(pdn, PCI_COMMAND, 4, &cmd);
894 	if (pdn->config_space[1] & PCI_COMMAND_PARITY)
895 		cmd |= PCI_COMMAND_PARITY;
896 	else
897 		cmd &= ~PCI_COMMAND_PARITY;
898 	if (pdn->config_space[1] & PCI_COMMAND_SERR)
899 		cmd |= PCI_COMMAND_SERR;
900 	else
901 		cmd &= ~PCI_COMMAND_SERR;
902 	rtas_write_config(pdn, PCI_COMMAND, 4, cmd);
903 }
904 
905 /**
906  * eeh_restore_bars - restore the PCI config space info
907  *
908  * This routine performs a recursive walk to the children
909  * of this device as well.
910  */
911 void eeh_restore_bars(struct pci_dn *pdn)
912 {
913 	struct device_node *dn;
914 	if (!pdn)
915 		return;
916 
917 	if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))
918 		__restore_bars (pdn);
919 
920 	for_each_child_of_node(pdn->node, dn)
921 		eeh_restore_bars (PCI_DN(dn));
922 }
923 
924 /**
925  * eeh_save_bars - save device bars
926  *
927  * Save the values of the device bars. Unlike the restore
928  * routine, this routine is *not* recursive. This is because
929  * PCI devices are added individually; but, for the restore,
930  * an entire slot is reset at a time.
931  */
932 static void eeh_save_bars(struct pci_dn *pdn)
933 {
934 	int i;
935 
936 	if (!pdn )
937 		return;
938 
939 	for (i = 0; i < 16; i++)
940 		rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]);
941 }
942 
943 void
944 rtas_configure_bridge(struct pci_dn *pdn)
945 {
946 	int config_addr;
947 	int rc;
948 	int token;
949 
950 	/* Use PE configuration address, if present */
951 	config_addr = pdn->eeh_config_addr;
952 	if (pdn->eeh_pe_config_addr)
953 		config_addr = pdn->eeh_pe_config_addr;
954 
955 	/* Use new configure-pe function, if supported */
956 	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
957 		token = ibm_configure_pe;
958 	else
959 		token = ibm_configure_bridge;
960 
961 	rc = rtas_call(token, 3, 1, NULL,
962 	               config_addr,
963 	               BUID_HI(pdn->phb->buid),
964 	               BUID_LO(pdn->phb->buid));
965 	if (rc) {
966 		printk (KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n",
967 		        rc, pdn->node->full_name);
968 	}
969 }
970 
971 /* ------------------------------------------------------------- */
972 /* The code below deals with enabling EEH for devices during the
973  * early boot sequence.  EEH must be enabled before any PCI probing
974  * can be done.
975  */
976 
977 #define EEH_ENABLE 1
978 
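/* RTAS takes the 64-bit PHB BUID as two 32-bit arguments (BUID_HI and
 * BUID_LO), so the pre-split halves are carried around in this struct.
 */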
979 struct eeh_early_enable_info {
980 	unsigned int buid_hi;
981 	unsigned int buid_lo;
982 };
983 
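/* Ask firmware for the PE (Partitionable Endpoint) address matching a
 * config address, returning 0 if none is available.  The PE address
 * names the whole isolation/reset domain, which may cover several
 * functions or devices, rather than an individual function.
 */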
984 static int get_pe_addr (int config_addr,
985                         struct eeh_early_enable_info *info)
986 {
987 	unsigned int rets[3];
988 	int ret;
989 
990 	/* Use latest config-addr token on power6 */
991 	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
992 		/* Make sure we have a PE in hand */
993 		ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets,
994 			config_addr, info->buid_hi, info->buid_lo, 1);
995 		if (ret || (rets[0]==0))
996 			return 0;
997 
998 		ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets,
999 			config_addr, info->buid_hi, info->buid_lo, 0);
1000 		if (ret)
1001 			return 0;
1002 		return rets[0];
1003 	}
1004 
1005 	/* Use older config-addr token on power5 */
1006 	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
1007 		ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets,
1008 			config_addr, info->buid_hi, info->buid_lo, 0);
1009 		if (ret)
1010 			return 0;
1011 		return rets[0];
1012 	}
1013 	return 0;
1014 }
1015 
1016 /* Enable eeh for the given device node. */
1017 static void *early_enable_eeh(struct device_node *dn, void *data)
1018 {
1019 	unsigned int rets[3];
1020 	struct eeh_early_enable_info *info = data;
1021 	int ret;
1022 	const u32 *class_code = of_get_property(dn, "class-code", NULL);
1023 	const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
1024 	const u32 *device_id = of_get_property(dn, "device-id", NULL);
1025 	const u32 *regs;
1026 	int enable;
1027 	struct pci_dn *pdn = PCI_DN(dn);
1028 
1029 	pdn->class_code = 0;
1030 	pdn->eeh_mode = 0;
1031 	pdn->eeh_check_count = 0;
1032 	pdn->eeh_freeze_count = 0;
1033 	pdn->eeh_false_positives = 0;
1034 
1035 	if (!of_device_is_available(dn))
1036 		return NULL;
1037 
1038 	/* Ignore bad nodes. */
1039 	if (!class_code || !vendor_id || !device_id)
1040 		return NULL;
1041 
1042 	/* There is nothing to check on PCI to ISA bridges */
1043 	if (dn->type && !strcmp(dn->type, "isa")) {
1044 		pdn->eeh_mode |= EEH_MODE_NOCHECK;
1045 		return NULL;
1046 	}
1047 	pdn->class_code = *class_code;
1048 
1049 	/* Ok... see if this device supports EEH.  Some do, some don't,
1050 	 * and the only way to find out is to check each and every one. */
1051 	regs = of_get_property(dn, "reg", NULL);
1052 	if (regs) {
1053 		/* First register entry is addr (00BBSS00)  */
1054 		/* Try to enable eeh */
1055 		ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
1056 		                regs[0], info->buid_hi, info->buid_lo,
1057 		                EEH_ENABLE);
1058 
1059 		enable = 0;
1060 		if (ret == 0) {
1061 			pdn->eeh_config_addr = regs[0];
1062 
1063 			/* If the newer, better, ibm,get-config-addr-info is supported,
1064 			 * then use that instead. */
1065 			pdn->eeh_pe_config_addr = get_pe_addr(pdn->eeh_config_addr, info);
1066 
1067 			/* Some older systems (Power4) allow the
1068 			 * ibm,set-eeh-option call to succeed even on nodes
1069 			 * where EEH is not supported. Verify support
1070 			 * explicitly. */
1071 			ret = read_slot_reset_state(pdn, rets);
1072 			if ((ret == 0) && (rets[1] == 1))
1073 				enable = 1;
1074 		}
1075 
1076 		if (enable) {
1077 			eeh_subsystem_enabled = 1;
1078 			pdn->eeh_mode |= EEH_MODE_SUPPORTED;
1079 
1080 			pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n",
1081 				 dn->full_name, pdn->eeh_config_addr,
1082 				 pdn->eeh_pe_config_addr);
1083 		} else {
1084 
1085 			/* This device doesn't support EEH, but it may have an
1086 			 * EEH parent, in which case we mark it as supported. */
1087 			if (dn->parent && PCI_DN(dn->parent)
1088 			    && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
1089 				/* Parent supports EEH. */
1090 				pdn->eeh_mode |= EEH_MODE_SUPPORTED;
1091 				pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
1092 				return NULL;
1093 			}
1094 		}
1095 	} else {
1096 		printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
1097 		       dn->full_name);
1098 	}
1099 
1100 	eeh_save_bars(pdn);
1101 	return NULL;
1102 }
1103 
1104 /*
1105  * Initialize EEH by trying to enable it for all of the adapters in the system.
1106  * As a side effect we can determine here if eeh is supported at all.
1107  * Note that we leave EEH on so failed config cycles won't cause a machine
1108  * check.  If a user turns off EEH for a particular adapter they are really
1109  * telling Linux to ignore errors.  Some hardware (e.g. POWER5) won't
1110  * grant access to a slot if EEH isn't enabled, and so we always enable
1111  * EEH for all slots/all devices.
1112  *
1113  * The eeh-force-off option disables EEH checking globally, for all slots.
1114  * Even if force-off is set, the EEH hardware is still enabled, so that
1115  * newer systems can boot.
1116  */
1117 void __init eeh_init(void)
1118 {
1119 	struct device_node *phb, *np;
1120 	struct eeh_early_enable_info info;
1121 
1122 	raw_spin_lock_init(&confirm_error_lock);
1123 	spin_lock_init(&slot_errbuf_lock);
1124 
1125 	np = of_find_node_by_path("/rtas");
1126 	if (np == NULL)
1127 		return;
1128 
1129 	ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
1130 	ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
1131 	ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
1132 	ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
1133 	ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
1134 	ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
1135 	ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
1136 	ibm_configure_bridge = rtas_token ("ibm,configure-bridge");
1137 	ibm_configure_pe = rtas_token("ibm,configure-pe");
1138 
1139 	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
1140 		return;
1141 
1142 	eeh_error_buf_size = rtas_token("rtas-error-log-max");
1143 	if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
1144 		eeh_error_buf_size = 1024;
1145 	}
1146 	if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
1147 		printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
1148 		      "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
1149 		eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
1150 	}
1151 
1152 	/* Enable EEH for all adapters.  Note that eeh requires BUIDs */
1153 	for (phb = of_find_node_by_name(NULL, "pci"); phb;
1154 	     phb = of_find_node_by_name(phb, "pci")) {
1155 		unsigned long buid;
1156 
1157 		buid = get_phb_buid(phb);
1158 		if (buid == 0 || PCI_DN(phb) == NULL)
1159 			continue;
1160 
1161 		info.buid_lo = BUID_LO(buid);
1162 		info.buid_hi = BUID_HI(buid);
1163 		traverse_pci_devices(phb, early_enable_eeh, &info);
1164 	}
1165 
1166 	if (eeh_subsystem_enabled)
1167 		printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
1168 	else
1169 		printk(KERN_WARNING "EEH: No capable adapters found\n");
1170 }
1171 
1172 /**
1173  * eeh_add_device_early - enable EEH for the indicated device_node
1174  * @dn: device node for which to set up EEH
1175  *
1176  * This routine must be used to perform EEH initialization for PCI
1177  * devices that were added after system boot (e.g. hotplug, dlpar).
1178  * This routine must be called before any i/o is performed to the
1179  * adapter (including any config-space i/o).
1180  * Whether this actually enables EEH or not for this device depends
1181  * on the CEC architecture, the type of the device, earlier boot
1182  * command-line arguments, and so on.
1183  */
1184 static void eeh_add_device_early(struct device_node *dn)
1185 {
1186 	struct pci_controller *phb;
1187 	struct eeh_early_enable_info info;
1188 
1189 	if (!dn || !PCI_DN(dn))
1190 		return;
1191 	phb = PCI_DN(dn)->phb;
1192 
1193 	/* USB Bus children of PCI devices will not have BUIDs */
1194 	if (NULL == phb || 0 == phb->buid)
1195 		return;
1196 
1197 	info.buid_hi = BUID_HI(phb->buid);
1198 	info.buid_lo = BUID_LO(phb->buid);
1199 	early_enable_eeh(dn, &info);
1200 }
1201 
1202 void eeh_add_device_tree_early(struct device_node *dn)
1203 {
1204 	struct device_node *sib;
1205 
1206 	for_each_child_of_node(dn, sib)
1207 		eeh_add_device_tree_early(sib);
1208 	eeh_add_device_early(dn);
1209 }
1210 EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
1211 
1212 /**
1213  * eeh_add_device_late - perform EEH initialization for the indicated pci device
1214  * @dev: pci device for which to set up EEH
1215  *
1216  * This routine must be used to complete EEH initialization for PCI
1217  * devices that were added after system boot (e.g. hotplug, dlpar).
1218  */
1219 static void eeh_add_device_late(struct pci_dev *dev)
1220 {
1221 	struct device_node *dn;
1222 	struct pci_dn *pdn;
1223 
1224 	if (!dev || !eeh_subsystem_enabled)
1225 		return;
1226 
1227 	pr_debug("EEH: Adding device %s\n", pci_name(dev));
1228 
1229 	dn = pci_device_to_OF_node(dev);
1230 	pdn = PCI_DN(dn);
1231 	if (pdn->pcidev == dev) {
1232 		pr_debug("EEH: Already referenced !\n");
1233 		return;
1234 	}
1235 	WARN_ON(pdn->pcidev);
1236 
1237 	pci_dev_get (dev);
1238 	pdn->pcidev = dev;
1239 
1240 	pci_addr_cache_insert_device(dev);
1241 	eeh_sysfs_add_device(dev);
1242 }
1243 
1244 void eeh_add_device_tree_late(struct pci_bus *bus)
1245 {
1246 	struct pci_dev *dev;
1247 
1248 	list_for_each_entry(dev, &bus->devices, bus_list) {
1249  		eeh_add_device_late(dev);
1250  		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
1251  			struct pci_bus *subbus = dev->subordinate;
1252  			if (subbus)
1253  				eeh_add_device_tree_late(subbus);
1254  		}
1255 	}
1256 }
1257 EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
1258 
1259 /**
1260  * eeh_remove_device - undo EEH setup for the indicated pci device
1261  * @dev: pci device to be removed
1262  *
1263  * This routine should be called when a device is removed from
1264  * a running system (e.g. by hotplug or dlpar).  It unregisters
1265  * the PCI device from the EEH subsystem.  I/O errors affecting
1266  * this device will no longer be detected after this call; thus,
1267  * i/o errors affecting this slot may leave this device unusable.
1268  */
1269 static void eeh_remove_device(struct pci_dev *dev)
1270 {
1271 	struct device_node *dn;
1272 	if (!dev || !eeh_subsystem_enabled)
1273 		return;
1274 
1275 	/* Unregister the device with the EEH/PCI address search system */
1276 	pr_debug("EEH: Removing device %s\n", pci_name(dev));
1277 
1278 	dn = pci_device_to_OF_node(dev);
1279 	if (PCI_DN(dn)->pcidev == NULL) {
1280 		pr_debug("EEH: Not referenced !\n");
1281 		return;
1282 	}
1283 	PCI_DN(dn)->pcidev = NULL;
1284 	pci_dev_put (dev);
1285 
1286 	pci_addr_cache_remove_device(dev);
1287 	eeh_sysfs_remove_device(dev);
1288 }
1289 
1290 void eeh_remove_bus_device(struct pci_dev *dev)
1291 {
1292 	struct pci_bus *bus = dev->subordinate;
1293 	struct pci_dev *child, *tmp;
1294 
1295 	eeh_remove_device(dev);
1296 
1297 	if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
1298 		list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
1299 			 eeh_remove_bus_device(child);
1300 	}
1301 }
1302 EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
1303 
1304 static int proc_eeh_show(struct seq_file *m, void *v)
1305 {
1306 	if (0 == eeh_subsystem_enabled) {
1307 		seq_printf(m, "EEH Subsystem is globally disabled\n");
1308 		seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
1309 	} else {
1310 		seq_printf(m, "EEH Subsystem is enabled\n");
1311 		seq_printf(m,
1312 				"no device=%ld\n"
1313 				"no device node=%ld\n"
1314 				"no config address=%ld\n"
1315 				"check not wanted=%ld\n"
1316 				"eeh_total_mmio_ffs=%ld\n"
1317 				"eeh_false_positives=%ld\n"
1318 				"eeh_slot_resets=%ld\n",
1319 				no_device, no_dn, no_cfg_addr,
1320 				ignored_check, total_mmio_ffs,
1321 				false_positives,
1322 				slot_resets);
1323 	}
1324 
1325 	return 0;
1326 }
1327 
1328 static int proc_eeh_open(struct inode *inode, struct file *file)
1329 {
1330 	return single_open(file, proc_eeh_show, NULL);
1331 }
1332 
1333 static const struct file_operations proc_eeh_operations = {
1334 	.open      = proc_eeh_open,
1335 	.read      = seq_read,
1336 	.llseek    = seq_lseek,
1337 	.release   = single_release,
1338 };
1339 
1340 static int __init eeh_init_proc(void)
1341 {
1342 	if (machine_is(pseries))
1343 		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
1344 	return 0;
1345 }
1346 __initcall(eeh_init_proc);
1347