powerpc/eeh: Fix missed PE#0 on P7IOC
author Gavin Shan <gwshan@linux.vnet.ibm.com>
Mon, 24 Nov 2014 22:27:00 +0000 (09:27 +1100)
committer Michael Ellerman <mpe@ellerman.id.au>
Fri, 23 Jan 2015 03:02:52 +0000 (14:02 +1100)
PE#0 should be regarded as valid for P7IOC, while it is invalid for
PHB3. The patch adds the flag EEH_VALID_PE_ZERO to differentiate these
two cases. Without the patch, the frozen state of PE#0 on P7IOC may be
cleared without any EEH recovery being performed, as the following
kernel log indicates:

[root@ltcfbl8eb ~]# dmesg
       :
pci 0000:00     : [PE# 000] Secondary bus 0 associated with PE#0
pci 0000:01     : [PE# 001] Secondary bus 1 associated with PE#1
pci 0001:00     : [PE# 000] Secondary bus 0 associated with PE#0
pci 0001:01     : [PE# 001] Secondary bus 1 associated with PE#1
pci 0002:00     : [PE# 000] Secondary bus 0 associated with PE#0
pci 0002:01     : [PE# 001] Secondary bus 1 associated with PE#1
pci 0003:00     : [PE# 000] Secondary bus 0 associated with PE#0
pci 0003:01     : [PE# 001] Secondary bus 1 associated with PE#1
pci 0003:20     : [PE# 002] Secondary bus 32..63 associated with PE#2
       :
EEH: Clear non-existing PHB#3-PE#0
EEH: PHB location: U78AE.001.WZS00M9-P1-002

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/include/asm/eeh.h
arch/powerpc/kernel/eeh_pe.c
arch/powerpc/platforms/powernv/eeh-powernv.c

index 0652ebe117af66b8c0ed1ae300078abde74fb545..9c11d1ed6a368f64ae0a2eb62da7b94ee6ff357e 100644 (file)
@@ -38,8 +38,9 @@ struct device_node;
 #define EEH_FORCE_DISABLED     0x02    /* EEH disabled         */
 #define EEH_PROBE_MODE_DEV     0x04    /* From PCI device      */
 #define EEH_PROBE_MODE_DEVTREE 0x08    /* From device tree     */
-#define EEH_ENABLE_IO_FOR_LOG  0x10    /* Enable IO for log    */
-#define EEH_EARLY_DUMP_LOG     0x20    /* Dump log immediately */
+#define EEH_VALID_PE_ZERO      0x10    /* PE#0 is valid        */
+#define EEH_ENABLE_IO_FOR_LOG  0x20    /* Enable IO for log    */
+#define EEH_EARLY_DUMP_LOG     0x40    /* Dump log immediately */
 
 /*
  * Delay for PE reset, all in ms
index 5a63e2b0f65b616e3c6f5b066921e25d0f20d384..fa950fbc2d970b5670f49a0996d280ce95b1ba72 100644 (file)
@@ -239,10 +239,18 @@ static void *__eeh_pe_get(void *data, void *flag)
        if (pe->type & EEH_PE_PHB)
                return NULL;
 
-       /* We prefer PE address */
-       if (edev->pe_config_addr &&
-          (edev->pe_config_addr == pe->addr))
+       /*
+        * We prefer PE address. For most cases, we should
+        * have non-zero PE address
+        */
+       if (eeh_has_flag(EEH_VALID_PE_ZERO)) {
+               if (edev->pe_config_addr == pe->addr)
+                       return pe;
+       } else {
+               if (edev->pe_config_addr &&
+                   (edev->pe_config_addr == pe->addr))
                return pe;
+       }
 
        /* Try BDF address */
        if (edev->config_addr &&
index 1d19e7917d7fc5e59cb2e4945c30a90be8e9e50a..e261869adc86d588ff620d7a6b4b5089db0fe553 100644 (file)
@@ -68,6 +68,17 @@ static int powernv_eeh_init(void)
 
                if (phb->model == PNV_PHB_MODEL_P7IOC)
                        eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
+
+               /*
+                * PE#0 should be regarded as valid by EEH core
+                * if it's not the reserved one. Currently, the
+                * reserved PEs are PE#0 on PHB3 and PE#127 on
+                * P7IOC, respectively. So we should regard
+                * PE#0 as valid for P7IOC.
+                */
+               if (phb->ioda.reserved_pe != 0)
+                       eeh_add_flag(EEH_VALID_PE_ZERO);
+
                break;
        }