Merge remote-tracking branch 'pci/pci/gavin-window-alignment' into next
author Benjamin Herrenschmidt <benh@kernel.crashing.org>
Mon, 17 Sep 2012 06:07:43 +0000 (16:07 +1000)
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>
Mon, 17 Sep 2012 06:07:43 +0000 (16:07 +1000)
Merge Gavin patches from the PCI tree as subsequent powerpc
patches are going to depend on them

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
95 files changed:
arch/powerpc/Kconfig
arch/powerpc/boot/Makefile
arch/powerpc/include/asm/abs_addr.h [deleted file]
arch/powerpc/include/asm/debug.h
arch/powerpc/include/asm/eeh.h
arch/powerpc/include/asm/eeh_event.h
arch/powerpc/include/asm/exception-64e.h
arch/powerpc/include/asm/hvcall.h
arch/powerpc/include/asm/hw_breakpoint.h
arch/powerpc/include/asm/kprobes.h
arch/powerpc/include/asm/kvm_book3s_asm.h
arch/powerpc/include/asm/machdep.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/pci-bridge.h
arch/powerpc/include/asm/ppc-pci.h
arch/powerpc/include/asm/probes.h [new file with mode: 0644]
arch/powerpc/include/asm/processor.h
arch/powerpc/include/asm/reg.h
arch/powerpc/include/asm/setup.h
arch/powerpc/include/asm/thread_info.h
arch/powerpc/include/asm/uprobes.h [new file with mode: 0644]
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/dma-swiotlb.c
arch/powerpc/kernel/dma.c
arch/powerpc/kernel/exceptions-64e.S
arch/powerpc/kernel/fadump.c
arch/powerpc/kernel/hw_breakpoint.c
arch/powerpc/kernel/ibmebus.c
arch/powerpc/kernel/machine_kexec.c
arch/powerpc/kernel/paca.c
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/prom.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/ptrace.c
arch/powerpc/kernel/rtas_flash.c
arch/powerpc/kernel/rtas_pci.c
arch/powerpc/kernel/signal.c
arch/powerpc/kernel/traps.c
arch/powerpc/kernel/uprobes.c [new file with mode: 0644]
arch/powerpc/kernel/vdso.c
arch/powerpc/kernel/vio.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/mm/fault.c
arch/powerpc/mm/hash_native_64.c
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/mm/init_64.c
arch/powerpc/mm/mem.c
arch/powerpc/mm/pgtable_64.c
arch/powerpc/mm/stab.c
arch/powerpc/mm/subpage-prot.c
arch/powerpc/mm/tlb_low_64e.S
arch/powerpc/oprofile/op_model_power4.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/platforms/cell/beat.c
arch/powerpc/platforms/cell/beat.h
arch/powerpc/platforms/pasemi/iommu.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/powernv/pci-p5ioc2.c
arch/powerpc/platforms/powernv/pci.c
arch/powerpc/platforms/ps3/setup.c
arch/powerpc/platforms/pseries/Makefile
arch/powerpc/platforms/pseries/eeh.c
arch/powerpc/platforms/pseries/eeh_cache.c
arch/powerpc/platforms/pseries/eeh_dev.c
arch/powerpc/platforms/pseries/eeh_driver.c
arch/powerpc/platforms/pseries/eeh_event.c
arch/powerpc/platforms/pseries/eeh_pe.c [new file with mode: 0644]
arch/powerpc/platforms/pseries/eeh_pseries.c
arch/powerpc/platforms/pseries/eeh_sysfs.c
arch/powerpc/platforms/pseries/iommu.c
arch/powerpc/platforms/pseries/lpar.c
arch/powerpc/platforms/pseries/msi.c
arch/powerpc/platforms/pseries/pci.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/sysdev/dart_iommu.c
arch/powerpc/xmon/xmon.c
drivers/crypto/nx/nx.c
drivers/infiniband/hw/ehca/ehca_cq.c
drivers/infiniband/hw/ehca/ehca_eq.c
drivers/infiniband/hw/ehca/ehca_mrmw.c
drivers/infiniband/hw/ehca/ehca_qp.c
drivers/infiniband/hw/ehca/ehca_reqs.c
drivers/infiniband/hw/ehca/ehca_tools.h
drivers/infiniband/hw/ehca/hcp_if.c
drivers/infiniband/hw/ehca/ipz_pt_fn.c
drivers/macintosh/smu.c
drivers/net/ethernet/ibm/ehea/ehea.h
drivers/net/ethernet/ibm/ehea/ehea_phyp.c
drivers/net/ethernet/ibm/ehea/ehea_qmr.c
drivers/scsi/ipr.c
drivers/tty/hvc/hvc_console.c
drivers/tty/hvc/hvc_vio.c
drivers/video/ps3fb.c

index 352f416269ce245c515e25e0cc5cbcf5a446d2a6..98e513b6270968b4f69736ce42f2e63522899ce1 100644 (file)
@@ -239,6 +239,9 @@ config PPC_OF_PLATFORM_PCI
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
        def_bool y
 
+config ARCH_SUPPORTS_UPROBES
+       def_bool y
+
 config PPC_ADV_DEBUG_REGS
        bool
        depends on 40x || BOOKE
index b7d833382be4889e09e7b6e6e62402afb9dd2056..6a15c968d21453230ab469b5921fea28b790dcc5 100644 (file)
@@ -107,6 +107,7 @@ src-boot := $(addprefix $(obj)/, $(src-boot))
 obj-boot := $(addsuffix .o, $(basename $(src-boot)))
 obj-wlib := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-wlib))))
 obj-plat := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-plat))))
+obj-plat: $(libfdt)
 
 quiet_cmd_copy_zlib = COPY    $@
       cmd_copy_zlib = sed "s@__used@@;s@<linux/\([^>]*\).*@\"\1\"@" $< > $@
diff --git a/arch/powerpc/include/asm/abs_addr.h b/arch/powerpc/include/asm/abs_addr.h
deleted file mode 100644 (file)
index 9d92ba0..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-#ifndef _ASM_POWERPC_ABS_ADDR_H
-#define _ASM_POWERPC_ABS_ADDR_H
-#ifdef __KERNEL__
-
-
-/*
- * c 2001 PPC 64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/memblock.h>
-
-#include <asm/types.h>
-#include <asm/page.h>
-#include <asm/prom.h>
-
-struct mschunks_map {
-        unsigned long num_chunks;
-        unsigned long chunk_size;
-        unsigned long chunk_shift;
-        unsigned long chunk_mask;
-        u32 *mapping;
-};
-
-extern struct mschunks_map mschunks_map;
-
-/* Chunks are 256 KB */
-#define MSCHUNKS_CHUNK_SHIFT   (18)
-#define MSCHUNKS_CHUNK_SIZE    (1UL << MSCHUNKS_CHUNK_SHIFT)
-#define MSCHUNKS_OFFSET_MASK   (MSCHUNKS_CHUNK_SIZE - 1)
-
-static inline unsigned long chunk_to_addr(unsigned long chunk)
-{
-       return chunk << MSCHUNKS_CHUNK_SHIFT;
-}
-
-static inline unsigned long addr_to_chunk(unsigned long addr)
-{
-       return addr >> MSCHUNKS_CHUNK_SHIFT;
-}
-
-static inline unsigned long phys_to_abs(unsigned long pa)
-{
-       return pa;
-}
-
-/* Convenience macros */
-#define virt_to_abs(va) phys_to_abs(__pa(va))
-#define abs_to_virt(aa) __va(aa)
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_ABS_ADDR_H */
index 716d2f089eb61966e18f20af66e893cb0543cb49..32de2577bb6d6630ff77e58254df6d1218f0230d 100644 (file)
@@ -44,7 +44,7 @@ static inline int debugger_dabr_match(struct pt_regs *regs) { return 0; }
 static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 #endif
 
-extern int set_dabr(unsigned long dabr);
+extern int set_dabr(unsigned long dabr, unsigned long dabrx);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 extern void do_send_trap(struct pt_regs *regs, unsigned long address,
                         unsigned long error_code, int signal_code, int brkpt);
index d60f99814ffb70ea92c9151211004f4b2cec7802..58c5ee61e70013dd05cb640362fb5aae08e4a0fa 100644 (file)
@@ -31,6 +31,45 @@ struct device_node;
 
 #ifdef CONFIG_EEH
 
+/*
+ * The struct is used to trace PE related EEH functionality.
+ * In theory, one instance of the struct is created for each
+ * particular PE. By nature, PEs correlate with each other, so
+ * the struct has to reflect that hierarchy in order to easily
+ * pick up the affected PEs when one particular PE has EEH
+ * errors.
+ *
+ * Also, one particular PE might be composed of a PCI device, a PCI
+ * bus and its subordinate components. The struct also needs to carry
+ * that information. Furthermore, one particular PE is only meaningful
+ * in the corresponding PHB. Therefore, the root PEs should be created
+ * against existing PHBs in a one-to-one fashion.
+ */
+#define EEH_PE_PHB     1       /* PHB PE    */
+#define EEH_PE_DEVICE  2       /* Device PE */
+#define EEH_PE_BUS     3       /* Bus PE    */
+
+#define EEH_PE_ISOLATED                (1 << 0)        /* Isolated PE          */
+#define EEH_PE_RECOVERING      (1 << 1)        /* Recovering PE        */
+
+struct eeh_pe {
+       int type;                       /* PE type: PHB/Bus/Device      */
+       int state;                      /* PE EEH dependent mode        */
+       int config_addr;                /* Traditional PCI address      */
+       int addr;                       /* PE configuration address     */
+       struct pci_controller *phb;     /* Associated PHB               */
+       int check_count;                /* Times of ignored error       */
+       int freeze_count;               /* Times of froze up            */
+       int false_positives;            /* Times of reported #ff's      */
+       struct eeh_pe *parent;          /* Parent PE                    */
+       struct list_head child_list;    /* Link PE to the child list    */
+       struct list_head edevs;         /* Link list of EEH devices     */
+       struct list_head child;         /* Child PEs                    */
+};
+
+#define eeh_pe_for_each_dev(pe, edev) \
+               list_for_each_entry(edev, &pe->edevs, list)
+
 /*
  * The struct is used to trace EEH state for the associated
  * PCI device node or PCI device. In future, it might
@@ -38,21 +77,16 @@ struct device_node;
  * another tree except the currently existing tree of PCI
  * buses and PCI devices
  */
-#define EEH_MODE_SUPPORTED     (1<<0)  /* EEH supported on the device  */
-#define EEH_MODE_NOCHECK       (1<<1)  /* EEH check should be skipped  */
-#define EEH_MODE_ISOLATED      (1<<2)  /* The device has been isolated */
-#define EEH_MODE_RECOVERING    (1<<3)  /* Recovering the device        */
-#define EEH_MODE_IRQ_DISABLED  (1<<4)  /* Interrupt disabled           */
+#define EEH_DEV_IRQ_DISABLED   (1<<0)  /* Interrupt disabled           */
 
 struct eeh_dev {
        int mode;                       /* EEH mode                     */
        int class_code;                 /* Class code of the device     */
        int config_addr;                /* Config address               */
        int pe_config_addr;             /* PE config address            */
-       int check_count;                /* Times of ignored error       */
-       int freeze_count;               /* Times of froze up            */
-       int false_positives;            /* Times of reported #ff's      */
        u32 config_space[16];           /* Saved PCI config space       */
+       struct eeh_pe *pe;              /* Associated PE                */
+       struct list_head list;          /* Form link list in the PE     */
        struct pci_controller *phb;     /* Associated PHB               */
        struct device_node *dn;         /* Associated device node       */
        struct pci_dev *pdev;           /* Associated PCI device        */
@@ -95,19 +129,51 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
 struct eeh_ops {
        char *name;
        int (*init)(void);
-       int (*set_option)(struct device_node *dn, int option);
-       int (*get_pe_addr)(struct device_node *dn);
-       int (*get_state)(struct device_node *dn, int *state);
-       int (*reset)(struct device_node *dn, int option);
-       int (*wait_state)(struct device_node *dn, int max_wait);
-       int (*get_log)(struct device_node *dn, int severity, char *drv_log, unsigned long len);
-       int (*configure_bridge)(struct device_node *dn);
+       void* (*of_probe)(struct device_node *dn, void *flag);
+       void* (*dev_probe)(struct pci_dev *dev, void *flag);
+       int (*set_option)(struct eeh_pe *pe, int option);
+       int (*get_pe_addr)(struct eeh_pe *pe);
+       int (*get_state)(struct eeh_pe *pe, int *state);
+       int (*reset)(struct eeh_pe *pe, int option);
+       int (*wait_state)(struct eeh_pe *pe, int max_wait);
+       int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len);
+       int (*configure_bridge)(struct eeh_pe *pe);
        int (*read_config)(struct device_node *dn, int where, int size, u32 *val);
        int (*write_config)(struct device_node *dn, int where, int size, u32 val);
 };
 
 extern struct eeh_ops *eeh_ops;
 extern int eeh_subsystem_enabled;
+extern struct mutex eeh_mutex;
+extern int eeh_probe_mode;
+
+#define EEH_PROBE_MODE_DEV     (1<<0)  /* From PCI device      */
+#define EEH_PROBE_MODE_DEVTREE (1<<1)  /* From device tree     */
+
+static inline void eeh_probe_mode_set(int flag)
+{
+       eeh_probe_mode = flag;
+}
+
+static inline int eeh_probe_mode_devtree(void)
+{
+       return (eeh_probe_mode == EEH_PROBE_MODE_DEVTREE);
+}
+
+static inline int eeh_probe_mode_dev(void)
+{
+       return (eeh_probe_mode == EEH_PROBE_MODE_DEV);
+}
+
+static inline void eeh_lock(void)
+{
+       mutex_lock(&eeh_mutex);
+}
+
+static inline void eeh_unlock(void)
+{
+       mutex_unlock(&eeh_mutex);
+}
 
 /*
  * Max number of EEH freezes allowed before we consider the device
@@ -115,19 +181,23 @@ extern int eeh_subsystem_enabled;
  */
 #define EEH_MAX_ALLOWED_FREEZES 5
 
+typedef void *(*eeh_traverse_func)(void *data, void *flag);
+int __devinit eeh_phb_pe_create(struct pci_controller *phb);
+int eeh_add_to_parent_pe(struct eeh_dev *edev);
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
+void *eeh_pe_dev_traverse(struct eeh_pe *root,
+               eeh_traverse_func fn, void *flag);
+void eeh_pe_restore_bars(struct eeh_pe *pe);
+struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
+
 void * __devinit eeh_dev_init(struct device_node *dn, void *data);
 void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb);
-void __init eeh_dev_phb_init(void);
-void __init eeh_init(void);
-#ifdef CONFIG_PPC_PSERIES
-int __init eeh_pseries_init(void);
-#endif
 int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
 unsigned long eeh_check_failure(const volatile void __iomem *token,
                                unsigned long val);
-int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev);
-void __init pci_addr_cache_build(void);
+int eeh_dev_check_failure(struct eeh_dev *edev);
+void __init eeh_addr_cache_build(void);
 void eeh_add_device_tree_early(struct device_node *);
 void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_remove_bus_device(struct pci_dev *);
@@ -156,34 +226,24 @@ static inline void *eeh_dev_init(struct device_node *dn, void *data)
 
 static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { }
 
-static inline void eeh_dev_phb_init(void) { }
-
-static inline void eeh_init(void) { }
-
-#ifdef CONFIG_PPC_PSERIES
-static inline int eeh_pseries_init(void)
-{
-       return 0;
-}
-#endif /* CONFIG_PPC_PSERIES */
-
 static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
 {
        return val;
 }
 
-static inline int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
-{
-       return 0;
-}
+#define eeh_dev_check_failure(x) (0)
 
-static inline void pci_addr_cache_build(void) { }
+static inline void eeh_addr_cache_build(void) { }
 
 static inline void eeh_add_device_tree_early(struct device_node *dn) { }
 
 static inline void eeh_add_device_tree_late(struct pci_bus *bus) { }
 
 static inline void eeh_remove_bus_device(struct pci_dev *dev) { }
+
+static inline void eeh_lock(void) { }
+static inline void eeh_unlock(void) { }
+
 #define EEH_POSSIBLE_ERROR(val, type) (0)
 #define EEH_IO_ERROR_VALUE(size) (-1UL)
 #endif /* CONFIG_EEH */
index c68b012b7797b0144956c951cd90b9cab92713ab..de67d830151be7f9cf3067a2a5768eeb48fed6af 100644 (file)
  */
 struct eeh_event {
        struct list_head        list;   /* to form event queue  */
-       struct eeh_dev          *edev;  /* EEH device           */
+       struct eeh_pe           *pe;    /* EEH PE               */
 };
 
-int eeh_send_failure_event(struct eeh_dev *edev);
-struct eeh_dev *handle_eeh_events(struct eeh_event *);
+int eeh_send_failure_event(struct eeh_pe *pe);
+void eeh_handle_event(struct eeh_pe *pe);
 
 #endif /* __KERNEL__ */
 #endif /* ASM_POWERPC_EEH_EVENT_H */
index ac13addb849582e7dc450d3e9b7a091c1ddcf4de..51fa43e536b917c08c5870525c9a81063c1448e2 100644 (file)
@@ -37,6 +37,7 @@
  * critical data
  */
 
+#define PACA_EXGDBELL PACA_EXGEN
 
 /* We are out of SPRGs so we save some things in the PACA. The normal
  * exception frame is smaller than the CRIT or MC one though
@@ -45,8 +46,9 @@
 #define EX_CR          (1 * 8)
 #define EX_R10         (2 * 8)
 #define EX_R11         (3 * 8)
-#define EX_R14         (4 * 8)
-#define EX_R15         (5 * 8)
+#define EX_R13         (4 * 8)
+#define EX_R14         (5 * 8)
+#define EX_R15         (6 * 8)
 
 /*
  * The TLB miss exception uses different slots.
index 423cf9eaf4a4fddf35811e62de33dc16363812e5..7a867065db79a8286578180abc2e8a4d36272885 100644 (file)
 #define H_VASI_RESUMED          5
 #define H_VASI_COMPLETED        6
 
-/* DABRX flags */
-#define H_DABRX_HYPERVISOR     (1UL<<(63-61))
-#define H_DABRX_KERNEL         (1UL<<(63-62))
-#define H_DABRX_USER           (1UL<<(63-63))
-
 /* Each control block has to be on a 4K boundary */
 #define H_CB_ALIGNMENT          4096
 
index be04330af75194562e3473084fd33ec7589dd696..423424599dad9395a00f15df23ce17676be872ac 100644 (file)
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
 struct arch_hw_breakpoint {
-       bool            extraneous_interrupt;
-       u8              len; /* length of the target data symbol */
-       int             type;
        unsigned long   address;
+       unsigned long   dabrx;
+       int             type;
+       u8              len; /* length of the target data symbol */
+       bool            extraneous_interrupt;
 };
 
 #include <linux/kdebug.h>
@@ -61,7 +62,7 @@ extern void ptrace_triggered(struct perf_event *bp,
                        struct perf_sample_data *data, struct pt_regs *regs);
 static inline void hw_breakpoint_disable(void)
 {
-       set_dabr(0);
+       set_dabr(0, 0);
 }
 extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
 
index be0171afdc0f513cfff0a8bdb22abc70de29528c..7b6feab6fd26f26cfc3c19456f163845d72db86e 100644 (file)
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
+#include <asm/probes.h>
 
 #define  __ARCH_WANT_KPROBES_INSN_SLOT
 
 struct pt_regs;
 struct kprobe;
 
-typedef unsigned int kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0x7fe00008      /* trap */
+typedef ppc_opcode_t kprobe_opcode_t;
 #define MAX_INSN_SIZE 1
 
-#define IS_TW(instr)           (((instr) & 0xfc0007fe) == 0x7c000008)
-#define IS_TD(instr)           (((instr) & 0xfc0007fe) == 0x7c000088)
-#define IS_TDI(instr)          (((instr) & 0xfc000000) == 0x08000000)
-#define IS_TWI(instr)          (((instr) & 0xfc000000) == 0x0c000000)
-
 #ifdef CONFIG_PPC64
 /*
  * 64bit powerpc uses function descriptors.
@@ -72,12 +67,6 @@ typedef unsigned int kprobe_opcode_t;
                addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name); \
        }                                                               \
 }
-
-#define is_trap(instr) (IS_TW(instr) || IS_TD(instr) || \
-                       IS_TWI(instr) || IS_TDI(instr))
-#else
-/* Use stock kprobe_lookup_name since ppc32 doesn't use function descriptors */
-#define is_trap(instr) (IS_TW(instr) || IS_TWI(instr))
 #endif
 
 #define flush_insn_slot(p)     do { } while (0)
index bfcd00c1485d04474c00461d4f7b9097bd624fb4..88609b23b775460c96a4d9d805feaf49fecf94a3 100644 (file)
@@ -74,7 +74,6 @@ struct kvmppc_host_state {
        ulong vmhandler;
        ulong scratch0;
        ulong scratch1;
-       ulong sprg3;
        u8 in_guest;
        u8 restore_hid5;
        u8 napping;
index f7706d722b39539c3559e0d858924c7703f529d9..8111e1b78f7f48dabcacfa3120b1e3bba3eeb230 100644 (file)
@@ -180,7 +180,8 @@ struct machdep_calls {
        void            (*enable_pmcs)(void);
 
        /* Set DABR for this platform, leave empty for default implemenation */
-       int             (*set_dabr)(unsigned long dabr);
+       int             (*set_dabr)(unsigned long dabr,
+                                   unsigned long dabrx);
 
 #ifdef CONFIG_PPC32    /* XXX for now */
        /* A general init function, called by ppc_init in init/main.c.
index daf813fea91fa6998322ff4d0fba63b2398f52e0..7796519fd23870ab1706e41017f4831aacf7c8a5 100644 (file)
@@ -136,6 +136,7 @@ struct paca_struct {
        u8 io_sync;                     /* writel() needs spin_unlock sync */
        u8 irq_work_pending;            /* IRQ_WORK interrupt while soft-disable */
        u8 nap_state_lost;              /* NV GPR values lost in power7_idle */
+       u64 sprg3;                      /* Saved user-visible sprg */
 
 #ifdef CONFIG_PPC_POWERNV
        /* Pointer to OPAL machine check event structure set by the
index 8cccbee615198881125bdde527d4e4dad1fbb2e3..973df4d9d3663ba41477f5417937af9d42814614 100644 (file)
@@ -184,6 +184,8 @@ static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn)
 {
        return PCI_DN(dn)->edev;
 }
+#else
+#define of_node_to_eeh_dev(x) (NULL)
 #endif
 
 /** Find the bus corresponding to the indicated device node */
index 80fa704d410fe5105bd7776776a5af9a257147ac..ed57fa7920c88dd123fcf139353489fcef4746c0 100644 (file)
@@ -47,19 +47,17 @@ extern int rtas_setup_phb(struct pci_controller *phb);
 
 #ifdef CONFIG_EEH
 
-void pci_addr_cache_build(void);
-void pci_addr_cache_insert_device(struct pci_dev *dev);
-void pci_addr_cache_remove_device(struct pci_dev *dev);
-struct pci_dev *pci_addr_cache_get_device(unsigned long addr);
-void eeh_slot_error_detail(struct eeh_dev *edev, int severity);
-int eeh_pci_enable(struct eeh_dev *edev, int function);
-int eeh_reset_pe(struct eeh_dev *);
-void eeh_restore_bars(struct eeh_dev *);
+void eeh_addr_cache_insert_dev(struct pci_dev *dev);
+void eeh_addr_cache_rmv_dev(struct pci_dev *dev);
+struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr);
+void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
+int eeh_pci_enable(struct eeh_pe *pe, int function);
+int eeh_reset_pe(struct eeh_pe *);
+void eeh_save_bars(struct eeh_dev *edev);
 int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
 int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
-void eeh_mark_slot(struct device_node *dn, int mode_flag);
-void eeh_clear_slot(struct device_node *dn, int mode_flag);
-struct device_node *eeh_find_device_pe(struct device_node *dn);
+void eeh_pe_state_mark(struct eeh_pe *pe, int state);
+void eeh_pe_state_clear(struct eeh_pe *pe, int state);
 
 void eeh_sysfs_add_device(struct pci_dev *pdev);
 void eeh_sysfs_remove_device(struct pci_dev *pdev);
diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h
new file mode 100644 (file)
index 0000000..5f1e15b
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef _ASM_POWERPC_PROBES_H
+#define _ASM_POWERPC_PROBES_H
+#ifdef __KERNEL__
+/*
+ * Definitions common to probes files
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2012
+ */
+#include <linux/types.h>
+
+typedef u32 ppc_opcode_t;
+#define BREAKPOINT_INSTRUCTION 0x7fe00008      /* trap */
+
+/* Trap definitions per ISA */
+#define IS_TW(instr)           (((instr) & 0xfc0007fe) == 0x7c000008)
+#define IS_TD(instr)           (((instr) & 0xfc0007fe) == 0x7c000088)
+#define IS_TDI(instr)          (((instr) & 0xfc000000) == 0x08000000)
+#define IS_TWI(instr)          (((instr) & 0xfc000000) == 0x0c000000)
+
+#ifdef CONFIG_PPC64
+#define is_trap(instr)         (IS_TW(instr) || IS_TD(instr) || \
+                               IS_TWI(instr) || IS_TDI(instr))
+#else
+#define is_trap(instr)         (IS_TW(instr) || IS_TWI(instr))
+#endif /* CONFIG_PPC64 */
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_PROBES_H */
index 54b73a28c20579f4f6923e4f9711b5d7dc062205..83efc6e81543b3785ad422fc87835acba869f422 100644 (file)
@@ -219,6 +219,8 @@ struct thread_struct {
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #endif
        unsigned long   dabr;           /* Data address breakpoint register */
+       unsigned long   dabrx;          /*      ... extension  */
+       unsigned long   trap_nr;        /* last trap # on this thread */
 #ifdef CONFIG_ALTIVEC
        /* Complete AltiVec register set */
        vector128       vr[32] __attribute__((aligned(16)));
index 638608677e2a53679128800323d26d5caa53cd44..121a90bbf7780c4d99d891fa9482c71e39aac51f 100644 (file)
 #define SPRN_DABRX     0x3F7   /* Data Address Breakpoint Register Extension */
 #define   DABRX_USER   (1UL << 0)
 #define   DABRX_KERNEL (1UL << 1)
+#define   DABRX_HYP    (1UL << 2)
+#define   DABRX_BTI    (1UL << 3)
+#define   DABRX_ALL     (DABRX_BTI | DABRX_HYP | DABRX_KERNEL | DABRX_USER)
 #define SPRN_DAR       0x013   /* Data Address Register */
 #define SPRN_DBCR      0x136   /* e300 Data Breakpoint Control Reg */
 #define SPRN_DSISR     0x012   /* Data Storage Interrupt Status Register */
  * 64-bit embedded
  *     - SPRG0 generic exception scratch
  *     - SPRG2 TLB exception stack
- *     - SPRG3 CPU and NUMA node for VDSO getcpu (user visible)
+ *     - SPRG3 critical exception scratch and
+ *        CPU and NUMA node for VDSO getcpu (user visible)
  *     - SPRG4 unused (user visible)
  *     - SPRG6 TLB miss scratch (user visible, sorry !)
  *     - SPRG7 critical exception scratch
 
 #ifdef CONFIG_PPC_BOOK3E_64
 #define SPRN_SPRG_MC_SCRATCH   SPRN_SPRG8
-#define SPRN_SPRG_CRIT_SCRATCH SPRN_SPRG7
+#define SPRN_SPRG_CRIT_SCRATCH SPRN_SPRG3
 #define SPRN_SPRG_DBG_SCRATCH  SPRN_SPRG9
 #define SPRN_SPRG_TLB_EXFRAME  SPRN_SPRG2
 #define SPRN_SPRG_TLB_SCRATCH  SPRN_SPRG6
 #define SPRN_SPRG_GEN_SCRATCH  SPRN_SPRG0
+#define SPRN_SPRG_GDBELL_SCRATCH SPRN_SPRG_GEN_SCRATCH
 
 #define SET_PACA(rX)   mtspr   SPRN_SPRG_PACA,rX
 #define GET_PACA(rX)   mfspr   rX,SPRN_SPRG_PACA
 #define PVR_VER(pvr)   (((pvr) >>  16) & 0xFFFF)       /* Version field */
 #define PVR_REV(pvr)   (((pvr) >>   0) & 0xFFFF)       /* Revison field */
 
-#define __is_processor(pv)     (PVR_VER(mfspr(SPRN_PVR)) == (pv))
+#define pvr_version_is(pvr)    (PVR_VER(mfspr(SPRN_PVR)) == (pvr))
 
 /*
  * IBM has further subdivided the standard PowerPC 16-bit version and
 #define PVR_476_ISS    0x00052000
 
 /* 64-bit processors */
-/* XXX the prefix should be PVR_, we'll do a global sweep to fix it one day */
-#define PV_NORTHSTAR   0x0033
-#define PV_PULSAR      0x0034
-#define PV_POWER4      0x0035
-#define PV_ICESTAR     0x0036
-#define PV_SSTAR       0x0037
-#define PV_POWER4p     0x0038
-#define PV_970         0x0039
-#define PV_POWER5      0x003A
-#define PV_POWER5p     0x003B
-#define PV_970FX       0x003C
-#define PV_POWER6      0x003E
-#define PV_POWER7      0x003F
-#define PV_630         0x0040
-#define PV_630p        0x0041
-#define PV_970MP       0x0044
-#define PV_970GX       0x0045
-#define PV_BE          0x0070
-#define PV_PA6T                0x0090
+#define PVR_NORTHSTAR  0x0033
+#define PVR_PULSAR     0x0034
+#define PVR_POWER4     0x0035
+#define PVR_ICESTAR    0x0036
+#define PVR_SSTAR      0x0037
+#define PVR_POWER4p    0x0038
+#define PVR_970                0x0039
+#define PVR_POWER5     0x003A
+#define PVR_POWER5p    0x003B
+#define PVR_970FX      0x003C
+#define PVR_POWER6     0x003E
+#define PVR_POWER7     0x003F
+#define PVR_630                0x0040
+#define PVR_630p       0x0041
+#define PVR_970MP      0x0044
+#define PVR_970GX      0x0045
+#define PVR_POWER7p    0x004A
+#define PVR_BE         0x0070
+#define PVR_PA6T       0x0090
 
 /* Macros for setting and retrieving special purpose registers */
 #ifndef __ASSEMBLY__
index d084ce195fc3af11b6c37443ae3ecc37c59fa3d4..8b9a306260b2cf7c7bc664d09ba2926bc1ac6dda 100644 (file)
@@ -9,7 +9,7 @@ extern void ppc_printk_progress(char *s, unsigned short hex);
 extern unsigned int rtas_data;
 extern int mem_init_done;      /* set on boot once kmalloc can be called */
 extern int init_bootmem_done;  /* set once bootmem is available */
-extern phys_addr_t memory_limit;
+extern unsigned long long memory_limit;
 extern unsigned long klimit;
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
index faf93529cbf087876049790c2e1ef30fb29c5cd7..e942203cd4a88e946f7dc4d1b5d34c81d939a1ec 100644 (file)
@@ -102,6 +102,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_RESTOREALL         11      /* Restore all regs (implies NOERROR) */
 #define TIF_NOERROR            12      /* Force successful syscall return */
 #define TIF_NOTIFY_RESUME      13      /* callback before returning to user */
+#define TIF_UPROBE             14      /* breakpointed or single-stepping */
 #define TIF_SYSCALL_TRACEPOINT 15      /* syscall tracepoint instrumentation */
 
 /* as above, but as bit values */
@@ -118,12 +119,13 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_RESTOREALL                (1<<TIF_RESTOREALL)
 #define _TIF_NOERROR           (1<<TIF_NOERROR)
 #define _TIF_NOTIFY_RESUME     (1<<TIF_NOTIFY_RESUME)
+#define _TIF_UPROBE            (1<<TIF_UPROBE)
 #define _TIF_SYSCALL_TRACEPOINT        (1<<TIF_SYSCALL_TRACEPOINT)
 #define _TIF_SYSCALL_T_OR_A    (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
                                 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
 
 #define _TIF_USER_WORK_MASK    (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
-                                _TIF_NOTIFY_RESUME)
+                                _TIF_NOTIFY_RESUME | _TIF_UPROBE)
 #define _TIF_PERSYSCALL_MASK   (_TIF_RESTOREALL|_TIF_NOERROR)
 
 /* Bits in local_flags */
diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
new file mode 100644 (file)
index 0000000..b532060
--- /dev/null
@@ -0,0 +1,54 @@
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+/*
+ * User-space Probes (UProbes) for powerpc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2007-2012
+ *
+ * Adapted from the x86 port by Ananth N Mavinakayanahalli <ananth@in.ibm.com>
+ */
+
+#include <linux/notifier.h>
+#include <asm/probes.h>
+
+typedef ppc_opcode_t uprobe_opcode_t;
+
+#define MAX_UINSN_BYTES                4
+#define UPROBE_XOL_SLOT_BYTES  (MAX_UINSN_BYTES)
+
+/* The following alias is needed for reference from arch-agnostic code */
+#define UPROBE_SWBP_INSN       BREAKPOINT_INSTRUCTION
+#define UPROBE_SWBP_INSN_SIZE  4 /* swbp insn size in bytes */
+
+struct arch_uprobe {
+       union {
+               u8      insn[MAX_UINSN_BYTES];
+               u32     ainsn;
+       };
+};
+
+struct arch_uprobe_task {
+       unsigned long   saved_trap_nr;
+};
+
+extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
+extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
+extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
+extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+#endif /* _ASM_UPROBES_H */
index bb282dd81612171b5b3545b7924db7ed907100d2..cde12f8a4ebc4e98342491a2fcb91bd31fb26f05 100644 (file)
@@ -96,6 +96,7 @@ obj-$(CONFIG_MODULES)         += ppc_ksyms.o
 obj-$(CONFIG_BOOTX_TEXT)       += btext.o
 obj-$(CONFIG_SMP)              += smp.o
 obj-$(CONFIG_KPROBES)          += kprobes.o
+obj-$(CONFIG_UPROBES)          += uprobes.o
 obj-$(CONFIG_PPC_UDBG_16550)   += legacy_serial.o udbg_16550.o
 obj-$(CONFIG_STACKTRACE)       += stacktrace.o
 obj-$(CONFIG_SWIOTLB)          += dma-swiotlb.o
index e8995727b1c15207757969ca9e4cd5b7990c2d17..7523539cfe9f88e0a49e4b8a5715e8dff7a64fc7 100644 (file)
@@ -206,6 +206,7 @@ int main(void)
        DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
        DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
        DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost));
+       DEFINE(PACA_SPRG3, offsetof(struct paca_struct, sprg3));
 #endif /* CONFIG_PPC64 */
 
        /* RTAS */
@@ -534,7 +535,6 @@ int main(void)
        HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler);
        HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);
        HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
-       HSTATE_FIELD(HSTATE_SPRG3, sprg3);
        HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
        HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
        HSTATE_FIELD(HSTATE_NAPPING, napping);
index 46943651da23ba25b3e1093ea346fb31e154243c..a720b54b971cd1f2a81a33e5168b3836678864cd 100644 (file)
@@ -12,6 +12,7 @@
  */
 
 #include <linux/dma-mapping.h>
+#include <linux/memblock.h>
 #include <linux/pfn.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
@@ -20,7 +21,6 @@
 #include <asm/machdep.h>
 #include <asm/swiotlb.h>
 #include <asm/dma.h>
-#include <asm/abs_addr.h>
 
 unsigned int ppc_swiotlb_enable;
 
index 355b9d84b0f8149efd45ed9b5b0035c3b70fac11..8032b97ccdcb6668f01dc9c2026c0080f0a14449 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/pci.h>
 #include <asm/vio.h>
 #include <asm/bug.h>
-#include <asm/abs_addr.h>
 #include <asm/machdep.h>
 
 /*
@@ -50,7 +49,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size,
                return NULL;
        ret = page_address(page);
        memset(ret, 0, size);
-       *dma_handle = virt_to_abs(ret) + get_dma_offset(dev);
+       *dma_handle = __pa(ret) + get_dma_offset(dev);
 
        return ret;
 #endif
index 98be7f0cd227019cbe6b6a07ad989327b8e36979..87a82fbdf05afa1791deae718e81d4e5bfb63db2 100644 (file)
@@ -25,6 +25,8 @@
 #include <asm/ppc-opcode.h>
 #include <asm/mmu.h>
 #include <asm/hw_irq.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_booke_hv_asm.h>
 
 /* XXX This will ultimately add space for a special exception save
  *     structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
 #define        SPECIAL_EXC_FRAME_SIZE  INT_FRAME_SIZE
 
 /* Exception prolog code for all exceptions */
-#define EXCEPTION_PROLOG(n, type, addition)                                \
+#define EXCEPTION_PROLOG(n, intnum, type, addition)                        \
        mtspr   SPRN_SPRG_##type##_SCRATCH,r13; /* get spare registers */   \
        mfspr   r13,SPRN_SPRG_PACA;     /* get PACA */                      \
        std     r10,PACA_EX##type+EX_R10(r13);                              \
        std     r11,PACA_EX##type+EX_R11(r13);                              \
+       PROLOG_STORE_RESTORE_SCRATCH_##type;                                \
        mfcr    r10;                    /* save CR */                       \
+       mfspr   r11,SPRN_##type##_SRR1;/* what are we coming from */        \
+       DO_KVM  intnum,SPRN_##type##_SRR1;    /* KVM hook */                \
+       stw     r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \
        addition;                       /* additional code for that exc. */ \
        std     r1,PACA_EX##type+EX_R1(r13); /* save old r1 in the PACA */  \
-       stw     r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \
-       mfspr   r11,SPRN_##type##_SRR1;/* what are we coming from */        \
        type##_SET_KSTACK;              /* get special stack if necessary */\
        andi.   r10,r11,MSR_PR;         /* save stack pointer */            \
        beq     1f;                     /* branch around if supervisor */   \
 #define SPRN_GEN_SRR0  SPRN_SRR0
 #define SPRN_GEN_SRR1  SPRN_SRR1
 
+#define        GDBELL_SET_KSTACK       GEN_SET_KSTACK
+#define SPRN_GDBELL_SRR0       SPRN_GSRR0
+#define SPRN_GDBELL_SRR1       SPRN_GSRR1
+
 #define CRIT_SET_KSTACK                                                            \
        ld      r1,PACA_CRIT_STACK(r13);                                    \
        subi    r1,r1,SPECIAL_EXC_FRAME_SIZE;
 #define SPRN_MC_SRR0   SPRN_MCSRR0
 #define SPRN_MC_SRR1   SPRN_MCSRR1
 
-#define NORMAL_EXCEPTION_PROLOG(n, addition)                               \
-       EXCEPTION_PROLOG(n, GEN, addition##_GEN(n))
+#define NORMAL_EXCEPTION_PROLOG(n, intnum, addition)                       \
+       EXCEPTION_PROLOG(n, intnum, GEN, addition##_GEN(n))
 
-#define CRIT_EXCEPTION_PROLOG(n, addition)                                 \
-       EXCEPTION_PROLOG(n, CRIT, addition##_CRIT(n))
+#define CRIT_EXCEPTION_PROLOG(n, intnum, addition)                         \
+       EXCEPTION_PROLOG(n, intnum, CRIT, addition##_CRIT(n))
 
-#define DBG_EXCEPTION_PROLOG(n, addition)                                  \
-       EXCEPTION_PROLOG(n, DBG, addition##_DBG(n))
+#define DBG_EXCEPTION_PROLOG(n, intnum, addition)                          \
+       EXCEPTION_PROLOG(n, intnum, DBG, addition##_DBG(n))
 
-#define MC_EXCEPTION_PROLOG(n, addition)                                   \
-       EXCEPTION_PROLOG(n, MC, addition##_MC(n))
+#define MC_EXCEPTION_PROLOG(n, intnum, addition)                           \
+       EXCEPTION_PROLOG(n, intnum, MC, addition##_MC(n))
 
+#define GDBELL_EXCEPTION_PROLOG(n, intnum, addition)                       \
+       EXCEPTION_PROLOG(n, intnum, GDBELL, addition##_GDBELL(n))
+
+/*
+ * Store user-visible scratch in PACA exception slots and restore proper value
+ */
+#define PROLOG_STORE_RESTORE_SCRATCH_GEN
+#define PROLOG_STORE_RESTORE_SCRATCH_GDBELL
+#define PROLOG_STORE_RESTORE_SCRATCH_DBG
+#define PROLOG_STORE_RESTORE_SCRATCH_MC
+
+#define PROLOG_STORE_RESTORE_SCRATCH_CRIT                                  \
+       mfspr   r10,SPRN_SPRG_CRIT_SCRATCH;     /* get r13 */               \
+       std     r10,PACA_EXCRIT+EX_R13(r13);    /* keep it in the PACA */   \
+       ld      r11,PACA_SPRG3(r13);            /* value in paca->sprg3 */  \
+       mtspr   SPRN_SPRG_CRIT_SCRATCH,r11;     /* put it back in the scratch SPRG */
 
 /* Variants of the "addition" argument for the prolog
  */
 #define PROLOG_ADDITION_NONE_GEN(n)
+#define PROLOG_ADDITION_NONE_GDBELL(n)
 #define PROLOG_ADDITION_NONE_CRIT(n)
 #define PROLOG_ADDITION_NONE_DBG(n)
 #define PROLOG_ADDITION_NONE_MC(n)
 
 #define PROLOG_ADDITION_MASKABLE_GEN(n)                                            \
-       lbz     r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */      \
-       cmpwi   cr0,r11,0;              /* yes -> go out of line */         \
+       lbz     r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */      \
+       cmpwi   cr0,r10,0;              /* yes -> go out of line */         \
        beq     masked_interrupt_book3e_##n
 
 #define PROLOG_ADDITION_2REGS_GEN(n)                                       \
@@ -233,9 +258,9 @@ exc_##n##_bad_stack:                                                            \
 1:
 
 
-#define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack)                  \
+#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack)          \
        START_EXCEPTION(label);                                         \
-       NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE)      \
+       NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\
        EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE)             \
        ack(r8);                                                        \
        CHECK_NAPPING();                                                \
@@ -286,7 +311,8 @@ interrupt_end_book3e:
 
 /* Critical Input Interrupt */
        START_EXCEPTION(critical_input);
-       CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE)
+       CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,
+                             PROLOG_ADDITION_NONE)
 //     EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE)
 //     bl      special_reg_save_crit
 //     CHECK_NAPPING();
@@ -297,7 +323,8 @@ interrupt_end_book3e:
 
 /* Machine Check Interrupt */
        START_EXCEPTION(machine_check);
-       CRIT_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE)
+       MC_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_MACHINE_CHECK,
+                           PROLOG_ADDITION_NONE)
 //     EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE)
 //     bl      special_reg_save_mc
 //     addi    r3,r1,STACK_FRAME_OVERHEAD
@@ -308,7 +335,8 @@ interrupt_end_book3e:
 
 /* Data Storage Interrupt */
        START_EXCEPTION(data_storage)
-       NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS)
+       NORMAL_EXCEPTION_PROLOG(0x300, BOOKE_INTERRUPT_DATA_STORAGE,
+                               PROLOG_ADDITION_2REGS)
        mfspr   r14,SPRN_DEAR
        mfspr   r15,SPRN_ESR
        EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE)
@@ -316,18 +344,21 @@ interrupt_end_book3e:
 
 /* Instruction Storage Interrupt */
        START_EXCEPTION(instruction_storage);
-       NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS)
+       NORMAL_EXCEPTION_PROLOG(0x400, BOOKE_INTERRUPT_INST_STORAGE,
+                               PROLOG_ADDITION_2REGS)
        li      r15,0
        mr      r14,r10
        EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE)
        b       storage_fault_common
 
 /* External Input Interrupt */
-       MASKABLE_EXCEPTION(0x500, external_input, .do_IRQ, ACK_NONE)
+       MASKABLE_EXCEPTION(0x500, BOOKE_INTERRUPT_EXTERNAL,
+                          external_input, .do_IRQ, ACK_NONE)
 
 /* Alignment */
        START_EXCEPTION(alignment);
-       NORMAL_EXCEPTION_PROLOG(0x600, PROLOG_ADDITION_2REGS)
+       NORMAL_EXCEPTION_PROLOG(0x600, BOOKE_INTERRUPT_ALIGNMENT,
+                               PROLOG_ADDITION_2REGS)
        mfspr   r14,SPRN_DEAR
        mfspr   r15,SPRN_ESR
        EXCEPTION_COMMON(0x600, PACA_EXGEN, INTS_KEEP)
@@ -335,7 +366,8 @@ interrupt_end_book3e:
 
 /* Program Interrupt */
        START_EXCEPTION(program);
-       NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG)
+       NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
+                               PROLOG_ADDITION_1REG)
        mfspr   r14,SPRN_ESR
        EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE)
        std     r14,_DSISR(r1)
@@ -347,7 +379,8 @@ interrupt_end_book3e:
 
 /* Floating Point Unavailable Interrupt */
        START_EXCEPTION(fp_unavailable);
-       NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE)
+       NORMAL_EXCEPTION_PROLOG(0x800, BOOKE_INTERRUPT_FP_UNAVAIL,
+                               PROLOG_ADDITION_NONE)
        /* we can probably do a shorter exception entry for that one... */
        EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP)
        ld      r12,_MSR(r1)
@@ -362,14 +395,17 @@ interrupt_end_book3e:
        b       .ret_from_except
 
 /* Decrementer Interrupt */
-       MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, ACK_DEC)
+       MASKABLE_EXCEPTION(0x900, BOOKE_INTERRUPT_DECREMENTER,
+                          decrementer, .timer_interrupt, ACK_DEC)
 
 /* Fixed Interval Timer Interrupt */
-       MASKABLE_EXCEPTION(0x980, fixed_interval, .unknown_exception, ACK_FIT)
+       MASKABLE_EXCEPTION(0x980, BOOKE_INTERRUPT_FIT,
+                          fixed_interval, .unknown_exception, ACK_FIT)
 
 /* Watchdog Timer Interrupt */
        START_EXCEPTION(watchdog);
-       CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE)
+       CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG,
+                             PROLOG_ADDITION_NONE)
 //     EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE)
 //     bl      special_reg_save_crit
 //     CHECK_NAPPING();
@@ -388,7 +424,8 @@ interrupt_end_book3e:
 
 /* Auxiliary Processor Unavailable Interrupt */
        START_EXCEPTION(ap_unavailable);
-       NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE)
+       NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,
+                               PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE)
        bl      .save_nvgprs
        addi    r3,r1,STACK_FRAME_OVERHEAD
@@ -397,7 +434,8 @@ interrupt_end_book3e:
 
 /* Debug exception as a critical interrupt*/
        START_EXCEPTION(debug_crit);
-       CRIT_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS)
+       CRIT_EXCEPTION_PROLOG(0xd00, BOOKE_INTERRUPT_DEBUG,
+                             PROLOG_ADDITION_2REGS)
 
        /*
         * If there is a single step or branch-taken exception in an
@@ -431,7 +469,7 @@ interrupt_end_book3e:
        mtcr    r10
        ld      r10,PACA_EXCRIT+EX_R10(r13)     /* restore registers */
        ld      r11,PACA_EXCRIT+EX_R11(r13)
-       mfspr   r13,SPRN_SPRG_CRIT_SCRATCH
+       ld      r13,PACA_EXCRIT+EX_R13(r13)
        rfci
 
        /* Normal debug exception */
@@ -444,7 +482,7 @@ interrupt_end_book3e:
        /* Now we mash up things to make it look like we are coming on a
         * normal exception
         */
-       mfspr   r15,SPRN_SPRG_CRIT_SCRATCH
+       ld      r15,PACA_EXCRIT+EX_R13(r13)
        mtspr   SPRN_SPRG_GEN_SCRATCH,r15
        mfspr   r14,SPRN_DBSR
        EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE)
@@ -462,7 +500,8 @@ kernel_dbg_exc:
 
 /* Debug exception as a debug interrupt*/
        START_EXCEPTION(debug_debug);
-       DBG_EXCEPTION_PROLOG(0xd08, PROLOG_ADDITION_2REGS)
+       DBG_EXCEPTION_PROLOG(0xd00, BOOKE_INTERRUPT_DEBUG,
+                                                PROLOG_ADDITION_2REGS)
 
        /*
         * If there is a single step or branch-taken exception in an
@@ -523,18 +562,21 @@ kernel_dbg_exc:
        b       .ret_from_except
 
        START_EXCEPTION(perfmon);
-       NORMAL_EXCEPTION_PROLOG(0x260, PROLOG_ADDITION_NONE)
+       NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
+                               PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      .performance_monitor_exception
        b       .ret_from_except_lite
 
 /* Doorbell interrupt */
-       MASKABLE_EXCEPTION(0x280, doorbell, .doorbell_exception, ACK_NONE)
+       MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL,
+                          doorbell, .doorbell_exception, ACK_NONE)
 
 /* Doorbell critical Interrupt */
        START_EXCEPTION(doorbell_crit);
-       CRIT_EXCEPTION_PROLOG(0x2a0, PROLOG_ADDITION_NONE)
+       CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL,
+                             PROLOG_ADDITION_NONE)
 //     EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE)
 //     bl      special_reg_save_crit
 //     CHECK_NAPPING();
@@ -543,12 +585,24 @@ kernel_dbg_exc:
 //     b       ret_from_crit_except
        b       .
 
-/* Guest Doorbell */
-       MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE)
+/*
+ *     Guest doorbell interrupt
+ *     This general exception uses the GSRRx save/restore registers
+ */
+       START_EXCEPTION(guest_doorbell);
+       GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL,
+                               PROLOG_ADDITION_NONE)
+       EXCEPTION_COMMON(0x2c0, PACA_EXGEN, INTS_KEEP)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       bl      .save_nvgprs
+       INTS_RESTORE_HARD
+       bl      .unknown_exception
+       b       .ret_from_except
 
 /* Guest Doorbell critical Interrupt */
        START_EXCEPTION(guest_doorbell_crit);
-       CRIT_EXCEPTION_PROLOG(0x2e0, PROLOG_ADDITION_NONE)
+       CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
+                             PROLOG_ADDITION_NONE)
 //     EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE)
 //     bl      special_reg_save_crit
 //     CHECK_NAPPING();
@@ -559,7 +613,8 @@ kernel_dbg_exc:
 
 /* Hypervisor call */
        START_EXCEPTION(hypercall);
-       NORMAL_EXCEPTION_PROLOG(0x310, PROLOG_ADDITION_NONE)
+       NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL,
+                               PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      .save_nvgprs
@@ -569,7 +624,8 @@ kernel_dbg_exc:
 
 /* Embedded Hypervisor priviledged  */
        START_EXCEPTION(ehpriv);
-       NORMAL_EXCEPTION_PROLOG(0x320, PROLOG_ADDITION_NONE)
+       NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV,
+                               PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      .save_nvgprs
@@ -582,44 +638,42 @@ kernel_dbg_exc:
  * accordingly and if the interrupt is level sensitive, we hard disable
  */
 
+.macro masked_interrupt_book3e paca_irq full_mask
+       lbz     r10,PACAIRQHAPPENED(r13)        /* OR the paca_irq bit(s) into  */
+       ori     r10,r10,\paca_irq               /* the PACAIRQHAPPENED byte of  */
+       stb     r10,PACAIRQHAPPENED(r13)        /* the PACA addressed by r13    */
+       /* full_mask == 1: also clear MSR_EE in the SRR1 image the prolog left in r11 */
+       .if \full_mask == 1
+       rldicl  r10,r11,48,1            /* clear MSR_EE */
+       rotldi  r11,r10,16              /* rotate back to the original bit order */
+       mtspr   SPRN_SRR1,r11           /* so the rfi below returns with EE cleared */
+       .endif
+       /* undo the prolog's register saves and return from the interrupt */
+       lwz     r11,PACA_EXGEN+EX_CR(r13)       /* CR image the prolog stored in the PACA */
+       mtcr    r11
+       ld      r10,PACA_EXGEN+EX_R10(r13)      /* restore r10 and r11 */
+       ld      r11,PACA_EXGEN+EX_R11(r13)
+       mfspr   r13,SPRN_SPRG_GEN_SCRATCH       /* restore r13 last: it was the PACA base */
+       rfi
+       b       .                       /* NOTE(review): presumably a guard against running past the rfi -- confirm */
+.endm
+
 masked_interrupt_book3e_0x500:
-       /* XXX When adding support for EPR, use PACA_IRQ_EE_EDGE */
-       li      r11,PACA_IRQ_EE
-       b       masked_interrupt_book3e_full_mask
+       // XXX When adding support for EPR, use PACA_IRQ_EE_EDGE
+       masked_interrupt_book3e PACA_IRQ_EE 1
 
 masked_interrupt_book3e_0x900:
-       ACK_DEC(r11);
-       li      r11,PACA_IRQ_DEC
-       b       masked_interrupt_book3e_no_mask
+       ACK_DEC(r10);
+       masked_interrupt_book3e PACA_IRQ_DEC 0
+
 masked_interrupt_book3e_0x980:
-       ACK_FIT(r11);
-       li      r11,PACA_IRQ_DEC
-       b       masked_interrupt_book3e_no_mask
+       ACK_FIT(r10);
+       masked_interrupt_book3e PACA_IRQ_DEC 0
+
 masked_interrupt_book3e_0x280:
 masked_interrupt_book3e_0x2c0:
-       li      r11,PACA_IRQ_DBELL
-       b       masked_interrupt_book3e_no_mask
+       masked_interrupt_book3e PACA_IRQ_DBELL 0
 
-masked_interrupt_book3e_no_mask:
-       mtcr    r10
-       lbz     r10,PACAIRQHAPPENED(r13)
-       or      r10,r10,r11
-       stb     r10,PACAIRQHAPPENED(r13)
-       b       1f
-masked_interrupt_book3e_full_mask:
-       mtcr    r10
-       lbz     r10,PACAIRQHAPPENED(r13)
-       or      r10,r10,r11
-       stb     r10,PACAIRQHAPPENED(r13)
-       mfspr   r10,SPRN_SRR1
-       rldicl  r11,r10,48,1            /* clear MSR_EE */
-       rotldi  r10,r11,16
-       mtspr   SPRN_SRR1,r10
-1:     ld      r10,PACA_EXGEN+EX_R10(r13);
-       ld      r11,PACA_EXGEN+EX_R11(r13);
-       mfspr   r13,SPRN_SPRG_GEN_SCRATCH;
-       rfi
-       b       .
 /*
  * Called from arch_local_irq_enable when an interrupt needs
  * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280
index 18bdf74fa164042e0e3b07a7dd0867faf8d6069c..06c8202a69cf43a1689a8a98323d398872f80df1 100644 (file)
@@ -289,8 +289,7 @@ int __init fadump_reserve_mem(void)
                else
                        memory_limit = memblock_end_of_DRAM();
                printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
-                               " dump, now %#016llx\n",
-                               (unsigned long long)memory_limit);
+                               " dump, now %#016llx\n", memory_limit);
        }
        if (memory_limit)
                memory_boundary = memory_limit;
index 956a4c496de942d93853f42f2db1067f45cf0085..a89cae481b0439a0b8bfe594d70dae6502c27a27 100644 (file)
@@ -73,7 +73,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
         * If so, DABR will be populated in single_step_dabr_instruction().
         */
        if (current->thread.last_hit_ubp != bp)
-               set_dabr(info->address | info->type | DABR_TRANSLATION);
+               set_dabr(info->address | info->type | DABR_TRANSLATION, info->dabrx);
 
        return 0;
 }
@@ -97,7 +97,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
        }
 
        *slot = NULL;
-       set_dabr(0);
+       set_dabr(0, 0);
 }
 
 /*
@@ -170,6 +170,13 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 
        info->address = bp->attr.bp_addr;
        info->len = bp->attr.bp_len;
+       info->dabrx = DABRX_ALL;
+       if (bp->attr.exclude_user)
+               info->dabrx &= ~DABRX_USER;
+       if (bp->attr.exclude_kernel)
+               info->dabrx &= ~DABRX_KERNEL;
+       if (bp->attr.exclude_hv)
+               info->dabrx &= ~DABRX_HYP;
 
        /*
         * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8)
@@ -197,7 +204,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
 
        info = counter_arch_bp(tsk->thread.last_hit_ubp);
        regs->msr &= ~MSR_SE;
-       set_dabr(info->address | info->type | DABR_TRANSLATION);
+       set_dabr(info->address | info->type | DABR_TRANSLATION, info->dabrx);
        tsk->thread.last_hit_ubp = NULL;
 }
 
@@ -215,7 +222,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args)
        unsigned long dar = regs->dar;
 
        /* Disable breakpoints during exception handling */
-       set_dabr(0);
+       set_dabr(0, 0);
 
        /*
         * The counter may be concurrently released but that can only
@@ -281,7 +288,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args)
        if (!info->extraneous_interrupt)
                perf_bp_event(bp, regs);
 
-       set_dabr(info->address | info->type | DABR_TRANSLATION);
+       set_dabr(info->address | info->type | DABR_TRANSLATION, info->dabrx);
 out:
        rcu_read_unlock();
        return rc;
@@ -294,7 +301,7 @@ int __kprobes single_step_dabr_instruction(struct die_args *args)
 {
        struct pt_regs *regs = args->regs;
        struct perf_event *bp = NULL;
-       struct arch_hw_breakpoint *bp_info;
+       struct arch_hw_breakpoint *info;
 
        bp = current->thread.last_hit_ubp;
        /*
@@ -304,16 +311,16 @@ int __kprobes single_step_dabr_instruction(struct die_args *args)
        if (!bp)
                return NOTIFY_DONE;
 
-       bp_info = counter_arch_bp(bp);
+       info = counter_arch_bp(bp);
 
        /*
         * We shall invoke the user-defined callback function in the single
         * stepping handler to confirm to 'trigger-after-execute' semantics
         */
-       if (!bp_info->extraneous_interrupt)
+       if (!info->extraneous_interrupt)
                perf_bp_event(bp, regs);
 
-       set_dabr(bp_info->address | bp_info->type | DABR_TRANSLATION);
+       set_dabr(info->address | info->type | DABR_TRANSLATION, info->dabrx);
        current->thread.last_hit_ubp = NULL;
 
        /*
index b01d14eeca8da2633711d2b114a860a257eea427..8220baa46faf8ced3c383bb5de46b310b802cbbf 100644 (file)
@@ -47,7 +47,6 @@
 #include <linux/stat.h>
 #include <linux/of_platform.h>
 #include <asm/ibmebus.h>
-#include <asm/abs_addr.h>
 
 static struct device ibmebus_bus_device = { /* fake "parent" device */
        .init_name = "ibmebus",
index 5df777794403d49a3820add9ba6409701b295da4..fa9f6c72f557026aaf2a4c7c7e763fb45cb7805d 100644 (file)
@@ -165,7 +165,7 @@ void __init reserve_crashkernel(void)
        if (memory_limit && memory_limit <= crashk_res.end) {
                memory_limit = crashk_res.end + 1;
                printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
-                      (unsigned long long)memory_limit);
+                      memory_limit);
        }
 
        printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
@@ -204,6 +204,12 @@ static struct property crashk_size_prop = {
        .value = &crashk_size,
 };
 
+static struct property memory_limit_prop = {   /* property passed to prom_update_property() in export_crashk_values() */
+       .name = "linux,memory-limit",
+       .length = sizeof(unsigned long long),   /* NOTE(review): assumes memory_limit is an unsigned long long -- confirm at its declaration */
+       .value = &memory_limit,                 /* points at the live variable, not a copy */
+};
+
 static void __init export_crashk_values(struct device_node *node)
 {
        struct property *prop;
@@ -223,6 +229,12 @@ static void __init export_crashk_values(struct device_node *node)
                crashk_size = resource_size(&crashk_res);
                prom_add_property(node, &crashk_size_prop);
        }
+
+       /*
+        * memory_limit is required by the kexec-tools to limit the
+        * crash regions to the actual memory used.
+        */
+       prom_update_property(node, &memory_limit_prop);
 }
 
 static int __init kexec_setup(void)
index fbe1a12dc7f1ae8a2c4cdee3d2225f846f24ba15..cd6da855090c6661310bc2321cd9f33554a52170 100644 (file)
@@ -142,6 +142,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
        new_paca->hw_cpu_id = 0xffff;
        new_paca->kexec_state = KEXEC_STATE_NONE;
        new_paca->__current = &init_task;
+       new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
 #ifdef CONFIG_PPC_STD_MMU_64
        new_paca->slb_shadow_ptr = &slb_shadow[cpu];
 #endif /* CONFIG_PPC_STD_MMU_64 */
index 43fea543d68649d93e9eb734c89b13b3eb7c5a1b..7f94f760dd0c68c8dd031d8fcc8826b759f5420f 100644 (file)
@@ -980,13 +980,14 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus)
                if (i >= 3 && bus->self->transparent)
                        continue;
 
-               /* If we are going to re-assign everything, mark the resource
-                * as unset and move it down to 0
+               /* If we're going to reassign everything, we can
+                * shrink the P2P resource to a size of 0
+                * in order to save space.
                 */
                if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
                        res->flags |= IORESOURCE_UNSET;
-                       res->end -= res->start;
                        res->start = 0;
+                       res->end = -1;
                        continue;
                }
 
@@ -1248,7 +1249,14 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus)
                pr_warning("PCI: Cannot allocate resource region "
                           "%d of PCI bridge %d, will remap\n", i, bus->number);
        clear_resource:
-               res->start = res->end = 0;
+               /* The resource may be worked out later, during
+                * reassignment, from the resources required
+                * by the downstream PCI devices. Here we set
+                * the size of the resource to 0 in order to
+                * save space.
+                */
+               res->start = 0;
+               res->end = -1;
                res->flags = 0;
        }
 
index 1a1f2ddfb581a222fb5fe85a0d8983ef5ad7e2af..50e504c29bb95294baa35700b74ce7ee44396e60 100644 (file)
@@ -258,6 +258,7 @@ void do_send_trap(struct pt_regs *regs, unsigned long address,
 {
        siginfo_t info;
 
+       current->thread.trap_nr = signal_code;
        if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
                        11, SIGSEGV) == NOTIFY_STOP)
                return;
@@ -275,6 +276,7 @@ void do_dabr(struct pt_regs *regs, unsigned long address,
 {
        siginfo_t info;
 
+       current->thread.trap_nr = TRAP_HWBKPT;
        if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
                        11, SIGSEGV) == NOTIFY_STOP)
                return;
@@ -283,7 +285,7 @@ void do_dabr(struct pt_regs *regs, unsigned long address,
                return;
 
        /* Clear the DABR */
-       set_dabr(0);
+       set_dabr(0, 0);
 
        /* Deliver the signal to userspace */
        info.si_signo = SIGTRAP;
@@ -364,18 +366,19 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
 {
        if (thread->dabr) {
                thread->dabr = 0;
-               set_dabr(0);
+               thread->dabrx = 0;
+               set_dabr(0, 0);
        }
 }
 #endif /* !CONFIG_HAVE_HW_BREAKPOINT */
 #endif /* CONFIG_PPC_ADV_DEBUG_REGS */
 
-int set_dabr(unsigned long dabr)
+int set_dabr(unsigned long dabr, unsigned long dabrx)
 {
        __get_cpu_var(current_dabr) = dabr;
 
        if (ppc_md.set_dabr)
-               return ppc_md.set_dabr(dabr);
+               return ppc_md.set_dabr(dabr, dabrx);
 
        /* XXX should we have a CPU_FTR_HAS_DABR ? */
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -385,9 +388,8 @@ int set_dabr(unsigned long dabr)
 #endif
 #elif defined(CONFIG_PPC_BOOK3S)
        mtspr(SPRN_DABR, dabr);
+       mtspr(SPRN_DABRX, dabrx);
 #endif
-
-
        return 0;
 }
 
@@ -480,7 +482,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
  */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
        if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
-               set_dabr(new->thread.dabr);
+               set_dabr(new->thread.dabr, new->thread.dabrx);
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #endif
 
index f191bf02943a839dc243e0b9dcbbabe8f874eff1..37725e86651e99bcce696665ba7199fad3445363 100644 (file)
@@ -78,7 +78,7 @@ static int __init early_parse_mem(char *p)
                return 1;
 
        memory_limit = PAGE_ALIGN(memparse(p, &p));
-       DBG("memory limit = 0x%llx\n", (unsigned long long)memory_limit);
+       DBG("memory limit = 0x%llx\n", memory_limit);
 
        return 0;
 }
@@ -661,7 +661,7 @@ void __init early_init_devtree(void *params)
 
        /* make sure we've parsed cmdline for mem= before this */
        if (memory_limit)
-               first_memblock_size = min(first_memblock_size, memory_limit);
+               first_memblock_size = min_t(u64, first_memblock_size, memory_limit);
        setup_initial_memory_limit(memstart_addr, first_memblock_size);
        /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */
        memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);
index 0794a3017b1b53e65e4d1aa325b79711a9fb04b1..ce68278a5d73108dfb67b9e2a2035da2e172a224 100644 (file)
@@ -1691,7 +1691,7 @@ static void __init prom_initialize_tce_table(void)
                 * else will impact performance, so we always allocate 8MB.
                 * Anton
                 */
-               if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p))
+               if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p))
                        minsize = 8UL << 20;
                else
                        minsize = 4UL << 20;
index c10fc28b90920120a12c4bc6cc5c39dbf2780e6a..79d8e56470df8105c9119aee7f01f4938101acc8 100644 (file)
@@ -960,6 +960,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
                thread->ptrace_bps[0] = bp;
                ptrace_put_breakpoints(task);
                thread->dabr = data;
+               thread->dabrx = DABRX_ALL;
                return 0;
        }
 
@@ -983,6 +984,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 
        /* Move contents to the DABR register */
        task->thread.dabr = data;
+       task->thread.dabrx = DABRX_ALL;
 #else /* CONFIG_PPC_ADV_DEBUG_REGS */
        /* As described above, it was assumed 3 bits were passed with the data
         *  address, but we will assume only the mode bits will be passed
@@ -1397,6 +1399,7 @@ static long ppc_set_hwdebug(struct task_struct *child,
                dabr |= DABR_DATA_WRITE;
 
        child->thread.dabr = dabr;
+       child->thread.dabrx = DABRX_ALL;
 
        return 1;
 #endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */
index 2c0ee6405633f7f7240d3a20c464daab8146ea45..20b0120db0c341682f0d15710b690ca6b379b516 100644 (file)
@@ -21,7 +21,6 @@
 #include <asm/delay.h>
 #include <asm/uaccess.h>
 #include <asm/rtas.h>
-#include <asm/abs_addr.h>
 
 #define MODULE_VERS "1.0"
 #define MODULE_NAME "rtas_flash"
@@ -582,7 +581,7 @@ static void rtas_flash_firmware(int reboot_type)
        flist = (struct flash_block_list *)&rtas_data_buf[0];
        flist->num_blocks = 0;
        flist->next = rtas_firmware_flash_list;
-       rtas_block_list = virt_to_abs(flist);
+       rtas_block_list = __pa(flist);
        if (rtas_block_list >= 4UL*1024*1024*1024) {
                printk(KERN_ALERT "FLASH: kernel bug...flash list header addr above 4GB\n");
                spin_unlock(&rtas_data_buf_lock);
@@ -596,13 +595,13 @@ static void rtas_flash_firmware(int reboot_type)
        for (f = flist; f; f = next) {
                /* Translate data addrs to absolute */
                for (i = 0; i < f->num_blocks; i++) {
-                       f->blocks[i].data = (char *)virt_to_abs(f->blocks[i].data);
+                       f->blocks[i].data = (char *)__pa(f->blocks[i].data);
                        image_size += f->blocks[i].length;
                }
                next = f->next;
                /* Don't translate NULL pointer for last entry */
                if (f->next)
-                       f->next = (struct flash_block_list *)virt_to_abs(f->next);
+                       f->next = (struct flash_block_list *)__pa(f->next);
                else
                        f->next = NULL;
                /* make num_blocks into the version/length field */
index 179af906dcda5685d77a118f0b5afe99db67286e..6de63e3250bb1af364e1b412ed511d1f50092f9c 100644 (file)
@@ -81,7 +81,7 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
                return PCIBIOS_DEVICE_NOT_FOUND;
 
        if (returnval == EEH_IO_ERROR_VALUE(size) &&
-           eeh_dn_check_failure (pdn->node, NULL))
+           eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node)))
                return PCIBIOS_DEVICE_NOT_FOUND;
 
        return PCIBIOS_SUCCESSFUL;
@@ -275,9 +275,6 @@ void __init find_and_init_phbs(void)
        of_node_put(root);
        pci_devs_phb_init();
 
-       /* Create EEH devices for all PHBs */
-       eeh_dev_phb_init();
-
        /*
         * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
         * in chosen.
index 5c023c9cf16ee70a7a3b281af2ad9807028c3b13..a2dc75793bd56b2d09786f9cbf3400b0f6051e51 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/tracehook.h>
 #include <linux/signal.h>
+#include <linux/uprobes.h>
 #include <linux/key.h>
 #include <asm/hw_breakpoint.h>
 #include <asm/uaccess.h>
@@ -130,7 +131,7 @@ static int do_signal(struct pt_regs *regs)
         * triggered inside the kernel.
         */
        if (current->thread.dabr)
-               set_dabr(current->thread.dabr);
+               set_dabr(current->thread.dabr, current->thread.dabrx);
 #endif
        /* Re-enable the breakpoints for the signal stack */
        thread_change_pc(current, regs);
@@ -157,6 +158,11 @@ static int do_signal(struct pt_regs *regs)
 
 void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
 {
+       if (thread_info_flags & _TIF_UPROBE) {
+               clear_thread_flag(TIF_UPROBE);
+               uprobe_notify_resume(regs);
+       }
+
        if (thread_info_flags & _TIF_SIGPENDING)
                do_signal(regs);
 
index ae0843fa7a61f64540214b2fbf658265d24879df..32518401af68d274be52d52cf9249505496d07a9 100644 (file)
@@ -251,6 +251,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
        if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs))
                local_irq_enable();
 
+       current->thread.trap_nr = code;
        memset(&info, 0, sizeof(info));
        info.si_signo = signr;
        info.si_code = code;
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
new file mode 100644 (file)
index 0000000..d2d46d1
--- /dev/null
@@ -0,0 +1,184 @@
+/*
+ * User-space Probes (UProbes) for powerpc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2007-2012
+ *
+ * Adapted from the x86 port by Ananth N Mavinakayanahalli <ananth@in.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/uprobes.h>
+#include <linux/uaccess.h>
+#include <linux/kdebug.h>
+
+#include <asm/sstep.h>
+
+#define UPROBE_TRAP_NR UINT_MAX
+
+/**
+ * arch_uprobe_analyze_insn - vet the instruction at a prospective probe site
+ * @auprobe: the probepoint information; @auprobe->ainsn is the instruction
+ *           that gets examined.
+ * @mm: the probed address space (not consulted here).
+ * @addr: vaddr to probe.
+ *
+ * Return 0 if the site is acceptable, -EINVAL if either of the two low
+ * bits of @addr is set, or -ENOTSUPP if the instruction is already a trap.
+ */
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
+               struct mm_struct *mm, unsigned long addr)
+{
+       /* The two low address bits must be clear */
+       if ((addr & 0x03) != 0)
+               return -EINVAL;
+
+       /*
+        * Placing a uprobe on top of an already existing breakpoint
+        * instruction is currently not supported, so refuse such sites.
+        */
+       return is_trap(auprobe->ainsn) ? -ENOTSUPP : 0;
+}
+
+/*
+ * arch_uprobe_pre_xol - prepare to execute out of line.
+ * @auprobe: the probepoint information (not used here).
+ * @regs: reflects the saved user state of current task.
+ *
+ * Remembers the task's thread.trap_nr, replaces it with the
+ * UPROBE_TRAP_NR sentinel and points the saved instruction pointer at
+ * the task's xol_vaddr. Always returns 0.
+ */
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+       struct uprobe_task *utask = current->utask;
+
+       /* Continue at the out-of-line slot */
+       regs->nip = utask->xol_vaddr;
+
+       /* Save trap_nr first, then arm the sentinel checked after the step */
+       utask->autask.saved_trap_nr = current->thread.trap_nr;
+       current->thread.trap_nr = UPROBE_TRAP_NR;
+
+       return 0;
+}
+
+/**
+ * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
+ * @regs: Reflects the saved state of the task after it has hit a breakpoint
+ * instruction.
+ *
+ * The saved instruction pointer is handed back as-is; no offset is applied
+ * to it here.
+ *
+ * Return the address of the breakpoint instruction.
+ */
+unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
+{
+       return instruction_pointer(regs);
+}
+
+/*
+ * Report whether the out-of-line instruction of task @t raised a trap.
+ *
+ * arch_uprobe_pre_xol() parks UPROBE_TRAP_NR (== UINT_MAX) in
+ * thread.trap_nr before the step and arch_uprobe_post_xol() puts the
+ * saved value back afterwards. It is assumed that anything like
+ * do_page_fault/do_trap/etc, which generates a signal (SIGILL/SIGSEGV/etc)
+ * for a trapping xol insn, sets thread.trap_nr to some other value.
+ * Finding the sentinel gone therefore means the xol insn itself trapped,
+ * which is what tells that case apart from a singlestepped instruction
+ * that merely jumps back to its own address.
+ */
+bool arch_uprobe_xol_was_trapped(struct task_struct *t)
+{
+       return t->thread.trap_nr != UPROBE_TRAP_NR;
+}
+
+/*
+ * Called after single-stepping. The step ran on a copy of the probed
+ * instruction instead of on a temporarily restored original opcode,
+ * which avoids the SMP problems the latter approach can cause.
+ *
+ * This function prepares to resume execution after the single-step:
+ * thread.trap_nr gets back the value saved before the step and the
+ * saved instruction pointer is set to utask->vaddr + MAX_UINSN_BYTES.
+ * Always returns 0.
+ */
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+       struct uprobe_task *utask = current->utask;
+
+       /*
+        * On powerpc, except for loads and stores, most instructions
+        * including ones that alter code flow (branches, calls, returns)
+        * are emulated in the kernel. We get here only if the emulation
+        * support doesn't exist and have to fix-up the next instruction
+        * to be executed.
+        */
+       regs->nip = utask->vaddr + MAX_UINSN_BYTES;
+
+       /* The sentinel armed before the step is expected to be intact */
+       WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
+       current->thread.trap_nr = utask->autask.saved_trap_nr;
+
+       return 0;
+}
+
+/*
+ * Notifier callback for exceptions: offers user-mode breakpoint
+ * (DIE_BPT) and single-step (DIE_SSTEP) events to the uprobes notifiers.
+ *
+ * Returns NOTIFY_STOP when the matching uprobe notifier claims the event
+ * and NOTIFY_DONE in every other case.
+ */
+int arch_uprobe_exception_notify(struct notifier_block *self,
+                               unsigned long val, void *data)
+{
+       struct die_args *args = data;
+       struct pt_regs *regs = args->regs;
+
+       /* regs == NULL is a kernel bug */
+       if (WARN_ON(!regs))
+               return NOTIFY_DONE;
+
+       /* Only traps taken in user mode are of interest */
+       if (!user_mode(regs))
+               return NOTIFY_DONE;
+
+       if (val == DIE_BPT && uprobe_pre_sstep_notifier(regs))
+               return NOTIFY_STOP;
+
+       if (val == DIE_SSTEP && uprobe_post_sstep_notifier(regs))
+               return NOTIFY_STOP;
+
+       return NOTIFY_DONE;
+}
+
+/*
+ * Back out of an out-of-line step. This gets called when the XOL
+ * instruction either gets trapped or the thread has a fatal signal:
+ * the instruction pointer is reset to the probed address and
+ * thread.trap_nr gets its saved value back.
+ */
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+       struct arch_uprobe_task *autask = &current->utask->autask;
+
+       instruction_pointer_set(regs, current->utask->vaddr);
+       current->thread.trap_nr = autask->saved_trap_nr;
+}
+
+/*
+ * Try to emulate the probed instruction instead of single-stepping it.
+ * Returns true if the instruction was emulated, false otherwise.
+ */
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+       /*
+        * emulate_step() returns 1 if the insn was successfully emulated.
+        * Every other outcome means the insn still has to be
+        * single-stepped in hardware.
+        */
+       return emulate_step(regs, auprobe->ainsn) > 0;
+}
index b67db22e102dd93dc11ec131ac7e942cf6de3d5c..1b2076f049cecaf954b111df7a6a07b964258c52 100644 (file)
@@ -723,9 +723,7 @@ int __cpuinit vdso_getcpu_init(void)
 
        val = (cpu & 0xfff) | ((node & 0xffff) << 16);
        mtspr(SPRN_SPRG3, val);
-#ifdef CONFIG_KVM_BOOK3S_HANDLER
-       get_paca()->kvm_hstate.sprg3 = val;
-#endif
+       get_paca()->sprg3 = val;
 
        put_cpu();
 
index 02b32216bbc3bb28de3a22b307f6f19aab2cd0a9..201ba59738be93fefc95786336a7ebe57c0a1ed6 100644 (file)
@@ -33,7 +33,6 @@
 #include <asm/prom.h>
 #include <asm/firmware.h>
 #include <asm/tce.h>
-#include <asm/abs_addr.h>
 #include <asm/page.h>
 #include <asm/hvcall.h>
 
index 44b72feaff7d9876fad230253be42c1aee6e8af7..74a24bbb963762cdb89eac53e9a302c184084c35 100644 (file)
@@ -1065,7 +1065,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
        mtspr   SPRN_DABRX,r6
 
        /* Restore SPRG3 */
-       ld      r3,HSTATE_SPRG3(r13)
+       ld      r3,PACA_SPRG3(r13)
        mtspr   SPRN_SPRG3,r3
 
        /*
index 08ffcf52a8564211d609bc2cf1fa8dfc3a6e8678..995f924e007f8f01fa4fe6eaba2b83006d66d81d 100644 (file)
@@ -133,6 +133,7 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address)
        up_read(&current->mm->mmap_sem);
 
        if (user_mode(regs)) {
+               current->thread.trap_nr = BUS_ADRERR;
                info.si_signo = SIGBUS;
                info.si_errno = 0;
                info.si_code = BUS_ADRERR;
index 90039bc64119902c5bf85b79b4dd8bab31fd0e10..f21e8ce8db33d2386aa31c5c358daa1fac162673 100644 (file)
 
 #include <linux/spinlock.h>
 #include <linux/bitops.h>
+#include <linux/of.h>
 #include <linux/threads.h>
 #include <linux/smp.h>
 
-#include <asm/abs_addr.h>
 #include <asm/machdep.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
index 377e5cbedbbb8c962d7d5f9b0fae7473b22b1481..ba45739bdfe8ad508f7ed0a4be7f4845c64dc6a3 100644 (file)
@@ -43,7 +43,6 @@
 #include <asm/uaccess.h>
 #include <asm/machdep.h>
 #include <asm/prom.h>
-#include <asm/abs_addr.h>
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 #include <asm/eeh.h>
@@ -651,7 +650,7 @@ static void __init htab_initialize(void)
                DBG("Hash table allocated at %lx, size: %lx\n", table,
                    htab_size_bytes);
 
-               htab_address = abs_to_virt(table);
+               htab_address = __va(table);
 
                /* htab absolute addr + encoded htabsize */
                _SDR1 = table + __ilog2(pteg_count) - 11;
index 620b7acd2fdfed444e71abf3f080c709482e95d1..95a45293e5ac0dd2c6b343f1aa7a81ed8b69b666 100644 (file)
@@ -62,7 +62,6 @@
 #include <asm/cputable.h>
 #include <asm/sections.h>
 #include <asm/iommu.h>
-#include <asm/abs_addr.h>
 #include <asm/vdso.h>
 
 #include "mmu_decl.h"
index fbdad0e3929a8ddfbcb0f714a6480f0ca33ba6f6..44cf2b20503da71f491177ac5252664ea200c498 100644 (file)
@@ -62,7 +62,7 @@
 
 int init_bootmem_done;
 int mem_init_done;
-phys_addr_t memory_limit;
+unsigned long long memory_limit;
 
 #ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
index 249a0631c4dbc63ca8cdf0ca422b8663b9a6299f..297d49547ea8d6b54e4452614a12851bb2506207 100644 (file)
@@ -51,7 +51,6 @@
 #include <asm/processor.h>
 #include <asm/cputable.h>
 #include <asm/sections.h>
-#include <asm/abs_addr.h>
 #include <asm/firmware.h>
 
 #include "mmu_decl.h"
index 9106ebb118f52e516e1053b2d0b9fe88367a442c..3f8efa6f29975183faa304a156f50d3360883123 100644 (file)
@@ -20,7 +20,6 @@
 #include <asm/paca.h>
 #include <asm/cputable.h>
 #include <asm/prom.h>
-#include <asm/abs_addr.h>
 
 struct stab_entry {
        unsigned long esid_data;
@@ -257,7 +256,7 @@ void __init stabs_alloc(void)
                memset((void *)newstab, 0, HW_PAGE_SIZE);
 
                paca[cpu].stab_addr = newstab;
-               paca[cpu].stab_real = virt_to_abs(newstab);
+               paca[cpu].stab_real = __pa(newstab);
                printk(KERN_INFO "Segment table for CPU %d at 0x%llx "
                       "virtual, 0x%llx absolute\n",
                       cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
index e4f8f1fc81a570a38e2ab4e63a35023558633309..7c415ddde948b9dcf3ad11c54ad94c647da278b1 100644 (file)
@@ -95,7 +95,8 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len)
        struct mm_struct *mm = current->mm;
        struct subpage_prot_table *spt = &mm->context.spt;
        u32 **spm, *spp;
-       int i, nw;
+       unsigned long i;
+       size_t nw;
        unsigned long next, limit;
 
        down_write(&mm->mmap_sem);
@@ -144,7 +145,8 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
        struct mm_struct *mm = current->mm;
        struct subpage_prot_table *spt = &mm->context.spt;
        u32 **spm, *spp;
-       int i, nw;
+       unsigned long i;
+       size_t nw;
        unsigned long next, limit;
        int err;
 
index f09d48e3268d9bab371f31e616147bb53894c69a..b4113bf863538adbf6d0bffdaa3aabe090270064 100644 (file)
@@ -20,6 +20,8 @@
 #include <asm/pgtable.h>
 #include <asm/exception-64e.h>
 #include <asm/ppc-opcode.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_booke_hv_asm.h>
 
 #ifdef CONFIG_PPC_64K_PAGES
 #define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1)
  *                                                                    *
  **********************************************************************/
 
-.macro tlb_prolog_bolted addr
-       mtspr   SPRN_SPRG_TLB_SCRATCH,r13
+.macro tlb_prolog_bolted intnum addr
+       mtspr   SPRN_SPRG_GEN_SCRATCH,r13
        mfspr   r13,SPRN_SPRG_PACA
        std     r10,PACA_EXTLB+EX_TLB_R10(r13)
        mfcr    r10
        std     r11,PACA_EXTLB+EX_TLB_R11(r13)
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+       mfspr   r11, SPRN_SRR1
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
+       DO_KVM  \intnum, SPRN_SRR1
        std     r16,PACA_EXTLB+EX_TLB_R16(r13)
        mfspr   r16,\addr               /* get faulting address */
        std     r14,PACA_EXTLB+EX_TLB_R14(r13)
        ld      r15,PACA_EXTLB+EX_TLB_R15(r13)
        TLB_MISS_RESTORE_STATS_BOLTED
        ld      r16,PACA_EXTLB+EX_TLB_R16(r13)
-       mfspr   r13,SPRN_SPRG_TLB_SCRATCH
+       mfspr   r13,SPRN_SPRG_GEN_SCRATCH
 .endm
 
 /* Data TLB miss */
        START_EXCEPTION(data_tlb_miss_bolted)
-       tlb_prolog_bolted SPRN_DEAR
+       tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR
 
        /* We need _PAGE_PRESENT and  _PAGE_ACCESSED set */
 
@@ -214,7 +222,7 @@ itlb_miss_fault_bolted:
 
 /* Instruction TLB miss */
        START_EXCEPTION(instruction_tlb_miss_bolted)
-       tlb_prolog_bolted SPRN_SRR0
+       tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0
 
        rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
        srdi    r15,r16,60              /* get region */
index 95ae77dec3f6ada8e2fc6fc58a223ed235d0032a..315f9495e9b2b5aa42a6c9aaba5a6307ac03ddde 100644 (file)
 #include <asm/reg.h>
 
 #define dbg(args...)
+#define OPROFILE_PM_PMCSEL_MSK      0xffULL
+#define OPROFILE_PM_UNIT_SHIFT      60
+#define OPROFILE_PM_UNIT_MSK        0xfULL
+#define OPROFILE_MAX_PMC_NUM        3
+#define OPROFILE_PMSEL_FIELD_WIDTH  8
+#define OPROFILE_UNIT_FIELD_WIDTH   4
+#define MMCRA_SIAR_VALID_MASK       0x10000000ULL
 
 static unsigned long reset_value[OP_MAX_COUNTER];
 
@@ -31,6 +38,61 @@ static int use_slot_nums;
 static u32 mmcr0_val;
 static u64 mmcr1_val;
 static u64 mmcra_val;
+static u32 cntr_marked_events;
+
+/*
+ * power7_marked_instr_event - find the counters programmed with marked events
+ * @mmcr1: MMCR1 value describing the event group.
+ *
+ * Given the MMCR1 value, look at the field for each counter to determine
+ * if it is a marked event.  Code based on the function
+ * power7_marked_instr_event() in file arch/powerpc/perf/power7-pmu.c.
+ *
+ * Returns a bitmap in which bit N is set when the counter with loop
+ * index N (0..3) is counting a marked event.
+ */
+static int power7_marked_instr_event(u64 mmcr1)
+{
+       u64 psel, unit;
+       int pmc, psel_shift, unit_shift;
+       /* not called cntr_marked_events: that would shadow the file-scope map */
+       int marked = 0;
+
+       for (pmc = 0; pmc < 4; pmc++) {
+               /*
+                * The event-select fields are OPROFILE_PMSEL_FIELD_WIDTH
+                * bits apart, the last counter's field starting at bit 0.
+                * One shift is used both to isolate the field and to bring
+                * it down (the mask used to be shifted by a multiple of
+                * OPROFILE_MAX_PMC_NUM, which selected the wrong bits).
+                * Bit 0 of the selector is ignored.
+                */
+               psel_shift = (OPROFILE_MAX_PMC_NUM - pmc)
+                               * OPROFILE_PMSEL_FIELD_WIDTH;
+               psel = (mmcr1 >> psel_shift) & OPROFILE_PM_PMCSEL_MSK & ~1ULL;
+
+               /*
+                * NOTE(review): the unit field is stepped by
+                * OPROFILE_PMSEL_FIELD_WIDTH, not OPROFILE_UNIT_FIELD_WIDTH;
+                * this is kept exactly as it was - confirm against the
+                * MMCR1 layout.
+                */
+               unit_shift = OPROFILE_PM_UNIT_SHIFT
+                               - (pmc * OPROFILE_PMSEL_FIELD_WIDTH);
+               unit = (mmcr1 >> unit_shift) & OPROFILE_PM_UNIT_MSK;
+
+               switch (psel >> 4) {
+               case 2:
+                       marked |= (pmc == 1 || pmc == 3) << pmc;
+                       break;
+               case 3:
+                       if (psel == 0x3c) {
+                               marked |= (pmc == 0) << pmc;
+                               break;
+                       }
+
+                       if (psel == 0x3e) {
+                               marked |= (pmc != 1) << pmc;
+                               break;
+                       }
+
+                       marked |= 1 << pmc;
+                       break;
+               case 4:
+               case 5:
+                       marked |= (unit == 0xd) << pmc;
+                       break;
+               case 6:
+                       if (psel == 0x64)
+                               marked |= (pmc >= 2) << pmc;
+                       break;
+               case 8:
+                       marked |= (unit == 0xd) << pmc;
+                       break;
+               default:
+                       /* every other selector group is not marked */
+                       break;
+               }
+       }
+       return marked;
+}
 
 static int power4_reg_setup(struct op_counter_config *ctr,
                             struct op_system_config *sys,
@@ -47,6 +109,23 @@ static int power4_reg_setup(struct op_counter_config *ctr,
        mmcr1_val = sys->mmcr1;
        mmcra_val = sys->mmcra;
 
+       /* Power 7+ and newer architectures:
+        * Determine which counter events in the group (the group of events is
+        * specified by the bit settings in the MMCR1 register) are marked
+        * events for use in the interrupt handler.  Do the calculation once
+        * before OProfile starts.  Information is used in the interrupt
+        * handler.  Starting with Power 7+ we only record the sample for
+        * marked events if the SIAR valid bit is set.  For non marked events
+        * the sample is always recorded.
+        */
+       if (pvr_version_is(PVR_POWER7p))
+               cntr_marked_events = power7_marked_instr_event(mmcr1_val);
+       else
+               cntr_marked_events = 0; /* For older processors, set the bit map
+                                        * to zero so the sample will always be
+                                        * recorded.
+                                        */
+
        for (i = 0; i < cur_cpu_spec->num_pmcs; ++i)
                reset_value[i] = 0x80000000UL - ctr[i].count;
 
@@ -61,10 +140,10 @@ static int power4_reg_setup(struct op_counter_config *ctr,
        else
                mmcr0_val |= MMCR0_PROBLEM_DISABLE;
 
-       if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p) ||
-           __is_processor(PV_970) || __is_processor(PV_970FX) ||
-           __is_processor(PV_970MP) || __is_processor(PV_970GX) ||
-           __is_processor(PV_POWER5) || __is_processor(PV_POWER5p))
+       if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p) ||
+           pvr_version_is(PVR_970) || pvr_version_is(PVR_970FX) ||
+           pvr_version_is(PVR_970MP) || pvr_version_is(PVR_970GX) ||
+           pvr_version_is(PVR_POWER5) || pvr_version_is(PVR_POWER5p))
                use_slot_nums = 1;
 
        return 0;
@@ -84,9 +163,9 @@ extern void ppc_enable_pmcs(void);
  */
 static inline int mmcra_must_set_sample(void)
 {
-       if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p) ||
-           __is_processor(PV_970) || __is_processor(PV_970FX) ||
-           __is_processor(PV_970MP) || __is_processor(PV_970GX))
+       if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p) ||
+           pvr_version_is(PVR_970) || pvr_version_is(PVR_970FX) ||
+           pvr_version_is(PVR_970MP) || pvr_version_is(PVR_970GX))
                return 1;
 
        return 0;
@@ -276,7 +355,7 @@ static bool pmc_overflow(unsigned long val)
         * PMCs because a user might set a period of less than 256 and we
         * don't want to mistakenly reset them.
         */
-       if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256))
+       if (pvr_version_is(PVR_POWER7) && ((0x80000000 - val) <= 256))
                return true;
 
        return false;
@@ -291,6 +370,7 @@ static void power4_handle_interrupt(struct pt_regs *regs,
        int i;
        unsigned int mmcr0;
        unsigned long mmcra;
+       bool siar_valid = false;
 
        mmcra = mfspr(SPRN_MMCRA);
 
@@ -300,11 +380,29 @@ static void power4_handle_interrupt(struct pt_regs *regs,
        /* set the PMM bit (see comment below) */
        mtmsrd(mfmsr() | MSR_PMM);
 
+       /* Check that the SIAR valid bit in MMCRA is set to 1. */
+       if ((mmcra & MMCRA_SIAR_VALID_MASK) == MMCRA_SIAR_VALID_MASK)
+               siar_valid = true;
+
        for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) {
                val = classic_ctr_read(i);
                if (pmc_overflow(val)) {
                        if (oprofile_running && ctr[i].enabled) {
-                               oprofile_add_ext_sample(pc, regs, i, is_kernel);
+                               /* Power 7+ and newer architectures:
+                                * If the event is a marked event, then only
+                                * save the sample if the SIAR valid bit is
+                                * set.  If the event is not marked, then
+                                * always save the sample.
+                                * Note, the Sample enable bit in the MMCRA
+                                * register must be set to 1 if the group
+                                * contains a marked event.
+                                */
+                               if ((siar_valid &&
+                                    (cntr_marked_events & (1 << i)))
+                                   || !(cntr_marked_events & (1 << i)))
+                                       oprofile_add_ext_sample(pc, regs, i,
+                                                               is_kernel);
+
                                classic_ctr_write(i, reset_value[i]);
                        } else {
                                classic_ctr_write(i, 0);
index 7cd2dbd6e4c4fa615ae4442c5d00af106f3e17dd..fb55da91aa45028f796170cf63ece7239d9d89b1 100644 (file)
@@ -1396,7 +1396,7 @@ static bool pmc_overflow(unsigned long val)
         * PMCs because a user might set a period of less than 256 and we
         * don't want to mistakenly reset them.
         */
-       if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256))
+       if (pvr_version_is(PVR_POWER7) && ((0x80000000 - val) <= 256))
                return true;
 
        return false;
index 852592b2b7128e0fd72b41dca54f123e827240df..affcf566d460039ba0fd416ffad7f6ca77427ec3 100644 (file)
@@ -136,9 +136,9 @@ ssize_t beat_nvram_get_size(void)
        return BEAT_NVRAM_SIZE;
 }
 
-int beat_set_xdabr(unsigned long dabr)
+int beat_set_xdabr(unsigned long dabr, unsigned long dabrx)
 {
-       if (beat_set_dabr(dabr, DABRX_KERNEL | DABRX_USER))
+       if (beat_set_dabr(dabr, dabrx))
                return -1;
        return 0;
 }
index 32c8efcedc8091ed673592456a2249d73dbd3c83..bfcb8e351ae5e944207e99f920a12839aa40970c 100644 (file)
@@ -32,7 +32,7 @@ void beat_get_rtc_time(struct rtc_time *);
 ssize_t beat_nvram_get_size(void);
 ssize_t beat_nvram_read(char *, size_t, loff_t *);
 ssize_t beat_nvram_write(char *, size_t, loff_t *);
-int beat_set_xdabr(unsigned long);
+int beat_set_xdabr(unsigned long, unsigned long);
 void beat_power_save(void);
 void beat_kexec_cpu_down(int, int);
 
index 14943ef01918ac7443657be39798e94d9ba69527..7d2d036754b5b56ecf5df1ddeb6367d2cc6b223c 100644 (file)
 
 #undef DEBUG
 
+#include <linux/memblock.h>
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <asm/iommu.h>
 #include <asm/machdep.h>
-#include <asm/abs_addr.h>
 #include <asm/firmware.h>
 
 #define IOBMAP_PAGE_SHIFT      12
@@ -99,7 +99,7 @@ static int iobmap_build(struct iommu_table *tbl, long index,
        ip = ((u32 *)tbl->it_base) + index;
 
        while (npages--) {
-               rpn = virt_to_abs(uaddr) >> IOBMAP_PAGE_SHIFT;
+               rpn = __pa(uaddr) >> IOBMAP_PAGE_SHIFT;
 
                *(ip++) = IOBMAP_L2E_V | rpn;
                /* invalidate tlb, can be optimized more */
@@ -258,7 +258,7 @@ void __init alloc_iobmap_l2(void)
        return;
 #endif
        /* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */
-       iob_l2_base = (u32 *)abs_to_virt(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000));
+       iob_l2_base = (u32 *)__va(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000));
 
        printk(KERN_INFO "IOBMAP L2 allocated at: %p\n", iob_l2_base);
 }
index 0e7eccc0f88d7ededee728a1dbbd55287253b436..cae7281e4e667135c59606b48a1c8ad7bfeb3547 100644 (file)
@@ -30,7 +30,6 @@
 #include <asm/opal.h>
 #include <asm/iommu.h>
 #include <asm/tce.h>
-#include <asm/abs_addr.h>
 
 #include "powernv.h"
 #include "pci.h"
index 264967770c3ab7cf1272e7a69fd20cb12dda0b18..6b4bef4e9d821aa2519cf2a4eecdb7ada4d5f57a 100644 (file)
@@ -30,7 +30,6 @@
 #include <asm/opal.h>
 #include <asm/iommu.h>
 #include <asm/tce.h>
-#include <asm/abs_addr.h>
 
 #include "powernv.h"
 #include "pci.h"
index be3cfc5ceabbfb316a55c34a4fb5da18e41dd66e..c01688a1a741f40ff423fe40761d2fd783a46799 100644 (file)
@@ -30,7 +30,6 @@
 #include <asm/opal.h>
 #include <asm/iommu.h>
 #include <asm/tce.h>
-#include <asm/abs_addr.h>
 #include <asm/firmware.h>
 
 #include "powernv.h"
@@ -447,6 +446,11 @@ static void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
                pnv_tce_invalidate(tbl, tces, tcep - 1);
 }
 
+/*
+ * Read back one entry of @tbl: it_base is treated as an array of 64-bit
+ * entries and @index is rebased by the table's it_offset.
+ */
+static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+{
+       u64 *tces = (u64 *)tbl->it_base;
+
+       return tces[index - tbl->it_offset];
+}
+
 void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
                               void *tce_mem, u64 tce_size,
                               u64 dma_offset)
@@ -597,6 +601,7 @@ void __init pnv_pci_init(void)
        ppc_md.pci_dma_dev_setup = pnv_pci_dma_dev_setup;
        ppc_md.tce_build = pnv_tce_build;
        ppc_md.tce_free = pnv_tce_free;
+       ppc_md.tce_get = pnv_tce_get;
        ppc_md.pci_probe_mode = pnv_pci_probe_mode;
        set_pci_dma_ops(&dma_iommu_ops);
 
index 2d664c5a83b0230918979585af4eb06adea17ab9..3f509f86432c1b28df128b984793df50789a06f3 100644 (file)
@@ -184,11 +184,15 @@ early_param("ps3flash", early_parse_ps3flash);
 #define prealloc_ps3flash_bounce_buffer()      do { } while (0)
 #endif
 
-static int ps3_set_dabr(unsigned long dabr)
+static int ps3_set_dabr(unsigned long dabr, unsigned long dabrx)
 {
-       enum {DABR_USER = 1, DABR_KERNEL = 2,};
+       /* Have to set at least one bit in the DABRX */
+       if (dabrx == 0 && dabr == 0)
+               dabrx = DABRX_USER;
+       /* hypervisor only allows us to set BTI, Kernel and user */
+       dabrx &= DABRX_BTI | DABRX_KERNEL | DABRX_USER;
 
-       return lv1_set_dabr(dabr, DABR_KERNEL | DABR_USER) ? -1 : 0;
+       return lv1_set_dabr(dabr, dabrx) ? -1 : 0;
 }
 
 static void __init ps3_setup_arch(void)
index c222189f5bb230e1467103681ba0cb3923ebf275..890622b87c8f009feda9bb8bd36ac5768c6e3af9 100644 (file)
@@ -6,8 +6,9 @@ obj-y                   := lpar.o hvCall.o nvram.o reconfig.o \
                           firmware.o power.o dlpar.o mobility.o
 obj-$(CONFIG_SMP)      += smp.o
 obj-$(CONFIG_SCANLOG)  += scanlog.o
-obj-$(CONFIG_EEH)      += eeh.o eeh_dev.o eeh_cache.o eeh_driver.o \
-                          eeh_event.o eeh_sysfs.o eeh_pseries.o
+obj-$(CONFIG_EEH)      += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
+                          eeh_driver.o eeh_event.o eeh_sysfs.o \
+                          eeh_pseries.o
 obj-$(CONFIG_KEXEC)    += kexec.o
 obj-$(CONFIG_PCI)      += pci.o pci_dlpar.o
 obj-$(CONFIG_PSERIES_MSI)      += msi.o
index ecd394cf34e604b24b71138d481f570cd0d926ac..18c168b752da2e24785ec1d5a6c0a879577df798 100644 (file)
@@ -92,6 +92,20 @@ struct eeh_ops *eeh_ops = NULL;
 int eeh_subsystem_enabled;
 EXPORT_SYMBOL(eeh_subsystem_enabled);
 
+/*
+ * EEH probe mode support. The intention is to support multiple
+ * platforms for EEH. Some platforms like pSeries do PCI enumeration
+ * based on device tree. However, other platforms like powernv probe
+ * PCI devices from hardware. The flag is used to distinguish that.
+ * In addition, struct eeh_ops::probe would be invoked for particular
+ * OF node or PCI device so that the corresponding PE would be created
+ * there.
+ */
+int eeh_probe_mode;
+
+/* Global EEH mutex */
+DEFINE_MUTEX(eeh_mutex);
+
 /* Lock to avoid races due to multiple reports of an error */
 static DEFINE_RAW_SPINLOCK(confirm_error_lock);
 
@@ -204,22 +218,12 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
                }
        }
 
-       /* Gather status on devices under the bridge */
-       if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
-               struct device_node *child;
-
-               for_each_child_of_node(dn, child) {
-                       if (of_node_to_eeh_dev(child))
-                               n += eeh_gather_pci_data(of_node_to_eeh_dev(child), buf+n, len-n);
-               }
-       }
-
        return n;
 }
 
 /**
  * eeh_slot_error_detail - Generate combined log including driver log and error log
- * @edev: device to report error log for
+ * @pe: EEH PE
  * @severity: temporary or permanent error log
  *
  * This routine should be called to generate the combined log, which
@@ -227,17 +231,22 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
  * out from the config space of the corresponding PCI device, while
  * the error log is fetched through platform dependent function call.
  */
-void eeh_slot_error_detail(struct eeh_dev *edev, int severity)
+void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 {
        size_t loglen = 0;
-       pci_regs_buf[0] = 0;
+       struct eeh_dev *edev;
 
-       eeh_pci_enable(edev, EEH_OPT_THAW_MMIO);
-       eeh_ops->configure_bridge(eeh_dev_to_of_node(edev));
-       eeh_restore_bars(edev);
-       loglen = eeh_gather_pci_data(edev, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
+       eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+       eeh_ops->configure_bridge(pe);
+       eeh_pe_restore_bars(pe);
 
-       eeh_ops->get_log(eeh_dev_to_of_node(edev), severity, pci_regs_buf, loglen);
+       pci_regs_buf[0] = 0;
+       eeh_pe_for_each_dev(pe, edev) {
+               loglen += eeh_gather_pci_data(edev, pci_regs_buf,
+                               EEH_PCI_REGS_LOG_LEN);
+        }
+
+       eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
 }
 
 /**
@@ -261,126 +270,8 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 }
 
 /**
- * eeh_find_device_pe - Retrieve the PE for the given device
- * @dn: device node
- *
- * Return the PE under which this device lies
- */
-struct device_node *eeh_find_device_pe(struct device_node *dn)
-{
-       while (dn->parent && of_node_to_eeh_dev(dn->parent) &&
-              (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) {
-               dn = dn->parent;
-       }
-       return dn;
-}
-
-/**
- * __eeh_mark_slot - Mark all child devices as failed
- * @parent: parent device
- * @mode_flag: failure flag
- *
- * Mark all devices that are children of this device as failed.
- * Mark the device driver too, so that it can see the failure
- * immediately; this is critical, since some drivers poll
- * status registers in interrupts ... If a driver is polling,
- * and the slot is frozen, then the driver can deadlock in
- * an interrupt context, which is bad.
- */
-static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
-{
-       struct device_node *dn;
-
-       for_each_child_of_node(parent, dn) {
-               if (of_node_to_eeh_dev(dn)) {
-                       /* Mark the pci device driver too */
-                       struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev;
-
-                       of_node_to_eeh_dev(dn)->mode |= mode_flag;
-
-                       if (dev && dev->driver)
-                               dev->error_state = pci_channel_io_frozen;
-
-                       __eeh_mark_slot(dn, mode_flag);
-               }
-       }
-}
-
-/**
- * eeh_mark_slot - Mark the indicated device and its children as failed
- * @dn: parent device
- * @mode_flag: failure flag
- *
- * Mark the indicated device and its child devices as failed.
- * The device drivers are marked as failed as well.
- */
-void eeh_mark_slot(struct device_node *dn, int mode_flag)
-{
-       struct pci_dev *dev;
-       dn = eeh_find_device_pe(dn);
-
-       /* Back up one, since config addrs might be shared */
-       if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
-               dn = dn->parent;
-
-       of_node_to_eeh_dev(dn)->mode |= mode_flag;
-
-       /* Mark the pci device too */
-       dev = of_node_to_eeh_dev(dn)->pdev;
-       if (dev)
-               dev->error_state = pci_channel_io_frozen;
-
-       __eeh_mark_slot(dn, mode_flag);
-}
-
-/**
- * __eeh_clear_slot - Clear failure flag for the child devices
- * @parent: parent device
- * @mode_flag: flag to be cleared
- *
- * Clear failure flag for the child devices.
- */
-static void __eeh_clear_slot(struct device_node *parent, int mode_flag)
-{
-       struct device_node *dn;
-
-       for_each_child_of_node(parent, dn) {
-               if (of_node_to_eeh_dev(dn)) {
-                       of_node_to_eeh_dev(dn)->mode &= ~mode_flag;
-                       of_node_to_eeh_dev(dn)->check_count = 0;
-                       __eeh_clear_slot(dn, mode_flag);
-               }
-       }
-}
-
-/**
- * eeh_clear_slot - Clear failure flag for the indicated device and its children
- * @dn: parent device
- * @mode_flag: flag to be cleared
- *
- * Clear failure flag for the indicated device and its children.
- */
-void eeh_clear_slot(struct device_node *dn, int mode_flag)
-{
-       unsigned long flags;
-       raw_spin_lock_irqsave(&confirm_error_lock, flags);
-       
-       dn = eeh_find_device_pe(dn);
-       
-       /* Back up one, since config addrs might be shared */
-       if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
-               dn = dn->parent;
-
-       of_node_to_eeh_dev(dn)->mode &= ~mode_flag;
-       of_node_to_eeh_dev(dn)->check_count = 0;
-       __eeh_clear_slot(dn, mode_flag);
-       raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
-}
-
-/**
- * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze
- * @dn: device node
- * @dev: pci device, if known
+ * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @edev: eeh device
  *
  * Check for an EEH failure for the given device node.  Call this
  * routine if the result of a read was all 0xff's and you want to
@@ -392,11 +283,13 @@ void eeh_clear_slot(struct device_node *dn, int mode_flag)
  *
  * It is safe to call this routine in an interrupt context.
  */
-int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
+int eeh_dev_check_failure(struct eeh_dev *edev)
 {
        int ret;
        unsigned long flags;
-       struct eeh_dev *edev;
+       struct device_node *dn;
+       struct pci_dev *dev;
+       struct eeh_pe *pe;
        int rc = 0;
        const char *location;
 
@@ -405,23 +298,23 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
        if (!eeh_subsystem_enabled)
                return 0;
 
-       if (!dn) {
+       if (!edev) {
                eeh_stats.no_dn++;
                return 0;
        }
-       dn = eeh_find_device_pe(dn);
-       edev = of_node_to_eeh_dev(dn);
+       dn = eeh_dev_to_of_node(edev);
+       dev = eeh_dev_to_pci_dev(edev);
+       pe = edev->pe;
 
        /* Access to IO BARs might get this far and still not want checking. */
-       if (!(edev->mode & EEH_MODE_SUPPORTED) ||
-           edev->mode & EEH_MODE_NOCHECK) {
+       if (!pe) {
                eeh_stats.ignored_check++;
-               pr_debug("EEH: Ignored check (%x) for %s %s\n",
-                       edev->mode, eeh_pci_name(dev), dn->full_name);
+               pr_debug("EEH: Ignored check for %s %s\n",
+                       eeh_pci_name(dev), dn->full_name);
                return 0;
        }
 
-       if (!edev->config_addr && !edev->pe_config_addr) {
+       if (!pe->addr && !pe->config_addr) {
                eeh_stats.no_cfg_addr++;
                return 0;
        }
@@ -434,13 +327,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
         */
        raw_spin_lock_irqsave(&confirm_error_lock, flags);
        rc = 1;
-       if (edev->mode & EEH_MODE_ISOLATED) {
-               edev->check_count++;
-               if (edev->check_count % EEH_MAX_FAILS == 0) {
+       if (pe->state & EEH_PE_ISOLATED) {
+               pe->check_count++;
+               if (pe->check_count % EEH_MAX_FAILS == 0) {
                        location = of_get_property(dn, "ibm,loc-code", NULL);
                        printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
                                "location=%s driver=%s pci addr=%s\n",
-                               edev->check_count, location,
+                               pe->check_count, location,
                                eeh_driver_name(dev), eeh_pci_name(dev));
                        printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
                                eeh_driver_name(dev));
@@ -456,7 +349,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
         * function zero of a multi-function device.
         * In any case they must share a common PHB.
         */
-       ret = eeh_ops->get_state(dn, NULL);
+       ret = eeh_ops->get_state(pe, NULL);
 
        /* Note that config-io to empty slots may fail;
         * they are empty when they don't have children.
@@ -469,7 +362,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
            (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
            (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
                eeh_stats.false_positives++;
-               edev->false_positives ++;
+               pe->false_positives++;
                rc = 0;
                goto dn_unlock;
        }
@@ -480,10 +373,10 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
         * with other functions on this device, and functions under
         * bridges.
         */
-       eeh_mark_slot(dn, EEH_MODE_ISOLATED);
+       eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
        raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
 
-       eeh_send_failure_event(edev);
+       eeh_send_failure_event(pe);
 
        /* Most EEH events are due to device driver bugs.  Having
         * a stack trace will help the device-driver authors figure
@@ -497,7 +390,7 @@ dn_unlock:
        return rc;
 }
 
-EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
+EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
 
 /**
  * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
@@ -514,21 +407,19 @@ EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
 unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
 {
        unsigned long addr;
-       struct pci_dev *dev;
-       struct device_node *dn;
+       struct eeh_dev *edev;
 
        /* Finding the phys addr + pci device; this is pretty quick. */
        addr = eeh_token_to_phys((unsigned long __force) token);
-       dev = pci_addr_cache_get_device(addr);
-       if (!dev) {
+       edev = eeh_addr_cache_get_dev(addr);
+       if (!edev) {
                eeh_stats.no_device++;
                return val;
        }
 
-       dn = pci_device_to_OF_node(dev);
-       eeh_dn_check_failure(dn, dev);
+       eeh_dev_check_failure(edev);
 
-       pci_dev_put(dev);
+       pci_dev_put(eeh_dev_to_pci_dev(edev));
        return val;
 }
 
@@ -537,23 +428,22 @@ EXPORT_SYMBOL(eeh_check_failure);
 
 /**
  * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
- * @edev: pci device node
+ * @pe: EEH PE
  *
  * This routine should be called to reenable frozen MMIO or DMA
  * so that it would work correctly again. It's useful while doing
  * recovery or log collection on the indicated device.
  */
-int eeh_pci_enable(struct eeh_dev *edev, int function)
+int eeh_pci_enable(struct eeh_pe *pe, int function)
 {
        int rc;
-       struct device_node *dn = eeh_dev_to_of_node(edev);
 
-       rc = eeh_ops->set_option(dn, function);
+       rc = eeh_ops->set_option(pe, function);
        if (rc)
-               printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
-                       function, rc, dn->full_name);
+               pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n",
+                       __func__, function, pe->phb->global_number, pe->addr, rc);
 
-       rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC);
+       rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
        if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
           (function == EEH_OPT_THAW_MMIO))
                return 0;
@@ -571,17 +461,24 @@ int eeh_pci_enable(struct eeh_dev *edev, int function)
  */
 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
 {
-       struct device_node *dn = pci_device_to_OF_node(dev);
+       struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+       struct eeh_pe *pe = edev->pe;
+
+       if (!pe) {
+               pr_err("%s: No PE found on PCI device %s\n",
+                       __func__, pci_name(dev));
+               return -EINVAL;
+       }
 
        switch (state) {
        case pcie_deassert_reset:
-               eeh_ops->reset(dn, EEH_RESET_DEACTIVATE);
+               eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
                break;
        case pcie_hot_reset:
-               eeh_ops->reset(dn, EEH_RESET_HOT);
+               eeh_ops->reset(pe, EEH_RESET_HOT);
                break;
        case pcie_warm_reset:
-               eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL);
+               eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
                break;
        default:
                return -EINVAL;
@@ -591,66 +488,37 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
 }
 
 /**
- * __eeh_set_pe_freset - Check the required reset for child devices
- * @parent: parent device
- * @freset: return value
- *
- * Each device might have its preferred reset type: fundamental or
- * hot reset. The routine is used to collect the information from
- * the child devices so that they could be reset accordingly.
- */
-void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
-{
-       struct device_node *dn;
-
-       for_each_child_of_node(parent, dn) {
-               if (of_node_to_eeh_dev(dn)) {
-                       struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev;
-
-                       if (dev && dev->driver)
-                               *freset |= dev->needs_freset;
-
-                       __eeh_set_pe_freset(dn, freset);
-               }
-       }
-}
-
-/**
- * eeh_set_pe_freset - Check the required reset for the indicated device and its children
- * @dn: parent device
- * @freset: return value
+ * eeh_set_dev_freset - Check the required reset for the indicated device
+ * @data: EEH device
+ * @flag: return value
  *
  * Each device might have its preferred reset type: fundamental or
  * hot reset. The routine is used to collected the information for
  * the indicated device and its children so that the bunch of the
  * devices could be reset properly.
  */
-void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
+static void *eeh_set_dev_freset(void *data, void *flag)
 {
        struct pci_dev *dev;
-       dn = eeh_find_device_pe(dn);
-
-       /* Back up one, since config addrs might be shared */
-       if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
-               dn = dn->parent;
+       unsigned int *freset = (unsigned int *)flag;
+       struct eeh_dev *edev = (struct eeh_dev *)data;
 
-       dev = of_node_to_eeh_dev(dn)->pdev;
+       dev = eeh_dev_to_pci_dev(edev);
        if (dev)
                *freset |= dev->needs_freset;
 
-       __eeh_set_pe_freset(dn, freset);
+       return NULL;
 }
 
 /**
  * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
- * @edev: pci device node to be reset.
+ * @pe: EEH PE
  *
  * Assert the PCI #RST line for 1/4 second.
  */
-static void eeh_reset_pe_once(struct eeh_dev *edev)
+static void eeh_reset_pe_once(struct eeh_pe *pe)
 {
        unsigned int freset = 0;
-       struct device_node *dn = eeh_dev_to_of_node(edev);
 
        /* Determine type of EEH reset required for
         * Partitionable Endpoint, a hot-reset (1)
@@ -658,12 +526,12 @@ static void eeh_reset_pe_once(struct eeh_dev *edev)
         * A fundamental reset required by any device under
         * Partitionable Endpoint trumps hot-reset.
         */
-       eeh_set_pe_freset(dn, &freset);
+       eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
 
        if (freset)
-               eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL);
+               eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
        else
-               eeh_ops->reset(dn, EEH_RESET_HOT);
+               eeh_ops->reset(pe, EEH_RESET_HOT);
 
        /* The PCI bus requires that the reset be held high for at least
         * a 100 milliseconds. We wait a bit longer 'just in case'.
@@ -675,9 +543,9 @@ static void eeh_reset_pe_once(struct eeh_dev *edev)
         * pci slot reset line is dropped. Make sure we don't miss
         * these, and clear the flag now.
         */
-       eeh_clear_slot(dn, EEH_MODE_ISOLATED);
+       eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
 
-       eeh_ops->reset(dn, EEH_RESET_DEACTIVATE);
+       eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
 
        /* After a PCI slot has been reset, the PCI Express spec requires
         * a 1.5 second idle time for the bus to stabilize, before starting
@@ -689,116 +557,36 @@ static void eeh_reset_pe_once(struct eeh_dev *edev)
 
 /**
  * eeh_reset_pe - Reset the indicated PE
- * @edev: PCI device associated EEH device
+ * @pe: EEH PE
  *
  * This routine should be called to reset indicated device, including
  * PE. A PE might include multiple PCI devices and sometimes PCI bridges
  * might be involved as well.
  */
-int eeh_reset_pe(struct eeh_dev *edev)
+int eeh_reset_pe(struct eeh_pe *pe)
 {
        int i, rc;
-       struct device_node *dn = eeh_dev_to_of_node(edev);
 
        /* Take three shots at resetting the bus */
        for (i=0; i<3; i++) {
-               eeh_reset_pe_once(edev);
+               eeh_reset_pe_once(pe);
 
-               rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC);
+               rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
                if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
                        return 0;
 
                if (rc < 0) {
-                       printk(KERN_ERR "EEH: unrecoverable slot failure %s\n",
-                              dn->full_name);
+                       pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
+                               __func__, pe->phb->global_number, pe->addr);
                        return -1;
                }
-               printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n",
-                      i+1, dn->full_name, rc);
+               pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n",
+                       i+1, pe->phb->global_number, pe->addr, rc);
        }
 
        return -1;
 }
 
-/** Save and restore of PCI BARs
- *
- * Although firmware will set up BARs during boot, it doesn't
- * set up device BAR's after a device reset, although it will,
- * if requested, set up bridge configuration. Thus, we need to
- * configure the PCI devices ourselves.  
- */
-
-/**
- * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
- * @edev: PCI device associated EEH device
- *
- * Loads the PCI configuration space base address registers,
- * the expansion ROM base address, the latency timer, and etc.
- * from the saved values in the device node.
- */
-static inline void eeh_restore_one_device_bars(struct eeh_dev *edev)
-{
-       int i;
-       u32 cmd;
-       struct device_node *dn = eeh_dev_to_of_node(edev);
-
-       if (!edev->phb)
-               return;
-
-       for (i=4; i<10; i++) {
-               eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
-       }
-
-       /* 12 == Expansion ROM Address */
-       eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
-
-#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
-#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
-
-       eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
-                   SAVED_BYTE(PCI_CACHE_LINE_SIZE));
-
-       eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
-                   SAVED_BYTE(PCI_LATENCY_TIMER));
-
-       /* max latency, min grant, interrupt pin and line */
-       eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
-
-       /* Restore PERR & SERR bits, some devices require it,
-        * don't touch the other command bits
-        */
-       eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
-       if (edev->config_space[1] & PCI_COMMAND_PARITY)
-               cmd |= PCI_COMMAND_PARITY;
-       else
-               cmd &= ~PCI_COMMAND_PARITY;
-       if (edev->config_space[1] & PCI_COMMAND_SERR)
-               cmd |= PCI_COMMAND_SERR;
-       else
-               cmd &= ~PCI_COMMAND_SERR;
-       eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
-}
-
-/**
- * eeh_restore_bars - Restore the PCI config space info
- * @edev: EEH device
- *
- * This routine performs a recursive walk to the children
- * of this device as well.
- */
-void eeh_restore_bars(struct eeh_dev *edev)
-{
-       struct device_node *dn;
-       if (!edev)
-               return;
-       
-       if ((edev->mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(edev->class_code))
-               eeh_restore_one_device_bars(edev);
-
-       for_each_child_of_node(eeh_dev_to_of_node(edev), dn)
-               eeh_restore_bars(of_node_to_eeh_dev(dn));
-}
-
 /**
  * eeh_save_bars - Save device bars
  * @edev: PCI device associated EEH device
@@ -808,7 +596,7 @@ void eeh_restore_bars(struct eeh_dev *edev)
  * PCI devices are added individually; but, for the restore,
  * an entire slot is reset at a time.
  */
-static void eeh_save_bars(struct eeh_dev *edev)
+void eeh_save_bars(struct eeh_dev *edev)
 {
        int i;
        struct device_node *dn;
@@ -821,102 +609,6 @@ static void eeh_save_bars(struct eeh_dev *edev)
                eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
 }
 
-/**
- * eeh_early_enable - Early enable EEH on the indicated device
- * @dn: device node
- * @data: BUID
- *
- * Enable EEH functionality on the specified PCI device. The function
- * is expected to be called before real PCI probing is done. However,
- * the PHBs have been initialized at this point.
- */
-static void *eeh_early_enable(struct device_node *dn, void *data)
-{
-       int ret;
-       const u32 *class_code = of_get_property(dn, "class-code", NULL);
-       const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
-       const u32 *device_id = of_get_property(dn, "device-id", NULL);
-       const u32 *regs;
-       int enable;
-       struct eeh_dev *edev = of_node_to_eeh_dev(dn);
-
-       edev->class_code = 0;
-       edev->mode = 0;
-       edev->check_count = 0;
-       edev->freeze_count = 0;
-       edev->false_positives = 0;
-
-       if (!of_device_is_available(dn))
-               return NULL;
-
-       /* Ignore bad nodes. */
-       if (!class_code || !vendor_id || !device_id)
-               return NULL;
-
-       /* There is nothing to check on PCI to ISA bridges */
-       if (dn->type && !strcmp(dn->type, "isa")) {
-               edev->mode |= EEH_MODE_NOCHECK;
-               return NULL;
-       }
-       edev->class_code = *class_code;
-
-       /* Ok... see if this device supports EEH.  Some do, some don't,
-        * and the only way to find out is to check each and every one.
-        */
-       regs = of_get_property(dn, "reg", NULL);
-       if (regs) {
-               /* First register entry is addr (00BBSS00)  */
-               /* Try to enable eeh */
-               ret = eeh_ops->set_option(dn, EEH_OPT_ENABLE);
-
-               enable = 0;
-               if (ret == 0) {
-                       edev->config_addr = regs[0];
-
-                       /* If the newer, better, ibm,get-config-addr-info is supported, 
-                        * then use that instead.
-                        */
-                       edev->pe_config_addr = eeh_ops->get_pe_addr(dn);
-
-                       /* Some older systems (Power4) allow the
-                        * ibm,set-eeh-option call to succeed even on nodes
-                        * where EEH is not supported. Verify support
-                        * explicitly.
-                        */
-                       ret = eeh_ops->get_state(dn, NULL);
-                       if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
-                               enable = 1;
-               }
-
-               if (enable) {
-                       eeh_subsystem_enabled = 1;
-                       edev->mode |= EEH_MODE_SUPPORTED;
-
-                       pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n",
-                                dn->full_name, edev->config_addr,
-                                edev->pe_config_addr);
-               } else {
-
-                       /* This device doesn't support EEH, but it may have an
-                        * EEH parent, in which case we mark it as supported.
-                        */
-                       if (dn->parent && of_node_to_eeh_dev(dn->parent) &&
-                           (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) {
-                               /* Parent supports EEH. */
-                               edev->mode |= EEH_MODE_SUPPORTED;
-                               edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr;
-                               return NULL;
-                       }
-               }
-       } else {
-               printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
-                      dn->full_name);
-       }
-
-       eeh_save_bars(edev);
-       return NULL;
-}
-
 /**
  * eeh_ops_register - Register platform dependent EEH operations
  * @ops: platform dependent EEH operations
@@ -982,7 +674,7 @@ int __exit eeh_ops_unregister(const char *name)
  * Even if force-off is set, the EEH hardware is still enabled, so that
  * newer systems can boot.
  */
-void __init eeh_init(void)
+static int __init eeh_init(void)
 {
        struct pci_controller *hose, *tmp;
        struct device_node *phb;
@@ -992,27 +684,34 @@ void __init eeh_init(void)
        if (!eeh_ops) {
                pr_warning("%s: Platform EEH operation not found\n",
                        __func__);
-               return;
+               return -EEXIST;
        } else if ((ret = eeh_ops->init())) {
                pr_warning("%s: Failed to call platform init function (%d)\n",
                        __func__, ret);
-               return;
+               return ret;
        }
 
        raw_spin_lock_init(&confirm_error_lock);
 
        /* Enable EEH for all adapters */
-       list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
-               phb = hose->dn;
-               traverse_pci_devices(phb, eeh_early_enable, NULL);
+       if (eeh_probe_mode_devtree()) {
+               list_for_each_entry_safe(hose, tmp,
+                       &hose_list, list_node) {
+                       phb = hose->dn;
+                       traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
+               }
        }
 
        if (eeh_subsystem_enabled)
-               printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
+               pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
        else
-               printk(KERN_WARNING "EEH: No capable adapters found\n");
+               pr_warning("EEH: No capable adapters found\n");
+
+       return ret;
 }
 
+core_initcall_sync(eeh_init);
+
 /**
  * eeh_add_device_early - Enable EEH for the indicated device_node
  * @dn: device node for which to set up EEH
@@ -1037,7 +736,8 @@ static void eeh_add_device_early(struct device_node *dn)
        if (NULL == phb || 0 == phb->buid)
                return;
 
-       eeh_early_enable(dn, NULL);
+       /* FIXME: hotplug support on POWERNV */
+       eeh_ops->of_probe(dn, NULL);
 }
 
 /**
@@ -1087,7 +787,7 @@ static void eeh_add_device_late(struct pci_dev *dev)
        edev->pdev = dev;
        dev->dev.archdata.edev = edev;
 
-       pci_addr_cache_insert_device(dev);
+       eeh_addr_cache_insert_dev(dev);
        eeh_sysfs_add_device(dev);
 }
 
@@ -1143,7 +843,8 @@ static void eeh_remove_device(struct pci_dev *dev)
        dev->dev.archdata.edev = NULL;
        pci_dev_put(dev);
 
-       pci_addr_cache_remove_device(dev);
+       eeh_rmv_from_parent_pe(edev);
+       eeh_addr_cache_rmv_dev(dev);
        eeh_sysfs_remove_device(dev);
 }
 
index e5ae1c687c669e27e1f03c6007295e53e750eff6..5a4c87903057f46c6bfb7466ca87fdbc653cb1c0 100644 (file)
@@ -50,6 +50,7 @@ struct pci_io_addr_range {
        struct rb_node rb_node;
        unsigned long addr_lo;
        unsigned long addr_hi;
+       struct eeh_dev *edev;
        struct pci_dev *pcidev;
        unsigned int flags;
 };
@@ -59,7 +60,7 @@ static struct pci_io_addr_cache {
        spinlock_t piar_lock;
 } pci_io_addr_cache_root;
 
-static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
+static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
 {
        struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
 
@@ -74,7 +75,7 @@ static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
                                n = n->rb_right;
                        } else {
                                pci_dev_get(piar->pcidev);
-                               return piar->pcidev;
+                               return piar->edev;
                        }
                }
        }
@@ -83,7 +84,7 @@ static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
 }
 
 /**
- * pci_addr_cache_get_device - Get device, given only address
+ * eeh_addr_cache_get_dev - Get device, given only address
  * @addr: mmio (PIO) phys address or i/o port number
  *
  * Given an mmio phys address, or a port number, find a pci device
@@ -92,15 +93,15 @@ static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
  * from zero (that is, they do *not* have pci_io_addr added in).
  * It is safe to call this function within an interrupt.
  */
-struct pci_dev *pci_addr_cache_get_device(unsigned long addr)
+struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr)
 {
-       struct pci_dev *dev;
+       struct eeh_dev *edev;
        unsigned long flags;
 
        spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
-       dev = __pci_addr_cache_get_device(addr);
+       edev = __eeh_addr_cache_get_device(addr);
        spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
-       return dev;
+       return edev;
 }
 
 #ifdef DEBUG
@@ -108,7 +109,7 @@ struct pci_dev *pci_addr_cache_get_device(unsigned long addr)
  * Handy-dandy debug print routine, does nothing more
  * than print out the contents of our addr cache.
  */
-static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
+static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
 {
        struct rb_node *n;
        int cnt = 0;
@@ -117,7 +118,7 @@ static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
        while (n) {
                struct pci_io_addr_range *piar;
                piar = rb_entry(n, struct pci_io_addr_range, rb_node);
-               printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n",
+               pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n",
                       (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
                       piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
                cnt++;
@@ -128,7 +129,7 @@ static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
 
 /* Insert address range into the rb tree. */
 static struct pci_io_addr_range *
-pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
+eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
                      unsigned long ahi, unsigned int flags)
 {
        struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
@@ -146,23 +147,24 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
                } else {
                        if (dev != piar->pcidev ||
                            alo != piar->addr_lo || ahi != piar->addr_hi) {
-                               printk(KERN_WARNING "PIAR: overlapping address range\n");
+                               pr_warning("PIAR: overlapping address range\n");
                        }
                        return piar;
                }
        }
-       piar = kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
+       piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
        if (!piar)
                return NULL;
 
        pci_dev_get(dev);
        piar->addr_lo = alo;
        piar->addr_hi = ahi;
+       piar->edev = pci_dev_to_eeh_dev(dev);
        piar->pcidev = dev;
        piar->flags = flags;
 
 #ifdef DEBUG
-       printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n",
+       pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n",
                          alo, ahi, pci_name(dev));
 #endif
 
@@ -172,7 +174,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
        return piar;
 }
 
-static void __pci_addr_cache_insert_device(struct pci_dev *dev)
+static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
 {
        struct device_node *dn;
        struct eeh_dev *edev;
@@ -180,7 +182,7 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev)
 
        dn = pci_device_to_OF_node(dev);
        if (!dn) {
-               printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev));
+               pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev));
                return;
        }
 
@@ -192,8 +194,7 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev)
        }
 
        /* Skip any devices for which EEH is not enabled. */
-       if (!(edev->mode & EEH_MODE_SUPPORTED) ||
-           edev->mode & EEH_MODE_NOCHECK) {
+       if (!edev->pe) {
 #ifdef DEBUG
                pr_info("PCI: skip building address cache for=%s - %s\n",
                        pci_name(dev), dn->full_name);
@@ -212,19 +213,19 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev)
                        continue;
                if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
                         continue;
-               pci_addr_cache_insert(dev, start, end, flags);
+               eeh_addr_cache_insert(dev, start, end, flags);
        }
 }
 
 /**
- * pci_addr_cache_insert_device - Add a device to the address cache
+ * eeh_addr_cache_insert_dev - Add a device to the address cache
  * @dev: PCI device whose I/O addresses we are interested in.
  *
  * In order to support the fast lookup of devices based on addresses,
  * we maintain a cache of devices that can be quickly searched.
  * This routine adds a device to that cache.
  */
-void pci_addr_cache_insert_device(struct pci_dev *dev)
+void eeh_addr_cache_insert_dev(struct pci_dev *dev)
 {
        unsigned long flags;
 
@@ -233,11 +234,11 @@ void pci_addr_cache_insert_device(struct pci_dev *dev)
                return;
 
        spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
-       __pci_addr_cache_insert_device(dev);
+       __eeh_addr_cache_insert_dev(dev);
        spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
 }
 
-static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
+static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev)
 {
        struct rb_node *n;
 
@@ -258,7 +259,7 @@ restart:
 }
 
 /**
- * pci_addr_cache_remove_device - remove pci device from addr cache
+ * eeh_addr_cache_rmv_dev - remove pci device from addr cache
  * @dev: device to remove
  *
  * Remove a device from the addr-cache tree.
@@ -266,17 +267,17 @@ restart:
  * the tree multiple times (once per resource).
  * But so what; device removal doesn't need to be that fast.
  */
-void pci_addr_cache_remove_device(struct pci_dev *dev)
+void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
 {
        unsigned long flags;
 
        spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
-       __pci_addr_cache_remove_device(dev);
+       __eeh_addr_cache_rmv_dev(dev);
        spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
 }
 
 /**
- * pci_addr_cache_build - Build a cache of I/O addresses
+ * eeh_addr_cache_build - Build a cache of I/O addresses
  *
  * Build a cache of pci i/o addresses.  This cache will be used to
  * find the pci device that corresponds to a given address.
@@ -284,7 +285,7 @@ void pci_addr_cache_remove_device(struct pci_dev *dev)
  * Must be run late in boot process, after the pci controllers
  * have been scanned for devices (after all device resources are known).
  */
-void __init pci_addr_cache_build(void)
+void __init eeh_addr_cache_build(void)
 {
        struct device_node *dn;
        struct eeh_dev *edev;
@@ -293,7 +294,7 @@ void __init pci_addr_cache_build(void)
        spin_lock_init(&pci_io_addr_cache_root.piar_lock);
 
        for_each_pci_dev(dev) {
-               pci_addr_cache_insert_device(dev);
+               eeh_addr_cache_insert_dev(dev);
 
                dn = pci_device_to_OF_node(dev);
                if (!dn)
@@ -312,7 +313,7 @@ void __init pci_addr_cache_build(void)
 
 #ifdef DEBUG
        /* Verify tree built up above, echo back the list of addrs. */
-       pci_addr_cache_print(&pci_io_addr_cache_root);
+       eeh_addr_cache_print(&pci_io_addr_cache_root);
 #endif
 }
 
index c4507d09590029f94726b7b3afaeebba29b276aa..66442341d3a6009882c0039b2049d1f7e3af1800 100644 (file)
@@ -55,7 +55,7 @@ void * __devinit eeh_dev_init(struct device_node *dn, void *data)
        struct eeh_dev *edev;
 
        /* Allocate EEH device */
-       edev = zalloc_maybe_bootmem(sizeof(*edev), GFP_KERNEL);
+       edev = kzalloc(sizeof(*edev), GFP_KERNEL);
        if (!edev) {
                pr_warning("%s: out of memory\n", __func__);
                return NULL;
@@ -65,6 +65,7 @@ void * __devinit eeh_dev_init(struct device_node *dn, void *data)
        PCI_DN(dn)->edev = edev;
        edev->dn  = dn;
        edev->phb = phb;
+       INIT_LIST_HEAD(&edev->list);
 
        return NULL;
 }
@@ -80,6 +81,9 @@ void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb)
 {
        struct device_node *dn = phb->dn;
 
+       /* EEH PE for PHB */
+       eeh_phb_pe_create(phb);
+
        /* EEH device for PHB */
        eeh_dev_init(dn, phb);
 
@@ -93,10 +97,16 @@ void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb)
  * Scan all the existing PHBs and create EEH devices for their OF
  * nodes and their children OF nodes
  */
-void __init eeh_dev_phb_init(void)
+static int __init eeh_dev_phb_init(void)
 {
        struct pci_controller *phb, *tmp;
 
        list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
                eeh_dev_phb_init_dynamic(phb);
+
+       pr_info("EEH: devices created\n");
+
+       return 0;
 }
+
+core_initcall(eeh_dev_phb_init);
index baf92cd9dfab624c1fa37af234eb9da6f45bacf5..8370ce7d5931f0abff0b5d07d43e49208bd190c5 100644 (file)
@@ -93,7 +93,7 @@ static void eeh_disable_irq(struct pci_dev *dev)
        if (!irq_has_action(dev->irq))
                return;
 
-       edev->mode |= EEH_MODE_IRQ_DISABLED;
+       edev->mode |= EEH_DEV_IRQ_DISABLED;
        disable_irq_nosync(dev->irq);
 }
 
@@ -108,36 +108,43 @@ static void eeh_enable_irq(struct pci_dev *dev)
 {
        struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
 
-       if ((edev->mode) & EEH_MODE_IRQ_DISABLED) {
-               edev->mode &= ~EEH_MODE_IRQ_DISABLED;
+       if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
+               edev->mode &= ~EEH_DEV_IRQ_DISABLED;
                enable_irq(dev->irq);
        }
 }
 
 /**
  * eeh_report_error - Report pci error to each device driver
- * @dev: PCI device
+ * @data: eeh device
  * @userdata: return value
  * 
  * Report an EEH error to each device driver, collect up and 
  * merge the device driver responses. Cumulative response 
  * passed back in "userdata".
  */
-static int eeh_report_error(struct pci_dev *dev, void *userdata)
+static void *eeh_report_error(void *data, void *userdata)
 {
+       struct eeh_dev *edev = (struct eeh_dev *)data;
+       struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
        enum pci_ers_result rc, *res = userdata;
-       struct pci_driver *driver = dev->driver;
+       struct pci_driver *driver;
 
+       /* The EEH device may have no PCI device attached: bail out
+        * before dev is dereferenced and continue with the next one.
+        */
+       if (!dev) return NULL;
+
        dev->error_state = pci_channel_io_frozen;
 
-       if (!driver)
-               return 0;
+       if (!(driver = dev->driver))
+               return NULL;
 
        eeh_disable_irq(dev);
 
        if (!driver->err_handler ||
            !driver->err_handler->error_detected)
-               return 0;
+               return NULL;
 
        rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
 
@@ -145,27 +152,31 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata)
        if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
        if (*res == PCI_ERS_RESULT_NONE) *res = rc;
 
-       return 0;
+       return NULL;
 }
 
 /**
  * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
- * @dev: PCI device
+ * @data: eeh device
  * @userdata: return value
  *
  * Tells each device driver that IO ports, MMIO and config space I/O
  * are now enabled. Collects up and merges the device driver responses.
  * Cumulative response passed back in "userdata".
  */
-static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
+static void *eeh_report_mmio_enabled(void *data, void *userdata)
 {
+       struct eeh_dev *edev = (struct eeh_dev *)data;
+       struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
        enum pci_ers_result rc, *res = userdata;
-       struct pci_driver *driver = dev->driver;
+       struct pci_driver *driver;
 
-       if (!driver ||
+       if (!dev) return NULL;
+
+       if (!(driver = dev->driver) ||
            !driver->err_handler ||
            !driver->err_handler->mmio_enabled)
-               return 0;
+               return NULL;
 
        rc = driver->err_handler->mmio_enabled(dev);
 
@@ -173,12 +184,12 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
        if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
        if (*res == PCI_ERS_RESULT_NONE) *res = rc;
 
-       return 0;
+       return NULL;
 }
 
 /**
  * eeh_report_reset - Tell device that slot has been reset
- * @dev: PCI device
+ * @data: eeh device
  * @userdata: return value
  *
  * This routine must be called while EEH tries to reset particular
@@ -186,13 +197,15 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
  * some actions, usually to save data the driver needs so that the
  * driver can work again while the device is recovered.
  */
-static int eeh_report_reset(struct pci_dev *dev, void *userdata)
+static void *eeh_report_reset(void *data, void *userdata)
 {
+       struct eeh_dev *edev = (struct eeh_dev *)data;
+       struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
        enum pci_ers_result rc, *res = userdata;
-       struct pci_driver *driver = dev->driver;
+       struct pci_driver *driver;
 
-       if (!driver)
-               return 0;
+       if (!dev || !(driver = dev->driver))
+               return NULL;
 
        dev->error_state = pci_channel_io_normal;
 
@@ -200,7 +213,7 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata)
 
        if (!driver->err_handler ||
            !driver->err_handler->slot_reset)
-               return 0;
+               return NULL;
 
        rc = driver->err_handler->slot_reset(dev);
        if ((*res == PCI_ERS_RESULT_NONE) ||
@@ -208,82 +221,89 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata)
        if (*res == PCI_ERS_RESULT_DISCONNECT &&
             rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 
-       return 0;
+       return NULL;
 }
 
 /**
  * eeh_report_resume - Tell device to resume normal operations
- * @dev: PCI device
+ * @data: eeh device
  * @userdata: return value
  *
  * This routine must be called to notify the device driver that it
  * could resume so that the device driver can do some initialization
  * to make the recovered device work again.
  */
-static int eeh_report_resume(struct pci_dev *dev, void *userdata)
+static void *eeh_report_resume(void *data, void *userdata)
 {
-       struct pci_driver *driver = dev->driver;
+       struct eeh_dev *edev = (struct eeh_dev *)data;
+       struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+       struct pci_driver *driver;
+
+       if (!dev) return NULL;
 
        dev->error_state = pci_channel_io_normal;
 
-       if (!driver)
-               return 0;
+       if (!(driver = dev->driver))
+               return NULL;
 
        eeh_enable_irq(dev);
 
        if (!driver->err_handler ||
            !driver->err_handler->resume)
-               return 0;
+               return NULL;
 
        driver->err_handler->resume(dev);
 
-       return 0;
+       return NULL;
 }
 
 /**
  * eeh_report_failure - Tell device driver that device is dead.
- * @dev: PCI device
+ * @data: eeh device
  * @userdata: return value
  *
  * This informs the device driver that the device is permanently
  * dead, and that no further recovery attempts will be made on it.
  */
-static int eeh_report_failure(struct pci_dev *dev, void *userdata)
+static void *eeh_report_failure(void *data, void *userdata)
 {
-       struct pci_driver *driver = dev->driver;
+       struct eeh_dev *edev = (struct eeh_dev *)data;
+       struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+       struct pci_driver *driver;
+
+       if (!dev) return NULL;
 
        dev->error_state = pci_channel_io_perm_failure;
 
-       if (!driver)
-               return 0;
+       if (!(driver = dev->driver))
+               return NULL;
 
        eeh_disable_irq(dev);
 
        if (!driver->err_handler ||
            !driver->err_handler->error_detected)
-               return 0;
+               return NULL;
 
        driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
 
-       return 0;
+       return NULL;
 }
 
 /**
  * eeh_reset_device - Perform actual reset of a pci slot
- * @edev: PE associated EEH device
+ * @pe: EEH PE
  * @bus: PCI bus corresponding to the isolcated slot
  *
  * This routine must be called to do reset on the indicated PE.
  * During the reset, udev might be invoked because those affected
  * PCI devices will be removed and then added.
  */
-static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
+static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 {
-       struct device_node *dn;
        int cnt, rc;
 
        /* pcibios will clear the counter; save the value */
-       cnt = edev->freeze_count;
+       cnt = pe->freeze_count;
 
        if (bus)
                pcibios_remove_pci_devices(bus);
@@ -292,25 +312,13 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
         * Reconfigure bridges and devices. Don't try to bring the system
         * up if the reset failed for some reason.
         */
-       rc = eeh_reset_pe(edev);
+       rc = eeh_reset_pe(pe);
        if (rc)
                return rc;
 
-       /* Walk over all functions on this device. */
-       dn = eeh_dev_to_of_node(edev);
-       if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
-               dn = dn->parent->child;
-
-       while (dn) {
-               struct eeh_dev *pedev = of_node_to_eeh_dev(dn);
-
-               /* On Power4, always true because eeh_pe_config_addr=0 */
-               if (edev->pe_config_addr == pedev->pe_config_addr) {
-                       eeh_ops->configure_bridge(dn);
-                       eeh_restore_bars(pedev);
-               }
-               dn = dn->sibling;
-       }
+       /* Restore PE */
+       eeh_ops->configure_bridge(pe);
+       eeh_pe_restore_bars(pe);
 
        /* Give the system 5 seconds to finish running the user-space
         * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes, 
@@ -322,7 +330,7 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
                ssleep(5);
                pcibios_add_pci_devices(bus);
        }
-       edev->freeze_count = cnt;
+       pe->freeze_count = cnt;
 
        return 0;
 }
@@ -334,7 +342,7 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
 
 /**
  * eeh_handle_event - Reset a PCI device after hard lockup.
- * @event: EEH event
+ * @pe: EEH PE
  *
  * While PHB detects address or data parity errors on particular PCI
  * slot, the associated PE will be frozen. Besides, DMA's occurring
@@ -349,69 +357,24 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
  * drivers (which cause a second set of hotplug events to go out to
  * userspace).
  */
-struct eeh_dev *handle_eeh_events(struct eeh_event *event)
+void eeh_handle_event(struct eeh_pe *pe)
 {
-       struct device_node *frozen_dn;
-       struct eeh_dev *frozen_edev;
        struct pci_bus *frozen_bus;
        int rc = 0;
        enum pci_ers_result result = PCI_ERS_RESULT_NONE;
-       const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
-
-       frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev));
-       if (!frozen_dn) {
-               location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL);
-               location = location ? location : "unknown";
-               printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
-                               "for location=%s pci addr=%s\n",
-                       location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev)));
-               return NULL;
-       }
-
-       frozen_bus = pcibios_find_pci_bus(frozen_dn);
-       location = of_get_property(frozen_dn, "ibm,loc-code", NULL);
-       location = location ? location : "unknown";
-
-       /* There are two different styles for coming up with the PE.
-        * In the old style, it was the highest EEH-capable device
-        * which was always an EADS pci bridge.  In the new style,
-        * there might not be any EADS bridges, and even when there are,
-        * the firmware marks them as "EEH incapable". So another
-        * two-step is needed to find the pci bus..
-        */
-       if (!frozen_bus)
-               frozen_bus = pcibios_find_pci_bus(frozen_dn->parent);
 
+       frozen_bus = eeh_pe_bus_get(pe);
        if (!frozen_bus) {
-               printk(KERN_ERR "EEH: Cannot find PCI bus "
-                       "for location=%s dn=%s\n",
-                       location, frozen_dn->full_name);
-               return NULL;
+               pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
+                       __func__, pe->phb->global_number, pe->addr);
+               return;
        }
 
-       frozen_edev = of_node_to_eeh_dev(frozen_dn);
-       frozen_edev->freeze_count++;
-       pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev));
-       drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev));
-
-       if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES)
+       pe->freeze_count++;
+       if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
                goto excess_failures;
-
-       printk(KERN_WARNING
-          "EEH: This PCI device has failed %d times in the last hour:\n",
-               frozen_edev->freeze_count);
-
-       if (frozen_edev->pdev) {
-               bus_pci_str = pci_name(frozen_edev->pdev);
-               bus_drv_str = eeh_pcid_name(frozen_edev->pdev);
-               printk(KERN_WARNING
-                       "EEH: Bus location=%s driver=%s pci addr=%s\n",
-                       location, bus_drv_str, bus_pci_str);
-       }
-
-       printk(KERN_WARNING
-               "EEH: Device location=%s driver=%s pci addr=%s\n",
-               location, drv_str, pci_str);
+       pr_warning("EEH: This PCI device has failed %d times in the last hour\n",
+               pe->freeze_count);
 
        /* Walk the various device drivers attached to this slot through
         * a reset sequence, giving each an opportunity to do what it needs
@@ -419,12 +382,12 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
         * status ... if any child can't handle the reset, then the entire
         * slot is dlpar removed and added.
         */
-       pci_walk_bus(frozen_bus, eeh_report_error, &result);
+       eeh_pe_dev_traverse(pe, eeh_report_error, &result);
 
        /* Get the current PCI slot state. This can take a long time,
         * sometimes over 3 seconds for certain systems.
         */
-       rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000);
+       rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
        if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
                printk(KERN_WARNING "EEH: Permanent failure\n");
                goto hard_fail;
@@ -434,14 +397,14 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
         * don't post the error log until after all dev drivers
         * have been informed.
         */
-       eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP);
+       eeh_slot_error_detail(pe, EEH_LOG_TEMP);
 
        /* If all device drivers were EEH-unaware, then shut
         * down all of the device drivers, and hope they
         * go down willingly, without panicing the system.
         */
        if (result == PCI_ERS_RESULT_NONE) {
-               rc = eeh_reset_device(frozen_edev, frozen_bus);
+               rc = eeh_reset_device(pe, frozen_bus);
                if (rc) {
                        printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
                        goto hard_fail;
@@ -450,7 +413,7 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
 
        /* If all devices reported they can proceed, then re-enable MMIO */
        if (result == PCI_ERS_RESULT_CAN_RECOVER) {
-               rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO);
+               rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
 
                if (rc < 0)
                        goto hard_fail;
@@ -458,13 +421,13 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
                        result = PCI_ERS_RESULT_NEED_RESET;
                } else {
                        result = PCI_ERS_RESULT_NONE;
-                       pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result);
+                       eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
                }
        }
 
        /* If all devices reported they can proceed, then re-enable DMA */
        if (result == PCI_ERS_RESULT_CAN_RECOVER) {
-               rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA);
+               rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
 
                if (rc < 0)
                        goto hard_fail;
@@ -482,13 +445,13 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
 
        /* If any device called out for a reset, then reset the slot */
        if (result == PCI_ERS_RESULT_NEED_RESET) {
-               rc = eeh_reset_device(frozen_edev, NULL);
+               rc = eeh_reset_device(pe, NULL);
                if (rc) {
                        printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
                        goto hard_fail;
                }
                result = PCI_ERS_RESULT_NONE;
-               pci_walk_bus(frozen_bus, eeh_report_reset, &result);
+               eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
        }
 
        /* All devices should claim they have recovered by now. */
@@ -499,9 +462,9 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
        }
 
        /* Tell all device drivers that they can resume operations */
-       pci_walk_bus(frozen_bus, eeh_report_resume, NULL);
+       eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
 
-       return frozen_edev;
+       return;
        
 excess_failures:
        /*
@@ -509,30 +472,26 @@ excess_failures:
         * are due to poorly seated PCI cards. Only 10% or so are
         * due to actual, failed cards.
         */
-       printk(KERN_ERR
-          "EEH: PCI device at location=%s driver=%s pci addr=%s\n"
-               "has failed %d times in the last hour "
-               "and has been permanently disabled.\n"
-               "Please try reseating this device or replacing it.\n",
-               location, drv_str, pci_str, frozen_edev->freeze_count);
+       pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n"
+              "last hour and has been permanently disabled.\n"
+              "Please try reseating or replacing it.\n",
+               pe->phb->global_number, pe->addr,
+               pe->freeze_count);
        goto perm_error;
 
 hard_fail:
-       printk(KERN_ERR
-          "EEH: Unable to recover from failure of PCI device "
-          "at location=%s driver=%s pci addr=%s\n"
-          "Please try reseating this device or replacing it.\n",
-               location, drv_str, pci_str);
+       pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n"
+              "Please try reseating or replacing it\n",
+               pe->phb->global_number, pe->addr);
 
 perm_error:
-       eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM);
+       eeh_slot_error_detail(pe, EEH_LOG_PERM);
 
        /* Notify all devices that they're about to go down. */
-       pci_walk_bus(frozen_bus, eeh_report_failure, NULL);
+       eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
 
        /* Shut down the device drivers for good. */
-       pcibios_remove_pci_devices(frozen_bus);
-
-       return NULL;
+       if (frozen_bus)
+               pcibios_remove_pci_devices(frozen_bus);
 }
 
index fb506317ebb09141716d0d9bfe068fe7f953221d..51faaac8abe6c401636326deebd59f4d12457b28 100644 (file)
@@ -57,7 +57,7 @@ static int eeh_event_handler(void * dummy)
 {
        unsigned long flags;
        struct eeh_event *event;
-       struct eeh_dev *edev;
+       struct eeh_pe *pe;
 
        set_task_comm(current, "eehd");
 
@@ -76,28 +76,23 @@ static int eeh_event_handler(void * dummy)
 
        /* Serialize processing of EEH events */
        mutex_lock(&eeh_event_mutex);
-       edev = event->edev;
-       eeh_mark_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
-
-       printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
-              eeh_pci_name(edev->pdev));
+       pe = event->pe;
+       eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+       pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
+               pe->phb->global_number, pe->addr);
 
        set_current_state(TASK_INTERRUPTIBLE);  /* Don't add to load average */
-       edev = handle_eeh_events(event);
-
-       if (edev) {
-               eeh_clear_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
-               pci_dev_put(edev->pdev);
-       }
+       eeh_handle_event(pe);
+       eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 
        kfree(event);
        mutex_unlock(&eeh_event_mutex);
 
        /* If there are no new errors after an hour, clear the counter. */
-       if (edev && edev->freeze_count>0) {
+       if (pe && pe->freeze_count > 0) {
                msleep_interruptible(3600*1000);
-               if (edev->freeze_count>0)
-                       edev->freeze_count--;
+               if (pe->freeze_count > 0)
+                       pe->freeze_count--;
 
        }
 
@@ -119,36 +114,23 @@ static void eeh_thread_launcher(struct work_struct *dummy)
 
 /**
  * eeh_send_failure_event - Generate a PCI error event
- * @edev: EEH device
+ * @pe: EEH PE
  *
  * This routine can be called within an interrupt context;
  * the actual event will be delivered in a normal context
  * (from a workqueue).
  */
-int eeh_send_failure_event(struct eeh_dev *edev)
+int eeh_send_failure_event(struct eeh_pe *pe)
 {
        unsigned long flags;
        struct eeh_event *event;
-       struct device_node *dn = eeh_dev_to_of_node(edev);
-       const char *location;
-
-       if (!mem_init_done) {
-               printk(KERN_ERR "EEH: event during early boot not handled\n");
-               location = of_get_property(dn, "ibm,loc-code", NULL);
-               printk(KERN_ERR "EEH: device node = %s\n", dn->full_name);
-               printk(KERN_ERR "EEH: PCI location = %s\n", location);
-               return 1;
-       }
-       event = kmalloc(sizeof(*event), GFP_ATOMIC);
-       if (event == NULL) {
-               printk(KERN_ERR "EEH: out of memory, event not handled\n");
-               return 1;
-       }
-
-       if (edev->pdev)
-               pci_dev_get(edev->pdev);
 
-       event->edev = edev;
+       event = kzalloc(sizeof(*event), GFP_ATOMIC);
+       if (!event) {
+               pr_err("EEH: out of memory, event not handled\n");
+               return -ENOMEM;
+       }
+       event->pe = pe;
 
        /* We may or may not be called in an interrupt context */
        spin_lock_irqsave(&eeh_eventlist_lock, flags);
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c
new file mode 100644 (file)
index 0000000..904123c
--- /dev/null
@@ -0,0 +1,591 @@
+/*
+ * The file intends to implement PE based on the information from
+ * platforms. Basically, there are 3 types of PEs: PHB/Bus/Device.
+ * All the PEs should be organized as a hierarchy tree. The first level
+ * of the tree will be associated to existing PHBs since the particular
+ * PE is only meaningful in one PHB domain.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+static LIST_HEAD(eeh_phb_pe);
+
+/**
+ * eeh_pe_alloc - Allocate and initialise a PE
+ * @phb: PCI controller the PE is bound to
+ * @type: PE type
+ *
+ * Returns a zero-filled PE with empty lists, or NULL when out of memory.
+ */
+static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
+{
+       struct eeh_pe *new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL);
+
+       if (new_pe == NULL)
+               return NULL;
+
+       /* All three list heads start out empty */
+       INIT_LIST_HEAD(&new_pe->edevs);
+       INIT_LIST_HEAD(&new_pe->child);
+       INIT_LIST_HEAD(&new_pe->child_list);
+
+       new_pe->phb = phb;
+       new_pe->type = type;
+
+       return new_pe;
+}
+
+/**
+ * eeh_phb_pe_create - Create the PE for a PHB
+ * @phb: PCI controller
+ *
+ * To be called when a PHB is detected, either during system boot or
+ * on PCI hotplug. The new PE is appended to the list of PHB PEs.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure.
+ */
+int __devinit eeh_phb_pe_create(struct pci_controller *phb)
+{
+       struct eeh_pe *phb_pe = eeh_pe_alloc(phb, EEH_PE_PHB);
+
+       if (phb_pe == NULL) {
+               pr_err("%s: out of memory!\n", __func__);
+               return -ENOMEM;
+       }
+
+       /* Append to the PHB PE list under the EEH lock */
+       eeh_lock();
+       list_add_tail(&phb_pe->child, &eeh_phb_pe);
+       eeh_unlock();
+
+       pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
+
+       return 0;
+}
+
+/**
+ * eeh_phb_pe_get - Look up the PHB PE of the given PHB
+ * @phb: PCI controller
+ *
+ * PHB PEs make up the first layer of the PE hierarchy tree. Their
+ * list is searched under the EEH lock for the PE that belongs to
+ * @phb. Returns that PE, or NULL if there is none.
+ */
+static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
+{
+       struct eeh_pe *found = NULL;
+       struct eeh_pe *cur;
+
+       eeh_lock();
+
+       list_for_each_entry(cur, &eeh_phb_pe, child) {
+               /*
+                * Strictly speaking the type check is not needed:
+                * the type was settled when the PHB PE was created.
+                */
+               if (cur->type != EEH_PE_PHB || cur->phb != phb)
+                       continue;
+
+               found = cur;
+               break;
+       }
+
+       eeh_unlock();
+
+       return found;
+}
+
+/**
+ * eeh_pe_next - Retrieve the next PE in the tree
+ * @pe: current PE
+ * @root: root PE of the walk
+ *
+ * Children come before siblings; once the walk would have to step
+ * past @root, NULL is returned.
+ */
+static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
+                                 struct eeh_pe *root)
+{
+       struct list_head *pos = pe->child_list.next;
+
+       /* A PE with children is followed by its first child */
+       if (pos != &pe->child_list)
+               return list_entry(pos, struct eeh_pe, child);
+
+       /* Otherwise take the nearest next sibling on the way up */
+       for (; pe != root; pe = pe->parent) {
+               pos = pe->child.next;
+               if (pos != &pe->parent->child_list)
+                       return list_entry(pos, struct eeh_pe, child);
+       }
+
+       return NULL;
+}
+
+/**
+ * eeh_pe_traverse - Traverse a PE and the PEs below it
+ * @root: root PE
+ * @fn: callback, invoked once per PE
+ * @flag: extra parameter handed to the callback
+ *
+ * Stops at, and returns, the first non-NULL value returned by the
+ * callback. Returns NULL when all PEs have been visited.
+ */
+static void *eeh_pe_traverse(struct eeh_pe *root,
+                       eeh_traverse_func fn, void *flag)
+{
+       struct eeh_pe *cur = root;
+       void *res;
+
+       while (cur) {
+               res = fn(cur, flag);
+               if (res)
+                       return res;
+               cur = eeh_pe_next(cur, root);
+       }
+
+       return NULL;
+}
+
+/**
+ * eeh_pe_dev_traverse - Traverse the EEH devices of a PE
+ * @root: EEH PE
+ * @fn: callback, invoked once per EEH device
+ * @flag: extra parameter handed to the callback
+ *
+ * Covers @root and its child PEs. Stops at, and returns, the first
+ * non-NULL callback result; otherwise returns NULL.
+ */
+void *eeh_pe_dev_traverse(struct eeh_pe *root,
+               eeh_traverse_func fn, void *flag)
+{
+       struct eeh_pe *cur;
+       struct eeh_dev *dev;
+       void *res;
+
+       if (root == NULL) {
+               pr_warning("%s: Invalid PE %p\n", __func__, root);
+               return NULL;
+       }
+
+       for (cur = root; cur; cur = eeh_pe_next(cur, root)) {
+               eeh_pe_for_each_dev(cur, dev) {
+                       res = fn(dev, flag);
+                       if (res)
+                               return res;
+               }
+       }
+
+       return NULL;
+}
+
+/**
+ * __eeh_pe_get - Check whether a PE matches the given EEH device
+ * @data: EEH PE to be checked
+ * @flag: EEH device carrying the address to look for
+ *
+ * A PE is identified by its PE address or by the traditional BDF
+ * (Bus/Device/Function) address. The PE address is preferred, the
+ * BDF address is the fallback; a zero address never matches. PHB PEs
+ * are skipped. Returns the PE on a match, NULL otherwise.
+ */
+static void *__eeh_pe_get(void *data, void *flag)
+{
+       struct eeh_pe *pe = (struct eeh_pe *)data;
+       struct eeh_dev *edev = (struct eeh_dev *)flag;
+
+       /* Unexpected PHB PE */
+       if (pe->type == EEH_PE_PHB)
+               return NULL;
+
+       /* We prefer PE address */
+       if (edev->pe_config_addr &&
+          (edev->pe_config_addr == pe->addr))
+               return pe;
+
+       /* Try BDF address; it must be guarded by the BDF address itself */
+       if (edev->config_addr &&
+          (edev->config_addr == pe->config_addr))
+               return pe;
+
+       return NULL;
+}
+
+/**
+ * eeh_pe_get - Search the PE that matches the given EEH device
+ * @edev: EEH device
+ *
+ * Looks through the PEs below the PHB PE of the device for one whose
+ * unified PE address or traditional bus/device/function address is
+ * the one stored in @edev. Used to find out whether the PE has been
+ * created already. Returns the PE, or NULL if there is none.
+ */
+static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
+{
+       struct eeh_pe *phb_pe, *match;
+
+       /* The lookup of the PHB PE takes the EEH lock by itself */
+       phb_pe = eeh_phb_pe_get(edev->phb);
+
+       eeh_lock();
+       match = eeh_pe_traverse(phb_pe, __eeh_pe_get, edev);
+       eeh_unlock();
+
+       return match;
+}
+
+/**
+ * eeh_pe_get_parent - Retrieve the parent PE
+ * @edev: EEH device
+ *
+ * All PEs in the system are organized as a hierarchy tree. The
+ * parent PE is the PE of the nearest ancestor EEH device that has
+ * one. Returns NULL if the walk leaves PCI territory or reaches the
+ * top of the device tree first.
+ */
+static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
+{
+       struct device_node *node;
+       struct eeh_dev *up;
+
+       /*
+        * The direct parent EEH device might not have a PE yet
+        * while a more distant ancestor already has one, so keep
+        * climbing.
+        */
+       for (node = edev->dn->parent; node; node = node->parent) {
+               /* We're poking out of PCI territory */
+               if (!PCI_DN(node))
+                       break;
+
+               /* We're poking out of PCI territory */
+               up = of_node_to_eeh_dev(node);
+               if (!up)
+                       break;
+
+               if (up->pe)
+                       return up->pe;
+       }
+
+       return NULL;
+}
+
+/**
+ * eeh_add_to_parent_pe - Add EEH device to its PE
+ * @edev: EEH device
+ *
+ * If a PE matching the address of @edev exists, it becomes EEH_PE_BUS
+ * and @edev joins it. Otherwise a new EEH_PE_DEVICE PE is created for
+ * @edev and linked below its parent PE, or below the PHB PE. Returns
+ * 0 on success, -ENOMEM or -EEXIST on failure.
+ */
+int eeh_add_to_parent_pe(struct eeh_dev *edev)
+{
+       struct eeh_pe *pe, *parent;
+
+       /*
+        * Check whether a PE for the address of the device
+        * exists already. If so, that PE is made up of a PCI
+        * bus and its subordinate components, and the device
+        * is added to it.
+        */
+       pe = eeh_pe_get(edev);
+       if (pe) {
+               if (!edev->pe_config_addr) {
+                       pr_err("%s: PE with addr 0x%x already exists\n",
+                               __func__, edev->config_addr);
+                       return -EEXIST;
+               }
+
+               /* Mark the PE as type of PCI bus */
+               pe->type = EEH_PE_BUS;
+               edev->pe = pe;
+
+               /* Put the edev into the PE */
+               list_add_tail(&edev->list, &pe->edevs);
+               pr_debug("EEH: Add %s to Bus PE#%x\n",
+                       edev->dn->full_name, pe->addr);
+
+               return 0;
+       }
+
+       /* Create a new EEH PE */
+       pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
+       if (!pe) {
+               pr_err("%s: out of memory!\n", __func__);
+               return -ENOMEM;
+       }
+       pe->addr        = edev->pe_config_addr;
+       pe->config_addr = edev->config_addr;
+
+       /*
+        * Put the new EEH PE into the hierarchy tree. If no
+        * parent PE can be found, the newly created PE is
+        * attached to the PHB PE directly. Without a PHB PE
+        * the new PE is freed again and -EEXIST is returned.
+        */
+       parent = eeh_pe_get_parent(edev);
+       if (!parent) {
+               parent = eeh_phb_pe_get(edev->phb);
+               if (!parent) {
+                       pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
+                               __func__, edev->phb->global_number);
+                       edev->pe = NULL;
+                       kfree(pe);
+                       return -EEXIST;
+               }
+       }
+       pe->parent = parent;
+
+       /*
+        * Put the newly created PE into the child list and
+        * link the EEH device accordingly.
+        */
+       list_add_tail(&pe->child, &parent->child_list);
+       list_add_tail(&edev->list, &pe->edevs);
+       edev->pe = pe;
+       pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
+               edev->dn->full_name, pe->addr, pe->parent->addr);
+
+       return 0;
+}
+
+/**
+ * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
+ * @edev: EEH device
+ *
+ * The PE hierarchy tree might change on PCI hotplug, and PCI devices
+ * or buses may be removed during EEH recovery. The device is unlinked
+ * from its PE, and PEs left with neither devices nor child PEs are
+ * freed. Returns 0, or -EEXIST if @edev has no PE.
+ */
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
+{
+       struct eeh_pe *pe, *parent;
+
+       if (!edev->pe) {
+               pr_warning("%s: No PE found for EEH device %s\n",
+                       __func__, edev->dn->full_name);
+               return -EEXIST;
+       }
+
+       /* Remove the EEH device */
+       pe = edev->pe;
+       edev->pe = NULL;
+       list_del(&edev->list);
+
+       /*
+        * Walk from the PE of the device up to the PHB PE and
+        * free each PE on the way that has neither EEH devices
+        * nor child PEs left. The parent is fetched up front
+        * because the PE itself might be freed.
+        */
+       while (1) {
+               parent = pe->parent;
+               if (pe->type == EEH_PE_PHB)
+                       break;
+
+               if (list_empty(&pe->edevs) &&
+                   list_empty(&pe->child_list)) {
+                       list_del(&pe->child);
+                       kfree(pe);
+               }
+
+               pe = parent;
+       }
+
+       return 0;
+}
+
+/**
+ * __eeh_pe_state_mark - Mark the state for the PE
+ * @data: EEH PE
+ * @flag: pointer to the state (int) to be set
+ *
+ * Sets the given state bits on the PE and puts the PCI devices of
+ * the PE into the I/O frozen channel state. Always returns NULL so
+ * that a traversal carries on with the remaining PEs.
+ */
+static void *__eeh_pe_state_mark(void *data, void *flag)
+{
+       struct eeh_pe *target = data;
+       struct eeh_dev *edev;
+       struct pci_dev *pci;
+
+       /* Record the requested state on the PE itself */
+       target->state |= *(int *)flag;
+
+       /*
+        * Put the associated PCI devices into I/O frozen state
+        * to avoid I/O accesses from the PCI device drivers.
+        */
+       eeh_pe_for_each_dev(target, edev) {
+               pci = eeh_dev_to_pci_dev(edev);
+               if (!pci)
+                       continue;
+               pci->error_state = pci_channel_io_frozen;
+       }
+
+       return NULL;
+}
+
+/**
+ * eeh_pe_state_mark - Mark specified state for PE and its associated device
+ * @pe: EEH PE
+ * @state: state to be marked
+ *
+ * An EEH error affects the current PE and its child PEs, so the
+ * affected PEs and their associated devices are all marked.
+ */
+void eeh_pe_state_mark(struct eeh_pe *pe, int state)
+{
+       eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
+}
+
+/**
+ * __eeh_pe_state_clear - Clear state for the PE
+ * @data: EEH PE
+ * @flag: pointer to the state (int) to be cleared
+ *
+ * Clears the given state bits from the PE and resets the check
+ * count of the PE to zero. Always returns NULL so that a traversal
+ * carries on with the remaining PEs.
+ */
+static void *__eeh_pe_state_clear(void *data, void *flag)
+{
+       struct eeh_pe *target = data;
+       int mask = ~(*(int *)flag);
+
+       target->check_count = 0;
+       target->state &= mask;
+
+       return NULL;
+}
+
+/**
+ * eeh_pe_state_clear - Clear state for the PE and its children
+ * @pe: PE
+ * @state: state to be cleared
+ *
+ * Once the PE and its children have been recovered from an error,
+ * the error state needs to be cleared for them. The function is
+ * used for that purpose.
+ */
+void eeh_pe_state_clear(struct eeh_pe *pe, int state)
+{
+       eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
+}
+
+/**
+ * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
+ * @data: EEH device
+ * @flag: Unused
+ *
+ * Writes the base address registers, the expansion ROM address, the
+ * cache line size, the latency timer, dword 15 and the PERR/SERR
+ * command bits back from the config space saved in the EEH device.
+ */
+static void *eeh_restore_one_device_bars(void *data, void *flag)
+{
+       int i;
+       u32 cmd;
+       struct eeh_dev *edev = (struct eeh_dev *)data;
+       struct device_node *dn = eeh_dev_to_of_node(edev);
+
+       for (i = 4; i < 10; i++)
+               eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
+       /* 12 == Expansion ROM Address */
+       eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
+       /* BYTE_SWAP mirrors a byte offset within its 32-bit word */
+#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
+#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
+
+       eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
+               SAVED_BYTE(PCI_CACHE_LINE_SIZE));
+       eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
+               SAVED_BYTE(PCI_LATENCY_TIMER));
+
+       /* max latency, min grant, interrupt pin and line */
+       eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
+
+       /*
+        * Restore PERR & SERR bits, some devices require it,
+        * don't touch the other command bits
+        */
+       eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
+       if (edev->config_space[1] & PCI_COMMAND_PARITY)
+               cmd |= PCI_COMMAND_PARITY;
+       else
+               cmd &= ~PCI_COMMAND_PARITY;
+       if (edev->config_space[1] & PCI_COMMAND_SERR)
+               cmd |= PCI_COMMAND_SERR;
+       else
+               cmd &= ~PCI_COMMAND_SERR;
+       eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
+
+       return NULL;
+}
+
+/**
+ * eeh_pe_restore_bars - Restore the PCI config space info
+ * @pe: EEH PE
+ *
+ * Restores the saved config space of every EEH device that belongs
+ * to the PE or to one of its child PEs.
+ */
+void eeh_pe_restore_bars(struct eeh_pe *pe)
+{
+       eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
+}
+
+/**
+ * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
+ * @pe: EEH PE
+ *
+ * There are 3 types of PEs: PHB/Bus/Device. A PHB PE yields the
+ * primary PCI bus of the PHB, a BUS PE the bus of its first EEH
+ * device. NULL is returned for a DEVICE PE, for a BUS PE without
+ * EEH devices and when that first device has no PCI device.
+ */
+struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
+{
+       struct pci_bus *bus = NULL;
+       struct eeh_dev *edev;
+       struct pci_dev *pdev;
+
+       if (pe->type == EEH_PE_PHB) {
+               bus = pe->phb->bus;
+       } else if (pe->type == EEH_PE_BUS && !list_empty(&pe->edevs)) {
+               /* list_first_entry() is only valid on a non-empty list */
+               edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
+               pdev = eeh_dev_to_pci_dev(edev);
+               if (pdev)
+                       bus = pdev->bus;
+       }
+
+       return bus;
+}
index c33360ec4f4f4d9acf4cade2360358c115c4829f..19506f935737d2ee7c27a16d1ecc909cc4b6c518 100644 (file)
@@ -129,27 +129,117 @@ static int pseries_eeh_init(void)
                eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
        }
 
+       /* Set EEH probe mode */
+       eeh_probe_mode_set(EEH_PROBE_MODE_DEVTREE);
+
        return 0;
 }
 
+/**
+ * pseries_eeh_of_probe - EEH probe on the given device
+ * @dn: OF node
+ * @flag: Unused
+ *
+ * Checks whether the PCI device behind the OF node supports EEH and,
+ * if so, enables EEH on it and adds its EEH device to a PE. Nodes
+ * without an EEH device are skipped. Always returns NULL.
+ */
+static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
+{
+       struct eeh_dev *edev;
+       struct eeh_pe pe;
+       const u32 *class_code, *vendor_id, *device_id;
+       const u32 *regs;
+       int enable = 0;
+       int ret;
+
+       /* Skip nodes without EEH device as well as unavailable ones */
+       edev = of_node_to_eeh_dev(dn);
+       if (!edev || !of_device_is_available(dn))
+               return NULL;
+
+       /* Retrieve class/vendor/device IDs */
+       class_code = of_get_property(dn, "class-code", NULL);
+       vendor_id  = of_get_property(dn, "vendor-id", NULL);
+       device_id  = of_get_property(dn, "device-id", NULL);
+
+       /* Skip for bad OF node or PCI-ISA bridge */
+       if (!class_code || !vendor_id || !device_id)
+               return NULL;
+       if (dn->type && !strcmp(dn->type, "isa"))
+               return NULL;
+
+       /* Update class code and mode of eeh device */
+       edev->class_code = *class_code;
+       edev->mode = 0;
+
+       /* Retrieve the device address */
+       regs = of_get_property(dn, "reg", NULL);
+       if (!regs) {
+               pr_warning("%s: OF node property %s::reg not found\n",
+                       __func__, dn->full_name);
+               return NULL;
+       }
+
+       /* Initialize the fake PE */
+       memset(&pe, 0, sizeof(struct eeh_pe));
+       pe.phb = edev->phb;
+       pe.config_addr = regs[0];
+
+       /* Enable EEH on the device */
+       ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
+       if (!ret) {
+               edev->config_addr = regs[0];
+               /* Retrieve PE address */
+               edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
+               pe.addr = edev->pe_config_addr;
+
+               /* Some older systems (Power4) allow the ibm,set-eeh-option
+                * call to succeed even on nodes where EEH is not supported.
+                * Verify support explicitly.
+                */
+               ret = eeh_ops->get_state(&pe, NULL);
+               if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
+                       enable = 1;
+
+               if (enable) {
+                       eeh_subsystem_enabled = 1;
+                       eeh_add_to_parent_pe(edev);
+
+                       pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n",
+                               __func__, dn->full_name, pe.phb->global_number,
+                               pe.addr, pe.config_addr);
+               } else if (dn->parent && of_node_to_eeh_dev(dn->parent) &&
+                          (of_node_to_eeh_dev(dn->parent))->pe) {
+                       /* This device doesn't support EEH, but it may have an
+                        * EEH parent, in which case we mark it as supported.
+                        */
+                       edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr;
+                       edev->pe_config_addr = of_node_to_eeh_dev(dn->parent)->pe_config_addr;
+                       eeh_add_to_parent_pe(edev);
+               }
+       }
+
+       /* Save memory bars */
+       eeh_save_bars(edev);
+
+       return NULL;
+}
+
 /**
  * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable
- * @dn: device node
+ * @pe: EEH PE
  * @option: operation to be issued
  *
  * The function is used to control the EEH functionality globally.
  * Currently, following options are support according to PAPR:
  * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
  */
-static int pseries_eeh_set_option(struct device_node *dn, int option)
+static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
 {
        int ret = 0;
-       struct eeh_dev *edev;
-       const u32 *reg;
        int config_addr;
 
-       edev = of_node_to_eeh_dev(dn);
-
        /*
         * When we're enabling or disabling EEH functioality on
         * the particular PE, the PE config address is possibly
@@ -159,15 +249,11 @@ static int pseries_eeh_set_option(struct device_node *dn, int option)
        switch (option) {
        case EEH_OPT_DISABLE:
        case EEH_OPT_ENABLE:
-               reg = of_get_property(dn, "reg", NULL);
-               config_addr = reg[0];
-               break;
-
        case EEH_OPT_THAW_MMIO:
        case EEH_OPT_THAW_DMA:
-               config_addr = edev->config_addr;
-               if (edev->pe_config_addr)
-                       config_addr = edev->pe_config_addr;
+               config_addr = pe->config_addr;
+               if (pe->addr)
+                       config_addr = pe->addr;
                break;
 
        default:
@@ -177,15 +263,15 @@ static int pseries_eeh_set_option(struct device_node *dn, int option)
        }
 
        ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
-                       config_addr, BUID_HI(edev->phb->buid),
-                       BUID_LO(edev->phb->buid), option);
+                       config_addr, BUID_HI(pe->phb->buid),
+                       BUID_LO(pe->phb->buid), option);
 
        return ret;
 }
 
 /**
  * pseries_eeh_get_pe_addr - Retrieve PE address
- * @dn: device node
+ * @pe: EEH PE
  *
  * Retrieve the assocated PE address. Actually, there're 2 RTAS
  * function calls dedicated for the purpose. We need implement
@@ -196,14 +282,11 @@ static int pseries_eeh_set_option(struct device_node *dn, int option)
  * It's notable that zero'ed return value means invalid PE config
  * address.
  */
-static int pseries_eeh_get_pe_addr(struct device_node *dn)
+static int pseries_eeh_get_pe_addr(struct eeh_pe *pe)
 {
-       struct eeh_dev *edev;
        int ret = 0;
        int rets[3];
 
-       edev = of_node_to_eeh_dev(dn);
-
        if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
                /*
                 * First of all, we need to make sure there has one PE
@@ -211,18 +294,18 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
                 * meaningless.
                 */
                ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
-                               edev->config_addr, BUID_HI(edev->phb->buid),
-                               BUID_LO(edev->phb->buid), 1);
+                               pe->config_addr, BUID_HI(pe->phb->buid),
+                               BUID_LO(pe->phb->buid), 1);
                if (ret || (rets[0] == 0))
                        return 0;
 
                /* Retrieve the associated PE config address */
                ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
-                               edev->config_addr, BUID_HI(edev->phb->buid),
-                               BUID_LO(edev->phb->buid), 0);
+                               pe->config_addr, BUID_HI(pe->phb->buid),
+                               BUID_LO(pe->phb->buid), 0);
                if (ret) {
-                       pr_warning("%s: Failed to get PE address for %s\n",
-                               __func__, dn->full_name);
+                       pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n",
+                               __func__, pe->phb->global_number, pe->config_addr);
                        return 0;
                }
 
@@ -231,11 +314,11 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
 
        if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
                ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
-                               edev->config_addr, BUID_HI(edev->phb->buid),
-                               BUID_LO(edev->phb->buid), 0);
+                               pe->config_addr, BUID_HI(pe->phb->buid),
+                               BUID_LO(pe->phb->buid), 0);
                if (ret) {
-                       pr_warning("%s: Failed to get PE address for %s\n",
-                               __func__, dn->full_name);
+                       pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n",
+                               __func__, pe->phb->global_number, pe->config_addr);
                        return 0;
                }
 
@@ -247,7 +330,7 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
 
 /**
  * pseries_eeh_get_state - Retrieve PE state
- * @dn: PE associated device node
+ * @pe: EEH PE
  * @state: return value
  *
  * Retrieve the state of the specified PE. On RTAS compliant
@@ -258,30 +341,28 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
  * RTAS calls for the purpose, we need to try the new one and back
  * to the old one if the new one couldn't work properly.
  */
-static int pseries_eeh_get_state(struct device_node *dn, int *state)
+static int pseries_eeh_get_state(struct eeh_pe *pe, int *state)
 {
-       struct eeh_dev *edev;
        int config_addr;
        int ret;
        int rets[4];
        int result;
 
        /* Figure out PE config address if possible */
-       edev = of_node_to_eeh_dev(dn);
-       config_addr = edev->config_addr;
-       if (edev->pe_config_addr)
-               config_addr = edev->pe_config_addr;
+       config_addr = pe->config_addr;
+       if (pe->addr)
+               config_addr = pe->addr;
 
        if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
                ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
-                               config_addr, BUID_HI(edev->phb->buid),
-                               BUID_LO(edev->phb->buid));
+                               config_addr, BUID_HI(pe->phb->buid),
+                               BUID_LO(pe->phb->buid));
        } else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
                /* Fake PE unavailable info */
                rets[2] = 0;
                ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
-                               config_addr, BUID_HI(edev->phb->buid),
-                               BUID_LO(edev->phb->buid));
+                               config_addr, BUID_HI(pe->phb->buid),
+                               BUID_LO(pe->phb->buid));
        } else {
                return EEH_STATE_NOT_SUPPORT;
        }
@@ -333,34 +414,32 @@ static int pseries_eeh_get_state(struct device_node *dn, int *state)
 
 /**
  * pseries_eeh_reset - Reset the specified PE
- * @dn: PE associated device node
+ * @pe: EEH PE
  * @option: reset option
  *
  * Reset the specified PE
  */
-static int pseries_eeh_reset(struct device_node *dn, int option)
+static int pseries_eeh_reset(struct eeh_pe *pe, int option)
 {
-       struct eeh_dev *edev;
        int config_addr;
        int ret;
 
        /* Figure out PE address */
-       edev = of_node_to_eeh_dev(dn);
-       config_addr = edev->config_addr;
-       if (edev->pe_config_addr)
-               config_addr = edev->pe_config_addr;
+       config_addr = pe->config_addr;
+       if (pe->addr)
+               config_addr = pe->addr;
 
        /* Reset PE through RTAS call */
        ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
-                       config_addr, BUID_HI(edev->phb->buid),
-                       BUID_LO(edev->phb->buid), option);
+                       config_addr, BUID_HI(pe->phb->buid),
+                       BUID_LO(pe->phb->buid), option);
 
        /* If fundamental-reset not supported, try hot-reset */
        if (option == EEH_RESET_FUNDAMENTAL &&
            ret == -8) {
                ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
-                               config_addr, BUID_HI(edev->phb->buid),
-                               BUID_LO(edev->phb->buid), EEH_RESET_HOT);
+                               config_addr, BUID_HI(pe->phb->buid),
+                               BUID_LO(pe->phb->buid), EEH_RESET_HOT);
        }
 
        return ret;
@@ -368,13 +447,13 @@ static int pseries_eeh_reset(struct device_node *dn, int option)
 
 /**
  * pseries_eeh_wait_state - Wait for PE state
- * @dn: PE associated device node
+ * @pe: EEH PE
  * @max_wait: maximal period in microsecond
  *
  * Wait for the state of associated PE. It might take some time
  * to retrieve the PE's state.
  */
-static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
+static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait)
 {
        int ret;
        int mwait;
@@ -391,7 +470,7 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
 #define EEH_STATE_MAX_WAIT_TIME        (300 * 1000)
 
        while (1) {
-               ret = pseries_eeh_get_state(dn, &mwait);
+               ret = pseries_eeh_get_state(pe, &mwait);
 
                /*
                 * If the PE's state is temporarily unavailable,
@@ -426,7 +505,7 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
 
 /**
  * pseries_eeh_get_log - Retrieve error log
- * @dn: device node
+ * @pe: EEH PE
  * @severity: temporary or permanent error log
  * @drv_log: driver log to be combined with retrieved error log
  * @len: length of driver log
@@ -435,24 +514,22 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
  * Actually, the error will be retrieved through the dedicated
  * RTAS call.
  */
-static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len)
+static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len)
 {
-       struct eeh_dev *edev;
        int config_addr;
        unsigned long flags;
        int ret;
 
-       edev = of_node_to_eeh_dev(dn);
        spin_lock_irqsave(&slot_errbuf_lock, flags);
        memset(slot_errbuf, 0, eeh_error_buf_size);
 
        /* Figure out the PE address */
-       config_addr = edev->config_addr;
-       if (edev->pe_config_addr)
-               config_addr = edev->pe_config_addr;
+       config_addr = pe->config_addr;
+       if (pe->addr)
+               config_addr = pe->addr;
 
        ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr,
-                       BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid),
+                       BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid),
                        virt_to_phys(drv_log), len,
                        virt_to_phys(slot_errbuf), eeh_error_buf_size,
                        severity);
@@ -465,40 +542,38 @@ static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_l
 
 /**
  * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE
- * @dn: PE associated device node
+ * @pe: EEH PE
  *
  * The function will be called to reconfigure the bridges included
  * in the specified PE so that the mulfunctional PE would be recovered
  * again.
  */
-static int pseries_eeh_configure_bridge(struct device_node *dn)
+static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
 {
-       struct eeh_dev *edev;
        int config_addr;
        int ret;
 
        /* Figure out the PE address */
-       edev = of_node_to_eeh_dev(dn);
-       config_addr = edev->config_addr;
-       if (edev->pe_config_addr)
-               config_addr = edev->pe_config_addr;
+       config_addr = pe->config_addr;
+       if (pe->addr)
+               config_addr = pe->addr;
 
        /* Use new configure-pe function, if supported */
        if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
                ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
-                               config_addr, BUID_HI(edev->phb->buid),
-                               BUID_LO(edev->phb->buid));
+                               config_addr, BUID_HI(pe->phb->buid),
+                               BUID_LO(pe->phb->buid));
        } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
                ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
-                               config_addr, BUID_HI(edev->phb->buid),
-                               BUID_LO(edev->phb->buid));
+                               config_addr, BUID_HI(pe->phb->buid),
+                               BUID_LO(pe->phb->buid));
        } else {
                return -EFAULT;
        }
 
        if (ret)
-               pr_warning("%s: Unable to configure bridge %d for %s\n",
-                       __func__, ret, dn->full_name);
+               pr_warning("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
+                       __func__, pe->phb->global_number, pe->addr, ret);
 
        return ret;
 }
@@ -542,6 +617,8 @@ static int pseries_eeh_write_config(struct device_node *dn, int where, int size,
 static struct eeh_ops pseries_eeh_ops = {
        .name                   = "pseries",
        .init                   = pseries_eeh_init,
+       .of_probe               = pseries_eeh_of_probe,
+       .dev_probe              = NULL,
        .set_option             = pseries_eeh_set_option,
        .get_pe_addr            = pseries_eeh_get_pe_addr,
        .get_state              = pseries_eeh_get_state,
@@ -559,7 +636,21 @@ static struct eeh_ops pseries_eeh_ops = {
  * EEH initialization on pseries platform. This function should be
  * called before any EEH related functions.
  */
-int __init eeh_pseries_init(void)
+static int __init eeh_pseries_init(void)
 {
-       return eeh_ops_register(&pseries_eeh_ops);
+       int ret = -EINVAL;
+
+       if (!machine_is(pseries))
+               return ret;
+
+       ret = eeh_ops_register(&pseries_eeh_ops);
+       if (!ret)
+               pr_info("EEH: pSeries platform initialized\n");
+       else
+               pr_info("EEH: pSeries platform initialization failure (%d)\n",
+                       ret);
+
+       return ret;
 }
+
+early_initcall(eeh_pseries_init);
index 243b3510d70f7ae2e9e3a801eefd2440dd9366db..d37708360f2e472b68d226df3da5a372ecfd406e 100644 (file)
@@ -53,9 +53,6 @@ static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
 EEH_SHOW_ATTR(eeh_mode,            mode,            "0x%x");
 EEH_SHOW_ATTR(eeh_config_addr,     config_addr,     "0x%x");
 EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  "0x%x");
-EEH_SHOW_ATTR(eeh_check_count,     check_count,     "%d"  );
-EEH_SHOW_ATTR(eeh_freeze_count,    freeze_count,    "%d"  );
-EEH_SHOW_ATTR(eeh_false_positives, false_positives, "%d"  );
 
 void eeh_sysfs_add_device(struct pci_dev *pdev)
 {
@@ -64,9 +61,6 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
        rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
        rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
        rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
-       rc += device_create_file(&pdev->dev, &dev_attr_eeh_check_count);
-       rc += device_create_file(&pdev->dev, &dev_attr_eeh_false_positives);
-       rc += device_create_file(&pdev->dev, &dev_attr_eeh_freeze_count);
 
        if (rc)
                printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
@@ -77,8 +71,5 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
        device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
        device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
        device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
-       device_remove_file(&pdev->dev, &dev_attr_eeh_check_count);
-       device_remove_file(&pdev->dev, &dev_attr_eeh_false_positives);
-       device_remove_file(&pdev->dev, &dev_attr_eeh_freeze_count);
 }
 
index bca220f2873c5cb862d4a9785aef836bf02c4765..6153eea27ce7d39ea51fb4d1f210be2a1934bfdf 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/memblock.h>
 #include <linux/spinlock.h>
 #include <linux/sched.h>       /* for show_stack */
 #include <linux/string.h>
@@ -41,7 +42,6 @@
 #include <asm/iommu.h>
 #include <asm/pci-bridge.h>
 #include <asm/machdep.h>
-#include <asm/abs_addr.h>
 #include <asm/pSeries_reconfig.h>
 #include <asm/firmware.h>
 #include <asm/tce.h>
@@ -99,7 +99,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
 
        while (npages--) {
                /* can't move this out since we might cross MEMBLOCK boundary */
-               rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
+               rpn = __pa(uaddr) >> TCE_SHIFT;
                *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
 
                uaddr += TCE_PAGE_SIZE;
@@ -148,7 +148,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
        int ret = 0;
        long tcenum_start = tcenum, npages_start = npages;
 
-       rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
+       rpn = __pa(uaddr) >> TCE_SHIFT;
        proto_tce = TCE_PCI_READ;
        if (direction != DMA_TO_DEVICE)
                proto_tce |= TCE_PCI_WRITE;
@@ -217,7 +217,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
                __get_cpu_var(tce_page) = tcep;
        }
 
-       rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
+       rpn = __pa(uaddr) >> TCE_SHIFT;
        proto_tce = TCE_PCI_READ;
        if (direction != DMA_TO_DEVICE)
                proto_tce |= TCE_PCI_WRITE;
@@ -237,7 +237,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 
                rc = plpar_tce_put_indirect((u64)tbl->it_index,
                                            (u64)tcenum << 12,
-                                           (u64)virt_to_abs(tcep),
+                                           (u64)__pa(tcep),
                                            limit);
 
                npages -= limit;
@@ -441,7 +441,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
 
                rc = plpar_tce_put_indirect(liobn,
                                            dma_offset,
-                                           (u64)virt_to_abs(tcep),
+                                           (u64)__pa(tcep),
                                            limit);
 
                num_tce -= limit;
index 5f3ef876ded20e0f71da39c963bba0dd19ddb95d..177055d0186b6b69b73f7ec2932def7346d55874 100644 (file)
@@ -31,7 +31,6 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/machdep.h>
-#include <asm/abs_addr.h>
 #include <asm/mmu_context.h>
 #include <asm/iommu.h>
 #include <asm/tlbflush.h>
index 109fdb75578d780a6a1edf162e556be58570f085..d19f4977c83492e1174be5456e1d0ff7ee33ca33 100644 (file)
@@ -210,6 +210,7 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
 static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
 {
        struct device_node *dn;
+       struct eeh_dev *edev;
 
        /* Found our PE and assume 8 at that point. */
 
@@ -217,7 +218,10 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
        if (!dn)
                return NULL;
 
-       dn = eeh_find_device_pe(dn);
+       /* Get the top level device in the PE */
+       edev = of_node_to_eeh_dev(dn);
+       edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list);
+       dn = eeh_dev_to_of_node(edev);
        if (!dn)
                return NULL;
 
@@ -387,12 +391,13 @@ static int check_msix_entries(struct pci_dev *pdev)
        return 0;
 }
 
-static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type)
 {
        struct pci_dn *pdn;
        int hwirq, virq, i, rc;
        struct msi_desc *entry;
        struct msi_msg msg;
+       int nvec = nvec_in;
 
        pdn = get_pdn(pdev);
        if (!pdn)
@@ -401,11 +406,24 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
        if (type == PCI_CAP_ID_MSIX && check_msix_entries(pdev))
                return -EINVAL;
 
+       /*
+        * Firmware currently refuses any non-power-of-two allocation
+        * so we round up if the quota will allow it.
+        */
+       if (type == PCI_CAP_ID_MSIX) {
+               int m = roundup_pow_of_two(nvec);
+               int quota = msi_quota_for_device(pdev, m);
+
+               if (quota >= m)
+                       nvec = m;
+       }
+
        /*
         * Try the new more explicit firmware interface, if that fails fall
         * back to the old interface. The old interface is known to never
         * return MSI-Xs.
         */
+again:
        if (type == PCI_CAP_ID_MSI) {
                rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec);
 
@@ -417,6 +435,10 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
                rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec);
 
        if (rc != nvec) {
+               if (nvec != nvec_in) {
+                       nvec = nvec_in;
+                       goto again;
+               }
                pr_debug("rtas_msi: rtas_change_msi() failed\n");
                return rc;
        }
index 2c6ded29f73d3d5afbcdef6bd9719d777da0cd12..56b864d777ee518f0cf921c7592c41fdcae8792f 100644 (file)
@@ -73,7 +73,7 @@ void __init pSeries_final_fixup(void)
 {
        pSeries_request_regions();
 
-       pci_addr_cache_build();
+       eeh_addr_cache_build();
 }
 
 /*
index 51ecac920dd8e4cb74e17a9086cb893ad57dc6d6..e3cb7ae616587c4fa6239683e409fc38fce9328d 100644 (file)
@@ -388,10 +388,8 @@ static void __init pSeries_setup_arch(void)
 
        /* Find and initialize PCI host bridges */
        init_pci_config_tokens();
-       eeh_pseries_init();
        find_and_init_phbs();
        pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
-       eeh_init();
 
        pSeries_nvram_init();
 
@@ -416,16 +414,20 @@ static int __init pSeries_init_panel(void)
 }
 machine_arch_initcall(pseries, pSeries_init_panel);
 
-static int pseries_set_dabr(unsigned long dabr)
+static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx)
 {
        return plpar_hcall_norets(H_SET_DABR, dabr);
 }
 
-static int pseries_set_xdabr(unsigned long dabr)
+static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
 {
-       /* We want to catch accesses from kernel and userspace */
-       return plpar_hcall_norets(H_SET_XDABR, dabr,
-                       H_DABRX_KERNEL | H_DABRX_USER);
+       /* Have to set at least one bit in the DABRX according to PAPR */
+       if (dabrx == 0 && dabr == 0)
+               dabrx = DABRX_USER;
+       /* PAPR says we can only set kernel and user bits */
+       dabrx &= DABRX_KERNEL | DABRX_USER;
+
+       return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx);
 }
 
 #define CMO_CHARACTERISTICS_TOKEN 44
@@ -529,10 +531,10 @@ static void __init pSeries_init_early(void)
        if (firmware_has_feature(FW_FEATURE_LPAR))
                hvc_vio_init_early();
 #endif
-       if (firmware_has_feature(FW_FEATURE_DABR))
-               ppc_md.set_dabr = pseries_set_dabr;
-       else if (firmware_has_feature(FW_FEATURE_XDABR))
+       if (firmware_has_feature(FW_FEATURE_XDABR))
                ppc_md.set_dabr = pseries_set_xdabr;
+       else if (firmware_has_feature(FW_FEATURE_DABR))
+               ppc_md.set_dabr = pseries_set_dabr;
 
        pSeries_cmo_feature_init();
        iommu_init_early_pSeries();
index 4f2680f431b5070fa26aeecacb73edc0345fe453..8ef63a01e34561e3831b7047f1f0a8fd79a4d04b 100644 (file)
@@ -43,7 +43,6 @@
 #include <asm/iommu.h>
 #include <asm/pci-bridge.h>
 #include <asm/machdep.h>
-#include <asm/abs_addr.h>
 #include <asm/cacheflush.h>
 #include <asm/ppc-pci.h>
 
@@ -167,7 +166,7 @@ static int dart_build(struct iommu_table *tbl, long index,
         */
        l = npages;
        while (l--) {
-               rpn = virt_to_abs(uaddr) >> DART_PAGE_SHIFT;
+               rpn = __pa(uaddr) >> DART_PAGE_SHIFT;
 
                *(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK);
 
@@ -244,7 +243,7 @@ static int __init dart_init(struct device_node *dart_node)
                panic("DART: Cannot map registers!");
 
        /* Map in DART table */
-       dart_vbase = ioremap(virt_to_abs(dart_tablebase), dart_tablesize);
+       dart_vbase = ioremap(__pa(dart_tablebase), dart_tablesize);
 
        /* Fill initial table */
        for (i = 0; i < dart_tablesize/4; i++)
@@ -463,7 +462,7 @@ void __init alloc_dart_table(void)
         * will blow up an entire large page anyway in the kernel mapping
         */
        dart_tablebase = (unsigned long)
-               abs_to_virt(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L));
+               __va(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L));
 
        printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase);
 }
index 9b49c65ee7a42f6f9d0b8dc436628c34bafb262b..987f441525cb7c646e3424e87403e5ee9ec5af1e 100644 (file)
@@ -740,7 +740,7 @@ static void insert_bpts(void)
 static void insert_cpu_bpts(void)
 {
        if (dabr.enabled)
-               set_dabr(dabr.address | (dabr.enabled & 7));
+               set_dabr(dabr.address | (dabr.enabled & 7), DABRX_ALL);
        if (iabr && cpu_has_feature(CPU_FTR_IABR))
                mtspr(SPRN_IABR, iabr->address
                         | (iabr->enabled & (BP_IABR|BP_IABR_TE)));
@@ -768,7 +768,7 @@ static void remove_bpts(void)
 
 static void remove_cpu_bpts(void)
 {
-       set_dabr(0);
+       set_dabr(0, 0);
        if (cpu_has_feature(CPU_FTR_IABR))
                mtspr(SPRN_IABR, 0);
 }
index d7f179cc2e984ae17d3888db194b309edfa2bb21..638110efae9bede0b58e2f4f2b2315e73d07402d 100644 (file)
@@ -34,7 +34,6 @@
 #include <linux/device.h>
 #include <linux/of.h>
 #include <asm/pSeries_reconfig.h>
-#include <asm/abs_addr.h>
 #include <asm/hvcall.h>
 #include <asm/vio.h>
 
@@ -104,10 +103,10 @@ struct nx_sg *nx_build_sg_list(struct nx_sg *sg_head,
        /* determine the start and end for this address range - slightly
         * different if this is in VMALLOC_REGION */
        if (is_vmalloc_addr(start_addr))
-               sg_addr = phys_to_abs(page_to_phys(vmalloc_to_page(start_addr)))
+               sg_addr = page_to_phys(vmalloc_to_page(start_addr))
                          + offset_in_page(sg_addr);
        else
-               sg_addr = virt_to_abs(sg_addr);
+               sg_addr = __pa(sg_addr);
 
        end_addr = sg_addr + len;
 
@@ -265,17 +264,17 @@ void nx_ctx_init(struct nx_crypto_ctx *nx_ctx, unsigned int function)
        nx_ctx->csbcpb->csb.valid |= NX_CSB_VALID_BIT;
 
        nx_ctx->op.flags = function;
-       nx_ctx->op.csbcpb = virt_to_abs(nx_ctx->csbcpb);
-       nx_ctx->op.in = virt_to_abs(nx_ctx->in_sg);
-       nx_ctx->op.out = virt_to_abs(nx_ctx->out_sg);
+       nx_ctx->op.csbcpb = __pa(nx_ctx->csbcpb);
+       nx_ctx->op.in = __pa(nx_ctx->in_sg);
+       nx_ctx->op.out = __pa(nx_ctx->out_sg);
 
        if (nx_ctx->csbcpb_aead) {
                nx_ctx->csbcpb_aead->csb.valid |= NX_CSB_VALID_BIT;
 
                nx_ctx->op_aead.flags = function;
-               nx_ctx->op_aead.csbcpb = virt_to_abs(nx_ctx->csbcpb_aead);
-               nx_ctx->op_aead.in = virt_to_abs(nx_ctx->in_sg);
-               nx_ctx->op_aead.out = virt_to_abs(nx_ctx->out_sg);
+               nx_ctx->op_aead.csbcpb = __pa(nx_ctx->csbcpb_aead);
+               nx_ctx->op_aead.in = __pa(nx_ctx->in_sg);
+               nx_ctx->op_aead.out = __pa(nx_ctx->out_sg);
        }
 }
 
index d9b0ebcb67d7371946f8a644d26066e3a175df28..8f5290147e8a8d1fbeb5399f230a900c6e023c97 100644 (file)
@@ -220,7 +220,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
                        cq = ERR_PTR(-EAGAIN);
                        goto create_cq_exit4;
                }
-               rpage = virt_to_abs(vpage);
+               rpage = __pa(vpage);
 
                h_ret = hipz_h_register_rpage_cq(adapter_handle,
                                                 my_cq->ipz_cq_handle,
index 818d721fc4489186fee9d7c6b1fa7e79835f412a..90da6747d3954507ff6bf1149509d85eb58f0484 100644 (file)
@@ -101,7 +101,7 @@ int ehca_create_eq(struct ehca_shca *shca,
                if (!vpage)
                        goto create_eq_exit2;
 
-               rpage = virt_to_abs(vpage);
+               rpage = __pa(vpage);
                h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
                                                 eq->ipz_eq_handle,
                                                 &eq->pf,
index b781b2cb062409a2f97a4a045c2c67b7e6641a71..87844869dcc2f76f8a07d9b6397d02a16bfbeeae 100644 (file)
@@ -1136,7 +1136,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
                }
 
                if (rnum > 1) {
-                       rpage = virt_to_abs(kpage);
+                       rpage = __pa(kpage);
                        if (!rpage) {
                                ehca_err(&shca->ib_device, "kpage=%p i=%x",
                                         kpage, i);
@@ -1231,7 +1231,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
                         pginfo->num_kpages, pginfo->num_hwpages, kpage);
                goto ehca_rereg_mr_rereg1_exit1;
        }
-       rpage = virt_to_abs(kpage);
+       rpage = __pa(kpage);
        if (!rpage) {
                ehca_err(&shca->ib_device, "kpage=%p", kpage);
                ret = -EFAULT;
@@ -1525,7 +1525,7 @@ static inline void *ehca_calc_sectbase(int top, int dir, int idx)
        unsigned long ret = idx;
        ret |= dir << EHCA_DIR_INDEX_SHIFT;
        ret |= top << EHCA_TOP_INDEX_SHIFT;
-       return abs_to_virt(ret << SECTION_SIZE_BITS);
+       return __va(ret << SECTION_SIZE_BITS);
 }
 
 #define ehca_bmap_valid(entry) \
@@ -1537,7 +1537,7 @@ static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
 {
        u64 h_ret = 0;
        unsigned long page = 0;
-       u64 rpage = virt_to_abs(kpage);
+       u64 rpage = __pa(kpage);
        int page_count;
 
        void *sectbase = ehca_calc_sectbase(top, dir, idx);
@@ -1553,7 +1553,7 @@ static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
                for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
                     rnum++) {
                        void *pg = sectbase + ((page++) * pginfo->hwpage_size);
-                       kpage[rnum] = virt_to_abs(pg);
+                       kpage[rnum] = __pa(pg);
                }
 
                h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
@@ -1870,9 +1870,8 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
                for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
                        pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
                                << PAGE_SHIFT ;
-                       *kpage = phys_to_abs(pgaddr +
-                                            (pginfo->next_hwpage *
-                                             pginfo->hwpage_size));
+                       *kpage = pgaddr + (pginfo->next_hwpage *
+                                          pginfo->hwpage_size);
                        if ( !(*kpage) ) {
                                ehca_gen_err("pgaddr=%llx "
                                             "chunk->page_list[i]=%llx "
@@ -1927,7 +1926,7 @@ static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
                u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
                if (ehca_debug_level >= 3)
                        ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
-                                    *(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
+                                    *(u64 *)__va(pgaddr));
                if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
                        ehca_gen_err("uncontiguous page found pgaddr=%llx "
                                     "prev_pgaddr=%llx page_list_i=%x",
@@ -1962,7 +1961,7 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
                        if (nr_kpages == kpages_per_hwpage) {
                                pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
                                           << PAGE_SHIFT );
-                               *kpage = phys_to_abs(pgaddr);
+                               *kpage = pgaddr;
                                if ( !(*kpage) ) {
                                        ehca_gen_err("pgaddr=%llx i=%x",
                                                     pgaddr, i);
@@ -1990,13 +1989,11 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
                                                 (pginfo->hwpage_size - 1)) >>
                                                PAGE_SHIFT;
                                        nr_kpages -= pginfo->kpage_cnt;
-                                       *kpage = phys_to_abs(
-                                               pgaddr &
-                                               ~(pginfo->hwpage_size - 1));
+                                       *kpage = pgaddr &
+                                                ~(pginfo->hwpage_size - 1);
                                }
                                if (ehca_debug_level >= 3) {
-                                       u64 val = *(u64 *)abs_to_virt(
-                                               phys_to_abs(pgaddr));
+                                       u64 val = *(u64 *)__va(pgaddr);
                                        ehca_gen_dbg("kpage=%llx chunk_page=%llx "
                                                     "value=%016llx",
                                                     *kpage, pgaddr, val);
@@ -2084,9 +2081,8 @@ static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
                                             pginfo->num_hwpages, i);
                                return -EFAULT;
                        }
-                       *kpage = phys_to_abs(
-                               (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
-                               (pginfo->next_hwpage * pginfo->hwpage_size));
+                       *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
+                                (pginfo->next_hwpage * pginfo->hwpage_size);
                        if ( !(*kpage) && pbuf->addr ) {
                                ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
                                             "next_hwpage=%llx", pbuf->addr,
@@ -2124,8 +2120,8 @@ static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
        /* loop over desired page_list entries */
        fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
        for (i = 0; i < number; i++) {
-               *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) +
-                                    pginfo->next_hwpage * pginfo->hwpage_size);
+               *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) +
+                          pginfo->next_hwpage * pginfo->hwpage_size;
                if ( !(*kpage) ) {
                        ehca_gen_err("*fmrlist=%llx fmrlist=%p "
                                     "next_listelem=%llx next_hwpage=%llx",
@@ -2152,8 +2148,7 @@ static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
                        u64 prev = *kpage;
                        /* check if adrs are contiguous */
                        for (j = 1; j < cnt_per_hwpage; j++) {
-                               u64 p = phys_to_abs(fmrlist[j] &
-                                                   ~(pginfo->hwpage_size - 1));
+                               u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1);
                                if (prev + pginfo->u.fmr.fmr_pgsize != p) {
                                        ehca_gen_err("uncontiguous fmr pages "
                                                     "found prev=%llx p=%llx "
@@ -2388,8 +2383,8 @@ static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
                memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
        }
 
-       start_section = phys_to_abs(pfn * PAGE_SIZE) / EHCA_SECTSIZE;
-       end_section = phys_to_abs((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
+       start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE;
+       end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
        for (i = start_section; i < end_section; i++) {
                int ret;
                top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
@@ -2508,7 +2503,7 @@ static u64 ehca_map_vaddr(void *caddr)
        if (!ehca_bmap)
                return EHCA_INVAL_ADDR;
 
-       abs_addr = virt_to_abs(caddr);
+       abs_addr = __pa(caddr);
        top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
        if (!ehca_bmap_valid(ehca_bmap->top[top]))
                return EHCA_INVAL_ADDR;
index 964f85520798b81605cf65ea516ef5465e7dc8ad..149393915ae5aa700891ed8422e36fd57b15a16e 100644 (file)
@@ -321,7 +321,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
                        ret = -EINVAL;
                        goto init_qp_queue1;
                }
-               rpage = virt_to_abs(vpage);
+               rpage = __pa(vpage);
 
                h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
                                                 my_qp->ipz_qp_handle,
@@ -1094,7 +1094,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
        ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
                 qp_num, bad_send_wqe_p);
        /* convert wqe pointer to vadr */
-       bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p);
+       bad_send_wqe_v = __va((u64)bad_send_wqe_p);
        if (ehca_debug_level >= 2)
                ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
        squeue = &my_qp->ipz_squeue;
@@ -1138,7 +1138,7 @@ static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
        /* convert real to abs address */
        wqe_p = wqe_p & (~(1UL << 63));
 
-       wqe_v = abs_to_virt(wqe_p);
+       wqe_v = __va(wqe_p);
 
        if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
                ehca_gen_err("Invalid offset for calculating left cqes "
index fd05f48f6b0b649305014e22967e41bf4dcafefb..47f94984353de9afac694d51806696d5c6d3d7c8 100644 (file)
@@ -135,7 +135,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
                                     mad_hdr->attr_mod);
                }
                for (j = 0; j < send_wr->num_sge; j++) {
-                       u8 *data = (u8 *)abs_to_virt(sge->addr);
+                       u8 *data = __va(sge->addr);
                        ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
                                     "lkey=%x",
                                     idx, j, data, sge->length, sge->lkey);
index 54c0d23bad9294c97f55402bb2910ef209d951b8..d280b12aae6499860a7913ab3f8e2406bd65bc12 100644 (file)
@@ -59,7 +59,6 @@
 #include <linux/device.h>
 
 #include <linux/atomic.h>
-#include <asm/abs_addr.h>
 #include <asm/ibmebus.h>
 #include <asm/io.h>
 #include <asm/pgtable.h>
index e6f9cdd94c7a9e0bc5114024974eb47de59e1e6e..2d41d04fd959421df86e7348ea277e8e3a01f521 100644 (file)
@@ -396,7 +396,7 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
                      struct hipz_query_port *query_port_response_block)
 {
        u64 ret;
-       u64 r_cb = virt_to_abs(query_port_response_block);
+       u64 r_cb = __pa(query_port_response_block);
 
        if (r_cb & (EHCA_PAGESIZE-1)) {
                ehca_gen_err("response block not page aligned");
@@ -438,7 +438,7 @@ u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
 u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
                     struct hipz_query_hca *query_hca_rblock)
 {
-       u64 r_cb = virt_to_abs(query_hca_rblock);
+       u64 r_cb = __pa(query_hca_rblock);
 
        if (r_cb & (EHCA_PAGESIZE-1)) {
                ehca_gen_err("response_block=%p not page aligned",
@@ -577,7 +577,7 @@ u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
                                adapter_handle.handle, /* r4 */
                                qp_handle.handle,      /* r5 */
                                update_mask,           /* r6 */
-                               virt_to_abs(mqpcb),    /* r7 */
+                               __pa(mqpcb),           /* r7 */
                                0, 0, 0, 0, 0);
 
        if (ret == H_NOT_ENOUGH_RESOURCES)
@@ -595,7 +595,7 @@ u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
        return ehca_plpar_hcall_norets(H_QUERY_QP,
                                       adapter_handle.handle, /* r4 */
                                       qp_handle.handle,      /* r5 */
-                                      virt_to_abs(qqpcb),    /* r6 */
+                                      __pa(qqpcb),           /* r6 */
                                       0, 0, 0, 0);
 }
 
@@ -787,7 +787,7 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
                if (count > 1) {
                        u64 *kpage;
                        int i;
-                       kpage = (u64 *)abs_to_virt(logical_address_of_page);
+                       kpage = __va(logical_address_of_page);
                        for (i = 0; i < count; i++)
                                ehca_gen_dbg("kpage[%d]=%p",
                                             i, (void *)kpage[i]);
@@ -944,7 +944,7 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
                      void *rblock,
                      unsigned long *byte_count)
 {
-       u64 r_cb = virt_to_abs(rblock);
+       u64 r_cb = __pa(rblock);
 
        if (r_cb & (EHCA_PAGESIZE-1)) {
                ehca_gen_err("rblock not page aligned.");
index 1898d6e7cce5374b48d811c7cf00e041c20d0b05..62c71fadb4d97d10a29f0e05452b63f888fb0814 100644 (file)
@@ -81,7 +81,7 @@ int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset)
 {
        int i;
        for (i = 0; i < queue->queue_length / queue->pagesize; i++) {
-               u64 page = (u64)virt_to_abs(queue->queue_pages[i]);
+               u64 page = __pa(queue->queue_pages[i]);
                if (addr >= page && addr < page + queue->pagesize) {
                        *q_offset = addr - page + i * queue->pagesize;
                        return 0;
index 54ac7ffacb40c9dc6352af3045c836f0162e7b73..7d5a6b40b31cf973a99e7f7232ab52bcd0bddf90 100644 (file)
@@ -45,7 +45,6 @@
 #include <asm/pmac_feature.h>
 #include <asm/smu.h>
 #include <asm/sections.h>
-#include <asm/abs_addr.h>
 #include <asm/uaccess.h>
 
 #define VERSION "0.7"
@@ -502,7 +501,7 @@ int __init smu_init (void)
         * 32 bits value safely
         */
        smu->cmd_buf_abs = (u32)smu_cmdbuf_abs;
-       smu->cmd_buf = (struct smu_cmd_buf *)abs_to_virt(smu_cmdbuf_abs);
+       smu->cmd_buf = __va(smu_cmdbuf_abs);
 
        smu->db_node = of_find_node_by_name(NULL, "smu-doorbell");
        if (smu->db_node == NULL) {
index b8e46cc31e53ecc385c7fcf5740a2bc0f0eed2d6..6be7b9839f35a2ad8ebe567bc76366f80e2896a9 100644 (file)
@@ -35,7 +35,6 @@
 #include <linux/if_vlan.h>
 
 #include <asm/ibmebus.h>
-#include <asm/abs_addr.h>
 #include <asm/io.h>
 
 #define DRV_NAME       "ehea"
index 30f903332e927c58d8ad3f736620640a50163b94..d3a130ccdcc85fa63620a477c7c8e16b5b99cb3f 100644 (file)
@@ -141,7 +141,7 @@ u64 ehea_h_query_ehea_qp(const u64 adapter_handle, const u8 qp_category,
                                       qp_category,             /* R5 */
                                       qp_handle,               /* R6 */
                                       sel_mask,                /* R7 */
-                                      virt_to_abs(cb_addr),    /* R8 */
+                                      __pa(cb_addr),           /* R8 */
                                       0, 0);
 }
 
@@ -415,7 +415,7 @@ u64 ehea_h_modify_ehea_qp(const u64 adapter_handle, const u8 cat,
                                 (u64) cat,                     /* R5 */
                                 qp_handle,                     /* R6 */
                                 sel_mask,                      /* R7 */
-                                virt_to_abs(cb_addr),          /* R8 */
+                                __pa(cb_addr),                 /* R8 */
                                 0, 0, 0, 0);                   /* R9-R12 */
 
        *inv_attr_id = outs[0];
@@ -528,7 +528,7 @@ u64 ehea_h_query_ehea(const u64 adapter_handle, void *cb_addr)
 {
        u64 hret, cb_logaddr;
 
-       cb_logaddr = virt_to_abs(cb_addr);
+       cb_logaddr = __pa(cb_addr);
 
        hret = ehea_plpar_hcall_norets(H_QUERY_HEA,
                                       adapter_handle,          /* R4 */
@@ -545,7 +545,7 @@ u64 ehea_h_query_ehea_port(const u64 adapter_handle, const u16 port_num,
                           void *cb_addr)
 {
        u64 port_info;
-       u64 cb_logaddr = virt_to_abs(cb_addr);
+       u64 cb_logaddr = __pa(cb_addr);
        u64 arr_index = 0;
 
        port_info = EHEA_BMASK_SET(H_MEHEAPORT_CAT, cb_cat)
@@ -567,7 +567,7 @@ u64 ehea_h_modify_ehea_port(const u64 adapter_handle, const u16 port_num,
        unsigned long outs[PLPAR_HCALL9_BUFSIZE];
        u64 port_info;
        u64 arr_index = 0;
-       u64 cb_logaddr = virt_to_abs(cb_addr);
+       u64 cb_logaddr = __pa(cb_addr);
 
        port_info = EHEA_BMASK_SET(H_MEHEAPORT_CAT, cb_cat)
                  | EHEA_BMASK_SET(H_MEHEAPORT_PN, port_num);
@@ -621,6 +621,6 @@ u64 ehea_h_error_data(const u64 adapter_handle, const u64 ressource_handle,
        return ehea_plpar_hcall_norets(H_ERROR_DATA,
                                       adapter_handle,          /* R4 */
                                       ressource_handle,        /* R5 */
-                                      virt_to_abs(rblock),     /* R6 */
+                                      __pa(rblock),            /* R6 */
                                       0, 0, 0, 0);             /* R7-R12 */
 }
index cb66f574dc97440c56b5c603bff208937a7e5117..27f881758d16c541560d616b22efc16649389241 100644 (file)
@@ -163,7 +163,7 @@ struct ehea_cq *ehea_create_cq(struct ehea_adapter *adapter,
                        goto out_kill_hwq;
                }
 
-               rpage = virt_to_abs(vpage);
+               rpage = __pa(vpage);
                hret = ehea_h_register_rpage(adapter->handle,
                                             0, EHEA_CQ_REGISTER_ORIG,
                                             cq->fw_handle, rpage, 1);
@@ -290,7 +290,7 @@ struct ehea_eq *ehea_create_eq(struct ehea_adapter *adapter,
                        goto out_kill_hwq;
                }
 
-               rpage = virt_to_abs(vpage);
+               rpage = __pa(vpage);
 
                hret = ehea_h_register_rpage(adapter->handle, 0,
                                             EHEA_EQ_REGISTER_ORIG,
@@ -395,7 +395,7 @@ static int ehea_qp_alloc_register(struct ehea_qp *qp, struct hw_queue *hw_queue,
                        pr_err("hw_qpageit_get_inc failed\n");
                        goto out_kill_hwq;
                }
-               rpage = virt_to_abs(vpage);
+               rpage = __pa(vpage);
                hret = ehea_h_register_rpage(adapter->handle,
                                             0, h_call_q_selector,
                                             qp->fw_handle, rpage, 1);
@@ -790,7 +790,7 @@ u64 ehea_map_vaddr(void *caddr)
        if (!ehea_bmap)
                return EHEA_INVAL_ADDR;
 
-       index = virt_to_abs(caddr) >> SECTION_SIZE_BITS;
+       index = __pa(caddr) >> SECTION_SIZE_BITS;
        top = (index >> EHEA_TOP_INDEX_SHIFT) & EHEA_INDEX_MASK;
        if (!ehea_bmap->top[top])
                return EHEA_INVAL_ADDR;
@@ -812,7 +812,7 @@ static inline void *ehea_calc_sectbase(int top, int dir, int idx)
        unsigned long ret = idx;
        ret |= dir << EHEA_DIR_INDEX_SHIFT;
        ret |= top << EHEA_TOP_INDEX_SHIFT;
-       return abs_to_virt(ret << SECTION_SIZE_BITS);
+       return __va(ret << SECTION_SIZE_BITS);
 }
 
 static u64 ehea_reg_mr_section(int top, int dir, int idx, u64 *pt,
@@ -822,7 +822,7 @@ static u64 ehea_reg_mr_section(int top, int dir, int idx, u64 *pt,
        void *pg;
        u64 j, m, hret;
        unsigned long k = 0;
-       u64 pt_abs = virt_to_abs(pt);
+       u64 pt_abs = __pa(pt);
 
        void *sectbase = ehea_calc_sectbase(top, dir, idx);
 
@@ -830,7 +830,7 @@ static u64 ehea_reg_mr_section(int top, int dir, int idx, u64 *pt,
 
                for (m = 0; m < EHEA_MAX_RPAGE; m++) {
                        pg = sectbase + ((k++) * EHEA_PAGESIZE);
-                       pt[m] = virt_to_abs(pg);
+                       pt[m] = __pa(pg);
                }
                hret = ehea_h_register_rpage_mr(adapter->handle, mr->handle, 0,
                                                0, pt_abs, EHEA_MAX_RPAGE);
index 467dc38246f93317221239e45a798c867bd1cdb0..6077c43edacc00d9b7d05faf1cbce7c4ef4296d8 100644 (file)
@@ -6304,14 +6304,14 @@ static struct ata_port_info sata_port_info = {
 
 #ifdef CONFIG_PPC_PSERIES
 static const u16 ipr_blocked_processors[] = {
-       PV_NORTHSTAR,
-       PV_PULSAR,
-       PV_POWER4,
-       PV_ICESTAR,
-       PV_SSTAR,
-       PV_POWER4p,
-       PV_630,
-       PV_630p
+       PVR_NORTHSTAR,
+       PVR_PULSAR,
+       PVR_POWER4,
+       PVR_ICESTAR,
+       PVR_SSTAR,
+       PVR_POWER4p,
+       PVR_630,
+       PVR_630p
 };
 
 /**
@@ -6331,7 +6331,7 @@ static int ipr_invalid_adapter(struct ipr_ioa_cfg *ioa_cfg)
 
        if ((ioa_cfg->type == 0x5702) && (ioa_cfg->pdev->revision < 4)) {
                for (i = 0; i < ARRAY_SIZE(ipr_blocked_processors); i++){
-                       if (__is_processor(ipr_blocked_processors[i]))
+                       if (pvr_version_is(ipr_blocked_processors[i]))
                                return 1;
                }
        }
index 2d691eb7c40aa28f30602834902cc48fc1c52dad..f1d4d96a4a0784b95160de07c665635d159ba237 100644 (file)
@@ -245,6 +245,20 @@ static void hvc_port_destruct(struct tty_port *port)
        kfree(hp);
 }
 
+static void hvc_check_console(int index)
+{
+       /* Already enabled, bail out */
+       if (hvc_console.flags & CON_ENABLED)
+               return;
+
+       /* If this index is what the user requested, then register
+        * now (setup won't fail at this point).  It's ok to just
+        * call register again if previously .setup failed.
+        */
+       if (index == hvc_console.index)
+               register_console(&hvc_console);
+}
+
 /*
  * hvc_instantiate() is an early console discovery method which locates
  * consoles * prior to the vio subsystem discovering them.  Hotplugged
@@ -275,12 +289,8 @@ int hvc_instantiate(uint32_t vtermno, int index, const struct hv_ops *ops)
        if (last_hvc < index)
                last_hvc = index;
 
-       /* if this index is what the user requested, then register
-        * now (setup won't fail at this point).  It's ok to just
-        * call register again if previously .setup failed.
-        */
-       if (index == hvc_console.index)
-               register_console(&hvc_console);
+       /* check if we need to re-register the kernel console */
+       hvc_check_console(index);
 
        return 0;
 }
@@ -858,10 +868,15 @@ struct hvc_struct *hvc_alloc(uint32_t vtermno, int data,
                i = ++last_hvc;
 
        hp->index = i;
+       cons_ops[i] = ops;
+       vtermnos[i] = vtermno;
 
        list_add_tail(&(hp->next), &hvc_structs);
        spin_unlock(&hvc_structs_lock);
 
+       /* check if we need to re-register the kernel console */
+       hvc_check_console(i);
+
        return hp;
 }
 EXPORT_SYMBOL_GPL(hvc_alloc);
@@ -874,8 +889,12 @@ int hvc_remove(struct hvc_struct *hp)
        tty = tty_port_tty_get(&hp->port);
 
        spin_lock_irqsave(&hp->lock, flags);
-       if (hp->index < MAX_NR_HVC_CONSOLES)
+       if (hp->index < MAX_NR_HVC_CONSOLES) {
+               console_lock();
                vtermnos[hp->index] = -1;
+               cons_ops[hp->index] = NULL;
+               console_unlock();
+       }
 
        /* Don't whack hp->irq because tty_hangup() will need to free the irq. */
 
index ee307799271a4d3cd362cb131a58490d0d00f18b..070c0ee6864239c5d395f71d37d563edcbbee7be 100644 (file)
@@ -230,6 +230,69 @@ static const struct hv_ops hvterm_hvsi_ops = {
        .tiocmset = hvterm_hvsi_tiocmset,
 };
 
+static void udbg_hvc_putc(char c)
+{
+       int count = -1;
+
+       if (!hvterm_privs[0])
+               return;
+
+       if (c == '\n')
+               udbg_hvc_putc('\r');
+
+       do {
+               switch(hvterm_privs[0]->proto) {
+               case HV_PROTOCOL_RAW:
+                       count = hvterm_raw_put_chars(0, &c, 1);
+                       break;
+               case HV_PROTOCOL_HVSI:
+                       count = hvterm_hvsi_put_chars(0, &c, 1);
+                       break;
+               }
+       } while(count == 0);
+}
+
+static int udbg_hvc_getc_poll(void)
+{
+       int rc = 0;
+       char c;
+
+       if (!hvterm_privs[0])
+               return -1;
+
+       switch(hvterm_privs[0]->proto) {
+       case HV_PROTOCOL_RAW:
+               rc = hvterm_raw_get_chars(0, &c, 1);
+               break;
+       case HV_PROTOCOL_HVSI:
+               rc = hvterm_hvsi_get_chars(0, &c, 1);
+               break;
+       }
+       if (!rc)
+               return -1;
+       return c;
+}
+
+static int udbg_hvc_getc(void)
+{
+       int ch;
+
+       if (!hvterm_privs[0])
+               return -1;
+
+       for (;;) {
+               ch = udbg_hvc_getc_poll();
+               if (ch == -1) {
+                       /* This shouldn't be needed...but... */
+                       volatile unsigned long delay;
+                       for (delay=0; delay < 2000000; delay++)
+                               ;
+               } else {
+                       return ch;
+               }
+       }
+}
+
 static int __devinit hvc_vio_probe(struct vio_dev *vdev,
                                   const struct vio_device_id *id)
 {
@@ -289,6 +352,13 @@ static int __devinit hvc_vio_probe(struct vio_dev *vdev,
                return PTR_ERR(hp);
        dev_set_drvdata(&vdev->dev, hp);
 
+       /* register udbg if it's not there already for console 0 */
+       if (hp->index == 0 && !udbg_putc) {
+               udbg_putc = udbg_hvc_putc;
+               udbg_getc = udbg_hvc_getc;
+               udbg_getc_poll = udbg_hvc_getc_poll;
+       }
+
        return 0;
 }
 
@@ -331,59 +401,6 @@ static void __exit hvc_vio_exit(void)
 }
 module_exit(hvc_vio_exit);
 
-static void udbg_hvc_putc(char c)
-{
-       int count = -1;
-
-       if (c == '\n')
-               udbg_hvc_putc('\r');
-
-       do {
-               switch(hvterm_priv0.proto) {
-               case HV_PROTOCOL_RAW:
-                       count = hvterm_raw_put_chars(0, &c, 1);
-                       break;
-               case HV_PROTOCOL_HVSI:
-                       count = hvterm_hvsi_put_chars(0, &c, 1);
-                       break;
-               }
-       } while(count == 0);
-}
-
-static int udbg_hvc_getc_poll(void)
-{
-       int rc = 0;
-       char c;
-
-       switch(hvterm_priv0.proto) {
-       case HV_PROTOCOL_RAW:
-               rc = hvterm_raw_get_chars(0, &c, 1);
-               break;
-       case HV_PROTOCOL_HVSI:
-               rc = hvterm_hvsi_get_chars(0, &c, 1);
-               break;
-       }
-       if (!rc)
-               return -1;
-       return c;
-}
-
-static int udbg_hvc_getc(void)
-{
-       int ch;
-       for (;;) {
-               ch = udbg_hvc_getc_poll();
-               if (ch == -1) {
-                       /* This shouldn't be needed...but... */
-                       volatile unsigned long delay;
-                       for (delay=0; delay < 2000000; delay++)
-                               ;
-               } else {
-                       return ch;
-               }
-       }
-}
-
 void __init hvc_vio_init_early(void)
 {
        struct device_node *stdout_node;
index 213fbbcf613bdf0467eb97eec7c2c3e5649eb52d..4e292f29bf5dedb710085d291105c75f604e3e74 100644 (file)
@@ -31,7 +31,6 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 
-#include <asm/abs_addr.h>
 #include <asm/cell-regs.h>
 #include <asm/lv1call.h>
 #include <asm/ps3av.h>
@@ -1141,7 +1140,7 @@ static int __devinit ps3fb_probe(struct ps3_system_bus_device *dev)
         */
        fb_start = ps3fb_videomemory.address + GPU_FB_START;
        info->screen_base = (char __force __iomem *)fb_start;
-       info->fix.smem_start = virt_to_abs(fb_start);
+       info->fix.smem_start = __pa(fb_start);
        info->fix.smem_len = ps3fb_videomemory.size - GPU_FB_START;
 
        info->pseudo_palette = par->pseudo_palette;