Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
author Linus Torvalds <torvalds@g5.osdl.org>
Sat, 23 Sep 2006 23:49:31 +0000 (16:49 -0700)
committer Linus Torvalds <torvalds@g5.osdl.org>
Sat, 23 Sep 2006 23:49:31 +0000 (16:49 -0700)
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: (353 commits)
  [IPV6] ADDRCONF: Mobile IPv6 Home Address support.
  [IPV6] ADDRCONF: Allow non-DAD'able addresses.
  [IPV6] NDISC: Fix is_router flag setting.
  [IPV6] ADDRCONF: Convert addrconf_lock to RCU.
  [IPV6] NDISC: Add proxy_ndp sysctl.
  [IPV6] NDISC: Set per-entry is_router flag in Proxy NA.
  [IPV6] NDISC: Avoid updating neighbor cache for proxied address in receiving NA.
  [IPV6]: Don't forward packets to proxied link-local address.
  [IPV6] NDISC: Handle NDP messages to proxied addresses.
  [NETFILTER]: PPTP conntrack: fix another GRE keymap leak
  [NETFILTER]: PPTP conntrack: fix GRE keymap leak
  [NETFILTER]: PPTP conntrack: fix PPTP_IN_CALL message types
  [NETFILTER]: PPTP conntrack: check call ID before changing state
  [NETFILTER]: PPTP conntrack: clean up debugging cruft
  [NETFILTER]: PPTP conntrack: consolidate header parsing
  [NETFILTER]: PPTP conntrack: consolidate header size checks
  [NETFILTER]: PPTP conntrack: simplify expectation handling
  [NETFILTER]: PPTP conntrack: remove unnecessary cid/pcid header pointers
  [NETFILTER]: PPTP conntrack: fix header definitions
  [NETFILTER]: PPTP conntrack: remove more dead code
  ...

184 files changed:
MAINTAINERS
arch/frv/Makefile
arch/frv/boot/Makefile
arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
arch/i386/kernel/cpu/cpufreq/longhaul.c
arch/i386/kernel/cpu/cpufreq/longhaul.h
arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
arch/powerpc/platforms/powermac/feature.c
arch/powerpc/platforms/powermac/smp.c
arch/sparc/kernel/ebus.c
arch/sparc/kernel/ioport.c
crypto/hmac.c
drivers/char/agp/agp.h
drivers/char/agp/backend.c
drivers/char/agp/efficeon-agp.c
drivers/char/agp/frontend.c
drivers/char/agp/generic.c
drivers/char/agp/intel-agp.c
drivers/char/agp/via-agp.c
drivers/char/briq_panel.c
drivers/char/istallion.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_ondemand.c
drivers/cpufreq/cpufreq_stats.c
drivers/infiniband/Kconfig
drivers/infiniband/Makefile
drivers/infiniband/core/Makefile
drivers/infiniband/core/addr.c
drivers/infiniband/core/cache.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/device.c
drivers/infiniband/core/iwcm.c [new file with mode: 0644]
drivers/infiniband/core/iwcm.h [new file with mode: 0644]
drivers/infiniband/core/mad.c
drivers/infiniband/core/mad_priv.h
drivers/infiniband/core/mad_rmpp.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/smi.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ucm.c
drivers/infiniband/core/user_mad.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/amso1100/Kbuild [new file with mode: 0644]
drivers/infiniband/hw/amso1100/Kconfig [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_ae.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_ae.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_alloc.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_cm.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_cq.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_intr.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_mm.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_mq.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_mq.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_pd.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_provider.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_provider.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_qp.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_rnic.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_status.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_user.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_vq.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_vq.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_wr.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/Kconfig [new file with mode: 0644]
drivers/infiniband/hw/ehca/Makefile [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_av.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_classes.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_classes_pSeries.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_cq.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_eq.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_hca.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_irq.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_irq.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_iverbs.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_main.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_mcast.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_mrmw.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_mrmw.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_pd.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_qes.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_qp.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_reqs.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_sqp.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_tools.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_uverbs.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/hcp_if.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/hcp_if.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/hcp_phyp.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/hcp_phyp.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/hipz_fns.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/hipz_fns_core.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/hipz_hw.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ipz_pt_fn.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ipz_pt_fn.h [new file with mode: 0644]
drivers/infiniband/hw/ipath/Kconfig
drivers/infiniband/hw/ipath/Makefile
drivers/infiniband/hw/ipath/ipath_common.h
drivers/infiniband/hw/ipath/ipath_cq.c
drivers/infiniband/hw/ipath/ipath_debug.h
drivers/infiniband/hw/ipath/ipath_diag.c
drivers/infiniband/hw/ipath/ipath_driver.c
drivers/infiniband/hw/ipath/ipath_file_ops.c
drivers/infiniband/hw/ipath/ipath_fs.c
drivers/infiniband/hw/ipath/ipath_ht400.c [deleted file]
drivers/infiniband/hw/ipath/ipath_iba6110.c [new file with mode: 0644]
drivers/infiniband/hw/ipath/ipath_iba6120.c [new file with mode: 0644]
drivers/infiniband/hw/ipath/ipath_init_chip.c
drivers/infiniband/hw/ipath/ipath_intr.c
drivers/infiniband/hw/ipath/ipath_kernel.h
drivers/infiniband/hw/ipath/ipath_keys.c
drivers/infiniband/hw/ipath/ipath_layer.c
drivers/infiniband/hw/ipath/ipath_layer.h
drivers/infiniband/hw/ipath/ipath_mad.c
drivers/infiniband/hw/ipath/ipath_mmap.c [new file with mode: 0644]
drivers/infiniband/hw/ipath/ipath_mr.c
drivers/infiniband/hw/ipath/ipath_pe800.c [deleted file]
drivers/infiniband/hw/ipath/ipath_qp.c
drivers/infiniband/hw/ipath/ipath_rc.c
drivers/infiniband/hw/ipath/ipath_registers.h
drivers/infiniband/hw/ipath/ipath_ruc.c
drivers/infiniband/hw/ipath/ipath_srq.c
drivers/infiniband/hw/ipath/ipath_stats.c
drivers/infiniband/hw/ipath/ipath_sysfs.c
drivers/infiniband/hw/ipath/ipath_uc.c
drivers/infiniband/hw/ipath/ipath_ud.c
drivers/infiniband/hw/ipath/ipath_verbs.c
drivers/infiniband/hw/ipath/ipath_verbs.h
drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
drivers/infiniband/hw/ipath/ipath_wc_ppc64.c [new file with mode: 0644]
drivers/infiniband/hw/ipath/verbs_debug.h [deleted file]
drivers/infiniband/hw/mthca/mthca_av.c
drivers/infiniband/hw/mthca/mthca_catas.c
drivers/infiniband/hw/mthca/mthca_cmd.c
drivers/infiniband/hw/mthca/mthca_cq.c
drivers/infiniband/hw/mthca/mthca_dev.h
drivers/infiniband/hw/mthca/mthca_mad.c
drivers/infiniband/hw/mthca/mthca_main.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/mthca/mthca_srq.c
drivers/infiniband/hw/mthca/mthca_uar.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/infiniband/ulp/iser/Kconfig
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/infiniband/ulp/srp/ib_srp.c
drivers/macintosh/adbhid.c
drivers/net/lp486e.c
drivers/net/mv643xx_eth.c
drivers/usb/input/hid-core.c
drivers/video/console/fbcon.c
drivers/video/riva/fbdev.c
fs/cifs/CHANGES
fs/cifs/cifs_fs_sb.h
fs/cifs/cifsfs.c
fs/cifs/cifsfs.h
fs/cifs/cifspdu.h
fs/cifs/connect.c
fs/cifs/dir.c
fs/cifs/file.c
fs/cifs/xattr.c
include/asm-generic/audit_change_attr.h
include/asm-generic/audit_dir_write.h
include/asm-ppc/ibm4xx.h
include/linux/kernel.h
include/linux/mm.h
include/rdma/ib_addr.h
include/rdma/ib_sa.h
include/rdma/ib_user_verbs.h
include/rdma/ib_verbs.h
include/rdma/iw_cm.h [new file with mode: 0644]
include/rdma/rdma_cm.h
lib/audit.c
mm/mmap.c
sound/aoa/Kconfig

diff --git a/MAINTAINERS b/MAINTAINERS
index ed2a83cfad7c05de65acf1c3cbcbe46df7f2dd57..b08c537018de214f2a7147ebaee36ff428a58dec 100644
@@ -298,6 +298,14 @@ L: info-linux@geode.amd.com
 W:     http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
 S:     Supported
 
+AMSO1100 RNIC DRIVER
+P:     Tom Tucker
+M:     tom@opengridcomputing.com
+P:     Steve Wise
+M:     swise@opengridcomputing.com
+L:     openib-general@openib.org
+S:     Maintained
+
 AOA (Apple Onboard Audio) ALSA DRIVER
 P:     Johannes Berg
 M:     johannes@sipsolutions.net
@@ -991,6 +999,14 @@ EFS FILESYSTEM
 W:     http://aeschi.ch.eu.org/efs/
 S:     Orphan
 
+EHCA (IBM GX bus InfiniBand adapter) DRIVER:
+P:     Hoang-Nam Nguyen
+M:     hnguyen@de.ibm.com
+P:     Christoph Raisch
+M:     raisch@de.ibm.com
+L:     openib-general@openib.org
+S:     Supported
+
 EMU10K1 SOUND DRIVER
 P:     James Courtier-Dutton
 M:     James@superbug.demon.co.uk
diff --git a/arch/frv/Makefile b/arch/frv/Makefile
index d163747d17c0e5a45f4be201007b498fbf84cc22..038e3a8457e0b7ad1ad91b46299a1662f6a3a726 100644
@@ -108,11 +108,8 @@ Image: vmlinux
 bootstrap:
        $(Q)$(MAKEBOOT) bootstrap
 
-archmrproper:
-       $(Q)$(MAKE) $(build)=arch/frv/boot mrproper
-
 archclean:
-       $(Q)$(MAKE) $(build)=arch/frv/boot clean
+       $(Q)$(MAKE) $(clean)=arch/frv/boot
 
 archdep: scripts/mkdep symlinks
        $(Q)$(MAKE) $(build)=arch/frv/boot dep
diff --git a/arch/frv/boot/Makefile b/arch/frv/boot/Makefile
index 5dfc93fd945a9c25dff66c2b2c17c4a09946d6b9..dc6f03824423c76f378c1cc384671e70e2b1503b 100644
@@ -8,6 +8,8 @@
 # Copyright (C) 1995-2000 Russell King
 #
 
+targets := Image zImage bootpImage
+
 SYSTEM =$(TOPDIR)/$(LINUX)
 
 ZTEXTADDR       = 0x02080000
@@ -66,7 +68,6 @@ zinstall: $(CONFIGURE) zImage
 # miscellany
 #
 mrproper clean:
-       $(RM) Image zImage bootpImage
 #      @$(MAKE) -C compressed clean
 #      @$(MAKE) -C bootp clean
 
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index e6ea00edcb5445cad33b065157685005e5185b74..ea19d091fd41e278c2e58122ccb372721b959849 100644
@@ -32,6 +32,7 @@
 #include <linux/seq_file.h>
 #include <linux/compiler.h>
 #include <linux/sched.h>       /* current */
+#include <linux/dmi.h>
 #include <asm/io.h>
 #include <asm/delay.h>
 #include <asm/uaccess.h>
@@ -387,6 +388,33 @@ static int acpi_cpufreq_early_init_acpi(void)
        return acpi_processor_preregister_performance(acpi_perf_data);
 }
 
+/*
+ * Some BIOSes do SW_ANY coordination internally, either set it up in hw
+ * or do it in BIOS firmware and won't inform about it to OS. If not
+ * detected, this has a side effect of making CPU run at a different speed
+ * than OS intended it to run at. Detect it and handle it cleanly.
+ */
+static int bios_with_sw_any_bug;
+
+static int __init sw_any_bug_found(struct dmi_system_id *d)
+{
+       bios_with_sw_any_bug = 1;
+       return 0;
+}
+
+static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = {
+       {
+               .callback = sw_any_bug_found,
+               .ident = "Supermicro Server X6DLP",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
+                       DMI_MATCH(DMI_BIOS_VERSION, "080010"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
+               },
+       },
+       { }
+};
+
 static int
 acpi_cpufreq_cpu_init (
        struct cpufreq_policy   *policy)
@@ -422,8 +450,17 @@ acpi_cpufreq_cpu_init (
         * coordination is required.
         */
        if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
-           policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+           policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
                policy->cpus = perf->shared_cpu_map;
+       }
+
+#ifdef CONFIG_SMP
+       dmi_check_system(sw_any_bug_dmi_table);
+       if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
+               policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+               policy->cpus = cpu_core_map[cpu];
+       }
+#endif
 
        if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
                acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
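
Both cpufreq drivers touched by this merge (acpi-cpufreq above and speedstep-centrino further down) gain the same workaround: a DMI quirk table identifies the affected Supermicro board, and on a match the driver widens policy->cpus to the core siblings and forces CPUFREQ_SHARED_TYPE_ALL. A minimal, self-contained sketch of the DMI-table mechanism itself follows; everything except the dmi_* helpers is an invented placeholder.

#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/kernel.h>

static int quirk_found;

/* dmi_check_system() calls this once every .matches entry below matches. */
static int __init quirk_callback(struct dmi_system_id *d)
{
	quirk_found = 1;
	return 0;
}

static struct dmi_system_id __initdata quirk_table[] = {
	{
		.callback = quirk_callback,
		.ident    = "Example board",
		.matches  = {
			DMI_MATCH(DMI_SYS_VENDOR, "ExampleVendor"),
			DMI_MATCH(DMI_PRODUCT_NAME, "ExampleBoard"),
		},
	},
	{ }	/* empty entry terminates the table */
};

static void __init example_apply_quirks(void)
{
	dmi_check_system(quirk_table);
	if (quirk_found)
		printk(KERN_INFO "example: firmware handles coordination itself\n");
}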
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index 4f2c3aeef724cf18e7647338650c0f2a3a7508f7..f5cc9f5c9bab3dae9d0cf050507ac774b945037b 100644
@@ -27,6 +27,7 @@
 #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/cpufreq.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 
 #define        CPU_NEHEMIAH    5
 
 static int cpu_model;
-static unsigned int numscales=16, numvscales;
+static unsigned int numscales=16;
 static unsigned int fsb;
-static int minvid, maxvid;
+
+static struct mV_pos *vrm_mV_table;
+static unsigned char *mV_vrm_table;
+struct f_msr {
+       unsigned char vrm;
+};
+static struct f_msr f_msr_table[32];
+
+static unsigned int highest_speed, lowest_speed; /* kHz */
 static unsigned int minmult, maxmult;
 static int can_scale_voltage;
-static int vrmrev;
 static struct acpi_processor *pr = NULL;
 static struct acpi_processor_cx *cx = NULL;
+static int port22_en;
 
 /* Module parameters */
-static int dont_scale_voltage;
-
+static int scale_voltage;
+static int ignore_latency;
 
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
 
@@ -71,7 +80,6 @@ static int dont_scale_voltage;
 /* Clock ratios multiplied by 10 */
 static int clock_ratio[32];
 static int eblcr_table[32];
-static int voltage_table[32];
 static unsigned int highest_speed, lowest_speed; /* kHz */
 static int longhaul_version;
 static struct cpufreq_frequency_table *longhaul_table;
@@ -124,10 +132,9 @@ static int longhaul_get_cpu_mult(void)
 
 /* For processor with BCR2 MSR */
 
-static void do_longhaul1(int cx_address, unsigned int clock_ratio_index)
+static void do_longhaul1(unsigned int clock_ratio_index)
 {
        union msr_bcr2 bcr2;
-       u32 t;
 
        rdmsrl(MSR_VIA_BCR2, bcr2.val);
        /* Enable software clock multiplier */
@@ -136,13 +143,11 @@ static void do_longhaul1(int cx_address, unsigned int clock_ratio_index)
 
        /* Sync to timer tick */
        safe_halt();
-       ACPI_FLUSH_CPU_CACHE();
        /* Change frequency on next halt or sleep */
        wrmsrl(MSR_VIA_BCR2, bcr2.val);
-       /* Invoke C3 */
-       inb(cx_address);
-       /* Dummy op - must do something useless after P_LVL3 read */
-       t = inl(acpi_fadt.xpm_tmr_blk.address);
+       /* Invoke transition */
+       ACPI_FLUSH_CPU_CACHE();
+       halt();
 
        /* Disable software clock multiplier */
        local_irq_disable();
@@ -164,11 +169,16 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
        longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
        longhaul.bits.EnableSoftBusRatio = 1;
 
+       if (can_scale_voltage) {
+               longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm;
+               longhaul.bits.EnableSoftVID = 1;
+       }
+
        /* Sync to timer tick */
        safe_halt();
-       ACPI_FLUSH_CPU_CACHE();
        /* Change frequency on next halt or sleep */
        wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+       ACPI_FLUSH_CPU_CACHE();
        /* Invoke C3 */
        inb(cx_address);
        /* Dummy op - must do something useless after P_LVL3 read */
@@ -227,10 +237,13 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
        outb(0xFF,0xA1);        /* Overkill */
        outb(0xFE,0x21);        /* TMR0 only */
 
-       /* Disable bus master arbitration */
-       if (pr->flags.bm_check) {
+       if (pr->flags.bm_control) {
+               /* Disable bus master arbitration */
                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1,
                                  ACPI_MTX_DO_NOT_LOCK);
+       } else if (port22_en) {
+               /* Disable AGP and PCI arbiters */
+               outb(3, 0x22);
        }
 
        switch (longhaul_version) {
@@ -244,7 +257,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
         */
        case TYPE_LONGHAUL_V1:
        case TYPE_LONGHAUL_V2:
-               do_longhaul1(cx->address, clock_ratio_index);
+               do_longhaul1(clock_ratio_index);
                break;
 
        /*
@@ -259,14 +272,20 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
         * to work in practice.
         */
        case TYPE_POWERSAVER:
+               /* Don't allow wakeup */
+               acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0,
+                                 ACPI_MTX_DO_NOT_LOCK);
                do_powersaver(cx->address, clock_ratio_index);
                break;
        }
 
-       /* Enable bus master arbitration */
-       if (pr->flags.bm_check) {
+       if (pr->flags.bm_control) {
+               /* Enable bus master arbitration */
                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0,
                                  ACPI_MTX_DO_NOT_LOCK);
+       } else if (port22_en) {
+               /* Enable arbiters */
+               outb(0, 0x22);
        }
 
        outb(pic2_mask,0xA1);   /* restore mask */
@@ -446,53 +465,57 @@ static int __init longhaul_get_ranges(void)
 static void __init longhaul_setup_voltagescaling(void)
 {
        union msr_longhaul longhaul;
+       struct mV_pos minvid, maxvid;
+       unsigned int j, speed, pos, kHz_step, numvscales;
 
-       rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
-
-       if (!(longhaul.bits.RevisionID & 1))
+       rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+       if (!(longhaul.bits.RevisionID & 1)) {
+               printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
                return;
+       }
+
+       if (!longhaul.bits.VRMRev) {
+               printk (KERN_INFO PFX "VRM 8.5\n");
+               vrm_mV_table = &vrm85_mV[0];
+               mV_vrm_table = &mV_vrm85[0];
+       } else {
+               printk (KERN_INFO PFX "Mobile VRM\n");
+               vrm_mV_table = &mobilevrm_mV[0];
+               mV_vrm_table = &mV_mobilevrm[0];
+       }
 
-       minvid = longhaul.bits.MinimumVID;
-       maxvid = longhaul.bits.MaximumVID;
-       vrmrev = longhaul.bits.VRMRev;
+       minvid = vrm_mV_table[longhaul.bits.MinimumVID];
+       maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
+       numvscales = maxvid.pos - minvid.pos + 1;
+       kHz_step = (highest_speed - lowest_speed) / numvscales;
 
-       if (minvid == 0 || maxvid == 0) {
+       if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
                printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
                                        "Voltage scaling disabled.\n",
-                                       minvid/1000, minvid%1000, maxvid/1000, maxvid%1000);
+                                       minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000);
                return;
        }
 
-       if (minvid == maxvid) {
+       if (minvid.mV == maxvid.mV) {
                printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are "
                                "both %d.%03d. Voltage scaling disabled\n",
-                               maxvid/1000, maxvid%1000);
+                               maxvid.mV/1000, maxvid.mV%1000);
                return;
        }
 
-       if (vrmrev==0) {
-               dprintk ("VRM 8.5\n");
-               memcpy (voltage_table, vrm85scales, sizeof(voltage_table));
-               numvscales = (voltage_table[maxvid]-voltage_table[minvid])/25;
-       } else {
-               dprintk ("Mobile VRM\n");
-               memcpy (voltage_table, mobilevrmscales, sizeof(voltage_table));
-               numvscales = (voltage_table[maxvid]-voltage_table[minvid])/5;
+       printk(KERN_INFO PFX "Max VID=%d.%03d  Min VID=%d.%03d, %d possible voltage scales\n",
+               maxvid.mV/1000, maxvid.mV%1000,
+               minvid.mV/1000, minvid.mV%1000,
+               numvscales);
+       
+       j = 0;
+       while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
+               speed = longhaul_table[j].frequency;
+               pos = (speed - lowest_speed) / kHz_step + minvid.pos;
+               f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos];
+               j++;
        }
 
-       /* Current voltage isn't readable at first, so we need to
-          set it to a known value. The spec says to use maxvid */
-       longhaul.bits.RevisionKey = longhaul.bits.RevisionID;   /* FIXME: This is bad. */
-       longhaul.bits.EnableSoftVID = 1;
-       longhaul.bits.SoftVID = maxvid;
-       wrmsrl (MSR_VIA_LONGHAUL, longhaul.val);
-
-       minvid = voltage_table[minvid];
-       maxvid = voltage_table[maxvid];
-
-       dprintk ("Min VID=%d.%03d Max VID=%d.%03d, %d possible voltage scales\n",
-               maxvid/1000, maxvid%1000, minvid/1000, minvid%1000, numvscales);
-
        can_scale_voltage = 1;
 }
 
@@ -540,21 +563,33 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
        return 1;
 }
 
+/* VIA don't support PM2 reg, but have something similar */
+static int enable_arbiter_disable(void)
+{
+       struct pci_dev *dev;
+       u8 pci_cmd;
+
+       /* Find PLE133 host bridge */
+       dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, NULL);
+       if (dev != NULL) {
+               /* Enable access to port 0x22 */
+               pci_read_config_byte(dev, 0x78, &pci_cmd);
+               if ( !(pci_cmd & 1<<7) ) {
+                       pci_cmd |= 1<<7;
+                       pci_write_config_byte(dev, 0x78, pci_cmd);
+               }
+               return 1;
+       }
+       return 0;
+}
+
 static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 {
        struct cpuinfo_x86 *c = cpu_data;
        char *cpuname=NULL;
        int ret;
 
-       /* Check ACPI support for C3 state */
-       acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
-                        &longhaul_walk_callback, NULL, (void *)&pr);
-       if (pr == NULL) goto err_acpi;
-
-       cx = &pr->power.states[ACPI_STATE_C3];
-       if (cx->address == 0 || cx->latency > 1000) goto err_acpi;
-
-       /* Now check what we have on this motherboard */
+       /* Check what we have on this motherboard */
        switch (c->x86_model) {
        case 6:
                cpu_model = CPU_SAMUEL;
@@ -636,12 +671,36 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
                break;
        };
 
+       /* Find ACPI data for processor */
+       acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
+                           &longhaul_walk_callback, NULL, (void *)&pr);
+       if (pr == NULL)
+               goto err_acpi;
+
+       if (longhaul_version == TYPE_POWERSAVER) {
+               /* Check ACPI support for C3 state */
+               cx = &pr->power.states[ACPI_STATE_C3];
+               if (cx->address == 0 ||
+                  (cx->latency > 1000 && ignore_latency == 0) )
+                       goto err_acpi;
+
+       } else {
+               /* Check ACPI support for bus master arbiter disable */
+               if (!pr->flags.bm_control) {
+                       if (!enable_arbiter_disable()) {
+                               printk(KERN_ERR PFX "No ACPI support. No VT8601 host bridge. Aborting.\n");
+                               return -ENODEV;
+                       } else
+                               port22_en = 1;
+               }
+       }
+
        ret = longhaul_get_ranges();
        if (ret != 0)
                return ret;
 
        if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) &&
-                (dont_scale_voltage==0))
+                (scale_voltage != 0))
                longhaul_setup_voltagescaling();
 
        policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
@@ -729,8 +788,10 @@ static void __exit longhaul_exit(void)
        kfree(longhaul_table);
 }
 
-module_param (dont_scale_voltage, int, 0644);
-MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor");
+module_param (scale_voltage, int, 0644);
+MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
+module_param(ignore_latency, int, 0644);
+MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test");
 
 MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
 MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
@@ -738,4 +799,3 @@ MODULE_LICENSE ("GPL");
 
 late_initcall(longhaul_init);
 module_exit(longhaul_exit);
-
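
The voltage-scaling rework above stops treating VIDs as bare numbers: each supported voltage is carried as a struct mV_pos (millivolts plus a position in the VRM table, defined in the longhaul.h hunk that follows), table frequencies are mapped onto VRM positions by linear interpolation between lowest_speed and highest_speed, and voltages are printed with the mV/1000, mV%1000 split. A small stand-alone sketch of that arithmetic, using invented numbers:

#include <stdio.h>

int main(void)
{
	/* invented example values; the driver derives these from the CPU */
	unsigned int lowest_speed  = 400000;	/* kHz */
	unsigned int highest_speed = 800000;	/* kHz */
	unsigned int min_pos = 4, max_pos = 12;	/* VRM table positions */

	unsigned int numvscales = max_pos - min_pos + 1;
	unsigned int kHz_step = (highest_speed - lowest_speed) / numvscales;
	unsigned int speed = 600000;		/* one longhaul_table entry */
	unsigned int pos = (speed - lowest_speed) / kHz_step + min_pos;
	int mV = 1250;				/* voltage looked up via pos */

	/* same formatting the driver uses: 1250 -> "1.250" volts */
	printf("speed %u kHz -> VRM position %u, %d.%03d V\n",
	       speed, pos, mV / 1000, mV % 1000);
	return 0;
}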
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h
index d3a95d77ee85014f54a79dcb59224c4a6de14770..bc4682aad69b506acd909307ef6eba787013fd64 100644
@@ -450,17 +450,45 @@ static int __initdata nehemiah_c_eblcr[32] = {
  * Voltage scales. Div/Mod by 1000 to get actual voltage.
  * Which scale to use depends on the VRM type in use.
  */
-static int __initdata vrm85scales[32] = {
-       1250, 1200, 1150, 1100, 1050, 1800, 1750, 1700,
-       1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300,
-       1275, 1225, 1175, 1125, 1075, 1825, 1775, 1725,
-       1675, 1625, 1575, 1525, 1475, 1425, 1375, 1325,
+
+struct mV_pos {
+       unsigned short mV;
+       unsigned short pos;
+};
+
+static struct mV_pos __initdata vrm85_mV[32] = {
+       {1250, 8},      {1200, 6},      {1150, 4},      {1100, 2},
+       {1050, 0},      {1800, 30},     {1750, 28},     {1700, 26},
+       {1650, 24},     {1600, 22},     {1550, 20},     {1500, 18},
+       {1450, 16},     {1400, 14},     {1350, 12},     {1300, 10},
+       {1275, 9},      {1225, 7},      {1175, 5},      {1125, 3},
+       {1075, 1},      {1825, 31},     {1775, 29},     {1725, 27},
+       {1675, 25},     {1625, 23},     {1575, 21},     {1525, 19},
+       {1475, 17},     {1425, 15},     {1375, 13},     {1325, 11}
+};
+
+static unsigned char __initdata mV_vrm85[32] = {
+       0x04,   0x14,   0x03,   0x13,   0x02,   0x12,   0x01,   0x11,
+       0x00,   0x10,   0x0f,   0x1f,   0x0e,   0x1e,   0x0d,   0x1d,
+       0x0c,   0x1c,   0x0b,   0x1b,   0x0a,   0x1a,   0x09,   0x19,
+       0x08,   0x18,   0x07,   0x17,   0x06,   0x16,   0x05,   0x15
+};
+
+static struct mV_pos __initdata mobilevrm_mV[32] = {
+       {1750, 31},     {1700, 30},     {1650, 29},     {1600, 28},
+       {1550, 27},     {1500, 26},     {1450, 25},     {1400, 24},
+       {1350, 23},     {1300, 22},     {1250, 21},     {1200, 20},
+       {1150, 19},     {1100, 18},     {1050, 17},     {1000, 16},
+       {975, 15},      {950, 14},      {925, 13},      {900, 12},
+       {875, 11},      {850, 10},      {825, 9},       {800, 8},
+       {775, 7},       {750, 6},       {725, 5},       {700, 4},
+       {675, 3},       {650, 2},       {625, 1},       {600, 0}
 };
 
-static int __initdata mobilevrmscales[32] = {
-       2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
-       1600, 1550, 1500, 1450, 1500, 1350, 1300, -1,
-       1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
-       1075, 1050, 1025, 1000, 975, 950, 925, -1,
+static unsigned char __initdata mV_mobilevrm[32] = {
+       0x1f,   0x1e,   0x1d,   0x1c,   0x1b,   0x1a,   0x19,   0x18,
+       0x17,   0x16,   0x15,   0x14,   0x13,   0x12,   0x11,   0x10,
+       0x0f,   0x0e,   0x0d,   0x0c,   0x0b,   0x0a,   0x09,   0x08,
+       0x07,   0x06,   0x05,   0x04,   0x03,   0x02,   0x01,   0x00
 };
 
index b77f1358bd79e341bd3d2a9a8606819bb1a1f01a..7a9325349e949e8e5be8f7bb3c25e3578fd6d613 100644 (file)
@@ -23,6 +23,7 @@
 
 #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <acpi/processor.h>
 #endif
 
@@ -377,6 +378,35 @@ static int centrino_cpu_early_init_acpi(void)
        return 0;
 }
 
+
+/*
+ * Some BIOSes do SW_ANY coordination internally, either set it up in hw
+ * or do it in BIOS firmware and won't inform about it to OS. If not
+ * detected, this has a side effect of making CPU run at a different speed
+ * than OS intended it to run at. Detect it and handle it cleanly.
+ */
+static int bios_with_sw_any_bug;
+static int __init sw_any_bug_found(struct dmi_system_id *d)
+{
+       bios_with_sw_any_bug = 1;
+       return 0;
+}
+
+
+static struct dmi_system_id sw_any_bug_dmi_table[] = {
+       {
+               .callback = sw_any_bug_found,
+               .ident = "Supermicro Server X6DLP",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
+                       DMI_MATCH(DMI_BIOS_VERSION, "080010"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
+               },
+       },
+       { }
+};
+
+
 /*
  * centrino_cpu_init_acpi - register with ACPI P-States library
  *
@@ -398,14 +428,24 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
                dprintk(PFX "obtaining ACPI data failed\n");
                return -EIO;
        }
+
        policy->shared_type = p->shared_type;
        /*
         * Will let policy->cpus know about dependency only when software 
         * coordination is required.
         */
        if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
-           policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+           policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
                policy->cpus = p->shared_cpu_map;
+       }
+
+#ifdef CONFIG_SMP
+       dmi_check_system(sw_any_bug_dmi_table);
+       if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
+               policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+               policy->cpus = cpu_core_map[cpu];
+       }
+#endif
 
        /* verify the acpi_data */
        if (p->state_count <= 1) {
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 13fcaf5b17960edbec0b8740550fa325006888bd..e49621be66400103b0e69c0f22d1e0d380c4e72f 100644
@@ -1058,8 +1058,8 @@ core99_reset_cpu(struct device_node *node, long param, long value)
        if (np == NULL)
                return -ENODEV;
        for (np = np->child; np != NULL; np = np->sibling) {
-               u32 *num = get_property(np, "reg", NULL);
-               u32 *rst = get_property(np, "soft-reset", NULL);
+               const u32 *num = get_property(np, "reg", NULL);
+               const u32 *rst = get_property(np, "soft-reset", NULL);
                if (num == NULL || rst == NULL)
                        continue;
                if (param == *num) {
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 653eeb64d1e28e448ef67191b7f38de13edbfa89..1949b657b0926158cf10fc2f226fc43b4600b695 100644
@@ -702,7 +702,7 @@ static void __init smp_core99_setup(int ncpus)
        /* GPIO based HW sync on ppc32 Core99 */
        if (pmac_tb_freeze == NULL && !machine_is_compatible("MacRISC4")) {
                struct device_node *cpu;
-               u32 *tbprop = NULL;
+               const u32 *tbprop = NULL;
 
                core99_tb_gpio = KL_GPIO_TB_ENABLE;     /* default value */
                cpu = of_find_node_by_type(NULL, "cpu");
diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c
index 81c0cbd96ff01f271f888892aa79543ffa417661..75ac24d229b1dda145b04ac1f68f317e050e47b3 100644
@@ -277,7 +277,7 @@ void __init ebus_init(void)
        struct pci_dev *pdev;
        struct pcidev_cookie *cookie;
        struct device_node *dp;
-       unsigned long addr, *base;
+       struct resource *p;
        unsigned short pci_command;
        int len, reg, nreg;
        int num_ebus = 0;
@@ -321,13 +321,12 @@ void __init ebus_init(void)
                }
                nreg = len / sizeof(struct linux_prom_pci_registers);
 
-               base = &ebus->self->resource[0].start;
+               p = &ebus->self->resource[0];
                for (reg = 0; reg < nreg; reg++) {
                        if (!(regs[reg].which_io & 0x03000000))
                                continue;
 
-                       addr = regs[reg].phys_lo;
-                       *base++ = addr;
+                       (p++)->start = regs[reg].phys_lo;
                }
 
                ebus->ofdev.node = dp;
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 8654b446ac9ede55439c4639158d296213ae3c2e..d33f8a07ccaca4c26d1f3aebd38a771ba93816f7 100644
@@ -508,6 +508,7 @@ void __init sbus_arch_bus_ranges_init(struct device_node *pn, struct sbus_bus *s
 
 void __init sbus_setup_iommu(struct sbus_bus *sbus, struct device_node *dp)
 {
+#ifndef CONFIG_SUN4
        struct device_node *parent = dp->parent;
 
        if (sparc_cpu_model != sun4d &&
@@ -524,6 +525,7 @@ void __init sbus_setup_iommu(struct sbus_bus *sbus, struct device_node *dp)
 
                iounit_init(dp->node, parent->node, sbus);
        }
+#endif
 }
 
 void __init sbus_setup_arch_props(struct sbus_bus *sbus, struct device_node *dp)
diff --git a/crypto/hmac.c b/crypto/hmac.c
index f403b6946047161db8ea6a11e023f51d9598c0cb..b521bcd2b2c6036f155bc23641d41df877baca18 100644
@@ -92,13 +92,17 @@ static int hmac_init(struct hash_desc *pdesc)
        struct hmac_ctx *ctx = align_ptr(ipad + bs * 2 + ds, sizeof(void *));
        struct hash_desc desc;
        struct scatterlist tmp;
+       int err;
 
        desc.tfm = ctx->child;
        desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
        sg_set_buf(&tmp, ipad, bs);
 
-       return unlikely(crypto_hash_init(&desc)) ?:
-              crypto_hash_update(&desc, &tmp, 1);
+       err = crypto_hash_init(&desc);
+       if (unlikely(err))
+               return err;
+
+       return crypto_hash_update(&desc, &tmp, bs);
 }
 
 static int hmac_update(struct hash_desc *pdesc,
@@ -123,13 +127,17 @@ static int hmac_final(struct hash_desc *pdesc, u8 *out)
        struct hmac_ctx *ctx = align_ptr(digest + ds, sizeof(void *));
        struct hash_desc desc;
        struct scatterlist tmp;
+       int err;
 
        desc.tfm = ctx->child;
        desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
        sg_set_buf(&tmp, opad, bs + ds);
 
-       return unlikely(crypto_hash_final(&desc, digest)) ?:
-              crypto_hash_digest(&desc, &tmp, bs + ds, out);
+       err = crypto_hash_final(&desc, digest);
+       if (unlikely(err))
+               return err;
+
+       return crypto_hash_digest(&desc, &tmp, bs + ds, out);
 }
 
 static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
@@ -145,6 +153,7 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
        struct hash_desc desc;
        struct scatterlist sg1[2];
        struct scatterlist sg2[1];
+       int err;
 
        desc.tfm = ctx->child;
        desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
@@ -154,8 +163,11 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
        sg1[1].length = 0;
        sg_set_buf(sg2, opad, bs + ds);
 
-       return unlikely(crypto_hash_digest(&desc, sg1, nbytes + bs, digest)) ?:
-              crypto_hash_digest(&desc, sg2, bs + ds, out);
+       err = crypto_hash_digest(&desc, sg1, nbytes + bs, digest);
+       if (unlikely(err))
+               return err;
+
+       return crypto_hash_digest(&desc, sg2, bs + ds, out);
 }
 
 static int hmac_init_tfm(struct crypto_tfm *tfm)
diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h
index 3c623b67ea1cd87de906a65c6ccb6655cc9f46ab..8b3317fd46c9a8fdb9c6ce02f390639fbb61e315 100644
@@ -117,7 +117,7 @@ struct agp_bridge_driver {
 };
 
 struct agp_bridge_data {
-       struct agp_version *version;
+       const struct agp_version *version;
        struct agp_bridge_driver *driver;
        struct vm_operations_struct *vm_ops;
        void *previous_size;
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 509adc403250a8126f72dac666493a680d06f8ad..d59e037ddd1234e395b2a277f0c34c45cacb03b1 100644
@@ -44,7 +44,7 @@
  * past 0.99 at all due to some boolean logic error. */
 #define AGPGART_VERSION_MAJOR 0
 #define AGPGART_VERSION_MINOR 101
-static struct agp_version agp_current_version =
+static const struct agp_version agp_current_version =
 {
        .major = AGPGART_VERSION_MAJOR,
        .minor = AGPGART_VERSION_MINOR,
diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c
index b788b0a3bbf333e7b1a26d49525ea972f19f2e33..30f730ff81c1717c82eb3f50a8d94a1194255350 100644
@@ -337,13 +337,6 @@ static struct agp_bridge_driver efficeon_driver = {
        .agp_destroy_page       = agp_generic_destroy_page,
 };
 
-
-static int agp_efficeon_resume(struct pci_dev *pdev)
-{
-       printk(KERN_DEBUG PFX "agp_efficeon_resume()\n");
-       return efficeon_configure();
-}
-
 static int __devinit agp_efficeon_probe(struct pci_dev *pdev,
                                     const struct pci_device_id *ent)
 {
@@ -414,11 +407,18 @@ static void __devexit agp_efficeon_remove(struct pci_dev *pdev)
        agp_put_bridge(bridge);
 }
 
+#ifdef CONFIG_PM
 static int agp_efficeon_suspend(struct pci_dev *dev, pm_message_t state)
 {
        return 0;
 }
 
+static int agp_efficeon_resume(struct pci_dev *pdev)
+{
+       printk(KERN_DEBUG PFX "agp_efficeon_resume()\n");
+       return efficeon_configure();
+}
+#endif
 
 static struct pci_device_id agp_efficeon_pci_table[] = {
        {
@@ -439,8 +439,10 @@ static struct pci_driver agp_efficeon_pci_driver = {
        .id_table       = agp_efficeon_pci_table,
        .probe          = agp_efficeon_probe,
        .remove         = agp_efficeon_remove,
+#ifdef CONFIG_PM
        .suspend        = agp_efficeon_suspend,
        .resume         = agp_efficeon_resume,
+#endif
 };
 
 static int __init agp_efficeon_init(void)
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index d9c5a9142ad1daa4a5e5de83e11175e7e0863e28..0f2ed2aa2d815039d7cd6cd3dc26a20a4e15791e 100644
@@ -151,35 +151,12 @@ static void agp_add_seg_to_client(struct agp_client *client,
        client->segments = seg;
 }
 
-/* Originally taken from linux/mm/mmap.c from the array
- * protection_map.
- * The original really should be exported to modules, or
- * some routine which does the conversion for you
- */
-
-static const pgprot_t my_protect_map[16] =
-{
-       __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
-       __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
-};
-
 static pgprot_t agp_convert_mmap_flags(int prot)
 {
-#define _trans(x,bit1,bit2) \
-((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)
-
        unsigned long prot_bits;
-       pgprot_t temp;
-
-       prot_bits = _trans(prot, PROT_READ, VM_READ) |
-           _trans(prot, PROT_WRITE, VM_WRITE) |
-           _trans(prot, PROT_EXEC, VM_EXEC);
-
-       prot_bits |= VM_SHARED;
 
-       temp = my_protect_map[prot_bits & 0x0000000f];
-
-       return temp;
+       prot_bits = calc_vm_prot_bits(prot) | VM_SHARED;
+       return vm_get_page_prot(prot_bits);
 }
 
 static int agp_create_segment(struct agp_client *client, struct agp_region *region)
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index cc5ea347a8a7899a9e3228289697a2cd88378839..0dcdb363923fed70e4fa9f964c563a53bcf4f67c 100644
@@ -568,25 +568,34 @@ static void agp_v3_parse_one(u32 *requested_mode, u32 *bridge_agpstat, u32 *vga_
                *bridge_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD);
                goto done;
 
+       } else if (*requested_mode & AGPSTAT3_4X) {
+               *bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
+               *bridge_agpstat |= AGPSTAT3_4X;
+               goto done;
+
        } else {
 
                /*
-                * If we didn't specify AGPx8, we can only do x4.
-                * If the hardware can't do x4, we're up shit creek, and never
-                *  should have got this far.
+                * If we didn't specify an AGP mode, we see if both
+                * the graphics card, and the bridge can do x8, and use if so.
+                * If not, we fall back to x4 mode.
                 */
-               *bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
-               if ((*bridge_agpstat & AGPSTAT3_4X) && (*vga_agpstat & AGPSTAT3_4X))
-                       *bridge_agpstat |= AGPSTAT3_4X;
-               else {
-                       printk(KERN_INFO PFX "Badness. Don't know which AGP mode to set. "
-                                                       "[bridge_agpstat:%x vga_agpstat:%x fell back to:- bridge_agpstat:%x vga_agpstat:%x]\n",
-                                                       origbridge, origvga, *bridge_agpstat, *vga_agpstat);
-                       if (!(*bridge_agpstat & AGPSTAT3_4X))
-                               printk(KERN_INFO PFX "Bridge couldn't do AGP x4.\n");
-                       if (!(*vga_agpstat & AGPSTAT3_4X))
-                               printk(KERN_INFO PFX "Graphic card couldn't do AGP x4.\n");
-                       return;
+               if ((*bridge_agpstat & AGPSTAT3_8X) && (*vga_agpstat & AGPSTAT3_8X)) {
+                       printk(KERN_INFO PFX "No AGP mode specified. Setting to highest mode supported by bridge & card (x8).\n");
+                       *bridge_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD);
+                       *vga_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD);
+               } else {
+                       printk(KERN_INFO PFX "Fell back to AGPx4 mode because");
+                       if (!(*bridge_agpstat & AGPSTAT3_8X)) {
+                               printk("bridge couldn't do x8. bridge_agpstat:%x (orig=%x)\n", *bridge_agpstat, origbridge);
+                               *bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
+                               *bridge_agpstat |= AGPSTAT3_4X;
+                       }
+                       if (!(*vga_agpstat & AGPSTAT3_8X)) {
+                               printk("graphics card couldn't do x8. vga_agpstat:%x (orig=%x)\n", *vga_agpstat, origvga);
+                               *vga_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
+                               *vga_agpstat |= AGPSTAT3_4X;
+                       }
                }
        }
 
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 61ac3809f997cbbb9932a8eb3f0428d63195a2d4..d1ede7db5a12f46027174a41ae52c2692b180bf9 100644
@@ -2,14 +2,6 @@
  * Intel AGPGART routines.
  */
 
-/*
- * Intel(R) 855GM/852GM and 865G support added by David Dawes
- * <dawes@tungstengraphics.com>.
- *
- * Intel(R) 915G/915GM support added by Alan Hourihane
- * <alanh@tungstengraphics.com>.
- */
-
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/init.h>
@@ -17,6 +9,21 @@
 #include <linux/agp_backend.h>
 #include "agp.h"
 
+#define PCI_DEVICE_ID_INTEL_82946GZ_HB      0x2970
+#define PCI_DEVICE_ID_INTEL_82946GZ_IG      0x2972
+#define PCI_DEVICE_ID_INTEL_82965G_1_HB     0x2980
+#define PCI_DEVICE_ID_INTEL_82965G_1_IG     0x2982
+#define PCI_DEVICE_ID_INTEL_82965Q_HB       0x2990
+#define PCI_DEVICE_ID_INTEL_82965Q_IG       0x2992
+#define PCI_DEVICE_ID_INTEL_82965G_HB       0x29A0
+#define PCI_DEVICE_ID_INTEL_82965G_IG       0x29A2
+
+#define IS_I965 (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82946GZ_HB || \
+                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_1_HB || \
+                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965Q_HB || \
+                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_HB)
+
+
 /* Intel 815 register */
 #define INTEL_815_APCONT       0x51
 #define INTEL_815_ATTBASE_MASK ~0x1FFFFFFF
@@ -40,6 +47,8 @@
 #define I915_GMCH_GMS_STOLEN_48M       (0x6 << 4)
 #define I915_GMCH_GMS_STOLEN_64M       (0x7 << 4)
 
+/* Intel 965G registers */
+#define I965_MSAC 0x62
 
 /* Intel 7505 registers */
 #define INTEL_I7505_APSIZE     0x74
@@ -354,6 +363,7 @@ static struct aper_size_info_fixed intel_i830_sizes[] =
        /* The 64M mode still requires a 128k gatt */
        {64, 16384, 5},
        {256, 65536, 6},
+       {512, 131072, 7},
 };
 
 static struct _intel_i830_private {
@@ -377,7 +387,11 @@ static void intel_i830_init_gtt_entries(void)
        /* We obtain the size of the GTT, which is also stored (for some
         * reason) at the top of stolen memory. Then we add 4KB to that
         * for the video BIOS popup, which is also stored in there. */
-       size = agp_bridge->driver->fetch_size() + 4;
+
+       if (IS_I965)
+               size = 512 + 4;
+       else
+               size = agp_bridge->driver->fetch_size() + 4;
 
        if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82830_HB ||
            agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82845G_HB) {
@@ -423,7 +437,7 @@ static void intel_i830_init_gtt_entries(void)
                        if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB ||
                            agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB ||
                            agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB ||
-                           agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB)
+                           agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB || IS_I965 )
                                gtt_entries = MB(48) - KB(size);
                        else
                                gtt_entries = 0;
@@ -433,7 +447,7 @@ static void intel_i830_init_gtt_entries(void)
                        if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB ||
                            agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB ||
                            agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB ||
-                           agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB)
+                           agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB || IS_I965)
                                gtt_entries = MB(64) - KB(size);
                        else
                                gtt_entries = 0;
@@ -791,6 +805,77 @@ static int intel_i915_create_gatt_table(struct agp_bridge_data *bridge)
 
        return 0;
 }
+static int intel_i965_fetch_size(void)
+{
+       struct aper_size_info_fixed *values;
+       u32 offset = 0;
+       u8 temp;
+
+#define I965_512MB_ADDRESS_MASK (3<<1)
+
+       values = A_SIZE_FIX(agp_bridge->driver->aperture_sizes);
+
+       pci_read_config_byte(intel_i830_private.i830_dev, I965_MSAC, &temp);
+       temp &= I965_512MB_ADDRESS_MASK;
+       switch (temp) {
+       case 0x00:
+               offset = 0; /* 128MB */
+               break;
+       case 0x06:
+               offset = 3; /* 512MB */
+               break;
+       default:
+       case 0x02:
+               offset = 2; /* 256MB */
+               break;
+       }
+
+       agp_bridge->previous_size = agp_bridge->current_size = (void *)(values + offset);
+
+       return values[offset].size;
+}
+
+/* The intel i965 automatically initializes the agp aperture during POST.
++ * Use the memory already set aside for in the GTT.
++ */
+static int intel_i965_create_gatt_table(struct agp_bridge_data *bridge)
+{
+       int page_order;
+       struct aper_size_info_fixed *size;
+       int num_entries;
+       u32 temp;
+
+       size = agp_bridge->current_size;
+       page_order = size->page_order;
+       num_entries = size->num_entries;
+       agp_bridge->gatt_table_real = NULL;
+
+       pci_read_config_dword(intel_i830_private.i830_dev, I915_MMADDR, &temp);
+
+       temp &= 0xfff00000;
+       intel_i830_private.gtt = ioremap((temp + (512 * 1024)) , 512 * 1024);
+
+       if (!intel_i830_private.gtt)
+               return -ENOMEM;
+
+
+       intel_i830_private.registers = ioremap(temp,128 * 4096);
+       if (!intel_i830_private.registers)
+               return -ENOMEM;
+
+       temp = readl(intel_i830_private.registers+I810_PGETBL_CTL) & 0xfffff000;
+       global_cache_flush();   /* FIXME: ? */
+
+       /* we have to call this as early as possible after the MMIO base address is known */
+       intel_i830_init_gtt_entries();
+
+       agp_bridge->gatt_table = NULL;
+
+       agp_bridge->gatt_bus_addr = temp;
+
+       return 0;
+}
+
 
 static int intel_fetch_size(void)
 {
@@ -1307,7 +1392,7 @@ static struct agp_bridge_driver intel_830_driver = {
        .owner                  = THIS_MODULE,
        .aperture_sizes         = intel_i830_sizes,
        .size_type              = FIXED_APER_SIZE,
-       .num_aperture_sizes     = 3,
+       .num_aperture_sizes     = 4,
        .needs_scratch_page     = TRUE,
        .configure              = intel_i830_configure,
        .fetch_size             = intel_i830_fetch_size,
@@ -1469,7 +1554,7 @@ static struct agp_bridge_driver intel_915_driver = {
        .owner                  = THIS_MODULE,
        .aperture_sizes         = intel_i830_sizes,
        .size_type              = FIXED_APER_SIZE,
-       .num_aperture_sizes     = 3,
+       .num_aperture_sizes     = 4,
        .needs_scratch_page     = TRUE,
        .configure              = intel_i915_configure,
        .fetch_size             = intel_i915_fetch_size,
@@ -1489,6 +1574,29 @@ static struct agp_bridge_driver intel_915_driver = {
        .agp_destroy_page       = agp_generic_destroy_page,
 };
 
+static struct agp_bridge_driver intel_i965_driver = {
+       .owner                  = THIS_MODULE,
+       .aperture_sizes         = intel_i830_sizes,
+       .size_type              = FIXED_APER_SIZE,
+       .num_aperture_sizes     = 4,
+       .needs_scratch_page     = TRUE,
+       .configure              = intel_i915_configure,
+       .fetch_size             = intel_i965_fetch_size,
+       .cleanup                = intel_i915_cleanup,
+       .tlb_flush              = intel_i810_tlbflush,
+       .mask_memory            = intel_i810_mask_memory,
+       .masks                  = intel_i810_masks,
+       .agp_enable             = intel_i810_agp_enable,
+       .cache_flush            = global_cache_flush,
+       .create_gatt_table      = intel_i965_create_gatt_table,
+       .free_gatt_table        = intel_i830_free_gatt_table,
+       .insert_memory          = intel_i915_insert_entries,
+       .remove_memory          = intel_i915_remove_entries,
+       .alloc_by_type          = intel_i830_alloc_by_type,
+       .free_by_type           = intel_i810_free_by_type,
+       .agp_alloc_page         = agp_generic_alloc_page,
+       .agp_destroy_page       = agp_generic_destroy_page,
+};
 
 static struct agp_bridge_driver intel_7505_driver = {
        .owner                  = THIS_MODULE,
@@ -1684,6 +1792,35 @@ static int __devinit agp_intel_probe(struct pci_dev *pdev,
                        bridge->driver = &intel_845_driver;
                name = "945GM";
                break;
+       case PCI_DEVICE_ID_INTEL_82946GZ_HB:
+               if (find_i830(PCI_DEVICE_ID_INTEL_82946GZ_IG))
+                       bridge->driver = &intel_i965_driver;
+               else
+                       bridge->driver = &intel_845_driver;
+               name = "946GZ";
+               break;
+       case PCI_DEVICE_ID_INTEL_82965G_1_HB:
+               if (find_i830(PCI_DEVICE_ID_INTEL_82965G_1_IG))
+                       bridge->driver = &intel_i965_driver;
+               else
+                       bridge->driver = &intel_845_driver;
+               name = "965G";
+               break;
+       case PCI_DEVICE_ID_INTEL_82965Q_HB:
+               if (find_i830(PCI_DEVICE_ID_INTEL_82965Q_IG))
+                       bridge->driver = &intel_i965_driver;
+               else
+                       bridge->driver = &intel_845_driver;
+               name = "965Q";
+               break;
+       case PCI_DEVICE_ID_INTEL_82965G_HB:
+               if (find_i830(PCI_DEVICE_ID_INTEL_82965G_IG))
+                       bridge->driver = &intel_i965_driver;
+               else
+                       bridge->driver = &intel_845_driver;
+               name = "965G";
+               break;
+
        case PCI_DEVICE_ID_INTEL_7505_0:
                bridge->driver = &intel_7505_driver;
                name = "E7505";
@@ -1766,6 +1903,7 @@ static void __devexit agp_intel_remove(struct pci_dev *pdev)
        agp_put_bridge(bridge);
 }
 
+#ifdef CONFIG_PM
 static int agp_intel_resume(struct pci_dev *pdev)
 {
        struct agp_bridge_data *bridge = pci_get_drvdata(pdev);
@@ -1786,9 +1924,12 @@ static int agp_intel_resume(struct pci_dev *pdev)
                intel_i830_configure();
        else if (bridge->driver == &intel_810_driver)
                intel_i810_configure();
+       else if (bridge->driver == &intel_i965_driver)
+               intel_i915_configure();
 
        return 0;
 }
+#endif
 
 static struct pci_device_id agp_intel_pci_table[] = {
 #define ID(x)                                          \
@@ -1825,6 +1966,10 @@ static struct pci_device_id agp_intel_pci_table[] = {
        ID(PCI_DEVICE_ID_INTEL_82915GM_HB),
        ID(PCI_DEVICE_ID_INTEL_82945G_HB),
        ID(PCI_DEVICE_ID_INTEL_82945GM_HB),
+       ID(PCI_DEVICE_ID_INTEL_82946GZ_HB),
+       ID(PCI_DEVICE_ID_INTEL_82965G_1_HB),
+       ID(PCI_DEVICE_ID_INTEL_82965Q_HB),
+       ID(PCI_DEVICE_ID_INTEL_82965G_HB),
        { }
 };
 
@@ -1835,7 +1980,9 @@ static struct pci_driver agp_intel_pci_driver = {
        .id_table       = agp_intel_pci_table,
        .probe          = agp_intel_probe,
        .remove         = __devexit_p(agp_intel_remove),
+#ifdef CONFIG_PM
        .resume         = agp_intel_resume,
+#endif
 };
 
 static int __init agp_intel_init(void)
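
To make the stolen-memory arithmetic in the i965 additions concrete: the new path hard-codes size = 512 + 4 = 516 KB (a 512 KB GTT plus the 4 KB video BIOS popup stored above it) instead of using fetch_size() + 4 as the older chipsets do, so with the 48 MB stolen-memory setting the code above computes gtt_entries = MB(48) - KB(516), and with the 64 MB setting it is MB(64) - KB(516).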
diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c
index b8ec25d174787d56c4df0d07294a76f7a4328ff5..c149ac9ce9a7668726527e1f837bad9a84a7192a 100644
@@ -9,7 +9,7 @@
 #include <linux/agp_backend.h>
 #include "agp.h"
 
-static struct pci_device_id agp_via_pci_table[];
+static const struct pci_device_id agp_via_pci_table[];
 
 #define VIA_GARTCTRL   0x80
 #define VIA_APSIZE     0x84
@@ -485,7 +485,7 @@ static int agp_via_resume(struct pci_dev *pdev)
 #endif /* CONFIG_PM */
 
 /* must be the same order as name table above */
-static struct pci_device_id agp_via_pci_table[] = {
+static const struct pci_device_id agp_via_pci_table[] = {
 #define ID(x) \
        {                                               \
        .class          = (PCI_CLASS_BRIDGE_HOST << 8), \
diff --git a/drivers/char/briq_panel.c b/drivers/char/briq_panel.c
index a0e5eac5f33aa73821c0fe4f87e339034178009d..b8c22255f6ada92615b2e70d8ad717927e04c9ae 100644
@@ -87,7 +87,7 @@ static int briq_panel_release(struct inode *ino, struct file *filep)
        return 0;
 }
 
-static ssize_t briq_panel_read(struct file *file, char *buf, size_t count,
+static ssize_t briq_panel_read(struct file *file, char __user *buf, size_t count,
                         loff_t *ppos)
 {
        unsigned short c;
@@ -135,7 +135,7 @@ static void scroll_vfd( void )
        vfd_cursor = 20;
 }
 
-static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len,
+static ssize_t briq_panel_write(struct file *file, const char __user *buf, size_t len,
                          loff_t *ppos)
 {
        size_t indx = len;
@@ -150,19 +150,22 @@ static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len,
                return -EBUSY;
 
        for (;;) {
+               char c;
                if (!indx)
                        break;
+               if (get_user(c, buf))
+                       return -EFAULT;
                if (esc) {
-                       set_led(*buf);
+                       set_led(c);
                        esc = 0;
-               } else if (*buf == 27) {
+               } else if (c == 27) {
                        esc = 1;
-               } else if (*buf == 12) {
+               } else if (c == 12) {
                        /* do a form feed */
                        for (i=0; i<40; i++)
                                vfd[i] = ' ';
                        vfd_cursor = 0;
-               } else if (*buf == 10) {
+               } else if (c == 10) {
                        if (vfd_cursor < 20)
                                vfd_cursor = 20;
                        else if (vfd_cursor < 40)
@@ -175,7 +178,7 @@ static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len,
                        /* just a character */
                        if (vfd_cursor > 39)
                                scroll_vfd();
-                       vfd[vfd_cursor++] = *buf;
+                       vfd[vfd_cursor++] = c;
                }
                indx--;
                buf++;
@@ -202,7 +205,7 @@ static struct miscdevice briq_panel_miscdev = {
 static int __init briq_panel_init(void)
 {
        struct device_node *root = find_path_device("/");
-       char *machine;
+       const char *machine;
        int i;
 
        machine = get_property(root, "model", NULL);
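
The briq_panel hunks above stop dereferencing the user-supplied buffer directly and instead annotate it __user and fetch each byte with get_user(), which fails cleanly on a bad pointer. A minimal sketch of the same pattern (demo_write and its body are hypothetical, not this driver's code; needs <linux/fs.h> and <asm/uaccess.h>):

static ssize_t demo_write(struct file *file, const char __user *buf,
			  size_t len, loff_t *ppos)
{
	size_t i;

	for (i = 0; i < len; i++) {
		char c;

		/* get_user() returns 0 on success, -EFAULT on a faulting address */
		if (get_user(c, buf + i))
			return -EFAULT;
		/* ... consume c, e.g. feed it to the display state machine ... */
	}
	return len;
}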
index 84dfc4278139eaf809b666d47bd7de8623aa9898..8c09997cc3d63b9108c7c4f948a9eb611a580bff 100644 (file)
@@ -3488,7 +3488,7 @@ static int stli_initecp(stlibrd_t *brdp)
  */
        EBRDENABLE(brdp);
        sigsp = (cdkecpsig_t __iomem *) EBRDGETMEMPTR(brdp, CDK_SIGADDR);
-       memcpy(&sig, sigsp, sizeof(cdkecpsig_t));
+       memcpy_fromio(&sig, sigsp, sizeof(cdkecpsig_t));
        EBRDDISABLE(brdp);
 
        if (sig.magic != cpu_to_le32(ECP_MAGIC))
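
The one-line istallion change above is needed because sigsp points at bus (__iomem) memory returned by EBRDGETMEMPTR(), and plain memcpy() is not defined for that address space; memcpy_fromio() goes through the MMIO accessors. A hedged sketch of the pattern with hypothetical names (needs <asm/io.h>; 'base' is a driver-provided __iomem pointer):

struct demo_sig {			/* illustrative layout only */
	__le32 magic;
};

static void demo_read_sig(struct demo_sig *sig, void __iomem *base)
{
	memcpy_fromio(sig, base, sizeof(*sig));	/* device memory -> RAM */
}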
index b3df613ae4ec84dfbcf75ad40b4487d68355ba70..d35a9f06ab7b6fe25420c53dd657e849fc00cbed 100644 (file)
@@ -32,7 +32,7 @@
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "cpufreq-core", msg)
 
 /**
- * The "cpufreq driver" - the arch- or hardware-dependend low
+ * The "cpufreq driver" - the arch- or hardware-dependent low
  * level driver of CPUFreq support, and its spinlock. This lock
  * also protects the cpufreq_cpu_data array.
  */
index 52cf1f02182591f64f527144e1e3a1394ee9a050..bf8aa45d4f019cccd866103d278fa9e7bd4f669d 100644 (file)
@@ -55,6 +55,10 @@ struct cpu_dbs_info_s {
        struct cpufreq_policy *cur_policy;
        struct work_struct work;
        unsigned int enable;
+       struct cpufreq_frequency_table *freq_table;
+       unsigned int freq_lo;
+       unsigned int freq_lo_jiffies;
+       unsigned int freq_hi_jiffies;
 };
 static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
 
@@ -72,15 +76,15 @@ static DEFINE_MUTEX(dbs_mutex);
 
 static struct workqueue_struct *kondemand_wq;
 
-struct dbs_tuners {
+static struct dbs_tuners {
        unsigned int sampling_rate;
        unsigned int up_threshold;
        unsigned int ignore_nice;
-};
-
-static struct dbs_tuners dbs_tuners_ins = {
+       unsigned int powersave_bias;
+} dbs_tuners_ins = {
        .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
        .ignore_nice = 0,
+       .powersave_bias = 0,
 };
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
@@ -96,6 +100,70 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
        return retval;
 }
 
+/*
+ * Find the right freq to be set now with powersave_bias on.
+ * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
+ * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
+ */
+static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
+                                         unsigned int freq_next,
+                                         unsigned int relation)
+{
+       unsigned int freq_req, freq_reduc, freq_avg;
+       unsigned int freq_hi, freq_lo;
+       unsigned int index = 0;
+       unsigned int jiffies_total, jiffies_hi, jiffies_lo;
+       struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu);
+
+       if (!dbs_info->freq_table) {
+               dbs_info->freq_lo = 0;
+               dbs_info->freq_lo_jiffies = 0;
+               return freq_next;
+       }
+
+       cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
+                       relation, &index);
+       freq_req = dbs_info->freq_table[index].frequency;
+       freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
+       freq_avg = freq_req - freq_reduc;
+
+       /* Find freq bounds for freq_avg in freq_table */
+       index = 0;
+       cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
+                       CPUFREQ_RELATION_H, &index);
+       freq_lo = dbs_info->freq_table[index].frequency;
+       index = 0;
+       cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
+                       CPUFREQ_RELATION_L, &index);
+       freq_hi = dbs_info->freq_table[index].frequency;
+
+       /* Find out how long we have to be in hi and lo freqs */
+       if (freq_hi == freq_lo) {
+               dbs_info->freq_lo = 0;
+               dbs_info->freq_lo_jiffies = 0;
+               return freq_lo;
+       }
+       jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+       jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
+       jiffies_hi += ((freq_hi - freq_lo) / 2);
+       jiffies_hi /= (freq_hi - freq_lo);
+       jiffies_lo = jiffies_total - jiffies_hi;
+       dbs_info->freq_lo = freq_lo;
+       dbs_info->freq_lo_jiffies = jiffies_lo;
+       dbs_info->freq_hi_jiffies = jiffies_hi;
+       return freq_hi;
+}
+
+static void ondemand_powersave_bias_init(void)
+{
+       int i;
+       for_each_online_cpu(i) {
+               struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, i);
+               dbs_info->freq_table = cpufreq_frequency_get_table(i);
+               dbs_info->freq_lo = 0;
+       }
+}
+
 /************************** sysfs interface ************************/
 static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
 {
@@ -124,6 +192,7 @@ static ssize_t show_##file_name                                             \
 show_one(sampling_rate, sampling_rate);
 show_one(up_threshold, up_threshold);
 show_one(ignore_nice_load, ignore_nice);
+show_one(powersave_bias, powersave_bias);
 
 static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
                const char *buf, size_t count)
@@ -198,6 +267,27 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
        return count;
 }
 
+static ssize_t store_powersave_bias(struct cpufreq_policy *unused,
+               const char *buf, size_t count)
+{
+       unsigned int input;
+       int ret;
+       ret = sscanf(buf, "%u", &input);
+
+       if (ret != 1)
+               return -EINVAL;
+
+       if (input > 1000)
+               input = 1000;
+
+       mutex_lock(&dbs_mutex);
+       dbs_tuners_ins.powersave_bias = input;
+       ondemand_powersave_bias_init();
+       mutex_unlock(&dbs_mutex);
+
+       return count;
+}
+
 #define define_one_rw(_name) \
 static struct freq_attr _name = \
 __ATTR(_name, 0644, show_##_name, store_##_name)
@@ -205,6 +295,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name)
 define_one_rw(sampling_rate);
 define_one_rw(up_threshold);
 define_one_rw(ignore_nice_load);
+define_one_rw(powersave_bias);
 
 static struct attribute * dbs_attributes[] = {
        &sampling_rate_max.attr,
@@ -212,6 +303,7 @@ static struct attribute * dbs_attributes[] = {
        &sampling_rate.attr,
        &up_threshold.attr,
        &ignore_nice_load.attr,
+       &powersave_bias.attr,
        NULL
 };
 
@@ -234,6 +326,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
        if (!this_dbs_info->enable)
                return;
 
+       this_dbs_info->freq_lo = 0;
        policy = this_dbs_info->cur_policy;
        cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
        total_ticks = (unsigned int) cputime64_sub(cur_jiffies,
@@ -274,11 +367,18 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
        /* Check for frequency increase */
        if (load > dbs_tuners_ins.up_threshold) {
                /* if we are already at full speed then break out early */
-               if (policy->cur == policy->max)
-                       return;
-
-               __cpufreq_driver_target(policy, policy->max,
-                       CPUFREQ_RELATION_H);
+               if (!dbs_tuners_ins.powersave_bias) {
+                       if (policy->cur == policy->max)
+                               return;
+
+                       __cpufreq_driver_target(policy, policy->max,
+                               CPUFREQ_RELATION_H);
+               } else {
+                       int freq = powersave_bias_target(policy, policy->max,
+                                       CPUFREQ_RELATION_H);
+                       __cpufreq_driver_target(policy, freq,
+                               CPUFREQ_RELATION_L);
+               }
                return;
        }
 
@@ -293,37 +393,64 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
         * policy. To be safe, we focus 10 points under the threshold.
         */
        if (load < (dbs_tuners_ins.up_threshold - 10)) {
-               unsigned int freq_next;
-               freq_next = (policy->cur * load) /
+               unsigned int freq_next = (policy->cur * load) /
                        (dbs_tuners_ins.up_threshold - 10);
-
-               __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
+               if (!dbs_tuners_ins.powersave_bias) {
+                       __cpufreq_driver_target(policy, freq_next,
+                                       CPUFREQ_RELATION_L);
+               } else {
+                       int freq = powersave_bias_target(policy, freq_next,
+                                       CPUFREQ_RELATION_L);
+                       __cpufreq_driver_target(policy, freq,
+                               CPUFREQ_RELATION_L);
+               }
        }
 }
 
+/* Sampling types */
+enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
+
 static void do_dbs_timer(void *data)
 {
        unsigned int cpu = smp_processor_id();
        struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+       /* We want all CPUs to do sampling nearly on the same jiffy */
+       int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+       delay -= jiffies % delay;
 
        if (!dbs_info->enable)
                return;
-
-       lock_cpu_hotplug();
-       dbs_check_cpu(dbs_info);
-       unlock_cpu_hotplug();
-       queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
-                       usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
+       /* Common NORMAL_SAMPLE setup */
+       INIT_WORK(&dbs_info->work, do_dbs_timer, (void *)DBS_NORMAL_SAMPLE);
+       if (!dbs_tuners_ins.powersave_bias ||
+           (unsigned long) data == DBS_NORMAL_SAMPLE) {
+               lock_cpu_hotplug();
+               dbs_check_cpu(dbs_info);
+               unlock_cpu_hotplug();
+               if (dbs_info->freq_lo) {
+                       /* Setup timer for SUB_SAMPLE */
+                       INIT_WORK(&dbs_info->work, do_dbs_timer,
+                                       (void *)DBS_SUB_SAMPLE);
+                       delay = dbs_info->freq_hi_jiffies;
+               }
+       } else {
+               __cpufreq_driver_target(dbs_info->cur_policy,
+                                       dbs_info->freq_lo,
+                                       CPUFREQ_RELATION_H);
+       }
+       queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
 
 static inline void dbs_timer_init(unsigned int cpu)
 {
        struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+       /* We want all CPUs to do sampling nearly on the same jiffy */
+       int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+       delay -= jiffies % delay;
 
-       INIT_WORK(&dbs_info->work, do_dbs_timer, 0);
-       queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
-                       usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
-       return;
+       ondemand_powersave_bias_init();
+       INIT_WORK(&dbs_info->work, do_dbs_timer, NULL);
+       queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
 
 static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
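
Worked example of the arithmetic in powersave_bias_target() above: the requested frequency is reduced by powersave_bias/1000, then the governor time-slices between the two table frequencies bracketing that average so the long-run mean matches it. All numbers below are illustrative, and the program is a plain userspace repro of the driver's integer math, not driver code:

#include <stdio.h>

int main(void)
{
	unsigned int freq_req = 2000000;	/* requested table entry, kHz (hypothetical) */
	unsigned int bias = 100;		/* powersave_bias of 10.0%                   */
	unsigned int freq_lo = 1600000;		/* nearest table entry below the average     */
	unsigned int freq_hi = 2000000;		/* nearest table entry above the average     */
	unsigned int jiffies_total = 50;	/* sampling interval in jiffies              */

	unsigned int freq_avg = freq_req - freq_req * bias / 1000;	/* 1800000 */

	/* same rounded interpolation the driver uses */
	unsigned int jiffies_hi = ((freq_avg - freq_lo) * jiffies_total +
				   (freq_hi - freq_lo) / 2) / (freq_hi - freq_lo);
	unsigned int jiffies_lo = jiffies_total - jiffies_hi;

	printf("%u kHz for %u jiffies, then %u kHz for %u jiffies\n",
	       freq_hi, jiffies_hi, freq_lo, jiffies_lo);	/* 25 and 25 here */
	return 0;
}

do_dbs_timer() then runs at freq_hi for freq_hi_jiffies on the NORMAL sample and drops to freq_lo for the SUB_SAMPLE, re-aligning the delay with 'delay -= jiffies % delay' so all CPUs sample on roughly the same jiffy.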
index 25eee5394201d30866359043fd985c3075d8ef70..c2ecc599dc5f388a67e5f812a949424ee3db8d6d 100644 (file)
@@ -350,12 +350,10 @@ __init cpufreq_stats_init(void)
        }
 
        register_hotcpu_notifier(&cpufreq_stat_cpu_notifier);
-       lock_cpu_hotplug();
        for_each_online_cpu(cpu) {
                cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_ONLINE,
                        (void *)(long)cpu);
        }
-       unlock_cpu_hotplug();
        return 0;
 }
 static void
index 69a53d476b5b7ee2cb30dd88518ffb11cd73bf70..9edfacee7d8442cb6e5c49db23d493829991d540 100644 (file)
@@ -14,7 +14,7 @@ config INFINIBAND_USER_MAD
        ---help---
          Userspace InfiniBand Management Datagram (MAD) support.  This
          is the kernel side of the userspace MAD support, which allows
-         userspace processes to send and receive MADs. You will also 
+         userspace processes to send and receive MADs. You will also
          need libibumad from <http://www.openib.org>.
 
 config INFINIBAND_USER_ACCESS
@@ -36,6 +36,8 @@ config INFINIBAND_ADDR_TRANS
 
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/ipath/Kconfig"
+source "drivers/infiniband/hw/ehca/Kconfig"
+source "drivers/infiniband/hw/amso1100/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 
index c7ff58c1d0e5e9a38b0baf87de600e24a40afc3b..2b5d1098ef45f4c3ba9fa5ccfed4c2aa76b0e7c3 100644 (file)
@@ -1,6 +1,8 @@
 obj-$(CONFIG_INFINIBAND)               += core/
 obj-$(CONFIG_INFINIBAND_MTHCA)         += hw/mthca/
-obj-$(CONFIG_IPATH_CORE)               += hw/ipath/
+obj-$(CONFIG_INFINIBAND_IPATH)         += hw/ipath/
+obj-$(CONFIG_INFINIBAND_EHCA)          += hw/ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100)      += hw/amso1100/
 obj-$(CONFIG_INFINIBAND_IPOIB)         += ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)           += ulp/srp/
 obj-$(CONFIG_INFINIBAND_ISER)          += ulp/iser/
index 68e73ec2d1f87f77b1a9c327469979aa64859991..163d991eb8c96cf86cff654d8781a94e95af9c1a 100644 (file)
@@ -1,7 +1,7 @@
 infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)     := ib_addr.o rdma_cm.o
 
 obj-$(CONFIG_INFINIBAND) +=            ib_core.o ib_mad.o ib_sa.o \
-                                       ib_cm.o $(infiniband-y)
+                                       ib_cm.o iw_cm.o $(infiniband-y)
 obj-$(CONFIG_INFINIBAND_USER_MAD) +=   ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=        ib_uverbs.o ib_ucm.o
 
@@ -14,6 +14,8 @@ ib_sa-y :=                    sa_query.o
 
 ib_cm-y :=                     cm.o
 
+iw_cm-y :=                     iwcm.o
+
 rdma_cm-y :=                   cma.o
 
 ib_addr-y :=                   addr.o
index 1205e8027829aa55dee38e10af974abd8d46ba93..9cbf09e2052f87d5845b41a3d348f129f7f5250d 100644 (file)
@@ -61,12 +61,15 @@ static LIST_HEAD(req_list);
 static DECLARE_WORK(work, process_req, NULL);
 static struct workqueue_struct *addr_wq;
 
-static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
-                    unsigned char *dst_dev_addr)
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+                    const unsigned char *dst_dev_addr)
 {
        switch (dev->type) {
        case ARPHRD_INFINIBAND:
-               dev_addr->dev_type = IB_NODE_CA;
+               dev_addr->dev_type = RDMA_NODE_IB_CA;
+               break;
+       case ARPHRD_ETHER:
+               dev_addr->dev_type = RDMA_NODE_RNIC;
                break;
        default:
                return -EADDRNOTAVAIL;
@@ -78,6 +81,7 @@ static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
                memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
        return 0;
 }
+EXPORT_SYMBOL(rdma_copy_addr);
 
 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 {
@@ -89,7 +93,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
        if (!dev)
                return -EADDRNOTAVAIL;
 
-       ret = copy_addr(dev_addr, dev, NULL);
+       ret = rdma_copy_addr(dev_addr, dev, NULL);
        dev_put(dev);
        return ret;
 }
@@ -161,7 +165,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
 
        /* If the device does ARP internally, return 'done' */
        if (rt->idev->dev->flags & IFF_NOARP) {
-               copy_addr(addr, rt->idev->dev, NULL);
+               rdma_copy_addr(addr, rt->idev->dev, NULL);
                goto put;
        }
 
@@ -181,7 +185,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
                src_in->sin_addr.s_addr = rt->rt_src;
        }
 
-       ret = copy_addr(addr, neigh->dev, neigh->ha);
+       ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
 release:
        neigh_release(neigh);
 put:
@@ -245,7 +249,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in,
        if (ZERONET(src_ip)) {
                src_in->sin_family = dst_in->sin_family;
                src_in->sin_addr.s_addr = dst_ip;
-               ret = copy_addr(addr, dev, dev->dev_addr);
+               ret = rdma_copy_addr(addr, dev, dev->dev_addr);
        } else if (LOOPBACK(src_ip)) {
                ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
                if (!ret)
@@ -327,10 +331,10 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
 }
 EXPORT_SYMBOL(rdma_addr_cancel);
 
-static int netevent_callback(struct notifier_block *self, unsigned long event, 
+static int netevent_callback(struct notifier_block *self, unsigned long event,
        void *ctx)
 {
-       if (event == NETEVENT_NEIGH_UPDATE) {  
+       if (event == NETEVENT_NEIGH_UPDATE) {
                struct neighbour *neigh = ctx;
 
                if (neigh->dev->type == ARPHRD_INFINIBAND &&
index 75313ade2e0de709be67c1277f48126acfb3a777..20e9f64e67a6cba164aba9ee81b9653ddc207141 100644 (file)
@@ -62,12 +62,13 @@ struct ib_update_work {
 
 static inline int start_port(struct ib_device *device)
 {
-       return device->node_type == IB_NODE_SWITCH ? 0 : 1;
+       return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
 }
 
 static inline int end_port(struct ib_device *device)
 {
-       return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt;
+       return (device->node_type == RDMA_NODE_IB_SWITCH) ?
+               0 : device->phys_port_cnt;
 }
 
 int ib_get_cached_gid(struct ib_device *device,
index 0de335b7bfc2f9e5236f603cf765e3fd96d89fc1..f35fcc4c06389e77f260254cd8b2943b6e5d77de 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -41,6 +41,7 @@
 #include <linux/idr.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
+#include <linux/random.h>
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
@@ -73,6 +74,7 @@ static struct ib_cm {
        struct rb_root remote_id_table;
        struct rb_root remote_sidr_table;
        struct idr local_id_table;
+       __be32 random_id_operand;
        struct workqueue_struct *wq;
 } cm;
 
@@ -177,7 +179,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
        if (IS_ERR(ah))
                return PTR_ERR(ah);
 
-       m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, 
+       m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
                               cm_id_priv->av.pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC);
@@ -299,15 +301,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
        unsigned long flags;
-       int ret;
+       int ret, id;
        static int next_id;
 
        do {
                spin_lock_irqsave(&cm.lock, flags);
-               ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++,
-                                       (__force int *) &cm_id_priv->id.local_id);
+               ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
+                                       next_id++, &id);
                spin_unlock_irqrestore(&cm.lock, flags);
        } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
+
+       cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand);
        return ret;
 }
 
@@ -316,7 +320,8 @@ static void cm_free_id(__be32 local_id)
        unsigned long flags;
 
        spin_lock_irqsave(&cm.lock, flags);
-       idr_remove(&cm.local_id_table, (__force int) local_id);
+       idr_remove(&cm.local_id_table,
+                  (__force int) (local_id ^ cm.random_id_operand));
        spin_unlock_irqrestore(&cm.lock, flags);
 }
 
@@ -324,7 +329,8 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
 {
        struct cm_id_private *cm_id_priv;
 
-       cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id);
+       cm_id_priv = idr_find(&cm.local_id_table,
+                             (__force int) (local_id ^ cm.random_id_operand));
        if (cm_id_priv) {
                if (cm_id_priv->id.remote_id == remote_id)
                        atomic_inc(&cm_id_priv->refcount);
@@ -679,6 +685,8 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
 {
        int wait_time;
 
+       cm_cleanup_timewait(cm_id_priv->timewait_info);
+
        /*
         * The cm_id could be destroyed by the user before we exit timewait.
         * To protect against this, we search for the cm_id after exiting
@@ -1354,7 +1362,7 @@ static int cm_req_handler(struct cm_work *work)
                                                            id.local_id);
        if (IS_ERR(cm_id_priv->timewait_info)) {
                ret = PTR_ERR(cm_id_priv->timewait_info);
-               goto error1;
+               goto destroy;
        }
        cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
        cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
@@ -1363,7 +1371,8 @@ static int cm_req_handler(struct cm_work *work)
        listen_cm_id_priv = cm_match_req(work, cm_id_priv);
        if (!listen_cm_id_priv) {
                ret = -EINVAL;
-               goto error2;
+               kfree(cm_id_priv->timewait_info);
+               goto destroy;
        }
 
        cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@@ -1373,12 +1382,22 @@ static int cm_req_handler(struct cm_work *work)
 
        cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
        ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
-       if (ret)
-               goto error3;
+       if (ret) {
+               ib_get_cached_gid(work->port->cm_dev->device,
+                                 work->port->port_num, 0, &work->path[0].sgid);
+               ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+                              &work->path[0].sgid, sizeof work->path[0].sgid,
+                              NULL, 0);
+               goto rejected;
+       }
        if (req_msg->alt_local_lid) {
                ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
-               if (ret)
-                       goto error3;
+               if (ret) {
+                       ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+                                      &work->path[0].sgid,
+                                      sizeof work->path[0].sgid, NULL, 0);
+                       goto rejected;
+               }
        }
        cm_id_priv->tid = req_msg->hdr.tid;
        cm_id_priv->timeout_ms = cm_convert_to_ms(
@@ -1400,12 +1419,11 @@ static int cm_req_handler(struct cm_work *work)
        cm_deref_id(listen_cm_id_priv);
        return 0;
 
-error3:        atomic_dec(&cm_id_priv->refcount);
+rejected:
+       atomic_dec(&cm_id_priv->refcount);
        cm_deref_id(listen_cm_id_priv);
-       cm_cleanup_timewait(cm_id_priv->timewait_info);
-error2:        kfree(cm_id_priv->timewait_info);
-       cm_id_priv->timewait_info = NULL;
-error1:        ib_destroy_cm_id(&cm_id_priv->id);
+destroy:
+       ib_destroy_cm_id(cm_id);
        return ret;
 }
 
@@ -2072,8 +2090,9 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
                        spin_unlock_irqrestore(&cm.lock, flags);
                        return NULL;
                }
-               cm_id_priv = idr_find(&cm.local_id_table,
-                                     (__force int) timewait_info->work.local_id);
+               cm_id_priv = idr_find(&cm.local_id_table, (__force int)
+                                     (timewait_info->work.local_id ^
+                                      cm.random_id_operand));
                if (cm_id_priv) {
                        if (cm_id_priv->id.remote_id == remote_id)
                                atomic_inc(&cm_id_priv->refcount);
@@ -3125,7 +3144,8 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
                qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
                                           IB_ACCESS_REMOTE_WRITE;
                if (cm_id_priv->responder_resources)
-                       qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
+                       qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
+                                                   IB_ACCESS_REMOTE_ATOMIC;
                qp_attr->pkey_index = cm_id_priv->av.pkey_index;
                qp_attr->port_num = cm_id_priv->av.port->port_num;
                ret = 0;
@@ -3262,6 +3282,9 @@ static void cm_add_one(struct ib_device *device)
        int ret;
        u8 i;
 
+       if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+               return;
+
        cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
                         device->phys_port_cnt, GFP_KERNEL);
        if (!cm_dev)
@@ -3349,6 +3372,7 @@ static int __init ib_cm_init(void)
        cm.remote_qp_table = RB_ROOT;
        cm.remote_sidr_table = RB_ROOT;
        idr_init(&cm.local_id_table);
+       get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
        idr_pre_get(&cm.local_id_table, GFP_KERNEL);
 
        cm.wq = create_workqueue("ib_cm");
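
The random_id_operand hunks above leave the idr keyed by the small sequential id it allocated; only the value exposed as the cm_id's local_id is XORed with a boot-time random operand, and every idr_find()/idr_remove() XORs it back. A hedged sketch of just that mapping (names hypothetical, locking omitted):

#include <linux/types.h>
#include <linux/random.h>

static u32 id_operand;			/* seeded once at module init */

static void demo_id_init(void)
{
	get_random_bytes(&id_operand, sizeof(id_operand));
}

/* the idr keeps storing 'idr_id'; peers only ever see the XORed value */
static u32 demo_to_wire_id(int idr_id)
{
	return (u32)idr_id ^ id_operand;
}

static int demo_to_idr_key(u32 wire_id)
{
	return (int)(wire_id ^ id_operand);
}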
index 5d625a81193f079ea2f38272ed03b0381f9e1b0e..1178bd434d1b1f12cd41f32bbbb0c5aaa3a50929 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/mutex.h>
 #include <linux/random.h>
 #include <linux/idr.h>
+#include <linux/inetdevice.h>
 
 #include <net/tcp.h>
 
@@ -43,6 +44,7 @@
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
+#include <rdma/iw_cm.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -60,6 +62,7 @@ static struct ib_client cma_client = {
        .remove = cma_remove_one
 };
 
+static struct ib_sa_client sa_client;
 static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
@@ -124,6 +127,7 @@ struct rdma_id_private {
        int                     query_id;
        union {
                struct ib_cm_id *ib;
+               struct iw_cm_id *iw;
        } cm_id;
 
        u32                     seq_num;
@@ -259,15 +263,24 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv)
        id_priv->cma_dev = NULL;
 }
 
-static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
+static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
+       enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type;
        struct cma_device *cma_dev;
        union ib_gid gid;
        int ret = -ENODEV;
 
-       ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid),
+       switch (rdma_node_get_transport(dev_type)) {
+       case RDMA_TRANSPORT_IB:
+               ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+               break;
+       case RDMA_TRANSPORT_IWARP:
+               iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+               break;
+       default:
+               return -ENODEV;
+       }
 
-       mutex_lock(&lock);
        list_for_each_entry(cma_dev, &dev_list, list) {
                ret = ib_find_cached_gid(cma_dev->device, &gid,
                                         &id_priv->id.port_num, NULL);
@@ -276,20 +289,9 @@ static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
                        break;
                }
        }
-       mutex_unlock(&lock);
        return ret;
 }
 
-static int cma_acquire_dev(struct rdma_id_private *id_priv)
-{
-       switch (id_priv->id.route.addr.dev_addr.dev_type) {
-       case IB_NODE_CA:
-               return cma_acquire_ib_dev(id_priv);
-       default:
-               return -ENODEV;
-       }
-}
-
 static void cma_deref_id(struct rdma_id_private *id_priv)
 {
        if (atomic_dec_and_test(&id_priv->refcount))
@@ -347,6 +349,16 @@ static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
                                          IB_QP_PKEY_INDEX | IB_QP_PORT);
 }
 
+static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+{
+       struct ib_qp_attr qp_attr;
+
+       qp_attr.qp_state = IB_QPS_INIT;
+       qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+
+       return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS);
+}
+
 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
                   struct ib_qp_init_attr *qp_init_attr)
 {
@@ -362,10 +374,13 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
        if (IS_ERR(qp))
                return PTR_ERR(qp);
 
-       switch (id->device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id->device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                ret = cma_init_ib_qp(id_priv, qp);
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = cma_init_iw_qp(id_priv, qp);
+               break;
        default:
                ret = -ENOSYS;
                break;
@@ -451,13 +466,17 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
        int ret;
 
        id_priv = container_of(id, struct rdma_id_private, id);
-       switch (id_priv->id.device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
                                         qp_attr_mask);
                if (qp_attr->qp_state == IB_QPS_RTR)
                        qp_attr->rq_psn = id_priv->seq_num;
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
+                                       qp_attr_mask);
+               break;
        default:
                ret = -ENOSYS;
                break;
@@ -590,8 +609,8 @@ static int cma_notify_user(struct rdma_id_private *id_priv,
 
 static void cma_cancel_route(struct rdma_id_private *id_priv)
 {
-       switch (id_priv->id.device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                if (id_priv->query)
                        ib_sa_cancel_query(id_priv->query_id, id_priv->query);
                break;
@@ -611,11 +630,15 @@ static void cma_destroy_listen(struct rdma_id_private *id_priv)
        cma_exch(id_priv, CMA_DESTROYING);
 
        if (id_priv->cma_dev) {
-               switch (id_priv->id.device->node_type) {
-               case IB_NODE_CA:
-                       if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+               switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+               case RDMA_TRANSPORT_IB:
+                       if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
                                ib_destroy_cm_id(id_priv->cm_id.ib);
                        break;
+               case RDMA_TRANSPORT_IWARP:
+                       if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+                               iw_destroy_cm_id(id_priv->cm_id.iw);
+                       break;
                default:
                        break;
                }
@@ -689,19 +712,25 @@ void rdma_destroy_id(struct rdma_cm_id *id)
        state = cma_exch(id_priv, CMA_DESTROYING);
        cma_cancel_operation(id_priv, state);
 
+       mutex_lock(&lock);
        if (id_priv->cma_dev) {
-               switch (id->device->node_type) {
-               case IB_NODE_CA:
-                       if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+               mutex_unlock(&lock);
+               switch (rdma_node_get_transport(id->device->node_type)) {
+               case RDMA_TRANSPORT_IB:
+                       if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
                                ib_destroy_cm_id(id_priv->cm_id.ib);
                        break;
+               case RDMA_TRANSPORT_IWARP:
+                       if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+                               iw_destroy_cm_id(id_priv->cm_id.iw);
+                       break;
                default:
                        break;
                }
-               mutex_lock(&lock);
+               mutex_lock(&lock);
                cma_detach_from_dev(id_priv);
-               mutex_unlock(&lock);
        }
+       mutex_unlock(&lock);
 
        cma_release_port(id_priv);
        cma_deref_id(id_priv);
@@ -869,7 +898,7 @@ static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id,
        ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
        ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
        ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
-       rt->addr.dev_addr.dev_type = IB_NODE_CA;
+       rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;
 
        id_priv = container_of(id, struct rdma_id_private, id);
        id_priv->state = CMA_CONNECT;
@@ -898,7 +927,9 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
        }
 
        atomic_inc(&conn_id->dev_remove);
-       ret = cma_acquire_ib_dev(conn_id);
+       mutex_lock(&lock);
+       ret = cma_acquire_dev(conn_id);
+       mutex_unlock(&lock);
        if (ret) {
                ret = -ENODEV;
                cma_release_remove(conn_id);
@@ -982,6 +1013,130 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
        }
 }
 
+static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
+{
+       struct rdma_id_private *id_priv = iw_id->context;
+       enum rdma_cm_event_type event = 0;
+       struct sockaddr_in *sin;
+       int ret = 0;
+
+       atomic_inc(&id_priv->dev_remove);
+
+       switch (iw_event->event) {
+       case IW_CM_EVENT_CLOSE:
+               event = RDMA_CM_EVENT_DISCONNECTED;
+               break;
+       case IW_CM_EVENT_CONNECT_REPLY:
+               sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+               *sin = iw_event->local_addr;
+               sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
+               *sin = iw_event->remote_addr;
+               if (iw_event->status)
+                       event = RDMA_CM_EVENT_REJECTED;
+               else
+                       event = RDMA_CM_EVENT_ESTABLISHED;
+               break;
+       case IW_CM_EVENT_ESTABLISHED:
+               event = RDMA_CM_EVENT_ESTABLISHED;
+               break;
+       default:
+               BUG_ON(1);
+       }
+
+       ret = cma_notify_user(id_priv, event, iw_event->status,
+                             iw_event->private_data,
+                             iw_event->private_data_len);
+       if (ret) {
+               /* Destroy the CM ID by returning a non-zero value. */
+               id_priv->cm_id.iw = NULL;
+               cma_exch(id_priv, CMA_DESTROYING);
+               cma_release_remove(id_priv);
+               rdma_destroy_id(&id_priv->id);
+               return ret;
+       }
+
+       cma_release_remove(id_priv);
+       return ret;
+}
+
+static int iw_conn_req_handler(struct iw_cm_id *cm_id,
+                              struct iw_cm_event *iw_event)
+{
+       struct rdma_cm_id *new_cm_id;
+       struct rdma_id_private *listen_id, *conn_id;
+       struct sockaddr_in *sin;
+       struct net_device *dev = NULL;
+       int ret;
+
+       listen_id = cm_id->context;
+       atomic_inc(&listen_id->dev_remove);
+       if (!cma_comp(listen_id, CMA_LISTEN)) {
+               ret = -ECONNABORTED;
+               goto out;
+       }
+
+       /* Create a new RDMA id for the new IW CM ID */
+       new_cm_id = rdma_create_id(listen_id->id.event_handler,
+                                  listen_id->id.context,
+                                  RDMA_PS_TCP);
+       if (!new_cm_id) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       conn_id = container_of(new_cm_id, struct rdma_id_private, id);
+       atomic_inc(&conn_id->dev_remove);
+       conn_id->state = CMA_CONNECT;
+
+       dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
+       if (!dev) {
+               ret = -EADDRNOTAVAIL;
+               cma_release_remove(conn_id);
+               rdma_destroy_id(new_cm_id);
+               goto out;
+       }
+       ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
+       if (ret) {
+               cma_release_remove(conn_id);
+               rdma_destroy_id(new_cm_id);
+               goto out;
+       }
+
+       mutex_lock(&lock);
+       ret = cma_acquire_dev(conn_id);
+       mutex_unlock(&lock);
+       if (ret) {
+               cma_release_remove(conn_id);
+               rdma_destroy_id(new_cm_id);
+               goto out;
+       }
+
+       conn_id->cm_id.iw = cm_id;
+       cm_id->context = conn_id;
+       cm_id->cm_handler = cma_iw_handler;
+
+       sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
+       *sin = iw_event->local_addr;
+       sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
+       *sin = iw_event->remote_addr;
+
+       ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
+                             iw_event->private_data,
+                             iw_event->private_data_len);
+       if (ret) {
+               /* User wants to destroy the CM ID */
+               conn_id->cm_id.iw = NULL;
+               cma_exch(conn_id, CMA_DESTROYING);
+               cma_release_remove(conn_id);
+               rdma_destroy_id(&conn_id->id);
+       }
+
+out:
+       if (dev)
+               dev_put(dev);
+       cma_release_remove(listen_id);
+       return ret;
+}
+
 static int cma_ib_listen(struct rdma_id_private *id_priv)
 {
        struct ib_cm_compare_data compare_data;
@@ -1011,6 +1166,30 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
        return ret;
 }
 
+static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
+{
+       int ret;
+       struct sockaddr_in *sin;
+
+       id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
+                                           iw_conn_req_handler,
+                                           id_priv);
+       if (IS_ERR(id_priv->cm_id.iw))
+               return PTR_ERR(id_priv->cm_id.iw);
+
+       sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+       id_priv->cm_id.iw->local_addr = *sin;
+
+       ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
+
+       if (ret) {
+               iw_destroy_cm_id(id_priv->cm_id.iw);
+               id_priv->cm_id.iw = NULL;
+       }
+
+       return ret;
+}
+
 static int cma_listen_handler(struct rdma_cm_id *id,
                              struct rdma_cm_event *event)
 {
@@ -1087,12 +1266,17 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
 
        id_priv->backlog = backlog;
        if (id->device) {
-               switch (id->device->node_type) {
-               case IB_NODE_CA:
+               switch (rdma_node_get_transport(id->device->node_type)) {
+               case RDMA_TRANSPORT_IB:
                        ret = cma_ib_listen(id_priv);
                        if (ret)
                                goto err;
                        break;
+               case RDMA_TRANSPORT_IWARP:
+                       ret = cma_iw_listen(id_priv, backlog);
+                       if (ret)
+                               goto err;
+                       break;
                default:
                        ret = -ENOSYS;
                        goto err;
@@ -1140,7 +1324,7 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
        path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
        path_rec.numb_path = 1;
 
-       id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device,
+       id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
                                id_priv->id.port_num, &path_rec,
                                IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
                                IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
@@ -1231,6 +1415,23 @@ err:
 }
 EXPORT_SYMBOL(rdma_set_ib_paths);
 
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
+{
+       struct cma_work *work;
+
+       work = kzalloc(sizeof *work, GFP_KERNEL);
+       if (!work)
+               return -ENOMEM;
+
+       work->id = id_priv;
+       INIT_WORK(&work->work, cma_work_handler, work);
+       work->old_state = CMA_ROUTE_QUERY;
+       work->new_state = CMA_ROUTE_RESOLVED;
+       work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+       queue_work(cma_wq, &work->work);
+       return 0;
+}
+
 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 {
        struct rdma_id_private *id_priv;
@@ -1241,10 +1442,13 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
                return -EINVAL;
 
        atomic_inc(&id_priv->refcount);
-       switch (id->device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id->device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                ret = cma_resolve_ib_route(id_priv, timeout_ms);
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = cma_resolve_iw_route(id_priv, timeout_ms);
+               break;
        default:
                ret = -ENOSYS;
                break;
@@ -1309,16 +1513,26 @@ static void addr_handler(int status, struct sockaddr *src_addr,
        enum rdma_cm_event_type event;
 
        atomic_inc(&id_priv->dev_remove);
-       if (!id_priv->cma_dev && !status)
+
+       /*
+        * Grab mutex to block rdma_destroy_id() from removing the device while
+        * we're trying to acquire it.
+        */
+       mutex_lock(&lock);
+       if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
+               mutex_unlock(&lock);
+               goto out;
+       }
+
+       if (!status && !id_priv->cma_dev)
                status = cma_acquire_dev(id_priv);
+       mutex_unlock(&lock);
 
        if (status) {
-               if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND))
+               if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
                        goto out;
                event = RDMA_CM_EVENT_ADDR_ERROR;
        } else {
-               if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
-                       goto out;
                memcpy(&id_priv->id.route.addr.src_addr, src_addr,
                       ip_addr_size(src_addr));
                event = RDMA_CM_EVENT_ADDR_RESOLVED;
@@ -1492,7 +1706,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
        hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
                if (cma_any_addr(&cur_id->id.route.addr.src_addr))
                        return -EADDRNOTAVAIL;
-               
+
                cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
                if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
                        return -EADDRINUSE;
@@ -1542,8 +1756,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 
        if (!cma_any_addr(addr)) {
                ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
-               if (!ret)
+               if (!ret) {
+                       mutex_lock(&lock);
                        ret = cma_acquire_dev(id_priv);
+                       mutex_unlock(&lock);
+               }
                if (ret)
                        goto err;
        }
@@ -1649,6 +1866,47 @@ out:
        return ret;
 }
 
+static int cma_connect_iw(struct rdma_id_private *id_priv,
+                         struct rdma_conn_param *conn_param)
+{
+       struct iw_cm_id *cm_id;
+       struct sockaddr_in* sin;
+       int ret;
+       struct iw_cm_conn_param iw_param;
+
+       cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
+       if (IS_ERR(cm_id)) {
+               ret = PTR_ERR(cm_id);
+               goto out;
+       }
+
+       id_priv->cm_id.iw = cm_id;
+
+       sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
+       cm_id->local_addr = *sin;
+
+       sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
+       cm_id->remote_addr = *sin;
+
+       ret = cma_modify_qp_rtr(&id_priv->id);
+       if (ret) {
+               iw_destroy_cm_id(cm_id);
+               return ret;
+       }
+
+       iw_param.ord = conn_param->initiator_depth;
+       iw_param.ird = conn_param->responder_resources;
+       iw_param.private_data = conn_param->private_data;
+       iw_param.private_data_len = conn_param->private_data_len;
+       if (id_priv->id.qp)
+               iw_param.qpn = id_priv->qp_num;
+       else
+               iw_param.qpn = conn_param->qp_num;
+       ret = iw_cm_connect(cm_id, &iw_param);
+out:
+       return ret;
+}
+
 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
        struct rdma_id_private *id_priv;
@@ -1664,10 +1922,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
                id_priv->srq = conn_param->srq;
        }
 
-       switch (id->device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id->device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                ret = cma_connect_ib(id_priv, conn_param);
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = cma_connect_iw(id_priv, conn_param);
+               break;
        default:
                ret = -ENOSYS;
                break;
@@ -1708,6 +1969,28 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
        return ib_send_cm_rep(id_priv->cm_id.ib, &rep);
 }
 
+static int cma_accept_iw(struct rdma_id_private *id_priv,
+                 struct rdma_conn_param *conn_param)
+{
+       struct iw_cm_conn_param iw_param;
+       int ret;
+
+       ret = cma_modify_qp_rtr(&id_priv->id);
+       if (ret)
+               return ret;
+
+       iw_param.ord = conn_param->initiator_depth;
+       iw_param.ird = conn_param->responder_resources;
+       iw_param.private_data = conn_param->private_data;
+       iw_param.private_data_len = conn_param->private_data_len;
+       if (id_priv->id.qp) {
+               iw_param.qpn = id_priv->qp_num;
+       } else
+               iw_param.qpn = conn_param->qp_num;
+
+       return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
+}
+
 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
        struct rdma_id_private *id_priv;
@@ -1723,13 +2006,16 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
                id_priv->srq = conn_param->srq;
        }
 
-       switch (id->device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id->device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                if (conn_param)
                        ret = cma_accept_ib(id_priv, conn_param);
                else
                        ret = cma_rep_recv(id_priv);
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = cma_accept_iw(id_priv, conn_param);
+               break;
        default:
                ret = -ENOSYS;
                break;
@@ -1756,12 +2042,16 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
        if (!cma_comp(id_priv, CMA_CONNECT))
                return -EINVAL;
 
-       switch (id->device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id->device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                ret = ib_send_cm_rej(id_priv->cm_id.ib,
                                     IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
                                     private_data, private_data_len);
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = iw_cm_reject(id_priv->cm_id.iw,
+                                  private_data, private_data_len);
+               break;
        default:
                ret = -ENOSYS;
                break;
@@ -1780,17 +2070,20 @@ int rdma_disconnect(struct rdma_cm_id *id)
            !cma_comp(id_priv, CMA_DISCONNECT))
                return -EINVAL;
 
-       ret = cma_modify_qp_err(id);
-       if (ret)
-               goto out;
-
-       switch (id->device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id->device->node_type)) {
+       case RDMA_TRANSPORT_IB:
+               ret = cma_modify_qp_err(id);
+               if (ret)
+                       goto out;
                /* Initiate or respond to a disconnect. */
                if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
                        ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
+               break;
        default:
+               ret = -EINVAL;
                break;
        }
 out:
@@ -1907,12 +2200,15 @@ static int cma_init(void)
        if (!cma_wq)
                return -ENOMEM;
 
+       ib_sa_register_client(&sa_client);
+
        ret = ib_register_client(&cma_client);
        if (ret)
                goto err;
        return 0;
 
 err:
+       ib_sa_unregister_client(&sa_client);
        destroy_workqueue(cma_wq);
        return ret;
 }
@@ -1920,6 +2216,7 @@ err:
 static void cma_cleanup(void)
 {
        ib_unregister_client(&cma_client);
+       ib_sa_unregister_client(&sa_client);
        destroy_workqueue(cma_wq);
        idr_destroy(&sdp_ps);
        idr_destroy(&tcp_ps);
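
A pattern that recurs through the cma.c hunks above: cma_acquire_dev() no longer takes the module mutex itself, so every caller (rdma_bind_addr(), cma_req_handler(), iw_conn_req_handler(), addr_handler()) brackets it with mutex_lock(&lock)/mutex_unlock(&lock), keeping dev_list stable and serializing against rdma_destroy_id() detaching the device. A sketch of the caller-side convention only (the helper name is hypothetical; 'lock' and cma_acquire_dev() are the file-scope symbols shown in the diff):

static int demo_bind_to_device(struct rdma_id_private *id_priv)
{
	int ret;

	mutex_lock(&lock);		/* serialize against rdma_destroy_id()  */
	ret = cma_acquire_dev(id_priv);	/* walks dev_list; list must not change */
	mutex_unlock(&lock);

	return ret;
}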
index b2f3cb91d9bcfdb90159d1e72cc5493d6798343d..63d2a39fb82c84a83aab4454f6336d81efbc41ae 100644 (file)
@@ -385,7 +385,7 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
 EXPORT_SYMBOL(ib_get_client_data);
 
 /**
- * ib_set_client_data - Get IB client context
+ * ib_set_client_data - Set IB client context
  * @device:Device to set context for
  * @client:Client to set context for
  * @data:Context to set
@@ -505,7 +505,7 @@ int ib_query_port(struct ib_device *device,
                  u8 port_num,
                  struct ib_port_attr *port_attr)
 {
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                if (port_num)
                        return -EINVAL;
        } else if (port_num < 1 || port_num > device->phys_port_cnt)
@@ -580,7 +580,7 @@ int ib_modify_port(struct ib_device *device,
                   u8 port_num, int port_modify_mask,
                   struct ib_port_modify *port_modify)
 {
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                if (port_num)
                        return -EINVAL;
        } else if (port_num < 1 || port_num > device->phys_port_cnt)
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
new file mode 100644 (file)
index 0000000..c3fb304
--- /dev/null
@@ -0,0 +1,1019 @@
+/*
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_addr.h>
+
+#include "iwcm.h"
+
+MODULE_AUTHOR("Tom Tucker");
+MODULE_DESCRIPTION("iWARP CM");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static struct workqueue_struct *iwcm_wq;
+struct iwcm_work {
+       struct work_struct work;
+       struct iwcm_id_private *cm_id;
+       struct list_head list;
+       struct iw_cm_event event;
+       struct list_head free_list;
+};
+
+/*
+ * The following services provide a mechanism for pre-allocating iwcm_work
+ * elements.  The design pre-allocates them based on the cm_id type:
+ *     LISTENING IDS:  Get enough elements preallocated to handle the
+ *                     listen backlog.
+ *     ACTIVE IDS:     4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
+ *     PASSIVE IDS:    3: ESTABLISHED, DISCONNECT, CLOSE
+ *
+ * Allocating them in connect and listen avoids having to deal
+ * with allocation failures on the event upcall from the provider (which
+ * is called in the interrupt context).
+ *
+ * One exception is when creating the cm_id for incoming connection requests.
+ * There are two cases:
+ * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
+ *    the backlog is exceeded, then no more connection request events will
+ *    be processed.  cm_event_handler() returns -ENOMEM in this case.  It's up
+ *    to the provider to reject the connection request.
+ * 2) in the connection request workqueue handler, cm_conn_req_handler().
+ *    If work elements cannot be allocated for the new connect request cm_id,
+ *    then IWCM will call the provider reject method.  This is ok since
+ *    cm_conn_req_handler() runs in the workqueue thread context.
+ */
+
+static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
+{
+       struct iwcm_work *work;
+
+       if (list_empty(&cm_id_priv->work_free_list))
+               return NULL;
+       work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
+                         free_list);
+       list_del_init(&work->free_list);
+       return work;
+}
+
+static void put_work(struct iwcm_work *work)
+{
+       list_add(&work->free_list, &work->cm_id->work_free_list);
+}
+
+static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
+{
+       struct list_head *e, *tmp;
+
+       list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
+               kfree(list_entry(e, struct iwcm_work, free_list));
+}
+
+static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
+{
+       struct iwcm_work *work;
+
+       BUG_ON(!list_empty(&cm_id_priv->work_free_list));
+       while (count--) {
+               work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
+               if (!work) {
+                       dealloc_work_entries(cm_id_priv);
+                       return -ENOMEM;
+               }
+               work->cm_id = cm_id_priv;
+               INIT_LIST_HEAD(&work->list);
+               put_work(work);
+       }
+       return 0;
+}
+
+/*
+ * Make a copy of the private data from an incoming connection request
+ * so the low-level driver doesn't have to keep it around.  Point the
+ * event at the local copy.
+ */
+static int copy_private_data(struct iwcm_id_private *cm_id_priv,
+                      struct iw_cm_event *event)
+{
+       void *p;
+
+       p = kmalloc(event->private_data_len, GFP_ATOMIC);
+       if (!p)
+               return -ENOMEM;
+       memcpy(p, event->private_data, event->private_data_len);
+       event->private_data = p;
+       return 0;
+}
+
+/*
+ * Release a reference on cm_id. If the last reference is being removed
+ * and iw_destroy_cm_id is waiting, wake up the waiting thread.
+ */
+static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
+{
+       int ret = 0;
+
+       BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
+       if (atomic_dec_and_test(&cm_id_priv->refcount)) {
+               BUG_ON(!list_empty(&cm_id_priv->work_list));
+               if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) {
+                       BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING);
+                       BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY,
+                                       &cm_id_priv->flags));
+                       ret = 1;
+               }
+               complete(&cm_id_priv->destroy_comp);
+       }
+
+       return ret;
+}
+
+static void add_ref(struct iw_cm_id *cm_id)
+{
+       struct iwcm_id_private *cm_id_priv;
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       atomic_inc(&cm_id_priv->refcount);
+}
+
+static void rem_ref(struct iw_cm_id *cm_id)
+{
+       struct iwcm_id_private *cm_id_priv;
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       iwcm_deref_id(cm_id_priv);
+}
+
+static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
+
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+                                iw_cm_handler cm_handler,
+                                void *context)
+{
+       struct iwcm_id_private *cm_id_priv;
+
+       cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
+       if (!cm_id_priv)
+               return ERR_PTR(-ENOMEM);
+
+       cm_id_priv->state = IW_CM_STATE_IDLE;
+       cm_id_priv->id.device = device;
+       cm_id_priv->id.cm_handler = cm_handler;
+       cm_id_priv->id.context = context;
+       cm_id_priv->id.event_handler = cm_event_handler;
+       cm_id_priv->id.add_ref = add_ref;
+       cm_id_priv->id.rem_ref = rem_ref;
+       spin_lock_init(&cm_id_priv->lock);
+       atomic_set(&cm_id_priv->refcount, 1);
+       init_waitqueue_head(&cm_id_priv->connect_wait);
+       init_completion(&cm_id_priv->destroy_comp);
+       INIT_LIST_HEAD(&cm_id_priv->work_list);
+       INIT_LIST_HEAD(&cm_id_priv->work_free_list);
+
+       return &cm_id_priv->id;
+}
+EXPORT_SYMBOL(iw_create_cm_id);
+
+
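+/* Move the QP to the ERROR state to force an abrupt close */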
+static int iwcm_modify_qp_err(struct ib_qp *qp)
+{
+       struct ib_qp_attr qp_attr;
+
+       if (!qp)
+               return -EINVAL;
+
+       qp_attr.qp_state = IB_QPS_ERR;
+       return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * This is really the RDMAC CLOSING state. It is most similar to the
+ * IB SQD QP state.
+ */
+static int iwcm_modify_qp_sqd(struct ib_qp *qp)
+{
+       struct ib_qp_attr qp_attr;
+
+       BUG_ON(qp == NULL);
+       qp_attr.qp_state = IB_QPS_SQD;
+       return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * Block if a passive or active connection is currently being processed. Then
+ * process the event as follows:
+ * - If we are ESTABLISHED, move to CLOSING and modify the QP state
+ *   based on the abrupt flag
+ * - If the connection is already in the CLOSING or IDLE state, the peer is
+ *   disconnecting concurrently with us and we've already seen the
+ *   DISCONNECT event -- ignore the request and return 0
+ * - Disconnect on a listening endpoint returns -EINVAL
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
+{
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret = 0;
+       struct ib_qp *qp = NULL;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       /* Wait if we're currently in a connect or accept downcall */
+       wait_event(cm_id_priv->connect_wait,
+                  !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_ESTABLISHED:
+               cm_id_priv->state = IW_CM_STATE_CLOSING;
+
+               /* QP could be NULL for a user-mode client */
+               if (cm_id_priv->qp)
+                       qp = cm_id_priv->qp;
+               else
+                       ret = -EINVAL;
+               break;
+       case IW_CM_STATE_LISTEN:
+               ret = -EINVAL;
+               break;
+       case IW_CM_STATE_CLOSING:
+               /* remote peer closed first */
+       case IW_CM_STATE_IDLE:
+               /* accept or connect returned !0 */
+               break;
+       case IW_CM_STATE_CONN_RECV:
+               /*
+                * App called disconnect before/without calling accept after
+                * connect_request event delivered.
+                */
+               break;
+       case IW_CM_STATE_CONN_SENT:
+               /* Can only get here if wait above fails */
+       default:
+               BUG();
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       if (qp) {
+               if (abrupt)
+                       ret = iwcm_modify_qp_err(qp);
+               else
+                       ret = iwcm_modify_qp_sqd(qp);
+
+               /*
+                * If both sides are disconnecting, the QP could
+                * already be in the ERR or SQD state
+                */
+               ret = 0;
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_disconnect);
+
+/*
+ * CM_ID <-- DESTROYING
+ *
+ * Clean up all resources associated with the connection and release
+ * the initial reference taken by iw_create_cm_id.
+ */
+static void destroy_cm_id(struct iw_cm_id *cm_id)
+{
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       /*
+        * Wait if we're currently in a connect or accept downcall. A
+        * listening endpoint should never block here.
+        */
+       wait_event(cm_id_priv->connect_wait,
+                  !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_LISTEN:
+               cm_id_priv->state = IW_CM_STATE_DESTROYING;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               /* destroy the listening endpoint */
+               ret = cm_id->device->iwcm->destroy_listen(cm_id);
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               break;
+       case IW_CM_STATE_ESTABLISHED:
+               cm_id_priv->state = IW_CM_STATE_DESTROYING;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               /* Abrupt close of the connection */
+               (void)iwcm_modify_qp_err(cm_id_priv->qp);
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               break;
+       case IW_CM_STATE_IDLE:
+       case IW_CM_STATE_CLOSING:
+               cm_id_priv->state = IW_CM_STATE_DESTROYING;
+               break;
+       case IW_CM_STATE_CONN_RECV:
+               /*
+                * App called destroy before/without calling accept after
+                * receiving connection request event notification.
+                */
+               cm_id_priv->state = IW_CM_STATE_DESTROYING;
+               break;
+       case IW_CM_STATE_CONN_SENT:
+       case IW_CM_STATE_DESTROYING:
+       default:
+               BUG();
+               break;
+       }
+       if (cm_id_priv->qp) {
+               cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+               cm_id_priv->qp = NULL;
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       (void)iwcm_deref_id(cm_id_priv);
+}
+
+/*
+ * This function is only called by the application thread and cannot
+ * be called by the event thread. The function will wait for all
+ * references to be released on the cm_id and then kfree the cm_id
+ * object.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id)
+{
+       struct iwcm_id_private *cm_id_priv;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
+
+       destroy_cm_id(cm_id);
+
+       wait_for_completion(&cm_id_priv->destroy_comp);
+
+       dealloc_work_entries(cm_id_priv);
+
+       kfree(cm_id_priv);
+}
+EXPORT_SYMBOL(iw_destroy_cm_id);
+
+/*
+ * CM_ID <-- LISTEN
+ *
+ * Start listening for connect requests. Generates one CONNECT_REQUEST
+ * event for each inbound connect request.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
+{
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret = 0;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+       ret = alloc_work_entries(cm_id_priv, backlog);
+       if (ret)
+               return ret;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_IDLE:
+               cm_id_priv->state = IW_CM_STATE_LISTEN;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
+               if (ret)
+                       cm_id_priv->state = IW_CM_STATE_IDLE;
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               break;
+       default:
+               ret = -EINVAL;
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_listen);
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * Rejects an inbound connection request. No events are generated.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id,
+                const void *private_data,
+                u8 private_data_len)
+{
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+               return -EINVAL;
+       }
+       cm_id_priv->state = IW_CM_STATE_IDLE;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       ret = cm_id->device->iwcm->reject(cm_id, private_data,
+                                         private_data_len);
+
+       clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+       wake_up_all(&cm_id_priv->connect_wait);
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_reject);
+
+/*
+ * CM_ID <-- ESTABLISHED
+ *
+ * Accepts an inbound connection request and generates an ESTABLISHED
+ * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
+ * until the ESTABLISHED event is received from the provider.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id,
+                struct iw_cm_conn_param *iw_param)
+{
+       struct iwcm_id_private *cm_id_priv;
+       struct ib_qp *qp;
+       unsigned long flags;
+       int ret;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+               return -EINVAL;
+       }
+       /* Get the ib_qp given the QPN */
+       qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+       if (!qp) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               return -EINVAL;
+       }
+       cm_id->device->iwcm->add_ref(qp);
+       cm_id_priv->qp = qp;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       ret = cm_id->device->iwcm->accept(cm_id, iw_param);
+       if (ret) {
+               /* An error on accept precludes provider events */
+               BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+               cm_id_priv->state = IW_CM_STATE_IDLE;
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               if (cm_id_priv->qp) {
+                       cm_id->device->iwcm->rem_ref(qp);
+                       cm_id_priv->qp = NULL;
+               }
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_accept);
+
+/*
+ * Active Side: CM_ID <-- CONN_SENT
+ *
+ * If successful, results in the generation of a CONNECT_REPLY
+ * event. iw_cm_disconnect and iw_cm_destroy will block until the
+ * CONNECT_REPLY event is received from the provider.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+       struct iwcm_id_private *cm_id_priv;
+       int ret = 0;
+       unsigned long flags;
+       struct ib_qp *qp;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+       ret = alloc_work_entries(cm_id_priv, 4);
+       if (ret)
+               return ret;
+
+       set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+       if (cm_id_priv->state != IW_CM_STATE_IDLE) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+               return -EINVAL;
+       }
+
+       /* Get the ib_qp given the QPN */
+       qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+       if (!qp) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               return -EINVAL;
+       }
+       cm_id->device->iwcm->add_ref(qp);
+       cm_id_priv->qp = qp;
+       cm_id_priv->state = IW_CM_STATE_CONN_SENT;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       ret = cm_id->device->iwcm->connect(cm_id, iw_param);
+       if (ret) {
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               if (cm_id_priv->qp) {
+                       cm_id->device->iwcm->rem_ref(qp);
+                       cm_id_priv->qp = NULL;
+               }
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+               cm_id_priv->state = IW_CM_STATE_IDLE;
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_connect);
+
+/*
+ * Passive Side: new CM_ID <-- CONN_RECV
+ *
+ * Handles an inbound connect request. The function creates a new
+ * iw_cm_id to represent the new connection and inherits the client
+ * callback function and other attributes from the listening parent.
+ *
+ * The work item contains a pointer to the listen_cm_id and the event. The
+ * listen_cm_id contains the client cm_handler, context and
+ * device. These are copied when the child cm_id is cloned from the
+ * listening parent. The event contains the new four-tuple.
+ *
+ * An error on the child should not affect the parent, so this
+ * function does not return a value.
+ */
+static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
+                               struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+       struct iw_cm_id *cm_id;
+       struct iwcm_id_private *cm_id_priv;
+       int ret;
+
+       /*
+        * The provider should never generate a connection request
+        * event with a bad status.
+        */
+       BUG_ON(iw_event->status);
+
+       /*
+        * We could be destroying the listening id. If so, ignore this
+        * upcall.
+        */
+       spin_lock_irqsave(&listen_id_priv->lock, flags);
+       if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
+               spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+               return;
+       }
+       spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+
+       cm_id = iw_create_cm_id(listen_id_priv->id.device,
+                               listen_id_priv->id.cm_handler,
+                               listen_id_priv->id.context);
+       /* If the cm_id could not be created, ignore the request */
+       if (IS_ERR(cm_id))
+               return;
+
+       cm_id->provider_data = iw_event->provider_data;
+       cm_id->local_addr = iw_event->local_addr;
+       cm_id->remote_addr = iw_event->remote_addr;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       cm_id_priv->state = IW_CM_STATE_CONN_RECV;
+
+       ret = alloc_work_entries(cm_id_priv, 3);
+       if (ret) {
+               iw_cm_reject(cm_id, NULL, 0);
+               iw_destroy_cm_id(cm_id);
+               return;
+       }
+
+       /* Call the client CM handler */
+       ret = cm_id->cm_handler(cm_id, iw_event);
+       if (ret) {
+               set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+               destroy_cm_id(cm_id);
+               if (atomic_read(&cm_id_priv->refcount)==0)
+                       kfree(cm_id);
+       }
+
+       if (iw_event->private_data_len)
+               kfree(iw_event->private_data);
+}
+
+/*
+ * Passive Side: CM_ID <-- ESTABLISHED
+ *
+ * The provider generated an ESTABLISHED event which means that
+ * the MPA negotiation has completed successfully and we are now in MPA
+ * FPDU mode.
+ *
+ * This event can only be received in the CONN_RECV state. If the
+ * remote peer closed, the ESTABLISHED event would be received followed
+ * by the CLOSE event. If the app closes, it will block until we wake
+ * it up after processing this event.
+ */
+static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
+                              struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+       int ret = 0;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+       /*
+        * We clear the CONNECT_WAIT bit here to allow the callback
+        * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
+        * from a callback handler is not allowed.
+        */
+       clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+       BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+       cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+       wake_up_all(&cm_id_priv->connect_wait);
+
+       return ret;
+}
+
+/*
+ * Active Side: CM_ID <-- ESTABLISHED
+ *
+ * The app has called connect and is waiting for the established event
+ * before posting its requests to the server. This event will wake up
+ * anyone blocked in iw_cm_disconnect or iw_destroy_cm_id.
+ */
+static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
+                              struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+       int ret = 0;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       /*
+        * Clear the connect wait bit so a callback function calling
+        * iw_cm_disconnect will not wait and deadlock this thread
+        */
+       clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+       BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+       if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
+               cm_id_priv->id.local_addr = iw_event->local_addr;
+               cm_id_priv->id.remote_addr = iw_event->remote_addr;
+               cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+       } else {
+               /* REJECTED or RESET */
+               cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+               cm_id_priv->qp = NULL;
+               cm_id_priv->state = IW_CM_STATE_IDLE;
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+
+       if (iw_event->private_data_len)
+               kfree(iw_event->private_data);
+
+       /* Wake up waiters on connect complete */
+       wake_up_all(&cm_id_priv->connect_wait);
+
+       return ret;
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * If in the ESTABLISHED state, move to CLOSING.
+ */
+static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
+                                 struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
+               cm_id_priv->state = IW_CM_STATE_CLOSING;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * If in the ESTABLISHED or CLOSING states, the QP will have been
+ * moved by the provider to the ERR state. Disassociate the CM_ID from
+ * the QP, move to IDLE, and remove the 'connected' reference.
+ *
+ * If in some other state, the cm_id was destroyed asynchronously.
+ * This is the last reference that will result in waking up
+ * the app thread blocked in iw_destroy_cm_id.
+ */
+static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
+                                 struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+       int ret = 0;
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+       if (cm_id_priv->qp) {
+               cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+               cm_id_priv->qp = NULL;
+       }
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_ESTABLISHED:
+       case IW_CM_STATE_CLOSING:
+               cm_id_priv->state = IW_CM_STATE_IDLE;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               break;
+       case IW_CM_STATE_DESTROYING:
+               break;
+       default:
+               BUG();
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       return ret;
+}
+
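+/* Dispatch a dequeued event to the handler for its event type */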
+static int process_event(struct iwcm_id_private *cm_id_priv,
+                        struct iw_cm_event *iw_event)
+{
+       int ret = 0;
+
+       switch (iw_event->event) {
+       case IW_CM_EVENT_CONNECT_REQUEST:
+               cm_conn_req_handler(cm_id_priv, iw_event);
+               break;
+       case IW_CM_EVENT_CONNECT_REPLY:
+               ret = cm_conn_rep_handler(cm_id_priv, iw_event);
+               break;
+       case IW_CM_EVENT_ESTABLISHED:
+               ret = cm_conn_est_handler(cm_id_priv, iw_event);
+               break;
+       case IW_CM_EVENT_DISCONNECT:
+               cm_disconnect_handler(cm_id_priv, iw_event);
+               break;
+       case IW_CM_EVENT_CLOSE:
+               ret = cm_close_handler(cm_id_priv, iw_event);
+               break;
+       default:
+               BUG();
+       }
+
+       return ret;
+}
+
+/*
+ * Process events on the work_list for the cm_id. If the callback
+ * function requests that the cm_id be deleted, a flag is set in the
+ * cm_id to indicate that when the last reference is removed, the
+ * cm_id is to be destroyed. This is necessary to distinguish between
+ * an object that will be destroyed by the app thread asleep on
+ * destroy_comp vs. an object destroyed here synchronously when the
+ * last reference is removed.
+ */
+static void cm_work_handler(void *arg)
+{
+       struct iwcm_work *work = arg, lwork;
+       struct iwcm_id_private *cm_id_priv = work->cm_id;
+       unsigned long flags;
+       int empty;
+       int ret = 0;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       empty = list_empty(&cm_id_priv->work_list);
+       while (!empty) {
+               work = list_entry(cm_id_priv->work_list.next,
+                                 struct iwcm_work, list);
+               list_del_init(&work->list);
+               empty = list_empty(&cm_id_priv->work_list);
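+               /*
+                * Copy the event: the work element goes back on the free
+                * list and may be reused once the lock is dropped.
+                */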
+               lwork = *work;
+               put_work(work);
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+               ret = process_event(cm_id_priv, &lwork.event);
+               if (ret) {
+                       set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+                       destroy_cm_id(&cm_id_priv->id);
+               }
+               BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
+               if (iwcm_deref_id(cm_id_priv))
+                       return;
+
+               if (atomic_read(&cm_id_priv->refcount)==0 &&
+                   test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
+                       dealloc_work_entries(cm_id_priv);
+                       kfree(cm_id_priv);
+                       return;
+               }
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * This function is called in interrupt context. Schedule events on
+ * the iwcm_wq thread to allow callback functions to downcall into
+ * the CM and/or block.  Events are queued to a per-CM_ID
+ * work_list. If this is the first event on the work_list, the work
+ * element is also queued on the iwcm_wq thread.
+ *
+ * Each event holds a reference on the cm_id. Until the last posted
+ * event has been delivered and processed, the cm_id cannot be
+ * deleted.
+ *
+ * Returns:
+ *           0 - the event was handled.
+ *     -ENOMEM - the event was not handled due to lack of resources.
+ */
+static int cm_event_handler(struct iw_cm_id *cm_id,
+                            struct iw_cm_event *iw_event)
+{
+       struct iwcm_work *work;
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret = 0;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       work = get_work(cm_id_priv);
+       if (!work) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       INIT_WORK(&work->work, cm_work_handler, work);
+       work->cm_id = cm_id_priv;
+       work->event = *iw_event;
+
+       if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
+            work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
+           work->event.private_data_len) {
+               ret = copy_private_data(cm_id_priv, &work->event);
+               if (ret) {
+                       put_work(work);
+                       goto out;
+               }
+       }
+
+       atomic_inc(&cm_id_priv->refcount);
+       if (list_empty(&cm_id_priv->work_list)) {
+               list_add_tail(&work->list, &cm_id_priv->work_list);
+               queue_work(iwcm_wq, &work->work);
+       } else
+               list_add_tail(&work->list, &cm_id_priv->work_list);
+out:
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       return ret;
+}
+
+static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
+                                 struct ib_qp_attr *qp_attr,
+                                 int *qp_attr_mask)
+{
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_IDLE:
+       case IW_CM_STATE_CONN_SENT:
+       case IW_CM_STATE_CONN_RECV:
+       case IW_CM_STATE_ESTABLISHED:
+               *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
+               qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
+                                          IB_ACCESS_REMOTE_WRITE|
+                                          IB_ACCESS_REMOTE_READ;
+               ret = 0;
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       return ret;
+}
+
+static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
+                                 struct ib_qp_attr *qp_attr,
+                                 int *qp_attr_mask)
+{
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_IDLE:
+       case IW_CM_STATE_CONN_SENT:
+       case IW_CM_STATE_CONN_RECV:
+       case IW_CM_STATE_ESTABLISHED:
+               *qp_attr_mask = 0;
+               ret = 0;
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       return ret;
+}
+
+int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
+                      struct ib_qp_attr *qp_attr,
+                      int *qp_attr_mask)
+{
+       struct iwcm_id_private *cm_id_priv;
+       int ret;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       switch (qp_attr->qp_state) {
+       case IB_QPS_INIT:
+       case IB_QPS_RTR:
+               ret = iwcm_init_qp_init_attr(cm_id_priv,
+                                            qp_attr, qp_attr_mask);
+               break;
+       case IB_QPS_RTS:
+               ret = iwcm_init_qp_rts_attr(cm_id_priv,
+                                           qp_attr, qp_attr_mask);
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_init_qp_attr);
+
+static int __init iw_cm_init(void)
+{
+       iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
+       if (!iwcm_wq)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void __exit iw_cm_cleanup(void)
+{
+       destroy_workqueue(iwcm_wq);
+}
+
+module_init(iw_cm_init);
+module_exit(iw_cm_cleanup);
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
new file mode 100644 (file)
index 0000000..3f6cc82
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef IWCM_H
+#define IWCM_H
+
+enum iw_cm_state {
+       IW_CM_STATE_IDLE,             /* unbound, inactive */
+       IW_CM_STATE_LISTEN,           /* listen waiting for connect */
+       IW_CM_STATE_CONN_RECV,        /* inbound waiting for user accept */
+       IW_CM_STATE_CONN_SENT,        /* outbound waiting for peer accept */
+       IW_CM_STATE_ESTABLISHED,      /* established */
+       IW_CM_STATE_CLOSING,          /* disconnect */
+       IW_CM_STATE_DESTROYING        /* object being deleted */
+};
+
+struct iwcm_id_private {
+       struct iw_cm_id id;
+       enum iw_cm_state state;
+       unsigned long flags;
+       struct ib_qp *qp;
+       struct completion destroy_comp;
+       wait_queue_head_t connect_wait;
+       struct list_head work_list;
+       spinlock_t lock;
+       atomic_t refcount;
+       struct list_head work_free_list;
+};
+
+#define IWCM_F_CALLBACK_DESTROY   1
+#define IWCM_F_CONNECT_WAIT       2
+
+#endif /* IWCM_H */
index 1c3cfbbe6a97fcf3e7e5ce71060ad9b302a53833..082f03c158f006825bba87446b442fc0f726eb60 100644 (file)
@@ -1246,8 +1246,8 @@ static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
        int i;
 
        for (i = 0; i < MAX_MGMT_OUI; i++)
-                /* Is there matching OUI for this vendor class ? */
-                if (!memcmp(vendor_class->oui[i], oui, 3))
+               /* Is there matching OUI for this vendor class ? */
+               if (!memcmp(vendor_class->oui[i], oui, 3))
                        return i;
 
        return -1;
@@ -2237,7 +2237,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
        list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
                                 &mad_agent_priv->send_list, agent_list) {
                if (mad_send_wr->status == IB_WC_SUCCESS) {
-                       mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
+                       mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
                        mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
                }
        }
@@ -2528,10 +2528,10 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
                        }
                }
                sg_list.addr = dma_map_single(qp_info->port_priv->
-                                               device->dma_device,
+                                               device->dma_device,
                                              &mad_priv->grh,
                                              sizeof *mad_priv -
-                                               sizeof mad_priv->header,
+                                               sizeof mad_priv->header,
                                              DMA_FROM_DEVICE);
                pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
                recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
@@ -2606,7 +2606,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
        struct ib_qp *qp;
 
        attr = kmalloc(sizeof *attr, GFP_KERNEL);
-       if (!attr) {
+       if (!attr) {
                printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
                return -ENOMEM;
        }
@@ -2876,7 +2876,10 @@ static void ib_mad_init_device(struct ib_device *device)
 {
        int start, end, i;
 
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+               return;
+
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                start = 0;
                end   = 0;
        } else {
@@ -2923,7 +2926,7 @@ static void ib_mad_remove_device(struct ib_device *device)
 {
        int i, num_ports, cur_port;
 
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                num_ports = 1;
                cur_port = 0;
        } else {
index d147f3bad2ce78f819525d5332b88212b0a452be..d06b59083f6ea91f59a2e0698b61b2a322471a39 100644 (file)
@@ -38,8 +38,8 @@
 #define __IB_MAD_PRIV_H__
 
 #include <linux/completion.h>
+#include <linux/err.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/workqueue.h>
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
index ebcd5b1817706a29d8a786e751e633facec5493e..1ef79d015a1e32010c392bc6641e0a2dce99bb3f 100644 (file)
@@ -33,8 +33,6 @@
  * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $
  */
 
-#include <linux/dma-mapping.h>
-
 #include "mad_priv.h"
 #include "mad_rmpp.h"
 
@@ -60,6 +58,7 @@ struct mad_rmpp_recv {
        int last_ack;
        int seg_num;
        int newwin;
+       int repwin;
 
        __be64 tid;
        u32 src_qp;
@@ -170,6 +169,32 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
        return msg;
 }
 
+static void ack_ds_ack(struct ib_mad_agent_private *agent,
+                      struct ib_mad_recv_wc *recv_wc)
+{
+       struct ib_mad_send_buf *msg;
+       struct ib_rmpp_mad *rmpp_mad;
+       int ret;
+
+       msg = alloc_response_msg(&agent->agent, recv_wc);
+       if (IS_ERR(msg))
+               return;
+
+       rmpp_mad = msg->mad;
+       memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
+
+       rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
+       ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+       rmpp_mad->rmpp_hdr.seg_num = 0;
+       rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1);
+
+       ret = ib_post_send_mad(msg, NULL);
+       if (ret) {
+               ib_destroy_ah(msg->ah);
+               ib_free_send_mad(msg);
+       }
+}
+
 void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
 {
        struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad;
@@ -271,6 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent,
        rmpp_recv->newwin = 1;
        rmpp_recv->seg_num = 1;
        rmpp_recv->last_ack = 0;
+       rmpp_recv->repwin = 1;
 
        mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
        rmpp_recv->tid = mad_hdr->tid;
@@ -365,7 +391,7 @@ static inline int window_size(struct ib_mad_agent_private *agent)
 static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
                                                  int seg_num)
 {
-        struct ib_mad_recv_buf *seg_buf;
+       struct ib_mad_recv_buf *seg_buf;
        int cur_seg_num;
 
        list_for_each_entry_reverse(seg_buf, rmpp_list, list) {
@@ -591,6 +617,16 @@ static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr,
                        break;
 }
 
+static void process_ds_ack(struct ib_mad_agent_private *agent,
+                          struct ib_mad_recv_wc *mad_recv_wc, int newwin)
+{
+       struct mad_rmpp_recv *rmpp_recv;
+
+       rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
+       if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE)
+               rmpp_recv->repwin = newwin;
+}
+
 static void process_rmpp_ack(struct ib_mad_agent_private *agent,
                             struct ib_mad_recv_wc *mad_recv_wc)
 {
@@ -616,8 +652,18 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 
        spin_lock_irqsave(&agent->lock, flags);
        mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
-       if (!mad_send_wr)
-               goto out;       /* Unmatched ACK */
+       if (!mad_send_wr) {
+               if (!seg_num)
+                       process_ds_ack(agent, mad_recv_wc, newwin);
+               goto out;       /* Unmatched or DS RMPP ACK */
+       }
+
+       if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) &&
+           (mad_send_wr->timeout)) {
+               spin_unlock_irqrestore(&agent->lock, flags);
+               ack_ds_ack(agent, mad_recv_wc);
+               return;         /* Repeated ACK for DS RMPP transaction */
+       }
 
        if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
            (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
@@ -656,6 +702,9 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
                if (mad_send_wr->refcount == 1)
                        ib_reset_mad_timeout(mad_send_wr,
                                             mad_send_wr->send_buf.timeout_ms);
+               spin_unlock_irqrestore(&agent->lock, flags);
+               ack_ds_ack(agent, mad_recv_wc);
+               return;
        } else if (mad_send_wr->refcount == 1 &&
                   mad_send_wr->seg_num < mad_send_wr->newwin &&
                   mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
@@ -772,6 +821,39 @@ out:
        return NULL;
 }
 
+static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
+{
+       struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
+       struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
+       struct mad_rmpp_recv *rmpp_recv;
+       struct ib_ah_attr ah_attr;
+       unsigned long flags;
+       int newwin = 1;
+
+       if (!(mad_hdr->method & IB_MGMT_METHOD_RESP))
+               goto out;
+
+       spin_lock_irqsave(&agent->lock, flags);
+       list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+               if (rmpp_recv->tid != mad_hdr->tid ||
+                   rmpp_recv->mgmt_class != mad_hdr->mgmt_class ||
+                   rmpp_recv->class_version != mad_hdr->class_version ||
+                   (rmpp_recv->method & IB_MGMT_METHOD_RESP))
+                       continue;
+
+               if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
+                       continue;
+
+               if (rmpp_recv->slid == ah_attr.dlid) {
+                       newwin = rmpp_recv->repwin;
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&agent->lock, flags);
+out:
+       return newwin;
+}
+
 int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 {
        struct ib_rmpp_mad *rmpp_mad;
@@ -787,7 +869,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
                return IB_RMPP_RESULT_INTERNAL;
        }
 
-       mad_send_wr->newwin = 1;
+       mad_send_wr->newwin = init_newwin(mad_send_wr);
 
        /* We need to wait for the final ACK even if there isn't a response */
        mad_send_wr->refcount += (mad_send_wr->timeout == 0);
index d6b84226bba7b75d29cf441f319e8bbb327f2c6e..1706d3c7e95eaf2f8f8315cfab1897fb940eeaa8 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -75,6 +76,7 @@ struct ib_sa_device {
 struct ib_sa_query {
        void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
        void (*release)(struct ib_sa_query *);
+       struct ib_sa_client    *client;
        struct ib_sa_port      *port;
        struct ib_mad_send_buf *mad_buf;
        struct ib_sa_sm_ah     *sm_ah;
@@ -415,6 +417,31 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
        }
 }
 
+void ib_sa_register_client(struct ib_sa_client *client)
+{
+       atomic_set(&client->users, 1);
+       init_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_register_client);
+
+static inline void ib_sa_client_get(struct ib_sa_client *client)
+{
+       atomic_inc(&client->users);
+}
+
+static inline void ib_sa_client_put(struct ib_sa_client *client)
+{
+       if (atomic_dec_and_test(&client->users))
+               complete(&client->comp);
+}
+
+void ib_sa_unregister_client(struct ib_sa_client *client)
+{
+       ib_sa_client_put(client);
+       wait_for_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_unregister_client);
+
 /**
  * ib_sa_cancel_query - try to cancel an SA query
  * @id:ID of query to cancel
@@ -557,6 +584,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
 
 /**
  * ib_sa_path_rec_get - Start a Path get query
+ * @client:SA client
  * @device:device to send query on
  * @port_num: port number to send query on
  * @rec:Path Record to send in query
@@ -579,7 +607,8 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
  * error code.  Otherwise it is a query ID that can be used to cancel
  * the query.
  */
-int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+                      struct ib_device *device, u8 port_num,
                       struct ib_sa_path_rec *rec,
                       ib_sa_comp_mask comp_mask,
                       int timeout_ms, gfp_t gfp_mask,
@@ -614,8 +643,10 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
                goto err1;
        }
 
-       query->callback = callback;
-       query->context  = context;
+       ib_sa_client_get(client);
+       query->sa_query.client = client;
+       query->callback        = callback;
+       query->context         = context;
 
        mad = query->sa_query.mad_buf->mad;
        init_mad(mad, agent);
@@ -639,6 +670,7 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
 
 err2:
        *sa_query = NULL;
+       ib_sa_client_put(query->sa_query.client);
        ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -671,6 +703,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
 
 /**
  * ib_sa_service_rec_query - Start Service Record operation
+ * @client:SA client
  * @device:device to send request on
  * @port_num: port number to send request on
  * @method:SA method - should be get, set, or delete
@@ -695,7 +728,8 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
  * error code.  Otherwise it is a request ID that can be used to cancel
  * the query.
  */
-int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+                           struct ib_device *device, u8 port_num, u8 method,
                            struct ib_sa_service_rec *rec,
                            ib_sa_comp_mask comp_mask,
                            int timeout_ms, gfp_t gfp_mask,
@@ -735,8 +769,10 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
                goto err1;
        }
 
-       query->callback = callback;
-       query->context  = context;
+       ib_sa_client_get(client);
+       query->sa_query.client = client;
+       query->callback        = callback;
+       query->context         = context;
 
        mad = query->sa_query.mad_buf->mad;
        init_mad(mad, agent);
@@ -761,6 +797,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
 
 err2:
        *sa_query = NULL;
+       ib_sa_client_put(query->sa_query.client);
        ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -791,7 +828,8 @@ static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
        kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
 }
 
-int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+                            struct ib_device *device, u8 port_num,
                             u8 method,
                             struct ib_sa_mcmember_rec *rec,
                             ib_sa_comp_mask comp_mask,
@@ -827,8 +865,10 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
                goto err1;
        }
 
-       query->callback = callback;
-       query->context  = context;
+       ib_sa_client_get(client);
+       query->sa_query.client = client;
+       query->callback        = callback;
+       query->context         = context;
 
        mad = query->sa_query.mad_buf->mad;
        init_mad(mad, agent);
@@ -853,6 +893,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
 
 err2:
        *sa_query = NULL;
+       ib_sa_client_put(query->sa_query.client);
        ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -887,8 +928,9 @@ static void send_handler(struct ib_mad_agent *agent,
        idr_remove(&query_idr, query->id);
        spin_unlock_irqrestore(&idr_lock, flags);
 
-        ib_free_send_mad(mad_send_wc->send_buf);
+       ib_free_send_mad(mad_send_wc->send_buf);
        kref_put(&query->sm_ah->ref, free_sm_ah);
+       ib_sa_client_put(query->client);
        query->release(query);
 }
 
@@ -919,7 +961,10 @@ static void ib_sa_add_one(struct ib_device *device)
        struct ib_sa_device *sa_dev;
        int s, e, i;
 
-       if (device->node_type == IB_NODE_SWITCH)
+       if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+               return;
+
+       if (device->node_type == RDMA_NODE_IB_SWITCH)
                s = e = 0;
        else {
                s = 1;
index 35852e794e26533aa61c8c8fe3900c3b5d372eee..54b81e17ad50da39091b32f2fb1d67831c546a48 100644 (file)
@@ -64,7 +64,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 
                /* C14-9:2 */
                if (hop_ptr && hop_ptr < hop_cnt) {
-                       if (node_type != IB_NODE_SWITCH)
+                       if (node_type != RDMA_NODE_IB_SWITCH)
                                return 0;
 
                        /* smp->return_path set when received */
@@ -77,7 +77,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
                if (hop_ptr == hop_cnt) {
                        /* smp->return_path set when received */
                        smp->hop_ptr++;
-                       return (node_type == IB_NODE_SWITCH ||
+                       return (node_type == RDMA_NODE_IB_SWITCH ||
                                smp->dr_dlid == IB_LID_PERMISSIVE);
                }
 
@@ -95,7 +95,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 
                /* C14-13:2 */
                if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-                       if (node_type != IB_NODE_SWITCH)
+                       if (node_type != RDMA_NODE_IB_SWITCH)
                                return 0;
 
                        smp->hop_ptr--;
@@ -107,7 +107,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
                if (hop_ptr == 1) {
                        smp->hop_ptr--;
                        /* C14-13:3 -- SMPs destined for SM shouldn't be here */
-                       return (node_type == IB_NODE_SWITCH ||
+                       return (node_type == RDMA_NODE_IB_SWITCH ||
                                smp->dr_slid == IB_LID_PERMISSIVE);
                }
 
@@ -142,7 +142,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 
                /* C14-9:2 -- intermediate hop */
                if (hop_ptr && hop_ptr < hop_cnt) {
-                       if (node_type != IB_NODE_SWITCH)
+                       if (node_type != RDMA_NODE_IB_SWITCH)
                                return 0;
 
                        smp->return_path[hop_ptr] = port_num;
@@ -156,7 +156,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
                                smp->return_path[hop_ptr] = port_num;
                        /* smp->hop_ptr updated when sending */
 
-                       return (node_type == IB_NODE_SWITCH ||
+                       return (node_type == RDMA_NODE_IB_SWITCH ||
                                smp->dr_dlid == IB_LID_PERMISSIVE);
                }
 
@@ -175,7 +175,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 
                /* C14-13:2 */
                if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-                       if (node_type != IB_NODE_SWITCH)
+                       if (node_type != RDMA_NODE_IB_SWITCH)
                                return 0;
 
                        /* smp->hop_ptr updated when sending */
@@ -190,7 +190,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
                                return 1;
                        }
                        /* smp->hop_ptr updated when sending */
-                       return (node_type == IB_NODE_SWITCH);
+                       return (node_type == RDMA_NODE_IB_SWITCH);
                }
 
                /* C14-13:4 -- hop_ptr = 0 -> give to SM */
index 21f9282c1b25d00dcddef4fd2ccba530ad05f543..709323c14c5d3fff1fba12f50c6290e0a17d6a4a 100644 (file)
@@ -68,7 +68,7 @@ struct port_table_attribute {
        int                     index;
 };
 
-static inline int ibdev_is_alive(const struct ib_device *dev) 
+static inline int ibdev_is_alive(const struct ib_device *dev)
 {
        return dev->reg_state == IB_DEV_REGISTERED;
 }
@@ -589,10 +589,11 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf)
                return -ENODEV;
 
        switch (dev->node_type) {
-       case IB_NODE_CA:     return sprintf(buf, "%d: CA\n", dev->node_type);
-       case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
-       case IB_NODE_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
-       default:             return sprintf(buf, "%d: <unknown>\n", dev->node_type);
+       case RDMA_NODE_IB_CA:     return sprintf(buf, "%d: CA\n", dev->node_type);
+       case RDMA_NODE_RNIC:      return sprintf(buf, "%d: RNIC\n", dev->node_type);
+       case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
+       case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
+       default:                  return sprintf(buf, "%d: <unknown>\n", dev->node_type);
        }
 }
 
@@ -708,7 +709,7 @@ int ib_device_register_sysfs(struct ib_device *device)
        if (ret)
                goto err_put;
 
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                ret = add_port(device, 0);
                if (ret)
                        goto err_put;
index c1c6fda9452cc44f6b44400e00eaa7f5e42ea43a..ad4f4d5c2924044cf9199fbad08406f2a5eae4ef 100644 (file)
@@ -309,9 +309,9 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
                info          = evt->param.apr_rcvd.apr_info;
                break;
        case IB_CM_SIDR_REQ_RECEIVED:
-               uvt->resp.u.sidr_req_resp.pkey = 
+               uvt->resp.u.sidr_req_resp.pkey =
                                        evt->param.sidr_req_rcvd.pkey;
-               uvt->resp.u.sidr_req_resp.port = 
+               uvt->resp.u.sidr_req_resp.port =
                                        evt->param.sidr_req_rcvd.port;
                uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
                break;
@@ -1237,7 +1237,7 @@ static struct class ucm_class = {
 static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 {
        struct ib_ucm_device *dev;
-       
+
        dev = container_of(class_dev, struct ib_ucm_device, class_dev);
        return sprintf(buf, "%s\n", dev->ib_dev->name);
 }
@@ -1247,7 +1247,8 @@ static void ib_ucm_add_one(struct ib_device *device)
 {
        struct ib_ucm_device *ucm_dev;
 
-       if (!device->alloc_ucontext)
+       if (!device->alloc_ucontext ||
+           rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
                return;
 
        ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
index 1273f8807e849a28f7aec5bba0aef5bd87dbba6c..807fbd6b84140b99152aa52f978412827692bc5e 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -1032,7 +1032,10 @@ static void ib_umad_add_one(struct ib_device *device)
        struct ib_umad_device *umad_dev;
        int s, e, i;
 
-       if (device->node_type == IB_NODE_SWITCH)
+       if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+               return;
+
+       if (device->node_type == RDMA_NODE_IB_SWITCH)
                s = e = 0;
        else {
                s = 1;
index 30923eb68ec7404ad158d91780704837466ad337..b72c7f69ca906b0e9931a4b5b53a270e58dd765e 100644 (file)
@@ -155,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
 }
 
 static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
-                                       struct ib_ucontext *context)
+                                       struct ib_ucontext *context, int nested)
 {
        struct ib_uobject *uobj;
 
@@ -163,7 +163,10 @@ static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
        if (!uobj)
                return NULL;
 
-       down_read(&uobj->mutex);
+       if (nested)
+               down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
+       else
+               down_read(&uobj->mutex);
        if (!uobj->live) {
                put_uobj_read(uobj);
                return NULL;
@@ -190,17 +193,18 @@ static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
        return uobj;
 }
 
-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context)
+static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
+                         int nested)
 {
        struct ib_uobject *uobj;
 
-       uobj = idr_read_uobj(idr, id, context);
+       uobj = idr_read_uobj(idr, id, context, nested);
        return uobj ? uobj->object : NULL;
 }
 
 static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
 {
-       return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context);
+       return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
 }
 
 static void put_pd_read(struct ib_pd *pd)
@@ -208,9 +212,9 @@ static void put_pd_read(struct ib_pd *pd)
        put_uobj_read(pd->uobject);
 }
 
-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context)
+static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
 {
-       return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context);
+       return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
 }
 
 static void put_cq_read(struct ib_cq *cq)
@@ -220,7 +224,7 @@ static void put_cq_read(struct ib_cq *cq)
 
 static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
 {
-       return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context);
+       return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
 }
 
 static void put_ah_read(struct ib_ah *ah)
@@ -230,7 +234,7 @@ static void put_ah_read(struct ib_ah *ah)
 
 static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
 {
-       return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context);
+       return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
 }
 
 static void put_qp_read(struct ib_qp *qp)
@@ -240,7 +244,7 @@ static void put_qp_read(struct ib_qp *qp)
 
 static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
 {
-       return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context);
+       return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
 }
 
 static void put_srq_read(struct ib_srq *srq)
@@ -837,7 +841,6 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 err_copy:
        idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
 
-
 err_free:
        ib_destroy_cq(cq);
 
@@ -867,7 +870,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
                   (unsigned long) cmd.response + sizeof resp,
                   in_len - sizeof cmd, out_len - sizeof resp);
 
-       cq = idr_read_cq(cmd.cq_handle, file->ucontext);
+       cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
        if (!cq)
                return -EINVAL;
 
@@ -875,11 +878,10 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
        if (ret)
                goto out;
 
-       memset(&resp, 0, sizeof resp);
        resp.cqe = cq->cqe;
 
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
-                        &resp, sizeof resp))
+                        &resp, sizeof resp.cqe))
                ret = -EFAULT;
 
 out:
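A note on the resize-CQ hunk above: ib_uverbs_resize_cq() no longer zeroes the whole response and copies back only resp.cqe, presumably so that any driver-specific data a provider has already written after the cqe field in the user's response buffer is left intact. A minimal sketch of the idea; the struct and field names below are hypothetical, not the actual uverbs ABI:

	/*
	 * Illustration only: the kernel overwrites just the leading cqe
	 * field, so provider-written bytes that follow it in the user's
	 * response buffer survive the copy_to_user() above.
	 */
	struct example_resize_cq_resp {
		__u32 cqe;		/* written by ib_uverbs_resize_cq() */
		__u32 reserved;
		__u64 driver_data[0];	/* optionally written by the provider */
	};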
@@ -894,7 +896,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 {
        struct ib_uverbs_poll_cq       cmd;
        struct ib_uverbs_poll_cq_resp *resp;
-       struct ib_uobject             *uobj;
        struct ib_cq                  *cq;
        struct ib_wc                  *wc;
        int                            ret = 0;
@@ -915,16 +916,15 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
                goto out_wc;
        }
 
-       uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-       if (!uobj) {
+       cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+       if (!cq) {
                ret = -EINVAL;
                goto out;
        }
-       cq = uobj->object;
 
        resp->count = ib_poll_cq(cq, cmd.ne, wc);
 
-       put_uobj_read(uobj);
+       put_cq_read(cq);
 
        for (i = 0; i < resp->count; i++) {
                resp->wc[i].wr_id          = wc[i].wr_id;
@@ -959,21 +959,19 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
                                int out_len)
 {
        struct ib_uverbs_req_notify_cq cmd;
-       struct ib_uobject             *uobj;
        struct ib_cq                  *cq;
 
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-       if (!uobj)
+       cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+       if (!cq)
                return -EINVAL;
-       cq = uobj->object;
 
        ib_req_notify_cq(cq, cmd.solicited_only ?
                         IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
 
-       put_uobj_read(uobj);
+       put_cq_read(cq);
 
        return in_len;
 }
@@ -1064,9 +1062,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 
        srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
        pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
-       scq = idr_read_cq(cmd.send_cq_handle, file->ucontext);
+       scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
        rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
-               scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext);
+               scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
 
        if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
                ret = -EINVAL;
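The nested flag threaded through idr_read_cq() above appears to be a lockdep annotation: ib_uverbs_create_qp() read-locks two CQ uobjects of the same lock class (send and recv), so the second one is taken with down_read_nested(..., SINGLE_DEPTH_NESTING) to avoid a false deadlock report. A standalone sketch of that locking pattern, outside the uverbs code (the semaphore names are illustrative only):

	#include <linux/rwsem.h>
	#include <linux/lockdep.h>

	static DECLARE_RWSEM(scq_sem);	/* stands in for the send CQ uobject lock */
	static DECLARE_RWSEM(rcq_sem);	/* stands in for the recv CQ uobject lock */

	static void lock_both_cqs(void)
	{
		down_read(&scq_sem);
		/* Same lock class as scq_sem: tell lockdep this nesting is intentional. */
		down_read_nested(&rcq_sem, SINGLE_DEPTH_NESTING);

		/* ... create the QP while both CQs are pinned ... */

		up_read(&rcq_sem);
		up_read(&scq_sem);
	}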
@@ -1274,6 +1272,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
                            int out_len)
 {
        struct ib_uverbs_modify_qp cmd;
+       struct ib_udata            udata;
        struct ib_qp              *qp;
        struct ib_qp_attr         *attr;
        int                        ret;
@@ -1281,6 +1280,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
+       INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+                  out_len);
+
        attr = kmalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;
@@ -1337,7 +1339,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
        attr->alt_ah_attr.ah_flags          = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
        attr->alt_ah_attr.port_num          = cmd.alt_dest.port_num;
 
-       ret = ib_modify_qp(qp, attr, cmd.attr_mask);
+       ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
 
        put_qp_read(qp);
 
@@ -1674,7 +1676,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
                                break;
                }
 
-
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp))
                ret = -EFAULT;
@@ -1724,7 +1725,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
                                break;
                }
 
-
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp))
                ret = -EFAULT;
@@ -2055,6 +2055,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
                             int out_len)
 {
        struct ib_uverbs_modify_srq cmd;
+       struct ib_udata             udata;
        struct ib_srq              *srq;
        struct ib_srq_attr          attr;
        int                         ret;
@@ -2062,6 +2063,9 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
+       INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+                  out_len);
+
        srq = idr_read_srq(cmd.srq_handle, file->ucontext);
        if (!srq)
                return -EINVAL;
@@ -2069,7 +2073,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
        attr.max_wr    = cmd.max_wr;
        attr.srq_limit = cmd.srq_limit;
 
-       ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
+       ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
 
        put_srq_read(srq);
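In the ib_uverbs_modify_qp() and ib_uverbs_modify_srq() hunks above, uverbs now builds an ib_udata covering whatever the process appended after the fixed-size command and invokes the provider's modify hook directly, instead of going through ib_modify_qp()/ib_modify_srq(). A hedged sketch of how a provider might consume that trailing data; the command struct and its fields are hypothetical:

	#include <rdma/ib_verbs.h>

	/* Hypothetical provider-private extension of the modify_srq command. */
	struct example_modify_srq_cmd {
		__u32 flags;
		__u32 reserved;
	};

	static int example_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr,
				      enum ib_srq_attr_mask mask,
				      struct ib_udata *udata)
	{
		struct example_modify_srq_cmd cmd = { 0 };

		/* In-kernel callers (ib_modify_srq) pass udata == NULL. */
		if (udata && udata->inlen >= sizeof cmd)
			if (ib_copy_from_udata(&cmd, udata, sizeof cmd))
				return -EFAULT;

		/* ... apply attr/mask (and cmd.flags) to the hardware SRQ ... */
		return 0;
	}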
 
index 468999c388033e83f4668f4f43e187f911f7a3ec..8b5dd3649bbf37e8c0679393506a30a46d879e38 100644 (file)
@@ -79,6 +79,23 @@ enum ib_rate mult_to_ib_rate(int mult)
 }
 EXPORT_SYMBOL(mult_to_ib_rate);
 
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type)
+{
+       switch (node_type) {
+       case RDMA_NODE_IB_CA:
+       case RDMA_NODE_IB_SWITCH:
+       case RDMA_NODE_IB_ROUTER:
+               return RDMA_TRANSPORT_IB;
+       case RDMA_NODE_RNIC:
+               return RDMA_TRANSPORT_IWARP;
+       default:
+               BUG();
+               return 0;
+       }
+}
+EXPORT_SYMBOL(rdma_node_get_transport);
+
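The new rdma_node_get_transport() gives kernel clients a single place to ask whether a device speaks InfiniBand or iWARP; the ib_ucm and ib_umad hunks earlier in this patch use it to skip RNICs. A minimal sketch of the same gate in a hypothetical IB-only client:

	static void example_add_one(struct ib_device *device)
	{
		/* IB-specific clients simply ignore iWARP devices (RNICs). */
		if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
			return;

		/* ... set up per-device state for an InfiniBand device ... */
	}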
 /* Protection domains */
 
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
@@ -231,7 +248,7 @@ int ib_modify_srq(struct ib_srq *srq,
                  struct ib_srq_attr *srq_attr,
                  enum ib_srq_attr_mask srq_attr_mask)
 {
-       return srq->device->modify_srq(srq, srq_attr, srq_attr_mask);
+       return srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_srq);
 
@@ -547,7 +564,7 @@ int ib_modify_qp(struct ib_qp *qp,
                 struct ib_qp_attr *qp_attr,
                 int qp_attr_mask)
 {
-       return qp->device->modify_qp(qp, qp_attr, qp_attr_mask);
+       return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_qp);
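Note that only the provider hooks grow a udata argument here; the in-kernel wrappers ib_modify_qp() and ib_modify_srq() keep their signatures and simply forward NULL, so existing kernel callers compile unchanged. For illustration, a typical in-kernel use stays exactly as before:

	#include <rdma/ib_verbs.h>

	/* Unchanged in-kernel caller: no udata is involved. */
	static int example_move_qp_to_error(struct ib_qp *qp)
	{
		struct ib_qp_attr attr = {
			.qp_state = IB_QPS_ERR,
		};

		return ib_modify_qp(qp, &attr, IB_QP_STATE);
	}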
 
diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild
new file mode 100644 (file)
index 0000000..06964c4
--- /dev/null
@@ -0,0 +1,8 @@
+ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG
+EXTRA_CFLAGS += -DDEBUG
+endif
+
+obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
+
+iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
+       c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig
new file mode 100644 (file)
index 0000000..809cb14
--- /dev/null
@@ -0,0 +1,15 @@
+config INFINIBAND_AMSO1100
+       tristate "Ammasso 1100 HCA support"
+       depends on PCI && INET && INFINIBAND
+       ---help---
+         This is a low-level driver for the Ammasso 1100 host
+         channel adapter (HCA).
+
+config INFINIBAND_AMSO1100_DEBUG
+       bool "Verbose debugging output"
+       depends on INFINIBAND_AMSO1100
+       default n
+       ---help---
+         This option causes the amso1100 driver to produce a bunch of
+         debug messages.  Select this if you are developing the driver
+         or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
new file mode 100644 (file)
index 0000000..9e9120f
--- /dev/null
@@ -0,0 +1,1255 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_provider.h"
+
+MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
+MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
+    | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
+
+static int debug = -1;         /* defaults above */
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+static int c2_up(struct net_device *netdev);
+static int c2_down(struct net_device *netdev);
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+static void c2_tx_interrupt(struct net_device *netdev);
+static void c2_rx_interrupt(struct net_device *netdev);
+static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+static void c2_tx_timeout(struct net_device *netdev);
+static int c2_change_mtu(struct net_device *netdev, int new_mtu);
+static void c2_reset(struct c2_port *c2_port);
+static struct net_device_stats *c2_get_stats(struct net_device *netdev);
+
+static struct pci_device_id c2_pci_table[] = {
+       { PCI_DEVICE(0x18b8, 0xb001) },
+       { 0 }
+};
+
+MODULE_DEVICE_TABLE(pci, c2_pci_table);
+
+static void c2_print_macaddr(struct net_device *netdev)
+{
+       pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, "
+               "IRQ %u\n", netdev->name,
+               netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+               netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
+               netdev->irq);
+}
+
+static void c2_set_rxbufsize(struct c2_port *c2_port)
+{
+       struct net_device *netdev = c2_port->netdev;
+
+       if (netdev->mtu > RX_BUF_SIZE)
+               c2_port->rx_buf_size =
+                   netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) +
+                   NET_IP_ALIGN;
+       else
+               c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE;
+}
+
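Worked example of c2_set_rxbufsize() above, assuming struct c2_rxp_hdr is the 8-byte receive header described later in this file and NET_IP_ALIGN has its usual value of 2: with the default 1500-byte MTU, 1500 is not greater than RX_BUF_SIZE (1536 + 8 = 1544), so rx_buf_size = 8 + 1544 = 1552 bytes; only a jumbo MTU takes the first branch, e.g. MTU 9000 gives 9000 + ETH_HLEN (14) + 8 + 2 = 9024 bytes.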
+/*
+ * Allocate TX ring elements and chain them together.
+ * One-to-one association of adapter descriptors with ring elements.
+ */
+static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr,
+                           dma_addr_t base, void __iomem * mmio_txp_ring)
+{
+       struct c2_tx_desc *tx_desc;
+       struct c2_txp_desc __iomem *txp_desc;
+       struct c2_element *elem;
+       int i;
+
+       tx_ring->start = kmalloc(sizeof(*elem) * tx_ring->count, GFP_KERNEL);
+       if (!tx_ring->start)
+               return -ENOMEM;
+
+       elem = tx_ring->start;
+       tx_desc = vaddr;
+       txp_desc = mmio_txp_ring;
+       for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) {
+               tx_desc->len = 0;
+               tx_desc->status = 0;
+
+               /* Set TXP_HTXD_UNINIT */
+               __raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+                            (void __iomem *) txp_desc + C2_TXP_ADDR);
+               __raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
+               __raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+                            (void __iomem *) txp_desc + C2_TXP_FLAGS);
+
+               elem->skb = NULL;
+               elem->ht_desc = tx_desc;
+               elem->hw_desc = txp_desc;
+
+               if (i == tx_ring->count - 1) {
+                       elem->next = tx_ring->start;
+                       tx_desc->next_offset = base;
+               } else {
+                       elem->next = elem + 1;
+                       tx_desc->next_offset =
+                           base + (i + 1) * sizeof(*tx_desc);
+               }
+       }
+
+       tx_ring->to_use = tx_ring->to_clean = tx_ring->start;
+
+       return 0;
+}
+
+/*
+ * Allocate RX ring elements and chain them together.
+ * One-to-one association of adapter descriptors with ring elements.
+ */
+static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr,
+                           dma_addr_t base, void __iomem * mmio_rxp_ring)
+{
+       struct c2_rx_desc *rx_desc;
+       struct c2_rxp_desc __iomem *rxp_desc;
+       struct c2_element *elem;
+       int i;
+
+       rx_ring->start = kmalloc(sizeof(*elem) * rx_ring->count, GFP_KERNEL);
+       if (!rx_ring->start)
+               return -ENOMEM;
+
+       elem = rx_ring->start;
+       rx_desc = vaddr;
+       rxp_desc = mmio_rxp_ring;
+       for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) {
+               rx_desc->len = 0;
+               rx_desc->status = 0;
+
+               /* Set RXP_HRXD_UNINIT */
+               __raw_writew(cpu_to_be16(RXP_HRXD_OK),
+                      (void __iomem *) rxp_desc + C2_RXP_STATUS);
+               __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
+               __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
+               __raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+                            (void __iomem *) rxp_desc + C2_RXP_ADDR);
+               __raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+                            (void __iomem *) rxp_desc + C2_RXP_FLAGS);
+
+               elem->skb = NULL;
+               elem->ht_desc = rx_desc;
+               elem->hw_desc = rxp_desc;
+
+               if (i == rx_ring->count - 1) {
+                       elem->next = rx_ring->start;
+                       rx_desc->next_offset = base;
+               } else {
+                       elem->next = elem + 1;
+                       rx_desc->next_offset =
+                           base + (i + 1) * sizeof(*rx_desc);
+               }
+       }
+
+       rx_ring->to_use = rx_ring->to_clean = rx_ring->start;
+
+       return 0;
+}
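Both ring allocators above link the last element back to the first, so the rings are walked as circular lists; c2_rx_fill(), c2_rx_clean() and c2_tx_clean() below all use the same "follow ->next until back at start" idiom. A small standalone sketch of that convention, assuming the struct c2_ring / struct c2_element definitions from c2.h:

	/* Apply fn() once to every element of a circular c2 ring. */
	static void example_ring_for_each(struct c2_ring *ring,
					  void (*fn)(struct c2_element *elem))
	{
		struct c2_element *elem = ring->start;

		do {
			fn(elem);
		} while ((elem = elem->next) != ring->start);
	}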
+
+/* Setup buffer for receiving */
+static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem)
+{
+       struct c2_dev *c2dev = c2_port->c2dev;
+       struct c2_rx_desc *rx_desc = elem->ht_desc;
+       struct sk_buff *skb;
+       dma_addr_t mapaddr;
+       u32 maplen;
+       struct c2_rxp_hdr *rxp_hdr;
+
+       skb = dev_alloc_skb(c2_port->rx_buf_size);
+       if (unlikely(!skb)) {
+               pr_debug("%s: out of memory for receive\n",
+                       c2_port->netdev->name);
+               return -ENOMEM;
+       }
+
+       /* Zero out the rxp hdr in the sk_buff */
+       memset(skb->data, 0, sizeof(*rxp_hdr));
+
+       skb->dev = c2_port->netdev;
+
+       maplen = c2_port->rx_buf_size;
+       mapaddr =
+           pci_map_single(c2dev->pcidev, skb->data, maplen,
+                          PCI_DMA_FROMDEVICE);
+
+       /* Set the sk_buff RXP_header to RXP_HRXD_READY */
+       rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+       rxp_hdr->flags = RXP_HRXD_READY;
+
+       __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+       __raw_writew(cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
+                    elem->hw_desc + C2_RXP_LEN);
+       __raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
+       __raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+
+       elem->skb = skb;
+       elem->mapaddr = mapaddr;
+       elem->maplen = maplen;
+       rx_desc->len = maplen;
+
+       return 0;
+}
+
+/*
+ * Allocate buffers for the Rx ring
+ * For receive:  rx_ring.to_clean is next received frame
+ */
+static int c2_rx_fill(struct c2_port *c2_port)
+{
+       struct c2_ring *rx_ring = &c2_port->rx_ring;
+       struct c2_element *elem;
+       int ret = 0;
+
+       elem = rx_ring->start;
+       do {
+               if (c2_rx_alloc(c2_port, elem)) {
+                       ret = 1;
+                       break;
+               }
+       } while ((elem = elem->next) != rx_ring->start);
+
+       rx_ring->to_clean = rx_ring->start;
+       return ret;
+}
+
+/* Free all buffers in RX ring, assumes receiver stopped */
+static void c2_rx_clean(struct c2_port *c2_port)
+{
+       struct c2_dev *c2dev = c2_port->c2dev;
+       struct c2_ring *rx_ring = &c2_port->rx_ring;
+       struct c2_element *elem;
+       struct c2_rx_desc *rx_desc;
+
+       elem = rx_ring->start;
+       do {
+               rx_desc = elem->ht_desc;
+               rx_desc->len = 0;
+
+               __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+               __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+               __raw_writew(0, elem->hw_desc + C2_RXP_LEN);
+               __raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+                            elem->hw_desc + C2_RXP_ADDR);
+               __raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+                            elem->hw_desc + C2_RXP_FLAGS);
+
+               if (elem->skb) {
+                       pci_unmap_single(c2dev->pcidev, elem->mapaddr,
+                                        elem->maplen, PCI_DMA_FROMDEVICE);
+                       dev_kfree_skb(elem->skb);
+                       elem->skb = NULL;
+               }
+       } while ((elem = elem->next) != rx_ring->start);
+}
+
+static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem)
+{
+       struct c2_tx_desc *tx_desc = elem->ht_desc;
+
+       tx_desc->len = 0;
+
+       pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen,
+                        PCI_DMA_TODEVICE);
+
+       if (elem->skb) {
+               dev_kfree_skb_any(elem->skb);
+               elem->skb = NULL;
+       }
+
+       return 0;
+}
+
+/* Free all buffers in TX ring, assumes transmitter stopped */
+static void c2_tx_clean(struct c2_port *c2_port)
+{
+       struct c2_ring *tx_ring = &c2_port->tx_ring;
+       struct c2_element *elem;
+       struct c2_txp_desc txp_htxd;
+       int retry;
+       unsigned long flags;
+
+       spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+       elem = tx_ring->start;
+
+       do {
+               retry = 0;
+               do {
+                       txp_htxd.flags =
+                           readw(elem->hw_desc + C2_TXP_FLAGS);
+
+                       if (txp_htxd.flags == TXP_HTXD_READY) {
+                               retry = 1;
+                               __raw_writew(0,
+                                            elem->hw_desc + C2_TXP_LEN);
+                               __raw_writeq(0,
+                                            elem->hw_desc + C2_TXP_ADDR);
+                               __raw_writew(cpu_to_be16(TXP_HTXD_DONE),
+                                            elem->hw_desc + C2_TXP_FLAGS);
+                               c2_port->netstats.tx_dropped++;
+                               break;
+                       } else {
+                               __raw_writew(0,
+                                            elem->hw_desc + C2_TXP_LEN);
+                               __raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+                                            elem->hw_desc + C2_TXP_ADDR);
+                               __raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+                                            elem->hw_desc + C2_TXP_FLAGS);
+                       }
+
+                       c2_tx_free(c2_port->c2dev, elem);
+
+               } while ((elem = elem->next) != tx_ring->start);
+       } while (retry);
+
+       c2_port->tx_avail = c2_port->tx_ring.count - 1;
+       c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start;
+
+       if (c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+               netif_wake_queue(c2_port->netdev);
+
+       spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+}
+
+/*
+ * Process transmit descriptors marked 'DONE' by the firmware,
+ * freeing up their unneeded sk_buffs.
+ */
+static void c2_tx_interrupt(struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+       struct c2_dev *c2dev = c2_port->c2dev;
+       struct c2_ring *tx_ring = &c2_port->tx_ring;
+       struct c2_element *elem;
+       struct c2_txp_desc txp_htxd;
+
+       spin_lock(&c2_port->tx_lock);
+
+       for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
+            elem = elem->next) {
+               txp_htxd.flags =
+                   be16_to_cpu(readw(elem->hw_desc + C2_TXP_FLAGS));
+
+               if (txp_htxd.flags != TXP_HTXD_DONE)
+                       break;
+
+               if (netif_msg_tx_done(c2_port)) {
+                       /* PCI reads are expensive in fast path */
+                       txp_htxd.len =
+                           be16_to_cpu(readw(elem->hw_desc + C2_TXP_LEN));
+                       pr_debug("%s: tx done slot %3Zu status 0x%x len "
+                               "%5u bytes\n",
+                               netdev->name, elem - tx_ring->start,
+                               txp_htxd.flags, txp_htxd.len);
+               }
+
+               c2_tx_free(c2dev, elem);
+               ++(c2_port->tx_avail);
+       }
+
+       tx_ring->to_clean = elem;
+
+       if (netif_queue_stopped(netdev)
+           && c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+               netif_wake_queue(netdev);
+
+       spin_unlock(&c2_port->tx_lock);
+}
+
+static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
+{
+       struct c2_rx_desc *rx_desc = elem->ht_desc;
+       struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+
+       if (rxp_hdr->status != RXP_HRXD_OK ||
+           rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) {
+               pr_debug("BAD RXP_HRXD\n");
+               pr_debug("  rx_desc : %p\n", rx_desc);
+               pr_debug("    index : %Zu\n",
+                       elem - c2_port->rx_ring.start);
+               pr_debug("    len   : %u\n", rx_desc->len);
+               pr_debug("  rxp_hdr : %p [PA %p]\n", rxp_hdr,
+                       (void *) __pa((unsigned long) rxp_hdr));
+               pr_debug("    flags : 0x%x\n", rxp_hdr->flags);
+               pr_debug("    status: 0x%x\n", rxp_hdr->status);
+               pr_debug("    len   : %u\n", rxp_hdr->len);
+               pr_debug("    rsvd  : 0x%x\n", rxp_hdr->rsvd);
+       }
+
+       /* Setup the skb for reuse since we're dropping this pkt */
+       elem->skb->tail = elem->skb->data = elem->skb->head;
+
+       /* Zero out the rxp hdr in the sk_buff */
+       memset(elem->skb->data, 0, sizeof(*rxp_hdr));
+
+       /* Write the descriptor to the adapter's rx ring */
+       __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+       __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+       __raw_writew(cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
+                    elem->hw_desc + C2_RXP_LEN);
+       __raw_writeq(cpu_to_be64(elem->mapaddr), elem->hw_desc + C2_RXP_ADDR);
+       __raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+
+       pr_debug("packet dropped\n");
+       c2_port->netstats.rx_dropped++;
+}
+
+static void c2_rx_interrupt(struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+       struct c2_dev *c2dev = c2_port->c2dev;
+       struct c2_ring *rx_ring = &c2_port->rx_ring;
+       struct c2_element *elem;
+       struct c2_rx_desc *rx_desc;
+       struct c2_rxp_hdr *rxp_hdr;
+       struct sk_buff *skb;
+       dma_addr_t mapaddr;
+       u32 maplen, buflen;
+       unsigned long flags;
+
+       spin_lock_irqsave(&c2dev->lock, flags);
+
+       /* Begin where we left off */
+       rx_ring->to_clean = rx_ring->start + c2dev->cur_rx;
+
+       for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean;
+            elem = elem->next) {
+               rx_desc = elem->ht_desc;
+               mapaddr = elem->mapaddr;
+               maplen = elem->maplen;
+               skb = elem->skb;
+               rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+
+               if (rxp_hdr->flags != RXP_HRXD_DONE)
+                       break;
+               buflen = rxp_hdr->len;
+
+               /* Sanity check the RXP header */
+               if (rxp_hdr->status != RXP_HRXD_OK ||
+                   buflen > (rx_desc->len - sizeof(*rxp_hdr))) {
+                       c2_rx_error(c2_port, elem);
+                       continue;
+               }
+
+               /*
+                * Allocate and map a new skb for replenishing the host
+                * RX desc
+                */
+               if (c2_rx_alloc(c2_port, elem)) {
+                       c2_rx_error(c2_port, elem);
+                       continue;
+               }
+
+               /* Unmap the old skb */
+               pci_unmap_single(c2dev->pcidev, mapaddr, maplen,
+                                PCI_DMA_FROMDEVICE);
+
+               prefetch(skb->data);
+
+               /*
+                * Skip past the leading 8 bytes comprising the
+                * "struct c2_rxp_hdr", prepended by the adapter
+                * to the usual Ethernet header ("struct ethhdr"),
+                * to the start of the raw Ethernet packet.
+                *
+                * Fix up the various fields in the sk_buff before
+                * passing it up to netif_rx(). The transfer size
+                * (in bytes) specified by the adapter in the len field of
+                * the "struct c2_rxp_hdr" does NOT include the
+                * "sizeof(struct c2_rxp_hdr)".
+                */
+               skb->data += sizeof(*rxp_hdr);
+               skb->tail = skb->data + buflen;
+               skb->len = buflen;
+               skb->dev = netdev;
+               skb->protocol = eth_type_trans(skb, netdev);
+
+               netif_rx(skb);
+
+               netdev->last_rx = jiffies;
+               c2_port->netstats.rx_packets++;
+               c2_port->netstats.rx_bytes += buflen;
+       }
+
+       /* Save where we left off */
+       rx_ring->to_clean = elem;
+       c2dev->cur_rx = elem - rx_ring->start;
+       C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+       spin_unlock_irqrestore(&c2dev->lock, flags);
+}
+
+/*
+ * Handle netisr0 TX & RX interrupts.
+ */
+static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+       unsigned int netisr0, dmaisr;
+       int handled = 0;
+       struct c2_dev *c2dev = (struct c2_dev *) dev_id;
+
+       /* Process CCILNET interrupts */
+       netisr0 = readl(c2dev->regs + C2_NISR0);
+       if (netisr0) {
+
+               /*
+                * There is an issue with the firmware that always
+                * provides the status of RX for both TX & RX
+                * interrupts.  So process both queues here.
+                */
+               c2_rx_interrupt(c2dev->netdev);
+               c2_tx_interrupt(c2dev->netdev);
+
+               /* Clear the interrupt */
+               writel(netisr0, c2dev->regs + C2_NISR0);
+               handled++;
+       }
+
+       /* Process RNIC interrupts */
+       dmaisr = readl(c2dev->regs + C2_DISR);
+       if (dmaisr) {
+               writel(dmaisr, c2dev->regs + C2_DISR);
+               c2_rnic_interrupt(c2dev);
+               handled++;
+       }
+
+       if (handled) {
+               return IRQ_HANDLED;
+       } else {
+               return IRQ_NONE;
+       }
+}
+
+static int c2_up(struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+       struct c2_dev *c2dev = c2_port->c2dev;
+       struct c2_element *elem;
+       struct c2_rxp_hdr *rxp_hdr;
+       struct in_device *in_dev;
+       size_t rx_size, tx_size;
+       int ret, i;
+       unsigned int netimr0;
+
+       if (netif_msg_ifup(c2_port))
+               pr_debug("%s: enabling interface\n", netdev->name);
+
+       /* Set the Rx buffer size based on MTU */
+       c2_set_rxbufsize(c2_port);
+
+       /* Allocate DMA'able memory for Tx/Rx host descriptor rings */
+       rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc);
+       tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
+
+       c2_port->mem_size = tx_size + rx_size;
+       c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size,
+                                           &c2_port->dma);
+       if (c2_port->mem == NULL) {
+               pr_debug("Unable to allocate memory for "
+                       "host descriptor rings\n");
+               return -ENOMEM;
+       }
+
+       memset(c2_port->mem, 0, c2_port->mem_size);
+
+       /* Create the Rx host descriptor ring */
+       if ((ret =
+            c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
+                             c2dev->mmio_rxp_ring))) {
+               pr_debug("Unable to create RX ring\n");
+               goto bail0;
+       }
+
+       /* Allocate Rx buffers for the host descriptor ring */
+       if (c2_rx_fill(c2_port)) {
+               pr_debug("Unable to fill RX ring\n");
+               goto bail1;
+       }
+
+       /* Create the Tx host descriptor ring */
+       if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size,
+                                   c2_port->dma + rx_size,
+                                   c2dev->mmio_txp_ring))) {
+               pr_debug("Unable to create TX ring\n");
+               goto bail1;
+       }
+
+       /* Set the TX pointer to where we left off */
+       c2_port->tx_avail = c2_port->tx_ring.count - 1;
+       c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean =
+           c2_port->tx_ring.start + c2dev->cur_tx;
+
+       /* missing: Initialize MAC */
+
+       BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean);
+
+       /* Reset the adapter, ensures the driver is in sync with the RXP */
+       c2_reset(c2_port);
+
+       /* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */
+       for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count;
+            i++, elem++) {
+               rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+               rxp_hdr->flags = 0;
+               __raw_writew(cpu_to_be16(RXP_HRXD_READY),
+                            elem->hw_desc + C2_RXP_FLAGS);
+       }
+
+       /* Enable network packets */
+       netif_start_queue(netdev);
+
+       /* Enable IRQ */
+       writel(0, c2dev->regs + C2_IDIS);
+       netimr0 = readl(c2dev->regs + C2_NIMR0);
+       netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT);
+       writel(netimr0, c2dev->regs + C2_NIMR0);
+
+       /* Tell the stack to ignore arp requests for ipaddrs bound to
+        * other interfaces.  This is needed to prevent the host stack
+        * from responding to arp requests to the ipaddr bound on the
+        * rdma interface.
+        */
+       in_dev = in_dev_get(netdev);
+       in_dev->cnf.arp_ignore = 1;
+       in_dev_put(in_dev);
+
+       return 0;
+
+      bail1:
+       c2_rx_clean(c2_port);
+       kfree(c2_port->rx_ring.start);
+
+      bail0:
+       pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+                           c2_port->dma);
+
+       return ret;
+}
+
+static int c2_down(struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+       struct c2_dev *c2dev = c2_port->c2dev;
+
+       if (netif_msg_ifdown(c2_port))
+               pr_debug("%s: disabling interface\n",
+                       netdev->name);
+
+       /* Wait for all the queued packets to get sent */
+       c2_tx_interrupt(netdev);
+
+       /* Disable network packets */
+       netif_stop_queue(netdev);
+
+       /* Disable IRQs by clearing the interrupt mask */
+       writel(1, c2dev->regs + C2_IDIS);
+       writel(0, c2dev->regs + C2_NIMR0);
+
+       /* missing: Stop transmitter */
+
+       /* missing: Stop receiver */
+
+       /* Reset the adapter, ensures the driver is in sync with the RXP */
+       c2_reset(c2_port);
+
+       /* missing: Turn off LEDs here */
+
+       /* Free all buffers in the host descriptor rings */
+       c2_tx_clean(c2_port);
+       c2_rx_clean(c2_port);
+
+       /* Free the host descriptor rings */
+       kfree(c2_port->rx_ring.start);
+       kfree(c2_port->tx_ring.start);
+       pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+                           c2_port->dma);
+
+       return 0;
+}
+
+static void c2_reset(struct c2_port *c2_port)
+{
+       struct c2_dev *c2dev = c2_port->c2dev;
+       unsigned int cur_rx = c2dev->cur_rx;
+
+       /* Tell the hardware to quiesce */
+       C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI);
+
+       /*
+        * The hardware will reset the C2_PCI_HRX_QUI bit once
+        * the RXP is quiesced.  Wait 2 seconds for this.
+        */
+       ssleep(2);
+
+       cur_rx = C2_GET_CUR_RX(c2dev);
+
+       if (cur_rx & C2_PCI_HRX_QUI)
+               pr_debug("c2_reset: failed to quiesce the hardware!\n");
+
+       cur_rx &= ~C2_PCI_HRX_QUI;
+
+       c2dev->cur_rx = cur_rx;
+
+       pr_debug("Current RX: %u\n", c2dev->cur_rx);
+}
+
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+       struct c2_dev *c2dev = c2_port->c2dev;
+       struct c2_ring *tx_ring = &c2_port->tx_ring;
+       struct c2_element *elem;
+       dma_addr_t mapaddr;
+       u32 maplen;
+       unsigned long flags;
+       unsigned int i;
+
+       spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+       if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) {
+               netif_stop_queue(netdev);
+               spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+               pr_debug("%s: Tx ring full when queue awake!\n",
+                       netdev->name);
+               return NETDEV_TX_BUSY;
+       }
+
+       maplen = skb_headlen(skb);
+       mapaddr =
+           pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE);
+
+       elem = tx_ring->to_use;
+       elem->skb = skb;
+       elem->mapaddr = mapaddr;
+       elem->maplen = maplen;
+
+       /* Tell HW to xmit */
+       __raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_TXP_ADDR);
+       __raw_writew(cpu_to_be16(maplen), elem->hw_desc + C2_TXP_LEN);
+       __raw_writew(cpu_to_be16(TXP_HTXD_READY), elem->hw_desc + C2_TXP_FLAGS);
+
+       c2_port->netstats.tx_packets++;
+       c2_port->netstats.tx_bytes += maplen;
+
+       /* Loop through the additional data fragments and queue them */
+       if (skb_shinfo(skb)->nr_frags) {
+               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                       maplen = frag->size;
+                       mapaddr =
+                           pci_map_page(c2dev->pcidev, frag->page,
+                                        frag->page_offset, maplen,
+                                        PCI_DMA_TODEVICE);
+
+                       elem = elem->next;
+                       elem->skb = NULL;
+                       elem->mapaddr = mapaddr;
+                       elem->maplen = maplen;
+
+                       /* Tell HW to xmit */
+                       __raw_writeq(cpu_to_be64(mapaddr),
+                                    elem->hw_desc + C2_TXP_ADDR);
+                       __raw_writew(cpu_to_be16(maplen),
+                                    elem->hw_desc + C2_TXP_LEN);
+                       __raw_writew(cpu_to_be16(TXP_HTXD_READY),
+                                    elem->hw_desc + C2_TXP_FLAGS);
+
+                       c2_port->netstats.tx_packets++;
+                       c2_port->netstats.tx_bytes += maplen;
+               }
+       }
+
+       tx_ring->to_use = elem->next;
+       c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1);
+
+       if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) {
+               netif_stop_queue(netdev);
+               if (netif_msg_tx_queued(c2_port))
+                       pr_debug("%s: transmit queue full\n",
+                               netdev->name);
+       }
+
+       spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+       netdev->trans_start = jiffies;
+
+       return NETDEV_TX_OK;
+}
+
+static struct net_device_stats *c2_get_stats(struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+
+       return &c2_port->netstats;
+}
+
+static void c2_tx_timeout(struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+
+       if (netif_msg_timer(c2_port))
+               pr_debug("%s: tx timeout\n", netdev->name);
+
+       c2_tx_clean(c2_port);
+}
+
+static int c2_change_mtu(struct net_device *netdev, int new_mtu)
+{
+       int ret = 0;
+
+       if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
+               return -EINVAL;
+
+       netdev->mtu = new_mtu;
+
+       if (netif_running(netdev)) {
+               c2_down(netdev);
+
+               c2_up(netdev);
+       }
+
+       return ret;
+}
+
+/* Initialize network device */
+static struct net_device *c2_devinit(struct c2_dev *c2dev,
+                                    void __iomem * mmio_addr)
+{
+       struct c2_port *c2_port = NULL;
+       struct net_device *netdev = alloc_etherdev(sizeof(*c2_port));
+
+       if (!netdev) {
+               pr_debug("c2_port etherdev alloc failed");
+               return NULL;
+       }
+
+       SET_MODULE_OWNER(netdev);
+       SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+       netdev->open = c2_up;
+       netdev->stop = c2_down;
+       netdev->hard_start_xmit = c2_xmit_frame;
+       netdev->get_stats = c2_get_stats;
+       netdev->tx_timeout = c2_tx_timeout;
+       netdev->change_mtu = c2_change_mtu;
+       netdev->watchdog_timeo = C2_TX_TIMEOUT;
+       netdev->irq = c2dev->pcidev->irq;
+
+       c2_port = netdev_priv(netdev);
+       c2_port->netdev = netdev;
+       c2_port->c2dev = c2dev;
+       c2_port->msg_enable = netif_msg_init(debug, default_msg);
+       c2_port->tx_ring.count = C2_NUM_TX_DESC;
+       c2_port->rx_ring.count = C2_NUM_RX_DESC;
+
+       spin_lock_init(&c2_port->tx_lock);
+
+       /* Copy our 48-bit ethernet hardware address */
+       memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6);
+
+       /* Validate the MAC address */
+       if (!is_valid_ether_addr(netdev->dev_addr)) {
+               pr_debug("Invalid MAC Address\n");
+               c2_print_macaddr(netdev);
+               free_netdev(netdev);
+               return NULL;
+       }
+
+       c2dev->netdev = netdev;
+
+       return netdev;
+}
+
+static int __devinit c2_probe(struct pci_dev *pcidev,
+                             const struct pci_device_id *ent)
+{
+       int ret = 0, i;
+       unsigned long reg0_start, reg0_flags, reg0_len;
+       unsigned long reg2_start, reg2_flags, reg2_len;
+       unsigned long reg4_start, reg4_flags, reg4_len;
+       unsigned kva_map_size;
+       struct net_device *netdev = NULL;
+       struct c2_dev *c2dev = NULL;
+       void __iomem *mmio_regs = NULL;
+
+       printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n",
+               DRV_VERSION);
+
+       /* Enable PCI device */
+       ret = pci_enable_device(pcidev);
+       if (ret) {
+               printk(KERN_ERR PFX "%s: Unable to enable PCI device\n",
+                       pci_name(pcidev));
+               goto bail0;
+       }
+
+       reg0_start = pci_resource_start(pcidev, BAR_0);
+       reg0_len = pci_resource_len(pcidev, BAR_0);
+       reg0_flags = pci_resource_flags(pcidev, BAR_0);
+
+       reg2_start = pci_resource_start(pcidev, BAR_2);
+       reg2_len = pci_resource_len(pcidev, BAR_2);
+       reg2_flags = pci_resource_flags(pcidev, BAR_2);
+
+       reg4_start = pci_resource_start(pcidev, BAR_4);
+       reg4_len = pci_resource_len(pcidev, BAR_4);
+       reg4_flags = pci_resource_flags(pcidev, BAR_4);
+
+       pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len);
+       pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len);
+       pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len);
+
+       /* Make sure the PCI base addresses are MMIO */
+       if (!(reg0_flags & IORESOURCE_MEM) ||
+           !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) {
+               printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
+               ret = -ENODEV;
+               goto bail1;
+       }
+
+       /* Check for weird/broken PCI region reporting */
+       if ((reg0_len < C2_REG0_SIZE) ||
+           (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) {
+               printk(KERN_ERR PFX "Invalid PCI region sizes\n");
+               ret = -ENODEV;
+               goto bail1;
+       }
+
+       /* Reserve PCI I/O and memory resources */
+       ret = pci_request_regions(pcidev, DRV_NAME);
+       if (ret) {
+               printk(KERN_ERR PFX "%s: Unable to request regions\n",
+                       pci_name(pcidev));
+               goto bail1;
+       }
+
+       if ((sizeof(dma_addr_t) > 4)) {
+               ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
+               if (ret < 0) {
+                       printk(KERN_ERR PFX "64b DMA configuration failed\n");
+                       goto bail2;
+               }
+       } else {
+               ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
+               if (ret < 0) {
+                       printk(KERN_ERR PFX "32b DMA configuration failed\n");
+                       goto bail2;
+               }
+       }
+
+       /* Enable bus-mastering on the device */
+       pci_set_master(pcidev);
+
+       /* Remap the adapter PCI registers in BAR4 */
+       mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+                                   sizeof(struct c2_adapter_pci_regs));
+       if (mmio_regs == 0UL) {
+               printk(KERN_ERR PFX
+                       "Unable to remap adapter PCI registers in BAR4\n");
+               ret = -EIO;
+               goto bail2;
+       }
+
+       /* Validate PCI regs magic */
+       for (i = 0; i < sizeof(c2_magic); i++) {
+               if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) {
+                       printk(KERN_ERR PFX "Downlevel Firmware boot loader "
+                               "[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash "
+                              "utility to update your boot loader\n",
+                               i + 1, sizeof(c2_magic),
+                               readb(mmio_regs + C2_REGS_MAGIC + i),
+                               c2_magic[i]);
+                       printk(KERN_ERR PFX "Adapter not claimed\n");
+                       iounmap(mmio_regs);
+                       ret = -EIO;
+                       goto bail2;
+               }
+       }
+
+       /* Validate the adapter version */
+       if (be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
+               printk(KERN_ERR PFX "Version mismatch "
+                       "[fw=%u, c2=%u], Adapter not claimed\n",
+                       be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)),
+                       C2_VERSION);
+               ret = -EINVAL;
+               iounmap(mmio_regs);
+               goto bail2;
+       }
+
+       /* Validate the adapter IVN */
+       if (be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
+               printk(KERN_ERR PFX "Downlevel Firmware level. You should be using "
+                      "the OpenIB device support kit. "
+                      "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
+                       be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)),
+                       C2_IVN);
+               ret = -EINVAL;
+               iounmap(mmio_regs);
+               goto bail2;
+       }
+
+       /* Allocate hardware structure */
+       c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev));
+       if (!c2dev) {
+               printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n",
+                       pci_name(pcidev));
+               ret = -ENOMEM;
+               iounmap(mmio_regs);
+               goto bail2;
+       }
+
+       memset(c2dev, 0, sizeof(*c2dev));
+       spin_lock_init(&c2dev->lock);
+       c2dev->pcidev = pcidev;
+       c2dev->cur_tx = 0;
+
+       /* Get the last RX index */
+       c2dev->cur_rx =
+           (be32_to_cpu(readl(mmio_regs + C2_REGS_HRX_CUR)) -
+            0xffffc000) / sizeof(struct c2_rxp_desc);
+
+       /* Request an interrupt line for the driver */
+       ret = request_irq(pcidev->irq, c2_interrupt, SA_SHIRQ, DRV_NAME, c2dev);
+       if (ret) {
+               printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
+                       pci_name(pcidev), pcidev->irq);
+               iounmap(mmio_regs);
+               goto bail3;
+       }
+
+       /* Set driver specific data */
+       pci_set_drvdata(pcidev, c2dev);
+
+       /* Initialize network device */
+       if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
+               iounmap(mmio_regs);
+               goto bail4;
+       }
+
+       /* Save off the actual size prior to unmapping mmio_regs */
+       kva_map_size = be32_to_cpu(readl(mmio_regs + C2_REGS_PCI_WINSIZE));
+
+       /* Unmap the adapter PCI registers in BAR4 */
+       iounmap(mmio_regs);
+
+       /* Register network device */
+       ret = register_netdev(netdev);
+       if (ret) {
+               printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n",
+                       ret);
+               goto bail5;
+       }
+
+       /* Disable network packets */
+       netif_stop_queue(netdev);
+
+       /* Remap the adapter HRXDQ PA space to kernel VA space */
+       c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
+                                              C2_RXP_HRXDQ_SIZE);
+       if (c2dev->mmio_rxp_ring == 0UL) {
+               printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
+               ret = -EIO;
+               goto bail6;
+       }
+
+       /* Remap the adapter HTXDQ PA space to kernel VA space */
+       c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
+                                              C2_TXP_HTXDQ_SIZE);
+       if (c2dev->mmio_txp_ring == 0UL) {
+               printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
+               ret = -EIO;
+               goto bail7;
+       }
+
+       /* Save off the current RX index in the last 4 bytes of the TXP Ring */
+       C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+       /* Remap the PCI registers in adapter BAR0 to kernel VA space */
+       c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
+       if (c2dev->regs == 0UL) {
+               printk(KERN_ERR PFX "Unable to remap BAR0\n");
+               ret = -EIO;
+               goto bail8;
+       }
+
+       /* Remap the PCI registers in adapter BAR4 to kernel VA space */
+       c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
+       c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+                                    kva_map_size);
+       if (c2dev->kva == 0UL) {
+               printk(KERN_ERR PFX "Unable to remap BAR4\n");
+               ret = -EIO;
+               goto bail9;
+       }
+
+       /* Print out the MAC address */
+       c2_print_macaddr(netdev);
+
+       ret = c2_rnic_init(c2dev);
+       if (ret) {
+               printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret);
+               goto bail10;
+       }
+
+       c2_register_device(c2dev);
+
+       return 0;
+
+ bail10:
+       iounmap(c2dev->kva);
+
+ bail9:
+       iounmap(c2dev->regs);
+
+ bail8:
+       iounmap(c2dev->mmio_txp_ring);
+
+ bail7:
+       iounmap(c2dev->mmio_rxp_ring);
+
+ bail6:
+       unregister_netdev(netdev);
+
+ bail5:
+       free_netdev(netdev);
+
+ bail4:
+       free_irq(pcidev->irq, c2dev);
+
+ bail3:
+       ib_dealloc_device(&c2dev->ibdev);
+
+ bail2:
+       pci_release_regions(pcidev);
+
+ bail1:
+       pci_disable_device(pcidev);
+
+ bail0:
+       return ret;
+}
+
+static void __devexit c2_remove(struct pci_dev *pcidev)
+{
+       struct c2_dev *c2dev = pci_get_drvdata(pcidev);
+       struct net_device *netdev = c2dev->netdev;
+
+       /* Unregister with OpenIB */
+       c2_unregister_device(c2dev);
+
+       /* Clean up the RNIC resources */
+       c2_rnic_term(c2dev);
+
+       /* Remove network device from the kernel */
+       unregister_netdev(netdev);
+
+       /* Free network device */
+       free_netdev(netdev);
+
+       /* Free the interrupt line */
+       free_irq(pcidev->irq, c2dev);
+
+       /* missing: Turn LEDs off here */
+
+       /* Unmap adapter PA space */
+       iounmap(c2dev->kva);
+       iounmap(c2dev->regs);
+       iounmap(c2dev->mmio_txp_ring);
+       iounmap(c2dev->mmio_rxp_ring);
+
+       /* Free the hardware structure */
+       ib_dealloc_device(&c2dev->ibdev);
+
+       /* Release reserved PCI I/O and memory resources */
+       pci_release_regions(pcidev);
+
+       /* Disable PCI device */
+       pci_disable_device(pcidev);
+
+       /* Clear driver specific data */
+       pci_set_drvdata(pcidev, NULL);
+}
+
+static struct pci_driver c2_pci_driver = {
+       .name = DRV_NAME,
+       .id_table = c2_pci_table,
+       .probe = c2_probe,
+       .remove = __devexit_p(c2_remove),
+};
+
+static int __init c2_init_module(void)
+{
+       return pci_module_init(&c2_pci_driver);
+}
+
+static void __exit c2_exit_module(void)
+{
+       pci_unregister_driver(&c2_pci_driver);
+}
+
+module_init(c2_init_module);
+module_exit(c2_exit_module);
diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h
new file mode 100644 (file)
index 0000000..1b17dcd
--- /dev/null
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __C2_H
+#define __C2_H
+
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+#include <asm/semaphore.h>
+
+#include "c2_provider.h"
+#include "c2_mq.h"
+#include "c2_status.h"
+
+#define DRV_NAME     "c2"
+#define DRV_VERSION  "1.1"
+#define PFX          DRV_NAME ": "
+
+#define BAR_0                0
+#define BAR_2                2
+#define BAR_4                4
+
+#define RX_BUF_SIZE         (1536 + 8)
+#define ETH_JUMBO_MTU        9000
+#define C2_MAGIC            "CEPHEUS"
+#define C2_VERSION           4
+#define C2_IVN              (18 & 0x7fffffff)
+
+#define C2_REG0_SIZE        (16 * 1024)
+#define C2_REG2_SIZE        (2 * 1024 * 1024)
+#define C2_REG4_SIZE        (256 * 1024 * 1024)
+#define C2_NUM_TX_DESC       341
+#define C2_NUM_RX_DESC       256
+#define C2_PCI_REGS_OFFSET  (0x10000)
+#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
+#define C2_RXP_HRXDQ_SIZE   (4096)
+#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
+#define C2_TXP_HTXDQ_SIZE   (4096)
+#define C2_TX_TIMEOUT      (6*HZ)
+
+/* CEPHEUS */
+static const u8 c2_magic[] = {
+       0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
+};
+
+enum adapter_pci_regs {
+       C2_REGS_MAGIC = 0x0000,
+       C2_REGS_VERS = 0x0008,
+       C2_REGS_IVN = 0x000C,
+       C2_REGS_PCI_WINSIZE = 0x0010,
+       C2_REGS_Q0_QSIZE = 0x0014,
+       C2_REGS_Q0_MSGSIZE = 0x0018,
+       C2_REGS_Q0_POOLSTART = 0x001C,
+       C2_REGS_Q0_SHARED = 0x0020,
+       C2_REGS_Q1_QSIZE = 0x0024,
+       C2_REGS_Q1_MSGSIZE = 0x0028,
+       C2_REGS_Q1_SHARED = 0x0030,
+       C2_REGS_Q2_QSIZE = 0x0034,
+       C2_REGS_Q2_MSGSIZE = 0x0038,
+       C2_REGS_Q2_SHARED = 0x0040,
+       C2_REGS_ENADDR = 0x004C,
+       C2_REGS_RDMA_ENADDR = 0x0054,
+       C2_REGS_HRX_CUR = 0x006C,
+};
+
+struct c2_adapter_pci_regs {
+       char reg_magic[8];
+       u32 version;
+       u32 ivn;
+       u32 pci_window_size;
+       u32 q0_q_size;
+       u32 q0_msg_size;
+       u32 q0_pool_start;
+       u32 q0_shared;
+       u32 q1_q_size;
+       u32 q1_msg_size;
+       u32 q1_pool_start;
+       u32 q1_shared;
+       u32 q2_q_size;
+       u32 q2_msg_size;
+       u32 q2_pool_start;
+       u32 q2_shared;
+       u32 log_start;
+       u32 log_size;
+       u8 host_enaddr[8];
+       u8 rdma_enaddr[8];
+       u32 crash_entry;
+       u32 crash_ready[2];
+       u32 fw_txd_cur;
+       u32 fw_hrxd_cur;
+       u32 fw_rxd_cur;
+};
+
+enum pci_regs {
+       C2_HISR = 0x0000,
+       C2_DISR = 0x0004,
+       C2_HIMR = 0x0008,
+       C2_DIMR = 0x000C,
+       C2_NISR0 = 0x0010,
+       C2_NISR1 = 0x0014,
+       C2_NIMR0 = 0x0018,
+       C2_NIMR1 = 0x001C,
+       C2_IDIS = 0x0020,
+};
+
+enum {
+       C2_PCI_HRX_INT = 1 << 8,
+       C2_PCI_HTX_INT = 1 << 17,
+       C2_PCI_HRX_QUI = 1 << 31,
+};
+
+/*
+ * Cepheus registers in BAR0.
+ */
+struct c2_pci_regs {
+       u32 hostisr;
+       u32 dmaisr;
+       u32 hostimr;
+       u32 dmaimr;
+       u32 netisr0;
+       u32 netisr1;
+       u32 netimr0;
+       u32 netimr1;
+       u32 int_disable;
+};
+
+/* TXP flags */
+enum c2_txp_flags {
+       TXP_HTXD_DONE = 0,
+       TXP_HTXD_READY = 1 << 0,
+       TXP_HTXD_UNINIT = 1 << 1,
+};
+
+/* RXP flags */
+enum c2_rxp_flags {
+       RXP_HRXD_UNINIT = 0,
+       RXP_HRXD_READY = 1 << 0,
+       RXP_HRXD_DONE = 1 << 1,
+};
+
+/* RXP status */
+enum c2_rxp_status {
+       RXP_HRXD_ZERO = 0,
+       RXP_HRXD_OK = 1 << 0,
+       RXP_HRXD_BUF_OV = 1 << 1,
+};
+
+/* TXP descriptor fields */
+enum txp_desc {
+       C2_TXP_FLAGS = 0x0000,
+       C2_TXP_LEN = 0x0002,
+       C2_TXP_ADDR = 0x0004,
+};
+
+/* RXP descriptor fields */
+enum rxp_desc {
+       C2_RXP_FLAGS = 0x0000,
+       C2_RXP_STATUS = 0x0002,
+       C2_RXP_COUNT = 0x0004,
+       C2_RXP_LEN = 0x0006,
+       C2_RXP_ADDR = 0x0008,
+};
+
+struct c2_txp_desc {
+       u16 flags;
+       u16 len;
+       u64 addr;
+} __attribute__ ((packed));
+
+struct c2_rxp_desc {
+       u16 flags;
+       u16 status;
+       u16 count;
+       u16 len;
+       u64 addr;
+} __attribute__ ((packed));
+
+struct c2_rxp_hdr {
+       u16 flags;
+       u16 status;
+       u16 len;
+       u16 rsvd;
+} __attribute__ ((packed));
+
+struct c2_tx_desc {
+       u32 len;
+       u32 status;
+       dma_addr_t next_offset;
+};
+
+struct c2_rx_desc {
+       u32 len;
+       u32 status;
+       dma_addr_t next_offset;
+};
+
+struct c2_alloc {
+       u32 last;
+       u32 max;
+       spinlock_t lock;
+       unsigned long *table;
+};
+
+struct c2_array {
+       struct {
+               void **page;
+               int used;
+       } *page_list;
+};
+
+/*
+ * The MQ shared pointer pool is organized as a linked list of
+ * chunks. Each chunk contains a linked list of free shared pointers
+ * that can be allocated to a given user mode client.
+ *
+ */
+struct sp_chunk {
+       struct sp_chunk *next;
+       dma_addr_t dma_addr;
+       DECLARE_PCI_UNMAP_ADDR(mapping);
+       u16 head;
+       u16 shared_ptr[0];
+};
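+
+/*
+ * Illustrative sketch (not part of the driver source): in a freshly
+ * initialized chunk, head == 0 and shared_ptr[] holds { 1, 2, ..., 0xFFFF },
+ * i.e. each entry stores the index of the next free slot.  c2_alloc_mqsp()
+ * hands out &shared_ptr[head] and advances head to the value stored there;
+ * c2_free_mqsp() writes the old head into the freed slot and points head
+ * back at that slot's index.
+ */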
+
+struct c2_pd_table {
+       u32 last;
+       u32 max;
+       spinlock_t lock;
+       unsigned long *table;
+};
+
+struct c2_qp_table {
+       struct idr idr;
+       spinlock_t lock;
+       int last;
+};
+
+struct c2_element {
+       struct c2_element *next;
+       void *ht_desc;          /* host     descriptor */
+       void __iomem *hw_desc;  /* hardware descriptor */
+       struct sk_buff *skb;
+       dma_addr_t mapaddr;
+       u32 maplen;
+};
+
+struct c2_ring {
+       struct c2_element *to_clean;
+       struct c2_element *to_use;
+       struct c2_element *start;
+       unsigned long count;
+};
+
+struct c2_dev {
+       struct ib_device ibdev;
+       void __iomem *regs;
+       void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
+       void __iomem *mmio_rxp_ring;
+       spinlock_t lock;
+       struct pci_dev *pcidev;
+       struct net_device *netdev;
+       struct net_device *pseudo_netdev;
+       unsigned int cur_tx;
+       unsigned int cur_rx;
+       u32 adapter_handle;
+       int device_cap_flags;
+       void __iomem *kva;      /* KVA device memory */
+       unsigned long pa;       /* PA device memory */
+       void **qptr_array;
+
+       kmem_cache_t *host_msg_cache;
+
+       struct list_head cca_link;              /* adapter list */
+       struct list_head eh_wakeup_list;        /* event wakeup list */
+       wait_queue_head_t req_vq_wo;
+
+       /* Cached RNIC properties */
+       struct ib_device_attr props;
+
+       struct c2_pd_table pd_table;
+       struct c2_qp_table qp_table;
+       int ports;              /* num of GigE ports */
+       int devnum;
+       spinlock_t vqlock;      /* sync vbs req MQ */
+
+       /* Verbs Queues */
+       struct c2_mq req_vq;    /* Verbs Request MQ */
+       struct c2_mq rep_vq;    /* Verbs Reply MQ */
+       struct c2_mq aeq;       /* Async Events MQ */
+
+       /* Kernel client MQs */
+       struct sp_chunk *kern_mqsp_pool;
+
+       /* Device updates these values when posting messages to a host
+        * target queue */
+       u16 req_vq_shared;
+       u16 rep_vq_shared;
+       u16 aeq_shared;
+       u16 irq_claimed;
+
+       /*
+        * Shared host target pages for user-accessible MQs.
+        */
+       int hthead;             /* index of first free entry */
+       void *htpages;          /* kernel vaddr */
+       int htlen;              /* length of htpages memory */
+       void *htuva;            /* user mapped vaddr */
+       spinlock_t htlock;      /* serialize allocation */
+
+       u64 adapter_hint_uva;   /* access to the activity FIFO */
+
+       //      spinlock_t aeq_lock;
+       //      spinlock_t rnic_lock;
+
+       u16 *hint_count;
+       dma_addr_t hint_count_dma;
+       u16 hints_read;
+
+       int init;               /* TRUE if it's ready */
+       char ae_cache_name[16];
+       char vq_cache_name[16];
+};
+
+struct c2_port {
+       u32 msg_enable;
+       struct c2_dev *c2dev;
+       struct net_device *netdev;
+
+       spinlock_t tx_lock;
+       u32 tx_avail;
+       struct c2_ring tx_ring;
+       struct c2_ring rx_ring;
+
+       void *mem;              /* PCI memory for host rings */
+       dma_addr_t dma;
+       unsigned long mem_size;
+
+       u32 rx_buf_size;
+
+       struct net_device_stats netstats;
+};
+
+/*
+ * Activity FIFO registers in BAR0.
+ */
+#define PCI_BAR0_HOST_HINT     0x100
+#define PCI_BAR0_ADAPTER_HINT  0x2000
+
+/*
+ * CQ arming flags written to the adapter's shared "armed" byte.
+ */
+#define CQ_ARMED       0x01
+#define CQ_WAIT_FOR_DMA        0x80
+
+/*
+ * The format of a hint is as follows:
+ * Lower 16 bits are the count of hints for the queue.
+ * Next 15 bits are the qp_index.
+ * Uppermost bit depends on who reads it:
+ *    If read by producer, then it means Full (1) or Not-Full (0)
+ *    If read by consumer, then it means Empty (1) or Not-Empty (0)
+ */
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
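+
+/*
+ * Worked example (illustrative only, not part of the driver source):
+ * C2_HINT_MAKE(3, 5) yields 0x00030005, and C2_HINT_GET_INDEX() and
+ * C2_HINT_GET_COUNT() recover 3 and 5 from that value respectively.
+ */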
+
+
+/*
+ * The following defines the offset in SDRAM for the
+ * struct c2_adapter_pci_regs layout.
+ */
+#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
+
+#ifndef readq
+static inline u64 readq(const void __iomem * addr)
+{
+       u64 ret = readl(addr + 4);
+       ret <<= 32;
+       ret |= readl(addr);
+
+       return ret;
+}
+#endif
+
+#ifndef writeq
+static inline void __raw_writeq(u64 val, void __iomem * addr)
+{
+       __raw_writel((u32) (val), addr);
+       __raw_writel((u32) (val >> 32), (addr + 4));
+}
+#endif
+
+#define C2_SET_CUR_RX(c2dev, cur_rx) \
+       __raw_writel(cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
+
+#define C2_GET_CUR_RX(c2dev) \
+       be32_to_cpu(readl(c2dev->mmio_txp_ring + 4092))
+
+static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
+{
+       return container_of(ibdev, struct c2_dev, ibdev);
+}
+
+static inline int c2_errno(void *reply)
+{
+       switch (c2_wr_get_result(reply)) {
+       case C2_OK:
+               return 0;
+       case CCERR_NO_BUFS:
+       case CCERR_INSUFFICIENT_RESOURCES:
+       case CCERR_ZERO_RDMA_READ_RESOURCES:
+               return -ENOMEM;
+       case CCERR_MR_IN_USE:
+       case CCERR_QP_IN_USE:
+               return -EBUSY;
+       case CCERR_ADDR_IN_USE:
+               return -EADDRINUSE;
+       case CCERR_ADDR_NOT_AVAIL:
+               return -EADDRNOTAVAIL;
+       case CCERR_CONN_RESET:
+               return -ECONNRESET;
+       case CCERR_NOT_IMPLEMENTED:
+       case CCERR_INVALID_WQE:
+               return -ENOSYS;
+       case CCERR_QP_NOT_PRIVILEGED:
+               return -EPERM;
+       case CCERR_STACK_ERROR:
+               return -EPROTO;
+       case CCERR_ACCESS_VIOLATION:
+       case CCERR_BASE_AND_BOUNDS_VIOLATION:
+               return -EFAULT;
+       case CCERR_STAG_STATE_NOT_INVALID:
+       case CCERR_INVALID_ADDRESS:
+       case CCERR_INVALID_CQ:
+       case CCERR_INVALID_EP:
+       case CCERR_INVALID_MODIFIER:
+       case CCERR_INVALID_MTU:
+       case CCERR_INVALID_PD_ID:
+       case CCERR_INVALID_QP:
+       case CCERR_INVALID_RNIC:
+       case CCERR_INVALID_STAG:
+               return -EINVAL;
+       default:
+               return -EAGAIN;
+       }
+}
+
+/* Device */
+extern int c2_register_device(struct c2_dev *c2dev);
+extern void c2_unregister_device(struct c2_dev *c2dev);
+extern int c2_rnic_init(struct c2_dev *c2dev);
+extern void c2_rnic_term(struct c2_dev *c2dev);
+extern void c2_rnic_interrupt(struct c2_dev *c2dev);
+extern int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
+extern int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
+
+/* QPs */
+extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
+                      struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
+extern void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
+extern struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
+extern int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+                       struct ib_qp_attr *attr, int attr_mask);
+extern int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+                                int ord, int ird);
+extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+                       struct ib_send_wr **bad_wr);
+extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+                          struct ib_recv_wr **bad_wr);
+extern void __devinit c2_init_qp_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev);
+extern void c2_set_qp_state(struct c2_qp *, int);
+extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
+
+/* PDs */
+extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
+extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
+extern int __devinit c2_init_pd_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev);
+
+/* CQs */
+extern int c2_init_cq(struct c2_dev *c2dev, int entries,
+                     struct c2_ucontext *ctx, struct c2_cq *cq);
+extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
+extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
+extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
+extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
+
+/* CM */
+extern int c2_llp_connect(struct iw_cm_id *cm_id,
+                         struct iw_cm_conn_param *iw_param);
+extern int c2_llp_accept(struct iw_cm_id *cm_id,
+                        struct iw_cm_conn_param *iw_param);
+extern int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
+                        u8 pdata_len);
+extern int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
+extern int c2_llp_service_destroy(struct iw_cm_id *cm_id);
+
+/* MM */
+extern int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+                                     int page_size, int pbl_depth, u32 length,
+                                     u32 off, u64 *va, enum c2_acf acf,
+                                     struct c2_mr *mr);
+extern int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
+
+/* AE */
+extern void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
+
+/* MQSP Allocator */
+extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+                            struct sp_chunk **root);
+extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
+extern u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+                         dma_addr_t *dma_addr, gfp_t gfp_mask);
+extern void c2_free_mqsp(u16 * mqsp);
+#endif
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
new file mode 100644 (file)
index 0000000..08f46c8
--- /dev/null
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_status.h"
+#include "c2_ae.h"
+
+static int c2_convert_cm_status(u32 c2_status)
+{
+       switch (c2_status) {
+       case C2_CONN_STATUS_SUCCESS:
+               return 0;
+       case C2_CONN_STATUS_REJECTED:
+               return -ENETRESET;
+       case C2_CONN_STATUS_REFUSED:
+               return -ECONNREFUSED;
+       case C2_CONN_STATUS_TIMEDOUT:
+               return -ETIMEDOUT;
+       case C2_CONN_STATUS_NETUNREACH:
+               return -ENETUNREACH;
+       case C2_CONN_STATUS_HOSTUNREACH:
+               return -EHOSTUNREACH;
+       case C2_CONN_STATUS_INVALID_RNIC:
+               return -EINVAL;
+       case C2_CONN_STATUS_INVALID_QP:
+               return -EINVAL;
+       case C2_CONN_STATUS_INVALID_QP_STATE:
+               return -EINVAL;
+       case C2_CONN_STATUS_ADDR_NOT_AVAIL:
+               return -EADDRNOTAVAIL;
+       default:
+               printk(KERN_ERR PFX
+                      "%s - Unable to convert CM status: %d\n",
+                      __FUNCTION__, c2_status);
+               return -EIO;
+       }
+}
+
+#ifdef DEBUG
+static const char* to_event_str(int event)
+{
+       static const char* event_str[] = {
+               "CCAE_REMOTE_SHUTDOWN",
+               "CCAE_ACTIVE_CONNECT_RESULTS",
+               "CCAE_CONNECTION_REQUEST",
+               "CCAE_LLP_CLOSE_COMPLETE",
+               "CCAE_TERMINATE_MESSAGE_RECEIVED",
+               "CCAE_LLP_CONNECTION_RESET",
+               "CCAE_LLP_CONNECTION_LOST",
+               "CCAE_LLP_SEGMENT_SIZE_INVALID",
+               "CCAE_LLP_INVALID_CRC",
+               "CCAE_LLP_BAD_FPDU",
+               "CCAE_INVALID_DDP_VERSION",
+               "CCAE_INVALID_RDMA_VERSION",
+               "CCAE_UNEXPECTED_OPCODE",
+               "CCAE_INVALID_DDP_QUEUE_NUMBER",
+               "CCAE_RDMA_READ_NOT_ENABLED",
+               "CCAE_RDMA_WRITE_NOT_ENABLED",
+               "CCAE_RDMA_READ_TOO_SMALL",
+               "CCAE_NO_L_BIT",
+               "CCAE_TAGGED_INVALID_STAG",
+               "CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
+               "CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
+               "CCAE_TAGGED_INVALID_PD",
+               "CCAE_WRAP_ERROR",
+               "CCAE_BAD_CLOSE",
+               "CCAE_BAD_LLP_CLOSE",
+               "CCAE_INVALID_MSN_RANGE",
+               "CCAE_INVALID_MSN_GAP",
+               "CCAE_IRRQ_OVERFLOW",
+               "CCAE_IRRQ_MSN_GAP",
+               "CCAE_IRRQ_MSN_RANGE",
+               "CCAE_IRRQ_INVALID_STAG",
+               "CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
+               "CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
+               "CCAE_IRRQ_INVALID_PD",
+               "CCAE_IRRQ_WRAP_ERROR",
+               "CCAE_CQ_SQ_COMPLETION_OVERFLOW",
+               "CCAE_CQ_RQ_COMPLETION_ERROR",
+               "CCAE_QP_SRQ_WQE_ERROR",
+               "CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
+               "CCAE_CQ_OVERFLOW",
+               "CCAE_CQ_OPERATION_ERROR",
+               "CCAE_SRQ_LIMIT_REACHED",
+               "CCAE_QP_RQ_LIMIT_REACHED",
+               "CCAE_SRQ_CATASTROPHIC_ERROR",
+               "CCAE_RNIC_CATASTROPHIC_ERROR"
+       };
+
+       if (event < CCAE_REMOTE_SHUTDOWN ||
+           event > CCAE_RNIC_CATASTROPHIC_ERROR)
+               return "<invalid event>";
+
+       event -= CCAE_REMOTE_SHUTDOWN;
+       return event_str[event];
+}
+
+static const char *to_qp_state_str(int state)
+{
+       switch (state) {
+       case C2_QP_STATE_IDLE:
+               return "C2_QP_STATE_IDLE";
+       case C2_QP_STATE_CONNECTING:
+               return "C2_QP_STATE_CONNECTING";
+       case C2_QP_STATE_RTS:
+               return "C2_QP_STATE_RTS";
+       case C2_QP_STATE_CLOSING:
+               return "C2_QP_STATE_CLOSING";
+       case C2_QP_STATE_TERMINATE:
+               return "C2_QP_STATE_TERMINATE";
+       case C2_QP_STATE_ERROR:
+               return "C2_QP_STATE_ERROR";
+       default:
+               return "<invalid QP state>";
+       };
+}
+#endif
+
+void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
+{
+       struct c2_mq *mq = c2dev->qptr_array[mq_index];
+       union c2wr *wr;
+       void *resource_user_context;
+       struct iw_cm_event cm_event;
+       struct ib_event ib_event;
+       enum c2_resource_indicator resource_indicator;
+       enum c2_event_id event_id;
+       unsigned long flags;
+       int status;
+
+       /*
+        * Retrieve the message
+        */
+       wr = c2_mq_consume(mq);
+       if (!wr)
+               return;
+
+       memset(&ib_event, 0, sizeof(ib_event));
+       memset(&cm_event, 0, sizeof(cm_event));
+
+       event_id = c2_wr_get_id(wr);
+       resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
+       resource_user_context =
+           (void *) (unsigned long) wr->ae.ae_generic.user_context;
+
+       status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
+
+       pr_debug("event received c2_dev=%p, event_id=%d, "
+               "resource_indicator=%d, user_context=%p, status = %d\n",
+               c2dev, event_id, resource_indicator, resource_user_context,
+               status);
+
+       switch (resource_indicator) {
+       case C2_RES_IND_QP:{
+
+               struct c2_qp *qp = (struct c2_qp *)resource_user_context;
+               struct iw_cm_id *cm_id = qp->cm_id;
+               struct c2wr_ae_active_connect_results *res;
+
+               if (!cm_id) {
+                       pr_debug("event received, but cm_id is <null>, qp=%p!\n",
+                               qp);
+                       goto ignore_it;
+               }
+               pr_debug("%s: event = %s, user_context=%llx, "
+                       "resource_type=%x, "
+                       "resource=%x, qp_state=%s\n",
+                       __FUNCTION__,
+                       to_event_str(event_id),
+                       be64_to_cpu(wr->ae.ae_generic.user_context),
+                       be32_to_cpu(wr->ae.ae_generic.resource_type),
+                       be32_to_cpu(wr->ae.ae_generic.resource),
+                       to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
+
+               c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
+
+               switch (event_id) {
+               case CCAE_ACTIVE_CONNECT_RESULTS:
+                       res = &wr->ae.ae_active_connect_results;
+                       cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+                       cm_event.local_addr.sin_addr.s_addr = res->laddr;
+                       cm_event.remote_addr.sin_addr.s_addr = res->raddr;
+                       cm_event.local_addr.sin_port = res->lport;
+                       cm_event.remote_addr.sin_port = res->rport;
+                       if (status == 0) {
+                               cm_event.private_data_len =
+                                       be32_to_cpu(res->private_data_length);
+                               cm_event.private_data = res->private_data;
+                       } else {
+                               spin_lock_irqsave(&qp->lock, flags);
+                               if (qp->cm_id) {
+                                       qp->cm_id->rem_ref(qp->cm_id);
+                                       qp->cm_id = NULL;
+                               }
+                               spin_unlock_irqrestore(&qp->lock, flags);
+                               cm_event.private_data_len = 0;
+                               cm_event.private_data = NULL;
+                       }
+                       if (cm_id->event_handler)
+                               cm_id->event_handler(cm_id, &cm_event);
+                       break;
+               case CCAE_TERMINATE_MESSAGE_RECEIVED:
+               case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
+                       ib_event.device = &c2dev->ibdev;
+                       ib_event.element.qp = &qp->ibqp;
+                       ib_event.event = IB_EVENT_QP_REQ_ERR;
+
+                       if (qp->ibqp.event_handler)
+                               qp->ibqp.event_handler(&ib_event,
+                                                      qp->ibqp.
+                                                      qp_context);
+                       break;
+               case CCAE_BAD_CLOSE:
+               case CCAE_LLP_CLOSE_COMPLETE:
+               case CCAE_LLP_CONNECTION_RESET:
+               case CCAE_LLP_CONNECTION_LOST:
+                       BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b);
+
+                       spin_lock_irqsave(&qp->lock, flags);
+                       if (qp->cm_id) {
+                               qp->cm_id->rem_ref(qp->cm_id);
+                               qp->cm_id = NULL;
+                       }
+                       spin_unlock_irqrestore(&qp->lock, flags);
+                       cm_event.event = IW_CM_EVENT_CLOSE;
+                       cm_event.status = 0;
+                       if (cm_id->event_handler)
+                               cm_id->event_handler(cm_id, &cm_event);
+                       break;
+               default:
+                       BUG_ON(1);
+                       pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
+                               "CM_ID=%p\n",
+                               __FUNCTION__, __LINE__,
+                               event_id, qp, cm_id);
+                       break;
+               }
+               break;
+       }
+
+       case C2_RES_IND_EP:{
+
+               struct c2wr_ae_connection_request *req =
+                       &wr->ae.ae_connection_request;
+               struct iw_cm_id *cm_id =
+                       (struct iw_cm_id *)resource_user_context;
+
+               pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
+               if (event_id != CCAE_CONNECTION_REQUEST) {
+                       pr_debug("%s: Invalid event_id: %d\n",
+                               __FUNCTION__, event_id);
+                       break;
+               }
+               cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
+               cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
+               cm_event.local_addr.sin_addr.s_addr = req->laddr;
+               cm_event.remote_addr.sin_addr.s_addr = req->raddr;
+               cm_event.local_addr.sin_port = req->lport;
+               cm_event.remote_addr.sin_port = req->rport;
+               cm_event.private_data_len =
+                       be32_to_cpu(req->private_data_length);
+               cm_event.private_data = req->private_data;
+
+               if (cm_id->event_handler)
+                       cm_id->event_handler(cm_id, &cm_event);
+               break;
+       }
+
+       case C2_RES_IND_CQ:{
+               struct c2_cq *cq =
+                   (struct c2_cq *) resource_user_context;
+
+               pr_debug("IB_EVENT_CQ_ERR\n");
+               ib_event.device = &c2dev->ibdev;
+               ib_event.element.cq = &cq->ibcq;
+               ib_event.event = IB_EVENT_CQ_ERR;
+
+               if (cq->ibcq.event_handler)
+                       cq->ibcq.event_handler(&ib_event,
+                                              cq->ibcq.cq_context);
+               break;
+       }
+
+       default:
+               printk("Bad resource indicator = %d\n",
+                      resource_indicator);
+               break;
+       }
+
+ ignore_it:
+       c2_mq_free(mq);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.h b/drivers/infiniband/hw/amso1100/c2_ae.h
new file mode 100644 (file)
index 0000000..3a065c3
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_AE_H_
+#define _C2_AE_H_
+
+/*
+ * WARNING: If you change this file, also bump C2_IVN_BASE
+ * in common/include/clustercore/c2_ivn.h.
+ */
+
+/*
+ * Asynchronous Event Identifiers
+ *
+ * These start at 0x80 only so it's obvious from inspection that
+ * they are not work-request statuses.  This isn't critical.
+ *
+ * NOTE: these event IDs must fit in eight bits.
+ */
+enum c2_event_id {
+       CCAE_REMOTE_SHUTDOWN = 0x80,
+       CCAE_ACTIVE_CONNECT_RESULTS,
+       CCAE_CONNECTION_REQUEST,
+       CCAE_LLP_CLOSE_COMPLETE,
+       CCAE_TERMINATE_MESSAGE_RECEIVED,
+       CCAE_LLP_CONNECTION_RESET,
+       CCAE_LLP_CONNECTION_LOST,
+       CCAE_LLP_SEGMENT_SIZE_INVALID,
+       CCAE_LLP_INVALID_CRC,
+       CCAE_LLP_BAD_FPDU,
+       CCAE_INVALID_DDP_VERSION,
+       CCAE_INVALID_RDMA_VERSION,
+       CCAE_UNEXPECTED_OPCODE,
+       CCAE_INVALID_DDP_QUEUE_NUMBER,
+       CCAE_RDMA_READ_NOT_ENABLED,
+       CCAE_RDMA_WRITE_NOT_ENABLED,
+       CCAE_RDMA_READ_TOO_SMALL,
+       CCAE_NO_L_BIT,
+       CCAE_TAGGED_INVALID_STAG,
+       CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
+       CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
+       CCAE_TAGGED_INVALID_PD,
+       CCAE_WRAP_ERROR,
+       CCAE_BAD_CLOSE,
+       CCAE_BAD_LLP_CLOSE,
+       CCAE_INVALID_MSN_RANGE,
+       CCAE_INVALID_MSN_GAP,
+       CCAE_IRRQ_OVERFLOW,
+       CCAE_IRRQ_MSN_GAP,
+       CCAE_IRRQ_MSN_RANGE,
+       CCAE_IRRQ_INVALID_STAG,
+       CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
+       CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
+       CCAE_IRRQ_INVALID_PD,
+       CCAE_IRRQ_WRAP_ERROR,
+       CCAE_CQ_SQ_COMPLETION_OVERFLOW,
+       CCAE_CQ_RQ_COMPLETION_ERROR,
+       CCAE_QP_SRQ_WQE_ERROR,
+       CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
+       CCAE_CQ_OVERFLOW,
+       CCAE_CQ_OPERATION_ERROR,
+       CCAE_SRQ_LIMIT_REACHED,
+       CCAE_QP_RQ_LIMIT_REACHED,
+       CCAE_SRQ_CATASTROPHIC_ERROR,
+       CCAE_RNIC_CATASTROPHIC_ERROR
+/* WARNING: If you add more IDs, make sure their values fit in eight bits. */
+};
+
+/*
+ * Resource Indicators and Identifiers
+ */
+enum c2_resource_indicator {
+       C2_RES_IND_QP = 1,
+       C2_RES_IND_EP,
+       C2_RES_IND_CQ,
+       C2_RES_IND_SRQ,
+};
+
+#endif /* _C2_AE_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c
new file mode 100644 (file)
index 0000000..1d25299
--- /dev/null
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+
+#include "c2.h"
+
+static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
+                              struct sp_chunk **head)
+{
+       int i;
+       struct sp_chunk *new_head;
+
+       new_head = (struct sp_chunk *) __get_free_page(gfp_mask);
+       if (new_head == NULL)
+               return -ENOMEM;
+
+       new_head->dma_addr = dma_map_single(c2dev->ibdev.dma_device, new_head,
+                                           PAGE_SIZE, DMA_FROM_DEVICE);
+       pci_unmap_addr_set(new_head, mapping, new_head->dma_addr);
+
+       new_head->next = NULL;
+       new_head->head = 0;
+
+       /* build list where each index is the next free slot */
+       for (i = 0;
+            i < (PAGE_SIZE - sizeof(struct sp_chunk) -
+                 sizeof(u16)) / sizeof(u16) - 1;
+            i++) {
+               new_head->shared_ptr[i] = i + 1;
+       }
+       /* terminate list */
+       new_head->shared_ptr[i] = 0xFFFF;
+
+       *head = new_head;
+       return 0;
+}
+
+int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+                     struct sp_chunk **root)
+{
+       return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
+}
+
+void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
+{
+       struct sp_chunk *next;
+
+       while (root) {
+               next = root->next;
+               dma_unmap_single(c2dev->ibdev.dma_device,
+                                pci_unmap_addr(root, mapping), PAGE_SIZE,
+                                DMA_FROM_DEVICE);
+               __free_page((struct page *) root);
+               root = next;
+       }
+}
+
+u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+                  dma_addr_t *dma_addr, gfp_t gfp_mask)
+{
+       u16 mqsp;
+
+       while (head) {
+               mqsp = head->head;
+               if (mqsp != 0xFFFF) {
+                       head->head = head->shared_ptr[mqsp];
+                       break;
+               } else if (head->next == NULL) {
+                       if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) ==
+                           0) {
+                               head = head->next;
+                               mqsp = head->head;
+                               head->head = head->shared_ptr[mqsp];
+                               break;
+                       } else
+                               return NULL;
+               } else
+                       head = head->next;
+       }
+       if (head) {
+               *dma_addr = head->dma_addr +
+                           ((unsigned long) &(head->shared_ptr[mqsp]) -
+                            (unsigned long) head);
+               pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__,
+                        &(head->shared_ptr[mqsp]), (u64)*dma_addr);
+               return &(head->shared_ptr[mqsp]);
+       }
+       return NULL;
+}
+
+void c2_free_mqsp(u16 * mqsp)
+{
+       struct sp_chunk *head;
+       u16 idx;
+
+       /* The chunk containing this ptr begins at the page boundary */
+       head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
+
+       /* Link head to new mqsp */
+       *mqsp = head->head;
+
+       /* Compute the shared_ptr index */
+       idx = ((unsigned long) mqsp & ~PAGE_MASK) >> 1;
+       idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
+
+       /* Point this index at the head */
+       head->shared_ptr[idx] = head->head;
+
+       /* Point head at this index */
+       head->head = idx;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c
new file mode 100644 (file)
index 0000000..485254e
--- /dev/null
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_vq.h"
+#include <rdma/iw_cm.h>
+
+int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+       struct c2_dev *c2dev = to_c2dev(cm_id->device);
+       struct ib_qp *ibqp;
+       struct c2_qp *qp;
+       struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */
+       struct c2_vq_req *vq_req;
+       int err;
+
+       ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+       if (!ibqp)
+               return -EINVAL;
+       qp = to_c2qp(ibqp);
+
+       /* Associate QP <--> CM_ID */
+       cm_id->provider_data = qp;
+       cm_id->add_ref(cm_id);
+       qp->cm_id = cm_id;
+
+       /*
+        * only support the max private_data length
+        */
+       if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
+               err = -EINVAL;
+               goto bail0;
+       }
+       /*
+        * Set the rdma read limits
+        */
+       err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+       if (err)
+               goto bail0;
+
+       /*
+        * Create and send a WR_QP_CONNECT...
+        */
+       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+       if (!wr) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req) {
+               err = -ENOMEM;
+               goto bail1;
+       }
+
+       c2_wr_set_id(wr, CCWR_QP_CONNECT);
+       wr->hdr.context = 0;
+       wr->rnic_handle = c2dev->adapter_handle;
+       wr->qp_handle = qp->adapter_handle;
+
+       wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr;
+       wr->remote_port = cm_id->remote_addr.sin_port;
+
+       /*
+        * Move any private data from the caller's buffer into
+        * the WR.
+        */
+       if (iw_param->private_data) {
+               wr->private_data_length =
+                       cpu_to_be32(iw_param->private_data_len);
+               memcpy(&wr->private_data[0], iw_param->private_data,
+                      iw_param->private_data_len);
+       } else
+               wr->private_data_length = 0;
+
+       /*
+        * Send WR to adapter.  NOTE: There is no synch reply from
+        * the adapter.
+        */
+       err = vq_send_wr(c2dev, (union c2wr *) wr);
+       vq_req_free(c2dev, vq_req);
+
+ bail1:
+       kfree(wr);
+ bail0:
+       if (err) {
+               /*
+                * If we fail, release reference on QP and
+                * disassociate QP from CM_ID
+                */
+               cm_id->provider_data = NULL;
+               qp->cm_id = NULL;
+               cm_id->rem_ref(cm_id);
+       }
+       return err;
+}
+
+int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
+{
+       struct c2_dev *c2dev;
+       struct c2wr_ep_listen_create_req wr;
+       struct c2wr_ep_listen_create_rep *reply;
+       struct c2_vq_req *vq_req;
+       int err;
+
+       c2dev = to_c2dev(cm_id->device);
+       if (c2dev == NULL)
+               return -EINVAL;
+
+       /*
+        * Allocate verbs request.
+        */
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req)
+               return -ENOMEM;
+
+       /*
+        * Build the WR
+        */
+       c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
+       wr.hdr.context = (u64) (unsigned long) vq_req;
+       wr.rnic_handle = c2dev->adapter_handle;
+       wr.local_addr = cm_id->local_addr.sin_addr.s_addr;
+       wr.local_port = cm_id->local_addr.sin_port;
+       wr.backlog = cpu_to_be32(backlog);
+       wr.user_context = (u64) (unsigned long) cm_id;
+
+       /*
+        * Reference the request struct.  Dereferenced in the int handler.
+        */
+       vq_req_get(c2dev, vq_req);
+
+       /*
+        * Send WR to adapter
+        */
+       err = vq_send_wr(c2dev, (union c2wr *) & wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail0;
+       }
+
+       /*
+        * Wait for reply from adapter
+        */
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err)
+               goto bail0;
+
+       /*
+        * Process reply
+        */
+       reply =
+           (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail1;
+       }
+
+       if ((err = c2_errno(reply)) != 0)
+               goto bail1;
+
+       /*
+        * Keep the adapter handle. Used in subsequent destroy
+        */
+       cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
+
+       /*
+        * free vq stuff
+        */
+       vq_repbuf_free(c2dev, reply);
+       vq_req_free(c2dev, vq_req);
+
+       return 0;
+
+ bail1:
+       vq_repbuf_free(c2dev, reply);
+ bail0:
+       vq_req_free(c2dev, vq_req);
+       return err;
+}
+
+
+int c2_llp_service_destroy(struct iw_cm_id *cm_id)
+{
+
+       struct c2_dev *c2dev;
+       struct c2wr_ep_listen_destroy_req wr;
+       struct c2wr_ep_listen_destroy_rep *reply;
+       struct c2_vq_req *vq_req;
+       int err;
+
+       c2dev = to_c2dev(cm_id->device);
+       if (c2dev == NULL)
+               return -EINVAL;
+
+       /*
+        * Allocate verbs request.
+        */
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req)
+               return -ENOMEM;
+
+       /*
+        * Build the WR
+        */
+       c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
+       wr.hdr.context = (unsigned long) vq_req;
+       wr.rnic_handle = c2dev->adapter_handle;
+       wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
+
+       /*
+        * reference the request struct.  dereferenced in the int handler.
+        */
+       vq_req_get(c2dev, vq_req);
+
+       /*
+        * Send WR to adapter
+        */
+       err = vq_send_wr(c2dev, (union c2wr *) & wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail0;
+       }
+
+       /*
+        * Wait for reply from adapter
+        */
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err)
+               goto bail0;
+
+       /*
+        * Process reply
+        */
+       reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+       if ((err = c2_errno(reply)) != 0)
+               goto bail1;
+
+ bail1:
+       vq_repbuf_free(c2dev, reply);
+ bail0:
+       vq_req_free(c2dev, vq_req);
+       return err;
+}
+
+int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+       struct c2_dev *c2dev = to_c2dev(cm_id->device);
+       struct c2_qp *qp;
+       struct ib_qp *ibqp;
+       struct c2wr_cr_accept_req *wr;  /* variable length WR */
+       struct c2_vq_req *vq_req;
+       struct c2wr_cr_accept_rep *reply;       /* VQ Reply msg ptr. */
+       int err;
+
+       ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+       if (!ibqp)
+               return -EINVAL;
+       qp = to_c2qp(ibqp);
+
+       /* Set the RDMA read limits */
+       err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+       if (err)
+               goto bail0;
+
+       /* Allocate verbs request. */
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req) {
+               err = -ENOMEM;
+               goto bail1;
+       }
+       vq_req->qp = qp;
+       vq_req->cm_id = cm_id;
+       vq_req->event = IW_CM_EVENT_ESTABLISHED;
+
+       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+       if (!wr) {
+               err = -ENOMEM;
+               goto bail2;
+       }
+
+       /* Build the WR */
+       c2_wr_set_id(wr, CCWR_CR_ACCEPT);
+       wr->hdr.context = (unsigned long) vq_req;
+       wr->rnic_handle = c2dev->adapter_handle;
+       wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
+       wr->qp_handle = qp->adapter_handle;
+
+       /* Replace the cr_handle with the QP after accept */
+       cm_id->provider_data = qp;
+       cm_id->add_ref(cm_id);
+       qp->cm_id = cm_id;
+
+       /* Validate private_data length */
+       if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
+               err = -EINVAL;
+               goto bail2;
+       }
+
+       if (iw_param->private_data) {
+               wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
+               memcpy(&wr->private_data[0],
+                      iw_param->private_data, iw_param->private_data_len);
+       } else
+               wr->private_data_length = 0;
+
+       /* Reference the request struct.  Dereferenced in the int handler. */
+       vq_req_get(c2dev, vq_req);
+
+       /* Send WR to adapter */
+       err = vq_send_wr(c2dev, (union c2wr *) wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail2;
+       }
+
+       /* Wait for reply from adapter */
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err)
+               goto bail2;
+
+       /* Check that reply is present */
+       reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail2;
+       }
+
+       err = c2_errno(reply);
+       vq_repbuf_free(c2dev, reply);
+
+       if (!err)
+               c2_set_qp_state(qp, C2_QP_STATE_RTS);
+ bail2:
+       kfree(wr);
+ bail1:
+       vq_req_free(c2dev, vq_req);
+ bail0:
+       if (err) {
+               /*
+                * If we fail, release reference on QP and
+                * disassociate QP from CM_ID
+                */
+               cm_id->provider_data = NULL;
+               qp->cm_id = NULL;
+               cm_id->rem_ref(cm_id);
+       }
+       return err;
+}
+
+int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+       struct c2_dev *c2dev;
+       struct c2wr_cr_reject_req wr;
+       struct c2_vq_req *vq_req;
+       struct c2wr_cr_reject_rep *reply;
+       int err;
+
+       c2dev = to_c2dev(cm_id->device);
+
+       /*
+        * Allocate verbs request.
+        */
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req)
+               return -ENOMEM;
+
+       /*
+        * Build the WR
+        */
+       c2_wr_set_id(&wr, CCWR_CR_REJECT);
+       wr.hdr.context = (unsigned long) vq_req;
+       wr.rnic_handle = c2dev->adapter_handle;
+       wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
+
+       /*
+        * reference the request struct.  dereferenced in the int handler.
+        */
+       vq_req_get(c2dev, vq_req);
+
+       /*
+        * Send WR to adapter
+        */
+       err = vq_send_wr(c2dev, (union c2wr *) & wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail0;
+       }
+
+       /*
+        * Wait for reply from adapter
+        */
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err)
+               goto bail0;
+
+       /*
+        * Process reply
+        */
+       reply = (struct c2wr_cr_reject_rep *) (unsigned long)
+               vq_req->reply_msg;
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+       err = c2_errno(reply);
+       /*
+        * free vq stuff
+        */
+       vq_repbuf_free(c2dev, reply);
+
+ bail0:
+       vq_req_free(c2dev, vq_req);
+       return err;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
new file mode 100644 (file)
index 0000000..9d7bcc5
--- /dev/null
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+
+#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
+
+static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
+{
+       struct c2_cq *cq;
+       unsigned long flags;
+
+       spin_lock_irqsave(&c2dev->lock, flags);
+       cq = c2dev->qptr_array[cqn];
+       if (!cq) {
+               spin_unlock_irqrestore(&c2dev->lock, flags);
+               return NULL;
+       }
+       atomic_inc(&cq->refcount);
+       spin_unlock_irqrestore(&c2dev->lock, flags);
+       return cq;
+}
+
+static void c2_cq_put(struct c2_cq *cq)
+{
+       if (atomic_dec_and_test(&cq->refcount))
+               wake_up(&cq->wait);
+}
+
+void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
+{
+       struct c2_cq *cq;
+
+       cq = c2_cq_get(c2dev, mq_index);
+       if (!cq) {
+               printk("discarding events on destroyed CQN=%d\n", mq_index);
+               return;
+       }
+
+       (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
+       c2_cq_put(cq);
+}
+
+void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
+{
+       struct c2_cq *cq;
+       struct c2_mq *q;
+
+       cq = c2_cq_get(c2dev, mq_index);
+       if (!cq)
+               return;
+
+       spin_lock_irq(&cq->lock);
+       q = &cq->mq;
+       if (q && !c2_mq_empty(q)) {
+               u16 priv = q->priv;
+               struct c2wr_ce *msg;
+
+               while (priv != be16_to_cpu(*q->shared)) {
+                       msg = (struct c2wr_ce *)
+                               (q->msg_pool.host + priv * q->msg_size);
+                       if (msg->qp_user_context == (u64) (unsigned long) qp) {
+                               msg->qp_user_context = (u64) 0;
+                       }
+                       priv = (priv + 1) % q->q_size;
+               }
+       }
+       spin_unlock_irq(&cq->lock);
+       c2_cq_put(cq);
+}
+
+static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
+{
+       switch (status) {
+       case C2_OK:
+               return IB_WC_SUCCESS;
+       case CCERR_FLUSHED:
+               return IB_WC_WR_FLUSH_ERR;
+       case CCERR_BASE_AND_BOUNDS_VIOLATION:
+               return IB_WC_LOC_PROT_ERR;
+       case CCERR_ACCESS_VIOLATION:
+               return IB_WC_LOC_ACCESS_ERR;
+       case CCERR_TOTAL_LENGTH_TOO_BIG:
+               return IB_WC_LOC_LEN_ERR;
+       case CCERR_INVALID_WINDOW:
+               return IB_WC_MW_BIND_ERR;
+       default:
+               return IB_WC_GENERAL_ERR;
+       }
+}
+
+
+static inline int c2_poll_one(struct c2_dev *c2dev,
+                             struct c2_cq *cq, struct ib_wc *entry)
+{
+       struct c2wr_ce *ce;
+       struct c2_qp *qp;
+       int is_recv = 0;
+
+       ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+       if (!ce) {
+               return -EAGAIN;
+       }
+
+       /*
+        * If the QP returned is NULL then this QP has already
+        * been freed and we are unable to process the completion.
+        * Try pulling the next message.
+        */
+       while ((qp =
+               (struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
+               c2_mq_free(&cq->mq);
+               ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+               if (!ce)
+                       return -EAGAIN;
+       }
+
+       entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
+       entry->wr_id = ce->hdr.context;
+       entry->qp_num = ce->handle;
+       entry->wc_flags = 0;
+       entry->slid = 0;
+       entry->sl = 0;
+       entry->src_qp = 0;
+       entry->dlid_path_bits = 0;
+       entry->pkey_index = 0;
+
+       switch (c2_wr_get_id(ce)) {
+       case C2_WR_TYPE_SEND:
+               entry->opcode = IB_WC_SEND;
+               break;
+       case C2_WR_TYPE_RDMA_WRITE:
+               entry->opcode = IB_WC_RDMA_WRITE;
+               break;
+       case C2_WR_TYPE_RDMA_READ:
+               entry->opcode = IB_WC_RDMA_READ;
+               break;
+       case C2_WR_TYPE_BIND_MW:
+               entry->opcode = IB_WC_BIND_MW;
+               break;
+       case C2_WR_TYPE_RECV:
+               entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
+               entry->opcode = IB_WC_RECV;
+               is_recv = 1;
+               break;
+       default:
+               break;
+       }
+
+       /* consume the WQEs */
+       if (is_recv)
+               c2_mq_lconsume(&qp->rq_mq, 1);
+       else
+               c2_mq_lconsume(&qp->sq_mq,
+                              be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
+
+       /* free the message */
+       c2_mq_free(&cq->mq);
+
+       return 0;
+}
+
+int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+       struct c2_dev *c2dev = to_c2dev(ibcq->device);
+       struct c2_cq *cq = to_c2cq(ibcq);
+       unsigned long flags;
+       int npolled, err;
+
+       spin_lock_irqsave(&cq->lock, flags);
+
+       for (npolled = 0; npolled < num_entries; ++npolled) {
+
+               err = c2_poll_one(c2dev, cq, entry + npolled);
+               if (err)
+                       break;
+       }
+
+       spin_unlock_irqrestore(&cq->lock, flags);
+
+       return npolled;
+}
+
+int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+{
+       struct c2_mq_shared __iomem *shared;
+       struct c2_cq *cq;
+
+       cq = to_c2cq(ibcq);
+       shared = cq->mq.peer;
+
+       if (notify == IB_CQ_NEXT_COMP)
+               writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
+       else if (notify == IB_CQ_SOLICITED)
+               writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
+       else
+               return -EINVAL;
+
+       writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
+
+       /*
+        * Now read back shared->armed to make the PCI
+        * write synchronous.  This is necessary for
+        * correct cq notification semantics.
+        */
+       readb(&shared->armed);
+
+       return 0;
+}
+
+static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
+{
+
+       dma_unmap_single(c2dev->ibdev.dma_device, pci_unmap_addr(mq, mapping),
+                        mq->q_size * mq->msg_size, DMA_FROM_DEVICE);
+       free_pages((unsigned long) mq->msg_pool.host,
+                  get_order(mq->q_size * mq->msg_size));
+}
+
+static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
+                          int msg_size)
+{
+       unsigned long pool_start;
+
+       pool_start = __get_free_pages(GFP_KERNEL,
+                                     get_order(q_size * msg_size));
+       if (!pool_start)
+               return -ENOMEM;
+
+       c2_mq_rep_init(mq,
+                      0,               /* index (currently unknown) */
+                      q_size,
+                      msg_size,
+                      (u8 *) pool_start,
+                      NULL,    /* peer (currently unknown) */
+                      C2_MQ_HOST_TARGET);
+
+       mq->host_dma = dma_map_single(c2dev->ibdev.dma_device,
+                                     (void *)pool_start,
+                                     q_size * msg_size, DMA_FROM_DEVICE);
+       pci_unmap_addr_set(mq, mapping, mq->host_dma);
+
+       return 0;
+}
+
+int c2_init_cq(struct c2_dev *c2dev, int entries,
+              struct c2_ucontext *ctx, struct c2_cq *cq)
+{
+       struct c2wr_cq_create_req wr;
+       struct c2wr_cq_create_rep *reply;
+       unsigned long peer_pa;
+       struct c2_vq_req *vq_req;
+       int err;
+
+       might_sleep();
+
+       cq->ibcq.cqe = entries - 1;
+       cq->is_kernel = !ctx;
+
+       /* Allocate a shared pointer */
+       cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+                                     &cq->mq.shared_dma, GFP_KERNEL);
+       if (!cq->mq.shared)
+               return -ENOMEM;
+
+       /* Allocate pages for the message pool */
+       err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
+       if (err)
+               goto bail0;
+
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req) {
+               err = -ENOMEM;
+               goto bail1;
+       }
+
+       memset(&wr, 0, sizeof(wr));
+       c2_wr_set_id(&wr, CCWR_CQ_CREATE);
+       wr.hdr.context = (unsigned long) vq_req;
+       wr.rnic_handle = c2dev->adapter_handle;
+       wr.msg_size = cpu_to_be32(cq->mq.msg_size);
+       wr.depth = cpu_to_be32(cq->mq.q_size);
+       wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
+       wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
+       wr.user_context = (u64) (unsigned long) (cq);
+
+       vq_req_get(c2dev, vq_req);
+
+       err = vq_send_wr(c2dev, (union c2wr *) & wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail2;
+       }
+
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err)
+               goto bail2;
+
+       reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail2;
+       }
+
+       if ((err = c2_errno(reply)) != 0)
+               goto bail3;
+
+       cq->adapter_handle = reply->cq_handle;
+       cq->mq.index = be32_to_cpu(reply->mq_index);
+
+       peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
+       cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
+       if (!cq->mq.peer) {
+               err = -ENOMEM;
+               goto bail3;
+       }
+
+       vq_repbuf_free(c2dev, reply);
+       vq_req_free(c2dev, vq_req);
+
+       spin_lock_init(&cq->lock);
+       atomic_set(&cq->refcount, 1);
+       init_waitqueue_head(&cq->wait);
+
+       /*
+        * Use the MQ index allocated by the adapter to
+        * store the CQ in the qptr_array
+        */
+       cq->cqn = cq->mq.index;
+       c2dev->qptr_array[cq->cqn] = cq;
+
+       return 0;
+
+      bail3:
+       vq_repbuf_free(c2dev, reply);
+      bail2:
+       vq_req_free(c2dev, vq_req);
+      bail1:
+       c2_free_cq_buf(c2dev, &cq->mq);
+      bail0:
+       c2_free_mqsp(cq->mq.shared);
+
+       return err;
+}
+
+void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
+{
+       int err;
+       struct c2_vq_req *vq_req;
+       struct c2wr_cq_destroy_req wr;
+       struct c2wr_cq_destroy_rep *reply;
+
+       might_sleep();
+
+       /* Clear CQ from the qptr array */
+       spin_lock_irq(&c2dev->lock);
+       c2dev->qptr_array[cq->mq.index] = NULL;
+       atomic_dec(&cq->refcount);
+       spin_unlock_irq(&c2dev->lock);
+
+       wait_event(cq->wait, !atomic_read(&cq->refcount));
+
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req) {
+               goto bail0;
+       }
+
+       memset(&wr, 0, sizeof(wr));
+       c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
+       wr.hdr.context = (unsigned long) vq_req;
+       wr.rnic_handle = c2dev->adapter_handle;
+       wr.cq_handle = cq->adapter_handle;
+
+       vq_req_get(c2dev, vq_req);
+
+       err = vq_send_wr(c2dev, (union c2wr *) & wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail1;
+       }
+
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err)
+               goto bail1;
+
+       reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+
+       vq_repbuf_free(c2dev, reply);
+      bail1:
+       vq_req_free(c2dev, vq_req);
+      bail0:
+       if (cq->is_kernel) {
+               c2_free_cq_buf(c2dev, &cq->mq);
+       }
+
+       return;
+}
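
c2_cq_clean() above walks the completion queue by comparing its private index
against the 16-bit index the adapter publishes through *q->shared, wrapping at
q_size.  A minimal standalone sketch of that ring walk, with hypothetical names
and plain userspace types instead of the driver's structures:

#include <stdint.h>
#include <stdio.h>

struct ring {
	uint16_t priv;		/* driver's private index            */
	uint16_t shared;	/* index published by the peer side  */
	uint16_t q_size;	/* number of slots in the ring       */
};

/* Visit every slot between the private and shared indices, wrapping at
 * q_size -- the same walk c2_cq_clean() performs when it scrubs entries
 * belonging to a QP that is being destroyed. */
static void walk_pending(const struct ring *q)
{
	uint16_t i = q->priv;

	while (i != q->shared) {
		printf("pending slot %u\n", i);
		i = (i + 1) % q->q_size;
	}
}

int main(void)
{
	struct ring q = { .priv = 6, .shared = 2, .q_size = 8 };

	walk_pending(&q);	/* visits slots 6, 7, 0, 1 */
	return 0;
}
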
diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c
new file mode 100644 (file)
index 0000000..0d0bc33
--- /dev/null
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_vq.h"
+
+static void handle_mq(struct c2_dev *c2dev, u32 index);
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
+
+/*
+ * Handle RNIC interrupts
+ */
+void c2_rnic_interrupt(struct c2_dev *c2dev)
+{
+       unsigned int mq_index;
+
+       while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) {
+               mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
+               if (mq_index & 0x80000000) {
+                       break;
+               }
+
+               c2dev->hints_read++;
+               handle_mq(c2dev, mq_index);
+       }
+
+}
+
+/*
+ * Top level MQ handler
+ */
+static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
+{
+       if (c2dev->qptr_array[mq_index] == NULL) {
+               pr_debug("handle_mq: stray activity for mq_index=%d\n",
+                       mq_index);
+               return;
+       }
+
+       switch (mq_index) {
+       case (0):
+               /*
+                * An index of 0 in the activity queue
+                * indicates the req vq now has messages
+                * available...
+                *
+                * Wake up any waiters waiting on req VQ
+                * message availability.
+                */
+               wake_up(&c2dev->req_vq_wo);
+               break;
+       case (1):
+               handle_vq(c2dev, mq_index);
+               break;
+       case (2):
+               /* We have to purge the VQ in case there are pending
+                * accept reply requests that would result in the
+                * generation of an ESTABLISHED event. If we don't
+                * generate these first, a CLOSE event could end up
+                * being delivered before the ESTABLISHED event.
+                */
+               handle_vq(c2dev, 1);
+
+               c2_ae_event(c2dev, mq_index);
+               break;
+       default:
+               /* There is no event synchronization between CQ events
+                * and AE or CM events. In fact, a CQE could be
+                * delivered for all of the I/O up to and including the
+                * FLUSH for a peer disconnect prior to the ESTABLISHED
+                * event being delivered to the app. The reason for this
+                * is that CM events are delivered on a thread, while AE
+                * and CQ events are delivered in interrupt context.
+                */
+               c2_cq_event(c2dev, mq_index);
+               break;
+       }
+
+       return;
+}
+
+/*
+ * Handles verbs WR replies.
+ */
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
+{
+       void *adapter_msg, *reply_msg;
+       struct c2wr_hdr *host_msg;
+       struct c2wr_hdr tmp;
+       struct c2_mq *reply_vq;
+       struct c2_vq_req *req;
+       struct iw_cm_event cm_event;
+       int err;
+
+       reply_vq = (struct c2_mq *) c2dev->qptr_array[mq_index];
+
+       /*
+        * get next msg from mq_index into adapter_msg.
+        * don't free it yet.
+        */
+       adapter_msg = c2_mq_consume(reply_vq);
+       if (adapter_msg == NULL) {
+               return;
+       }
+
+       host_msg = vq_repbuf_alloc(c2dev);
+
+       /*
+        * If we can't get a host buffer, then we'll still
+        * wake up the waiter; we just won't give it the msg.
+        * It is assumed the waiter will deal with this...
+        */
+       if (!host_msg) {
+               pr_debug("handle_vq: no repbufs!\n");
+
+               /*
+                * just copy the WR header into a local variable.
+                * this allows us to still demux on the context
+                */
+               host_msg = &tmp;
+               memcpy(host_msg, adapter_msg, sizeof(tmp));
+               reply_msg = NULL;
+       } else {
+               memcpy(host_msg, adapter_msg, reply_vq->msg_size);
+               reply_msg = host_msg;
+       }
+
+       /*
+        * consume the msg from the MQ
+        */
+       c2_mq_free(reply_vq);
+
+       /*
+        * wakeup the waiter.
+        */
+       req = (struct c2_vq_req *) (unsigned long) host_msg->context;
+       if (req == NULL) {
+               /*
+                * We should never get here, as the adapter should
+                * never send us a reply that we're not expecting.
+                */
+               vq_repbuf_free(c2dev, host_msg);
+               pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
+               return;
+       }
+
+       err = c2_errno(reply_msg);
+       if (!err) switch (req->event) {
+       case IW_CM_EVENT_ESTABLISHED:
+               /*
+                * Move the QP to RTS on the ESTABLISHED event,
+                * then fall through and deliver the event just
+                * like a CLOSE.
+                */
+               c2_set_qp_state(req->qp, C2_QP_STATE_RTS);
+               /* fall through */
+       case IW_CM_EVENT_CLOSE:
+               cm_event.event = req->event;
+               cm_event.status = 0;
+               cm_event.local_addr = req->cm_id->local_addr;
+               cm_event.remote_addr = req->cm_id->remote_addr;
+               cm_event.private_data = NULL;
+               cm_event.private_data_len = 0;
+               req->cm_id->event_handler(req->cm_id, &cm_event);
+               break;
+       default:
+               break;
+       }
+
+       req->reply_msg = (u64) (unsigned long) (reply_msg);
+       atomic_set(&req->reply_ready, 1);
+       wake_up(&req->wait_object);
+
+       /*
+        * If the request was cancelled, then this put will
+        * free the vq_req memory...and reply_msg!!!
+        */
+       vq_req_put(c2dev, req);
+}
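
handle_mq() above demultiplexes on the activity-queue index: 0 only wakes
request-VQ waiters, 1 is the verbs reply queue, 2 is the asynchronous event
queue (drained of pending replies first so a CLOSE cannot overtake an
ESTABLISHED), and any other index is a completion queue.  A hedged standalone
sketch of that dispatch, with stand-in handler names rather than the driver's
wake_up()/handle_vq()/c2_ae_event()/c2_cq_event():

#include <stdint.h>
#include <stdio.h>

static void wake_req_vq(void)		{ puts("wake request-VQ waiters"); }
static void handle_reply_vq(void)	{ puts("process verbs reply"); }
static void handle_async_event(void)	{ puts("process async event"); }
static void handle_cq(uint32_t index)	{ printf("completion on mq_index %u\n", index); }

static void demux_mq(uint32_t mq_index)
{
	switch (mq_index) {
	case 0:
		wake_req_vq();
		break;
	case 1:
		handle_reply_vq();
		break;
	case 2:
		/* Drain pending replies first so an ESTABLISHED event
		 * cannot be overtaken by a CLOSE from the same burst. */
		handle_reply_vq();
		handle_async_event();
		break;
	default:
		handle_cq(mq_index);
		break;
	}
}

int main(void)
{
	demux_mq(2);
	demux_mq(7);
	return 0;
}
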
diff --git a/drivers/infiniband/hw/amso1100/c2_mm.c b/drivers/infiniband/hw/amso1100/c2_mm.c
new file mode 100644 (file)
index 0000000..1e4f464
--- /dev/null
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_vq.h"
+
+#define PBL_VIRT 1
+#define PBL_PHYS 2
+
+/*
+ * Send all the PBL messages to convey the remainder of the PBL.
+ * Wait for the adapter's reply on the last one.
+ * This is indicated by setting MEM_PBL_COMPLETE in the flags.
+ *
+ * NOTE:  vq_req is _not_ freed by this function.  The VQ Host
+ *       Reply buffer _is_ freed by this function.
+ */
+static int
+send_pbl_messages(struct c2_dev *c2dev, u32 stag_index,
+                 unsigned long va, u32 pbl_depth,
+                 struct c2_vq_req *vq_req, int pbl_type)
+{
+       u32 pbe_count;          /* amt that fits in a PBL msg */
+       u32 count;              /* amt in this PBL MSG. */
+       struct c2wr_nsmr_pbl_req *wr;   /* PBL WR ptr */
+       struct c2wr_nsmr_pbl_rep *reply;        /* reply ptr */
+       int err, pbl_virt, pbl_index, i;
+
+       switch (pbl_type) {
+       case PBL_VIRT:
+               pbl_virt = 1;
+               break;
+       case PBL_PHYS:
+               pbl_virt = 0;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       pbe_count = (c2dev->req_vq.msg_size -
+                    sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
+       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+       if (!wr) {
+               return -ENOMEM;
+       }
+       c2_wr_set_id(wr, CCWR_NSMR_PBL);
+
+       /*
+        * Only the last PBL message will generate a reply from the verbs,
+        * so we set the context to 0 indicating there is no kernel verbs
+        * handler blocked awaiting this reply.
+        */
+       wr->hdr.context = 0;
+       wr->rnic_handle = c2dev->adapter_handle;
+       wr->stag_index = stag_index;    /* already swapped */
+       wr->flags = 0;
+       pbl_index = 0;
+       while (pbl_depth) {
+               count = min(pbe_count, pbl_depth);
+               wr->addrs_length = cpu_to_be32(count);
+
+               /*
+                *  If this is the last message, then reference the
+                *  vq request struct because we're going to wait for a reply.
+                *  Also mark this PBL msg as the last one.
+                */
+               if (count == pbl_depth) {
+                       /*
+                        * reference the request struct.  dereferenced in the
+                        * int handler.
+                        */
+                       vq_req_get(c2dev, vq_req);
+                       wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
+
+                       /*
+                        * This is the last PBL message.
+                        * Set the context to our VQ Request Object so we can
+                        * wait for the reply.
+                        */
+                       wr->hdr.context = (unsigned long) vq_req;
+               }
+
+               /*
+                * If pbl_virt is set then va is a virtual address
+                * that describes a virtually contiguous memory
+                * allocation. The wr needs the start of each virtual page
+                * to be converted to the corresponding physical address
+                * of the page. If pbl_virt is not set then va is an array
+                * of physical addresses and there is no conversion to do.
+                * Just fill in the wr with what is in the array.
+                */
+               for (i = 0; i < count; i++) {
+                       if (pbl_virt) {
+                               va += PAGE_SIZE;
+                       } else {
+                               wr->paddrs[i] =
+                                   cpu_to_be64(((u64 *)va)[pbl_index + i]);
+                       }
+               }
+
+               /*
+                * Send WR to adapter
+                */
+               err = vq_send_wr(c2dev, (union c2wr *) wr);
+               if (err) {
+                       if (count <= pbe_count) {
+                               vq_req_put(c2dev, vq_req);
+                       }
+                       goto bail0;
+               }
+               pbl_depth -= count;
+               pbl_index += count;
+       }
+
+       /*
+        *  Now wait for the reply...
+        */
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err) {
+               goto bail0;
+       }
+
+       /*
+        * Process reply
+        */
+       reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+
+       err = c2_errno(reply);
+
+       vq_repbuf_free(c2dev, reply);
+      bail0:
+       kfree(wr);
+       return err;
+}
+
+#define C2_PBL_MAX_DEPTH 131072
+int
+c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+                          int page_size, int pbl_depth, u32 length,
+                          u32 offset, u64 *va, enum c2_acf acf,
+                          struct c2_mr *mr)
+{
+       struct c2_vq_req *vq_req;
+       struct c2wr_nsmr_register_req *wr;
+       struct c2wr_nsmr_register_rep *reply;
+       u16 flags;
+       int i, pbe_count, count;
+       int err;
+
+       if (!va || !length || !addr_list || !pbl_depth)
+               return -EINVAL;
+
+       /*
+        * Verify PBL depth is within rnic max
+        */
+       if (pbl_depth > C2_PBL_MAX_DEPTH) {
+               return -EINVAL;
+       }
+
+       /*
+        * allocate verbs request object
+        */
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req)
+               return -ENOMEM;
+
+       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+       if (!wr) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+
+       /*
+        * build the WR
+        */
+       c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
+       wr->hdr.context = (unsigned long) vq_req;
+       wr->rnic_handle = c2dev->adapter_handle;
+
+       flags = (acf | MEM_VA_BASED | MEM_REMOTE);
+
+       /*
+        * compute how many pbes can fit in the message
+        */
+       pbe_count = (c2dev->req_vq.msg_size -
+                    sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
+
+       if (pbl_depth <= pbe_count) {
+               flags |= MEM_PBL_COMPLETE;
+       }
+       wr->flags = cpu_to_be16(flags);
+       wr->stag_key = 0;       /* stag_key */
+       wr->va = cpu_to_be64(*va);
+       wr->pd_id = mr->pd->pd_id;
+       wr->pbe_size = cpu_to_be32(page_size);
+       wr->length = cpu_to_be32(length);
+       wr->pbl_depth = cpu_to_be32(pbl_depth);
+       wr->fbo = cpu_to_be32(offset);
+       count = min(pbl_depth, pbe_count);
+       wr->addrs_length = cpu_to_be32(count);
+
+       /*
+        * fill out the PBL for this message
+        */
+       for (i = 0; i < count; i++) {
+               wr->paddrs[i] = cpu_to_be64(addr_list[i]);
+       }
+
+       /*
+        * reference the request struct
+        */
+       vq_req_get(c2dev, vq_req);
+
+       /*
+        * send the WR to the adapter
+        */
+       err = vq_send_wr(c2dev, (union c2wr *) wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail1;
+       }
+
+       /*
+        * wait for reply from adapter
+        */
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err) {
+               goto bail1;
+       }
+
+       /*
+        * process reply
+        */
+       reply =
+           (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail1;
+       }
+       if ((err = c2_errno(reply))) {
+               goto bail2;
+       }
+       /* *p_pb_entries = be32_to_cpu(reply->pbl_depth); */
+       mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
+       vq_repbuf_free(c2dev, reply);
+
+       /*
+        * if there are still more PBEs we need to send them to
+        * the adapter and wait for a reply on the final one.
+        * reuse vq_req for this purpose.
+        */
+       pbl_depth -= count;
+       if (pbl_depth) {
+
+               vq_req->reply_msg = (unsigned long) NULL;
+               atomic_set(&vq_req->reply_ready, 0);
+               err = send_pbl_messages(c2dev,
+                                       cpu_to_be32(mr->ibmr.lkey),
+                                       (unsigned long) &addr_list[i],
+                                       pbl_depth, vq_req, PBL_PHYS);
+               if (err) {
+                       goto bail1;
+               }
+       }
+
+       vq_req_free(c2dev, vq_req);
+       kfree(wr);
+
+       return err;
+
+      bail2:
+       vq_repbuf_free(c2dev, reply);
+      bail1:
+       kfree(wr);
+      bail0:
+       vq_req_free(c2dev, vq_req);
+       return err;
+}
+
+int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
+{
+       struct c2_vq_req *vq_req;       /* verbs request object */
+       struct c2wr_stag_dealloc_req wr;        /* work request */
+       struct c2wr_stag_dealloc_rep *reply;    /* WR reply  */
+       int err;
+
+
+       /*
+        * allocate verbs request object
+        */
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req) {
+               return -ENOMEM;
+       }
+
+       /*
+        * Build the WR
+        */
+       c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
+       wr.hdr.context = (u64) (unsigned long) vq_req;
+       wr.rnic_handle = c2dev->adapter_handle;
+       wr.stag_index = cpu_to_be32(stag_index);
+
+       /*
+        * reference the request struct.  dereferenced in the int handler.
+        */
+       vq_req_get(c2dev, vq_req);
+
+       /*
+        * Send WR to adapter
+        */
+       err = vq_send_wr(c2dev, (union c2wr *) & wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail0;
+       }
+
+       /*
+        * Wait for reply from adapter
+        */
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err) {
+               goto bail0;
+       }
+
+       /*
+        * Process reply
+        */
+       reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+
+       err = c2_errno(reply);
+
+       vq_repbuf_free(c2dev, reply);
+      bail0:
+       vq_req_free(c2dev, vq_req);
+       return err;
+}
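
c2_nsmr_register_phys_kern() above fits as many 64-bit page addresses as
possible into the first registration message and leaves the rest to
send_pbl_messages().  A small sketch of that sizing arithmetic, using made-up
message and header sizes rather than the adapter's real values (the register
and PBL request headers differ slightly in size, so this is a simplification):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical sizes; the driver uses c2dev->req_vq.msg_size and
	 * the sizeof() of the relevant c2wr request headers. */
	uint32_t msg_size  = 1024;	/* bytes per verbs-queue message */
	uint32_t hdr_size  = 64;	/* bytes of request header       */
	uint32_t pbl_depth = 1000;	/* pages describing the region   */

	uint32_t pbe_count = (msg_size - hdr_size) / sizeof(uint64_t);
	uint32_t messages  = (pbl_depth + pbe_count - 1) / pbe_count;

	printf("%u PBEs per message, about %u message(s) in total\n",
	       pbe_count, messages);	/* 120 PBEs, 9 messages */
	return 0;
}
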
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.c b/drivers/infiniband/hw/amso1100/c2_mq.c
new file mode 100644 (file)
index 0000000..b88a755
--- /dev/null
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_mq.h"
+
+void *c2_mq_alloc(struct c2_mq *q)
+{
+       BUG_ON(q->magic != C2_MQ_MAGIC);
+       BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+       if (c2_mq_full(q)) {
+               return NULL;
+       } else {
+#ifdef DEBUG
+               struct c2wr_hdr *m =
+                   (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size);
+#ifdef CCMSGMAGIC
+               BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC));
+               m->magic = cpu_to_be32(CCWR_MAGIC);
+#endif
+               return m;
+#else
+               return q->msg_pool.host + q->priv * q->msg_size;
+#endif
+       }
+}
+
+void c2_mq_produce(struct c2_mq *q)
+{
+       BUG_ON(q->magic != C2_MQ_MAGIC);
+       BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+       if (!c2_mq_full(q)) {
+               q->priv = (q->priv + 1) % q->q_size;
+               q->hint_count++;
+               /* Update peer's offset. */
+               __raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+       }
+}
+
+void *c2_mq_consume(struct c2_mq *q)
+{
+       BUG_ON(q->magic != C2_MQ_MAGIC);
+       BUG_ON(q->type != C2_MQ_HOST_TARGET);
+
+       if (c2_mq_empty(q)) {
+               return NULL;
+       } else {
+#ifdef DEBUG
+               struct c2wr_hdr *m = (struct c2wr_hdr *)
+                   (q->msg_pool.host + q->priv * q->msg_size);
+#ifdef CCMSGMAGIC
+               BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC));
+#endif
+               return m;
+#else
+               return q->msg_pool.host + q->priv * q->msg_size;
+#endif
+       }
+}
+
+void c2_mq_free(struct c2_mq *q)
+{
+       BUG_ON(q->magic != C2_MQ_MAGIC);
+       BUG_ON(q->type != C2_MQ_HOST_TARGET);
+
+       if (!c2_mq_empty(q)) {
+
+#ifdef CCMSGMAGIC
+               {
+                       struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *)
+                           (q->msg_pool.adapter + q->priv * q->msg_size);
+                       __raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic);
+               }
+#endif
+               q->priv = (q->priv + 1) % q->q_size;
+               /* Update peer's offset. */
+               __raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+       }
+}
+
+
+void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
+{
+       BUG_ON(q->magic != C2_MQ_MAGIC);
+       BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+       while (wqe_count--) {
+               BUG_ON(c2_mq_empty(q));
+               *q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size);
+       }
+}
+
+#if 0
+u32 c2_mq_count(struct c2_mq *q)
+{
+       s32 count;
+
+       if (q->type == C2_MQ_HOST_TARGET)
+               count = be16_to_cpu(*q->shared) - q->priv;
+       else
+               count = q->priv - be16_to_cpu(*q->shared);
+
+       if (count < 0)
+               count += q->q_size;
+
+       return (u32) count;
+}
+#endif  /*  0  */
+
+void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+                   u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
+{
+       BUG_ON(!q->shared);
+
+       /* This code assumes the byte swapping has already been done! */
+       q->index = index;
+       q->q_size = q_size;
+       q->msg_size = msg_size;
+       q->msg_pool.adapter = pool_start;
+       q->peer = (struct c2_mq_shared __iomem *) peer;
+       q->magic = C2_MQ_MAGIC;
+       q->type = type;
+       q->priv = 0;
+       q->hint_count = 0;
+       return;
+}
+void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+                   u8 *pool_start, u16 __iomem *peer, u32 type)
+{
+       BUG_ON(!q->shared);
+
+       /* This code assumes the byte swapping has already been done! */
+       q->index = index;
+       q->q_size = q_size;
+       q->msg_size = msg_size;
+       q->msg_pool.host = pool_start;
+       q->peer = (struct c2_mq_shared __iomem *) peer;
+       q->magic = C2_MQ_MAGIC;
+       q->type = type;
+       q->priv = 0;
+       q->hint_count = 0;
+       return;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.h b/drivers/infiniband/hw/amso1100/c2_mq.h
new file mode 100644 (file)
index 0000000..9185bbb
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _C2_MQ_H_
+#define _C2_MQ_H_
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include "c2_wr.h"
+
+enum c2_shared_regs {
+
+       C2_SHARED_ARMED = 0x10,
+       C2_SHARED_NOTIFY = 0x18,
+       C2_SHARED_SHARED = 0x40,
+};
+
+struct c2_mq_shared {
+       u16 unused1;
+       u8 armed;
+       u8 notification_type;
+       u32 unused2;
+       u16 shared;
+       /* Pad to 64 bytes. */
+       u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
+};
+
+enum c2_mq_type {
+       C2_MQ_HOST_TARGET = 1,
+       C2_MQ_ADAPTER_TARGET = 2,
+};
+
+/*
+ * c2_mq_t is for kernel-mode MQs like the VQs and the AEQ.
+ * c2_user_mq_t (which is the same format) is for user-mode MQs...
+ */
+#define C2_MQ_MAGIC 0x4d512020 /* 'MQ  ' */
+struct c2_mq {
+       u32 magic;
+       union {
+               u8 *host;
+               u8 __iomem *adapter;
+       } msg_pool;
+       dma_addr_t host_dma;
+       DECLARE_PCI_UNMAP_ADDR(mapping);
+       u16 hint_count;
+       u16 priv;
+       struct c2_mq_shared __iomem *peer;
+       u16 *shared;
+       dma_addr_t shared_dma;
+       u32 q_size;
+       u32 msg_size;
+       u32 index;
+       enum c2_mq_type type;
+};
+
+static __inline__ int c2_mq_empty(struct c2_mq *q)
+{
+       return q->priv == be16_to_cpu(*q->shared);
+}
+
+static __inline__ int c2_mq_full(struct c2_mq *q)
+{
+       return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
+}
+
+extern void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
+extern void *c2_mq_alloc(struct c2_mq *q);
+extern void c2_mq_produce(struct c2_mq *q);
+extern void *c2_mq_consume(struct c2_mq *q);
+extern void c2_mq_free(struct c2_mq *q);
+extern void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+                      u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
+extern void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+                          u8 *pool_start, u16 __iomem *peer, u32 type);
+
+#endif                         /* _C2_MQ_H_ */
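
c2_mq_empty() and c2_mq_full() above implement the usual one-slot-free
circular queue: empty when the private index equals the peer's shared index,
full when advancing would catch up to it.  A minimal standalone sketch under
those assumptions, with plain integers instead of the driver's byte-swapped
shared pointer:

#include <stdint.h>
#include <stdio.h>

struct mq {
	uint16_t priv;		/* local index  */
	uint16_t shared;	/* peer's index */
	uint32_t q_size;	/* ring slots   */
};

static int mq_empty(const struct mq *q)
{
	return q->priv == q->shared;
}

static int mq_full(const struct mq *q)
{
	/* One slot is always left unused so full and empty are distinct. */
	return q->priv == (q->shared + q->q_size - 1) % q->q_size;
}

int main(void)
{
	struct mq q = { .priv = 3, .shared = 4, .q_size = 8 };

	printf("empty=%d full=%d\n", mq_empty(&q), mq_full(&q));	/* empty=0 full=1 */
	return 0;
}
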
diff --git a/drivers/infiniband/hw/amso1100/c2_pd.c b/drivers/infiniband/hw/amso1100/c2_pd.c
new file mode 100644 (file)
index 0000000..00c7099
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include "c2.h"
+#include "c2_provider.h"
+
+int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
+{
+       u32 obj;
+       int ret = 0;
+
+       spin_lock(&c2dev->pd_table.lock);
+       obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
+                                c2dev->pd_table.last);
+       if (obj >= c2dev->pd_table.max)
+               obj = find_first_zero_bit(c2dev->pd_table.table,
+                                         c2dev->pd_table.max);
+       if (obj < c2dev->pd_table.max) {
+               pd->pd_id = obj;
+               __set_bit(obj, c2dev->pd_table.table);
+               c2dev->pd_table.last = obj+1;
+               if (c2dev->pd_table.last >= c2dev->pd_table.max)
+                       c2dev->pd_table.last = 0;
+       } else
+               ret = -ENOMEM;
+       spin_unlock(&c2dev->pd_table.lock);
+       return ret;
+}
+
+void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
+{
+       spin_lock(&c2dev->pd_table.lock);
+       __clear_bit(pd->pd_id, c2dev->pd_table.table);
+       spin_unlock(&c2dev->pd_table.lock);
+}
+
+int __devinit c2_init_pd_table(struct c2_dev *c2dev)
+{
+
+       c2dev->pd_table.last = 0;
+       c2dev->pd_table.max = c2dev->props.max_pd;
+       spin_lock_init(&c2dev->pd_table.lock);
+       c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
+                                       sizeof(long), GFP_KERNEL);
+       if (!c2dev->pd_table.table)
+               return -ENOMEM;
+       bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
+       return 0;
+}
+
+void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev)
+{
+       kfree(c2dev->pd_table.table);
+}
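
c2_pd_alloc() above hands out protection-domain ids round-robin from a bitmap,
resuming the search after the last id granted and wrapping to the start when
it runs off the end.  A hedged userspace sketch of the same policy, without
the kernel bitmap helpers:

#include <stdio.h>

#define MAX_PD 8

static unsigned char table[MAX_PD];	/* 0 = free, 1 = in use */
static unsigned int last;		/* where the next search resumes */

/* Return an id in [0, MAX_PD) or -1 when the table is exhausted,
 * searching round-robin from the position after the previous grant. */
static int pd_alloc(void)
{
	unsigned int i, obj;

	for (i = 0; i < MAX_PD; i++) {
		obj = (last + i) % MAX_PD;
		if (!table[obj]) {
			table[obj] = 1;
			last = (obj + 1) % MAX_PD;
			return (int) obj;
		}
	}
	return -1;
}

int main(void)
{
	int a = pd_alloc();
	int b = pd_alloc();
	int c = pd_alloc();

	printf("%d %d %d\n", a, b, c);	/* 0 1 2 */
	return 0;
}
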
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
new file mode 100644 (file)
index 0000000..dd6af55
--- /dev/null
@@ -0,0 +1,870 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_arp.h>
+#include <linux/vmalloc.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include "c2.h"
+#include "c2_provider.h"
+#include "c2_user.h"
+
+static int c2_query_device(struct ib_device *ibdev,
+                          struct ib_device_attr *props)
+{
+       struct c2_dev *c2dev = to_c2dev(ibdev);
+
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+       *props = c2dev->props;
+       return 0;
+}
+
+static int c2_query_port(struct ib_device *ibdev,
+                        u8 port, struct ib_port_attr *props)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+       props->max_mtu = IB_MTU_4096;
+       props->lid = 0;
+       props->lmc = 0;
+       props->sm_lid = 0;
+       props->sm_sl = 0;
+       props->state = IB_PORT_ACTIVE;
+       props->phys_state = 0;
+       props->port_cap_flags =
+           IB_PORT_CM_SUP |
+           IB_PORT_REINIT_SUP |
+           IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+       props->gid_tbl_len = 1;
+       props->pkey_tbl_len = 1;
+       props->qkey_viol_cntr = 0;
+       props->active_width = 1;
+       props->active_speed = 1;
+
+       return 0;
+}
+
+static int c2_modify_port(struct ib_device *ibdev,
+                         u8 port, int port_modify_mask,
+                         struct ib_port_modify *props)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       return 0;
+}
+
+static int c2_query_pkey(struct ib_device *ibdev,
+                        u8 port, u16 index, u16 * pkey)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       *pkey = 0;
+       return 0;
+}
+
+static int c2_query_gid(struct ib_device *ibdev, u8 port,
+                       int index, union ib_gid *gid)
+{
+       struct c2_dev *c2dev = to_c2dev(ibdev);
+
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+       memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
+
+       return 0;
+}
+
+/* Allocate the user context data structure. This keeps track
+ * of all objects associated with a particular user-mode client.
+ */
+static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
+                                            struct ib_udata *udata)
+{
+       struct c2_ucontext *context;
+
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       context = kmalloc(sizeof(*context), GFP_KERNEL);
+       if (!context)
+               return ERR_PTR(-ENOMEM);
+
+       return &context->ibucontext;
+}
+
+static int c2_dealloc_ucontext(struct ib_ucontext *context)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       kfree(context);
+       return 0;
+}
+
+static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       return -ENOSYS;
+}
+
+static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
+                                struct ib_ucontext *context,
+                                struct ib_udata *udata)
+{
+       struct c2_pd *pd;
+       int err;
+
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+       pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+       if (!pd)
+               return ERR_PTR(-ENOMEM);
+
+       err = c2_pd_alloc(to_c2dev(ibdev), !context, pd);
+       if (err) {
+               kfree(pd);
+               return ERR_PTR(err);
+       }
+
+       if (context) {
+               if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
+                       c2_pd_free(to_c2dev(ibdev), pd);
+                       kfree(pd);
+                       return ERR_PTR(-EFAULT);
+               }
+       }
+
+       return &pd->ibpd;
+}
+
+static int c2_dealloc_pd(struct ib_pd *pd)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
+       kfree(pd);
+
+       return 0;
+}
+
+static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       return ERR_PTR(-ENOSYS);
+}
+
+static int c2_ah_destroy(struct ib_ah *ah)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       return -ENOSYS;
+}
+
+static void c2_add_ref(struct ib_qp *ibqp)
+{
+       struct c2_qp *qp;
+       BUG_ON(!ibqp);
+       qp = to_c2qp(ibqp);
+       atomic_inc(&qp->refcount);
+}
+
+static void c2_rem_ref(struct ib_qp *ibqp)
+{
+       struct c2_qp *qp;
+       BUG_ON(!ibqp);
+       qp = to_c2qp(ibqp);
+       if (atomic_dec_and_test(&qp->refcount))
+               wake_up(&qp->wait);
+}
+
+struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
+{
+       struct c2_dev* c2dev = to_c2dev(device);
+       struct c2_qp *qp;
+
+       qp = c2_find_qpn(c2dev, qpn);
+       pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
+               __FUNCTION__, qp, qpn, device,
+               (qp?atomic_read(&qp->refcount):0));
+
+       return (qp?&qp->ibqp:NULL);
+}
+
+static struct ib_qp *c2_create_qp(struct ib_pd *pd,
+                                 struct ib_qp_init_attr *init_attr,
+                                 struct ib_udata *udata)
+{
+       struct c2_qp *qp;
+       int err;
+
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+       switch (init_attr->qp_type) {
+       case IB_QPT_RC:
+               qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+               if (!qp) {
+                       pr_debug("%s: Unable to allocate QP\n", __FUNCTION__);
+                       return ERR_PTR(-ENOMEM);
+               }
+               spin_lock_init(&qp->lock);
+               if (pd->uobject) {
+                       /* userspace specific */
+               }
+
+               err = c2_alloc_qp(to_c2dev(pd->device),
+                                 to_c2pd(pd), init_attr, qp);
+
+               if (err && pd->uobject) {
+                       /* userspace specific */
+               }
+
+               break;
+       default:
+               pr_debug("%s: Invalid QP type: %d\n", __FUNCTION__,
+                       init_attr->qp_type);
+               return ERR_PTR(-EINVAL);
+       }
+
+       if (err) {
+               kfree(qp);
+               return ERR_PTR(err);
+       }
+
+       return &qp->ibqp;
+}
+
+static int c2_destroy_qp(struct ib_qp *ib_qp)
+{
+       struct c2_qp *qp = to_c2qp(ib_qp);
+
+       pr_debug("%s:%u qp=%p,qp->state=%d\n",
+               __FUNCTION__, __LINE__,ib_qp,qp->state);
+       c2_free_qp(to_c2dev(ib_qp->device), qp);
+       kfree(qp);
+       return 0;
+}
+
+static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries,
+                                 struct ib_ucontext *context,
+                                 struct ib_udata *udata)
+{
+       struct c2_cq *cq;
+       int err;
+
+       cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+       if (!cq) {
+               pr_debug("%s: Unable to allocate CQ\n", __FUNCTION__);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
+       if (err) {
+               pr_debug("%s: error initializing CQ\n", __FUNCTION__);
+               kfree(cq);
+               return ERR_PTR(err);
+       }
+
+       return &cq->ibcq;
+}
+
+static int c2_destroy_cq(struct ib_cq *ib_cq)
+{
+       struct c2_cq *cq = to_c2cq(ib_cq);
+
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+       c2_free_cq(to_c2dev(ib_cq->device), cq);
+       kfree(cq);
+
+       return 0;
+}
+
+static inline u32 c2_convert_access(int acc)
+{
+       return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) |
+           (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) |
+           (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) |
+           C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
+}
+
+static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
+                                   struct ib_phys_buf *buffer_list,
+                                   int num_phys_buf, int acc, u64 * iova_start)
+{
+       struct c2_mr *mr;
+       u64 *page_list;
+       u32 total_len;
+       int err, i, j, k, page_shift, pbl_depth;
+
+       pbl_depth = 0;
+       total_len = 0;
+
+       page_shift = PAGE_SHIFT;
+       /*
+        * If there is only 1 buffer we assume this could
+        * be a map of all physical memory, so use a 32k page_shift.
+        */
+       if (num_phys_buf == 1)
+               page_shift += 3;
+
+       for (i = 0; i < num_phys_buf; i++) {
+
+               if (buffer_list[i].addr & ~PAGE_MASK) {
+                       pr_debug("Unaligned Memory Buffer: 0x%x\n",
+                               (unsigned int) buffer_list[i].addr);
+                       return ERR_PTR(-EINVAL);
+               }
+
+               if (!buffer_list[i].size) {
+                       pr_debug("Invalid Buffer Size\n");
+                       return ERR_PTR(-EINVAL);
+               }
+
+               total_len += buffer_list[i].size;
+               pbl_depth += ALIGN(buffer_list[i].size,
+                                  (1 << page_shift)) >> page_shift;
+       }
+
+       page_list = vmalloc(sizeof(u64) * pbl_depth);
+       if (!page_list) {
+               pr_debug("couldn't vmalloc page_list of size %zd\n",
+                       (sizeof(u64) * pbl_depth));
+               return ERR_PTR(-ENOMEM);
+       }
+
+       for (i = 0, j = 0; i < num_phys_buf; i++) {
+
+               int naddrs;
+
+               naddrs = ALIGN(buffer_list[i].size,
+                              (1 << page_shift)) >> page_shift;
+               for (k = 0; k < naddrs; k++)
+                       page_list[j++] = (buffer_list[i].addr +
+                                                    (k << page_shift));
+       }
+
+       mr = kmalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
+
+       mr->pd = to_c2pd(ib_pd);
+       pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
+               "*iova_start %llx, first pa %llx, last pa %llx\n",
+               __FUNCTION__, page_shift, pbl_depth, total_len,
+               *iova_start, page_list[0], page_list[pbl_depth-1]);
+       err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
+                                        (1 << page_shift), pbl_depth,
+                                        total_len, 0, iova_start,
+                                        c2_convert_access(acc), mr);
+       vfree(page_list);
+       if (err) {
+               kfree(mr);
+               return ERR_PTR(err);
+       }
+
+       return &mr->ibmr;
+}
+
+static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
+{
+       struct ib_phys_buf bl;
+       u64 kva = 0;
+
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+       /* AMSO1100 limit */
+       bl.size = 0xffffffff;
+       bl.addr = 0;
+       return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
+}
+
+static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
+                                   int acc, struct ib_udata *udata)
+{
+       u64 *pages;
+       u64 kva = 0;
+       int shift, n, len;
+       int i, j, k;
+       int err = 0;
+       struct ib_umem_chunk *chunk;
+       struct c2_pd *c2pd = to_c2pd(pd);
+       struct c2_mr *c2mr;
+
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       shift = ffs(region->page_size) - 1;
+
+       c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
+       if (!c2mr)
+               return ERR_PTR(-ENOMEM);
+       c2mr->pd = c2pd;
+
+       n = 0;
+       list_for_each_entry(chunk, &region->chunk_list, list)
+               n += chunk->nents;
+
+       pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
+       if (!pages) {
+               err = -ENOMEM;
+               goto err;
+       }
+
+       i = 0;
+       list_for_each_entry(chunk, &region->chunk_list, list) {
+               for (j = 0; j < chunk->nmap; ++j) {
+                       len = sg_dma_len(&chunk->page_list[j]) >> shift;
+                       for (k = 0; k < len; ++k) {
+                               pages[i++] =
+                                       sg_dma_address(&chunk->page_list[j]) +
+                                       (region->page_size * k);
+                       }
+               }
+       }
+
+       kva = (u64)region->virt_base;
+       err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
+                                        pages,
+                                        region->page_size,
+                                        i,
+                                        region->length,
+                                        region->offset,
+                                        &kva,
+                                        c2_convert_access(acc),
+                                        c2mr);
+       kfree(pages);
+       if (err) {
+               kfree(c2mr);
+               return ERR_PTR(err);
+       }
+       return &c2mr->ibmr;
+
+err:
+       kfree(c2mr);
+       return ERR_PTR(err);
+}
+
+static int c2_dereg_mr(struct ib_mr *ib_mr)
+{
+       struct c2_mr *mr = to_c2mr(ib_mr);
+       int err;
+
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+       err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
+       if (err)
+               pr_debug("c2_stag_dealloc failed: %d\n", err);
+       else
+               kfree(mr);
+
+       return err;
+}
+
+static ssize_t show_rev(struct class_device *cdev, char *buf)
+{
+       struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       return sprintf(buf, "%x\n", dev->props.hw_ver);
+}
+
+static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
+{
+       struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       return sprintf(buf, "%x.%x.%x\n",
+                      (int) (dev->props.fw_ver >> 32),
+                      (int) (dev->props.fw_ver >> 16) & 0xffff,
+                      (int) (dev->props.fw_ver & 0xffff));
+}
+
+static ssize_t show_hca(struct class_device *cdev, char *buf)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       return sprintf(buf, "AMSO1100\n");
+}
+
+static ssize_t show_board(struct class_device *cdev, char *buf)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
+}
+
+static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct class_device_attribute *c2_class_attributes[] = {
+       &class_device_attr_hw_rev,
+       &class_device_attr_fw_ver,
+       &class_device_attr_hca_type,
+       &class_device_attr_board_id
+};
+
+static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                       int attr_mask, struct ib_udata *udata)
+{
+       int err;
+
+       err =
+           c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr,
+                        attr_mask);
+
+       return err;
+}
+
+static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       return -ENOSYS;
+}
+
+static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       return -ENOSYS;
+}
+
+static int c2_process_mad(struct ib_device *ibdev,
+                         int mad_flags,
+                         u8 port_num,
+                         struct ib_wc *in_wc,
+                         struct ib_grh *in_grh,
+                         struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       return -ENOSYS;
+}
+
+static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+       /* Request a connection */
+       return c2_llp_connect(cm_id, iw_param);
+}
+
+static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+       /* Accept the new connection */
+       return c2_llp_accept(cm_id, iw_param);
+}
+
+static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+       int err;
+
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+       err = c2_llp_reject(cm_id, pdata, pdata_len);
+       return err;
+}
+
+static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
+{
+       int err;
+
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       err = c2_llp_service_create(cm_id, backlog);
+       pr_debug("%s:%u err=%d\n",
+               __FUNCTION__, __LINE__,
+               err);
+       return err;
+}
+
+static int c2_service_destroy(struct iw_cm_id *cm_id)
+{
+       int err;
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+       err = c2_llp_service_destroy(cm_id);
+
+       return err;
+}
+
+static int c2_pseudo_up(struct net_device *netdev)
+{
+       struct in_device *ind;
+       struct c2_dev *c2dev = netdev->priv;
+
+       ind = in_dev_get(netdev);
+       if (!ind)
+               return 0;
+
+       pr_debug("adding...\n");
+       for_ifa(ind) {
+#ifdef DEBUG
+               u8 *ip = (u8 *) & ifa->ifa_address;
+
+               pr_debug("%s: %d.%d.%d.%d\n",
+                      ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+               c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+       }
+       endfor_ifa(ind);
+       in_dev_put(ind);
+
+       return 0;
+}
+
+static int c2_pseudo_down(struct net_device *netdev)
+{
+       struct in_device *ind;
+       struct c2_dev *c2dev = netdev->priv;
+
+       ind = in_dev_get(netdev);
+       if (!ind)
+               return 0;
+
+       pr_debug("deleting...\n");
+       for_ifa(ind) {
+#ifdef DEBUG
+               u8 *ip = (u8 *) & ifa->ifa_address;
+
+               pr_debug("%s: %d.%d.%d.%d\n",
+                      ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+               c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+       }
+       endfor_ifa(ind);
+       in_dev_put(ind);
+
+       return 0;
+}
+
+static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+       kfree_skb(skb);
+       return NETDEV_TX_OK;
+}
+
+static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
+{
+       int ret = 0;
+
+       if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
+               return -EINVAL;
+
+       netdev->mtu = new_mtu;
+
+       /* TODO: Tell rnic about new rmda interface mtu */
+       return ret;
+}
+
+static void setup(struct net_device *netdev)
+{
+       SET_MODULE_OWNER(netdev);
+       netdev->open = c2_pseudo_up;
+       netdev->stop = c2_pseudo_down;
+       netdev->hard_start_xmit = c2_pseudo_xmit_frame;
+       netdev->get_stats = NULL;
+       netdev->tx_timeout = NULL;
+       netdev->set_mac_address = NULL;
+       netdev->change_mtu = c2_pseudo_change_mtu;
+       netdev->watchdog_timeo = 0;
+       netdev->type = ARPHRD_ETHER;
+       netdev->mtu = 1500;
+       netdev->hard_header_len = ETH_HLEN;
+       netdev->addr_len = ETH_ALEN;
+       netdev->tx_queue_len = 0;
+       netdev->flags |= IFF_NOARP;
+       return;
+}
+
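+/*
+ * Create the "iwX" pseudo netdev that shadows the adapter's ethernet
+ * interface.  It exists to carry the IP addresses that c2_pseudo_up()
+ * and c2_pseudo_down() push to the RNIC; its xmit handler just frees
+ * the skb, so it never transmits real traffic.
+ */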
+static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
+{
+       char name[IFNAMSIZ];
+       struct net_device *netdev;
+
+       /* change ethxxx to iwxxx */
+       strcpy(name, "iw");
+       strcat(name, &c2dev->netdev->name[3]);
+       netdev = alloc_netdev(sizeof(*netdev), name, setup);
+       if (!netdev) {
+               printk(KERN_ERR PFX "%s - pseudo netdev alloc failed\n",
+                       __FUNCTION__);
+               return NULL;
+       }
+
+       netdev->priv = c2dev;
+
+       SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+       memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
+
+       /* Print out the MAC address */
+       pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n",
+               netdev->name,
+               netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+               netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
+
+#if 0
+       /* Disable network packets */
+       netif_stop_queue(netdev);
+#endif
+       return netdev;
+}
+
+int c2_register_device(struct c2_dev *dev)
+{
+       int ret;
+       int i;
+
+       /* Register pseudo network device */
+       dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
+       if (dev->pseudo_netdev) {
+               ret = register_netdev(dev->pseudo_netdev);
+               if (ret) {
+                       printk(KERN_ERR PFX
+                               "Unable to register netdev, ret = %d\n", ret);
+                       free_netdev(dev->pseudo_netdev);
+                       return ret;
+               }
+       }
+
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
+       dev->ibdev.owner = THIS_MODULE;
+       dev->ibdev.uverbs_cmd_mask =
+           (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+           (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+           (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+           (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+           (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+           (1ull << IB_USER_VERBS_CMD_REG_MR) |
+           (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+           (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+           (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+           (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+           (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+           (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+           (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+           (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+           (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+           (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+           (1ull << IB_USER_VERBS_CMD_POST_RECV);
+
+       dev->ibdev.node_type = RDMA_NODE_RNIC;
+       memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
+       memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
+       dev->ibdev.phys_port_cnt = 1;
+       dev->ibdev.dma_device = &dev->pcidev->dev;
+       dev->ibdev.class_dev.dev = &dev->pcidev->dev;
+       dev->ibdev.query_device = c2_query_device;
+       dev->ibdev.query_port = c2_query_port;
+       dev->ibdev.modify_port = c2_modify_port;
+       dev->ibdev.query_pkey = c2_query_pkey;
+       dev->ibdev.query_gid = c2_query_gid;
+       dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
+       dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
+       dev->ibdev.mmap = c2_mmap_uar;
+       dev->ibdev.alloc_pd = c2_alloc_pd;
+       dev->ibdev.dealloc_pd = c2_dealloc_pd;
+       dev->ibdev.create_ah = c2_ah_create;
+       dev->ibdev.destroy_ah = c2_ah_destroy;
+       dev->ibdev.create_qp = c2_create_qp;
+       dev->ibdev.modify_qp = c2_modify_qp;
+       dev->ibdev.destroy_qp = c2_destroy_qp;
+       dev->ibdev.create_cq = c2_create_cq;
+       dev->ibdev.destroy_cq = c2_destroy_cq;
+       dev->ibdev.poll_cq = c2_poll_cq;
+       dev->ibdev.get_dma_mr = c2_get_dma_mr;
+       dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
+       dev->ibdev.reg_user_mr = c2_reg_user_mr;
+       dev->ibdev.dereg_mr = c2_dereg_mr;
+
+       dev->ibdev.alloc_fmr = NULL;
+       dev->ibdev.unmap_fmr = NULL;
+       dev->ibdev.dealloc_fmr = NULL;
+       dev->ibdev.map_phys_fmr = NULL;
+
+       dev->ibdev.attach_mcast = c2_multicast_attach;
+       dev->ibdev.detach_mcast = c2_multicast_detach;
+       dev->ibdev.process_mad = c2_process_mad;
+
+       dev->ibdev.req_notify_cq = c2_arm_cq;
+       dev->ibdev.post_send = c2_post_send;
+       dev->ibdev.post_recv = c2_post_receive;
+
+       dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
+       if (!dev->ibdev.iwcm)
+               return -ENOMEM;
+       dev->ibdev.iwcm->add_ref = c2_add_ref;
+       dev->ibdev.iwcm->rem_ref = c2_rem_ref;
+       dev->ibdev.iwcm->get_qp = c2_get_qp;
+       dev->ibdev.iwcm->connect = c2_connect;
+       dev->ibdev.iwcm->accept = c2_accept;
+       dev->ibdev.iwcm->reject = c2_reject;
+       dev->ibdev.iwcm->create_listen = c2_service_create;
+       dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
+
+       ret = ib_register_device(&dev->ibdev);
+       if (ret)
+               return ret;
+
+       for (i = 0; i < ARRAY_SIZE(c2_class_attributes); ++i) {
+               ret = class_device_create_file(&dev->ibdev.class_dev,
+                                              c2_class_attributes[i]);
+               if (ret) {
+                       unregister_netdev(dev->pseudo_netdev);
+                       free_netdev(dev->pseudo_netdev);
+                       ib_unregister_device(&dev->ibdev);
+                       return ret;
+               }
+       }
+
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       return 0;
+}
+
+void c2_unregister_device(struct c2_dev *dev)
+{
+       pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+       unregister_netdev(dev->pseudo_netdev);
+       free_netdev(dev->pseudo_netdev);
+       ib_unregister_device(&dev->ibdev);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
new file mode 100644 (file)
index 0000000..fc90622
--- /dev/null
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_PROVIDER_H
+#define C2_PROVIDER_H
+#include <linux/inetdevice.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+
+#include "c2_mq.h"
+#include <rdma/iw_cm.h>
+
+#define C2_MPT_FLAG_ATOMIC        (1 << 14)
+#define C2_MPT_FLAG_REMOTE_WRITE  (1 << 13)
+#define C2_MPT_FLAG_REMOTE_READ   (1 << 12)
+#define C2_MPT_FLAG_LOCAL_WRITE   (1 << 11)
+#define C2_MPT_FLAG_LOCAL_READ    (1 << 10)
+
+struct c2_buf_list {
+       void *buf;
+        DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+
+/* The user context keeps track of objects allocated for a
+ * particular user-mode client. */
+struct c2_ucontext {
+       struct ib_ucontext ibucontext;
+};
+
+struct c2_mtt;
+
+/* All objects associated with a PD are kept in the
+ * associated user context if present.
+ */
+struct c2_pd {
+       struct ib_pd ibpd;
+       u32 pd_id;
+};
+
+struct c2_mr {
+       struct ib_mr ibmr;
+       struct c2_pd *pd;
+};
+
+struct c2_av;
+
+enum c2_ah_type {
+       C2_AH_ON_HCA,
+       C2_AH_PCI_POOL,
+       C2_AH_KMALLOC
+};
+
+struct c2_ah {
+       struct ib_ah ibah;
+};
+
+struct c2_cq {
+       struct ib_cq ibcq;
+       spinlock_t lock;
+       atomic_t refcount;
+       int cqn;
+       int is_kernel;
+       wait_queue_head_t wait;
+
+       u32 adapter_handle;
+       struct c2_mq mq;
+};
+
+struct c2_wq {
+       spinlock_t lock;
+};
+
+struct iw_cm_id;
+
+struct c2_qp {
+       struct ib_qp ibqp;
+       struct iw_cm_id *cm_id;
+       spinlock_t lock;
+       atomic_t refcount;
+       wait_queue_head_t wait;
+       int qpn;
+
+       u32 adapter_handle;
+       u32 send_sgl_depth;
+       u32 recv_sgl_depth;
+       u32 rdma_write_sgl_depth;
+       u8 state;
+
+       struct c2_mq sq_mq;
+       struct c2_mq rq_mq;
+};
+
+struct c2_cr_query_attrs {
+       u32 local_addr;
+       u32 remote_addr;
+       u16 local_port;
+       u16 remote_port;
+};
+
+static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
+{
+       return container_of(ibpd, struct c2_pd, ibpd);
+}
+
+static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
+{
+       return container_of(ibucontext, struct c2_ucontext, ibucontext);
+}
+
+static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
+{
+       return container_of(ibmr, struct c2_mr, ibmr);
+}
+
+
+static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
+{
+       return container_of(ibah, struct c2_ah, ibah);
+}
+
+static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
+{
+       return container_of(ibcq, struct c2_cq, ibcq);
+}
+
+static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
+{
+       return container_of(ibqp, struct c2_qp, ibqp);
+}
+
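+/* Return non-zero if @addr is one of the IPv4 addresses configured on @netdev. */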
+static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
+{
+       struct in_device *ind;
+       int ret = 0;
+
+       ind = in_dev_get(netdev);
+       if (!ind)
+               return 0;
+
+       for_ifa(ind) {
+               if (ifa->ifa_address == addr) {
+                       ret = 1;
+                       break;
+               }
+       }
+       endfor_ifa(ind);
+       in_dev_put(ind);
+       return ret;
+}
+#endif                         /* C2_PROVIDER_H */
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
new file mode 100644 (file)
index 0000000..1226113
--- /dev/null
@@ -0,0 +1,975 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+
+#define C2_MAX_ORD_PER_QP 128
+#define C2_MAX_IRD_PER_QP 128
+
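+/*
+ * A "hint" word posted to the adapter packs the MQ index in its upper
+ * half-word and the shared hint count in its lower half-word; see
+ * c2_activity() below.
+ */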
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
+
+#define NO_SUPPORT -1
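+/* Map IB send opcodes to adapter WR types; unsupported opcodes map to NO_SUPPORT. */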
+static const u8 c2_opcode[] = {
+       [IB_WR_SEND] = C2_WR_TYPE_SEND,
+       [IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
+       [IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
+       [IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
+       [IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
+       [IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
+       [IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
+};
+
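+/*
+ * Translate between the IB verbs QP state enum and the adapter's QP
+ * states.  Unrecognized states return -1.
+ */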
+static int to_c2_state(enum ib_qp_state ib_state)
+{
+       switch (ib_state) {
+       case IB_QPS_RESET:
+               return C2_QP_STATE_IDLE;
+       case IB_QPS_RTS:
+               return C2_QP_STATE_RTS;
+       case IB_QPS_SQD:
+               return C2_QP_STATE_CLOSING;
+       case IB_QPS_SQE:
+               return C2_QP_STATE_CLOSING;
+       case IB_QPS_ERR:
+               return C2_QP_STATE_ERROR;
+       default:
+               return -1;
+       }
+}
+
+static int to_ib_state(enum c2_qp_state c2_state)
+{
+       switch (c2_state) {
+       case C2_QP_STATE_IDLE:
+               return IB_QPS_RESET;
+       case C2_QP_STATE_CONNECTING:
+               return IB_QPS_RTR;
+       case C2_QP_STATE_RTS:
+               return IB_QPS_RTS;
+       case C2_QP_STATE_CLOSING:
+               return IB_QPS_SQD;
+       case C2_QP_STATE_ERROR:
+               return IB_QPS_ERR;
+       case C2_QP_STATE_TERMINATE:
+               return IB_QPS_SQE;
+       default:
+               return -1;
+       }
+}
+
+static const char *to_ib_state_str(int ib_state)
+{
+       static const char *state_str[] = {
+               "IB_QPS_RESET",
+               "IB_QPS_INIT",
+               "IB_QPS_RTR",
+               "IB_QPS_RTS",
+               "IB_QPS_SQD",
+               "IB_QPS_SQE",
+               "IB_QPS_ERR"
+       };
+       if (ib_state < IB_QPS_RESET ||
+           ib_state > IB_QPS_ERR)
+               return "<invalid IB QP state>";
+
+       ib_state -= IB_QPS_RESET;
+       return state_str[ib_state];
+}
+
+void c2_set_qp_state(struct c2_qp *qp, int c2_state)
+{
+       int new_state = to_ib_state(c2_state);
+
+       pr_debug("%s: qp[%p] state modify %s --> %s\n",
+              __FUNCTION__,
+               qp,
+               to_ib_state_str(qp->state),
+               to_ib_state_str(new_state));
+       qp->state = new_state;
+}
+
+#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
+
+int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+                struct ib_qp_attr *attr, int attr_mask)
+{
+       struct c2wr_qp_modify_req wr;
+       struct c2wr_qp_modify_rep *reply;
+       struct c2_vq_req *vq_req;
+       unsigned long flags;
+       u8 next_state;
+       int err;
+
+       pr_debug("%s:%d qp=%p, %s --> %s\n",
+               __FUNCTION__, __LINE__,
+               qp,
+               to_ib_state_str(qp->state),
+               to_ib_state_str(attr->qp_state));
+
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req)
+               return -ENOMEM;
+
+       c2_wr_set_id(&wr, CCWR_QP_MODIFY);
+       wr.hdr.context = (unsigned long) vq_req;
+       wr.rnic_handle = c2dev->adapter_handle;
+       wr.qp_handle = qp->adapter_handle;
+       wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+       wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+       wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+       wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+       if (attr_mask & IB_QP_STATE) {
+               /* Ensure the state is valid */
+               if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) {
+                       err = -EINVAL;
+                       goto bail0;
+               }
+
+               wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
+
+               if (attr->qp_state == IB_QPS_ERR) {
+                       spin_lock_irqsave(&qp->lock, flags);
+                       if (qp->cm_id && qp->state == IB_QPS_RTS) {
+                               pr_debug("Generating CLOSE event for QP-->ERR, "
+                                       "qp=%p, cm_id=%p\n", qp, qp->cm_id);
+                               /* Generate a CLOSE event */
+                               vq_req->cm_id = qp->cm_id;
+                               vq_req->event = IW_CM_EVENT_CLOSE;
+                       }
+                       spin_unlock_irqrestore(&qp->lock, flags);
+               }
+               next_state =  attr->qp_state;
+
+       } else if (attr_mask & IB_QP_CUR_STATE) {
+
+               if (attr->cur_qp_state != IB_QPS_RTR &&
+                   attr->cur_qp_state != IB_QPS_RTS &&
+                   attr->cur_qp_state != IB_QPS_SQD &&
+                   attr->cur_qp_state != IB_QPS_SQE) {
+                       err = -EINVAL;
+                       goto bail0;
+               }
+               wr.next_qp_state =
+                   cpu_to_be32(to_c2_state(attr->cur_qp_state));
+
+               next_state = attr->cur_qp_state;
+
+       } else {
+               err = 0;
+               goto bail0;
+       }
+
+       /* reference the request struct */
+       vq_req_get(c2dev, vq_req);
+
+       err = vq_send_wr(c2dev, (union c2wr *) & wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail0;
+       }
+
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err)
+               goto bail0;
+
+       reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+
+       err = c2_errno(reply);
+       if (!err)
+               qp->state = next_state;
+#ifdef DEBUG
+       else
+               pr_debug("%s: c2_errno=%d\n", __FUNCTION__, err);
+#endif
+       /*
+        * If we are moving the QP to error and generating the CLOSE
+        * event here, drop the cm_id reference now because the adapter
+        * will not generate a close event of its own.
+        */
+       spin_lock_irqsave(&qp->lock, flags);
+       if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
+               qp->cm_id->rem_ref(qp->cm_id);
+               qp->cm_id = NULL;
+       }
+       spin_unlock_irqrestore(&qp->lock, flags);
+
+       vq_repbuf_free(c2dev, reply);
+      bail0:
+       vq_req_free(c2dev, vq_req);
+
+       pr_debug("%s:%d qp=%p, cur_state=%s\n",
+               __FUNCTION__, __LINE__,
+               qp,
+               to_ib_state_str(qp->state));
+       return err;
+}
+
+int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+                         int ord, int ird)
+{
+       struct c2wr_qp_modify_req wr;
+       struct c2wr_qp_modify_rep *reply;
+       struct c2_vq_req *vq_req;
+       int err;
+
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req)
+               return -ENOMEM;
+
+       c2_wr_set_id(&wr, CCWR_QP_MODIFY);
+       wr.hdr.context = (unsigned long) vq_req;
+       wr.rnic_handle = c2dev->adapter_handle;
+       wr.qp_handle = qp->adapter_handle;
+       wr.ord = cpu_to_be32(ord);
+       wr.ird = cpu_to_be32(ird);
+       wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+       wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+       wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+       /* reference the request struct */
+       vq_req_get(c2dev, vq_req);
+
+       err = vq_send_wr(c2dev, (union c2wr *) & wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail0;
+       }
+
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err)
+               goto bail0;
+
+       reply = (struct c2wr_qp_modify_rep *) (unsigned long)
+               vq_req->reply_msg;
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+
+       err = c2_errno(reply);
+       vq_repbuf_free(c2dev, reply);
+      bail0:
+       vq_req_free(c2dev, vq_req);
+       return err;
+}
+
+static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+       struct c2_vq_req *vq_req;
+       struct c2wr_qp_destroy_req wr;
+       struct c2wr_qp_destroy_rep *reply;
+       unsigned long flags;
+       int err;
+
+       /*
+        * Allocate a verb request message
+        */
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req) {
+               return -ENOMEM;
+       }
+
+       /*
+        * Initialize the WR
+        */
+       c2_wr_set_id(&wr, CCWR_QP_DESTROY);
+       wr.hdr.context = (unsigned long) vq_req;
+       wr.rnic_handle = c2dev->adapter_handle;
+       wr.qp_handle = qp->adapter_handle;
+
+       /*
+        * reference the request struct.  dereferenced in the int handler.
+        */
+       vq_req_get(c2dev, vq_req);
+
+       spin_lock_irqsave(&qp->lock, flags);
+       if (qp->cm_id && qp->state == IB_QPS_RTS) {
+               pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
+                       "qp=%p, cm_id=%p\n", qp, qp->cm_id);
+               /* Generate a CLOSE event */
+               vq_req->qp = qp;
+               vq_req->cm_id = qp->cm_id;
+               vq_req->event = IW_CM_EVENT_CLOSE;
+       }
+       spin_unlock_irqrestore(&qp->lock, flags);
+
+       /*
+        * Send WR to adapter
+        */
+       err = vq_send_wr(c2dev, (union c2wr *) & wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail0;
+       }
+
+       /*
+        * Wait for reply from adapter
+        */
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err) {
+               goto bail0;
+       }
+
+       /*
+        * Process reply
+        */
+       reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+
+       spin_lock_irqsave(&qp->lock, flags);
+       if (qp->cm_id) {
+               qp->cm_id->rem_ref(qp->cm_id);
+               qp->cm_id = NULL;
+       }
+       spin_unlock_irqrestore(&qp->lock, flags);
+
+       vq_repbuf_free(c2dev, reply);
+      bail0:
+       vq_req_free(c2dev, vq_req);
+       return err;
+}
+
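+/*
+ * Allocate a QP number for @qp from the per-device IDR, growing the IDR
+ * with idr_pre_get() and retrying while -EAGAIN is returned.
+ */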
+static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+       int ret;
+
+        do {
+               spin_lock_irq(&c2dev->qp_table.lock);
+               ret = idr_get_new_above(&c2dev->qp_table.idr, qp,
+                                       c2dev->qp_table.last++, &qp->qpn);
+               spin_unlock_irq(&c2dev->qp_table.lock);
+        } while ((ret == -EAGAIN) &&
+                idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL));
+       return ret;
+}
+
+static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
+{
+       spin_lock_irq(&c2dev->qp_table.lock);
+       idr_remove(&c2dev->qp_table.idr, qpn);
+       spin_unlock_irq(&c2dev->qp_table.lock);
+}
+
+struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
+{
+       unsigned long flags;
+       struct c2_qp *qp;
+
+       spin_lock_irqsave(&c2dev->qp_table.lock, flags);
+       qp = idr_find(&c2dev->qp_table.idr, qpn);
+       spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
+       return qp;
+}
+
+int c2_alloc_qp(struct c2_dev *c2dev,
+               struct c2_pd *pd,
+               struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
+{
+       struct c2wr_qp_create_req wr;
+       struct c2wr_qp_create_rep *reply;
+       struct c2_vq_req *vq_req;
+       struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
+       struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
+       unsigned long peer_pa;
+       u32 q_size, msg_size, mmap_size;
+       void __iomem *mmap;
+       int err;
+
+       err = c2_alloc_qpn(c2dev, qp);
+       if (err)
+               return err;
+       qp->ibqp.qp_num = qp->qpn;
+       qp->ibqp.qp_type = IB_QPT_RC;
+
+       /* Allocate the SQ and RQ shared pointers */
+       qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+                                        &qp->sq_mq.shared_dma, GFP_KERNEL);
+       if (!qp->sq_mq.shared) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+
+       qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+                                        &qp->rq_mq.shared_dma, GFP_KERNEL);
+       if (!qp->rq_mq.shared) {
+               err = -ENOMEM;
+               goto bail1;
+       }
+
+       /* Allocate the verbs request */
+       vq_req = vq_req_alloc(c2dev);
+       if (vq_req == NULL) {
+               err = -ENOMEM;
+               goto bail2;
+       }
+
+       /* Initialize the work request */
+       memset(&wr, 0, sizeof(wr));
+       c2_wr_set_id(&wr, CCWR_QP_CREATE);
+       wr.hdr.context = (unsigned long) vq_req;
+       wr.rnic_handle = c2dev->adapter_handle;
+       wr.sq_cq_handle = send_cq->adapter_handle;
+       wr.rq_cq_handle = recv_cq->adapter_handle;
+       wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
+       wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
+       wr.srq_handle = 0;
+       wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
+                              QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
+       wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+       wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
+       wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+       wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
+       wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
+       wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
+       wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
+       wr.pd_id = pd->pd_id;
+       wr.user_context = (unsigned long) qp;
+
+       vq_req_get(c2dev, vq_req);
+
+       /* Send the WR to the adapter */
+       err = vq_send_wr(c2dev, (union c2wr *) & wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail3;
+       }
+
+       /* Wait for the verb reply  */
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err) {
+               goto bail3;
+       }
+
+       /* Process the reply */
+       reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail3;
+       }
+
+       if ((err = c2_wr_get_result(reply)) != 0) {
+               goto bail4;
+       }
+
+       /* Fill in the kernel QP struct */
+       atomic_set(&qp->refcount, 1);
+       qp->adapter_handle = reply->qp_handle;
+       qp->state = IB_QPS_RESET;
+       qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
+       qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
+       qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
+
+       /* Initialize the SQ MQ */
+       q_size = be32_to_cpu(reply->sq_depth);
+       msg_size = be32_to_cpu(reply->sq_msg_size);
+       peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
+       mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+       mmap = ioremap_nocache(peer_pa, mmap_size);
+       if (!mmap) {
+               err = -ENOMEM;
+               goto bail5;
+       }
+
+       c2_mq_req_init(&qp->sq_mq,
+                      be32_to_cpu(reply->sq_mq_index),
+                      q_size,
+                      msg_size,
+                      mmap + sizeof(struct c2_mq_shared),      /* pool start */
+                      mmap,                            /* peer */
+                      C2_MQ_ADAPTER_TARGET);
+
+       /* Initialize the RQ mq */
+       q_size = be32_to_cpu(reply->rq_depth);
+       msg_size = be32_to_cpu(reply->rq_msg_size);
+       peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
+       mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+       mmap = ioremap_nocache(peer_pa, mmap_size);
+       if (!mmap) {
+               err = -ENOMEM;
+               goto bail6;
+       }
+
+       c2_mq_req_init(&qp->rq_mq,
+                      be32_to_cpu(reply->rq_mq_index),
+                      q_size,
+                      msg_size,
+                      mmap + sizeof(struct c2_mq_shared),      /* pool start */
+                      mmap,                            /* peer */
+                      C2_MQ_ADAPTER_TARGET);
+
+       vq_repbuf_free(c2dev, reply);
+       vq_req_free(c2dev, vq_req);
+
+       return 0;
+
+      bail6:
+       iounmap(qp->sq_mq.peer);
+      bail5:
+       destroy_qp(c2dev, qp);
+      bail4:
+       vq_repbuf_free(c2dev, reply);
+      bail3:
+       vq_req_free(c2dev, vq_req);
+      bail2:
+       c2_free_mqsp(qp->rq_mq.shared);
+      bail1:
+       c2_free_mqsp(qp->sq_mq.shared);
+      bail0:
+       c2_free_qpn(c2dev, qp->qpn);
+       return err;
+}
+
+void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+       struct c2_cq *send_cq;
+       struct c2_cq *recv_cq;
+
+       send_cq = to_c2cq(qp->ibqp.send_cq);
+       recv_cq = to_c2cq(qp->ibqp.recv_cq);
+
+       /*
+        * Lock CQs here, so that CQ polling code can do QP lookup
+        * without taking a lock.
+        */
+       spin_lock_irq(&send_cq->lock);
+       if (send_cq != recv_cq)
+               spin_lock(&recv_cq->lock);
+
+       c2_free_qpn(c2dev, qp->qpn);
+
+       if (send_cq != recv_cq)
+               spin_unlock(&recv_cq->lock);
+       spin_unlock_irq(&send_cq->lock);
+
+       /*
+        * Destroy the qp in the rnic...
+        */
+       destroy_qp(c2dev, qp);
+
+       /*
+        * Mark any unreaped CQEs as null and void.
+        */
+       c2_cq_clean(c2dev, qp, send_cq->cqn);
+       if (send_cq != recv_cq)
+               c2_cq_clean(c2dev, qp, recv_cq->cqn);
+       /*
+        * Unmap the MQs and return the shared pointers
+        * to the message pool.
+        */
+       iounmap(qp->sq_mq.peer);
+       iounmap(qp->rq_mq.peer);
+       c2_free_mqsp(qp->sq_mq.shared);
+       c2_free_mqsp(qp->rq_mq.shared);
+
+       atomic_dec(&qp->refcount);
+       wait_event(qp->wait, !atomic_read(&qp->refcount));
+}
+
+/*
+ * Function: move_sgl
+ *
+ * Description:
+ * Move an SGL from the user's work request struct into a CCIL Work Request
+ * message, swapping to WR byte order and ensuring the total length doesn't
+ * overflow.
+ *
+ * IN:
+ * dst         - ptr to CCIL Work Request message SGL memory.
+ * src         - ptr to the consumer's SGL memory.
+ *
+ * OUT: none
+ *
+ * Return:
+ * CCIL status codes.
+ */
+static int
+move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
+        u8 * actual_count)
+{
+       u32 tot = 0;            /* running total */
+       u8 acount = 0;          /* running total non-0 len sge's */
+
+       while (count > 0) {
+               /*
+                * If the addition of this SGE causes the
+                * total SGL length to exceed 2^32-1, then
+                * fail-n-bail.
+                *
+                * If the current total plus the next element length
+                * wraps, then it will go negative and be less than the
+                * current total...
+                */
+               if ((tot + src->length) < tot) {
+                       return -EINVAL;
+               }
+               /*
+                * Bug: 1456 (as well as 1498 & 1643)
+                * Skip over any sge's supplied with len=0
+                */
+               if (src->length) {
+                       tot += src->length;
+                       dst->stag = cpu_to_be32(src->lkey);
+                       dst->to = cpu_to_be64(src->addr);
+                       dst->length = cpu_to_be32(src->length);
+                       dst++;
+                       acount++;
+               }
+               src++;
+               count--;
+       }
+
+       if (acount == 0) {
+               /*
+                * Bug: 1476 (as well as 1498, 1456 and 1643)
+                * Setup the SGL in the WR to make it easier for the RNIC.
+                * This way, the FW doesn't have to deal with special cases.
+                * Setting length=0 should be sufficient.
+                */
+               dst->stag = 0;
+               dst->to = 0;
+               dst->length = 0;
+       }
+
+       *p_len = tot;
+       *actual_count = acount;
+       return 0;
+}
+
+/*
+ * Function: c2_activity (private function)
+ *
+ * Description:
+ * Post an mq index to the host->adapter activity fifo.
+ *
+ * IN:
+ * c2dev       - ptr to c2dev structure
+ * mq_index    - mq index to post
+ * shared      - value most recently written to shared
+ *
+ * OUT:
+ *
+ * Return:
+ * none
+ */
+static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
+{
+       /*
+        * First read the register to see if the FIFO is full, and if so,
+        * spin until it's not.  This isn't perfect -- there is no
+        * synchronization among the clients of the register, but in
+        * practice it prevents multiple CPUs from hammering the bus
+        * with PCI RETRY. Note that when this does happen, the card
+        * cannot get on the bus and the card and system hang in a
+        * deadlock -- thus the need for this code. [TOT]
+        */
+       while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000) {
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               schedule_timeout(0);
+       }
+
+       __raw_writel(C2_HINT_MAKE(mq_index, shared),
+                    c2dev->regs + PCI_BAR0_ADAPTER_HINT);
+}
+
+/*
+ * Function: qp_wr_post
+ *
+ * Description:
+ * This inline function allocates an MQ msg, copies the host copy of the
+ * completed WR into the msg, and then posts the message.
+ *
+ * IN:
+ * q           - ptr to user MQ.
+ * wr          - ptr to host-copy of the WR.
+ * qp          - ptr to user qp
+ * size                - Number of bytes to post.  Assumed to be divisible by 4.
+ *
+ * OUT: none
+ *
+ * Return:
+ * CCIL status codes.
+ */
+static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
+{
+       union c2wr *msg;
+
+       msg = c2_mq_alloc(q);
+       if (msg == NULL) {
+               return -EINVAL;
+       }
+#ifdef CCMSGMAGIC
+       ((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
+#endif
+
+       /*
+        * Since all header fields in the WR are the same as the
+        * CQE, set the following so the adapter need not.
+        */
+       c2_wr_set_result(wr, CCERR_PENDING);
+
+       /*
+        * Copy the wr down to the adapter
+        */
+       memcpy((void *) msg, (void *) wr, size);
+
+       c2_mq_produce(q);
+       return 0;
+}
+
+
+int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+                struct ib_send_wr **bad_wr)
+{
+       struct c2_dev *c2dev = to_c2dev(ibqp->device);
+       struct c2_qp *qp = to_c2qp(ibqp);
+       union c2wr wr;
+       int err = 0;
+
+       u32 flags;
+       u32 tot_len;
+       u8 actual_sge_count;
+       u32 msg_size;
+
+       if (qp->state > IB_QPS_RTS)
+               return -EINVAL;
+
+       while (ib_wr) {
+
+               flags = 0;
+               wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+               if (ib_wr->send_flags & IB_SEND_SIGNALED) {
+                       flags |= SQ_SIGNALED;
+               }
+
+               switch (ib_wr->opcode) {
+               case IB_WR_SEND:
+                       if (ib_wr->send_flags & IB_SEND_SOLICITED) {
+                               c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
+                               msg_size = sizeof(struct c2wr_send_req);
+                       } else {
+                               c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
+                               msg_size = sizeof(struct c2wr_send_req);
+                       }
+
+                       wr.sqwr.send.remote_stag = 0;
+                       msg_size += sizeof(struct c2_data_addr) * ib_wr->num_sge;
+                       if (ib_wr->num_sge > qp->send_sgl_depth) {
+                               err = -EINVAL;
+                               break;
+                       }
+                       if (ib_wr->send_flags & IB_SEND_FENCE) {
+                               flags |= SQ_READ_FENCE;
+                       }
+                       err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
+                                      ib_wr->sg_list,
+                                      ib_wr->num_sge,
+                                      &tot_len, &actual_sge_count);
+                       wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
+                       c2_wr_set_sge_count(&wr, actual_sge_count);
+                       break;
+               case IB_WR_RDMA_WRITE:
+                       c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
+                       msg_size = sizeof(struct c2wr_rdma_write_req) +
+                           (sizeof(struct c2_data_addr) * ib_wr->num_sge);
+                       if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
+                               err = -EINVAL;
+                               break;
+                       }
+                       if (ib_wr->send_flags & IB_SEND_FENCE) {
+                               flags |= SQ_READ_FENCE;
+                       }
+                       wr.sqwr.rdma_write.remote_stag =
+                           cpu_to_be32(ib_wr->wr.rdma.rkey);
+                       wr.sqwr.rdma_write.remote_to =
+                           cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+                       err = move_sgl((struct c2_data_addr *)
+                                      & (wr.sqwr.rdma_write.data),
+                                      ib_wr->sg_list,
+                                      ib_wr->num_sge,
+                                      &tot_len, &actual_sge_count);
+                       wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
+                       c2_wr_set_sge_count(&wr, actual_sge_count);
+                       break;
+               case IB_WR_RDMA_READ:
+                       c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
+                       msg_size = sizeof(struct c2wr_rdma_read_req);
+
+                       /* iWARP only supports 1 SGE for RDMA reads */
+                       if (ib_wr->num_sge > 1) {
+                               err = -EINVAL;
+                               break;
+                       }
+
+                       /*
+                        * Move the local and remote stag/to/len into the WR.
+                        */
+                       wr.sqwr.rdma_read.local_stag =
+                           cpu_to_be32(ib_wr->sg_list->lkey);
+                       wr.sqwr.rdma_read.local_to =
+                           cpu_to_be64(ib_wr->sg_list->addr);
+                       wr.sqwr.rdma_read.remote_stag =
+                           cpu_to_be32(ib_wr->wr.rdma.rkey);
+                       wr.sqwr.rdma_read.remote_to =
+                           cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+                       wr.sqwr.rdma_read.length =
+                           cpu_to_be32(ib_wr->sg_list->length);
+                       break;
+               default:
+                       /* error */
+                       msg_size = 0;
+                       err = -EINVAL;
+                       break;
+               }
+
+               /*
+                * If we had an error on the last wr build, then
+                * break out.  Possible errors include bogus WR
+                * type, and a bogus SGL length...
+                */
+               if (err) {
+                       break;
+               }
+
+               /*
+                * Store flags
+                */
+               c2_wr_set_flags(&wr, flags);
+
+               /*
+                * Post the puppy!
+                */
+               err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
+               if (err) {
+                       break;
+               }
+
+               /*
+                * Enqueue mq index to activity FIFO.
+                */
+               c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
+
+               ib_wr = ib_wr->next;
+       }
+
+       if (err)
+               *bad_wr = ib_wr;
+       return err;
+}
+
+int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+                   struct ib_recv_wr **bad_wr)
+{
+       struct c2_dev *c2dev = to_c2dev(ibqp->device);
+       struct c2_qp *qp = to_c2qp(ibqp);
+       union c2wr wr;
+       int err = 0;
+
+       if (qp->state > IB_QPS_RTS)
+               return -EINVAL;
+
+       /*
+        * Try and post each work request
+        */
+       while (ib_wr) {
+               u32 tot_len;
+               u8 actual_sge_count;
+
+               if (ib_wr->num_sge > qp->recv_sgl_depth) {
+                       err = -EINVAL;
+                       break;
+               }
+
+               /*
+                * Create local host-copy of the WR
+                */
+               wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+               c2_wr_set_id(&wr, CCWR_RECV);
+               c2_wr_set_flags(&wr, 0);
+
+               /* sge_count is limited to eight bits. */
+               BUG_ON(ib_wr->num_sge >= 256);
+               err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
+                              ib_wr->sg_list,
+                              ib_wr->num_sge, &tot_len, &actual_sge_count);
+               c2_wr_set_sge_count(&wr, actual_sge_count);
+
+               /*
+                * If we had an error on the last wr build, then
+                * break out.  Possible errors include bogus WR
+                * type, and a bogus SGL length...
+                */
+               if (err) {
+                       break;
+               }
+
+               err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
+               if (err) {
+                       break;
+               }
+
+               /*
+                * Enqueue mq index to activity FIFO
+                */
+               c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
+
+               ib_wr = ib_wr->next;
+       }
+
+       if (err)
+               *bad_wr = ib_wr;
+       return err;
+}
+
+void __devinit c2_init_qp_table(struct c2_dev *c2dev)
+{
+       spin_lock_init(&c2dev->qp_table.lock);
+       idr_init(&c2dev->qp_table.idr);
+}
+
+void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev)
+{
+       idr_destroy(&c2dev->qp_table.idr);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
new file mode 100644 (file)
index 0000000..f49a32b
--- /dev/null
@@ -0,0 +1,664 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/inet.h>
+#include <linux/vmalloc.h>
+
+#include <linux/route.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_vq.h"
+
+/* Device capabilities */
+#define C2_MIN_PAGESIZE  1024
+
+#define C2_MAX_MRS       32768
+#define C2_MAX_QPS       16000
+#define C2_MAX_WQE_SZ    256
+#define C2_MAX_QP_WR     ((128*1024)/C2_MAX_WQE_SZ)
+#define C2_MAX_SGES      4
+#define C2_MAX_SGE_RD    1
+#define C2_MAX_CQS       32768
+#define C2_MAX_CQES      4096
+#define C2_MAX_PDS       16384
+
+/*
+ * Send the adapter INIT message to the amso1100
+ */
+static int c2_adapter_init(struct c2_dev *c2dev)
+{
+       struct c2wr_init_req wr;
+       int err;
+
+       memset(&wr, 0, sizeof(wr));
+       c2_wr_set_id(&wr, CCWR_INIT);
+       wr.hdr.context = 0;
+       wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
+       wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
+       wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
+       wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
+       wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
+       wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
+
+       /* Post the init message */
+       err = vq_send_wr(c2dev, (union c2wr *) & wr);
+
+       return err;
+}
+
+/*
+ * Send the adapter TERM message to the amso1100
+ */
+static void c2_adapter_term(struct c2_dev *c2dev)
+{
+       struct c2wr_init_req wr;
+
+       memset(&wr, 0, sizeof(wr));
+       c2_wr_set_id(&wr, CCWR_TERM);
+       wr.hdr.context = 0;
+
+       /* Post the TERM message */
+       vq_send_wr(c2dev, (union c2wr *) & wr);
+       c2dev->init = 0;
+
+       return;
+}
+
+/*
+ * Query the adapter
+ */
+static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
+{
+       struct c2_vq_req *vq_req;
+       struct c2wr_rnic_query_req wr;
+       struct c2wr_rnic_query_rep *reply;
+       int err;
+
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req)
+               return -ENOMEM;
+
+       c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
+       wr.hdr.context = (unsigned long) vq_req;
+       wr.rnic_handle = c2dev->adapter_handle;
+
+       vq_req_get(c2dev, vq_req);
+
+       err = vq_send_wr(c2dev, (union c2wr *) &wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail1;
+       }
+
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err)
+               goto bail1;
+
+       reply =
+           (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail1;
+       }
+
+       err = c2_errno(reply);
+       if (err)
+               goto bail2;
+
+       props->fw_ver =
+               ((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
+               ((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) |
+               (be32_to_cpu(reply->fw_ver_patch) & 0xFFFF);
+       memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
+       props->max_mr_size         = 0xFFFFFFFF;
+       props->page_size_cap       = ~(C2_MIN_PAGESIZE-1);
+       props->vendor_id           = be32_to_cpu(reply->vendor_id);
+       props->vendor_part_id      = be32_to_cpu(reply->part_number);
+       props->hw_ver              = be32_to_cpu(reply->hw_version);
+       props->max_qp              = be32_to_cpu(reply->max_qps);
+       props->max_qp_wr           = be32_to_cpu(reply->max_qp_depth);
+       props->device_cap_flags    = c2dev->device_cap_flags;
+       props->max_sge             = C2_MAX_SGES;
+       props->max_sge_rd          = C2_MAX_SGE_RD;
+       props->max_cq              = be32_to_cpu(reply->max_cqs);
+       props->max_cqe             = be32_to_cpu(reply->max_cq_depth);
+       props->max_mr              = be32_to_cpu(reply->max_mrs);
+       props->max_pd              = be32_to_cpu(reply->max_pds);
+       props->max_qp_rd_atom      = be32_to_cpu(reply->max_qp_ird);
+       props->max_ee_rd_atom      = 0;
+       props->max_res_rd_atom     = be32_to_cpu(reply->max_global_ird);
+       props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
+       props->max_ee_init_rd_atom = 0;
+       props->atomic_cap          = IB_ATOMIC_NONE;
+       props->max_ee              = 0;
+       props->max_rdd             = 0;
+       props->max_mw              = be32_to_cpu(reply->max_mws);
+       props->max_raw_ipv6_qp     = 0;
+       props->max_raw_ethy_qp     = 0;
+       props->max_mcast_grp       = 0;
+       props->max_mcast_qp_attach = 0;
+       props->max_total_mcast_qp_attach = 0;
+       props->max_ah              = 0;
+       props->max_fmr             = 0;
+       props->max_map_per_fmr     = 0;
+       props->max_srq             = 0;
+       props->max_srq_wr          = 0;
+       props->max_srq_sge         = 0;
+       props->max_pkeys           = 0;
+       props->local_ca_ack_delay  = 0;
+
+ bail2:
+       vq_repbuf_free(c2dev, reply);
+
+ bail1:
+       vq_req_free(c2dev, vq_req);
+       return err;
+}
+
+/*
+ * Add an IP address to the RNIC interface
+ */
+int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+{
+       struct c2_vq_req *vq_req;
+       struct c2wr_rnic_setconfig_req *wr;
+       struct c2wr_rnic_setconfig_rep *reply;
+       struct c2_netaddr netaddr;
+       int err, len;
+
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req)
+               return -ENOMEM;
+
+       len = sizeof(struct c2_netaddr);
+       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+       if (!wr) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+
+       c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
+       wr->hdr.context = (unsigned long) vq_req;
+       wr->rnic_handle = c2dev->adapter_handle;
+       wr->option = cpu_to_be32(C2_CFG_ADD_ADDR);
+
+       netaddr.ip_addr = inaddr;
+       netaddr.netmask = inmask;
+       netaddr.mtu = 0;
+
+       memcpy(wr->data, &netaddr, len);
+
+       vq_req_get(c2dev, vq_req);
+
+       err = vq_send_wr(c2dev, (union c2wr *) wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail1;
+       }
+
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err)
+               goto bail1;
+
+       reply =
+           (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail1;
+       }
+
+       err = c2_errno(reply);
+       vq_repbuf_free(c2dev, reply);
+
+      bail1:
+       kfree(wr);
+      bail0:
+       vq_req_free(c2dev, vq_req);
+       return err;
+}
+
+/*
+ * Delete an IP address from the RNIC interface
+ */
+int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+{
+       struct c2_vq_req *vq_req;
+       struct c2wr_rnic_setconfig_req *wr;
+       struct c2wr_rnic_setconfig_rep *reply;
+       struct c2_netaddr netaddr;
+       int err, len;
+
+       vq_req = vq_req_alloc(c2dev);
+       if (!vq_req)
+               return -ENOMEM;
+
+       len = sizeof(struct c2_netaddr);
+       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+       if (!wr) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+
+       c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
+       wr->hdr.context = (unsigned long) vq_req;
+       wr->rnic_handle = c2dev->adapter_handle;
+       wr->option = cpu_to_be32(C2_CFG_DEL_ADDR);
+
+       netaddr.ip_addr = inaddr;
+       netaddr.netmask = inmask;
+       netaddr.mtu = 0;
+
+       memcpy(wr->data, &netaddr, len);
+
+       vq_req_get(c2dev, vq_req);
+
+       err = vq_send_wr(c2dev, (union c2wr *) wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail1;
+       }
+
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err)
+               goto bail1;
+
+       reply =
+           (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail1;
+       }
+
+       err = c2_errno(reply);
+       vq_repbuf_free(c2dev, reply);
+
+      bail1:
+       kfree(wr);
+      bail0:
+       vq_req_free(c2dev, vq_req);
+       return err;
+}
+
+/*
+ * Open a single RNIC instance to use with all
+ * low level openib calls
+ */
+static int c2_rnic_open(struct c2_dev *c2dev)
+{
+       struct c2_vq_req *vq_req;
+       union c2wr wr;
+       struct c2wr_rnic_open_rep *reply;
+       int err;
+
+       vq_req = vq_req_alloc(c2dev);
+       if (vq_req == NULL) {
+               return -ENOMEM;
+       }
+
+       memset(&wr, 0, sizeof(wr));
+       c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
+       wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
+       wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
+       wr.rnic_open.req.port_num = cpu_to_be16(0);
+       wr.rnic_open.req.user_context = (unsigned long) c2dev;
+
+       vq_req_get(c2dev, vq_req);
+
+       err = vq_send_wr(c2dev, &wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail0;
+       }
+
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err) {
+               goto bail0;
+       }
+
+       reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg);
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+
+       if ((err = c2_errno(reply)) != 0) {
+               goto bail1;
+       }
+
+       c2dev->adapter_handle = reply->rnic_handle;
+
+      bail1:
+       vq_repbuf_free(c2dev, reply);
+      bail0:
+       vq_req_free(c2dev, vq_req);
+       return err;
+}
+
+/*
+ * Close the RNIC instance
+ */
+static int c2_rnic_close(struct c2_dev *c2dev)
+{
+       struct c2_vq_req *vq_req;
+       union c2wr wr;
+       struct c2wr_rnic_close_rep *reply;
+       int err;
+
+       vq_req = vq_req_alloc(c2dev);
+       if (vq_req == NULL) {
+               return -ENOMEM;
+       }
+
+       memset(&wr, 0, sizeof(wr));
+       c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
+       wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
+       wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
+
+       vq_req_get(c2dev, vq_req);
+
+       err = vq_send_wr(c2dev, &wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);
+               goto bail0;
+       }
+
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err) {
+               goto bail0;
+       }
+
+       reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg);
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail0;
+       }
+
+       if ((err = c2_errno(reply)) != 0) {
+               goto bail1;
+       }
+
+       c2dev->adapter_handle = 0;
+
+      bail1:
+       vq_repbuf_free(c2dev, reply);
+      bail0:
+       vq_req_free(c2dev, vq_req);
+       return err;
+}
+
+/*
+ * Called by c2_probe to initialize the RNIC. This principally
+ * involves initializing the various limits and resource pools that
+ * comprise the RNIC instance.
+ */
+int c2_rnic_init(struct c2_dev *c2dev)
+{
+       int err;
+       u32 qsize, msgsize;
+       void *q1_pages;
+       void *q2_pages;
+       void __iomem *mmio_regs;
+
+       /* Device capabilities */
+       c2dev->device_cap_flags =
+           (IB_DEVICE_RESIZE_MAX_WR |
+            IB_DEVICE_CURR_QP_STATE_MOD |
+            IB_DEVICE_SYS_IMAGE_GUID |
+            IB_DEVICE_ZERO_STAG |
+            IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+
+       /* Allocate the qptr_array */
+       c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
+       if (!c2dev->qptr_array) {
+               return -ENOMEM;
+       }
+
+       /* Initialize the qptr_array */
+       memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *));
+       c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
+       c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
+       c2dev->qptr_array[2] = (void *) &c2dev->aeq;
+
+       /* Initialize data structures */
+       init_waitqueue_head(&c2dev->req_vq_wo);
+       spin_lock_init(&c2dev->vqlock);
+       spin_lock_init(&c2dev->lock);
+
+       /* Allocate MQ shared pointer pool for kernel clients. User
+        * mode client pools are hung off the user context
+        */
+       err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
+       if (err) {
+               goto bail0;
+       }
+
+       /* Allocate shared pointers for Q0, Q1, and Q2 from
+        * the shared pointer pool.
+        */
+
+       c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+                                            &c2dev->hint_count_dma,
+                                            GFP_KERNEL);
+       c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+                                            &c2dev->req_vq.shared_dma,
+                                            GFP_KERNEL);
+       c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+                                            &c2dev->rep_vq.shared_dma,
+                                            GFP_KERNEL);
+       c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+                                         &c2dev->aeq.shared_dma, GFP_KERNEL);
+       if (!c2dev->hint_count || !c2dev->req_vq.shared ||
+           !c2dev->rep_vq.shared || !c2dev->aeq.shared) {
+               err = -ENOMEM;
+               goto bail1;
+       }
+
+       mmio_regs = c2dev->kva;
+       /* Initialize the Verbs Request Queue */
+       c2_mq_req_init(&c2dev->req_vq, 0,
+                      be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_QSIZE)),
+                      be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
+                      mmio_regs +
+                      be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
+                      mmio_regs +
+                      be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_SHARED)),
+                      C2_MQ_ADAPTER_TARGET);
+
+       /* Initialize the Verbs Reply Queue */
+       qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_QSIZE));
+       msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
+       q1_pages = kmalloc(qsize * msgsize, GFP_KERNEL);
+       if (!q1_pages) {
+               err = -ENOMEM;
+               goto bail1;
+       }
+       c2dev->rep_vq.host_dma = dma_map_single(c2dev->ibdev.dma_device,
+                                               (void *)q1_pages, qsize * msgsize,
+                                               DMA_FROM_DEVICE);
+       pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
+       pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages,
+                (u64)c2dev->rep_vq.host_dma);
+       c2_mq_rep_init(&c2dev->rep_vq,
+                  1,
+                  qsize,
+                  msgsize,
+                  q1_pages,
+                  mmio_regs +
+                  be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_SHARED)),
+                  C2_MQ_HOST_TARGET);
+
+       /* Initialize the Asynchronous Event Queue */
+       qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_QSIZE));
+       msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
+       q2_pages = kmalloc(qsize * msgsize, GFP_KERNEL);
+       if (!q2_pages) {
+               err = -ENOMEM;
+               goto bail2;
+       }
+       c2dev->aeq.host_dma = dma_map_single(c2dev->ibdev.dma_device,
+                                               (void *)q2_pages, qsize * msgsize,
+                                               DMA_FROM_DEVICE);
+       pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
+       pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q1_pages,
+                (u64)c2dev->rep_vq.host_dma);
+       c2_mq_rep_init(&c2dev->aeq,
+                      2,
+                      qsize,
+                      msgsize,
+                      q2_pages,
+                      mmio_regs +
+                      be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_SHARED)),
+                      C2_MQ_HOST_TARGET);
+
+       /* Initialize the verbs request allocator */
+       err = vq_init(c2dev);
+       if (err)
+               goto bail3;
+
+       /* Enable interrupts on the adapter */
+       writel(0, c2dev->regs + C2_IDIS);
+
+       /* create the WR init message */
+       err = c2_adapter_init(c2dev);
+       if (err)
+               goto bail4;
+       c2dev->init++;
+
+       /* open an adapter instance */
+       err = c2_rnic_open(c2dev);
+       if (err)
+               goto bail4;
+
+       /* Initialize the cached adapter limits */
+       err = c2_rnic_query(c2dev, &c2dev->props);
+       if (err)
+               goto bail5;
+
+       /* Initialize the PD pool */
+       err = c2_init_pd_table(c2dev);
+       if (err)
+               goto bail5;
+
+       /* Initialize the QP pool */
+       c2_init_qp_table(c2dev);
+       return 0;
+
+      bail5:
+       c2_rnic_close(c2dev);
+      bail4:
+       vq_term(c2dev);
+      bail3:
+       dma_unmap_single(c2dev->ibdev.dma_device,
+                        pci_unmap_addr(&c2dev->aeq, mapping),
+                        c2dev->aeq.q_size * c2dev->aeq.msg_size,
+                        DMA_FROM_DEVICE);
+       kfree(q2_pages);
+      bail2:
+       dma_unmap_single(c2dev->ibdev.dma_device,
+                        pci_unmap_addr(&c2dev->rep_vq, mapping),
+                        c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+                        DMA_FROM_DEVICE);
+       kfree(q1_pages);
+      bail1:
+       c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+      bail0:
+       vfree(c2dev->qptr_array);
+
+       return err;
+}
+
+/*
+ * Called by c2_remove to cleanup the RNIC resources.
+ */
+void c2_rnic_term(struct c2_dev *c2dev)
+{
+
+       /* Close the open adapter instance */
+       c2_rnic_close(c2dev);
+
+       /* Send the TERM message to the adapter */
+       c2_adapter_term(c2dev);
+
+       /* Disable interrupts on the adapter */
+       writel(1, c2dev->regs + C2_IDIS);
+
+       /* Free the QP pool */
+       c2_cleanup_qp_table(c2dev);
+
+       /* Free the PD pool */
+       c2_cleanup_pd_table(c2dev);
+
+       /* Free the verbs request allocator */
+       vq_term(c2dev);
+
+       /* Unmap and free the asynchronous event queue */
+       dma_unmap_single(c2dev->ibdev.dma_device,
+                        pci_unmap_addr(&c2dev->aeq, mapping),
+                        c2dev->aeq.q_size * c2dev->aeq.msg_size,
+                        DMA_FROM_DEVICE);
+       kfree(c2dev->aeq.msg_pool.host);
+
+       /* Unmap and free the verbs reply queue */
+       dma_unmap_single(c2dev->ibdev.dma_device,
+                        pci_unmap_addr(&c2dev->rep_vq, mapping),
+                        c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+                        DMA_FROM_DEVICE);
+       kfree(c2dev->rep_vq.msg_pool.host);
+
+       /* Free the MQ shared pointer pool */
+       c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+
+       /* Free the qptr_array */
+       vfree(c2dev->qptr_array);
+
+       return;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_status.h b/drivers/infiniband/hw/amso1100/c2_status.h
new file mode 100644 (file)
index 0000000..6ee4aa9
--- /dev/null
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef        _C2_STATUS_H_
+#define _C2_STATUS_H_
+
+/*
+ * Verbs Status Codes
+ */
+enum c2_status {
+       C2_OK = 0,              /* This must be zero */
+       CCERR_INSUFFICIENT_RESOURCES = 1,
+       CCERR_INVALID_MODIFIER = 2,
+       CCERR_INVALID_MODE = 3,
+       CCERR_IN_USE = 4,
+       CCERR_INVALID_RNIC = 5,
+       CCERR_INTERRUPTED_OPERATION = 6,
+       CCERR_INVALID_EH = 7,
+       CCERR_INVALID_CQ = 8,
+       CCERR_CQ_EMPTY = 9,
+       CCERR_NOT_IMPLEMENTED = 10,
+       CCERR_CQ_DEPTH_TOO_SMALL = 11,
+       CCERR_PD_IN_USE = 12,
+       CCERR_INVALID_PD = 13,
+       CCERR_INVALID_SRQ = 14,
+       CCERR_INVALID_ADDRESS = 15,
+       CCERR_INVALID_NETMASK = 16,
+       CCERR_INVALID_QP = 17,
+       CCERR_INVALID_QP_STATE = 18,
+       CCERR_TOO_MANY_WRS_POSTED = 19,
+       CCERR_INVALID_WR_TYPE = 20,
+       CCERR_INVALID_SGL_LENGTH = 21,
+       CCERR_INVALID_SQ_DEPTH = 22,
+       CCERR_INVALID_RQ_DEPTH = 23,
+       CCERR_INVALID_ORD = 24,
+       CCERR_INVALID_IRD = 25,
+       CCERR_QP_ATTR_CANNOT_CHANGE = 26,
+       CCERR_INVALID_STAG = 27,
+       CCERR_QP_IN_USE = 28,
+       CCERR_OUTSTANDING_WRS = 29,
+       CCERR_STAG_IN_USE = 30,
+       CCERR_INVALID_STAG_INDEX = 31,
+       CCERR_INVALID_SGL_FORMAT = 32,
+       CCERR_ADAPTER_TIMEOUT = 33,
+       CCERR_INVALID_CQ_DEPTH = 34,
+       CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
+       CCERR_INVALID_EP = 36,
+       CCERR_MR_IN_USE = CCERR_STAG_IN_USE,
+       CCERR_FLUSHED = 38,
+       CCERR_INVALID_WQE = 39,
+       CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
+       CCERR_REMOTE_TERMINATION_ERROR = 41,
+       CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
+       CCERR_ACCESS_VIOLATION = 43,
+       CCERR_INVALID_PD_ID = 44,
+       CCERR_WRAP_ERROR = 45,
+       CCERR_INV_STAG_ACCESS_ERROR = 46,
+       CCERR_ZERO_RDMA_READ_RESOURCES = 47,
+       CCERR_QP_NOT_PRIVILEGED = 48,
+       CCERR_STAG_STATE_NOT_INVALID = 49,
+       CCERR_INVALID_PAGE_SIZE = 50,
+       CCERR_INVALID_BUFFER_SIZE = 51,
+       CCERR_INVALID_PBE = 52,
+       CCERR_INVALID_FBO = 53,
+       CCERR_INVALID_LENGTH = 54,
+       CCERR_INVALID_ACCESS_RIGHTS = 55,
+       CCERR_PBL_TOO_BIG = 56,
+       CCERR_INVALID_VA = 57,
+       CCERR_INVALID_REGION = 58,
+       CCERR_INVALID_WINDOW = 59,
+       CCERR_TOTAL_LENGTH_TOO_BIG = 60,
+       CCERR_INVALID_QP_ID = 61,
+       CCERR_ADDR_IN_USE = 62,
+       CCERR_ADDR_NOT_AVAIL = 63,
+       CCERR_NET_DOWN = 64,
+       CCERR_NET_UNREACHABLE = 65,
+       CCERR_CONN_ABORTED = 66,
+       CCERR_CONN_RESET = 67,
+       CCERR_NO_BUFS = 68,
+       CCERR_CONN_TIMEDOUT = 69,
+       CCERR_CONN_REFUSED = 70,
+       CCERR_HOST_UNREACHABLE = 71,
+       CCERR_INVALID_SEND_SGL_DEPTH = 72,
+       CCERR_INVALID_RECV_SGL_DEPTH = 73,
+       CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
+       CCERR_INSUFFICIENT_PRIVILEGES = 75,
+       CCERR_STACK_ERROR = 76,
+       CCERR_INVALID_VERSION = 77,
+       CCERR_INVALID_MTU = 78,
+       CCERR_INVALID_IMAGE = 79,
+       CCERR_PENDING = 98,     /* not an error; used internally by adapter */
+       CCERR_DEFER = 99,       /* not an error; used internally by adapter */
+       CCERR_FAILED_WRITE = 100,
+       CCERR_FAILED_ERASE = 101,
+       CCERR_FAILED_VERIFICATION = 102,
+       CCERR_NOT_FOUND = 103,
+
+};
+
+/*
+ * CCAE_ACTIVE_CONNECT_RESULTS status result codes.
+ */
+enum c2_connect_status {
+       C2_CONN_STATUS_SUCCESS = C2_OK,
+       C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
+       C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
+       C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
+       C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
+       C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
+       C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
+       C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
+       C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
+       C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,
+       C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
+};
+
+/*
+ * Flash programming status codes.
+ */
+enum c2_flash_status {
+       C2_FLASH_STATUS_SUCCESS = 0x0000,
+       C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
+       C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
+       C2_FLASH_STATUS_ECLBS = 0x0400,
+       C2_FLASH_STATUS_PSLBS = 0x0800,
+       C2_FLASH_STATUS_VPENS = 0x1000,
+};
+
+#endif                         /* _C2_STATUS_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_user.h b/drivers/infiniband/hw/amso1100/c2_user.h
new file mode 100644 (file)
index 0000000..7e9e7ad
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_USER_H
+#define C2_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
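+
+/*
+ * Editorial note, for illustration only (not part of the original
+ * header): a userspace consumer would pass a pointer through one of
+ * the __u64 fields below roughly as
+ *
+ *     cmd.sq_db_page = (__u64) (unsigned long) sq_db_va;
+ *
+ * where 'cmd' is a struct c2_create_qp and 'sq_db_va' is a hypothetical
+ * mapped doorbell page, so the struct layout stays identical for
+ * 32-bit and 64-bit processes.
+ */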
+
+struct c2_alloc_ucontext_resp {
+       __u32 qp_tab_size;
+       __u32 uarc_size;
+};
+
+struct c2_alloc_pd_resp {
+       __u32 pdn;
+       __u32 reserved;
+};
+
+struct c2_create_cq {
+       __u32 lkey;
+       __u32 pdn;
+       __u64 arm_db_page;
+       __u64 set_db_page;
+       __u32 arm_db_index;
+       __u32 set_db_index;
+};
+
+struct c2_create_cq_resp {
+       __u32 cqn;
+       __u32 reserved;
+};
+
+struct c2_create_qp {
+       __u32 lkey;
+       __u32 reserved;
+       __u64 sq_db_page;
+       __u64 rq_db_page;
+       __u32 sq_db_index;
+       __u32 rq_db_index;
+};
+
+#endif                         /* C2_USER_H */
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c
new file mode 100644 (file)
index 0000000..40caeb5
--- /dev/null
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "c2_vq.h"
+#include "c2_provider.h"
+
+/*
+ * Verbs Request Objects:
+ *
+ * VQ Request Objects are allocated by the kernel verbs handlers.
+ * They contain a wait object, a refcnt, an atomic bool indicating that the
+ * adapter has replied, and a copy of the verb reply work request.
+ * A pointer to the VQ Request Object is passed down in the context
+ * field of the work request message, and reflected back by the adapter
+ * in the verbs reply message.  The function handle_vq() in the interrupt
+ * path will use this pointer to:
+ *     1) append a copy of the verbs reply message
+ *     2) mark that the reply is ready
+ *     3) wake up the kernel verbs handler blocked awaiting the reply.
+ *
+ *
+ * The kernel verbs handlers do a "get" to put a 2nd reference on the
+ * VQ Request object.  If the kernel verbs handler exits before the adapter
+ * can respond, this extra reference will keep the VQ Request object around
+ * until the adapter's reply can be processed.  The reason we need this is
+ * because a pointer to this object is stuffed into the context field of
+ * the verbs work request message, and reflected back in the reply message.
+ * It is used in the interrupt handler (handle_vq()) to wake up the appropriate
+ * kernel verb handler that is blocked awaiting the verb reply.
+ * So handle_vq() will do a "put" on the object when it's done accessing it.
+ * NOTE:  If we guarantee that the kernel verb handler will never bail before
+ *        getting the reply, then we don't need these refcnts.
+ *
+ *
+ * VQ Request objects are freed by the kernel verbs handlers only
+ * after the verb has been processed, or when the adapter fails and
+ * does not reply.
+ *
+ *
+ * Verbs Reply Buffers:
+ *
+ * VQ Reply bufs are local host memory copies of an outstanding
+ * Verb Request reply message.  They are always allocated by the
+ * kernel verbs handlers, and _may_ be
+ * freed by either the kernel verbs handler -or- the interrupt handler.  The
+ * kernel verbs handler _must_ free the repbuf, then free the vq request object
+ * in that order.
+ */
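+
+/*
+ * Editorial sketch, for illustration only; it is not part of the
+ * driver and is not compiled (guarded by #if 0).  It shows the
+ * canonical calling sequence described above, as a kernel verbs
+ * handler would use it.  The caller is assumed to have built 'wr'
+ * in a buffer of at least req_vq.msg_size bytes.
+ */
+#if 0
+static int example_vq_call(struct c2_dev *c2dev, union c2wr *wr)
+{
+       struct c2_vq_req *vq_req;
+       void *reply;
+       int err;
+
+       vq_req = vq_req_alloc(c2dev);           /* refcnt == 1 */
+       if (!vq_req)
+               return -ENOMEM;
+
+       /* reflect the request object back to us in the reply */
+       ((struct c2wr_hdr *) wr)->context = (unsigned long) vq_req;
+
+       vq_req_get(c2dev, vq_req);              /* 2nd ref for handle_vq() */
+
+       err = vq_send_wr(c2dev, wr);
+       if (err) {
+               vq_req_put(c2dev, vq_req);      /* no reply will come; drop it */
+               goto bail;
+       }
+
+       err = vq_wait_for_reply(c2dev, vq_req);
+       if (err)
+               goto bail;
+
+       reply = (void *) (unsigned long) vq_req->reply_msg;
+       if (!reply) {
+               err = -ENOMEM;
+               goto bail;
+       }
+
+       /* ...consume the reply here, e.g. check c2_errno(reply)... */
+
+       vq_repbuf_free(c2dev, reply);           /* free the repbuf first... */
+ bail:
+       vq_req_free(c2dev, vq_req);             /* ...then the request object */
+       return err;
+}
+#endif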
+
+int vq_init(struct c2_dev *c2dev)
+{
+       sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
+               (char) ('0' + c2dev->devnum));
+       c2dev->host_msg_cache =
+           kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
+                             SLAB_HWCACHE_ALIGN, NULL, NULL);
+       if (c2dev->host_msg_cache == NULL) {
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+void vq_term(struct c2_dev *c2dev)
+{
+       kmem_cache_destroy(c2dev->host_msg_cache);
+}
+
+/* vq_req_alloc - allocate a VQ Request Object and initialize it.
+ * The refcnt is set to 1.
+ */
+struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
+{
+       struct c2_vq_req *r;
+
+       r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
+       if (r) {
+               init_waitqueue_head(&r->wait_object);
+               r->reply_msg = (u64) NULL;
+               r->event = 0;
+               r->cm_id = NULL;
+               r->qp = NULL;
+               atomic_set(&r->refcnt, 1);
+               atomic_set(&r->reply_ready, 0);
+       }
+       return r;
+}
+
+
+/* vq_req_free - free the VQ Request Object.  It is assumed the verbs handler
+ * has already freed the VQ Reply Buffer if it existed.
+ */
+void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+       r->reply_msg = (u64) NULL;
+       if (atomic_dec_and_test(&r->refcnt)) {
+               kfree(r);
+       }
+}
+
+/* vq_req_get - reference a VQ Request Object.  Done
+ * only in the kernel verbs handlers.
+ */
+void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+       atomic_inc(&r->refcnt);
+}
+
+
+/* vq_req_put - dereference and potentially free a VQ Request Object.
+ *
+ * This is only called by handle_vq() on the
+ * interrupt when it is done processing
+ * a verb reply message.  If the associated
+ * kernel verbs handler has already bailed,
+ * then this put will actually free the VQ
+ * Request object _and_ the VQ Reply Buffer
+ * if it exists.
+ */
+void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+       if (atomic_dec_and_test(&r->refcnt)) {
+               if (r->reply_msg != (u64) NULL)
+                       vq_repbuf_free(c2dev,
+                                      (void *) (unsigned long) r->reply_msg);
+               kfree(r);
+       }
+}
+
+
+/*
+ * vq_repbuf_alloc - allocate a VQ Reply Buffer.
+ */
+void *vq_repbuf_alloc(struct c2_dev *c2dev)
+{
+       return kmem_cache_alloc(c2dev->host_msg_cache, SLAB_ATOMIC);
+}
+
+/*
+ * vq_send_wr - post a verbs request message to the Verbs Request Queue.
+ * If a message is not available in the MQ, then block until one is available.
+ * NOTE: handle_mq() in the interrupt context will wake up threads blocked here.
+ * When the adapter drains the Verbs Request Queue, it inserts MQ index 0
+ * into the adapter->host activity fifo and interrupts the host.
+ */
+int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
+{
+       void *msg;
+       wait_queue_t __wait;
+
+       /*
+        * grab adapter vq lock
+        */
+       spin_lock(&c2dev->vqlock);
+
+       /*
+        * allocate msg
+        */
+       msg = c2_mq_alloc(&c2dev->req_vq);
+
+       /*
+        * If we cannot get a msg, then we'll wait.
+        * When a message becomes available, the interrupt handler will
+        * wake_up() any waiters.
+        */
+       while (msg == NULL) {
+               pr_debug("%s:%d no available msg in VQ, waiting...\n",
+                      __FUNCTION__, __LINE__);
+               init_waitqueue_entry(&__wait, current);
+               add_wait_queue(&c2dev->req_vq_wo, &__wait);
+               spin_unlock(&c2dev->vqlock);
+               for (;;) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       if (!c2_mq_full(&c2dev->req_vq)) {
+                               break;
+                       }
+                       if (!signal_pending(current)) {
+                               schedule_timeout(1 * HZ);       /* 1 second... */
+                               continue;
+                       }
+                       set_current_state(TASK_RUNNING);
+                       remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+                       return -EINTR;
+               }
+               set_current_state(TASK_RUNNING);
+               remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+               spin_lock(&c2dev->vqlock);
+               msg = c2_mq_alloc(&c2dev->req_vq);
+       }
+
+       /*
+        * copy wr into adapter msg
+        */
+       memcpy(msg, wr, c2dev->req_vq.msg_size);
+
+       /*
+        * post msg
+        */
+       c2_mq_produce(&c2dev->req_vq);
+
+       /*
+        * release adapter vq lock
+        */
+       spin_unlock(&c2dev->vqlock);
+       return 0;
+}
+
+
+/*
+ * vq_wait_for_reply - block until the adapter posts a Verb Reply Message.
+ */
+int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
+{
+       if (!wait_event_timeout(req->wait_object,
+                               atomic_read(&req->reply_ready),
+                               60*HZ))
+               return -ETIMEDOUT;
+
+       return 0;
+}
+
+/*
+ * vq_repbuf_free - Free a Verbs Reply Buffer.
+ */
+void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
+{
+       kmem_cache_free(c2dev->host_msg_cache, reply);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.h b/drivers/infiniband/hw/amso1100/c2_vq.h
new file mode 100644 (file)
index 0000000..3380562
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_VQ_H_
+#define _C2_VQ_H_
+#include <linux/sched.h>
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_provider.h"
+
+struct c2_vq_req {
+       u64 reply_msg;          /* ptr to reply msg */
+       wait_queue_head_t wait_object;  /* wait object for vq reqs */
+       atomic_t reply_ready;   /* set when reply is ready */
+       atomic_t refcnt;        /* used to cancel WRs... */
+       int event;
+       struct iw_cm_id *cm_id;
+       struct c2_qp *qp;
+};
+
+extern int vq_init(struct c2_dev *c2dev);
+extern void vq_term(struct c2_dev *c2dev);
+
+extern struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
+extern void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
+
+extern void *vq_repbuf_alloc(struct c2_dev *c2dev);
+extern void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
+
+extern int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
+#endif                         /* _C2_VQ_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_wr.h b/drivers/infiniband/hw/amso1100/c2_wr.h
new file mode 100644 (file)
index 0000000..3ec6c43
--- /dev/null
@@ -0,0 +1,1520 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_WR_H_
+#define _C2_WR_H_
+
+#ifdef CCDEBUG
+#define CCWR_MAGIC             0xb07700b0
+#endif
+
+#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
+
+/* Maximum allowed size in bytes of private_data exchange
+ * on connect.
+ */
+#define C2_MAX_PRIVATE_DATA_SIZE 200
+
+/*
+ * These types are shared among the adapter, host, and CCIL consumer.
+ */
+enum c2_cq_notification_type {
+       C2_CQ_NOTIFICATION_TYPE_NONE = 1,
+       C2_CQ_NOTIFICATION_TYPE_NEXT,
+       C2_CQ_NOTIFICATION_TYPE_NEXT_SE
+};
+
+enum c2_setconfig_cmd {
+       C2_CFG_ADD_ADDR = 1,
+       C2_CFG_DEL_ADDR = 2,
+       C2_CFG_ADD_ROUTE = 3,
+       C2_CFG_DEL_ROUTE = 4
+};
+
+enum c2_getconfig_cmd {
+       C2_GETCONFIG_ROUTES = 1,
+       C2_GETCONFIG_ADDRS
+};
+
+/*
+ *  CCIL Work Request Identifiers
+ */
+enum c2wr_ids {
+       CCWR_RNIC_OPEN = 1,
+       CCWR_RNIC_QUERY,
+       CCWR_RNIC_SETCONFIG,
+       CCWR_RNIC_GETCONFIG,
+       CCWR_RNIC_CLOSE,
+       CCWR_CQ_CREATE,
+       CCWR_CQ_QUERY,
+       CCWR_CQ_MODIFY,
+       CCWR_CQ_DESTROY,
+       CCWR_QP_CONNECT,
+       CCWR_PD_ALLOC,
+       CCWR_PD_DEALLOC,
+       CCWR_SRQ_CREATE,
+       CCWR_SRQ_QUERY,
+       CCWR_SRQ_MODIFY,
+       CCWR_SRQ_DESTROY,
+       CCWR_QP_CREATE,
+       CCWR_QP_QUERY,
+       CCWR_QP_MODIFY,
+       CCWR_QP_DESTROY,
+       CCWR_NSMR_STAG_ALLOC,
+       CCWR_NSMR_REGISTER,
+       CCWR_NSMR_PBL,
+       CCWR_STAG_DEALLOC,
+       CCWR_NSMR_REREGISTER,
+       CCWR_SMR_REGISTER,
+       CCWR_MR_QUERY,
+       CCWR_MW_ALLOC,
+       CCWR_MW_QUERY,
+       CCWR_EP_CREATE,
+       CCWR_EP_GETOPT,
+       CCWR_EP_SETOPT,
+       CCWR_EP_DESTROY,
+       CCWR_EP_BIND,
+       CCWR_EP_CONNECT,
+       CCWR_EP_LISTEN,
+       CCWR_EP_SHUTDOWN,
+       CCWR_EP_LISTEN_CREATE,
+       CCWR_EP_LISTEN_DESTROY,
+       CCWR_EP_QUERY,
+       CCWR_CR_ACCEPT,
+       CCWR_CR_REJECT,
+       CCWR_CONSOLE,
+       CCWR_TERM,
+       CCWR_FLASH_INIT,
+       CCWR_FLASH,
+       CCWR_BUF_ALLOC,
+       CCWR_BUF_FREE,
+       CCWR_FLASH_WRITE,
+       CCWR_INIT,              /* WARNING: Don't move this ever again! */
+
+
+
+       /* Add new IDs here */
+
+
+
+       /*
+        * WARNING: CCWR_LAST must always be the last verbs id defined!
+        *          All the preceding IDs are fixed, and must not change.
+        *          You can add new IDs, but must not remove or reorder
+        *          any IDs. If you do, YOU will ruin any hope of
+        *          compatibility between versions.
+        */
+       CCWR_LAST,
+
+       /*
+        * Start over at 1 so that arrays indexed by user wr id's
+        * begin at 1.  This is OK since the verbs and user wr id's
+        * are always used on disjoint sets of queues.
+        */
+       /*
+        * The order of the CCWR_SEND_XX verbs must
+        * match the order of the RDMA_OPs
+        */
+       CCWR_SEND = 1,
+       CCWR_SEND_INV,
+       CCWR_SEND_SE,
+       CCWR_SEND_SE_INV,
+       CCWR_RDMA_WRITE,
+       CCWR_RDMA_READ,
+       CCWR_RDMA_READ_INV,
+       CCWR_MW_BIND,
+       CCWR_NSMR_FASTREG,
+       CCWR_STAG_INVALIDATE,
+       CCWR_RECV,
+       CCWR_NOP,
+       CCWR_UNIMPL,
+/* WARNING: This must always be the last user wr id defined! */
+};
+#define RDMA_SEND_OPCODE_FROM_WR_ID(x)   (x+2)
+
+/*
+ * SQ/RQ Work Request Types
+ */
+enum c2_wr_type {
+       C2_WR_TYPE_SEND = CCWR_SEND,
+       C2_WR_TYPE_SEND_SE = CCWR_SEND_SE,
+       C2_WR_TYPE_SEND_INV = CCWR_SEND_INV,
+       C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV,
+       C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE,
+       C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ,
+       C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV,
+       C2_WR_TYPE_BIND_MW = CCWR_MW_BIND,
+       C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG,
+       C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE,
+       C2_WR_TYPE_RECV = CCWR_RECV,
+       C2_WR_TYPE_NOP = CCWR_NOP,
+};
+
+struct c2_netaddr {
+       u32 ip_addr;
+       u32 netmask;
+       u32 mtu;
+};
+
+struct c2_route {
+       u32 ip_addr;            /* 0 indicates the default route */
+       u32 netmask;            /* netmask associated with dst */
+       u32 flags;
+       union {
+               u32 ipaddr;     /* address of the nexthop interface */
+               u8 enaddr[6];
+       } nexthop;
+};
+
+/*
+ * A Scatter Gather Entry.
+ */
+struct c2_data_addr {
+       u32 stag;
+       u32 length;
+       u64 to;
+};
+
+/*
+ * MR and MW flags used by the consumer, RI, and RNIC.
+ */
+enum c2_mm_flags {
+       MEM_REMOTE = 0x0001,    /* allow mw binds with remote access. */
+       MEM_VA_BASED = 0x0002,  /* Not Zero-based */
+       MEM_PBL_COMPLETE = 0x0004,      /* PBL array is complete in this msg */
+       MEM_LOCAL_READ = 0x0008,        /* allow local reads */
+       MEM_LOCAL_WRITE = 0x0010,       /* allow local writes */
+       MEM_REMOTE_READ = 0x0020,       /* allow remote reads */
+       MEM_REMOTE_WRITE = 0x0040,      /* allow remote writes */
+       MEM_WINDOW_BIND = 0x0080,       /* binds allowed */
+       MEM_SHARED = 0x0100,    /* set if MR is shared */
+       MEM_STAG_VALID = 0x0200 /* set if STAG is in valid state */
+};
+
+/*
+ * CCIL API ACF flags defined in terms of the low level mem flags.
+ * This minimizes translation needed in the user API
+ */
+enum c2_acf {
+       C2_ACF_LOCAL_READ = MEM_LOCAL_READ,
+       C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE,
+       C2_ACF_REMOTE_READ = MEM_REMOTE_READ,
+       C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE,
+       C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND
+};
+
+/*
+ * Image types of objects written to flash
+ */
+#define C2_FLASH_IMG_BITFILE 1
+#define C2_FLASH_IMG_OPTION_ROM 2
+#define C2_FLASH_IMG_VPD 3
+
+/*
+ *  To fix bug 1815 we define the maximum allowable size of the
+ *  terminate message (per the IETF spec; refer to the IETF protocol
+ *  specification, section 12.1.6, page 64).
+ *  The message is prefixed by 20 bytes of DDP info.
+ *
+ *  Then the message has 6 bytes for the terminate control
+ *  and DDP segment length info plus a DDP header (either
+ *  14 or 18 bytes) plus 28 bytes for the RDMA header.
+ *  Thus the max size is:
+ *  20 + (6 + 18 + 28) = 72
+ */
+#define C2_MAX_TERMINATE_MESSAGE_SIZE (72)
+
+/*
+ * Build String Length.  It must be the same as C2_BUILD_STR_LEN in ccil_api.h
+ */
+#define WR_BUILD_STR_LEN 64
+
+/*
+ * WARNING:  All of these structs need to align any 64-bit types on
+ * 64-bit boundaries!  64-bit types include u64.
+ */
+
+/*
+ * Clustercore Work Request Header.  Be sensitive to field layout
+ * and alignment.
+ */
+struct c2wr_hdr {
+       /* wqe_count is part of the cqe.  It is put here so the
+        * adapter can write to it while the wr is pending without
+        * clobbering part of the wr.  This word need not be dma'd
+        * from the host to adapter by libccil, but we copy it anyway
+        * to make the memcpy to the adapter better aligned.
+        */
+       u32 wqe_count;
+
+       /* Put these fields next so that later 32- and 64-bit
+        * quantities are naturally aligned.
+        */
+       u8 id;
+       u8 result;              /* adapter -> host */
+       u8 sge_count;           /* host -> adapter */
+       u8 flags;               /* host -> adapter */
+
+       u64 context;
+#ifdef CCMSGMAGIC
+       u32 magic;
+       u32 pad;
+#endif
+} __attribute__((packed));
+
+/*
+ *------------------------ RNIC ------------------------
+ */
+
+/*
+ * WR_RNIC_OPEN
+ */
+
+/*
+ * Flags for the RNIC WRs
+ */
+enum c2_rnic_flags {
+       RNIC_IRD_STATIC = 0x0001,
+       RNIC_ORD_STATIC = 0x0002,
+       RNIC_QP_STATIC = 0x0004,
+       RNIC_SRQ_SUPPORTED = 0x0008,
+       RNIC_PBL_BLOCK_MODE = 0x0010,
+       RNIC_SRQ_MODEL_ARRIVAL = 0x0020,
+       RNIC_CQ_OVF_DETECTED = 0x0040,
+       RNIC_PRIV_MODE = 0x0080
+};
+
+struct c2wr_rnic_open_req {
+       struct c2wr_hdr hdr;
+       u64 user_context;
+       u16 flags;              /* See enum c2_rnic_flags */
+       u16 port_num;
+} __attribute__((packed));
+
+struct c2wr_rnic_open_rep {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+} __attribute__((packed));
+
+union c2wr_rnic_open {
+       struct c2wr_rnic_open_req req;
+       struct c2wr_rnic_open_rep rep;
+} __attribute__((packed));
+
+struct c2wr_rnic_query_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+} __attribute__((packed));
+
+/*
+ * WR_RNIC_QUERY
+ */
+struct c2wr_rnic_query_rep {
+       struct c2wr_hdr hdr;
+       u64 user_context;
+       u32 vendor_id;
+       u32 part_number;
+       u32 hw_version;
+       u32 fw_ver_major;
+       u32 fw_ver_minor;
+       u32 fw_ver_patch;
+       char fw_ver_build_str[WR_BUILD_STR_LEN];
+       u32 max_qps;
+       u32 max_qp_depth;
+       u32 max_srq_depth;
+       u32 max_send_sgl_depth;
+       u32 max_rdma_sgl_depth;
+       u32 max_cqs;
+       u32 max_cq_depth;
+       u32 max_cq_event_handlers;
+       u32 max_mrs;
+       u32 max_pbl_depth;
+       u32 max_pds;
+       u32 max_global_ird;
+       u32 max_global_ord;
+       u32 max_qp_ird;
+       u32 max_qp_ord;
+       u32 flags;
+       u32 max_mws;
+       u32 pbe_range_low;
+       u32 pbe_range_high;
+       u32 max_srqs;
+       u32 page_size;
+} __attribute__((packed));
+
+union c2wr_rnic_query {
+       struct c2wr_rnic_query_req req;
+       struct c2wr_rnic_query_rep rep;
+} __attribute__((packed));
+
+/*
+ * WR_RNIC_GETCONFIG
+ */
+
+struct c2wr_rnic_getconfig_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 option;             /* see c2_getconfig_cmd_t */
+       u64 reply_buf;
+       u32 reply_buf_len;
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_getconfig_rep {
+       struct c2wr_hdr hdr;
+       u32 option;             /* see c2_getconfig_cmd_t */
+       u32 count_len;          /* length of the number of addresses configured */
+} __attribute__((packed)) ;
+
+union c2wr_rnic_getconfig {
+       struct c2wr_rnic_getconfig_req req;
+       struct c2wr_rnic_getconfig_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * WR_RNIC_SETCONFIG
+ */
+struct c2wr_rnic_setconfig_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 option;             /* See c2_setconfig_cmd_t */
+       /* variable data and pad. See c2_netaddr and c2_route */
+       u8 data[0];
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_setconfig_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_rnic_setconfig {
+       struct c2wr_rnic_setconfig_req req;
+       struct c2wr_rnic_setconfig_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * WR_RNIC_CLOSE
+ */
+struct c2wr_rnic_close_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_close_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_rnic_close {
+       struct c2wr_rnic_close_req req;
+       struct c2wr_rnic_close_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ CQ ------------------------
+ */
+struct c2wr_cq_create_req {
+       struct c2wr_hdr hdr;
+       u64 shared_ht;
+       u64 user_context;
+       u64 msg_pool;
+       u32 rnic_handle;
+       u32 msg_size;
+       u32 depth;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_create_rep {
+       struct c2wr_hdr hdr;
+       u32 mq_index;
+       u32 adapter_shared;
+       u32 cq_handle;
+} __attribute__((packed)) ;
+
+union c2wr_cq_create {
+       struct c2wr_cq_create_req req;
+       struct c2wr_cq_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_modify_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 cq_handle;
+       u32 new_depth;
+       u64 new_msg_pool;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_modify_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_cq_modify {
+       struct c2wr_cq_modify_req req;
+       struct c2wr_cq_modify_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_destroy_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 cq_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_destroy_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_cq_destroy {
+       struct c2wr_cq_destroy_req req;
+       struct c2wr_cq_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ PD ------------------------
+ */
+struct c2wr_pd_alloc_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_alloc_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_pd_alloc {
+       struct c2wr_pd_alloc_req req;
+       struct c2wr_pd_alloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_dealloc_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_dealloc_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_pd_dealloc {
+       struct c2wr_pd_dealloc_req req;
+       struct c2wr_pd_dealloc_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ SRQ ------------------------
+ */
+struct c2wr_srq_create_req {
+       struct c2wr_hdr hdr;
+       u64 shared_ht;
+       u64 user_context;
+       u32 rnic_handle;
+       u32 srq_depth;
+       u32 srq_limit;
+       u32 sgl_depth;
+       u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_create_rep {
+       struct c2wr_hdr hdr;
+       u32 srq_depth;
+       u32 sgl_depth;
+       u32 msg_size;
+       u32 mq_index;
+       u32 mq_start;
+       u32 srq_handle;
+} __attribute__((packed)) ;
+
+union c2wr_srq_create {
+       struct c2wr_srq_create_req req;
+       struct c2wr_srq_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_destroy_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 srq_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_destroy_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_srq_destroy {
+       struct c2wr_srq_destroy_req req;
+       struct c2wr_srq_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ QP ------------------------
+ */
+enum c2wr_qp_flags {
+       QP_RDMA_READ = 0x00000001,      /* RDMA read enabled? */
+       QP_RDMA_WRITE = 0x00000002,     /* RDMA write enabled? */
+       QP_MW_BIND = 0x00000004,        /* MWs enabled */
+       QP_ZERO_STAG = 0x00000008,      /* enabled? */
+       QP_REMOTE_TERMINATION = 0x00000010,     /* remote end terminated */
+       QP_RDMA_READ_RESPONSE = 0x00000020      /* Remote RDMA read enabled? */
+};
+
+struct c2wr_qp_create_req {
+       struct c2wr_hdr hdr;
+       u64 shared_sq_ht;
+       u64 shared_rq_ht;
+       u64 user_context;
+       u32 rnic_handle;
+       u32 sq_cq_handle;
+       u32 rq_cq_handle;
+       u32 sq_depth;
+       u32 rq_depth;
+       u32 srq_handle;
+       u32 srq_limit;
+       u32 flags;              /* see enum c2wr_qp_flags */
+       u32 send_sgl_depth;
+       u32 recv_sgl_depth;
+       u32 rdma_write_sgl_depth;
+       u32 ord;
+       u32 ird;
+       u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_create_rep {
+       struct c2wr_hdr hdr;
+       u32 sq_depth;
+       u32 rq_depth;
+       u32 send_sgl_depth;
+       u32 recv_sgl_depth;
+       u32 rdma_write_sgl_depth;
+       u32 ord;
+       u32 ird;
+       u32 sq_msg_size;
+       u32 sq_mq_index;
+       u32 sq_mq_start;
+       u32 rq_msg_size;
+       u32 rq_mq_index;
+       u32 rq_mq_start;
+       u32 qp_handle;
+} __attribute__((packed)) ;
+
+union c2wr_qp_create {
+       struct c2wr_qp_create_req req;
+       struct c2wr_qp_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_query_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 qp_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_query_rep {
+       struct c2wr_hdr hdr;
+       u64 user_context;
+       u32 rnic_handle;
+       u32 sq_depth;
+       u32 rq_depth;
+       u32 send_sgl_depth;
+       u32 rdma_write_sgl_depth;
+       u32 recv_sgl_depth;
+       u32 ord;
+       u32 ird;
+       u16 qp_state;
+       u16 flags;              /* see c2wr_qp_flags_t */
+       u32 qp_id;
+       u32 local_addr;
+       u32 remote_addr;
+       u16 local_port;
+       u16 remote_port;
+       u32 terminate_msg_length;       /* 0 if not present */
+       u8 data[0];
+       /* Terminate Message in-line here. */
+} __attribute__((packed)) ;
+
+union c2wr_qp_query {
+       struct c2wr_qp_query_req req;
+       struct c2wr_qp_query_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_modify_req {
+       struct c2wr_hdr hdr;
+       u64 stream_msg;
+       u32 stream_msg_length;
+       u32 rnic_handle;
+       u32 qp_handle;
+       u32 next_qp_state;
+       u32 ord;
+       u32 ird;
+       u32 sq_depth;
+       u32 rq_depth;
+       u32 llp_ep_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_modify_rep {
+       struct c2wr_hdr hdr;
+       u32 ord;
+       u32 ird;
+       u32 sq_depth;
+       u32 rq_depth;
+       u32 sq_msg_size;
+       u32 sq_mq_index;
+       u32 sq_mq_start;
+       u32 rq_msg_size;
+       u32 rq_mq_index;
+       u32 rq_mq_start;
+} __attribute__((packed)) ;
+
+union c2wr_qp_modify {
+       struct c2wr_qp_modify_req req;
+       struct c2wr_qp_modify_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_destroy_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 qp_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_destroy_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_qp_destroy {
+       struct c2wr_qp_destroy_req req;
+       struct c2wr_qp_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * The CCWR_QP_CONNECT msg is posted on the verbs request queue.  It can
+ * only be posted when a QP is in IDLE state.  After the connect request is
+ * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state.
+ * No synchronous reply from adapter to this WR.  The results of
+ * connection are passed back in an async event CCAE_ACTIVE_CONNECT_RESULTS
+ * See c2wr_ae_active_connect_results_t
+ */
+struct c2wr_qp_connect_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 qp_handle;
+       u32 remote_addr;
+       u16 remote_port;
+       u16 pad;
+       u32 private_data_length;
+       u8 private_data[0];     /* Private data in-line. */
+} __attribute__((packed)) ;
+
+struct c2wr_qp_connect {
+       struct c2wr_qp_connect_req req;
+       /* no synchronous reply.         */
+} __attribute__((packed)) ;
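+
+/*
+ * Editorial sketch, for illustration only; not part of the driver and
+ * not compiled (guarded by #if 0).  It shows how a connect request
+ * with in-line private data might be built and posted on the verbs
+ * request queue.  The 'qp_handle' parameter, the byte ordering of the
+ * address/port fields and the GFP context are assumptions made for
+ * the example only.
+ */
+#if 0
+static int example_post_connect(struct c2_dev *c2dev, u32 qp_handle,
+                               u32 remote_addr, u16 remote_port,
+                               const void *pdata, u32 plen)
+{
+       struct c2wr_qp_connect_req *wr;
+       int err;
+
+       if (plen > C2_MAX_PRIVATE_DATA_SIZE)
+               return -EINVAL;
+
+       /* the request queue copies req_vq.msg_size bytes per message */
+       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+       if (!wr)
+               return -ENOMEM;
+
+       c2_wr_set_id(wr, CCWR_QP_CONNECT);
+       wr->hdr.context = 0;                    /* no synchronous reply */
+       wr->rnic_handle = c2dev->adapter_handle;
+       wr->qp_handle = qp_handle;
+       wr->remote_addr = remote_addr;          /* network byte order assumed */
+       wr->remote_port = remote_port;
+       wr->private_data_length = cpu_to_be32(plen);
+       memcpy(wr->private_data, pdata, plen);
+
+       err = vq_send_wr(c2dev, (union c2wr *) wr);
+       kfree(wr);
+       return err;
+}
+#endif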
+
+
+/*
+ *------------------------ MM ------------------------
+ */
+
+struct c2wr_nsmr_stag_alloc_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 pbl_depth;
+       u32 pd_id;
+       u32 flags;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_stag_alloc_rep {
+       struct c2wr_hdr hdr;
+       u32 pbl_depth;
+       u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_stag_alloc {
+       struct c2wr_nsmr_stag_alloc_req req;
+       struct c2wr_nsmr_stag_alloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_register_req {
+       struct c2wr_hdr hdr;
+       u64 va;
+       u32 rnic_handle;
+       u16 flags;
+       u8 stag_key;
+       u8 pad;
+       u32 pd_id;
+       u32 pbl_depth;
+       u32 pbe_size;
+       u32 fbo;
+       u32 length;
+       u32 addrs_length;
+       /* array of paddrs (must be aligned on a 64bit boundary) */
+       u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_register_rep {
+       struct c2wr_hdr hdr;
+       u32 pbl_depth;
+       u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_register {
+       struct c2wr_nsmr_register_req req;
+       struct c2wr_nsmr_register_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_pbl_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 flags;
+       u32 stag_index;
+       u32 addrs_length;
+       /* array of paddrs (must be aligned on a 64bit boundary) */
+       u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_pbl_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_pbl {
+       struct c2wr_nsmr_pbl_req req;
+       struct c2wr_nsmr_pbl_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mr_query_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_mr_query_rep {
+       struct c2wr_hdr hdr;
+       u8 stag_key;
+       u8 pad[3];
+       u32 pd_id;
+       u32 flags;
+       u32 pbl_depth;
+} __attribute__((packed)) ;
+
+union c2wr_mr_query {
+       struct c2wr_mr_query_req req;
+       struct c2wr_mr_query_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_query_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_query_rep {
+       struct c2wr_hdr hdr;
+       u8 stag_key;
+       u8 pad[3];
+       u32 pd_id;
+       u32 flags;
+} __attribute__((packed)) ;
+
+union c2wr_mw_query {
+       struct c2wr_mw_query_req req;
+       struct c2wr_mw_query_rep rep;
+} __attribute__((packed)) ;
+
+
+struct c2wr_stag_dealloc_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_stag_dealloc_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_stag_dealloc {
+       struct c2wr_stag_dealloc_req req;
+       struct c2wr_stag_dealloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_reregister_req {
+       struct c2wr_hdr hdr;
+       u64 va;
+       u32 rnic_handle;
+       u16 flags;
+       u8 stag_key;
+       u8 pad;
+       u32 stag_index;
+       u32 pd_id;
+       u32 pbl_depth;
+       u32 pbe_size;
+       u32 fbo;
+       u32 length;
+       u32 addrs_length;
+       u32 pad1;
+       /* array of paddrs (must be aligned on a 64bit boundary) */
+       u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_reregister_rep {
+       struct c2wr_hdr hdr;
+       u32 pbl_depth;
+       u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_reregister {
+       struct c2wr_nsmr_reregister_req req;
+       struct c2wr_nsmr_reregister_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_smr_register_req {
+       struct c2wr_hdr hdr;
+       u64 va;
+       u32 rnic_handle;
+       u16 flags;
+       u8 stag_key;
+       u8 pad;
+       u32 stag_index;
+       u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_smr_register_rep {
+       struct c2wr_hdr hdr;
+       u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_smr_register {
+       struct c2wr_smr_register_req req;
+       struct c2wr_smr_register_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_alloc_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_alloc_rep {
+       struct c2wr_hdr hdr;
+       u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_mw_alloc {
+       struct c2wr_mw_alloc_req req;
+       struct c2wr_mw_alloc_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ WRs -----------------------
+ */
+
+struct c2wr_user_hdr {
+       struct c2wr_hdr hdr;            /* Has status and WR Type */
+} __attribute__((packed)) ;
+
+enum c2_qp_state {
+       C2_QP_STATE_IDLE = 0x01,
+       C2_QP_STATE_CONNECTING = 0x02,
+       C2_QP_STATE_RTS = 0x04,
+       C2_QP_STATE_CLOSING = 0x08,
+       C2_QP_STATE_TERMINATE = 0x10,
+       C2_QP_STATE_ERROR = 0x20,
+};
+
+/* Completion queue entry. */
+struct c2wr_ce {
+       struct c2wr_hdr hdr;            /* Has status and WR Type */
+       u64 qp_user_context;    /* c2_user_qp_t * */
+       u32 qp_state;           /* Current QP State */
+       u32 handle;             /* QPID or EP Handle */
+       u32 bytes_rcvd;         /* valid for RECV WCs */
+       u32 stag;
+} __attribute__((packed)) ;
+
+
+/*
+ * Flags used for all post-sq WRs.  These must fit in the flags
+ * field of the struct c2wr_hdr (eight bits).
+ */
+enum {
+       SQ_SIGNALED = 0x01,
+       SQ_READ_FENCE = 0x02,
+       SQ_FENCE = 0x04,
+};
+
+/*
+ * Common fields for all post-sq WRs.  Namely the standard header and a
+ * secondary header with fields common to all post-sq WRs.
+ */
+struct c2_sq_hdr {
+       struct c2wr_user_hdr user_hdr;
+} __attribute__((packed));
+
+/*
+ * Same as above but for post-rq WRs.
+ */
+struct c2_rq_hdr {
+       struct c2wr_user_hdr user_hdr;
+} __attribute__((packed));
+
+/*
+ * use the same struct for all sends.
+ */
+struct c2wr_send_req {
+       struct c2_sq_hdr sq_hdr;
+       u32 sge_len;
+       u32 remote_stag;
+       u8 data[0];             /* SGE array */
+} __attribute__((packed));
+
+union c2wr_send {
+       struct c2wr_send_req req;
+       struct c2wr_ce rep;
+} __attribute__((packed));
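+
+/*
+ * Layout sketch (illustrative only): the trailing data[] bytes carry the
+ * in-line SGE array, so a send WR would presumably be followed directly
+ * by its gather entries, e.g. assuming struct c2_data_addr is the SGE
+ * type defined earlier in this header:
+ *
+ *     memcpy(wr->data, sges, nsge * sizeof(struct c2_data_addr));
+ *
+ * with the gather count recorded via the sge_count accessor further below.
+ */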
+
+struct c2wr_rdma_write_req {
+       struct c2_sq_hdr sq_hdr;
+       u64 remote_to;
+       u32 remote_stag;
+       u32 sge_len;
+       u8 data[0];             /* SGE array */
+} __attribute__((packed));
+
+union c2wr_rdma_write {
+       struct c2wr_rdma_write_req req;
+       struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_rdma_read_req {
+       struct c2_sq_hdr sq_hdr;
+       u64 local_to;
+       u64 remote_to;
+       u32 local_stag;
+       u32 remote_stag;
+       u32 length;
+} __attribute__((packed));
+
+union c2wr_rdma_read {
+       struct c2wr_rdma_read_req req;
+       struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_mw_bind_req {
+       struct c2_sq_hdr sq_hdr;
+       u64 va;
+       u8 stag_key;
+       u8 pad[3];
+       u32 mw_stag_index;
+       u32 mr_stag_index;
+       u32 length;
+       u32 flags;
+} __attribute__((packed));
+
+union c2wr_mw_bind {
+       struct c2wr_mw_bind_req req;
+       struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_nsmr_fastreg_req {
+       struct c2_sq_hdr sq_hdr;
+       u64 va;
+       u8 stag_key;
+       u8 pad[3];
+       u32 stag_index;
+       u32 pbe_size;
+       u32 fbo;
+       u32 length;
+       u32 addrs_length;
+       /* array of paddrs (must be aligned on a 64bit boundary) */
+       u64 paddrs[0];
+} __attribute__((packed));
+
+union c2wr_nsmr_fastreg {
+       struct c2wr_nsmr_fastreg_req req;
+       struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_stag_invalidate_req {
+       struct c2_sq_hdr sq_hdr;
+       u8 stag_key;
+       u8 pad[3];
+       u32 stag_index;
+} __attribute__((packed));
+
+union c2wr_stag_invalidate {
+       struct c2wr_stag_invalidate_req req;
+       struct c2wr_ce rep;
+} __attribute__((packed));
+
+union c2wr_sqwr {
+       struct c2_sq_hdr sq_hdr;
+       struct c2wr_send_req send;
+       struct c2wr_send_req send_se;
+       struct c2wr_send_req send_inv;
+       struct c2wr_send_req send_se_inv;
+       struct c2wr_rdma_write_req rdma_write;
+       struct c2wr_rdma_read_req rdma_read;
+       struct c2wr_mw_bind_req mw_bind;
+       struct c2wr_nsmr_fastreg_req nsmr_fastreg;
+       struct c2wr_stag_invalidate_req stag_inv;
+} __attribute__((packed));
+
+
+/*
+ * RQ WRs
+ */
+struct c2wr_rqwr {
+       struct c2_rq_hdr rq_hdr;
+       u8 data[0];             /* array of SGEs */
+} __attribute__((packed));
+
+union c2wr_recv {
+       struct c2wr_rqwr req;
+       struct c2wr_ce rep;
+} __attribute__((packed));
+
+/*
+ * All AEs start with this header.  Most AEs only need to convey the
+ * information in the header.  Some, like LLP connection events, need
+ * more info.  The union c2wr_ae below has all the possible AEs.
+ *
+ * hdr.context is the user_context from the rnic_open WR.  NULL if this
+ * AE is not affiliated with an RNIC.
+ *
+ * hdr.id is the AE identifier (e.g. CCAE_REMOTE_SHUTDOWN,
+ * CCAE_LLP_CLOSE_COMPLETE)
+ *
+ * resource_type is one of:  C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ
+ *
+ * user_context is the context passed down when the host created the resource.
+ */
+struct c2wr_ae_hdr {
+       struct c2wr_hdr hdr;
+       u64 user_context;       /* user context for this res. */
+       u32 resource_type;      /* see enum c2_resource_indicator */
+       u32 resource;           /* handle for resource */
+       u32 qp_state;           /* current QP State */
+} __attribute__((packed));
+
+/*
+ * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ,
+ * the adapter moves the QP into RTS state
+ */
+struct c2wr_ae_active_connect_results {
+       struct c2wr_ae_hdr ae_hdr;
+       u32 laddr;
+       u32 raddr;
+       u16 lport;
+       u16 rport;
+       u32 private_data_length;
+       u8 private_data[0];     /* data is in-line in the msg. */
+} __attribute__((packed));
+
+/*
+ * When connections are established by the stack (and the private data
+ * MPA frame is received), the adapter will generate an event to the host.
+ * The details of the connection, any private data, and the new connection
+ * request handle are passed up via the CCAE_CONNECTION_REQUEST msg on the
+ * AE queue:
+ */
+struct c2wr_ae_connection_request {
+       struct c2wr_ae_hdr ae_hdr;
+       u32 cr_handle;          /* connreq handle (sock ptr) */
+       u32 laddr;
+       u32 raddr;
+       u16 lport;
+       u16 rport;
+       u32 private_data_length;
+       u8 private_data[0];     /* data is in-line in the msg. */
+} __attribute__((packed));
+
+union c2wr_ae {
+       struct c2wr_ae_hdr ae_generic;
+       struct c2wr_ae_active_connect_results ae_active_connect_results;
+       struct c2wr_ae_connection_request ae_connection_request;
+} __attribute__((packed));
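+
+/*
+ * Consumption sketch (illustrative only): an AE handler would typically
+ * look at the generic header first and branch on the affiliated resource,
+ * e.g.:
+ *
+ *     union c2wr_ae *ae = msg;
+ *     switch (ae->ae_generic.resource_type) {
+ *     case C2_RES_IND_QP:
+ *             handle_qp_event(ae->ae_generic.resource,
+ *                             ae->ae_generic.qp_state);
+ *             break;
+ *     ...
+ *     }
+ *
+ * handle_qp_event() is a placeholder name; any byte-order conversion of
+ * the fields is omitted here.
+ */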
+
+struct c2wr_init_req {
+       struct c2wr_hdr hdr;
+       u64 hint_count;
+       u64 q0_host_shared;
+       u64 q1_host_shared;
+       u64 q1_host_msg_pool;
+       u64 q2_host_shared;
+       u64 q2_host_msg_pool;
+} __attribute__((packed));
+
+struct c2wr_init_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_init {
+       struct c2wr_init_req req;
+       struct c2wr_init_rep rep;
+} __attribute__((packed));
+
+/*
+ * For upgrading flash.
+ */
+
+struct c2wr_flash_init_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+} __attribute__((packed));
+
+struct c2wr_flash_init_rep {
+       struct c2wr_hdr hdr;
+       u32 adapter_flash_buf_offset;
+       u32 adapter_flash_len;
+} __attribute__((packed));
+
+union c2wr_flash_init {
+       struct c2wr_flash_init_req req;
+       struct c2wr_flash_init_rep rep;
+} __attribute__((packed));
+
+struct c2wr_flash_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 len;
+} __attribute__((packed));
+
+struct c2wr_flash_rep {
+       struct c2wr_hdr hdr;
+       u32 status;
+} __attribute__((packed));
+
+union c2wr_flash {
+       struct c2wr_flash_req req;
+       struct c2wr_flash_rep rep;
+} __attribute__((packed));
+
+struct c2wr_buf_alloc_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 size;
+} __attribute__((packed));
+
+struct c2wr_buf_alloc_rep {
+       struct c2wr_hdr hdr;
+       u32 offset;             /* 0 if mem not available */
+       u32 size;               /* 0 if mem not available */
+} __attribute__((packed));
+
+union c2wr_buf_alloc {
+       struct c2wr_buf_alloc_req req;
+       struct c2wr_buf_alloc_rep rep;
+} __attribute__((packed));
+
+struct c2wr_buf_free_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 offset;             /* Must match value from alloc */
+       u32 size;               /* Must match value from alloc */
+} __attribute__((packed));
+
+struct c2wr_buf_free_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_buf_free {
+       struct c2wr_buf_free_req req;
+       struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_flash_write_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 offset;
+       u32 size;
+       u32 type;
+       u32 flags;
+} __attribute__((packed));
+
+struct c2wr_flash_write_rep {
+       struct c2wr_hdr hdr;
+       u32 status;
+} __attribute__((packed));
+
+union c2wr_flash_write {
+       struct c2wr_flash_write_req req;
+       struct c2wr_flash_write_rep rep;
+} __attribute__((packed));
+
+/*
+ * Messages for LLP connection setup.
+ */
+
+/*
+ * Listen Request.  This allocates a listening endpoint to allow passive
+ * connection setup.  Newly established LLP connections are passed up
+ * via an AE.  See c2wr_ae_connection_request_t
+ */
+struct c2wr_ep_listen_create_req {
+       struct c2wr_hdr hdr;
+       u64 user_context;       /* returned in AEs. */
+       u32 rnic_handle;
+       u32 local_addr;         /* local addr, or 0  */
+       u16 local_port;         /* 0 means "pick one" */
+       u16 pad;
+       u32 backlog;            /* traditional TCP listen backlog */
+} __attribute__((packed));
+
+struct c2wr_ep_listen_create_rep {
+       struct c2wr_hdr hdr;
+       u32 ep_handle;          /* handle to new listening ep */
+       u16 local_port;         /* resulting port... */
+       u16 pad;
+} __attribute__((packed));
+
+union c2wr_ep_listen_create {
+       struct c2wr_ep_listen_create_req req;
+       struct c2wr_ep_listen_create_rep rep;
+} __attribute__((packed));
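+
+/*
+ * Usage sketch (illustrative only): a passive-side listen might pass
+ * local_addr = 0 to listen on any local address, local_port = 0 to let
+ * the adapter pick a port, and a traditional TCP-style backlog, e.g.:
+ *
+ *     req.local_addr = 0;
+ *     req.local_port = 0;
+ *     req.backlog = 16;
+ *
+ * The chosen port then comes back in rep.local_port together with the
+ * ep_handle used by the listen_destroy and query requests below.  The
+ * backlog value of 16 is purely an example.
+ */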
+
+struct c2wr_ep_listen_destroy_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 ep_handle;
+} __attribute__((packed));
+
+struct c2wr_ep_listen_destroy_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_ep_listen_destroy {
+       struct c2wr_ep_listen_destroy_req req;
+       struct c2wr_ep_listen_destroy_rep rep;
+} __attribute__((packed));
+
+struct c2wr_ep_query_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 ep_handle;
+} __attribute__((packed));
+
+struct c2wr_ep_query_rep {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 local_addr;
+       u32 remote_addr;
+       u16 local_port;
+       u16 remote_port;
+} __attribute__((packed));
+
+union c2wr_ep_query {
+       struct c2wr_ep_query_req req;
+       struct c2wr_ep_query_rep rep;
+} __attribute__((packed));
+
+
+/*
+ * The host passes this down to indicate acceptance of a pending iWARP
+ * connection.  The cr_handle was obtained from the CONNECTION_REQUEST
+ * AE passed up by the adapter.  See c2wr_ae_connection_request_t.
+ */
+struct c2wr_cr_accept_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 qp_handle;          /* QP to bind to this LLP conn */
+       u32 ep_handle;          /* LLP  handle to accept */
+       u32 private_data_length;
+       u8 private_data[0];     /* data in-line in msg. */
+} __attribute__((packed));
+
+/*
+ * adapter sends reply when private data is successfully submitted to
+ * the LLP.
+ */
+struct c2wr_cr_accept_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_cr_accept {
+       struct c2wr_cr_accept_req req;
+       struct c2wr_cr_accept_rep rep;
+} __attribute__((packed));
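+
+/*
+ * Usage sketch (illustrative only): to accept, the host presumably takes
+ * the cr_handle delivered in the CCAE_CONNECTION_REQUEST AE, names the QP
+ * to bind, and appends any private data in-line:
+ *
+ *     req.qp_handle = qp_handle;
+ *     req.ep_handle = ae.cr_handle;
+ *     req.private_data_length = len;
+ *     memcpy(req.private_data, buf, len);
+ *
+ * Passing the AE's cr_handle as ep_handle here is an assumption based on
+ * the descriptions above; qp_handle, buf and len are placeholder names.
+ */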
+
+/*
+ * The host sends this down if a given iWARP connection request was
+ * rejected by the consumer.  The cr_handle was obtained from a
+ * previous c2wr_ae_connection_request_t AE sent by the adapter.
+ */
+struct  c2wr_cr_reject_req {
+       struct c2wr_hdr hdr;
+       u32 rnic_handle;
+       u32 ep_handle;          /* LLP handle to reject */
+} __attribute__((packed));
+
+/*
+ * It is not clear this reply is needed, but it is included for now.  The
+ * adapter sends the reject reply after the LLP endpoint has been destroyed.
+ */
+struct  c2wr_cr_reject_rep {
+       struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_cr_reject {
+       struct c2wr_cr_reject_req req;
+       struct c2wr_cr_reject_rep rep;
+} __attribute__((packed));
+
+/*
+ * console command.  Used to implement a debug console over the verbs
+ * request and reply queues.
+ */
+
+/*
+ * Console request message.  It contains:
+ *     - message hdr with id = CCWR_CONSOLE
+ *     - the physaddr/len of host memory to be used for the reply.
+ *     - the command string, e.g. "netstat -s" or "zoneinfo"
+ */
+struct c2wr_console_req {
+       struct c2wr_hdr hdr;            /* id = CCWR_CONSOLE */
+       u64 reply_buf;          /* pinned host buf for reply */
+       u32 reply_buf_len;      /* length of reply buffer */
+       u8 command[0];          /* NUL terminated ascii string */
+       /* containing the command req */
+} __attribute__((packed));
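+
+/*
+ * Usage sketch (illustrative only): a request for the "netstat -s"
+ * command mentioned above would carry the NUL-terminated string in-line
+ * and point reply_buf at pinned host memory of reply_buf_len bytes:
+ *
+ *     req->reply_buf = reply_dma_addr;
+ *     req->reply_buf_len = reply_len;
+ *     strcpy((char *) req->command, "netstat -s");
+ *
+ * reply_dma_addr and reply_len are placeholder names; any byte-order
+ * conversion expected by the adapter is omitted.
+ */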
+
+/*
+ * flags used in the console reply.
+ */
+enum c2_console_flags {
+       CONS_REPLY_TRUNCATED = 0x00000001       /* reply was truncated */
+} __attribute__((packed));
+
+/*
+ * Console reply message.
+ * hdr.result contains the c2_status_t error if the reply was _not_ generated,
+ * or C2_OK if the reply was generated.
+ */
+struct c2wr_console_rep {
+       struct c2wr_hdr hdr;            /* id = CCWR_CONSOLE */
+       u32 flags;
+} __attribute__((packed));
+
+union c2wr_console {
+       struct c2wr_console_req req;
+       struct c2wr_console_rep rep;
+} __attribute__((packed));
+
+
+/*
+ * Giant union with all WRs.  Makes life easier...
+ */
+union c2wr {
+       struct c2wr_hdr hdr;
+       struct c2wr_user_hdr user_hdr;
+       union c2wr_rnic_open rnic_open;
+       union c2wr_rnic_query rnic_query;
+       union c2wr_rnic_getconfig rnic_getconfig;
+       union c2wr_rnic_setconfig rnic_setconfig;
+       union c2wr_rnic_close rnic_close;
+       union c2wr_cq_create cq_create;
+       union c2wr_cq_modify cq_modify;
+       union c2wr_cq_destroy cq_destroy;
+       union c2wr_pd_alloc pd_alloc;
+       union c2wr_pd_dealloc pd_dealloc;
+       union c2wr_srq_create srq_create;
+       union c2wr_srq_destroy srq_destroy;
+       union c2wr_qp_create qp_create;
+       union c2wr_qp_query qp_query;
+       union c2wr_qp_modify qp_modify;
+       union c2wr_qp_destroy qp_destroy;
+       struct c2wr_qp_connect qp_connect;
+       union c2wr_nsmr_stag_alloc nsmr_stag_alloc;
+       union c2wr_nsmr_register nsmr_register;
+       union c2wr_nsmr_pbl nsmr_pbl;
+       union c2wr_mr_query mr_query;
+       union c2wr_mw_query mw_query;
+       union c2wr_stag_dealloc stag_dealloc;
+       union c2wr_sqwr sqwr;
+       struct c2wr_rqwr rqwr;
+       struct c2wr_ce ce;
+       union c2wr_ae ae;
+       union c2wr_init init;
+       union c2wr_ep_listen_create ep_listen_create;
+       union c2wr_ep_listen_destroy ep_listen_destroy;
+       union c2wr_cr_accept cr_accept;
+       union c2wr_cr_reject cr_reject;
+       union c2wr_console console;
+       union c2wr_flash_init flash_init;
+       union c2wr_flash flash;
+       union c2wr_buf_alloc buf_alloc;
+       union c2wr_buf_free buf_free;
+       union c2wr_flash_write flash_write;
+} __attribute__((packed));
+
+
+/*
+ * Accessors for the wr fields that are packed together tightly to
+ * reduce the wr message size.  The wr arguments are void* so that
+ * either a union c2wr *, a struct c2wr_hdr *, or a pointer to any of the
+ * types in the union c2wr can be passed in.
+ */
+static __inline__ u8 c2_wr_get_id(void *wr)
+{
+       return ((struct c2wr_hdr *) wr)->id;
+}
+static __inline__ void c2_wr_set_id(void *wr, u8 id)
+{
+       ((struct c2wr_hdr *) wr)->id = id;
+}
+static __inline__ u8 c2_wr_get_result(void *wr)
+{
+       return ((struct c2wr_hdr *) wr)->result;
+}
+static __inline__ void c2_wr_set_result(void *wr, u8 result)
+{
+       ((struct c2wr_hdr *) wr)->result = result;
+}
+static __inline__ u8 c2_wr_get_flags(void *wr)
+{
+       return ((struct c2wr_hdr *) wr)->flags;
+}
+static __inline__ void c2_wr_set_flags(void *wr, u8 flags)
+{
+       ((struct c2wr_hdr *) wr)->flags = flags;
+}
+static __inline__ u8 c2_wr_get_sge_count(void *wr)
+{
+       return ((struct c2wr_hdr *) wr)->sge_count;
+}
+static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count)
+{
+       ((struct c2wr_hdr *) wr)->sge_count = sge_count;
+}
+static __inline__ u32 c2_wr_get_wqe_count(void *wr)
+{
+       return ((struct c2wr_hdr *) wr)->wqe_count;
+}
+static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count)
+{
+       ((struct c2wr_hdr *) wr)->wqe_count = wqe_count;
+}
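+
+/*
+ * Usage sketch (illustrative only): because the accessors take void *,
+ * any WR variant whose first member is the common header can be passed
+ * directly, e.g. when building a signaled send:
+ *
+ *     struct c2wr_send_req *wr = ...;
+ *     c2_wr_set_id(wr, CCWR_SEND);
+ *     c2_wr_set_flags(wr, SQ_SIGNALED);
+ *     c2_wr_set_sge_count(wr, 1);
+ *
+ * CCWR_SEND is assumed to be the send WR id defined earlier in this file.
+ */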
+
+#endif                         /* _C2_WR_H_ */
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
new file mode 100644 (file)
index 0000000..922389b
--- /dev/null
@@ -0,0 +1,16 @@
+config INFINIBAND_EHCA
+       tristate "eHCA support"
+       depends on IBMEBUS && INFINIBAND
+       ---help---
+       This driver supports the IBM pSeries eHCA InfiniBand adapter.
+
+       To compile the driver as a module, choose M here. The module
+       will be called ib_ehca.
+
+config INFINIBAND_EHCA_SCALING
+       bool "Scaling support (EXPERIMENTAL)"
+       depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
+       ---help---
+       eHCA scaling support schedules the CQ callbacks to different CPUs.
+
+       To enable this feature choose Y here.
diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/infiniband/hw/ehca/Makefile
new file mode 100644 (file)
index 0000000..74d284e
--- /dev/null
@@ -0,0 +1,16 @@
+#  Authors: Heiko J Schick <schickhj@de.ibm.com>
+#           Christoph Raisch <raisch@de.ibm.com>
+#           Joachim Fenkes <fenkes@de.ibm.com>
+#
+#  Copyright (c) 2005 IBM Corporation
+#
+#  All rights reserved.
+#
+#  This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD.
+
+obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o
+
+ib_ehca-objs  = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \
+               ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \
+               ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o
+
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
new file mode 100644 (file)
index 0000000..3bac197
--- /dev/null
@@ -0,0 +1,271 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  address vector functions
+ *
+ *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Khadija Souissi <souissik@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm/current.h>
+
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+static struct kmem_cache *av_cache;
+
+struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+       int ret;
+       struct ehca_av *av;
+       struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+                                             ib_device);
+
+       av = kmem_cache_alloc(av_cache, SLAB_KERNEL);
+       if (!av) {
+               ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
+                        pd, ah_attr);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       av->av.sl = ah_attr->sl;
+       av->av.dlid = ah_attr->dlid;
+       av->av.slid_path_bits = ah_attr->src_path_bits;
+
+       if (ehca_static_rate < 0) {
+               int ah_mult = ib_rate_to_mult(ah_attr->static_rate);
+               int ehca_mult =
+                       ib_rate_to_mult(shca->sport[ah_attr->port_num].rate );
+
+               if (ah_mult >= ehca_mult)
+                       av->av.ipd = 0;
+               else
+                       av->av.ipd = (ah_mult > 0) ?
+                               ((ehca_mult - 1) / ah_mult) : 0;
+       } else
+               av->av.ipd = ehca_static_rate;
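+
+       /*
+        * Worked example: with the port at the 10 Gb/s rate (multiplier 4)
+        * and a requested static rate of 2.5 Gb/s (multiplier 1), this gives
+        * ipd = (4 - 1) / 1 = 3 inter-packet delay units; a requested rate
+        * at or above the port rate gives ipd = 0, i.e. no throttling.
+        */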
+
+       av->av.lnh = ah_attr->ah_flags;
+       av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
+       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK,
+                                           ah_attr->grh.traffic_class);
+       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
+                                           ah_attr->grh.flow_label);
+       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
+                                           ah_attr->grh.hop_limit);
+       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B);
+       /* set sgid in grh.word_1 */
+       if (ah_attr->ah_flags & IB_AH_GRH) {
+               int rc;
+               struct ib_port_attr port_attr;
+               union ib_gid gid;
+               memset(&port_attr, 0, sizeof(port_attr));
+               rc = ehca_query_port(pd->device, ah_attr->port_num,
+                                    &port_attr);
+               if (rc) { /* invalid port number */
+                       ret = -EINVAL;
+                       ehca_err(pd->device, "Invalid port number "
+                                "ehca_query_port() returned %x "
+                                "pd=%p ah_attr=%p", rc, pd, ah_attr);
+                       goto create_ah_exit1;
+               }
+               memset(&gid, 0, sizeof(gid));
+               rc = ehca_query_gid(pd->device,
+                                   ah_attr->port_num,
+                                   ah_attr->grh.sgid_index, &gid);
+               if (rc) {
+                       ret = -EINVAL;
+                       ehca_err(pd->device, "Failed to retrieve sgid "
+                                "ehca_query_gid() returned %x "
+                                "pd=%p ah_attr=%p", rc, pd, ah_attr);
+                       goto create_ah_exit1;
+               }
+               memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
+       }
+       /* for the time being we use a hard-coded PMTU of 2048 bytes */
+       av->av.pmtu = 4;
+
+       /* dgid comes in grh.word_3 */
+       memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
+              sizeof(ah_attr->grh.dgid));
+
+       return &av->ib_ah;
+
+create_ah_exit1:
+       kmem_cache_free(av_cache, av);
+
+       return ERR_PTR(ret);
+}
+
+int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+       struct ehca_av *av;
+       struct ehca_ud_av new_ehca_av;
+       struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+       u32 cur_pid = current->tgid;
+
+       if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+           my_pd->ownpid != cur_pid) {
+               ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+                        cur_pid, my_pd->ownpid);
+               return -EINVAL;
+       }
+
+       memset(&new_ehca_av, 0, sizeof(new_ehca_av));
+       new_ehca_av.sl = ah_attr->sl;
+       new_ehca_av.dlid = ah_attr->dlid;
+       new_ehca_av.slid_path_bits = ah_attr->src_path_bits;
+       new_ehca_av.ipd = ah_attr->static_rate;
+       new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK,
+                                        (ah_attr->ah_flags & IB_AH_GRH) > 0);
+       new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK,
+                                               ah_attr->grh.traffic_class);
+       new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
+                                                ah_attr->grh.flow_label);
+       new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
+                                                ah_attr->grh.hop_limit);
+       new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b);
+
+       /* set sgid in grh.word_1 */
+       if (ah_attr->ah_flags & IB_AH_GRH) {
+               int rc;
+               struct ib_port_attr port_attr;
+               union ib_gid gid;
+               memset(&port_attr, 0, sizeof(port_attr));
+               rc = ehca_query_port(ah->device, ah_attr->port_num,
+                                    &port_attr);
+               if (rc) { /* invalid port number */
+                       ehca_err(ah->device, "Invalid port number "
+                                "ehca_query_port() returned %x "
+                                "ah=%p ah_attr=%p port_num=%x",
+                                rc, ah, ah_attr, ah_attr->port_num);
+                       return -EINVAL;
+               }
+               memset(&gid, 0, sizeof(gid));
+               rc = ehca_query_gid(ah->device,
+                                   ah_attr->port_num,
+                                   ah_attr->grh.sgid_index, &gid);
+               if (rc) {
+                       ehca_err(ah->device, "Failed to retrieve sgid "
+                                "ehca_query_gid() returned %x "
+                                "ah=%p ah_attr=%p port_num=%x "
+                                "sgid_index=%x",
+                                rc, ah, ah_attr, ah_attr->port_num,
+                                ah_attr->grh.sgid_index);
+                       return -EINVAL;
+               }
+               memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
+       }
+
+       new_ehca_av.pmtu = 4; /* see also comment in create_ah() */
+
+       memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
+              sizeof(ah_attr->grh.dgid));
+
+       av = container_of(ah, struct ehca_av, ib_ah);
+       av->av = new_ehca_av;
+
+       return 0;
+}
+
+int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+       struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
+       struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+       u32 cur_pid = current->tgid;
+
+       if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+           my_pd->ownpid != cur_pid) {
+               ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+                        cur_pid, my_pd->ownpid);
+               return -EINVAL;
+       }
+
+       memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3,
+              sizeof(ah_attr->grh.dgid));
+       ah_attr->sl = av->av.sl;
+
+       ah_attr->dlid = av->av.dlid;
+
+       ah_attr->src_path_bits = av->av.slid_path_bits;
+       ah_attr->static_rate = av->av.ipd;
+       ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh);
+       ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK,
+                                                   av->av.grh.word_0);
+       ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK,
+                                               av->av.grh.word_0);
+       ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK,
+                                                av->av.grh.word_0);
+
+       return 0;
+}
+
+int ehca_destroy_ah(struct ib_ah *ah)
+{
+       struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+       u32 cur_pid = current->tgid;
+
+       if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+           my_pd->ownpid != cur_pid) {
+               ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+                        cur_pid, my_pd->ownpid);
+               return -EINVAL;
+       }
+
+       kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah));
+
+       return 0;
+}
+
+int ehca_init_av_cache(void)
+{
+       av_cache = kmem_cache_create("ehca_cache_av",
+                                  sizeof(struct ehca_av), 0,
+                                  SLAB_HWCACHE_ALIGN,
+                                  NULL, NULL);
+       if (!av_cache)
+               return -ENOMEM;
+       return 0;
+}
+
+void ehca_cleanup_av_cache(void)
+{
+       if (av_cache)
+               kmem_cache_destroy(av_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
new file mode 100644 (file)
index 0000000..1c72203
--- /dev/null
@@ -0,0 +1,346 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Struct definition for eHCA internal structures
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_H__
+#define __EHCA_CLASSES_H__
+
+#include "ehca_classes.h"
+#include "ipz_pt_fn.h"
+
+struct ehca_module;
+struct ehca_qp;
+struct ehca_cq;
+struct ehca_eq;
+struct ehca_mr;
+struct ehca_mw;
+struct ehca_pd;
+struct ehca_av;
+
+#ifdef CONFIG_PPC64
+#include "ehca_classes_pSeries.h"
+#endif
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "ehca_irq.h"
+
+struct ehca_eq {
+       u32 length;
+       struct ipz_queue ipz_queue;
+       struct ipz_eq_handle ipz_eq_handle;
+       struct work_struct work;
+       struct h_galpas galpas;
+       int is_initialized;
+       struct ehca_pfeq pf;
+       spinlock_t spinlock;
+       struct tasklet_struct interrupt_task;
+       u32 ist;
+};
+
+struct ehca_sport {
+       struct ib_cq *ibcq_aqp1;
+       struct ib_qp *ibqp_aqp1;
+       enum ib_rate  rate;
+       enum ib_port_state port_state;
+};
+
+struct ehca_shca {
+       struct ib_device ib_device;
+       struct ibmebus_dev *ibmebus_dev;
+       u8 num_ports;
+       int hw_level;
+       struct list_head shca_list;
+       struct ipz_adapter_handle ipz_hca_handle;
+       struct ehca_sport sport[2];
+       struct ehca_eq eq;
+       struct ehca_eq neq;
+       struct ehca_mr *maxmr;
+       struct ehca_pd *pd;
+       struct h_galpas galpas;
+};
+
+struct ehca_pd {
+       struct ib_pd ib_pd;
+       struct ipz_pd fw_pd;
+       u32 ownpid;
+};
+
+struct ehca_qp {
+       struct ib_qp ib_qp;
+       u32 qp_type;
+       struct ipz_queue ipz_squeue;
+       struct ipz_queue ipz_rqueue;
+       struct h_galpas galpas;
+       u32 qkey;
+       u32 real_qp_num;
+       u32 token;
+       spinlock_t spinlock_s;
+       spinlock_t spinlock_r;
+       u32 sq_max_inline_data_size;
+       struct ipz_qp_handle ipz_qp_handle;
+       struct ehca_pfqp pf;
+       struct ib_qp_init_attr init_attr;
+       u64 uspace_squeue;
+       u64 uspace_rqueue;
+       u64 uspace_fwh;
+       struct ehca_cq *send_cq;
+       struct ehca_cq *recv_cq;
+       unsigned int sqerr_purgeflag;
+       struct hlist_node list_entries;
+};
+
+/* must be power of 2 */
+#define QP_HASHTAB_LEN 8
+
+struct ehca_cq {
+       struct ib_cq ib_cq;
+       struct ipz_queue ipz_queue;
+       struct h_galpas galpas;
+       spinlock_t spinlock;
+       u32 cq_number;
+       u32 token;
+       u32 nr_of_entries;
+       struct ipz_cq_handle ipz_cq_handle;
+       struct ehca_pfcq pf;
+       spinlock_t cb_lock;
+       u64 uspace_queue;
+       u64 uspace_fwh;
+       struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
+       struct list_head entry;
+       u32 nr_callbacks;
+       spinlock_t task_lock;
+       u32 ownpid;
+};
+
+enum ehca_mr_flag {
+       EHCA_MR_FLAG_FMR = 0x80000000,   /* FMR, created with ehca_alloc_fmr */
+       EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR                           */
+};
+
+struct ehca_mr {
+       union {
+               struct ib_mr ib_mr;     /* must always be first in ehca_mr */
+               struct ib_fmr ib_fmr;   /* must always be first in ehca_mr */
+       } ib;
+       spinlock_t mrlock;
+
+       enum ehca_mr_flag flags;
+       u32 num_pages;          /* number of MR pages */
+       u32 num_4k;             /* number of 4k "page" portions to form MR */
+       int acl;                /* ACL (stored here for usage in reregister) */
+       u64 *start;             /* virtual start address (stored here for */
+                               /* usage in reregister) */
+       u64 size;               /* size (stored here for usage in reregister) */
+       u32 fmr_page_size;      /* page size for FMR */
+       u32 fmr_max_pages;      /* max pages for FMR */
+       u32 fmr_max_maps;       /* max outstanding maps for FMR */
+       u32 fmr_map_cnt;        /* map counter for FMR */
+       /* fw specific data */
+       struct ipz_mrmw_handle ipz_mr_handle;   /* MR handle for h-calls */
+       struct h_galpas galpas;
+       /* data for userspace bridge */
+       u32 nr_of_pages;
+       void *pagearray;
+};
+
+struct ehca_mw {
+       struct ib_mw ib_mw;     /* gen2 mw, must always be first in ehca_mw */
+       spinlock_t mwlock;
+
+       u8 never_bound;         /* indication MW was never bound */
+       struct ipz_mrmw_handle ipz_mw_handle;   /* MW handle for h-calls */
+       struct h_galpas galpas;
+};
+
+enum ehca_mr_pgi_type {
+       EHCA_MR_PGI_PHYS   = 1,  /* type of ehca_reg_phys_mr,
+                                 * ehca_rereg_phys_mr,
+                                 * ehca_reg_internal_maxmr */
+       EHCA_MR_PGI_USER   = 2,  /* type of ehca_reg_user_mr */
+       EHCA_MR_PGI_FMR    = 3   /* type of ehca_map_phys_fmr */
+};
+
+struct ehca_mr_pginfo {
+       enum ehca_mr_pgi_type type;
+       u64 num_pages;
+       u64 page_cnt;
+       u64 num_4k;       /* number of 4k "page" portions */
+       u64 page_4k_cnt;  /* counter for 4k "page" portions */
+       u64 next_4k;      /* next 4k "page" portion in buffer/chunk/listelem */
+
+       /* type EHCA_MR_PGI_PHYS section */
+       int num_phys_buf;
+       struct ib_phys_buf *phys_buf_array;
+       u64 next_buf;
+
+       /* type EHCA_MR_PGI_USER section */
+       struct ib_umem *region;
+       struct ib_umem_chunk *next_chunk;
+       u64 next_nmap;
+
+       /* type EHCA_MR_PGI_FMR section */
+       u64 *page_list;
+       u64 next_listelem;
+       /* next_4k also used within EHCA_MR_PGI_FMR */
+};
+
+/* output parameters for MR/FMR hipz calls */
+struct ehca_mr_hipzout_parms {
+       struct ipz_mrmw_handle handle;
+       u32 lkey;
+       u32 rkey;
+       u64 len;
+       u64 vaddr;
+       u32 acl;
+};
+
+/* output parameters for MW hipz calls */
+struct ehca_mw_hipzout_parms {
+       struct ipz_mrmw_handle handle;
+       u32 rkey;
+};
+
+struct ehca_av {
+       struct ib_ah ib_ah;
+       struct ehca_ud_av av;
+};
+
+struct ehca_ucontext {
+       struct ib_ucontext ib_ucontext;
+};
+
+struct ehca_module *ehca_module_new(void);
+
+int ehca_module_delete(struct ehca_module *me);
+
+int ehca_eq_ctor(struct ehca_eq *eq);
+
+int ehca_eq_dtor(struct ehca_eq *eq);
+
+struct ehca_shca *ehca_shca_new(void);
+
+int ehca_shca_delete(struct ehca_shca *me);
+
+struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor);
+
+int ehca_init_pd_cache(void);
+void ehca_cleanup_pd_cache(void);
+int ehca_init_cq_cache(void);
+void ehca_cleanup_cq_cache(void);
+int ehca_init_qp_cache(void);
+void ehca_cleanup_qp_cache(void);
+int ehca_init_av_cache(void);
+void ehca_cleanup_av_cache(void);
+int ehca_init_mrmw_cache(void);
+void ehca_cleanup_mrmw_cache(void);
+
+extern spinlock_t ehca_qp_idr_lock;
+extern spinlock_t ehca_cq_idr_lock;
+extern struct idr ehca_qp_idr;
+extern struct idr ehca_cq_idr;
+
+extern int ehca_static_rate;
+extern int ehca_port_act_time;
+extern int ehca_use_hp_mr;
+
+struct ipzu_queue_resp {
+       u64 queue;        /* points to first queue entry */
+       u32 qe_size;      /* queue entry size */
+       u32 act_nr_of_sg;
+       u32 queue_length; /* queue length allocated in bytes */
+       u32 pagesize;
+       u32 toggle_state;
+       u32 dummy; /* padding for 8 byte alignment */
+};
+
+struct ehca_create_cq_resp {
+       u32 cq_number;
+       u32 token;
+       struct ipzu_queue_resp ipz_queue;
+       struct h_galpas galpas;
+};
+
+struct ehca_create_qp_resp {
+       u32 qp_num;
+       u32 token;
+       u32 qp_type;
+       u32 qkey;
+       /* qp_num assigned by ehca: sqp0/1 may have got different numbers */
+       u32 real_qp_num;
+       u32 dummy; /* padding for 8 byte alignment */
+       struct ipzu_queue_resp ipz_squeue;
+       struct ipzu_queue_resp ipz_rqueue;
+       struct h_galpas galpas;
+};
+
+struct ehca_alloc_cq_parms {
+       u32 nr_cqe;
+       u32 act_nr_of_entries;
+       u32 act_pages;
+       struct ipz_eq_handle eq_handle;
+};
+
+struct ehca_alloc_qp_parms {
+       int servicetype;
+       int sigtype;
+       int daqp_ctrl;
+       int max_send_sge;
+       int max_recv_sge;
+       int ud_av_l_key_ctl;
+
+       u16 act_nr_send_wqes;
+       u16 act_nr_recv_wqes;
+       u8  act_nr_recv_sges;
+       u8  act_nr_send_sges;
+
+       u32 nr_rq_pages;
+       u32 nr_sq_pages;
+
+       struct ipz_eq_handle ipz_eq_handle;
+       struct ipz_pd pd;
+};
+
+int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
+int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
+struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
new file mode 100644 (file)
index 0000000..5665f21
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  pSeries interface definitions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_PSERIES_H__
+#define __EHCA_CLASSES_PSERIES_H__
+
+#include "hcp_phyp.h"
+#include "ipz_pt_fn.h"
+
+
+struct ehca_pfqp {
+       struct ipz_qpt sqpt;
+       struct ipz_qpt rqpt;
+};
+
+struct ehca_pfcq {
+       struct ipz_qpt qpt;
+       u32 cqnr;
+};
+
+struct ehca_pfeq {
+       struct ipz_qpt qpt;
+       struct h_galpa galpa;
+       u32 eqnr;
+};
+
+struct ipz_adapter_handle {
+       u64 handle;
+};
+
+struct ipz_cq_handle {
+       u64 handle;
+};
+
+struct ipz_eq_handle {
+       u64 handle;
+};
+
+struct ipz_qp_handle {
+       u64 handle;
+};
+struct ipz_mrmw_handle {
+       u64 handle;
+};
+
+struct ipz_pd {
+       u32 value;
+};
+
+struct hcp_modify_qp_control_block {
+       u32 qkey;                      /* 00 */
+       u32 rdd;                       /* reliable datagram domain */
+       u32 send_psn;                  /* 02 */
+       u32 receive_psn;               /* 03 */
+       u32 prim_phys_port;            /* 04 */
+       u32 alt_phys_port;             /* 05 */
+       u32 prim_p_key_idx;            /* 06 */
+       u32 alt_p_key_idx;             /* 07 */
+       u32 rdma_atomic_ctrl;          /* 08 */
+       u32 qp_state;                  /* 09 */
+       u32 reserved_10;               /* 10 */
+       u32 rdma_nr_atomic_resp_res;   /* 11 */
+       u32 path_migration_state;      /* 12 */
+       u32 rdma_atomic_outst_dest_qp; /* 13 */
+       u32 dest_qp_nr;                /* 14 */
+       u32 min_rnr_nak_timer_field;   /* 15 */
+       u32 service_level;             /* 16 */
+       u32 send_grh_flag;             /* 17 */
+       u32 retry_count;               /* 18 */
+       u32 timeout;                   /* 19 */
+       u32 path_mtu;                  /* 20 */
+       u32 max_static_rate;           /* 21 */
+       u32 dlid;                      /* 22 */
+       u32 rnr_retry_count;           /* 23 */
+       u32 source_path_bits;          /* 24 */
+       u32 traffic_class;             /* 25 */
+       u32 hop_limit;                 /* 26 */
+       u32 source_gid_idx;            /* 27 */
+       u32 flow_label;                /* 28 */
+       u32 reserved_29;               /* 29 */
+       union {                        /* 30 */
+               u64 dw[2];
+               u8 byte[16];
+       } dest_gid;
+       u32 service_level_al;          /* 34 */
+       u32 send_grh_flag_al;          /* 35 */
+       u32 retry_count_al;            /* 36 */
+       u32 timeout_al;                /* 37 */
+       u32 max_static_rate_al;        /* 38 */
+       u32 dlid_al;                   /* 39 */
+       u32 rnr_retry_count_al;        /* 40 */
+       u32 source_path_bits_al;       /* 41 */
+       u32 traffic_class_al;          /* 42 */
+       u32 hop_limit_al;              /* 43 */
+       u32 source_gid_idx_al;         /* 44 */
+       u32 flow_label_al;             /* 45 */
+       u32 reserved_46;               /* 46 */
+       u32 reserved_47;               /* 47 */
+       union {                        /* 48 */
+               u64 dw[2];
+               u8 byte[16];
+       } dest_gid_al;
+       u32 max_nr_outst_send_wr;      /* 52 */
+       u32 max_nr_outst_recv_wr;      /* 53 */
+       u32 disable_ete_credit_check;  /* 54 */
+       u32 qp_number;                 /* 55 */
+       u64 send_queue_handle;         /* 56 */
+       u64 recv_queue_handle;         /* 58 */
+       u32 actual_nr_sges_in_sq_wqe;  /* 60 */
+       u32 actual_nr_sges_in_rq_wqe;  /* 61 */
+       u32 qp_enable;                 /* 62 */
+       u32 curr_srq_limit;            /* 63 */
+       u64 qp_aff_asyn_ev_log_reg;    /* 64 */
+       u64 shared_rq_hndl;            /* 66 */
+       u64 trigg_doorbell_qp_hndl;    /* 68 */
+       u32 reserved_70_127[58];       /* 70 */
+};
+
+#define MQPCB_MASK_QKEY                         EHCA_BMASK_IBM(0,0)
+#define MQPCB_MASK_SEND_PSN                     EHCA_BMASK_IBM(2,2)
+#define MQPCB_MASK_RECEIVE_PSN                  EHCA_BMASK_IBM(3,3)
+#define MQPCB_MASK_PRIM_PHYS_PORT               EHCA_BMASK_IBM(4,4)
+#define MQPCB_PRIM_PHYS_PORT                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_ALT_PHYS_PORT                EHCA_BMASK_IBM(5,5)
+#define MQPCB_MASK_PRIM_P_KEY_IDX               EHCA_BMASK_IBM(6,6)
+#define MQPCB_PRIM_P_KEY_IDX                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_ALT_P_KEY_IDX                EHCA_BMASK_IBM(7,7)
+#define MQPCB_MASK_RDMA_ATOMIC_CTRL             EHCA_BMASK_IBM(8,8)
+#define MQPCB_MASK_QP_STATE                     EHCA_BMASK_IBM(9,9)
+#define MQPCB_QP_STATE                          EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES      EHCA_BMASK_IBM(11,11)
+#define MQPCB_MASK_PATH_MIGRATION_STATE         EHCA_BMASK_IBM(12,12)
+#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP    EHCA_BMASK_IBM(13,13)
+#define MQPCB_MASK_DEST_QP_NR                   EHCA_BMASK_IBM(14,14)
+#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD      EHCA_BMASK_IBM(15,15)
+#define MQPCB_MASK_SERVICE_LEVEL                EHCA_BMASK_IBM(16,16)
+#define MQPCB_MASK_SEND_GRH_FLAG                EHCA_BMASK_IBM(17,17)
+#define MQPCB_MASK_RETRY_COUNT                  EHCA_BMASK_IBM(18,18)
+#define MQPCB_MASK_TIMEOUT                      EHCA_BMASK_IBM(19,19)
+#define MQPCB_MASK_PATH_MTU                     EHCA_BMASK_IBM(20,20)
+#define MQPCB_PATH_MTU                          EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_MAX_STATIC_RATE              EHCA_BMASK_IBM(21,21)
+#define MQPCB_MAX_STATIC_RATE                   EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_DLID                         EHCA_BMASK_IBM(22,22)
+#define MQPCB_DLID                              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_RNR_RETRY_COUNT              EHCA_BMASK_IBM(23,23)
+#define MQPCB_RNR_RETRY_COUNT                   EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_SOURCE_PATH_BITS             EHCA_BMASK_IBM(24,24)
+#define MQPCB_SOURCE_PATH_BITS                  EHCA_BMASK_IBM(25,31)
+#define MQPCB_MASK_TRAFFIC_CLASS                EHCA_BMASK_IBM(25,25)
+#define MQPCB_TRAFFIC_CLASS                     EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_HOP_LIMIT                    EHCA_BMASK_IBM(26,26)
+#define MQPCB_HOP_LIMIT                         EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_SOURCE_GID_IDX               EHCA_BMASK_IBM(27,27)
+#define MQPCB_SOURCE_GID_IDX                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_FLOW_LABEL                   EHCA_BMASK_IBM(28,28)
+#define MQPCB_FLOW_LABEL                        EHCA_BMASK_IBM(12,31)
+#define MQPCB_MASK_DEST_GID                     EHCA_BMASK_IBM(30,30)
+#define MQPCB_MASK_SERVICE_LEVEL_AL             EHCA_BMASK_IBM(31,31)
+#define MQPCB_SERVICE_LEVEL_AL                  EHCA_BMASK_IBM(28,31)
+#define MQPCB_MASK_SEND_GRH_FLAG_AL             EHCA_BMASK_IBM(32,32)
+#define MQPCB_SEND_GRH_FLAG_AL                  EHCA_BMASK_IBM(31,31)
+#define MQPCB_MASK_RETRY_COUNT_AL               EHCA_BMASK_IBM(33,33)
+#define MQPCB_RETRY_COUNT_AL                    EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_TIMEOUT_AL                   EHCA_BMASK_IBM(34,34)
+#define MQPCB_TIMEOUT_AL                        EHCA_BMASK_IBM(27,31)
+#define MQPCB_MASK_MAX_STATIC_RATE_AL           EHCA_BMASK_IBM(35,35)
+#define MQPCB_MAX_STATIC_RATE_AL                EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_DLID_AL                      EHCA_BMASK_IBM(36,36)
+#define MQPCB_DLID_AL                           EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_RNR_RETRY_COUNT_AL           EHCA_BMASK_IBM(37,37)
+#define MQPCB_RNR_RETRY_COUNT_AL                EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_SOURCE_PATH_BITS_AL          EHCA_BMASK_IBM(38,38)
+#define MQPCB_SOURCE_PATH_BITS_AL               EHCA_BMASK_IBM(25,31)
+#define MQPCB_MASK_TRAFFIC_CLASS_AL             EHCA_BMASK_IBM(39,39)
+#define MQPCB_TRAFFIC_CLASS_AL                  EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_HOP_LIMIT_AL                 EHCA_BMASK_IBM(40,40)
+#define MQPCB_HOP_LIMIT_AL                      EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_SOURCE_GID_IDX_AL            EHCA_BMASK_IBM(41,41)
+#define MQPCB_SOURCE_GID_IDX_AL                 EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_FLOW_LABEL_AL                EHCA_BMASK_IBM(42,42)
+#define MQPCB_FLOW_LABEL_AL                     EHCA_BMASK_IBM(12,31)
+#define MQPCB_MASK_DEST_GID_AL                  EHCA_BMASK_IBM(44,44)
+#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR         EHCA_BMASK_IBM(45,45)
+#define MQPCB_MAX_NR_OUTST_SEND_WR              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR         EHCA_BMASK_IBM(46,46)
+#define MQPCB_MAX_NR_OUTST_RECV_WR              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK     EHCA_BMASK_IBM(47,47)
+#define MQPCB_DISABLE_ETE_CREDIT_CHECK          EHCA_BMASK_IBM(31,31)
+#define MQPCB_QP_NUMBER                         EHCA_BMASK_IBM(8,31)
+#define MQPCB_MASK_QP_ENABLE                    EHCA_BMASK_IBM(48,48)
+#define MQPCB_QP_ENABLE                         EHCA_BMASK_IBM(31,31)
+#define MQPCB_MASK_CURR_SQR_LIMIT               EHCA_BMASK_IBM(49,49)
+#define MQPCB_CURR_SQR_LIMIT                    EHCA_BMASK_IBM(15,31)
+#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG       EHCA_BMASK_IBM(50,50)
+#define MQPCB_MASK_SHARED_RQ_HNDL               EHCA_BMASK_IBM(51,51)
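+
+/*
+ * Usage sketch (illustrative only): the MQPCB_MASK_* constants select
+ * which control block fields a modify-QP h-call should apply, while the
+ * unprefixed MQPCB_* constants locate the value inside its word, e.g.
+ * (assuming the EHCA_BMASK_SET helper from ehca_tools.h):
+ *
+ *     mqpcb->qp_state = EHCA_BMASK_SET(MQPCB_QP_STATE, new_state);
+ *     update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+ *
+ * new_state and update_mask are placeholder names.
+ */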
+
+#endif /* __EHCA_CLASSES_PSERIES_H__ */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
new file mode 100644 (file)
index 0000000..458fe19
--- /dev/null
@@ -0,0 +1,427 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Completion queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_iverbs.h"
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "hcp_if.h"
+
+static struct kmem_cache *cq_cache;
+
+int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
+{
+       unsigned int qp_num = qp->real_qp_num;
+       unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
+       unsigned long spl_flags;
+
+       spin_lock_irqsave(&cq->spinlock, spl_flags);
+       hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
+       spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+
+       ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
+                cq->cq_number, qp_num);
+
+       return 0;
+}
+
+int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
+{
+       int ret = -EINVAL;
+       unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
+       struct hlist_node *iter;
+       struct ehca_qp *qp;
+       unsigned long spl_flags;
+
+       spin_lock_irqsave(&cq->spinlock, spl_flags);
+       hlist_for_each(iter, &cq->qp_hashtab[key]) {
+               qp = hlist_entry(iter, struct ehca_qp, list_entries);
+               if (qp->real_qp_num == real_qp_num) {
+                       hlist_del(iter);
+                       ehca_dbg(cq->ib_cq.device,
+                                "removed qp from cq .cq_num=%x real_qp_num=%x",
+                                cq->cq_number, real_qp_num);
+                       ret = 0;
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+       if (ret)
+               ehca_err(cq->ib_cq.device,
+                        "qp not found cq_num=%x real_qp_num=%x",
+                        cq->cq_number, real_qp_num);
+
+       return ret;
+}
+
+struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
+{
+       struct ehca_qp *ret = NULL;
+       unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
+       struct hlist_node *iter;
+       struct ehca_qp *qp;
+       hlist_for_each(iter, &cq->qp_hashtab[key]) {
+               qp = hlist_entry(iter, struct ehca_qp, list_entries);
+               if (qp->real_qp_num == real_qp_num) {
+                       ret = qp;
+                       break;
+               }
+       }
+       return ret;
+}
+
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+                            struct ib_ucontext *context,
+                            struct ib_udata *udata)
+{
+       static const u32 additional_cqe = 20;
+       struct ib_cq *cq;
+       struct ehca_cq *my_cq;
+       struct ehca_shca *shca =
+               container_of(device, struct ehca_shca, ib_device);
+       struct ipz_adapter_handle adapter_handle;
+       struct ehca_alloc_cq_parms param; /* h_call's out parameters */
+       struct h_galpa gal;
+       void *vpage;
+       u32 counter;
+       u64 rpage, cqx_fec, h_ret;
+       int ipz_rc, ret, i;
+       unsigned long flags;
+
+       if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
+               return ERR_PTR(-EINVAL);
+
+       my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL);
+       if (!my_cq) {
+               ehca_err(device, "Out of memory for ehca_cq struct device=%p",
+                        device);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       memset(my_cq, 0, sizeof(struct ehca_cq));
+       memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
+
+       spin_lock_init(&my_cq->spinlock);
+       spin_lock_init(&my_cq->cb_lock);
+       spin_lock_init(&my_cq->task_lock);
+       my_cq->ownpid = current->tgid;
+
+       cq = &my_cq->ib_cq;
+
+       adapter_handle = shca->ipz_hca_handle;
+       param.eq_handle = shca->eq.ipz_eq_handle;
+
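+       /*
+        * Reserve a token for this CQ in the global idr; idr_get_new() may
+        * return -EAGAIN if the preallocation was consumed, so retry.
+        */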
+       do {
+               if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) {
+                       cq = ERR_PTR(-ENOMEM);
+                       ehca_err(device, "Can't reserve idr nr. device=%p",
+                                device);
+                       goto create_cq_exit1;
+               }
+
+               spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+               ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
+               spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+       } while (ret == -EAGAIN);
+
+       if (ret) {
+               cq = ERR_PTR(-ENOMEM);
+               ehca_err(device, "Can't allocate new idr entry. device=%p",
+                        device);
+               goto create_cq_exit1;
+       }
+
+       /*
+        * The CQ's maximum depth is 4GB-64, but we need an additional 20
+        * entries as a buffer for receiving error CQEs.
+        */
+       param.nr_cqe = cqe + additional_cqe;
+       h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param);
+
+       if (h_ret != H_SUCCESS) {
+               ehca_err(device, "hipz_h_alloc_resource_cq() failed "
+                        "h_ret=%lx device=%p", h_ret, device);
+               cq = ERR_PTR(ehca2ib_return_code(h_ret));
+               goto create_cq_exit2;
+       }
+
+       ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages,
+                               EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0);
+       if (!ipz_rc) {
+               ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
+                        ipz_rc, device);
+               cq = ERR_PTR(-EINVAL);
+               goto create_cq_exit3;
+       }
+
+       for (counter = 0; counter < param.act_pages; counter++) {
+               vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
+               if (!vpage) {
+                       ehca_err(device, "ipz_qpageit_get_inc() "
+                                "returns NULL device=%p", device);
+                       cq = ERR_PTR(-EAGAIN);
+                       goto create_cq_exit4;
+               }
+               rpage = virt_to_abs(vpage);
+
+               h_ret = hipz_h_register_rpage_cq(adapter_handle,
+                                                my_cq->ipz_cq_handle,
+                                                &my_cq->pf,
+                                                0,
+                                                0,
+                                                rpage,
+                                                1,
+                                                my_cq->galpas.kernel);
+
+               if (h_ret < H_SUCCESS) {
+                       ehca_err(device, "hipz_h_register_rpage_cq() failed "
+                                "ehca_cq=%p cq_num=%x h_ret=%lx counter=%i "
+                                "act_pages=%i", my_cq, my_cq->cq_number,
+                                h_ret, counter, param.act_pages);
+                       cq = ERR_PTR(-EINVAL);
+                       goto create_cq_exit4;
+               }
+
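+               /*
+                * On the last page the page iterator must be exhausted and
+                * the hcall must report overall success; on any other page
+                * only H_PAGE_REGISTERED is expected.
+                */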
+               if (counter == (param.act_pages - 1)) {
+                       vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
+                       if ((h_ret != H_SUCCESS) || vpage) {
+                               ehca_err(device, "Registration of pages not "
+                                        "complete ehca_cq=%p cq_num=%x "
+                                        "h_ret=%lx", my_cq, my_cq->cq_number,
+                                        h_ret);
+                               cq = ERR_PTR(-EAGAIN);
+                               goto create_cq_exit4;
+                       }
+               } else {
+                       if (h_ret != H_PAGE_REGISTERED) {
+                               ehca_err(device, "Registration of page failed "
+                                        "ehca_cq=%p cq_num=%x h_ret=%lx "
+                                        "counter=%i act_pages=%i",
+                                        my_cq, my_cq->cq_number,
+                                        h_ret, counter, param.act_pages);
+                               cq = ERR_PTR(-ENOMEM);
+                               goto create_cq_exit4;
+                       }
+               }
+       }
+
+       ipz_qeit_reset(&my_cq->ipz_queue);
+
+       gal = my_cq->galpas.kernel;
+       cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
+       ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%lx",
+                my_cq, my_cq->cq_number, cqx_fec);
+
+       my_cq->ib_cq.cqe = my_cq->nr_of_entries =
+               param.act_nr_of_entries - additional_cqe;
+       my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff;
+
+       for (i = 0; i < QP_HASHTAB_LEN; i++)
+               INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
+
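+       /*
+        * For userspace CQs, hand back the queue geometry and map the queue
+        * pages and the firmware GAL page into the caller's address space.
+        */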
+       if (context) {
+               struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
+               struct ehca_create_cq_resp resp;
+               struct vm_area_struct *vma;
+               memset(&resp, 0, sizeof(resp));
+               resp.cq_number = my_cq->cq_number;
+               resp.token = my_cq->token;
+               resp.ipz_queue.qe_size = ipz_queue->qe_size;
+               resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg;
+               resp.ipz_queue.queue_length = ipz_queue->queue_length;
+               resp.ipz_queue.pagesize = ipz_queue->pagesize;
+               resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
+               ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000,
+                                      ipz_queue->queue_length,
+                                      (void**)&resp.ipz_queue.queue,
+                                      &vma);
+               if (ret) {
+                       ehca_err(device, "Could not mmap queue pages");
+                       cq = ERR_PTR(ret);
+                       goto create_cq_exit4;
+               }
+               my_cq->uspace_queue = resp.ipz_queue.queue;
+               resp.galpas = my_cq->galpas;
+               ret = ehca_mmap_register(my_cq->galpas.user.fw_handle,
+                                        (void**)&resp.galpas.kernel.fw_handle,
+                                        &vma);
+               if (ret) {
+                       ehca_err(device, "Could not mmap fw_handle");
+                       cq = ERR_PTR(ret);
+                       goto create_cq_exit5;
+               }
+               my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
+               if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
+                       ehca_err(device, "Copy to udata failed.");
+                       cq = ERR_PTR(-EFAULT);
+                       goto create_cq_exit6;
+               }
+       }
+
+       return cq;
+
+create_cq_exit6:
+       ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
+
+create_cq_exit5:
+       ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length);
+
+create_cq_exit4:
+       ipz_queue_dtor(&my_cq->ipz_queue);
+
+create_cq_exit3:
+       h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
+       if (h_ret != H_SUCCESS)
+               ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
+                        "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret);
+
+create_cq_exit2:
+       spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+       idr_remove(&ehca_cq_idr, my_cq->token);
+       spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+create_cq_exit1:
+       kmem_cache_free(cq_cache, my_cq);
+
+       return cq;
+}
+
+int ehca_destroy_cq(struct ib_cq *cq)
+{
+       u64 h_ret;
+       int ret;
+       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+       int cq_num = my_cq->cq_number;
+       struct ib_device *device = cq->device;
+       struct ehca_shca *shca = container_of(device, struct ehca_shca,
+                                             ib_device);
+       struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
+       u32 cur_pid = current->tgid;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+       while (my_cq->nr_callbacks)
+               yield();
+
+       idr_remove(&ehca_cq_idr, my_cq->token);
+       spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+       if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
+               ehca_err(device, "Invalid caller pid=%x ownpid=%x",
+                        cur_pid, my_cq->ownpid);
+               return -EINVAL;
+       }
+
+       /* unmap the queue and firmware handle if they were mapped to user space */
+       if (my_cq->uspace_queue) {
+               ret = ehca_munmap(my_cq->uspace_queue,
+                                 my_cq->ipz_queue.queue_length);
+               if (ret)
+                       ehca_err(device, "Could not munmap queue ehca_cq=%p "
+                                "cq_num=%x", my_cq, cq_num);
+               ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
+               if (ret)
+                       ehca_err(device, "Could not munmap fwh ehca_cq=%p "
+                                "cq_num=%x", my_cq, cq_num);
+       }
+
+       h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
+       if (h_ret == H_R_STATE) {
+               /* cq in err: read err data and destroy it forcibly */
+               ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%lx in err "
+                        "state. Try to delete it forcibly.",
+                        my_cq, cq_num, my_cq->ipz_cq_handle.handle);
+               ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
+               h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
+               if (h_ret == H_SUCCESS)
+                       ehca_dbg(device, "cq_num=%x deleted successfully.",
+                                cq_num);
+       }
+       if (h_ret != H_SUCCESS) {
+               ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lx "
+                        "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
+               return ehca2ib_return_code(h_ret);
+       }
+       ipz_queue_dtor(&my_cq->ipz_queue);
+       kmem_cache_free(cq_cache, my_cq);
+
+       return 0;
+}
+
+int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+{
+       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+       u32 cur_pid = current->tgid;
+
+       if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
+               ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x",
+                        cur_pid, my_cq->ownpid);
+               return -EINVAL;
+       }
+
+       /* TODO: proper resize needs to be done */
+       ehca_err(cq->device, "not implemented yet");
+
+       return -EFAULT;
+}
+
+int ehca_init_cq_cache(void)
+{
+       cq_cache = kmem_cache_create("ehca_cache_cq",
+                                    sizeof(struct ehca_cq), 0,
+                                    SLAB_HWCACHE_ALIGN,
+                                    NULL, NULL);
+       if (!cq_cache)
+               return -ENOMEM;
+       return 0;
+}
+
+void ehca_cleanup_cq_cache(void)
+{
+       if (cq_cache)
+               kmem_cache_destroy(cq_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
new file mode 100644 (file)
index 0000000..5281dec
--- /dev/null
@@ -0,0 +1,185 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Event queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "ehca_iverbs.h"
+#include "ehca_qes.h"
+#include "hcp_if.h"
+#include "ipz_pt_fn.h"
+
+int ehca_create_eq(struct ehca_shca *shca,
+                  struct ehca_eq *eq,
+                  const enum ehca_eq_type type, const u32 length)
+{
+       u64 ret;
+       u32 nr_pages;
+       u32 i;
+       void *vpage;
+       struct ib_device *ib_dev = &shca->ib_device;
+
+       spin_lock_init(&eq->spinlock);
+       eq->is_initialized = 0;
+
+       if (type != EHCA_EQ && type != EHCA_NEQ) {
+               ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq);
+               return -EINVAL;
+       }
+       if (!length) {
+               ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq);
+               return -EINVAL;
+       }
+
+       ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
+                                      &eq->pf,
+                                      type,
+                                      length,
+                                      &eq->ipz_eq_handle,
+                                      &eq->length,
+                                      &nr_pages, &eq->ist);
+
+       if (ret != H_SUCCESS) {
+               ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
+               return -EINVAL;
+       }
+
+       ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages,
+                            EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0);
+       if (!ret) {
+               ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
+               goto create_eq_exit1;
+       }
+
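+       /* register every queue page with the hypervisor, one rpage at a time */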
+       for (i = 0; i < nr_pages; i++) {
+               u64 rpage;
+
+               if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) {
+                       ret = H_RESOURCE;
+                       goto create_eq_exit2;
+               }
+
+               rpage = virt_to_abs(vpage);
+               ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
+                                              eq->ipz_eq_handle,
+                                              &eq->pf,
+                                              0, 0, rpage, 1);
+
+               if (i == (nr_pages - 1)) {
+                       /* last page */
+                       vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
+                       if (ret != H_SUCCESS || vpage)
+                               goto create_eq_exit2;
+               } else {
+                       if (ret != H_PAGE_REGISTERED || !vpage)
+                               goto create_eq_exit2;
+               }
+       }
+
+       ipz_qeit_reset(&eq->ipz_queue);
+
+       /* register interrupt handlers and initialize work queues */
+       if (type == EHCA_EQ) {
+               ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_eq,
+                                         SA_INTERRUPT, "ehca_eq",
+                                         (void *)shca);
+               if (ret < 0)
+                       ehca_err(ib_dev, "Can't map interrupt handler.");
+
+               tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
+       } else if (type == EHCA_NEQ) {
+               ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_neq,
+                                         SA_INTERRUPT, "ehca_neq",
+                                         (void *)shca);
+               if (ret < 0)
+                       ehca_err(ib_dev, "Can't map interrupt handler.");
+
+               tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
+       }
+
+       eq->is_initialized = 1;
+
+       return 0;
+
+create_eq_exit2:
+       ipz_queue_dtor(&eq->ipz_queue);
+
+create_eq_exit1:
+       hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+
+       return -EINVAL;
+}
+
+void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq)
+{
+       unsigned long flags;
+       void *eqe;
+
+       spin_lock_irqsave(&eq->spinlock, flags);
+       eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue);
+       spin_unlock_irqrestore(&eq->spinlock, flags);
+
+       return eqe;
+}
+
+int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
+{
+       unsigned long flags;
+       u64 h_ret;
+
+       spin_lock_irqsave(&eq->spinlock, flags);
+       ibmebus_free_irq(NULL, eq->ist, (void *)shca);
+
+       h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+
+       spin_unlock_irqrestore(&eq->spinlock, flags);
+
+       if (h_ret != H_SUCCESS) {
+               ehca_err(&shca->ib_device, "Can't free EQ resources.");
+               return -EINVAL;
+       }
+       ipz_queue_dtor(&eq->ipz_queue);
+
+       return 0;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
new file mode 100644 (file)
index 0000000..5eae6ac
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HCA query functions
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
+{
+       int ret = 0;
+       struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+                                             ib_device);
+       struct hipz_query_hca *rblock;
+
+       rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+       if (!rblock) {
+               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+               return -ENOMEM;
+       }
+
+       if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
+               ehca_err(&shca->ib_device, "Can't query device properties");
+               ret = -EINVAL;
+               goto query_device1;
+       }
+
+       memset(props, 0, sizeof(struct ib_device_attr));
+       props->fw_ver          = rblock->hw_ver;
+       props->max_mr_size     = rblock->max_mr_size;
+       props->vendor_id       = rblock->vendor_id >> 8;
+       props->vendor_part_id  = rblock->vendor_part_id >> 16;
+       props->hw_ver          = rblock->hw_ver;
+       props->max_qp          = min_t(int, rblock->max_qp, INT_MAX);
+       props->max_qp_wr       = min_t(int, rblock->max_wqes_wq, INT_MAX);
+       props->max_sge         = min_t(int, rblock->max_sge, INT_MAX);
+       props->max_sge_rd      = min_t(int, rblock->max_sge_rd, INT_MAX);
+       props->max_cq          = min_t(int, rblock->max_cq, INT_MAX);
+       props->max_cqe         = min_t(int, rblock->max_cqe, INT_MAX);
+       props->max_mr          = min_t(int, rblock->max_mr, INT_MAX);
+       props->max_mw          = min_t(int, rblock->max_mw, INT_MAX);
+       props->max_pd          = min_t(int, rblock->max_pd, INT_MAX);
+       props->max_ah          = min_t(int, rblock->max_ah, INT_MAX);
+       props->max_fmr         = min_t(int, rblock->max_mr, INT_MAX);
+       props->max_srq         = 0;
+       props->max_srq_wr      = 0;
+       props->max_srq_sge     = 0;
+       props->max_pkeys       = 16;
+       props->local_ca_ack_delay
+               = rblock->local_ca_ack_delay;
+       props->max_raw_ipv6_qp
+               = min_t(int, rblock->max_raw_ipv6_qp, INT_MAX);
+       props->max_raw_ethy_qp
+               = min_t(int, rblock->max_raw_ethy_qp, INT_MAX);
+       props->max_mcast_grp
+               = min_t(int, rblock->max_mcast_grp, INT_MAX);
+       props->max_mcast_qp_attach
+               = min_t(int, rblock->max_mcast_qp_attach, INT_MAX);
+       props->max_total_mcast_qp_attach
+               = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX);
+
+query_device1:
+       kfree(rblock);
+
+       return ret;
+}
+
+int ehca_query_port(struct ib_device *ibdev,
+                   u8 port, struct ib_port_attr *props)
+{
+       int ret = 0;
+       struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+                                             ib_device);
+       struct hipz_query_port *rblock;
+
+       rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+       if (!rblock) {
+               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+               return -ENOMEM;
+       }
+
+       if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+               ehca_err(&shca->ib_device, "Can't query port properties");
+               ret = -EINVAL;
+               goto query_port1;
+       }
+
+       memset(props, 0, sizeof(struct ib_port_attr));
+       props->state = rblock->state;
+
+       switch (rblock->max_mtu) {
+       case 0x1:
+               props->active_mtu = props->max_mtu = IB_MTU_256;
+               break;
+       case 0x2:
+               props->active_mtu = props->max_mtu = IB_MTU_512;
+               break;
+       case 0x3:
+               props->active_mtu = props->max_mtu = IB_MTU_1024;
+               break;
+       case 0x4:
+               props->active_mtu = props->max_mtu = IB_MTU_2048;
+               break;
+       case 0x5:
+               props->active_mtu = props->max_mtu = IB_MTU_4096;
+               break;
+       default:
+               ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
+                        rblock->max_mtu);
+               break;
+       }
+
+       props->gid_tbl_len     = rblock->gid_tbl_len;
+       props->max_msg_sz      = rblock->max_msg_sz;
+       props->bad_pkey_cntr   = rblock->bad_pkey_cntr;
+       props->qkey_viol_cntr  = rblock->qkey_viol_cntr;
+       props->pkey_tbl_len    = rblock->pkey_tbl_len;
+       props->lid             = rblock->lid;
+       props->sm_lid          = rblock->sm_lid;
+       props->lmc             = rblock->lmc;
+       props->sm_sl           = rblock->sm_sl;
+       props->subnet_timeout  = rblock->subnet_timeout;
+       props->init_type_reply = rblock->init_type_reply;
+
+       props->active_width    = IB_WIDTH_12X;
+       props->active_speed    = 0x1;
+
+query_port1:
+       kfree(rblock);
+
+       return ret;
+}
+
+int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+       int ret = 0;
+       struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
+       struct hipz_query_port *rblock;
+
+       if (index > 16) {
+               ehca_err(&shca->ib_device, "Invalid index: %x.", index);
+               return -EINVAL;
+       }
+
+       rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+       if (!rblock) {
+               ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
+               return -ENOMEM;
+       }
+
+       if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+               ehca_err(&shca->ib_device, "Can't query port properties");
+               ret = -EINVAL;
+               goto query_pkey1;
+       }
+
+       memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16));
+
+query_pkey1:
+       kfree(rblock);
+
+       return ret;
+}
+
+int ehca_query_gid(struct ib_device *ibdev, u8 port,
+                  int index, union ib_gid *gid)
+{
+       int ret = 0;
+       struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+                                             ib_device);
+       struct hipz_query_port *rblock;
+
+       if (index > 255) {
+               ehca_err(&shca->ib_device, "Invalid index: %x.", index);
+               return -EINVAL;
+       }
+
+       rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+       if (!rblock) {
+               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+               return -ENOMEM;
+       }
+
+       if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+               ehca_err(&shca->ib_device, "Can't query port properties");
+               ret = -EINVAL;
+               goto query_gid1;
+       }
+
+       memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64));
+       memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64));
+
+query_gid1:
+       kfree(rblock);
+
+       return ret;
+}
+
+int ehca_modify_port(struct ib_device *ibdev,
+                    u8 port, int port_modify_mask,
+                    struct ib_port_modify *props)
+{
+       /* Not implemented yet */
+       return -EFAULT;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
new file mode 100644 (file)
index 0000000..2a65b5b
--- /dev/null
@@ -0,0 +1,762 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Functions for EQs, NEQs and interrupts
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "ehca_iverbs.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM(1,1)
+#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM(8,31)
+#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM(2,7)
+#define EQE_CQ_NUMBER          EHCA_BMASK_IBM(8,31)
+#define EQE_QP_NUMBER          EHCA_BMASK_IBM(8,31)
+#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32,63)
+#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32,63)
+
+#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM(1,1)
+#define NEQE_EVENT_CODE        EHCA_BMASK_IBM(2,7)
+#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM(8,15)
+#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)
+
+#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52,63)
+#define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0,7)
+
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+
+static void queue_comp_task(struct ehca_cq *__cq);
+
+static struct ehca_comp_pool* pool;
+static struct notifier_block comp_pool_callback_nb;
+
+#endif
+
+static inline void comp_event_callback(struct ehca_cq *cq)
+{
+       if (!cq->ib_cq.comp_handler)
+               return;
+
+       spin_lock(&cq->cb_lock);
+       cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
+       spin_unlock(&cq->cb_lock);
+
+       return;
+}
+
+static void print_error_data(struct ehca_shca *shca, void *data,
+                            u64 *rblock, int length)
+{
+       u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
+       u64 resource = rblock[1];
+
+       switch (type) {
+       case 0x1: /* Queue Pair */
+       {
+               struct ehca_qp *qp = (struct ehca_qp*)data;
+
+               /* only print error data if AER is set */
+               if (rblock[6] == 0)
+                       return;
+
+               ehca_err(&shca->ib_device,
+                        "QP 0x%x (resource=%lx) has errors.",
+                        qp->ib_qp.qp_num, resource);
+               break;
+       }
+       case 0x4: /* Completion Queue */
+       {
+               struct ehca_cq *cq = (struct ehca_cq*)data;
+
+               ehca_err(&shca->ib_device,
+                        "CQ 0x%x (resource=%lx) has errors.",
+                        cq->cq_number, resource);
+               break;
+       }
+       default:
+               ehca_err(&shca->ib_device,
+                        "Unknown error type: %lx on %s.",
+                        type, shca->ib_device.name);
+               break;
+       }
+
+       ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
+       ehca_err(&shca->ib_device, "EHCA ----- error data begin "
+                "---------------------------------------------------");
+       ehca_dmp(rblock, length, "resource=%lx", resource);
+       ehca_err(&shca->ib_device, "EHCA ----- error data end "
+                "----------------------------------------------------");
+
+       return;
+}
+
+int ehca_error_data(struct ehca_shca *shca, void *data,
+                   u64 resource)
+{
+
+       unsigned long ret;
+       u64 *rblock;
+       unsigned long block_count;
+
+       rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+       if (!rblock) {
+               ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
+               ret = -ENOMEM;
+               goto error_data1;
+       }
+
+       ret = hipz_h_error_data(shca->ipz_hca_handle,
+                               resource,
+                               rblock,
+                               &block_count);
+
+       if (ret == H_R_STATE) {
+               ehca_err(&shca->ib_device,
+                        "No error data is available: %lx.", resource);
+       } else if (ret == H_SUCCESS) {
+               int length;
+
+               length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
+
+               if (length > PAGE_SIZE)
+                       length = PAGE_SIZE;
+
+               print_error_data(shca, data, rblock, length);
+       } else {
+               ehca_err(&shca->ib_device,
+                        "Error data could not be fetched: %lx", resource);
+       }
+
+       kfree(rblock);
+
+error_data1:
+       return ret;
+
+}
+
+static void qp_event_callback(struct ehca_shca *shca,
+                             u64 eqe,
+                             enum ib_event_type event_type)
+{
+       struct ib_event event;
+       struct ehca_qp *qp;
+       unsigned long flags;
+       u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
+
+       spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+       qp = idr_find(&ehca_qp_idr, token);
+       spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+
+       if (!qp)
+               return;
+
+       ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
+
+       if (!qp->ib_qp.event_handler)
+               return;
+
+       event.device     = &shca->ib_device;
+       event.event      = event_type;
+       event.element.qp = &qp->ib_qp;
+
+       qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+
+       return;
+}
+
+static void cq_event_callback(struct ehca_shca *shca,
+                                         u64 eqe)
+{
+       struct ehca_cq *cq;
+       unsigned long flags;
+       u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
+
+       spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+       cq = idr_find(&ehca_cq_idr, token);
+       spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+       if (!cq)
+               return;
+
+       ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
+
+       return;
+}
+
+static void parse_identifier(struct ehca_shca *shca, u64 eqe)
+{
+       u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
+
+       switch (identifier) {
+       case 0x02: /* path migrated */
+               qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
+               break;
+       case 0x03: /* communication established */
+               qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
+               break;
+       case 0x04: /* send queue drained */
+               qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
+               break;
+       case 0x05: /* QP error */
+       case 0x06: /* QP error */
+               qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
+               break;
+       case 0x07: /* CQ error */
+       case 0x08: /* CQ error */
+               cq_event_callback(shca, eqe);
+               break;
+       case 0x09: /* MRMWPTE error */
+               ehca_err(&shca->ib_device, "MRMWPTE error.");
+               break;
+       case 0x0A: /* port event */
+               ehca_err(&shca->ib_device, "Port event.");
+               break;
+       case 0x0B: /* MR access error */
+               ehca_err(&shca->ib_device, "MR access error.");
+               break;
+       case 0x0C: /* EQ error */
+               ehca_err(&shca->ib_device, "EQ error.");
+               break;
+       case 0x0D: /* P/Q_Key mismatch */
+               ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
+               break;
+       case 0x10: /* sampling complete */
+               ehca_err(&shca->ib_device, "Sampling complete.");
+               break;
+       case 0x11: /* unaffiliated access error */
+               ehca_err(&shca->ib_device, "Unaffiliated access error.");
+               break;
+       case 0x12: /* path migrating error */
+               ehca_err(&shca->ib_device, "Path migration error.");
+               break;
+       case 0x13: /* interface trace stopped */
+               ehca_err(&shca->ib_device, "Interface trace stopped.");
+               break;
+       case 0x14: /* first error capture info available */
+       default:
+               ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
+                        identifier, shca->ib_device.name);
+               break;
+       }
+
+       return;
+}
+
+static void parse_ec(struct ehca_shca *shca, u64 eqe)
+{
+       struct ib_event event;
+       u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
+       u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
+
+       switch (ec) {
+       case 0x30: /* port availability change */
+               if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
+                       ehca_info(&shca->ib_device,
+                                 "port %x is active.", port);
+                       event.device = &shca->ib_device;
+                       event.event = IB_EVENT_PORT_ACTIVE;
+                       event.element.port_num = port;
+                       shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
+                       ib_dispatch_event(&event);
+               } else {
+                       ehca_info(&shca->ib_device,
+                                 "port %x is inactive.", port);
+                       event.device = &shca->ib_device;
+                       event.event = IB_EVENT_PORT_ERR;
+                       event.element.port_num = port;
+                       shca->sport[port - 1].port_state = IB_PORT_DOWN;
+                       ib_dispatch_event(&event);
+               }
+               break;
+       case 0x31:
+               /* port configuration change
+                * disruptive change is caused by
+                * LID, PKEY or SM change
+                */
+               ehca_warn(&shca->ib_device,
+                         "disruptive port %x configuration change", port);
+
+               ehca_info(&shca->ib_device,
+                        "port %x is inactive.", port);
+               event.device = &shca->ib_device;
+               event.event = IB_EVENT_PORT_ERR;
+               event.element.port_num = port;
+               shca->sport[port - 1].port_state = IB_PORT_DOWN;
+               ib_dispatch_event(&event);
+
+               ehca_info(&shca->ib_device,
+                        "port %x is active.", port);
+               event.device = &shca->ib_device;
+               event.event = IB_EVENT_PORT_ACTIVE;
+               event.element.port_num = port;
+               shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
+               ib_dispatch_event(&event);
+               break;
+       case 0x32: /* adapter malfunction */
+               ehca_err(&shca->ib_device, "Adapter malfunction.");
+               break;
+       case 0x33: /* trace stopped */
+               ehca_err(&shca->ib_device, "Trace stopped.");
+               break;
+       default:
+               ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
+                        ec, shca->ib_device.name);
+               break;
+       }
+
+       return;
+}
+
+static inline void reset_eq_pending(struct ehca_cq *cq)
+{
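+       /*
+        * Clear the event-pending bit (CQx_EP) for this CQ and read the
+        * register back afterwards.
+        */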
+       u64 CQx_EP;
+       struct h_galpa gal = cq->galpas.kernel;
+
+       hipz_galpa_store_cq(gal, cqx_ep, 0x0);
+       CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
+
+       return;
+}
+
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs)
+{
+       struct ehca_shca *shca = (struct ehca_shca*)dev_id;
+
+       tasklet_hi_schedule(&shca->neq.interrupt_task);
+
+       return IRQ_HANDLED;
+}
+
+void ehca_tasklet_neq(unsigned long data)
+{
+       struct ehca_shca *shca = (struct ehca_shca*)data;
+       struct ehca_eqe *eqe;
+       u64 ret;
+
+       eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+
+       while (eqe) {
+               if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
+                       parse_ec(shca, eqe->entry);
+
+               eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+       }
+
+       ret = hipz_h_reset_event(shca->ipz_hca_handle,
+                                shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
+
+       if (ret != H_SUCCESS)
+               ehca_err(&shca->ib_device, "Can't clear notification events.");
+
+       return;
+}
+
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs)
+{
+       struct ehca_shca *shca = (struct ehca_shca*)dev_id;
+
+       tasklet_hi_schedule(&shca->eq.interrupt_task);
+
+       return IRQ_HANDLED;
+}
+
+void ehca_tasklet_eq(unsigned long data)
+{
+       struct ehca_shca *shca = (struct ehca_shca*)data;
+       struct ehca_eqe *eqe;
+       int int_state;
+       int query_cnt = 0;
+
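+       /*
+        * Keep draining the EQ; on hw_level >= 2 also poll the interrupt
+        * state register until it deasserts, capped at 100 queries.
+        */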
+       do {
+               eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+
+               if ((shca->hw_level >= 2) && eqe)
+                       int_state = 1;
+               else
+                       int_state = 0;
+
+               while ((int_state == 1) || eqe) {
+                       while (eqe) {
+                               u64 eqe_value = eqe->entry;
+
+                               ehca_dbg(&shca->ib_device,
+                                        "eqe_value=%lx", eqe_value);
+
+                               /* TODO: better structure */
+                               if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
+                                                  eqe_value)) {
+                                       unsigned long flags;
+                                       u32 token;
+                                       struct ehca_cq *cq;
+
+                                       ehca_dbg(&shca->ib_device,
+                                                "... completion event");
+                                       token =
+                                               EHCA_BMASK_GET(EQE_CQ_TOKEN,
+                                                              eqe_value);
+                                       spin_lock_irqsave(&ehca_cq_idr_lock,
+                                                         flags);
+                                       cq = idr_find(&ehca_cq_idr, token);
+
+                                       if (cq == NULL) {
+                                               spin_unlock_irqrestore(
+                                                       &ehca_cq_idr_lock, flags);
+                                               break;
+                                       }
+
+                                       reset_eq_pending(cq);
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+                                       queue_comp_task(cq);
+                                       spin_unlock_irqrestore(&ehca_cq_idr_lock,
+                                                              flags);
+#else
+                                       spin_unlock_irqrestore(&ehca_cq_idr_lock,
+                                                              flags);
+                                       comp_event_callback(cq);
+#endif
+                               } else {
+                                       ehca_dbg(&shca->ib_device,
+                                                "... non completion event");
+                                       parse_identifier(shca, eqe_value);
+                               }
+                               eqe =
+                                       (struct ehca_eqe *)ehca_poll_eq(shca,
+                                                                   &shca->eq);
+                       }
+
+                       if (shca->hw_level >= 2) {
+                               int_state =
+                                   hipz_h_query_int_state(shca->ipz_hca_handle,
+                                                          shca->eq.ist);
+                               query_cnt++;
+                               iosync();
+                               if (query_cnt >= 100) {
+                                       query_cnt = 0;
+                                       int_state = 0;
+                               }
+                       }
+                       eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+
+               }
+       } while (int_state != 0);
+
+       return;
+}
+
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+
+static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
+{
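+       /* round-robin over the online CPUs, remembering the last one handed out */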
+       unsigned long flags_last_cpu;
+
+       if (ehca_debug_level)
+               ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
+
+       spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
+       pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
+       if (pool->last_cpu == NR_CPUS)
+               pool->last_cpu = first_cpu(cpu_online_map);
+       spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+
+       return pool->last_cpu;
+}
+
+static void __queue_comp_task(struct ehca_cq *__cq,
+                             struct ehca_cpu_comp_task *cct)
+{
+       unsigned long flags_cct;
+       unsigned long flags_cq;
+
+       spin_lock_irqsave(&cct->task_lock, flags_cct);
+       spin_lock_irqsave(&__cq->task_lock, flags_cq);
+
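+       /*
+        * Put the CQ on the per-CPU list only for its first outstanding
+        * callback; later completions just bump the counter.
+        */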
+       if (__cq->nr_callbacks == 0) {
+               __cq->nr_callbacks++;
+               list_add_tail(&__cq->entry, &cct->cq_list);
+               cct->cq_jobs++;
+               wake_up(&cct->wait_queue);
+       }
+       } else
+
+       spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
+       spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+}
+
+static void queue_comp_task(struct ehca_cq *__cq)
+{
+       int cpu;
+       int cpu_id;
+       struct ehca_cpu_comp_task *cct;
+
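+       /*
+        * Pick a target CPU round-robin; if it already has work queued,
+        * advance once more before queueing anyway.
+        */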
+       cpu = get_cpu();
+       cpu_id = find_next_online_cpu(pool);
+
+       BUG_ON(!cpu_online(cpu_id));
+
+       cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+
+       if (cct->cq_jobs > 0) {
+               cpu_id = find_next_online_cpu(pool);
+               cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+       }
+
+       __queue_comp_task(__cq, cct);
+
+       put_cpu();
+
+       return;
+}
+
+static void run_comp_task(struct ehca_cpu_comp_task* cct)
+{
+       struct ehca_cq *cq;
+       unsigned long flags_cct;
+       unsigned long flags_cq;
+
+       spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+       while (!list_empty(&cct->cq_list)) {
+               cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
+               spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+               comp_event_callback(cq);
+               spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+               spin_lock_irqsave(&cq->task_lock, flags_cq);
+               cq->nr_callbacks--;
+               if (cq->nr_callbacks == 0) {
+                       list_del_init(cct->cq_list.next);
+                       cct->cq_jobs--;
+               }
+               spin_unlock_irqrestore(&cq->task_lock, flags_cq);
+
+       }
+
+       spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+
+       return;
+}
+
+static int comp_task(void *__cct)
+{
+       struct ehca_cpu_comp_task* cct = __cct;
+       DECLARE_WAITQUEUE(wait, current);
+
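+       /* per-CPU kthread: sleep until CQs appear on cq_list, then run them */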
+       set_current_state(TASK_INTERRUPTIBLE);
+       while (!kthread_should_stop()) {
+               add_wait_queue(&cct->wait_queue, &wait);
+
+               if (list_empty(&cct->cq_list))
+                       schedule();
+               else
+                       __set_current_state(TASK_RUNNING);
+
+               remove_wait_queue(&cct->wait_queue, &wait);
+
+               if (!list_empty(&cct->cq_list))
+                       run_comp_task(__cct);
+
+               set_current_state(TASK_INTERRUPTIBLE);
+       }
+       __set_current_state(TASK_RUNNING);
+
+       return 0;
+}
+
+static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
+                                           int cpu)
+{
+       struct ehca_cpu_comp_task *cct;
+
+       cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+       spin_lock_init(&cct->task_lock);
+       INIT_LIST_HEAD(&cct->cq_list);
+       init_waitqueue_head(&cct->wait_queue);
+       cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);
+
+       return cct->task;
+}
+
+static void destroy_comp_task(struct ehca_comp_pool *pool,
+                             int cpu)
+{
+       struct ehca_cpu_comp_task *cct;
+       struct task_struct *task;
+       unsigned long flags_cct;
+
+       cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+
+       spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+       task = cct->task;
+       cct->task = NULL;
+       cct->cq_jobs = 0;
+
+       spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+
+       if (task)
+               kthread_stop(task);
+
+       return;
+}
+
+static void take_over_work(struct ehca_comp_pool *pool,
+                          int cpu)
+{
+       struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+       LIST_HEAD(list);
+       struct ehca_cq *cq;
+       unsigned long flags_cct;
+
+       spin_lock_irqsave(&cct->task_lock, flags_cct);
+
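+       /* splice the dead CPU's pending CQs off its list and requeue them locally */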
+       list_splice_init(&cct->cq_list, &list);
+
+       while (!list_empty(&list)) {
+               cq = list_entry(list.next, struct ehca_cq, entry);
+
+               list_del(&cq->entry);
+               __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
+                                                 smp_processor_id()));
+       }
+
+       spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+
+}
+
+static int comp_pool_callback(struct notifier_block *nfb,
+                             unsigned long action,
+                             void *hcpu)
+{
+       unsigned int cpu = (unsigned long)hcpu;
+       struct ehca_cpu_comp_task *cct;
+
+       switch (action) {
+       case CPU_UP_PREPARE:
+               ehca_gen_dbg("CPU: %x (CPU_UP_PREPARE)", cpu);
+               if (!create_comp_task(pool, cpu)) {
+                       ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
+                       return NOTIFY_BAD;
+               }
+               break;
+       case CPU_UP_CANCELED:
+               ehca_gen_dbg("CPU: %x (CPU_UP_CANCELED)", cpu);
+               cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+               kthread_bind(cct->task, any_online_cpu(cpu_online_map));
+               destroy_comp_task(pool, cpu);
+               break;
+       case CPU_ONLINE:
+               ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
+               cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+               kthread_bind(cct->task, cpu);
+               wake_up_process(cct->task);
+               break;
+       case CPU_DOWN_PREPARE:
+               ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
+               break;
+       case CPU_DOWN_FAILED:
+               ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
+               break;
+       case CPU_DEAD:
+               ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
+               destroy_comp_task(pool, cpu);
+               take_over_work(pool, cpu);
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+#endif
+
+int ehca_create_comp_pool(void)
+{
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+       int cpu;
+       struct task_struct *task;
+
+       pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
+       if (pool == NULL)
+               return -ENOMEM;
+
+       spin_lock_init(&pool->last_cpu_lock);
+       pool->last_cpu = any_online_cpu(cpu_online_map);
+
+       pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
+       if (pool->cpu_comp_tasks == NULL) {
+               kfree(pool);
+               return -EINVAL;
+       }
+
+       for_each_online_cpu(cpu) {
+               task = create_comp_task(pool, cpu);
+               if (task) {
+                       kthread_bind(task, cpu);
+                       wake_up_process(task);
+               }
+       }
+
+       comp_pool_callback_nb.notifier_call = comp_pool_callback;
+       comp_pool_callback_nb.priority = 0;
+       register_cpu_notifier(&comp_pool_callback_nb);
+#endif
+
+       return 0;
+}
+
+void ehca_destroy_comp_pool(void)
+{
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+       int i;
+
+       unregister_cpu_notifier(&comp_pool_callback_nb);
+
+       for (i = 0; i < NR_CPUS; i++) {
+               if (cpu_online(i))
+                       destroy_comp_task(pool, i);
+       }
+#endif
+
+       return;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
new file mode 100644 (file)
index 0000000..85bf1fe
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Function definitions and structs for EQs, NEQs and interrupts
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_IRQ_H
+#define __EHCA_IRQ_H
+
+
+struct ehca_shca;
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <asm/atomic.h>
+
+int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
+
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs);
+void ehca_tasklet_neq(unsigned long data);
+
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs);
+void ehca_tasklet_eq(unsigned long data);
+
+struct ehca_cpu_comp_task {
+       wait_queue_head_t wait_queue;
+       struct list_head cq_list;
+       struct task_struct *task;
+       spinlock_t task_lock;
+       int cq_jobs;
+};
+
+struct ehca_comp_pool {
+       struct ehca_cpu_comp_task *cpu_comp_tasks;
+       int last_cpu;
+       spinlock_t last_cpu_lock;
+};
+
+int ehca_create_comp_pool(void);
+void ehca_destroy_comp_pool(void);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
new file mode 100644 (file)
index 0000000..319c39d
--- /dev/null
@@ -0,0 +1,182 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Function definitions for internal functions
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Dietmar Decker <ddecker@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_IVERBS_H__
+#define __EHCA_IVERBS_H__
+
+#include "ehca_classes.h"
+
+int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props);
+
+int ehca_query_port(struct ib_device *ibdev, u8 port,
+                   struct ib_port_attr *props);
+
+int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
+
+int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
+                  union ib_gid *gid);
+
+int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask,
+                    struct ib_port_modify *props);
+
+struct ib_pd *ehca_alloc_pd(struct ib_device *device,
+                           struct ib_ucontext *context,
+                           struct ib_udata *udata);
+
+int ehca_dealloc_pd(struct ib_pd *pd);
+
+struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+
+int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+int ehca_destroy_ah(struct ib_ah *ah);
+
+struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
+
+struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
+                              struct ib_phys_buf *phys_buf_array,
+                              int num_phys_buf,
+                              int mr_access_flags, u64 *iova_start);
+
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
+                              struct ib_umem *region,
+                              int mr_access_flags, struct ib_udata *udata);
+
+int ehca_rereg_phys_mr(struct ib_mr *mr,
+                      int mr_rereg_mask,
+                      struct ib_pd *pd,
+                      struct ib_phys_buf *phys_buf_array,
+                      int num_phys_buf, int mr_access_flags, u64 *iova_start);
+
+int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
+
+int ehca_dereg_mr(struct ib_mr *mr);
+
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd);
+
+int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+                struct ib_mw_bind *mw_bind);
+
+int ehca_dealloc_mw(struct ib_mw *mw);
+
+struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
+                             int mr_access_flags,
+                             struct ib_fmr_attr *fmr_attr);
+
+int ehca_map_phys_fmr(struct ib_fmr *fmr,
+                     u64 *page_list, int list_len, u64 iova);
+
+int ehca_unmap_fmr(struct list_head *fmr_list);
+
+int ehca_dealloc_fmr(struct ib_fmr *fmr);
+
+enum ehca_eq_type {
+       EHCA_EQ = 0, /* Event Queue              */
+       EHCA_NEQ     /* Notification Event Queue */
+};
+
+int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq,
+                  enum ehca_eq_type type, const u32 length);
+
+int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
+
+void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
+
+
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+                            struct ib_ucontext *context,
+                            struct ib_udata *udata);
+
+int ehca_destroy_cq(struct ib_cq *cq);
+
+int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+
+int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+
+int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
+
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify);
+
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+                            struct ib_qp_init_attr *init_attr,
+                            struct ib_udata *udata);
+
+int ehca_destroy_qp(struct ib_qp *qp);
+
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+                  struct ib_udata *udata);
+
+int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+                 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+
+int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
+                  struct ib_send_wr **bad_send_wr);
+
+int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
+                  struct ib_recv_wr **bad_recv_wr);
+
+u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
+                   struct ib_qp_init_attr *qp_init_attr);
+
+int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
+                                       struct ib_udata *udata);
+
+int ehca_dealloc_ucontext(struct ib_ucontext *context);
+
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+void ehca_poll_eqs(unsigned long data);
+
+int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped,
+                    struct vm_area_struct **vma);
+
+int ehca_mmap_register(u64 physical, void **mapped,
+                      struct vm_area_struct **vma);
+
+int ehca_munmap(unsigned long addr, size_t len);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
new file mode 100644 (file)
index 0000000..2380994
--- /dev/null
@@ -0,0 +1,818 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Module start/stop and HCA detection
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
+MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
+MODULE_VERSION("SVNEHCA_0016");
+
+int ehca_open_aqp1     = 0;
+int ehca_debug_level   = 0;
+int ehca_hw_level      = 0;
+int ehca_nr_ports      = 2;
+int ehca_use_hp_mr     = 0;
+int ehca_port_act_time = 30;
+int ehca_poll_all_eqs  = 1;
+int ehca_static_rate   = -1;
+
+module_param_named(open_aqp1,     ehca_open_aqp1,     int, 0);
+module_param_named(debug_level,   ehca_debug_level,   int, 0);
+module_param_named(hw_level,      ehca_hw_level,      int, 0);
+module_param_named(nr_ports,      ehca_nr_ports,      int, 0);
+module_param_named(use_hp_mr,     ehca_use_hp_mr,     int, 0);
+module_param_named(port_act_time, ehca_port_act_time, int, 0);
+module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  int, 0);
+module_param_named(static_rate,   ehca_static_rate,   int, 0);
+
+MODULE_PARM_DESC(open_aqp1,
+                "AQP1 on startup (0: no (default), 1: yes)");
+MODULE_PARM_DESC(debug_level,
+                "debug level"
+                " (0: no debug traces (default), 1: with debug traces)");
+MODULE_PARM_DESC(hw_level,
+                "hardware level"
+                " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)");
+MODULE_PARM_DESC(nr_ports,
+                "number of connected ports (default: 2)");
+MODULE_PARM_DESC(use_hp_mr,
+                "high performance MRs (0: no (default), 1: yes)");
+MODULE_PARM_DESC(port_act_time,
+                "time to wait for port activation (default: 30 sec)");
+MODULE_PARM_DESC(poll_all_eqs,
+                "polls all event queues periodically"
+                " (0: no, 1: yes (default))");
+MODULE_PARM_DESC(static_rate,
+                "set permanent static rate (default: disabled)");
+
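+
+/* idr maps for QP/CQ tokens; the spinlocks serialize access to them */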
+spinlock_t ehca_qp_idr_lock;
+spinlock_t ehca_cq_idr_lock;
+DEFINE_IDR(ehca_qp_idr);
+DEFINE_IDR(ehca_cq_idr);
+
+static struct list_head shca_list; /* list of all registered ehcas */
+static spinlock_t shca_list_lock;
+
+static struct timer_list poll_eqs_timer;
+
+static int ehca_create_slab_caches(void)
+{
+       int ret;
+
+       ret = ehca_init_pd_cache();
+       if (ret) {
+               ehca_gen_err("Cannot create PD SLAB cache.");
+               return ret;
+       }
+
+       ret = ehca_init_cq_cache();
+       if (ret) {
+               ehca_gen_err("Cannot create CQ SLAB cache.");
+               goto create_slab_caches2;
+       }
+
+       ret = ehca_init_qp_cache();
+       if (ret) {
+               ehca_gen_err("Cannot create QP SLAB cache.");
+               goto create_slab_caches3;
+       }
+
+       ret = ehca_init_av_cache();
+       if (ret) {
+               ehca_gen_err("Cannot create AV SLAB cache.");
+               goto create_slab_caches4;
+       }
+
+       ret = ehca_init_mrmw_cache();
+       if (ret) {
+               ehca_gen_err("Cannot create MR&MW SLAB cache.");
+               goto create_slab_caches5;
+       }
+
+       return 0;
+
+create_slab_caches5:
+       ehca_cleanup_av_cache();
+
+create_slab_caches4:
+       ehca_cleanup_qp_cache();
+
+create_slab_caches3:
+       ehca_cleanup_cq_cache();
+
+create_slab_caches2:
+       ehca_cleanup_pd_cache();
+
+       return ret;
+}
+
+static void ehca_destroy_slab_caches(void)
+{
+       ehca_cleanup_mrmw_cache();
+       ehca_cleanup_av_cache();
+       ehca_cleanup_qp_cache();
+       ehca_cleanup_cq_cache();
+       ehca_cleanup_pd_cache();
+}
+
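+/* adapter version and revision fields within rblock->hw_ver */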
+#define EHCA_HCAAVER  EHCA_BMASK_IBM(32,39)
+#define EHCA_REVID    EHCA_BMASK_IBM(40,63)
+
+int ehca_sense_attributes(struct ehca_shca *shca)
+{
+       int ret = 0;
+       u64 h_ret;
+       struct hipz_query_hca *rblock;
+
+       rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+       if (!rblock) {
+               ehca_gen_err("Cannot allocate rblock memory.");
+               return -ENOMEM;
+       }
+
+       h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
+       if (h_ret != H_SUCCESS) {
+               ehca_gen_err("Cannot query device properties. h_ret=%lx",
+                            h_ret);
+               ret = -EPERM;
+               goto num_ports1;
+       }
+
+       if (ehca_nr_ports == 1)
+               shca->num_ports = 1;
+       else
+               shca->num_ports = (u8)rblock->num_ports;
+
+       ehca_gen_dbg(" ... found %x ports", rblock->num_ports);
+
+       if (ehca_hw_level == 0) {
+               u32 hcaaver;
+               u32 revid;
+
+               hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
+               revid   = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
+
+               ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
+
+               if ((hcaaver == 1) && (revid == 0))
+                       shca->hw_level = 0;
+               else if ((hcaaver == 1) && (revid == 1))
+                       shca->hw_level = 1;
+               else if ((hcaaver == 1) && (revid == 2))
+                       shca->hw_level = 2;
+       }
+       ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
+
+       shca->sport[0].rate = IB_RATE_30_GBPS;
+       shca->sport[1].rate = IB_RATE_30_GBPS;
+
+num_ports1:
+       kfree(rblock);
+       return ret;
+}
+
+static int init_node_guid(struct ehca_shca *shca)
+{
+       int ret = 0;
+       struct hipz_query_hca *rblock;
+
+       rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+       if (!rblock) {
+               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+               return -ENOMEM;
+       }
+
+       if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
+               ehca_err(&shca->ib_device, "Can't query device properties");
+               ret = -EINVAL;
+               goto init_node_guid1;
+       }
+
+       memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
+
+init_node_guid1:
+       kfree(rblock);
+       return ret;
+}
+
+int ehca_register_device(struct ehca_shca *shca)
+{
+       int ret;
+
+       ret = init_node_guid(shca);
+       if (ret)
+               return ret;
+
+       strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
+       shca->ib_device.owner               = THIS_MODULE;
+
+       shca->ib_device.uverbs_abi_ver      = 5;
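+       /* userspace verbs commands supported by this device */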
+       shca->ib_device.uverbs_cmd_mask     =
+               (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
+               (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
+               (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
+               (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
+               (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
+               (1ull << IB_USER_VERBS_CMD_REG_MR)              |
+               (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
+               (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+               (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
+               (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
+               (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
+               (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
+               (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
+               (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
+               (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
+               (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
+
+       shca->ib_device.node_type           = RDMA_NODE_IB_CA;
+       shca->ib_device.phys_port_cnt       = shca->num_ports;
+       shca->ib_device.dma_device          = &shca->ibmebus_dev->ofdev.dev;
+       shca->ib_device.query_device        = ehca_query_device;
+       shca->ib_device.query_port          = ehca_query_port;
+       shca->ib_device.query_gid           = ehca_query_gid;
+       shca->ib_device.query_pkey          = ehca_query_pkey;
+       /* shca->in_device.modify_device    = ehca_modify_device    */
+       shca->ib_device.modify_port         = ehca_modify_port;
+       shca->ib_device.alloc_ucontext      = ehca_alloc_ucontext;
+       shca->ib_device.dealloc_ucontext    = ehca_dealloc_ucontext;
+       shca->ib_device.alloc_pd            = ehca_alloc_pd;
+       shca->ib_device.dealloc_pd          = ehca_dealloc_pd;
+       shca->ib_device.create_ah           = ehca_create_ah;
+       /* shca->ib_device.modify_ah        = ehca_modify_ah;       */
+       shca->ib_device.query_ah            = ehca_query_ah;
+       shca->ib_device.destroy_ah          = ehca_destroy_ah;
+       shca->ib_device.create_qp           = ehca_create_qp;
+       shca->ib_device.modify_qp           = ehca_modify_qp;
+       shca->ib_device.query_qp            = ehca_query_qp;
+       shca->ib_device.destroy_qp          = ehca_destroy_qp;
+       shca->ib_device.post_send           = ehca_post_send;
+       shca->ib_device.post_recv           = ehca_post_recv;
+       shca->ib_device.create_cq           = ehca_create_cq;
+       shca->ib_device.destroy_cq          = ehca_destroy_cq;
+       shca->ib_device.resize_cq           = ehca_resize_cq;
+       shca->ib_device.poll_cq             = ehca_poll_cq;
+       /* shca->ib_device.peek_cq          = ehca_peek_cq;         */
+       shca->ib_device.req_notify_cq       = ehca_req_notify_cq;
+       /* shca->ib_device.req_ncomp_notif  = ehca_req_ncomp_notif; */
+       shca->ib_device.get_dma_mr          = ehca_get_dma_mr;
+       shca->ib_device.reg_phys_mr         = ehca_reg_phys_mr;
+       shca->ib_device.reg_user_mr         = ehca_reg_user_mr;
+       shca->ib_device.query_mr            = ehca_query_mr;
+       shca->ib_device.dereg_mr            = ehca_dereg_mr;
+       shca->ib_device.rereg_phys_mr       = ehca_rereg_phys_mr;
+       shca->ib_device.alloc_mw            = ehca_alloc_mw;
+       shca->ib_device.bind_mw             = ehca_bind_mw;
+       shca->ib_device.dealloc_mw          = ehca_dealloc_mw;
+       shca->ib_device.alloc_fmr           = ehca_alloc_fmr;
+       shca->ib_device.map_phys_fmr        = ehca_map_phys_fmr;
+       shca->ib_device.unmap_fmr           = ehca_unmap_fmr;
+       shca->ib_device.dealloc_fmr         = ehca_dealloc_fmr;
+       shca->ib_device.attach_mcast        = ehca_attach_mcast;
+       shca->ib_device.detach_mcast        = ehca_detach_mcast;
+       /* shca->ib_device.process_mad      = ehca_process_mad;     */
+       shca->ib_device.mmap                = ehca_mmap;
+
+       ret = ib_register_device(&shca->ib_device);
+       if (ret)
+               ehca_err(&shca->ib_device,
+                        "ib_register_device() failed ret=%x", ret);
+
+       return ret;
+}
+
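+/* Create AQP1 (the GSI QP) and its completion queue for the given port */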
+static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
+{
+       struct ehca_sport *sport = &shca->sport[port - 1];
+       struct ib_cq *ibcq;
+       struct ib_qp *ibqp;
+       struct ib_qp_init_attr qp_init_attr;
+       int ret;
+
+       if (sport->ibcq_aqp1) {
+               ehca_err(&shca->ib_device, "AQP1 CQ is already created.");
+               return -EPERM;
+       }
+
+       ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10);
+       if (IS_ERR(ibcq)) {
+               ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
+               return PTR_ERR(ibcq);
+       }
+       sport->ibcq_aqp1 = ibcq;
+
+       if (sport->ibqp_aqp1) {
+               ehca_err(&shca->ib_device, "AQP1 QP is already created.");
+               ret = -EPERM;
+               goto create_aqp1;
+       }
+
+       memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr));
+       qp_init_attr.send_cq          = ibcq;
+       qp_init_attr.recv_cq          = ibcq;
+       qp_init_attr.sq_sig_type      = IB_SIGNAL_ALL_WR;
+       qp_init_attr.cap.max_send_wr  = 100;
+       qp_init_attr.cap.max_recv_wr  = 100;
+       qp_init_attr.cap.max_send_sge = 2;
+       qp_init_attr.cap.max_recv_sge = 1;
+       qp_init_attr.qp_type          = IB_QPT_GSI;
+       qp_init_attr.port_num         = port;
+       qp_init_attr.qp_context       = NULL;
+       qp_init_attr.event_handler    = NULL;
+       qp_init_attr.srq              = NULL;
+
+       ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
+       if (IS_ERR(ibqp)) {
+               ehca_err(&shca->ib_device, "Cannot create AQP1 QP.");
+               ret = PTR_ERR(ibqp);
+               goto create_aqp1;
+       }
+       sport->ibqp_aqp1 = ibqp;
+
+       return 0;
+
+create_aqp1:
+       ib_destroy_cq(sport->ibcq_aqp1);
+       return ret;
+}
+
+static int ehca_destroy_aqp1(struct ehca_sport *sport)
+{
+       int ret;
+
+       ret = ib_destroy_qp(sport->ibqp_aqp1);
+       if (ret) {
+               ehca_gen_err("Cannot destroy AQP1 QP. ret=%x", ret);
+               return ret;
+       }
+
+       ret = ib_destroy_cq(sport->ibcq_aqp1);
+       if (ret)
+               ehca_gen_err("Cannot destroy AQP1 CQ. ret=%x", ret);
+
+       return ret;
+}
+
+static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level);
+}
+
+static ssize_t ehca_store_debug_level(struct device_driver *ddp,
+                                     const char *buf, size_t count)
+{
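+       /* accept a single decimal digit (0..9) as the new debug level */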
+       int value = (*buf) - '0';
+       if (value >= 0 && value <= 9)
+               ehca_debug_level = value;
+       return 1;
+}
+
+DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
+           ehca_show_debug_level, ehca_store_debug_level);
+
+void ehca_create_driver_sysfs(struct ibmebus_driver *drv)
+{
+       driver_create_file(&drv->driver, &driver_attr_debug_level);
+}
+
+void ehca_remove_driver_sysfs(struct ibmebus_driver *drv)
+{
+       driver_remove_file(&drv->driver, &driver_attr_debug_level);
+}
+
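+/* generate a read-only sysfs attribute showing the rblock field <name> */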
+#define EHCA_RESOURCE_ATTR(name)                                           \
+static ssize_t  ehca_show_##name(struct device *dev,                       \
+                                struct device_attribute *attr,            \
+                                char *buf)                                \
+{                                                                         \
+       struct ehca_shca *shca;                                            \
+       struct hipz_query_hca *rblock;                                     \
+       int data;                                                          \
+                                                                          \
+       shca = dev->driver_data;                                           \
+                                                                          \
+       rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);                      \
+       if (!rblock) {                                                     \
+               dev_err(dev, "Can't allocate rblock memory.");             \
+               return 0;                                                  \
+       }                                                                  \
+                                                                          \
+       if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
+               dev_err(dev, "Can't query device properties");             \
+               kfree(rblock);                                             \
+               return 0;                                                  \
+       }                                                                  \
+                                                                          \
+       data = rblock->name;                                               \
+       kfree(rblock);                                                     \
+                                                                          \
+       if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1))     \
+               return snprintf(buf, 256, "1\n");                          \
+       else                                                               \
+               return snprintf(buf, 256, "%d\n", data);                   \
+                                                                          \
+}                                                                         \
+static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
+
+EHCA_RESOURCE_ATTR(num_ports);
+EHCA_RESOURCE_ATTR(hw_ver);
+EHCA_RESOURCE_ATTR(max_eq);
+EHCA_RESOURCE_ATTR(cur_eq);
+EHCA_RESOURCE_ATTR(max_cq);
+EHCA_RESOURCE_ATTR(cur_cq);
+EHCA_RESOURCE_ATTR(max_qp);
+EHCA_RESOURCE_ATTR(cur_qp);
+EHCA_RESOURCE_ATTR(max_mr);
+EHCA_RESOURCE_ATTR(cur_mr);
+EHCA_RESOURCE_ATTR(max_mw);
+EHCA_RESOURCE_ATTR(cur_mw);
+EHCA_RESOURCE_ATTR(max_pd);
+EHCA_RESOURCE_ATTR(max_ah);
+
+static ssize_t ehca_show_adapter_handle(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *buf)
+{
+       struct ehca_shca *shca = dev->driver_data;
+
+       return sprintf(buf, "%lx\n", shca->ipz_hca_handle.handle);
+
+}
+static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
+
+
+void ehca_create_device_sysfs(struct ibmebus_dev *dev)
+{
+       device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
+       device_create_file(&dev->ofdev.dev, &dev_attr_num_ports);
+       device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver);
+       device_create_file(&dev->ofdev.dev, &dev_attr_max_eq);
+       device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq);
+       device_create_file(&dev->ofdev.dev, &dev_attr_max_cq);
+       device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq);
+       device_create_file(&dev->ofdev.dev, &dev_attr_max_qp);
+       device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp);
+       device_create_file(&dev->ofdev.dev, &dev_attr_max_mr);
+       device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr);
+       device_create_file(&dev->ofdev.dev, &dev_attr_max_mw);
+       device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw);
+       device_create_file(&dev->ofdev.dev, &dev_attr_max_pd);
+       device_create_file(&dev->ofdev.dev, &dev_attr_max_ah);
+}
+
+void ehca_remove_device_sysfs(struct ibmebus_dev *dev)
+{
+       device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd);
+       device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah);
+}
+
+static int __devinit ehca_probe(struct ibmebus_dev *dev,
+                               const struct of_device_id *id)
+{
+       struct ehca_shca *shca;
+       u64 *handle;
+       struct ib_pd *ibpd;
+       int ret;
+
+       handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL);
+       if (!handle) {
+               ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
+                            dev->ofdev.node->full_name);
+               return -ENODEV;
+       }
+
+       if (!(*handle)) {
+               ehca_gen_err("Wrong eHCA handle for adapter: %s.",
+                            dev->ofdev.node->full_name);
+               return -ENODEV;
+       }
+
+       shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
+       if (!shca) {
+               ehca_gen_err("Cannot allocate shca memory.");
+               return -ENOMEM;
+       }
+
+       shca->ibmebus_dev = dev;
+       shca->ipz_hca_handle.handle = *handle;
+       dev->ofdev.dev.driver_data = shca;
+
+       ret = ehca_sense_attributes(shca);
+       if (ret < 0) {
+               ehca_gen_err("Cannot sense eHCA attributes.");
+               goto probe1;
+       }
+
+       ret = ehca_register_device(shca);
+       if (ret) {
+               ehca_gen_err("Cannot register Infiniband device");
+               goto probe1;
+       }
+
+       /* create event queues */
+       ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048);
+       if (ret) {
+               ehca_err(&shca->ib_device, "Cannot create EQ.");
+               goto probe2;
+       }
+
+       ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
+       if (ret) {
+               ehca_err(&shca->ib_device, "Cannot create NEQ.");
+               goto probe3;
+       }
+
+       /* create internal protection domain */
+       ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL);
+       if (IS_ERR(ibpd)) {
+               ehca_err(&shca->ib_device, "Cannot create internal PD.");
+               ret = PTR_ERR(ibpd);
+               goto probe4;
+       }
+
+       shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
+       shca->pd->ib_pd.device = &shca->ib_device;
+
+       /* create internal max MR */
+       ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
+
+       if (ret) {
+               ehca_err(&shca->ib_device, "Cannot create internal MR ret=%x",
+                        ret);
+               goto probe5;
+       }
+
+       /* create AQP1 for port 1 */
+       if (ehca_open_aqp1 == 1) {
+               shca->sport[0].port_state = IB_PORT_DOWN;
+               ret = ehca_create_aqp1(shca, 1);
+               if (ret) {
+                       ehca_err(&shca->ib_device,
+                                "Cannot create AQP1 for port 1.");
+                       goto probe6;
+               }
+       }
+
+       /* create AQP1 for port 2 */
+       if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
+               shca->sport[1].port_state = IB_PORT_DOWN;
+               ret = ehca_create_aqp1(shca, 2);
+               if (ret) {
+                       ehca_err(&shca->ib_device,
+                                "Cannot create AQP1 for port 2.");
+                       goto probe7;
+               }
+       }
+
+       ehca_create_device_sysfs(dev);
+
+       spin_lock(&shca_list_lock);
+       list_add(&shca->shca_list, &shca_list);
+       spin_unlock(&shca_list_lock);
+
+       return 0;
+
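+/* error unwind: release resources in reverse order of allocation */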
+probe7:
+       ret = ehca_destroy_aqp1(&shca->sport[0]);
+       if (ret)
+               ehca_err(&shca->ib_device,
+                        "Cannot destroy AQP1 for port 1. ret=%x", ret);
+
+probe6:
+       ret = ehca_dereg_internal_maxmr(shca);
+       if (ret)
+               ehca_err(&shca->ib_device,
+                        "Cannot destroy internal MR. ret=%x", ret);
+
+probe5:
+       ret = ehca_dealloc_pd(&shca->pd->ib_pd);
+       if (ret)
+               ehca_err(&shca->ib_device,
+                        "Cannot destroy internal PD. ret=%x", ret);
+
+probe4:
+       ret = ehca_destroy_eq(shca, &shca->neq);
+       if (ret)
+               ehca_err(&shca->ib_device,
+                        "Cannot destroy NEQ. ret=%x", ret);
+
+probe3:
+       ret = ehca_destroy_eq(shca, &shca->eq);
+       if (ret)
+               ehca_err(&shca->ib_device,
+                        "Cannot destroy EQ. ret=%x", ret);
+
+probe2:
+       ib_unregister_device(&shca->ib_device);
+
+probe1:
+       ib_dealloc_device(&shca->ib_device);
+
+       return -EINVAL;
+}
+
+static int __devexit ehca_remove(struct ibmebus_dev *dev)
+{
+       struct ehca_shca *shca = dev->ofdev.dev.driver_data;
+       int ret;
+
+       ehca_remove_device_sysfs(dev);
+
+       if (ehca_open_aqp1 == 1) {
+               int i;
+               for (i = 0; i < shca->num_ports; i++) {
+                       ret = ehca_destroy_aqp1(&shca->sport[i]);
+                       if (ret)
+                               ehca_err(&shca->ib_device,
+                                        "Cannot destroy AQP1 for port %x "
+                                        "ret=%x", i, ret);
+               }
+       }
+
+       ib_unregister_device(&shca->ib_device);
+
+       ret = ehca_dereg_internal_maxmr(shca);
+       if (ret)
+               ehca_err(&shca->ib_device,
+                        "Cannot destroy internal MR. ret=%x", ret);
+
+       ret = ehca_dealloc_pd(&shca->pd->ib_pd);
+       if (ret)
+               ehca_err(&shca->ib_device,
+                        "Cannot destroy internal PD. ret=%x", ret);
+
+       ret = ehca_destroy_eq(shca, &shca->eq);
+       if (ret)
+               ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret);
+
+       ret = ehca_destroy_eq(shca, &shca->neq);
+       if (ret)
+               ehca_err(&shca->ib_device, "Cannot destroy NEQ. ret=%x", ret);
+
+       ib_dealloc_device(&shca->ib_device);
+
+       spin_lock(&shca_list_lock);
+       list_del(&shca->shca_list);
+       spin_unlock(&shca_list_lock);
+
+       return ret;
+}
+
+static struct of_device_id ehca_device_table[] =
+{
+       {
+               .name       = "lhca",
+               .compatible = "IBM,lhca",
+       },
+       {},
+};
+
+static struct ibmebus_driver ehca_driver = {
+       .name     = "ehca",
+       .id_table = ehca_device_table,
+       .probe    = ehca_probe,
+       .remove   = ehca_remove,
+};
+
+void ehca_poll_eqs(unsigned long data)
+{
+       struct ehca_shca *shca;
+
+       spin_lock(&shca_list_lock);
+       list_for_each_entry(shca, &shca_list, shca_list) {
+               if (shca->eq.is_initialized)
+                       ehca_tasklet_eq((unsigned long)(void*)shca);
+       }
+       mod_timer(&poll_eqs_timer, jiffies + HZ);
+       spin_unlock(&shca_list_lock);
+}
+
+int __init ehca_module_init(void)
+{
+       int ret;
+
+       printk(KERN_INFO "eHCA Infiniband Device Driver "
+                        "(Rel.: SVNEHCA_0016)\n");
+       idr_init(&ehca_qp_idr);
+       idr_init(&ehca_cq_idr);
+       spin_lock_init(&ehca_qp_idr_lock);
+       spin_lock_init(&ehca_cq_idr_lock);
+
+       INIT_LIST_HEAD(&shca_list);
+       spin_lock_init(&shca_list_lock);
+
+       if ((ret = ehca_create_comp_pool())) {
+               ehca_gen_err("Cannot create comp pool.");
+               return ret;
+       }
+
+       if ((ret = ehca_create_slab_caches())) {
+               ehca_gen_err("Cannot create SLAB caches");
+               ret = -ENOMEM;
+               goto module_init1;
+       }
+
+       if ((ret = ibmebus_register_driver(&ehca_driver))) {
+               ehca_gen_err("Cannot register eHCA device driver");
+               ret = -EINVAL;
+               goto module_init2;
+       }
+
+       ehca_create_driver_sysfs(&ehca_driver);
+
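+       /* EQ polling guards against lost interrupts; warn loudly if disabled */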
+       if (ehca_poll_all_eqs != 1) {
+               ehca_gen_err("WARNING!!!");
+               ehca_gen_err("It is possible to lose interrupts.");
+       } else {
+               init_timer(&poll_eqs_timer);
+               poll_eqs_timer.function = ehca_poll_eqs;
+               poll_eqs_timer.expires = jiffies + HZ;
+               add_timer(&poll_eqs_timer);
+       }
+
+       return 0;
+
+module_init2:
+       ehca_destroy_slab_caches();
+
+module_init1:
+       ehca_destroy_comp_pool();
+       return ret;
+};
+
+void __exit ehca_module_exit(void)
+{
+       if (ehca_poll_all_eqs == 1)
+               del_timer_sync(&poll_eqs_timer);
+
+       ehca_remove_driver_sysfs(&ehca_driver);
+       ibmebus_unregister_driver(&ehca_driver);
+
+       ehca_destroy_slab_caches();
+
+       ehca_destroy_comp_pool();
+
+       idr_destroy(&ehca_cq_idr);
+       idr_destroy(&ehca_qp_idr);
+};
+
+module_init(ehca_module_init);
+module_exit(ehca_module_exit);
diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c
new file mode 100644 (file)
index 0000000..32a8706
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Multicast functions
+ *
+ *  Authors: Khadija Souissi <souissik@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+#define MAX_MC_LID 0xFFFE
+#define MIN_MC_LID 0xC000      /* Multicast limits */
+#define EHCA_VALID_MULTICAST_GID(gid)  ((gid)[0] == 0xFF)
+#define EHCA_VALID_MULTICAST_LID(lid) \
+       (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID))
+
+int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+       struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+                                             ib_device);
+       union ib_gid my_gid;
+       u64 subnet_prefix, interface_id, h_ret;
+
+       if (ibqp->qp_type != IB_QPT_UD) {
+               ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type);
+               return -EINVAL;
+       }
+
+       if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
+               ehca_err(ibqp->device, "invalid multicast gid");
+               return -EINVAL;
+       } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
+               ehca_err(ibqp->device, "invalid multicast lid=%x", lid);
+               return -EINVAL;
+       }
+
+       memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+
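+       /* pass subnet prefix and interface ID separately to the hypervisor */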
+       subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+       interface_id = be64_to_cpu(my_gid.global.interface_id);
+       h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle,
+                                  my_qp->ipz_qp_handle,
+                                  my_qp->galpas.kernel,
+                                  lid, subnet_prefix, interface_id);
+       if (h_ret != H_SUCCESS)
+               ehca_err(ibqp->device,
+                        "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
+                        "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+
+       return ehca2ib_return_code(h_ret);
+}
+
+int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+       struct ehca_shca *shca = container_of(ibqp->pd->device,
+                                             struct ehca_shca, ib_device);
+       union ib_gid my_gid;
+       u64 subnet_prefix, interface_id, h_ret;
+
+       if (ibqp->qp_type != IB_QPT_UD) {
+               ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type);
+               return -EINVAL;
+       }
+
+       if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
+               ehca_err(ibqp->device, "invalid multicast gid");
+               return -EINVAL;
+       } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
+               ehca_err(ibqp->device, "invalid multicast lid=%x", lid);
+               return -EINVAL;
+       }
+
+       memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+
+       subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+       interface_id = be64_to_cpu(my_gid.global.interface_id);
+       h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle,
+                                  my_qp->ipz_qp_handle,
+                                  my_qp->galpas.kernel,
+                                  lid, subnet_prefix, interface_id);
+       if (h_ret != H_SUCCESS)
+               ehca_err(ibqp->device,
+                        "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
+                        "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+
+       return ehca2ib_return_code(h_ret);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
new file mode 100644 (file)
index 0000000..5ca6544
--- /dev/null
@@ -0,0 +1,2261 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  MR/MW functions
+ *
+ *  Authors: Dietmar Decker <ddecker@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "hcp_if.h"
+#include "hipz_hw.h"
+
+static struct kmem_cache *mr_cache;
+static struct kmem_cache *mw_cache;
+
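+/* allocate a zeroed MR object from its slab cache and initialize its lock */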
+static struct ehca_mr *ehca_mr_new(void)
+{
+       struct ehca_mr *me;
+
+       me = kmem_cache_alloc(mr_cache, SLAB_KERNEL);
+       if (me) {
+               memset(me, 0, sizeof(struct ehca_mr));
+               spin_lock_init(&me->mrlock);
+       } else
+               ehca_gen_err("alloc failed");
+
+       return me;
+}
+
+static void ehca_mr_delete(struct ehca_mr *me)
+{
+       kmem_cache_free(mr_cache, me);
+}
+
+static struct ehca_mw *ehca_mw_new(void)
+{
+       struct ehca_mw *me;
+
+       me = kmem_cache_alloc(mw_cache, SLAB_KERNEL);
+       if (me) {
+               memset(me, 0, sizeof(struct ehca_mw));
+               spin_lock_init(&me->mwlock);
+       } else
+               ehca_gen_err("alloc failed");
+
+       return me;
+}
+
+static void ehca_mw_delete(struct ehca_mw *me)
+{
+       kmem_cache_free(mw_cache, me);
+}
+
+/*----------------------------------------------------------------------*/
+
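+/* DMA MR: register a max-MR covering kernel space (starts at KERNELBASE) */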
+struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
+{
+       struct ib_mr *ib_mr;
+       int ret;
+       struct ehca_mr *e_maxmr;
+       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+       struct ehca_shca *shca =
+               container_of(pd->device, struct ehca_shca, ib_device);
+
+       if (shca->maxmr) {
+               e_maxmr = ehca_mr_new();
+               if (!e_maxmr) {
+                       ehca_err(&shca->ib_device, "out of memory");
+                       ib_mr = ERR_PTR(-ENOMEM);
+                       goto get_dma_mr_exit0;
+               }
+
+               ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE,
+                                    mr_access_flags, e_pd,
+                                    &e_maxmr->ib.ib_mr.lkey,
+                                    &e_maxmr->ib.ib_mr.rkey);
+               if (ret) {
+                       ib_mr = ERR_PTR(ret);
+                       goto get_dma_mr_exit0;
+               }
+               ib_mr = &e_maxmr->ib.ib_mr;
+       } else {
+               ehca_err(&shca->ib_device, "no internal max-MR exist!");
+               ib_mr = ERR_PTR(-EINVAL);
+               goto get_dma_mr_exit0;
+       }
+
+get_dma_mr_exit0:
+       if (IS_ERR(ib_mr))
+               ehca_err(&shca->ib_device, "rc=%lx pd=%p mr_access_flags=%x ",
+                        PTR_ERR(ib_mr), pd, mr_access_flags);
+       return ib_mr;
+} /* end ehca_get_dma_mr() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
+                              struct ib_phys_buf *phys_buf_array,
+                              int num_phys_buf,
+                              int mr_access_flags,
+                              u64 *iova_start)
+{
+       struct ib_mr *ib_mr;
+       int ret;
+       struct ehca_mr *e_mr;
+       struct ehca_shca *shca =
+               container_of(pd->device, struct ehca_shca, ib_device);
+       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+
+       u64 size;
+       struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+       u32 num_pages_mr;
+       u32 num_pages_4k; /* 4k portion "pages" */
+
+       if ((num_phys_buf <= 0) || !phys_buf_array) {
+               ehca_err(pd->device, "bad input values: num_phys_buf=%x "
+                        "phys_buf_array=%p", num_phys_buf, phys_buf_array);
+               ib_mr = ERR_PTR(-EINVAL);
+               goto reg_phys_mr_exit0;
+       }
+       if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+           ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+               /*
+                * Remote Write Access requires Local Write Access
+                * Remote Atomic Access requires Local Write Access
+                */
+               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+                        mr_access_flags);
+               ib_mr = ERR_PTR(-EINVAL);
+               goto reg_phys_mr_exit0;
+       }
+
+       /* check physical buffer list and calculate size */
+       ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
+                                           iova_start, &size);
+       if (ret) {
+               ib_mr = ERR_PTR(ret);
+               goto reg_phys_mr_exit0;
+       }
+       if ((size == 0) ||
+           (((u64)iova_start + size) < (u64)iova_start)) {
+               ehca_err(pd->device, "bad input values: size=%lx iova_start=%p",
+                        size, iova_start);
+               ib_mr = ERR_PTR(-EINVAL);
+               goto reg_phys_mr_exit0;
+       }
+
+       e_mr = ehca_mr_new();
+       if (!e_mr) {
+               ehca_err(pd->device, "out of memory");
+               ib_mr = ERR_PTR(-ENOMEM);
+               goto reg_phys_mr_exit0;
+       }
+
+       /* determine number of MR pages */
+       num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size +
+                        PAGE_SIZE - 1) / PAGE_SIZE);
+       num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size +
+                        EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+
+       /* register MR on HCA */
+       if (ehca_mr_is_maxmr(size, iova_start)) {
+               e_mr->flags |= EHCA_MR_FLAG_MAXMR;
+               ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
+                                    e_pd, &e_mr->ib.ib_mr.lkey,
+                                    &e_mr->ib.ib_mr.rkey);
+               if (ret) {
+                       ib_mr = ERR_PTR(ret);
+                       goto reg_phys_mr_exit1;
+               }
+       } else {
+               pginfo.type           = EHCA_MR_PGI_PHYS;
+               pginfo.num_pages      = num_pages_mr;
+               pginfo.num_4k         = num_pages_4k;
+               pginfo.num_phys_buf   = num_phys_buf;
+               pginfo.phys_buf_array = phys_buf_array;
+               pginfo.next_4k        = (((u64)iova_start & ~PAGE_MASK) /
+                                        EHCA_PAGESIZE);
+
+               ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
+                                 e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
+                                 &e_mr->ib.ib_mr.rkey);
+               if (ret) {
+                       ib_mr = ERR_PTR(ret);
+                       goto reg_phys_mr_exit1;
+               }
+       }
+
+       /* successful registration of all pages */
+       return &e_mr->ib.ib_mr;
+
+reg_phys_mr_exit1:
+       ehca_mr_delete(e_mr);
+reg_phys_mr_exit0:
+       if (IS_ERR(ib_mr))
+               ehca_err(pd->device, "rc=%lx pd=%p phys_buf_array=%p "
+                        "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
+                        PTR_ERR(ib_mr), pd, phys_buf_array,
+                        num_phys_buf, mr_access_flags, iova_start);
+       return ib_mr;
+} /* end ehca_reg_phys_mr() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
+                              struct ib_umem *region,
+                              int mr_access_flags,
+                              struct ib_udata *udata)
+{
+       struct ib_mr *ib_mr;
+       struct ehca_mr *e_mr;
+       struct ehca_shca *shca =
+               container_of(pd->device, struct ehca_shca, ib_device);
+       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+       struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+       int ret;
+       u32 num_pages_mr;
+       u32 num_pages_4k; /* 4k portion "pages" */
+
+       if (!pd) {
+               ehca_gen_err("bad pd=%p", pd);
+               return ERR_PTR(-EFAULT);
+       }
+       if (!region) {
+               ehca_err(pd->device, "bad input values: region=%p", region);
+               ib_mr = ERR_PTR(-EINVAL);
+               goto reg_user_mr_exit0;
+       }
+       if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+           ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+               /*
+                * Remote Write Access requires Local Write Access
+                * Remote Atomic Access requires Local Write Access
+                */
+               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+                        mr_access_flags);
+               ib_mr = ERR_PTR(-EINVAL);
+               goto reg_user_mr_exit0;
+       }
+       if (region->page_size != PAGE_SIZE) {
+               ehca_err(pd->device, "page size not supported, "
+                        "region->page_size=%x", region->page_size);
+               ib_mr = ERR_PTR(-EINVAL);
+               goto reg_user_mr_exit0;
+       }
+
+       if ((region->length == 0) ||
+           ((region->virt_base + region->length) < region->virt_base)) {
+               ehca_err(pd->device, "bad input values: length=%lx "
+                        "virt_base=%lx", region->length, region->virt_base);
+               ib_mr = ERR_PTR(-EINVAL);
+               goto reg_user_mr_exit0;
+       }
+
+       e_mr = ehca_mr_new();
+       if (!e_mr) {
+               ehca_err(pd->device, "out of memory");
+               ib_mr = ERR_PTR(-ENOMEM);
+               goto reg_user_mr_exit0;
+       }
+
+       /* determine number of MR pages */
+       num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length +
+                        PAGE_SIZE - 1) / PAGE_SIZE);
+       num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length +
+                        EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+
+       /* register MR on HCA */
+       pginfo.type       = EHCA_MR_PGI_USER;
+       pginfo.num_pages  = num_pages_mr;
+       pginfo.num_4k     = num_pages_4k;
+       pginfo.region     = region;
+       pginfo.next_4k    = region->offset / EHCA_PAGESIZE;
+       pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
+                                              (&region->chunk_list),
+                                              list);
+
+       ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base,
+                         region->length, mr_access_flags, e_pd, &pginfo,
+                         &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
+       if (ret) {
+               ib_mr = ERR_PTR(ret);
+               goto reg_user_mr_exit1;
+       }
+
+       /* successful registration of all pages */
+       return &e_mr->ib.ib_mr;
+
+reg_user_mr_exit1:
+       ehca_mr_delete(e_mr);
+reg_user_mr_exit0:
+       if (IS_ERR(ib_mr))
+               ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x"
+                        " udata=%p",
+                        PTR_ERR(ib_mr), pd, region, mr_access_flags, udata);
+       return ib_mr;
+} /* end ehca_reg_user_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_rereg_phys_mr(struct ib_mr *mr,
+                      int mr_rereg_mask,
+                      struct ib_pd *pd,
+                      struct ib_phys_buf *phys_buf_array,
+                      int num_phys_buf,
+                      int mr_access_flags,
+                      u64 *iova_start)
+{
+       int ret;
+
+       struct ehca_shca *shca =
+               container_of(mr->device, struct ehca_shca, ib_device);
+       struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
+       struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
+       u64 new_size;
+       u64 *new_start;
+       u32 new_acl;
+       struct ehca_pd *new_pd;
+       u32 tmp_lkey, tmp_rkey;
+       unsigned long sl_flags;
+       u32 num_pages_mr = 0;
+       u32 num_pages_4k = 0; /* 4k portion "pages" */
+       struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+       u32 cur_pid = current->tgid;
+
+       if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+           (my_pd->ownpid != cur_pid)) {
+               ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
+                        cur_pid, my_pd->ownpid);
+               ret = -EINVAL;
+               goto rereg_phys_mr_exit0;
+       }
+
+       if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
+               /* TODO not supported, because PHYP rereg hCall needs pages */
+               ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
+                        "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
+               ret = -EINVAL;
+               goto rereg_phys_mr_exit0;
+       }
+
+       if (mr_rereg_mask & IB_MR_REREG_PD) {
+               if (!pd) {
+                       ehca_err(mr->device, "rereg with bad pd, pd=%p "
+                                "mr_rereg_mask=%x", pd, mr_rereg_mask);
+                       ret = -EINVAL;
+                       goto rereg_phys_mr_exit0;
+               }
+       }
+
+       if ((mr_rereg_mask &
+            ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
+           (mr_rereg_mask == 0)) {
+               ret = -EINVAL;
+               goto rereg_phys_mr_exit0;
+       }
+
+       /* check other parameters */
+       if (e_mr == shca->maxmr) {
+               /* should be impossible, however reject to be sure */
+               ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
+                        "shca->maxmr=%p mr->lkey=%x",
+                        mr, shca->maxmr, mr->lkey);
+               ret = -EINVAL;
+               goto rereg_phys_mr_exit0;
+       }
+       if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
+               if (e_mr->flags & EHCA_MR_FLAG_FMR) {
+                       ehca_err(mr->device, "not supported for FMR, mr=%p "
+                                "flags=%x", mr, e_mr->flags);
+                       ret = -EINVAL;
+                       goto rereg_phys_mr_exit0;
+               }
+               if (!phys_buf_array || num_phys_buf <= 0) {
+                       ehca_err(mr->device, "bad input values: mr_rereg_mask=%x"
+                                " phys_buf_array=%p num_phys_buf=%x",
+                                mr_rereg_mask, phys_buf_array, num_phys_buf);
+                       ret = -EINVAL;
+                       goto rereg_phys_mr_exit0;
+               }
+       }
+       if ((mr_rereg_mask & IB_MR_REREG_ACCESS) &&     /* change ACL */
+           (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+            ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
+               /*
+                * Remote Write Access requires Local Write Access
+                * Remote Atomic Access requires Local Write Access
+                */
+               ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
+                        "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
+               ret = -EINVAL;
+               goto rereg_phys_mr_exit0;
+       }
+
+       /* set requested values dependent on rereg request */
+       spin_lock_irqsave(&e_mr->mrlock, sl_flags);
+       new_start = e_mr->start;  /* new == old address */
+       new_size  = e_mr->size;   /* new == old length */
+       new_acl   = e_mr->acl;    /* new == old access control */
+       /* new == old PD */
+       new_pd    = container_of(mr->pd, struct ehca_pd, ib_pd);
+
+       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+               new_start = iova_start; /* change address */
+               /* check physical buffer list and calculate size */
+               ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
+                                                   num_phys_buf, iova_start,
+                                                   &new_size);
+               if (ret)
+                       goto rereg_phys_mr_exit1;
+               if ((new_size == 0) ||
+                   (((u64)iova_start + new_size) < (u64)iova_start)) {
+                       ehca_err(mr->device, "bad input values: new_size=%lx "
+                                "iova_start=%p", new_size, iova_start);
+                       ret = -EINVAL;
+                       goto rereg_phys_mr_exit1;
+               }
+               num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size +
+                                PAGE_SIZE - 1) / PAGE_SIZE);
+               num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size +
+                                EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+               pginfo.type           = EHCA_MR_PGI_PHYS;
+               pginfo.num_pages      = num_pages_mr;
+               pginfo.num_4k         = num_pages_4k;
+               pginfo.num_phys_buf   = num_phys_buf;
+               pginfo.phys_buf_array = phys_buf_array;
+               pginfo.next_4k        = (((u64)iova_start & ~PAGE_MASK) /
+                                        EHCA_PAGESIZE);
+       }
+       if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+               new_acl = mr_access_flags;
+       if (mr_rereg_mask & IB_MR_REREG_PD)
+               new_pd = container_of(pd, struct ehca_pd, ib_pd);
+
+       ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
+                           new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
+       if (ret)
+               goto rereg_phys_mr_exit1;
+
+       /* successful reregistration */
+       if (mr_rereg_mask & IB_MR_REREG_PD)
+               mr->pd = pd;
+       mr->lkey = tmp_lkey;
+       mr->rkey = tmp_rkey;
+
+rereg_phys_mr_exit1:
+       spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
+rereg_phys_mr_exit0:
+       if (ret)
+               ehca_err(mr->device, "ret=%x mr=%p mr_rereg_mask=%x pd=%p "
+                        "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
+                        "iova_start=%p",
+                        ret, mr, mr_rereg_mask, pd, phys_buf_array,
+                        num_phys_buf, mr_access_flags, iova_start);
+       return ret;
+} /* end ehca_rereg_phys_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
+{
+       int ret = 0;
+       u64 h_ret;
+       struct ehca_shca *shca =
+               container_of(mr->device, struct ehca_shca, ib_device);
+       struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
+       struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
+       u32 cur_pid = current->tgid;
+       unsigned long sl_flags;
+       struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+       if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+           (my_pd->ownpid != cur_pid)) {
+               ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
+                        cur_pid, my_pd->ownpid);
+               ret = -EINVAL;
+               goto query_mr_exit0;
+       }
+
+       if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
+               ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
+                        "e_mr->flags=%x", mr, e_mr, e_mr->flags);
+               ret = -EINVAL;
+               goto query_mr_exit0;
+       }
+
+       memset(mr_attr, 0, sizeof(struct ib_mr_attr));
+       spin_lock_irqsave(&e_mr->mrlock, sl_flags);
+
+       h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
+       if (h_ret != H_SUCCESS) {
+               ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lx mr=%p "
+                        "hca_hndl=%lx mr_hndl=%lx lkey=%x",
+                        h_ret, mr, shca->ipz_hca_handle.handle,
+                        e_mr->ipz_mr_handle.handle, mr->lkey);
+               ret = ehca_mrmw_map_hrc_query_mr(h_ret);
+               goto query_mr_exit1;
+       }
+       mr_attr->pd               = mr->pd;
+       mr_attr->device_virt_addr = hipzout.vaddr;
+       mr_attr->size             = hipzout.len;
+       mr_attr->lkey             = hipzout.lkey;
+       mr_attr->rkey             = hipzout.rkey;
+       ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
+
+query_mr_exit1:
+       spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
+query_mr_exit0:
+       if (ret)
+               ehca_err(mr->device, "ret=%x mr=%p mr_attr=%p",
+                        ret, mr, mr_attr);
+       return ret;
+} /* end ehca_query_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dereg_mr(struct ib_mr *mr)
+{
+       int ret = 0;
+       u64 h_ret;
+       struct ehca_shca *shca =
+               container_of(mr->device, struct ehca_shca, ib_device);
+       struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
+       struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
+       u32 cur_pid = current->tgid;
+
+       if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+           (my_pd->ownpid != cur_pid)) {
+               ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
+                        cur_pid, my_pd->ownpid);
+               ret = -EINVAL;
+               goto dereg_mr_exit0;
+       }
+
+       if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
+               ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
+                        "e_mr->flags=%x", mr, e_mr, e_mr->flags);
+               ret = -EINVAL;
+               goto dereg_mr_exit0;
+       } else if (e_mr == shca->maxmr) {
+               /* should be impossible, however reject to be sure */
+               ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
+                        "shca->maxmr=%p mr->lkey=%x",
+                        mr, shca->maxmr, mr->lkey);
+               ret = -EINVAL;
+               goto dereg_mr_exit0;
+       }
+
+       /* TODO: BUSY: MR still has bound window(s) */
+       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+       if (h_ret != H_SUCCESS) {
+               ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lx shca=%p "
+                        "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x",
+                        h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
+                        e_mr->ipz_mr_handle.handle, mr->lkey);
+               ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+               goto dereg_mr_exit0;
+       }
+
+       /* successful deregistration */
+       ehca_mr_delete(e_mr);
+
+dereg_mr_exit0:
+       if (ret)
+               ehca_err(mr->device, "ret=%x mr=%p", ret, mr);
+       return ret;
+} /* end ehca_dereg_mr() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
+{
+       struct ib_mw *ib_mw;
+       u64 h_ret;
+       struct ehca_mw *e_mw;
+       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+       struct ehca_shca *shca =
+               container_of(pd->device, struct ehca_shca, ib_device);
+       struct ehca_mw_hipzout_parms hipzout = {{0},0};
+
+       e_mw = ehca_mw_new();
+       if (!e_mw) {
+               ib_mw = ERR_PTR(-ENOMEM);
+               goto alloc_mw_exit0;
+       }
+
+       h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
+                                        e_pd->fw_pd, &hipzout);
+       if (h_ret != H_SUCCESS) {
+               ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx "
+                        "shca=%p hca_hndl=%lx mw=%p",
+                        h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
+               ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret));
+               goto alloc_mw_exit1;
+       }
+       /* successful MW allocation */
+       e_mw->ipz_mw_handle = hipzout.handle;
+       e_mw->ib_mw.rkey    = hipzout.rkey;
+       return &e_mw->ib_mw;
+
+alloc_mw_exit1:
+       ehca_mw_delete(e_mw);
+alloc_mw_exit0:
+       if (IS_ERR(ib_mw))
+               ehca_err(pd->device, "rc=%lx pd=%p", PTR_ERR(ib_mw), pd);
+       return ib_mw;
+} /* end ehca_alloc_mw() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_bind_mw(struct ib_qp *qp,
+                struct ib_mw *mw,
+                struct ib_mw_bind *mw_bind)
+{
+       /* TODO: not supported yet */
+       ehca_gen_err("bind MW currently not supported by HCAD");
+
+       return -EPERM;
+} /* end ehca_bind_mw() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dealloc_mw(struct ib_mw *mw)
+{
+       u64 h_ret;
+       struct ehca_shca *shca =
+               container_of(mw->device, struct ehca_shca, ib_device);
+       struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
+
+       h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
+       if (h_ret != H_SUCCESS) {
+               ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lx shca=%p "
+                        "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx",
+                        h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
+                        e_mw->ipz_mw_handle.handle);
+               return ehca_mrmw_map_hrc_free_mw(h_ret);
+       }
+       /* successful deallocation */
+       ehca_mw_delete(e_mw);
+       return 0;
+} /* end ehca_dealloc_mw() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
+                             int mr_access_flags,
+                             struct ib_fmr_attr *fmr_attr)
+{
+       struct ib_fmr *ib_fmr;
+       struct ehca_shca *shca =
+               container_of(pd->device, struct ehca_shca, ib_device);
+       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+       struct ehca_mr *e_fmr;
+       int ret;
+       u32 tmp_lkey, tmp_rkey;
+       struct ehca_mr_pginfo pginfo = {0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+
+       /* check other parameters */
+       if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+           ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+               /*
+                * Remote Write Access requires Local Write Access
+                * Remote Atomic Access requires Local Write Access
+                */
+               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+                        mr_access_flags);
+               ib_fmr = ERR_PTR(-EINVAL);
+               goto alloc_fmr_exit0;
+       }
+       if (mr_access_flags & IB_ACCESS_MW_BIND) {
+               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+                        mr_access_flags);
+               ib_fmr = ERR_PTR(-EINVAL);
+               goto alloc_fmr_exit0;
+       }
+       if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
+               ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
+                        "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
+                        fmr_attr->max_pages, fmr_attr->max_maps,
+                        fmr_attr->page_shift);
+               ib_fmr = ERR_PTR(-EINVAL);
+               goto alloc_fmr_exit0;
+       }
+       if (((1 << fmr_attr->page_shift) != EHCA_PAGESIZE) &&
+           ((1 << fmr_attr->page_shift) != PAGE_SIZE)) {
+               ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
+                        fmr_attr->page_shift);
+               ib_fmr = ERR_PTR(-EINVAL);
+               goto alloc_fmr_exit0;
+       }
+
+       e_fmr = ehca_mr_new();
+       if (!e_fmr) {
+               ib_fmr = ERR_PTR(-ENOMEM);
+               goto alloc_fmr_exit0;
+       }
+       e_fmr->flags |= EHCA_MR_FLAG_FMR;
+
+       /* register MR on HCA */
+       ret = ehca_reg_mr(shca, e_fmr, NULL,
+                         fmr_attr->max_pages * (1 << fmr_attr->page_shift),
+                         mr_access_flags, e_pd, &pginfo,
+                         &tmp_lkey, &tmp_rkey);
+       if (ret) {
+               ib_fmr = ERR_PTR(ret);
+               goto alloc_fmr_exit1;
+       }
+
+       /* successful */
+       e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
+       e_fmr->fmr_max_pages = fmr_attr->max_pages;
+       e_fmr->fmr_max_maps = fmr_attr->max_maps;
+       e_fmr->fmr_map_cnt = 0;
+       return &e_fmr->ib.ib_fmr;
+
+alloc_fmr_exit1:
+       ehca_mr_delete(e_fmr);
+alloc_fmr_exit0:
+       if (IS_ERR(ib_fmr))
+               ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x "
+                        "fmr_attr=%p", PTR_ERR(ib_fmr), pd,
+                        mr_access_flags, fmr_attr);
+       return ib_fmr;
+} /* end ehca_alloc_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_map_phys_fmr(struct ib_fmr *fmr,
+                     u64 *page_list,
+                     int list_len,
+                     u64 iova)
+{
+       int ret;
+       struct ehca_shca *shca =
+               container_of(fmr->device, struct ehca_shca, ib_device);
+       struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
+       struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
+       struct ehca_mr_pginfo pginfo = {0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+       u32 tmp_lkey, tmp_rkey;
+
+       if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+               ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
+                        e_fmr, e_fmr->flags);
+               ret = -EINVAL;
+               goto map_phys_fmr_exit0;
+       }
+       ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
+       if (ret)
+               goto map_phys_fmr_exit0;
+       if (iova % e_fmr->fmr_page_size) {
+               /* iova must be aligned to the FMR page size */
+               ehca_err(fmr->device, "bad iova, iova=%lx fmr_page_size=%x",
+                        iova, e_fmr->fmr_page_size);
+               ret = -EINVAL;
+               goto map_phys_fmr_exit0;
+       }
+       if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
+               /* HCAD does not limit the maps, however trace this anyway */
+               ehca_info(fmr->device, "map limit exceeded, fmr=%p "
+                         "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
+                         fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
+       }
+
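+       /* describe the FMR page list for re-registration */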
+       pginfo.type      = EHCA_MR_PGI_FMR;
+       pginfo.num_pages = list_len;
+       pginfo.num_4k    = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE);
+       pginfo.page_list = page_list;
+       pginfo.next_4k   = ((iova & (e_fmr->fmr_page_size-1)) /
+                           EHCA_PAGESIZE);
+
+       ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova,
+                           list_len * e_fmr->fmr_page_size,
+                           e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
+       if (ret)
+               goto map_phys_fmr_exit0;
+
+       /* successful reregistration */
+       e_fmr->fmr_map_cnt++;
+       e_fmr->ib.ib_fmr.lkey = tmp_lkey;
+       e_fmr->ib.ib_fmr.rkey = tmp_rkey;
+       return 0;
+
+map_phys_fmr_exit0:
+       if (ret)
+               ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x "
+                        "iova=%lx",
+                        ret, fmr, page_list, list_len, iova);
+       return ret;
+} /* end ehca_map_phys_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_unmap_fmr(struct list_head *fmr_list)
+{
+       int ret = 0;
+       struct ib_fmr *ib_fmr;
+       struct ehca_shca *shca = NULL;
+       struct ehca_shca *prev_shca;
+       struct ehca_mr *e_fmr;
+       u32 num_fmr = 0;
+       u32 unmap_fmr_cnt = 0;
+
+       /* check that all FMRs belong to the same SHCA; check internal flag */
+       list_for_each_entry(ib_fmr, fmr_list, list) {
+               prev_shca = shca;
+               if (!ib_fmr) {
+                       ehca_gen_err("bad fmr=%p in list", ib_fmr);
+                       ret = -EINVAL;
+                       goto unmap_fmr_exit0;
+               }
+               shca = container_of(ib_fmr->device, struct ehca_shca,
+                                   ib_device);
+               e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
+               if ((shca != prev_shca) && prev_shca) {
+                       ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
+                                "prev_shca=%p e_fmr=%p",
+                                shca, prev_shca, e_fmr);
+                       ret = -EINVAL;
+                       goto unmap_fmr_exit0;
+               }
+               if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+                       ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
+                                "e_fmr->flags=%x", e_fmr, e_fmr->flags);
+                       ret = -EINVAL;
+                       goto unmap_fmr_exit0;
+               }
+               num_fmr++;
+       }
+
+       /* loop over all FMRs to unmap */
+       list_for_each_entry(ib_fmr, fmr_list, list) {
+               unmap_fmr_cnt++;
+               e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
+               shca = container_of(ib_fmr->device, struct ehca_shca,
+                                   ib_device);
+               ret = ehca_unmap_one_fmr(shca, e_fmr);
+               if (ret) {
+                       /* unmap failed, stop unmapping the remaining FMRs */
+                       ehca_err(&shca->ib_device, "unmap of one FMR failed, "
+                                "stop rest, e_fmr=%p num_fmr=%x "
+                                "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
+                                unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
+                       goto unmap_fmr_exit0;
+               }
+       }
+
+unmap_fmr_exit0:
+       if (ret)
+               ehca_gen_err("ret=%x fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
+                            ret, fmr_list, num_fmr, unmap_fmr_cnt);
+       return ret;
+} /* end ehca_unmap_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dealloc_fmr(struct ib_fmr *fmr)
+{
+       int ret;
+       u64 h_ret;
+       struct ehca_shca *shca =
+               container_of(fmr->device, struct ehca_shca, ib_device);
+       struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
+
+       if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+               ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
+                        e_fmr, e_fmr->flags);
+               ret = -EINVAL;
+               goto free_fmr_exit0;
+       }
+
+       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
+       if (h_ret != H_SUCCESS) {
+               ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lx e_fmr=%p "
+                        "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x",
+                        h_ret, e_fmr, shca->ipz_hca_handle.handle,
+                        e_fmr->ipz_mr_handle.handle, fmr->lkey);
+               ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+               goto free_fmr_exit0;
+       }
+       /* successful deregistration */
+       ehca_mr_delete(e_fmr);
+       return 0;
+
+free_fmr_exit0:
+       if (ret)
+               ehca_err(&shca->ib_device, "ret=%x fmr=%p", ret, fmr);
+       return ret;
+} /* end ehca_dealloc_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_mr(struct ehca_shca *shca,
+               struct ehca_mr *e_mr,
+               u64 *iova_start,
+               u64 size,
+               int acl,
+               struct ehca_pd *e_pd,
+               struct ehca_mr_pginfo *pginfo,
+               u32 *lkey, /*OUT*/
+               u32 *rkey) /*OUT*/
+{
+       int ret;
+       u64 h_ret;
+       u32 hipz_acl;
+       struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+       ehca_mrmw_map_acl(acl, &hipz_acl);
+       ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+       if (ehca_use_hp_mr == 1)
+               hipz_acl |= 0x00000001;
+
+       h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
+                                        (u64)iova_start, size, hipz_acl,
+                                        e_pd->fw_pd, &hipzout);
+       if (h_ret != H_SUCCESS) {
+               ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx "
+                        "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle);
+               ret = ehca_mrmw_map_hrc_alloc(h_ret);
+               goto ehca_reg_mr_exit0;
+       }
+
+       e_mr->ipz_mr_handle = hipzout.handle;
+
+       ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
+       if (ret)
+               goto ehca_reg_mr_exit1;
+
+       /* successful registration */
+       e_mr->num_pages = pginfo->num_pages;
+       e_mr->num_4k    = pginfo->num_4k;
+       e_mr->start     = iova_start;
+       e_mr->size      = size;
+       e_mr->acl       = acl;
+       *lkey = hipzout.lkey;
+       *rkey = hipzout.rkey;
+       return 0;
+
+ehca_reg_mr_exit1:
+       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+       if (h_ret != H_SUCCESS) {
+               ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p "
+                        "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x "
+                        "pginfo=%p num_pages=%lx num_4k=%lx ret=%x",
+                        h_ret, shca, e_mr, iova_start, size, acl, e_pd,
+                        hipzout.lkey, pginfo, pginfo->num_pages,
+                        pginfo->num_4k, ret);
+               ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
+                        "not recoverable");
+       }
+ehca_reg_mr_exit0:
+       if (ret)
+               ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
+                        "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
+                        "num_pages=%lx num_4k=%lx",
+                        ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
+                        pginfo->num_pages, pginfo->num_4k);
+       return ret;
+} /* end ehca_reg_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_mr_rpages(struct ehca_shca *shca,
+                      struct ehca_mr *e_mr,
+                      struct ehca_mr_pginfo *pginfo)
+{
+       int ret = 0;
+       u64 h_ret;
+       u32 rnum;
+       u64 rpage;
+       u32 i;
+       u64 *kpage;
+
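+       /* scratch buffer for the rpage list passed to the hcall */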
+       kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+       if (!kpage) {
+               ehca_err(&shca->ib_device, "kpage alloc failed");
+               ret = -ENOMEM;
+               goto ehca_reg_mr_rpages_exit0;
+       }
+
+       /* max 512 pages per shot */
+       for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) {
+
+               if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) {
+                       rnum = pginfo->num_4k % 512; /* last shot */
+                       if (rnum == 0)
+                               rnum = 512;      /* last shot is full */
+               } else
+                       rnum = 512;
+
+               if (rnum > 1) {
+                       ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage);
+                       if (ret) {
+                               ehca_err(&shca->ib_device, "ehca_set_pagebuf "
+                                        "bad rc, ret=%x rnum=%x kpage=%p",
+                                        ret, rnum, kpage);
+                               ret = -EFAULT;
+                               goto ehca_reg_mr_rpages_exit1;
+                       }
+                       rpage = virt_to_abs(kpage);
+                       if (!rpage) {
+                               ehca_err(&shca->ib_device, "kpage=%p i=%x",
+                                        kpage, i);
+                               ret = -EFAULT;
+                               goto ehca_reg_mr_rpages_exit1;
+                       }
+               } else {  /* rnum==1 */
+                       ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage);
+                       if (ret) {
+                               ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 "
+                                        "bad rc, ret=%x i=%x", ret, i);
+                               ret = -EFAULT;
+                               goto ehca_reg_mr_rpages_exit1;
+                       }
+               }
+
+               h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr,
+                                                0, /* pagesize 4k */
+                                                0, rpage, rnum);
+
+               if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) {
+                       /*
+                        * check for 'registration complete'==H_SUCCESS
+                        * and for 'page registered'==H_PAGE_REGISTERED
+                        */
+                       if (h_ret != H_SUCCESS) {
+                               ehca_err(&shca->ib_device, "last "
+                                        "hipz_reg_rpage_mr failed, h_ret=%lx "
+                                        "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx"
+                                        " lkey=%x", h_ret, e_mr, i,
+                                        shca->ipz_hca_handle.handle,
+                                        e_mr->ipz_mr_handle.handle,
+                                        e_mr->ib.ib_mr.lkey);
+                               ret = ehca_mrmw_map_hrc_rrpg_last(h_ret);
+                               break;
+                       } else
+                               ret = 0;
+               } else if (h_ret != H_PAGE_REGISTERED) {
+                       ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
+                                "h_ret=%lx e_mr=%p i=%x lkey=%x hca_hndl=%lx "
+                                "mr_hndl=%lx", h_ret, e_mr, i,
+                                e_mr->ib.ib_mr.lkey,
+                                shca->ipz_hca_handle.handle,
+                                e_mr->ipz_mr_handle.handle);
+                       ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret);
+                       break;
+               } else
+                       ret = 0;
+       } /* end for(i) */
+
+ehca_reg_mr_rpages_exit1:
+       kfree(kpage);
+ehca_reg_mr_rpages_exit0:
+       if (ret)
+               ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p "
+                        "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo,
+                        pginfo->num_pages, pginfo->num_4k);
+       return ret;
+} /* end ehca_reg_mr_rpages() */
+
+/*----------------------------------------------------------------------*/
+
+inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
+                               struct ehca_mr *e_mr,
+                               u64 *iova_start,
+                               u64 size,
+                               u32 acl,
+                               struct ehca_pd *e_pd,
+                               struct ehca_mr_pginfo *pginfo,
+                               u32 *lkey, /*OUT*/
+                               u32 *rkey) /*OUT*/
+{
+       int ret;
+       u64 h_ret;
+       u32 hipz_acl;
+       u64 *kpage;
+       u64 rpage;
+       struct ehca_mr_pginfo pginfo_save;
+       struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+       ehca_mrmw_map_acl(acl, &hipz_acl);
+       ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+
+       kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+       if (!kpage) {
+               ehca_err(&shca->ib_device, "kpage alloc failed");
+               ret = -ENOMEM;
+               goto ehca_rereg_mr_rereg1_exit0;
+       }
+
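+       /* save pginfo so it can be restored if the rereg hcall fails */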
+       pginfo_save = *pginfo;
+       ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage);
+       if (ret) {
+               ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
+                        "pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p",
+                        e_mr, pginfo, pginfo->type, pginfo->num_pages,
+                        pginfo->num_4k, kpage);
+               goto ehca_rereg_mr_rereg1_exit1;
+       }
+       rpage = virt_to_abs(kpage);
+       if (!rpage) {
+               ehca_err(&shca->ib_device, "kpage=%p", kpage);
+               ret = -EFAULT;
+               goto ehca_rereg_mr_rereg1_exit1;
+       }
+       h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
+                                     (u64)iova_start, size, hipz_acl,
+                                     e_pd->fw_pd, rpage, &hipzout);
+       if (h_ret != H_SUCCESS) {
+               /*
+                * reregistration unsuccessful, try it again with the 3 hCalls,
+                * e.g. this is required in case H_MR_CONDITION
+                * (MW bound or MR is shared)
+                */
+               ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
+                         "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr);
+               *pginfo = pginfo_save;
+               ret = -EAGAIN;
+       } else if ((u64*)hipzout.vaddr != iova_start) {
+               ehca_err(&shca->ib_device, "PHYP changed iova_start in "
+                        "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p "
+                        "mr_handle=%lx lkey=%x lkey_out=%x", iova_start,
+                        hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
+                        e_mr->ib.ib_mr.lkey, hipzout.lkey);
+               ret = -EFAULT;
+       } else {
+               /*
+                * successful reregistration
+                * note: start and start_out are identical for eServer HCAs
+                */
+               e_mr->num_pages = pginfo->num_pages;
+               e_mr->num_4k    = pginfo->num_4k;
+               e_mr->start     = iova_start;
+               e_mr->size      = size;
+               e_mr->acl       = acl;
+               *lkey = hipzout.lkey;
+               *rkey = hipzout.rkey;
+       }
+
+ehca_rereg_mr_rereg1_exit1:
+       kfree(kpage);
+ehca_rereg_mr_rereg1_exit0:
+       if (ret && (ret != -EAGAIN))
+               ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x "
+                        "pginfo=%p num_pages=%lx num_4k=%lx",
+                        ret, *lkey, *rkey, pginfo, pginfo->num_pages,
+                        pginfo->num_4k);
+       return ret;
+} /* end ehca_rereg_mr_rereg1() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_rereg_mr(struct ehca_shca *shca,
+                 struct ehca_mr *e_mr,
+                 u64 *iova_start,
+                 u64 size,
+                 int acl,
+                 struct ehca_pd *e_pd,
+                 struct ehca_mr_pginfo *pginfo,
+                 u32 *lkey,
+                 u32 *rkey)
+{
+       int ret = 0;
+       u64 h_ret;
+       int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
+       int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
+
+       /* first determine reregistration hCall(s) */
+       if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) ||
+           (pginfo->num_4k > e_mr->num_4k)) {
+               ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx "
+                        "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k);
+               rereg_1_hcall = 0;
+               rereg_3_hcall = 1;
+       }
+
+       if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */
+               rereg_1_hcall = 0;
+               rereg_3_hcall = 1;
+               e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
+               ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
+                        e_mr);
+       }
+
+       if (rereg_1_hcall) {
+               ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
+                                          acl, e_pd, pginfo, lkey, rkey);
+               if (ret) {
+                       if (ret == -EAGAIN)
+                               rereg_3_hcall = 1;
+                       else
+                               goto ehca_rereg_mr_exit0;
+               }
+       }
+
+       if (rereg_3_hcall) {
+               struct ehca_mr save_mr;
+
+               /* first deregister old MR */
+               h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+               if (h_ret != H_SUCCESS) {
+                       ehca_err(&shca->ib_device, "hipz_free_mr failed, "
+                                "h_ret=%lx e_mr=%p hca_hndl=%lx mr_hndl=%lx "
+                                "mr->lkey=%x",
+                                h_ret, e_mr, shca->ipz_hca_handle.handle,
+                                e_mr->ipz_mr_handle.handle,
+                                e_mr->ib.ib_mr.lkey);
+                       ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+                       goto ehca_rereg_mr_exit0;
+               }
+               /* clean ehca_mr_t, without changing struct ib_mr and lock */
+               save_mr = *e_mr;
+               ehca_mr_deletenew(e_mr);
+
+               /* set some MR values */
+               e_mr->flags = save_mr.flags;
+               e_mr->fmr_page_size = save_mr.fmr_page_size;
+               e_mr->fmr_max_pages = save_mr.fmr_max_pages;
+               e_mr->fmr_max_maps = save_mr.fmr_max_maps;
+               e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
+
+               ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
+                                     e_pd, pginfo, lkey, rkey);
+               if (ret) {
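+                       /* registration failed, restore the saved MR fields */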
+                       u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
+                       memcpy(&e_mr->flags, &(save_mr.flags),
+                              sizeof(struct ehca_mr) - offset);
+                       goto ehca_rereg_mr_exit0;
+               }
+       }
+
+ehca_rereg_mr_exit0:
+       if (ret)
+               ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
+                        "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
+                        "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
+                        "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
+                        acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey,
+                        rereg_1_hcall, rereg_3_hcall);
+       return ret;
+} /* end ehca_rereg_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_unmap_one_fmr(struct ehca_shca *shca,
+                      struct ehca_mr *e_fmr)
+{
+       int ret = 0;
+       u64 h_ret;
+       int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */
+       int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */
+       struct ehca_pd *e_pd =
+               container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
+       struct ehca_mr save_fmr;
+       u32 tmp_lkey, tmp_rkey;
+       struct ehca_mr_pginfo pginfo = {0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+       struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+       /* first check if reregistration hCall can be used for unmap */
+       if (e_fmr->fmr_max_pages > 512) {
+               rereg_1_hcall = 0;
+               rereg_3_hcall = 1;
+       }
+
+       if (rereg_1_hcall) {
+               /*
+                * note: after using rereg hcall with len=0,
+                * rereg hcall must be used again for registering pages
+                */
+               h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
+                                             0, 0, e_pd->fw_pd, 0, &hipzout);
+               if (h_ret != H_SUCCESS) {
+                       /*
+                        * should not happen, because length checked above,
+                        * FMRs are not shared and no MW bound to FMRs
+                        */
+                       ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
+                                "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx "
+                                "mr_hndl=%lx lkey=%x lkey_out=%x",
+                                h_ret, e_fmr, shca->ipz_hca_handle.handle,
+                                e_fmr->ipz_mr_handle.handle,
+                                e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
+                       rereg_3_hcall = 1;
+               } else {
+                       /* successful reregistration */
+                       e_fmr->start = NULL;
+                       e_fmr->size = 0;
+                       tmp_lkey = hipzout.lkey;
+                       tmp_rkey = hipzout.rkey;
+               }
+       }
+
+       if (rereg_3_hcall) {
+               /* first free old FMR */
+               h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
+               if (h_ret != H_SUCCESS) {
+                       ehca_err(&shca->ib_device, "hipz_free_mr failed, "
+                                "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
+                                "lkey=%x",
+                                h_ret, e_fmr, shca->ipz_hca_handle.handle,
+                                e_fmr->ipz_mr_handle.handle,
+                                e_fmr->ib.ib_fmr.lkey);
+                       ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+                       goto ehca_unmap_one_fmr_exit0;
+               }
+               /* clean ehca_mr_t, without changing lock */
+               save_fmr = *e_fmr;
+               ehca_mr_deletenew(e_fmr);
+
+               /* set some MR values */
+               e_fmr->flags = save_fmr.flags;
+               e_fmr->fmr_page_size = save_fmr.fmr_page_size;
+               e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
+               e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
+               e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
+               e_fmr->acl = save_fmr.acl;
+
+               pginfo.type      = EHCA_MR_PGI_FMR;
+               pginfo.num_pages = 0;
+               pginfo.num_4k    = 0;
+               ret = ehca_reg_mr(shca, e_fmr, NULL,
+                                 (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
+                                 e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
+                                 &tmp_rkey);
+               if (ret) {
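+                       /* registration failed, restore the saved FMR fields */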
+                       u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
+                       memcpy(&e_fmr->flags, &(save_fmr.flags),
+                              sizeof(struct ehca_mr) - offset);
+                       goto ehca_unmap_one_fmr_exit0;
+               }
+       }
+
+ehca_unmap_one_fmr_exit0:
+       if (ret)
+               ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x "
+                        "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x",
+                        ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages,
+                        rereg_1_hcall, rereg_3_hcall);
+       return ret;
+} /* end ehca_unmap_one_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_smr(struct ehca_shca *shca,
+                struct ehca_mr *e_origmr,
+                struct ehca_mr *e_newmr,
+                u64 *iova_start,
+                int acl,
+                struct ehca_pd *e_pd,
+                u32 *lkey, /*OUT*/
+                u32 *rkey) /*OUT*/
+{
+       int ret = 0;
+       u64 h_ret;
+       u32 hipz_acl;
+       struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+       ehca_mrmw_map_acl(acl, &hipz_acl);
+       ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+
+       h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
+                                   (u64)iova_start, hipz_acl, e_pd->fw_pd,
+                                   &hipzout);
+       if (h_ret != H_SUCCESS) {
+               ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
+                        "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
+                        "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+                        h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
+                        shca->ipz_hca_handle.handle,
+                        e_origmr->ipz_mr_handle.handle,
+                        e_origmr->ib.ib_mr.lkey);
+               ret = ehca_mrmw_map_hrc_reg_smr(h_ret);
+               goto ehca_reg_smr_exit0;
+       }
+       /* successful registration */
+       e_newmr->num_pages     = e_origmr->num_pages;
+       e_newmr->num_4k        = e_origmr->num_4k;
+       e_newmr->start         = iova_start;
+       e_newmr->size          = e_origmr->size;
+       e_newmr->acl           = acl;
+       e_newmr->ipz_mr_handle = hipzout.handle;
+       *lkey = hipzout.lkey;
+       *rkey = hipzout.rkey;
+       return 0;
+
+ehca_reg_smr_exit0:
+       if (ret)
+               ehca_err(&shca->ib_device, "ret=%x shca=%p e_origmr=%p "
+                        "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
+                        ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
+       return ret;
+} /* end ehca_reg_smr() */
+
+/*----------------------------------------------------------------------*/
+
+/* register internal max-MR to internal SHCA */
+int ehca_reg_internal_maxmr(
+       struct ehca_shca *shca,
+       struct ehca_pd *e_pd,
+       struct ehca_mr **e_maxmr)  /*OUT*/
+{
+       int ret;
+       struct ehca_mr *e_mr;
+       u64 *iova_start;
+       u64 size_maxmr;
+       struct ehca_mr_pginfo pginfo = {0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+       struct ib_phys_buf ib_pbuf;
+       u32 num_pages_mr;
+       u32 num_pages_4k; /* 4k portion "pages" */
+
+       e_mr = ehca_mr_new();
+       if (!e_mr) {
+               ehca_err(&shca->ib_device, "out of memory");
+               ret = -ENOMEM;
+               goto ehca_reg_internal_maxmr_exit0;
+       }
+       e_mr->flags |= EHCA_MR_FLAG_MAXMR;
+
+       /* register internal max-MR on HCA */
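+       /* the max-MR covers the kernel linear mapping from KERNELBASE */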
+       size_maxmr = (u64)high_memory - PAGE_OFFSET;
+       iova_start = (u64*)KERNELBASE;
+       ib_pbuf.addr = 0;
+       ib_pbuf.size = size_maxmr;
+       num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr +
+                        PAGE_SIZE - 1) / PAGE_SIZE);
+       num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr +
+                        EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+
+       pginfo.type           = EHCA_MR_PGI_PHYS;
+       pginfo.num_pages      = num_pages_mr;
+       pginfo.num_4k         = num_pages_4k;
+       pginfo.num_phys_buf   = 1;
+       pginfo.phys_buf_array = &ib_pbuf;
+
+       ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
+                         &pginfo, &e_mr->ib.ib_mr.lkey,
+                         &e_mr->ib.ib_mr.rkey);
+       if (ret) {
+               ehca_err(&shca->ib_device, "reg of internal max MR failed, "
+                        "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x "
+                        "num_pages_4k=%x", e_mr, iova_start, size_maxmr,
+                        num_pages_mr, num_pages_4k);
+               goto ehca_reg_internal_maxmr_exit1;
+       }
+
+       /* successful registration of all pages */
+       e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
+       e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
+       e_mr->ib.ib_mr.uobject = NULL;
+       atomic_inc(&(e_pd->ib_pd.usecnt));
+       atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
+       *e_maxmr = e_mr;
+       return 0;
+
+ehca_reg_internal_maxmr_exit1:
+       ehca_mr_delete(e_mr);
+ehca_reg_internal_maxmr_exit0:
+       if (ret)
+               ehca_err(&shca->ib_device, "ret=%x shca=%p e_pd=%p e_maxmr=%p",
+                        ret, shca, e_pd, e_maxmr);
+       return ret;
+} /* end ehca_reg_internal_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_maxmr(struct ehca_shca *shca,
+                  struct ehca_mr *e_newmr,
+                  u64 *iova_start,
+                  int acl,
+                  struct ehca_pd *e_pd,
+                  u32 *lkey,
+                  u32 *rkey)
+{
+       u64 h_ret;
+       struct ehca_mr *e_origmr = shca->maxmr;
+       u32 hipz_acl;
+       struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+       ehca_mrmw_map_acl(acl, &hipz_acl);
+       ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+
+       h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
+                                   (u64)iova_start, hipz_acl, e_pd->fw_pd,
+                                   &hipzout);
+       if (h_ret != H_SUCCESS) {
+               ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
+                        "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+                        h_ret, e_origmr, shca->ipz_hca_handle.handle,
+                        e_origmr->ipz_mr_handle.handle,
+                        e_origmr->ib.ib_mr.lkey);
+               return ehca_mrmw_map_hrc_reg_smr(h_ret);
+       }
+       /* successful registration */
+       e_newmr->num_pages     = e_origmr->num_pages;
+       e_newmr->num_4k        = e_origmr->num_4k;
+       e_newmr->start         = iova_start;
+       e_newmr->size          = e_origmr->size;
+       e_newmr->acl           = acl;
+       e_newmr->ipz_mr_handle = hipzout.handle;
+       *lkey = hipzout.lkey;
+       *rkey = hipzout.rkey;
+       return 0;
+} /* end ehca_reg_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
+{
+       int ret;
+       struct ehca_mr *e_maxmr;
+       struct ib_pd *ib_pd;
+
+       if (!shca->maxmr) {
+               ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
+               ret = -EINVAL;
+               goto ehca_dereg_internal_maxmr_exit0;
+       }
+
+       e_maxmr = shca->maxmr;
+       ib_pd = e_maxmr->ib.ib_mr.pd;
+       shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
+
+       ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
+       if (ret) {
+               ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
+                        "ret=%x e_maxmr=%p shca=%p lkey=%x",
+                        ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
+               shca->maxmr = e_maxmr;
+               goto ehca_dereg_internal_maxmr_exit0;
+       }
+
+       atomic_dec(&ib_pd->usecnt);
+
+ehca_dereg_internal_maxmr_exit0:
+       if (ret)
+               ehca_err(&shca->ib_device, "ret=%x shca=%p shca->maxmr=%p",
+                        ret, shca, shca->maxmr);
+       return ret;
+} /* end ehca_dereg_internal_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * check the physical buffer array of MR verbs for validity and
+ * calculate the MR size
+ */
+int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
+                                 int num_phys_buf,
+                                 u64 *iova_start,
+                                 u64 *size)
+{
+       struct ib_phys_buf *pbuf = phys_buf_array;
+       u64 size_count = 0;
+       u32 i;
+
+       if (num_phys_buf == 0) {
+               ehca_gen_err("bad phys buf array len, num_phys_buf=0");
+               return -EINVAL;
+       }
+       /* check first buffer */
+       if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
+               ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
+                            "pbuf->addr=%lx pbuf->size=%lx",
+                            iova_start, pbuf->addr, pbuf->size);
+               return -EINVAL;
+       }
+       if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
+           (num_phys_buf > 1)) {
+               ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%lx "
+                            "pbuf->size=%lx", pbuf->addr, pbuf->size);
+               return -EINVAL;
+       }
+
+       for (i = 0; i < num_phys_buf; i++) {
+               if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
+                       ehca_gen_err("bad address, i=%x pbuf->addr=%lx "
+                                    "pbuf->size=%lx",
+                                    i, pbuf->addr, pbuf->size);
+                       return -EINVAL;
+               }
+               if (((i > 0) && /* not 1st */
+                    (i < (num_phys_buf - 1)) &&        /* not last */
+                    (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
+                       ehca_gen_err("bad size, i=%x pbuf->size=%lx",
+                                    i, pbuf->size);
+                       return -EINVAL;
+               }
+               size_count += pbuf->size;
+               pbuf++;
+       }
+
+       *size = size_count;
+       return 0;
+} /* end ehca_mr_chk_buf_and_calc_size() */
+
+/*----------------------------------------------------------------------*/
+
+/* check the page list of the map FMR verb for validity */
+int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
+                            u64 *page_list,
+                            int list_len)
+{
+       u32 i;
+       u64 *page;
+
+       if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
+               ehca_gen_err("bad list_len, list_len=%x "
+                            "e_fmr->fmr_max_pages=%x fmr=%p",
+                            list_len, e_fmr->fmr_max_pages, e_fmr);
+               return -EINVAL;
+       }
+
+       /* each page must be aligned */
+       page = page_list;
+       for (i = 0; i < list_len; i++) {
+               if (*page % e_fmr->fmr_page_size) {
+                       ehca_gen_err("bad page, i=%x *page=%lx page=%p fmr=%p "
+                                    "fmr_page_size=%x", i, *page, page, e_fmr,
+                                    e_fmr->fmr_page_size);
+                       return -EINVAL;
+               }
+               page++;
+       }
+
+       return 0;
+} /* end ehca_fmr_check_page_list() */
+
+/*----------------------------------------------------------------------*/
+
+/* setup page buffer from page info */
+int ehca_set_pagebuf(struct ehca_mr *e_mr,
+                    struct ehca_mr_pginfo *pginfo,
+                    u32 number,
+                    u64 *kpage)
+{
+       int ret = 0;
+       struct ib_umem_chunk *prev_chunk;
+       struct ib_umem_chunk *chunk;
+       struct ib_phys_buf *pbuf;
+       u64 *fmrlist;
+       u64 num4k, pgaddr, offs4k;
+       u32 i = 0;
+       u32 j = 0;
+
+       if (pginfo->type == EHCA_MR_PGI_PHYS) {
+               /* loop over desired phys_buf_array entries */
+               while (i < number) {
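+                       /* resume at the buffer/offset recorded in pginfo */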
+                       pbuf   = pginfo->phys_buf_array + pginfo->next_buf;
+                       num4k  = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size +
+                                 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
+                       offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
+                       while (pginfo->next_4k < offs4k + num4k) {
+                               /* sanity check */
+                               if ((pginfo->page_cnt >= pginfo->num_pages) ||
+                                   (pginfo->page_4k_cnt >= pginfo->num_4k)) {
+                                       ehca_gen_err("page_cnt >= num_pages, "
+                                                    "page_cnt=%lx "
+                                                    "num_pages=%lx "
+                                                    "page_4k_cnt=%lx "
+                                                    "num_4k=%lx i=%x",
+                                                    pginfo->page_cnt,
+                                                    pginfo->num_pages,
+                                                    pginfo->page_4k_cnt,
+                                                    pginfo->num_4k, i);
+                                       ret = -EFAULT;
+                                       goto ehca_set_pagebuf_exit0;
+                               }
+                               *kpage = phys_to_abs(
+                                       (pbuf->addr & EHCA_PAGEMASK)
+                                       + (pginfo->next_4k * EHCA_PAGESIZE));
+                               if (!(*kpage) && pbuf->addr) {
+                                       ehca_gen_err("pbuf->addr=%lx "
+                                                    "pbuf->size=%lx "
+                                                    "next_4k=%lx", pbuf->addr,
+                                                    pbuf->size,
+                                                    pginfo->next_4k);
+                                       ret = -EFAULT;
+                                       goto ehca_set_pagebuf_exit0;
+                               }
+                               (pginfo->page_4k_cnt)++;
+                               (pginfo->next_4k)++;
+                               if (pginfo->next_4k %
+                                   (PAGE_SIZE / EHCA_PAGESIZE) == 0)
+                                       (pginfo->page_cnt)++;
+                               kpage++;
+                               i++;
+                               if (i >= number)
+                                       break;
+                       }
+                       if (pginfo->next_4k >= offs4k + num4k) {
+                               (pginfo->next_buf)++;
+                               pginfo->next_4k = 0;
+                       }
+               }
+       } else if (pginfo->type == EHCA_MR_PGI_USER) {
+               /* loop over desired chunk entries */
+               chunk      = pginfo->next_chunk;
+               prev_chunk = pginfo->next_chunk;
+               list_for_each_entry_continue(chunk,
+                                            (&(pginfo->region->chunk_list)),
+                                            list) {
+                       for (i = pginfo->next_nmap; i < chunk->nmap; ) {
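+                               /*
+                                * j counts 4k entries written to kpage;
+                                * i advances after each full kernel page
+                                */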
+                               pgaddr = ( page_to_pfn(chunk->page_list[i].page)
+                                          << PAGE_SHIFT );
+                               *kpage = phys_to_abs(pgaddr +
+                                                    (pginfo->next_4k *
+                                                     EHCA_PAGESIZE));
+                               if ( !(*kpage) ) {
+                                       ehca_gen_err("pgaddr=%lx "
+                                                    "chunk->page_list[i]=%lx "
+                                                    "i=%x next_4k=%lx mr=%p",
+                                                    pgaddr,
+                                                    (u64)sg_dma_address(
+                                                            &chunk->
+                                                            page_list[i]),
+                                                    i, pginfo->next_4k, e_mr);
+                                       ret = -EFAULT;
+                                       goto ehca_set_pagebuf_exit0;
+                               }
+                               (pginfo->page_4k_cnt)++;
+                               (pginfo->next_4k)++;
+                               kpage++;
+                               if (pginfo->next_4k %
+                                   (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
+                                       (pginfo->page_cnt)++;
+                                       (pginfo->next_nmap)++;
+                                       pginfo->next_4k = 0;
+                                       i++;
+                               }
+                               j++;
+                               if (j >= number) break;
+                       }
+                       if ((pginfo->next_nmap >= chunk->nmap) &&
+                           (j >= number)) {
+                               pginfo->next_nmap = 0;
+                               prev_chunk = chunk;
+                               break;
+                       } else if (pginfo->next_nmap >= chunk->nmap) {
+                               pginfo->next_nmap = 0;
+                               prev_chunk = chunk;
+                       } else if (j >= number)
+                               break;
+                       else
+                               prev_chunk = chunk;
+               }
+               pginfo->next_chunk =
+                       list_prepare_entry(prev_chunk,
+                                          (&(pginfo->region->chunk_list)),
+                                          list);
+       } else if (pginfo->type == EHCA_MR_PGI_FMR) {
+               /* loop over desired page_list entries */
+               fmrlist = pginfo->page_list + pginfo->next_listelem;
+               for (i = 0; i < number; i++) {
+                       *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
+                                            pginfo->next_4k * EHCA_PAGESIZE);
+                       if ( !(*kpage) ) {
+                               ehca_gen_err("*fmrlist=%lx fmrlist=%p "
+                                            "next_listelem=%lx next_4k=%lx",
+                                            *fmrlist, fmrlist,
+                                            pginfo->next_listelem,
+                                            pginfo->next_4k);
+                               ret = -EFAULT;
+                               goto ehca_set_pagebuf_exit0;
+                       }
+                       (pginfo->page_4k_cnt)++;
+                       (pginfo->next_4k)++;
+                       kpage++;
+                       if (pginfo->next_4k %
+                           (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
+                               (pginfo->page_cnt)++;
+                               (pginfo->next_listelem)++;
+                               fmrlist++;
+                               pginfo->next_4k = 0;
+                       }
+               }
+       } else {
+               ehca_gen_err("bad pginfo->type=%x", pginfo->type);
+               ret = -EFAULT;
+               goto ehca_set_pagebuf_exit0;
+       }
+
+ehca_set_pagebuf_exit0:
+       if (ret)
+               ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
+                            "num_4k=%lx next_buf=%lx next_4k=%lx number=%x "
+                            "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x "
+                            "next_listelem=%lx region=%p next_chunk=%p "
+                            "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type,
+                            pginfo->num_pages, pginfo->num_4k,
+                            pginfo->next_buf, pginfo->next_4k, number, kpage,
+                            pginfo->page_cnt, pginfo->page_4k_cnt, i,
+                            pginfo->next_listelem, pginfo->region,
+                            pginfo->next_chunk, pginfo->next_nmap);
+       return ret;
+} /* end ehca_set_pagebuf() */
+
+/*----------------------------------------------------------------------*/
+
+/* set up one page from the page info page buffer */
+int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
+                      struct ehca_mr_pginfo *pginfo,
+                      u64 *rpage)
+{
+       int ret = 0;
+       struct ib_phys_buf *tmp_pbuf;
+       u64 *fmrlist;
+       struct ib_umem_chunk *chunk;
+       struct ib_umem_chunk *prev_chunk;
+       u64 pgaddr, num4k, offs4k;
+
+       if (pginfo->type == EHCA_MR_PGI_PHYS) {
+               /* sanity check */
+               if ((pginfo->page_cnt >= pginfo->num_pages) ||
+                   (pginfo->page_4k_cnt >= pginfo->num_4k)) {
+                       ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx "
+                                    "num_pages=%lx page_4k_cnt=%lx num_4k=%lx",
+                                    pginfo->page_cnt, pginfo->num_pages,
+                                    pginfo->page_4k_cnt, pginfo->num_4k);
+                       ret = -EFAULT;
+                       goto ehca_set_pagebuf_1_exit0;
+               }
+               tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf;
+               num4k  = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size +
+                         EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
+               offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
+               *rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) +
+                                    (pginfo->next_4k * EHCA_PAGESIZE));
+               if ( !(*rpage) && tmp_pbuf->addr ) {
+                       ehca_gen_err("tmp_pbuf->addr=%lx"
+                                    " tmp_pbuf->size=%lx next_4k=%lx",
+                                    tmp_pbuf->addr, tmp_pbuf->size,
+                                    pginfo->next_4k);
+                       ret = -EFAULT;
+                       goto ehca_set_pagebuf_1_exit0;
+               }
+               (pginfo->page_4k_cnt)++;
+               (pginfo->next_4k)++;
+               if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0)
+                       (pginfo->page_cnt)++;
+               if (pginfo->next_4k >= offs4k + num4k) {
+                       (pginfo->next_buf)++;
+                       pginfo->next_4k = 0;
+               }
+       } else if (pginfo->type == EHCA_MR_PGI_USER) {
+               chunk      = pginfo->next_chunk;
+               prev_chunk = pginfo->next_chunk;
+               list_for_each_entry_continue(chunk,
+                                            (&(pginfo->region->chunk_list)),
+                                            list) {
+                       pgaddr = ( page_to_pfn(chunk->page_list[
+                                                      pginfo->next_nmap].page)
+                                  << PAGE_SHIFT);
+                       *rpage = phys_to_abs(pgaddr +
+                                            (pginfo->next_4k * EHCA_PAGESIZE));
+                       if ( !(*rpage) ) {
+                               ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx"
+                                            " next_nmap=%lx next_4k=%lx mr=%p",
+                                            pgaddr, (u64)sg_dma_address(
+                                                    &chunk->page_list[
+                                                            pginfo->
+                                                            next_nmap]),
+                                            pginfo->next_nmap, pginfo->next_4k,
+                                            e_mr);
+                               ret = -EFAULT;
+                               goto ehca_set_pagebuf_1_exit0;
+                       }
+                       (pginfo->page_4k_cnt)++;
+                       (pginfo->next_4k)++;
+                       if (pginfo->next_4k %
+                           (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
+                               (pginfo->page_cnt)++;
+                               (pginfo->next_nmap)++;
+                               pginfo->next_4k = 0;
+                       }
+                       if (pginfo->next_nmap >= chunk->nmap) {
+                               pginfo->next_nmap = 0;
+                               prev_chunk = chunk;
+                       }
+                       break;
+               }
+               pginfo->next_chunk =
+                       list_prepare_entry(prev_chunk,
+                                          (&(pginfo->region->chunk_list)),
+                                          list);
+       } else if (pginfo->type == EHCA_MR_PGI_FMR) {
+               fmrlist = pginfo->page_list + pginfo->next_listelem;
+               *rpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
+                                    pginfo->next_4k * EHCA_PAGESIZE);
+               if ( !(*rpage) ) {
+                       ehca_gen_err("*fmrlist=%lx fmrlist=%p "
+                                    "next_listelem=%lx next_4k=%lx",
+                                    *fmrlist, fmrlist, pginfo->next_listelem,
+                                    pginfo->next_4k);
+                       ret = -EFAULT;
+                       goto ehca_set_pagebuf_1_exit0;
+               }
+               (pginfo->page_4k_cnt)++;
+               (pginfo->next_4k)++;
+               if (pginfo->next_4k %
+                   (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
+                       (pginfo->page_cnt)++;
+                       (pginfo->next_listelem)++;
+                       pginfo->next_4k = 0;
+               }
+       } else {
+               ehca_gen_err("bad pginfo->type=%x", pginfo->type);
+               ret = -EFAULT;
+               goto ehca_set_pagebuf_1_exit0;
+       }
+
+ehca_set_pagebuf_1_exit0:
+       if (ret)
+               ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
+                            "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p "
+                            "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx "
+                            "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr,
+                            pginfo, pginfo->type, pginfo->num_pages,
+                            pginfo->num_4k, pginfo->next_buf, pginfo->next_4k,
+                            rpage, pginfo->page_cnt, pginfo->page_4k_cnt,
+                            pginfo->next_listelem, pginfo->region,
+                            pginfo->next_chunk, pginfo->next_nmap);
+       return ret;
+} /* end ehca_set_pagebuf_1() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * check whether an MR is a max-MR, i.e. covers the whole of memory;
+ * returns 1 if it is a max-MR, else 0
+ */
+int ehca_mr_is_maxmr(u64 size,
+                    u64 *iova_start)
+{
+       /* an MR is treated as max-MR only if it fits the following: */
+       if ((size == ((u64)high_memory - PAGE_OFFSET)) &&
+           (iova_start == (void*)KERNELBASE)) {
+               ehca_gen_dbg("this is a max-MR");
+               return 1;
+       } else
+               return 0;
+} /* end ehca_mr_is_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+/* map access control for MR/MW. This routine is used for MR and MW. */
+void ehca_mrmw_map_acl(int ib_acl,
+                      u32 *hipz_acl)
+{
+       *hipz_acl = 0;
+       if (ib_acl & IB_ACCESS_REMOTE_READ)
+               *hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
+       if (ib_acl & IB_ACCESS_REMOTE_WRITE)
+               *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
+       if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
+               *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
+       if (ib_acl & IB_ACCESS_LOCAL_WRITE)
+               *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
+       if (ib_acl & IB_ACCESS_MW_BIND)
+               *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
+} /* end ehca_mrmw_map_acl() */
+
+/*----------------------------------------------------------------------*/
+
+/* sets page size in hipz access control for MR/MW. */
+void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl) /*INOUT*/
+{
+       return; /* HCA supports only 4k */
+} /* end ehca_mrmw_set_pgsize_hipz_acl() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * reverse map access control for MR/MW.
+ * This routine is used for MR and MW.
+ */
+void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
+                              int *ib_acl) /*OUT*/
+{
+       *ib_acl = 0;
+       if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
+               *ib_acl |= IB_ACCESS_REMOTE_READ;
+       if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
+               *ib_acl |= IB_ACCESS_REMOTE_WRITE;
+       if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
+               *ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
+       if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
+               *ib_acl |= IB_ACCESS_LOCAL_WRITE;
+       if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
+               *ib_acl |= IB_ACCESS_MW_BIND;
+} /* end ehca_mrmw_reverse_map_acl() */
+
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for MR/MW allocations
+ * Used for hipz_mr_reg_alloc and hipz_mw_alloc.
+ */
+int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc)
+{
+       switch (hipz_rc) {
+       case H_SUCCESS:              /* successful completion */
+               return 0;
+       case H_ADAPTER_PARM:         /* invalid adapter handle */
+       case H_RT_PARM:              /* invalid resource type */
+       case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
+       case H_MLENGTH_PARM:         /* invalid memory length */
+       case H_MEM_ACCESS_PARM:      /* invalid access controls */
+       case H_CONSTRAINED:          /* resource constraint */
+               return -EINVAL;
+       case H_BUSY:                 /* long busy */
+               return -EBUSY;
+       default:
+               return -EINVAL;
+       }
+} /* end ehca_mrmw_map_hrc_alloc() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for MR register rpage
+ * Used for hipz_h_register_rpage_mr at registering last page
+ */
+int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc)
+{
+       switch (hipz_rc) {
+       case H_SUCCESS:         /* registration complete */
+               return 0;
+       case H_PAGE_REGISTERED: /* page registered */
+       case H_ADAPTER_PARM:    /* invalid adapter handle */
+       case H_RH_PARM:         /* invalid resource handle */
+/*     case H_QT_PARM:            invalid queue type */
+       case H_PARAMETER:       /*
+                                * invalid logical address,
+                                * or count zero or greater 512
+                                */
+       case H_TABLE_FULL:      /* page table full */
+       case H_HARDWARE:        /* HCA not operational */
+               return -EINVAL;
+       case H_BUSY:            /* long busy */
+               return -EBUSY;
+       default:
+               return -EINVAL;
+       }
+} /* end ehca_mrmw_map_hrc_rrpg_last() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for MR register rpage
+ * Used for hipz_h_register_rpage_mr at registering one page, but not last page
+ */
+int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc)
+{
+       switch (hipz_rc) {
+       case H_PAGE_REGISTERED: /* page registered */
+               return 0;
+       case H_SUCCESS:         /* registration complete */
+       case H_ADAPTER_PARM:    /* invalid adapter handle */
+       case H_RH_PARM:         /* invalid resource handle */
+/*     case H_QT_PARM:            invalid queue type */
+       case H_PARAMETER:       /*
+                                * invalid logical address,
+                                * or count zero or greater 512
+                                */
+       case H_TABLE_FULL:      /* page table full */
+       case H_HARDWARE:        /* HCA not operational */
+               return -EINVAL;
+       case H_BUSY:            /* long busy */
+               return -EBUSY;
+       default:
+               return -EINVAL;
+       }
+} /* end ehca_mrmw_map_hrc_rrpg_notlast() */
+
+/*----------------------------------------------------------------------*/
+
+/* map HIPZ rc to IB retcodes for MR query. Used for hipz_mr_query. */
+int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc)
+{
+       switch (hipz_rc) {
+       case H_SUCCESS:              /* successful completion */
+               return 0;
+       case H_ADAPTER_PARM:         /* invalid adapter handle */
+       case H_RH_PARM:              /* invalid resource handle */
+               return -EINVAL;
+       case H_BUSY:                 /* long busy */
+               return -EBUSY;
+       default:
+               return -EINVAL;
+       }
+} /* end ehca_mrmw_map_hrc_query_mr() */
+
+/*----------------------------------------------------------------------*/
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for freeing MR resource
+ * Used for hipz_h_free_resource_mr
+ */
+int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc)
+{
+       switch (hipz_rc) {
+       case H_SUCCESS:      /* resource freed */
+               return 0;
+       case H_ADAPTER_PARM: /* invalid adapter handle */
+       case H_RH_PARM:      /* invalid resource handle */
+       case H_R_STATE:      /* invalid resource state */
+       case H_HARDWARE:     /* HCA not operational */
+               return -EINVAL;
+       case H_RESOURCE:     /* Resource in use */
+       case H_BUSY:         /* long busy */
+               return -EBUSY;
+       default:
+               return -EINVAL;
+       }
+} /* end ehca_mrmw_map_hrc_free_mr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for freeing MW resource
+ * Used for hipz_h_free_resource_mw
+ */
+int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc)
+{
+       switch (hipz_rc) {
+       case H_SUCCESS:      /* resource freed */
+               return 0;
+       case H_ADAPTER_PARM: /* invalid adapter handle */
+       case H_RH_PARM:      /* invalid resource handle */
+       case H_R_STATE:      /* invalid resource state */
+       case H_HARDWARE:     /* HCA not operational */
+               return -EINVAL;
+       case H_RESOURCE:     /* Resource in use */
+       case H_BUSY:         /* long busy */
+               return -EBUSY;
+       default:
+               return -EINVAL;
+       }
+} /* end ehca_mrmw_map_hrc_free_mw() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for SMR registrations
+ * Used for hipz_h_register_smr.
+ */
+int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc)
+{
+       switch (hipz_rc) {
+       case H_SUCCESS:              /* successful completion */
+               return 0;
+       case H_ADAPTER_PARM:         /* invalid adapter handle */
+       case H_RH_PARM:              /* invalid resource handle */
+       case H_MEM_PARM:             /* invalid MR virtual address */
+       case H_MEM_ACCESS_PARM:      /* invalid access controls */
+       case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
+               return -EINVAL;
+       case H_BUSY:                 /* long busy */
+               return -EBUSY;
+       default:
+               return -EINVAL;
+       }
+} /* end ehca_mrmw_map_hrc_reg_smr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * MR destructor and constructor
+ * used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
+ * except struct ib_mr and spinlock
+ */
+void ehca_mr_deletenew(struct ehca_mr *mr)
+{
+       mr->flags         = 0;
+       mr->num_pages     = 0;
+       mr->num_4k        = 0;
+       mr->acl           = 0;
+       mr->start         = NULL;
+       mr->fmr_page_size = 0;
+       mr->fmr_max_pages = 0;
+       mr->fmr_max_maps  = 0;
+       mr->fmr_map_cnt   = 0;
+       memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
+       memset(&mr->galpas, 0, sizeof(mr->galpas));
+       mr->nr_of_pages   = 0;
+       mr->pagearray     = NULL;
+} /* end ehca_mr_deletenew() */
+
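+/* create the slab caches for eHCA MR and MW control structures */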
+int ehca_init_mrmw_cache(void)
+{
+       mr_cache = kmem_cache_create("ehca_cache_mr",
+                                    sizeof(struct ehca_mr), 0,
+                                    SLAB_HWCACHE_ALIGN,
+                                    NULL, NULL);
+       if (!mr_cache)
+               return -ENOMEM;
+       mw_cache = kmem_cache_create("ehca_cache_mw",
+                                    sizeof(struct ehca_mw), 0,
+                                    SLAB_HWCACHE_ALIGN,
+                                    NULL, NULL);
+       if (!mw_cache) {
+               kmem_cache_destroy(mr_cache);
+               mr_cache = NULL;
+               return -ENOMEM;
+       }
+       return 0;
+}
+
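+/* destroy the MR and MW slab caches if they were created */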
+void ehca_cleanup_mrmw_cache(void)
+{
+       if (mr_cache)
+               kmem_cache_destroy(mr_cache);
+       if (mw_cache)
+               kmem_cache_destroy(mw_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h
new file mode 100644 (file)
index 0000000..d936e40
--- /dev/null
@@ -0,0 +1,140 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  MR/MW declarations and inline functions
+ *
+ *  Authors: Dietmar Decker <ddecker@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _EHCA_MRMW_H_
+#define _EHCA_MRMW_H_
+
+int ehca_reg_mr(struct ehca_shca *shca,
+               struct ehca_mr *e_mr,
+               u64 *iova_start,
+               u64 size,
+               int acl,
+               struct ehca_pd *e_pd,
+               struct ehca_mr_pginfo *pginfo,
+               u32 *lkey,
+               u32 *rkey);
+
+int ehca_reg_mr_rpages(struct ehca_shca *shca,
+                      struct ehca_mr *e_mr,
+                      struct ehca_mr_pginfo *pginfo);
+
+int ehca_rereg_mr(struct ehca_shca *shca,
+                 struct ehca_mr *e_mr,
+                 u64 *iova_start,
+                 u64 size,
+                 int mr_access_flags,
+                 struct ehca_pd *e_pd,
+                 struct ehca_mr_pginfo *pginfo,
+                 u32 *lkey,
+                 u32 *rkey);
+
+int ehca_unmap_one_fmr(struct ehca_shca *shca,
+                      struct ehca_mr *e_fmr);
+
+int ehca_reg_smr(struct ehca_shca *shca,
+                struct ehca_mr *e_origmr,
+                struct ehca_mr *e_newmr,
+                u64 *iova_start,
+                int acl,
+                struct ehca_pd *e_pd,
+                u32 *lkey,
+                u32 *rkey);
+
+int ehca_reg_internal_maxmr(struct ehca_shca *shca,
+                           struct ehca_pd *e_pd,
+                           struct ehca_mr **maxmr);
+
+int ehca_reg_maxmr(struct ehca_shca *shca,
+                  struct ehca_mr *e_newmr,
+                  u64 *iova_start,
+                  int acl,
+                  struct ehca_pd *e_pd,
+                  u32 *lkey,
+                  u32 *rkey);
+
+int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
+
+int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
+                                 int num_phys_buf,
+                                 u64 *iova_start,
+                                 u64 *size);
+
+int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
+                            u64 *page_list,
+                            int list_len);
+
+int ehca_set_pagebuf(struct ehca_mr *e_mr,
+                    struct ehca_mr_pginfo *pginfo,
+                    u32 number,
+                    u64 *kpage);
+
+int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
+                      struct ehca_mr_pginfo *pginfo,
+                      u64 *rpage);
+
+int ehca_mr_is_maxmr(u64 size,
+                    u64 *iova_start);
+
+void ehca_mrmw_map_acl(int ib_acl,
+                      u32 *hipz_acl);
+
+void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl);
+
+void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
+                              int *ib_acl);
+
+int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc);
+
+void ehca_mr_deletenew(struct ehca_mr *mr);
+
+#endif  /*_EHCA_MRMW_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
new file mode 100644 (file)
index 0000000..2c3cdc6
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  PD functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+
+static struct kmem_cache *pd_cache;
+
+struct ib_pd *ehca_alloc_pd(struct ib_device *device,
+                           struct ib_ucontext *context, struct ib_udata *udata)
+{
+       struct ehca_pd *pd;
+
+       pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL);
+       if (!pd) {
+               ehca_err(device, "device=%p context=%p out of memory",
+                        device, context);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       memset(pd, 0, sizeof(struct ehca_pd));
+       pd->ownpid = current->tgid;
+
+       /*
+        * Kernel PD: when context == NULL
+        * User   PD: when context != NULL
+        */
+       if (!context) {
+               /*
+                * Kernel PDs allocated after init always reuse
+                * the one created in ehca_shca_reopen()
+                */
+               struct ehca_shca *shca = container_of(device, struct ehca_shca,
+                                                     ib_device);
+               pd->fw_pd.value = shca->pd->fw_pd.value;
+       } else
+               pd->fw_pd.value = (u64)pd;
+
+       return &pd->ib_pd;
+}
+
+int ehca_dealloc_pd(struct ib_pd *pd)
+{
+       u32 cur_pid = current->tgid;
+       struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+
+       if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+           my_pd->ownpid != cur_pid) {
+               ehca_err(pd->device, "Invalid caller pid=%x ownpid=%x",
+                        cur_pid, my_pd->ownpid);
+               return -EINVAL;
+       }
+
+       kmem_cache_free(pd_cache,
+                       container_of(pd, struct ehca_pd, ib_pd));
+
+       return 0;
+}
+
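+/* create the slab cache for eHCA protection domain structures */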
+int ehca_init_pd_cache(void)
+{
+       pd_cache = kmem_cache_create("ehca_cache_pd",
+                                    sizeof(struct ehca_pd), 0,
+                                    SLAB_HWCACHE_ALIGN,
+                                    NULL, NULL);
+       if (!pd_cache)
+               return -ENOMEM;
+       return 0;
+}
+
+void ehca_cleanup_pd_cache(void)
+{
+       if (pd_cache)
+               kmem_cache_destroy(pd_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h
new file mode 100644 (file)
index 0000000..8707d29
--- /dev/null
@@ -0,0 +1,259 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Hardware request structures
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _EHCA_QES_H_
+#define _EHCA_QES_H_
+
+#include "ehca_tools.h"
+
+/* virtual scatter gather entry to specify remote addresses with length */
+struct ehca_vsgentry {
+       u64 vaddr;
+       u32 lkey;
+       u32 length;
+};
+
+#define GRH_FLAG_MASK        EHCA_BMASK_IBM(7,7)
+#define GRH_IPVERSION_MASK   EHCA_BMASK_IBM(0,3)
+#define GRH_TCLASS_MASK      EHCA_BMASK_IBM(4,12)
+#define GRH_FLOWLABEL_MASK   EHCA_BMASK_IBM(13,31)
+#define GRH_PAYLEN_MASK      EHCA_BMASK_IBM(32,47)
+#define GRH_NEXTHEADER_MASK  EHCA_BMASK_IBM(48,55)
+#define GRH_HOPLIMIT_MASK    EHCA_BMASK_IBM(56,63)
+
+/*
+ * Unreliable Datagram Address Vector Format
+ * see IBTA Vol1 chapter 8.3 Global Routing Header
+ */
+struct ehca_ud_av {
+       u8 sl;
+       u8 lnh;
+       u16 dlid;
+       u8 reserved1;
+       u8 reserved2;
+       u8 reserved3;
+       u8 slid_path_bits;
+       u8 reserved4;
+       u8 ipd;
+       u8 reserved5;
+       u8 pmtu;
+       u32 reserved6;
+       u64 reserved7;
+       union {
+               struct {
+                       u64 word_0; /* always set to 6  */
+                       /* should be 0x1B for IB transport */
+                       u64 word_1;
+                       u64 word_2;
+                       u64 word_3;
+                       u64 word_4;
+               } grh;
+               struct {
+                       u32 wd_0;
+                       u32 wd_1;
+                       /* DWord_1 --> SGID */
+
+                       u32 sgid_wd3;
+                       u32 sgid_wd2;
+
+                       u32 sgid_wd1;
+                       u32 sgid_wd0;
+                       /* DWord_3 --> DGID */
+
+                       u32 dgid_wd3;
+                       u32 dgid_wd2;
+
+                       u32 dgid_wd1;
+                       u32 dgid_wd0;
+               } grh_l;
+       };
+};
+
+/* maximum number of sg entries allowed in a WQE */
+#define MAX_WQE_SG_ENTRIES 252
+
+#define WQE_OPTYPE_SEND             0x80
+#define WQE_OPTYPE_RDMAREAD         0x40
+#define WQE_OPTYPE_RDMAWRITE        0x20
+#define WQE_OPTYPE_CMPSWAP          0x10
+#define WQE_OPTYPE_FETCHADD         0x08
+#define WQE_OPTYPE_BIND             0x04
+
+#define WQE_WRFLAG_REQ_SIGNAL_COM   0x80
+#define WQE_WRFLAG_FENCE            0x40
+#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20
+#define WQE_WRFLAG_SOLIC_EVENT      0x10
+
+#define WQEF_CACHE_HINT             0x80
+#define WQEF_CACHE_HINT_RD_WR       0x40
+#define WQEF_TIMED_WQE              0x20
+#define WQEF_PURGE                  0x08
+#define WQEF_HIGH_NIBBLE            0xF0
+
+#define MW_BIND_ACCESSCTRL_R_WRITE   0x40
+#define MW_BIND_ACCESSCTRL_R_READ    0x20
+#define MW_BIND_ACCESSCTRL_R_ATOMIC  0x10
+
+struct ehca_wqe {
+       u64 work_request_id;
+       u8 optype;
+       u8 wr_flag;
+       u16 pkeyi;
+       u8 wqef;
+       u8 nr_of_data_seg;
+       u16 wqe_provided_slid;
+       u32 destination_qp_number;
+       u32 resync_psn_sqp;
+       u32 local_ee_context_qkey;
+       u32 immediate_data;
+       union {
+               struct {
+                       u64 remote_virtual_adress;
+                       u32 rkey;
+                       u32 reserved;
+                       u64 atomic_1st_op_dma_len;
+                       u64 atomic_2nd_op;
+                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+
+               } nud;
+               struct {
+                       u64 ehca_ud_av_ptr;
+                       u64 reserved1;
+                       u64 reserved2;
+                       u64 reserved3;
+                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+               } ud_avp;
+               struct {
+                       struct ehca_ud_av ud_av;
+                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES -
+                                                    2];
+               } ud_av;
+               struct {
+                       u64 reserved0;
+                       u64 reserved1;
+                       u64 reserved2;
+                       u64 reserved3;
+                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+               } all_rcv;
+
+               struct {
+                       u64 reserved;
+                       u32 rkey;
+                       u32 old_rkey;
+                       u64 reserved1;
+                       u64 reserved2;
+                       u64 virtual_address;
+                       u32 reserved3;
+                       u32 length;
+                       u32 reserved4;
+                       u16 reserved5;
+                       u8 reserved6;
+                       u8 lr_ctl;
+                       u32 lkey;
+                       u32 reserved7;
+                       u64 reserved8;
+                       u64 reserved9;
+                       u64 reserved10;
+                       u64 reserved11;
+               } bind;
+               struct {
+                       u64 reserved12;
+                       u64 reserved13;
+                       u32 size;
+                       u32 start;
+               } inline_data;
+       } u;
+
+};
+
+#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0)
+#define WC_IMM_DATA     EHCA_BMASK_IBM(1,1)
+#define WC_GRH_PRESENT  EHCA_BMASK_IBM(2,2)
+#define WC_SE_BIT       EHCA_BMASK_IBM(3,3)
+#define WC_STATUS_ERROR_BIT 0x80000000
+#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
+#define WC_STATUS_PURGE_BIT 0x10
+
+struct ehca_cqe {
+       u64 work_request_id;
+       u8 optype;
+       u8 w_completion_flags;
+       u16 reserved1;
+       u32 nr_bytes_transferred;
+       u32 immediate_data;
+       u32 local_qp_number;
+       u8 freed_resource_count;
+       u8 service_level;
+       u16 wqe_count;
+       u32 qp_token;
+       u32 qkey_ee_token;
+       u32 remote_qp_number;
+       u16 dlid;
+       u16 rlid;
+       u16 reserved2;
+       u16 pkey_index;
+       u32 cqe_timestamp;
+       u32 wqe_timestamp;
+       u8 wqe_timestamp_valid;
+       u8 reserved3;
+       u8 reserved4;
+       u8 cqe_flags;
+       u32 status;
+};
+
+struct ehca_eqe {
+       u64 entry;
+};
+
+struct ehca_mrte {
+       u64 starting_va;
+       u64 length; /* length of memory region in bytes */
+       u32 pd;
+       u8 key_instance;
+       u8 pagesize;
+       u8 mr_control;
+       u8 local_remote_access_ctrl;
+       u8 reserved[0x20 - 0x18];
+       u64 at_pointer[4];
+};
+#endif /*_EHCA_QES_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
new file mode 100644 (file)
index 0000000..4394123
--- /dev/null
@@ -0,0 +1,1507 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  QP functions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm/current.h>
+
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+static struct kmem_cache *qp_cache;
+
+/*
+ * attributes not supported by query qp
+ */
+#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \
+                                    IB_QP_MAX_QP_RD_ATOMIC   | \
+                                    IB_QP_ACCESS_FLAGS       | \
+                                    IB_QP_EN_SQD_ASYNC_NOTIFY)
+
+/*
+ * ehca (internal) qp state values
+ */
+enum ehca_qp_state {
+       EHCA_QPS_RESET = 1,
+       EHCA_QPS_INIT = 2,
+       EHCA_QPS_RTR = 3,
+       EHCA_QPS_RTS = 5,
+       EHCA_QPS_SQD = 6,
+       EHCA_QPS_SQE = 8,
+       EHCA_QPS_ERR = 128
+};
+
+/*
+ * qp state transitions as defined by IB Arch Rel 1.1 page 431
+ */
+enum ib_qp_statetrans {
+       IB_QPST_ANY2RESET,
+       IB_QPST_ANY2ERR,
+       IB_QPST_RESET2INIT,
+       IB_QPST_INIT2RTR,
+       IB_QPST_INIT2INIT,
+       IB_QPST_RTR2RTS,
+       IB_QPST_RTS2SQD,
+       IB_QPST_RTS2RTS,
+       IB_QPST_SQD2RTS,
+       IB_QPST_SQE2RTS,
+       IB_QPST_SQD2SQD,
+       IB_QPST_MAX     /* nr of transitions, this must be last!!! */
+};
+
+/*
+ * ib2ehca_qp_state maps IB to ehca qp_state
+ * returns ehca qp state corresponding to given ib qp state
+ */
+static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state)
+{
+       switch (ib_qp_state) {
+       case IB_QPS_RESET:
+               return EHCA_QPS_RESET;
+       case IB_QPS_INIT:
+               return EHCA_QPS_INIT;
+       case IB_QPS_RTR:
+               return EHCA_QPS_RTR;
+       case IB_QPS_RTS:
+               return EHCA_QPS_RTS;
+       case IB_QPS_SQD:
+               return EHCA_QPS_SQD;
+       case IB_QPS_SQE:
+               return EHCA_QPS_SQE;
+       case IB_QPS_ERR:
+               return EHCA_QPS_ERR;
+       default:
+               ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state);
+               return -EINVAL;
+       }
+}
+
+/*
+ * ehca2ib_qp_state maps ehca to IB qp_state
+ * returns ib qp state corresponding to given ehca qp state
+ */
+static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state
+                                               ehca_qp_state)
+{
+       switch (ehca_qp_state) {
+       case EHCA_QPS_RESET:
+               return IB_QPS_RESET;
+       case EHCA_QPS_INIT:
+               return IB_QPS_INIT;
+       case EHCA_QPS_RTR:
+               return IB_QPS_RTR;
+       case EHCA_QPS_RTS:
+               return IB_QPS_RTS;
+       case EHCA_QPS_SQD:
+               return IB_QPS_SQD;
+       case EHCA_QPS_SQE:
+               return IB_QPS_SQE;
+       case EHCA_QPS_ERR:
+               return IB_QPS_ERR;
+       default:
+               ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state);
+               return -EINVAL;
+       }
+}
+
+/*
+ * ehca_qp_type used as index for req_attr and opt_attr of
+ * struct ehca_modqp_statetrans
+ */
+enum ehca_qp_type {
+       QPT_RC = 0,
+       QPT_UC = 1,
+       QPT_UD = 2,
+       QPT_SQP = 3,
+       QPT_MAX
+};
+
+/*
+ * ib2ehcaqptype maps IB to ehca qp_type
+ * returns ehca qp type corresponding to ib qp type
+ */
+static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype)
+{
+       switch (ibqptype) {
+       case IB_QPT_SMI:
+       case IB_QPT_GSI:
+               return QPT_SQP;
+       case IB_QPT_RC:
+               return QPT_RC;
+       case IB_QPT_UC:
+               return QPT_UC;
+       case IB_QPT_UD:
+               return QPT_UD;
+       default:
+               ehca_gen_err("Invalid ibqptype=%x", ibqptype);
+               return -EINVAL;
+       }
+}
+
+static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
+                                                        int ib_tostate)
+{
+       int index = -EINVAL;
+       switch (ib_tostate) {
+       case IB_QPS_RESET:
+               index = IB_QPST_ANY2RESET;
+               break;
+       case IB_QPS_INIT:
+               switch (ib_fromstate) {
+               case IB_QPS_RESET:
+                       index = IB_QPST_RESET2INIT;
+                       break;
+               case IB_QPS_INIT:
+                       index = IB_QPST_INIT2INIT;
+                       break;
+               }
+               break;
+       case IB_QPS_RTR:
+               if (ib_fromstate == IB_QPS_INIT)
+                       index = IB_QPST_INIT2RTR;
+               break;
+       case IB_QPS_RTS:
+               switch (ib_fromstate) {
+               case IB_QPS_RTR:
+                       index = IB_QPST_RTR2RTS;
+                       break;
+               case IB_QPS_RTS:
+                       index = IB_QPST_RTS2RTS;
+                       break;
+               case IB_QPS_SQD:
+                       index = IB_QPST_SQD2RTS;
+                       break;
+               case IB_QPS_SQE:
+                       index = IB_QPST_SQE2RTS;
+                       break;
+               }
+               break;
+       case IB_QPS_SQD:
+               if (ib_fromstate == IB_QPS_RTS)
+                       index = IB_QPST_RTS2SQD;
+               break;
+       case IB_QPS_SQE:
+               break;
+       case IB_QPS_ERR:
+               index = IB_QPST_ANY2ERR;
+               break;
+       default:
+               break;
+       }
+       return index;
+}
+
+enum ehca_service_type {
+       ST_RC = 0,
+       ST_UC = 1,
+       ST_RD = 2,
+       ST_UD = 3
+};
+
+/*
+ * ibqptype2servicetype returns hcp service type corresponding to given
+ * ib qp type used by create_qp()
+ */
+static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
+{
+       switch (ibqptype) {
+       case IB_QPT_SMI:
+       case IB_QPT_GSI:
+               return ST_UD;
+       case IB_QPT_RC:
+               return ST_RC;
+       case IB_QPT_UC:
+               return ST_UC;
+       case IB_QPT_UD:
+               return ST_UD;
+       case IB_QPT_RAW_IPV6:
+               return -EINVAL;
+       case IB_QPT_RAW_ETY:
+               return -EINVAL;
+       default:
+               ehca_gen_err("Invalid ibqptype=%x", ibqptype);
+               return -EINVAL;
+       }
+}
+
+/*
+ * init_qp_queues initializes/constructs r/squeue and registers queue pages.
+ */
+static inline int init_qp_queues(struct ehca_shca *shca,
+                                struct ehca_qp *my_qp,
+                                int nr_sq_pages,
+                                int nr_rq_pages,
+                                int swqe_size,
+                                int rwqe_size,
+                                int nr_send_sges, int nr_receive_sges)
+{
+       int ret, cnt, ipz_rc;
+       void *vpage;
+       u64 rpage, h_ret;
+       struct ib_device *ib_dev = &shca->ib_device;
+       struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
+
+       ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue,
+                               nr_sq_pages,
+                               EHCA_PAGESIZE, swqe_size, nr_send_sges);
+       if (!ipz_rc) {
+               ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x",
+                        ipz_rc);
+               return -EBUSY;
+       }
+
+       ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue,
+                               nr_rq_pages,
+                               EHCA_PAGESIZE, rwqe_size, nr_receive_sges);
+       if (!ipz_rc) {
+               ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x",
+                        ipz_rc);
+               ret = -EBUSY;
+               goto init_qp_queues0;
+       }
+       /* register SQ pages */
+       for (cnt = 0; cnt < nr_sq_pages; cnt++) {
+               vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue);
+               if (!vpage) {
+                       ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() "
+                                "failed p_vpage= %p", vpage);
+                       ret = -EINVAL;
+                       goto init_qp_queues1;
+               }
+               rpage = virt_to_abs(vpage);
+
+               h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
+                                                my_qp->ipz_qp_handle,
+                                                &my_qp->pf, 0, 0,
+                                                rpage, 1,
+                                                my_qp->galpas.kernel);
+               if (h_ret < H_SUCCESS) {
+                       ehca_err(ib_dev, "SQ hipz_qp_register_rpage()"
+                                " failed rc=%lx", h_ret);
+                       ret = ehca2ib_return_code(h_ret);
+                       goto init_qp_queues1;
+               }
+       }
+
+       ipz_qeit_reset(&my_qp->ipz_squeue);
+
+       /* register RQ pages */
+       for (cnt = 0; cnt < nr_rq_pages; cnt++) {
+               vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
+               if (!vpage) {
+                       ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() "
+                                "failed p_vpage = %p", vpage);
+                       ret = -EINVAL;
+                       goto init_qp_queues1;
+               }
+
+               rpage = virt_to_abs(vpage);
+
+               h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
+                                                my_qp->ipz_qp_handle,
+                                                &my_qp->pf, 0, 1,
+                                                rpage, 1,my_qp->galpas.kernel);
+               if (h_ret < H_SUCCESS) {
+                       ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed "
+                                "rc=%lx", h_ret);
+                       ret = ehca2ib_return_code(h_ret);
+                       goto init_qp_queues1;
+               }
+               if (cnt == (nr_rq_pages - 1)) { /* last page! */
+                       if (h_ret != H_SUCCESS) {
+                               ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+                                        "h_ret= %lx ", h_ret);
+                               ret = ehca2ib_return_code(h_ret);
+                               goto init_qp_queues1;
+                       }
+                       vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
+                       if (vpage) {
+                               ehca_err(ib_dev, "ipz_qpageit_get_inc() "
+                                        "should not succeed vpage=%p", vpage);
+                               ret = -EINVAL;
+                               goto init_qp_queues1;
+                       }
+               } else {
+                       if (h_ret != H_PAGE_REGISTERED) {
+                               ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+                                        "h_ret= %lx ", h_ret);
+                               ret = ehca2ib_return_code(h_ret);
+                               goto init_qp_queues1;
+                       }
+               }
+       }
+
+       ipz_qeit_reset(&my_qp->ipz_rqueue);
+
+       return 0;
+
+init_qp_queues1:
+       ipz_queue_dtor(&my_qp->ipz_rqueue);
+init_qp_queues0:
+       ipz_queue_dtor(&my_qp->ipz_squeue);
+       return ret;
+}
+
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+                            struct ib_qp_init_attr *init_attr,
+                            struct ib_udata *udata)
+{
+       static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 };
+       static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 };
+       struct ehca_qp *my_qp;
+       struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+       struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+                                             ib_device);
+       struct ib_ucontext *context = NULL;
+       u64 h_ret;
+       int max_send_sge, max_recv_sge, ret;
+
+       /* h_call's out parameters */
+       struct ehca_alloc_qp_parms parms;
+       u32 swqe_size = 0, rwqe_size = 0;
+       u8 daqp_completion, isdaqp;
+       unsigned long flags;
+
+       if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
+               init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
+               ehca_err(pd->device, "init_attr->sq_sig_type=%x not allowed",
+                        init_attr->sq_sig_type);
+               return ERR_PTR(-EINVAL);
+       }
+
+       /* save daqp completion bits */
+       daqp_completion = init_attr->qp_type & 0x60;
+       /* save daqp bit */
+       isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0;
+       init_attr->qp_type = init_attr->qp_type & 0x1F;
+
+       if (init_attr->qp_type != IB_QPT_UD &&
+           init_attr->qp_type != IB_QPT_SMI &&
+           init_attr->qp_type != IB_QPT_GSI &&
+           init_attr->qp_type != IB_QPT_UC &&
+           init_attr->qp_type != IB_QPT_RC) {
+               ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type);
+               return ERR_PTR(-EINVAL);
+       }
+       if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD)
+           && isdaqp) {
+               ehca_err(pd->device, "unsupported LL QP Type=%x",
+                        init_attr->qp_type);
+               return ERR_PTR(-EINVAL);
+       } else if (init_attr->qp_type == IB_QPT_RC && isdaqp &&
+                  (init_attr->cap.max_send_wr > 255 ||
+                   init_attr->cap.max_recv_wr > 255 )) {
+                      ehca_err(pd->device, "Invalid Number of max_sq_wr =%x "
+                               "or max_rq_wr=%x for QP Type=%x",
+                               init_attr->cap.max_send_wr,
+                               init_attr->cap.max_recv_wr,init_attr->qp_type);
+                      return ERR_PTR(-EINVAL);
+       } else if (init_attr->qp_type == IB_QPT_UD && isdaqp &&
+                 init_attr->cap.max_send_wr > 255) {
+               ehca_err(pd->device,
+                        "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x",
+                        init_attr->cap.max_send_wr, init_attr->qp_type);
+               return ERR_PTR(-EINVAL);
+       }
+
+       if (pd->uobject && udata)
+               context = pd->uobject->context;
+
+       my_qp = kmem_cache_alloc(qp_cache, SLAB_KERNEL);
+       if (!my_qp) {
+               ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       memset(my_qp, 0, sizeof(struct ehca_qp));
+       memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms));
+       spin_lock_init(&my_qp->spinlock_s);
+       spin_lock_init(&my_qp->spinlock_r);
+
+       my_qp->recv_cq =
+               container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
+       my_qp->send_cq =
+               container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
+
+       my_qp->init_attr = *init_attr;
+
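+       /* reserve an idr token for this QP, retrying while the idr needs more memory */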
+       do {
+               if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) {
+                       ret = -ENOMEM;
+                       ehca_err(pd->device, "Can't reserve idr resources.");
+                       goto create_qp_exit0;
+               }
+
+               spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+               ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
+               spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+       } while (ret == -EAGAIN);
+
+       if (ret) {
+               ret = -ENOMEM;
+               ehca_err(pd->device, "Can't allocate new idr entry.");
+               goto create_qp_exit0;
+       }
+
+       parms.servicetype = ibqptype2servicetype(init_attr->qp_type);
+       if (parms.servicetype < 0) {
+               ret = -EINVAL;
+               ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type);
+               goto create_qp_exit0;
+       }
+
+       if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+               parms.sigtype = HCALL_SIGT_EVERY;
+       else
+               parms.sigtype = HCALL_SIGT_BY_WQE;
+
+       /* UD_AV CIRCUMVENTION */
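+       /*
+        * request two additional SGEs per work request for UD-type QPs;
+        * they are subtracted again from the values the firmware returns
+        */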
+       max_send_sge = init_attr->cap.max_send_sge;
+       max_recv_sge = init_attr->cap.max_recv_sge;
+       if (IB_QPT_UD == init_attr->qp_type ||
+           IB_QPT_GSI == init_attr->qp_type ||
+           IB_QPT_SMI == init_attr->qp_type) {
+               max_send_sge += 2;
+               max_recv_sge += 2;
+       }
+
+       parms.ipz_eq_handle = shca->eq.ipz_eq_handle;
+       parms.daqp_ctrl = isdaqp | daqp_completion;
+       parms.pd = my_pd->fw_pd;
+       parms.max_recv_sge = max_recv_sge;
+       parms.max_send_sge = max_send_sge;
+
+       h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms);
+
+       if (h_ret != H_SUCCESS) {
+               ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx",
+                        h_ret);
+               ret = ehca2ib_return_code(h_ret);
+               goto create_qp_exit1;
+       }
+
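+       /*
+        * compute WQE sizes: regular QPs size the WQE by the actual number
+        * of SGEs, while daqp QPs use the fixed message sizes from the
+        * da_*_msg_size tables
+        */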
+       switch (init_attr->qp_type) {
+       case IB_QPT_RC:
+               if (isdaqp == 0) {
+                       swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
+                                            (parms.act_nr_send_sges)]);
+                       rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
+                                            (parms.act_nr_recv_sges)]);
+               } else { /* for daqp we need to use msg size, not wqe size */
+                       swqe_size = da_rc_msg_size[max_send_sge];
+                       rwqe_size = da_rc_msg_size[max_recv_sge];
+                       parms.act_nr_send_sges = 1;
+                       parms.act_nr_recv_sges = 1;
+               }
+               break;
+       case IB_QPT_UC:
+               swqe_size = offsetof(struct ehca_wqe,
+                                    u.nud.sg_list[parms.act_nr_send_sges]);
+               rwqe_size = offsetof(struct ehca_wqe,
+                                    u.nud.sg_list[parms.act_nr_recv_sges]);
+               break;
+
+       case IB_QPT_UD:
+       case IB_QPT_GSI:
+       case IB_QPT_SMI:
+               /* UD circumvention */
+               parms.act_nr_recv_sges -= 2;
+               parms.act_nr_send_sges -= 2;
+               if (isdaqp) {
+                       swqe_size = da_ud_sq_msg_size[max_send_sge];
+                       rwqe_size = da_rc_msg_size[max_recv_sge];
+                       parms.act_nr_send_sges = 1;
+                       parms.act_nr_recv_sges = 1;
+               } else {
+                       swqe_size = offsetof(struct ehca_wqe,
+                                            u.ud_av.sg_list[parms.act_nr_send_sges]);
+                       rwqe_size = offsetof(struct ehca_wqe,
+                                            u.ud_av.sg_list[parms.act_nr_recv_sges]);
+               }
+
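+               /* special QPs keep their well-known numbers: 0 for SMI, 1 for GSI */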
+               if (IB_QPT_GSI == init_attr->qp_type ||
+                   IB_QPT_SMI == init_attr->qp_type) {
+                       parms.act_nr_send_wqes = init_attr->cap.max_send_wr;
+                       parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr;
+                       parms.act_nr_send_sges = init_attr->cap.max_send_sge;
+                       parms.act_nr_recv_sges = init_attr->cap.max_recv_sge;
+                       my_qp->real_qp_num =
+                               (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1;
+               }
+
+               break;
+
+       default:
+               break;
+       }
+
+       /* initializes r/squeue and registers queue pages */
+       ret = init_qp_queues(shca, my_qp,
+                            parms.nr_sq_pages, parms.nr_rq_pages,
+                            swqe_size, rwqe_size,
+                            parms.act_nr_send_sges, parms.act_nr_recv_sges);
+       if (ret) {
+               ehca_err(pd->device,
+                        "Couldn't initialize r/squeue and pages ret=%x", ret);
+               goto create_qp_exit2;
+       }
+
+       my_qp->ib_qp.pd = &my_pd->ib_pd;
+       my_qp->ib_qp.device = my_pd->ib_pd.device;
+
+       my_qp->ib_qp.recv_cq = init_attr->recv_cq;
+       my_qp->ib_qp.send_cq = init_attr->send_cq;
+
+       my_qp->ib_qp.qp_num = my_qp->real_qp_num;
+       my_qp->ib_qp.qp_type = init_attr->qp_type;
+
+       my_qp->qp_type = init_attr->qp_type;
+       my_qp->ib_qp.srq = init_attr->srq;
+
+       my_qp->ib_qp.qp_context = init_attr->qp_context;
+       my_qp->ib_qp.event_handler = init_attr->event_handler;
+
+       init_attr->cap.max_inline_data = 0; /* not supported yet */
+       init_attr->cap.max_recv_sge = parms.act_nr_recv_sges;
+       init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes;
+       init_attr->cap.max_send_sge = parms.act_nr_send_sges;
+       init_attr->cap.max_send_wr = parms.act_nr_send_wqes;
+
+       /* NOTE: define_aqp0() not supported yet */
+       if (init_attr->qp_type == IB_QPT_GSI) {
+               h_ret = ehca_define_sqp(shca, my_qp, init_attr);
+               if (h_ret != H_SUCCESS) {
+                       ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx",
+                                h_ret);
+                       ret = ehca2ib_return_code(h_ret);
+                       goto create_qp_exit3;
+               }
+       }
+       if (init_attr->send_cq) {
+               struct ehca_cq *cq = container_of(init_attr->send_cq,
+                                                 struct ehca_cq, ib_cq);
+               ret = ehca_cq_assign_qp(cq, my_qp);
+               if (ret) {
+                       ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x",
+                                ret);
+                       goto create_qp_exit3;
+               }
+               my_qp->send_cq = cq;
+       }
+       /* copy queues, galpa data to user space */
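+       /*
+        * the mmap offsets carry the QP token in their upper 32 bits;
+        * 0x22000000 tags the recv queue and 0x23000000 the send queue
+        */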
+       if (context && udata) {
+               struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue;
+               struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue;
+               struct ehca_create_qp_resp resp;
+               struct vm_area_struct * vma;
+               memset(&resp, 0, sizeof(resp));
+
+               resp.qp_num = my_qp->real_qp_num;
+               resp.token = my_qp->token;
+               resp.qp_type = my_qp->qp_type;
+               resp.qkey = my_qp->qkey;
+               resp.real_qp_num = my_qp->real_qp_num;
+               /* rqueue properties */
+               resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size;
+               resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg;
+               resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length;
+               resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize;
+               resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state;
+               ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000,
+                                      ipz_rqueue->queue_length,
+                                      (void**)&resp.ipz_rqueue.queue,
+                                      &vma);
+               if (ret) {
+                       ehca_err(pd->device, "Could not mmap rqueue pages");
+                       goto create_qp_exit3;
+               }
+               my_qp->uspace_rqueue = resp.ipz_rqueue.queue;
+               /* squeue properties */
+               resp.ipz_squeue.qe_size = ipz_squeue->qe_size;
+               resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg;
+               resp.ipz_squeue.queue_length = ipz_squeue->queue_length;
+               resp.ipz_squeue.pagesize = ipz_squeue->pagesize;
+               resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state;
+               ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000,
+                                      ipz_squeue->queue_length,
+                                      (void**)&resp.ipz_squeue.queue,
+                                      &vma);
+               if (ret) {
+                       ehca_err(pd->device, "Could not mmap squeue pages");
+                       goto create_qp_exit4;
+               }
+               my_qp->uspace_squeue = resp.ipz_squeue.queue;
+               /* fw_handle */
+               resp.galpas = my_qp->galpas;
+               ret = ehca_mmap_register(my_qp->galpas.user.fw_handle,
+                                        (void**)&resp.galpas.kernel.fw_handle,
+                                        &vma);
+               if (ret) {
+                       ehca_err(pd->device, "Could not mmap fw_handle");
+                       goto create_qp_exit5;
+               }
+               my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
+
+               if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+                       ehca_err(pd->device, "Copy to udata failed");
+                       ret = -EINVAL;
+                       goto create_qp_exit6;
+               }
+       }
+
+       return &my_qp->ib_qp;
+
+create_qp_exit6:
+       ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
+
+create_qp_exit5:
+       ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length);
+
+create_qp_exit4:
+       ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length);
+
+create_qp_exit3:
+       ipz_queue_dtor(&my_qp->ipz_rqueue);
+       ipz_queue_dtor(&my_qp->ipz_squeue);
+
+create_qp_exit2:
+       hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
+
+create_qp_exit1:
+       spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+       idr_remove(&ehca_qp_idr, my_qp->token);
+       spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+create_qp_exit0:
+       kmem_cache_free(qp_cache, my_qp);
+       return ERR_PTR(ret);
+}
+
+/*
+ * prepare_sqe_rts is called by internal_modify_qp() at the sqe -> rts transition.
+ * It sets the purge bit of the bad wqe and of all subsequent wqes to avoid
+ * re-entering sqe, and returns the total number of bad wqes in bad_wqe_cnt.
+ */
+static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
+                          int *bad_wqe_cnt)
+{
+       u64 h_ret;
+       struct ipz_queue *squeue;
+       void *bad_send_wqe_p, *bad_send_wqe_v;
+       void *squeue_start_p, *squeue_end_p;
+       void *squeue_start_v, *squeue_end_v;
+       struct ehca_wqe *wqe;
+       int qp_num = my_qp->ib_qp.qp_num;
+
+       /* get send wqe pointer */
+       h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+                                          my_qp->ipz_qp_handle, &my_qp->pf,
+                                          &bad_send_wqe_p, NULL, 2);
+       if (h_ret != H_SUCCESS) {
+               ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
+                        " ehca_qp=%p qp_num=%x h_ret=%lx",
+                        my_qp, qp_num, h_ret);
+               return ehca2ib_return_code(h_ret);
+       }
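+       /* mask off the high-order bit to obtain a usable absolute address */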
+       bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63)));
+       ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
+                qp_num, bad_send_wqe_p);
+       /* convert wqe pointer to vadr */
+       bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p);
+       if (ehca_debug_level)
+               ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
+       squeue = &my_qp->ipz_squeue;
+       squeue_start_p = (void*)virt_to_abs(ipz_qeit_calc(squeue, 0L));
+       squeue_end_p = squeue_start_p+squeue->queue_length;
+       squeue_start_v = abs_to_virt((u64)squeue_start_p);
+       squeue_end_v = abs_to_virt((u64)squeue_end_p);
+       ehca_dbg(&shca->ib_device, "qp_num=%x squeue_start_v=%p squeue_end_v=%p",
+                qp_num, squeue_start_v, squeue_end_v);
+
+       /* loop sets wqe's purge bit */
+       wqe = (struct ehca_wqe*)bad_send_wqe_v;
+       *bad_wqe_cnt = 0;
+       while (wqe->optype != 0xff && wqe->wqef != 0xff) {
+               if (ehca_debug_level)
+                       ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
+               wqe->nr_of_data_seg = 0; /* suppress data access */
+               wqe->wqef = WQEF_PURGE; /* WQE to be purged */
+               wqe = (struct ehca_wqe*)((u8*)wqe+squeue->qe_size);
+               *bad_wqe_cnt = (*bad_wqe_cnt)+1;
+               if ((void*)wqe >= squeue_end_v) {
+                       wqe = squeue_start_v;
+               }
+       }
+       /*
+        * bad wqe will be reprocessed and ignored when poll_cq() is called,
+        * i.e. nr of wqes with flush error status is one less
+        */
+       ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
+                qp_num, (*bad_wqe_cnt)-1);
+       wqe->wqef = 0;
+
+       return 0;
+}
+
+/*
+ * internal_modify_qp with circumvention to handle aqp0 properly
+ * smi_reset2init indicates if this is an internal reset-to-init call for
+ * the SMI QP. This flag must always be zero if called from ehca_modify_qp()!
+ * This internal function was introduced to avoid recursion of ehca_modify_qp().
+ */
+static int internal_modify_qp(struct ib_qp *ibqp,
+                             struct ib_qp_attr *attr,
+                             int attr_mask, int smi_reset2init)
+{
+       enum ib_qp_state qp_cur_state, qp_new_state;
+       int cnt, qp_attr_idx, ret = 0;
+       enum ib_qp_statetrans statetrans;
+       struct hcp_modify_qp_control_block *mqpcb;
+       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+       struct ehca_shca *shca =
+               container_of(ibqp->pd->device, struct ehca_shca, ib_device);
+       u64 update_mask;
+       u64 h_ret;
+       int bad_wqe_cnt = 0;
+       int squeue_locked = 0;
+       unsigned long spl_flags = 0;
+
+       /* do query_qp to obtain current attr values */
+       mqpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+       if (mqpcb == NULL) {
+               ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
+                        "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
+               return -ENOMEM;
+       }
+
+       h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
+                               my_qp->ipz_qp_handle,
+                               &my_qp->pf,
+                               mqpcb, my_qp->galpas.kernel);
+       if (h_ret != H_SUCCESS) {
+               ehca_err(ibqp->device, "hipz_h_query_qp() failed "
+                        "ehca_qp=%p qp_num=%x h_ret=%lx",
+                        my_qp, ibqp->qp_num, h_ret);
+               ret = ehca2ib_return_code(h_ret);
+               goto modify_qp_exit1;
+       }
+
+       qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
+
+       if (qp_cur_state == -EINVAL) {  /* invalid qp state */
+               ret = -EINVAL;
+               ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x "
+                        "ehca_qp=%p qp_num=%x",
+                        mqpcb->qp_state, my_qp, ibqp->qp_num);
+               goto modify_qp_exit1;
+       }
+       /*
+        * circumvention to set aqp0 initial state to init
+        * as expected by IB spec
+        */
+       if (smi_reset2init == 0 &&
+           ibqp->qp_type == IB_QPT_SMI &&
+           qp_cur_state == IB_QPS_RESET &&
+           (attr_mask & IB_QP_STATE) &&
+           attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */
+               struct ib_qp_attr smiqp_attr = {
+                       .qp_state = IB_QPS_INIT,
+                       .port_num = my_qp->init_attr.port_num,
+                       .pkey_index = 0,
+                       .qkey = 0
+               };
+               int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT |
+                       IB_QP_PKEY_INDEX | IB_QP_QKEY;
+               int smirc = internal_modify_qp(
+                       ibqp, &smiqp_attr, smiqp_attr_mask, 1);
+               if (smirc) {
+                       ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
+                                "ehca_modify_qp() rc=%x", smirc);
+                       ret = H_PARAMETER;
+                       goto modify_qp_exit1;
+               }
+               qp_cur_state = IB_QPS_INIT;
+               ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded");
+       }
+       /* check whether the transmitted current state equals the actual current state */
+       if ((attr_mask & IB_QP_CUR_STATE) &&
+           qp_cur_state != attr->cur_qp_state) {
+               ret = -EINVAL;
+               ehca_err(ibqp->device,
+                        "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>"
+                        " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x",
+                        attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num);
+               goto modify_qp_exit1;
+       }
+
+       ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x "
+                "new qp_state=%x attribute_mask=%x",
+                my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
+
+       qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
+       if (!smi_reset2init &&
+           !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
+                               attr_mask)) {
+               ret = -EINVAL;
+               ehca_err(ibqp->device,
+                        "Invalid qp transition new_state=%x cur_state=%x "
+                        "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state,
+                        qp_cur_state, my_qp, ibqp->qp_num, attr_mask);
+               goto modify_qp_exit1;
+       }
+
+       if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state)))
+               update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+       else {
+               ret = -EINVAL;
+               ehca_err(ibqp->device, "Invalid new qp state=%x "
+                        "ehca_qp=%p qp_num=%x",
+                        qp_new_state, my_qp, ibqp->qp_num);
+               goto modify_qp_exit1;
+       }
+
+       /* retrieve state transition struct to get req and opt attrs */
+       statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state);
+       if (statetrans < 0) {
+               ret = -EINVAL;
+               ehca_err(ibqp->device, "<INVALID STATE CHANGE> qp_cur_state=%x "
+                        "new_qp_state=%x State_xsition=%x ehca_qp=%p "
+                        "qp_num=%x", qp_cur_state, qp_new_state,
+                        statetrans, my_qp, ibqp->qp_num);
+               goto modify_qp_exit1;
+       }
+
+       qp_attr_idx = ib2ehcaqptype(ibqp->qp_type);
+
+       if (qp_attr_idx < 0) {
+               ret = qp_attr_idx;
+               ehca_err(ibqp->device,
+                        "Invalid QP type=%x ehca_qp=%p qp_num=%x",
+                        ibqp->qp_type, my_qp, ibqp->qp_num);
+               goto modify_qp_exit1;
+       }
+
+       ehca_dbg(ibqp->device,
+                "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
+                my_qp, ibqp->qp_num, statetrans);
+
+       /* sqe -> rts: set purge bit of bad wqe before actual trans */
+       if ((my_qp->qp_type == IB_QPT_UD ||
+            my_qp->qp_type == IB_QPT_GSI ||
+            my_qp->qp_type == IB_QPT_SMI) &&
+           statetrans == IB_QPST_SQE2RTS) {
+               /* mark next free wqe if kernel */
+               if (my_qp->uspace_squeue == 0) {
+                       struct ehca_wqe *wqe;
+                       /* lock send queue */
+                       spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+                       squeue_locked = 1;
+                       /* mark next free wqe */
+                       wqe = (struct ehca_wqe*)
+                               ipz_qeit_get(&my_qp->ipz_squeue);
+                       wqe->optype = wqe->wqef = 0xff;
+                       ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
+                                ibqp->qp_num, wqe);
+               }
+               ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
+               if (ret) {
+                       ehca_err(ibqp->device, "prepare_sqe_rts() failed "
+                                "ehca_qp=%p qp_num=%x ret=%x",
+                                my_qp, ibqp->qp_num, ret);
+                       goto modify_qp_exit2;
+               }
+       }
+
+       /*
+        * enable RDMA_Atomic_Control if reset->init and reliable connection;
+        * this is necessary since gen2 does not provide that flag,
+        * but pHyp requires it
+        */
+       if (statetrans == IB_QPST_RESET2INIT &&
+           (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) {
+               mqpcb->rdma_atomic_ctrl = 3;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1);
+       }
+       /* circumvention: pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */
+       if (statetrans == IB_QPST_INIT2RTR &&
+           (ibqp->qp_type == IB_QPT_UC) &&
+           !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) {
+               mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */
+               update_mask |=
+                       EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
+       }
+
+       if (attr_mask & IB_QP_PKEY_INDEX) {
+               mqpcb->prim_p_key_idx = attr->pkey_index;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
+       }
+       if (attr_mask & IB_QP_PORT) {
+               if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
+                       ret = -EINVAL;
+                       ehca_err(ibqp->device, "Invalid port=%x. "
+                                "ehca_qp=%p qp_num=%x num_ports=%x",
+                                attr->port_num, my_qp, ibqp->qp_num,
+                                shca->num_ports);
+                       goto modify_qp_exit2;
+               }
+               mqpcb->prim_phys_port = attr->port_num;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
+       }
+       if (attr_mask & IB_QP_QKEY) {
+               mqpcb->qkey = attr->qkey;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1);
+       }
+       if (attr_mask & IB_QP_AV) {
+               int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate);
+               int ehca_mult = ib_rate_to_mult(shca->sport[my_qp->
+                                               init_attr.port_num].rate);
+
+               mqpcb->dlid = attr->ah_attr.dlid;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1);
+               mqpcb->source_path_bits = attr->ah_attr.src_path_bits;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1);
+               mqpcb->service_level = attr->ah_attr.sl;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1);
+
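+               /*
+                * scale down the static rate only if the path rate (ah_mult)
+                * is below the physical port rate (ehca_mult)
+                */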
+               if (ah_mult < ehca_mult)
+                       mqpcb->max_static_rate = (ah_mult > 0) ?
+                       ((ehca_mult - 1) / ah_mult) : 0;
+               else
+                       mqpcb->max_static_rate = 0;
+
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
+
+               /*
+                * only if GRH is TRUE may we set SOURCE_GID_IDX
+                * and DEST_GID, otherwise pHyp will return H_ATTR_PARM!!!
+                */
+               if (attr->ah_attr.ah_flags == IB_AH_GRH) {
+                       mqpcb->send_grh_flag = 1 << 31;
+                       update_mask |=
+                               EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+                       mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
+                       update_mask |=
+                               EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
+
+                       for (cnt = 0; cnt < 16; cnt++)
+                               mqpcb->dest_gid.byte[cnt] =
+                                       attr->ah_attr.grh.dgid.raw[cnt];
+
+                       update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1);
+                       mqpcb->flow_label = attr->ah_attr.grh.flow_label;
+                       update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1);
+                       mqpcb->hop_limit = attr->ah_attr.grh.hop_limit;
+                       update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1);
+                       mqpcb->traffic_class = attr->ah_attr.grh.traffic_class;
+                       update_mask |=
+                               EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1);
+               }
+       }
+
+       if (attr_mask & IB_QP_PATH_MTU) {
+               mqpcb->path_mtu = attr->path_mtu;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
+       }
+       if (attr_mask & IB_QP_TIMEOUT) {
+               mqpcb->timeout = attr->timeout;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1);
+       }
+       if (attr_mask & IB_QP_RETRY_CNT) {
+               mqpcb->retry_count = attr->retry_cnt;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1);
+       }
+       if (attr_mask & IB_QP_RNR_RETRY) {
+               mqpcb->rnr_retry_count = attr->rnr_retry;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1);
+       }
+       if (attr_mask & IB_QP_RQ_PSN) {
+               mqpcb->receive_psn = attr->rq_psn;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1);
+       }
+       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+               mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ?
+                       attr->max_dest_rd_atomic : 2;
+               update_mask |=
+                       EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
+       }
+       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+               mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ?
+                       attr->max_rd_atomic : 2;
+               update_mask |=
+                       EHCA_BMASK_SET
+                       (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1);
+       }
+       if (attr_mask & IB_QP_ALT_PATH) {
+               int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate);
+               int ehca_mult = ib_rate_to_mult(
+                       shca->sport[my_qp->init_attr.port_num].rate);
+
+               mqpcb->dlid_al = attr->alt_ah_attr.dlid;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1);
+               mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
+               update_mask |=
+                       EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1);
+               mqpcb->service_level_al = attr->alt_ah_attr.sl;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1);
+
+               if (ah_mult < ehca_mult)
+                       mqpcb->max_static_rate_al = (ah_mult > 0) ?
+                       ((ehca_mult - 1) / ah_mult) : 0;
+               else
+                       mqpcb->max_static_rate_al = 0;
+
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1);
+
+               /*
+                * only if GRH is TRUE may we set SOURCE_GID_IDX
+                * and DEST_GID, otherwise pHyp will return H_ATTR_PARM!!!
+                */
+               if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
+                       mqpcb->send_grh_flag_al = 1 << 31;
+                       update_mask |=
+                               EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
+                       mqpcb->source_gid_idx_al =
+                               attr->alt_ah_attr.grh.sgid_index;
+                       update_mask |=
+                               EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1);
+
+                       for (cnt = 0; cnt < 16; cnt++)
+                               mqpcb->dest_gid_al.byte[cnt] =
+                                       attr->alt_ah_attr.grh.dgid.raw[cnt];
+
+                       update_mask |=
+                               EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1);
+                       mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
+                       update_mask |=
+                               EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1);
+                       mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
+                       update_mask |=
+                               EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1);
+                       mqpcb->traffic_class_al =
+                               attr->alt_ah_attr.grh.traffic_class;
+                       update_mask |=
+                               EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
+               }
+       }
+
+       if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+               mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer;
+               update_mask |=
+                       EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1);
+       }
+
+       if (attr_mask & IB_QP_SQ_PSN) {
+               mqpcb->send_psn = attr->sq_psn;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1);
+       }
+
+       if (attr_mask & IB_QP_DEST_QPN) {
+               mqpcb->dest_qp_nr = attr->dest_qp_num;
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1);
+       }
+
+       if (attr_mask & IB_QP_PATH_MIG_STATE) {
+               mqpcb->path_migration_state = attr->path_mig_state;
+               update_mask |=
+                       EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
+       }
+
+       if (attr_mask & IB_QP_CAP) {
+               mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1;
+               update_mask |=
+                       EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1);
+               mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1;
+               update_mask |=
+                       EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1);
+               /* no support for max_send/recv_sge yet */
+       }
+
+       if (ehca_debug_level)
+               ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num);
+
+       h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
+                                my_qp->ipz_qp_handle,
+                                &my_qp->pf,
+                                update_mask,
+                                mqpcb, my_qp->galpas.kernel);
+
+       if (h_ret != H_SUCCESS) {
+               ret = ehca2ib_return_code(h_ret);
+               ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx "
+                        "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
+               goto modify_qp_exit2;
+       }
+
+       if ((my_qp->qp_type == IB_QPT_UD ||
+            my_qp->qp_type == IB_QPT_GSI ||
+            my_qp->qp_type == IB_QPT_SMI) &&
+           statetrans == IB_QPST_SQE2RTS) {
+               /* ring doorbell to reprocess wqes */
+               iosync(); /* serialize GAL register access */
+               hipz_update_sqa(my_qp, bad_wqe_cnt-1);
+               ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt);
+       }
+
+       if (statetrans == IB_QPST_RESET2INIT ||
+           statetrans == IB_QPST_INIT2INIT) {
+               mqpcb->qp_enable = 1;
+               mqpcb->qp_state = EHCA_QPS_INIT;
+               update_mask = 0;
+               update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
+
+               h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
+                                        my_qp->ipz_qp_handle,
+                                        &my_qp->pf,
+                                        update_mask,
+                                        mqpcb,
+                                        my_qp->galpas.kernel);
+
+               if (h_ret != H_SUCCESS) {
+                       ret = ehca2ib_return_code(h_ret);
+                       ehca_err(ibqp->device, "ENABLE in context of "
+                                "RESET_2_INIT failed! Maybe you didn't get "
+                                "a LID h_ret=%lx ehca_qp=%p qp_num=%x",
+                                h_ret, my_qp, ibqp->qp_num);
+                       goto modify_qp_exit2;
+               }
+       }
+
+       if (statetrans == IB_QPST_ANY2RESET) {
+               ipz_qeit_reset(&my_qp->ipz_rqueue);
+               ipz_qeit_reset(&my_qp->ipz_squeue);
+       }
+
+       if (attr_mask & IB_QP_QKEY)
+               my_qp->qkey = attr->qkey;
+
+modify_qp_exit2:
+       if (squeue_locked) { /* this means: sqe -> rts */
+               spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
+               my_qp->sqerr_purgeflag = 1;
+       }
+
+modify_qp_exit1:
+       kfree(mqpcb);
+
+       return ret;
+}
+
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+                  struct ib_udata *udata)
+{
+       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+       struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+                                            ib_pd);
+       u32 cur_pid = current->tgid;
+
+       if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+           my_pd->ownpid != cur_pid) {
+               ehca_err(ibqp->pd->device, "Invalid caller pid=%x ownpid=%x",
+                        cur_pid, my_pd->ownpid);
+               return -EINVAL;
+       }
+
+       return internal_modify_qp(ibqp, attr, attr_mask, 0);
+}
+
+int ehca_query_qp(struct ib_qp *qp,
+                 struct ib_qp_attr *qp_attr,
+                 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+       struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+                                            ib_pd);
+       struct ehca_shca *shca = container_of(qp->device, struct ehca_shca,
+                                             ib_device);
+       struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
+       struct hcp_modify_qp_control_block *qpcb;
+       u32 cur_pid = current->tgid;
+       int cnt, ret = 0;
+       u64 h_ret;
+
+       if (my_pd->ib_pd.uobject  && my_pd->ib_pd.uobject->context  &&
+           my_pd->ownpid != cur_pid) {
+               ehca_err(qp->device, "Invalid caller pid=%x ownpid=%x",
+                        cur_pid, my_pd->ownpid);
+               return -EINVAL;
+       }
+
+       if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
+               ehca_err(qp->device, "Invalid attribute mask "
+                        "ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
+                        my_qp, qp->qp_num, qp_attr_mask);
+               return -EINVAL;
+       }
+
+       qpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+       if (!qpcb) {
+               ehca_err(qp->device, "Out of memory for qpcb "
+                        "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
+               return -ENOMEM;
+       }
+
+       h_ret = hipz_h_query_qp(adapter_handle,
+                               my_qp->ipz_qp_handle,
+                               &my_qp->pf,
+                               qpcb, my_qp->galpas.kernel);
+
+       if (h_ret != H_SUCCESS) {
+               ret = ehca2ib_return_code(h_ret);
+               ehca_err(qp->device, "hipz_h_query_qp() failed "
+                        "ehca_qp=%p qp_num=%x h_ret=%lx",
+                        my_qp, qp->qp_num, h_ret);
+               goto query_qp_exit1;
+       }
+
+       qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state);
+       qp_attr->qp_state = qp_attr->cur_qp_state;
+
+       if (qp_attr->cur_qp_state == -EINVAL) {
+               ret = -EINVAL;
+               ehca_err(qp->device, "Got invalid ehca_qp_state=%x "
+                        "ehca_qp=%p qp_num=%x",
+                        qpcb->qp_state, my_qp, qp->qp_num);
+               goto query_qp_exit1;
+       }
+
+       if (qp_attr->qp_state == IB_QPS_SQD)
+               qp_attr->sq_draining = 1;
+
+       qp_attr->qkey = qpcb->qkey;
+       qp_attr->path_mtu = qpcb->path_mtu;
+       qp_attr->path_mig_state = qpcb->path_migration_state;
+       qp_attr->rq_psn = qpcb->receive_psn;
+       qp_attr->sq_psn = qpcb->send_psn;
+       qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
+       qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1;
+       qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1;
+       /* UD_AV CIRCUMVENTION */
+       if (my_qp->qp_type == IB_QPT_UD) {
+               qp_attr->cap.max_send_sge =
+                       qpcb->actual_nr_sges_in_sq_wqe - 2;
+               qp_attr->cap.max_recv_sge =
+                       qpcb->actual_nr_sges_in_rq_wqe - 2;
+       } else {
+               qp_attr->cap.max_send_sge =
+                       qpcb->actual_nr_sges_in_sq_wqe;
+               qp_attr->cap.max_recv_sge =
+                       qpcb->actual_nr_sges_in_rq_wqe;
+       }
+
+       qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
+       qp_attr->dest_qp_num = qpcb->dest_qp_nr;
+
+       qp_attr->pkey_index =
+               EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx);
+
+       qp_attr->port_num =
+               EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port);
+
+       qp_attr->timeout = qpcb->timeout;
+       qp_attr->retry_cnt = qpcb->retry_count;
+       qp_attr->rnr_retry = qpcb->rnr_retry_count;
+
+       qp_attr->alt_pkey_index =
+               EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx);
+
+       qp_attr->alt_port_num = qpcb->alt_phys_port;
+       qp_attr->alt_timeout = qpcb->timeout_al;
+
+       /* primary av */
+       qp_attr->ah_attr.sl = qpcb->service_level;
+
+       if (qpcb->send_grh_flag) {
+               qp_attr->ah_attr.ah_flags = IB_AH_GRH;
+       }
+
+       qp_attr->ah_attr.static_rate = qpcb->max_static_rate;
+       qp_attr->ah_attr.dlid = qpcb->dlid;
+       qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits;
+       qp_attr->ah_attr.port_num = qp_attr->port_num;
+
+       /* primary GRH */
+       qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class;
+       qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit;
+       qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx;
+       qp_attr->ah_attr.grh.flow_label = qpcb->flow_label;
+
+       for (cnt = 0; cnt < 16; cnt++)
+               qp_attr->ah_attr.grh.dgid.raw[cnt] =
+                       qpcb->dest_gid.byte[cnt];
+
+       /* alternate AV */
+       qp_attr->alt_ah_attr.sl = qpcb->service_level_al;
+       if (qpcb->send_grh_flag_al) {
+               qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH;
+       }
+
+       qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al;
+       qp_attr->alt_ah_attr.dlid = qpcb->dlid_al;
+       qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al;
+
+       /* alternate GRH */
+       qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al;
+       qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al;
+       qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al;
+       qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al;
+
+       for (cnt = 0; cnt < 16; cnt++)
+               qp_attr->alt_ah_attr.grh.dgid.raw[cnt] =
+                       qpcb->dest_gid_al.byte[cnt];
+
+       /* return init attributes given in ehca_create_qp */
+       if (qp_init_attr)
+               *qp_init_attr = my_qp->init_attr;
+
+       if (ehca_debug_level)
+               ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num);
+
+query_qp_exit1:
+       kfree(qpcb);
+
+       return ret;
+}
+
+int ehca_destroy_qp(struct ib_qp *ibqp)
+{
+       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+       struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+                                             ib_device);
+       struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+                                            ib_pd);
+       u32 cur_pid = current->tgid;
+       u32 qp_num = ibqp->qp_num;
+       int ret;
+       u64 h_ret;
+       u8 port_num;
+       enum ib_qp_type qp_type;
+       unsigned long flags;
+
+       if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+           my_pd->ownpid != cur_pid) {
+               ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
+                        cur_pid, my_pd->ownpid);
+               return -EINVAL;
+       }
+
+       if (my_qp->send_cq) {
+               ret = ehca_cq_unassign_qp(my_qp->send_cq,
+                                             my_qp->real_qp_num);
+               if (ret) {
+                       ehca_err(ibqp->device, "Couldn't unassign qp from "
+                                "send_cq ret=%x qp_num=%x cq_num=%x", ret,
+                                my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number);
+                       return ret;
+               }
+       }
+
+       spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+       idr_remove(&ehca_qp_idr, my_qp->token);
+       spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+       /* unmap queues if they were mapped into user space */
+       if (my_qp->uspace_rqueue) {
+               ret = ehca_munmap(my_qp->uspace_rqueue,
+                                 my_qp->ipz_rqueue.queue_length);
+               if (ret)
+                       ehca_err(ibqp->device, "Could not munmap rqueue "
+                                "qp_num=%x", qp_num);
+               ret = ehca_munmap(my_qp->uspace_squeue,
+                                 my_qp->ipz_squeue.queue_length);
+               if (ret)
+                       ehca_err(ibqp->device, "Could not munmap squeue "
+                                "qp_num=%x", qp_num);
+               ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
+               if (ret)
+                       ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x",
+                                qp_num);
+       }
+
+       h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
+       if (h_ret != H_SUCCESS) {
+               ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx "
+                        "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
+               return ehca2ib_return_code(h_ret);
+       }
+
+       port_num = my_qp->init_attr.port_num;
+       qp_type  = my_qp->init_attr.qp_type;
+
+       /* no support for IB_QPT_SMI yet */
+       if (qp_type == IB_QPT_GSI) {
+               struct ib_event event;
+               ehca_info(ibqp->device, "device %s: port %x is inactive.",
+                         shca->ib_device.name, port_num);
+               event.device = &shca->ib_device;
+               event.event = IB_EVENT_PORT_ERR;
+               event.element.port_num = port_num;
+               shca->sport[port_num - 1].port_state = IB_PORT_DOWN;
+               ib_dispatch_event(&event);
+       }
+
+       ipz_queue_dtor(&my_qp->ipz_rqueue);
+       ipz_queue_dtor(&my_qp->ipz_squeue);
+       kmem_cache_free(qp_cache, my_qp);
+       return 0;
+}
+
+int ehca_init_qp_cache(void)
+{
+       qp_cache = kmem_cache_create("ehca_cache_qp",
+                                    sizeof(struct ehca_qp), 0,
+                                    SLAB_HWCACHE_ALIGN,
+                                    NULL, NULL);
+       if (!qp_cache)
+               return -ENOMEM;
+       return 0;
+}
+
+void ehca_cleanup_qp_cache(void)
+{
+       if (qp_cache)
+               kmem_cache_destroy(qp_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
new file mode 100644 (file)
index 0000000..b46bda1
--- /dev/null
@@ -0,0 +1,653 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  post_send/recv, poll_cq, req_notify
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm-powerpc/system.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
+                                 struct ehca_wqe *wqe_p,
+                                 struct ib_recv_wr *recv_wr)
+{
+       u8 cnt_ds;
+       if (unlikely((recv_wr->num_sge < 0) ||
+                    (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {
+               ehca_gen_err("Invalid number of WQE SGE. "
+                        "num_sge=%x max_nr_of_sg=%x",
+                        recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
+               return -EINVAL; /* invalid SG list length */
+       }
+
+       /* clear wqe header until sglist */
+       memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
+
+       wqe_p->work_request_id = recv_wr->wr_id;
+       wqe_p->nr_of_data_seg = recv_wr->num_sge;
+
+       for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
+               wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr =
+                       recv_wr->sg_list[cnt_ds].addr;
+               wqe_p->u.all_rcv.sg_list[cnt_ds].lkey =
+                       recv_wr->sg_list[cnt_ds].lkey;
+               wqe_p->u.all_rcv.sg_list[cnt_ds].length =
+                       recv_wr->sg_list[cnt_ds].length;
+       }
+
+       if (ehca_debug_level) {
+               ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue);
+               ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
+       }
+
+       return 0;
+}
+
+#if defined(DEBUG_GSI_SEND_WR)
+
+/* need ib_mad struct */
+#include <rdma/ib_mad.h>
+
+static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
+{
+       int idx = 0;
+       int j;
+       while (send_wr) {
+               struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
+               struct ib_sge *sge = send_wr->sg_list;
+               ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
+                            "send_flags=%x opcode=%x", idx, send_wr->wr_id,
+                            send_wr->num_sge, send_wr->send_flags,
+                            send_wr->opcode);
+               if (mad_hdr) {
+                       ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x "
+                                    "mgmt_class=%x class_version=%x method=%x "
+                                    "status=%x class_specific=%x tid=%lx "
+                                    "attr_id=%x resv=%x attr_mod=%x",
+                                    idx, mad_hdr->base_version,
+                                    mad_hdr->mgmt_class,
+                                    mad_hdr->class_version, mad_hdr->method,
+                                    mad_hdr->status, mad_hdr->class_specific,
+                                    mad_hdr->tid, mad_hdr->attr_id,
+                                    mad_hdr->resv,
+                                    mad_hdr->attr_mod);
+               }
+               for (j = 0; j < send_wr->num_sge; j++) {
+                       u8 *data = (u8 *) abs_to_virt(sge->addr);
+                       ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
+                                    "lkey=%x",
+                                    idx, j, data, sge->length, sge->lkey);
+                       /* assume length is n*16 */
+                       ehca_dmp(data, sge->length, "send_wr#%x sge#%x",
+                                idx, j);
+                       sge++;
+               } /* eof for j */
+               idx++;
+               send_wr = send_wr->next;
+       } /* eof while send_wr */
+}
+
+#endif /* DEBUG_GSI_SEND_WR */
+
+static inline int ehca_write_swqe(struct ehca_qp *qp,
+                                 struct ehca_wqe *wqe_p,
+                                 const struct ib_send_wr *send_wr)
+{
+       u32 idx;
+       u64 dma_length;
+       struct ehca_av *my_av;
+       u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+
+       if (unlikely((send_wr->num_sge < 0) ||
+                    (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
+               ehca_gen_err("Invalid number of WQE SGE. "
+                        "num_sge=%x max_nr_of_sg=%x",
+                        send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
+               return -EINVAL; /* invalid SG list length */
+       }
+
+       /* clear wqe header until sglist */
+       memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
+
+       wqe_p->work_request_id = send_wr->wr_id;
+
+       switch (send_wr->opcode) {
+       case IB_WR_SEND:
+       case IB_WR_SEND_WITH_IMM:
+               wqe_p->optype = WQE_OPTYPE_SEND;
+               break;
+       case IB_WR_RDMA_WRITE:
+       case IB_WR_RDMA_WRITE_WITH_IMM:
+               wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
+               break;
+       case IB_WR_RDMA_READ:
+               wqe_p->optype = WQE_OPTYPE_RDMAREAD;
+               break;
+       default:
+               ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
+               return -EINVAL; /* invalid opcode */
+       }
+
+       wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;
+
+       wqe_p->wr_flag = 0;
+
+       if (send_wr->send_flags & IB_SEND_SIGNALED)
+               wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+
+       if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
+           send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+               /* this might not work as long as HW does not support it */
+               wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data);
+               wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
+       }
+
+       wqe_p->nr_of_data_seg = send_wr->num_sge;
+
+       switch (qp->qp_type) {
+       case IB_QPT_SMI:
+       case IB_QPT_GSI:
+               /* no break is intentional here */
+       case IB_QPT_UD:
+               /* IB 1.2 spec C10-15 compliance */
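+               /* a set MSB in the WR's Q_Key means: use the QP's own Q_Key */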
+               if (send_wr->wr.ud.remote_qkey & 0x80000000)
+                       remote_qkey = qp->qkey;
+
+               wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
+               wqe_p->local_ee_context_qkey = remote_qkey;
+               if (!send_wr->wr.ud.ah) {
+                       ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
+                       return -EINVAL;
+               }
+               my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
+               wqe_p->u.ud_av.ud_av = my_av->av;
+
+               /*
+                * omitted check of IB_SEND_INLINE
+                * since HW does not support it
+                */
+               for (idx = 0; idx < send_wr->num_sge; idx++) {
+                       wqe_p->u.ud_av.sg_list[idx].vaddr =
+                               send_wr->sg_list[idx].addr;
+                       wqe_p->u.ud_av.sg_list[idx].lkey =
+                               send_wr->sg_list[idx].lkey;
+                       wqe_p->u.ud_av.sg_list[idx].length =
+                               send_wr->sg_list[idx].length;
+               } /* eof for idx */
+               if (qp->qp_type == IB_QPT_SMI ||
+                   qp->qp_type == IB_QPT_GSI)
+                       wqe_p->u.ud_av.ud_av.pmtu = 1;
+               if (qp->qp_type == IB_QPT_GSI) {
+                       wqe_p->pkeyi = send_wr->wr.ud.pkey_index;
+#ifdef DEBUG_GSI_SEND_WR
+                       trace_send_wr_ud(send_wr);
+#endif /* DEBUG_GSI_SEND_WR */
+               }
+               break;
+
+       case IB_QPT_UC:
+               if (send_wr->send_flags & IB_SEND_FENCE)
+                       wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
+               /* no break is intentional here */
+       case IB_QPT_RC:
+               /* TODO: atomic not implemented */
+               wqe_p->u.nud.remote_virtual_adress =
+                       send_wr->wr.rdma.remote_addr;
+               wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;
+
+               /*
+                * omitted checking of IB_SEND_INLINE
+                * since HW does not support it
+                */
+               dma_length = 0;
+               for (idx = 0; idx < send_wr->num_sge; idx++) {
+                       wqe_p->u.nud.sg_list[idx].vaddr =
+                               send_wr->sg_list[idx].addr;
+                       wqe_p->u.nud.sg_list[idx].lkey =
+                               send_wr->sg_list[idx].lkey;
+                       wqe_p->u.nud.sg_list[idx].length =
+                               send_wr->sg_list[idx].length;
+                       dma_length += send_wr->sg_list[idx].length;
+               } /* eof idx */
+               wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
+
+               break;
+
+       default:
+               ehca_gen_err("Invalid qptype=%x", qp->qp_type);
+               return -EINVAL;
+       }
+
+       if (ehca_debug_level) {
+               ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
+               ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
+       }
+       return 0;
+}
+
+/* map_ib_wc_status converts raw cqe_status to ib_wc_status */
+static inline void map_ib_wc_status(u32 cqe_status,
+                                   enum ib_wc_status *wc_status)
+{
+       if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
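+               /* the low six bits of cqe_status carry the error syndrome */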
+               switch (cqe_status & 0x3F) {
+               case 0x01:
+               case 0x21:
+                       *wc_status = IB_WC_LOC_LEN_ERR;
+                       break;
+               case 0x02:
+               case 0x22:
+                       *wc_status = IB_WC_LOC_QP_OP_ERR;
+                       break;
+               case 0x03:
+               case 0x23:
+                       *wc_status = IB_WC_LOC_EEC_OP_ERR;
+                       break;
+               case 0x04:
+               case 0x24:
+                       *wc_status = IB_WC_LOC_PROT_ERR;
+                       break;
+               case 0x05:
+               case 0x25:
+                       *wc_status = IB_WC_WR_FLUSH_ERR;
+                       break;
+               case 0x06:
+                       *wc_status = IB_WC_MW_BIND_ERR;
+                       break;
+               case 0x07: /* remote error - look into bits 20:24 */
+                       switch ((cqe_status
+                                & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
+                       case 0x0:
+                               /*
+                                * PSN Sequence Error!
+                                * couldn't find a matching status!
+                                */
+                               *wc_status = IB_WC_GENERAL_ERR;
+                               break;
+                       case 0x1:
+                               *wc_status = IB_WC_REM_INV_REQ_ERR;
+                               break;
+                       case 0x2:
+                               *wc_status = IB_WC_REM_ACCESS_ERR;
+                               break;
+                       case 0x3:
+                               *wc_status = IB_WC_REM_OP_ERR;
+                               break;
+                       case 0x4:
+                               *wc_status = IB_WC_REM_INV_RD_REQ_ERR;
+                               break;
+                       }
+                       break;
+               case 0x08:
+                       *wc_status = IB_WC_RETRY_EXC_ERR;
+                       break;
+               case 0x09:
+                       *wc_status = IB_WC_RNR_RETRY_EXC_ERR;
+                       break;
+               case 0x0A:
+               case 0x2D:
+                       *wc_status = IB_WC_REM_ABORT_ERR;
+                       break;
+               case 0x0B:
+               case 0x2E:
+                       *wc_status = IB_WC_INV_EECN_ERR;
+                       break;
+               case 0x0C:
+               case 0x2F:
+                       *wc_status = IB_WC_INV_EEC_STATE_ERR;
+                       break;
+               case 0x0D:
+                       *wc_status = IB_WC_BAD_RESP_ERR;
+                       break;
+               case 0x10:
+                       /* WQE purged */
+                       *wc_status = IB_WC_WR_FLUSH_ERR;
+                       break;
+               default:
+                       *wc_status = IB_WC_FATAL_ERR;
+
+               }
+       } else
+               *wc_status = IB_WC_SUCCESS;
+}
+
+int ehca_post_send(struct ib_qp *qp,
+                  struct ib_send_wr *send_wr,
+                  struct ib_send_wr **bad_send_wr)
+{
+       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+       struct ib_send_wr *cur_send_wr;
+       struct ehca_wqe *wqe_p;
+       int wqe_cnt = 0;
+       int ret = 0;
+       unsigned long spl_flags;
+
+       /* LOCK the QUEUE */
+       spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+
+       /* loop processes list of send reqs */
+       for (cur_send_wr = send_wr; cur_send_wr != NULL;
+            cur_send_wr = cur_send_wr->next) {
+               u64 start_offset = my_qp->ipz_squeue.current_q_offset;
+               /* get a pointer to the next free WQE */
+               wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
+               if (unlikely(!wqe_p)) {
+                       /* too many posted work requests: queue overflow */
+                       if (bad_send_wr)
+                               *bad_send_wr = cur_send_wr;
+                       if (wqe_cnt == 0) {
+                               ret = -ENOMEM;
+                               ehca_err(qp->device, "Too many posted WQEs "
+                                        "qp_num=%x", qp->qp_num);
+                       }
+                       goto post_send_exit0;
+               }
+               /* write a SEND WQE into the QUEUE */
+               ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr);
+               /*
+                * if something failed,
+                * reset the free entry pointer to the start value
+                */
+               if (unlikely(ret)) {
+                       my_qp->ipz_squeue.current_q_offset = start_offset;
+                       *bad_send_wr = cur_send_wr;
+                       if (wqe_cnt == 0) {
+                               ret = -EINVAL;
+                               ehca_err(qp->device, "Could not write WQE "
+                                        "qp_num=%x", qp->qp_num);
+                       }
+                       goto post_send_exit0;
+               }
+               wqe_cnt++;
+               ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d",
+                        my_qp, qp->qp_num, wqe_cnt);
+       } /* eof for cur_send_wr */
+
+post_send_exit0:
+       /* UNLOCK the QUEUE */
+       spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
+       iosync(); /* serialize GAL register access */
+       hipz_update_sqa(my_qp, wqe_cnt);
+       return ret;
+}
+
+int ehca_post_recv(struct ib_qp *qp,
+                  struct ib_recv_wr *recv_wr,
+                  struct ib_recv_wr **bad_recv_wr)
+{
+       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+       struct ib_recv_wr *cur_recv_wr;
+       struct ehca_wqe *wqe_p;
+       int wqe_cnt = 0;
+       int ret = 0;
+       unsigned long spl_flags;
+
+       /* LOCK the QUEUE */
+       spin_lock_irqsave(&my_qp->spinlock_r, spl_flags);
+
+       /* loop processes list of recv reqs */
+       for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
+            cur_recv_wr = cur_recv_wr->next) {
+               u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
+               /* get a pointer to the next free WQE */
+               wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
+               if (unlikely(!wqe_p)) {
+                       /* too many posted work requests: queue overflow */
+                       if (bad_recv_wr)
+                               *bad_recv_wr = cur_recv_wr;
+                       if (wqe_cnt == 0) {
+                               ret = -ENOMEM;
+                               ehca_err(qp->device, "Too many posted WQEs "
+                                        "qp_num=%x", qp->qp_num);
+                       }
+                       goto post_recv_exit0;
+               }
+               /* write a RECV WQE into the QUEUE */
+               ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
+               /*
+                * if something failed,
+                * reset the free entry pointer to the start value
+                */
+               if (unlikely(ret)) {
+                       my_qp->ipz_rqueue.current_q_offset = start_offset;
+                       *bad_recv_wr = cur_recv_wr;
+                       if (wqe_cnt == 0) {
+                               ret = -EINVAL;
+                               ehca_err(qp->device, "Could not write WQE "
+                                        "qp_num=%x", qp->qp_num);
+                       }
+                       goto post_recv_exit0;
+               }
+               wqe_cnt++;
+               ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d",
+                    my_qp, qp->qp_num, wqe_cnt);
+       } /* eof for cur_recv_wr */
+
+post_recv_exit0:
+       spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags);
+       iosync(); /* serialize GAL register access */
+       hipz_update_rqa(my_qp, wqe_cnt);
+       return ret;
+}
+
+/*
+ * ib_wc_opcode table converts an ehca wc opcode into an ib wc opcode.
+ * Since zero indicates an invalid opcode, each entry stores the ib opcode
+ * plus one, so the looked-up value must be decremented before use.
+ */
+static const u8 ib_wc_opcode[255] = {
+       [0x01] = IB_WC_RECV+1,
+       [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
+       [0x04] = IB_WC_BIND_MW+1,
+       [0x08] = IB_WC_FETCH_ADD+1,
+       [0x10] = IB_WC_COMP_SWAP+1,
+       [0x20] = IB_WC_RDMA_WRITE+1,
+       [0x40] = IB_WC_RDMA_READ+1,
+       [0x80] = IB_WC_SEND+1
+};
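+
+/*
+ * Example of the +1 convention above: cqe->optype 0x80 looks up
+ * IB_WC_SEND + 1 and is reported as IB_WC_SEND after the decrement in
+ * ehca_poll_cq_one(); an optype missing from the table yields 0, which
+ * becomes -1 after the decrement and is rejected as an invalid opcode.
+ */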
+
+/* internal function to poll one entry of cq */
+static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
+{
+       int ret = 0;
+       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+       struct ehca_cqe *cqe;
+       int cqe_count = 0;
+
+poll_cq_one_read_cqe:
+       cqe = (struct ehca_cqe *)
+               ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
+       if (!cqe) {
+               ret = -EAGAIN;
+               ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p "
+                        "cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret);
+               goto  poll_cq_one_exit0;
+       }
+
+       /* prevents loads being reordered across this point */
+       rmb();
+
+       cqe_count++;
+       if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
+               struct ehca_qp *qp = ehca_cq_get_qp(my_cq,
+                                                   cqe->local_qp_number);
+               int purgeflag;
+               unsigned long spl_flags;
+               if (!qp) {
+                       ehca_err(cq->device, "cq_num=%x qp_num=%x "
+                                "could not find qp -> ignore cqe",
+                                my_cq->cq_number, cqe->local_qp_number);
+                       ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
+                                my_cq->cq_number, cqe->local_qp_number);
+                       /* ignore this purged cqe */
+                       goto poll_cq_one_read_cqe;
+               }
+               spin_lock_irqsave(&qp->spinlock_s, spl_flags);
+               purgeflag = qp->sqerr_purgeflag;
+               spin_unlock_irqrestore(&qp->spinlock_s, spl_flags);
+
+               if (purgeflag) {
+                       ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x "
+                                "src_qp=%x",
+                                cqe->local_qp_number, cqe->remote_qp_number);
+                       if (ehca_debug_level)
+                               ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
+                                        cqe->local_qp_number,
+                                        cqe->remote_qp_number);
+                       /*
+                        * ignore this cqe to avoid duplicate cqes for the
+                        * bad wqe that caused the sq error, and turn off
+                        * the purge flag
+                        */
+                       qp->sqerr_purgeflag = 0;
+                       goto poll_cq_one_read_cqe;
+               }
+       }
+
+       /* tracing cqe */
+       if (ehca_debug_level) {
+               ehca_dbg(cq->device,
+                        "Received COMPLETION ehca_cq=%p cq_num=%x -----",
+                        my_cq, my_cq->cq_number);
+               ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
+                        my_cq, my_cq->cq_number);
+               ehca_dbg(cq->device,
+                        "ehca_cq=%p cq_num=%x -------------------------",
+                        my_cq, my_cq->cq_number);
+       }
+
+       /* we got a completion! */
+       wc->wr_id = cqe->work_request_id;
+
+       /* eval ib_wc_opcode */
+       wc->opcode = ib_wc_opcode[cqe->optype]-1;
+       if (unlikely(wc->opcode == -1)) {
+               ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
+                        "ehca_cq=%p cq_num=%x",
+                        cqe->optype, cqe->status, my_cq, my_cq->cq_number);
+               /* dump cqe for additional info */
+               ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
+                        my_cq, my_cq->cq_number);
+               /* also update the queue adder so this entry is thrown away */
+               goto poll_cq_one_exit0;
+       }
+       /* eval ib_wc_status */
+       if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) {
+               /* complete with errors */
+               map_ib_wc_status(cqe->status, &wc->status);
+               wc->vendor_err = wc->status;
+       } else
+               wc->status = IB_WC_SUCCESS;
+
+       wc->qp_num = cqe->local_qp_number;
+       wc->byte_len = cqe->nr_bytes_transferred;
+       wc->pkey_index = cqe->pkey_index;
+       wc->slid = cqe->rlid;
+       wc->dlid_path_bits = cqe->dlid;
+       wc->src_qp = cqe->remote_qp_number;
+       wc->wc_flags = cqe->w_completion_flags;
+       wc->imm_data = cpu_to_be32(cqe->immediate_data);
+       wc->sl = cqe->service_level;
+
+       if (wc->status != IB_WC_SUCCESS)
+               ehca_dbg(cq->device,
+                        "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe "
+                        "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx "
+                        "cqe=%p", my_cq, my_cq->cq_number, cqe->optype,
+                        cqe->status, cqe->local_qp_number,
+                        cqe->remote_qp_number, cqe->work_request_id, cqe);
+
+poll_cq_one_exit0:
+       if (cqe_count > 0)
+               hipz_update_feca(my_cq, cqe_count);
+
+       return ret;
+}
+
+int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
+{
+       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+       int nr;
+       struct ib_wc *current_wc = wc;
+       int ret = 0;
+       unsigned long spl_flags;
+
+       if (num_entries < 1) {
+               ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
+                        "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
+               ret = -EINVAL;
+               goto poll_cq_exit0;
+       }
+
+       spin_lock_irqsave(&my_cq->spinlock, spl_flags);
+       for (nr = 0; nr < num_entries; nr++) {
+               ret = ehca_poll_cq_one(cq, current_wc);
+               if (ret)
+                       break;
+               current_wc++;
+       } /* eof for nr */
+       spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
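+       /*
+        * -EAGAIN from ehca_poll_cq_one() only means the completion queue
+        * is empty, so report the number of entries actually reaped rather
+        * than an error.
+        */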
+       if (ret == -EAGAIN || !ret)
+               ret = nr;
+
+poll_cq_exit0:
+       return ret;
+}
+
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify)
+{
+       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+
+       switch (cq_notify) {
+       case IB_CQ_SOLICITED:
+               hipz_set_cqx_n0(my_cq, 1);
+               break;
+       case IB_CQ_NEXT_COMP:
+               hipz_set_cqx_n1(my_cq, 1);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
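+
+/*
+ * Illustration only, not part of this patch: in-kernel consumers reach the
+ * verbs above through the generic ib_verbs wrappers, roughly as sketched
+ * below, where handle_wc() stands for a hypothetical completion handler:
+ *
+ *        struct ib_wc wc;
+ *
+ *        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ *        while (ib_poll_cq(cq, 1, &wc) > 0)
+ *                handle_wc(&wc);
+ *
+ * Since ehca_poll_cq() returns the number of completions reaped, and 0
+ * rather than an error when the queue is empty, the "> 0" loop terminates
+ * once the CQ has been drained.
+ */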
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
new file mode 100644 (file)
index 0000000..9f16e9c
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  SQP functions
+ *
+ *  Authors: Khadija Souissi <souissi@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+
+/**
+ * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When the special
+ * queue pair is created successfully, the corresponding port becomes active.
+ *
+ * Defining special queue pair 0 (SMI QP) is not supported yet.
+ *
+ * @qp_init_attr: Queue pair init attributes with port and queue pair type
+ */
+
+u64 ehca_define_sqp(struct ehca_shca *shca,
+                   struct ehca_qp *ehca_qp,
+                   struct ib_qp_init_attr *qp_init_attr)
+{
+       u32 pma_qp_nr, bma_qp_nr;
+       u64 ret;
+       u8 port = qp_init_attr->port_num;
+       int counter;
+
+       shca->sport[port - 1].port_state = IB_PORT_DOWN;
+
+       switch (qp_init_attr->qp_type) {
+       case IB_QPT_SMI:
+               /* function not supported yet */
+               break;
+       case IB_QPT_GSI:
+               ret = hipz_h_define_aqp1(shca->ipz_hca_handle,
+                                        ehca_qp->ipz_qp_handle,
+                                        ehca_qp->galpas.kernel,
+                                        (u32) qp_init_attr->port_num,
+                                        &pma_qp_nr, &bma_qp_nr);
+
+               if (ret != H_SUCCESS) {
+                       ehca_err(&shca->ib_device,
+                                "Can't define AQP1 for port %x. rc=%lx",
+                                port, ret);
+                       return ret;
+               }
+               break;
+       default:
+               ehca_err(&shca->ib_device, "invalid qp_type=%x",
+                        qp_init_attr->qp_type);
+               return H_PARAMETER;
+       }
+
+       for (counter = 0;
+            shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
+                    counter < ehca_port_act_time;
+            counter++) {
+               ehca_dbg(&shca->ib_device, "... wait until port %x is active",
+                        port);
+               msleep_interruptible(1000);
+       }
+
+       if (counter == ehca_port_act_time) {
+               ehca_err(&shca->ib_device, "Port %x is not active.", port);
+               return H_HARDWARE;
+       }
+
+       return H_SUCCESS;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
new file mode 100644 (file)
index 0000000..9f56bb8
--- /dev/null
@@ -0,0 +1,172 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  auxiliary functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Khadija Souissi <souissik@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef EHCA_TOOLS_H
+#define EHCA_TOOLS_H
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+#include <linux/version.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+
+#include <asm/abs_addr.h>
+#include <asm/ibmebus.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+extern int ehca_debug_level;
+
+#define ehca_dbg(ib_dev, format, arg...) \
+       do { \
+               if (unlikely(ehca_debug_level)) \
+                       dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
+                                  "PU%04x EHCA_DBG:%s " format "\n", \
+                                  get_paca()->paca_index, __FUNCTION__, \
+                                  ## arg); \
+       } while (0)
+
+#define ehca_info(ib_dev, format, arg...) \
+       dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
+                get_paca()->paca_index, __FUNCTION__, ## arg)
+
+#define ehca_warn(ib_dev, format, arg...) \
+       dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
+                get_paca()->paca_index, __FUNCTION__, ## arg)
+
+#define ehca_err(ib_dev, format, arg...) \
+       dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
+               get_paca()->paca_index, __FUNCTION__, ## arg)
+
+/* use this one only if no ib_dev available */
+#define ehca_gen_dbg(format, arg...) \
+       do { \
+               if (unlikely(ehca_debug_level)) \
+                       printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\
+                              get_paca()->paca_index, __FUNCTION__, ## arg); \
+       } while (0)
+
+#define ehca_gen_warn(format, arg...) \
+       do { \
+               if (unlikely(ehca_debug_level)) \
+                       printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\
+                              get_paca()->paca_index, __FUNCTION__, ## arg); \
+       } while (0)
+
+#define ehca_gen_err(format, arg...) \
+       printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
+               get_paca()->paca_index, __FUNCTION__, ## arg)
+
+/**
+ * ehca_dmp - printk a memory block whose length is n*8 bytes.
+ * Each line has the following layout:
+ * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
+ */
+#define ehca_dmp(adr, len, format, args...) \
+       do {                                   \
+               unsigned int x;                       \
+               unsigned int l = (unsigned int)(len); \
+               unsigned char *deb = (unsigned char*)(adr);     \
+               for (x = 0; x < l; x += 16) { \
+                       printk("EHCA_DMP:%s" format \
+                              " adr=%p ofs=%04x %016lx %016lx\n", \
+                              __FUNCTION__, ##args, deb, x, \
+                              *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
+                       deb += 16; \
+               } \
+       } while (0)
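+
+/*
+ * Example: ehca_dmp(cqe, 64, ...) as called above prints the 64-byte CQE
+ * as four lines of two 8-byte hex words each.
+ */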
+
+/* define a bitmask, little endian version */
+#define EHCA_BMASK(pos,length) (((pos)<<16)+(length))
+
+/* define a bitmask, the ibm way... */
+#define EHCA_BMASK_IBM(from,to) (((63-(to))<<16)+((to)-(from)+1))
+
+/* internal function, don't use */
+#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff)
+
+/* internal function, don't use */
+#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff))
+
+/**
+ * EHCA_BMASK_SET - return value shifted and masked by mask
+ * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable
+ * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
+ * in variable
+ */
+#define EHCA_BMASK_SET(mask,value) \
+       ((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask))
+
+/**
+ * EHCA_BMASK_GET - extract a parameter from value by mask
+ */
+#define EHCA_BMASK_GET(mask,value) \
+       (EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask)))
+
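+
+/*
+ * Worked example, derived from the definitions above: EHCA_BMASK_IBM(13, 15)
+ * names a 3-bit field whose least significant bit is machine bit 48 (IBM bit
+ * numbering counts from the most significant bit).  For that mask,
+ * EHCA_BMASK_SET(mask, 2) yields 0x0002000000000000ULL and
+ * EHCA_BMASK_GET(mask, reg) evaluates to ((reg) >> 48) & 0x7.
+ */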
+
+/* Converts ehca to ib return code */
+static inline int ehca2ib_return_code(u64 ehca_rc)
+{
+       switch (ehca_rc) {
+       case H_SUCCESS:
+               return 0;
+       case H_BUSY:
+               return -EBUSY;
+       case H_NO_MEM:
+               return -ENOMEM;
+       default:
+               return -EINVAL;
+       }
+}
+
+
+#endif /* EHCA_TOOLS_H */
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
new file mode 100644 (file)
index 0000000..e08764e
--- /dev/null
@@ -0,0 +1,392 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  userspace support verbs
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
+                                       struct ib_udata *udata)
+{
+       struct ehca_ucontext *my_context;
+
+       my_context = kzalloc(sizeof *my_context, GFP_KERNEL);
+       if (!my_context) {
+               ehca_err(device, "Out of memory device=%p", device);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       return &my_context->ib_ucontext;
+}
+
+int ehca_dealloc_ucontext(struct ib_ucontext *context)
+{
+       kfree(container_of(context, struct ehca_ucontext, ib_ucontext));
+       return 0;
+}
+
+struct page *ehca_nopage(struct vm_area_struct *vma,
+                        unsigned long address, int *type)
+{
+       struct page *mypage = NULL;
+       u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
+       u32 idr_handle = fileoffset >> 32;
+       u32 q_type = (fileoffset >> 28) & 0xF;    /* CQ, QP,...        */
+       u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
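+       /*
+        * Layout implied by the decoding above: bits 63..32 of the file
+        * offset carry the idr handle, bits 31..28 the queue type
+        * (1 = CQ, 2 = QP) and bits 27..24 the resource type within that
+        * queue.
+        */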
+       u32 cur_pid = current->tgid;
+       unsigned long flags;
+       struct ehca_cq *cq;
+       struct ehca_qp *qp;
+       struct ehca_pd *pd;
+       u64 offset;
+       void *vaddr;
+
+       switch (q_type) {
+       case 1: /* CQ */
+               spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+               cq = idr_find(&ehca_cq_idr, idr_handle);
+               spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+               /* make sure this mmap really belongs to the authorized user */
+               if (!cq) {
+                       ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS");
+                       return NOPAGE_SIGBUS;
+               }
+
+               if (cq->ownpid != cur_pid) {
+                       ehca_err(cq->ib_cq.device,
+                                "Invalid caller pid=%x ownpid=%x",
+                                cur_pid, cq->ownpid);
+                       return NOPAGE_SIGBUS;
+               }
+
+               if (rsrc_type == 2) {
+                       ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq);
+                       offset = address - vma->vm_start;
+                       vaddr = ipz_qeit_calc(&cq->ipz_queue, offset);
+                       ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p",
+                                offset, vaddr);
+                       mypage = virt_to_page(vaddr);
+               }
+               break;
+
+       case 2: /* QP */
+               spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+               qp = idr_find(&ehca_qp_idr, idr_handle);
+               spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+               /* make sure this mmap really belongs to the authorized user */
+               if (!qp) {
+                       ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS");
+                       return NOPAGE_SIGBUS;
+               }
+
+               pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
+               if (pd->ownpid != cur_pid) {
+                       ehca_err(qp->ib_qp.device,
+                                "Invalid caller pid=%x ownpid=%x",
+                                cur_pid, pd->ownpid);
+                       return NOPAGE_SIGBUS;
+               }
+
+               if (rsrc_type == 2) {   /* rqueue */
+                       ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp);
+                       offset = address - vma->vm_start;
+                       vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset);
+                       ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
+                                offset, vaddr);
+                       mypage = virt_to_page(vaddr);
+               } else if (rsrc_type == 3) {    /* squeue */
+                       ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp);
+                       offset = address - vma->vm_start;
+                       vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset);
+                       ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
+                                offset, vaddr);
+                       mypage = virt_to_page(vaddr);
+               }
+               break;
+
+       default:
+               ehca_gen_err("bad queue type %x", q_type);
+               return NOPAGE_SIGBUS;
+       }
+
+       if (!mypage) {
+               ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS");
+               return NOPAGE_SIGBUS;
+       }
+       get_page(mypage);
+
+       return mypage;
+}
+
+static struct vm_operations_struct ehcau_vm_ops = {
+       .nopage = ehca_nopage,
+};
+
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+       u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
+       u32 idr_handle = fileoffset >> 32;
+       u32 q_type = (fileoffset >> 28) & 0xF;    /* CQ, QP,...        */
+       u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
+       u32 cur_pid = current->tgid;
+       u32 ret;
+       u64 vsize, physical;
+       unsigned long flags;
+       struct ehca_cq *cq;
+       struct ehca_qp *qp;
+       struct ehca_pd *pd;
+
+       switch (q_type) {
+       case  1: /* CQ */
+               spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+               cq = idr_find(&ehca_cq_idr, idr_handle);
+               spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+               /* make sure this mmap really belongs to the authorized user */
+               if (!cq)
+                       return -EINVAL;
+
+               if (cq->ownpid != cur_pid) {
+                       ehca_err(cq->ib_cq.device,
+                                "Invalid caller pid=%x ownpid=%x",
+                                cur_pid, cq->ownpid);
+                       return -ENOMEM;
+               }
+
+               if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
+                       return -EINVAL;
+
+               switch (rsrc_type) {
+               case 1: /* galpa fw handle */
+                       ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq);
+                       vma->vm_flags |= VM_RESERVED;
+                       vsize = vma->vm_end - vma->vm_start;
+                       if (vsize != EHCA_PAGESIZE) {
+                               ehca_err(cq->ib_cq.device, "invalid vsize=%lx",
+                                        vma->vm_end - vma->vm_start);
+                               return -EINVAL;
+                       }
+
+                       physical = cq->galpas.user.fw_handle;
+                       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+                       vma->vm_flags |= VM_IO | VM_RESERVED;
+
+                       ehca_dbg(cq->ib_cq.device,
+                                "vsize=%lx physical=%lx", vsize, physical);
+                       ret = remap_pfn_range(vma, vma->vm_start,
+                                             physical >> PAGE_SHIFT, vsize,
+                                             vma->vm_page_prot);
+                       if (ret) {
+                               ehca_err(cq->ib_cq.device,
+                                        "remap_pfn_range() failed ret=%x",
+                                        ret);
+                               return -ENOMEM;
+                       }
+                       break;
+
+               case 2: /* cq queue_addr */
+                       ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq);
+                       vma->vm_flags |= VM_RESERVED;
+                       vma->vm_ops = &ehcau_vm_ops;
+                       break;
+
+               default:
+                       ehca_err(cq->ib_cq.device, "bad resource type %x",
+                                rsrc_type);
+                       return -EINVAL;
+               }
+               break;
+
+       case 2: /* QP */
+               spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+               qp = idr_find(&ehca_qp_idr, idr_handle);
+               spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+               /* make sure this mmap really belongs to the authorized user */
+               if (!qp)
+                       return -EINVAL;
+
+               pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
+               if (pd->ownpid != cur_pid) {
+                       ehca_err(qp->ib_qp.device,
+                                "Invalid caller pid=%x ownpid=%x",
+                                cur_pid, pd->ownpid);
+                       return -ENOMEM;
+               }
+
+               if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context)
+                       return -EINVAL;
+
+               switch (rsrc_type) {
+               case 1: /* galpa fw handle */
+                       ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp);
+                       vma->vm_flags |= VM_RESERVED;
+                       vsize = vma->vm_end - vma->vm_start;
+                       if (vsize != EHCA_PAGESIZE) {
+                               ehca_err(qp->ib_qp.device, "invalid vsize=%lx",
+                                        vma->vm_end - vma->vm_start);
+                               return -EINVAL;
+                       }
+
+                       physical = qp->galpas.user.fw_handle;
+                       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+                       vma->vm_flags |= VM_IO | VM_RESERVED;
+
+                       ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx",
+                                vsize, physical);
+                       ret = remap_pfn_range(vma, vma->vm_start,
+                                             physical >> PAGE_SHIFT, vsize,
+                                             vma->vm_page_prot);
+                       if (ret) {
+                               ehca_err(qp->ib_qp.device,
+                                        "remap_pfn_range() failed ret=%x",
+                                        ret);
+                               return -ENOMEM;
+                       }
+                       break;
+
+               case 2: /* qp rqueue_addr */
+                       ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp);
+                       vma->vm_flags |= VM_RESERVED;
+                       vma->vm_ops = &ehcau_vm_ops;
+                       break;
+
+               case 3: /* qp squeue_addr */
+                       ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp);
+                       vma->vm_flags |= VM_RESERVED;
+                       vma->vm_ops = &ehcau_vm_ops;
+                       break;
+
+               default:
+                       ehca_err(qp->ib_qp.device, "bad resource type %x",
+                                rsrc_type);
+                       return -EINVAL;
+               }
+               break;
+
+       default:
+               ehca_gen_err("bad queue type %x", q_type);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped,
+                    struct vm_area_struct **vma)
+{
+       down_write(&current->mm->mmap_sem);
+       *mapped = (void*)do_mmap(NULL, 0, length, PROT_WRITE,
+                                MAP_SHARED | MAP_ANONYMOUS,
+                                foffset);
+       up_write(&current->mm->mmap_sem);
+       if (!(*mapped)) {
+               ehca_gen_err("couldn't mmap foffset=%lx length=%lx",
+                            foffset, length);
+               return -EINVAL;
+       }
+
+       *vma = find_vma(current->mm, (u64)*mapped);
+       if (!(*vma)) {
+               down_write(&current->mm->mmap_sem);
+               do_munmap(current->mm, 0, length);
+               up_write(&current->mm->mmap_sem);
+               ehca_gen_err("couldn't find vma queue=%p", *mapped);
+               return -EINVAL;
+       }
+       (*vma)->vm_flags |= VM_RESERVED;
+       (*vma)->vm_ops = &ehcau_vm_ops;
+
+       return 0;
+}
+
+int ehca_mmap_register(u64 physical, void **mapped,
+                      struct vm_area_struct **vma)
+{
+       int ret;
+       unsigned long vsize;
+       /* ehca hw supports only 4k page */
+       ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma);
+       if (ret) {
+               ehca_gen_err("couldn't mmap physical=%lx", physical);
+               return ret;
+       }
+
+       (*vma)->vm_flags |= VM_RESERVED;
+       vsize = (*vma)->vm_end - (*vma)->vm_start;
+       if (vsize != EHCA_PAGESIZE) {
+               ehca_gen_err("invalid vsize=%lx",
+                            (*vma)->vm_end - (*vma)->vm_start);
+               return -EINVAL;
+       }
+
+       (*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot);
+       (*vma)->vm_flags |= VM_IO | VM_RESERVED;
+
+       ret = remap_pfn_range((*vma), (*vma)->vm_start,
+                             physical >> PAGE_SHIFT, vsize,
+                             (*vma)->vm_page_prot);
+       if (ret) {
+               ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
+               return -ENOMEM;
+       }
+
+       return 0;
+
+}
+
+int ehca_munmap(unsigned long addr, size_t len)
+{
+       int ret = 0;
+       struct mm_struct *mm = current->mm;
+       if (mm) {
+               down_write(&mm->mmap_sem);
+               ret = do_munmap(mm, addr, len);
+               up_write(&mm->mmap_sem);
+       }
+       return ret;
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
new file mode 100644 (file)
index 0000000..3fb46e6
--- /dev/null
@@ -0,0 +1,874 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware Infiniband Interface code for POWER
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/hvcall.h>
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hcp_phyp.h"
+#include "hipz_fns.h"
+#include "ipz_pt_fn.h"
+
+#define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9, 11)
+#define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12, 12)
+#define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13, 15)
+#define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18, 18)
+#define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19, 21)
+#define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22, 23)
+#define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31, 31)
+#define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56, 63)
+
+#define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0, 15)
+#define H_ALL_RES_QP_MAX_OUTST_RECV_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
+#define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)
+
+#define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
+#define H_ALL_RES_QP_ACT_RECV_SGE       EHCA_BMASK_IBM(24, 31)
+
+#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0, 31)
+#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32, 63)
+
+/* direct access qp controls */
+#define DAQP_CTRL_ENABLE    0x01
+#define DAQP_CTRL_SEND_COMP 0x20
+#define DAQP_CTRL_RECV_COMP 0x40
+
+static u32 get_longbusy_msecs(int longbusy_rc)
+{
+       switch (longbusy_rc) {
+       case H_LONG_BUSY_ORDER_1_MSEC:
+               return 1;
+       case H_LONG_BUSY_ORDER_10_MSEC:
+               return 10;
+       case H_LONG_BUSY_ORDER_100_MSEC:
+               return 100;
+       case H_LONG_BUSY_ORDER_1_SEC:
+               return 1000;
+       case H_LONG_BUSY_ORDER_10_SEC:
+               return 10000;
+       case H_LONG_BUSY_ORDER_100_SEC:
+               return 100000;
+       default:
+               return 1;
+       }
+}
+
+static long ehca_plpar_hcall_norets(unsigned long opcode,
+                                   unsigned long arg1,
+                                   unsigned long arg2,
+                                   unsigned long arg3,
+                                   unsigned long arg4,
+                                   unsigned long arg5,
+                                   unsigned long arg6,
+                                   unsigned long arg7)
+{
+       long ret;
+       int i, sleep_msecs;
+
+       ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
+                    "arg5=%lx arg6=%lx arg7=%lx",
+                    opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+
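+       /*
+        * Retry the hcall up to five times: on a long-busy return code,
+        * sleep for the interval the code suggests and try again; any other
+        * result is returned directly.  If firmware is still busy after the
+        * last attempt, fall back to H_BUSY.
+        */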
+       for (i = 0; i < 5; i++) {
+               ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
+                                        arg5, arg6, arg7);
+
+               if (H_IS_LONG_BUSY(ret)) {
+                       sleep_msecs = get_longbusy_msecs(ret);
+                       msleep_interruptible(sleep_msecs);
+                       continue;
+               }
+
+               if (ret < H_SUCCESS)
+                       ehca_gen_err("opcode=%lx ret=%lx"
+                                    " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
+                                    " arg5=%lx arg6=%lx arg7=%lx ",
+                                    opcode, ret,
+                                    arg1, arg2, arg3, arg4, arg5,
+                                    arg6, arg7);
+
+               ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret);
+               return ret;
+
+       }
+
+       return H_BUSY;
+}
+
+static long ehca_plpar_hcall9(unsigned long opcode,
+                             unsigned long *outs, /* array of 9 outputs */
+                             unsigned long arg1,
+                             unsigned long arg2,
+                             unsigned long arg3,
+                             unsigned long arg4,
+                             unsigned long arg5,
+                             unsigned long arg6,
+                             unsigned long arg7,
+                             unsigned long arg8,
+                             unsigned long arg9)
+{
+       long ret;
+       int i, sleep_msecs;
+
+       ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
+                    "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
+                    opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7,
+                    arg8, arg9);
+
+       for (i = 0; i < 5; i++) {
+               ret = plpar_hcall9(opcode, outs,
+                                  arg1, arg2, arg3, arg4, arg5,
+                                  arg6, arg7, arg8, arg9);
+
+               if (H_IS_LONG_BUSY(ret)) {
+                       sleep_msecs = get_longbusy_msecs(ret);
+                       msleep_interruptible(sleep_msecs);
+                       continue;
+               }
+
+               if (ret < H_SUCCESS)
+                       ehca_gen_err("opcode=%lx ret=%lx"
+                                    " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
+                                    " arg5=%lx arg6=%lx arg7=%lx arg8=%lx"
+                                    " arg9=%lx"
+                                    " out1=%lx out2=%lx out3=%lx out4=%lx"
+                                    " out5=%lx out6=%lx out7=%lx out8=%lx"
+                                    " out9=%lx",
+                                    opcode, ret,
+                                    arg1, arg2, arg3, arg4, arg5,
+                                    arg6, arg7, arg8, arg9,
+                                    outs[0], outs[1], outs[2], outs[3],
+                                    outs[4], outs[5], outs[6], outs[7],
+                                    outs[8]);
+
+               ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx "
+                            "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx "
+                            "out9=%lx",
+                            opcode, ret, outs[0], outs[1], outs[2], outs[3],
+                            outs[4], outs[5], outs[6], outs[7], outs[8]);
+               return ret;
+
+       }
+
+       return H_BUSY;
+}
+
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+                            struct ehca_pfeq *pfeq,
+                            const u32 neq_control,
+                            const u32 number_of_entries,
+                            struct ipz_eq_handle *eq_handle,
+                            u32 *act_nr_of_entries,
+                            u32 *act_pages,
+                            u32 *eq_ist)
+{
+       u64 ret;
+       u64 outs[PLPAR_HCALL9_BUFSIZE];
+       u64 allocate_controls;
+
+       /* resource type */
+       allocate_controls = 3ULL;
+
+       /* ISN is associated */
+       if (neq_control != 1)
+               allocate_controls = (1ULL << (63 - 7)) | allocate_controls;
+       else /* notification event queue */
+               allocate_controls = (1ULL << 63) | allocate_controls;
+
+       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+                               adapter_handle.handle,  /* r4 */
+                               allocate_controls,      /* r5 */
+                               number_of_entries,      /* r6 */
+                               0, 0, 0, 0, 0, 0);
+       eq_handle->handle = outs[0];
+       *act_nr_of_entries = (u32)outs[3];
+       *act_pages = (u32)outs[4];
+       *eq_ist = (u32)outs[5];
+
+       if (ret == H_NOT_ENOUGH_RESOURCES)
+               ehca_gen_err("Not enough resource - ret=%lx ", ret);
+
+       return ret;
+}
+
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+                      struct ipz_eq_handle eq_handle,
+                      const u64 event_mask)
+{
+       return ehca_plpar_hcall_norets(H_RESET_EVENTS,
+                                      adapter_handle.handle, /* r4 */
+                                      eq_handle.handle,      /* r5 */
+                                      event_mask,            /* r6 */
+                                      0, 0, 0, 0);
+}
+
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+                            struct ehca_cq *cq,
+                            struct ehca_alloc_cq_parms *param)
+{
+       u64 ret;
+       u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+                               adapter_handle.handle,   /* r4  */
+                               2,                       /* r5  */
+                               param->eq_handle.handle, /* r6  */
+                               cq->token,               /* r7  */
+                               param->nr_cqe,           /* r8  */
+                               0, 0, 0, 0);
+       cq->ipz_cq_handle.handle = outs[0];
+       param->act_nr_of_entries = (u32)outs[3];
+       param->act_pages = (u32)outs[4];
+
+       if (ret == H_SUCCESS)
+               hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
+
+       if (ret == H_NOT_ENOUGH_RESOURCES)
+               ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+       return ret;
+}
+
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+                            struct ehca_qp *qp,
+                            struct ehca_alloc_qp_parms *parms)
+{
+       u64 ret;
+       u64 allocate_controls;
+       u64 max_r10_reg;
+       u64 outs[PLPAR_HCALL9_BUFSIZE];
+       u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1;
+       u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1;
+       int daqp_ctrl = parms->daqp_ctrl;
+
+       allocate_controls =
+               EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS,
+                              (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0)
+               | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
+               | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
+               | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
+               | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
+                                (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0)
+               | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
+                                (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0)
+               | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
+                                parms->ud_av_l_key_ctl)
+               | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
+
+       max_r10_reg =
+               EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
+                              max_nr_send_wqes)
+               | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
+                                max_nr_receive_wqes)
+               | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
+                                parms->max_send_sge)
+               | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
+                                parms->max_recv_sge);
+
+       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+                               adapter_handle.handle,             /* r4  */
+                               allocate_controls,                 /* r5  */
+                               qp->send_cq->ipz_cq_handle.handle,
+                               qp->recv_cq->ipz_cq_handle.handle,
+                               parms->ipz_eq_handle.handle,
+                               ((u64)qp->token << 32) | parms->pd.value,
+                               max_r10_reg,                       /* r10 */
+                               parms->ud_av_l_key_ctl,            /* r11 */
+                               0);
+       qp->ipz_qp_handle.handle = outs[0];
+       qp->real_qp_num = (u32)outs[1];
+       parms->act_nr_send_wqes =
+               (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
+       parms->act_nr_recv_wqes =
+               (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
+       parms->act_nr_send_sges =
+               (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
+       parms->act_nr_recv_sges =
+               (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
+       parms->nr_sq_pages =
+               (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
+       parms->nr_rq_pages =
+               (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
+
+       if (ret == H_SUCCESS)
+               hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]);
+
+       if (ret == H_NOT_ENOUGH_RESOURCES)
+               ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+       return ret;
+}
+
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+                     const u8 port_id,
+                     struct hipz_query_port *query_port_response_block)
+{
+       u64 ret;
+       u64 r_cb = virt_to_abs(query_port_response_block);
+
+       if (r_cb & (EHCA_PAGESIZE-1)) {
+               ehca_gen_err("response block not page aligned");
+               return H_PARAMETER;
+       }
+
+       ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
+                                     adapter_handle.handle, /* r4 */
+                                     port_id,               /* r5 */
+                                     r_cb,                  /* r6 */
+                                     0, 0, 0, 0);
+
+       if (ehca_debug_level)
+               ehca_dmp(query_port_response_block, 64, "response_block");
+
+       return ret;
+}
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+                    struct hipz_query_hca *query_hca_rblock)
+{
+       u64 r_cb = virt_to_abs(query_hca_rblock);
+
+       if (r_cb & (EHCA_PAGESIZE-1)) {
+               ehca_gen_err("response_block=%p not page aligned",
+                            query_hca_rblock);
+               return H_PARAMETER;
+       }
+
+       return ehca_plpar_hcall_norets(H_QUERY_HCA,
+                                      adapter_handle.handle, /* r4 */
+                                      r_cb,                  /* r5 */
+                                      0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+                         const u8 pagesize,
+                         const u8 queue_type,
+                         const u64 resource_handle,
+                         const u64 logical_address_of_page,
+                         u64 count)
+{
+       return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
+                                      adapter_handle.handle,      /* r4  */
+                                      queue_type | pagesize << 8, /* r5  */
+                                      resource_handle,            /* r6  */
+                                      logical_address_of_page,    /* r7  */
+                                      count,                      /* r8  */
+                                      0, 0);
+}
+
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+                            const struct ipz_eq_handle eq_handle,
+                            struct ehca_pfeq *pfeq,
+                            const u8 pagesize,
+                            const u8 queue_type,
+                            const u64 logical_address_of_page,
+                            const u64 count)
+{
+       if (count != 1) {
+               ehca_gen_err("Page counter=%lx", count);
+               return H_PARAMETER;
+       }
+       return hipz_h_register_rpage(adapter_handle,
+                                    pagesize,
+                                    queue_type,
+                                    eq_handle.handle,
+                                    logical_address_of_page, count);
+}
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
+                          u32 ist)
+{
+       u64 ret;
+       ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
+                                     adapter_handle.handle, /* r4 */
+                                     ist,                   /* r5 */
+                                     0, 0, 0, 0, 0);
+
+       if (ret != H_SUCCESS && ret != H_BUSY)
+               ehca_gen_err("Could not query interrupt state.");
+
+       return ret;
+}
+
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+                            const struct ipz_cq_handle cq_handle,
+                            struct ehca_pfcq *pfcq,
+                            const u8 pagesize,
+                            const u8 queue_type,
+                            const u64 logical_address_of_page,
+                            const u64 count,
+                            const struct h_galpa gal)
+{
+       if (count != 1) {
+               ehca_gen_err("Page counter=%lx", count);
+               return H_PARAMETER;
+       }
+
+       return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+                                    cq_handle.handle, logical_address_of_page,
+                                    count);
+}
+
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+                            const struct ipz_qp_handle qp_handle,
+                            struct ehca_pfqp *pfqp,
+                            const u8 pagesize,
+                            const u8 queue_type,
+                            const u64 logical_address_of_page,
+                            const u64 count,
+                            const struct h_galpa galpa)
+{
+       if (count != 1) {
+               ehca_gen_err("Page counter=%lx", count);
+               return H_PARAMETER;
+       }
+
+       return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+                                    qp_handle.handle, logical_address_of_page,
+                                    count);
+}
+
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+                              const struct ipz_qp_handle qp_handle,
+                              struct ehca_pfqp *pfqp,
+                              void **log_addr_next_sq_wqe2processed,
+                              void **log_addr_next_rq_wqe2processed,
+                              int dis_and_get_function_code)
+{
+       u64 ret;
+       u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+       ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+                               adapter_handle.handle,     /* r4 */
+                               dis_and_get_function_code, /* r5 */
+                               qp_handle.handle,          /* r6 */
+                               0, 0, 0, 0, 0, 0);
+       if (log_addr_next_sq_wqe2processed)
+               *log_addr_next_sq_wqe2processed = (void *)outs[0];
+       if (log_addr_next_rq_wqe2processed)
+               *log_addr_next_rq_wqe2processed = (void *)outs[1];
+
+       return ret;
+}
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+                    const struct ipz_qp_handle qp_handle,
+                    struct ehca_pfqp *pfqp,
+                    const u64 update_mask,
+                    struct hcp_modify_qp_control_block *mqpcb,
+                    struct h_galpa gal)
+{
+       u64 ret;
+       u64 outs[PLPAR_HCALL9_BUFSIZE];
+       ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
+                               adapter_handle.handle, /* r4 */
+                               qp_handle.handle,      /* r5 */
+                               update_mask,           /* r6 */
+                               virt_to_abs(mqpcb),    /* r7 */
+                               0, 0, 0, 0, 0);
+
+       if (ret == H_NOT_ENOUGH_RESOURCES)
+               ehca_gen_err("Insufficient resources ret=%lx", ret);
+
+       return ret;
+}
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+                   const struct ipz_qp_handle qp_handle,
+                   struct ehca_pfqp *pfqp,
+                   struct hcp_modify_qp_control_block *qqpcb,
+                   struct h_galpa gal)
+{
+       return ehca_plpar_hcall_norets(H_QUERY_QP,
+                                      adapter_handle.handle, /* r4 */
+                                      qp_handle.handle,      /* r5 */
+                                      virt_to_abs(qqpcb),    /* r6 */
+                                      0, 0, 0, 0);
+}
+
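+/*
+ * QP teardown proceeds in three steps: unmap the galpas, disable the QP
+ * via H_DISABLE_AND_GETC (function code 1), then release it with
+ * H_FREE_RESOURCE.
+ */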
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+                     struct ehca_qp *qp)
+{
+       u64 ret;
+       u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+       ret = hcp_galpas_dtor(&qp->galpas);
+       if (ret) {
+               ehca_gen_err("Could not destruct qp->galpas");
+               return H_RESOURCE;
+       }
+       ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+                               adapter_handle.handle,     /* r4 */
+                               /* function code */
+                               1,                         /* r5 */
+                               qp->ipz_qp_handle.handle,  /* r6 */
+                               0, 0, 0, 0, 0, 0);
+       if (ret == H_HARDWARE)
+               ehca_gen_err("HCA not operational. ret=%lx", ret);
+
+       ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+                                     adapter_handle.handle,     /* r4 */
+                                     qp->ipz_qp_handle.handle,  /* r5 */
+                                     0, 0, 0, 0, 0);
+
+       if (ret == H_RESOURCE)
+               ehca_gen_err("Resource still in use. ret=%lx", ret);
+
+       return ret;
+}
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+                      const struct ipz_qp_handle qp_handle,
+                      struct h_galpa gal,
+                      u32 port)
+{
+       return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
+                                      adapter_handle.handle, /* r4 */
+                                      qp_handle.handle,      /* r5 */
+                                      port,                  /* r6 */
+                                      0, 0, 0, 0);
+}
+
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+                      const struct ipz_qp_handle qp_handle,
+                      struct h_galpa gal,
+                      u32 port, u32 *pma_qp_nr,
+                      u32 *bma_qp_nr)
+{
+       u64 ret;
+       u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+       ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
+                               adapter_handle.handle, /* r4 */
+                               qp_handle.handle,      /* r5 */
+                               port,                  /* r6 */
+                               0, 0, 0, 0, 0, 0);
+       *pma_qp_nr = (u32)outs[0];
+       *bma_qp_nr = (u32)outs[1];
+
+       if (ret == H_ALIAS_EXIST)
+               ehca_gen_err("AQP1 already exists. ret=%lx", ret);
+
+       return ret;
+}
+
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+                      const struct ipz_qp_handle qp_handle,
+                      struct h_galpa gal,
+                      u16 mcg_dlid,
+                      u64 subnet_prefix, u64 interface_id)
+{
+       u64 ret;
+
+       ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
+                                     adapter_handle.handle,  /* r4 */
+                                     qp_handle.handle,       /* r5 */
+                                     mcg_dlid,               /* r6 */
+                                     interface_id,           /* r7 */
+                                     subnet_prefix,          /* r8 */
+                                     0, 0);
+
+       if (ret == H_NOT_ENOUGH_RESOURCES)
+               ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+       return ret;
+}
+
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+                      const struct ipz_qp_handle qp_handle,
+                      struct h_galpa gal,
+                      u16 mcg_dlid,
+                      u64 subnet_prefix, u64 interface_id)
+{
+       return ehca_plpar_hcall_norets(H_DETACH_MCQP,
+                                      adapter_handle.handle, /* r4 */
+                                      qp_handle.handle,      /* r5 */
+                                      mcg_dlid,              /* r6 */
+                                      interface_id,          /* r7 */
+                                      subnet_prefix,         /* r8 */
+                                      0, 0);
+}
+
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+                     struct ehca_cq *cq,
+                     u8 force_flag)
+{
+       u64 ret;
+
+       ret = hcp_galpas_dtor(&cq->galpas);
+       if (ret) {
+               ehca_gen_err("Could not destruct cq->galpas");
+               return H_RESOURCE;
+       }
+
+       ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+                                     adapter_handle.handle,     /* r4 */
+                                     cq->ipz_cq_handle.handle,  /* r5 */
+                                     force_flag != 0 ? 1L : 0L, /* r6 */
+                                     0, 0, 0, 0);
+
+       if (ret == H_RESOURCE)
+               ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret);
+
+       return ret;
+}
+
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+                     struct ehca_eq *eq)
+{
+       u64 ret;
+
+       ret = hcp_galpas_dtor(&eq->galpas);
+       if (ret) {
+               ehca_gen_err("Could not destruct eq->galpas");
+               return H_RESOURCE;
+       }
+
+       ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+                                     adapter_handle.handle,     /* r4 */
+                                     eq->ipz_eq_handle.handle,  /* r5 */
+                                     0, 0, 0, 0, 0);
+
+       if (ret == H_RESOURCE)
+               ehca_gen_err("Resource in use. ret=%lx ", ret);
+
+       return ret;
+}
+
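+/*
+ * The literal 5 passed in r5 below appears to select the H_ALLOC_RESOURCE
+ * resource type for an MR; the MW allocation further down passes 6.
+ */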
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+                            const struct ehca_mr *mr,
+                            const u64 vaddr,
+                            const u64 length,
+                            const u32 access_ctrl,
+                            const struct ipz_pd pd,
+                            struct ehca_mr_hipzout_parms *outparms)
+{
+       u64 ret;
+       u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+                               adapter_handle.handle,            /* r4 */
+                               5,                                /* r5 */
+                               vaddr,                            /* r6 */
+                               length,                           /* r7 */
+                               (((u64)access_ctrl) << 32ULL),    /* r8 */
+                               pd.value,                         /* r9 */
+                               0, 0, 0);
+       outparms->handle.handle = outs[0];
+       outparms->lkey = (u32)outs[2];
+       outparms->rkey = (u32)outs[3];
+
+       return ret;
+}
+
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+                            const struct ehca_mr *mr,
+                            const u8 pagesize,
+                            const u8 queue_type,
+                            const u64 logical_address_of_page,
+                            const u64 count)
+{
+       u64 ret;
+
+       if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
+               ehca_gen_err("logical_address_of_page not on a 4k boundary "
+                            "adapter_handle=%lx mr=%p mr_handle=%lx "
+                            "pagesize=%x queue_type=%x "
+                            "logical_address_of_page=%lx count=%lx",
+                            adapter_handle.handle, mr,
+                            mr->ipz_mr_handle.handle, pagesize, queue_type,
+                            logical_address_of_page, count);
+               ret = H_PARAMETER;
+       } else
+               ret = hipz_h_register_rpage(adapter_handle, pagesize,
+                                           queue_type,
+                                           mr->ipz_mr_handle.handle,
+                                           logical_address_of_page, count);
+       return ret;
+}
+
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+                   const struct ehca_mr *mr,
+                   struct ehca_mr_hipzout_parms *outparms)
+{
+       u64 ret;
+       u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+       ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
+                               adapter_handle.handle,     /* r4 */
+                               mr->ipz_mr_handle.handle,  /* r5 */
+                               0, 0, 0, 0, 0, 0, 0);
+       outparms->len = outs[0];
+       outparms->vaddr = outs[1];
+       outparms->acl  = outs[4] >> 32;
+       outparms->lkey = (u32)(outs[5] >> 32);
+       outparms->rkey = (u32)(outs[5] & (0xffffffff));
+
+       return ret;
+}
+
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+                           const struct ehca_mr *mr)
+{
+       return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+                                      adapter_handle.handle,    /* r4 */
+                                      mr->ipz_mr_handle.handle, /* r5 */
+                                      0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+                         const struct ehca_mr *mr,
+                         const u64 vaddr_in,
+                         const u64 length,
+                         const u32 access_ctrl,
+                         const struct ipz_pd pd,
+                         const u64 mr_addr_cb,
+                         struct ehca_mr_hipzout_parms *outparms)
+{
+       u64 ret;
+       u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+       ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
+                               adapter_handle.handle,    /* r4 */
+                               mr->ipz_mr_handle.handle, /* r5 */
+                               vaddr_in,                 /* r6 */
+                               length,                   /* r7 */
+                               /* r8 */
+                               ((((u64)access_ctrl) << 32ULL) | pd.value),
+                               mr_addr_cb,               /* r9 */
+                               0, 0, 0);
+       outparms->vaddr = outs[1];
+       outparms->lkey = (u32)outs[2];
+       outparms->rkey = (u32)outs[3];
+
+       return ret;
+}
+
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+                       const struct ehca_mr *mr,
+                       const struct ehca_mr *orig_mr,
+                       const u64 vaddr_in,
+                       const u32 access_ctrl,
+                       const struct ipz_pd pd,
+                       struct ehca_mr_hipzout_parms *outparms)
+{
+       u64 ret;
+       u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+       ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
+                               adapter_handle.handle,            /* r4 */
+                               orig_mr->ipz_mr_handle.handle,    /* r5 */
+                               vaddr_in,                         /* r6 */
+                               (((u64)access_ctrl) << 32ULL),    /* r7 */
+                               pd.value,                         /* r8 */
+                               0, 0, 0, 0);
+       outparms->handle.handle = outs[0];
+       outparms->lkey = (u32)outs[2];
+       outparms->rkey = (u32)outs[3];
+
+       return ret;
+}
+
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+                            const struct ehca_mw *mw,
+                            const struct ipz_pd pd,
+                            struct ehca_mw_hipzout_parms *outparms)
+{
+       u64 ret;
+       u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+                               adapter_handle.handle,      /* r4 */
+                               6,                          /* r5 */
+                               pd.value,                   /* r6 */
+                               0, 0, 0, 0, 0, 0);
+       outparms->handle.handle = outs[0];
+       outparms->rkey = (u32)outs[3];
+
+       return ret;
+}
+
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+                   const struct ehca_mw *mw,
+                   struct ehca_mw_hipzout_parms *outparms)
+{
+       u64 ret;
+       u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+       ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
+                               adapter_handle.handle,    /* r4 */
+                               mw->ipz_mw_handle.handle, /* r5 */
+                               0, 0, 0, 0, 0, 0, 0);
+       outparms->rkey = (u32)outs[3];
+
+       return ret;
+}
+
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+                           const struct ehca_mw *mw)
+{
+       return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+                                      adapter_handle.handle,    /* r4 */
+                                      mw->ipz_mw_handle.handle, /* r5 */
+                                      0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+                     const u64 resource_handle,
+                     void *rblock,
+                     unsigned long *byte_count)
+{
+       u64 r_cb = virt_to_abs(rblock);
+
+       if (r_cb & (EHCA_PAGESIZE-1)) {
+               ehca_gen_err("rblock not page aligned.");
+               return H_PARAMETER;
+       }
+
+       return ehca_plpar_hcall_norets(H_ERROR_DATA,
+                                      adapter_handle.handle,
+                                      resource_handle,
+                                      r_cb,
+                                      0, 0, 0, 0);
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
new file mode 100644 (file)
index 0000000..587ebd4
--- /dev/null
@@ -0,0 +1,261 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware Infiniband Interface code for POWER
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HCP_IF_H__
+#define __HCP_IF_H__
+
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "hipz_hw.h"
+
+/*
+ * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW,
+ * initializes the resources and creates the empty EQPT (ring).
+ */
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+                            struct ehca_pfeq *pfeq,
+                            const u32 neq_control,
+                            const u32 number_of_entries,
+                            struct ipz_eq_handle *eq_handle,
+                            u32 *act_nr_of_entries,
+                            u32 *act_pages,
+                            u32 *eq_ist);
+
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+                      struct ipz_eq_handle eq_handle,
+                      const u64 event_mask);
+/*
+ * hipz_h_alloc_resource_cq allocates CQ resources in HW and FW,
+ * initializes the resources and creates the empty CQPT (ring).
+ */
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+                            struct ehca_cq *cq,
+                            struct ehca_alloc_cq_parms *param);
+
+
+/*
+ * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
+ * initializes the resources and creates the empty QPPTs (2 rings).
+ */
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+                            struct ehca_qp *qp,
+                            struct ehca_alloc_qp_parms *parms);
+
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+                     const u8 port_id,
+                     struct hipz_query_port *query_port_response_block);
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+                    struct hipz_query_hca *query_hca_rblock);
+
+/*
+ * hipz_h_register_rpage is the internal helper used by all
+ * H_REGISTER_RPAGES wrappers.
+ */
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+                         const u8 pagesize,
+                         const u8 queue_type,
+                         const u64 resource_handle,
+                         const u64 logical_address_of_page,
+                         u64 count);
+
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+                            const struct ipz_eq_handle eq_handle,
+                            struct ehca_pfeq *pfeq,
+                            const u8 pagesize,
+                            const u8 queue_type,
+                            const u64 logical_address_of_page,
+                            const u64 count);
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
+                          u32 ist);
+
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+                            const struct ipz_cq_handle cq_handle,
+                            struct ehca_pfcq *pfcq,
+                            const u8 pagesize,
+                            const u8 queue_type,
+                            const u64 logical_address_of_page,
+                            const u64 count,
+                            const struct h_galpa gal);
+
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+                            const struct ipz_qp_handle qp_handle,
+                            struct ehca_pfqp *pfqp,
+                            const u8 pagesize,
+                            const u8 queue_type,
+                            const u64 logical_address_of_page,
+                            const u64 count,
+                            const struct h_galpa galpa);
+
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+                              const struct ipz_qp_handle qp_handle,
+                              struct ehca_pfqp *pfqp,
+                              void **log_addr_next_sq_wqe2processed,
+                              void **log_addr_next_rq_wqe2processed,
+                              int dis_and_get_function_code);
+enum hcall_sigt {
+       HCALL_SIGT_NO_CQE = 0,
+       HCALL_SIGT_BY_WQE = 1,
+       HCALL_SIGT_EVERY = 2
+};
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+                    const struct ipz_qp_handle qp_handle,
+                    struct ehca_pfqp *pfqp,
+                    const u64 update_mask,
+                    struct hcp_modify_qp_control_block *mqpcb,
+                    struct h_galpa gal);
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+                   const struct ipz_qp_handle qp_handle,
+                   struct ehca_pfqp *pfqp,
+                   struct hcp_modify_qp_control_block *qqpcb,
+                   struct h_galpa gal);
+
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+                     struct ehca_qp *qp);
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+                      const struct ipz_qp_handle qp_handle,
+                      struct h_galpa gal,
+                      u32 port);
+
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+                      const struct ipz_qp_handle qp_handle,
+                      struct h_galpa gal,
+                      u32 port, u32 *pma_qp_nr,
+                      u32 *bma_qp_nr);
+
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+                      const struct ipz_qp_handle qp_handle,
+                      struct h_galpa gal,
+                      u16 mcg_dlid,
+                      u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+                      const struct ipz_qp_handle qp_handle,
+                      struct h_galpa gal,
+                      u16 mcg_dlid,
+                      u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+                     struct ehca_cq *cq,
+                     u8 force_flag);
+
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+                     struct ehca_eq *eq);
+
+/*
+ * hipz_h_alloc_resource_mr allocates MR resources in HW and FW and
+ * initializes them.
+ */
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+                            const struct ehca_mr *mr,
+                            const u64 vaddr,
+                            const u64 length,
+                            const u32 access_ctrl,
+                            const struct ipz_pd pd,
+                            struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+                            const struct ehca_mr *mr,
+                            const u8 pagesize,
+                            const u8 queue_type,
+                            const u64 logical_address_of_page,
+                            const u64 count);
+
+/* hipz_h_query_mr queries MR in HW and FW */
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+                   const struct ehca_mr *mr,
+                   struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mr frees MR resources in HW and FW */
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+                           const struct ehca_mr *mr);
+
+/* hipz_h_reregister_pmr reregisters MR in HW and FW */
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+                         const struct ehca_mr *mr,
+                         const u64 vaddr_in,
+                         const u64 length,
+                         const u32 access_ctrl,
+                         const struct ipz_pd pd,
+                         const u64 mr_addr_cb,
+                         struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_smr registers a shared MR in HW and FW */
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+                       const struct ehca_mr *mr,
+                       const struct ehca_mr *orig_mr,
+                       const u64 vaddr_in,
+                       const u32 access_ctrl,
+                       const struct ipz_pd pd,
+                       struct ehca_mr_hipzout_parms *outparms);
+
+/*
+ * hipz_h_alloc_resource_mw allocates MW resources in HW and FW and
+ * initializes them.
+ */
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+                            const struct ehca_mw *mw,
+                            const struct ipz_pd pd,
+                            struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_query_mw queries MW in HW and FW */
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+                   const struct ehca_mw *mw,
+                   struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mw frees MW resources in HW and FW */
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+                           const struct ehca_mw *mw);
+
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+                     const u64 resource_handle,
+                     void *rblock,
+                     unsigned long *byte_count);
+
+#endif /* __HCP_IF_H__ */
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c
new file mode 100644 (file)
index 0000000..0b1a477
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *   load store abstraction for ehca register access with tracing
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "hipz_hw.h"
+
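+/* map one eHCA register page into kernel virtual address space */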
+int hcall_map_page(u64 physaddr, u64 *mapaddr)
+{
+       *mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE));
+       return 0;
+}
+
+int hcall_unmap_page(u64 mapaddr)
+{
+       iounmap((volatile void __iomem *)mapaddr);
+       return 0;
+}
+
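+/*
+ * The kernel galpa is ioremapped here; the user galpa handle is stored
+ * unmapped and unmodified.
+ */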
+int hcp_galpas_ctor(struct h_galpas *galpas,
+                   u64 paddr_kernel, u64 paddr_user)
+{
+       int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
+       if (ret)
+               return ret;
+
+       galpas->user.fw_handle = paddr_user;
+
+       return 0;
+}
+
+int hcp_galpas_dtor(struct h_galpas *galpas)
+{
+       if (galpas->kernel.fw_handle) {
+               int ret = hcall_unmap_page(galpas->kernel.fw_handle);
+               if (ret)
+                       return ret;
+       }
+
+       galpas->user.fw_handle = galpas->kernel.fw_handle = 0;
+
+       return 0;
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h
new file mode 100644 (file)
index 0000000..5305c2a
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware calls
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HCP_PHYP_H__
+#define __HCP_PHYP_H__
+
+
+/*
+ * eHCA page (mapped into memory)
+ * resource to access eHCA register pages in CPU address space
+ */
+struct h_galpa {
+       u64 fw_handle;
+       /* for pSeries this is a 64-bit address at which the I/O memory
+          is mapped into CPU address space (kernel virtual) */
+};
+
+/*
+ * resource to access eHCA address space registers, all types
+ */
+struct h_galpas {
+       u32 pid;                /* PID used for user-space galpa access checking */
+       struct h_galpa user;    /* user space accessible resource,
+                                  set to 0 if unused */
+       struct h_galpa kernel;  /* kernel space accessible resource,
+                                  set to 0 if unused */
+};
+
+static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset)
+{
+       u64 addr = galpa.fw_handle + offset;
+       return *(volatile u64 __force *)addr;
+}
+
+static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
+{
+       u64 addr = galpa.fw_handle + offset;
+       *(volatile u64 __force *)addr = value;
+}
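+
+/*
+ * Usage sketch (cf. hipz_fns_core.h): request a completion event by
+ * storing to the cqx_n1 register through the kernel galpa:
+ *
+ *     hipz_galpa_store(cq->galpas.kernel, CQTEMM_OFFSET(cqx_n1),
+ *                      EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, 1));
+ */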
+
+int hcp_galpas_ctor(struct h_galpas *galpas,
+                   u64 paddr_kernel, u64 paddr_user);
+
+int hcp_galpas_dtor(struct h_galpas *galpas);
+
+int hcall_map_page(u64 physaddr, u64 *mapaddr);
+
+int hcall_unmap_page(u64 mapaddr);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/infiniband/hw/ehca/hipz_fns.h
new file mode 100644 (file)
index 0000000..9dac93d
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HW abstraction register functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_FNS_H__
+#define __HIPZ_FNS_H__
+
+#include "ehca_classes.h"
+#include "hipz_hw.h"
+
+#include "hipz_fns_core.h"
+
+#define hipz_galpa_store_eq(gal, offset, value) \
+       hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_eq(gal, offset) \
+       hipz_galpa_load(gal, EQTEMM_OFFSET(offset))
+
+#define hipz_galpa_store_qped(gal, offset, value) \
+       hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_qped(gal, offset) \
+       hipz_galpa_load(gal, QPEDMM_OFFSET(offset))
+
+#define hipz_galpa_store_mrmw(gal, offset, value) \
+       hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_mrmw(gal, offset) \
+       hipz_galpa_load(gal, MRMWMM_OFFSET(offset))
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h
new file mode 100644 (file)
index 0000000..20898a1
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HW abstraction register functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_FNS_CORE_H__
+#define __HIPZ_FNS_CORE_H__
+
+#include "hcp_phyp.h"
+#include "hipz_hw.h"
+
+#define hipz_galpa_store_cq(gal, offset, value) \
+       hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_cq(gal, offset) \
+       hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
+
+#define hipz_galpa_store_qp(gal, offset, value) \
+       hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_qp(gal, offset) \
+       hipz_galpa_load(gal, QPTEMM_OFFSET(offset))
+
+static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
+{
+       /*  ringing doorbell :-) */
+       hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa,
+                           EHCA_BMASK_SET(QPX_SQADDER, nr_wqes));
+}
+
+static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes)
+{
+       /*  ringing doorbell :-) */
+       hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa,
+                           EHCA_BMASK_SET(QPX_RQADDER, nr_wqes));
+}
+
+static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes)
+{
+       hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca,
+                           EHCA_BMASK_SET(CQX_FECADDER, nr_cqes));
+}
+
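+/*
+ * In the two helpers below the register is read back right after the
+ * store, presumably to ensure the MMIO write has completed.
+ */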
+static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value)
+{
+       u64 cqx_n0_reg;
+
+       hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0,
+                           EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT,
+                                          value));
+       cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0);
+}
+
+static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value)
+{
+       u64 cqx_n1_reg;
+
+       hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1,
+                           EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value));
+       cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1);
+}
+
+#endif /* __HIPZ_FNC_CORE_H__ */
diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h
new file mode 100644 (file)
index 0000000..3fc92b0
--- /dev/null
@@ -0,0 +1,388 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  eHCA register definitions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_HW_H__
+#define __HIPZ_HW_H__
+
+#include "ehca_tools.h"
+
+/* QP Table Entry Memory Map */
+struct hipz_qptemm {
+       u64 qpx_hcr;
+       u64 qpx_c;
+       u64 qpx_herr;
+       u64 qpx_aer;
+/* 0x20*/
+       u64 qpx_sqa;
+       u64 qpx_sqc;
+       u64 qpx_rqa;
+       u64 qpx_rqc;
+/* 0x40*/
+       u64 qpx_st;
+       u64 qpx_pmstate;
+       u64 qpx_pmfa;
+       u64 qpx_pkey;
+/* 0x60*/
+       u64 qpx_pkeya;
+       u64 qpx_pkeyb;
+       u64 qpx_pkeyc;
+       u64 qpx_pkeyd;
+/* 0x80*/
+       u64 qpx_qkey;
+       u64 qpx_dqp;
+       u64 qpx_dlidp;
+       u64 qpx_portp;
+/* 0xa0*/
+       u64 qpx_slidp;
+       u64 qpx_slidpp;
+       u64 qpx_dlida;
+       u64 qpx_porta;
+/* 0xc0*/
+       u64 qpx_slida;
+       u64 qpx_slidpa;
+       u64 qpx_slvl;
+       u64 qpx_ipd;
+/* 0xe0*/
+       u64 qpx_mtu;
+       u64 qpx_lato;
+       u64 qpx_rlimit;
+       u64 qpx_rnrlimit;
+/* 0x100*/
+       u64 qpx_t;
+       u64 qpx_sqhp;
+       u64 qpx_sqptp;
+       u64 qpx_nspsn;
+/* 0x120*/
+       u64 qpx_nspsnhwm;
+       u64 reserved1;
+       u64 qpx_sdsi;
+       u64 qpx_sdsbc;
+/* 0x140*/
+       u64 qpx_sqwsize;
+       u64 qpx_sqwts;
+       u64 qpx_lsn;
+       u64 qpx_nssn;
+/* 0x160 */
+       u64 qpx_mor;
+       u64 qpx_cor;
+       u64 qpx_sqsize;
+       u64 qpx_erc;
+/* 0x180*/
+       u64 qpx_rnrrc;
+       u64 qpx_ernrwt;
+       u64 qpx_rnrresp;
+       u64 qpx_lmsna;
+/* 0x1a0 */
+       u64 qpx_sqhpc;
+       u64 qpx_sqcptp;
+       u64 qpx_sigt;
+       u64 qpx_wqecnt;
+/* 0x1c0*/
+       u64 qpx_rqhp;
+       u64 qpx_rqptp;
+       u64 qpx_rqsize;
+       u64 qpx_nrr;
+/* 0x1e0*/
+       u64 qpx_rdmac;
+       u64 qpx_nrpsn;
+       u64 qpx_lapsn;
+       u64 qpx_lcr;
+/* 0x200*/
+       u64 qpx_rwc;
+       u64 qpx_rwva;
+       u64 qpx_rdsi;
+       u64 qpx_rdsbc;
+/* 0x220*/
+       u64 qpx_rqwsize;
+       u64 qpx_crmsn;
+       u64 qpx_rdd;
+       u64 qpx_larpsn;
+/* 0x240*/
+       u64 qpx_pd;
+       u64 qpx_scqn;
+       u64 qpx_rcqn;
+       u64 qpx_aeqn;
+/* 0x260*/
+       u64 qpx_aaelog;
+       u64 qpx_ram;
+       u64 qpx_rdmaqe0;
+       u64 qpx_rdmaqe1;
+/* 0x280*/
+       u64 qpx_rdmaqe2;
+       u64 qpx_rdmaqe3;
+       u64 qpx_nrpsnhwm;
+/* 0x298*/
+       u64 reserved[(0x400 - 0x298) / 8];
+/* 0x400 extended data */
+       u64 reserved_ext[(0x500 - 0x400) / 8];
+/* 0x500 */
+       u64 reserved2[(0x1000 - 0x500) / 8];
+/* 0x1000      */
+};
+
+#define QPX_SQADDER EHCA_BMASK_IBM(48, 63)
+#define QPX_RQADDER EHCA_BMASK_IBM(48, 63)
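+/*
+ * EHCA_BMASK_IBM() (from ehca_tools.h) uses IBM bit numbering, i.e. bit 0
+ * is the MSB of the 64-bit word, so bits 48..63 here are the 16 low-order
+ * bits of the register.
+ */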
+
+#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x)
+
+/* MRMWPT Entry Memory Map */
+struct hipz_mrmwmm {
+       /* 0x00 */
+       u64 mrx_hcr;
+
+       u64 mrx_c;
+       u64 mrx_herr;
+       u64 mrx_aer;
+       /* 0x20 */
+       u64 mrx_pp;
+       u64 reserved1;
+       u64 reserved2;
+       u64 reserved3;
+       /* 0x40 */
+       u64 reserved4[(0x200 - 0x40) / 8];
+       /* 0x200 */
+       u64 mrx_ctl[64];
+
+};
+
+#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x)
+
+struct hipz_qpedmm {
+       /* 0x00 */
+       u64 reserved0[(0x400) / 8];
+       /* 0x400 */
+       u64 qpedx_phh;
+       u64 qpedx_ppsgp;
+       /* 0x410 */
+       u64 qpedx_ppsgu;
+       u64 qpedx_ppdgp;
+       /* 0x420 */
+       u64 qpedx_ppdgu;
+       u64 qpedx_aph;
+       /* 0x430 */
+       u64 qpedx_apsgp;
+       u64 qpedx_apsgu;
+       /* 0x440 */
+       u64 qpedx_apdgp;
+       u64 qpedx_apdgu;
+       /* 0x450 */
+       u64 qpedx_apav;
+       u64 qpedx_apsav;
+       /* 0x460  */
+       u64 qpedx_hcr;
+       u64 reserved1[4];
+       /* 0x488 */
+       u64 qpedx_rrl0;
+       /* 0x490 */
+       u64 qpedx_rrrkey0;
+       u64 qpedx_rrva0;
+       /* 0x4a0 */
+       u64 reserved2;
+       u64 qpedx_rrl1;
+       /* 0x4b0 */
+       u64 qpedx_rrrkey1;
+       u64 qpedx_rrva1;
+       /* 0x4c0 */
+       u64 reserved3;
+       u64 qpedx_rrl2;
+       /* 0x4d0 */
+       u64 qpedx_rrrkey2;
+       u64 qpedx_rrva2;
+       /* 0x4e0 */
+       u64 reserved4;
+       u64 qpedx_rrl3;
+       /* 0x4f0 */
+       u64 qpedx_rrrkey3;
+       u64 qpedx_rrva3;
+};
+
+#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x)
+
+/* CQ Table Entry Memory Map */
+struct hipz_cqtemm {
+       u64 cqx_hcr;
+       u64 cqx_c;
+       u64 cqx_herr;
+       u64 cqx_aer;
+/* 0x20  */
+       u64 cqx_ptp;
+       u64 cqx_tp;
+       u64 cqx_fec;
+       u64 cqx_feca;
+/* 0x40  */
+       u64 cqx_ep;
+       u64 cqx_eq;
+/* 0x50  */
+       u64 reserved1;
+       u64 cqx_n0;
+/* 0x60  */
+       u64 cqx_n1;
+       u64 reserved2[(0x1000 - 0x60) / 8];
+/* 0x1000 */
+};
+
+#define CQX_FEC_CQE_CNT           EHCA_BMASK_IBM(32, 63)
+#define CQX_FECADDER              EHCA_BMASK_IBM(32, 63)
+#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0)
+#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0)
+
+#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x)
+
+/* EQ Table Entry Memory Map */
+struct hipz_eqtemm {
+       u64 eqx_hcr;
+       u64 eqx_c;
+
+       u64 eqx_herr;
+       u64 eqx_aer;
+/* 0x20 */
+       u64 eqx_ptp;
+       u64 eqx_tp;
+       u64 eqx_ssba;
+       u64 eqx_psba;
+
+/* 0x40 */
+       u64 eqx_cec;
+       u64 eqx_meql;
+       u64 eqx_xisbi;
+       u64 eqx_xisc;
+/* 0x60 */
+       u64 eqx_it;
+
+};
+
+#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x)
+
+/* access control defines for MR/MW */
+#define HIPZ_ACCESSCTRL_L_WRITE  0x00800000
+#define HIPZ_ACCESSCTRL_R_WRITE  0x00400000
+#define HIPZ_ACCESSCTRL_R_READ   0x00200000
+#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000
+#define HIPZ_ACCESSCTRL_MW_BIND  0x00080000
+
+/* query hca response block */
+struct hipz_query_hca {
+       u32 cur_reliable_dg;
+       u32 cur_qp;
+       u32 cur_cq;
+       u32 cur_eq;
+       u32 cur_mr;
+       u32 cur_mw;
+       u32 cur_ee_context;
+       u32 cur_mcast_grp;
+       u32 cur_qp_attached_mcast_grp;
+       u32 reserved1;
+       u32 cur_ipv6_qp;
+       u32 cur_eth_qp;
+       u32 cur_hp_mr;
+       u32 reserved2[3];
+       u32 max_rd_domain;
+       u32 max_qp;
+       u32 max_cq;
+       u32 max_eq;
+       u32 max_mr;
+       u32 max_hp_mr;
+       u32 max_mw;
+       u32 max_mrwpte;
+       u32 max_special_mrwpte;
+       u32 max_rd_ee_context;
+       u32 max_mcast_grp;
+       u32 max_total_mcast_qp_attach;
+       u32 max_mcast_qp_attach;
+       u32 max_raw_ipv6_qp;
+       u32 max_raw_ethy_qp;
+       u32 internal_clock_frequency;
+       u32 max_pd;
+       u32 max_ah;
+       u32 max_cqe;
+       u32 max_wqes_wq;
+       u32 max_partitions;
+       u32 max_rr_ee_context;
+       u32 max_rr_qp;
+       u32 max_rr_hca;
+       u32 max_act_wqs_ee_context;
+       u32 max_act_wqs_qp;
+       u32 max_sge;
+       u32 max_sge_rd;
+       u32 memory_page_size_supported;
+       u64 max_mr_size;
+       u32 local_ca_ack_delay;
+       u32 num_ports;
+       u32 vendor_id;
+       u32 vendor_part_id;
+       u32 hw_ver;
+       u64 node_guid;
+       u64 hca_cap_indicators;
+       u32 data_counter_register_size;
+       u32 max_shared_rq;
+       u32 max_isns_eq;
+       u32 max_neq;
+} __attribute__ ((packed));
+
+/* query port response block */
+struct hipz_query_port {
+       u32 state;
+       u32 bad_pkey_cntr;
+       u32 lmc;
+       u32 lid;
+       u32 subnet_timeout;
+       u32 qkey_viol_cntr;
+       u32 sm_sl;
+       u32 sm_lid;
+       u32 capability_mask;
+       u32 init_type_reply;
+       u32 pkey_tbl_len;
+       u32 gid_tbl_len;
+       u64 gid_prefix;
+       u32 port_nr;
+       u16 pkey_entries[16];
+       u8  reserved1[32];
+       u32 trent_size;
+       u32 trbuf_size;
+       u64 max_msg_sz;
+       u32 max_mtu;
+       u32 vl_cap;
+       u8  reserved2[1900];
+       u64 guid_entries[255];
+} __attribute__ ((packed));
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
new file mode 100644 (file)
index 0000000..e028ff1
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  internal queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "ipz_pt_fn.h"
+
+void *ipz_qpageit_get_inc(struct ipz_queue *queue)
+{
+       void *ret = ipz_qeit_get(queue);
+       queue->current_q_offset += queue->pagesize;
+       if (queue->current_q_offset > queue->queue_length) {
+               queue->current_q_offset -= queue->pagesize;
+               ret = NULL;
+       }
+       if (((u64)ret) % EHCA_PAGESIZE) {
+               ehca_gen_err("ERROR!! not at PAGE-Boundary");
+               return NULL;
+       }
+       return ret;
+}
+
+void *ipz_qeit_eq_get_inc(struct ipz_queue *queue)
+{
+       void *ret = ipz_qeit_get(queue);
+       u64 last_entry_in_q = queue->queue_length - queue->qe_size;
+
+       queue->current_q_offset += queue->qe_size;
+       if (queue->current_q_offset > last_entry_in_q) {
+               queue->current_q_offset = 0;
+               queue->toggle_state = (~queue->toggle_state) & 1;
+       }
+
+       return ret;
+}
+
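+/*
+ * Usage sketch with made-up numbers: build a queue of four eHCA pages
+ * holding 64-byte entries, use it, then tear it down again.  Both ctor
+ * and dtor return 1 on success and 0 on failure:
+ *
+ *     struct ipz_queue q;
+ *
+ *     if (!ipz_queue_ctor(&q, 4, EHCA_PAGESIZE, 64, 0))
+ *             return -ENOMEM;
+ *     ... ipz_qpageit_get_inc(&q) ...
+ *     ipz_queue_dtor(&q);
+ */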
+int ipz_queue_ctor(struct ipz_queue *queue,
+                  const u32 nr_of_pages,
+                  const u32 pagesize, const u32 qe_size, const u32 nr_of_sg)
+{
+       int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
+       int f;
+
+       if (pagesize > PAGE_SIZE) {
+               ehca_gen_err("FATAL ERROR: pagesize=%x is greater "
+                            "than kernel page size", pagesize);
+               return 0;
+       }
+       if (!pages_per_kpage) {
+               ehca_gen_err("FATAL ERROR: invalid kernel page size. "
+                            "pages_per_kpage=%x", pages_per_kpage);
+               return 0;
+       }
+       queue->queue_length = nr_of_pages * pagesize;
+       queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+       if (!queue->queue_pages) {
+               ehca_gen_err("Couldn't allocate queue_pages array");
+               return 0;
+       }
+       memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
+       /*
+        * allocate pages for queue:
+        * outer loop allocates whole kernel pages (page aligned) and
+        * inner loop divides a kernel page into smaller hca queue pages
+        */
+       f = 0;
+       while (f < nr_of_pages) {
+               u8 *kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
+               int k;
+               if (!kpage)
+                       goto ipz_queue_ctor_exit0; /*NOMEM*/
+               for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) {
+                       (queue->queue_pages)[f] = (struct ipz_page *)kpage;
+                       kpage += EHCA_PAGESIZE;
+                       f++;
+               }
+       }
+
+       queue->current_q_offset = 0;
+       queue->qe_size = qe_size;
+       queue->act_nr_of_sg = nr_of_sg;
+       queue->pagesize = pagesize;
+       queue->toggle_state = 1;
+       return 1;
+
+ ipz_queue_ctor_exit0:
+       ehca_gen_err("Couldn't allocate queue pages queue=%p f=%x nr_of_pages=%x",
+                    queue, f, nr_of_pages);
+       for (f = 0; f < nr_of_pages; f += pages_per_kpage) {
+               if (!(queue->queue_pages)[f])
+                       break;
+               free_page((unsigned long)(queue->queue_pages)[f]);
+       }
+       return 0;
+}
+
+int ipz_queue_dtor(struct ipz_queue *queue)
+{
+       int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
+       int g;
+       int nr_pages;
+
+       if (!queue || !queue->queue_pages) {
+               ehca_gen_dbg("queue or queue_pages is NULL");
+               return 0;
+       }
+       nr_pages = queue->queue_length / queue->pagesize;
+       for (g = 0; g < nr_pages; g += pages_per_kpage)
+               free_page((unsigned long)(queue->queue_pages)[g]);
+       vfree(queue->queue_pages);
+
+       return 1;
+}
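
For reference, a minimal sketch of how a caller might pair the constructor and destructor above; the queue geometry is purely illustrative, and the example only demonstrates the 1-on-success / 0-on-failure convention.

/* Illustrative caller (not part of the patch): a 16-page queue of
 * 64-byte entries.  ipz_queue_ctor()/ipz_queue_dtor() return 1 on
 * success and 0 on failure.
 */
static int example_create_and_destroy_queue(struct ipz_queue *queue)
{
	if (!ipz_queue_ctor(queue, 16 /* nr_of_pages */, EHCA_PAGESIZE,
			    64 /* qe_size */, 0 /* nr_of_sg */))
		return -ENOMEM;

	/* ... iterate with ipz_qeit_get_inc() or ipz_qpageit_get_inc() ... */

	if (!ipz_queue_dtor(queue))
		return -EINVAL;
	return 0;
}
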
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
new file mode 100644 (file)
index 0000000..2f13509
--- /dev/null
@@ -0,0 +1,247 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  internal queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IPZ_PT_FN_H__
+#define __IPZ_PT_FN_H__
+
+#define EHCA_PAGESHIFT   12
+#define EHCA_PAGESIZE   4096UL
+#define EHCA_PAGEMASK   (~(EHCA_PAGESIZE-1))
+#define EHCA_PT_ENTRIES 512UL
+
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+
+/* struct generic ehca page */
+struct ipz_page {
+       u8 entries[EHCA_PAGESIZE];
+};
+
+/* struct generic queue in linux kernel virtual memory (kv) */
+struct ipz_queue {
+       u64 current_q_offset;   /* current queue entry */
+
+       struct ipz_page **queue_pages;  /* array of pages belonging to queue */
+       u32 qe_size;            /* queue entry size */
+       u32 act_nr_of_sg;
+       u32 queue_length;       /* queue length allocated in bytes */
+       u32 pagesize;
+       u32 toggle_state;       /* toggle flag - per page */
+       u32 dummy3;             /* 64 bit alignment */
+};
+
+/*
+ * return current Queue Entry for a certain q_offset
+ * returns address (kv) of Queue Entry
+ */
+static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
+{
+       struct ipz_page *current_page;
+       if (q_offset >= queue->queue_length)
+               return NULL;
+       current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
+       return  &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
+}
+
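
A worked example of the offset split performed by ipz_qeit_calc() above; the offset value is illustrative only.

/* Illustrative only: how a queue offset maps to a page index plus an
 * in-page byte offset with EHCA_PAGESHIFT == 12.
 */
static inline void ipz_qeit_calc_example(void)
{
	u64 q_offset = 0x2040;
	u32 page_idx = q_offset >> EHCA_PAGESHIFT;      /* 0x2040 >> 12 == 2 */
	u32 page_off = q_offset & (EHCA_PAGESIZE - 1);  /* 0x2040 & 0xfff == 0x40 */

	/* the entry lives at &queue->queue_pages[page_idx]->entries[page_off] */
	(void)page_idx;
	(void)page_off;
}
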
+/*
+ * return current Queue Entry
+ * returns address (kv) of Queue Entry
+ */
+static inline void *ipz_qeit_get(struct ipz_queue *queue)
+{
+       return ipz_qeit_calc(queue, queue->current_q_offset);
+}
+
+/*
+ * return current Queue Page, increment Queue Page iterator from
+ * page to page in struct ipz_queue; the last increment returns NULL and
+ * does NOT wrap
+ * returns address (kv) of Queue Page
+ * warning: don't use in parallel with ipz_qeit_get_inc()
+ */
+void *ipz_qpageit_get_inc(struct ipz_queue *queue);
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
+{
+       void *ret = ipz_qeit_get(queue);
+       queue->current_q_offset += queue->qe_size;
+       if (queue->current_q_offset >= queue->queue_length) {
+               queue->current_q_offset = 0;
+               /* toggle the valid flag */
+               queue->toggle_state = (~queue->toggle_state) & 1;
+       }
+
+       return ret;
+}
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * returns 0 and does not increment, if wrong valid state
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
+{
+       struct ehca_cqe *cqe = ipz_qeit_get(queue);
+       u32 cqe_flags = cqe->cqe_flags;
+
+       if ((cqe_flags >> 7) != (queue->toggle_state & 1))
+               return NULL;
+
+       ipz_qeit_get_inc(queue);
+       return cqe;
+}
+
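
A hedged sketch of a completion consumer built on the valid-toggle iterator above; the per-CQE handler is hypothetical and passed in as a callback.

/* Illustrative consumer loop (sketch): drain all currently-valid CQEs.
 * ipz_qeit_get_inc_valid() returns NULL without advancing once the
 * toggle/valid bit no longer matches the queue's toggle_state.
 */
static void example_drain_cq(struct ipz_queue *queue,
			     void (*handle_cqe)(struct ehca_cqe *cqe))
{
	struct ehca_cqe *cqe;

	while ((cqe = ipz_qeit_get_inc_valid(queue)) != NULL)
		handle_cqe(cqe);
}
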
+/*
+ * returns and resets Queue Entry iterator
+ * returns address (kv) of first Queue Entry
+ */
+static inline void *ipz_qeit_reset(struct ipz_queue *queue)
+{
+       queue->current_q_offset = 0;
+       return ipz_qeit_get(queue);
+}
+
+/* struct generic page table */
+struct ipz_pt {
+       u64 entries[EHCA_PT_ENTRIES];
+};
+
+/* struct page table for a queue, only to be used in pf */
+struct ipz_qpt {
+       /* queue page tables (kv), use u64 because we know the element length */
+       u64 *qpts;
+       u32 n_qpts;
+       u32 n_ptes;       /*  number of page table entries */
+       u64 *current_pte_addr;
+};
+
+/*
+ * constructor for an ipz_queue_t, placement new for ipz_queue_t,
+ * new for all dependent data structures
+ * all QP Tables are the same
+ * flow:
+ *    allocate+pin queue
+ * see ipz_qpt_ctor()
+ * returns true if ok, false if out of memory
+ */
+int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
+                  const u32 pagesize, const u32 qe_size,
+                  const u32 nr_of_sg);
+
+/*
+ * destructor for an ipz_queue_t
+ *  -# free queue
+ *  see ipz_queue_ctor()
+ *  returns true if ok, false if queue was NULL-ptr or free failed
+ */
+int ipz_queue_dtor(struct ipz_queue *queue);
+
+/*
+ * constructor for an ipz_qpt_t,
+ * placement new for struct ipz_queue, new for all dependent data structures
+ * all QP Tables are the same,
+ * flow:
+ * -# allocate+pin queue
+ * -# initialise ptcb
+ * -# allocate+pin PTs
+ * -# link PTs to a ring, according to HCA Arch, set bit62 if needed
+ * -# the ring must have room for exactly nr_of_PTEs
+ * see ipz_qpt_ctor()
+ */
+void ipz_qpt_ctor(struct ipz_qpt *qpt,
+                 const u32 nr_of_qes,
+                 const u32 pagesize,
+                 const u32 qe_size,
+                 const u8 lowbyte, const u8 toggle,
+                 u32 * act_nr_of_QEs, u32 * act_nr_of_pages);
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ * fix EQ page problems
+ */
+void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
+
+/*
+ * return current Event Queue Entry, increment Queue Entry iterator
+ * by one step in struct ipz_queue if valid, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * returns 0 and does not increment, if wrong valid state
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
+{
+       void *ret = ipz_qeit_get(queue);
+       u32 qe = *(u8 *) ret;
+       if ((qe >> 7) != (queue->toggle_state & 1))
+               return NULL;
+       ipz_qeit_eq_get_inc(queue); /* valid entry, advance the iterator */
+       return ret;
+}
+
+/* returns address (GX) of first queue entry */
+static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
+{
+       return be64_to_cpu(qpt->qpts[0]);
+}
+
+/* returns address (kv) of first page of queue page table */
+static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt)
+{
+       return qpt->qpts;
+}
+
+#endif                         /* __IPZ_PT_FN_H__ */
index 1db9489f1e82bbc7ae0d10a934af5961a94397df..574a678e7fdd0a24199de80f9aab33bda82837a4 100644 (file)
@@ -1,16 +1,9 @@
-config IPATH_CORE
-       tristate "QLogic InfiniPath Driver"
-       depends on 64BIT && PCI_MSI && NET
-       ---help---
-       This is a low-level driver for QLogic InfiniPath host channel
-       adapters (HCAs) based on the HT-400 and PE-800 chips.
-
 config INFINIBAND_IPATH
-       tristate "QLogic InfiniPath Verbs Driver"
-       depends on IPATH_CORE && INFINIBAND
+       tristate "QLogic InfiniPath Driver"
+       depends on PCI_MSI && 64BIT && INFINIBAND
        ---help---
-       This is a driver that provides InfiniBand verbs support for
-       QLogic InfiniPath host channel adapters (HCAs).  This
-       allows these devices to be used with both kernel upper level
-       protocols such as IP-over-InfiniBand as well as with userspace
-       applications (in conjunction with InfiniBand userspace access).
+       This is a driver for QLogic InfiniPath host channel adapters,
+       including InfiniBand verbs support.  This driver allows these
+       devices to be used with both kernel upper level protocols such
+       as IP-over-InfiniBand as well as with userspace applications
+       (in conjunction with InfiniBand userspace access).
index b0bf72864130e03945c0da8117d713f12b5b0302..5e29cb0095e568eefd85578eac42a71dbfe34b2d 100644 (file)
@@ -1,36 +1,35 @@
 EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \
        -DIPATH_KERN_TYPE=0
 
-obj-$(CONFIG_IPATH_CORE) += ipath_core.o
 obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
 
-ipath_core-y := \
+ib_ipath-y := \
+       ipath_cq.o \
        ipath_diag.o \
        ipath_driver.o \
        ipath_eeprom.o \
        ipath_file_ops.o \
        ipath_fs.o \
-       ipath_ht400.o \
+       ipath_iba6110.o \
+       ipath_iba6120.o \
        ipath_init_chip.o \
        ipath_intr.o \
-       ipath_layer.o \
-       ipath_pe800.o \
-       ipath_stats.o \
-       ipath_sysfs.o \
-       ipath_user_pages.o
-
-ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o
-
-ib_ipath-y := \
-       ipath_cq.o \
        ipath_keys.o \
+       ipath_layer.o \
        ipath_mad.o \
+       ipath_mmap.o \
        ipath_mr.o \
        ipath_qp.o \
        ipath_rc.o \
        ipath_ruc.o \
        ipath_srq.o \
+       ipath_stats.o \
+       ipath_sysfs.o \
        ipath_uc.o \
        ipath_ud.o \
-       ipath_verbs.o \
-       ipath_verbs_mcast.o
+       ipath_user_pages.o \
+       ipath_verbs_mcast.o \
+       ipath_verbs.o
+
+ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
+ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
index 062bd392e7e55d22e8d5bf95d057e8af2eb68c78..f577905e3acaafc3d624c04a00dca92b6f08be35 100644 (file)
@@ -106,9 +106,9 @@ struct infinipath_stats {
        __u64 sps_ether_spkts;
        /* number of "ethernet" packets received by driver */
        __u64 sps_ether_rpkts;
-       /* number of SMA packets sent by driver */
+       /* number of SMA packets sent by driver. Obsolete. */
        __u64 sps_sma_spkts;
-       /* number of SMA packets received by driver */
+       /* number of SMA packets received by driver. Obsolete. */
        __u64 sps_sma_rpkts;
        /* number of times all ports rcvhdrq was full and packet dropped */
        __u64 sps_hdrqfull;
@@ -138,7 +138,7 @@ struct infinipath_stats {
        __u64 sps_pageunlocks;
        /*
         * Number of packets dropped in kernel other than errors (ether
-        * packets if ipath not configured, sma/mad, etc.)
+        * packets if ipath not configured, etc.)
         */
        __u64 sps_krdrops;
        /* pad for future growth */
@@ -153,8 +153,6 @@ struct infinipath_stats {
 #define IPATH_STATUS_DISABLED      0x2 /* hardware disabled */
 /* Device has been disabled via admin request */
 #define IPATH_STATUS_ADMIN_DISABLED    0x4
-#define IPATH_STATUS_OIB_SMA       0x8 /* ipath_mad kernel SMA running */
-#define IPATH_STATUS_SMA          0x10 /* user SMA running */
 /* Chip has been found and initted */
 #define IPATH_STATUS_CHIP_PRESENT 0x20
 /* IB link is at ACTIVE, usable for data traffic */
@@ -465,12 +463,11 @@ struct __ipath_sendpkt {
        struct ipath_iovec sps_iov[4];
 };
 
-/* Passed into SMA special file's ->read and ->write methods. */
-struct ipath_sma_pkt
-{
-       __u32 unit;     /* unit on which to send packet */
-       __u64 data;     /* address of payload in userspace */
-       __u32 len;      /* length of payload */
+/* Passed into diag data special file's ->write method. */
+struct ipath_diag_pkt {
+       __u32 unit;
+       __u64 data;
+       __u32 len;
 };
 
 /*
index 3efee341c9bcddcf31c1401a3b38ac17c3e6f641..049221bc590e0eda2c2c743eca6da06af88a0259 100644 (file)
  * @entry: work completion entry to add
  * @sig: true if @entry is a solicited entry
  *
- * This may be called with one of the qp->s_lock or qp->r_rq.lock held.
+ * This may be called with qp->s_lock held.
  */
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 {
+       struct ipath_cq_wc *wc = cq->queue;
        unsigned long flags;
+       u32 head;
        u32 next;
 
        spin_lock_irqsave(&cq->lock, flags);
 
-       if (cq->head == cq->ibcq.cqe)
+       /*
+        * Note that the head pointer might be writable by user processes.
+        * Take care to verify it is a sane value.
+        */
+       head = wc->head;
+       if (head >= (unsigned) cq->ibcq.cqe) {
+               head = cq->ibcq.cqe;
                next = 0;
-       else
-               next = cq->head + 1;
-       if (unlikely(next == cq->tail)) {
+       } else
+               next = head + 1;
+       if (unlikely(next == wc->tail)) {
                spin_unlock_irqrestore(&cq->lock, flags);
                if (cq->ibcq.event_handler) {
                        struct ib_event ev;
@@ -67,8 +75,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
                }
                return;
        }
-       cq->queue[cq->head] = *entry;
-       cq->head = next;
+       wc->queue[head] = *entry;
+       wc->head = next;
 
        if (cq->notify == IB_CQ_NEXT_COMP ||
            (cq->notify == IB_CQ_SOLICITED && solicited)) {
@@ -101,19 +109,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 {
        struct ipath_cq *cq = to_icq(ibcq);
+       struct ipath_cq_wc *wc = cq->queue;
        unsigned long flags;
        int npolled;
 
        spin_lock_irqsave(&cq->lock, flags);
 
        for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-               if (cq->tail == cq->head)
+               if (wc->tail == wc->head)
                        break;
-               *entry = cq->queue[cq->tail];
-               if (cq->tail == cq->ibcq.cqe)
-                       cq->tail = 0;
+               *entry = wc->queue[wc->tail];
+               if (wc->tail >= cq->ibcq.cqe)
+                       wc->tail = 0;
                else
-                       cq->tail++;
+                       wc->tail++;
        }
 
        spin_unlock_irqrestore(&cq->lock, flags);
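
The head/tail handling in ipath_cq_enter()/ipath_poll_cq() above is a standard single-producer ring that keeps one slot empty; a minimal sketch of the same wrap arithmetic, detached from the driver structures (cqe is the highest valid index, so the ring holds cqe + 1 slots):

/* Illustrative ring-index helpers only; not part of the patch. */
static int ring_full(u32 head, u32 tail, u32 cqe)
{
	u32 next = (head >= cqe) ? 0 : head + 1;

	return next == tail;	/* advancing head would land on tail */
}

static u32 ring_used(u32 head, u32 tail, u32 cqe)
{
	return (head >= tail) ? head - tail : cqe + 1 + head - tail;
}
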
@@ -160,38 +169,74 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
 {
        struct ipath_ibdev *dev = to_idev(ibdev);
        struct ipath_cq *cq;
-       struct ib_wc *wc;
+       struct ipath_cq_wc *wc;
        struct ib_cq *ret;
 
-       if (entries > ib_ipath_max_cqes) {
+       if (entries < 1 || entries > ib_ipath_max_cqes) {
                ret = ERR_PTR(-EINVAL);
-               goto bail;
+               goto done;
        }
 
        if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
                ret = ERR_PTR(-ENOMEM);
-               goto bail;
+               goto done;
        }
 
-       /*
-        * Need to use vmalloc() if we want to support large #s of
-        * entries.
-        */
+       /* Allocate the completion queue structure. */
        cq = kmalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq) {
                ret = ERR_PTR(-ENOMEM);
-               goto bail;
+               goto done;
        }
 
        /*
-        * Need to use vmalloc() if we want to support large #s of entries.
+        * Allocate the completion queue entries and head/tail pointers.
+        * This is allocated separately so that it can be resized and
+        * also mapped into user space.
+        * We need to use vmalloc() in order to support mmap and large
+        * numbers of entries.
         */
-       wc = vmalloc(sizeof(*wc) * (entries + 1));
+       wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
        if (!wc) {
-               kfree(cq);
                ret = ERR_PTR(-ENOMEM);
-               goto bail;
+               goto bail_cq;
        }
+
+       /*
+        * Return the address of the WC as the offset to mmap.
+        * See ipath_mmap() for details.
+        */
+       if (udata && udata->outlen >= sizeof(__u64)) {
+               struct ipath_mmap_info *ip;
+               __u64 offset = (__u64) wc;
+               int err;
+
+               err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+               if (err) {
+                       ret = ERR_PTR(err);
+                       goto bail_wc;
+               }
+
+               /* Allocate info for ipath_mmap(). */
+               ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+               if (!ip) {
+                       ret = ERR_PTR(-ENOMEM);
+                       goto bail_wc;
+               }
+               cq->ip = ip;
+               ip->context = context;
+               ip->obj = wc;
+               kref_init(&ip->ref);
+               ip->mmap_cnt = 0;
+               ip->size = PAGE_ALIGN(sizeof(*wc) +
+                                     sizeof(struct ib_wc) * entries);
+               spin_lock_irq(&dev->pending_lock);
+               ip->next = dev->pending_mmaps;
+               dev->pending_mmaps = ip;
+               spin_unlock_irq(&dev->pending_lock);
+       } else
+               cq->ip = NULL;
+
        /*
         * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
         * The number of entries should be >= the number requested or return
@@ -202,15 +247,22 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
        cq->triggered = 0;
        spin_lock_init(&cq->lock);
        tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
-       cq->head = 0;
-       cq->tail = 0;
+       wc->head = 0;
+       wc->tail = 0;
        cq->queue = wc;
 
        ret = &cq->ibcq;
 
        dev->n_cqs_allocated++;
+       goto done;
 
-bail:
+bail_wc:
+       vfree(wc);
+
+bail_cq:
+       kfree(cq);
+
+done:
        return ret;
 }
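
For orientation, a hedged userspace-side sketch of what consumes the mmap token returned through udata above; the fd and sizing are assumptions, and the real verbs-library plumbing is omitted.

/* Sketch, assuming 'fd' is the already-open uverbs device fd and 'size'
 * matches the kernel's PAGE_ALIGNed allocation; see ipath_mmap() for the
 * kernel half.
 */
#include <stdint.h>
#include <sys/mman.h>

static void *map_cq_queue(int fd, uint64_t offset, size_t size)
{
	void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, (off_t) offset);

	return (p == MAP_FAILED) ? NULL : p;
}
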
 
@@ -229,7 +281,10 @@ int ipath_destroy_cq(struct ib_cq *ibcq)
 
        tasklet_kill(&cq->comptask);
        dev->n_cqs_allocated--;
-       vfree(cq->queue);
+       if (cq->ip)
+               kref_put(&cq->ip->ref, ipath_release_mmap_info);
+       else
+               vfree(cq->queue);
        kfree(cq);
 
        return 0;
@@ -253,7 +308,7 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
        spin_lock_irqsave(&cq->lock, flags);
        /*
         * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-        * any other transitions.
+        * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
         */
        if (cq->notify != IB_CQ_NEXT_COMP)
                cq->notify = notify;
@@ -264,46 +319,86 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
 int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 {
        struct ipath_cq *cq = to_icq(ibcq);
-       struct ib_wc *wc, *old_wc;
-       u32 n;
+       struct ipath_cq_wc *old_wc = cq->queue;
+       struct ipath_cq_wc *wc;
+       u32 head, tail, n;
        int ret;
 
+       if (cqe < 1 || cqe > ib_ipath_max_cqes) {
+               ret = -EINVAL;
+               goto bail;
+       }
+
        /*
         * Need to use vmalloc() if we want to support large #s of entries.
         */
-       wc = vmalloc(sizeof(*wc) * (cqe + 1));
+       wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
        if (!wc) {
                ret = -ENOMEM;
                goto bail;
        }
 
+       /*
+        * Return the address of the WC as the offset to mmap.
+        * See ipath_mmap() for details.
+        */
+       if (udata && udata->outlen >= sizeof(__u64)) {
+               __u64 offset = (__u64) wc;
+
+               ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
+               if (ret)
+                       goto bail;
+       }
+
        spin_lock_irq(&cq->lock);
-       if (cq->head < cq->tail)
-               n = cq->ibcq.cqe + 1 + cq->head - cq->tail;
+       /*
+        * Make sure head and tail are sane since they
+        * might be user writable.
+        */
+       head = old_wc->head;
+       if (head > (u32) cq->ibcq.cqe)
+               head = (u32) cq->ibcq.cqe;
+       tail = old_wc->tail;
+       if (tail > (u32) cq->ibcq.cqe)
+               tail = (u32) cq->ibcq.cqe;
+       if (head < tail)
+               n = cq->ibcq.cqe + 1 + head - tail;
        else
-               n = cq->head - cq->tail;
+               n = head - tail;
        if (unlikely((u32)cqe < n)) {
                spin_unlock_irq(&cq->lock);
                vfree(wc);
                ret = -EOVERFLOW;
                goto bail;
        }
-       for (n = 0; cq->tail != cq->head; n++) {
-               wc[n] = cq->queue[cq->tail];
-               if (cq->tail == cq->ibcq.cqe)
-                       cq->tail = 0;
+       for (n = 0; tail != head; n++) {
+               wc->queue[n] = old_wc->queue[tail];
+               if (tail == (u32) cq->ibcq.cqe)
+                       tail = 0;
                else
-                       cq->tail++;
+                       tail++;
        }
        cq->ibcq.cqe = cqe;
-       cq->head = n;
-       cq->tail = 0;
-       old_wc = cq->queue;
+       wc->head = n;
+       wc->tail = 0;
        cq->queue = wc;
        spin_unlock_irq(&cq->lock);
 
        vfree(old_wc);
 
+       if (cq->ip) {
+               struct ipath_ibdev *dev = to_idev(ibcq->device);
+               struct ipath_mmap_info *ip = cq->ip;
+
+               ip->obj = wc;
+               ip->size = PAGE_ALIGN(sizeof(*wc) +
+                                     sizeof(struct ib_wc) * cqe);
+               spin_lock_irq(&dev->pending_lock);
+               ip->next = dev->pending_mmaps;
+               dev->pending_mmaps = ip;
+               spin_unlock_irq(&dev->pending_lock);
+       }
+
        ret = 0;
 
 bail:
index f415beda0d32bd3e2c7809468625e10e90a3f862..df69f0d80b8bf84d22da9b688b960b09809338da 100644 (file)
@@ -60,7 +60,6 @@
 #define __IPATH_USER_SEND   0x1000     /* use user mode send */
 #define __IPATH_KERNEL_SEND 0x2000     /* use kernel mode send */
 #define __IPATH_EPKTDBG     0x4000     /* print ethernet packet data */
-#define __IPATH_SMADBG      0x8000     /* sma packet debug */
 #define __IPATH_IPATHDBG    0x10000    /* Ethernet (IPATH) gen debug */
 #define __IPATH_IPATHWARN   0x20000    /* Ethernet (IPATH) warnings */
 #define __IPATH_IPATHERR    0x40000    /* Ethernet (IPATH) errors */
@@ -84,7 +83,6 @@
 /* print mmap/nopage stuff, not using VDBG any more */
 #define __IPATH_MMDBG     0x0
 #define __IPATH_EPKTDBG   0x0  /* print ethernet packet data */
-#define __IPATH_SMADBG    0x0   /* process startup (init)/exit messages */
 #define __IPATH_IPATHDBG  0x0  /* Ethernet (IPATH) table dump on */
 #define __IPATH_IPATHWARN 0x0  /* Ethernet (IPATH) warnings on   */
 #define __IPATH_IPATHERR  0x0  /* Ethernet (IPATH) errors on   */
index 147dd89e21c901267c8799467f55fdfb2eb29e97..29958b6e0214a672905bab6dc61a50e7f8a021d0 100644 (file)
  * through the /sys/bus/pci resource mmap interface.
  */
 
+#include <linux/io.h>
 #include <linux/pci.h>
+#include <linux/vmalloc.h>
 #include <asm/uaccess.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"
 
 int ipath_diag_inuse;
@@ -274,6 +275,158 @@ bail:
        return ret;
 }
 
+static ssize_t ipath_diagpkt_write(struct file *fp,
+                                  const char __user *data,
+                                  size_t count, loff_t *off);
+
+static struct file_operations diagpkt_file_ops = {
+       .owner = THIS_MODULE,
+       .write = ipath_diagpkt_write,
+};
+
+static struct cdev *diagpkt_cdev;
+static struct class_device *diagpkt_class_dev;
+
+int __init ipath_diagpkt_add(void)
+{
+       return ipath_cdev_init(IPATH_DIAGPKT_MINOR,
+                              "ipath_diagpkt", &diagpkt_file_ops,
+                              &diagpkt_cdev, &diagpkt_class_dev);
+}
+
+void __exit ipath_diagpkt_remove(void)
+{
+       ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev);
+}
+
+/**
+ * ipath_diagpkt_write - write an IB packet
+ * @fp: the diag data device file pointer
+ * @data: ipath_diag_pkt structure saying where to get the packet
+ * @count: size of data to write
+ * @off: unused by this code
+ */
+static ssize_t ipath_diagpkt_write(struct file *fp,
+                                  const char __user *data,
+                                  size_t count, loff_t *off)
+{
+       u32 __iomem *piobuf;
+       u32 plen, clen, pbufn;
+       struct ipath_diag_pkt dp;
+       u32 *tmpbuf = NULL;
+       struct ipath_devdata *dd;
+       ssize_t ret = 0;
+       u64 val;
+
+       if (count < sizeof(dp)) {
+               ret = -EINVAL;
+               goto bail;
+       }
+
+       if (copy_from_user(&dp, data, sizeof(dp))) {
+               ret = -EFAULT;
+               goto bail;
+       }
+
+       /* send count must be an exact number of dwords */
+       if (dp.len & 3) {
+               ret = -EINVAL;
+               goto bail;
+       }
+
+       clen = dp.len >> 2;
+
+       dd = ipath_lookup(dp.unit);
+       if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
+           !dd->ipath_kregbase) {
+               ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
+                          dp.unit);
+               ret = -ENODEV;
+               goto bail;
+       }
+
+       if (ipath_diag_inuse && !diag_set_link &&
+           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+               diag_set_link = 1;
+               ipath_cdbg(VERBOSE, "Trying to set link active for "
+                          "diag pkt\n");
+               ipath_set_linkstate(dd, IPATH_IB_LINKARM);
+               ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
+       }
+
+       if (!(dd->ipath_flags & IPATH_INITTED)) {
+               /* no hardware, freeze, etc. */
+               ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
+               ret = -ENODEV;
+               goto bail;
+       }
+       val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
+       if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM &&
+           val != IPATH_IBSTATE_ACTIVE) {
+               ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
+                          dd->ipath_unit, (unsigned long long) val);
+               ret = -EINVAL;
+               goto bail;
+       }
+
+       /* need total length before first word written */
+       /* +1 word is for the qword padding */
+       plen = sizeof(u32) + dp.len;
+
+       if ((plen + 4) > dd->ipath_ibmaxlen) {
+               ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
+                         plen - 4, dd->ipath_ibmaxlen);
+               ret = -EINVAL;
+               goto bail;      /* before writing pbc */
+       }
+       tmpbuf = vmalloc(plen);
+       if (!tmpbuf) {
+               dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
+                        "failing\n");
+               ret = -ENOMEM;
+               goto bail;
+       }
+
+       if (copy_from_user(tmpbuf,
+                          (const void __user *) (unsigned long) dp.data,
+                          dp.len)) {
+               ret = -EFAULT;
+               goto bail;
+       }
+
+       piobuf = ipath_getpiobuf(dd, &pbufn);
+       if (!piobuf) {
+               ipath_cdbg(VERBOSE, "No PIO buffers avail for unit %u\n",
+                          dd->ipath_unit);
+               ret = -EBUSY;
+               goto bail;
+       }
+
+       plen >>= 2;             /* in dwords */
+
+       if (ipath_debug & __IPATH_PKTDBG)
+               ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
+                          dd->ipath_unit, plen - 1, pbufn);
+
+       /* we have to flush after the PBC for correctness on some cpus
+        * or the WC buffer can be written out of order */
+       writeq(plen, piobuf);
+       ipath_flush_wc();
+       /* copy all but the trigger word, then flush, so it's written
+        * to chip before trigger word, then write trigger word, then
+        * flush again, so packet is sent. */
+       __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+       ipath_flush_wc();
+       __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+       ipath_flush_wc();
+
+       ret = sizeof(dp);
+
+bail:
+       vfree(tmpbuf);
+       return ret;
+}
+
 static int ipath_diag_release(struct inode *in, struct file *fp)
 {
        mutex_lock(&ipath_mutex);
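
A hedged userspace sketch of driving ipath_diagpkt_write() above; the device path is illustrative, the struct mirrors ipath_diag_pkt from ipath_common.h, and the payload length must be a whole number of dwords, as the handler checks.

/* Illustrative only: send a diag packet through the new char device. */
#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

struct ipath_diag_pkt_user {		/* mirrors struct ipath_diag_pkt */
	uint32_t unit;
	uint64_t data;
	uint32_t len;
};

static int send_diag_packet(const char *dev, uint32_t unit,
			    const void *payload, uint32_t len)
{
	struct ipath_diag_pkt_user dp;
	int fd, ret;

	if (len & 3)			/* must be a multiple of 4 bytes */
		return -1;

	memset(&dp, 0, sizeof(dp));
	dp.unit = unit;
	dp.data = (uint64_t)(unsigned long)payload;
	dp.len  = len;

	fd = open(dev, O_WRONLY);	/* e.g. "/dev/ipath_diagpkt" (illustrative) */
	if (fd < 0)
		return -1;
	ret = (write(fd, &dp, sizeof(dp)) == sizeof(dp)) ? 0 : -1;
	close(fd);
	return ret;
}
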
index f98518d912b5478e543024fed41c34f37266c628..2108466c7e337169649c4a64e0f9157995a9aca7 100644 (file)
@@ -39,7 +39,7 @@
 #include <linux/vmalloc.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"
 
 static void ipath_update_pio_bufs(struct ipath_devdata *);
@@ -51,8 +51,6 @@ const char *ipath_get_unit_name(int unit)
        return iname;
 }
 
-EXPORT_SYMBOL_GPL(ipath_get_unit_name);
-
 #define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
 #define PFX IPATH_DRV_NAME ": "
 
@@ -60,13 +58,13 @@ EXPORT_SYMBOL_GPL(ipath_get_unit_name);
  * The size has to be longer than this string, so we can append
  * board/chip information to it in the init code.
  */
-const char ipath_core_version[] = IPATH_IDSTR "\n";
+const char ib_ipath_version[] = IPATH_IDSTR "\n";
 
 static struct idr unit_table;
 DEFINE_SPINLOCK(ipath_devs_lock);
 LIST_HEAD(ipath_dev_list);
 
-wait_queue_head_t ipath_sma_state_wait;
+wait_queue_head_t ipath_state_wait;
 
 unsigned ipath_debug = __IPATH_INFO;
 
@@ -403,10 +401,10 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
        /* setup the chip-specific functions, as early as possible. */
        switch (ent->device) {
        case PCI_DEVICE_ID_INFINIPATH_HT:
-               ipath_init_ht400_funcs(dd);
+               ipath_init_iba6110_funcs(dd);
                break;
        case PCI_DEVICE_ID_INFINIPATH_PE800:
-               ipath_init_pe800_funcs(dd);
+               ipath_init_iba6120_funcs(dd);
                break;
        default:
                ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
@@ -440,7 +438,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
        }
        dd->ipath_pcirev = rev;
 
+#if defined(__powerpc__)
+       /* There isn't a generic way to specify writethrough mappings */
+       dd->ipath_kregbase = __ioremap(addr, len,
+               (_PAGE_NO_CACHE|_PAGE_WRITETHRU));
+#else
        dd->ipath_kregbase = ioremap_nocache(addr, len);
+#endif
 
        if (!dd->ipath_kregbase) {
                ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
@@ -503,7 +507,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
        ipathfs_add_device(dd);
        ipath_user_add(dd);
        ipath_diag_add(dd);
-       ipath_layer_add(dd);
+       ipath_register_ib_device(dd);
 
        goto bail;
 
@@ -532,7 +536,7 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
                return;
 
        dd = pci_get_drvdata(pdev);
-       ipath_layer_remove(dd);
+       ipath_unregister_ib_device(dd->verbs_dev);
        ipath_diag_remove(dd);
        ipath_user_remove(dd);
        ipathfs_remove_device(dd);
@@ -607,21 +611,23 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
  *
  * wait up to msecs milliseconds for IB link state change to occur for
  * now, take the easy polling route.  Currently used only by
- * ipath_layer_set_linkstate.  Returns 0 if state reached, otherwise
+ * ipath_set_linkstate.  Returns 0 if state reached, otherwise
  * -ETIMEDOUT state can have multiple states set, for any of several
  * transitions.
  */
-int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
+static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
+                               int msecs)
 {
-       dd->ipath_sma_state_wanted = state;
-       wait_event_interruptible_timeout(ipath_sma_state_wait,
+       dd->ipath_state_wanted = state;
+       wait_event_interruptible_timeout(ipath_state_wait,
                                         (dd->ipath_flags & state),
                                         msecs_to_jiffies(msecs));
-       dd->ipath_sma_state_wanted = 0;
+       dd->ipath_state_wanted = 0;
 
        if (!(dd->ipath_flags & state)) {
                u64 val;
-               ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n",
+               ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
+                          " ms\n",
                           /* test INIT ahead of DOWN, both can be set */
                           (state & IPATH_LINKINIT) ? "INIT" :
                           ((state & IPATH_LINKDOWN) ? "DOWN" :
@@ -807,58 +813,6 @@ bail:
        return skb;
 }
 
-/**
- * ipath_rcv_layer - receive a packet for the layered (ethernet) driver
- * @dd: the infinipath device
- * @etail: the sk_buff number
- * @tlen: the total packet length
- * @hdr: the ethernet header
- *
- * Separate routine for better overall optimization
- */
-static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail,
-                           u32 tlen, struct ether_header *hdr)
-{
-       u32 elen;
-       u8 pad, *bthbytes;
-       struct sk_buff *skb, *nskb;
-
-       if (dd->ipath_port0_skbs &&
-                       hdr->sub_opcode == IPATH_ITH4X_OPCODE_ENCAP) {
-               /*
-                * Allocate a new sk_buff to replace the one we give
-                * to the network stack.
-                */
-               nskb = ipath_alloc_skb(dd, GFP_ATOMIC);
-               if (!nskb) {
-                       /* count OK packets that we drop */
-                       ipath_stats.sps_krdrops++;
-                       return;
-               }
-
-               bthbytes = (u8 *) hdr->bth;
-               pad = (bthbytes[1] >> 4) & 3;
-               /* +CRC32 */
-               elen = tlen - (sizeof(*hdr) + pad + sizeof(u32));
-
-               skb = dd->ipath_port0_skbs[etail];
-               dd->ipath_port0_skbs[etail] = nskb;
-               skb_put(skb, elen);
-
-               dd->ipath_f_put_tid(dd, etail + (u64 __iomem *)
-                                   ((char __iomem *) dd->ipath_kregbase
-                                    + dd->ipath_rcvegrbase), 0,
-                                   virt_to_phys(nskb->data));
-
-               __ipath_layer_rcv(dd, hdr, skb);
-
-               /* another ether packet received */
-               ipath_stats.sps_ether_rpkts++;
-       }
-       else if (hdr->sub_opcode == IPATH_ITH4X_OPCODE_LID_ARP)
-               __ipath_layer_rcv_lid(dd, hdr);
-}
-
 static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
                             u32 eflags,
                             u32 l,
@@ -972,26 +926,17 @@ reloop:
                if (unlikely(eflags))
                        ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
                else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-                               int ret = __ipath_verbs_rcv(dd, rc + 1,
-                                                           ebuf, tlen);
-                               if (ret == -ENODEV)
-                                       ipath_cdbg(VERBOSE,
-                                                  "received IB packet, "
-                                                  "not SMA (QP=%x)\n", qp);
-                               if (dd->ipath_lli_counter)
-                                       dd->ipath_lli_counter--;
-
-               } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
-                       if (qp == IPATH_KD_QP &&
-                           bthbytes[0] == ipath_layer_rcv_opcode &&
-                           ebuf)
-                               ipath_rcv_layer(dd, etail, tlen,
-                                               (struct ether_header *)hdr);
-                       else
-                               ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
-                                          "qp=%x), len %x; ignored\n",
-                                          etype, bthbytes[0], qp, tlen);
+                       ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
+                       if (dd->ipath_lli_counter)
+                               dd->ipath_lli_counter--;
+
                }
+               else if (etype == RCVHQ_RCV_TYPE_EAGER)
+                       ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+                                  "qp=%x), len %x; ignored\n",
+                                  etype, bthbytes[0], qp, tlen);
                else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
                        ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
                                  be32_to_cpu(hdr->bth[0]) & 0xff);
@@ -1024,7 +969,8 @@ reloop:
                 */
                if (l == hdrqtail || (i && !(i&0xf))) {
                        u64 lval;
-                       if (l == hdrqtail) /* PE-800 interrupt only on last */
+                       if (l == hdrqtail)
+                               /* request IBA6120 interrupt only on last */
                                lval = dd->ipath_rhdrhead_intr_off | l;
                        else
                                lval = l;
@@ -1038,7 +984,7 @@ reloop:
        }
 
        if (!dd->ipath_rhdrhead_intr_off && !reloop) {
-               /* HT-400 workaround; we can have a race clearing chip
+               /* IBA6110 workaround; we can have a race clearing chip
                 * interrupt with another interrupt about to be delivered,
                 * and can clear it before it is delivered on the GPIO
                 * workaround.  By doing the extra check here for the
@@ -1211,7 +1157,7 @@ int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
  *
  * do appropriate marking as busy, etc.
  * returns buffer number if one found (>=0), negative number is error.
- * Used by ipath_sma_send_pkt and ipath_layer_send
+ * Used by ipath_layer_send
  */
 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
 {
@@ -1317,13 +1263,6 @@ rescan:
                goto bail;
        }
 
-       if (updated)
-               /*
-                * ran out of bufs, now some (at least this one we just
-                * got) are now available, so tell the layered driver.
-                */
-               __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
-
        /*
         * set next starting place.  Since it's just an optimization,
         * it doesn't matter who wins on this, so no locking
@@ -1500,7 +1439,7 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
        return ret;
 }
 
-void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
+static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 {
        static const char *what[4] = {
                [0] = "DOWN",
@@ -1511,7 +1450,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
        int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
                        INFINIPATH_IBCC_LINKCMD_MASK;
 
-       ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate "
+       ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
                   "is %s\n", dd->ipath_unit,
                   what[linkcmd],
                   ipath_ibcstatus_str[
@@ -1520,7 +1459,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
                            INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
                           INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
        /* flush all queued sends when going to DOWN or INIT, to be sure that
-        * they don't block SMA and other MAD packets */
+        * they don't block MAD packets */
        if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
                ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
                                 INFINIPATH_S_ABORT);
@@ -1534,6 +1473,180 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
                         dd->ipath_ibcctrl | which);
 }
 
+int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
+{
+       u32 lstate;
+       int ret;
+
+       switch (newstate) {
+       case IPATH_IB_LINKDOWN:
+               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
+                                   INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+               /* don't wait */
+               ret = 0;
+               goto bail;
+
+       case IPATH_IB_LINKDOWN_SLEEP:
+               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
+                                   INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+               /* don't wait */
+               ret = 0;
+               goto bail;
+
+       case IPATH_IB_LINKDOWN_DISABLE:
+               ipath_set_ib_lstate(dd,
+                                   INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
+                                   INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+               /* don't wait */
+               ret = 0;
+               goto bail;
+
+       case IPATH_IB_LINKINIT:
+               if (dd->ipath_flags & IPATH_LINKINIT) {
+                       ret = 0;
+                       goto bail;
+               }
+               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
+                                   INFINIPATH_IBCC_LINKCMD_SHIFT);
+               lstate = IPATH_LINKINIT;
+               break;
+
+       case IPATH_IB_LINKARM:
+               if (dd->ipath_flags & IPATH_LINKARMED) {
+                       ret = 0;
+                       goto bail;
+               }
+               if (!(dd->ipath_flags &
+                     (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
+                       ret = -EINVAL;
+                       goto bail;
+               }
+               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
+                                   INFINIPATH_IBCC_LINKCMD_SHIFT);
+               /*
+                * Since the port can transition to ACTIVE by receiving
+                * a non VL 15 packet, wait for either state.
+                */
+               lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
+               break;
+
+       case IPATH_IB_LINKACTIVE:
+               if (dd->ipath_flags & IPATH_LINKACTIVE) {
+                       ret = 0;
+                       goto bail;
+               }
+               if (!(dd->ipath_flags & IPATH_LINKARMED)) {
+                       ret = -EINVAL;
+                       goto bail;
+               }
+               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
+                                   INFINIPATH_IBCC_LINKCMD_SHIFT);
+               lstate = IPATH_LINKACTIVE;
+               break;
+
+       default:
+               ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
+               ret = -EINVAL;
+               goto bail;
+       }
+       ret = ipath_wait_linkstate(dd, lstate, 2000);
+
+bail:
+       return ret;
+}
+
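
A minimal sketch of sequencing the link to ACTIVE with the new helper, mirroring what ipath_diagpkt_write() does before sending a packet; error handling is trimmed and the wrapper itself is hypothetical.

/* Sketch only: arm the link, then request ACTIVE.  ARM may already have
 * advanced to ACTIVE on receipt of a non-VL15 packet, which
 * ipath_set_linkstate() accounts for when it waits on the state.
 */
static int example_link_up(struct ipath_devdata *dd)
{
	int ret;

	ret = ipath_set_linkstate(dd, IPATH_IB_LINKARM);
	if (ret)
		return ret;
	return ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
}
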
+/**
+ * ipath_set_mtu - set the MTU
+ * @dd: the infinipath device
+ * @arg: the new MTU
+ *
+ * we can handle "any" incoming size; the issue here is whether we
+ * need to restrict our outgoing size.  For now, we don't do any
+ * sanity checking on this, and we don't deal with what happens to
+ * programs that are already running when the size changes.
+ * NOTE: changing the MTU will usually cause the IBC to go back to
+ * link initialize (IPATH_IBSTATE_INIT) state...
+ */
+int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
+{
+       u32 piosize;
+       int changed = 0;
+       int ret;
+
+       /*
+        * mtu is IB data payload max.  It's the largest power of 2 less
+        * than piosize (or even larger, since it only really controls the
+        * largest we can receive; we can send the max of the mtu and
+        * piosize).  We check that it's one of the valid IB sizes.
+        */
+       if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
+           arg != 4096) {
+               ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
+               ret = -EINVAL;
+               goto bail;
+       }
+       if (dd->ipath_ibmtu == arg) {
+               ret = 0;        /* same as current */
+               goto bail;
+       }
+
+       piosize = dd->ipath_ibmaxlen;
+       dd->ipath_ibmtu = arg;
+
+       if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
+               /* Only if it's not the initial value (or reset to it) */
+               if (piosize != dd->ipath_init_ibmaxlen) {
+                       dd->ipath_ibmaxlen = piosize;
+                       changed = 1;
+               }
+       } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
+               piosize = arg + IPATH_PIO_MAXIBHDR;
+               ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
+                          "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
+                          arg);
+               dd->ipath_ibmaxlen = piosize;
+               changed = 1;
+       }
+
+       if (changed) {
+               /*
+                * set the IBC maxpktlength to the size of our pio
+                * buffers in words
+                */
+               u64 ibc = dd->ipath_ibcctrl;
+               ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
+                        INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
+
+               piosize = piosize - 2 * sizeof(u32);    /* ignore pbc */
+               dd->ipath_ibmaxlen = piosize;
+               piosize /= sizeof(u32); /* in words */
+               /*
+                * for ICRC, which we only send in diag test pkt mode, and
+                * we don't need to worry about that for mtu
+                */
+               piosize += 1;
+
+               ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
+               dd->ipath_ibcctrl = ibc;
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+                                dd->ipath_ibcctrl);
+               dd->ipath_f_tidtemplate(dd);
+       }
+
+       ret = 0;
+
+bail:
+       return ret;
+}
+
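
The same validity rule as the MTU check above, expressed as a small helper (sketch only; not part of the patch).

/* Accept only the IB-defined payload sizes: 256, 512, 1024, 2048, 4096. */
static inline int ipath_mtu_is_valid(u16 mtu)
{
	return mtu == 256 || mtu == 512 || mtu == 1024 ||
	       mtu == 2048 || mtu == 4096;
}
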
+int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
+{
+       dd->ipath_lid = arg;
+       dd->ipath_lmc = lmc;
+
+       return 0;
+}
+
 /**
  * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
  * @dd: the infinipath device
@@ -1637,13 +1750,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
        ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
                            INFINIPATH_IBCC_LINKINITCMD_SHIFT);
 
-       /*
-        * we are shutting down, so tell the layered driver.  We don't do
-        * this on just a link state change, much like ethernet, a cable
-        * unplug, etc. doesn't change driver state
-        */
-       ipath_layer_intr(dd, IPATH_LAYER_INT_IF_DOWN);
-
        /* disable IBC */
        dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
        ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
@@ -1743,7 +1849,7 @@ static int __init infinipath_init(void)
 {
        int ret;
 
-       ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version);
+       ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
 
        /*
         * These must be called before the driver is registered with
@@ -1776,8 +1882,18 @@ static int __init infinipath_init(void)
                goto bail_group;
        }
 
+       ret = ipath_diagpkt_add();
+       if (ret < 0) {
+               printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
+                      "diag data device: error %d\n", -ret);
+               goto bail_ipathfs;
+       }
+
        goto bail;
 
+bail_ipathfs:
+       ipath_exit_ipathfs();
+
 bail_group:
        ipath_driver_remove_group(&ipath_driver.driver);
 
@@ -1888,6 +2004,8 @@ static void __exit infinipath_cleanup(void)
        struct ipath_devdata *dd, *tmp;
        unsigned long flags;
 
+       ipath_diagpkt_remove();
+
        ipath_exit_ipathfs();
 
        ipath_driver_remove_group(&ipath_driver.driver);
@@ -1998,5 +2116,22 @@ bail:
        return ret;
 }
 
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
+{
+       u64 val;
+
+       if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
+               return -1;
+       if (dd->ipath_rx_pol_inv != new_pol_inv) {
+               dd->ipath_rx_pol_inv = new_pol_inv;
+               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+               val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+                        INFINIPATH_XGXS_RX_POL_SHIFT);
+               val |= ((u64)dd->ipath_rx_pol_inv) <<
+                       INFINIPATH_XGXS_RX_POL_SHIFT;
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+       }
+       return 0;
+}
 module_init(infinipath_init);
 module_exit(infinipath_cleanup);
index bbaa70e57db1ebf4442e6f3cb4e6e9f4a430efe6..29930e22318e5a09f78349fe4c8d9e052db9aa62 100644 (file)
@@ -39,7 +39,6 @@
 #include <asm/pgtable.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"
 
 static int ipath_open(struct inode *, struct file *);
@@ -985,15 +984,17 @@ static int mmap_piobufs(struct vm_area_struct *vma,
         * write combining behavior we want on the PIO buffers!
         */
 
-       if (vma->vm_flags & VM_READ) {
-               dev_info(&dd->pcidev->dev,
-                        "Can't map piobufs as readable (flags=%lx)\n",
-                        vma->vm_flags);
-               ret = -EPERM;
-               goto bail;
-       }
+#if defined(__powerpc__)
+       /* There isn't a generic way to specify writethrough mappings */
+       pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+       pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
+       pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
+#endif
 
-       /* don't allow them to later change to readable with mprotect */
+       /*
+        * don't allow them to later change to readable with mprotect (for when
+        * not initially mapped readable, as is normally the case)
+        */
        vma->vm_flags &= ~VM_MAYREAD;
        vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
 
@@ -1109,7 +1110,7 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
                ret = mmap_rcvegrbufs(vma, pd);
        else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
                /*
-                * The rcvhdrq itself; readonly except on HT-400 (so have
+                * The rcvhdrq itself; readonly except on HT (so have
                 * to allow writable mapping), multiple pages, contiguous
                 * from an i/o perspective.
                 */
@@ -1149,6 +1150,7 @@ static unsigned int ipath_poll(struct file *fp,
        struct ipath_portdata *pd;
        u32 head, tail;
        int bit;
+       unsigned pollflag = 0;
        struct ipath_devdata *dd;
 
        pd = port_fp(fp);
@@ -1185,9 +1187,12 @@ static unsigned int ipath_poll(struct file *fp,
                        clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
                        pd->port_rcvwait_to++;
                }
+               else
+                       pollflag = POLLIN | POLLRDNORM;
        }
        else {
                /* it's already happened; don't do wait_event overhead */
+               pollflag = POLLIN | POLLRDNORM;
                pd->port_rcvnowait++;
        }
 
@@ -1195,7 +1200,7 @@ static unsigned int ipath_poll(struct file *fp,
        ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
                         dd->ipath_rcvctrl);
 
-       return 0;
+       return pollflag;
 }
 
 static int try_alloc_port(struct ipath_devdata *dd, int port,
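
With the change above, ipath_poll() reports POLLIN | POLLRDNORM once receive data is pending instead of always returning 0; a minimal userspace sketch of waiting on an already-open port fd (timeout and return convention are illustrative):

#include <poll.h>

static int wait_for_rcv(int port_fd)
{
	struct pollfd pfd = { .fd = port_fd, .events = POLLIN };
	int n = poll(&pfd, 1, 1000);	/* wait up to one second */

	return (n > 0 && (pfd.revents & POLLIN)) ? 1 : 0;
}
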
@@ -1297,14 +1302,14 @@ static int find_best_unit(struct file *fp)
         * This code is present to allow a knowledgeable person to
         * specify the layout of processes to processors before opening
         * this driver, and then we'll assign the process to the "closest"
-        * HT-400 to that processor (we assume reasonable connectivity,
+        * InfiniPath chip to that processor (we assume reasonable connectivity,
         * for now).  This code assumes that if affinity has been set
         * before this point, that at most one cpu is set; for now this
         * is reasonable.  I check for both cpus_empty() and cpus_full(),
         * in case some kernel variant sets none of the bits when no
         * affinity is set.  2.6.11 and 12 kernels have all present
         * cpus set.  Some day we'll have to fix it up further to handle
-        * a cpu subset.  This algorithm fails for two HT-400's connected
+        * a cpu subset.  This algorithm fails for two HT chips connected
         * in tunnel fashion.  Eventually this needs real topology
         * information.  There may be some issues with dual core numbering
         * as well.  This needs more work prior to release.
@@ -1815,7 +1820,7 @@ int ipath_user_add(struct ipath_devdata *dd)
                if (ret < 0) {
                        ipath_dev_err(dd, "Could not create wildcard "
                                      "minor: error %d\n", -ret);
-                       goto bail_sma;
+                       goto bail_user;
                }
 
                atomic_set(&user_setup, 1);
@@ -1831,7 +1836,7 @@ int ipath_user_add(struct ipath_devdata *dd)
 
        goto bail;
 
-bail_sma:
+bail_user:
        user_cleanup();
 bail:
        return ret;
index 0936d8e8d7043bdc51408c5e4bdf9ba25d9bc54f..a5eb30a06a5cb0698c2f93f44202ac3bcf76a200 100644 (file)
@@ -191,8 +191,8 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
        portinfo[4] = (dd->ipath_lid << 16);
 
        /*
-        * Notimpl yet SMLID (should we store this in the driver, in case
-        * SMA dies?)  CapabilityMask is 0, we don't support any of these
+        * Notimpl yet SMLID.
+        * CapabilityMask is 0, we don't support any of these
         * DiagCode is 0; we don't store any diag info for now Notimpl yet
         * M_KeyLeasePeriod (we don't support M_Key)
         */
diff --git a/drivers/infiniband/hw/ipath/ipath_ht400.c b/drivers/infiniband/hw/ipath/ipath_ht400.c
deleted file mode 100644 (file)
index 3db015d..0000000
+++ /dev/null
@@ -1,1603 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file contains all of the code that is specific to the InfiniPath
- * HT-400 chip.
- */
-
-#include <linux/pci.h>
-#include <linux/delay.h>
-
-#include "ipath_kernel.h"
-#include "ipath_registers.h"
-
-/*
- * This lists the InfiniPath HT400 registers, in the actual chip layout.
- * This structure should never be directly accessed.
- *
- * The names are in InterCap form because they're taken straight from
- * the chip specification.  Since they're only used in this file, they
- * don't pollute the rest of the source.
-*/
-
-struct _infinipath_do_not_use_kernel_regs {
-       unsigned long long Revision;
-       unsigned long long Control;
-       unsigned long long PageAlign;
-       unsigned long long PortCnt;
-       unsigned long long DebugPortSelect;
-       unsigned long long DebugPort;
-       unsigned long long SendRegBase;
-       unsigned long long UserRegBase;
-       unsigned long long CounterRegBase;
-       unsigned long long Scratch;
-       unsigned long long ReservedMisc1;
-       unsigned long long InterruptConfig;
-       unsigned long long IntBlocked;
-       unsigned long long IntMask;
-       unsigned long long IntStatus;
-       unsigned long long IntClear;
-       unsigned long long ErrorMask;
-       unsigned long long ErrorStatus;
-       unsigned long long ErrorClear;
-       unsigned long long HwErrMask;
-       unsigned long long HwErrStatus;
-       unsigned long long HwErrClear;
-       unsigned long long HwDiagCtrl;
-       unsigned long long MDIO;
-       unsigned long long IBCStatus;
-       unsigned long long IBCCtrl;
-       unsigned long long ExtStatus;
-       unsigned long long ExtCtrl;
-       unsigned long long GPIOOut;
-       unsigned long long GPIOMask;
-       unsigned long long GPIOStatus;
-       unsigned long long GPIOClear;
-       unsigned long long RcvCtrl;
-       unsigned long long RcvBTHQP;
-       unsigned long long RcvHdrSize;
-       unsigned long long RcvHdrCnt;
-       unsigned long long RcvHdrEntSize;
-       unsigned long long RcvTIDBase;
-       unsigned long long RcvTIDCnt;
-       unsigned long long RcvEgrBase;
-       unsigned long long RcvEgrCnt;
-       unsigned long long RcvBufBase;
-       unsigned long long RcvBufSize;
-       unsigned long long RxIntMemBase;
-       unsigned long long RxIntMemSize;
-       unsigned long long RcvPartitionKey;
-       unsigned long long ReservedRcv[10];
-       unsigned long long SendCtrl;
-       unsigned long long SendPIOBufBase;
-       unsigned long long SendPIOSize;
-       unsigned long long SendPIOBufCnt;
-       unsigned long long SendPIOAvailAddr;
-       unsigned long long TxIntMemBase;
-       unsigned long long TxIntMemSize;
-       unsigned long long ReservedSend[9];
-       unsigned long long SendBufferError;
-       unsigned long long SendBufferErrorCONT1;
-       unsigned long long SendBufferErrorCONT2;
-       unsigned long long SendBufferErrorCONT3;
-       unsigned long long ReservedSBE[4];
-       unsigned long long RcvHdrAddr0;
-       unsigned long long RcvHdrAddr1;
-       unsigned long long RcvHdrAddr2;
-       unsigned long long RcvHdrAddr3;
-       unsigned long long RcvHdrAddr4;
-       unsigned long long RcvHdrAddr5;
-       unsigned long long RcvHdrAddr6;
-       unsigned long long RcvHdrAddr7;
-       unsigned long long RcvHdrAddr8;
-       unsigned long long ReservedRHA[7];
-       unsigned long long RcvHdrTailAddr0;
-       unsigned long long RcvHdrTailAddr1;
-       unsigned long long RcvHdrTailAddr2;
-       unsigned long long RcvHdrTailAddr3;
-       unsigned long long RcvHdrTailAddr4;
-       unsigned long long RcvHdrTailAddr5;
-       unsigned long long RcvHdrTailAddr6;
-       unsigned long long RcvHdrTailAddr7;
-       unsigned long long RcvHdrTailAddr8;
-       unsigned long long ReservedRHTA[7];
-       unsigned long long Sync;        /* Software only */
-       unsigned long long Dump;        /* Software only */
-       unsigned long long SimVer;      /* Software only */
-       unsigned long long ReservedSW[5];
-       unsigned long long SerdesConfig0;
-       unsigned long long SerdesConfig1;
-       unsigned long long SerdesStatus;
-       unsigned long long XGXSConfig;
-       unsigned long long ReservedSW2[4];
-};
-
-#define IPATH_KREG_OFFSET(field) (offsetof(struct \
-    _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
-#define IPATH_CREG_OFFSET(field) (offsetof( \
-    struct infinipath_counters, field) / sizeof(u64))
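These two macros convert a field's byte offset within the chip register layout into a 64-bit word index, which is the unit stored in the kregs/cregs tables below.  A minimal sketch of the arithmetic, using a made-up two-register layout (the struct name and values are illustrative only, not the real chip map):

#include <stddef.h>

/* Illustrative layout only: Revision at byte offset 0, Control at byte 8. */
struct example_regs {
	unsigned long long Revision;
	unsigned long long Control;
};

#define EXAMPLE_KREG_OFFSET(field) \
	(offsetof(struct example_regs, field) / sizeof(unsigned long long))

/* EXAMPLE_KREG_OFFSET(Revision) == 0 and EXAMPLE_KREG_OFFSET(Control) == 1:
 * the register accessors index the memory-mapped space by u64 words
 * rather than by raw byte offsets. */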
-
-static const struct ipath_kregs ipath_ht_kregs = {
-       .kr_control = IPATH_KREG_OFFSET(Control),
-       .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
-       .kr_debugport = IPATH_KREG_OFFSET(DebugPort),
-       .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
-       .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
-       .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
-       .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
-       .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
-       .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
-       .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
-       .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
-       .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
-       .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
-       .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
-       .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
-       .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
-       .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
-       .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
-       .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
-       .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
-       .kr_intclear = IPATH_KREG_OFFSET(IntClear),
-       .kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig),
-       .kr_intmask = IPATH_KREG_OFFSET(IntMask),
-       .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
-       .kr_mdio = IPATH_KREG_OFFSET(MDIO),
-       .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
-       .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
-       .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
-       .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
-       .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
-       .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
-       .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
-       .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
-       .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
-       .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
-       .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
-       .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
-       .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
-       .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
-       .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
-       .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
-       .kr_revision = IPATH_KREG_OFFSET(Revision),
-       .kr_scratch = IPATH_KREG_OFFSET(Scratch),
-       .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
-       .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
-       .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
-       .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
-       .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
-       .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
-       .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
-       .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
-       .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
-       .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
-       .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
-       .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
-       .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
-       .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
-       /*
-        * These should not be used directly via ipath_read_kreg64(),
-        * use them with ipath_read_kreg64_port(),
-        */
-       .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
-       .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
-};
-
-static const struct ipath_cregs ipath_ht_cregs = {
-       .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
-       .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
-       .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
-       .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
-       .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
-       .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
-       .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
-       .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
-       .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
-       .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
-       .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
-       .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
-       /* calc from Reg_CounterRegBase + offset */
-       .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
-       .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
-       .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
-       .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
-       .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
-       .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
-       .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
-       .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
-       .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
-       .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
-       .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
-       .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
-       .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
-       .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
-       .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
-       .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
-       .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
-       .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
-       .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
-       .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
-       .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
-};
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1FF
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1FF
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
-#define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL
-#define INFINIPATH_HWE_HTCLNKABYTE0CRCERR   0x0000000000800000ULL
-#define INFINIPATH_HWE_HTCLNKABYTE1CRCERR   0x0000000001000000ULL
-#define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR   0x0000000002000000ULL
-#define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR   0x0000000004000000ULL
-#define INFINIPATH_HWE_HTCMISCERR4          0x0000000008000000ULL
-#define INFINIPATH_HWE_HTCMISCERR5          0x0000000010000000ULL
-#define INFINIPATH_HWE_HTCMISCERR6          0x0000000020000000ULL
-#define INFINIPATH_HWE_HTCMISCERR7          0x0000000040000000ULL
-#define INFINIPATH_HWE_HTCBUSTREQPARITYERR  0x0000000080000000ULL
-#define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL
-#define INFINIPATH_HWE_HTCBUSIREQPARITYERR  0x0000000200000000ULL
-#define INFINIPATH_HWE_COREPLL_FBSLIP       0x0080000000000000ULL
-#define INFINIPATH_HWE_COREPLL_RFSLIP       0x0100000000000000ULL
-#define INFINIPATH_HWE_HTBPLL_FBSLIP        0x0200000000000000ULL
-#define INFINIPATH_HWE_HTBPLL_RFSLIP        0x0400000000000000ULL
-#define INFINIPATH_HWE_HTAPLL_FBSLIP        0x0800000000000000ULL
-#define INFINIPATH_HWE_HTAPLL_RFSLIP        0x1000000000000000ULL
-#define INFINIPATH_HWE_SERDESPLLFAILED      0x2000000000000000ULL
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_FREQSEL 0x2
-#define INFINIPATH_EXTS_SERDESSEL 0x4
-#define INFINIPATH_EXTS_MEMBIST_ENDTEST     0x0000000000004000
-#define INFINIPATH_EXTS_MEMBIST_CORRECT     0x0000000000008000
-
-/*
- * masks and bits that are different in different chips, or present only
- * in one
- */
-static const ipath_err_t infinipath_hwe_htcmemparityerr_mask =
-    INFINIPATH_HWE_HTCMEMPARITYERR_MASK;
-static const ipath_err_t infinipath_hwe_htcmemparityerr_shift =
-    INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT;
-
-static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr =
-    INFINIPATH_HWE_HTCLNKABYTE0CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr =
-    INFINIPATH_HWE_HTCLNKABYTE1CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr =
-    INFINIPATH_HWE_HTCLNKBBYTE0CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr =
-    INFINIPATH_HWE_HTCLNKBBYTE1CRCERR;
-
-#define _IPATH_GPIO_SDA_NUM 1
-#define _IPATH_GPIO_SCL_NUM 0
-
-#define IPATH_GPIO_SDA \
-       (1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-#define IPATH_GPIO_SCL \
-       (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-
-/* keep the code below somewhat more readable; not used elsewhere */
-#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |    \
-                               infinipath_hwe_htclnkabyte1crcerr)
-#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr |    \
-                               infinipath_hwe_htclnkbbyte1crcerr)
-#define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |    \
-                               infinipath_hwe_htclnkbbyte0crcerr)
-#define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr |    \
-                               infinipath_hwe_htclnkbbyte1crcerr)
-
-static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
-                         char *msg, size_t msgl)
-{
-       char bitsmsg[64];
-       ipath_err_t crcbits = hwerrs &
-               (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS);
-       /* don't check if 8bit HT */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
-               crcbits &= ~infinipath_hwe_htclnkabyte1crcerr;
-       /* don't check if 8bit HT */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
-               crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr;
-       /*
-        * we'll want to ignore link errors on a link that is
-        * not in use, if any.  For now, complain about both.
-        */
-       if (crcbits) {
-               u16 ctrl0, ctrl1;
-               snprintf(bitsmsg, sizeof bitsmsg,
-                        "[HT%s lane %s CRC (%llx); ignore till reload]",
-                        !(crcbits & _IPATH_HTLINK1_CRCBITS) ?
-                        "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
-                                   ? "1 (B)" : "0+1 (A+B)"),
-                        !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0"
-                        : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" :
-                           "0+1"), (unsigned long long) crcbits);
-               strlcat(msg, bitsmsg, msgl);
-
-               /*
-                * print extra info for debugging.  slave/primary
-                * config word 4, 8 (link control 0, 1)
-                */
-
-               if (pci_read_config_word(dd->pcidev,
-                                        dd->ipath_ht_slave_off + 0x4,
-                                        &ctrl0))
-                       dev_info(&dd->pcidev->dev, "Couldn't read "
-                                "linkctrl0 of slave/primary "
-                                "config block\n");
-               else if (!(ctrl0 & 1 << 6))
-                       /* not if EOC bit set */
-                       ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0,
-                                 ((ctrl0 >> 8) & 7) ? " CRC" : "",
-                                 ((ctrl0 >> 4) & 1) ? "linkfail" :
-                                 "");
-               if (pci_read_config_word(dd->pcidev,
-                                        dd->ipath_ht_slave_off + 0x8,
-                                        &ctrl1))
-                       dev_info(&dd->pcidev->dev, "Couldn't read "
-                                "linkctrl1 of slave/primary "
-                                "config block\n");
-               else if (!(ctrl1 & 1 << 6))
-                       /* not if EOC bit set */
-                       ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1,
-                                 ((ctrl1 >> 8) & 7) ? " CRC" : "",
-                                 ((ctrl1 >> 4) & 1) ? "linkfail" :
-                                 "");
-
-               /* disable until driver reloaded */
-               dd->ipath_hwerrmask &= ~crcbits;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-               ipath_dbg("HT crc errs: %s\n", msg);
-       } else
-               ipath_dbg("ignoring HT crc errors 0x%llx, "
-                         "not in use\n", (unsigned long long)
-                         (hwerrs & (_IPATH_HTLINK0_CRCBITS |
-                                    _IPATH_HTLINK1_CRCBITS)));
-}
-
-/**
- * ipath_ht_handle_hwerrors - display hardware errors
- * @dd: the infinipath device
- * @msg: the output buffer
- * @msgl: the size of the output buffer
- *
- * Most hardware errors are catastrophic, but for right now, we'll
- * print them and continue.  We reuse the same message buffer as
- * ipath_handle_errors() to avoid excessive stack usage.
- */
-static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
-                                    size_t msgl)
-{
-       ipath_err_t hwerrs;
-       u32 bits, ctrl;
-       int isfatal = 0;
-       char bitsmsg[64];
-
-       hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
-
-       if (!hwerrs) {
-               ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
-               /*
-                * better than printing confusing messages.
-                * This seems to be related to clearing the crc error, or
-                * the pll error during init.
-                */
-               goto bail;
-       } else if (hwerrs == -1LL) {
-               ipath_dev_err(dd, "Read of hardware error status failed "
-                             "(all bits set); ignoring\n");
-               goto bail;
-       }
-       ipath_stats.sps_hwerrs++;
-
-       /* Always clear the error status register, except MEMBISTFAIL,
-        * regardless of whether we continue or stop using the chip.
-        * We want that set so we know it failed, even across driver reload.
-        * We'll still ignore it in the hwerrmask.  We do this partly for
-        * diagnostics, but also for support */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
-
-       hwerrs &= dd->ipath_hwerrmask;
-
-       /*
-        * make sure we get this much out, unless told to be quiet,
-        * or it's occurred within the last 5 seconds
-        */
-       if ((hwerrs & ~dd->ipath_lasthwerror) ||
-           (ipath_debug & __IPATH_VERBDBG))
-               dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
-                        "(cleared)\n", (unsigned long long) hwerrs);
-       dd->ipath_lasthwerror |= hwerrs;
-
-       if (hwerrs & ~infinipath_hwe_bitsextant)
-               ipath_dev_err(dd, "hwerror interrupt with unknown errors "
-                             "%llx set\n", (unsigned long long)
-                             (hwerrs & ~infinipath_hwe_bitsextant));
-
-       ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-       if (ctrl & INFINIPATH_C_FREEZEMODE) {
-               if (hwerrs) {
-                       /*
-                        * if any bits are set that we aren't ignoring; only
-                        * make the complaint once, in case it's stuck
-                        * or recurring, and we get here multiple
-                        * times.
-                        */
-                       if (dd->ipath_flags & IPATH_INITTED) {
-                               ipath_dev_err(dd, "Fatal Error (freeze "
-                                             "mode), no longer usable\n");
-                               isfatal = 1;
-                       }
-                       *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-                       /* mark as having had error */
-                       *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-                       /*
-                        * mark as not usable, at a minimum until driver
-                        * is reloaded, probably until reboot, since no
-                        * other reset is possible.
-                        */
-                       dd->ipath_flags &= ~IPATH_INITTED;
-               } else {
-                       ipath_dbg("Clearing freezemode on ignored hardware "
-                                 "error\n");
-                       ctrl &= ~INFINIPATH_C_FREEZEMODE;
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                                        ctrl);
-               }
-       }
-
-       *msg = '\0';
-
-       /*
-        * may someday want to decode which bits map to which
-        * functional area for parity errors, etc.
-        */
-       if (hwerrs & (infinipath_hwe_htcmemparityerr_mask
-                     << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) {
-               bits = (u32) ((hwerrs >>
-                              INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) &
-                             INFINIPATH_HWE_HTCMEMPARITYERR_MASK);
-               snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ",
-                        bits);
-               strlcat(msg, bitsmsg, msgl);
-       }
-       if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
-                     << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
-               bits = (u32) ((hwerrs >>
-                              INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
-                             INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
-               snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
-                        bits);
-               strlcat(msg, bitsmsg, msgl);
-       }
-       if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
-                     << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
-               bits = (u32) ((hwerrs >>
-                              INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
-                             INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
-               snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
-                        bits);
-               strlcat(msg, bitsmsg, msgl);
-       }
-       if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
-               strlcat(msg, "[IB2IPATH Parity]", msgl);
-       if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
-               strlcat(msg, "[IPATH2IB Parity]", msgl);
-       if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR)
-               strlcat(msg, "[HTC Ireq Parity]", msgl);
-       if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR)
-               strlcat(msg, "[HTC Treq Parity]", msgl);
-       if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR)
-               strlcat(msg, "[HTC Tresp Parity]", msgl);
-
-       if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
-               hwerr_crcbits(dd, hwerrs, msg, msgl);
-
-       if (hwerrs & INFINIPATH_HWE_HTCMISCERR5)
-               strlcat(msg, "[HT core Misc5]", msgl);
-       if (hwerrs & INFINIPATH_HWE_HTCMISCERR6)
-               strlcat(msg, "[HT core Misc6]", msgl);
-       if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
-               strlcat(msg, "[HT core Misc7]", msgl);
-       if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-               strlcat(msg, "[Memory BIST test failed, HT-400 unusable]",
-                       msgl);
-               /* ignore from now on, so disable until driver reloaded */
-               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP |       \
-                        INFINIPATH_HWE_COREPLL_RFSLIP |        \
-                        INFINIPATH_HWE_HTBPLL_FBSLIP |         \
-                        INFINIPATH_HWE_HTBPLL_RFSLIP |         \
-                        INFINIPATH_HWE_HTAPLL_FBSLIP |         \
-                        INFINIPATH_HWE_HTAPLL_RFSLIP)
-
-       if (hwerrs & _IPATH_PLL_FAIL) {
-               snprintf(bitsmsg, sizeof bitsmsg,
-                        "[PLL failed (%llx), HT-400 unusable]",
-                        (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
-               strlcat(msg, bitsmsg, msgl);
-               /* ignore from now on, so disable until driver reloaded */
-               dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-
-       if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
-               /*
-                * If it occurs, it is left masked since the external
-                * interface is unused
-                */
-               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-
-       if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
-               strlcat(msg, "[Rx Dsync]", msgl);
-       if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
-               strlcat(msg, "[SerDes PLL]", msgl);
-
-       ipath_dev_err(dd, "%s hardware error\n", msg);
-       if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
-               /*
-                * for status file; if no trailing brace is copied,
-                * we'll know it was truncated.
-                */
-               snprintf(dd->ipath_freezemsg,
-                        dd->ipath_freezelen, "{%s}", msg);
-
-bail:;
-}
-
-/**
- * ipath_ht_boardname - fill in the board name
- * @dd: the infinipath device
- * @name: the output buffer
- * @namelen: the size of the output buffer
- *
- * fill in the board name, based on the board revision register
- */
-static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
-                             size_t namelen)
-{
-       char *n = NULL;
-       u8 boardrev = dd->ipath_boardrev;
-       int ret;
-
-       switch (boardrev) {
-       case 4:         /* Ponderosa is one of the bringup boards */
-               n = "Ponderosa";
-               break;
-       case 5:
-               /*
-                * HT-460 original production board; two production levels, with
-                * different serial number ranges.  See ipath_ht_early_init() for
-                * the case where we enable IPATH_GPIO_INTR for the later serial # range.
-                */
-               n = "InfiniPath_HT-460";
-               break;
-       case 6:
-               n = "OEM_Board_3";
-               break;
-       case 7:
-               /* HT-460 small form factor production board */
-               n = "InfiniPath_HT-465";
-               break;
-       case 8:
-               n = "LS/X-1";
-               break;
-       case 9:         /* Comstock bringup test board */
-               n = "Comstock";
-               break;
-       case 10:
-               n = "OEM_Board_2";
-               break;
-       case 11:
-               n = "InfiniPath_HT-470";
-               break;
-       case 12:
-               n = "OEM_Board_4";
-               break;
-       default:                /* don't know, just print the number */
-               ipath_dev_err(dd, "Don't yet know about board "
-                             "with ID %u\n", boardrev);
-               snprintf(name, namelen, "Unknown_InfiniPath_HT-4xx_%u",
-                        boardrev);
-               break;
-       }
-       if (n)
-               snprintf(name, namelen, "%s", n);
-
-       if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
-               /*
-                * This version of the driver only supports the HT-400
-                * Rev 3.2 and 3.3
-                */
-               ipath_dev_err(dd,
-                             "Unsupported HT-400 revision %u.%u!\n",
-                             dd->ipath_majrev, dd->ipath_minrev);
-               ret = 1;
-               goto bail;
-       }
-       /*
-        * pkt/word counters are 32 bit, and therefore wrap fast enough
-        * that we snapshot them from a timer, and maintain 64 bit shadow
-        * copies
-        */
-       dd->ipath_flags |= IPATH_32BITCOUNTERS;
-       if (dd->ipath_htspeed != 800)
-               ipath_dev_err(dd,
-                             "Incorrectly configured for HT @ %uMHz\n",
-                             dd->ipath_htspeed);
-       if (dd->ipath_boardrev == 7 || dd->ipath_boardrev == 11 ||
-           dd->ipath_boardrev == 6)
-               dd->ipath_flags |= IPATH_GPIO_INTR;
-       else
-               dd->ipath_flags |= IPATH_POLL_RX_INTR;
-       if (dd->ipath_boardrev == 8) {  /* LS/X-1 */
-               u64 val;
-               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-               if (val & INFINIPATH_EXTS_SERDESSEL) {
-                       /*
-                        * hardware disabled
-                        *
-                        * This means that the chip is hardware disabled,
-                        * and will not be able to bring up the link,
-                        * in any case.  We special case this and abort
-                        * early, to avoid later messages.  We also set
-                        * the DISABLED status bit
-                        */
-                       ipath_dbg("Unit %u is hardware-disabled\n",
-                                 dd->ipath_unit);
-                       *dd->ipath_statusp |= IPATH_STATUS_DISABLED;
-                       /* this value is handled differently */
-                       ret = 2;
-                       goto bail;
-               }
-       }
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static void ipath_check_htlink(struct ipath_devdata *dd)
-{
-       u8 linkerr, link_off, i;
-
-       for (i = 0; i < 2; i++) {
-               link_off = dd->ipath_ht_slave_off + i * 4 + 0xd;
-               if (pci_read_config_byte(dd->pcidev, link_off, &linkerr))
-                       dev_info(&dd->pcidev->dev, "Couldn't read "
-                                "linkerror%d of HT slave/primary block\n",
-                                i);
-               else if (linkerr & 0xf0) {
-                       ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
-                                  "clearing\n", i, linkerr >> 4);
-                       /*
-                        * writing the linkerr bits that are set should
-                        * clear them
-                        */
-                       if (pci_write_config_byte(dd->pcidev, link_off,
-                                                 linkerr))
-                               ipath_dbg("Failed write to clear HT "
-                                         "linkerror%d\n", i);
-                       if (pci_read_config_byte(dd->pcidev, link_off,
-                                                &linkerr))
-                               dev_info(&dd->pcidev->dev,
-                                        "Couldn't reread linkerror%d of "
-                                        "HT slave/primary block\n", i);
-                       else if (linkerr & 0xf0)
-                               dev_info(&dd->pcidev->dev,
-                                        "HT linkerror%d bits 0x%x "
-                                        "couldn't be cleared\n",
-                                        i, linkerr >> 4);
-               }
-       }
-}
-
-static int ipath_setup_ht_reset(struct ipath_devdata *dd)
-{
-       ipath_dbg("No reset possible for HT-400\n");
-       return 0;
-}
-
-#define HT_CAPABILITY_ID   0x08        /* HT capabilities not defined in kernel */
-#define HT_INTR_DISC_CONFIG  0x80      /* HT interrupt and discovery cap */
-#define HT_INTR_REG_INDEX    2 /* intconfig requires indirect accesses */
-
-/*
- * Bits 13-15 of the command register == 0 means slave/primary block.  Clear any HT CRC
- * errors.  We only bother to do this at load time, because it's OK if
- * it happened before we were loaded (first time after boot/reset),
- * but any time after that, it's fatal anyway.  Also need to not check
- * for upper byte errors if we are in 8 bit mode, so figure out
- * our width.  For now, at least, also complain if it's 8 bit.
- */
-static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
-                            int pos, u8 cap_type)
-{
-       u8 linkwidth = 0, linkerr, link_a_b_off, link_off;
-       u16 linkctrl = 0;
-       int i;
-
-       dd->ipath_ht_slave_off = pos;
-       /* command word, master_host bit */
-       /* master host || slave */
-       if ((cap_type >> 2) & 1)
-               link_a_b_off = 4;
-       else
-               link_a_b_off = 0;
-       ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n",
-                  link_a_b_off ? 1 : 0,
-                  link_a_b_off ? 'B' : 'A');
-
-       link_a_b_off += pos;
-
-       /*
-        * check both link control registers; clear both HT CRC sets if
-        * necessary.
-        */
-       for (i = 0; i < 2; i++) {
-               link_off = pos + i * 4 + 0x4;
-               if (pci_read_config_word(pdev, link_off, &linkctrl))
-                       ipath_dev_err(dd, "Couldn't read HT link control%d "
-                                     "register\n", i);
-               else if (linkctrl & (0xf << 8)) {
-                       ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error "
-                                  "bits %x\n", i, linkctrl & (0xf << 8));
-                       /*
-                        * now write them back to clear the error.
-                        */
-                       pci_write_config_byte(pdev, link_off,
-                                             linkctrl & (0xf << 8));
-               }
-       }
-
-       /*
-        * As with HT CRC bits, same for protocol errors that might occur
-        * during boot.
-        */
-       for (i = 0; i < 2; i++) {
-               link_off = pos + i * 4 + 0xd;
-               if (pci_read_config_byte(pdev, link_off, &linkerr))
-                       dev_info(&pdev->dev, "Couldn't read linkerror%d "
-                                "of HT slave/primary block\n", i);
-               else if (linkerr & 0xf0) {
-                       ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
-                                  "clearing\n", i, linkerr >> 4);
-                       /*
-                        * writing the linkerr bits that are set will clear
-                        * them
-                        */
-                       if (pci_write_config_byte
-                           (pdev, link_off, linkerr))
-                               ipath_dbg("Failed write to clear HT "
-                                         "linkerror%d\n", i);
-                       if (pci_read_config_byte(pdev, link_off, &linkerr))
-                               dev_info(&pdev->dev, "Couldn't reread "
-                                        "linkerror%d of HT slave/primary "
-                                        "block\n", i);
-                       else if (linkerr & 0xf0)
-                               dev_info(&pdev->dev, "HT linkerror%d bits "
-                                        "0x%x couldn't be cleared\n",
-                                        i, linkerr >> 4);
-               }
-       }
-
-       /*
-        * this is just for our link to the host, not devices connected
-        * through tunnel.
-        */
-
-       if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth))
-               ipath_dev_err(dd, "Couldn't read HT link width "
-                             "config register\n");
-       else {
-               u32 width;
-               switch (linkwidth & 7) {
-               case 5:
-                       width = 4;
-                       break;
-               case 4:
-                       width = 2;
-                       break;
-               case 3:
-                       width = 32;
-                       break;
-               case 1:
-                       width = 16;
-                       break;
-               case 0:
-               default:        /* if wrong, assume 8 bit */
-                       width = 8;
-                       break;
-               }
-
-               dd->ipath_htwidth = width;
-
-               if (linkwidth != 0x11) {
-                       ipath_dev_err(dd, "Not configured for 16 bit HT "
-                                     "(%x)\n", linkwidth);
-                       if (!(linkwidth & 0xf)) {
-                               ipath_dbg("Will ignore HT lane1 errors\n");
-                               dd->ipath_flags |= IPATH_8BIT_IN_HT0;
-                       }
-               }
-       }
-
-       /*
-        * this is just for our link to the host, not devices connected
-        * through tunnel.
-        */
-       if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth))
-               ipath_dev_err(dd, "Couldn't read HT link frequency "
-                             "config register\n");
-       else {
-               u32 speed;
-               switch (linkwidth & 0xf) {
-               case 6:
-                       speed = 1000;
-                       break;
-               case 5:
-                       speed = 800;
-                       break;
-               case 4:
-                       speed = 600;
-                       break;
-               case 3:
-                       speed = 500;
-                       break;
-               case 2:
-                       speed = 400;
-                       break;
-               case 1:
-                       speed = 300;
-                       break;
-               default:
-                       /*
-                        * assume reserved and vendor-specific are 200...
-                        */
-               case 0:
-                       speed = 200;
-                       break;
-               }
-               dd->ipath_htspeed = speed;
-       }
-}
-
-static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev,
-                           int pos)
-{
-       u32 int_handler_addr_lower;
-       u32 int_handler_addr_upper;
-       u64 ihandler;
-       u32 intvec;
-
-       /* use indirection register to get the intr handler */
-       pci_write_config_byte(pdev, pos + HT_INTR_REG_INDEX, 0x10);
-       pci_read_config_dword(pdev, pos + 4, &int_handler_addr_lower);
-       pci_write_config_byte(pdev, pos + HT_INTR_REG_INDEX, 0x11);
-       pci_read_config_dword(pdev, pos + 4, &int_handler_addr_upper);
-
-       ihandler = (u64) int_handler_addr_lower |
-               ((u64) int_handler_addr_upper << 32);
-
-       /*
-        * kernels with CONFIG_PCI_MSI set the vector in the irq field of
-        * struct pci_device, so we use that to program the HT-400 internal
-        * interrupt register (not config space) with that value. The BIOS
-        * must still have done the basic MSI setup.
-        */
-       intvec = pdev->irq;
-       /*
-        * clear any vector bits there; normally not set but we'll overload
-        * this for some debug purposes (setting the HTC debug register
-        * value from software, rather than GPIOs), so it might be set on a
-        * driver reload.
-        */
-       ihandler &= ~0xff0000;
-       /* x86 vector goes in intrinfo[23:16] */
-       ihandler |= intvec << 16;
-       ipath_cdbg(VERBOSE, "ihandler lower %x, upper %x, intvec %x, "
-                  "interruptconfig %llx\n", int_handler_addr_lower,
-                  int_handler_addr_upper, intvec,
-                  (unsigned long long) ihandler);
-
-       /* can't program yet, so save for interrupt setup */
-       dd->ipath_intconfig = ihandler;
-       /* keep going, so we find link control stuff also */
-
-       return ihandler != 0;
-}
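As the comments in set_int_handler() describe, the 64-bit interrupt-config value is assembled from two 32-bit config-space reads, any stale vector bits are cleared, and the x86 vector taken from pdev->irq is placed in intrinfo[23:16].  A minimal standalone sketch of that packing (the helper name is invented for illustration, and the vector is assumed to fit in 8 bits):

/* Hypothetical helper mirroring the packing done in set_int_handler(). */
static inline unsigned long long pack_intconfig(unsigned int addr_lower,
						unsigned int addr_upper,
						unsigned int intvec)
{
	unsigned long long ihandler;

	ihandler = (unsigned long long) addr_lower |
		   ((unsigned long long) addr_upper << 32);
	ihandler &= ~0xff0000ULL;	/* clear stale vector bits, intrinfo[23:16] */
	ihandler |= (unsigned long long) intvec << 16;	/* x86 vector in 23:16 */
	return ihandler;
}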
-
-/**
- * ipath_setup_ht_config - setup the interruptconfig register
- * @dd: the infinipath device
- * @pdev: the PCI device
- *
- * setup the interruptconfig register from the HT config info.
- * Also clear CRC errors in HT linkcontrol, if necessary.
- * This is done only for the real hardware.  It is done before
- * chip address space is initted, so can't touch infinipath registers
- */
-static int ipath_setup_ht_config(struct ipath_devdata *dd,
-                                struct pci_dev *pdev)
-{
-       int pos, ret = 0;
-       int ihandler = 0;
-
-       /*
-        * Read the capability info to find the interrupt info, and also
-        * handle clearing CRC errors in linkctrl register if necessary.  We
-        * do this early, before we ever enable errors or hardware errors,
-        * mostly to avoid causing the chip to enter freeze mode.
-        */
-       pos = pci_find_capability(pdev, HT_CAPABILITY_ID);
-       if (!pos) {
-               ipath_dev_err(dd, "Couldn't find HyperTransport "
-                             "capability; no interrupts\n");
-               ret = -ENODEV;
-               goto bail;
-       }
-       do {
-               u8 cap_type;
-
-               /* the HT capability type byte is 3 bytes after the
-                * capability byte.
-                */
-               if (pci_read_config_byte(pdev, pos + 3, &cap_type)) {
-                       dev_info(&pdev->dev, "Couldn't read config "
-                                "command @ %d\n", pos);
-                       continue;
-               }
-               if (!(cap_type & 0xE0))
-                       slave_or_pri_blk(dd, pdev, pos, cap_type);
-               else if (cap_type == HT_INTR_DISC_CONFIG)
-                       ihandler = set_int_handler(dd, pdev, pos);
-       } while ((pos = pci_find_next_capability(pdev, pos,
-                                                HT_CAPABILITY_ID)));
-
-       if (!ihandler) {
-               ipath_dev_err(dd, "Couldn't find interrupt handler in "
-                             "config space\n");
-               ret = -ENODEV;
-       }
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_setup_ht_cleanup - clean up any per-chip chip-specific stuff
- * @dd: the infinipath device
- *
- * Called during driver unload.
- * This is currently a nop for the HT-400, not for all chips
- */
-static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
-{
-}
-
-/**
- * ipath_setup_ht_setextled - set the state of the two external LEDs
- * @dd: the infinipath device
- * @lst: the L state
- * @ltst: the LT state
- *
- * Set the state of the two external LEDs, to indicate physical and
- * logical state of IB link.   For this chip (at least with recommended
- * board pinouts), LED1 is Green (physical state), and LED2 is Yellow
- * (logical state)
- *
- * Note:  We try to match the Mellanox HCA LED behavior as best
- * we can.  Green indicates physical link state is OK (something is
- * plugged in, and we can train).
- * Amber indicates the link is logically up (ACTIVE).
- * Mellanox further blinks the amber LED to indicate data packet
- * activity, but we have no hardware support for that, so it would
- * require waking up every 10-20 msecs and checking the counters
- * on the chip, and then turning the LED off if appropriate.  That's
- * visible overhead, so not something we will do.
- *
- */
-static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
-                                    u64 lst, u64 ltst)
-{
-       u64 extctl;
-
-       /* the diags use the LED to indicate diag info, so we leave
-        * the external LED alone when the diags are running */
-       if (ipath_diag_inuse)
-               return;
-
-       /*
-        * start by setting both LED control bits to off, then turn
-        * on the appropriate bit(s).
-        */
-       if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */
-               /*
-                * major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF
-                * is inverted,  because it is normally used to indicate
-                * a hardware fault at reset, if there were errors
-                */
-               extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON)
-                       | INFINIPATH_EXTC_LEDGBLERR_OFF;
-               if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
-                       extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF;
-               if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-                       extctl |= INFINIPATH_EXTC_LEDGBLOK_ON;
-       }
-       else {
-               extctl = dd->ipath_extctrl &
-                       ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
-                         INFINIPATH_EXTC_LED2PRIPORT_ON);
-               if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
-                       extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
-               if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-                       extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
-       }
-       dd->ipath_extctrl = extctl;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
-}
-
-static void ipath_init_ht_variables(void)
-{
-       ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
-       ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
-       ipath_gpio_sda = IPATH_GPIO_SDA;
-       ipath_gpio_scl = IPATH_GPIO_SCL;
-
-       infinipath_i_bitsextant =
-               (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
-               (INFINIPATH_I_RCVAVAIL_MASK <<
-                INFINIPATH_I_RCVAVAIL_SHIFT) |
-               INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
-               INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
-
-       infinipath_e_bitsextant =
-               INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
-               INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
-               INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
-               INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
-               INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
-               INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
-               INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-               INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
-               INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
-               INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
-               INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
-               INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
-               INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
-               INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
-               INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
-               INFINIPATH_E_HARDWARE;
-
-       infinipath_hwe_bitsextant =
-               (INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
-               (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
-               (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
-               INFINIPATH_HWE_HTCLNKABYTE0CRCERR |
-               INFINIPATH_HWE_HTCLNKABYTE1CRCERR |
-               INFINIPATH_HWE_HTCLNKBBYTE0CRCERR |
-               INFINIPATH_HWE_HTCLNKBBYTE1CRCERR |
-               INFINIPATH_HWE_HTCMISCERR4 |
-               INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 |
-               INFINIPATH_HWE_HTCMISCERR7 |
-               INFINIPATH_HWE_HTCBUSTREQPARITYERR |
-               INFINIPATH_HWE_HTCBUSTRESPPARITYERR |
-               INFINIPATH_HWE_HTCBUSIREQPARITYERR |
-               INFINIPATH_HWE_RXDSYNCMEMPARITYERR |
-               INFINIPATH_HWE_MEMBISTFAILED |
-               INFINIPATH_HWE_COREPLL_FBSLIP |
-               INFINIPATH_HWE_COREPLL_RFSLIP |
-               INFINIPATH_HWE_HTBPLL_FBSLIP |
-               INFINIPATH_HWE_HTBPLL_RFSLIP |
-               INFINIPATH_HWE_HTAPLL_FBSLIP |
-               INFINIPATH_HWE_HTAPLL_RFSLIP |
-               INFINIPATH_HWE_SERDESPLLFAILED |
-               INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
-               INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
-
-       infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
-       infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
-}
-
-/**
- * ipath_ht_init_hwerrors - enable hardware errors
- * @dd: the infinipath device
- *
- * now that we have finished initializing everything that might reasonably
- * cause a hardware error, and cleared those error bits as they occur,
- * we can enable hardware errors in the mask (potentially enabling
- * freeze mode), and enable hardware errors as errors (along with
- * everything else) in errormask
- */
-static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
-{
-       ipath_err_t val;
-       u64 extsval;
-
-       extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-
-       if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
-               ipath_dev_err(dd, "MemBIST did not complete!\n");
-
-       ipath_check_htlink(dd);
-
-       /* barring bugs, all hwerrors become interrupts, which can */
-       val = -1LL;
-       /* don't look at crc lane1 if 8 bit */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
-               val &= ~infinipath_hwe_htclnkabyte1crcerr;
-       /* don't look at crc lane1 if 8 bit */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
-               val &= ~infinipath_hwe_htclnkbbyte1crcerr;
-
-       /*
-        * disable RXDSYNCMEMPARITY because external serdes is unused,
-        * and therefore the logic will never be used or initialized,
-        * and uninitialized state will normally result in this error
-        * being asserted.  Similarly for the external serdes pll
-        * lock signal.
-        */
-       val &= ~(INFINIPATH_HWE_SERDESPLLFAILED |
-                INFINIPATH_HWE_RXDSYNCMEMPARITYERR);
-
-       /*
-        * Disable MISCERR4 because of an inversion in the HT core
-        * logic checking for errors that cause this bit to be set.
-        * The errata can also cause the protocol error bit to be set
-        * in the HT config space linkerror register(s).
-        */
-       val &= ~INFINIPATH_HWE_HTCMISCERR4;
-
-       /*
-        * PLL ignored because MDIO interface has a logic problem
-        * for reads, on Comstock and Ponderosa.  BRINGUP
-        */
-       if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9)
-               val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-       dd->ipath_hwerrmask = val;
-}
-
-/**
- * ipath_ht_bringup_serdes - bring up the serdes
- * @dd: the infinipath device
- */
-static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
-{
-       u64 val, config1;
-       int ret = 0, change = 0;
-
-       ipath_dbg("Trying to bringup serdes\n");
-
-       if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
-           INFINIPATH_HWE_SERDESPLLFAILED)
-       {
-               ipath_dbg("At start, serdes PLL failed bit set in "
-                         "hwerrstatus, clearing and continuing\n");
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                                INFINIPATH_HWE_SERDESPLLFAILED);
-       }
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-       config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
-
-       ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx "
-                  "config1=%llx, sstatus=%llx xgxs %llx\n",
-                  (unsigned long long) val, (unsigned long long) config1,
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-       /* force reset on */
-       val |= INFINIPATH_SERDC0_RESET_PLL
-               /* | INFINIPATH_SERDC0_RESET_MASK */
-               ;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-       udelay(15);             /* need pll reset set at least for a bit */
-
-       if (val & INFINIPATH_SERDC0_RESET_PLL) {
-               u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL;
-               /* set lane resets, and tx idle, during pll reset */
-               val2 |= INFINIPATH_SERDC0_RESET_MASK |
-                       INFINIPATH_SERDC0_TXIDLE;
-               ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing "
-                          "%llx)\n", (unsigned long long) val2);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
-                                val2);
-               /*
-                * be sure chip saw it
-                */
-               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               /*
-                * need pll reset clear at least 11 usec before lane
-                * resets cleared; give it a few more
-                */
-               udelay(15);
-               val = val2;     /* for check below */
-       }
-
-       if (val & (INFINIPATH_SERDC0_RESET_PLL |
-                  INFINIPATH_SERDC0_RESET_MASK |
-                  INFINIPATH_SERDC0_TXIDLE)) {
-               val &= ~(INFINIPATH_SERDC0_RESET_PLL |
-                        INFINIPATH_SERDC0_RESET_MASK |
-                        INFINIPATH_SERDC0_TXIDLE);
-               /* clear them */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
-                                val);
-       }
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-       if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
-            INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
-               val &= ~(INFINIPATH_XGXS_MDIOADDR_MASK <<
-                        INFINIPATH_XGXS_MDIOADDR_SHIFT);
-               /*
-                * we use address 3
-                */
-               val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
-               change = 1;
-       }
-       if (val & INFINIPATH_XGXS_RESET) {
-               /* normally true after boot */
-               val &= ~INFINIPATH_XGXS_RESET;
-               change = 1;
-       }
-       if (change)
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-       /* clear current and de-emphasis bits */
-       config1 &= ~0x0ffffffff00ULL;
-       /* set current to 20ma */
-       config1 |= 0x00000000000ULL;
-       /* set de-emphasis to -5.68dB */
-       config1 |= 0x0cccc000000ULL;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
-
-       ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx "
-                  "config1=%llx, sstatus=%llx xgxs %llx\n",
-                  (unsigned long long) val, (unsigned long long) config1,
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-       if (!ipath_waitfor_mdio_cmdready(dd)) {
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_mdio,
-                                ipath_mdio_req(IPATH_MDIO_CMD_READ, 31,
-                                               IPATH_MDIO_CTRL_XGXS_REG_8,
-                                               0));
-               if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio,
-                                          IPATH_MDIO_DATAVALID, &val))
-                       ipath_dbg("Never got MDIO data for XGXS status "
-                                 "read\n");
-               else
-                       ipath_cdbg(VERBOSE, "MDIO Read reg8, "
-                                  "'bank' 31 %x\n", (u32) val);
-       } else
-               ipath_dbg("Never got MDIO cmdready for XGXS status read\n");
-
-       return ret;             /* for now, say we always succeeded */
-}
-
-/**
- * ipath_ht_quiet_serdes - set serdes to txidle
- * @dd: the infinipath device
- *
- * Called when the driver is being unloaded
- */
-static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
-{
-       u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-       val |= INFINIPATH_SERDC0_TXIDLE;
-       ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
-                 (unsigned long long) val);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-}
-
-static int ipath_ht_intconfig(struct ipath_devdata *dd)
-{
-       int ret;
-
-       if (!dd->ipath_intconfig) {
-               ipath_dev_err(dd, "No interrupts enabled, couldn't setup "
-                             "interrupt address\n");
-               ret = 1;
-               goto bail;
-       }
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig,
-                        dd->ipath_intconfig);  /* interrupt address */
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_ht_put_tid - write a TID in chip
- * @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to update
- * @type: 0 for eager, 1 for expected
- * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
- *
- * This exists as a separate routine to allow for special locking etc.
- * It's used for both the full cleanup on exit, as well as the normal
- * setup and teardown.
- */
-static void ipath_ht_put_tid(struct ipath_devdata *dd,
-                            u64 __iomem *tidptr, u32 type,
-                            unsigned long pa)
-{
-       if (pa != dd->ipath_tidinvalid) {
-               if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
-                       dev_info(&dd->pcidev->dev,
-                                "physaddr %lx has more than "
-                                "40 bits, using only 40!!!\n", pa);
-                       pa &= INFINIPATH_RT_ADDR_MASK;
-               }
-               if (type == 0)
-                       pa |= dd->ipath_tidtemplate;
-               else {
-                       /* in words (fixed, full page).  */
-                       u64 lenvalid = PAGE_SIZE >> 2;
-                       lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT;
-                       pa |= lenvalid | INFINIPATH_RT_VALID;
-               }
-       }
-       if (dd->ipath_kregbase)
-               writeq(pa, tidptr);
-}
-
-/**
- * ipath_ht_clear_tids - clear all TID entries for a port, expected and eager
- * @dd: the infinipath device
- * @port: the port
- *
- * Used from ipath_close(), and at chip initialization.
- */
-static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
-{
-       u64 __iomem *tidbase;
-       int i;
-
-       if (!dd->ipath_kregbase)
-               return;
-
-       ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
-
-       /*
-        * need to invalidate all of the expected TID entries for this
-        * port, so we don't have valid entries that might somehow get
-        * used (early in next use of this port, or through some bug)
-        */
-       tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-                                  dd->ipath_rcvtidbase +
-                                  port * dd->ipath_rcvtidcnt *
-                                  sizeof(*tidbase));
-       for (i = 0; i < dd->ipath_rcvtidcnt; i++)
-               ipath_ht_put_tid(dd, &tidbase[i], 1, dd->ipath_tidinvalid);
-
-       tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-                                  dd->ipath_rcvegrbase +
-                                  port * dd->ipath_rcvegrcnt *
-                                  sizeof(*tidbase));
-
-       for (i = 0; i < dd->ipath_rcvegrcnt; i++)
-               ipath_ht_put_tid(dd, &tidbase[i], 0, dd->ipath_tidinvalid);
-}
-
-/**
- * ipath_ht_tidtemplate - setup constants for TID updates
- * @dd: the infinipath device
- *
- * We set up values that we use a lot, to avoid recalculating them each time
- */
-static void ipath_ht_tidtemplate(struct ipath_devdata *dd)
-{
-       dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2;
-       dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT;
-       dd->ipath_tidtemplate |= INFINIPATH_RT_VALID;
-
-       /*
-        * work around chip errata bug 7358, by marking invalid tids
-        * as having max length
-        */
-       dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) <<
-               INFINIPATH_RT_BUFSIZE_SHIFT;
-}
-
-static int ipath_ht_early_init(struct ipath_devdata *dd)
-{
-       u32 __iomem *piobuf;
-       u32 pioincr, val32, egrsize;
-       int i;
-
-       /*
-        * one cache line; long IB headers will spill over into received
-        * buffer
-        */
-       dd->ipath_rcvhdrentsize = 16;
-       dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
-
-       /*
-        * For HT-400, we allocate a somewhat overly large eager buffer,
-        * such that we can guarantee that we can receive the largest
-        * packet that we can send out.  To truly support a 4KB MTU,
-        * we need to bump this to a large value.  To date, other than
-        * testing, we have never encountered an HCA that can really
-        * send 4KB MTU packets, so we do not handle that (we'll get
-        * error interrupts if we ever see one).
-        */
-       dd->ipath_rcvegrbufsize = dd->ipath_piosize2k;
-       egrsize = dd->ipath_rcvegrbufsize;
-
-       /*
-        * the min() check here is currently a nop, but it may not
-        * always be, depending on just how we do ipath_rcvegrbufsize
-        */
-       dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
-                                dd->ipath_rcvegrbufsize);
-       dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
-       ipath_ht_tidtemplate(dd);
-
-       /*
-        * zero all the TID entries at startup.  We do this for sanity,
-        * in case of a previous driver crash of some kind, and also
-        * because the chip powers up with these memories in an unknown
-        * state.  Use portcnt, not cfgports, since this is for the
-        * full chip, not for current (possibly different) configuration
-        * value.
-        * Chip Errata bug 6447
-        */
-       for (val32 = 0; val32 < dd->ipath_portcnt; val32++)
-               ipath_ht_clear_tids(dd, val32);
-
-       /*
-        * write the pbc of each buffer, to be sure it's initialized, then
-        * cancel all the buffers, and also abort any packets that might
-        * have been in flight for some reason (the latter is for driver
-        * unload/reload, but isn't a bad idea at first init).  PIO send
-        * isn't enabled at this point, so there is no danger of sending
-        * these out on the wire.
-        * Chip Errata bug 6610
-        */
-       piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) +
-                                 dd->ipath_piobufbase);
-       pioincr = dd->ipath_palign / sizeof(*piobuf);
-       for (i = 0; i < dd->ipath_piobcnt2k; i++) {
-               /*
-                * reasonable word count, just to init pbc
-                */
-               writel(16, piobuf);
-               piobuf += pioincr;
-       }
-       /*
-        * self-clearing
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                        INFINIPATH_S_ABORT);
-
-       ipath_get_eeprom_info(dd);
-       if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
-               dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
-               /*
-                * Later production HT-460 has same changes as HT-465, so
-                * can use GPIO interrupts.  They have serial #'s starting
-                * with 128, rather than 112.
-                */
-               dd->ipath_flags |= IPATH_GPIO_INTR;
-               dd->ipath_flags &= ~IPATH_POLL_RX_INTR;
-       }
-       return 0;
-}
-
-/**
- * ipath_ht_get_base_info - set chip-specific flags for user code
- * @pd: the infinipath port data
- * @kbase: ipath_base_info pointer
- *
- * We set the HT flag because the bandwidth difference between
- * HyperTransport and PCI Express can affect some user packet algorithms.
- */
-static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
-{
-       struct ipath_base_info *kinfo = kbase;
-
-       kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
-               IPATH_RUNTIME_RCVHDR_COPY;
-
-       return 0;
-}
-
-/**
- * ipath_init_ht400_funcs - set up the chip-specific function pointers
- * @dd: the infinipath device
- *
- * This is global, and is called directly at init to set up the
- * chip-specific function pointers for later use.
- */
-void ipath_init_ht400_funcs(struct ipath_devdata *dd)
-{
-       dd->ipath_f_intrsetup = ipath_ht_intconfig;
-       dd->ipath_f_bus = ipath_setup_ht_config;
-       dd->ipath_f_reset = ipath_setup_ht_reset;
-       dd->ipath_f_get_boardname = ipath_ht_boardname;
-       dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors;
-       dd->ipath_f_early_init = ipath_ht_early_init;
-       dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors;
-       dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes;
-       dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes;
-       dd->ipath_f_clear_tids = ipath_ht_clear_tids;
-       dd->ipath_f_put_tid = ipath_ht_put_tid;
-       dd->ipath_f_cleanup = ipath_setup_ht_cleanup;
-       dd->ipath_f_setextled = ipath_setup_ht_setextled;
-       dd->ipath_f_get_base_info = ipath_ht_get_base_info;
-
-       /*
-        * initialize chip-specific variables
-        */
-       dd->ipath_f_tidtemplate = ipath_ht_tidtemplate;
-
-       /*
-        * setup the register offsets, since they are different for each
-        * chip
-        */
-       dd->ipath_kregs = &ipath_ht_kregs;
-       dd->ipath_cregs = &ipath_ht_cregs;
-
-       /*
-        * do very early init that is needed before ipath_f_bus is
-        * called
-        */
-       ipath_init_ht_variables();
-}
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
new file mode 100644 (file)
index 0000000..bf2455a
--- /dev/null
@@ -0,0 +1,1612 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file contains all of the code that is specific to the InfiniPath
+ * HT chip.
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "ipath_kernel.h"
+#include "ipath_registers.h"
+
+/*
+ * This lists the InfiniPath registers, in the actual chip layout.
+ * This structure should never be directly accessed.
+ *
+ * The names are in InterCap form because they're taken straight from
+ * the chip specification.  Since they're only used in this file, they
+ * don't pollute the rest of the source.
+ */
+
+struct _infinipath_do_not_use_kernel_regs {
+       unsigned long long Revision;
+       unsigned long long Control;
+       unsigned long long PageAlign;
+       unsigned long long PortCnt;
+       unsigned long long DebugPortSelect;
+       unsigned long long DebugPort;
+       unsigned long long SendRegBase;
+       unsigned long long UserRegBase;
+       unsigned long long CounterRegBase;
+       unsigned long long Scratch;
+       unsigned long long ReservedMisc1;
+       unsigned long long InterruptConfig;
+       unsigned long long IntBlocked;
+       unsigned long long IntMask;
+       unsigned long long IntStatus;
+       unsigned long long IntClear;
+       unsigned long long ErrorMask;
+       unsigned long long ErrorStatus;
+       unsigned long long ErrorClear;
+       unsigned long long HwErrMask;
+       unsigned long long HwErrStatus;
+       unsigned long long HwErrClear;
+       unsigned long long HwDiagCtrl;
+       unsigned long long MDIO;
+       unsigned long long IBCStatus;
+       unsigned long long IBCCtrl;
+       unsigned long long ExtStatus;
+       unsigned long long ExtCtrl;
+       unsigned long long GPIOOut;
+       unsigned long long GPIOMask;
+       unsigned long long GPIOStatus;
+       unsigned long long GPIOClear;
+       unsigned long long RcvCtrl;
+       unsigned long long RcvBTHQP;
+       unsigned long long RcvHdrSize;
+       unsigned long long RcvHdrCnt;
+       unsigned long long RcvHdrEntSize;
+       unsigned long long RcvTIDBase;
+       unsigned long long RcvTIDCnt;
+       unsigned long long RcvEgrBase;
+       unsigned long long RcvEgrCnt;
+       unsigned long long RcvBufBase;
+       unsigned long long RcvBufSize;
+       unsigned long long RxIntMemBase;
+       unsigned long long RxIntMemSize;
+       unsigned long long RcvPartitionKey;
+       unsigned long long ReservedRcv[10];
+       unsigned long long SendCtrl;
+       unsigned long long SendPIOBufBase;
+       unsigned long long SendPIOSize;
+       unsigned long long SendPIOBufCnt;
+       unsigned long long SendPIOAvailAddr;
+       unsigned long long TxIntMemBase;
+       unsigned long long TxIntMemSize;
+       unsigned long long ReservedSend[9];
+       unsigned long long SendBufferError;
+       unsigned long long SendBufferErrorCONT1;
+       unsigned long long SendBufferErrorCONT2;
+       unsigned long long SendBufferErrorCONT3;
+       unsigned long long ReservedSBE[4];
+       unsigned long long RcvHdrAddr0;
+       unsigned long long RcvHdrAddr1;
+       unsigned long long RcvHdrAddr2;
+       unsigned long long RcvHdrAddr3;
+       unsigned long long RcvHdrAddr4;
+       unsigned long long RcvHdrAddr5;
+       unsigned long long RcvHdrAddr6;
+       unsigned long long RcvHdrAddr7;
+       unsigned long long RcvHdrAddr8;
+       unsigned long long ReservedRHA[7];
+       unsigned long long RcvHdrTailAddr0;
+       unsigned long long RcvHdrTailAddr1;
+       unsigned long long RcvHdrTailAddr2;
+       unsigned long long RcvHdrTailAddr3;
+       unsigned long long RcvHdrTailAddr4;
+       unsigned long long RcvHdrTailAddr5;
+       unsigned long long RcvHdrTailAddr6;
+       unsigned long long RcvHdrTailAddr7;
+       unsigned long long RcvHdrTailAddr8;
+       unsigned long long ReservedRHTA[7];
+       unsigned long long Sync;        /* Software only */
+       unsigned long long Dump;        /* Software only */
+       unsigned long long SimVer;      /* Software only */
+       unsigned long long ReservedSW[5];
+       unsigned long long SerdesConfig0;
+       unsigned long long SerdesConfig1;
+       unsigned long long SerdesStatus;
+       unsigned long long XGXSConfig;
+       unsigned long long ReservedSW2[4];
+};
+
+#define IPATH_KREG_OFFSET(field) (offsetof(struct \
+    _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
+#define IPATH_CREG_OFFSET(field) (offsetof( \
+    struct infinipath_counters, field) / sizeof(u64))
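+/*
+ * The offsets computed by these macros are in units of 64-bit
+ * registers (offsetof() / sizeof(u64)); they are used as u64 indices
+ * by helpers such as ipath_read_kreg64() and ipath_write_kreg().
+ */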
+
+static const struct ipath_kregs ipath_ht_kregs = {
+       .kr_control = IPATH_KREG_OFFSET(Control),
+       .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
+       .kr_debugport = IPATH_KREG_OFFSET(DebugPort),
+       .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
+       .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
+       .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
+       .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
+       .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
+       .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
+       .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
+       .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
+       .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
+       .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
+       .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
+       .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
+       .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
+       .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
+       .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
+       .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
+       .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
+       .kr_intclear = IPATH_KREG_OFFSET(IntClear),
+       .kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig),
+       .kr_intmask = IPATH_KREG_OFFSET(IntMask),
+       .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
+       .kr_mdio = IPATH_KREG_OFFSET(MDIO),
+       .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
+       .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
+       .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
+       .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
+       .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
+       .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
+       .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
+       .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
+       .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
+       .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
+       .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
+       .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
+       .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
+       .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
+       .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
+       .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
+       .kr_revision = IPATH_KREG_OFFSET(Revision),
+       .kr_scratch = IPATH_KREG_OFFSET(Scratch),
+       .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
+       .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
+       .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
+       .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
+       .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
+       .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
+       .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
+       .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
+       .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
+       .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
+       .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
+       .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
+       .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
+       .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
+       /*
+        * These should not be used directly via ipath_read_kreg64(),
+        * use them with ipath_read_kreg64_port(),
+        */
+       .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
+       .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
+};
+
+static const struct ipath_cregs ipath_ht_cregs = {
+       .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
+       .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
+       .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
+       .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
+       .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
+       .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
+       .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
+       .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
+       .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
+       .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
+       .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
+       .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
+       /* calc from Reg_CounterRegBase + offset */
+       .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
+       .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
+       .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
+       .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
+       .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
+       .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
+       .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
+       .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
+       .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
+       .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
+       .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
+       .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
+       .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
+       .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
+       .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
+       .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
+       .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
+       .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
+       .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
+       .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
+       .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
+};
+
+/* kr_intstatus, kr_intclear, kr_intmask bits */
+#define INFINIPATH_I_RCVURG_MASK 0x1FF
+#define INFINIPATH_I_RCVAVAIL_MASK 0x1FF
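+/*
+ * 0x1FF is one bit per receive port; this chip has nine ports
+ * (RcvHdrAddr0..8 in the register layout above).
+ */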
+
+/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
+#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
+#define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL
+#define INFINIPATH_HWE_HTCLNKABYTE0CRCERR   0x0000000000800000ULL
+#define INFINIPATH_HWE_HTCLNKABYTE1CRCERR   0x0000000001000000ULL
+#define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR   0x0000000002000000ULL
+#define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR   0x0000000004000000ULL
+#define INFINIPATH_HWE_HTCMISCERR4          0x0000000008000000ULL
+#define INFINIPATH_HWE_HTCMISCERR5          0x0000000010000000ULL
+#define INFINIPATH_HWE_HTCMISCERR6          0x0000000020000000ULL
+#define INFINIPATH_HWE_HTCMISCERR7          0x0000000040000000ULL
+#define INFINIPATH_HWE_HTCBUSTREQPARITYERR  0x0000000080000000ULL
+#define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL
+#define INFINIPATH_HWE_HTCBUSIREQPARITYERR  0x0000000200000000ULL
+#define INFINIPATH_HWE_COREPLL_FBSLIP       0x0080000000000000ULL
+#define INFINIPATH_HWE_COREPLL_RFSLIP       0x0100000000000000ULL
+#define INFINIPATH_HWE_HTBPLL_FBSLIP        0x0200000000000000ULL
+#define INFINIPATH_HWE_HTBPLL_RFSLIP        0x0400000000000000ULL
+#define INFINIPATH_HWE_HTAPLL_FBSLIP        0x0800000000000000ULL
+#define INFINIPATH_HWE_HTAPLL_RFSLIP        0x1000000000000000ULL
+#define INFINIPATH_HWE_SERDESPLLFAILED      0x2000000000000000ULL
+
+/* kr_extstatus bits */
+#define INFINIPATH_EXTS_FREQSEL 0x2
+#define INFINIPATH_EXTS_SERDESSEL 0x4
+#define INFINIPATH_EXTS_MEMBIST_ENDTEST     0x0000000000004000
+#define INFINIPATH_EXTS_MEMBIST_CORRECT     0x0000000000008000
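+/*
+ * On the LS/X-1 board, INFINIPATH_EXTS_SERDESSEL set in ExtStatus
+ * means the unit is hardware-disabled; see ipath_ht_boardname().
+ */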
+
+/*
+ * masks and bits that are different in different chips, or present only
+ * in one
+ */
+static const ipath_err_t infinipath_hwe_htcmemparityerr_mask =
+    INFINIPATH_HWE_HTCMEMPARITYERR_MASK;
+static const ipath_err_t infinipath_hwe_htcmemparityerr_shift =
+    INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT;
+
+static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr =
+    INFINIPATH_HWE_HTCLNKABYTE0CRCERR;
+static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr =
+    INFINIPATH_HWE_HTCLNKABYTE1CRCERR;
+static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr =
+    INFINIPATH_HWE_HTCLNKBBYTE0CRCERR;
+static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr =
+    INFINIPATH_HWE_HTCLNKBBYTE1CRCERR;
+
+#define _IPATH_GPIO_SDA_NUM 1
+#define _IPATH_GPIO_SCL_NUM 0
+
+#define IPATH_GPIO_SDA \
+       (1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+#define IPATH_GPIO_SCL \
+       (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+
+/* keep the code below somewhat more readable; not used elsewhere */
+#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |    \
+                               infinipath_hwe_htclnkabyte1crcerr)
+#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr |    \
+                               infinipath_hwe_htclnkbbyte1crcerr)
+#define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |    \
+                               infinipath_hwe_htclnkbbyte0crcerr)
+#define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr |    \
+                               infinipath_hwe_htclnkbbyte1crcerr)
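+/*
+ * _IPATH_HTLINKn_CRCBITS groups both byte lanes of HT link n (A or B);
+ * _IPATH_HTLANEn_CRCBITS groups byte lane n across both links.
+ * hwerr_crcbits() below uses the two groupings to report which link
+ * and which lane saw a CRC error.
+ */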
+
+static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
+                         char *msg, size_t msgl)
+{
+       char bitsmsg[64];
+       ipath_err_t crcbits = hwerrs &
+               (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS);
+       /* don't check if 8bit HT */
+       if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
+               crcbits &= ~infinipath_hwe_htclnkabyte1crcerr;
+       /* don't check if 8bit HT */
+       if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
+               crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr;
+       /*
+        * we'll want to ignore link errors on a link that is
+        * not in use, if any.  For now, complain about both.
+        */
+       if (crcbits) {
+               u16 ctrl0, ctrl1;
+               snprintf(bitsmsg, sizeof bitsmsg,
+                        "[HT%s lane %s CRC (%llx); ignore till reload]",
+                        !(crcbits & _IPATH_HTLINK1_CRCBITS) ?
+                        "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
+                                   ? "1 (B)" : "0+1 (A+B)"),
+                        !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0"
+                        : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" :
+                           "0+1"), (unsigned long long) crcbits);
+               strlcat(msg, bitsmsg, msgl);
+
+               /*
+                * print extra info for debugging.  slave/primary
+                * config word 4, 8 (link control 0, 1)
+                */
+
+               if (pci_read_config_word(dd->pcidev,
+                                        dd->ipath_ht_slave_off + 0x4,
+                                        &ctrl0))
+                       dev_info(&dd->pcidev->dev, "Couldn't read "
+                                "linkctrl0 of slave/primary "
+                                "config block\n");
+               else if (!(ctrl0 & 1 << 6))
+                       /* not if EOC bit set */
+                       ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0,
+                                 ((ctrl0 >> 8) & 7) ? " CRC" : "",
+                                 ((ctrl0 >> 4) & 1) ? " linkfail" :
+                                 "");
+               if (pci_read_config_word(dd->pcidev,
+                                        dd->ipath_ht_slave_off + 0x8,
+                                        &ctrl1))
+                       dev_info(&dd->pcidev->dev, "Couldn't read "
+                                "linkctrl1 of slave/primary "
+                                "config block\n");
+               else if (!(ctrl1 & 1 << 6))
+                       /* not if EOC bit set */
+                       ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1,
+                                 ((ctrl1 >> 8) & 7) ? " CRC" : "",
+                                 ((ctrl1 >> 4) & 1) ? " linkfail" :
+                                 "");
+
+               /* disable until driver reloaded */
+               dd->ipath_hwerrmask &= ~crcbits;
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+                                dd->ipath_hwerrmask);
+               ipath_dbg("HT crc errs: %s\n", msg);
+       } else
+               ipath_dbg("ignoring HT crc errors 0x%llx, "
+                         "not in use\n", (unsigned long long)
+                         (hwerrs & (_IPATH_HTLINK0_CRCBITS |
+                                    _IPATH_HTLINK1_CRCBITS)));
+}
+
+/**
+ * ipath_ht_handle_hwerrors - display hardware errors
+ * @dd: the infinipath device
+ * @msg: the output buffer
+ * @msgl: the size of the output buffer
+ *
+ * Most hardware errors are catastrophic, but for right now we'll
+ * print them and continue.  We reuse the same message buffer as
+ * ipath_handle_errors() to avoid excessive stack usage.
+ */
+static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
+                                    size_t msgl)
+{
+       ipath_err_t hwerrs;
+       u32 bits, ctrl;
+       int isfatal = 0;
+       char bitsmsg[64];
+
+       hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
+
+       if (!hwerrs) {
+               ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
+               /*
+                * better than printing confusing messages.
+                * This seems to be related to clearing the crc error, or
+                * the pll error during init.
+                */
+               goto bail;
+       } else if (hwerrs == -1LL) {
+               ipath_dev_err(dd, "Read of hardware error status failed "
+                             "(all bits set); ignoring\n");
+               goto bail;
+       }
+       ipath_stats.sps_hwerrs++;
+
+       /* Always clear the error status register, except MEMBISTFAIL,
+        * regardless of whether we continue or stop using the chip.
+        * We want that set so we know it failed, even across driver reload.
+        * We'll still ignore it in the hwerrmask.  We do this partly for
+        * diagnostics, but also for support */
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+                        hwerrs & ~INFINIPATH_HWE_MEMBISTFAILED);
+
+       hwerrs &= dd->ipath_hwerrmask;
+
+       /*
+        * make sure we get this much out, unless told to be quiet,
+        * or it's a repeat of bits we've already reported
+        */
+       if ((hwerrs & ~dd->ipath_lasthwerror) ||
+           (ipath_debug & __IPATH_VERBDBG))
+               dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
+                        "(cleared)\n", (unsigned long long) hwerrs);
+       dd->ipath_lasthwerror |= hwerrs;
+
+       if (hwerrs & ~infinipath_hwe_bitsextant)
+               ipath_dev_err(dd, "hwerror interrupt with unknown errors "
+                             "%llx set\n", (unsigned long long)
+                             (hwerrs & ~infinipath_hwe_bitsextant));
+
+       ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
+       if (ctrl & INFINIPATH_C_FREEZEMODE) {
+               if (hwerrs) {
+                       /*
+                        * if any set that we aren't ignoring; only
+                        * make the complaint once, in case it's stuck
+                        * or recurring, and we get here multiple
+                        * times.
+                        */
+                       if (dd->ipath_flags & IPATH_INITTED) {
+                               ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+                                             "mode), no longer usable, SN %.16s\n",
+                                                 dd->ipath_serial);
+                               isfatal = 1;
+                       }
+                       *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+                       /* mark as having had error */
+                       *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+                       /*
+                        * mark as not usable, at a minimum until driver
+                        * is reloaded, probably until reboot, since no
+                        * other reset is possible.
+                        */
+                       dd->ipath_flags &= ~IPATH_INITTED;
+               } else {
+                       ipath_dbg("Clearing freezemode on ignored hardware "
+                                 "error\n");
+                       ctrl &= ~INFINIPATH_C_FREEZEMODE;
+                       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+                                        ctrl);
+               }
+       }
+
+       *msg = '\0';
+
+       /*
+        * may someday want to decode into which bits are which
+        * functional area for parity errors, etc.
+        */
+       if (hwerrs & (infinipath_hwe_htcmemparityerr_mask
+                     << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) {
+               bits = (u32) ((hwerrs >>
+                              INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) &
+                             INFINIPATH_HWE_HTCMEMPARITYERR_MASK);
+               snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ",
+                        bits);
+               strlcat(msg, bitsmsg, msgl);
+       }
+       if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
+                     << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
+               bits = (u32) ((hwerrs >>
+                              INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
+                             INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
+               snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
+                        bits);
+               strlcat(msg, bitsmsg, msgl);
+       }
+       if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
+                     << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+               bits = (u32) ((hwerrs >>
+                              INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
+                             INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
+               snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
+                        bits);
+               strlcat(msg, bitsmsg, msgl);
+       }
+       if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
+               strlcat(msg, "[IB2IPATH Parity]", msgl);
+       if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
+               strlcat(msg, "[IPATH2IB Parity]", msgl);
+       if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR)
+               strlcat(msg, "[HTC Ireq Parity]", msgl);
+       if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR)
+               strlcat(msg, "[HTC Treq Parity]", msgl);
+       if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR)
+               strlcat(msg, "[HTC Tresp Parity]", msgl);
+
+       if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
+               hwerr_crcbits(dd, hwerrs, msg, msgl);
+
+       if (hwerrs & INFINIPATH_HWE_HTCMISCERR5)
+               strlcat(msg, "[HT core Misc5]", msgl);
+       if (hwerrs & INFINIPATH_HWE_HTCMISCERR6)
+               strlcat(msg, "[HT core Misc6]", msgl);
+       if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
+               strlcat(msg, "[HT core Misc7]", msgl);
+       if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
+               strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
+                       msgl);
+               /* ignore from now on, so disable until driver reloaded */
+               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+                                dd->ipath_hwerrmask);
+       }
+#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP |       \
+                        INFINIPATH_HWE_COREPLL_RFSLIP |        \
+                        INFINIPATH_HWE_HTBPLL_FBSLIP |         \
+                        INFINIPATH_HWE_HTBPLL_RFSLIP |         \
+                        INFINIPATH_HWE_HTAPLL_FBSLIP |         \
+                        INFINIPATH_HWE_HTAPLL_RFSLIP)
+
+       if (hwerrs & _IPATH_PLL_FAIL) {
+               snprintf(bitsmsg, sizeof bitsmsg,
+                        "[PLL failed (%llx), InfiniPath hardware unusable]",
+                        (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
+               strlcat(msg, bitsmsg, msgl);
+               /* ignore from now on, so disable until driver reloaded */
+               dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+                                dd->ipath_hwerrmask);
+       }
+
+       if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
+               /*
+                * If it occurs, it is left masked since the external
+                * interface is unused
+                */
+               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+                                dd->ipath_hwerrmask);
+       }
+
+       if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
+               strlcat(msg, "[Rx Dsync]", msgl);
+       if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
+               strlcat(msg, "[SerDes PLL]", msgl);
+
+       ipath_dev_err(dd, "%s hardware error\n", msg);
+       if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
+               /*
+                * for status file; if no trailing brace is copied,
+                * we'll know it was truncated.
+                */
+               snprintf(dd->ipath_freezemsg,
+                        dd->ipath_freezelen, "{%s}", msg);
+
+bail:;
+}
+
+/**
+ * ipath_ht_boardname - fill in the board name
+ * @dd: the infinipath device
+ * @name: the output buffer
+ * @namelen: the size of the output buffer
+ *
+ * fill in the board name, based on the board revision register
+ */
+static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
+                             size_t namelen)
+{
+       char *n = NULL;
+       u8 boardrev = dd->ipath_boardrev;
+       int ret;
+
+       switch (boardrev) {
+       case 4:         /* Ponderosa is one of the bringup boards */
+               n = "Ponderosa";
+               break;
+       case 5:
+               /*
+                * original production board; two production levels, with
+                * different serial number ranges.   See ipath_ht_early_init() for
+                * case where we enable IPATH_GPIO_INTR for later serial # range.
+                */
+               n = "InfiniPath_QHT7040";
+               break;
+       case 6:
+               n = "OEM_Board_3";
+               break;
+       case 7:
+               /* small form factor production board */
+               n = "InfiniPath_QHT7140";
+               break;
+       case 8:
+               n = "LS/X-1";
+               break;
+       case 9:         /* Comstock bringup test board */
+               n = "Comstock";
+               break;
+       case 10:
+               n = "OEM_Board_2";
+               break;
+       case 11:
+               n = "InfiniPath_HT-470"; /* obsoleted */
+               break;
+       case 12:
+               n = "OEM_Board_4";
+               break;
+       default:                /* don't know, just print the number */
+               ipath_dev_err(dd, "Don't yet know about board "
+                             "with ID %u\n", boardrev);
+               snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
+                        boardrev);
+               break;
+       }
+       if (n)
+               snprintf(name, namelen, "%s", n);
+
+       if (dd->ipath_majrev != 3 || dd->ipath_minrev < 2 ||
+           dd->ipath_minrev > 3) {
+               /*
+                * This version of the driver only supports Rev 3.2 and 3.3
+                */
+               ipath_dev_err(dd,
+                             "Unsupported InfiniPath hardware revision %u.%u!\n",
+                             dd->ipath_majrev, dd->ipath_minrev);
+               ret = 1;
+               goto bail;
+       }
+       /*
+        * pkt/word counters are 32 bit, and therefore wrap fast enough
+        * that we snapshot them from a timer, and maintain 64 bit shadow
+        * copies
+        */
+       dd->ipath_flags |= IPATH_32BITCOUNTERS;
+       if (dd->ipath_htspeed != 800)
+               ipath_dev_err(dd,
+                             "Incorrectly configured for HT @ %uMHz\n",
+                             dd->ipath_htspeed);
+       if (dd->ipath_boardrev == 7 || dd->ipath_boardrev == 11 ||
+           dd->ipath_boardrev == 6)
+               dd->ipath_flags |= IPATH_GPIO_INTR;
+       else
+               dd->ipath_flags |= IPATH_POLL_RX_INTR;
+       if (dd->ipath_boardrev == 8) {  /* LS/X-1 */
+               u64 val;
+               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
+               if (val & INFINIPATH_EXTS_SERDESSEL) {
+                       /*
+                        * hardware disabled
+                        *
+                        * This means that the chip is hardware disabled,
+                        * and will not be able to bring up the link,
+                        * in any case.  We special case this and abort
+                        * early, to avoid later messages.  We also set
+                        * the DISABLED status bit
+                        */
+                       ipath_dbg("Unit %u is hardware-disabled\n",
+                                 dd->ipath_unit);
+                       *dd->ipath_statusp |= IPATH_STATUS_DISABLED;
+                       /* this value is handled differently */
+                       ret = 2;
+                       goto bail;
+               }
+       }
+       ret = 0;
+
+bail:
+       return ret;
+}
+
+static void ipath_check_htlink(struct ipath_devdata *dd)
+{
+       u8 linkerr, link_off, i;
+
+       for (i = 0; i < 2; i++) {
+               link_off = dd->ipath_ht_slave_off + i * 4 + 0xd;
+               if (pci_read_config_byte(dd->pcidev, link_off, &linkerr))
+                       dev_info(&dd->pcidev->dev, "Couldn't read "
+                                "linkerror%d of HT slave/primary block\n",
+                                i);
+               else if (linkerr & 0xf0) {
+                       ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
+                                  "clearing\n", i, linkerr >> 4);
+                       /*
+                        * writing the linkerr bits that are set should
+                        * clear them
+                        */
+                       if (pci_write_config_byte(dd->pcidev, link_off,
+                                                 linkerr))
+                               ipath_dbg("Failed write to clear HT "
+                                         "linkerror%d\n", i);
+                       if (pci_read_config_byte(dd->pcidev, link_off,
+                                                &linkerr))
+                               dev_info(&dd->pcidev->dev,
+                                        "Couldn't reread linkerror%d of "
+                                        "HT slave/primary block\n", i);
+                       else if (linkerr & 0xf0)
+                               dev_info(&dd->pcidev->dev,
+                                        "HT linkerror%d bits 0x%x "
+                                        "couldn't be cleared\n",
+                                        i, linkerr >> 4);
+               }
+       }
+}
+
+static int ipath_setup_ht_reset(struct ipath_devdata *dd)
+{
+       ipath_dbg("No reset possible for this InfiniPath hardware\n");
+       return 0;
+}
+
+#define HT_CAPABILITY_ID   0x08        /* HT capabilities not defined in kernel */
+#define HT_INTR_DISC_CONFIG  0x80      /* HT interrupt and discovery cap */
+#define HT_INTR_REG_INDEX    2 /* intconfig requires indirect accesses */
+
+/*
+ * An HT capability whose command register has bits 13-15 clear is the
+ * slave/primary interface block.  Clear any HT CRC
+ * errors.  We only bother to do this at load time, because it's OK if
+ * it happened before we were loaded (first time after boot/reset),
+ * but any time after that, it's fatal anyway.  Also need to not check
+ * for upper byte errors if we are in 8 bit mode, so figure out
+ * our width.  For now, at least, also complain if it's 8 bit.
+ */
+static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
+                            int pos, u8 cap_type)
+{
+       u8 linkwidth = 0, linkerr, link_a_b_off, link_off;
+       u16 linkctrl = 0;
+       int i;
+
+       dd->ipath_ht_slave_off = pos;
+       /* command word, master_host bit */
+       /* master host || slave */
+       if ((cap_type >> 2) & 1)
+               link_a_b_off = 4;
+       else
+               link_a_b_off = 0;
+       ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n",
+                  link_a_b_off ? 1 : 0,
+                  link_a_b_off ? 'B' : 'A');
+
+       link_a_b_off += pos;
+
+       /*
+        * check both link control registers; clear both HT CRC sets if
+        * necessary.
+        */
+       for (i = 0; i < 2; i++) {
+               link_off = pos + i * 4 + 0x4;
+               if (pci_read_config_word(pdev, link_off, &linkctrl))
+                       ipath_dev_err(dd, "Couldn't read HT link control%d "
+                                     "register\n", i);
+               else if (linkctrl & (0xf << 8)) {
+                       ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error "
+                                  "bits %x\n", i, linkctrl & (0xf << 8));
+                       /*
+                        * now write them back to clear the error.
+                        */
+                       pci_write_config_byte(pdev, link_off,
+                                             linkctrl & (0xf << 8));
+               }
+       }
+
+       /*
+        * As with HT CRC bits, same for protocol errors that might occur
+        * during boot.
+        */
+       for (i = 0; i < 2; i++) {
+               link_off = pos + i * 4 + 0xd;
+               if (pci_read_config_byte(pdev, link_off, &linkerr))
+                       dev_info(&pdev->dev, "Couldn't read linkerror%d "
+                                "of HT slave/primary block\n", i);
+               else if (linkerr & 0xf0) {
+                       ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
+                                  "clearing\n", i, linkerr >> 4);
+                       /*
+                        * writing the linkerr bits that are set will clear
+                        * them
+                        */
+                       if (pci_write_config_byte
+                           (pdev, link_off, linkerr))
+                               ipath_dbg("Failed write to clear HT "
+                                         "linkerror%d\n", i);
+                       if (pci_read_config_byte(pdev, link_off, &linkerr))
+                               dev_info(&pdev->dev, "Couldn't reread "
+                                        "linkerror%d of HT slave/primary "
+                                        "block\n", i);
+                       else if (linkerr & 0xf0)
+                               dev_info(&pdev->dev, "HT linkerror%d bits "
+                                        "0x%x couldn't be cleared\n",
+                                        i, linkerr >> 4);
+               }
+       }
+
+       /*
+        * this is just for our link to the host, not devices connected
+        * through tunnel.
+        */
+
+       if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth))
+               ipath_dev_err(dd, "Couldn't read HT link width "
+                             "config register\n");
+       else {
+               u32 width;
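+               /*
+                * Decode the low three bits of the link width/config
+                * byte into a width in bits; unrecognized values fall
+                * back to the 8-bit default.
+                */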
+               switch (linkwidth & 7) {
+               case 5:
+                       width = 4;
+                       break;
+               case 4:
+                       width = 2;
+                       break;
+               case 3:
+                       width = 32;
+                       break;
+               case 1:
+                       width = 16;
+                       break;
+               case 0:
+               default:        /* if wrong, assume 8 bit */
+                       width = 8;
+                       break;
+               }
+
+               dd->ipath_htwidth = width;
+
+               if (linkwidth != 0x11) {
+                       ipath_dev_err(dd, "Not configured for 16 bit HT "
+                                     "(%x)\n", linkwidth);
+                       if (!(linkwidth & 0xf)) {
+                               ipath_dbg("Will ignore HT lane1 errors\n");
+                               dd->ipath_flags |= IPATH_8BIT_IN_HT0;
+                       }
+               }
+       }
+
+       /*
+        * this is just for our link to the host, not devices connected
+        * through tunnel.
+        */
+       if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth))
+               ipath_dev_err(dd, "Couldn't read HT link frequency "
+                             "config register\n");
+       else {
+               u32 speed;
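+               /*
+                * Decode the low four bits of the link frequency byte
+                * into the link clock in MHz.
+                */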
+               switch (linkwidth & 0xf) {
+               case 6:
+                       speed = 1000;
+                       break;
+               case 5:
+                       speed = 800;
+                       break;
+               case 4:
+                       speed = 600;
+                       break;
+               case 3:
+                       speed = 500;
+                       break;
+               case 2:
+                       speed = 400;
+                       break;
+               case 1:
+                       speed = 300;
+                       break;
+               default:
+                       /*
+                        * assume reserved and vendor-specific are 200...
+                        */
+               case 0:
+                       speed = 200;
+                       break;
+               }
+               dd->ipath_htspeed = speed;
+       }
+}
+
+static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev,
+                           int pos)
+{
+       u32 int_handler_addr_lower;
+       u32 int_handler_addr_upper;
+       u64 ihandler;
+       u32 intvec;
+
+       /* use indirection register to get the intr handler */
+       pci_write_config_byte(pdev, pos + HT_INTR_REG_INDEX, 0x10);
+       pci_read_config_dword(pdev, pos + 4, &int_handler_addr_lower);
+       pci_write_config_byte(pdev, pos + HT_INTR_REG_INDEX, 0x11);
+       pci_read_config_dword(pdev, pos + 4, &int_handler_addr_upper);
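+       /*
+        * Indirect registers 0x10 and 0x11, read back through the
+        * dword at pos + 4, hold the low and high 32 bits of the
+        * interrupt handler address.
+        */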
+
+       ihandler = (u64) int_handler_addr_lower |
+               ((u64) int_handler_addr_upper << 32);
+
+       /*
+        * kernels with CONFIG_PCI_MSI set the vector in the irq field of
+        * struct pci_dev, so we use that to program the internal
+        * interrupt register (not config space) with that value. The BIOS
+        * must still have done the basic MSI setup.
+        */
+       intvec = pdev->irq;
+       /*
+        * clear any vector bits there; normally not set but we'll overload
+        * this for some debug purposes (setting the HTC debug register
+        * value from software, rather than GPIOs), so it might be set on a
+        * driver reload.
+        */
+       ihandler &= ~0xff0000;
+       /* x86 vector goes in intrinfo[23:16] */
+       ihandler |= intvec << 16;
+       ipath_cdbg(VERBOSE, "ihandler lower %x, upper %x, intvec %x, "
+                  "interruptconfig %llx\n", int_handler_addr_lower,
+                  int_handler_addr_upper, intvec,
+                  (unsigned long long) ihandler);
+
+       /* can't program yet, so save for interrupt setup */
+       dd->ipath_intconfig = ihandler;
+       /* keep going, so we find link control stuff also */
+
+       return ihandler != 0;
+}
+
+/**
+ * ipath_setup_ht_config - setup the interruptconfig register
+ * @dd: the infinipath device
+ * @pdev: the PCI device
+ *
+ * setup the interruptconfig register from the HT config info.
+ * Also clear CRC errors in HT linkcontrol, if necessary.
+ * This is done only for the real hardware.  It is done before
+ * chip address space is initted, so can't touch infinipath registers
+ */
+static int ipath_setup_ht_config(struct ipath_devdata *dd,
+                                struct pci_dev *pdev)
+{
+       int pos, ret = 0;
+       int ihandler = 0;
+
+       /*
+        * Read the capability info to find the interrupt info, and also
+        * handle clearing CRC errors in linkctrl register if necessary.  We
+        * do this early, before we ever enable errors or hardware errors,
+        * mostly to avoid causing the chip to enter freeze mode.
+        */
+       pos = pci_find_capability(pdev, HT_CAPABILITY_ID);
+       if (!pos) {
+               ipath_dev_err(dd, "Couldn't find HyperTransport "
+                             "capability; no interrupts\n");
+               ret = -ENODEV;
+               goto bail;
+       }
+       do {
+               u8 cap_type;
+
+               /* the HT capability type byte is 3 bytes after the
+                * capability byte.
+                */
+               if (pci_read_config_byte(pdev, pos + 3, &cap_type)) {
+                       dev_info(&pdev->dev, "Couldn't read config "
+                                "command @ %d\n", pos);
+                       continue;
+               }
+               if (!(cap_type & 0xE0))
+                       slave_or_pri_blk(dd, pdev, pos, cap_type);
+               else if (cap_type == HT_INTR_DISC_CONFIG)
+                       ihandler = set_int_handler(dd, pdev, pos);
+       } while ((pos = pci_find_next_capability(pdev, pos,
+                                                HT_CAPABILITY_ID)));
+
+       if (!ihandler) {
+               ipath_dev_err(dd, "Couldn't find interrupt handler in "
+                             "config space\n");
+               ret = -ENODEV;
+       }
+
+bail:
+       return ret;
+}
+
+/**
+ * ipath_setup_ht_cleanup - clean up any per-chip chip-specific stuff
+ * @dd: the infinipath device
+ *
+ * Called during driver unload.
+ * This is currently a nop for the HT chip, not for all chips
+ */
+static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
+{
+}
+
+/**
+ * ipath_setup_ht_setextled - set the state of the two external LEDs
+ * @dd: the infinipath device
+ * @lst: the L state
+ * @ltst: the LT state
+ *
+ * Set the state of the two external LEDs, to indicate physical and
+ * logical state of IB link.   For this chip (at least with recommended
+ * board pinouts), LED1 is Green (physical state), and LED2 is Yellow
+ * (logical state)
+ *
+ * Note:  We try to match the Mellanox HCA LED behavior as best
+ * we can.  Green indicates physical link state is OK (something is
+ * plugged in, and we can train).
+ * Amber indicates the link is logically up (ACTIVE).
+ * Mellanox further blinks the amber LED to indicate data packet
+ * activity, but we have no hardware support for that, so it would
+ * require waking up every 10-20 msecs and checking the counters
+ * on the chip, and then turning the LED off if appropriate.  That's
+ * visible overhead, so not something we will do.
+ *
+ */
+static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
+                                    u64 lst, u64 ltst)
+{
+       u64 extctl;
+
+       /* the diags use the LED to indicate diag info, so we leave
+        * the external LED alone when the diags are running */
+       if (ipath_diag_inuse)
+               return;
+
+       /*
+        * start by setting both LED control bits to off, then turn
+        * on the appropriate bit(s).
+        */
+       if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */
+               /*
+                * major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF
+                * is inverted,  because it is normally used to indicate
+                * a hardware fault at reset, if there were errors
+                */
+               extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON)
+                       | INFINIPATH_EXTC_LEDGBLERR_OFF;
+               if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
+                       extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF;
+               if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
+                       extctl |= INFINIPATH_EXTC_LEDGBLOK_ON;
+       } else {
+               extctl = dd->ipath_extctrl &
+                       ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
+                         INFINIPATH_EXTC_LED2PRIPORT_ON);
+               if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
+                       extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
+               if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
+                       extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
+       }
+       dd->ipath_extctrl = extctl;
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
+}
+
+static void ipath_init_ht_variables(void)
+{
+       ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+       ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+       ipath_gpio_sda = IPATH_GPIO_SDA;
+       ipath_gpio_scl = IPATH_GPIO_SCL;
+
+       infinipath_i_bitsextant =
+               (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
+               (INFINIPATH_I_RCVAVAIL_MASK <<
+                INFINIPATH_I_RCVAVAIL_SHIFT) |
+               INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
+               INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
+
+       infinipath_e_bitsextant =
+               INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
+               INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
+               INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
+               INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
+               INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
+               INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
+               INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+               INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
+               INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
+               INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
+               INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
+               INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
+               INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
+               INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
+               INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
+               INFINIPATH_E_HARDWARE;
+
+       infinipath_hwe_bitsextant =
+               (INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
+                INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
+               (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+                INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
+               (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+                INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
+               INFINIPATH_HWE_HTCLNKABYTE0CRCERR |
+               INFINIPATH_HWE_HTCLNKABYTE1CRCERR |
+               INFINIPATH_HWE_HTCLNKBBYTE0CRCERR |
+               INFINIPATH_HWE_HTCLNKBBYTE1CRCERR |
+               INFINIPATH_HWE_HTCMISCERR4 |
+               INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 |
+               INFINIPATH_HWE_HTCMISCERR7 |
+               INFINIPATH_HWE_HTCBUSTREQPARITYERR |
+               INFINIPATH_HWE_HTCBUSTRESPPARITYERR |
+               INFINIPATH_HWE_HTCBUSIREQPARITYERR |
+               INFINIPATH_HWE_RXDSYNCMEMPARITYERR |
+               INFINIPATH_HWE_MEMBISTFAILED |
+               INFINIPATH_HWE_COREPLL_FBSLIP |
+               INFINIPATH_HWE_COREPLL_RFSLIP |
+               INFINIPATH_HWE_HTBPLL_FBSLIP |
+               INFINIPATH_HWE_HTBPLL_RFSLIP |
+               INFINIPATH_HWE_HTAPLL_FBSLIP |
+               INFINIPATH_HWE_HTAPLL_RFSLIP |
+               INFINIPATH_HWE_SERDESPLLFAILED |
+               INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
+               INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
+
+       infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+       infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+}
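+
+/*
+ * Note (sketch of intended use, inferred from the error handlers in
+ * this driver): the *_bitsextant masks record every interrupt, error,
+ * and hwerror bit this chip can actually set, so handlers can flag
+ * anything unexpected, e.g.
+ *
+ *     if (hwerrs & ~infinipath_hwe_bitsextant)
+ *             ipath_dev_err(dd, "unknown hwerror bits set\n");
+ */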
+
+/**
+ * ipath_ht_init_hwerrors - enable hardware errors
+ * @dd: the infinipath device
+ *
+ * now that we have finished initializing everything that might reasonably
+ * cause a hardware error, and cleared those error bits as they occur,
+ * we can enable hardware errors in the mask (potentially enabling
+ * freeze mode), and enable hardware errors as errors (along with
+ * everything else) in errormask
+ */
+static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
+{
+       ipath_err_t val;
+       u64 extsval;
+
+       extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
+
+       if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
+               ipath_dev_err(dd, "MemBIST did not complete!\n");
+
+       ipath_check_htlink(dd);
+
+       /* barring bugs, all hwerrors become interrupts */
+       val = -1LL;
+       /* don't look at crc lane1 if 8 bit in HT0 */
+       if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
+               val &= ~infinipath_hwe_htclnkabyte1crcerr;
+       /* don't look at crc lane1 if 8 bit in HT1 */
+       if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
+               val &= ~infinipath_hwe_htclnkbbyte1crcerr;
+
+       /*
+        * disable RXDSYNCMEMPARITY because external serdes is unused,
+        * and therefore the logic will never be used or initialized,
+        * and uninitialized state will normally result in this error
+        * being asserted.  Similarly for the external serdes pll
+        * lock signal.
+        */
+       val &= ~(INFINIPATH_HWE_SERDESPLLFAILED |
+                INFINIPATH_HWE_RXDSYNCMEMPARITYERR);
+
+       /*
+        * Disable MISCERR4 because of an inversion in the HT core
+        * logic checking for errors that cause this bit to be set.
+        * The errata can also cause the protocol error bit to be set
+        * in the HT config space linkerror register(s).
+        */
+       val &= ~INFINIPATH_HWE_HTCMISCERR4;
+
+       /*
+        * PLL ignored because MDIO interface has a logic problem
+        * for reads, on Comstock and Ponderosa.  BRINGUP
+        */
+       if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9)
+               val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
+       dd->ipath_hwerrmask = val;
+}
+
+/**
+ * ipath_ht_bringup_serdes - bring up the serdes
+ * @dd: the infinipath device
+ */
+static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
+{
+       u64 val, config1;
+       int ret = 0, change = 0;
+
+       ipath_dbg("Trying to bringup serdes\n");
+
+       if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
+           INFINIPATH_HWE_SERDESPLLFAILED) {
+               ipath_dbg("At start, serdes PLL failed bit set in "
+                         "hwerrstatus, clearing and continuing\n");
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+                                INFINIPATH_HWE_SERDESPLLFAILED);
+       }
+
+       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
+       config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
+
+       ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx "
+                  "config1=%llx, sstatus=%llx xgxs %llx\n",
+                  (unsigned long long) val, (unsigned long long) config1,
+                  (unsigned long long)
+                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
+                  (unsigned long long)
+                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
+
+       /* force reset on */
+       val |= INFINIPATH_SERDC0_RESET_PLL
+               /* | INFINIPATH_SERDC0_RESET_MASK */
+               ;
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
+       udelay(15);             /* need pll reset set at least for a bit */
+
+       if (val & INFINIPATH_SERDC0_RESET_PLL) {
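+               /*
+                * Note: the next statement both clears RESET_PLL in val
+                * and copies the cleared value into val2; val2 (with the
+                * lane resets and txidle added below) is what actually
+                * gets written back to serdesconfig0.
+                */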
+               u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL;
+               /* set lane resets, and tx idle, during pll reset */
+               val2 |= INFINIPATH_SERDC0_RESET_MASK |
+                       INFINIPATH_SERDC0_TXIDLE;
+               ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing "
+                          "%llx)\n", (unsigned long long) val2);
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
+                                val2);
+               /*
+                * be sure chip saw it
+                */
+               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+               /*
+                * need pll reset clear at least 11 usec before lane
+                * resets cleared; give it a few more
+                */
+               udelay(15);
+               val = val2;     /* for check below */
+       }
+
+       if (val & (INFINIPATH_SERDC0_RESET_PLL |
+                  INFINIPATH_SERDC0_RESET_MASK |
+                  INFINIPATH_SERDC0_TXIDLE)) {
+               val &= ~(INFINIPATH_SERDC0_RESET_PLL |
+                        INFINIPATH_SERDC0_RESET_MASK |
+                        INFINIPATH_SERDC0_TXIDLE);
+               /* clear them */
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
+                                val);
+       }
+
+       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+       if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
+            INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
+               val &= ~(INFINIPATH_XGXS_MDIOADDR_MASK <<
+                        INFINIPATH_XGXS_MDIOADDR_SHIFT);
+               /*
+                * we use address 3
+                */
+               val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
+               change = 1;
+       }
+       if (val & INFINIPATH_XGXS_RESET) {
+               /* normally true after boot */
+               val &= ~INFINIPATH_XGXS_RESET;
+               change = 1;
+       }
+       if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+            INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv) {
+               /* need to compensate for Tx inversion in partner */
+               val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+                        INFINIPATH_XGXS_RX_POL_SHIFT);
+               val |= dd->ipath_rx_pol_inv <<
+                       INFINIPATH_XGXS_RX_POL_SHIFT;
+               change = 1;
+       }
+       if (change)
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+
+       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
+
+       /* clear current and de-emphasis bits */
+       config1 &= ~0x0ffffffff00ULL;
+       /* set current to 20ma */
+       config1 |= 0x00000000000ULL;
+       /* set de-emphasis to -5.68dB */
+       config1 |= 0x0cccc000000ULL;
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
+
+       ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx "
+                  "config1=%llx, sstatus=%llx xgxs %llx\n",
+                  (unsigned long long) val, (unsigned long long) config1,
+                  (unsigned long long)
+                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
+                  (unsigned long long)
+                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
+
+       if (!ipath_waitfor_mdio_cmdready(dd)) {
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_mdio,
+                                ipath_mdio_req(IPATH_MDIO_CMD_READ, 31,
+                                               IPATH_MDIO_CTRL_XGXS_REG_8,
+                                               0));
+               if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio,
+                                          IPATH_MDIO_DATAVALID, &val))
+                       ipath_dbg("Never got MDIO data for XGXS status "
+                                 "read\n");
+               else
+                       ipath_cdbg(VERBOSE, "MDIO Read reg8, "
+                                  "'bank' 31 %x\n", (u32) val);
+       } else
+               ipath_dbg("Never got MDIO cmdready for XGXS status read\n");
+
+       return ret;             /* for now, say we always succeeded */
+}
+
+/**
+ * ipath_ht_quiet_serdes - set serdes to txidle
+ * @dd: the infinipath device
+ * Called when the driver is being unloaded
+ */
+static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
+{
+       u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
+
+       val |= INFINIPATH_SERDC0_TXIDLE;
+       ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
+                 (unsigned long long) val);
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
+}
+
+static int ipath_ht_intconfig(struct ipath_devdata *dd)
+{
+       int ret;
+
+       if (!dd->ipath_intconfig) {
+               ipath_dev_err(dd, "No interrupts enabled, couldn't setup "
+                             "interrupt address\n");
+               ret = 1;
+               goto bail;
+       }
+
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig,
+                        dd->ipath_intconfig);  /* interrupt address */
+       ret = 0;
+
+bail:
+       return ret;
+}
+
+/**
+ * ipath_ht_put_tid - write a TID to the chip
+ * @dd: the infinipath device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @type: 0 for eager, 1 for expected
+ * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for special locking etc.
+ * It's used both for the full cleanup on exit and for the normal
+ * setup and teardown.
+ */
+static void ipath_ht_put_tid(struct ipath_devdata *dd,
+                            u64 __iomem *tidptr, u32 type,
+                            unsigned long pa)
+{
+       if (pa != dd->ipath_tidinvalid) {
+               if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
+                       dev_info(&dd->pcidev->dev,
+                                "physaddr %lx has more than "
+                                "40 bits, using only 40!!!\n", pa);
+                       pa &= INFINIPATH_RT_ADDR_MASK;
+               }
+               if (type == 0)
+                       pa |= dd->ipath_tidtemplate;
+               else {
+                       /* in words (fixed, full page).  */
+                       u64 lenvalid = PAGE_SIZE >> 2;
+                       lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT;
+                       pa |= lenvalid | INFINIPATH_RT_VALID;
+               }
+       }
+       if (dd->ipath_kregbase)
+               writeq(pa, tidptr);
+}
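+
+/*
+ * Illustrative sketch (not part of the driver): for an expected-TID
+ * update of a full page, the word written above is composed roughly as
+ *
+ *     lenvalid = (PAGE_SIZE >> 2) << INFINIPATH_RT_BUFSIZE_SHIFT;
+ *     tid      = (pa & INFINIPATH_RT_ADDR_MASK) | lenvalid |
+ *                INFINIPATH_RT_VALID;
+ *
+ * i.e. the low bits carry the 40-bit physical address, the BUFSIZE
+ * field carries the buffer length in 32-bit words, and the VALID bit
+ * marks the entry usable.  Eager entries OR in ipath_tidtemplate
+ * instead.
+ */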
+
+/**
+ * ipath_ht_clear_tids - clear all TID entries for a port, expected and eager
+ * @dd: the infinipath device
+ * @port: the port
+ *
+ * Used from ipath_close(), and at chip initialization.
+ */
+static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
+{
+       u64 __iomem *tidbase;
+       int i;
+
+       if (!dd->ipath_kregbase)
+               return;
+
+       ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
+
+       /*
+        * need to invalidate all of the expected TID entries for this
+        * port, so we don't have valid entries that might somehow get
+        * used (early in next use of this port, or through some bug)
+        */
+       tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
+                                  dd->ipath_rcvtidbase +
+                                  port * dd->ipath_rcvtidcnt *
+                                  sizeof(*tidbase));
+       for (i = 0; i < dd->ipath_rcvtidcnt; i++)
+               ipath_ht_put_tid(dd, &tidbase[i], 1, dd->ipath_tidinvalid);
+
+       tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
+                                  dd->ipath_rcvegrbase +
+                                  port * dd->ipath_rcvegrcnt *
+                                  sizeof(*tidbase));
+
+       for (i = 0; i < dd->ipath_rcvegrcnt; i++)
+               ipath_ht_put_tid(dd, &tidbase[i], 0, dd->ipath_tidinvalid);
+}
+
+/**
+ * ipath_ht_tidtemplate - setup constants for TID updates
+ * @dd: the infinipath device
+ *
+ * We set up values that we use a lot, to avoid recalculating them each time
+ */
+static void ipath_ht_tidtemplate(struct ipath_devdata *dd)
+{
+       dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2;
+       dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT;
+       dd->ipath_tidtemplate |= INFINIPATH_RT_VALID;
+
+       /*
+        * work around chip errata bug 7358, by marking invalid tids
+        * as having max length
+        */
+       dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) <<
+               INFINIPATH_RT_BUFSIZE_SHIFT;
+}
+
+static int ipath_ht_early_init(struct ipath_devdata *dd)
+{
+       u32 __iomem *piobuf;
+       u32 pioincr, val32, egrsize;
+       int i;
+
+       /*
+        * one cache line; long IB headers will spill over into received
+        * buffer
+        */
+       dd->ipath_rcvhdrentsize = 16;
+       dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
+
+       /*
+        * For HT, we allocate a somewhat overly large eager buffer,
+        * such that we can guarantee that we can receive the largest
+        * packet that we can send out.  To truly support a 4KB MTU,
+        * we need to bump this to a large value.  To date, other than
+        * testing, we have never encountered an HCA that can really
+        * send 4KB MTU packets, so we do not handle that (we'll get
+        * error interrupts if we ever see one).
+        */
+       dd->ipath_rcvegrbufsize = dd->ipath_piosize2k;
+       egrsize = dd->ipath_rcvegrbufsize;
+
+       /*
+        * the min() check here is currently a nop, but it may not
+        * always be, depending on just how we do ipath_rcvegrbufsize
+        */
+       dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
+                                dd->ipath_rcvegrbufsize);
+       dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
+       ipath_ht_tidtemplate(dd);
+
+       /*
+        * zero all the TID entries at startup.  We do this for sanity,
+        * in case of a previous driver crash of some kind, and also
+        * because the chip powers up with these memories in an unknown
+        * state.  Use portcnt, not cfgports, since this is for the
+        * full chip, not for current (possibly different) configuration
+        * value.
+        * Chip Errata bug 6447
+        */
+       for (val32 = 0; val32 < dd->ipath_portcnt; val32++)
+               ipath_ht_clear_tids(dd, val32);
+
+       /*
+        * write the pbc of each buffer, to be sure it's initialized, then
+        * cancel all the buffers, and also abort any packets that might
+        * have been in flight for some reason (the latter is for driver
+        * unload/reload, but isn't a bad idea at first init).  PIO send
+        * isn't enabled at this point, so there is no danger of sending
+        * these out on the wire.
+        * Chip Errata bug 6610
+        */
+       piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) +
+                                 dd->ipath_piobufbase);
+       pioincr = dd->ipath_palign / sizeof(*piobuf);
+       for (i = 0; i < dd->ipath_piobcnt2k; i++) {
+               /*
+                * reasonable word count, just to init pbc
+                */
+               writel(16, piobuf);
+               piobuf += pioincr;
+       }
+       /*
+        * self-clearing
+        */
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+                        INFINIPATH_S_ABORT);
+
+       ipath_get_eeprom_info(dd);
+       if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
+           dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
+               /*
+                * Later production QHT7040 has same changes as QHT7140, so
+                * can use GPIO interrupts.  They have serial #'s starting
+                * with 128, rather than 112.
+                */
+               dd->ipath_flags |= IPATH_GPIO_INTR;
+               dd->ipath_flags &= ~IPATH_POLL_RX_INTR;
+       }
+       return 0;
+}
+
+/**
+ * ipath_ht_get_base_info - set chip-specific flags for user code
+ * @pd: the infinipath port data
+ * @kbase: ipath_base_info pointer
+ *
+ * We set the HT runtime flag because the lower bandwidth on PCIe vs
+ * HyperTransport can affect some user packet algorithms.
+ */
+static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
+{
+       struct ipath_base_info *kinfo = kbase;
+
+       kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
+               IPATH_RUNTIME_RCVHDR_COPY;
+
+       return 0;
+}
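+
+/*
+ * Illustrative sketch (hypothetical user-side check, not part of this
+ * file): the runtime flags exported here let user-level packet code
+ * choose chip-appropriate paths, e.g.
+ *
+ *     if (base_info->spi_runtime_flags & IPATH_RUNTIME_HT)
+ *             (use HyperTransport-tuned send/receive paths)
+ */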
+
+/**
+ * ipath_init_iba6110_funcs - set up the chip-specific function pointers
+ * @dd: the infinipath device
+ *
+ * This is global, and is called directly at init to set up the
+ * chip-specific function pointers for later use.
+ */
+void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
+{
+       dd->ipath_f_intrsetup = ipath_ht_intconfig;
+       dd->ipath_f_bus = ipath_setup_ht_config;
+       dd->ipath_f_reset = ipath_setup_ht_reset;
+       dd->ipath_f_get_boardname = ipath_ht_boardname;
+       dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors;
+       dd->ipath_f_early_init = ipath_ht_early_init;
+       dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors;
+       dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes;
+       dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes;
+       dd->ipath_f_clear_tids = ipath_ht_clear_tids;
+       dd->ipath_f_put_tid = ipath_ht_put_tid;
+       dd->ipath_f_cleanup = ipath_setup_ht_cleanup;
+       dd->ipath_f_setextled = ipath_setup_ht_setextled;
+       dd->ipath_f_get_base_info = ipath_ht_get_base_info;
+
+       /*
+        * initialize chip-specific variables
+        */
+       dd->ipath_f_tidtemplate = ipath_ht_tidtemplate;
+
+       /*
+        * setup the register offsets, since they are different for each
+        * chip
+        */
+       dd->ipath_kregs = &ipath_ht_kregs;
+       dd->ipath_cregs = &ipath_ht_cregs;
+
+       /*
+        * do very early init that is needed before ipath_f_bus is
+        * called
+        */
+       ipath_init_ht_variables();
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
new file mode 100644 (file)
index 0000000..d86516d
--- /dev/null
@@ -0,0 +1,1264 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*
+ * This file contains all of the code that is specific to the
+ * InfiniPath PCIe chip.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+
+#include "ipath_kernel.h"
+#include "ipath_registers.h"
+
+/*
+ * This file contains all the chip-specific register information and
+ * access functions for the QLogic InfiniPath PCI-Express chip.
+ *
+ * This lists the InfiniPath registers, in the actual chip layout.
+ * This structure should never be directly accessed.
+ */
+struct _infinipath_do_not_use_kernel_regs {
+       unsigned long long Revision;
+       unsigned long long Control;
+       unsigned long long PageAlign;
+       unsigned long long PortCnt;
+       unsigned long long DebugPortSelect;
+       unsigned long long Reserved0;
+       unsigned long long SendRegBase;
+       unsigned long long UserRegBase;
+       unsigned long long CounterRegBase;
+       unsigned long long Scratch;
+       unsigned long long Reserved1;
+       unsigned long long Reserved2;
+       unsigned long long IntBlocked;
+       unsigned long long IntMask;
+       unsigned long long IntStatus;
+       unsigned long long IntClear;
+       unsigned long long ErrorMask;
+       unsigned long long ErrorStatus;
+       unsigned long long ErrorClear;
+       unsigned long long HwErrMask;
+       unsigned long long HwErrStatus;
+       unsigned long long HwErrClear;
+       unsigned long long HwDiagCtrl;
+       unsigned long long MDIO;
+       unsigned long long IBCStatus;
+       unsigned long long IBCCtrl;
+       unsigned long long ExtStatus;
+       unsigned long long ExtCtrl;
+       unsigned long long GPIOOut;
+       unsigned long long GPIOMask;
+       unsigned long long GPIOStatus;
+       unsigned long long GPIOClear;
+       unsigned long long RcvCtrl;
+       unsigned long long RcvBTHQP;
+       unsigned long long RcvHdrSize;
+       unsigned long long RcvHdrCnt;
+       unsigned long long RcvHdrEntSize;
+       unsigned long long RcvTIDBase;
+       unsigned long long RcvTIDCnt;
+       unsigned long long RcvEgrBase;
+       unsigned long long RcvEgrCnt;
+       unsigned long long RcvBufBase;
+       unsigned long long RcvBufSize;
+       unsigned long long RxIntMemBase;
+       unsigned long long RxIntMemSize;
+       unsigned long long RcvPartitionKey;
+       unsigned long long Reserved3;
+       unsigned long long RcvPktLEDCnt;
+       unsigned long long Reserved4[8];
+       unsigned long long SendCtrl;
+       unsigned long long SendPIOBufBase;
+       unsigned long long SendPIOSize;
+       unsigned long long SendPIOBufCnt;
+       unsigned long long SendPIOAvailAddr;
+       unsigned long long TxIntMemBase;
+       unsigned long long TxIntMemSize;
+       unsigned long long Reserved5;
+       unsigned long long PCIeRBufTestReg0;
+       unsigned long long PCIeRBufTestReg1;
+       unsigned long long Reserved51[6];
+       unsigned long long SendBufferError;
+       unsigned long long SendBufferErrorCONT1;
+       unsigned long long Reserved6SBE[6];
+       unsigned long long RcvHdrAddr0;
+       unsigned long long RcvHdrAddr1;
+       unsigned long long RcvHdrAddr2;
+       unsigned long long RcvHdrAddr3;
+       unsigned long long RcvHdrAddr4;
+       unsigned long long Reserved7RHA[11];
+       unsigned long long RcvHdrTailAddr0;
+       unsigned long long RcvHdrTailAddr1;
+       unsigned long long RcvHdrTailAddr2;
+       unsigned long long RcvHdrTailAddr3;
+       unsigned long long RcvHdrTailAddr4;
+       unsigned long long Reserved8RHTA[11];
+       unsigned long long Reserved9SW[8];
+       unsigned long long SerdesConfig0;
+       unsigned long long SerdesConfig1;
+       unsigned long long SerdesStatus;
+       unsigned long long XGXSConfig;
+       unsigned long long IBPLLCfg;
+       unsigned long long Reserved10SW2[3];
+       unsigned long long PCIEQ0SerdesConfig0;
+       unsigned long long PCIEQ0SerdesConfig1;
+       unsigned long long PCIEQ0SerdesStatus;
+       unsigned long long Reserved11;
+       unsigned long long PCIEQ1SerdesConfig0;
+       unsigned long long PCIEQ1SerdesConfig1;
+       unsigned long long PCIEQ1SerdesStatus;
+       unsigned long long Reserved12;
+};
+
+#define IPATH_KREG_OFFSET(field) (offsetof(struct \
+    _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
+#define IPATH_CREG_OFFSET(field) (offsetof( \
+    struct infinipath_counters, field) / sizeof(u64))
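+
+/*
+ * Worked example (illustration only): each register is a u64, so
+ * IPATH_KREG_OFFSET(Scratch) is the register's index in 64-bit words:
+ * offsetof(..., Scratch) / sizeof(u64) == 9 for the layout above,
+ * which is the unit the kreg read/write accessors expect.
+ */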
+
+static const struct ipath_kregs ipath_pe_kregs = {
+       .kr_control = IPATH_KREG_OFFSET(Control),
+       .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
+       .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
+       .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
+       .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
+       .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
+       .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
+       .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
+       .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
+       .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
+       .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
+       .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
+       .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
+       .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
+       .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
+       .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
+       .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
+       .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
+       .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
+       .kr_intclear = IPATH_KREG_OFFSET(IntClear),
+       .kr_intmask = IPATH_KREG_OFFSET(IntMask),
+       .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
+       .kr_mdio = IPATH_KREG_OFFSET(MDIO),
+       .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
+       .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
+       .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
+       .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
+       .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
+       .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
+       .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
+       .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
+       .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
+       .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
+       .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
+       .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
+       .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
+       .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
+       .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
+       .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
+       .kr_revision = IPATH_KREG_OFFSET(Revision),
+       .kr_scratch = IPATH_KREG_OFFSET(Scratch),
+       .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
+       .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
+       .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
+       .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
+       .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
+       .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
+       .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
+       .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
+       .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
+       .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
+       .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
+       .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
+       .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
+       .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
+       .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg),
+
+       /*
+        * These should not be used directly via ipath_read_kreg64(),
+        * use them with ipath_read_kreg64_port()
+        */
+       .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
+       .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
+
+       /* The rcvpktled register controls one of the debug port signals, so
+        * a packet activity LED can be connected to it. */
+       .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
+       .kr_pcierbuftestreg0 = IPATH_KREG_OFFSET(PCIeRBufTestReg0),
+       .kr_pcierbuftestreg1 = IPATH_KREG_OFFSET(PCIeRBufTestReg1),
+       .kr_pcieq0serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig0),
+       .kr_pcieq0serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig1),
+       .kr_pcieq0serdesstatus = IPATH_KREG_OFFSET(PCIEQ0SerdesStatus),
+       .kr_pcieq1serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig0),
+       .kr_pcieq1serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig1),
+       .kr_pcieq1serdesstatus = IPATH_KREG_OFFSET(PCIEQ1SerdesStatus)
+};
+
+static const struct ipath_cregs ipath_pe_cregs = {
+       .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
+       .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
+       .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
+       .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
+       .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
+       .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
+       .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
+       .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
+       .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
+       .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
+       .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
+       .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
+       .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
+       .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
+       .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
+       .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
+       .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
+       .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
+       .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
+       .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
+       .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
+       .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
+       .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
+       .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
+       .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
+       .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
+       .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
+       .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
+       .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
+       .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
+       .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
+       .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
+       .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
+};
+
+/* kr_intstatus, kr_intclear, kr_intmask bits */
+#define INFINIPATH_I_RCVURG_MASK 0x1F
+#define INFINIPATH_I_RCVAVAIL_MASK 0x1F
+
+/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
+#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK  0x000000000000003fULL
+#define INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT 0
+#define INFINIPATH_HWE_PCIEPOISONEDTLP      0x0000000010000000ULL
+#define INFINIPATH_HWE_PCIECPLTIMEOUT       0x0000000020000000ULL
+#define INFINIPATH_HWE_PCIEBUSPARITYXTLH    0x0000000040000000ULL
+#define INFINIPATH_HWE_PCIEBUSPARITYXADM    0x0000000080000000ULL
+#define INFINIPATH_HWE_PCIEBUSPARITYRADM    0x0000000100000000ULL
+#define INFINIPATH_HWE_COREPLL_FBSLIP       0x0080000000000000ULL
+#define INFINIPATH_HWE_COREPLL_RFSLIP       0x0100000000000000ULL
+#define INFINIPATH_HWE_PCIE1PLLFAILED       0x0400000000000000ULL
+#define INFINIPATH_HWE_PCIE0PLLFAILED       0x0800000000000000ULL
+#define INFINIPATH_HWE_SERDESPLLFAILED      0x1000000000000000ULL
+
+/* kr_extstatus bits */
+#define INFINIPATH_EXTS_FREQSEL 0x2
+#define INFINIPATH_EXTS_SERDESSEL 0x4
+#define INFINIPATH_EXTS_MEMBIST_ENDTEST     0x0000000000004000
+#define INFINIPATH_EXTS_MEMBIST_FOUND       0x0000000000008000
+
+#define _IPATH_GPIO_SDA_NUM 1
+#define _IPATH_GPIO_SCL_NUM 0
+
+#define IPATH_GPIO_SDA (1ULL << \
+       (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+#define IPATH_GPIO_SCL (1ULL << \
+       (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
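+
+/*
+ * Note (assumption, based on how the generic ipath code uses these):
+ * SDA/SCL are the GPIO pins used to bit-bang I2C to the on-board
+ * EEPROM; the GPIOOE shift places the bits in the GPIO output-enable
+ * field of ExtCtrl.
+ */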
+
+/**
+ * ipath_pe_handle_hwerrors - display hardware errors.
+ * @dd: the infinipath device
+ * @msg: the output buffer
+ * @msgl: the size of the output buffer
+ *
+ * Most hardware errors are catastrophic, but for right now we'll print
+ * them and continue.  We reuse the same message buffer as
+ * ipath_handle_errors() to avoid excessive stack usage.
+ */
+static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
+                                    size_t msgl)
+{
+       ipath_err_t hwerrs;
+       u32 bits, ctrl;
+       int isfatal = 0;
+       char bitsmsg[64];
+
+       hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
+       if (!hwerrs) {
+               /*
+                * better than printing confusing messages; this seems to be
+                * related to clearing the crc error, or the pll error,
+                * during init.
+                */
+               ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
+               return;
+       } else if (hwerrs == ~0ULL) {
+               ipath_dev_err(dd, "Read of hardware error status failed "
+                             "(all bits set); ignoring\n");
+               return;
+       }
+       ipath_stats.sps_hwerrs++;
+
+       /* Always clear the error status register, except MEMBISTFAIL,
+        * regardless of whether we continue or stop using the chip.
+        * We want that set so we know it failed, even across driver reload.
+        * We'll still ignore it in the hwerrmask.  We do this partly for
+        * diagnostics, but also for support */
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+                        hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
+
+       hwerrs &= dd->ipath_hwerrmask;
+
+       /*
+        * make sure we get this much out, unless told to be quiet,
+        * or it's occurred within the last 5 seconds
+        */
+       if ((hwerrs & ~dd->ipath_lasthwerror) ||
+           (ipath_debug & __IPATH_VERBDBG))
+               dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
+                        "(cleared)\n", (unsigned long long) hwerrs);
+       dd->ipath_lasthwerror |= hwerrs;
+
+       if (hwerrs & ~infinipath_hwe_bitsextant)
+               ipath_dev_err(dd, "hwerror interrupt with unknown errors "
+                             "%llx set\n", (unsigned long long)
+                             (hwerrs & ~infinipath_hwe_bitsextant));
+
+       ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
+       if (ctrl & INFINIPATH_C_FREEZEMODE) {
+               if (hwerrs) {
+                       /*
+                        * if any bits that we aren't ignoring are set, only
+                        * make the complaint once, in case it's stuck or
+                        * recurring, and we get here multiple times
+                        */
+                       if (dd->ipath_flags & IPATH_INITTED) {
+                               ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+                                             "mode), no longer usable, SN %.16s\n",
+                                                 dd->ipath_serial);
+                               isfatal = 1;
+                       }
+                       /*
+                        * Mark as having had an error for driver, and also
+                        * for /sys and status word mapped to user programs.
+                        * This marks unit as not usable, until reset
+                        */
+                       *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+                       *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+                       dd->ipath_flags &= ~IPATH_INITTED;
+               } else {
+                       ipath_dbg("Clearing freezemode on ignored hardware "
+                                 "error\n");
+                       ctrl &= ~INFINIPATH_C_FREEZEMODE;
+                       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+                                        ctrl);
+               }
+       }
+
+       *msg = '\0';
+
+       if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
+               strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
+                       msgl);
+               /* ignore from now on, so disable until driver reloaded */
+               *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+                                dd->ipath_hwerrmask);
+       }
+       if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
+                     << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
+               bits = (u32) ((hwerrs >>
+                              INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
+                             INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
+               snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
+                        bits);
+               strlcat(msg, bitsmsg, msgl);
+       }
+       if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
+                     << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+               bits = (u32) ((hwerrs >>
+                              INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
+                             INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
+               snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
+                        bits);
+               strlcat(msg, bitsmsg, msgl);
+       }
+       if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
+                     << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
+               bits = (u32) ((hwerrs >>
+                              INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) &
+                             INFINIPATH_HWE_PCIEMEMPARITYERR_MASK);
+               snprintf(bitsmsg, sizeof bitsmsg,
+                        "[PCIe Mem Parity Errs %x] ", bits);
+               strlcat(msg, bitsmsg, msgl);
+       }
+       if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
+               strlcat(msg, "[IB2IPATH Parity]", msgl);
+       if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
+               strlcat(msg, "[IPATH2IB Parity]", msgl);
+
+#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP |       \
+                        INFINIPATH_HWE_COREPLL_RFSLIP )
+
+       if (hwerrs & _IPATH_PLL_FAIL) {
+               snprintf(bitsmsg, sizeof bitsmsg,
+                        "[PLL failed (%llx), InfiniPath hardware unusable]",
+                        (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
+               strlcat(msg, bitsmsg, msgl);
+               /* ignore from now on, so disable until driver reloaded */
+               dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+                                dd->ipath_hwerrmask);
+       }
+
+       if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
+               /*
+                * If it occurs, it is left masked since the external
+                * interface is unused
+                */
+               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+                                dd->ipath_hwerrmask);
+       }
+
+       if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP)
+               strlcat(msg, "[PCIe Poisoned TLP]", msgl);
+       if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT)
+               strlcat(msg, "[PCIe completion timeout]", msgl);
+
+       /*
+        * In practice, it's unlikely that we'll see PCIe PLL, or bus
+        * parity or memory parity error failures, because most likely we
+        * won't be able to talk to the core of the chip.  Nonetheless, we
+        * might see them, if they are in parts of the PCIe core that aren't
+        * essential.
+        */
+       if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED)
+               strlcat(msg, "[PCIePLL1]", msgl);
+       if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED)
+               strlcat(msg, "[PCIePLL0]", msgl);
+       if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH)
+               strlcat(msg, "[PCIe XTLH core parity]", msgl);
+       if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM)
+               strlcat(msg, "[PCIe ADM TX core parity]", msgl);
+       if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM)
+               strlcat(msg, "[PCIe ADM RX core parity]", msgl);
+
+       if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
+               strlcat(msg, "[Rx Dsync]", msgl);
+       if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
+               strlcat(msg, "[SerDes PLL]", msgl);
+
+       ipath_dev_err(dd, "%s hardware error\n", msg);
+       if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
+               /*
+                * for the /sys status file; if no trailing } is copied, we'll
+                * know it was truncated.
+                */
+               snprintf(dd->ipath_freezemsg, dd->ipath_freezelen,
+                        "{%s}", msg);
+       }
+}
+
+/**
+ * ipath_pe_boardname - fill in the board name
+ * @dd: the infinipath device
+ * @name: the output buffer
+ * @namelen: the size of the output buffer
+ *
+ * info is based on the board revision register
+ */
+static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
+                             size_t namelen)
+{
+       char *n = NULL;
+       u8 boardrev = dd->ipath_boardrev;
+       int ret;
+
+       switch (boardrev) {
+       case 0:
+               n = "InfiniPath_Emulation";
+               break;
+       case 1:
+               n = "InfiniPath_QLE7140-Bringup";
+               break;
+       case 2:
+               n = "InfiniPath_QLE7140";
+               break;
+       case 3:
+               n = "InfiniPath_QMI7140";
+               break;
+       case 4:
+               n = "InfiniPath_QEM7140";
+               break;
+       case 5:
+               n = "InfiniPath_QMH7140";
+               break;
+       default:
+               ipath_dev_err(dd,
+                             "Don't yet know about board with ID %u\n",
+                             boardrev);
+               snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
+                        boardrev);
+               break;
+       }
+       if (n)
+               snprintf(name, namelen, "%s", n);
+
+       if (dd->ipath_majrev != 4 || !dd->ipath_minrev ||
+           dd->ipath_minrev > 2) {
+               ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n",
+                             dd->ipath_majrev, dd->ipath_minrev);
+               ret = 1;
+       } else
+               ret = 0;
+
+       return ret;
+}
+
+/**
+ * ipath_pe_init_hwerrors - enable hardware errors
+ * @dd: the infinipath device
+ *
+ * now that we have finished initializing everything that might reasonably
+ * cause a hardware error, and cleared those error bits as they occur,
+ * we can enable hardware errors in the mask (potentially enabling
+ * freeze mode), and enable hardware errors as errors (along with
+ * everything else) in errormask
+ */
+static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
+{
+       ipath_err_t val;
+       u64 extsval;
+
+       extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
+
+       if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
+               ipath_dev_err(dd, "MemBIST did not complete!\n");
+
+       val = ~0ULL;    /* barring bugs, all hwerrors become interrupts */
+
+       if (!dd->ipath_boardrev)        /* no PLL for Emulator */
+               val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
+
+       /* workaround bug 9460 in internal interface bus parity checking */
+       val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
+
+       dd->ipath_hwerrmask = val;
+}
+
+/**
+ * ipath_pe_bringup_serdes - bring up the serdes
+ * @dd: the infinipath device
+ */
+static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
+{
+       u64 val, tmp, config1;
+       int ret = 0, change = 0;
+
+       ipath_dbg("Trying to bringup serdes\n");
+
+       if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
+           INFINIPATH_HWE_SERDESPLLFAILED) {
+               ipath_dbg("At start, serdes PLL failed bit set "
+                         "in hwerrstatus, clearing and continuing\n");
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+                                INFINIPATH_HWE_SERDESPLLFAILED);
+       }
+
+       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
+       config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
+
+       ipath_cdbg(VERBOSE, "SerDes status config0=%llx config1=%llx, "
+                  "xgxsconfig %llx\n", (unsigned long long) val,
+                  (unsigned long long) config1, (unsigned long long)
+                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
+
+       /*
+        * Force reset on, also set rxdetect enable.  Must do before reading
+        * serdesstatus at least for simulation, or some of the bits in
+        * serdes status will come back as undefined and cause simulation
+        * failures
+        */
+       val |= INFINIPATH_SERDC0_RESET_PLL | INFINIPATH_SERDC0_RXDETECT_EN
+               | INFINIPATH_SERDC0_L1PWR_DN;
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
+       /* be sure chip saw it */
+       tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+       udelay(5);              /* need pll reset set at least for a bit */
+       /*
+        * after PLL is reset, set the per-lane Resets and TxIdle and
+        * clear the PLL reset and rxdetect (to get falling edge).
+        * Leave L1PWR bits set (permanently)
+        */
+       val &= ~(INFINIPATH_SERDC0_RXDETECT_EN | INFINIPATH_SERDC0_RESET_PLL
+                | INFINIPATH_SERDC0_L1PWR_DN);
+       val |= INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE;
+       ipath_cdbg(VERBOSE, "Clearing pll reset and setting lane resets "
+                  "and txidle (%llx)\n", (unsigned long long) val);
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
+       /* be sure chip saw it */
+       tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+       /* need PLL reset clear for at least 11 usec before lane
+        * resets cleared; give it a few more to be sure */
+       udelay(15);
+       val &= ~(INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE);
+
+       ipath_cdbg(VERBOSE, "Clearing lane resets and txidle "
+                  "(writing %llx)\n", (unsigned long long) val);
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
+       /* be sure chip saw it */
+       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+
+       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+       if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
+            INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
+               val &=
+                       ~(INFINIPATH_XGXS_MDIOADDR_MASK <<
+                         INFINIPATH_XGXS_MDIOADDR_SHIFT);
+               /* MDIO address 3 */
+               val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
+               change = 1;
+       }
+       if (val & INFINIPATH_XGXS_RESET) {
+               val &= ~INFINIPATH_XGXS_RESET;
+               change = 1;
+       }
+       if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+            INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv) {
+               /* need to compensate for Tx inversion in partner */
+               val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+                        INFINIPATH_XGXS_RX_POL_SHIFT);
+               val |= dd->ipath_rx_pol_inv <<
+                       INFINIPATH_XGXS_RX_POL_SHIFT;
+               change = 1;
+       }
+       if (change)
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+
+       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
+
+       /* clear current and de-emphasis bits */
+       config1 &= ~0x0ffffffff00ULL;
+       /* set current to 20ma */
+       config1 |= 0x00000000000ULL;
+       /* set de-emphasis to -5.68dB */
+       config1 |= 0x0cccc000000ULL;
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
+
+       ipath_cdbg(VERBOSE, "done: SerDes status config0=%llx "
+                  "config1=%llx, sstatus=%llx xgxs=%llx\n",
+                  (unsigned long long) val, (unsigned long long) config1,
+                  (unsigned long long)
+                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
+                  (unsigned long long)
+                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
+
+       if (!ipath_waitfor_mdio_cmdready(dd)) {
+               ipath_write_kreg(
+                       dd, dd->ipath_kregs->kr_mdio,
+                       ipath_mdio_req(IPATH_MDIO_CMD_READ, 31,
+                                      IPATH_MDIO_CTRL_XGXS_REG_8, 0));
+               if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio,
+                                          IPATH_MDIO_DATAVALID, &val))
+                       ipath_dbg("Never got MDIO data for XGXS "
+                                 "status read\n");
+               else
+                       ipath_cdbg(VERBOSE, "MDIO Read reg8, "
+                                  "'bank' 31 %x\n", (u32) val);
+       } else
+               ipath_dbg("Never got MDIO cmdready for XGXS status read\n");
+
+       return ret;
+}
+
+/**
+ * ipath_pe_quiet_serdes - set serdes to txidle
+ * @dd: the infinipath device
+ * Called when driver is being unloaded
+ */
+static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
+{
+       u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
+
+       val |= INFINIPATH_SERDC0_TXIDLE;
+       ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
+                 (unsigned long long) val);
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
+}
+
+/* this is not yet needed on this chip, so just return 0. */
+static int ipath_pe_intconfig(struct ipath_devdata *dd)
+{
+       return 0;
+}
+
+/**
+ * ipath_setup_pe_setextled - set the state of the two external LEDs
+ * @dd: the infinipath device
+ * @lst: the L state
+ * @ltst: the LT state
+ *
+ * These LEDs indicate the physical and logical state of IB link.
+ * For this chip (at least with recommended board pinouts), LED1
+ * is Yellow (logical state) and LED2 is Green (physical state).
+ *
+ * Note:  We try to match the Mellanox HCA LED behavior as best
+ * we can.  Green indicates physical link state is OK (something is
+ * plugged in, and we can train).
+ * Amber indicates the link is logically up (ACTIVE).
+ * Mellanox further blinks the amber LED to indicate data packet
+ * activity, but we have no hardware support for that, so it would
+ * require waking up every 10-20 msecs and checking the counters
+ * on the chip, and then turning the LED off if appropriate.  That's
+ * visible overhead, so not something we will do.
+ *
+ */
+static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
+                                    u64 ltst)
+{
+       u64 extctl;
+
+       /* the diags use the LED to indicate diag info, so we leave
+        * the external LED alone when the diags are running */
+       if (ipath_diag_inuse)
+               return;
+
+       extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
+                                      INFINIPATH_EXTC_LED2PRIPORT_ON);
+
+       if (ltst & INFINIPATH_IBCS_LT_STATE_LINKUP)
+               extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
+       if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
+               extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
+       dd->ipath_extctrl = extctl;
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
+}
+
+/**
+ * ipath_setup_pe_cleanup - clean up any per-chip chip-specific stuff
+ * @dd: the infinipath device
+ *
+ * This is called during driver unload.
+ * We do the pci_disable_msi here, not in generic code, because it
+ * isn't used for the HT chips. If we do end up needing pci_enable_msi
+ * at some point in the future for HT, we'll move the call back
+ * into the main init_one code.
+ */
+static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
+{
+       dd->ipath_msi_lo = 0;   /* just in case unload fails */
+       pci_disable_msi(dd->pcidev);
+}
+
+/**
+ * ipath_setup_pe_config - setup PCIe config related stuff
+ * @dd: the infinipath device
+ * @pdev: the PCI device
+ *
+ * The pci_enable_msi() call will fail on systems with MSI quirks
+ * such as those with AMD8131, even if the device of interest is not
+ * attached to that device (in the 2.6.13 - 2.6.15 kernels, at least;
+ * this was fixed late in 2.6.16).
+ * All that can be done is to edit the kernel source to remove the quirk
+ * check until that is fixed.
+ * We do not need to call pci_enable_msi() for our HyperTransport chip,
+ * even though it uses MSI, and we want to avoid the quirk warning,
+ * so we call pci_enable_msi() only for PCIe.  If we do end up needing
+ * pci_enable_msi at some point in the future for HT, we'll move the
+ * call back into the main init_one code.
+ * We save the msi lo and hi values, so we can restore them after
+ * chip reset (the kernel PCI infrastructure doesn't yet handle that
+ * correctly).
+ */
+static int ipath_setup_pe_config(struct ipath_devdata *dd,
+                                struct pci_dev *pdev)
+{
+       int pos, ret;
+
+       dd->ipath_msi_lo = 0;   /* used as a flag during reset processing */
+       ret = pci_enable_msi(dd->pcidev);
+       if (ret)
+               ipath_dev_err(dd, "pci_enable_msi failed: %d, "
+                             "interrupts may not work\n", ret);
+       /* continue even if it fails, we may still be OK... */
+
+       if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) {
+               u16 control;
+               pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
+                                     &dd->ipath_msi_lo);
+               pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
+                                     &dd->ipath_msi_hi);
+               pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
+                                    &control);
+               /* now save the data (vector) info */
+               pci_read_config_word(dd->pcidev,
+                                    pos + ((control & PCI_MSI_FLAGS_64BIT)
+                                           ? 12 : 8),
+                                    &dd->ipath_msi_data);
+               ipath_cdbg(VERBOSE, "Read msi data 0x%x from config offset "
+                          "0x%x, control=0x%x\n", dd->ipath_msi_data,
+                          pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
+                          control);
+               /* we save the cachelinesize also, although it doesn't
+                * really matter */
+               pci_read_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
+                                    &dd->ipath_pci_cacheline);
+       } else
+               ipath_dev_err(dd, "Can't find MSI capability, "
+                             "can't save MSI settings for reset\n");
+       if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP))) {
+               u16 linkstat;
+               pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA,
+                                    &linkstat);
+               linkstat >>= 4;
+               linkstat &= 0x1f;
+               if (linkstat != 8)
+                       ipath_dev_err(dd, "PCIe width %u, "
+                                     "performance reduced\n", linkstat);
+       } else
+               ipath_dev_err(dd, "Can't find PCI Express "
+                             "capability!\n");
+       return 0;
+}
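
For reference, the MSI data (vector) word saved above sits at a capability-relative offset that depends on whether the function implements 64-bit MSI addresses; the open-coded "(control & PCI_MSI_FLAGS_64BIT) ? 12 : 8" is exactly that choice.  A minimal sketch of the same lookup using a hypothetical read_msi_data() helper, assuming the named PCI_MSI_DATA_32 (8) and PCI_MSI_DATA_64 (12) constants from <linux/pci_regs.h> are available in this tree:

	/*
	 * Illustrative sketch only, not part of the patch: read the MSI
	 * data register at its 32-bit or 64-bit capability offset.
	 */
	static u16 read_msi_data(struct pci_dev *pdev, int pos, u16 control)
	{
		u16 data;
		int off = (control & PCI_MSI_FLAGS_64BIT) ?
			PCI_MSI_DATA_64 : PCI_MSI_DATA_32;

		pci_read_config_word(pdev, pos + off, &data);
		return data;
	}
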
+
+static void ipath_init_pe_variables(void)
+{
+       /*
+        * bits for selecting i2c direction and values,
+        * used for I2C serial flash
+        */
+       ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+       ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+       ipath_gpio_sda = IPATH_GPIO_SDA;
+       ipath_gpio_scl = IPATH_GPIO_SCL;
+
+       /* variables for sanity checking interrupt and errors */
+       infinipath_hwe_bitsextant =
+               (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+                INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
+               (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
+                INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
+               INFINIPATH_HWE_PCIE1PLLFAILED |
+               INFINIPATH_HWE_PCIE0PLLFAILED |
+               INFINIPATH_HWE_PCIEPOISONEDTLP |
+               INFINIPATH_HWE_PCIECPLTIMEOUT |
+               INFINIPATH_HWE_PCIEBUSPARITYXTLH |
+               INFINIPATH_HWE_PCIEBUSPARITYXADM |
+               INFINIPATH_HWE_PCIEBUSPARITYRADM |
+               INFINIPATH_HWE_MEMBISTFAILED |
+               INFINIPATH_HWE_COREPLL_FBSLIP |
+               INFINIPATH_HWE_COREPLL_RFSLIP |
+               INFINIPATH_HWE_SERDESPLLFAILED |
+               INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
+               INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
+       infinipath_i_bitsextant =
+               (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
+               (INFINIPATH_I_RCVAVAIL_MASK <<
+                INFINIPATH_I_RCVAVAIL_SHIFT) |
+               INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
+               INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
+       infinipath_e_bitsextant =
+               INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
+               INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
+               INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
+               INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
+               INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
+               INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
+               INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+               INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
+               INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
+               INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
+               INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
+               INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
+               INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
+               INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
+               INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
+               INFINIPATH_E_HARDWARE;
+
+       infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+       infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+}
+
+/* setup the MSI stuff again after a reset.  I'd like to just call
+ * pci_enable_msi() and request_irq() again, but when I do that,
+ * the MSI enable bit doesn't get set in the command word, and
+ * we switch to a different interrupt vector, which is confusing,
+ * so I instead just do it all inline.  Perhaps we can somehow tie this
+ * into the PCIe hotplug support at some point.
+ * Note, because I'm doing it all here, I don't call pci_disable_msi()
+ * or free_irq() at the start of ipath_setup_pe_reset().
+ */
+static int ipath_reinit_msi(struct ipath_devdata *dd)
+{
+       int pos;
+       u16 control;
+       int ret;
+
+       if (!dd->ipath_msi_lo) {
+               dev_info(&dd->pcidev->dev, "Can't restore MSI config, "
+                        "initial setup failed?\n");
+               ret = 0;
+               goto bail;
+       }
+
+       if (!(pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) {
+               ipath_dev_err(dd, "Can't find MSI capability, "
+                             "can't restore MSI settings\n");
+               ret = 0;
+               goto bail;
+       }
+       ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
+                  dd->ipath_msi_lo, pos + PCI_MSI_ADDRESS_LO);
+       pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
+                              dd->ipath_msi_lo);
+       ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
+                  dd->ipath_msi_hi, pos + PCI_MSI_ADDRESS_HI);
+       pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
+                              dd->ipath_msi_hi);
+       pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control);
+       if (!(control & PCI_MSI_FLAGS_ENABLE)) {
+               ipath_cdbg(VERBOSE, "MSI control at off %x was %x, "
+                          "setting MSI enable (%x)\n", pos + PCI_MSI_FLAGS,
+                          control, control | PCI_MSI_FLAGS_ENABLE);
+               control |= PCI_MSI_FLAGS_ENABLE;
+               pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
+                                     control);
+       }
+       /* now rewrite the data (vector) info */
+       pci_write_config_word(dd->pcidev, pos +
+                             ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
+                             dd->ipath_msi_data);
+       /* we restore the cachelinesize also, although it doesn't really
+        * matter */
+       pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
+                             dd->ipath_pci_cacheline);
+       /* and now set the pci master bit again */
+       pci_set_master(dd->pcidev);
+       ret = 1;
+
+bail:
+       return ret;
+}
+
+/* This routine sleeps, so it can only be called from user context, not
+ * from interrupt context.  If we need interrupt context, we can split
+ * it into two routines.
+ */
+static int ipath_setup_pe_reset(struct ipath_devdata *dd)
+{
+       u64 val;
+       int i;
+       int ret;
+
+       /* Use ERROR so it shows up in logs, etc. */
+       ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
+       /* keep chip from being accessed in a few places */
+       dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT);
+       val = dd->ipath_control | INFINIPATH_C_RESET;
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_control, val);
+       mb();
+
+       for (i = 1; i <= 5; i++) {
+               int r;
+               /* allow MBIST, etc. to complete; longer on each retry.
+                * We sometimes get machine checks from bus timeout if no
+                * response, so for now, make it *really* long.
+                */
+               msleep(1000 + (1 + i) * 2000);
+               if ((r =
+                    pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
+                                           dd->ipath_pcibar0)))
+                       ipath_dev_err(dd, "rewrite of BAR0 failed: %d\n",
+                                     r);
+               if ((r =
+                    pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
+                                           dd->ipath_pcibar1)))
+                       ipath_dev_err(dd, "rewrite of BAR1 failed: %d\n",
+                                     r);
+               /* now re-enable memory access */
+               if ((r = pci_enable_device(dd->pcidev)))
+                       ipath_dev_err(dd, "pci_enable_device failed after "
+                                     "reset: %d\n", r);
+               /* whether it worked or not, mark as present again */
+               dd->ipath_flags |= IPATH_PRESENT;
+               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
+               if (val == dd->ipath_revision) {
+                       ipath_cdbg(VERBOSE, "Got matching revision "
+                                  "register %llx on try %d\n",
+                                  (unsigned long long) val, i);
+                       ret = ipath_reinit_msi(dd);
+                       goto bail;
+               }
+               /* Probably getting -1 back */
+               ipath_dbg("Didn't get expected revision register, "
+                         "got %llx, try %d\n", (unsigned long long) val,
+                         i + 1);
+       }
+       ret = 0; /* failed */
+
+bail:
+       return ret;
+}
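
As a worked example of the retry loop above: msleep(1000 + (1 + i) * 2000) waits 5, 7, 9, 11 and 13 seconds on tries 1 through 5, so a chip that never returns the expected revision register costs roughly 45 seconds before the reset is reported as failed.
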
+
+/**
+ * ipath_pe_put_tid - write a TID in chip
+ * @dd: the infinipath device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @type: 0 for eager, 1 for expected
+ * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for special locking etc.
+ * It's used for both the full cleanup on exit, as well as the normal
+ * setup and teardown.
+ */
+static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
+                            u32 type, unsigned long pa)
+{
+       u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
+       unsigned long flags = 0; /* keep gcc quiet */
+
+       if (pa != dd->ipath_tidinvalid) {
+               if (pa & ((1U << 11) - 1)) {
+                       dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
+                                "not 4KB aligned!\n", pa);
+                       return;
+               }
+               pa >>= 11;
+               /* paranoia check */
+               if (pa & (7<<29))
+                       ipath_dev_err(dd,
+                                     "BUG: Physical page address 0x%lx "
+                                     "has bits set in 31-29\n", pa);
+
+               if (type == 0)
+                       pa |= dd->ipath_tidtemplate;
+               else /* for now, always full 4KB page */
+                       pa |= 2 << 29;
+       }
+
+       /* workaround chip bug 9437 by writing each TID twice
+        * and holding a spinlock around the writes, so they don't
+        * intermix with other TID (eager or expected) writes.
+        * Unfortunately, this call can be done from interrupt level
+        * for the port 0 eager TIDs, so we have to use irqsave
+        */
+       spin_lock_irqsave(&dd->ipath_tid_lock, flags);
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeeddeaf);
+       if (dd->ipath_kregbase)
+               writel(pa, tidp32);
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xdeadbeef);
+       mmiowb();
+       spin_unlock_irqrestore(&dd->ipath_tid_lock, flags);
+}
+
+/**
+ * ipath_pe_clear_tids - clear all TID entries for a port, expected and eager
+ * @dd: the infinipath device
+ * @port: the port
+ *
+ * clear all TID entries for a port, expected and eager.
+ * Used from ipath_close().  On this chip, TIDs are only 32 bits,
+ * not 64, but they are still on 64 bit boundaries, so tidbase
+ * is declared as u64 * for the pointer math, even though we write 32 bits.
+ */
+static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
+{
+       u64 __iomem *tidbase;
+       unsigned long tidinv;
+       int i;
+
+       if (!dd->ipath_kregbase)
+               return;
+
+       ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
+
+       tidinv = dd->ipath_tidinvalid;
+       tidbase = (u64 __iomem *)
+               ((char __iomem *)(dd->ipath_kregbase) +
+                dd->ipath_rcvtidbase +
+                port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
+
+       for (i = 0; i < dd->ipath_rcvtidcnt; i++)
+               ipath_pe_put_tid(dd, &tidbase[i], 0, tidinv);
+
+       tidbase = (u64 __iomem *)
+               ((char __iomem *)(dd->ipath_kregbase) +
+                dd->ipath_rcvegrbase +
+                port * dd->ipath_rcvegrcnt * sizeof(*tidbase));
+
+       for (i = 0; i < dd->ipath_rcvegrcnt; i++)
+               ipath_pe_put_tid(dd, &tidbase[i], 1, tidinv);
+}
+
+/**
+ * ipath_pe_tidtemplate - setup constants for TID updates
+ * @dd: the infinipath device
+ *
+ * We set up values that we use a lot, to avoid recalculating them each time
+ */
+static void ipath_pe_tidtemplate(struct ipath_devdata *dd)
+{
+       u32 egrsize = dd->ipath_rcvegrbufsize;
+
+       /* For now, we always allocate 4KB buffers (at init) so we can
+        * receive max size packets.  We may want a module parameter to
+        * specify 2KB or 4KB and/or make it per port instead of per device
+        * for those who want to reduce memory footprint.  Note that
+        * ipath_rcvhdrentsize must be large enough to hold the largest
+        * IB header (currently 96 bytes) that we expect to handle (plus of
+        * course the 2 dwords of RHF).
+        */
+       if (egrsize == 2048)
+               dd->ipath_tidtemplate = 1U << 29;
+       else if (egrsize == 4096)
+               dd->ipath_tidtemplate = 2U << 29;
+       else {
+               egrsize = 4096;
+               dev_info(&dd->pcidev->dev, "BUG: unsupported egrbufsize "
+                        "%u, using %u\n", dd->ipath_rcvegrbufsize,
+                        egrsize);
+               dd->ipath_tidtemplate = 2U << 29;
+       }
+       dd->ipath_tidinvalid = 0;
+}
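
Taken together, ipath_pe_put_tid() and ipath_pe_tidtemplate() define a simple 32-bit TID word for this chip: the 4KB-aligned physical address shifted right by 11 bits in the low bits, and a buffer-size code in bits 31:29 (1 for 2KB eager buffers, 2 for 4KB).  A hypothetical helper, not in the driver, that builds the same value:

	/* Hypothetical sketch of the TID word layout used above; not driver
	 * code.  pa must be 4KB aligned, sizecode is 1 for 2KB, 2 for 4KB. */
	static u32 make_tid_word(unsigned long pa, u32 sizecode)
	{
		return (u32)(pa >> 11) | (sizecode << 29);
	}
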
+
+static int ipath_pe_early_init(struct ipath_devdata *dd)
+{
+       dd->ipath_flags |= IPATH_4BYTE_TID;
+
+       /*
+        * For openfabrics, we need to be able to handle an IB header of
+        * 24 dwords.  HT chip has arbitrary sized receive buffers, so we
+        * made them the same size as the PIO buffers.  This chip does not
+        * handle arbitrary size buffers, so we need the header large enough
+        * to handle largest IB header, but still have room for a 2KB MTU
+        * standard IB packet.
+        */
+       dd->ipath_rcvhdrentsize = 24;
+       dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
+
+       /*
+        * To truly support a 4KB MTU (for usermode), we need to
+        * bump this to a larger value.  For now, we use them for
+        * the kernel only.
+        */
+       dd->ipath_rcvegrbufsize = 2048;
+       /*
+        * the min() check here is currently a nop, but it may not always
+        * be, depending on just how we do ipath_rcvegrbufsize
+        */
+       dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
+                                dd->ipath_rcvegrbufsize +
+                                (dd->ipath_rcvhdrentsize << 2));
+       dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
+
+       /*
+        * We can request a receive interrupt for 1 or
+        * more packets from current offset.  For now, we set this
+        * up for a single packet.
+        */
+       dd->ipath_rhdrhead_intr_off = 1ULL<<32;
+
+       ipath_get_eeprom_info(dd);
+
+       return 0;
+}
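
To make the sizing above concrete: ipath_rcvhdrentsize of 24 dwords is 96 bytes, so with 2048-byte eager buffers the second operand of the min() is 2048 + 96 = 2144 bytes, and ipath_ibmaxlen becomes the smaller of that and the 2KB PIO buffer size.
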
+
+int __attribute__((weak)) ipath_unordered_wc(void)
+{
+       return 0;
+}
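
The __attribute__((weak)) default above is the usual way to provide an optional, architecture-specific hook: it returns 0 (write-combining stores stay ordered) unless another object file in the driver supplies a strong definition, which the linker then prefers.  A purely illustrative override (the real per-architecture file may differ) would be:

	/* Hypothetical arch-specific override: a strong definition like this,
	 * compiled in for an architecture whose write-combining stores can
	 * be reordered, replaces the weak default at link time. */
	int ipath_unordered_wc(void)
	{
		return 1;
	}
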
+
+/**
+ * ipath_pe_get_base_info - set chip-specific flags for user code
+ * @pd: the infinipath per-port data
+ * @kbase: ipath_base_info pointer
+ *
+ * We set the PCIE flag because the lower bandwidth on PCIe vs
+ * HyperTransport can affect some user packet algorithms.
+ */
+static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
+{
+       struct ipath_base_info *kinfo = kbase;
+
+       if (ipath_unordered_wc()) {
+               kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER;
+               ipath_cdbg(PROC, "Intel processor, forcing WC order\n");
+       } else
+               ipath_cdbg(PROC, "Not Intel processor, WC ordered\n");
+
+       kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
+
+       return 0;
+}
+
+/**
+ * ipath_init_iba6120_funcs - set up the chip-specific function pointers
+ * @dd: the infinipath device
+ *
+ * This is global, and is called directly at init to set up the
+ * chip-specific function pointers for later use.
+ */
+void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
+{
+       dd->ipath_f_intrsetup = ipath_pe_intconfig;
+       dd->ipath_f_bus = ipath_setup_pe_config;
+       dd->ipath_f_reset = ipath_setup_pe_reset;
+       dd->ipath_f_get_boardname = ipath_pe_boardname;
+       dd->ipath_f_init_hwerrors = ipath_pe_init_hwerrors;
+       dd->ipath_f_early_init = ipath_pe_early_init;
+       dd->ipath_f_handle_hwerrors = ipath_pe_handle_hwerrors;
+       dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes;
+       dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
+       dd->ipath_f_clear_tids = ipath_pe_clear_tids;
+       dd->ipath_f_put_tid = ipath_pe_put_tid;
+       dd->ipath_f_cleanup = ipath_setup_pe_cleanup;
+       dd->ipath_f_setextled = ipath_setup_pe_setextled;
+       dd->ipath_f_get_base_info = ipath_pe_get_base_info;
+
+       /* initialize chip-specific variables */
+       dd->ipath_f_tidtemplate = ipath_pe_tidtemplate;
+
+       /*
+        * setup the register offsets, since they are different for each
+        * chip
+        */
+       dd->ipath_kregs = &ipath_pe_kregs;
+       dd->ipath_cregs = &ipath_pe_cregs;
+
+       ipath_init_pe_variables();
+}
+
index 414cdd1d80a6fa8893121786825bc5646cd8d385..44669dc2e22d1c502ce95503a19ed8f2f0fdb8ce 100644 (file)
@@ -53,8 +53,8 @@ module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
 MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
 
 /*
- * Number of buffers reserved for driver (layered drivers and SMA
- * send).  Reserved at end of buffer list.   Initialized based on
+ * Number of buffers reserved for the driver (verbs and layered drivers).
+ * Reserved at end of buffer list.  Initialized based on
  * number of PIO buffers if not set via module interface.
  * The problem with this is that it's global, but we'll use different
  * numbers for different chip types.  So the default value is not
@@ -80,7 +80,7 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
  *
  * Allocate the eager TID buffers and program them into infinipath.
  * We use the network layer alloc_skb() allocator to allocate the
- * memory, and either use the buffers as is for things like SMA
+ * memory, and either use the buffers as is for things like verbs
  * packets, or pass the buffers up to the ipath layered driver and
  * thence the network layer, replacing them as we do so (see
  * ipath_rcv_layer()).
@@ -240,7 +240,11 @@ static int init_chip_first(struct ipath_devdata *dd,
                          "only supports %u\n", ipath_cfgports,
                          dd->ipath_portcnt);
        }
-       dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports,
+       /*
+        * Allocate full portcnt array, rather than just cfgports, because
+        * cleanup iterates across all possible ports.
+        */
+       dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt,
                               GFP_KERNEL);
 
        if (!dd->ipath_pd) {
@@ -446,9 +450,9 @@ static void enable_chip(struct ipath_devdata *dd,
        u32 val;
        int i;
 
-       if (!reinit) {
-               init_waitqueue_head(&ipath_sma_state_wait);
-       }
+       if (!reinit)
+               init_waitqueue_head(&ipath_state_wait);
+
        ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
                         dd->ipath_rcvctrl);
 
@@ -687,7 +691,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
        dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
                / (sizeof(u64) * BITS_PER_BYTE / 2);
        if (ipath_kpiobufs == 0) {
-               /* not set by user, or set explictly to default  */
+               /* not set by user (this is default) */
                if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
                        kpiobufs = 32;
                else
@@ -946,6 +950,7 @@ static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
                        dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
        }
 
+       ipath_kpiobufs = val;
        ret = 0;
 bail:
        spin_unlock_irqrestore(&ipath_devs_lock, flags);
index 280e732660a19776d69870c5a82893377008be2f..49bf7bb15b04b2628f52bf10df0b0366c574a3b7 100644 (file)
@@ -34,7 +34,7 @@
 #include <linux/pci.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"
 
 /* These are all rcv-related errors which we want to count for stats */
@@ -201,7 +201,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
                                  ib_linkstate(lstate));
                }
                else
-                       ipath_cdbg(SMA, "Unit %u link state %s, last "
+                       ipath_cdbg(VERBOSE, "Unit %u link state %s, last "
                                   "was %s\n", dd->ipath_unit,
                                   ib_linkstate(lstate),
                                   ib_linkstate((unsigned)
@@ -213,7 +213,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
                if (lstate == IPATH_IBSTATE_INIT ||
                    lstate == IPATH_IBSTATE_ARM ||
                    lstate == IPATH_IBSTATE_ACTIVE)
-                       ipath_cdbg(SMA, "Unit %u link state down"
+                       ipath_cdbg(VERBOSE, "Unit %u link state down"
                                   " (state 0x%x), from %s\n",
                                   dd->ipath_unit,
                                   (u32)val & IPATH_IBSTATE_MASK,
@@ -269,7 +269,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
                             INFINIPATH_IBCS_LINKSTATE_MASK)
                            == INFINIPATH_IBCS_L_STATE_ACTIVE)
                                /* if from up to down be more vocal */
-                               ipath_cdbg(SMA,
+                               ipath_cdbg(VERBOSE,
                                           "Unit %u link now down (%s)\n",
                                           dd->ipath_unit,
                                           ipath_ibcstatus_str[ltstate]);
@@ -289,8 +289,6 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
                *dd->ipath_statusp |=
                        IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
                dd->ipath_f_setextled(dd, lstate, ltstate);
-
-               __ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP);
        } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
                /*
                 * set INIT and DOWN.  Down is checked by most of the other
@@ -598,11 +596,11 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 
        if (!noprint && *msg)
                ipath_dev_err(dd, "%s error\n", msg);
-       if (dd->ipath_sma_state_wanted & dd->ipath_flags) {
-               ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, "
-                          "waking\n", dd->ipath_sma_state_wanted,
+       if (dd->ipath_state_wanted & dd->ipath_flags) {
+               ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
+                          "waking\n", dd->ipath_state_wanted,
                           dd->ipath_flags);
-               wake_up_interruptible(&ipath_sma_state_wait);
+               wake_up_interruptible(&ipath_state_wait);
        }
 
        return chkerrpkts;
@@ -708,11 +706,7 @@ static void handle_layer_pioavail(struct ipath_devdata *dd)
 {
        int ret;
 
-       ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
-       if (ret > 0)
-               goto set;
-
-       ret = __ipath_verbs_piobufavail(dd);
+       ret = ipath_ib_piobufavail(dd->verbs_dev);
        if (ret > 0)
                goto set;
 
index e9f374fb641ef6f1c8e0870e9e47551bc623188f..a8a56276ff1dbe6dd591a8756f8f0171a8d24063 100644 (file)
@@ -132,12 +132,6 @@ struct _ipath_layer {
        void *l_arg;
 };
 
-/* Verbs layer interface */
-struct _verbs_layer {
-       void *l_arg;
-       struct timer_list l_timer;
-};
-
 struct ipath_devdata {
        struct list_head ipath_list;
 
@@ -198,7 +192,8 @@ struct ipath_devdata {
        void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
        /* fill out chip-specific fields */
        int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
-       struct _verbs_layer verbs_layer;
+       struct ipath_ibdev *verbs_dev;
+       struct timer_list verbs_timer;
        /* total dwords sent (summed from counter) */
        u64 ipath_sword;
        /* total dwords rcvd (summed from counter) */
@@ -241,7 +236,7 @@ struct ipath_devdata {
        u64 ipath_tidtemplate;
        /* value to write to free TIDs */
        u64 ipath_tidinvalid;
-       /* PE-800 rcv interrupt setup */
+       /* IBA6120 rcv interrupt setup */
        u64 ipath_rhdrhead_intr_off;
 
        /* size of memory at ipath_kregbase */
@@ -250,8 +245,8 @@ struct ipath_devdata {
        u32 ipath_pioavregs;
        /* IPATH_POLL, etc. */
        u32 ipath_flags;
-       /* ipath_flags sma is waiting for */
-       u32 ipath_sma_state_wanted;
+       /* ipath_flags driver is waiting for */
+       u32 ipath_state_wanted;
        /* last buffer for user use, first buf for kernel use is this
         * index. */
        u32 ipath_lastport_piobuf;
@@ -311,10 +306,6 @@ struct ipath_devdata {
        u32 ipath_pcibar0;
        /* so we can rewrite it after a chip reset */
        u32 ipath_pcibar1;
-       /* sequential tries for SMA send and no bufs */
-       u32 ipath_nosma_bufs;
-       /* duration (seconds) ipath_nosma_bufs set */
-       u32 ipath_nosma_secs;
 
        /* HT/PCI Vendor ID (here for NodeInfo) */
        u16 ipath_vendorid;
@@ -512,6 +503,8 @@ struct ipath_devdata {
        u8 ipath_pci_cacheline;
        /* LID mask control */
        u8 ipath_lmc;
+       /* Rx Polarity inversion (compensate for ~tx on partner) */
+       u8 ipath_rx_pol_inv;
 
        /* local link integrity counter */
        u32 ipath_lli_counter;
@@ -523,18 +516,6 @@ extern struct list_head ipath_dev_list;
 extern spinlock_t ipath_devs_lock;
 extern struct ipath_devdata *ipath_lookup(int unit);
 
-extern u16 ipath_layer_rcv_opcode;
-extern int __ipath_layer_intr(struct ipath_devdata *, u32);
-extern int ipath_layer_intr(struct ipath_devdata *, u32);
-extern int __ipath_layer_rcv(struct ipath_devdata *, void *,
-                            struct sk_buff *);
-extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *);
-extern int __ipath_verbs_piobufavail(struct ipath_devdata *);
-extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32);
-
-void ipath_layer_add(struct ipath_devdata *);
-void ipath_layer_remove(struct ipath_devdata *);
-
 int ipath_init_chip(struct ipath_devdata *, int);
 int ipath_enable_wc(struct ipath_devdata *dd);
 void ipath_disable_wc(struct ipath_devdata *dd);
@@ -549,9 +530,8 @@ void ipath_cdev_cleanup(struct cdev **cdevp,
 
 int ipath_diag_add(struct ipath_devdata *);
 void ipath_diag_remove(struct ipath_devdata *);
-void ipath_diag_bringup_link(struct ipath_devdata *);
 
-extern wait_queue_head_t ipath_sma_state_wait;
+extern wait_queue_head_t ipath_state_wait;
 
 int ipath_user_add(struct ipath_devdata *dd);
 void ipath_user_remove(struct ipath_devdata *dd);
@@ -582,12 +562,14 @@ void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
 
 int ipath_parse_ushort(const char *str, unsigned short *valp);
 
-int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
-void ipath_set_ib_lstate(struct ipath_devdata *, int);
 void ipath_kreceive(struct ipath_devdata *);
 int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
 int ipath_reset_device(int);
 void ipath_get_faststats(unsigned long);
+int ipath_set_linkstate(struct ipath_devdata *, u8);
+int ipath_set_mtu(struct ipath_devdata *, u16);
+int ipath_set_lid(struct ipath_devdata *, u32, u8);
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 
 /* for use in system calls, where we want to know device type, etc. */
 #define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
@@ -642,10 +624,8 @@ void ipath_free_data(struct ipath_portdata *dd);
 int ipath_waitfor_mdio_cmdready(struct ipath_devdata *);
 int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *);
 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
-/* init PE-800-specific func */
-void ipath_init_pe800_funcs(struct ipath_devdata *);
-/* init HT-400-specific func */
-void ipath_init_ht400_funcs(struct ipath_devdata *);
+void ipath_init_iba6120_funcs(struct ipath_devdata *);
+void ipath_init_iba6110_funcs(struct ipath_devdata *);
 void ipath_get_eeprom_info(struct ipath_devdata *);
 u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
 
@@ -801,7 +781,7 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
 
 struct device_driver;
 
-extern const char ipath_core_version[];
+extern const char ib_ipath_version[];
 
 int ipath_driver_create_group(struct device_driver *);
 void ipath_driver_remove_group(struct device_driver *);
@@ -810,6 +790,9 @@ int ipath_device_create_group(struct device *, struct ipath_devdata *);
 void ipath_device_remove_group(struct device *, struct ipath_devdata *);
 int ipath_expose_reset(struct device *);
 
+int ipath_diagpkt_add(void);
+void ipath_diagpkt_remove(void);
+
 int ipath_init_ipathfs(void);
 void ipath_exit_ipathfs(void);
 int ipathfs_add_device(struct ipath_devdata *);
@@ -831,10 +814,10 @@ const char *ipath_get_unit_name(int unit);
 
 extern struct mutex ipath_mutex;
 
-#define IPATH_DRV_NAME         "ipath_core"
+#define IPATH_DRV_NAME         "ib_ipath"
 #define IPATH_MAJOR            233
 #define IPATH_USER_MINOR_BASE  0
-#define IPATH_SMA_MINOR                128
+#define IPATH_DIAGPKT_MINOR    127
 #define IPATH_DIAG_MINOR_BASE  129
 #define IPATH_NMINORS          255
 
index a5ca279370aa2a34004cf3ea3f88043985987b10..ba1b93226caa4472ca67fa1d5dade05ee271b53a 100644 (file)
@@ -34,6 +34,7 @@
 #include <asm/io.h>
 
 #include "ipath_verbs.h"
+#include "ipath_kernel.h"
 
 /**
  * ipath_alloc_lkey - allocate an lkey
@@ -60,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
                r = (r + 1) & (rkt->max - 1);
                if (r == n) {
                        spin_unlock_irqrestore(&rkt->lock, flags);
-                       _VERBS_INFO("LKEY table full\n");
+                       ipath_dbg(KERN_INFO "LKEY table full\n");
                        ret = 0;
                        goto bail;
                }
index b28c6f81c73121b2195ef3667b30396d479ea9a1..e46aa4ed2a7e1123898d63e86d35f5ca99a61bf9 100644 (file)
 
 #include "ipath_kernel.h"
 #include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"
 
 /* Acquire before ipath_devs_lock. */
 static DEFINE_MUTEX(ipath_layer_mutex);
 
-static int ipath_verbs_registered;
-
 u16 ipath_layer_rcv_opcode;
 
 static int (*layer_intr)(void *, u32);
 static int (*layer_rcv)(void *, void *, struct sk_buff *);
 static int (*layer_rcv_lid)(void *, void *);
-static int (*verbs_piobufavail)(void *);
-static void (*verbs_rcv)(void *, void *, void *, u32);
 
 static void *(*layer_add_one)(int, struct ipath_devdata *);
 static void (*layer_remove_one)(void *);
-static void *(*verbs_add_one)(int, struct ipath_devdata *);
-static void (*verbs_remove_one)(void *);
-static void (*verbs_timer_cb)(void *);
 
 int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg)
 {
@@ -107,302 +101,16 @@ int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr)
        return ret;
 }
 
-int __ipath_verbs_piobufavail(struct ipath_devdata *dd)
-{
-       int ret = -ENODEV;
-
-       if (dd->verbs_layer.l_arg && verbs_piobufavail)
-               ret = verbs_piobufavail(dd->verbs_layer.l_arg);
-
-       return ret;
-}
-
-int __ipath_verbs_rcv(struct ipath_devdata *dd, void *rc, void *ebuf,
-                     u32 tlen)
-{
-       int ret = -ENODEV;
-
-       if (dd->verbs_layer.l_arg && verbs_rcv) {
-               verbs_rcv(dd->verbs_layer.l_arg, rc, ebuf, tlen);
-               ret = 0;
-       }
-
-       return ret;
-}
-
-int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate)
+void ipath_layer_lid_changed(struct ipath_devdata *dd)
 {
-       u32 lstate;
-       int ret;
-
-       switch (newstate) {
-       case IPATH_IB_LINKDOWN:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
-                                   INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKDOWN_SLEEP:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
-                                   INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKDOWN_DISABLE:
-               ipath_set_ib_lstate(dd,
-                                   INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
-                                   INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKINIT:
-               if (dd->ipath_flags & IPATH_LINKINIT) {
-                       ret = 0;
-                       goto bail;
-               }
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
-                                   INFINIPATH_IBCC_LINKCMD_SHIFT);
-               lstate = IPATH_LINKINIT;
-               break;
-
-       case IPATH_IB_LINKARM:
-               if (dd->ipath_flags & IPATH_LINKARMED) {
-                       ret = 0;
-                       goto bail;
-               }
-               if (!(dd->ipath_flags &
-                     (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
-                                   INFINIPATH_IBCC_LINKCMD_SHIFT);
-               /*
-                * Since the port can transition to ACTIVE by receiving
-                * a non VL 15 packet, wait for either state.
-                */
-               lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
-               break;
-
-       case IPATH_IB_LINKACTIVE:
-               if (dd->ipath_flags & IPATH_LINKACTIVE) {
-                       ret = 0;
-                       goto bail;
-               }
-               if (!(dd->ipath_flags & IPATH_LINKARMED)) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
-                                   INFINIPATH_IBCC_LINKCMD_SHIFT);
-               lstate = IPATH_LINKACTIVE;
-               break;
-
-       default:
-               ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
-               ret = -EINVAL;
-               goto bail;
-       }
-       ret = ipath_wait_linkstate(dd, lstate, 2000);
-
-bail:
-       return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_linkstate);
-
-/**
- * ipath_layer_set_mtu - set the MTU
- * @dd: the infinipath device
- * @arg: the new MTU
- *
- * we can handle "any" incoming size, the issue here is whether we
- * need to restrict our outgoing size.   For now, we don't do any
- * sanity checking on this, and we don't deal with what happens to
- * programs that are already running when the size changes.
- * NOTE: changing the MTU will usually cause the IBC to go back to
- * link initialize (IPATH_IBSTATE_INIT) state...
- */
-int ipath_layer_set_mtu(struct ipath_devdata *dd, u16 arg)
-{
-       u32 piosize;
-       int changed = 0;
-       int ret;
-
-       /*
-        * mtu is IB data payload max.  It's the largest power of 2 less
-        * than piosize (or even larger, since it only really controls the
-        * largest we can receive; we can send the max of the mtu and
-        * piosize).  We check that it's one of the valid IB sizes.
-        */
-       if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
-           arg != 4096) {
-               ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
-               ret = -EINVAL;
-               goto bail;
-       }
-       if (dd->ipath_ibmtu == arg) {
-               ret = 0;        /* same as current */
-               goto bail;
-       }
-
-       piosize = dd->ipath_ibmaxlen;
-       dd->ipath_ibmtu = arg;
-
-       if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
-               /* Only if it's not the initial value (or reset to it) */
-               if (piosize != dd->ipath_init_ibmaxlen) {
-                       dd->ipath_ibmaxlen = piosize;
-                       changed = 1;
-               }
-       } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
-               piosize = arg + IPATH_PIO_MAXIBHDR;
-               ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
-                          "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
-                          arg);
-               dd->ipath_ibmaxlen = piosize;
-               changed = 1;
-       }
-
-       if (changed) {
-               /*
-                * set the IBC maxpktlength to the size of our pio
-                * buffers in words
-                */
-               u64 ibc = dd->ipath_ibcctrl;
-               ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
-                        INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
-
-               piosize = piosize - 2 * sizeof(u32);    /* ignore pbc */
-               dd->ipath_ibmaxlen = piosize;
-               piosize /= sizeof(u32); /* in words */
-               /*
-                * for ICRC, which we only send in diag test pkt mode, and
-                * we don't need to worry about that for mtu
-                */
-               piosize += 1;
-
-               ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
-               dd->ipath_ibcctrl = ibc;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-               dd->ipath_f_tidtemplate(dd);
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_mtu);
-
-int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
-{
-       dd->ipath_lid = arg;
-       dd->ipath_lmc = lmc;
-
        mutex_lock(&ipath_layer_mutex);
 
        if (dd->ipath_layer.l_arg && layer_intr)
                layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID);
 
        mutex_unlock(&ipath_layer_mutex);
-
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_set_lid);
-
-int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid)
-{
-       /* XXX - need to inform anyone who cares this just happened. */
-       dd->ipath_guid = guid;
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_guid);
-
-__be64 ipath_layer_get_guid(struct ipath_devdata *dd)
-{
-       return dd->ipath_guid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_guid);
-
-u32 ipath_layer_get_nguid(struct ipath_devdata *dd)
-{
-       return dd->ipath_nguid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_nguid);
-
-u32 ipath_layer_get_majrev(struct ipath_devdata *dd)
-{
-       return dd->ipath_majrev;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_majrev);
-
-u32 ipath_layer_get_minrev(struct ipath_devdata *dd)
-{
-       return dd->ipath_minrev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_minrev);
-
-u32 ipath_layer_get_pcirev(struct ipath_devdata *dd)
-{
-       return dd->ipath_pcirev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pcirev);
-
-u32 ipath_layer_get_flags(struct ipath_devdata *dd)
-{
-       return dd->ipath_flags;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_flags);
-
-struct device *ipath_layer_get_device(struct ipath_devdata *dd)
-{
-       return &dd->pcidev->dev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_device);
-
-u16 ipath_layer_get_deviceid(struct ipath_devdata *dd)
-{
-       return dd->ipath_deviceid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_deviceid);
-
-u32 ipath_layer_get_vendorid(struct ipath_devdata *dd)
-{
-       return dd->ipath_vendorid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_vendorid);
-
-u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd)
-{
-       return dd->ipath_lastibcstat;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_lastibcstat);
-
-u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd)
-{
-       return dd->ipath_ibmtu;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_ibmtu);
-
 void ipath_layer_add(struct ipath_devdata *dd)
 {
        mutex_lock(&ipath_layer_mutex);
@@ -411,10 +119,6 @@ void ipath_layer_add(struct ipath_devdata *dd)
                dd->ipath_layer.l_arg =
                        layer_add_one(dd->ipath_unit, dd);
 
-       if (verbs_add_one)
-               dd->verbs_layer.l_arg =
-                       verbs_add_one(dd->ipath_unit, dd);
-
        mutex_unlock(&ipath_layer_mutex);
 }
 
@@ -427,11 +131,6 @@ void ipath_layer_remove(struct ipath_devdata *dd)
                dd->ipath_layer.l_arg = NULL;
        }
 
-       if (dd->verbs_layer.l_arg && verbs_remove_one) {
-               verbs_remove_one(dd->verbs_layer.l_arg);
-               dd->verbs_layer.l_arg = NULL;
-       }
-
        mutex_unlock(&ipath_layer_mutex);
 }
 
@@ -463,9 +162,6 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
                if (dd->ipath_layer.l_arg)
                        continue;
 
-               if (!(*dd->ipath_statusp & IPATH_STATUS_SMA))
-                       *dd->ipath_statusp |= IPATH_STATUS_OIB_SMA;
-
                spin_unlock_irqrestore(&ipath_devs_lock, flags);
                dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd);
                spin_lock_irqsave(&ipath_devs_lock, flags);
@@ -509,107 +205,6 @@ void ipath_layer_unregister(void)
 
 EXPORT_SYMBOL_GPL(ipath_layer_unregister);
 
-static void __ipath_verbs_timer(unsigned long arg)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *) arg;
-
-       /*
-        * If port 0 receive packet interrupts are not available, or
-        * can be missed, poll the receive queue
-        */
-       if (dd->ipath_flags & IPATH_POLL_RX_INTR)
-               ipath_kreceive(dd);
-
-       /* Handle verbs layer timeouts. */
-       if (dd->verbs_layer.l_arg && verbs_timer_cb)
-               verbs_timer_cb(dd->verbs_layer.l_arg);
-
-       mod_timer(&dd->verbs_layer.l_timer, jiffies + 1);
-}
-
-/**
- * ipath_verbs_register - verbs layer registration
- * @l_piobufavail: callback for when PIO buffers become available
- * @l_rcv: callback for receiving a packet
- * @l_timer_cb: timer callback
- * @ipath_devdata: device data structure is put here
- */
-int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
-                        void (*l_remove)(void *arg),
-                        int (*l_piobufavail) (void *arg),
-                        void (*l_rcv) (void *arg, void *rhdr,
-                                       void *data, u32 tlen),
-                        void (*l_timer_cb) (void *arg))
-{
-       struct ipath_devdata *dd, *tmp;
-       unsigned long flags;
-
-       mutex_lock(&ipath_layer_mutex);
-
-       verbs_add_one = l_add;
-       verbs_remove_one = l_remove;
-       verbs_piobufavail = l_piobufavail;
-       verbs_rcv = l_rcv;
-       verbs_timer_cb = l_timer_cb;
-
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-               if (!(dd->ipath_flags & IPATH_INITTED))
-                       continue;
-
-               if (dd->verbs_layer.l_arg)
-                       continue;
-
-               spin_unlock_irqrestore(&ipath_devs_lock, flags);
-               dd->verbs_layer.l_arg = l_add(dd->ipath_unit, dd);
-               spin_lock_irqsave(&ipath_devs_lock, flags);
-       }
-
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-       mutex_unlock(&ipath_layer_mutex);
-
-       ipath_verbs_registered = 1;
-
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_register);
-
-void ipath_verbs_unregister(void)
-{
-       struct ipath_devdata *dd, *tmp;
-       unsigned long flags;
-
-       mutex_lock(&ipath_layer_mutex);
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-               *dd->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
-
-               if (dd->verbs_layer.l_arg && verbs_remove_one) {
-                       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-                       verbs_remove_one(dd->verbs_layer.l_arg);
-                       spin_lock_irqsave(&ipath_devs_lock, flags);
-                       dd->verbs_layer.l_arg = NULL;
-               }
-       }
-
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-       verbs_add_one = NULL;
-       verbs_remove_one = NULL;
-       verbs_piobufavail = NULL;
-       verbs_rcv = NULL;
-       verbs_timer_cb = NULL;
-
-       ipath_verbs_registered = 0;
-
-       mutex_unlock(&ipath_layer_mutex);
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_unregister);
-
 int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax)
 {
        int ret;
@@ -698,390 +293,6 @@ u16 ipath_layer_get_bcast(struct ipath_devdata *dd)
 
 EXPORT_SYMBOL_GPL(ipath_layer_get_bcast);
 
-u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd)
-{
-       return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_cr_errpkey);
-
-static void update_sge(struct ipath_sge_state *ss, u32 length)
-{
-       struct ipath_sge *sge = &ss->sge;
-
-       sge->vaddr += length;
-       sge->length -= length;
-       sge->sge_length -= length;
-       if (sge->sge_length == 0) {
-               if (--ss->num_sge)
-                       *sge = *ss->sg_list++;
-       } else if (sge->length == 0 && sge->mr != NULL) {
-               if (++sge->n >= IPATH_SEGSZ) {
-                       if (++sge->m >= sge->mr->mapsz)
-                               return;
-                       sge->n = 0;
-               }
-               sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
-               sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
-       }
-}
-
-#ifdef __LITTLE_ENDIAN
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-       return data >> shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-       return data << shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-       data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
-       data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-       return data;
-}
-#else
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-       return data << shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-       return data >> shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-       data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
-       data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-       return data;
-}
-#endif
-
-static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
-                   u32 length)
-{
-       u32 extra = 0;
-       u32 data = 0;
-       u32 last;
-
-       while (1) {
-               u32 len = ss->sge.length;
-               u32 off;
-
-               BUG_ON(len == 0);
-               if (len > length)
-                       len = length;
-               if (len > ss->sge.sge_length)
-                       len = ss->sge.sge_length;
-               /* If the source address is not aligned, try to align it. */
-               off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
-               if (off) {
-                       u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
-                                           ~(sizeof(u32) - 1));
-                       u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
-                       u32 y;
-
-                       y = sizeof(u32) - off;
-                       if (len > y)
-                               len = y;
-                       if (len + extra >= sizeof(u32)) {
-                               data |= set_upper_bits(v, extra *
-                                                      BITS_PER_BYTE);
-                               len = sizeof(u32) - extra;
-                               if (len == length) {
-                                       last = data;
-                                       break;
-                               }
-                               __raw_writel(data, piobuf);
-                               piobuf++;
-                               extra = 0;
-                               data = 0;
-                       } else {
-                               /* Clear unused upper bytes */
-                               data |= clear_upper_bytes(v, len, extra);
-                               if (len == length) {
-                                       last = data;
-                                       break;
-                               }
-                               extra += len;
-                       }
-               } else if (extra) {
-                       /* Source address is aligned. */
-                       u32 *addr = (u32 *) ss->sge.vaddr;
-                       int shift = extra * BITS_PER_BYTE;
-                       int ushift = 32 - shift;
-                       u32 l = len;
-
-                       while (l >= sizeof(u32)) {
-                               u32 v = *addr;
-
-                               data |= set_upper_bits(v, shift);
-                               __raw_writel(data, piobuf);
-                               data = get_upper_bits(v, ushift);
-                               piobuf++;
-                               addr++;
-                               l -= sizeof(u32);
-                       }
-                       /*
-                        * We still have 'extra' number of bytes leftover.
-                        */
-                       if (l) {
-                               u32 v = *addr;
-
-                               if (l + extra >= sizeof(u32)) {
-                                       data |= set_upper_bits(v, shift);
-                                       len -= l + extra - sizeof(u32);
-                                       if (len == length) {
-                                               last = data;
-                                               break;
-                                       }
-                                       __raw_writel(data, piobuf);
-                                       piobuf++;
-                                       extra = 0;
-                                       data = 0;
-                               } else {
-                                       /* Clear unused upper bytes */
-                                       data |= clear_upper_bytes(v, l,
-                                                                 extra);
-                                       if (len == length) {
-                                               last = data;
-                                               break;
-                                       }
-                                       extra += l;
-                               }
-                       } else if (len == length) {
-                               last = data;
-                               break;
-                       }
-               } else if (len == length) {
-                       u32 w;
-
-                       /*
-                        * Need to round up for the last dword in the
-                        * packet.
-                        */
-                       w = (len + 3) >> 2;
-                       __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
-                       piobuf += w - 1;
-                       last = ((u32 *) ss->sge.vaddr)[w - 1];
-                       break;
-               } else {
-                       u32 w = len >> 2;
-
-                       __iowrite32_copy(piobuf, ss->sge.vaddr, w);
-                       piobuf += w;
-
-                       extra = len & (sizeof(u32) - 1);
-                       if (extra) {
-                               u32 v = ((u32 *) ss->sge.vaddr)[w];
-
-                               /* Clear unused upper bytes */
-                               data = clear_upper_bytes(v, extra, 0);
-                       }
-               }
-               update_sge(ss, len);
-               length -= len;
-       }
-       /* Update address before sending packet. */
-       update_sge(ss, length);
-       /* must flush early everything before trigger word */
-       ipath_flush_wc();
-       __raw_writel(last, piobuf);
-       /* be sure trigger word is written */
-       ipath_flush_wc();
-}
-
-/**
- * ipath_verbs_send - send a packet from the verbs layer
- * @dd: the infinipath device
- * @hdrwords: the number of words in the header
- * @hdr: the packet header
- * @len: the length of the packet in bytes
- * @ss: the SGE to send
- *
- * This is like ipath_sma_send_pkt() in that we need to be able to send
- * packets after the chip is initialized (MADs) but also like
- * ipath_layer_send_hdr() since its used by the verbs layer.
- */
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
-                    u32 *hdr, u32 len, struct ipath_sge_state *ss)
-{
-       u32 __iomem *piobuf;
-       u32 plen;
-       int ret;
-
-       /* +1 is for the qword padding of pbc */
-       plen = hdrwords + ((len + 3) >> 2) + 1;
-       if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
-               ipath_dbg("packet len 0x%x too long, failing\n", plen);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /* Get a PIO buffer to use. */
-       piobuf = ipath_getpiobuf(dd, NULL);
-       if (unlikely(piobuf == NULL)) {
-               ret = -EBUSY;
-               goto bail;
-       }
-
-       /*
-        * Write len to control qword, no flags.
-        * We have to flush after the PBC for correctness on some cpus
-        * or WC buffer can be written out of order.
-        */
-       writeq(plen, piobuf);
-       ipath_flush_wc();
-       piobuf += 2;
-       if (len == 0) {
-               /*
-                * If there is just the header portion, must flush before
-                * writing last word of header for correctness, and after
-                * the last header word (trigger word).
-                */
-               __iowrite32_copy(piobuf, hdr, hdrwords - 1);
-               ipath_flush_wc();
-               __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
-               ipath_flush_wc();
-               ret = 0;
-               goto bail;
-       }
-
-       __iowrite32_copy(piobuf, hdr, hdrwords);
-       piobuf += hdrwords;
-
-       /* The common case is aligned and contained in one segment. */
-       if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
-                  !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
-               u32 w;
-               u32 *addr = (u32 *) ss->sge.vaddr;
-
-               /* Update address before sending packet. */
-               update_sge(ss, len);
-               /* Need to round up for the last dword in the packet. */
-               w = (len + 3) >> 2;
-               __iowrite32_copy(piobuf, addr, w - 1);
-               /* must flush early everything before trigger word */
-               ipath_flush_wc();
-               __raw_writel(addr[w - 1], piobuf + w - 1);
-               /* be sure trigger word is written */
-               ipath_flush_wc();
-               ret = 0;
-               goto bail;
-       }
-       copy_io(piobuf, ss, len);
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_send);
-
-int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-                                 u64 *rwords, u64 *spkts, u64 *rpkts,
-                                 u64 *xmit_wait)
-{
-       int ret;
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               /* no hardware, freeze, etc. */
-               ipath_dbg("unit %u not usable\n", dd->ipath_unit);
-               ret = -EINVAL;
-               goto bail;
-       }
-       *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-       *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-       *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-       *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-       *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_snapshot_counters);
-
-/**
- * ipath_layer_get_counters - get various chip counters
- * @dd: the infinipath device
- * @cntrs: counters are placed here
- *
- * Return the counters needed by recv_pma_get_portcounters().
- */
-int ipath_layer_get_counters(struct ipath_devdata *dd,
-                             struct ipath_layer_counters *cntrs)
-{
-       int ret;
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               /* no hardware, freeze, etc. */
-               ipath_dbg("unit %u not usable\n", dd->ipath_unit);
-               ret = -EINVAL;
-               goto bail;
-       }
-       cntrs->symbol_error_counter =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
-       cntrs->link_error_recovery_counter =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
-       /*
-        * The link downed counter counts when the other side downs the
-        * connection.  We add in the number of times we downed the link
-        * due to local link integrity errors to compensate.
-        */
-       cntrs->link_downed_counter =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
-       cntrs->port_rcv_errors =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
-       cntrs->port_rcv_remphys_errors =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
-       cntrs->port_xmit_discards =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
-       cntrs->port_xmit_data =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-       cntrs->port_rcv_data =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-       cntrs->port_xmit_packets =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-       cntrs->port_rcv_packets =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-       cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
-       cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_counters);
-
-int ipath_layer_want_buffer(struct ipath_devdata *dd)
-{
-       set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                        dd->ipath_sendctrl);
-
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_want_buffer);
-
 int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr)
 {
        int ret = 0;
@@ -1153,389 +364,3 @@ int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd)
 }
 
 EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int);
-
-int ipath_layer_enable_timer(struct ipath_devdata *dd)
-{
-       /*
-        * HT-400 has a design flaw where the chip and kernel idea
-        * of the tail register don't always agree, and therefore we won't
-        * get an interrupt on the next packet received.
-        * If the board supports per packet receive interrupts, use it.
-        * Otherwise, the timer function periodically checks for packets
-        * to cover this case.
-        * Either way, the timer is needed for verbs layer related
-        * processing.
-        */
-       if (dd->ipath_flags & IPATH_GPIO_INTR) {
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
-                                0x2074076542310ULL);
-               /* Enable GPIO bit 2 interrupt */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-                                (u64) (1 << 2));
-       }
-
-       init_timer(&dd->verbs_layer.l_timer);
-       dd->verbs_layer.l_timer.function = __ipath_verbs_timer;
-       dd->verbs_layer.l_timer.data = (unsigned long)dd;
-       dd->verbs_layer.l_timer.expires = jiffies + 1;
-       add_timer(&dd->verbs_layer.l_timer);
-
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_enable_timer);
-
-int ipath_layer_disable_timer(struct ipath_devdata *dd)
-{
-       /* Disable GPIO bit 2 interrupt */
-       if (dd->ipath_flags & IPATH_GPIO_INTR)
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
-
-       del_timer_sync(&dd->verbs_layer.l_timer);
-
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_disable_timer);
-
-/**
- * ipath_layer_set_verbs_flags - set the verbs layer flags
- * @dd: the infinipath device
- * @flags: the flags to set
- */
-int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags)
-{
-       struct ipath_devdata *ss;
-       unsigned long lflags;
-
-       spin_lock_irqsave(&ipath_devs_lock, lflags);
-
-       list_for_each_entry(ss, &ipath_dev_list, ipath_list) {
-               if (!(ss->ipath_flags & IPATH_INITTED))
-                       continue;
-               if ((flags & IPATH_VERBS_KERNEL_SMA) &&
-                   !(*ss->ipath_statusp & IPATH_STATUS_SMA))
-                       *ss->ipath_statusp |= IPATH_STATUS_OIB_SMA;
-               else
-                       *ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
-       }
-
-       spin_unlock_irqrestore(&ipath_devs_lock, lflags);
-
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_verbs_flags);
-
-/**
- * ipath_layer_get_npkeys - return the size of the PKEY table for port 0
- * @dd: the infinipath device
- */
-unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd)
-{
-       return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_npkeys);
-
-/**
- * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table
- * @dd: the infinipath device
- * @index: the PKEY index
- */
-unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index)
-{
-       unsigned ret;
-
-       if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
-               ret = 0;
-       else
-               ret = dd->ipath_pd[0]->port_pkeys[index];
-
-       return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pkey);
-
-/**
- * ipath_layer_get_pkeys - return the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the pkey table is placed here
- */
-int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
-       struct ipath_portdata *pd = dd->ipath_pd[0];
-
-       memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
-
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pkeys);
-
-/**
- * rm_pkey - decrement the reference count for the given PKEY
- * @dd: the infinipath device
- * @key: the PKEY index
- *
- * Return true if this was the last reference and the hardware table entry
- * needs to be changed.
- */
-static int rm_pkey(struct ipath_devdata *dd, u16 key)
-{
-       int i;
-       int ret;
-
-       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (dd->ipath_pkeys[i] != key)
-                       continue;
-               if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
-                       dd->ipath_pkeys[i] = 0;
-                       ret = 1;
-                       goto bail;
-               }
-               break;
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * add_pkey - add the given PKEY to the hardware table
- * @dd: the infinipath device
- * @key: the PKEY
- *
- * Return an error code if unable to add the entry, zero if no change,
- * or 1 if the hardware PKEY register needs to be updated.
- */
-static int add_pkey(struct ipath_devdata *dd, u16 key)
-{
-       int i;
-       u16 lkey = key & 0x7FFF;
-       int any = 0;
-       int ret;
-
-       if (lkey == 0x7FFF) {
-               ret = 0;
-               goto bail;
-       }
-
-       /* Look for an empty slot or a matching PKEY. */
-       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i]) {
-                       any++;
-                       continue;
-               }
-               /* If it matches exactly, try to increment the ref count */
-               if (dd->ipath_pkeys[i] == key) {
-                       if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
-                               ret = 0;
-                               goto bail;
-                       }
-                       /* Lost the race. Look for an empty slot below. */
-                       atomic_dec(&dd->ipath_pkeyrefs[i]);
-                       any++;
-               }
-               /*
-                * It makes no sense to have both the limited and unlimited
-                * PKEY set at the same time since the unlimited one will
-                * disable the limited one.
-                */
-               if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-                       ret = -EEXIST;
-                       goto bail;
-               }
-       }
-       if (!any) {
-               ret = -EBUSY;
-               goto bail;
-       }
-       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i] &&
-                   atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-                       /* for ipathstats, etc. */
-                       ipath_stats.sps_pkeys[i] = lkey;
-                       dd->ipath_pkeys[i] = key;
-                       ret = 1;
-                       goto bail;
-               }
-       }
-       ret = -EBUSY;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_layer_set_pkeys - set the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the PKEY table
- */
-int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
-       struct ipath_portdata *pd;
-       int i;
-       int changed = 0;
-
-       pd = dd->ipath_pd[0];
-
-       for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-               u16 key = pkeys[i];
-               u16 okey = pd->port_pkeys[i];
-
-               if (key == okey)
-                       continue;
-               /*
-                * The value of this PKEY table entry is changing.
-                * Remove the old entry in the hardware's array of PKEYs.
-                */
-               if (okey & 0x7FFF)
-                       changed |= rm_pkey(dd, okey);
-               if (key & 0x7FFF) {
-                       int ret = add_pkey(dd, key);
-
-                       if (ret < 0)
-                               key = 0;
-                       else
-                               changed |= ret;
-               }
-               pd->port_pkeys[i] = key;
-       }
-       if (changed) {
-               u64 pkey;
-
-               pkey = (u64) dd->ipath_pkeys[0] |
-                       ((u64) dd->ipath_pkeys[1] << 16) |
-                       ((u64) dd->ipath_pkeys[2] << 32) |
-                       ((u64) dd->ipath_pkeys[3] << 48);
-               ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
-                          (unsigned long long) pkey);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-                                pkey);
-       }
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_pkeys);
-
-/**
- * ipath_layer_get_linkdowndefaultstate - get the default linkdown state
- * @dd: the infinipath device
- *
- * Returns zero if the default is POLL, 1 if the default is SLEEP.
- */
-int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd)
-{
-       return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_linkdowndefaultstate);
-
-/**
- * ipath_layer_set_linkdowndefaultstate - set the default linkdown state
- * @dd: the infinipath device
- * @sleep: the new state
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
-                                        int sleep)
-{
-       if (sleep)
-               dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-       else
-               dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                        dd->ipath_ibcctrl);
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_linkdowndefaultstate);
-
-int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd)
-{
-       return (dd->ipath_ibcctrl >>
-               INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_phyerrthreshold);
-
-/**
- * ipath_layer_set_phyerrthreshold - set the physical error threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
-{
-       unsigned v;
-
-       v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-       if (v != n) {
-               dd->ipath_ibcctrl &=
-                       ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
-                         INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
-               dd->ipath_ibcctrl |=
-                       (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-       }
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_phyerrthreshold);
-
-int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd)
-{
-       return (dd->ipath_ibcctrl >>
-               INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_overrunthreshold);
-
-/**
- * ipath_layer_set_overrunthreshold - set the overrun threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
-{
-       unsigned v;
-
-       v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-       if (v != n) {
-               dd->ipath_ibcctrl &=
-                       ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
-                         INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
-               dd->ipath_ibcctrl |=
-                       (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-       }
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_overrunthreshold);
-
-int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
-                             size_t namelen)
-{
-       return dd->ipath_f_get_boardname(dd, name, namelen);
-}
-EXPORT_SYMBOL_GPL(ipath_layer_get_boardname);
-
-u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd)
-{
-       return dd->ipath_rcvhdrentsize;
-}
-EXPORT_SYMBOL_GPL(ipath_layer_get_rcvhdrentsize);
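
The ipath_verbs_send()/copy_io() code removed above sizes each packet in dwords (header words, payload rounded up, plus one for the PBC control qword) and deliberately writes the final dword of the packet last, with a write-combining flush on either side, because that "trigger word" is what tells the chip the PIO buffer is complete. The following is a minimal user-space sketch of that ordering only; pio_send() and wc_flush() are illustrative stand-ins (an ordinary array replaces the chip's PIO buffer and the PBC qword is simplified to a single dword), not the driver's interface.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void wc_flush(void)
{
	/* placeholder for ipath_flush_wc(): orders writes to a WC mapping */
}

static void pio_send(uint32_t *piobuf, const uint32_t *hdr, unsigned hdrwords,
		     const uint8_t *payload, unsigned len)
{
	/* header words + payload rounded up to dwords + 1 for the PBC
	 * (assumes len > 0; the driver handles header-only packets apart) */
	unsigned plen = hdrwords + ((len + 3) >> 2) + 1;
	unsigned w = (len + 3) >> 2;
	uint32_t last = 0;

	piobuf[0] = plen;		/* control word (PBC, simplified) ... */
	wc_flush();			/* ... must land before any data */
	memcpy(piobuf + 2, hdr, hdrwords * sizeof(uint32_t));

	/* everything except the last payload dword */
	memcpy(piobuf + 2 + hdrwords, payload, (w - 1) * sizeof(uint32_t));
	memcpy(&last, payload + (w - 1) * 4, len - (w - 1) * 4);

	wc_flush();			/* flush before the trigger word */
	piobuf[2 + hdrwords + w - 1] = last;	/* trigger word written last */
	wc_flush();			/* make sure it actually goes out */
}

int main(void)
{
	uint32_t pio[64] = { 0 }, hdr[4] = { 1, 2, 3, 4 };
	uint8_t payload[10] = "payload!!";

	pio_send(pio, hdr, 4, payload, sizeof(payload));
	printf("plen = %u dwords\n", pio[0]);	/* 4 + 3 + 1 = 8 */
	return 0;
}
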
index 71485096fcacabc1deb7cb5e8295e0020847fc2b..3854a4eae68479a75fb614d6807ffae1009946f9 100644 (file)
  */
 
 struct sk_buff;
-struct ipath_sge_state;
 struct ipath_devdata;
 struct ether_header;
 
-struct ipath_layer_counters {
-       u64 symbol_error_counter;
-       u64 link_error_recovery_counter;
-       u64 link_downed_counter;
-       u64 port_rcv_errors;
-       u64 port_rcv_remphys_errors;
-       u64 port_xmit_discards;
-       u64 port_xmit_data;
-       u64 port_rcv_data;
-       u64 port_xmit_packets;
-       u64 port_rcv_packets;
-       u32 local_link_integrity_errors;
-       u32 excessive_buffer_overrun_errors;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct ipath_seg {
-       void *vaddr;
-       size_t length;
-};
-
-/* The number of ipath_segs that fit in a page. */
-#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
-
-struct ipath_segarray {
-       struct ipath_seg segs[IPATH_SEGSZ];
-};
-
-struct ipath_mregion {
-       u64 user_base;          /* User's address for this region */
-       u64 iova;               /* IB start address of this region */
-       size_t length;
-       u32 lkey;
-       u32 offset;             /* offset (bytes) to start of region */
-       int access_flags;
-       u32 max_segs;           /* number of ipath_segs in all the arrays */
-       u32 mapsz;              /* size of the map array */
-       struct ipath_segarray *map[0];  /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct ipath_sge {
-       struct ipath_mregion *mr;
-       void *vaddr;            /* current pointer into the segment */
-       u32 sge_length;         /* length of the SGE */
-       u32 length;             /* remaining length of the segment */
-       u16 m;                  /* current index: mr->map[m] */
-       u16 n;                  /* current index: mr->map[m]->segs[n] */
-};
-
-struct ipath_sge_state {
-       struct ipath_sge *sg_list;      /* next SGE to be used if any */
-       struct ipath_sge sge;   /* progress state for the current SGE */
-       u8 num_sge;
-};
-
 int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
                         void (*l_remove)(void *),
                         int (*l_intr)(void *, u32),
@@ -114,62 +50,14 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
                                      struct sk_buff *),
                         u16 rcv_opcode,
                         int (*l_rcv_lid)(void *, void *));
-int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
-                        void (*l_remove)(void *arg),
-                        int (*l_piobufavail)(void *arg),
-                        void (*l_rcv)(void *arg, void *rhdr,
-                                      void *data, u32 tlen),
-                        void (*l_timer_cb)(void *arg));
 void ipath_layer_unregister(void);
-void ipath_verbs_unregister(void);
 int ipath_layer_open(struct ipath_devdata *, u32 * pktmax);
 u16 ipath_layer_get_lid(struct ipath_devdata *dd);
 int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *);
 u16 ipath_layer_get_bcast(struct ipath_devdata *dd);
-u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd);
-int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state);
-int ipath_layer_set_mtu(struct ipath_devdata *, u16);
-int ipath_set_lid(struct ipath_devdata *, u32, u8);
 int ipath_layer_send_hdr(struct ipath_devdata *dd,
                         struct ether_header *hdr);
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
-                    u32 * hdr, u32 len, struct ipath_sge_state *ss);
 int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd);
-int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
-                             size_t namelen);
-int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-                                 u64 *rwords, u64 *spkts, u64 *rpkts,
-                                 u64 *xmit_wait);
-int ipath_layer_get_counters(struct ipath_devdata *dd,
-                            struct ipath_layer_counters *cntrs);
-int ipath_layer_want_buffer(struct ipath_devdata *dd);
-int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid);
-__be64 ipath_layer_get_guid(struct ipath_devdata *);
-u32 ipath_layer_get_nguid(struct ipath_devdata *);
-u32 ipath_layer_get_majrev(struct ipath_devdata *);
-u32 ipath_layer_get_minrev(struct ipath_devdata *);
-u32 ipath_layer_get_pcirev(struct ipath_devdata *);
-u32 ipath_layer_get_flags(struct ipath_devdata *dd);
-struct device *ipath_layer_get_device(struct ipath_devdata *dd);
-u16 ipath_layer_get_deviceid(struct ipath_devdata *dd);
-u32 ipath_layer_get_vendorid(struct ipath_devdata *);
-u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd);
-u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd);
-int ipath_layer_enable_timer(struct ipath_devdata *dd);
-int ipath_layer_disable_timer(struct ipath_devdata *dd);
-int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags);
-unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd);
-unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index);
-int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd);
-int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
-                                        int sleep);
-int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n);
-int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n);
-u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
 
 /* ipath_ether interrupt values */
 #define IPATH_LAYER_INT_IF_UP 0x2
@@ -178,9 +66,6 @@ u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
 #define IPATH_LAYER_INT_SEND_CONTINUE 0x10
 #define IPATH_LAYER_INT_BCAST 0x40
 
-/* _verbs_layer.l_flags */
-#define IPATH_VERBS_KERNEL_SMA 0x1
-
 extern unsigned ipath_debug; /* debugging bit mask */
 
 #endif                         /* _IPATH_LAYER_H */
index d3402341b7d0dddc154c1e765e6fe988ad72d091..72d1db89db8f6d3393154eb542c0b47a39208714 100644 (file)
@@ -101,15 +101,15 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
        nip->num_ports = ibdev->phys_port_cnt;
        /* This is already in network order */
        nip->sys_guid = to_idev(ibdev)->sys_image_guid;
-       nip->node_guid = ipath_layer_get_guid(dd);
+       nip->node_guid = dd->ipath_guid;
        nip->port_guid = nip->sys_guid;
-       nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd));
-       nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd));
-       majrev = ipath_layer_get_majrev(dd);
-       minrev = ipath_layer_get_minrev(dd);
+       nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
+       nip->device_id = cpu_to_be16(dd->ipath_deviceid);
+       majrev = dd->ipath_majrev;
+       minrev = dd->ipath_minrev;
        nip->revision = cpu_to_be32((majrev << 16) | minrev);
        nip->local_port_num = port;
-       vendor = ipath_layer_get_vendorid(dd);
+       vendor = dd->ipath_vendorid;
        nip->vendor_id[0] = 0;
        nip->vendor_id[1] = vendor >> 8;
        nip->vendor_id[2] = vendor;
@@ -133,13 +133,89 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp,
         */
        if (startgx == 0)
                /* The first is a copy of the read-only HW GUID. */
-               *p = ipath_layer_get_guid(to_idev(ibdev)->dd);
+               *p = to_idev(ibdev)->dd->ipath_guid;
        else
                smp->status |= IB_SMP_INVALID_FIELD;
 
        return reply(smp);
 }
 
+
+static int get_overrunthreshold(struct ipath_devdata *dd)
+{
+       return (dd->ipath_ibcctrl >>
+               INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+               INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+}
+
+/**
+ * set_overrunthreshold - set the overrun threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
+{
+       unsigned v;
+
+       v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+               INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+       if (v != n) {
+               dd->ipath_ibcctrl &=
+                       ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
+                         INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
+               dd->ipath_ibcctrl |=
+                       (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+                                dd->ipath_ibcctrl);
+       }
+       return 0;
+}
+
+static int get_phyerrthreshold(struct ipath_devdata *dd)
+{
+       return (dd->ipath_ibcctrl >>
+               INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+               INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+}
+
+/**
+ * set_phyerrthreshold - set the physical error threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
+{
+       unsigned v;
+
+       v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+               INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+       if (v != n) {
+               dd->ipath_ibcctrl &=
+                       ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
+                         INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
+               dd->ipath_ibcctrl |=
+                       (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+                                dd->ipath_ibcctrl);
+       }
+       return 0;
+}
+
+/**
+ * get_linkdowndefaultstate - get the default linkdown state
+ * @dd: the infinipath device
+ *
+ * Returns zero if the default is POLL, 1 if the default is SLEEP.
+ */
+static int get_linkdowndefaultstate(struct ipath_devdata *dd)
+{
+       return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
+}
+
 static int recv_subn_get_portinfo(struct ib_smp *smp,
                                  struct ib_device *ibdev, u8 port)
 {
@@ -166,7 +242,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
            (dev->mkeyprot_resv_lmc >> 6) == 0)
                pip->mkey = dev->mkey;
        pip->gid_prefix = dev->gid_prefix;
-       lid = ipath_layer_get_lid(dev->dd);
+       lid = dev->dd->ipath_lid;
        pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
        pip->sm_lid = cpu_to_be16(dev->sm_lid);
        pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
@@ -177,14 +253,14 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
        pip->link_width_supported = 3;  /* 1x or 4x */
        pip->link_width_active = 2;     /* 4x */
        pip->linkspeed_portstate = 0x10;        /* 2.5Gbps */
-       ibcstat = ipath_layer_get_lastibcstat(dev->dd);
+       ibcstat = dev->dd->ipath_lastibcstat;
        pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1;
        pip->portphysstate_linkdown =
                (ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
-               (ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2);
+               (get_linkdowndefaultstate(dev->dd) ? 1 : 2);
        pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc;
        pip->linkspeedactive_enabled = 0x11;    /* 2.5Gbps, 2.5Gbps */
-       switch (ipath_layer_get_ibmtu(dev->dd)) {
+       switch (dev->dd->ipath_ibmtu) {
        case 4096:
                mtu = IB_MTU_4096;
                break;
@@ -217,7 +293,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
        pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
        /* P_KeyViolations are counted by hardware. */
        pip->pkey_violations =
-               cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) -
+               cpu_to_be16((ipath_get_cr_errpkey(dev->dd) -
                             dev->z_pkey_violations) & 0xFFFF);
        pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
        /* Only the hardware GUID is supported for now */
@@ -226,8 +302,8 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
        /* 32.768 usec. response time (guessing) */
        pip->resv_resptimevalue = 3;
        pip->localphyerrors_overrunerrors =
-               (ipath_layer_get_phyerrthreshold(dev->dd) << 4) |
-               ipath_layer_get_overrunthreshold(dev->dd);
+               (get_phyerrthreshold(dev->dd) << 4) |
+               get_overrunthreshold(dev->dd);
        /* pip->max_credit_hint; */
        /* pip->link_roundtrip_latency[3]; */
 
@@ -237,6 +313,20 @@ bail:
        return ret;
 }
 
+/**
+ * get_pkeys - return the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the pkey table is placed here
+ */
+static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
+{
+       struct ipath_portdata *pd = dd->ipath_pd[0];
+
+       memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
+
+       return 0;
+}
+
 static int recv_subn_get_pkeytable(struct ib_smp *smp,
                                   struct ib_device *ibdev)
 {
@@ -249,9 +339,9 @@ static int recv_subn_get_pkeytable(struct ib_smp *smp,
        memset(smp->data, 0, sizeof(smp->data));
        if (startpx == 0) {
                struct ipath_ibdev *dev = to_idev(ibdev);
-               unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+               unsigned i, n = ipath_get_npkeys(dev->dd);
 
-               ipath_layer_get_pkeys(dev->dd, p);
+               get_pkeys(dev->dd, p);
 
                for (i = 0; i < n; i++)
                        q[i] = cpu_to_be16(p[i]);
@@ -268,6 +358,24 @@ static int recv_subn_set_guidinfo(struct ib_smp *smp,
        return recv_subn_get_guidinfo(smp, ibdev);
 }
 
+/**
+ * set_linkdowndefaultstate - set the default linkdown state
+ * @dd: the infinipath device
+ * @sleep: the new state
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
+{
+       if (sleep)
+               dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+       else
+               dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+                        dd->ipath_ibcctrl);
+       return 0;
+}
+
 /**
  * recv_subn_set_portinfo - set port information
  * @smp: the incoming SM packet
@@ -290,7 +398,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
        u8 state;
        u16 lstate;
        u32 mtu;
-       int ret;
+       int ret, ore;
 
        if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
                goto err;
@@ -304,7 +412,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
        dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
 
        lid = be16_to_cpu(pip->lid);
-       if (lid != ipath_layer_get_lid(dev->dd)) {
+       if (lid != dev->dd->ipath_lid) {
                /* Must be a valid unicast LID address. */
                if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
                        goto err;
@@ -342,11 +450,11 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
        case 0: /* NOP */
                break;
        case 1: /* SLEEP */
-               if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1))
+               if (set_linkdowndefaultstate(dev->dd, 1))
                        goto err;
                break;
        case 2: /* POLL */
-               if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0))
+               if (set_linkdowndefaultstate(dev->dd, 0))
                        goto err;
                break;
        default:
@@ -376,7 +484,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
                /* XXX We have already partially updated our state! */
                goto err;
        }
-       ipath_layer_set_mtu(dev->dd, mtu);
+       ipath_set_mtu(dev->dd, mtu);
 
        dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
 
@@ -392,20 +500,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
         * later.
         */
        if (pip->pkey_violations == 0)
-               dev->z_pkey_violations =
-                       ipath_layer_get_cr_errpkey(dev->dd);
+               dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd);
 
        if (pip->qkey_violations == 0)
                dev->qkey_violations = 0;
 
-       if (ipath_layer_set_phyerrthreshold(
-                   dev->dd,
-                   (pip->localphyerrors_overrunerrors >> 4) & 0xF))
+       ore = pip->localphyerrors_overrunerrors;
+       if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF))
                goto err;
 
-       if (ipath_layer_set_overrunthreshold(
-                   dev->dd,
-                   (pip->localphyerrors_overrunerrors & 0xF)))
+       if (set_overrunthreshold(dev->dd, (ore & 0xF)))
                goto err;
 
        dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
@@ -423,7 +527,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
         * is down or is being set to down.
         */
        state = pip->linkspeed_portstate & 0xF;
-       flags = ipath_layer_get_flags(dev->dd);
+       flags = dev->dd->ipath_flags;
        lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
        if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
                goto err;
@@ -439,7 +543,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
                /* FALLTHROUGH */
        case IB_PORT_DOWN:
                if (lstate == 0)
-                       if (ipath_layer_get_linkdowndefaultstate(dev->dd))
+                       if (get_linkdowndefaultstate(dev->dd))
                                lstate = IPATH_IB_LINKDOWN_SLEEP;
                        else
                                lstate = IPATH_IB_LINKDOWN;
@@ -451,7 +555,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
                        lstate = IPATH_IB_LINKDOWN_DISABLE;
                else
                        goto err;
-               ipath_layer_set_linkstate(dev->dd, lstate);
+               ipath_set_linkstate(dev->dd, lstate);
                if (flags & IPATH_LINKACTIVE) {
                        event.event = IB_EVENT_PORT_ERR;
                        ib_dispatch_event(&event);
@@ -460,7 +564,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
        case IB_PORT_ARMED:
                if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE)))
                        break;
-               ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM);
+               ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM);
                if (flags & IPATH_LINKACTIVE) {
                        event.event = IB_EVENT_PORT_ERR;
                        ib_dispatch_event(&event);
@@ -469,7 +573,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
        case IB_PORT_ACTIVE:
                if (!(flags & IPATH_LINKARMED))
                        break;
-               ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
+               ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
                event.event = IB_EVENT_PORT_ACTIVE;
                ib_dispatch_event(&event);
                break;
@@ -493,6 +597,152 @@ done:
        return ret;
 }
 
+/**
+ * rm_pkey - decrement the reference count for the given PKEY
+ * @dd: the infinipath device
+ * @key: the PKEY index
+ *
+ * Return true if this was the last reference and the hardware table entry
+ * needs to be changed.
+ */
+static int rm_pkey(struct ipath_devdata *dd, u16 key)
+{
+       int i;
+       int ret;
+
+       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+               if (dd->ipath_pkeys[i] != key)
+                       continue;
+               if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
+                       dd->ipath_pkeys[i] = 0;
+                       ret = 1;
+                       goto bail;
+               }
+               break;
+       }
+
+       ret = 0;
+
+bail:
+       return ret;
+}
+
+/**
+ * add_pkey - add the given PKEY to the hardware table
+ * @dd: the infinipath device
+ * @key: the PKEY
+ *
+ * Return an error code if unable to add the entry, zero if no change,
+ * or 1 if the hardware PKEY register needs to be updated.
+ */
+static int add_pkey(struct ipath_devdata *dd, u16 key)
+{
+       int i;
+       u16 lkey = key & 0x7FFF;
+       int any = 0;
+       int ret;
+
+       if (lkey == 0x7FFF) {
+               ret = 0;
+               goto bail;
+       }
+
+       /* Look for an empty slot or a matching PKEY. */
+       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+               if (!dd->ipath_pkeys[i]) {
+                       any++;
+                       continue;
+               }
+               /* If it matches exactly, try to increment the ref count */
+               if (dd->ipath_pkeys[i] == key) {
+                       if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
+                               ret = 0;
+                               goto bail;
+                       }
+                       /* Lost the race. Look for an empty slot below. */
+                       atomic_dec(&dd->ipath_pkeyrefs[i]);
+                       any++;
+               }
+               /*
+                * It makes no sense to have both the limited and unlimited
+                * PKEY set at the same time since the unlimited one will
+                * disable the limited one.
+                */
+               if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
+                       ret = -EEXIST;
+                       goto bail;
+               }
+       }
+       if (!any) {
+               ret = -EBUSY;
+               goto bail;
+       }
+       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+               if (!dd->ipath_pkeys[i] &&
+                   atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
+                       /* for ipathstats, etc. */
+                       ipath_stats.sps_pkeys[i] = lkey;
+                       dd->ipath_pkeys[i] = key;
+                       ret = 1;
+                       goto bail;
+               }
+       }
+       ret = -EBUSY;
+
+bail:
+       return ret;
+}
+
+/**
+ * set_pkeys - set the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the PKEY table
+ */
+static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+{
+       struct ipath_portdata *pd;
+       int i;
+       int changed = 0;
+
+       pd = dd->ipath_pd[0];
+
+       for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
+               u16 key = pkeys[i];
+               u16 okey = pd->port_pkeys[i];
+
+               if (key == okey)
+                       continue;
+               /*
+                * The value of this PKEY table entry is changing.
+                * Remove the old entry in the hardware's array of PKEYs.
+                */
+               if (okey & 0x7FFF)
+                       changed |= rm_pkey(dd, okey);
+               if (key & 0x7FFF) {
+                       int ret = add_pkey(dd, key);
+
+                       if (ret < 0)
+                               key = 0;
+                       else
+                               changed |= ret;
+               }
+               pd->port_pkeys[i] = key;
+       }
+       if (changed) {
+               u64 pkey;
+
+               pkey = (u64) dd->ipath_pkeys[0] |
+                       ((u64) dd->ipath_pkeys[1] << 16) |
+                       ((u64) dd->ipath_pkeys[2] << 32) |
+                       ((u64) dd->ipath_pkeys[3] << 48);
+               ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
+                          (unsigned long long) pkey);
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
+                                pkey);
+       }
+       return 0;
+}
+
 static int recv_subn_set_pkeytable(struct ib_smp *smp,
                                   struct ib_device *ibdev)
 {
@@ -500,13 +750,12 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
        __be16 *p = (__be16 *) smp->data;
        u16 *q = (u16 *) smp->data;
        struct ipath_ibdev *dev = to_idev(ibdev);
-       unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+       unsigned i, n = ipath_get_npkeys(dev->dd);
 
        for (i = 0; i < n; i++)
                q[i] = be16_to_cpu(p[i]);
 
-       if (startpx != 0 ||
-           ipath_layer_set_pkeys(dev->dd, q) != 0)
+       if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
                smp->status |= IB_SMP_INVALID_FIELD;
 
        return recv_subn_get_pkeytable(smp, ibdev);
@@ -844,10 +1093,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
        struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
                pmp->data;
        struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_layer_counters cntrs;
+       struct ipath_verbs_counters cntrs;
        u8 port_select = p->port_select;
 
-       ipath_layer_get_counters(dev->dd, &cntrs);
+       ipath_get_counters(dev->dd, &cntrs);
 
        /* Adjust counters for any resets done. */
        cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
@@ -944,8 +1193,8 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
        u64 swords, rwords, spkts, rpkts, xwait;
        u8 port_select = p->port_select;
 
-       ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-                                     &rpkts, &xwait);
+       ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+                               &rpkts, &xwait);
 
        /* Adjust counters for any resets done. */
        swords -= dev->z_port_xmit_data;
@@ -978,13 +1227,13 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp,
        struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
                pmp->data;
        struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_layer_counters cntrs;
+       struct ipath_verbs_counters cntrs;
 
        /*
         * Since the HW doesn't support clearing counters, we save the
         * current count and subtract it from future responses.
         */
-       ipath_layer_get_counters(dev->dd, &cntrs);
+       ipath_get_counters(dev->dd, &cntrs);
 
        if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
                dev->z_symbol_error_counter = cntrs.symbol_error_counter;
@@ -1041,8 +1290,8 @@ static int recv_pma_set_portcounters_ext(struct ib_perf *pmp,
        struct ipath_ibdev *dev = to_idev(ibdev);
        u64 swords, rwords, spkts, rpkts, xwait;
 
-       ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-                                     &rpkts, &xwait);
+       ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+                               &rpkts, &xwait);
 
        if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
                dev->z_port_xmit_data = swords;
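
The rm_pkey()/add_pkey()/set_pkeys() helpers added above keep a small reference-counted shadow of the hardware partition-key table: an exact match only bumps a count, a key whose low 15 bits collide with an existing entry is rejected (having both the limited and full-membership forms makes no sense), and the return value tells the caller whether the hardware register must be rewritten. Below is a hedged user-space model of that bookkeeping; plain ints replace atomic_t, the lost-race recovery path is omitted, and all names are illustrative rather than the driver's.

#include <stdio.h>
#include <stdint.h>

#define NPKEYS 4

static uint16_t pkeys[NPKEYS];		/* shadow of the hardware PKEY table */
static int      pkeyrefs[NPKEYS];

/* Returns 1 if the hardware entry changed, 0 if only a refcount moved,
 * negative on error (limited/full conflict, or table full). */
static int add_pkey(uint16_t key)
{
	uint16_t lkey = key & 0x7FFF;	/* low 15 bits: the key proper */
	int i, empty = -1;

	if (lkey == 0x7FFF)		/* invalid/default key: ignore */
		return 0;
	for (i = 0; i < NPKEYS; i++) {
		if (!pkeys[i]) {
			if (empty < 0)
				empty = i;
			continue;
		}
		if (pkeys[i] == key) {	/* exact match: share the slot */
			pkeyrefs[i]++;
			return 0;
		}
		if ((pkeys[i] & 0x7FFF) == lkey)
			return -1;	/* limited and full forms conflict */
	}
	if (empty < 0)
		return -2;		/* no free slot */
	pkeys[empty] = key;
	pkeyrefs[empty] = 1;
	return 1;			/* hardware register must be rewritten */
}

static int rm_pkey(uint16_t key)
{
	int i;

	for (i = 0; i < NPKEYS; i++) {
		if (pkeys[i] != key)
			continue;
		if (--pkeyrefs[i] == 0) {
			pkeys[i] = 0;
			return 1;	/* last user: clear the hardware entry */
		}
		break;
	}
	return 0;
}

int main(void)
{
	printf("add 0x8001 -> %d\n", add_pkey(0x8001));	/* 1: new entry   */
	printf("add 0x8001 -> %d\n", add_pkey(0x8001));	/* 0: refcounted  */
	printf("add 0x0001 -> %d\n", add_pkey(0x0001));	/* -1: conflicts  */
	printf("rm  0x8001 -> %d\n", rm_pkey(0x8001));	/* 0: still held  */
	printf("rm  0x8001 -> %d\n", rm_pkey(0x8001));	/* 1: slot freed  */
	return 0;
}
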
diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c
new file mode 100644 (file)
index 0000000..11b7378
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <asm/pgtable.h>
+
+#include "ipath_verbs.h"
+
+/**
+ * ipath_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct ipath_mmap_info
+ */
+void ipath_release_mmap_info(struct kref *ref)
+{
+       struct ipath_mmap_info *ip =
+               container_of(ref, struct ipath_mmap_info, ref);
+
+       vfree(ip->obj);
+       kfree(ip);
+}
+
+/*
+ * open and close keep track of how many times the CQ is mapped,
+ * to avoid releasing it.
+ */
+static void ipath_vma_open(struct vm_area_struct *vma)
+{
+       struct ipath_mmap_info *ip = vma->vm_private_data;
+
+       kref_get(&ip->ref);
+       ip->mmap_cnt++;
+}
+
+static void ipath_vma_close(struct vm_area_struct *vma)
+{
+       struct ipath_mmap_info *ip = vma->vm_private_data;
+
+       ip->mmap_cnt--;
+       kref_put(&ip->ref, ipath_release_mmap_info);
+}
+
+static struct vm_operations_struct ipath_vm_ops = {
+       .open =     ipath_vma_open,
+       .close =    ipath_vma_close,
+};
+
+/**
+ * ipath_mmap - create a new mmap region
+ * @context: the IB user context of the process making the mmap() call
+ * @vma: the VMA to be initialized
+ * Return zero if the mmap is OK. Otherwise, return an errno.
+ */
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+       struct ipath_ibdev *dev = to_idev(context->device);
+       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+       unsigned long size = vma->vm_end - vma->vm_start;
+       struct ipath_mmap_info *ip, **pp;
+       int ret = -EINVAL;
+
+       /*
+        * Search the device's list of objects waiting for a mmap call.
+        * Normally, this list is very short since a call to create a
+        * CQ, QP, or SRQ is soon followed by a call to mmap().
+        */
+       spin_lock_irq(&dev->pending_lock);
+       for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) {
+               /* Only the creator is allowed to mmap the object */
+               if (context != ip->context || (void *) offset != ip->obj)
+                       continue;
+               /* Don't allow a mmap larger than the object. */
+               if (size > ip->size)
+                       break;
+
+               *pp = ip->next;
+               spin_unlock_irq(&dev->pending_lock);
+
+               ret = remap_vmalloc_range(vma, ip->obj, 0);
+               if (ret)
+                       goto done;
+               vma->vm_ops = &ipath_vm_ops;
+               vma->vm_private_data = ip;
+               ipath_vma_open(vma);
+               goto done;
+       }
+       spin_unlock_irq(&dev->pending_lock);
+done:
+       return ret;
+}
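
The new ipath_mmap.c above parks each freshly created CQ/QP/SRQ buffer on a per-device pending list, lets only its creator mmap() it, and uses a kref plus the vma open/close callbacks so the vmalloc'ed buffer survives until both the creating object and every mapping are gone. A rough user-space model of that lifetime scheme, assuming a plain integer refcount and invented names, is:

#include <stdlib.h>

struct mmap_obj {
	int refs;			/* stands in for struct kref */
	struct mmap_obj *next;		/* pending-mmap list linkage */
	void *buf;			/* stands in for the vmalloc'ed object */
};

static struct mmap_obj *pending;	/* like dev->pending_mmaps */

static struct mmap_obj *obj_create(size_t size)
{
	struct mmap_obj *ip = calloc(1, sizeof(*ip));

	ip->buf = malloc(size);
	ip->refs = 1;			/* creator's reference */
	ip->next = pending;		/* wait here for the mmap() call */
	pending = ip;
	return ip;
}

static void obj_put(struct mmap_obj *ip)
{
	if (--ip->refs == 0) {		/* last reference: free the buffer */
		free(ip->buf);
		free(ip);
	}
}

static void vma_open(struct mmap_obj *ip)  { ip->refs++; }  /* new mapping  */
static void vma_close(struct mmap_obj *ip) { obj_put(ip); } /* mapping gone */

static struct mmap_obj *do_mmap(void *wanted)
{
	struct mmap_obj **pp, *ip;

	for (pp = &pending; (ip = *pp); pp = &ip->next) {
		if (ip->buf != wanted)	/* only the creator may map it */
			continue;
		*pp = ip->next;		/* unlink from the pending list */
		vma_open(ip);
		return ip;
	}
	return NULL;
}

int main(void)
{
	struct mmap_obj *ip = obj_create(4096);
	struct mmap_obj *map = do_mmap(ip->buf);

	vma_close(map);			/* user unmaps the buffer */
	obj_put(ip);			/* creator drops its reference */
	return 0;
}
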
index 4ac31a5da3308dd5c77555d7e86fd69abce9b023..b36f6fb3e37a76f16ae370aa1e6ee163227b7df6 100644 (file)
 
 #include "ipath_verbs.h"
 
+/* Fast memory region */
+struct ipath_fmr {
+       struct ib_fmr ibfmr;
+       u8 page_shift;
+       struct ipath_mregion mr;        /* must be last */
+};
+
+static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
+{
+       return container_of(ibfmr, struct ipath_fmr, ibfmr);
+}
+
 /**
  * ipath_get_dma_mr - get a DMA memory region
  * @pd: protection domain for this memory region
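
The to_ifmr() helper added above relies on container_of() to get from the ib_fmr embedded in struct ipath_fmr back to the wrapping structure. A small stand-alone illustration of that idiom, using a portable offsetof-based definition and invented struct names, is:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ib_fmr_stub { int dummy; };

struct fmr_wrap {
	struct ib_fmr_stub ibfmr;	/* embedded "base class" */
	unsigned page_shift;
};

int main(void)
{
	struct fmr_wrap w = { .page_shift = 12 };
	struct ib_fmr_stub *base = &w.ibfmr;	/* what the core layer hands back */
	struct fmr_wrap *again = container_of(base, struct fmr_wrap, ibfmr);

	printf("page_shift = %u\n", again->page_shift);	/* prints 12 */
	return 0;
}
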
diff --git a/drivers/infiniband/hw/ipath/ipath_pe800.c b/drivers/infiniband/hw/ipath/ipath_pe800.c
deleted file mode 100644 (file)
index b83f66d..0000000
+++ /dev/null
@@ -1,1254 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-/*
- * This file contains all of the code that is specific to the
- * InfiniPath PE-800 chip.
- */
-
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-
-
-#include "ipath_kernel.h"
-#include "ipath_registers.h"
-
-/*
- * This file contains all the chip-specific register information and
- * access functions for the QLogic InfiniPath PE800, the PCI-Express chip.
- *
- * This lists the InfiniPath PE800 registers, in the actual chip layout.
- * This structure should never be directly accessed.
- */
-struct _infinipath_do_not_use_kernel_regs {
-       unsigned long long Revision;
-       unsigned long long Control;
-       unsigned long long PageAlign;
-       unsigned long long PortCnt;
-       unsigned long long DebugPortSelect;
-       unsigned long long Reserved0;
-       unsigned long long SendRegBase;
-       unsigned long long UserRegBase;
-       unsigned long long CounterRegBase;
-       unsigned long long Scratch;
-       unsigned long long Reserved1;
-       unsigned long long Reserved2;
-       unsigned long long IntBlocked;
-       unsigned long long IntMask;
-       unsigned long long IntStatus;
-       unsigned long long IntClear;
-       unsigned long long ErrorMask;
-       unsigned long long ErrorStatus;
-       unsigned long long ErrorClear;
-       unsigned long long HwErrMask;
-       unsigned long long HwErrStatus;
-       unsigned long long HwErrClear;
-       unsigned long long HwDiagCtrl;
-       unsigned long long MDIO;
-       unsigned long long IBCStatus;
-       unsigned long long IBCCtrl;
-       unsigned long long ExtStatus;
-       unsigned long long ExtCtrl;
-       unsigned long long GPIOOut;
-       unsigned long long GPIOMask;
-       unsigned long long GPIOStatus;
-       unsigned long long GPIOClear;
-       unsigned long long RcvCtrl;
-       unsigned long long RcvBTHQP;
-       unsigned long long RcvHdrSize;
-       unsigned long long RcvHdrCnt;
-       unsigned long long RcvHdrEntSize;
-       unsigned long long RcvTIDBase;
-       unsigned long long RcvTIDCnt;
-       unsigned long long RcvEgrBase;
-       unsigned long long RcvEgrCnt;
-       unsigned long long RcvBufBase;
-       unsigned long long RcvBufSize;
-       unsigned long long RxIntMemBase;
-       unsigned long long RxIntMemSize;
-       unsigned long long RcvPartitionKey;
-       unsigned long long Reserved3;
-       unsigned long long RcvPktLEDCnt;
-       unsigned long long Reserved4[8];
-       unsigned long long SendCtrl;
-       unsigned long long SendPIOBufBase;
-       unsigned long long SendPIOSize;
-       unsigned long long SendPIOBufCnt;
-       unsigned long long SendPIOAvailAddr;
-       unsigned long long TxIntMemBase;
-       unsigned long long TxIntMemSize;
-       unsigned long long Reserved5;
-       unsigned long long PCIeRBufTestReg0;
-       unsigned long long PCIeRBufTestReg1;
-       unsigned long long Reserved51[6];
-       unsigned long long SendBufferError;
-       unsigned long long SendBufferErrorCONT1;
-       unsigned long long Reserved6SBE[6];
-       unsigned long long RcvHdrAddr0;
-       unsigned long long RcvHdrAddr1;
-       unsigned long long RcvHdrAddr2;
-       unsigned long long RcvHdrAddr3;
-       unsigned long long RcvHdrAddr4;
-       unsigned long long Reserved7RHA[11];
-       unsigned long long RcvHdrTailAddr0;
-       unsigned long long RcvHdrTailAddr1;
-       unsigned long long RcvHdrTailAddr2;
-       unsigned long long RcvHdrTailAddr3;
-       unsigned long long RcvHdrTailAddr4;
-       unsigned long long Reserved8RHTA[11];
-       unsigned long long Reserved9SW[8];
-       unsigned long long SerdesConfig0;
-       unsigned long long SerdesConfig1;
-       unsigned long long SerdesStatus;
-       unsigned long long XGXSConfig;
-       unsigned long long IBPLLCfg;
-       unsigned long long Reserved10SW2[3];
-       unsigned long long PCIEQ0SerdesConfig0;
-       unsigned long long PCIEQ0SerdesConfig1;
-       unsigned long long PCIEQ0SerdesStatus;
-       unsigned long long Reserved11;
-       unsigned long long PCIEQ1SerdesConfig0;
-       unsigned long long PCIEQ1SerdesConfig1;
-       unsigned long long PCIEQ1SerdesStatus;
-       unsigned long long Reserved12;
-};
-
-#define IPATH_KREG_OFFSET(field) (offsetof(struct \
-    _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
-#define IPATH_CREG_OFFSET(field) (offsetof( \
-    struct infinipath_counters, field) / sizeof(u64))
-
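The IPATH_KREG_OFFSET()/IPATH_CREG_OFFSET() macros above turn a field's byte offset inside the register struct into an index in 64-bit words, which is how the rest of the driver addresses kernel registers. A minimal standalone sketch of the same offsetof()-based technique, using a made-up register struct rather than the real chip layout:

    #include <stddef.h>
    #include <stdio.h>

    /* Hypothetical register file laid out as consecutive 64-bit registers. */
    struct demo_regs {
            unsigned long long revision;   /* word index 0 */
            unsigned long long control;    /* word index 1 */
            unsigned long long scratch;    /* word index 2 */
    };

    /* Byte offset of a field, scaled down to an index in u64 words. */
    #define DEMO_REG_INDEX(field) \
            (offsetof(struct demo_regs, field) / sizeof(unsigned long long))

    int main(void)
    {
            /* Prints "0 1 2": the word index of each register. */
            printf("%zu %zu %zu\n", DEMO_REG_INDEX(revision),
                   DEMO_REG_INDEX(control), DEMO_REG_INDEX(scratch));
            return 0;
    }
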
-static const struct ipath_kregs ipath_pe_kregs = {
-       .kr_control = IPATH_KREG_OFFSET(Control),
-       .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
-       .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
-       .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
-       .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
-       .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
-       .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
-       .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
-       .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
-       .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
-       .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
-       .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
-       .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
-       .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
-       .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
-       .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
-       .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
-       .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
-       .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
-       .kr_intclear = IPATH_KREG_OFFSET(IntClear),
-       .kr_intmask = IPATH_KREG_OFFSET(IntMask),
-       .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
-       .kr_mdio = IPATH_KREG_OFFSET(MDIO),
-       .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
-       .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
-       .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
-       .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
-       .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
-       .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
-       .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
-       .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
-       .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
-       .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
-       .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
-       .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
-       .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
-       .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
-       .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
-       .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
-       .kr_revision = IPATH_KREG_OFFSET(Revision),
-       .kr_scratch = IPATH_KREG_OFFSET(Scratch),
-       .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
-       .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
-       .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
-       .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
-       .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
-       .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
-       .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
-       .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
-       .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
-       .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
-       .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
-       .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
-       .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
-       .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
-       .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg),
-
-       /*
-        * These should not be used directly via ipath_read_kreg64();
-        * use them with ipath_read_kreg64_port()
-        */
-       .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
-       .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
-
-       /* This group is PE-800-specific, and used only in this file */
-       /* The rcvpktled register controls one of the debug port signals, so
-        * a packet activity LED can be connected to it. */
-       .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
-       .kr_pcierbuftestreg0 = IPATH_KREG_OFFSET(PCIeRBufTestReg0),
-       .kr_pcierbuftestreg1 = IPATH_KREG_OFFSET(PCIeRBufTestReg1),
-       .kr_pcieq0serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig0),
-       .kr_pcieq0serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig1),
-       .kr_pcieq0serdesstatus = IPATH_KREG_OFFSET(PCIEQ0SerdesStatus),
-       .kr_pcieq1serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig0),
-       .kr_pcieq1serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig1),
-       .kr_pcieq1serdesstatus = IPATH_KREG_OFFSET(PCIEQ1SerdesStatus)
-};
-
-static const struct ipath_cregs ipath_pe_cregs = {
-       .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
-       .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
-       .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
-       .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
-       .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
-       .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
-       .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
-       .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
-       .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
-       .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
-       .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
-       .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
-       .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
-       .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
-       .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
-       .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
-       .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
-       .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
-       .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
-       .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
-       .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
-       .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
-       .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
-       .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
-       .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
-       .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
-       .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
-       .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
-       .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
-       .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
-       .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
-       .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
-       .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
-};
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1F
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1F
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK  0x000000000000003fULL
-#define INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT 0
-#define INFINIPATH_HWE_PCIEPOISONEDTLP      0x0000000010000000ULL
-#define INFINIPATH_HWE_PCIECPLTIMEOUT       0x0000000020000000ULL
-#define INFINIPATH_HWE_PCIEBUSPARITYXTLH    0x0000000040000000ULL
-#define INFINIPATH_HWE_PCIEBUSPARITYXADM    0x0000000080000000ULL
-#define INFINIPATH_HWE_PCIEBUSPARITYRADM    0x0000000100000000ULL
-#define INFINIPATH_HWE_COREPLL_FBSLIP       0x0080000000000000ULL
-#define INFINIPATH_HWE_COREPLL_RFSLIP       0x0100000000000000ULL
-#define INFINIPATH_HWE_PCIE1PLLFAILED       0x0400000000000000ULL
-#define INFINIPATH_HWE_PCIE0PLLFAILED       0x0800000000000000ULL
-#define INFINIPATH_HWE_SERDESPLLFAILED      0x1000000000000000ULL
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_FREQSEL 0x2
-#define INFINIPATH_EXTS_SERDESSEL 0x4
-#define INFINIPATH_EXTS_MEMBIST_ENDTEST     0x0000000000004000
-#define INFINIPATH_EXTS_MEMBIST_FOUND       0x0000000000008000
-
-#define _IPATH_GPIO_SDA_NUM 1
-#define _IPATH_GPIO_SCL_NUM 0
-
-#define IPATH_GPIO_SDA (1ULL << \
-       (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-#define IPATH_GPIO_SCL (1ULL << \
-       (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-
-/**
- * ipath_pe_handle_hwerrors - display hardware errors.
- * @dd: the infinipath device
- * @msg: the output buffer
- * @msgl: the size of the output buffer
- *
- * Most hardware errors are catastrophic, but for right now, we'll
- * print them and continue.  We reuse the same message buffer as
- * ipath_handle_errors() to avoid excessive stack usage.
- */
-static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
-                                    size_t msgl)
-{
-       ipath_err_t hwerrs;
-       u32 bits, ctrl;
-       int isfatal = 0;
-       char bitsmsg[64];
-
-       hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
-       if (!hwerrs) {
-               /*
-                * Better than printing confusing messages.
-                * This seems to be related to clearing the CRC error, or
-                * the PLL error during init.
-                */
-               ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
-               return;
-       } else if (hwerrs == ~0ULL) {
-               ipath_dev_err(dd, "Read of hardware error status failed "
-                             "(all bits set); ignoring\n");
-               return;
-       }
-       ipath_stats.sps_hwerrs++;
-
-       /* Always clear the error status register, except MEMBISTFAIL,
-        * regardless of whether we continue or stop using the chip.
-        * We want that set so we know it failed, even across driver reload.
-        * We'll still ignore it in the hwerrmask.  We do this partly for
-        * diagnostics, but also for support */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
-
-       hwerrs &= dd->ipath_hwerrmask;
-
-       /*
-        * make sure we get this much out, unless told to be quiet,
-        * or it's occurred within the last 5 seconds
-        */
-       if ((hwerrs & ~dd->ipath_lasthwerror) ||
-           (ipath_debug & __IPATH_VERBDBG))
-               dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
-                        "(cleared)\n", (unsigned long long) hwerrs);
-       dd->ipath_lasthwerror |= hwerrs;
-
-       if (hwerrs & ~infinipath_hwe_bitsextant)
-               ipath_dev_err(dd, "hwerror interrupt with unknown errors "
-                             "%llx set\n", (unsigned long long)
-                             (hwerrs & ~infinipath_hwe_bitsextant));
-
-       ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-       if (ctrl & INFINIPATH_C_FREEZEMODE) {
-               if (hwerrs) {
-                       /*
-                        * if any bits are set that we aren't ignoring, only
-                        * make the complaint once, in case it's stuck or
-                        * recurring, and we get here multiple times
-                        */
-                       if (dd->ipath_flags & IPATH_INITTED) {
-                               ipath_dev_err(dd, "Fatal Error (freeze "
-                                             "mode), no longer usable\n");
-                               isfatal = 1;
-                       }
-                       /*
-                        * Mark as having had an error for driver, and also
-                        * for /sys and status word mapped to user programs.
-                        * This marks unit as not usable, until reset
-                        */
-                       *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-                       *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-                       dd->ipath_flags &= ~IPATH_INITTED;
-               } else {
-                       ipath_dbg("Clearing freezemode on ignored hardware "
-                                 "error\n");
-                       ctrl &= ~INFINIPATH_C_FREEZEMODE;
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                                        ctrl);
-               }
-       }
-
-       *msg = '\0';
-
-       if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-               strlcat(msg, "[Memory BIST test failed, PE-800 unusable]",
-                       msgl);
-               /* ignore from now on, so disable until driver reloaded */
-               *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-       if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
-                     << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
-               bits = (u32) ((hwerrs >>
-                              INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
-                             INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
-               snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
-                        bits);
-               strlcat(msg, bitsmsg, msgl);
-       }
-       if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
-                     << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
-               bits = (u32) ((hwerrs >>
-                              INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
-                             INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
-               snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
-                        bits);
-               strlcat(msg, bitsmsg, msgl);
-       }
-       if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
-                     << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
-               bits = (u32) ((hwerrs >>
-                              INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) &
-                             INFINIPATH_HWE_PCIEMEMPARITYERR_MASK);
-               snprintf(bitsmsg, sizeof bitsmsg,
-                        "[PCIe Mem Parity Errs %x] ", bits);
-               strlcat(msg, bitsmsg, msgl);
-       }
-       if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
-               strlcat(msg, "[IB2IPATH Parity]", msgl);
-       if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
-               strlcat(msg, "[IPATH2IB Parity]", msgl);
-
-#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP |       \
-                        INFINIPATH_HWE_COREPLL_RFSLIP )
-
-       if (hwerrs & _IPATH_PLL_FAIL) {
-               snprintf(bitsmsg, sizeof bitsmsg,
-                        "[PLL failed (%llx), PE-800 unusable]",
-                        (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
-               strlcat(msg, bitsmsg, msgl);
-               /* ignore from now on, so disable until driver reloaded */
-               dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-
-       if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
-               /*
-                * If it occurs, it is left masked since the external
-                * interface is unused
-                */
-               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-
-       if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP)
-               strlcat(msg, "[PCIe Poisoned TLP]", msgl);
-       if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT)
-               strlcat(msg, "[PCIe completion timeout]", msgl);
-
-       /*
-        * In practice, it's unlikely that we'll see PCIe PLL, or bus
-        * parity or memory parity error failures, because most likely we
-        * won't be able to talk to the core of the chip.  Nonetheless, we
-        * might see them, if they are in parts of the PCIe core that aren't
-        * essential.
-        */
-       if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED)
-               strlcat(msg, "[PCIePLL1]", msgl);
-       if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED)
-               strlcat(msg, "[PCIePLL0]", msgl);
-       if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH)
-               strlcat(msg, "[PCIe XTLH core parity]", msgl);
-       if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM)
-               strlcat(msg, "[PCIe ADM TX core parity]", msgl);
-       if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM)
-               strlcat(msg, "[PCIe ADM RX core parity]", msgl);
-
-       if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
-               strlcat(msg, "[Rx Dsync]", msgl);
-       if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
-               strlcat(msg, "[SerDes PLL]", msgl);
-
-       ipath_dev_err(dd, "%s hardware error\n", msg);
-       if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
-                * for /sys status file; if no trailing } is copied, we'll
-                * for /sys status file ; if no trailing } is copied, we'll
-                * know it was truncated.
-                */
-               snprintf(dd->ipath_freezemsg, dd->ipath_freezelen,
-                        "{%s}", msg);
-       }
-}
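The handler above decodes each error class by shifting the 64-bit status word down, masking off the field, and appending a short bracketed note to a bounded message buffer. A small self-contained sketch of that decode-and-append pattern, with a made-up bit layout and standard C string routines standing in for the kernel helpers:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define DEMO_RXE_PARITY_MASK  0xfULL   /* invented 4-bit field */
    #define DEMO_RXE_PARITY_SHIFT 8

    int main(void)
    {
            uint64_t hwerrs = 0x0300;      /* pretend status: two parity bits set */
            char msg[64] = "";
            char bitsmsg[32];

            if (hwerrs & (DEMO_RXE_PARITY_MASK << DEMO_RXE_PARITY_SHIFT)) {
                    uint32_t bits = (uint32_t)((hwerrs >> DEMO_RXE_PARITY_SHIFT) &
                                               DEMO_RXE_PARITY_MASK);
                    snprintf(bitsmsg, sizeof(bitsmsg), "[RXE Parity Errs %x] ", bits);
                    strncat(msg, bitsmsg, sizeof(msg) - strlen(msg) - 1);
            }
            printf("%s\n", msg);           /* -> "[RXE Parity Errs 3] " */
            return 0;
    }
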
-
-/**
- * ipath_pe_boardname - fill in the board name
- * @dd: the infinipath device
- * @name: the output buffer
- * @namelen: the size of the output buffer
- *
- * info is based on the board revision register
- */
-static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
-                             size_t namelen)
-{
-       char *n = NULL;
-       u8 boardrev = dd->ipath_boardrev;
-       int ret;
-
-       switch (boardrev) {
-       case 0:
-               n = "InfiniPath_Emulation";
-               break;
-       case 1:
-               n = "InfiniPath_PE-800-Bringup";
-               break;
-       case 2:
-               n = "InfiniPath_PE-880";
-               break;
-       case 3:
-               n = "InfiniPath_PE-850";
-               break;
-       case 4:
-               n = "InfiniPath_PE-860";
-               break;
-       default:
-               ipath_dev_err(dd,
-                             "Don't yet know about board with ID %u\n",
-                             boardrev);
-               snprintf(name, namelen, "Unknown_InfiniPath_PE-8xx_%u",
-                        boardrev);
-               break;
-       }
-       if (n)
-               snprintf(name, namelen, "%s", n);
-
-       if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) {
-               ipath_dev_err(dd, "Unsupported PE-800 revision %u.%u!\n",
-                             dd->ipath_majrev, dd->ipath_minrev);
-               ret = 1;
-       } else
-               ret = 0;
-
-       return ret;
-}
-
-/**
- * ipath_pe_init_hwerrors - enable hardware errors
- * @dd: the infinipath device
- *
- * now that we have finished initializing everything that might reasonably
- * cause a hardware error, and cleared those error bits as they occur,
- * we can enable hardware errors in the mask (potentially enabling
- * freeze mode), and enable hardware errors as errors (along with
- * everything else) in errormask
- */
-static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
-{
-       ipath_err_t val;
-       u64 extsval;
-
-       extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-
-       if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
-               ipath_dev_err(dd, "MemBIST did not complete!\n");
-
-       val = ~0ULL;    /* barring bugs, all hwerrors become interrupts, */
-
-       if (!dd->ipath_boardrev)        /* no PLL for Emulator */
-               val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-
-       /* workaround bug 9460 in internal interface bus parity checking */
-       val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
-
-       dd->ipath_hwerrmask = val;
-}
-
-/**
- * ipath_pe_bringup_serdes - bring up the serdes
- * @dd: the infinipath device
- */
-static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
-{
-       u64 val, tmp, config1;
-       int ret = 0, change = 0;
-
-       ipath_dbg("Trying to bringup serdes\n");
-
-       if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
-           INFINIPATH_HWE_SERDESPLLFAILED) {
-               ipath_dbg("At start, serdes PLL failed bit set "
-                         "in hwerrstatus, clearing and continuing\n");
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                                INFINIPATH_HWE_SERDESPLLFAILED);
-       }
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-       config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
-
-       ipath_cdbg(VERBOSE, "SerDes status config0=%llx config1=%llx, "
-                  "xgxsconfig %llx\n", (unsigned long long) val,
-                  (unsigned long long) config1, (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-       /*
-        * Force reset on, also set rxdetect enable.  Must do before reading
-        * serdesstatus at least for simulation, or some of the bits in
-        * serdes status will come back as undefined and cause simulation
-        * failures
-        */
-       val |= INFINIPATH_SERDC0_RESET_PLL | INFINIPATH_SERDC0_RXDETECT_EN
-               | INFINIPATH_SERDC0_L1PWR_DN;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-       /* be sure chip saw it */
-       tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       udelay(5);              /* need pll reset set at least for a bit */
-       /*
-        * after PLL is reset, set the per-lane Resets and TxIdle and
-        * clear the PLL reset and rxdetect (to get falling edge).
-        * Leave L1PWR bits set (permanently)
-        */
-       val &= ~(INFINIPATH_SERDC0_RXDETECT_EN | INFINIPATH_SERDC0_RESET_PLL
-                | INFINIPATH_SERDC0_L1PWR_DN);
-       val |= INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE;
-       ipath_cdbg(VERBOSE, "Clearing pll reset and setting lane resets "
-                  "and txidle (%llx)\n", (unsigned long long) val);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-       /* be sure chip saw it */
-       tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       /* need PLL reset clear for at least 11 usec before lane
-        * resets cleared; give it a few more to be sure */
-       udelay(15);
-       val &= ~(INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE);
-
-       ipath_cdbg(VERBOSE, "Clearing lane resets and txidle "
-                  "(writing %llx)\n", (unsigned long long) val);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-       /* be sure chip saw it */
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-       if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
-            INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
-               val &=
-                       ~(INFINIPATH_XGXS_MDIOADDR_MASK <<
-                         INFINIPATH_XGXS_MDIOADDR_SHIFT);
-               /* MDIO address 3 */
-               val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
-               change = 1;
-       }
-       if (val & INFINIPATH_XGXS_RESET) {
-               val &= ~INFINIPATH_XGXS_RESET;
-               change = 1;
-       }
-       if (change)
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-       /* clear current and de-emphasis bits */
-       config1 &= ~0x0ffffffff00ULL;
-       /* set current to 20ma */
-       config1 |= 0x00000000000ULL;
-       /* set de-emphasis to -5.68dB */
-       config1 |= 0x0cccc000000ULL;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
-
-       ipath_cdbg(VERBOSE, "done: SerDes status config0=%llx "
-                  "config1=%llx, sstatus=%llx xgxs=%llx\n",
-                  (unsigned long long) val, (unsigned long long) config1,
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-       if (!ipath_waitfor_mdio_cmdready(dd)) {
-               ipath_write_kreg(
-                       dd, dd->ipath_kregs->kr_mdio,
-                       ipath_mdio_req(IPATH_MDIO_CMD_READ, 31,
-                                      IPATH_MDIO_CTRL_XGXS_REG_8, 0));
-               if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio,
-                                          IPATH_MDIO_DATAVALID, &val))
-                       ipath_dbg("Never got MDIO data for XGXS "
-                                 "status read\n");
-               else
-                       ipath_cdbg(VERBOSE, "MDIO Read reg8, "
-                                  "'bank' 31 %x\n", (u32) val);
-       } else
-               ipath_dbg("Never got MDIO cmdready for XGXS status read\n");
-
-       return ret;
-}
-
-/**
- * ipath_pe_quiet_serdes - set serdes to txidle
- * @dd: the infinipath device
- * Called when driver is being unloaded
- */
-static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
-{
-       u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-       val |= INFINIPATH_SERDC0_TXIDLE;
-       ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
-                 (unsigned long long) val);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-}
-
-/* this is not yet needed on the PE-800, so just return 0. */
-static int ipath_pe_intconfig(struct ipath_devdata *dd)
-{
-       return 0;
-}
-
-/**
- * ipath_setup_pe_setextled - set the state of the two external LEDs
- * @dd: the infinipath device
- * @lst: the L state
- * @ltst: the LT state
- *
- * These LEDs indicate the physical and logical state of the IB link.
- * For this chip (at least with recommended board pinouts), LED1
- * is Yellow (logical state) and LED2 is Green (physical state).
- *
- * Note:  We try to match the Mellanox HCA LED behavior as best
- * we can.  Green indicates physical link state is OK (something is
- * plugged in, and we can train).
- * Amber indicates the link is logically up (ACTIVE).
- * Mellanox further blinks the amber LED to indicate data packet
- * activity, but we have no hardware support for that, so it would
- * require waking up every 10-20 msecs and checking the counters
- * on the chip, and then turning the LED off if appropriate.  That's
- * visible overhead, so not something we will do.
- *
- */
-static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
-                                    u64 ltst)
-{
-       u64 extctl;
-
-       /* the diags use the LED to indicate diag info, so we leave
-        * the external LED alone when the diags are running */
-       if (ipath_diag_inuse)
-               return;
-
-       extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
-                                      INFINIPATH_EXTC_LED2PRIPORT_ON);
-
-       if (ltst & INFINIPATH_IBCS_LT_STATE_LINKUP)
-               extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
-       if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-               extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
-       dd->ipath_extctrl = extctl;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
-}
-
-/**
- * ipath_setup_pe_cleanup - clean up any chip-specific stuff
- * @dd: the infinipath device
- *
- * This is called during driver unload.
- * We do the pci_disable_msi here, not in generic code, because it
- * isn't used for the HT-400. If we do end up needing pci_enable_msi
- * at some point in the future for HT-400, we'll move the call back
- * into the main init_one code.
- */
-static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
-{
-       dd->ipath_msi_lo = 0;   /* just in case unload fails */
-       pci_disable_msi(dd->pcidev);
-}
-
-/**
- * ipath_setup_pe_config - setup PCIe config related stuff
- * @dd: the infinipath device
- * @pdev: the PCI device
- *
- * The pci_enable_msi() call will fail on systems with MSI quirks
- * such as those with AMD8131, even if the device of interest is not
- * attached to that device (in the 2.6.13 - 2.6.15 kernels, at least;
- * fixed late in 2.6.16).
- * All that can be done is to edit the kernel source to remove the quirk
- * check until that is fixed.
- * We do not need to call enable_msi() for our HyperTransport chip (HT-400),
- * even though it uses MSI, and we want to avoid the quirk warning, so
- * we call enable_msi only for the PE-800.  If we do end up needing
- * pci_enable_msi at some point in the future for HT-400, we'll move the
- * call back into the main init_one code.
- * We save the msi lo and hi values, so we can restore them after
- * chip reset (the kernel PCI infrastructure doesn't yet handle that
- * correctly).
- */
-static int ipath_setup_pe_config(struct ipath_devdata *dd,
-                                struct pci_dev *pdev)
-{
-       int pos, ret;
-
-       dd->ipath_msi_lo = 0;   /* used as a flag during reset processing */
-       ret = pci_enable_msi(dd->pcidev);
-       if (ret)
-               ipath_dev_err(dd, "pci_enable_msi failed: %d, "
-                             "interrupts may not work\n", ret);
-       /* continue even if it fails, we may still be OK... */
-
-       if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) {
-               u16 control;
-               pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
-                                     &dd->ipath_msi_lo);
-               pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
-                                     &dd->ipath_msi_hi);
-               pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
-                                    &control);
-               /* now save the data (vector) info */
-               pci_read_config_word(dd->pcidev,
-                                    pos + ((control & PCI_MSI_FLAGS_64BIT)
-                                           ? 12 : 8),
-                                    &dd->ipath_msi_data);
-               ipath_cdbg(VERBOSE, "Read msi data 0x%x from config offset "
-                          "0x%x, control=0x%x\n", dd->ipath_msi_data,
-                          pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
-                          control);
-               /* we save the cachelinesize also, although it doesn't
-                * really matter */
-               pci_read_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
-                                    &dd->ipath_pci_cacheline);
-       } else
-               ipath_dev_err(dd, "Can't find MSI capability, "
-                             "can't save MSI settings for reset\n");
-       if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP))) {
-               u16 linkstat;
-               pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA,
-                                    &linkstat);
-               linkstat >>= 4;
-               linkstat &= 0x1f;
-               if (linkstat != 8)
-                       ipath_dev_err(dd, "PCIe width %u, "
-                                     "performance reduced\n", linkstat);
-       } else
-               ipath_dev_err(dd, "Can't find PCI Express "
-                             "capability!\n");
-       return 0;
-}
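The config setup above reads the PCI Express Link Status register and warns if the negotiated width is anything other than x8; the width field sits at bits 9:4 of that register. A tiny sketch of the same decode on a sample value (the 0x0081 below is just an illustrative x8 encoding, not read from real hardware):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint16_t linkstat = 0x0081;              /* sample Link Status value */
            unsigned width = (linkstat >> 4) & 0x1f; /* negotiated link width */

            if (width != 8)
                    printf("PCIe width %u, performance reduced\n", width);
            else
                    printf("full x8 link\n");
            return 0;
    }
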
-
-static void ipath_init_pe_variables(void)
-{
-       /*
-        * bits for selecting i2c direction and values,
-        * used for I2C serial flash
-        */
-       ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
-       ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
-       ipath_gpio_sda = IPATH_GPIO_SDA;
-       ipath_gpio_scl = IPATH_GPIO_SCL;
-
-       /* variables for sanity checking interrupt and errors */
-       infinipath_hwe_bitsextant =
-               (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
-               (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
-               INFINIPATH_HWE_PCIE1PLLFAILED |
-               INFINIPATH_HWE_PCIE0PLLFAILED |
-               INFINIPATH_HWE_PCIEPOISONEDTLP |
-               INFINIPATH_HWE_PCIECPLTIMEOUT |
-               INFINIPATH_HWE_PCIEBUSPARITYXTLH |
-               INFINIPATH_HWE_PCIEBUSPARITYXADM |
-               INFINIPATH_HWE_PCIEBUSPARITYRADM |
-               INFINIPATH_HWE_MEMBISTFAILED |
-               INFINIPATH_HWE_COREPLL_FBSLIP |
-               INFINIPATH_HWE_COREPLL_RFSLIP |
-               INFINIPATH_HWE_SERDESPLLFAILED |
-               INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
-               INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
-       infinipath_i_bitsextant =
-               (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
-               (INFINIPATH_I_RCVAVAIL_MASK <<
-                INFINIPATH_I_RCVAVAIL_SHIFT) |
-               INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
-               INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
-       infinipath_e_bitsextant =
-               INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
-               INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
-               INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
-               INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
-               INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
-               INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
-               INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-               INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
-               INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
-               INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
-               INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
-               INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
-               INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
-               INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
-               INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
-               INFINIPATH_E_HARDWARE;
-
-       infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
-       infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
-}
-
-/* setup the MSI stuff again after a reset.  I'd like to just call
- * pci_enable_msi() and request_irq() again, but when I do that,
- * the MSI enable bit doesn't get set in the command word, and
- * we switch to a different interrupt vector, which is confusing,
- * so I instead just do it all inline.  Perhaps we can somehow tie this
- * into the PCIe hotplug support at some point.
- * Note, because I'm doing it all here, I don't call pci_disable_msi()
- * or free_irq() at the start of ipath_setup_pe_reset().
- */
-static int ipath_reinit_msi(struct ipath_devdata *dd)
-{
-       int pos;
-       u16 control;
-       int ret;
-
-       if (!dd->ipath_msi_lo) {
-               dev_info(&dd->pcidev->dev, "Can't restore MSI config, "
-                        "initial setup failed?\n");
-               ret = 0;
-               goto bail;
-       }
-
-       if (!(pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) {
-               ipath_dev_err(dd, "Can't find MSI capability, "
-                             "can't restore MSI settings\n");
-               ret = 0;
-               goto bail;
-       }
-       ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
-                  dd->ipath_msi_lo, pos + PCI_MSI_ADDRESS_LO);
-       pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
-                              dd->ipath_msi_lo);
-       ipath_cdbg(VERBOSE, "Writing msi_hi 0x%x to config offset 0x%x\n",
-                  dd->ipath_msi_hi, pos + PCI_MSI_ADDRESS_HI);
-       pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
-                              dd->ipath_msi_hi);
-       pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control);
-       if (!(control & PCI_MSI_FLAGS_ENABLE)) {
-               ipath_cdbg(VERBOSE, "MSI control at off %x was %x, "
-                          "setting MSI enable (%x)\n", pos + PCI_MSI_FLAGS,
-                          control, control | PCI_MSI_FLAGS_ENABLE);
-               control |= PCI_MSI_FLAGS_ENABLE;
-               pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
-                                     control);
-       }
-       /* now rewrite the data (vector) info */
-       pci_write_config_word(dd->pcidev, pos +
-                             ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
-                             dd->ipath_msi_data);
-       /* we restore the cachelinesize also, although it doesn't really
-        * matter */
-       pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
-                             dd->ipath_pci_cacheline);
-       /* and now set the pci master bit again */
-       pci_set_master(dd->pcidev);
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/* This routine sleeps, so it can only be called from user context, not
- * from interrupt context.  If we need interrupt context, we can split
- * it into two routines.
- */
-static int ipath_setup_pe_reset(struct ipath_devdata *dd)
-{
-       u64 val;
-       int i;
-       int ret;
-
-       /* Use ERROR so it shows up in logs, etc. */
-       ipath_dev_err(dd, "Resetting PE-800 unit %u\n",
-                     dd->ipath_unit);
-       /* keep chip from being accessed in a few places */
-       dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT);
-       val = dd->ipath_control | INFINIPATH_C_RESET;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control, val);
-       mb();
-
-       for (i = 1; i <= 5; i++) {
-               int r;
-               /* allow MBIST, etc. to complete; longer on each retry.
-                * We sometimes get machine checks from bus timeout if no
-                * response, so for now, make it *really* long.
-                */
-               msleep(1000 + (1 + i) * 2000);
-               if ((r =
-                    pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
-                                           dd->ipath_pcibar0)))
-                       ipath_dev_err(dd, "rewrite of BAR0 failed: %d\n",
-                                     r);
-               if ((r =
-                    pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
-                                           dd->ipath_pcibar1)))
-                       ipath_dev_err(dd, "rewrite of BAR1 failed: %d\n",
-                                     r);
-               /* now re-enable memory access */
-               if ((r = pci_enable_device(dd->pcidev)))
-                       ipath_dev_err(dd, "pci_enable_device failed after "
-                                     "reset: %d\n", r);
-               /* whether it worked or not, mark as present, again */
-               dd->ipath_flags |= IPATH_PRESENT;
-               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
-               if (val == dd->ipath_revision) {
-                       ipath_cdbg(VERBOSE, "Got matching revision "
-                                  "register %llx on try %d\n",
-                                  (unsigned long long) val, i);
-                       ret = ipath_reinit_msi(dd);
-                       goto bail;
-               }
-               /* Probably getting -1 back */
-               ipath_dbg("Didn't get expected revision register, "
-                         "got %llx, try %d\n", (unsigned long long) val,
-                         i + 1);
-       }
-       ret = 0; /* failed */
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_pe_put_tid - write a TID in chip
- * @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: 0 for eager, 1 for expected
- * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
- *
- * This exists as a separate routine to allow for special locking etc.
- * It's used for both the full cleanup on exit, as well as the normal
- * setup and teardown.
- */
-static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
-                            u32 type, unsigned long pa)
-{
-       u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
-       unsigned long flags = 0; /* keep gcc quiet */
-
-       if (pa != dd->ipath_tidinvalid) {
-               if (pa & ((1U << 11) - 1)) {
-                       dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
-                                "not 2KB aligned!\n", pa);
-                       return;
-               }
-               pa >>= 11;
-               /* paranoia check */
-               if (pa & (7<<29))
-                       ipath_dev_err(dd,
-                                     "BUG: Physical page address 0x%lx "
-                                     "has bits set in 31-29\n", pa);
-
-               if (type == 0)
-                       pa |= dd->ipath_tidtemplate;
-               else /* for now, always full 4KB page */
-                       pa |= 2 << 29;
-       }
-
-       /* workaround chip bug 9437 by writing each TID twice
-        * and holding a spinlock around the writes, so they don't
-        * intermix with other TID (eager or expected) writes
-        * Unfortunately, this call can be done from interrupt level
-        * for the port 0 eager TIDs, so we have to use irqsave
-        */
-       spin_lock_irqsave(&dd->ipath_tid_lock, flags);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeeddeaf);
-       if (dd->ipath_kregbase)
-               writel(pa, tidp32);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xdeadbeef);
-       mmiowb();
-       spin_unlock_irqrestore(&dd->ipath_tid_lock, flags);
-}
-
-/**
- * ipath_pe_clear_tid - clear all TID entries for a port, expected and eager
- * @dd: the infinipath device
- * @port: the port
- *
- * clear all TID entries for a port, expected and eager.
- * Used from ipath_close().  On the PE-800, TIDs are only 32 bits,
- * not 64, but they are still on 64 bit boundaries, so tidbase
- * is declared as u64 * for the pointer math, even though we write 32 bits.
- */
-static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
-{
-       u64 __iomem *tidbase;
-       unsigned long tidinv;
-       int i;
-
-       if (!dd->ipath_kregbase)
-               return;
-
-       ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
-
-       tidinv = dd->ipath_tidinvalid;
-       tidbase = (u64 __iomem *)
-               ((char __iomem *)(dd->ipath_kregbase) +
-                dd->ipath_rcvtidbase +
-                port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
-
-       for (i = 0; i < dd->ipath_rcvtidcnt; i++)
-               ipath_pe_put_tid(dd, &tidbase[i], 0, tidinv);
-
-       tidbase = (u64 __iomem *)
-               ((char __iomem *)(dd->ipath_kregbase) +
-                dd->ipath_rcvegrbase +
-                port * dd->ipath_rcvegrcnt * sizeof(*tidbase));
-
-       for (i = 0; i < dd->ipath_rcvegrcnt; i++)
-               ipath_pe_put_tid(dd, &tidbase[i], 1, tidinv);
-}
-
-/**
- * ipath_pe_tidtemplate - setup constants for TID updates
- * @dd: the infinipath device
- *
- * We set up the values we use a lot, to avoid recalculating them each time
- */
-static void ipath_pe_tidtemplate(struct ipath_devdata *dd)
-{
-       u32 egrsize = dd->ipath_rcvegrbufsize;
-
-       /* For now, we always allocate 4KB buffers (at init) so we can
-        * receive max size packets.  We may want a module parameter to
-        * specify 2KB or 4KB and/or make it per port instead of per device
-        * for those who want to reduce memory footprint.  Note that
-        * ipath_rcvhdrentsize must be large enough to hold the largest
-        * IB header (currently 96 bytes) that we expect to handle (plus of
-        * course the 2 dwords of RHF).
-        */
-       if (egrsize == 2048)
-               dd->ipath_tidtemplate = 1U << 29;
-       else if (egrsize == 4096)
-               dd->ipath_tidtemplate = 2U << 29;
-       else {
-               egrsize = 4096;
-               dev_info(&dd->pcidev->dev, "BUG: unsupported egrbufsize "
-                        "%u, using %u\n", dd->ipath_rcvegrbufsize,
-                        egrsize);
-               dd->ipath_tidtemplate = 2U << 29;
-       }
-       dd->ipath_tidinvalid = 0;
-}
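The template computed above is OR'd into every TID entry by ipath_pe_put_tid(): the physical buffer address goes in the low bits in 2KB units, and the buffer-size code (1 for 2KB, 2 for 4KB) lands in bits 30:29. A small sketch of composing such an entry, assuming that layout and using invented names:

    #include <stdint.h>
    #include <stdio.h>

    /* Size codes, matching the template values above (assumed layout). */
    #define DEMO_TID_SZ_2K (1U << 29)
    #define DEMO_TID_SZ_4K (2U << 29)

    /* Compose a TID word: physical address in 2KB units, size code on top. */
    static uint32_t demo_make_tid(unsigned long phys, uint32_t template)
    {
            return (uint32_t)(phys >> 11) | template;
    }

    int main(void)
    {
            /* A 4KB buffer at a made-up physical address. */
            printf("tid = 0x%x\n", demo_make_tid(0x12340000UL, DEMO_TID_SZ_4K));
            return 0;
    }
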
-
-static int ipath_pe_early_init(struct ipath_devdata *dd)
-{
-       dd->ipath_flags |= IPATH_4BYTE_TID;
-
-       /*
-        * For openib, we need to be able to handle an IB header of 96 bytes
-        * or 24 dwords.  The HT-400 has arbitrarily sized receive buffers, so we
-        * made them the same size as the PIO buffers.  The PE-800 does not
-        * handle arbitrarily sized buffers, so we need the header large enough
-        * to handle the largest IB header, but still have room for a 2KB MTU
-        * standard IB packet.
-        */
-       dd->ipath_rcvhdrentsize = 24;
-       dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
-
-       /* For HT-400, we allocate a somewhat overly large eager buffer,
-        * such that we can guarantee that we can receive the largest packet
-        * that we can send out.  To truly support a 4KB MTU, we need to
-        * bump this to a larger value.  We'll do this when I get around to
-        * testing 4KB sends on the PE-800, which I have not yet done.
-        */
-       dd->ipath_rcvegrbufsize = 2048;
-       /*
-        * the min() check here is currently a nop, but it may not always
-        * be, depending on just how we do ipath_rcvegrbufsize
-        */
-       dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
-                                dd->ipath_rcvegrbufsize +
-                                (dd->ipath_rcvhdrentsize << 2));
-       dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
-
-       /*
-        * For PE-800, we can request a receive interrupt for 1 or
-        * more packets from current offset.  For now, we set this
-        * up for a single packet, to match the HT-400 behavior.
-        */
-       dd->ipath_rhdrhead_intr_off = 1ULL<<32;
-
-       ipath_get_eeprom_info(dd);
-
-       return 0;
-}
-
-int __attribute__((weak)) ipath_unordered_wc(void)
-{
-       return 0;
-}
-
-/**
- * ipath_pe_get_base_info - set chip-specific flags for user code
- * @pd: the infinipath port data
- * @kbase: ipath_base_info pointer
- *
- * We set the PCIE flag because the lower bandwidth on PCIe vs
- * HyperTransport can affect some user packet algorithms.
- */
-static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
-{
-       struct ipath_base_info *kinfo = kbase;
-
-       if (ipath_unordered_wc()) {
-               kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER;
-               ipath_cdbg(PROC, "Intel processor, forcing WC order\n");
-       } else
-               ipath_cdbg(PROC, "Not Intel processor, WC ordered\n");
-
-       kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
-
-       return 0;
-}
-
-/**
- * ipath_init_pe800_funcs - set up the chip-specific function pointers
- * @dd: the infinipath device
- *
- * This is global, and is called directly at init to set up the
- * chip-specific function pointers for later use.
- */
-void ipath_init_pe800_funcs(struct ipath_devdata *dd)
-{
-       dd->ipath_f_intrsetup = ipath_pe_intconfig;
-       dd->ipath_f_bus = ipath_setup_pe_config;
-       dd->ipath_f_reset = ipath_setup_pe_reset;
-       dd->ipath_f_get_boardname = ipath_pe_boardname;
-       dd->ipath_f_init_hwerrors = ipath_pe_init_hwerrors;
-       dd->ipath_f_early_init = ipath_pe_early_init;
-       dd->ipath_f_handle_hwerrors = ipath_pe_handle_hwerrors;
-       dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes;
-       dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
-       dd->ipath_f_clear_tids = ipath_pe_clear_tids;
-       dd->ipath_f_put_tid = ipath_pe_put_tid;
-       dd->ipath_f_cleanup = ipath_setup_pe_cleanup;
-       dd->ipath_f_setextled = ipath_setup_pe_setextled;
-       dd->ipath_f_get_base_info = ipath_pe_get_base_info;
-
-       /* initialize chip-specific variables */
-       dd->ipath_f_tidtemplate = ipath_pe_tidtemplate;
-
-       /*
-        * setup the register offsets, since they are different for each
-        * chip
-        */
-       dd->ipath_kregs = &ipath_pe_kregs;
-       dd->ipath_cregs = &ipath_pe_cregs;
-
-       ipath_init_pe_variables();
-}
-
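ipath_init_pe800_funcs() above is the PE-800 instance of the driver's chip-dispatch scheme: the common code never calls chip routines directly, it always goes through per-chip function pointers filled in at init. A minimal standalone sketch of that pattern, with invented names:

    #include <stdio.h>

    /* Per-chip operations selected once at init time. */
    struct demo_chip_ops {
            int  (*bringup)(void);
            void (*cleanup)(void);
    };

    static int  demo_pe_bringup(void) { puts("pe bringup"); return 0; }
    static void demo_pe_cleanup(void) { puts("pe cleanup"); }

    static const struct demo_chip_ops demo_pe_ops = {
            .bringup = demo_pe_bringup,
            .cleanup = demo_pe_cleanup,
    };

    int main(void)
    {
            const struct demo_chip_ops *ops = &demo_pe_ops; /* chosen per device */

            ops->bringup();
            ops->cleanup();
            return 0;
    }
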
index 83e557be591ed4165432cd6d51d0094d20cc719e..224b0f40767f0ef6b08a8aa8e451fcad419c6d87 100644 (file)
@@ -35,7 +35,7 @@
 #include <linux/vmalloc.h>
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 #define BITS_PER_PAGE          (PAGE_SIZE*BITS_PER_BYTE)
 #define BITS_PER_PAGE_MASK     (BITS_PER_PAGE-1)
 #define find_next_offset(map, off) find_next_zero_bit((map)->page, \
                                                      BITS_PER_PAGE, off)
 
-#define TRANS_INVALID  0
-#define TRANS_ANY2RST  1
-#define TRANS_RST2INIT 2
-#define TRANS_INIT2INIT        3
-#define TRANS_INIT2RTR 4
-#define TRANS_RTR2RTS  5
-#define TRANS_RTS2RTS  6
-#define TRANS_SQERR2RTS        7
-#define TRANS_ANY2ERR  8
-#define TRANS_RTS2SQD  9  /* XXX Wait for expected ACKs & signal event */
-#define TRANS_SQD2SQD  10 /* error if not drained & parameter change */
-#define TRANS_SQD2RTS  11 /* error if not drained */
-
 /*
  * Convert the AETH credit code into the number of credits.
  */
@@ -287,7 +274,7 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
                                free_qpn(qpt, qp->ibqp.qp_num);
                        if (!atomic_dec_and_test(&qp->refcount) ||
                            !ipath_destroy_qp(&qp->ibqp))
-                               _VERBS_INFO("QP memory leak!\n");
+                               ipath_dbg(KERN_INFO "QP memory leak!\n");
                        qp = nqp;
                }
        }
@@ -355,8 +342,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
        qp->s_last = 0;
        qp->s_ssn = 1;
        qp->s_lsn = 0;
-       qp->r_rq.head = 0;
-       qp->r_rq.tail = 0;
+       if (qp->r_rq.wq) {
+               qp->r_rq.wq->head = 0;
+               qp->r_rq.wq->tail = 0;
+       }
        qp->r_reuse_sge = 0;
 }
 
@@ -373,8 +362,8 @@ void ipath_error_qp(struct ipath_qp *qp)
        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
        struct ib_wc wc;
 
-       _VERBS_INFO("QP%d/%d in error state\n",
-                   qp->ibqp.qp_num, qp->remote_qpn);
+       ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
+                 qp->ibqp.qp_num, qp->remote_qpn);
 
        spin_lock(&dev->pending_lock);
        /* XXX What if it's already removed by the timeout code? */
@@ -410,15 +399,32 @@ void ipath_error_qp(struct ipath_qp *qp)
        qp->s_hdrwords = 0;
        qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 
-       wc.opcode = IB_WC_RECV;
-       spin_lock(&qp->r_rq.lock);
-       while (qp->r_rq.tail != qp->r_rq.head) {
-               wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
-               if (++qp->r_rq.tail >= qp->r_rq.size)
-                       qp->r_rq.tail = 0;
-               ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+       if (qp->r_rq.wq) {
+               struct ipath_rwq *wq;
+               u32 head;
+               u32 tail;
+
+               spin_lock(&qp->r_rq.lock);
+
+               /* sanity check pointers before trusting them */
+               wq = qp->r_rq.wq;
+               head = wq->head;
+               if (head >= qp->r_rq.size)
+                       head = 0;
+               tail = wq->tail;
+               if (tail >= qp->r_rq.size)
+                       tail = 0;
+               wc.opcode = IB_WC_RECV;
+               while (tail != head) {
+                       wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
+                       if (++tail >= qp->r_rq.size)
+                               tail = 0;
+                       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+               }
+               wq->tail = tail;
+
+               spin_unlock(&qp->r_rq.lock);
        }
-       spin_unlock(&qp->r_rq.lock);
 }
 
 /**
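The hunk above stops trusting wq->head and wq->tail directly, because the receive work queue can now be mapped into user space (it is allocated with vmalloc_user() later in this patch); both indices are clamped into range before the ring is drained. A small standalone sketch of that clamp-then-drain pattern on an ordinary ring (hypothetical struct, no locking shown):

    #include <stdint.h>
    #include <stdio.h>

    struct demo_ring {
            uint32_t head;   /* untrusted: shared with user space */
            uint32_t tail;   /* untrusted: shared with user space */
            uint32_t size;   /* trusted, kernel-owned slot count */
    };

    /* Clamp the shared indices into range, then walk tail up to head. */
    static uint32_t demo_drain(struct demo_ring *r)
    {
            uint32_t head = r->head, tail = r->tail, drained = 0;

            if (head >= r->size)
                    head = 0;
            if (tail >= r->size)
                    tail = 0;
            while (tail != head) {
                    /* ... complete one receive work request here ... */
                    if (++tail >= r->size)
                            tail = 0;
                    drained++;
            }
            r->tail = tail;
            return drained;
    }

    int main(void)
    {
            struct demo_ring r = { .head = 3, .tail = 1, .size = 8 };

            printf("drained %u entries\n", demo_drain(&r));  /* -> 2 */
            return 0;
    }
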
@@ -426,11 +432,12 @@ void ipath_error_qp(struct ipath_qp *qp)
  * @ibqp: the queue pair whose attributes we're modifying
  * @attr: the new attributes
  * @attr_mask: the mask of attributes to modify
+ * @udata: user data for ipathverbs.so
  *
  * Returns 0 on success, otherwise returns an errno.
  */
 int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                   int attr_mask)
+                   int attr_mask, struct ib_udata *udata)
 {
        struct ipath_ibdev *dev = to_idev(ibqp->device);
        struct ipath_qp *qp = to_iqp(ibqp);
@@ -448,19 +455,46 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                                attr_mask))
                goto inval;
 
-       if (attr_mask & IB_QP_AV)
+       if (attr_mask & IB_QP_AV) {
                if (attr->ah_attr.dlid == 0 ||
                    attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
                        goto inval;
 
+               if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
+                   (attr->ah_attr.grh.sgid_index > 1))
+                       goto inval;
+       }
+
        if (attr_mask & IB_QP_PKEY_INDEX)
-               if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd))
+               if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
                        goto inval;
 
        if (attr_mask & IB_QP_MIN_RNR_TIMER)
                if (attr->min_rnr_timer > 31)
                        goto inval;
 
+       if (attr_mask & IB_QP_PORT)
+               if (attr->port_num == 0 ||
+                   attr->port_num > ibqp->device->phys_port_cnt)
+                       goto inval;
+
+       if (attr_mask & IB_QP_PATH_MTU)
+               if (attr->path_mtu > IB_MTU_4096)
+                       goto inval;
+
+       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+               if (attr->max_dest_rd_atomic > 1)
+                       goto inval;
+
+       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+               if (attr->max_rd_atomic > 1)
+                       goto inval;
+
+       if (attr_mask & IB_QP_PATH_MIG_STATE)
+               if (attr->path_mig_state != IB_MIG_MIGRATED &&
+                   attr->path_mig_state != IB_MIG_REARM)
+                       goto inval;
+
        switch (new_state) {
        case IB_QPS_RESET:
                ipath_reset_qp(qp);
@@ -511,6 +545,9 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        if (attr_mask & IB_QP_MIN_RNR_TIMER)
                qp->r_min_rnr_timer = attr->min_rnr_timer;
 
+       if (attr_mask & IB_QP_TIMEOUT)
+               qp->timeout = attr->timeout;
+
        if (attr_mask & IB_QP_QKEY)
                qp->qkey = attr->qkey;
 
@@ -543,7 +580,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        attr->dest_qp_num = qp->remote_qpn;
        attr->qp_access_flags = qp->qp_access_flags;
        attr->cap.max_send_wr = qp->s_size - 1;
-       attr->cap.max_recv_wr = qp->r_rq.size - 1;
+       attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
        attr->cap.max_send_sge = qp->s_max_sge;
        attr->cap.max_recv_sge = qp->r_rq.max_sge;
        attr->cap.max_inline_data = 0;
@@ -557,7 +594,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        attr->max_dest_rd_atomic = 1;
        attr->min_rnr_timer = qp->r_min_rnr_timer;
        attr->port_num = 1;
-       attr->timeout = 0;
+       attr->timeout = qp->timeout;
        attr->retry_cnt = qp->s_retry_cnt;
        attr->rnr_retry = qp->s_rnr_retry;
        attr->alt_port_num = 0;
@@ -569,9 +606,10 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        init_attr->recv_cq = qp->ibqp.recv_cq;
        init_attr->srq = qp->ibqp.srq;
        init_attr->cap = attr->cap;
-       init_attr->sq_sig_type =
-               (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
-               ? IB_SIGNAL_REQ_WR : 0;
+       if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
+               init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+       else
+               init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
        init_attr->qp_type = qp->ibqp.qp_type;
        init_attr->port_num = 1;
        return 0;
@@ -596,13 +634,23 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp)
        } else {
                u32 min, max, x;
                u32 credits;
-
+               struct ipath_rwq *wq = qp->r_rq.wq;
+               u32 head;
+               u32 tail;
+
+               /* sanity check pointers before trusting them */
+               head = wq->head;
+               if (head >= qp->r_rq.size)
+                       head = 0;
+               tail = wq->tail;
+               if (tail >= qp->r_rq.size)
+                       tail = 0;
                /*
                 * Compute the number of credits available (RWQEs).
                 * XXX Not holding the r_rq.lock here so there is a small
                 * chance that the pair of reads are not atomic.
                 */
-               credits = qp->r_rq.head - qp->r_rq.tail;
+               credits = head - tail;
                if ((int)credits < 0)
                        credits += qp->r_rq.size;
                /*
@@ -679,27 +727,37 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
        case IB_QPT_UD:
        case IB_QPT_SMI:
        case IB_QPT_GSI:
-               qp = kmalloc(sizeof(*qp), GFP_KERNEL);
+               sz = sizeof(*qp);
+               if (init_attr->srq) {
+                       struct ipath_srq *srq = to_isrq(init_attr->srq);
+
+                       sz += sizeof(*qp->r_sg_list) *
+                               srq->rq.max_sge;
+               } else
+                       sz += sizeof(*qp->r_sg_list) *
+                               init_attr->cap.max_recv_sge;
+               qp = kmalloc(sz, GFP_KERNEL);
                if (!qp) {
-                       vfree(swq);
                        ret = ERR_PTR(-ENOMEM);
-                       goto bail;
+                       goto bail_swq;
                }
                if (init_attr->srq) {
+                       sz = 0;
                        qp->r_rq.size = 0;
                        qp->r_rq.max_sge = 0;
                        qp->r_rq.wq = NULL;
+                       init_attr->cap.max_recv_wr = 0;
+                       init_attr->cap.max_recv_sge = 0;
                } else {
                        qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
                        qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-                       sz = (sizeof(struct ipath_sge) * qp->r_rq.max_sge) +
+                       sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
                                sizeof(struct ipath_rwqe);
-                       qp->r_rq.wq = vmalloc(qp->r_rq.size * sz);
+                       qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
+                                             qp->r_rq.size * sz);
                        if (!qp->r_rq.wq) {
-                               kfree(qp);
-                               vfree(swq);
                                ret = ERR_PTR(-ENOMEM);
-                               goto bail;
+                               goto bail_qp;
                        }
                }
 
@@ -719,24 +777,19 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
                qp->s_wq = swq;
                qp->s_size = init_attr->cap.max_send_wr + 1;
                qp->s_max_sge = init_attr->cap.max_send_sge;
-               qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ?
-                       1 << IPATH_S_SIGNAL_REQ_WR : 0;
+               if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
+                       qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
+               else
+                       qp->s_flags = 0;
                dev = to_idev(ibpd->device);
                err = ipath_alloc_qpn(&dev->qp_table, qp,
                                      init_attr->qp_type);
                if (err) {
-                       vfree(swq);
-                       vfree(qp->r_rq.wq);
-                       kfree(qp);
                        ret = ERR_PTR(err);
-                       goto bail;
+                       goto bail_rwq;
                }
+               qp->ip = NULL;
                ipath_reset_qp(qp);
-
-               /* Tell the core driver that the kernel SMA is present. */
-               if (init_attr->qp_type == IB_QPT_SMI)
-                       ipath_layer_set_verbs_flags(dev->dd,
-                                                   IPATH_VERBS_KERNEL_SMA);
                break;
 
        default:
@@ -747,8 +800,63 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 
        init_attr->cap.max_inline_data = 0;
 
+       /*
+        * Return the address of the RWQ as the offset to mmap.
+        * See ipath_mmap() for details.
+        */
+       if (udata && udata->outlen >= sizeof(__u64)) {
+               struct ipath_mmap_info *ip;
+               __u64 offset = (__u64) qp->r_rq.wq;
+               int err;
+
+               err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+               if (err) {
+                       ret = ERR_PTR(err);
+                       goto bail_rwq;
+               }
+
+               if (qp->r_rq.wq) {
+                       /* Allocate info for ipath_mmap(). */
+                       ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+                       if (!ip) {
+                               ret = ERR_PTR(-ENOMEM);
+                               goto bail_rwq;
+                       }
+                       qp->ip = ip;
+                       ip->context = ibpd->uobject->context;
+                       ip->obj = qp->r_rq.wq;
+                       kref_init(&ip->ref);
+                       ip->mmap_cnt = 0;
+                       ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+                                             qp->r_rq.size * sz);
+                       spin_lock_irq(&dev->pending_lock);
+                       ip->next = dev->pending_mmaps;
+                       dev->pending_mmaps = ip;
+                       spin_unlock_irq(&dev->pending_lock);
+               }
+       }
+
+       spin_lock(&dev->n_qps_lock);
+       if (dev->n_qps_allocated == ib_ipath_max_qps) {
+               spin_unlock(&dev->n_qps_lock);
+               ret = ERR_PTR(-ENOMEM);
+               goto bail_ip;
+       }
+
+       dev->n_qps_allocated++;
+       spin_unlock(&dev->n_qps_lock);
+
        ret = &qp->ibqp;
+       goto bail;
 
+bail_ip:
+       kfree(qp->ip);
+bail_rwq:
+       vfree(qp->r_rq.wq);
+bail_qp:
+       kfree(qp);
+bail_swq:
+       vfree(swq);
 bail:
        return ret;
 }
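
The ipath_create_qp() error paths above are consolidated into a single chain of unwind labels (bail_ip, bail_rwq, bail_qp, bail_swq), each releasing one resource and falling through to the next. A minimal standalone sketch of the same idiom, with made-up resource names rather than the driver's structures:

#include <stdlib.h>

struct example {
        void *swq;      /* stand-in for the send work queue */
        void *rwq;      /* stand-in for the receive work queue */
};

static struct example *example_create(size_t swq_sz, size_t rwq_sz)
{
        void *swq = malloc(swq_sz);
        struct example *ex;

        if (!swq)
                goto bail;
        ex = malloc(sizeof(*ex));
        if (!ex)
                goto bail_swq;
        ex->rwq = malloc(rwq_sz);
        if (!ex->rwq)
                goto bail_ex;
        ex->swq = swq;
        return ex;              /* success: nothing to unwind */

bail_ex:
        free(ex);
bail_swq:
        free(swq);
bail:
        return NULL;
}

Each failure point only jumps to the label for what has already been allocated, so resources are released in reverse order of acquisition, which is how the vfree/kfree calls above now unwind.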
@@ -768,15 +876,12 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
        struct ipath_ibdev *dev = to_idev(ibqp->device);
        unsigned long flags;
 
-       /* Tell the core driver that the kernel SMA is gone. */
-       if (qp->ibqp.qp_type == IB_QPT_SMI)
-               ipath_layer_set_verbs_flags(dev->dd, 0);
-
-       spin_lock_irqsave(&qp->r_rq.lock, flags);
-       spin_lock(&qp->s_lock);
+       spin_lock_irqsave(&qp->s_lock, flags);
        qp->state = IB_QPS_ERR;
-       spin_unlock(&qp->s_lock);
-       spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+       spin_unlock_irqrestore(&qp->s_lock, flags);
+       spin_lock(&dev->n_qps_lock);
+       dev->n_qps_allocated--;
+       spin_unlock(&dev->n_qps_lock);
 
        /* Stop the sending tasklet. */
        tasklet_kill(&qp->s_task);
@@ -797,8 +902,11 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
        if (atomic_read(&qp->refcount) != 0)
                ipath_free_qp(&dev->qp_table, qp);
 
+       if (qp->ip)
+               kref_put(&qp->ip->ref, ipath_release_mmap_info);
+       else
+               vfree(qp->r_rq.wq);
        vfree(qp->s_wq);
-       vfree(qp->r_rq.wq);
        kfree(qp);
        return 0;
 }
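
Because the receive queue can now be mmap()ed by userspace, ipath_destroy_qp() no longer frees it unconditionally: if an ipath_mmap_info exists it drops a reference and lets ipath_release_mmap_info() free the memory when the last mapping goes away. The sketch below uses a plain atomic counter (deliberately not the kernel's kref API) just to show the ownership rule:

#include <stdatomic.h>
#include <stdlib.h>

struct mmap_info {
        atomic_int ref;         /* one count per holder: creator plus each mapping */
        void *obj;              /* the queue memory shared with userspace */
};

static void mmap_info_get(struct mmap_info *ip)
{
        atomic_fetch_add(&ip->ref, 1);
}

static void mmap_info_put(struct mmap_info *ip)
{
        if (atomic_fetch_sub(&ip->ref, 1) == 1) {
                free(ip->obj);  /* last holder gone: only now is it safe to free */
                free(ip);
        }
}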
@@ -850,8 +958,8 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
        struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
 
-       _VERBS_INFO("Send queue error on QP%d/%d: err: %d\n",
-                   qp->ibqp.qp_num, qp->remote_qpn, wc->status);
+       ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
+                 qp->ibqp.qp_num, qp->remote_qpn, wc->status);
 
        spin_lock(&dev->pending_lock);
        /* XXX What if it's already removed by the timeout code? */
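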
index 774d1615ce2f134f7058ffb72880190f25c1adde..a08654042c03411db35b26c54acaf5105e4015cb 100644 (file)
@@ -32,7 +32,7 @@
  */
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_RC_##x
@@ -540,7 +540,7 @@ static void send_rc_ack(struct ipath_qp *qp)
                lrh0 = IPATH_LRH_GRH;
        }
        /* read pkey_index w/o lock (its atomic) */
-       bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+       bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
        if (qp->r_nak_state)
                ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
                                            (qp->r_nak_state <<
@@ -557,7 +557,7 @@ static void send_rc_ack(struct ipath_qp *qp)
        hdr.lrh[0] = cpu_to_be16(lrh0);
        hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
        hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
-       hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
+       hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
        ohdr->bth[0] = cpu_to_be32(bth0);
        ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
        ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
@@ -1323,8 +1323,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                 * the eager header buffer size to 56 bytes so the last 4
                 * bytes of the BTH header (PSN) is in the data buffer.
                 */
-               header_in_data =
-                       ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+               header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
                if (header_in_data) {
                        psn = be32_to_cpu(((__be32 *) data)[0]);
                        data += sizeof(__be32);
index 89df8f5ea998087c42af9e77c097933a6bffa35d..6e23b3d632b820eac22febb91b0aab7d0269982a 100644 (file)
@@ -36,8 +36,7 @@
 
 /*
  * This file should only be included by kernel source, and by the diags.  It
- * defines the registers, and their contents, for the InfiniPath HT-400
- * chip.
+ * defines the registers, and their contents, for InfiniPath chips.
  */
 
 /*
 #define INFINIPATH_XGXS_RESET          0x7ULL
 #define INFINIPATH_XGXS_MDIOADDR_MASK  0xfULL
 #define INFINIPATH_XGXS_MDIOADDR_SHIFT 4
+#define INFINIPATH_XGXS_RX_POL_SHIFT 19
+#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
 
 #define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL        /* 40 bits valid */
 
-/* TID entries (memory), HT400-only */
+/* TID entries (memory), HT-only */
 #define INFINIPATH_RT_VALID 0x8000000000000000ULL
 #define INFINIPATH_RT_ADDR_SHIFT 0
 #define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
index 772bc59fb85c314fc4e1084c1656039a10c1fa6f..5c1da2d25e03e6c16ff4572ff59e88de613be0f5 100644 (file)
@@ -32,7 +32,7 @@
  */
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 /*
  * Convert the AETH RNR timeout code into the number of milliseconds.
@@ -106,6 +106,54 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
        spin_unlock_irqrestore(&dev->pending_lock, flags);
 }
 
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
+{
+       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+       int user = to_ipd(qp->ibqp.pd)->user;
+       int i, j, ret;
+       struct ib_wc wc;
+
+       qp->r_len = 0;
+       for (i = j = 0; i < wqe->num_sge; i++) {
+               if (wqe->sg_list[i].length == 0)
+                       continue;
+               /* Check LKEY */
+               if ((user && wqe->sg_list[i].lkey == 0) ||
+                   !ipath_lkey_ok(&dev->lk_table,
+                                  &qp->r_sg_list[j], &wqe->sg_list[i],
+                                  IB_ACCESS_LOCAL_WRITE))
+                       goto bad_lkey;
+               qp->r_len += wqe->sg_list[i].length;
+               j++;
+       }
+       qp->r_sge.sge = qp->r_sg_list[0];
+       qp->r_sge.sg_list = qp->r_sg_list + 1;
+       qp->r_sge.num_sge = j;
+       ret = 1;
+       goto bail;
+
+bad_lkey:
+       wc.wr_id = wqe->wr_id;
+       wc.status = IB_WC_LOC_PROT_ERR;
+       wc.opcode = IB_WC_RECV;
+       wc.vendor_err = 0;
+       wc.byte_len = 0;
+       wc.imm_data = 0;
+       wc.qp_num = qp->ibqp.qp_num;
+       wc.src_qp = 0;
+       wc.wc_flags = 0;
+       wc.pkey_index = 0;
+       wc.slid = 0;
+       wc.sl = 0;
+       wc.dlid_path_bits = 0;
+       wc.port_num = 0;
+       /* Signal solicited completion event. */
+       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+       ret = 0;
+bail:
+       return ret;
+}
+
 /**
  * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
  * @qp: the QP
@@ -119,71 +167,71 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 {
        unsigned long flags;
        struct ipath_rq *rq;
+       struct ipath_rwq *wq;
        struct ipath_srq *srq;
        struct ipath_rwqe *wqe;
-       int ret = 1;
+       void (*handler)(struct ib_event *, void *);
+       u32 tail;
+       int ret;
 
-       if (!qp->ibqp.srq) {
+       if (qp->ibqp.srq) {
+               srq = to_isrq(qp->ibqp.srq);
+               handler = srq->ibsrq.event_handler;
+               rq = &srq->rq;
+       } else {
+               srq = NULL;
+               handler = NULL;
                rq = &qp->r_rq;
-               spin_lock_irqsave(&rq->lock, flags);
-
-               if (unlikely(rq->tail == rq->head)) {
-                       ret = 0;
-                       goto done;
-               }
-               wqe = get_rwqe_ptr(rq, rq->tail);
-               qp->r_wr_id = wqe->wr_id;
-               if (!wr_id_only) {
-                       qp->r_sge.sge = wqe->sg_list[0];
-                       qp->r_sge.sg_list = wqe->sg_list + 1;
-                       qp->r_sge.num_sge = wqe->num_sge;
-                       qp->r_len = wqe->length;
-               }
-               if (++rq->tail >= rq->size)
-                       rq->tail = 0;
-               goto done;
        }
 
-       srq = to_isrq(qp->ibqp.srq);
-       rq = &srq->rq;
        spin_lock_irqsave(&rq->lock, flags);
-
-       if (unlikely(rq->tail == rq->head)) {
-               ret = 0;
-               goto done;
-       }
-       wqe = get_rwqe_ptr(rq, rq->tail);
+       wq = rq->wq;
+       tail = wq->tail;
+       /* Validate tail before using it since it is user writable. */
+       if (tail >= rq->size)
+               tail = 0;
+       do {
+               if (unlikely(tail == wq->head)) {
+                       spin_unlock_irqrestore(&rq->lock, flags);
+                       ret = 0;
+                       goto bail;
+               }
+               wqe = get_rwqe_ptr(rq, tail);
+               if (++tail >= rq->size)
+                       tail = 0;
+       } while (!wr_id_only && !init_sge(qp, wqe));
        qp->r_wr_id = wqe->wr_id;
-       if (!wr_id_only) {
-               qp->r_sge.sge = wqe->sg_list[0];
-               qp->r_sge.sg_list = wqe->sg_list + 1;
-               qp->r_sge.num_sge = wqe->num_sge;
-               qp->r_len = wqe->length;
-       }
-       if (++rq->tail >= rq->size)
-               rq->tail = 0;
-       if (srq->ibsrq.event_handler) {
-               struct ib_event ev;
+       wq->tail = tail;
+
+       ret = 1;
+       if (handler) {
                u32 n;
 
-               if (rq->head < rq->tail)
-                       n = rq->size + rq->head - rq->tail;
+               /*
+                * validate head pointer value and compute
+                * the number of remaining WQEs.
+                */
+               n = wq->head;
+               if (n >= rq->size)
+                       n = 0;
+               if (n < tail)
+                       n += rq->size - tail;
                else
-                       n = rq->head - rq->tail;
+                       n -= tail;
                if (n < srq->limit) {
+                       struct ib_event ev;
+
                        srq->limit = 0;
                        spin_unlock_irqrestore(&rq->lock, flags);
                        ev.device = qp->ibqp.device;
                        ev.element.srq = qp->ibqp.srq;
                        ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-                       srq->ibsrq.event_handler(&ev,
-                                                srq->ibsrq.srq_context);
+                       handler(&ev, srq->ibsrq.srq_context);
                        goto bail;
                }
        }
-
-done:
        spin_unlock_irqrestore(&rq->lock, flags);
+
 bail:
        return ret;
 }
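
The rewritten ipath_get_rwqe() treats wq->head and wq->tail as untrusted because the same page is mapped writable into userspace: both indices are clamped into range before use, and WQEs whose LKEYs fail init_sge() are skipped rather than trusted. A self-contained consumer sketch of that pattern, using plain ints in place of WQEs:

#include <stdint.h>

struct ring {
        uint32_t head;          /* advanced by the (untrusted) producer */
        uint32_t tail;          /* advanced by the consumer */
        uint32_t size;
        int entries[];
};

/* Returns 1 and stores an entry in *out, or 0 if the ring is empty. */
static int ring_consume(struct ring *r, int *out)
{
        uint32_t tail = r->tail;

        if (tail >= r->size)    /* never index with a value userspace controls */
                tail = 0;
        if (tail == r->head)    /* empty */
                return 0;
        *out = r->entries[tail];
        if (++tail >= r->size)
                tail = 0;
        r->tail = tail;
        return 1;
}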
@@ -422,6 +470,15 @@ done:
                wake_up(&qp->wait);
 }
 
+static int want_buffer(struct ipath_devdata *dd)
+{
+       set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+                        dd->ipath_sendctrl);
+
+       return 0;
+}
+
 /**
  * ipath_no_bufs_available - tell the layer driver we need buffers
  * @qp: the QP that caused the problem
@@ -438,7 +495,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
                list_add_tail(&qp->piowait, &dev->piowait);
        spin_unlock_irqrestore(&dev->pending_lock, flags);
        /*
-        * Note that as soon as ipath_layer_want_buffer() is called and
+        * Note that as soon as want_buffer() is called and
         * possibly before it returns, ipath_ib_piobufavail()
         * could be called.  If we are still in the tasklet function,
         * tasklet_hi_schedule() will not call us until the next time
@@ -448,7 +505,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
         */
        clear_bit(IPATH_S_BUSY, &qp->s_flags);
        tasklet_unlock(&qp->s_task);
-       ipath_layer_want_buffer(dev->dd);
+       want_buffer(dev->dd);
        dev->n_piowait++;
 }
 
@@ -563,7 +620,7 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
        hdr->hop_limit = grh->hop_limit;
        /* The SGID is 32-bit aligned. */
        hdr->sgid.global.subnet_prefix = dev->gid_prefix;
-       hdr->sgid.global.interface_id = ipath_layer_get_guid(dev->dd);
+       hdr->sgid.global.interface_id = dev->dd->ipath_guid;
        hdr->dgid = grh->dgid;
 
        /* GRH header size in 32-bit words. */
@@ -595,8 +652,7 @@ void ipath_do_ruc_send(unsigned long data)
        if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
                goto bail;
 
-       if (unlikely(qp->remote_ah_attr.dlid ==
-                    ipath_layer_get_lid(dev->dd))) {
+       if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
                ipath_ruc_loopback(qp);
                goto clear;
        }
@@ -663,8 +719,8 @@ again:
        qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
        qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
                                       SIZE_OF_CRC);
-       qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
-       bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+       qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
+       bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
        bth0 |= extra_bytes << 20;
        ohdr->bth[0] = cpu_to_be32(bth0);
        ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
index f760434660bd1e589d666f9d31ab09b6d51b9fa7..941e866d9517ba84edb6078db94565f5090f2429 100644 (file)
@@ -48,66 +48,39 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
                           struct ib_recv_wr **bad_wr)
 {
        struct ipath_srq *srq = to_isrq(ibsrq);
-       struct ipath_ibdev *dev = to_idev(ibsrq->device);
+       struct ipath_rwq *wq;
        unsigned long flags;
        int ret;
 
        for (; wr; wr = wr->next) {
                struct ipath_rwqe *wqe;
                u32 next;
-               int i, j;
+               int i;
 
-               if (wr->num_sge > srq->rq.max_sge) {
+               if ((unsigned) wr->num_sge > srq->rq.max_sge) {
                        *bad_wr = wr;
                        ret = -ENOMEM;
                        goto bail;
                }
 
                spin_lock_irqsave(&srq->rq.lock, flags);
-               next = srq->rq.head + 1;
+               wq = srq->rq.wq;
+               next = wq->head + 1;
                if (next >= srq->rq.size)
                        next = 0;
-               if (next == srq->rq.tail) {
+               if (next == wq->tail) {
                        spin_unlock_irqrestore(&srq->rq.lock, flags);
                        *bad_wr = wr;
                        ret = -ENOMEM;
                        goto bail;
                }
 
-               wqe = get_rwqe_ptr(&srq->rq, srq->rq.head);
+               wqe = get_rwqe_ptr(&srq->rq, wq->head);
                wqe->wr_id = wr->wr_id;
-               wqe->sg_list[0].mr = NULL;
-               wqe->sg_list[0].vaddr = NULL;
-               wqe->sg_list[0].length = 0;
-               wqe->sg_list[0].sge_length = 0;
-               wqe->length = 0;
-               for (i = 0, j = 0; i < wr->num_sge; i++) {
-                       /* Check LKEY */
-                       if (to_ipd(srq->ibsrq.pd)->user &&
-                           wr->sg_list[i].lkey == 0) {
-                               spin_unlock_irqrestore(&srq->rq.lock,
-                                                      flags);
-                               *bad_wr = wr;
-                               ret = -EINVAL;
-                               goto bail;
-                       }
-                       if (wr->sg_list[i].length == 0)
-                               continue;
-                       if (!ipath_lkey_ok(&dev->lk_table,
-                                          &wqe->sg_list[j],
-                                          &wr->sg_list[i],
-                                          IB_ACCESS_LOCAL_WRITE)) {
-                               spin_unlock_irqrestore(&srq->rq.lock,
-                                                      flags);
-                               *bad_wr = wr;
-                               ret = -EINVAL;
-                               goto bail;
-                       }
-                       wqe->length += wr->sg_list[i].length;
-                       j++;
-               }
-               wqe->num_sge = j;
-               srq->rq.head = next;
+               wqe->num_sge = wr->num_sge;
+               for (i = 0; i < wr->num_sge; i++)
+                       wqe->sg_list[i] = wr->sg_list[i];
+               wq->head = next;
                spin_unlock_irqrestore(&srq->rq.lock, flags);
        }
        ret = 0;
@@ -133,53 +106,95 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
 
        if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
                ret = ERR_PTR(-ENOMEM);
-               goto bail;
+               goto done;
        }
 
        if (srq_init_attr->attr.max_wr == 0) {
                ret = ERR_PTR(-EINVAL);
-               goto bail;
+               goto done;
        }
 
        if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
            (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
                ret = ERR_PTR(-EINVAL);
-               goto bail;
+               goto done;
        }
 
        srq = kmalloc(sizeof(*srq), GFP_KERNEL);
        if (!srq) {
                ret = ERR_PTR(-ENOMEM);
-               goto bail;
+               goto done;
        }
 
        /*
         * Need to use vmalloc() if we want to support large #s of entries.
         */
        srq->rq.size = srq_init_attr->attr.max_wr + 1;
-       sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge +
+       srq->rq.max_sge = srq_init_attr->attr.max_sge;
+       sz = sizeof(struct ib_sge) * srq->rq.max_sge +
                sizeof(struct ipath_rwqe);
-       srq->rq.wq = vmalloc(srq->rq.size * sz);
+       srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
        if (!srq->rq.wq) {
-               kfree(srq);
                ret = ERR_PTR(-ENOMEM);
-               goto bail;
+               goto bail_srq;
        }
 
+       /*
+        * Return the address of the RWQ as the offset to mmap.
+        * See ipath_mmap() for details.
+        */
+       if (udata && udata->outlen >= sizeof(__u64)) {
+               struct ipath_mmap_info *ip;
+               __u64 offset = (__u64) srq->rq.wq;
+               int err;
+
+               err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+               if (err) {
+                       ret = ERR_PTR(err);
+                       goto bail_wq;
+               }
+
+               /* Allocate info for ipath_mmap(). */
+               ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+               if (!ip) {
+                       ret = ERR_PTR(-ENOMEM);
+                       goto bail_wq;
+               }
+               srq->ip = ip;
+               ip->context = ibpd->uobject->context;
+               ip->obj = srq->rq.wq;
+               kref_init(&ip->ref);
+               ip->mmap_cnt = 0;
+               ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+                                     srq->rq.size * sz);
+               spin_lock_irq(&dev->pending_lock);
+               ip->next = dev->pending_mmaps;
+               dev->pending_mmaps = ip;
+               spin_unlock_irq(&dev->pending_lock);
+       } else
+               srq->ip = NULL;
+
        /*
         * ib_create_srq() will initialize srq->ibsrq.
         */
        spin_lock_init(&srq->rq.lock);
-       srq->rq.head = 0;
-       srq->rq.tail = 0;
+       srq->rq.wq->head = 0;
+       srq->rq.wq->tail = 0;
        srq->rq.max_sge = srq_init_attr->attr.max_sge;
        srq->limit = srq_init_attr->attr.srq_limit;
 
+       dev->n_srqs_allocated++;
+
        ret = &srq->ibsrq;
+       goto done;
 
-       dev->n_srqs_allocated++;
+bail_wq:
+       vfree(srq->rq.wq);
 
-bail:
+bail_srq:
+       kfree(srq);
+
+done:
        return ret;
 }
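
When userspace asked for it (udata->outlen >= sizeof(__u64)), ipath_create_srq() copies the kernel address of the RWQ back as a 64-bit token and queues an ipath_mmap_info entry for ipath_mmap() to match later. A rough userspace-side sketch, with a hypothetical response layout, of how a library would turn that token into a mapping:

#include <stdint.h>
#include <sys/mman.h>
#include <sys/types.h>

struct create_srq_resp {        /* hypothetical layout of the udata response */
        uint64_t rwq_offset;    /* token the kernel wrote with ib_copy_to_udata() */
};

static void *map_rwq(int cmd_fd, const struct create_srq_resp *resp, size_t len)
{
        /* The token is handed back as the mmap offset; the driver's
         * ipath_mmap() looks it up in its pending_mmaps list. */
        return mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
                    cmd_fd, (off_t)resp->rwq_offset);
}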
 
@@ -188,83 +203,130 @@ bail:
  * @ibsrq: the SRQ to modify
  * @attr: the new attributes of the SRQ
  * @attr_mask: indicates which attributes to modify
+ * @udata: user data for ipathverbs.so
  */
 int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                    enum ib_srq_attr_mask attr_mask)
+                    enum ib_srq_attr_mask attr_mask,
+                    struct ib_udata *udata)
 {
        struct ipath_srq *srq = to_isrq(ibsrq);
-       unsigned long flags;
-       int ret;
+       int ret = 0;
 
-       if (attr_mask & IB_SRQ_MAX_WR)
-               if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
-                   (attr->max_sge > srq->rq.max_sge)) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
+       if (attr_mask & IB_SRQ_MAX_WR) {
+               struct ipath_rwq *owq;
+               struct ipath_rwq *wq;
+               struct ipath_rwqe *p;
+               u32 sz, size, n, head, tail;
 
-       if (attr_mask & IB_SRQ_LIMIT)
-               if (attr->srq_limit >= srq->rq.size) {
+               /* Check that the requested sizes are below the limits. */
+               if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
+                   ((attr_mask & IB_SRQ_LIMIT) ?
+                    attr->srq_limit : srq->limit) > attr->max_wr) {
                        ret = -EINVAL;
                        goto bail;
                }
 
-       if (attr_mask & IB_SRQ_MAX_WR) {
-               struct ipath_rwqe *wq, *p;
-               u32 sz, size, n;
-
                sz = sizeof(struct ipath_rwqe) +
-                       attr->max_sge * sizeof(struct ipath_sge);
+                       srq->rq.max_sge * sizeof(struct ib_sge);
                size = attr->max_wr + 1;
-               wq = vmalloc(size * sz);
+               wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
                if (!wq) {
                        ret = -ENOMEM;
                        goto bail;
                }
 
-               spin_lock_irqsave(&srq->rq.lock, flags);
-               if (srq->rq.head < srq->rq.tail)
-                       n = srq->rq.size + srq->rq.head - srq->rq.tail;
+               /*
+                * Return the address of the RWQ as the offset to mmap.
+                * See ipath_mmap() for details.
+                */
+               if (udata && udata->inlen >= sizeof(__u64)) {
+                       __u64 offset_addr;
+                       __u64 offset = (__u64) wq;
+
+                       ret = ib_copy_from_udata(&offset_addr, udata,
+                                                sizeof(offset_addr));
+                       if (ret) {
+                               vfree(wq);
+                               goto bail;
+                       }
+                       udata->outbuf = (void __user *) offset_addr;
+                       ret = ib_copy_to_udata(udata, &offset,
+                                              sizeof(offset));
+                       if (ret) {
+                               vfree(wq);
+                               goto bail;
+                       }
+               }
+
+               spin_lock_irq(&srq->rq.lock);
+               /*
+                * validate head pointer value and compute
+                * the number of remaining WQEs.
+                */
+               owq = srq->rq.wq;
+               head = owq->head;
+               if (head >= srq->rq.size)
+                       head = 0;
+               tail = owq->tail;
+               if (tail >= srq->rq.size)
+                       tail = 0;
+               n = head;
+               if (n < tail)
+                       n += srq->rq.size - tail;
                else
-                       n = srq->rq.head - srq->rq.tail;
-               if (size <= n || size <= srq->limit) {
-                       spin_unlock_irqrestore(&srq->rq.lock, flags);
+                       n -= tail;
+               if (size <= n) {
+                       spin_unlock_irq(&srq->rq.lock);
                        vfree(wq);
                        ret = -EINVAL;
                        goto bail;
                }
                n = 0;
-               p = wq;
-               while (srq->rq.tail != srq->rq.head) {
+               p = wq->wq;
+               while (tail != head) {
                        struct ipath_rwqe *wqe;
                        int i;
 
-                       wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail);
+                       wqe = get_rwqe_ptr(&srq->rq, tail);
                        p->wr_id = wqe->wr_id;
-                       p->length = wqe->length;
                        p->num_sge = wqe->num_sge;
                        for (i = 0; i < wqe->num_sge; i++)
                                p->sg_list[i] = wqe->sg_list[i];
                        n++;
                        p = (struct ipath_rwqe *)((char *) p + sz);
-                       if (++srq->rq.tail >= srq->rq.size)
-                               srq->rq.tail = 0;
+                       if (++tail >= srq->rq.size)
+                               tail = 0;
                }
-               vfree(srq->rq.wq);
                srq->rq.wq = wq;
                srq->rq.size = size;
-               srq->rq.head = n;
-               srq->rq.tail = 0;
-               srq->rq.max_sge = attr->max_sge;
-               spin_unlock_irqrestore(&srq->rq.lock, flags);
-       }
-
-       if (attr_mask & IB_SRQ_LIMIT) {
-               spin_lock_irqsave(&srq->rq.lock, flags);
-               srq->limit = attr->srq_limit;
-               spin_unlock_irqrestore(&srq->rq.lock, flags);
+               wq->head = n;
+               wq->tail = 0;
+               if (attr_mask & IB_SRQ_LIMIT)
+                       srq->limit = attr->srq_limit;
+               spin_unlock_irq(&srq->rq.lock);
+
+               vfree(owq);
+
+               if (srq->ip) {
+                       struct ipath_mmap_info *ip = srq->ip;
+                       struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
+
+                       ip->obj = wq;
+                       ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+                                             size * sz);
+                       spin_lock_irq(&dev->pending_lock);
+                       ip->next = dev->pending_mmaps;
+                       dev->pending_mmaps = ip;
+                       spin_unlock_irq(&dev->pending_lock);
+               }
+       } else if (attr_mask & IB_SRQ_LIMIT) {
+               spin_lock_irq(&srq->rq.lock);
+               if (attr->srq_limit >= srq->rq.size)
+                       ret = -EINVAL;
+               else
+                       srq->limit = attr->srq_limit;
+               spin_unlock_irq(&srq->rq.lock);
        }
-       ret = 0;
 
 bail:
        return ret;
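
Resizing the SRQ in ipath_modify_srq() first computes how many WQEs are outstanding in the old ring so it can reject a new size that would not hold them, then copies the live entries into the new ring starting at index 0. The occupancy arithmetic, with a small worked example, assuming head and tail wrap at size:

#include <assert.h>
#include <stdint.h>

static uint32_t ring_occupancy(uint32_t head, uint32_t tail, uint32_t size)
{
        return head >= tail ? head - tail : size - tail + head;
}

int main(void)
{
        /* size 8, producer wrapped: head = 2, tail = 6 -> 4 entries in flight,
         * so the new ring size must exceed 4 (size includes the unused slot). */
        assert(ring_occupancy(2, 6, 8) == 4);
        assert(ring_occupancy(5, 5, 8) == 0);   /* head == tail means empty */
        return 0;
}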
index 70351b7e35c0a8603247141c972edb47b3477ca4..30a825928fcf3e9791cb1fe17d923571331d0559 100644 (file)
@@ -271,33 +271,6 @@ void ipath_get_faststats(unsigned long opaque)
                }
        }
 
-       if (dd->ipath_nosma_bufs) {
-               dd->ipath_nosma_secs += 5;
-               if (dd->ipath_nosma_secs >= 30) {
-                       ipath_cdbg(SMA, "No SMA bufs avail %u seconds; "
-                                  "cancelling pending sends\n",
-                                  dd->ipath_nosma_secs);
-                       /*
-                        * issue an abort as well, in case we have a packet
-                        * stuck in launch fifo.  This could corrupt an
-                        * outgoing user packet in the worst case,
-                        * but this is a pretty catastrophic, anyway.
-                        */
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                                        INFINIPATH_S_ABORT);
-                       ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
-                                            dd->ipath_piobcnt2k +
-                                            dd->ipath_piobcnt4k -
-                                            dd->ipath_lastport_piobuf);
-                       /* start again, if necessary */
-                       dd->ipath_nosma_secs = 0;
-               } else
-                       ipath_cdbg(SMA, "No SMA bufs avail %u tries, "
-                                  "after %u seconds\n",
-                                  dd->ipath_nosma_bufs,
-                                  dd->ipath_nosma_secs);
-       }
-
 done:
        mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
 }
index b98821d7801d9be3b758825b11e50fd398a2dbe0..e299148c4b68ec4ae793f0805a339d3c75bbb081 100644 (file)
@@ -35,7 +35,6 @@
 #include <linux/pci.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"
 
 /**
@@ -76,7 +75,7 @@ bail:
 static ssize_t show_version(struct device_driver *dev, char *buf)
 {
        /* The string printed here is already newline-terminated. */
-       return scnprintf(buf, PAGE_SIZE, "%s", ipath_core_version);
+       return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
 }
 
 static ssize_t show_num_units(struct device_driver *dev, char *buf)
@@ -108,8 +107,8 @@ static const char *ipath_status_str[] = {
        "Initted",
        "Disabled",
        "Admin_Disabled",
-       "OIB_SMA",
-       "SMA",
+       "", /* This used to be the old "OIB_SMA" status. */
+       "", /* This used to be the old "SMA" status. */
        "Present",
        "IB_link_up",
        "IB_configured",
@@ -227,7 +226,6 @@ static ssize_t store_mlid(struct device *dev,
        unit = dd->ipath_unit;
 
        dd->ipath_mlid = mlid;
-       ipath_layer_intr(dd, IPATH_LAYER_INT_BCAST);
 
        goto bail;
 invalid:
@@ -467,7 +465,7 @@ static ssize_t store_link_state(struct device *dev,
        if (ret < 0)
                goto invalid;
 
-       r = ipath_layer_set_linkstate(dd, state);
+       r = ipath_set_linkstate(dd, state);
        if (r < 0) {
                ret = r;
                goto bail;
@@ -502,7 +500,7 @@ static ssize_t store_mtu(struct device *dev,
        if (ret < 0)
                goto invalid;
 
-       r = ipath_layer_set_mtu(dd, mtu);
+       r = ipath_set_mtu(dd, mtu);
        if (r < 0)
                ret = r;
 
@@ -563,6 +561,33 @@ bail:
        return ret;
 }
 
+static ssize_t store_rx_pol_inv(struct device *dev,
+                         struct device_attribute *attr,
+                         const char *buf,
+                         size_t count)
+{
+       struct ipath_devdata *dd = dev_get_drvdata(dev);
+       int ret, r;
+       u16 val;
+
+       ret = ipath_parse_ushort(buf, &val);
+       if (ret < 0)
+               goto invalid;
+
+       r = ipath_set_rx_pol_inv(dd, val);
+       if (r < 0) {
+               ret = r;
+               goto bail;
+       }
+
+       goto bail;
+invalid:
+       ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
+bail:
+       return ret;
+}
+
+
 static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
 static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
 
@@ -589,6 +614,7 @@ static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
 static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
 static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
 static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
+static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
 
 static struct attribute *dev_attributes[] = {
        &dev_attr_guid.attr,
@@ -603,6 +629,7 @@ static struct attribute *dev_attributes[] = {
        &dev_attr_boardversion.attr,
        &dev_attr_unit.attr,
        &dev_attr_enabled.attr,
+       &dev_attr_rx_pol_inv.attr,
        NULL
 };
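
The new rx_pol_inv attribute is write-only (S_IWUSR with no show method), so it is driven entirely by userspace writes. A small illustrative helper; the sysfs path passed in depends on the device and is only an example:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Write a value such as "1" to the device's rx_pol_inv attribute.
 * Returns 0 on success, -1 on failure. */
static int set_rx_pol_inv(const char *attr_path, const char *val)
{
        int fd = open(attr_path, O_WRONLY);
        ssize_t n;

        if (fd < 0)
                return -1;
        n = write(fd, val, strlen(val));
        close(fd);
        return n == (ssize_t)strlen(val) ? 0 : -1;
}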
 
index c33abea2d5a7c72de0c3f56638e64a6ed7428def..0fd3cded16baffab97b78934c990ae5a0944f194 100644 (file)
@@ -32,7 +32,7 @@
  */
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_UC_##x
@@ -261,8 +261,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                 * size to 56 bytes so the last 4 bytes of
                 * the BTH header (PSN) is in the data buffer.
                 */
-               header_in_data =
-                       ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+               header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
                if (header_in_data) {
                        psn = be32_to_cpu(((__be32 *) data)[0]);
                        data += sizeof(__be32);
index 3466129af8043d6538399bf420ceac9a6e174ad8..6991d1d74e3cebab1ed461108b384e37cd746848 100644 (file)
 #include <rdma/ib_smi.h>
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
+
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+                   u32 *lengthp, struct ipath_sge_state *ss)
+{
+       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+       int user = to_ipd(qp->ibqp.pd)->user;
+       int i, j, ret;
+       struct ib_wc wc;
+
+       *lengthp = 0;
+       for (i = j = 0; i < wqe->num_sge; i++) {
+               if (wqe->sg_list[i].length == 0)
+                       continue;
+               /* Check LKEY */
+               if ((user && wqe->sg_list[i].lkey == 0) ||
+                   !ipath_lkey_ok(&dev->lk_table,
+                                  j ? &ss->sg_list[j - 1] : &ss->sge,
+                                  &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+                       goto bad_lkey;
+               *lengthp += wqe->sg_list[i].length;
+               j++;
+       }
+       ss->num_sge = j;
+       ret = 1;
+       goto bail;
+
+bad_lkey:
+       wc.wr_id = wqe->wr_id;
+       wc.status = IB_WC_LOC_PROT_ERR;
+       wc.opcode = IB_WC_RECV;
+       wc.vendor_err = 0;
+       wc.byte_len = 0;
+       wc.imm_data = 0;
+       wc.qp_num = qp->ibqp.qp_num;
+       wc.src_qp = 0;
+       wc.wc_flags = 0;
+       wc.pkey_index = 0;
+       wc.slid = 0;
+       wc.sl = 0;
+       wc.dlid_path_bits = 0;
+       wc.port_num = 0;
+       /* Signal solicited completion event. */
+       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+       ret = 0;
+bail:
+       return ret;
+}
 
 /**
  * ipath_ud_loopback - handle send on loopback QPs
@@ -46,6 +93,8 @@
  *
  * This is called from ipath_post_ud_send() to forward a WQE addressed
  * to the same HCA.
+ * Note that the receive interrupt handler may be calling ipath_ud_rcv()
+ * while this is being called.
  */
 static void ipath_ud_loopback(struct ipath_qp *sqp,
                              struct ipath_sge_state *ss,
@@ -60,7 +109,11 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
        struct ipath_srq *srq;
        struct ipath_sge_state rsge;
        struct ipath_sge *sge;
+       struct ipath_rwq *wq;
        struct ipath_rwqe *wqe;
+       void (*handler)(struct ib_event *, void *);
+       u32 tail;
+       u32 rlen;
 
        qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn);
        if (!qp)
@@ -94,6 +147,13 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
                wc->imm_data = 0;
        }
 
+       if (wr->num_sge > 1) {
+               rsge.sg_list = kmalloc((wr->num_sge - 1) *
+                                       sizeof(struct ipath_sge),
+                                      GFP_ATOMIC);
+       } else
+               rsge.sg_list = NULL;
+
        /*
         * Get the next work request entry to find where to put the data.
         * Note that it is safe to drop the lock after changing rq->tail
@@ -101,37 +161,52 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
         */
        if (qp->ibqp.srq) {
                srq = to_isrq(qp->ibqp.srq);
+               handler = srq->ibsrq.event_handler;
                rq = &srq->rq;
        } else {
                srq = NULL;
+               handler = NULL;
                rq = &qp->r_rq;
        }
+
        spin_lock_irqsave(&rq->lock, flags);
-       if (rq->tail == rq->head) {
-               spin_unlock_irqrestore(&rq->lock, flags);
-               dev->n_pkt_drops++;
-               goto done;
+       wq = rq->wq;
+       tail = wq->tail;
+       while (1) {
+               if (unlikely(tail == wq->head)) {
+                       spin_unlock_irqrestore(&rq->lock, flags);
+                       dev->n_pkt_drops++;
+                       goto bail_sge;
+               }
+               wqe = get_rwqe_ptr(rq, tail);
+               if (++tail >= rq->size)
+                       tail = 0;
+               if (init_sge(qp, wqe, &rlen, &rsge))
+                       break;
+               wq->tail = tail;
        }
        /* Silently drop packets which are too big. */
-       wqe = get_rwqe_ptr(rq, rq->tail);
-       if (wc->byte_len > wqe->length) {
+       if (wc->byte_len > rlen) {
                spin_unlock_irqrestore(&rq->lock, flags);
                dev->n_pkt_drops++;
-               goto done;
+               goto bail_sge;
        }
+       wq->tail = tail;
        wc->wr_id = wqe->wr_id;
-       rsge.sge = wqe->sg_list[0];
-       rsge.sg_list = wqe->sg_list + 1;
-       rsge.num_sge = wqe->num_sge;
-       if (++rq->tail >= rq->size)
-               rq->tail = 0;
-       if (srq && srq->ibsrq.event_handler) {
+       if (handler) {
                u32 n;
 
-               if (rq->head < rq->tail)
-                       n = rq->size + rq->head - rq->tail;
+               /*
+                * validate head pointer value and compute
+                * the number of remaining WQEs.
+                */
+               n = wq->head;
+               if (n >= rq->size)
+                       n = 0;
+               if (n < tail)
+                       n += rq->size - tail;
                else
-                       n = rq->head - rq->tail;
+                       n -= tail;
                if (n < srq->limit) {
                        struct ib_event ev;
 
@@ -140,12 +215,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
                        ev.device = qp->ibqp.device;
                        ev.element.srq = qp->ibqp.srq;
                        ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-                       srq->ibsrq.event_handler(&ev,
-                                                srq->ibsrq.srq_context);
+                       handler(&ev, srq->ibsrq.srq_context);
                } else
                        spin_unlock_irqrestore(&rq->lock, flags);
        } else
                spin_unlock_irqrestore(&rq->lock, flags);
+
        ah_attr = &to_iah(wr->wr.ud.ah)->attr;
        if (ah_attr->ah_flags & IB_AH_GRH) {
                ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
@@ -186,7 +261,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
        wc->src_qp = sqp->ibqp.qp_num;
        /* XXX do we know which pkey matched? Only needed for GSI. */
        wc->pkey_index = 0;
-       wc->slid = ipath_layer_get_lid(dev->dd) |
+       wc->slid = dev->dd->ipath_lid |
                (ah_attr->src_path_bits &
                 ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1));
        wc->sl = ah_attr->sl;
@@ -196,6 +271,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
        ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
                       wr->send_flags & IB_SEND_SOLICITED);
 
+bail_sge:
+       kfree(rsge.sg_list);
 done:
        if (atomic_dec_and_test(&qp->refcount))
                wake_up(&qp->wait);
@@ -276,7 +353,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
                ss.num_sge++;
        }
        /* Check for invalid packet size. */
-       if (len > ipath_layer_get_ibmtu(dev->dd)) {
+       if (len > dev->dd->ipath_ibmtu) {
                ret = -EINVAL;
                goto bail;
        }
@@ -298,7 +375,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
                dev->n_unicast_xmit++;
                lid = ah_attr->dlid &
                        ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
-               if (unlikely(lid == ipath_layer_get_lid(dev->dd))) {
+               if (unlikely(lid == dev->dd->ipath_lid)) {
                        /*
                         * Pass in an uninitialized ib_wc to save stack
                         * space.
@@ -327,7 +404,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
                qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
                        dev->gid_prefix;
                qp->s_hdr.u.l.grh.sgid.global.interface_id =
-                       ipath_layer_get_guid(dev->dd);
+                       dev->dd->ipath_guid;
                qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid;
                /*
                 * Don't worry about sending to locally attached multicast
@@ -357,7 +434,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
        qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
        qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
        qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
-       lid = ipath_layer_get_lid(dev->dd);
+       lid = dev->dd->ipath_lid;
        if (lid) {
                lid |= ah_attr->src_path_bits &
                        ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
@@ -368,7 +445,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
                bth0 |= 1 << 23;
        bth0 |= extra_bytes << 20;
        bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
-               ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+               ipath_get_pkey(dev->dd, qp->s_pkey_index);
        ohdr->bth[0] = cpu_to_be32(bth0);
        /*
         * Use the multicast QP if the destination LID is a multicast LID.
@@ -433,13 +510,9 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
        int opcode;
        u32 hdrsize;
        u32 pad;
-       unsigned long flags;
        struct ib_wc wc;
        u32 qkey;
        u32 src_qp;
-       struct ipath_rq *rq;
-       struct ipath_srq *srq;
-       struct ipath_rwqe *wqe;
        u16 dlid;
        int header_in_data;
 
@@ -458,8 +531,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                 * the eager header buffer size to 56 bytes so the last 12
                 * bytes of the IB header is in the data buffer.
                 */
-               header_in_data =
-                       ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+               header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
                if (header_in_data) {
                        qkey = be32_to_cpu(((__be32 *) data)[1]);
                        src_qp = be32_to_cpu(((__be32 *) data)[2]);
@@ -547,19 +619,10 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 
        /*
         * Get the next work request entry to find where to put the data.
-        * Note that it is safe to drop the lock after changing rq->tail
-        * since ipath_post_receive() won't fill the empty slot.
         */
-       if (qp->ibqp.srq) {
-               srq = to_isrq(qp->ibqp.srq);
-               rq = &srq->rq;
-       } else {
-               srq = NULL;
-               rq = &qp->r_rq;
-       }
-       spin_lock_irqsave(&rq->lock, flags);
-       if (rq->tail == rq->head) {
-               spin_unlock_irqrestore(&rq->lock, flags);
+       if (qp->r_reuse_sge)
+               qp->r_reuse_sge = 0;
+       else if (!ipath_get_rwqe(qp, 0)) {
                /*
                 * Count VL15 packets dropped due to no receive buffer.
                 * Otherwise, count them as buffer overruns since usually,
@@ -573,39 +636,11 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                goto bail;
        }
        /* Silently drop packets which are too big. */
-       wqe = get_rwqe_ptr(rq, rq->tail);
-       if (wc.byte_len > wqe->length) {
-               spin_unlock_irqrestore(&rq->lock, flags);
+       if (wc.byte_len > qp->r_len) {
+               qp->r_reuse_sge = 1;
                dev->n_pkt_drops++;
                goto bail;
        }
-       wc.wr_id = wqe->wr_id;
-       qp->r_sge.sge = wqe->sg_list[0];
-       qp->r_sge.sg_list = wqe->sg_list + 1;
-       qp->r_sge.num_sge = wqe->num_sge;
-       if (++rq->tail >= rq->size)
-               rq->tail = 0;
-       if (srq && srq->ibsrq.event_handler) {
-               u32 n;
-
-               if (rq->head < rq->tail)
-                       n = rq->size + rq->head - rq->tail;
-               else
-                       n = rq->head - rq->tail;
-               if (n < srq->limit) {
-                       struct ib_event ev;
-
-                       srq->limit = 0;
-                       spin_unlock_irqrestore(&rq->lock, flags);
-                       ev.device = qp->ibqp.device;
-                       ev.element.srq = qp->ibqp.srq;
-                       ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-                       srq->ibsrq.event_handler(&ev,
-                                                srq->ibsrq.srq_context);
-               } else
-                       spin_unlock_irqrestore(&rq->lock, flags);
-       } else
-               spin_unlock_irqrestore(&rq->lock, flags);
        if (has_grh) {
                ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
                               sizeof(struct ib_grh));
@@ -614,6 +649,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
        ipath_copy_sge(&qp->r_sge, data,
                       wc.byte_len - sizeof(struct ib_grh));
+       wc.wr_id = qp->r_wr_id;
        wc.status = IB_WC_SUCCESS;
        wc.opcode = IB_WC_RECV;
        wc.vendor_err = 0;
index d70a9b6b523977f7900961431c6e4bc75934364b..b8381c5e72bd689d7790ec043467f6d0c2d0afb0 100644 (file)
 
 #include <rdma/ib_mad.h>
 #include <rdma/ib_user_verbs.h>
+#include <linux/io.h>
 #include <linux/utsname.h>
 
 #include "ipath_kernel.h"
 #include "ipath_verbs.h"
 #include "ipath_common.h"
 
-/* Not static, because we don't want the compiler removing it */
-const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR;
-
 static unsigned int ib_ipath_qp_table_size = 251;
 module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(qp_table_size, "QP table size");
@@ -52,10 +50,6 @@ module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
 MODULE_PARM_DESC(lkey_table_size,
                 "LKEY table size in bits (2^n, 1 <= n <= 23)");
 
-unsigned int ib_ipath_debug;   /* debug mask */
-module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(debug, "Verbs debug mask");
-
 static unsigned int ib_ipath_max_pds = 0xFFFF;
 module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_pds,
@@ -79,6 +73,10 @@ module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
                   S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
 
+unsigned int ib_ipath_max_qps = 16384;
+module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
+
 unsigned int ib_ipath_max_sges = 0x60;
 module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
@@ -109,9 +107,9 @@ module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
                   uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("QLogic <support@pathscale.com>");
-MODULE_DESCRIPTION("QLogic InfiniPath driver");
+static unsigned int ib_ipath_disable_sma;
+module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ib_ipath_disable_sma, "Disable the SMA");
 
 const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
        [IB_QPS_RESET] = 0,
@@ -125,6 +123,16 @@ const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
        [IB_QPS_ERR] = 0,
 };
 
+struct ipath_ucontext {
+       struct ib_ucontext ibucontext;
+};
+
+static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
+                                                 *ibucontext)
+{
+       return container_of(ibucontext, struct ipath_ucontext, ibucontext);
+}
+
 /*
  * Translate ib_wr_opcode into ib_wc_opcode.
  */
@@ -277,11 +285,12 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                              struct ib_recv_wr **bad_wr)
 {
        struct ipath_qp *qp = to_iqp(ibqp);
+       struct ipath_rwq *wq = qp->r_rq.wq;
        unsigned long flags;
        int ret;
 
        /* Check that state is OK to post receive. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) {
+       if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
                *bad_wr = wr;
                ret = -EINVAL;
                goto bail;
@@ -290,59 +299,31 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
        for (; wr; wr = wr->next) {
                struct ipath_rwqe *wqe;
                u32 next;
-               int i, j;
+               int i;
 
-               if (wr->num_sge > qp->r_rq.max_sge) {
+               if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
                        *bad_wr = wr;
                        ret = -ENOMEM;
                        goto bail;
                }
 
                spin_lock_irqsave(&qp->r_rq.lock, flags);
-               next = qp->r_rq.head + 1;
+               next = wq->head + 1;
                if (next >= qp->r_rq.size)
                        next = 0;
-               if (next == qp->r_rq.tail) {
+               if (next == wq->tail) {
                        spin_unlock_irqrestore(&qp->r_rq.lock, flags);
                        *bad_wr = wr;
                        ret = -ENOMEM;
                        goto bail;
                }
 
-               wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head);
+               wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
                wqe->wr_id = wr->wr_id;
-               wqe->sg_list[0].mr = NULL;
-               wqe->sg_list[0].vaddr = NULL;
-               wqe->sg_list[0].length = 0;
-               wqe->sg_list[0].sge_length = 0;
-               wqe->length = 0;
-               for (i = 0, j = 0; i < wr->num_sge; i++) {
-                       /* Check LKEY */
-                       if (to_ipd(qp->ibqp.pd)->user &&
-                           wr->sg_list[i].lkey == 0) {
-                               spin_unlock_irqrestore(&qp->r_rq.lock,
-                                                      flags);
-                               *bad_wr = wr;
-                               ret = -EINVAL;
-                               goto bail;
-                       }
-                       if (wr->sg_list[i].length == 0)
-                               continue;
-                       if (!ipath_lkey_ok(
-                                   &to_idev(qp->ibqp.device)->lk_table,
-                                   &wqe->sg_list[j], &wr->sg_list[i],
-                                   IB_ACCESS_LOCAL_WRITE)) {
-                               spin_unlock_irqrestore(&qp->r_rq.lock,
-                                                      flags);
-                               *bad_wr = wr;
-                               ret = -EINVAL;
-                               goto bail;
-                       }
-                       wqe->length += wr->sg_list[i].length;
-                       j++;
-               }
-               wqe->num_sge = j;
-               qp->r_rq.head = next;
+               wqe->num_sge = wr->num_sge;
+               for (i = 0; i < wr->num_sge; i++)
+                       wqe->sg_list[i] = wr->sg_list[i];
+               wq->head = next;
                spin_unlock_irqrestore(&qp->r_rq.lock, flags);
        }
        ret = 0;
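
The head/tail handling above is the standard one-slot-open ring test: the receive queue is treated as full when advancing head would land on tail, so a queue of size N holds at most N - 1 posted WQEs. A tiny standalone sketch of the same check (not part of the patch; the state values are illustrative only):

#include <stdio.h>

int main(void)
{
	unsigned size = 8;              /* illustrative r_rq.size */
	unsigned head = 6, tail = 7;    /* illustrative wq->head / wq->tail */
	unsigned next = head + 1;

	if (next >= size)               /* wrap around, as in ipath_post_receive() */
		next = 0;
	printf("%s\n", next == tail ? "queue full" : "room for another WQE");
	return 0;
}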
@@ -377,6 +358,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
        switch (qp->ibqp.qp_type) {
        case IB_QPT_SMI:
        case IB_QPT_GSI:
+               if (ib_ipath_disable_sma)
+                       break;
+               /* FALLTHROUGH */
        case IB_QPT_UD:
                ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
                break;
@@ -395,7 +379,7 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
 }
 
 /**
- * ipath_ib_rcv - process and incoming packet
+ * ipath_ib_rcv - process an incoming packet
  * @arg: the device pointer
  * @rhdr: the header of the packet
  * @data: the packet data
@@ -404,9 +388,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
  * This is called from ipath_kreceive() to process an incoming packet at
  * interrupt level. Tlen is the length of the header + data + CRC in bytes.
  */
-static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
+void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
+                 u32 tlen)
 {
-       struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
        struct ipath_ib_header *hdr = rhdr;
        struct ipath_other_headers *ohdr;
        struct ipath_qp *qp;
@@ -427,7 +411,7 @@ static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
        lid = be16_to_cpu(hdr->lrh[1]);
        if (lid < IPATH_MULTICAST_LID_BASE) {
                lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
-               if (unlikely(lid != ipath_layer_get_lid(dev->dd))) {
+               if (unlikely(lid != dev->dd->ipath_lid)) {
                        dev->rcv_errors++;
                        goto bail;
                }
@@ -495,9 +479,8 @@ bail:;
  * This is called from ipath_do_rcv_timer() at interrupt level to check for
  * QPs which need retransmits and to collect performance numbers.
  */
-static void ipath_ib_timer(void *arg)
+void ipath_ib_timer(struct ipath_ibdev *dev)
 {
-       struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
        struct ipath_qp *resend = NULL;
        struct list_head *last;
        struct ipath_qp *qp;
@@ -539,19 +522,19 @@ static void ipath_ib_timer(void *arg)
        if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
            --dev->pma_sample_start == 0) {
                dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
-               ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword,
-                                             &dev->ipath_rword,
-                                             &dev->ipath_spkts,
-                                             &dev->ipath_rpkts,
-                                             &dev->ipath_xmit_wait);
+               ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
+                                       &dev->ipath_rword,
+                                       &dev->ipath_spkts,
+                                       &dev->ipath_rpkts,
+                                       &dev->ipath_xmit_wait);
        }
        if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
                if (dev->pma_sample_interval == 0) {
                        u64 ta, tb, tc, td, te;
 
                        dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
-                       ipath_layer_snapshot_counters(dev->dd, &ta, &tb,
-                                                     &tc, &td, &te);
+                       ipath_snapshot_counters(dev->dd, &ta, &tb,
+                                               &tc, &td, &te);
 
                        dev->ipath_sword = ta - dev->ipath_sword;
                        dev->ipath_rword = tb - dev->ipath_rword;
@@ -581,6 +564,362 @@ static void ipath_ib_timer(void *arg)
        }
 }
 
+static void update_sge(struct ipath_sge_state *ss, u32 length)
+{
+       struct ipath_sge *sge = &ss->sge;
+
+       sge->vaddr += length;
+       sge->length -= length;
+       sge->sge_length -= length;
+       if (sge->sge_length == 0) {
+               if (--ss->num_sge)
+                       *sge = *ss->sg_list++;
+       } else if (sge->length == 0 && sge->mr != NULL) {
+               if (++sge->n >= IPATH_SEGSZ) {
+                       if (++sge->m >= sge->mr->mapsz)
+                               return;
+                       sge->n = 0;
+               }
+               sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
+               sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
+       }
+}
+
+#ifdef __LITTLE_ENDIAN
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+       return data >> shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+       return data << shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+       data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
+       data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+       return data;
+}
+#else
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+       return data << shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+       return data >> shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+       data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
+       data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+       return data;
+}
+#endif
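
For the little-endian case, a standalone sketch (not part of the driver) of what clear_upper_bytes() computes; the input values below are illustrative only:

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_BYTE 8

/* Same arithmetic as the little-endian clear_upper_bytes() above: keep the
 * n low-order source bytes, place them at byte offset 'off', clear the rest.
 */
static uint32_t example_clear_upper_bytes(uint32_t data, uint32_t n, uint32_t off)
{
	data <<= ((sizeof(uint32_t) - n) * BITS_PER_BYTE);
	data >>= ((sizeof(uint32_t) - n - off) * BITS_PER_BYTE);
	return data;
}

int main(void)
{
	/* 0xAABBCCDD << 16 = 0xCCDD0000, then >> 8 = 0x00CCDD00 */
	printf("0x%08x\n", example_clear_upper_bytes(0xAABBCCDDu, 2, 1));
	return 0;
}

The double shift keeps the n low-order source bytes, parks them at byte offset off within the dword, and zeroes everything else, which is how copy_io() below accumulates unaligned bytes into 'data' before writing a full dword to the PIO buffer.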
+
+static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
+                   u32 length)
+{
+       u32 extra = 0;
+       u32 data = 0;
+       u32 last;
+
+       while (1) {
+               u32 len = ss->sge.length;
+               u32 off;
+
+               BUG_ON(len == 0);
+               if (len > length)
+                       len = length;
+               if (len > ss->sge.sge_length)
+                       len = ss->sge.sge_length;
+               /* If the source address is not aligned, try to align it. */
+               off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
+               if (off) {
+                       u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
+                                           ~(sizeof(u32) - 1));
+                       u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
+                       u32 y;
+
+                       y = sizeof(u32) - off;
+                       if (len > y)
+                               len = y;
+                       if (len + extra >= sizeof(u32)) {
+                               data |= set_upper_bits(v, extra *
+                                                      BITS_PER_BYTE);
+                               len = sizeof(u32) - extra;
+                               if (len == length) {
+                                       last = data;
+                                       break;
+                               }
+                               __raw_writel(data, piobuf);
+                               piobuf++;
+                               extra = 0;
+                               data = 0;
+                       } else {
+                               /* Clear unused upper bytes */
+                               data |= clear_upper_bytes(v, len, extra);
+                               if (len == length) {
+                                       last = data;
+                                       break;
+                               }
+                               extra += len;
+                       }
+               } else if (extra) {
+                       /* Source address is aligned. */
+                       u32 *addr = (u32 *) ss->sge.vaddr;
+                       int shift = extra * BITS_PER_BYTE;
+                       int ushift = 32 - shift;
+                       u32 l = len;
+
+                       while (l >= sizeof(u32)) {
+                               u32 v = *addr;
+
+                               data |= set_upper_bits(v, shift);
+                               __raw_writel(data, piobuf);
+                               data = get_upper_bits(v, ushift);
+                               piobuf++;
+                               addr++;
+                               l -= sizeof(u32);
+                       }
+                       /*
+                        * We still have 'extra' number of bytes leftover.
+                        */
+                       if (l) {
+                               u32 v = *addr;
+
+                               if (l + extra >= sizeof(u32)) {
+                                       data |= set_upper_bits(v, shift);
+                                       len -= l + extra - sizeof(u32);
+                                       if (len == length) {
+                                               last = data;
+                                               break;
+                                       }
+                                       __raw_writel(data, piobuf);
+                                       piobuf++;
+                                       extra = 0;
+                                       data = 0;
+                               } else {
+                                       /* Clear unused upper bytes */
+                                       data |= clear_upper_bytes(v, l,
+                                                                 extra);
+                                       if (len == length) {
+                                               last = data;
+                                               break;
+                                       }
+                                       extra += l;
+                               }
+                       } else if (len == length) {
+                               last = data;
+                               break;
+                       }
+               } else if (len == length) {
+                       u32 w;
+
+                       /*
+                        * Need to round up for the last dword in the
+                        * packet.
+                        */
+                       w = (len + 3) >> 2;
+                       __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
+                       piobuf += w - 1;
+                       last = ((u32 *) ss->sge.vaddr)[w - 1];
+                       break;
+               } else {
+                       u32 w = len >> 2;
+
+                       __iowrite32_copy(piobuf, ss->sge.vaddr, w);
+                       piobuf += w;
+
+                       extra = len & (sizeof(u32) - 1);
+                       if (extra) {
+                               u32 v = ((u32 *) ss->sge.vaddr)[w];
+
+                               /* Clear unused upper bytes */
+                               data = clear_upper_bytes(v, extra, 0);
+                       }
+               }
+               update_sge(ss, len);
+               length -= len;
+       }
+       /* Update address before sending packet. */
+       update_sge(ss, length);
+       /* must flush everything before the trigger word */
+       ipath_flush_wc();
+       __raw_writel(last, piobuf);
+       /* be sure trigger word is written */
+       ipath_flush_wc();
+}
+
+/**
+ * ipath_verbs_send - send a packet
+ * @dd: the infinipath device
+ * @hdrwords: the number of words in the header
+ * @hdr: the packet header
+ * @len: the length of the packet in bytes
+ * @ss: the SGE to send
+ */
+int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
+                    u32 *hdr, u32 len, struct ipath_sge_state *ss)
+{
+       u32 __iomem *piobuf;
+       u32 plen;
+       int ret;
+
+       /* +1 is for the qword padding of pbc */
+       plen = hdrwords + ((len + 3) >> 2) + 1;
+       if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
+               ipath_dbg("packet len 0x%x too long, failing\n", plen);
+               ret = -EINVAL;
+               goto bail;
+       }
+
+       /* Get a PIO buffer to use. */
+       piobuf = ipath_getpiobuf(dd, NULL);
+       if (unlikely(piobuf == NULL)) {
+               ret = -EBUSY;
+               goto bail;
+       }
+
+       /*
+        * Write len to control qword, no flags.
+        * We have to flush after the PBC for correctness on some cpus
+        * or WC buffer can be written out of order.
+        */
+       writeq(plen, piobuf);
+       ipath_flush_wc();
+       piobuf += 2;
+       if (len == 0) {
+               /*
+                * If there is just the header portion, must flush before
+                * writing last word of header for correctness, and after
+                * the last header word (trigger word).
+                */
+               __iowrite32_copy(piobuf, hdr, hdrwords - 1);
+               ipath_flush_wc();
+               __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+               ipath_flush_wc();
+               ret = 0;
+               goto bail;
+       }
+
+       __iowrite32_copy(piobuf, hdr, hdrwords);
+       piobuf += hdrwords;
+
+       /* The common case is aligned and contained in one segment. */
+       if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
+                  !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
+               u32 w;
+               u32 *addr = (u32 *) ss->sge.vaddr;
+
+               /* Update address before sending packet. */
+               update_sge(ss, len);
+               /* Need to round up for the last dword in the packet. */
+               w = (len + 3) >> 2;
+               __iowrite32_copy(piobuf, addr, w - 1);
+       /* must flush everything before the trigger word */
+               ipath_flush_wc();
+               __raw_writel(addr[w - 1], piobuf + w - 1);
+               /* be sure trigger word is written */
+               ipath_flush_wc();
+               ret = 0;
+               goto bail;
+       }
+       copy_io(piobuf, ss, len);
+       ret = 0;
+
+bail:
+       return ret;
+}
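
The length check at the top of ipath_verbs_send() works in dwords: header words, payload rounded up to whole dwords, plus one dword for the PBC qword padding. A quick standalone sketch of that arithmetic with illustrative sizes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t hdrwords = 8;          /* illustrative header size in dwords */
	uint32_t len = 13;              /* illustrative payload size in bytes */
	uint32_t plen = hdrwords + ((len + 3) >> 2) + 1;

	/* 8 + 4 + 1 = 13 dwords, i.e. 52 bytes compared against ipath_ibmaxlen */
	printf("plen = %u dwords (%u bytes)\n", plen, plen << 2);
	return 0;
}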
+
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+                           u64 *rwords, u64 *spkts, u64 *rpkts,
+                           u64 *xmit_wait)
+{
+       int ret;
+
+       if (!(dd->ipath_flags & IPATH_INITTED)) {
+               /* no hardware, freeze, etc. */
+               ipath_dbg("unit %u not usable\n", dd->ipath_unit);
+               ret = -EINVAL;
+               goto bail;
+       }
+       *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+       *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+       *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
+       *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+       *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
+
+       ret = 0;
+
+bail:
+       return ret;
+}
+
+/**
+ * ipath_get_counters - get various chip counters
+ * @dd: the infinipath device
+ * @cntrs: counters are placed here
+ *
+ * Return the counters needed by recv_pma_get_portcounters().
+ */
+int ipath_get_counters(struct ipath_devdata *dd,
+                      struct ipath_verbs_counters *cntrs)
+{
+       int ret;
+
+       if (!(dd->ipath_flags & IPATH_INITTED)) {
+               /* no hardware, freeze, etc. */
+               ipath_dbg("unit %u not usable\n", dd->ipath_unit);
+               ret = -EINVAL;
+               goto bail;
+       }
+       cntrs->symbol_error_counter =
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+       cntrs->link_error_recovery_counter =
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+       /*
+        * The link downed counter counts when the other side downs the
+        * connection.  We add in the number of times we downed the link
+        * due to local link integrity errors to compensate.
+        */
+       cntrs->link_downed_counter =
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
+       cntrs->port_rcv_errors =
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
+       cntrs->port_rcv_remphys_errors =
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
+       cntrs->port_xmit_discards =
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
+       cntrs->port_xmit_data =
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+       cntrs->port_rcv_data =
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+       cntrs->port_xmit_packets =
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
+       cntrs->port_rcv_packets =
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+       cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
+       cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
+
+       ret = 0;
+
+bail:
+       return ret;
+}
+
 /**
  * ipath_ib_piobufavail - callback when a PIO buffer is available
  * @arg: the device pointer
@@ -591,9 +930,8 @@ static void ipath_ib_timer(void *arg)
  * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
  * return zero).
  */
-static int ipath_ib_piobufavail(void *arg)
+int ipath_ib_piobufavail(struct ipath_ibdev *dev)
 {
-       struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
        struct ipath_qp *qp;
        unsigned long flags;
 
@@ -624,14 +962,14 @@ static int ipath_query_device(struct ib_device *ibdev,
                IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
                IB_DEVICE_SYS_IMAGE_GUID;
        props->page_size_cap = PAGE_SIZE;
-       props->vendor_id = ipath_layer_get_vendorid(dev->dd);
-       props->vendor_part_id = ipath_layer_get_deviceid(dev->dd);
-       props->hw_ver = ipath_layer_get_pcirev(dev->dd);
+       props->vendor_id = dev->dd->ipath_vendorid;
+       props->vendor_part_id = dev->dd->ipath_deviceid;
+       props->hw_ver = dev->dd->ipath_pcirev;
 
        props->sys_image_guid = dev->sys_image_guid;
 
        props->max_mr_size = ~0ull;
-       props->max_qp = dev->qp_table.max;
+       props->max_qp = ib_ipath_max_qps;
        props->max_qp_wr = ib_ipath_max_qp_wrs;
        props->max_sge = ib_ipath_max_sges;
        props->max_cq = ib_ipath_max_cqs;
@@ -647,7 +985,7 @@ static int ipath_query_device(struct ib_device *ibdev,
        props->max_srq_sge = ib_ipath_max_srq_sges;
        /* props->local_ca_ack_delay */
        props->atomic_cap = IB_ATOMIC_HCA;
-       props->max_pkeys = ipath_layer_get_npkeys(dev->dd);
+       props->max_pkeys = ipath_get_npkeys(dev->dd);
        props->max_mcast_grp = ib_ipath_max_mcast_grps;
        props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
@@ -672,12 +1010,17 @@ const u8 ipath_cvt_physportstate[16] = {
        [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6,
 };
 
+u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
+{
+       return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
+}
+
 static int ipath_query_port(struct ib_device *ibdev,
                            u8 port, struct ib_port_attr *props)
 {
        struct ipath_ibdev *dev = to_idev(ibdev);
        enum ib_mtu mtu;
-       u16 lid = ipath_layer_get_lid(dev->dd);
+       u16 lid = dev->dd->ipath_lid;
        u64 ibcstat;
 
        memset(props, 0, sizeof(*props));
@@ -685,16 +1028,16 @@ static int ipath_query_port(struct ib_device *ibdev,
        props->lmc = dev->mkeyprot_resv_lmc & 7;
        props->sm_lid = dev->sm_lid;
        props->sm_sl = dev->sm_sl;
-       ibcstat = ipath_layer_get_lastibcstat(dev->dd);
+       ibcstat = dev->dd->ipath_lastibcstat;
        props->state = ((ibcstat >> 4) & 0x3) + 1;
        /* See phys_state_show() */
        props->phys_state = ipath_cvt_physportstate[
-               ipath_layer_get_lastibcstat(dev->dd) & 0xf];
+               dev->dd->ipath_lastibcstat & 0xf];
        props->port_cap_flags = dev->port_cap_flags;
        props->gid_tbl_len = 1;
        props->max_msg_sz = 0x80000000;
-       props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd);
-       props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) -
+       props->pkey_tbl_len = ipath_get_npkeys(dev->dd);
+       props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) -
                dev->z_pkey_violations;
        props->qkey_viol_cntr = dev->qkey_violations;
        props->active_width = IB_WIDTH_4X;
@@ -704,7 +1047,7 @@ static int ipath_query_port(struct ib_device *ibdev,
        props->init_type_reply = 0;
 
        props->max_mtu = IB_MTU_4096;
-       switch (ipath_layer_get_ibmtu(dev->dd)) {
+       switch (dev->dd->ipath_ibmtu) {
        case 4096:
                mtu = IB_MTU_4096;
                break;
@@ -763,7 +1106,7 @@ static int ipath_modify_port(struct ib_device *ibdev,
        dev->port_cap_flags |= props->set_port_cap_mask;
        dev->port_cap_flags &= ~props->clr_port_cap_mask;
        if (port_modify_mask & IB_PORT_SHUTDOWN)
-               ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
+               ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
        if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
                dev->qkey_violations = 0;
        return 0;
@@ -780,7 +1123,7 @@ static int ipath_query_gid(struct ib_device *ibdev, u8 port,
                goto bail;
        }
        gid->global.subnet_prefix = dev->gid_prefix;
-       gid->global.interface_id = ipath_layer_get_guid(dev->dd);
+       gid->global.interface_id = dev->dd->ipath_guid;
 
        ret = 0;
 
@@ -803,18 +1146,22 @@ static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
         * we allow allocations of more than we report for this value.
         */
 
-       if (dev->n_pds_allocated == ib_ipath_max_pds) {
+       pd = kmalloc(sizeof *pd, GFP_KERNEL);
+       if (!pd) {
                ret = ERR_PTR(-ENOMEM);
                goto bail;
        }
 
-       pd = kmalloc(sizeof *pd, GFP_KERNEL);
-       if (!pd) {
+       spin_lock(&dev->n_pds_lock);
+       if (dev->n_pds_allocated == ib_ipath_max_pds) {
+               spin_unlock(&dev->n_pds_lock);
+               kfree(pd);
                ret = ERR_PTR(-ENOMEM);
                goto bail;
        }
 
        dev->n_pds_allocated++;
+       spin_unlock(&dev->n_pds_lock);
 
        /* ib_alloc_pd() will initialize pd->ibpd. */
        pd->user = udata != NULL;
@@ -830,7 +1177,9 @@ static int ipath_dealloc_pd(struct ib_pd *ibpd)
        struct ipath_pd *pd = to_ipd(ibpd);
        struct ipath_ibdev *dev = to_idev(ibpd->device);
 
+       spin_lock(&dev->n_pds_lock);
        dev->n_pds_allocated--;
+       spin_unlock(&dev->n_pds_lock);
 
        kfree(pd);
 
@@ -851,11 +1200,6 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
        struct ib_ah *ret;
        struct ipath_ibdev *dev = to_idev(pd->device);
 
-       if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
        /* A multicast address requires a GRH (see ch. 8.4.1). */
        if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
            ah_attr->dlid != IPATH_PERMISSIVE_LID &&
@@ -881,7 +1225,16 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
                goto bail;
        }
 
+       spin_lock(&dev->n_ahs_lock);
+       if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
+               spin_unlock(&dev->n_ahs_lock);
+               kfree(ah);
+               ret = ERR_PTR(-ENOMEM);
+               goto bail;
+       }
+
        dev->n_ahs_allocated++;
+       spin_unlock(&dev->n_ahs_lock);
 
        /* ib_create_ah() will initialize ah->ibah. */
        ah->attr = *ah_attr;
@@ -903,7 +1256,9 @@ static int ipath_destroy_ah(struct ib_ah *ibah)
        struct ipath_ibdev *dev = to_idev(ibah->device);
        struct ipath_ah *ah = to_iah(ibah);
 
+       spin_lock(&dev->n_ahs_lock);
        dev->n_ahs_allocated--;
+       spin_unlock(&dev->n_ahs_lock);
 
        kfree(ah);
 
@@ -919,25 +1274,50 @@ static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
        return 0;
 }
 
+/**
+ * ipath_get_npkeys - return the size of the PKEY table for port 0
+ * @dd: the infinipath device
+ */
+unsigned ipath_get_npkeys(struct ipath_devdata *dd)
+{
+       return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
+}
+
+/**
+ * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
+ * @dd: the infinipath device
+ * @index: the PKEY index
+ */
+unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
+{
+       unsigned ret;
+
+       if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
+               ret = 0;
+       else
+               ret = dd->ipath_pd[0]->port_pkeys[index];
+
+       return ret;
+}
+
 static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
                            u16 *pkey)
 {
        struct ipath_ibdev *dev = to_idev(ibdev);
        int ret;
 
-       if (index >= ipath_layer_get_npkeys(dev->dd)) {
+       if (index >= ipath_get_npkeys(dev->dd)) {
                ret = -EINVAL;
                goto bail;
        }
 
-       *pkey = ipath_layer_get_pkey(dev->dd, index);
+       *pkey = ipath_get_pkey(dev->dd, index);
        ret = 0;
 
 bail:
        return ret;
 }
 
-
 /**
  * ipath_alloc_ucontext - allocate a ucontext
  * @ibdev: the infiniband device
@@ -970,26 +1350,91 @@ static int ipath_dealloc_ucontext(struct ib_ucontext *context)
 
 static int ipath_verbs_register_sysfs(struct ib_device *dev);
 
+static void __verbs_timer(unsigned long arg)
+{
+       struct ipath_devdata *dd = (struct ipath_devdata *) arg;
+
+       /*
+        * If port 0 receive packet interrupts are not available, or
+        * can be missed, poll the receive queue
+        */
+       if (dd->ipath_flags & IPATH_POLL_RX_INTR)
+               ipath_kreceive(dd);
+
+       /* Handle verbs layer timeouts. */
+       ipath_ib_timer(dd->verbs_dev);
+
+       mod_timer(&dd->verbs_timer, jiffies + 1);
+}
+
+static int enable_timer(struct ipath_devdata *dd)
+{
+       /*
+        * Early chips had a design flaw where the chip and kernel idea
+        * of the tail register don't always agree, and therefore we won't
+        * get an interrupt on the next packet received.
+        * If the board supports per packet receive interrupts, use it.
+        * Otherwise, the timer function periodically checks for packets
+        * to cover this case.
+        * Either way, the timer is needed for verbs layer related
+        * processing.
+        */
+       if (dd->ipath_flags & IPATH_GPIO_INTR) {
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
+                                0x2074076542310ULL);
+               /* Enable GPIO bit 2 interrupt */
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+                                (u64) (1 << 2));
+       }
+
+       init_timer(&dd->verbs_timer);
+       dd->verbs_timer.function = __verbs_timer;
+       dd->verbs_timer.data = (unsigned long)dd;
+       dd->verbs_timer.expires = jiffies + 1;
+       add_timer(&dd->verbs_timer);
+
+       return 0;
+}
+
+static int disable_timer(struct ipath_devdata *dd)
+{
+       /* Disable GPIO bit 2 interrupt */
+       if (dd->ipath_flags & IPATH_GPIO_INTR)
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
+
+       del_timer_sync(&dd->verbs_timer);
+
+       return 0;
+}
+
 /**
  * ipath_register_ib_device - register our device with the infiniband core
- * @unit: the device number to register
  * @dd: the device data structure
  * Return the allocated ipath_ibdev pointer or NULL on error.
  */
-static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
+int ipath_register_ib_device(struct ipath_devdata *dd)
 {
-       struct ipath_layer_counters cntrs;
+       struct ipath_verbs_counters cntrs;
        struct ipath_ibdev *idev;
        struct ib_device *dev;
        int ret;
 
        idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
-       if (idev == NULL)
+       if (idev == NULL) {
+               ret = -ENOMEM;
                goto bail;
+       }
 
        dev = &idev->ibdev;
 
        /* Only need to initialize non-zero fields. */
+       spin_lock_init(&idev->n_pds_lock);
+       spin_lock_init(&idev->n_ahs_lock);
+       spin_lock_init(&idev->n_cqs_lock);
+       spin_lock_init(&idev->n_qps_lock);
+       spin_lock_init(&idev->n_srqs_lock);
+       spin_lock_init(&idev->n_mcast_grps_lock);
+
        spin_lock_init(&idev->qp_table.lock);
        spin_lock_init(&idev->lk_table.lock);
        idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
@@ -1030,7 +1475,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
        idev->link_width_enabled = 3;   /* 1x or 4x */
 
        /* Snapshot current HW counters to "clear" them. */
-       ipath_layer_get_counters(dd, &cntrs);
+       ipath_get_counters(dd, &cntrs);
        idev->z_symbol_error_counter = cntrs.symbol_error_counter;
        idev->z_link_error_recovery_counter =
                cntrs.link_error_recovery_counter;
@@ -1054,14 +1499,14 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
         * device types in the system, we can't be sure this is unique.
         */
        if (!sys_image_guid)
-               sys_image_guid = ipath_layer_get_guid(dd);
+               sys_image_guid = dd->ipath_guid;
        idev->sys_image_guid = sys_image_guid;
-       idev->ib_unit = unit;
+       idev->ib_unit = dd->ipath_unit;
        idev->dd = dd;
 
        strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
        dev->owner = THIS_MODULE;
-       dev->node_guid = ipath_layer_get_guid(dd);
+       dev->node_guid = dd->ipath_guid;
        dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
        dev->uverbs_cmd_mask =
                (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
@@ -1093,9 +1538,9 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
                (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
                (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
                (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-       dev->node_type = IB_NODE_CA;
+       dev->node_type = RDMA_NODE_IB_CA;
        dev->phys_port_cnt = 1;
-       dev->dma_device = ipath_layer_get_device(dd);
+       dev->dma_device = &dd->pcidev->dev;
        dev->class_dev.dev = dev->dma_device;
        dev->query_device = ipath_query_device;
        dev->modify_device = ipath_modify_device;
@@ -1137,9 +1582,10 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
        dev->attach_mcast = ipath_multicast_attach;
        dev->detach_mcast = ipath_multicast_detach;
        dev->process_mad = ipath_process_mad;
+       dev->mmap = ipath_mmap;
 
        snprintf(dev->node_desc, sizeof(dev->node_desc),
-                IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename);
+                IPATH_IDSTR " %s", system_utsname.nodename);
 
        ret = ib_register_device(dev);
        if (ret)
@@ -1148,7 +1594,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
        if (ipath_verbs_register_sysfs(dev))
                goto err_class;
 
-       ipath_layer_enable_timer(dd);
+       enable_timer(dd);
 
        goto bail;
 
@@ -1160,37 +1606,32 @@ err_lk:
        kfree(idev->qp_table.table);
 err_qp:
        ib_dealloc_device(dev);
-       _VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n",
-                    unit, -ret);
+       ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
        idev = NULL;
 
 bail:
-       return idev;
+       dd->verbs_dev = idev;
+       return ret;
 }
 
-static void ipath_unregister_ib_device(void *arg)
+void ipath_unregister_ib_device(struct ipath_ibdev *dev)
 {
-       struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
        struct ib_device *ibdev = &dev->ibdev;
 
-       ipath_layer_disable_timer(dev->dd);
+       disable_timer(dev->dd);
 
        ib_unregister_device(ibdev);
 
        if (!list_empty(&dev->pending[0]) ||
            !list_empty(&dev->pending[1]) ||
            !list_empty(&dev->pending[2]))
-               _VERBS_ERROR("ipath%d pending list not empty!\n",
-                            dev->ib_unit);
+               ipath_dev_err(dev->dd, "pending list not empty!\n");
        if (!list_empty(&dev->piowait))
-               _VERBS_ERROR("ipath%d piowait list not empty!\n",
-                            dev->ib_unit);
+               ipath_dev_err(dev->dd, "piowait list not empty!\n");
        if (!list_empty(&dev->rnrwait))
-               _VERBS_ERROR("ipath%d rnrwait list not empty!\n",
-                            dev->ib_unit);
+               ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
        if (!ipath_mcast_tree_empty())
-               _VERBS_ERROR("ipath%d multicast table memory leak!\n",
-                            dev->ib_unit);
+               ipath_dev_err(dev->dd, "multicast table memory leak!\n");
        /*
         * Note that ipath_unregister_ib_device() can be called before all
         * the QPs are destroyed!
@@ -1201,25 +1642,12 @@ static void ipath_unregister_ib_device(void *arg)
        ib_dealloc_device(ibdev);
 }
 
-static int __init ipath_verbs_init(void)
-{
-       return ipath_verbs_register(ipath_register_ib_device,
-                                   ipath_unregister_ib_device,
-                                   ipath_ib_piobufavail, ipath_ib_rcv,
-                                   ipath_ib_timer);
-}
-
-static void __exit ipath_verbs_cleanup(void)
-{
-       ipath_verbs_unregister();
-}
-
 static ssize_t show_rev(struct class_device *cdev, char *buf)
 {
        struct ipath_ibdev *dev =
                container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
 
-       return sprintf(buf, "%x\n", ipath_layer_get_pcirev(dev->dd));
+       return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
 }
 
 static ssize_t show_hca(struct class_device *cdev, char *buf)
@@ -1228,7 +1656,7 @@ static ssize_t show_hca(struct class_device *cdev, char *buf)
                container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
        int ret;
 
-       ret = ipath_layer_get_boardname(dev->dd, buf, 128);
+       ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
        if (ret < 0)
                goto bail;
        strcat(buf, "\n");
@@ -1305,6 +1733,3 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev)
 bail:
        return ret;
 }
-
-module_init(ipath_verbs_init);
-module_exit(ipath_verbs_cleanup);
index 2df684727dc1bad27e3f8a369e41e5ad42ba2a04..09bbb3f9a2176f495b1bafa2602da95f5b6faed9 100644 (file)
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kref.h>
 #include <rdma/ib_pack.h>
 
 #include "ipath_layer.h"
-#include "verbs_debug.h"
 
 #define QPN_MAX                 (1 << 24)
 #define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
@@ -50,7 +50,7 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define IPATH_UVERBS_ABI_VERSION       1
+#define IPATH_UVERBS_ABI_VERSION       2
 
 /*
  * Define an ib_cq_notify value that is not valid so we know when CQ
@@ -152,19 +152,6 @@ struct ipath_mcast {
        int n_attached;
 };
 
-/* Memory region */
-struct ipath_mr {
-       struct ib_mr ibmr;
-       struct ipath_mregion mr;        /* must be last */
-};
-
-/* Fast memory region */
-struct ipath_fmr {
-       struct ib_fmr ibfmr;
-       u8 page_shift;
-       struct ipath_mregion mr;        /* must be last */
-};
-
 /* Protection domain */
 struct ipath_pd {
        struct ib_pd ibpd;
@@ -178,58 +165,89 @@ struct ipath_ah {
 };
 
 /*
- * Quick description of our CQ/QP locking scheme:
- *
- * We have one global lock that protects dev->cq/qp_table.  Each
- * struct ipath_cq/qp also has its own lock.  An individual qp lock
- * may be taken inside of an individual cq lock.  Both cqs attached to
- * a qp may be locked, with the send cq locked first.  No other
- * nesting should be done.
- *
- * Each struct ipath_cq/qp also has an atomic_t ref count.  The
- * pointer from the cq/qp_table to the struct counts as one reference.
- * This reference also is good for access through the consumer API, so
- * modifying the CQ/QP etc doesn't need to take another reference.
- * Access because of a completion being polled does need a reference.
- *
- * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the
- * destroy function to sleep on.
- *
- * This means that access from the consumer API requires nothing but
- * taking the struct's lock.
- *
- * Access because of a completion event should go as follows:
- * - lock cq/qp_table and look up struct
- * - increment ref count in struct
- * - drop cq/qp_table lock
- * - lock struct, do your thing, and unlock struct
- * - decrement ref count; if zero, wake up waiters
- *
- * To destroy a CQ/QP, we can do the following:
- * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
- * - decrement ref count
- * - wait_event until ref count is zero
- *
- * It is the consumer's responsibilty to make sure that no QP
- * operations (WQE posting or state modification) are pending when the
- * QP is destroyed.  Also, the consumer must make sure that calls to
- * qp_modify are serialized.
- *
- * Possible optimizations (wait for profile data to see if/where we
- * have locks bouncing between CPUs):
- * - split cq/qp table lock into n separate (cache-aligned) locks,
- *   indexed (say) by the page in the table
+ * This structure is used by ipath_mmap() to validate an offset
+ * when an mmap() request is made.  The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct ipath_mmap_info {
+       struct ipath_mmap_info *next;
+       struct ib_ucontext *context;
+       void *obj;
+       struct kref ref;
+       unsigned size;
+       unsigned mmap_cnt;
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
  */
+struct ipath_cq_wc {
+       u32 head;               /* index of next entry to fill */
+       u32 tail;               /* index of next ib_poll_cq() entry */
+       struct ib_wc queue[1];  /* this is actually size ibcq.cqe + 1 */
+};
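
Keeping the head, tail, and entries in one allocation means the size needed for a CQ of depth cqe follows directly from this layout; a hedged standalone sketch of that arithmetic (the struct names below are stand-ins, not the kernel types):

#include <stddef.h>
#include <stdio.h>

struct example_wc { unsigned long long wr_id; unsigned status; };
struct example_cq_wc {
	unsigned head;
	unsigned tail;
	struct example_wc queue[1];     /* really cqe + 1 entries */
};

int main(void)
{
	unsigned cqe = 128;             /* illustrative CQ depth */
	/* queue[1] already accounts for one entry, so add cqe more */
	size_t sz = sizeof(struct example_cq_wc) + cqe * sizeof(struct example_wc);

	printf("allocation size: %zu bytes\n", sz);
	return 0;
}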
 
+/*
+ * The completion queue structure.
+ */
 struct ipath_cq {
        struct ib_cq ibcq;
        struct tasklet_struct comptask;
        spinlock_t lock;
        u8 notify;
        u8 triggered;
-       u32 head;               /* new records added to the head */
-       u32 tail;               /* poll_cq() reads from here. */
-       struct ib_wc *queue;    /* this is actually ibcq.cqe + 1 */
+       struct ipath_cq_wc *queue;
+       struct ipath_mmap_info *ip;
+};
+
+/*
+ * A segment is a linear region of low physical memory.
+ * XXX Maybe we should use phys addr here and kmap()/kunmap().
+ * Used by the verbs layer.
+ */
+struct ipath_seg {
+       void *vaddr;
+       size_t length;
+};
+
+/* The number of ipath_segs that fit in a page. */
+#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
+
+struct ipath_segarray {
+       struct ipath_seg segs[IPATH_SEGSZ];
+};
+
+struct ipath_mregion {
+       u64 user_base;          /* User's address for this region */
+       u64 iova;               /* IB start address of this region */
+       size_t length;
+       u32 lkey;
+       u32 offset;             /* offset (bytes) to start of region */
+       int access_flags;
+       u32 max_segs;           /* number of ipath_segs in all the arrays */
+       u32 mapsz;              /* size of the map array */
+       struct ipath_segarray *map[0];  /* the segments */
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct ipath_sge {
+       struct ipath_mregion *mr;
+       void *vaddr;            /* current pointer into the segment */
+       u32 sge_length;         /* length of the SGE */
+       u32 length;             /* remaining length of the segment */
+       u16 m;                  /* current index: mr->map[m] */
+       u16 n;                  /* current index: mr->map[m]->segs[n] */
+};
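
The m/n pair is a two-level index into the region's page-sized segment arrays; a standalone sketch (illustrative sizes only) of how a flat segment number maps onto map[m]->segs[n], mirroring the wrap logic in update_sge():

#include <stdio.h>

#define EXAMPLE_SEGSZ 256       /* stand-in for IPATH_SEGSZ on this build */

int main(void)
{
	unsigned i = 700;                 /* illustrative flat segment index */
	unsigned m = i / EXAMPLE_SEGSZ;   /* which map[] array */
	unsigned n = i % EXAMPLE_SEGSZ;   /* which segs[] entry within it */

	printf("segment %u -> map[%u]->segs[%u]\n", i, m, n);
	return 0;
}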
+
+/* Memory region */
+struct ipath_mr {
+       struct ib_mr ibmr;
+       struct ipath_mregion mr;        /* must be last */
 };
 
 /*
@@ -248,32 +266,50 @@ struct ipath_swqe {
 
 /*
  * Receive work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->r_max_sge.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
  */
 struct ipath_rwqe {
        u64 wr_id;
-       u32 length;             /* total length of data in sg_list */
        u8 num_sge;
-       struct ipath_sge sg_list[0];
+       struct ib_sge sg_list[0];
 };
 
-struct ipath_rq {
-       spinlock_t lock;
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct ipath_rwq {
        u32 head;               /* new work requests posted to the head */
        u32 tail;               /* receives pull requests from here. */
+       struct ipath_rwqe wq[0];
+};
+
+struct ipath_rq {
+       struct ipath_rwq *wq;
+       spinlock_t lock;
        u32 size;               /* size of RWQE array */
        u8 max_sge;
-       struct ipath_rwqe *wq;  /* RWQE array */
 };
 
 struct ipath_srq {
        struct ib_srq ibsrq;
        struct ipath_rq rq;
+       struct ipath_mmap_info *ip;
        /* send signal when number of RWQEs < limit */
        u32 limit;
 };
 
+struct ipath_sge_state {
+       struct ipath_sge *sg_list;      /* next SGE to be used if any */
+       struct ipath_sge sge;   /* progress state for the current SGE */
+       u8 num_sge;
+};
+
 /*
  * Variables prefixed with s_ are for the requester (sender).
  * Variables prefixed with r_ are for the responder (receiver).
@@ -293,6 +329,7 @@ struct ipath_qp {
        atomic_t refcount;
        wait_queue_head_t wait;
        struct tasklet_struct s_task;
+       struct ipath_mmap_info *ip;
        struct ipath_sge_state *s_cur_sge;
        struct ipath_sge_state s_sge;   /* current send request data */
        /* current RDMA read send data */
@@ -334,6 +371,7 @@ struct ipath_qp {
        u8 s_retry;             /* requester retry counter */
        u8 s_rnr_retry;         /* requester RNR retry counter */
        u8 s_pkey_index;        /* PKEY index to use */
+       u8 timeout;             /* Timeout for this QP */
        enum ib_mtu path_mtu;
        u32 remote_qpn;
        u32 qkey;               /* QKEY for this QP (for UD or RD) */
@@ -345,7 +383,8 @@ struct ipath_qp {
        u32 s_ssn;              /* SSN of tail entry */
        u32 s_lsn;              /* limit sequence number (credit) */
        struct ipath_swqe *s_wq;        /* send work queue */
-       struct ipath_rq r_rq;   /* receive work queue */
+       struct ipath_rq r_rq;           /* receive work queue */
+       struct ipath_sge r_sg_list[0];  /* verified SGEs */
 };
 
 /*
@@ -369,15 +408,15 @@ static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
 
 /*
  * Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rq.wq.  This function does the array index computation.
+ * struct ipath_rwq.wq.  This function does the array index computation.
  */
 static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
                                              unsigned n)
 {
        return (struct ipath_rwqe *)
-               ((char *) rq->wq +
+               ((char *) rq->wq->wq +
                 (sizeof(struct ipath_rwqe) +
-                 rq->max_sge * sizeof(struct ipath_sge)) * n);
+                 rq->max_sge * sizeof(struct ib_sge)) * n);
 }
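
Each RWQE is immediately followed by max_sge struct ib_sge entries, so entry n begins n * (sizeof(struct ipath_rwqe) + max_sge * sizeof(struct ib_sge)) bytes into the wq array; a standalone sketch of that stride computation with stand-in types and illustrative parameters:

#include <stddef.h>
#include <stdio.h>

/* Stand-ins with plausible sizes; the real structs are defined above. */
struct example_sge { unsigned long long addr; unsigned length; unsigned lkey; };
struct example_rwqe { unsigned long long wr_id; unsigned char num_sge; };

int main(void)
{
	unsigned max_sge = 4, n = 10;   /* illustrative queue parameters */
	size_t stride = sizeof(struct example_rwqe) +
			max_sge * sizeof(struct example_sge);

	printf("entry %u starts %zu bytes into the wq array\n", n, n * stride);
	return 0;
}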
 
 /*
@@ -417,6 +456,7 @@ struct ipath_ibdev {
        struct ib_device ibdev;
        struct list_head dev_list;
        struct ipath_devdata *dd;
+       struct ipath_mmap_info *pending_mmaps;
        int ib_unit;            /* This is the device number */
        u16 sm_lid;             /* in host order */
        u8 sm_sl;
@@ -435,11 +475,20 @@ struct ipath_ibdev {
        __be64 sys_image_guid;  /* in network order */
        __be64 gid_prefix;      /* in network order */
        __be64 mkey;
+
        u32 n_pds_allocated;    /* number of PDs allocated for device */
+       spinlock_t n_pds_lock;
        u32 n_ahs_allocated;    /* number of AHs allocated for device */
+       spinlock_t n_ahs_lock;
        u32 n_cqs_allocated;    /* number of CQs allocated for device */
+       spinlock_t n_cqs_lock;
+       u32 n_qps_allocated;    /* number of QPs allocated for device */
+       spinlock_t n_qps_lock;
        u32 n_srqs_allocated;   /* number of SRQs allocated for device */
+       spinlock_t n_srqs_lock;
        u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+       spinlock_t n_mcast_grps_lock;
+
        u64 ipath_sword;        /* total dwords sent (sample result) */
        u64 ipath_rword;        /* total dwords received (sample result) */
        u64 ipath_spkts;        /* total packets sent (sample result) */
@@ -494,8 +543,19 @@ struct ipath_ibdev {
        struct ipath_opcode_stats opstats[128];
 };
 
-struct ipath_ucontext {
-       struct ib_ucontext ibucontext;
+struct ipath_verbs_counters {
+       u64 symbol_error_counter;
+       u64 link_error_recovery_counter;
+       u64 link_downed_counter;
+       u64 port_rcv_errors;
+       u64 port_rcv_remphys_errors;
+       u64 port_xmit_discards;
+       u64 port_xmit_data;
+       u64 port_rcv_data;
+       u64 port_xmit_packets;
+       u64 port_rcv_packets;
+       u32 local_link_integrity_errors;
+       u32 excessive_buffer_overrun_errors;
 };
 
 static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
@@ -503,11 +563,6 @@ static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
        return container_of(ibmr, struct ipath_mr, ibmr);
 }
 
-static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-       return container_of(ibfmr, struct ipath_fmr, ibfmr);
-}
-
 static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
 {
        return container_of(ibpd, struct ipath_pd, ibpd);
@@ -545,12 +600,6 @@ int ipath_process_mad(struct ib_device *ibdev,
                      struct ib_grh *in_grh,
                      struct ib_mad *in_mad, struct ib_mad *out_mad);
 
-static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
-                                                 *ibucontext)
-{
-       return container_of(ibucontext, struct ipath_ucontext, ibucontext);
-}
-
 /*
  * Compare the lower 24 bits of the two values.
  * Returns an integer <, ==, or > than zero.
@@ -562,6 +611,13 @@ static inline int ipath_cmp24(u32 a, u32 b)
 
 struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
 
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+                           u64 *rwords, u64 *spkts, u64 *rpkts,
+                           u64 *xmit_wait);
+
+int ipath_get_counters(struct ipath_devdata *dd,
+                      struct ipath_verbs_counters *cntrs);
+
 int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
 
 int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
@@ -579,7 +635,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 int ipath_destroy_qp(struct ib_qp *ibqp);
 
 int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                   int attr_mask);
+                   int attr_mask, struct ib_udata *udata);
 
 int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                   int attr_mask, struct ib_qp_init_attr *init_attr);
@@ -592,6 +648,9 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
 
 void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
 
+int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
+                    u32 *hdr, u32 len, struct ipath_sge_state *ss);
+
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
 
 int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
@@ -638,7 +697,8 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
                                struct ib_udata *udata);
 
 int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                    enum ib_srq_attr_mask attr_mask);
+                    enum ib_srq_attr_mask attr_mask,
+                    struct ib_udata *udata);
 
 int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
 
@@ -680,6 +740,10 @@ int ipath_unmap_fmr(struct list_head *fmr_list);
 
 int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
 
+void ipath_release_mmap_info(struct kref *ref);
+
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
 void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
 
 void ipath_insert_rnr_queue(struct ipath_qp *qp);
@@ -700,6 +764,22 @@ int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
 int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
                      u32 pmtu, u32 *bth0p, u32 *bth2p);
 
+int ipath_register_ib_device(struct ipath_devdata *);
+
+void ipath_unregister_ib_device(struct ipath_ibdev *);
+
+void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
+
+int ipath_ib_piobufavail(struct ipath_ibdev *);
+
+void ipath_ib_timer(struct ipath_ibdev *);
+
+unsigned ipath_get_npkeys(struct ipath_devdata *);
+
+u32 ipath_get_cr_errpkey(struct ipath_devdata *);
+
+unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
+
 extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
 
 extern const u8 ipath_cvt_physportstate[];
@@ -714,6 +794,8 @@ extern unsigned int ib_ipath_max_cqs;
 
 extern unsigned int ib_ipath_max_qp_wrs;
 
+extern unsigned int ib_ipath_max_qps;
+
 extern unsigned int ib_ipath_max_sges;
 
 extern unsigned int ib_ipath_max_mcast_grps;
index ee0e1d96d723441c1d6767354d15d1a21373f7fe..085e28b939ec5699d0ea3cf3639fe4f6d3929403 100644 (file)
@@ -207,12 +207,17 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
                goto bail;
        }
 
+       spin_lock(&dev->n_mcast_grps_lock);
        if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
+               spin_unlock(&dev->n_mcast_grps_lock);
                ret = ENOMEM;
                goto bail;
        }
 
        dev->n_mcast_grps_allocated++;
+       spin_unlock(&dev->n_mcast_grps_lock);
+
+       mcast->n_attached++;
 
        list_add_tail_rcu(&mqp->list, &mcast->qp_list);
 
@@ -343,7 +348,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                atomic_dec(&mcast->refcount);
                wait_event(mcast->wait, !atomic_read(&mcast->refcount));
                ipath_mcast_free(mcast);
+               spin_lock(&dev->n_mcast_grps_lock);
                dev->n_mcast_grps_allocated--;
+               spin_unlock(&dev->n_mcast_grps_lock);
        }
 
        ret = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
new file mode 100644 (file)
index 0000000..036fde6
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on PowerPC only.  Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include "ipath_kernel.h"
+
+/**
+ * ipath_unordered_wc - indicate whether write combining is ordered
+ *
+ * PowerPC systems (at least those in the 970 processor family)
+ * write partially filled store buffers in address order, but will write
+ * completely filled store buffers in "random" order, and therefore must
+ * have serialization for correctness with current InfiniPath chips.
+ *
+ */
+int ipath_unordered_wc(void)
+{
+       return 1;
+}
diff --git a/drivers/infiniband/hw/ipath/verbs_debug.h b/drivers/infiniband/hw/ipath/verbs_debug.h
deleted file mode 100644 (file)
index 6186676..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _VERBS_DEBUG_H
-#define _VERBS_DEBUG_H
-
-/*
- * This file contains tracing code for the ib_ipath kernel module.
- */
-#ifndef _VERBS_DEBUGGING       /* tracing enabled or not */
-#define _VERBS_DEBUGGING 1
-#endif
-
-extern unsigned ib_ipath_debug;
-
-#define _VERBS_ERROR(fmt,...) \
-       do { \
-               printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
-       } while(0)
-
-#define _VERBS_UNIT_ERROR(unit,fmt,...) \
-       do { \
-               printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
-       } while(0)
-
-#if _VERBS_DEBUGGING
-
-/*
- * Mask values for debugging.  The scheme allows us to compile out any
- * of the debug tracing stuff, and if compiled in, to enable or
- * disable dynamically.
- * This can be set at modprobe time also:
- *      modprobe ib_path ib_ipath_debug=3
- */
-
-#define __VERBS_INFO        0x1        /* generic low verbosity stuff */
-#define __VERBS_DBG         0x2        /* generic debug */
-#define __VERBS_VDBG        0x4        /* verbose debug */
-#define __VERBS_SMADBG      0x8000     /* sma packet debug */
-
-#define _VERBS_INFO(fmt,...) \
-       do { \
-               if (unlikely(ib_ipath_debug&__VERBS_INFO)) \
-                       printk(KERN_INFO "%s: " fmt,"ib_ipath", \
-                              ##__VA_ARGS__); \
-       } while(0)
-
-#define _VERBS_DBG(fmt,...) \
-       do { \
-               if (unlikely(ib_ipath_debug&__VERBS_DBG)) \
-                       printk(KERN_DEBUG "%s: " fmt, __func__, \
-                              ##__VA_ARGS__); \
-       } while(0)
-
-#define _VERBS_VDBG(fmt,...) \
-       do { \
-               if (unlikely(ib_ipath_debug&__VERBS_VDBG)) \
-                       printk(KERN_DEBUG "%s: " fmt, __func__, \
-                              ##__VA_ARGS__); \
-       } while(0)
-
-#define _VERBS_SMADBG(fmt,...) \
-       do { \
-               if (unlikely(ib_ipath_debug&__VERBS_SMADBG)) \
-                       printk(KERN_DEBUG "%s: " fmt, __func__, \
-                              ##__VA_ARGS__); \
-       } while(0)
-
-#else /* ! _VERBS_DEBUGGING */
-
-#define _VERBS_INFO(fmt,...)
-#define _VERBS_DBG(fmt,...)
-#define _VERBS_VDBG(fmt,...)
-#define _VERBS_SMADBG(fmt,...)
-
-#endif /* _VERBS_DEBUGGING */
-
-#endif /* _VERBS_DEBUG_H */
index e215041b2db9cb7cedd3f4616370659cf1a0dd28..69599455aca2c490e88d93fe631e0f17ed243e6f 100644 (file)
@@ -90,7 +90,7 @@ static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate)
        case MTHCA_RATE_TAVOR_1X:     return IB_RATE_2_5_GBPS;
        case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS;
        case MTHCA_RATE_TAVOR_4X:     return IB_RATE_10_GBPS;
-       default:                      return port_rate;
+       default:                      return mult_to_ib_rate(port_rate);
        }
 }
 
index c3bec7490f52e1dae838288b92ba14e03a6f7d2d..cd044ea2dfa42ecd53d967fbe46356e85fa104be 100644 (file)
@@ -34,6 +34,7 @@
 
 #include <linux/jiffies.h>
 #include <linux/timer.h>
+#include <linux/workqueue.h>
 
 #include "mthca_dev.h"
 
@@ -48,9 +49,41 @@ enum {
 
 static DEFINE_SPINLOCK(catas_lock);
 
+static LIST_HEAD(catas_list);
+static struct workqueue_struct *catas_wq;
+static struct work_struct catas_work;
+
+static int catas_reset_disable;
+module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
+MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
+
+static void catas_reset(void *work_ptr)
+{
+       struct mthca_dev *dev, *tmpdev;
+       LIST_HEAD(tlist);
+       int ret;
+
+       mutex_lock(&mthca_device_mutex);
+
+       spin_lock_irq(&catas_lock);
+       list_splice_init(&catas_list, &tlist);
+       spin_unlock_irq(&catas_lock);
+
+       list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
+               ret = __mthca_restart_one(dev->pdev);
+               if (ret)
+                       mthca_err(dev, "Reset failed (%d)\n", ret);
+               else
+                       mthca_dbg(dev, "Reset succeeded\n");
+       }
+
+       mutex_unlock(&mthca_device_mutex);
+}
+
 static void handle_catas(struct mthca_dev *dev)
 {
        struct ib_event event;
+       unsigned long flags;
        const char *type;
        int i;
 
@@ -82,6 +115,14 @@ static void handle_catas(struct mthca_dev *dev)
        for (i = 0; i < dev->catas_err.size; ++i)
                mthca_err(dev, "  buf[%02x]: %08x\n",
                          i, swab32(readl(dev->catas_err.map + i)));
+
+       if (catas_reset_disable)
+               return;
+
+       spin_lock_irqsave(&catas_lock, flags);
+       list_add(&dev->catas_err.list, &catas_list);
+       queue_work(catas_wq, &catas_work);
+       spin_unlock_irqrestore(&catas_lock, flags);
 }
 
 static void poll_catas(unsigned long dev_ptr)
@@ -135,6 +176,7 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
        dev->catas_err.timer.data     = (unsigned long) dev;
        dev->catas_err.timer.function = poll_catas;
        dev->catas_err.timer.expires  = jiffies + MTHCA_CATAS_POLL_INTERVAL;
+       INIT_LIST_HEAD(&dev->catas_err.list);
        add_timer(&dev->catas_err.timer);
 }
 
@@ -153,4 +195,24 @@ void mthca_stop_catas_poll(struct mthca_dev *dev)
                                    dev->catas_err.addr),
                                   dev->catas_err.size * 4);
        }
+
+       spin_lock_irq(&catas_lock);
+       list_del(&dev->catas_err.list);
+       spin_unlock_irq(&catas_lock);
+}
+
+int __init mthca_catas_init(void)
+{
+       INIT_WORK(&catas_work, catas_reset, NULL);
+
+       catas_wq = create_singlethread_workqueue("mthca_catas");
+       if (!catas_wq)
+               return -ENOMEM;
+
+       return 0;
+}
+
+void mthca_catas_cleanup(void)
+{
+       destroy_workqueue(catas_wq);
 }
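The reset is pushed onto a dedicated workqueue because handle_catas() is reached from the poll_catas() timer, where sleeping is not allowed, while catas_reset() takes mthca_device_mutex and __mthca_restart_one() re-runs the PCI probe path, both of which can sleep.  A minimal sketch of that defer-to-process-context pattern, in the same style as the hunks above (names are illustrative, not the driver's):

    #include <linux/workqueue.h>
    #include <linux/spinlock.h>
    #include <linux/list.h>

    struct err_dev {                                 /* illustrative device type */
            struct list_head list;
    };

    static struct workqueue_struct *recovery_wq;
    static struct work_struct recovery_work;
    static DEFINE_SPINLOCK(pending_lock);
    static LIST_HEAD(pending_list);

    static void recovery_fn(void *unused)            /* process context, may sleep */
    {
            struct err_dev *dev, *tmp;
            LIST_HEAD(tlist);

            spin_lock_irq(&pending_lock);
            list_splice_init(&pending_list, &tlist);
            spin_unlock_irq(&pending_lock);

            list_for_each_entry_safe(dev, tmp, &tlist, list)
                    list_del(&dev->list);            /* slow recovery of dev would go here */
    }

    static void report_fatal(struct err_dev *dev)    /* timer/IRQ context, cannot sleep */
    {
            unsigned long flags;

            spin_lock_irqsave(&pending_lock, flags);
            list_add(&dev->list, &pending_list);
            queue_work(recovery_wq, &recovery_work); /* no-op if already queued */
            spin_unlock_irqrestore(&pending_lock, flags);
    }

    /* At module init (pre-2.6.20 workqueue API, as used above):
     *     INIT_WORK(&recovery_work, recovery_fn, NULL);
     *     recovery_wq = create_singlethread_workqueue("recovery");
     */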
index deabc14b4ea4c3ba2edb1a74dc808184d3171982..99a94d7109350b4ed69870f1d32178d0235406a9 100644 (file)
@@ -34,7 +34,7 @@
  * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <linux/sched.h>
+#include <linux/completion.h>
 #include <linux/pci.h>
 #include <linux/errno.h>
 #include <asm/io.h>
index 3e27a084257e9f23b7a4fba49561e15cfddf5b91..e393681ba7d46a1988508c3f5d25e27b2cd4261d 100644 (file)
@@ -544,11 +544,11 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
                wq = &(*cur_qp)->rq;
                wqe = be32_to_cpu(cqe->wqe);
                wqe_index = wqe >> wq->wqe_shift;
-               /*
-               * WQE addr == base - 1 might be reported in receive completion
-               * with error instead of (rq size - 1) by Sinai FW 1.0.800 and
-               * Arbel FW 5.1.400.  This bug should be fixed in later FW revs.
-               */
+               /*
+                * WQE addr == base - 1 might be reported in receive completion
+                * with error instead of (rq size - 1) by Sinai FW 1.0.800 and
+                * Arbel FW 5.1.400.  This bug should be fixed in later FW revs.
+                */
                if (unlikely(wqe_index < 0))
                        wqe_index = wq->max - 1;
                entry->wr_id = (*cur_qp)->wrid[wqe_index];
index f8160b8de0908ed51b160c880634c1394a6b0712..fe5cecf70feddd6ec1b4c4d79dd32713d558126c 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/timer.h>
 #include <linux/mutex.h>
+#include <linux/list.h>
 
 #include <asm/semaphore.h>
 
@@ -283,8 +284,11 @@ struct mthca_catas_err {
        unsigned long           stop;
        u32                     size;
        struct timer_list       timer;
+       struct list_head        list;
 };
 
+extern struct mutex mthca_device_mutex;
+
 struct mthca_dev {
        struct ib_device  ib_dev;
        struct pci_dev   *pdev;
@@ -450,6 +454,9 @@ void mthca_unregister_device(struct mthca_dev *dev);
 
 void mthca_start_catas_poll(struct mthca_dev *dev);
 void mthca_stop_catas_poll(struct mthca_dev *dev);
+int __mthca_restart_one(struct pci_dev *pdev);
+int mthca_catas_init(void);
+void mthca_catas_cleanup(void);
 
 int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
 void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
@@ -506,7 +513,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
                    struct ib_srq_attr *attr, struct mthca_srq *srq);
 void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                    enum ib_srq_attr_mask attr_mask);
+                    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
 int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int mthca_max_srq_sge(struct mthca_dev *dev);
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
@@ -521,7 +528,8 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
                    enum ib_event_type event_type);
 int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
                   struct ib_qp_init_attr *qp_init_attr);
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+                   struct ib_udata *udata);
 int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                          struct ib_send_wr **bad_wr);
 int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
index d9bc030bcccc78627f434a226f199110dafefa15..45e106f148071c95a93125beb4e20f878988f0ec 100644 (file)
@@ -119,7 +119,7 @@ static void smp_snoop(struct ib_device *ibdev,
 
                        mthca_update_rate(to_mdev(ibdev), port_num);
                        update_sm_ah(to_mdev(ibdev), port_num,
-                                    be16_to_cpu(pinfo->lid),
+                                    be16_to_cpu(pinfo->sm_lid),
                                     pinfo->neighbormtu_mastersmsl & 0xf);
 
                        event.device           = ibdev;
index 7b82c1907f04288c88122d475a08ff9e30ef893a..47ea02148368419a8618cd05baa289c3c6293db0 100644 (file)
@@ -80,6 +80,8 @@ static int tune_pci = 0;
 module_param(tune_pci, int, 0444);
 MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero");
 
+struct mutex mthca_device_mutex;
+
 static const char mthca_version[] __devinitdata =
        DRV_NAME ": Mellanox InfiniBand HCA driver v"
        DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -978,28 +980,15 @@ static struct {
                                        MTHCA_FLAG_SINAI_OPT }
 };
 
-static int __devinit mthca_init_one(struct pci_dev *pdev,
-                                   const struct pci_device_id *id)
+static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
 {
-       static int mthca_version_printed = 0;
        int ddr_hidden = 0;
        int err;
        struct mthca_dev *mdev;
 
-       if (!mthca_version_printed) {
-               printk(KERN_INFO "%s", mthca_version);
-               ++mthca_version_printed;
-       }
-
        printk(KERN_INFO PFX "Initializing %s\n",
               pci_name(pdev));
 
-       if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
-               printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
-                      pci_name(pdev), id->driver_data);
-               return -ENODEV;
-       }
-
        err = pci_enable_device(pdev);
        if (err) {
                dev_err(&pdev->dev, "Cannot enable PCI device, "
@@ -1065,7 +1054,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
 
        mdev->pdev = pdev;
 
-       mdev->mthca_flags = mthca_hca_table[id->driver_data].flags;
+       mdev->mthca_flags = mthca_hca_table[hca_type].flags;
        if (ddr_hidden)
                mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
 
@@ -1099,13 +1088,13 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
        if (err)
                goto err_cmd;
 
-       if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) {
+       if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
                mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n",
                           (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
                           (int) (mdev->fw_ver & 0xffff),
-                          (int) (mthca_hca_table[id->driver_data].latest_fw >> 32),
-                          (int) (mthca_hca_table[id->driver_data].latest_fw >> 16) & 0xffff,
-                          (int) (mthca_hca_table[id->driver_data].latest_fw & 0xffff));
+                          (int) (mthca_hca_table[hca_type].latest_fw >> 32),
+                          (int) (mthca_hca_table[hca_type].latest_fw >> 16) & 0xffff,
+                          (int) (mthca_hca_table[hca_type].latest_fw & 0xffff));
                mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n");
        }
 
@@ -1122,6 +1111,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
                goto err_unregister;
 
        pci_set_drvdata(pdev, mdev);
+       mdev->hca_type = hca_type;
 
        return 0;
 
@@ -1166,7 +1156,7 @@ err_disable_pdev:
        return err;
 }
 
-static void __devexit mthca_remove_one(struct pci_dev *pdev)
+static void __mthca_remove_one(struct pci_dev *pdev)
 {
        struct mthca_dev *mdev = pci_get_drvdata(pdev);
        u8 status;
@@ -1211,6 +1201,51 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev)
        }
 }
 
+int __mthca_restart_one(struct pci_dev *pdev)
+{
+       struct mthca_dev *mdev;
+
+       mdev = pci_get_drvdata(pdev);
+       if (!mdev)
+               return -ENODEV;
+       __mthca_remove_one(pdev);
+       return __mthca_init_one(pdev, mdev->hca_type);
+}
+
+static int __devinit mthca_init_one(struct pci_dev *pdev,
+                            const struct pci_device_id *id)
+{
+       static int mthca_version_printed = 0;
+       int ret;
+
+       mutex_lock(&mthca_device_mutex);
+
+       if (!mthca_version_printed) {
+               printk(KERN_INFO "%s", mthca_version);
+               ++mthca_version_printed;
+       }
+
+       if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
+               printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
+                      pci_name(pdev), id->driver_data);
+               mutex_unlock(&mthca_device_mutex);
+               return -ENODEV;
+       }
+
+       ret = __mthca_init_one(pdev, id->driver_data);
+
+       mutex_unlock(&mthca_device_mutex);
+
+       return ret;
+}
+
+static void __devexit mthca_remove_one(struct pci_dev *pdev)
+{
+       mutex_lock(&mthca_device_mutex);
+       __mthca_remove_one(pdev);
+       mutex_unlock(&mthca_device_mutex);
+}
+
 static struct pci_device_id mthca_pci_table[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR),
          .driver_data = TAVOR },
@@ -1248,13 +1283,24 @@ static int __init mthca_init(void)
 {
        int ret;
 
+       mutex_init(&mthca_device_mutex);
+       ret = mthca_catas_init();
+       if (ret)
+               return ret;
+
        ret = pci_register_driver(&mthca_driver);
-       return ret < 0 ? ret : 0;
+       if (ret < 0) {
+               mthca_catas_cleanup();
+               return ret;
+       }
+
+       return 0;
 }
 
 static void __exit mthca_cleanup(void)
 {
        pci_unregister_driver(&mthca_driver);
+       mthca_catas_cleanup();
 }
 
 module_init(mthca_init);
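The hunks above split the PCI hotplug entry points into locked wrappers (mthca_init_one()/mthca_remove_one(), which take mthca_device_mutex) and unlocked workers (__mthca_init_one()/__mthca_remove_one()), so that __mthca_restart_one() can tear a device down and probe it again while the catastrophic-error handler holds the same mutex.  A condensed sketch of the shape of that pattern (illustrative names, not the driver's API):

    #include <linux/pci.h>
    #include <linux/mutex.h>

    static DEFINE_MUTEX(device_mutex);

    static int __probe_one(struct pci_dev *pdev, int type)
    {
            /* enable the device, map BARs, register with the core ... */
            return 0;
    }

    static void __remove_one(struct pci_dev *pdev)
    {
            /* unregister, unmap, disable ... */
    }

    /* Entry points visible to the PCI core serialize on the mutex ... */
    static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
    {
            int err;

            mutex_lock(&device_mutex);
            err = __probe_one(pdev, id->driver_data);
            mutex_unlock(&device_mutex);
            return err;
    }

    /* ... and an error handler can reuse the workers under the same lock. */
    int restart_one(struct pci_dev *pdev, int type)
    {
            int err;

            mutex_lock(&device_mutex);
            __remove_one(pdev);
            err = __probe_one(pdev, type);
            mutex_unlock(&device_mutex);
            return err;
    }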
index 265b1d1c4a62dde6a09ce185a2fb2910c101f254..981fe2eebdfa36228693781627c65464286aadb3 100644 (file)
@@ -1288,7 +1288,7 @@ int mthca_register_device(struct mthca_dev *dev)
                (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
                (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
                (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
-       dev->ib_dev.node_type            = IB_NODE_CA;
+       dev->ib_dev.node_type            = RDMA_NODE_IB_CA;
        dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
        dev->ib_dev.dma_device           = &dev->pdev->dev;
        dev->ib_dev.class_dev.dev        = &dev->pdev->dev;
index 2e8f6f36e0a5b862e87a3de76e0f0478ad62e25f..5e5c58b9920b5c3d53b6d0a61a3afcaee037e281 100644 (file)
@@ -408,7 +408,7 @@ static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
        ib_ah_attr->sl            = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
        ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
        ib_ah_attr->static_rate   = mthca_rate_to_ib(dev,
-                                                    path->static_rate & 0x7,
+                                                    path->static_rate & 0xf,
                                                     ib_ah_attr->port_num);
        ib_ah_attr->ah_flags      = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
        if (ib_ah_attr->ah_flags) {
@@ -472,10 +472,14 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
        if (qp->transport == RC || qp->transport == UC) {
                to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
                to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+               qp_attr->alt_pkey_index =
+                       be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
+               qp_attr->alt_port_num   = qp_attr->alt_ah_attr.port_num;
        }
 
-       qp_attr->pkey_index     = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
-       qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
+       qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
+       qp_attr->port_num   =
+               (be32_to_cpu(context->pri_path.port_pkey) >> 24) & 0x3;
 
        /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
        qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING;
@@ -486,11 +490,9 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
                1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
        qp_attr->min_rnr_timer      =
                (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
-       qp_attr->port_num           = qp_attr->ah_attr.port_num;
        qp_attr->timeout            = context->pri_path.ackto >> 3;
        qp_attr->retry_cnt          = (be32_to_cpu(context->params1) >> 16) & 0x7;
        qp_attr->rnr_retry          = context->pri_path.rnr_retry >> 5;
-       qp_attr->alt_port_num       = qp_attr->alt_ah_attr.port_num;
        qp_attr->alt_timeout        = context->alt_path.ackto >> 3;
        qp_init_attr->cap           = qp_attr->cap;
 
@@ -527,7 +529,8 @@ static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
        return 0;
 }
 
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+                   struct ib_udata *udata)
 {
        struct mthca_dev *dev = to_mdev(ibqp->device);
        struct mthca_qp *qp = to_mqp(ibqp);
@@ -842,11 +845,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
         * entries and reinitialize the QP.
         */
        if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
-               mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
+               mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
                               qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
                if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-                       mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
-                                      qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+                       mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL);
 
                mthca_wq_reset(&qp->sq);
                qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
index b60a9d79ae54ae569e7e7c765389c0ee05fd8260..0f316c87bf642397eb0844469c5c179b17076c1c 100644 (file)
@@ -358,7 +358,7 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
 }
 
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                    enum ib_srq_attr_mask attr_mask)
+                    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
 {
        struct mthca_dev *dev = to_mdev(ibsrq->device);
        struct mthca_srq *srq = to_msrq(ibsrq);
index 8e9219842be42a687e08f20cfdd7a04a72e2ca65..8b728486410d12aef77f16563220624d766fad18 100644 (file)
@@ -60,7 +60,7 @@ int mthca_init_uar_table(struct mthca_dev *dev)
        ret = mthca_alloc_init(&dev->uar_table.alloc,
                               dev->limits.num_uars,
                               dev->limits.num_uars - 1,
-                              dev->limits.reserved_uars);
+                              dev->limits.reserved_uars + 1);
        if (ret)
                return ret;
 
index 474aa214ab57ed923b16b4376979ca5b63d28249..0b8a79d53a00dd06941242a5db79d905a9d70405 100644 (file)
@@ -336,6 +336,8 @@ static inline void ipoib_unregister_debugfs(void) { }
 extern int ipoib_sendq_size;
 extern int ipoib_recvq_size;
 
+extern struct ib_sa_client ipoib_sa_client;
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 extern int ipoib_debug_level;
 
index 5033666b14817e75488b1859e7b217d41f653f8b..f426a69d9a436cce55addba00be1d1d2cdd7ff30 100644 (file)
@@ -169,117 +169,129 @@ static int ipoib_ib_post_receives(struct net_device *dev)
        return 0;
 }
 
-static void ipoib_ib_handle_wc(struct net_device *dev,
-                              struct ib_wc *wc)
+static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       unsigned int wr_id = wc->wr_id;
+       unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
+       struct sk_buff *skb;
+       dma_addr_t addr;
 
-       ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n",
+       ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n",
                       wr_id, wc->opcode, wc->status);
 
-       if (wr_id & IPOIB_OP_RECV) {
-               wr_id &= ~IPOIB_OP_RECV;
-
-               if (wr_id < ipoib_recvq_size) {
-                       struct sk_buff *skb  = priv->rx_ring[wr_id].skb;
-                       dma_addr_t      addr = priv->rx_ring[wr_id].mapping;
-
-                       if (unlikely(wc->status != IB_WC_SUCCESS)) {
-                               if (wc->status != IB_WC_WR_FLUSH_ERR)
-                                       ipoib_warn(priv, "failed recv event "
-                                                  "(status=%d, wrid=%d vend_err %x)\n",
-                                                  wc->status, wr_id, wc->vendor_err);
-                               dma_unmap_single(priv->ca->dma_device, addr,
-                                                IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
-                               dev_kfree_skb_any(skb);
-                               priv->rx_ring[wr_id].skb = NULL;
-                               return;
-                       }
+       if (unlikely(wr_id >= ipoib_recvq_size)) {
+               ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
+                          wr_id, ipoib_recvq_size);
+               return;
+       }
 
-                       /*
-                        * If we can't allocate a new RX buffer, dump
-                        * this packet and reuse the old buffer.
-                        */
-                       if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
-                               ++priv->stats.rx_dropped;
-                               goto repost;
-                       }
+       skb  = priv->rx_ring[wr_id].skb;
+       addr = priv->rx_ring[wr_id].mapping;
 
-                       ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
-                                      wc->byte_len, wc->slid);
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               if (wc->status != IB_WC_WR_FLUSH_ERR)
+                       ipoib_warn(priv, "failed recv event "
+                                  "(status=%d, wrid=%d vend_err %x)\n",
+                                  wc->status, wr_id, wc->vendor_err);
+               dma_unmap_single(priv->ca->dma_device, addr,
+                                IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+               dev_kfree_skb_any(skb);
+               priv->rx_ring[wr_id].skb = NULL;
+               return;
+       }
 
-                       dma_unmap_single(priv->ca->dma_device, addr,
-                                        IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+       /*
+        * If we can't allocate a new RX buffer, dump
+        * this packet and reuse the old buffer.
+        */
+       if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
+               ++priv->stats.rx_dropped;
+               goto repost;
+       }
 
-                       skb_put(skb, wc->byte_len);
-                       skb_pull(skb, IB_GRH_BYTES);
+       ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+                      wc->byte_len, wc->slid);
 
-                       if (wc->slid != priv->local_lid ||
-                           wc->src_qp != priv->qp->qp_num) {
-                               skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-                               skb->mac.raw = skb->data;
-                               skb_pull(skb, IPOIB_ENCAP_LEN);
+       dma_unmap_single(priv->ca->dma_device, addr,
+                        IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
 
-                               dev->last_rx = jiffies;
-                               ++priv->stats.rx_packets;
-                               priv->stats.rx_bytes += skb->len;
+       skb_put(skb, wc->byte_len);
+       skb_pull(skb, IB_GRH_BYTES);
 
-                               skb->dev = dev;
-                               /* XXX get correct PACKET_ type here */
-                               skb->pkt_type = PACKET_HOST;
-                               netif_rx_ni(skb);
-                       } else {
-                               ipoib_dbg_data(priv, "dropping loopback packet\n");
-                               dev_kfree_skb_any(skb);
-                       }
+       if (wc->slid != priv->local_lid ||
+           wc->src_qp != priv->qp->qp_num) {
+               skb->protocol = ((struct ipoib_header *) skb->data)->proto;
+               skb->mac.raw = skb->data;
+               skb_pull(skb, IPOIB_ENCAP_LEN);
 
-               repost:
-                       if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
-                               ipoib_warn(priv, "ipoib_ib_post_receive failed "
-                                          "for buf %d\n", wr_id);
-               } else
-                       ipoib_warn(priv, "completion event with wrid %d\n",
-                                  wr_id);
+               dev->last_rx = jiffies;
+               ++priv->stats.rx_packets;
+               priv->stats.rx_bytes += skb->len;
 
+               skb->dev = dev;
+               /* XXX get correct PACKET_ type here */
+               skb->pkt_type = PACKET_HOST;
+               netif_rx_ni(skb);
        } else {
-               struct ipoib_tx_buf *tx_req;
-               unsigned long flags;
+               ipoib_dbg_data(priv, "dropping loopback packet\n");
+               dev_kfree_skb_any(skb);
+       }
 
-               if (wr_id >= ipoib_sendq_size) {
-                       ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
-                                  wr_id, ipoib_sendq_size);
-                       return;
-               }
+repost:
+       if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
+               ipoib_warn(priv, "ipoib_ib_post_receive failed "
+                          "for buf %d\n", wr_id);
+}
 
-               ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id);
+static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       unsigned int wr_id = wc->wr_id;
+       struct ipoib_tx_buf *tx_req;
+       unsigned long flags;
 
-               tx_req = &priv->tx_ring[wr_id];
+       ipoib_dbg_data(priv, "send completion: id %d, op %d, status: %d\n",
+                      wr_id, wc->opcode, wc->status);
 
-               dma_unmap_single(priv->ca->dma_device,
-                                pci_unmap_addr(tx_req, mapping),
-                                tx_req->skb->len,
-                                DMA_TO_DEVICE);
+       if (unlikely(wr_id >= ipoib_sendq_size)) {
+               ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
+                          wr_id, ipoib_sendq_size);
+               return;
+       }
 
-               ++priv->stats.tx_packets;
-               priv->stats.tx_bytes += tx_req->skb->len;
+       tx_req = &priv->tx_ring[wr_id];
 
-               dev_kfree_skb_any(tx_req->skb);
+       dma_unmap_single(priv->ca->dma_device,
+                        pci_unmap_addr(tx_req, mapping),
+                        tx_req->skb->len,
+                        DMA_TO_DEVICE);
 
-               spin_lock_irqsave(&priv->tx_lock, flags);
-               ++priv->tx_tail;
-               if (netif_queue_stopped(dev) &&
-                   test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
-                   priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
-                       netif_wake_queue(dev);
-               spin_unlock_irqrestore(&priv->tx_lock, flags);
+       ++priv->stats.tx_packets;
+       priv->stats.tx_bytes += tx_req->skb->len;
 
-               if (wc->status != IB_WC_SUCCESS &&
-                   wc->status != IB_WC_WR_FLUSH_ERR)
-                       ipoib_warn(priv, "failed send event "
-                                  "(status=%d, wrid=%d vend_err %x)\n",
-                                  wc->status, wr_id, wc->vendor_err);
-       }
+       dev_kfree_skb_any(tx_req->skb);
+
+       spin_lock_irqsave(&priv->tx_lock, flags);
+       ++priv->tx_tail;
+       if (netif_queue_stopped(dev) &&
+           test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
+           priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
+               netif_wake_queue(dev);
+       spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+       if (wc->status != IB_WC_SUCCESS &&
+           wc->status != IB_WC_WR_FLUSH_ERR)
+               ipoib_warn(priv, "failed send event "
+                          "(status=%d, wrid=%d vend_err %x)\n",
+                          wc->status, wr_id, wc->vendor_err);
+}
+
+static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc)
+{
+       if (wc->wr_id & IPOIB_OP_RECV)
+               ipoib_ib_handle_rx_wc(dev, wc);
+       else
+               ipoib_ib_handle_tx_wc(dev, wc);
 }
 
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
@@ -320,7 +332,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
        struct ipoib_tx_buf *tx_req;
        dma_addr_t addr;
 
-       if (skb->len > dev->mtu + INFINIBAND_ALEN) {
+       if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) {
                ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
                           skb->len, dev->mtu + INFINIBAND_ALEN);
                ++priv->stats.tx_dropped;
@@ -619,8 +631,10 @@ void ipoib_ib_dev_flush(void *_dev)
         * The device could have been brought down between the start and when
         * we get here, don't bring it back up if it's not configured up
         */
-       if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+       if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
                ipoib_ib_dev_up(dev);
+               ipoib_mcast_restart_task(dev);
+       }
 
        mutex_lock(&priv->vlan_mutex);
 
index cf71d2a5515c1d734026827355680847a6a51893..1eaf00e9862c5a4a0381d0570b5ac7ee0723f93c 100644 (file)
@@ -40,7 +40,6 @@
 
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 
 #include <linux/if_arp.h>      /* For ARPHRD_xxx */
@@ -82,6 +81,8 @@ static const u8 ipv4_bcast_addr[] = {
 
 struct workqueue_struct *ipoib_workqueue;
 
+struct ib_sa_client ipoib_sa_client;
+
 static void ipoib_add_one(struct ib_device *device);
 static void ipoib_remove_one(struct ib_device *device);
 
@@ -336,7 +337,8 @@ void ipoib_flush_paths(struct net_device *dev)
        struct ipoib_path *path, *tp;
        LIST_HEAD(remove_list);
 
-       spin_lock_irq(&priv->lock);
+       spin_lock_irq(&priv->tx_lock);
+       spin_lock(&priv->lock);
 
        list_splice(&priv->path_list, &remove_list);
        INIT_LIST_HEAD(&priv->path_list);
@@ -347,12 +349,15 @@ void ipoib_flush_paths(struct net_device *dev)
        list_for_each_entry_safe(path, tp, &remove_list, list) {
                if (path->query)
                        ib_sa_cancel_query(path->query_id, path->query);
-               spin_unlock_irq(&priv->lock);
+               spin_unlock(&priv->lock);
+               spin_unlock_irq(&priv->tx_lock);
                wait_for_completion(&path->done);
                path_free(dev, path);
-               spin_lock_irq(&priv->lock);
+               spin_lock_irq(&priv->tx_lock);
+               spin_lock(&priv->lock);
        }
-       spin_unlock_irq(&priv->lock);
+       spin_unlock(&priv->lock);
+       spin_unlock_irq(&priv->tx_lock);
 }
 
 static void path_rec_completion(int status,
@@ -459,7 +464,7 @@ static int path_rec_start(struct net_device *dev,
        init_completion(&path->done);
 
        path->query_id =
-               ib_sa_path_rec_get(priv->ca, priv->port,
+               ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
                                   &path->pathrec,
                                   IB_SA_PATH_REC_DGID          |
                                   IB_SA_PATH_REC_SGID          |
@@ -615,7 +620,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
        struct ipoib_neigh *neigh;
        unsigned long flags;
 
-       if (!spin_trylock_irqsave(&priv->tx_lock, flags))
+       if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
                return NETDEV_TX_LOCKED;
 
        /*
@@ -628,7 +633,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
                return NETDEV_TX_BUSY;
        }
 
-       if (skb->dst && skb->dst->neighbour) {
+       if (likely(skb->dst && skb->dst->neighbour)) {
                if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
                        ipoib_path_lookup(skb, dev);
                        goto out;
@@ -1107,13 +1112,16 @@ static void ipoib_add_one(struct ib_device *device)
        struct ipoib_dev_priv *priv;
        int s, e, p;
 
+       if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+               return;
+
        dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
        if (!dev_list)
                return;
 
        INIT_LIST_HEAD(dev_list);
 
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                s = 0;
                e = 0;
        } else {
@@ -1137,6 +1145,9 @@ static void ipoib_remove_one(struct ib_device *device)
        struct ipoib_dev_priv *priv, *tmp;
        struct list_head *dev_list;
 
+       if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+               return;
+
        dev_list = ib_get_client_data(device, &ipoib_client);
 
        list_for_each_entry_safe(priv, tmp, dev_list, list) {
@@ -1181,13 +1192,16 @@ static int __init ipoib_init_module(void)
                goto err_fs;
        }
 
+       ib_sa_register_client(&ipoib_sa_client);
+
        ret = ib_register_client(&ipoib_client);
        if (ret)
-               goto err_wq;
+               goto err_sa;
 
        return 0;
 
-err_wq:
+err_sa:
+       ib_sa_unregister_client(&ipoib_sa_client);
        destroy_workqueue(ipoib_workqueue);
 
 err_fs:
@@ -1199,6 +1213,7 @@ err_fs:
 static void __exit ipoib_cleanup_module(void)
 {
        ib_unregister_client(&ipoib_client);
+       ib_sa_unregister_client(&ipoib_sa_client);
        ipoib_unregister_debugfs();
        destroy_workqueue(ipoib_workqueue);
 }
index ec356ce7cdcdefc3fbb91d6f7e3b9bf8ea071a82..3faa1820f0e9830855d434e2e35bb54586e9d5d3 100644 (file)
@@ -361,7 +361,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
 
        init_completion(&mcast->done);
 
-       ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec,
+       ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec,
                                     IB_SA_MCMEMBER_REC_MGID            |
                                     IB_SA_MCMEMBER_REC_PORT_GID        |
                                     IB_SA_MCMEMBER_REC_PKEY            |
@@ -472,22 +472,32 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 
        if (create) {
                comp_mask |=
-                       IB_SA_MCMEMBER_REC_QKEY         |
-                       IB_SA_MCMEMBER_REC_SL           |
-                       IB_SA_MCMEMBER_REC_FLOW_LABEL   |
-                       IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+                       IB_SA_MCMEMBER_REC_QKEY                 |
+                       IB_SA_MCMEMBER_REC_MTU_SELECTOR         |
+                       IB_SA_MCMEMBER_REC_MTU                  |
+                       IB_SA_MCMEMBER_REC_TRAFFIC_CLASS        |
+                       IB_SA_MCMEMBER_REC_RATE_SELECTOR        |
+                       IB_SA_MCMEMBER_REC_RATE                 |
+                       IB_SA_MCMEMBER_REC_SL                   |
+                       IB_SA_MCMEMBER_REC_FLOW_LABEL           |
+                       IB_SA_MCMEMBER_REC_HOP_LIMIT;
 
                rec.qkey          = priv->broadcast->mcmember.qkey;
+               rec.mtu_selector  = IB_SA_EQ;
+               rec.mtu           = priv->broadcast->mcmember.mtu;
+               rec.traffic_class = priv->broadcast->mcmember.traffic_class;
+               rec.rate_selector = IB_SA_EQ;
+               rec.rate          = priv->broadcast->mcmember.rate;
                rec.sl            = priv->broadcast->mcmember.sl;
                rec.flow_label    = priv->broadcast->mcmember.flow_label;
-               rec.traffic_class = priv->broadcast->mcmember.traffic_class;
+               rec.hop_limit     = priv->broadcast->mcmember.hop_limit;
        }
 
        init_completion(&mcast->done);
 
-       ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask,
-                                    mcast->backoff * 1000, GFP_ATOMIC,
-                                    ipoib_mcast_join_complete,
+       ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port,
+                                    &rec, comp_mask, mcast->backoff * 1000,
+                                    GFP_ATOMIC, ipoib_mcast_join_complete,
                                     mcast, &mcast->query);
 
        if (ret < 0) {
@@ -528,7 +538,7 @@ void ipoib_mcast_join_task(void *dev_ptr)
                        priv->local_rate = attr.active_speed *
                                ib_width_enum_to_int(attr.active_width);
                } else
-                       ipoib_warn(priv, "ib_query_port failed\n");
+               ipoib_warn(priv, "ib_query_port failed\n");
        }
 
        if (!priv->broadcast) {
@@ -681,7 +691,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
         * Just make one shot at leaving and don't wait for a reply;
         * if we fail, too bad.
         */
-       ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec,
+       ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec,
                                        IB_SA_MCMEMBER_REC_MGID         |
                                        IB_SA_MCMEMBER_REC_PORT_GID     |
                                        IB_SA_MCMEMBER_REC_PKEY         |
@@ -795,7 +805,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
        }
 
        if (priv->broadcast) {
-               rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
+               rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
                list_add_tail(&priv->broadcast->list, &remove_list);
                priv->broadcast = NULL;
        }
index fead87d1eff95202d09b2d37f403e0f2d4ad347e..365a1b5f19e044f8c427b8f34440b3e93c3a6171 100644 (file)
@@ -1,6 +1,6 @@
 config INFINIBAND_ISER
        tristate "ISCSI RDMA Protocol"
-       depends on INFINIBAND && SCSI
+       depends on INFINIBAND && SCSI && INET
        select SCSI_ISCSI_ATTRS
        ---help---
          Support for the ISCSI RDMA Protocol over InfiniBand.  This
index 1437d7ee3b1901727fcb1e7ba7ffd0408949704e..e9cf1a9f1e1cda80338acfefb242f1003a581e48 100644 (file)
@@ -555,6 +555,7 @@ static struct scsi_host_template iscsi_iser_sht = {
        .queuecommand           = iscsi_queuecommand,
        .can_queue              = ISCSI_XMIT_CMDS_MAX - 1,
        .sg_tablesize           = ISCSI_ISER_SG_TABLESIZE,
+       .max_sectors            = 1024,
        .cmd_per_lun            = ISCSI_MAX_CMD_PER_LUN,
        .eh_abort_handler       = iscsi_eh_abort,
        .eh_host_reset_handler  = iscsi_eh_host_reset,
index 3350ba690cfe1ef770e8df27456b724ad00008f9..7e1a411db2a30d215369d7e005e5c6a458ada015 100644 (file)
                       __func__ , ## arg);              \
        } while (0)
 
+#define SHIFT_4K       12
+#define SIZE_4K        (1UL << SHIFT_4K)
+#define MASK_4K        (~(SIZE_4K-1))
+
                                        /* support upto 512KB in one RDMA */
-#define ISCSI_ISER_SG_TABLESIZE         (0x80000 >> PAGE_SHIFT)
+#define ISCSI_ISER_SG_TABLESIZE         (0x80000 >> SHIFT_4K)
 #define ISCSI_ISER_MAX_LUN             256
 #define ISCSI_ISER_MAX_CMD_LEN         16
 
@@ -171,6 +175,7 @@ struct iser_mem_reg {
        u64  va;
        u64  len;
        void *mem_h;
+       int  is_fmr;
 };
 
 struct iser_regd_buf {
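The new SHIFT_4K/SIZE_4K/MASK_4K constants pin the FMR page granularity at 4 KB instead of following the host's PAGE_SHIFT, so the registration layout no longer changes on large-page architectures.  With MASK_4K defined as ~(SIZE_4K - 1), "addr & MASK_4K" rounds an address down to a 4 KB boundary and "addr & ~MASK_4K" is the offset inside that 4 KB chunk, which is how the later iser hunks use it.  A small stand-alone illustration of the arithmetic (user-space C, for clarity only):

    #include <stdio.h>

    #define SHIFT_4K 12
    #define SIZE_4K  (1UL << SHIFT_4K)
    #define MASK_4K  (~(SIZE_4K - 1))

    int main(void)
    {
            unsigned long addr = 0x12345abcUL;

            printf("base:    0x%lx\n", addr & MASK_4K);        /* 0x12345000 */
            printf("offset:  0x%lx\n", addr & ~MASK_4K);       /* 0xabc */
            printf("aligned: %d\n", (addr & ~MASK_4K) == 0);   /* 0 */
            return 0;
    }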
index 31950a522a1c8cb5f2115a68a39aacec6c2e65ec..d0b03f4265811c04f70d5d2a33acdaa9dacf8a56 100644 (file)
@@ -42,6 +42,7 @@
 #include "iscsi_iser.h"
 
 #define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
+
 /**
  * Decrements the reference count for the
  * registered buffer & releases it
@@ -55,7 +56,7 @@ int iser_regd_buff_release(struct iser_regd_buf *regd_buf)
        if ((atomic_read(&regd_buf->ref_count) == 0) ||
            atomic_dec_and_test(&regd_buf->ref_count)) {
                /* if we used the dma mr, unreg is just NOP */
-               if (regd_buf->reg.rkey != 0)
+               if (regd_buf->reg.is_fmr)
                        iser_unreg_mem(&regd_buf->reg);
 
                if (regd_buf->dma_addr) {
@@ -90,9 +91,9 @@ void iser_reg_single(struct iser_device *device,
        BUG_ON(dma_mapping_error(dma_addr));
 
        regd_buf->reg.lkey = device->mr->lkey;
-       regd_buf->reg.rkey = 0; /* indicate there's no need to unreg */
        regd_buf->reg.len  = regd_buf->data_size;
        regd_buf->reg.va   = dma_addr;
+       regd_buf->reg.is_fmr = 0;
 
        regd_buf->dma_addr  = dma_addr;
        regd_buf->direction = direction;
@@ -239,7 +240,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
        int i;
 
        /* compute the offset of first element */
-       page_vec->offset = (u64) sg[0].offset;
+       page_vec->offset = (u64) sg[0].offset & ~MASK_4K;
 
        for (i = 0; i < data->dma_nents; i++) {
                total_sz += sg_dma_len(&sg[i]);
@@ -247,21 +248,30 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
                first_addr = sg_dma_address(&sg[i]);
                last_addr  = first_addr + sg_dma_len(&sg[i]);
 
-               start_aligned = !(first_addr & ~PAGE_MASK);
-               end_aligned   = !(last_addr  & ~PAGE_MASK);
+               start_aligned = !(first_addr & ~MASK_4K);
+               end_aligned   = !(last_addr  & ~MASK_4K);
 
                /* continue to collect page fragments till aligned or SG ends */
                while (!end_aligned && (i + 1 < data->dma_nents)) {
                        i++;
                        total_sz += sg_dma_len(&sg[i]);
                        last_addr = sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]);
-                       end_aligned = !(last_addr  & ~PAGE_MASK);
+                       end_aligned = !(last_addr  & ~MASK_4K);
                }
 
-               first_addr = first_addr & PAGE_MASK;
-
-               for (page = first_addr; page < last_addr; page += PAGE_SIZE)
-                       page_vec->pages[cur_page++] = page;
+               /* handle the 1st page in the 1st DMA element */
+               if (cur_page == 0) {
+                       page = first_addr & MASK_4K;
+                       page_vec->pages[cur_page] = page;
+                       cur_page++;
+                       page += SIZE_4K;
+               } else
+                       page = first_addr;
+
+               for (; page < last_addr; page += SIZE_4K) {
+                       page_vec->pages[cur_page] = page;
+                       cur_page++;
+               }
 
        }
        page_vec->data_size = total_sz;
@@ -269,8 +279,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
        return cur_page;
 }
 
-#define MASK_4K                        ((1UL << 12) - 1) /* 0xFFF */
-#define IS_4K_ALIGNED(addr)    ((((unsigned long)addr) & MASK_4K) == 0)
+#define IS_4K_ALIGNED(addr)    ((((unsigned long)addr) & ~MASK_4K) == 0)
 
 /**
  * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
@@ -320,9 +329,9 @@ static void iser_data_buf_dump(struct iser_data_buf *data)
        struct scatterlist *sg = (struct scatterlist *)data->buf;
        int i;
 
-       for (i = 0; i < data->size; i++)
+       for (i = 0; i < data->dma_nents; i++)
                iser_err("sg[%d] dma_addr:0x%lX page:0x%p "
-                        "off:%d sz:%d dma_len:%d\n",
+                        "off:0x%x sz:0x%x dma_len:0x%x\n",
                         i, (unsigned long)sg_dma_address(&sg[i]),
                         sg[i].page, sg[i].offset,
                         sg[i].length,sg_dma_len(&sg[i]));
@@ -352,7 +361,7 @@ static void iser_page_vec_build(struct iser_data_buf *data,
 
        page_vec->length = page_vec_len;
 
-       if (page_vec_len * PAGE_SIZE < page_vec->data_size) {
+       if (page_vec_len * SIZE_4K < page_vec->data_size) {
                iser_err("page_vec too short to hold this SG\n");
                iser_data_buf_dump(data);
                iser_dump_page_vec(page_vec);
@@ -370,15 +379,18 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
                      enum   iser_data_dir        cmd_dir)
 {
        struct iser_conn     *ib_conn = iser_ctask->iser_conn->ib_conn;
+       struct iser_device   *device = ib_conn->device;
        struct iser_data_buf *mem = &iser_ctask->data[cmd_dir];
        struct iser_regd_buf *regd_buf;
        int aligned_len;
        int err;
+       int i;
+       struct scatterlist *sg;
 
        regd_buf = &iser_ctask->rdma_regd[cmd_dir];
 
        aligned_len = iser_data_buf_aligned_len(mem);
-       if (aligned_len != mem->size) {
+       if (aligned_len != mem->dma_nents) {
                iser_err("rdma alignment violation %d/%d aligned\n",
                         aligned_len, mem->size);
                iser_data_buf_dump(mem);
@@ -389,10 +401,38 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
                mem = &iser_ctask->data_copy[cmd_dir];
        }
 
-       iser_page_vec_build(mem, ib_conn->page_vec);
-       err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
-       if (err)
-               return err;
+       /* if there a single dma entry, FMR is not needed */
+       if (mem->dma_nents == 1) {
+               sg = (struct scatterlist *)mem->buf;
+
+               regd_buf->reg.lkey = device->mr->lkey;
+               regd_buf->reg.rkey = device->mr->rkey;
+               regd_buf->reg.len  = sg_dma_len(&sg[0]);
+               regd_buf->reg.va   = sg_dma_address(&sg[0]);
+               regd_buf->reg.is_fmr = 0;
+
+               iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
+                        "va: 0x%08lX sz: %ld]\n",
+                        (unsigned int)regd_buf->reg.lkey,
+                        (unsigned int)regd_buf->reg.rkey,
+                        (unsigned long)regd_buf->reg.va,
+                        (unsigned long)regd_buf->reg.len);
+       } else { /* use FMR for multiple dma entries */
+               iser_page_vec_build(mem, ib_conn->page_vec);
+               err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
+               if (err) {
+                       iser_data_buf_dump(mem);
+                       iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents,
+                                ntoh24(iser_ctask->desc.iscsi_header.dlength));
+                       iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
+                                ib_conn->page_vec->data_size, ib_conn->page_vec->length,
+                                ib_conn->page_vec->offset);
+                       for (i=0 ; i<ib_conn->page_vec->length ; i++)
+                               iser_err("page_vec[%d] = 0x%llx\n", i,
+                                        (unsigned long long) ib_conn->page_vec->pages[i]);
+                       return err;
+               }
+       }
 
        /* take a reference on this regd buf such that it will not be released *
         * (eg in send dto completion) before we get the scsi response         */
index 72febf1f8ff8d74b974d12333fe4e7e4a81ef50e..ecdca7fc1e4cf0466f58bd6d89b4153891b71e0b 100644 (file)
@@ -88,8 +88,9 @@ static int iser_create_device_ib_res(struct iser_device *device)
                     iser_cq_tasklet_fn,
                     (unsigned long)device);
 
-       device->mr = ib_get_dma_mr(device->pd,
-                                  IB_ACCESS_LOCAL_WRITE);
+       device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
+                                  IB_ACCESS_REMOTE_WRITE |
+                                  IB_ACCESS_REMOTE_READ);
        if (IS_ERR(device->mr))
                goto dma_mr_err;
 
@@ -150,7 +151,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
        }
        ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
 
-       params.page_shift        = PAGE_SHIFT;
+       params.page_shift        = SHIFT_4K;
        /* when the first/last SG element are not start/end *
         * page aligned, the map whould be of N+1 pages     */
        params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
@@ -604,8 +605,9 @@ int iser_reg_page_vec(struct iser_conn     *ib_conn,
 
        mem_reg->lkey  = mem->fmr->lkey;
        mem_reg->rkey  = mem->fmr->rkey;
-       mem_reg->len   = page_vec->length * PAGE_SIZE;
+       mem_reg->len   = page_vec->length * SIZE_4K;
        mem_reg->va    = io_addr;
+       mem_reg->is_fmr = 1;
        mem_reg->mem_h = (void *)mem;
 
        mem_reg->va   += page_vec->offset;
index fd8344cdc0dbcab633326255de9ae1ecc6125bd0..44b9e5be6687941757e62ac0a7fd9be231dcda68 100644 (file)
@@ -96,6 +96,8 @@ static struct ib_client srp_client = {
        .remove = srp_remove_one
 };
 
+static struct ib_sa_client srp_sa_client;
+
 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
 {
        return (struct srp_target_port *) host->hostdata;
@@ -267,7 +269,8 @@ static int srp_lookup_path(struct srp_target_port *target)
 
        init_completion(&target->done);
 
-       target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev->dev,
+       target->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
+                                                  target->srp_host->dev->dev,
                                                   target->srp_host->port,
                                                   &target->path,
                                                   IB_SA_PATH_REC_DGID          |
@@ -330,7 +333,7 @@ static int srp_send_req(struct srp_target_port *target)
        req->priv.req_buf_fmt   = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
                                              SRP_BUF_FORMAT_INDIRECT);
        /*
-        * In the published SRP specification (draft rev. 16a), the 
+        * In the published SRP specification (draft rev. 16a), the
         * port identifier format is 8 bytes of ID extension followed
         * by 8 bytes of GUID.  Older drafts put the two halves in the
         * opposite order, so that the GUID comes first.
@@ -1449,12 +1452,28 @@ static ssize_t show_zero_req_lim(struct class_device *cdev, char *buf)
        return sprintf(buf, "%d\n", target->zero_req_lim);
 }
 
-static CLASS_DEVICE_ATTR(id_ext,       S_IRUGO, show_id_ext,           NULL);
-static CLASS_DEVICE_ATTR(ioc_guid,     S_IRUGO, show_ioc_guid,         NULL);
-static CLASS_DEVICE_ATTR(service_id,   S_IRUGO, show_service_id,       NULL);
-static CLASS_DEVICE_ATTR(pkey,         S_IRUGO, show_pkey,             NULL);
-static CLASS_DEVICE_ATTR(dgid,         S_IRUGO, show_dgid,             NULL);
-static CLASS_DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim,     NULL);
+static ssize_t show_local_ib_port(struct class_device *cdev, char *buf)
+{
+       struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+       return sprintf(buf, "%d\n", target->srp_host->port);
+}
+
+static ssize_t show_local_ib_device(struct class_device *cdev, char *buf)
+{
+       struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+       return sprintf(buf, "%s\n", target->srp_host->dev->dev->name);
+}
+
+static CLASS_DEVICE_ATTR(id_ext,         S_IRUGO, show_id_ext,          NULL);
+static CLASS_DEVICE_ATTR(ioc_guid,       S_IRUGO, show_ioc_guid,        NULL);
+static CLASS_DEVICE_ATTR(service_id,     S_IRUGO, show_service_id,      NULL);
+static CLASS_DEVICE_ATTR(pkey,           S_IRUGO, show_pkey,            NULL);
+static CLASS_DEVICE_ATTR(dgid,           S_IRUGO, show_dgid,            NULL);
+static CLASS_DEVICE_ATTR(zero_req_lim,   S_IRUGO, show_zero_req_lim,    NULL);
+static CLASS_DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,  NULL);
+static CLASS_DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
 
 static struct class_device_attribute *srp_host_attrs[] = {
        &class_device_attr_id_ext,
@@ -1463,6 +1482,8 @@ static struct class_device_attribute *srp_host_attrs[] = {
        &class_device_attr_pkey,
        &class_device_attr_dgid,
        &class_device_attr_zero_req_lim,
+       &class_device_attr_local_ib_port,
+       &class_device_attr_local_ib_device,
        NULL
 };
 
@@ -1881,7 +1902,7 @@ static void srp_add_one(struct ib_device *device)
        if (IS_ERR(srp_dev->fmr_pool))
                srp_dev->fmr_pool = NULL;
 
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                s = 0;
                e = 0;
        } else {
@@ -1980,9 +2001,12 @@ static int __init srp_init_module(void)
                return ret;
        }
 
+       ib_sa_register_client(&srp_sa_client);
+
        ret = ib_register_client(&srp_client);
        if (ret) {
                printk(KERN_ERR PFX "couldn't register IB client\n");
+               ib_sa_unregister_client(&srp_sa_client);
                class_unregister(&srp_class);
                return ret;
        }
@@ -1993,6 +2017,7 @@ static int __init srp_init_module(void)
 static void __exit srp_cleanup_module(void)
 {
        ib_unregister_client(&srp_client);
+       ib_sa_unregister_client(&srp_sa_client);
        class_unregister(&srp_class);
 }
 
index c69d23bb255e54683357eed0026aaa0040e999d7..efd51e01c06eb63a731b6ad508c4f94cf49d3ce3 100644 (file)
@@ -45,8 +45,8 @@
 #include <linux/pmu.h>
 
 #include <asm/machdep.h>
-#include <asm/backlight.h>
 #ifdef CONFIG_PPC_PMAC
+#include <asm/backlight.h>
 #include <asm/pmac_feature.h>
 #endif
 
index b783a6984abc0efd6f88ab8e814a70adc3656fc9..393aba95cf12709e2621f0e070b11612fcf74c36 100644 (file)
@@ -442,16 +442,16 @@ init_rx_bufs(struct net_device *dev, int num) {
                if (rbd) {
                        rbd->pad = 0;
                        rbd->count = 0;
-                       rbd->skb = dev_alloc_skb(RX_SKB_SIZE);
+                       rbd->skb = dev_alloc_skb(RX_SKBSIZE);
                        if (!rbd->skb) {
                                printk("dev_alloc_skb failed");
                        }
                        rbd->next = rfd->rbd;
                        if (i) {
                                rfd->rbd->prev = rbd;
-                               rbd->size = RX_SKB_SIZE;
+                               rbd->size = RX_SKBSIZE;
                        } else {
-                               rbd->size = (RX_SKB_SIZE | RBD_EL);
+                               rbd->size = (RX_SKBSIZE | RBD_EL);
                                lp->rbd_tail = rbd;
                        }
 
index 38df58fdb358e8d7719fb2b577c6ccb91cf1a7c3..d4dcc856b3cd2322c30d8a4881c8b51ec491d26c 100644 (file)
@@ -385,7 +385,7 @@ static int mv643xx_eth_receive_queue(struct net_device *dev, int budget)
        struct pkt_info pkt_info;
 
        while (budget-- > 0 && eth_port_receive(mp, &pkt_info) == ETH_OK) {
-               dma_unmap_single(NULL, pkt_info.buf_ptr, RX_SKB_SIZE,
+               dma_unmap_single(NULL, pkt_info.buf_ptr, ETH_RX_SKB_SIZE,
                                                        DMA_FROM_DEVICE);
                mp->rx_desc_count--;
                received_packets++;
index a2c56b2de58984d901340ebc01bb639f25c9fc01..3305fb6079eb1d968bd8a3ee4357b6d3be2c7f74 100644 (file)
@@ -1818,7 +1818,7 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
        int n, len, insize = 0;
 
         /* Ignore all Wacom devices */
-        if (dev->descriptor.idVendor == USB_VENDOR_ID_WACOM)
+        if (le16_to_cpu(dev->descriptor.idVendor) == USB_VENDOR_ID_WACOM)
                 return NULL;
 
        for (n = 0; hid_blacklist[n].idVendor; n++)
index 390439b3d899e54ea31fb96bd21cd3128ac85bff..1b4f75d1f8a949bb3321733d0aa5e61fe7bdf04f 100644 (file)
@@ -3197,11 +3197,11 @@ static void fbcon_exit(void)
                return;
 
 #ifdef CONFIG_ATARI
-       free_irq(IRQ_AUTO_4, fbcon_vbl_handler);
+       free_irq(IRQ_AUTO_4, fb_vbl_handler);
 #endif
 #ifdef CONFIG_MAC
        if (MACH_IS_MAC && vbl_detected)
-               free_irq(IRQ_MAC_VBL, fbcon_vbl_handler);
+               free_irq(IRQ_MAC_VBL, fb_vbl_handler);
 #endif
 
        kfree((void *)softback_buf);
index 67d1e1c8813d3ac55f4504400c1e2a99be403218..4acde4f7dbf8925a159fce3a9594be4bd4727e4a 100644 (file)
@@ -1826,8 +1826,8 @@ static int __devinit riva_get_EDID_OF(struct fb_info *info, struct pci_dev *pd)
 {
        struct riva_par *par = info->par;
        struct device_node *dp;
-       unsigned char *pedid = NULL;
-       unsigned char *disptype = NULL;
+       const unsigned char *pedid = NULL;
+       const unsigned char *disptype = NULL;
        static char *propnames[] = {
                "DFP,EDID", "LCD,EDID", "EDID", "EDID1", "EDID,B", "EDID,A", NULL };
        int i;
index 0feb3bd49cb85160efb5275ef9c8c2590683408f..1eb9a2ec0a3b279a80d01baba7b87ca6d8d33705 100644 (file)
@@ -1,3 +1,7 @@
+Version 1.46
+------------
+Support deep tree mounts.  Better support OS/2, Win9x (DOS) time stamps.
+
 Version 1.45
 ------------
 Do not time out lockw calls when using posix extensions. Do not
@@ -6,7 +10,8 @@ on requests on other threads.  Improve POSIX locking emulation,
 (lock cancel now works, and unlock of merged range works even
 to Windows servers now).  Fix oops on mount to lanman servers
 (win9x, os/2 etc.) when null password.  Do not send listxattr
-(SMB to query all EAs) if nouser_xattr specified.
+(SMB to query all EAs) if nouser_xattr specified.  Fix SE Linux
+problem (instantiate inodes/dentries in right order for readdir).
 
 Version 1.44
 ------------
index ad58eb0c4d6d8d5600165490a8e0f7e7ab7f843c..fd1e52ebcee6c8fd1bc1a365ebc33353b0f7c4e4 100644 (file)
@@ -40,5 +40,7 @@ struct cifs_sb_info {
        mode_t  mnt_file_mode;
        mode_t  mnt_dir_mode;
        int     mnt_cifs_flags;
+       int     prepathlen;
+       char *  prepath;
 };
 #endif                         /* _CIFS_FS_SB_H */
index 3cd750029be29e3dadf30f41514f4698039a647d..c3ef1c0d0e684786969bdb1c1caf7e860e2afd9e 100644 (file)
@@ -189,7 +189,6 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_files = 0;       /* undefined */
        buf->f_ffree = 0;       /* unlimited */
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 /* BB we could add a second check for a QFS Unix capability bit */
 /* BB FIXME check CIFS_POSIX_EXTENSIONS Unix cap first FIXME BB */
     if ((pTcon->ses->capabilities & CAP_UNIX) && (CIFS_POSIX_EXTENSIONS &
@@ -199,7 +198,6 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
     /* Only need to call the old QFSInfo if failed
     on newer one */
     if(rc)
-#endif /* CIFS_EXPERIMENTAL */
        rc = CIFSSMBQFSInfo(xid, pTcon, buf);
 
        /* Old Windows servers do not support level 103, retry with level 
index 39ee8ef3bdeb01a63eba58d56611cee6211c7c40..bea875d9a46acda0578f7c92f7c84232824a0ff0 100644 (file)
@@ -100,5 +100,5 @@ extern ssize_t      cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
 extern int cifs_ioctl (struct inode * inode, struct file * filep,
                       unsigned int command, unsigned long arg);
-#define CIFS_VERSION   "1.45"
+#define CIFS_VERSION   "1.46"
 #endif                         /* _CIFSFS_H */
index 86239023545b2d65dd6bc7240df7c5d2b88f9614..81df2bf8e75a70d222f85757c5afdd1df9f20246 100644 (file)
@@ -1344,6 +1344,7 @@ struct smb_t2_rsp {
 #define SMB_QUERY_ATTR_FLAGS            0x206  /* append,immutable etc. */
 #define SMB_QUERY_POSIX_PERMISSION      0x207
 #define SMB_QUERY_POSIX_LOCK            0x208
+/* #define SMB_POSIX_OPEN              0x209 */
 #define SMB_QUERY_FILE_INTERNAL_INFO    0x3ee
 #define SMB_QUERY_FILE_ACCESS_INFO      0x3f0
 #define SMB_QUERY_FILE_NAME_INFO2       0x3f1 /* 0x30 bytes */
@@ -1363,6 +1364,7 @@ struct smb_t2_rsp {
 #define SMB_SET_XATTR                   0x205
 #define SMB_SET_ATTR_FLAGS              0x206  /* append, immutable etc. */
 #define SMB_SET_POSIX_LOCK              0x208
+#define SMB_POSIX_OPEN                  0x209
 #define SMB_SET_FILE_BASIC_INFO2        0x3ec
 #define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo level too */
 #define SMB_FILE_ALL_INFO2              0x3fa
index 5d394c726860740316e6fb96172aeccc888d6214..0e9ba0b9d71eb008138eab55841bffe5d73be196 100644 (file)
@@ -89,6 +89,7 @@ struct smb_vol {
        unsigned int wsize;
        unsigned int sockopt;
        unsigned short int port;
+       char * prepath;
 };
 
 static int ipv4_connect(struct sockaddr_in *psin_server, 
@@ -993,6 +994,28 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
                                printk(KERN_WARNING "CIFS: domain name too long\n");
                                return 1;
                        }
+                } else if (strnicmp(data, "prefixpath", 10) == 0) {
+                        if (!value || !*value) {
+                                printk(KERN_WARNING
+                                       "CIFS: invalid path prefix\n");
+                                return 1;       /* needs_arg; */
+                        }
+                        if ((temp_len = strnlen(value, 1024)) < 1024) {
+                               if(value[0] != '/')
+                                       temp_len++;  /* missing leading slash */
+                                vol->prepath = kmalloc(temp_len+1,GFP_KERNEL);
+                                if(vol->prepath == NULL)
+                                        return 1;
+                               if(value[0] != '/') {
+                                       vol->prepath[0] = '/';
+                                       strcpy(vol->prepath+1,value);
+                               } else
+                                       strcpy(vol->prepath,value);
+                               cFYI(1,("prefix path %s",vol->prepath));
+                        } else {
+                                printk(KERN_WARNING "CIFS: prefix too long\n");
+                                return 1;
+                        }
                } else if (strnicmp(data, "iocharset", 9) == 0) {
                        if (!value || !*value) {
                                printk(KERN_WARNING "CIFS: invalid iocharset specified\n");
@@ -1605,6 +1628,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
        if (cifs_parse_mount_options(mount_data, devname, &volume_info)) {
                kfree(volume_info.UNC);
                kfree(volume_info.password);
+               kfree(volume_info.prepath);
                FreeXid(xid);
                return -EINVAL;
        }
@@ -1619,6 +1643,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
            locations such as env variables and files on disk */
                kfree(volume_info.UNC);
                kfree(volume_info.password);
+               kfree(volume_info.prepath);
                FreeXid(xid);
                return -EINVAL;
        }
@@ -1639,6 +1664,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                        /* we failed translating address */
                        kfree(volume_info.UNC);
                        kfree(volume_info.password);
+                       kfree(volume_info.prepath);
                        FreeXid(xid);
                        return -EINVAL;
                }
@@ -1651,6 +1677,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                cERROR(1,("Connecting to DFS root not implemented yet"));
                kfree(volume_info.UNC);
                kfree(volume_info.password);
+               kfree(volume_info.prepath);
                FreeXid(xid);
                return -EINVAL;
        } else /* which servers DFS root would we conect to */ {
@@ -1658,6 +1685,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                       ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified"));
                kfree(volume_info.UNC);
                kfree(volume_info.password);
+               kfree(volume_info.prepath);
                FreeXid(xid);
                return -EINVAL;
        }
@@ -1672,6 +1700,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                        cERROR(1,("CIFS mount error: iocharset %s not found",volume_info.iocharset));
                        kfree(volume_info.UNC);
                        kfree(volume_info.password);
+                       kfree(volume_info.prepath);
                        FreeXid(xid);
                        return -ELIBACC;
                }
@@ -1688,6 +1717,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
        else {
                kfree(volume_info.UNC);
                kfree(volume_info.password);
+               kfree(volume_info.prepath);
                FreeXid(xid);
                return -EINVAL;
        }
@@ -1710,6 +1740,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                                sock_release(csocket);
                        kfree(volume_info.UNC);
                        kfree(volume_info.password);
+                       kfree(volume_info.prepath);
                        FreeXid(xid);
                        return rc;
                }
@@ -1720,6 +1751,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                        sock_release(csocket);
                        kfree(volume_info.UNC);
                        kfree(volume_info.password);
+                       kfree(volume_info.prepath);
                        FreeXid(xid);
                        return rc;
                } else {
@@ -1744,6 +1776,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                                sock_release(csocket);
                                kfree(volume_info.UNC);
                                kfree(volume_info.password);
+                               kfree(volume_info.prepath);
                                FreeXid(xid);
                                return rc;
                        }
@@ -1831,6 +1864,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                        /* Windows ME may prefer this */
                        cFYI(1,("readsize set to minimum 2048"));
                }
+               /* calculate prepath */
+               cifs_sb->prepath = volume_info.prepath;
+               if(cifs_sb->prepath) {
+                       cifs_sb->prepathlen = strlen(cifs_sb->prepath);
+                       cifs_sb->prepath[0] = CIFS_DIR_SEP(cifs_sb);
+                       volume_info.prepath = NULL;
+               } else 
+                       cifs_sb->prepathlen = 0;
                cifs_sb->mnt_uid = volume_info.linux_uid;
                cifs_sb->mnt_gid = volume_info.linux_gid;
                cifs_sb->mnt_file_mode = volume_info.file_mode;
@@ -2008,6 +2049,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
        the password ptr is put in the new session structure (in which case the
        password will be freed at unmount time) */
        kfree(volume_info.UNC);
+       kfree(volume_info.prepath);
        FreeXid(xid);
        return rc;
 }
@@ -3195,6 +3237,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
        int xid;
        struct cifsSesInfo *ses = NULL;
        struct task_struct *cifsd_task;
+       char * tmp;
 
        xid = GetXid();
 
@@ -3228,6 +3271,10 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
        }
        
        cifs_sb->tcon = NULL;
+       tmp = cifs_sb->prepath;
+       cifs_sb->prepathlen = 0;
+       cifs_sb->prepath = NULL;
+       kfree(tmp);
        if (ses)
                schedule_timeout_interruptible(msecs_to_jiffies(500));
        if (ses)
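The prefixpath= option parsed above is what implements the deep tree mounts noted in the 1.46 changelog: it attaches the mount below the root of the share. A hypothetical invocation (not taken from this patch) would be: mount -t cifs //server/share /mnt -o user=someone,prefixpath=dir1/dir2. The leading slash on the value is optional, since the parser inserts one when it is missing and cifs_mount later rewrites byte 0 to the directory separator configured for the superblock.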
index 914239d53634dce734a66a1a2ceee6aa8a00f854..66b825ade3e194b9176717275d29ce2731730c4f 100644 (file)
@@ -46,7 +46,8 @@ char *
 build_path_from_dentry(struct dentry *direntry)
 {
        struct dentry *temp;
-       int namelen = 0;
+       int namelen;
+       int pplen;
        char *full_path;
        char dirsep;
 
@@ -56,7 +57,9 @@ build_path_from_dentry(struct dentry *direntry)
                when the server crashed */
 
        dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb));
+       pplen = CIFS_SB(direntry->d_sb)->prepathlen;
 cifs_bp_rename_retry:
+       namelen = pplen; 
        for (temp = direntry; !IS_ROOT(temp);) {
                namelen += (1 + temp->d_name.len);
                temp = temp->d_parent;
@@ -70,7 +73,6 @@ cifs_bp_rename_retry:
        if(full_path == NULL)
                return full_path;
        full_path[namelen] = 0; /* trailing null */
-
        for (temp = direntry; !IS_ROOT(temp);) {
                namelen -= 1 + temp->d_name.len;
                if (namelen < 0) {
@@ -79,7 +81,7 @@ cifs_bp_rename_retry:
                        full_path[namelen] = dirsep;
                        strncpy(full_path + namelen + 1, temp->d_name.name,
                                temp->d_name.len);
-                       cFYI(0, (" name: %s ", full_path + namelen));
+                       cFYI(0, ("name: %s", full_path + namelen));
                }
                temp = temp->d_parent;
                if(temp == NULL) {
@@ -88,18 +90,23 @@ cifs_bp_rename_retry:
                        return NULL;
                }
        }
-       if (namelen != 0) {
+       if (namelen != pplen) {
                cERROR(1,
-                      ("We did not end path lookup where we expected namelen is %d",
+                      ("did not end path lookup where expected namelen is %d",
                        namelen));
-               /* presumably this is only possible if we were racing with a rename 
+               /* presumably this is only possible if racing with a rename 
                of one of the parent directories  (we can not lock the dentries
                above us to prevent this, but retrying should be harmless) */
                kfree(full_path);
-               namelen = 0;
                goto cifs_bp_rename_retry;
        }
-
+       /* DIR_SEP already set for byte  0 / vs \ but not for
+          subsequent slashes in prepath which currently must
+          be entered the right way - not sure if there is an alternative
+          since the '\' is a valid posix character so we can not switch
+          those safely to '/' if any are found in the middle of the prepath */
+       /* BB test paths to Windows with '/' in the midst of prepath */
+       strncpy(full_path,CIFS_SB(direntry->d_sb)->prepath,pplen);
        return full_path;
 }
 
index e9c5ba9084fc6a477ea13fdd26c0a666ddebc2f3..ddb012a68023fe0818d50670f2fb81a990dbab56 100644 (file)
@@ -752,6 +752,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
                        int stored_rc = 0;
                        struct cifsLockInfo *li, *tmp;
 
+                       rc = 0;
                        down(&fid->lock_sem);
                        list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
                                if (pfLock->fl_start <= li->offset &&
@@ -766,7 +767,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
                                        kfree(li);
                                }
                        }
-               up(&fid->lock_sem);
+                       up(&fid->lock_sem);
                }
        }
 
index 067648b7179b5b6b7ad390c406ff8233b9bdea78..18fcec190f8b5c6e4fdb6f5856ed114a473a411e 100644 (file)
@@ -269,7 +269,7 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
                                rc = CIFSSMBGetCIFSACL(xid, pTcon, fid,
                                        ea_value, buf_size,
                                        ACL_TYPE_ACCESS);
-                               CIFSSMBClose(xid, pTcon, fid)
+                               CIFSSMBClose(xid, pTcon, fid);
                        }
                } */  /* BB enable after fixing up return data */
                                
index cb05bf69745a5fbf12491b71aaa3e8b7cb56131c..50764550a60c388d858293e3395a68f1d87e4ffb 100644 (file)
@@ -1,16 +1,20 @@
 __NR_chmod,
 __NR_fchmod,
+#ifdef __NR_chown
 __NR_chown,
 __NR_fchown,
 __NR_lchown,
+#endif
 __NR_setxattr,
 __NR_lsetxattr,
 __NR_fsetxattr,
 __NR_removexattr,
 __NR_lremovexattr,
 __NR_fremovexattr,
+#ifdef __NR_fchownat
 __NR_fchownat,
 __NR_fchmodat,
+#endif
 #ifdef __NR_chown32
 __NR_chown32,
 __NR_fchown32,
index 161a7a58fbab51b2261ffcaa08c0986466d8846a..6621bd82cbe82319dc075c42f557bfb3c663555a 100644 (file)
@@ -1,14 +1,18 @@
 __NR_rename,
 __NR_mkdir,
 __NR_rmdir,
+#ifdef __NR_creat
 __NR_creat,
+#endif
 __NR_link,
 __NR_unlink,
 __NR_symlink,
 __NR_mknod,
+#ifdef __NR_mkdirat
 __NR_mkdirat,
 __NR_mknodat,
 __NR_unlinkat,
 __NR_renameat,
 __NR_linkat,
 __NR_symlinkat,
+#endif
index cf62b69cb69a12b42e18dcaace44cb82287f28d3..499c14691c71dfe7ca8a9c043def011438d099af 100644 (file)
@@ -86,7 +86,7 @@ void ppc4xx_init(unsigned long r3, unsigned long r4, unsigned long r5,
 #define PCI_DRAM_OFFSET        0
 #endif
 
-#elif CONFIG_44x
+#elif defined(CONFIG_44x)
 
 #if defined(CONFIG_BAMBOO)
 #include <platforms/4xx/bamboo.h>
index 851aa1bcfc1a25c65db7fd2829e1844fb6086e33..2b2ae4fdce8bbd689e5be2fe52be1ccc3add879b 100644 (file)
@@ -31,7 +31,7 @@ extern const char linux_banner[];
 #define STACK_MAGIC    0xdeadbeef
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
+#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL))
 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
 
index f0b135cd86da1e54789bce46eb7bb8f6487edcaa..224178a000d2e1930c13b5db60642de3d0bd4ff6 100644 (file)
@@ -1013,6 +1013,7 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
        return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 }
 
+pgprot_t vm_get_page_prot(unsigned long vm_flags);
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 struct page *vmalloc_to_page(void *addr);
 unsigned long vmalloc_to_pfn(void *addr);
index 0ff67398928d3f407546468f552e59bb518c271c..81b62307621d26209236ffaf6ddaa751597face5 100644 (file)
@@ -40,7 +40,7 @@ struct rdma_dev_addr {
        unsigned char src_dev_addr[MAX_ADDR_LEN];
        unsigned char dst_dev_addr[MAX_ADDR_LEN];
        unsigned char broadcast[MAX_ADDR_LEN];
-       enum ib_node_type dev_type;
+       enum rdma_node_type dev_type;
 };
 
 /**
@@ -72,6 +72,9 @@ int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
 
 void rdma_addr_cancel(struct rdma_dev_addr *addr);
 
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+             const unsigned char *dst_dev_addr);
+
 static inline int ip_addr_size(struct sockaddr *addr)
 {
        return addr->sa_family == AF_INET6 ?
@@ -113,4 +116,16 @@ static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr,
        memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid);
 }
 
+static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr,
+                                   union ib_gid *gid)
+{
+       memcpy(gid, dev_addr->src_dev_addr, sizeof *gid);
+}
+
+static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr,
+                                   union ib_gid *gid)
+{
+       memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid);
+}
+
 #endif /* IB_ADDR_H */
index c99e4420fd7ec308d9d2826c0bdd0493312bc5dd..97715b0c20b69f6e50b75b2c03e24fa50758fdb6 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
 #ifndef IB_SA_H
 #define IB_SA_H
 
+#include <linux/completion.h>
 #include <linux/compiler.h>
 
+#include <asm/atomic.h>
+
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_mad.h>
 
@@ -79,8 +83,8 @@ enum {
 };
 
 enum ib_sa_selector {
-       IB_SA_GTE  = 0,
-       IB_SA_LTE  = 1,
+       IB_SA_GT   = 0,
+       IB_SA_LT   = 1,
        IB_SA_EQ   = 2,
        /*
         * The meaning of "best" depends on the attribute: for
@@ -250,11 +254,28 @@ struct ib_sa_service_rec {
        u64             data64[2];
 };
 
+struct ib_sa_client {
+       atomic_t users;
+       struct completion comp;
+};
+
+/**
+ * ib_sa_register_client - Register an SA client.
+ */
+void ib_sa_register_client(struct ib_sa_client *client);
+
+/**
+ * ib_sa_unregister_client - Deregister an SA client.
+ * @client: Client object to deregister.
+ */
+void ib_sa_unregister_client(struct ib_sa_client *client);
+
 struct ib_sa_query;
 
 void ib_sa_cancel_query(int id, struct ib_sa_query *query);
 
-int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+                      struct ib_device *device, u8 port_num,
                       struct ib_sa_path_rec *rec,
                       ib_sa_comp_mask comp_mask,
                       int timeout_ms, gfp_t gfp_mask,
@@ -264,7 +285,8 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
                       void *context,
                       struct ib_sa_query **query);
 
-int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+                            struct ib_device *device, u8 port_num,
                             u8 method,
                             struct ib_sa_mcmember_rec *rec,
                             ib_sa_comp_mask comp_mask,
@@ -275,7 +297,8 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
                             void *context,
                             struct ib_sa_query **query);
 
-int ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+                        struct ib_device *device, u8 port_num,
                         u8 method,
                         struct ib_sa_service_rec *rec,
                         ib_sa_comp_mask comp_mask,
@@ -288,6 +311,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
 
 /**
  * ib_sa_mcmember_rec_set - Start an MCMember set query
+ * @client:SA client
  * @device:device to send query on
  * @port_num: port number to send query on
  * @rec:MCMember Record to send in query
@@ -311,7 +335,8 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
  * cancel the query.
  */
 static inline int
-ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
+ib_sa_mcmember_rec_set(struct ib_sa_client *client,
+                      struct ib_device *device, u8 port_num,
                       struct ib_sa_mcmember_rec *rec,
                       ib_sa_comp_mask comp_mask,
                       int timeout_ms, gfp_t gfp_mask,
@@ -321,7 +346,7 @@ ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
                       void *context,
                       struct ib_sa_query **query)
 {
-       return ib_sa_mcmember_rec_query(device, port_num,
+       return ib_sa_mcmember_rec_query(client, device, port_num,
                                        IB_MGMT_METHOD_SET,
                                        rec, comp_mask,
                                        timeout_ms, gfp_mask, callback,
@@ -330,6 +355,7 @@ ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
 
 /**
  * ib_sa_mcmember_rec_delete - Start an MCMember delete query
+ * @client:SA client
  * @device:device to send query on
  * @port_num: port number to send query on
  * @rec:MCMember Record to send in query
@@ -353,7 +379,8 @@ ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
  * cancel the query.
  */
 static inline int
-ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num,
+ib_sa_mcmember_rec_delete(struct ib_sa_client *client,
+                         struct ib_device *device, u8 port_num,
                          struct ib_sa_mcmember_rec *rec,
                          ib_sa_comp_mask comp_mask,
                          int timeout_ms, gfp_t gfp_mask,
@@ -363,7 +390,7 @@ ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num,
                          void *context,
                          struct ib_sa_query **query)
 {
-       return ib_sa_mcmember_rec_query(device, port_num,
+       return ib_sa_mcmember_rec_query(client, device, port_num,
                                        IB_SA_METHOD_DELETE,
                                        rec, comp_mask,
                                        timeout_ms, gfp_mask, callback,
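The new ib_sa_client is a small reference-counted handle (the users/comp pair above) that every SA consumer now passes into its queries. A minimal sketch of the registration pattern, mirroring the SRP module changes earlier in this series; the module and function names here are hypothetical:

#include <linux/module.h>
#include <rdma/ib_sa.h>

static struct ib_sa_client my_sa_client;

static int __init my_module_init(void)
{
        ib_sa_register_client(&my_sa_client);
        /* ... register the ib_client that will issue SA queries ... */
        return 0;
}

static void __exit my_module_exit(void)
{
        /* unregister the ib_client first, then drop the SA client */
        ib_sa_unregister_client(&my_sa_client);
}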
index 7b5372010f4b0a5168f5b7226389b990fa62aa92..db1b814b62cca06583b9d3f53dd96194ec653d90 100644 (file)
@@ -275,6 +275,8 @@ struct ib_uverbs_resize_cq {
 
 struct ib_uverbs_resize_cq_resp {
        __u32 cqe;
+       __u32 reserved;
+       __u64 driver_data[0];
 };
 
 struct ib_uverbs_poll_cq {
index ee1f3a355666dbddaa388fc3d613bc329c24c9d3..8eacc3510993ff9fdbccd7f1b372e0fba71262b5 100644 (file)
@@ -56,12 +56,22 @@ union ib_gid {
        } global;
 };
 
-enum ib_node_type {
-       IB_NODE_CA      = 1,
-       IB_NODE_SWITCH,
-       IB_NODE_ROUTER
+enum rdma_node_type {
+       /* IB values map to NodeInfo:NodeType. */
+       RDMA_NODE_IB_CA         = 1,
+       RDMA_NODE_IB_SWITCH,
+       RDMA_NODE_IB_ROUTER,
+       RDMA_NODE_RNIC
 };
 
+enum rdma_transport_type {
+       RDMA_TRANSPORT_IB,
+       RDMA_TRANSPORT_IWARP
+};
+
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__;
+
 enum ib_device_cap_flags {
        IB_DEVICE_RESIZE_MAX_WR         = 1,
        IB_DEVICE_BAD_PKEY_CNTR         = (1<<1),
@@ -78,6 +88,9 @@ enum ib_device_cap_flags {
        IB_DEVICE_RC_RNR_NAK_GEN        = (1<<12),
        IB_DEVICE_SRQ_RESIZE            = (1<<13),
        IB_DEVICE_N_NOTIFY_CQ           = (1<<14),
+       IB_DEVICE_ZERO_STAG             = (1<<15),
+       IB_DEVICE_SEND_W_INV            = (1<<16),
+       IB_DEVICE_MEM_WINDOW            = (1<<17)
 };
 
 enum ib_atomic_cap {
@@ -835,6 +848,8 @@ struct ib_cache {
        u8                     *lmc_cache;
 };
 
+struct iw_cm_verbs;
+
 struct ib_device {
        struct device                *dma_device;
 
@@ -851,6 +866,8 @@ struct ib_device {
 
        u32                           flags;
 
+       struct iw_cm_verbs           *iwcm;
+
        int                        (*query_device)(struct ib_device *device,
                                                   struct ib_device_attr *device_attr);
        int                        (*query_port)(struct ib_device *device,
@@ -888,7 +905,8 @@ struct ib_device {
                                                 struct ib_udata *udata);
        int                        (*modify_srq)(struct ib_srq *srq,
                                                 struct ib_srq_attr *srq_attr,
-                                                enum ib_srq_attr_mask srq_attr_mask);
+                                                enum ib_srq_attr_mask srq_attr_mask,
+                                                struct ib_udata *udata);
        int                        (*query_srq)(struct ib_srq *srq,
                                                struct ib_srq_attr *srq_attr);
        int                        (*destroy_srq)(struct ib_srq *srq);
@@ -900,7 +918,8 @@ struct ib_device {
                                                struct ib_udata *udata);
        int                        (*modify_qp)(struct ib_qp *qp,
                                                struct ib_qp_attr *qp_attr,
-                                               int qp_attr_mask);
+                                               int qp_attr_mask,
+                                               struct ib_udata *udata);
        int                        (*query_qp)(struct ib_qp *qp,
                                               struct ib_qp_attr *qp_attr,
                                               int qp_attr_mask,
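rdma_node_get_transport() gives callers a transport-level view of the device instead of switching on individual node types. A hedged sketch of the kind of branching it enables (the surrounding function is illustrative, not from this patch):

#include <rdma/ib_verbs.h>

static int my_setup_device(struct ib_device *device)
{
        switch (rdma_node_get_transport(device->node_type)) {
        case RDMA_TRANSPORT_IB:
                /* InfiniBand: paths come from the SA, connections from ib_cm */
                return 0;
        case RDMA_TRANSPORT_IWARP:
                /* iWARP: connections are driven through device->iwcm */
                return 0;
        }
        return -EINVAL;
}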
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
new file mode 100644 (file)
index 0000000..aeefa9b
--- /dev/null
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef IW_CM_H
+#define IW_CM_H
+
+#include <linux/in.h>
+#include <rdma/ib_cm.h>
+
+struct iw_cm_id;
+
+enum iw_cm_event_type {
+       IW_CM_EVENT_CONNECT_REQUEST = 1, /* connect request received */
+       IW_CM_EVENT_CONNECT_REPLY,       /* reply from active connect request */
+       IW_CM_EVENT_ESTABLISHED,         /* passive side accept successful */
+       IW_CM_EVENT_DISCONNECT,          /* orderly shutdown */
+       IW_CM_EVENT_CLOSE                /* close complete */
+};
+
+enum iw_cm_event_status {
+       IW_CM_EVENT_STATUS_OK = 0,       /* request successful */
+       IW_CM_EVENT_STATUS_ACCEPTED = 0, /* connect request accepted */
+       IW_CM_EVENT_STATUS_REJECTED,     /* connect request rejected */
+       IW_CM_EVENT_STATUS_TIMEOUT,      /* the operation timed out */
+       IW_CM_EVENT_STATUS_RESET,        /* reset from remote peer */
+       IW_CM_EVENT_STATUS_EINVAL,       /* asynchronous failure for bad parm */
+};
+
+struct iw_cm_event {
+       enum iw_cm_event_type event;
+       enum iw_cm_event_status status;
+       struct sockaddr_in local_addr;
+       struct sockaddr_in remote_addr;
+       void *private_data;
+       u8 private_data_len;
+       void* provider_data;
+};
+
+/**
+ * iw_cm_handler - Function to be called by the IW CM when delivering events
+ * to the client.
+ *
+ * @cm_id: The IW CM identifier associated with the event.
+ * @event: Pointer to the event structure.
+ */
+typedef int (*iw_cm_handler)(struct iw_cm_id *cm_id,
+                            struct iw_cm_event *event);
+
+/**
+ * iw_event_handler - Function called by the provider when delivering provider
+ * events to the IW CM.  Returns either 0 indicating the event was processed
+ * or -errno if the event could not be processed.
+ *
+ * @cm_id: The IW CM identifier associated with the event.
+ * @event: Pointer to the event structure.
+ */
+typedef int (*iw_event_handler)(struct iw_cm_id *cm_id,
+                                struct iw_cm_event *event);
+
+struct iw_cm_id {
+       iw_cm_handler           cm_handler;      /* client callback function */
+       void                    *context;        /* client cb context */
+       struct ib_device        *device;
+       struct sockaddr_in      local_addr;
+       struct sockaddr_in      remote_addr;
+       void                    *provider_data;  /* provider private data */
+       iw_event_handler        event_handler;   /* cb for provider
+                                                   events */
+       /* Used by provider to add and remove refs on IW cm_id */
+       void (*add_ref)(struct iw_cm_id *);
+       void (*rem_ref)(struct iw_cm_id *);
+};
+
+struct iw_cm_conn_param {
+       const void *private_data;
+       u16 private_data_len;
+       u32 ord;
+       u32 ird;
+       u32 qpn;
+};
+
+struct iw_cm_verbs {
+       void            (*add_ref)(struct ib_qp *qp);
+
+       void            (*rem_ref)(struct ib_qp *qp);
+
+       struct ib_qp *  (*get_qp)(struct ib_device *device,
+                                 int qpn);
+
+       int             (*connect)(struct iw_cm_id *cm_id,
+                                  struct iw_cm_conn_param *conn_param);
+
+       int             (*accept)(struct iw_cm_id *cm_id,
+                                 struct iw_cm_conn_param *conn_param);
+
+       int             (*reject)(struct iw_cm_id *cm_id,
+                                 const void *pdata, u8 pdata_len);
+
+       int             (*create_listen)(struct iw_cm_id *cm_id,
+                                        int backlog);
+
+       int             (*destroy_listen)(struct iw_cm_id *cm_id);
+};
+
+/**
+ * iw_create_cm_id - Create an IW CM identifier.
+ *
+ * @device: The IB device on which to create the IW CM identifier.
+ * @cm_handler: User callback invoked to report events associated with the
+ *   returned IW CM identifier.
+ * @context: User specified context associated with the id.
+ */
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+                                iw_cm_handler cm_handler, void *context);
+
+/**
+ * iw_destroy_cm_id - Destroy an IW CM identifier.
+ *
+ * @cm_id: The previously created IW CM identifier to destroy.
+ *
+ * The client can assume that no events will be delivered for the CM ID after
+ * this function returns.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id);
+
+/**
+ * iw_cm_unbind_qp - Unbind the specified IW CM identifier and QP
+ *
+ * @cm_id: The IW CM identifier to unbind from the QP.
+ * @qp: The QP
+ *
+ * This is called by the provider when destroying the QP to ensure
+ * that any references held by the IWCM are released. It may also
+ * be called by the IWCM when destroying a CM_ID so that any
+ * references held by the provider are released.
+ */
+void iw_cm_unbind_qp(struct iw_cm_id *cm_id, struct ib_qp *qp);
+
+/**
+ * iw_cm_get_qp - Return the ib_qp associated with a QPN
+ *
+ * @ib_device: The IB device
+ * @qpn: The queue pair number
+ */
+struct ib_qp *iw_cm_get_qp(struct ib_device *device, int qpn);
+
+/**
+ * iw_cm_listen - Listen for incoming connection requests on the
+ * specified IW CM id.
+ *
+ * @cm_id: The IW CM identifier.
+ * @backlog: The maximum number of outstanding un-accepted inbound listen
+ *   requests to queue.
+ *
+ * The source address and port number are specified in the IW CM identifier
+ * structure.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog);
+
+/**
+ * iw_cm_accept - Called to accept an incoming connect request.
+ *
+ * @cm_id: The IW CM identifier associated with the connection request.
+ * @iw_param: Pointer to a structure containing connection establishment
+ *   parameters.
+ *
+ * The specified cm_id will have been provided in the event data for a
+ * CONNECT_REQUEST event. Subsequent events related to this connection will be
+ * delivered to the specified IW CM identifier, and may occur prior to
+ * the return of this function. If this function returns a non-zero value, the
+ * client can assume that no events will be delivered to the specified IW CM
+ * identifier.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param);
+
+/**
+ * iw_cm_reject - Reject an incoming connection request.
+ *
+ * @cm_id: Connection identifier associated with the request.
+ * @private_data: Pointer to data to deliver to the remote peer as part of the
+ *   reject message.
+ * @private_data_len: The number of bytes in the private_data parameter.
+ *
+ * The client can assume that no events will be delivered to the specified IW
+ * CM identifier following the return of this function. The private_data
+ * buffer is available for reuse when this function returns.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data,
+                u8 private_data_len);
+
+/**
+ * iw_cm_connect - Called to request a connection to a remote peer.
+ *
+ * @cm_id: The IW CM identifier for the connection.
+ * @iw_param: Pointer to a structure containing connection establishment
+ *   parameters.
+ *
+ * Events may be delivered to the specified IW CM identifier prior to the
+ * return of this function. If this function returns a non-zero value, the
+ * client can assume that no events will be delivered to the specified IW CM
+ * identifier.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param);
+
+/**
+ * iw_cm_disconnect - Close the specified connection.
+ *
+ * @cm_id: The IW CM identifier to close.
+ * @abrupt: If 0, the connection will be closed gracefully, otherwise, the
+ *   connection will be reset.
+ *
+ * The IW CM identifier is still active until the IW_CM_EVENT_CLOSE event is
+ * delivered.
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt);
+
+/**
+ * iw_cm_init_qp_attr - Called to initialize the attributes of the QP
+ * associated with an IW CM identifier.
+ *
+ * @cm_id: The IW CM identifier associated with the QP
+ * @qp_attr: Pointer to the QP attributes structure.
+ * @qp_attr_mask: Pointer to a bit vector specifying which QP attributes are
+ *   valid.
+ */
+int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr,
+                      int *qp_attr_mask);
+
+#endif /* IW_CM_H */
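Taken together these entry points let a passive-side consumer drive a small connection state machine. The sketch below is an illustration under stated assumptions (the QP number, the bound address, and the exact cm_id handed to the handler on CONNECT_REQUEST are not spelled out by this header), not code from the patch:

#include <rdma/iw_cm.h>

static u32 my_qpn;                        /* hypothetical: number of a QP created elsewhere */
static struct sockaddr_in my_listen_addr; /* hypothetical bound address */

static int my_cm_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event)
{
        struct iw_cm_conn_param param = {
                .ord = 1,
                .ird = 1,
                .qpn = my_qpn,
        };

        switch (event->event) {
        case IW_CM_EVENT_CONNECT_REQUEST:
                return iw_cm_accept(cm_id, &param);
        case IW_CM_EVENT_DISCONNECT:
                return iw_cm_disconnect(cm_id, 0);
        default:
                return 0;
        }
}

static int my_start_listening(struct ib_device *device)
{
        struct iw_cm_id *id = iw_create_cm_id(device, my_cm_handler, NULL);

        if (IS_ERR(id))
                return PTR_ERR(id);
        id->local_addr = my_listen_addr;
        return iw_cm_listen(id, 8);
}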
index 402c63d7226ba79cba167e8e9a7e8c8f0b6fafa2..deb5a0a4cee5c8732fb7adb5a092ef46b0363b1d 100644 (file)
@@ -117,6 +117,14 @@ struct rdma_cm_id {
 struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
                                  void *context, enum rdma_port_space ps);
 
+/**
+ * rdma_destroy_id - Destroys an RDMA identifier.
+ *
+ * @id: RDMA identifier.
+ *
+ * Note: calling this function has the effect of canceling in-flight
+ * asynchronous operations associated with the id.
+ */
 void rdma_destroy_id(struct rdma_cm_id *id);
 
 /**
@@ -237,6 +245,10 @@ int rdma_listen(struct rdma_cm_id *id, int backlog);
  * Typically, this routine is only called by the listener to accept a connection
  * request.  It must also be called on the active side of a connection if the
  * user is performing their own QP transitions.
+ *
+ * In the case of error, a reject message is sent to the remote side and the
+ * state of the qp associated with the id is modified to error, such that any
+ * previously posted receive buffers would be flushed.
  */
 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
 
index 8c21625ef93823ebf7f9a574236d71e009e0bb58..3b1289fadf06bc7594cc59334f4256eabadfa676 100644 (file)
@@ -28,8 +28,10 @@ int audit_classify_syscall(int abi, unsigned syscall)
        switch(syscall) {
        case __NR_open:
                return 2;
+#ifdef __NR_openat
        case __NR_openat:
                return 3;
+#endif
 #ifdef __NR_socketcall
        case __NR_socketcall:
                return 4;
index e66a0b524affef544588a8c4f8a92d3473b79a34..d799d896d74aae5f19c604928eb6a52738676d66 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -64,6 +64,13 @@ pgprot_t protection_map[16] = {
        __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
 };
 
+pgprot_t vm_get_page_prot(unsigned long vm_flags)
+{
+       return protection_map[vm_flags &
+                               (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+}
+EXPORT_SYMBOL(vm_get_page_prot);
+
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50;      /* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
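vm_get_page_prot() exists so that modules can translate VM_* flags into a pgprot_t without reaching into protection_map themselves, which is why it is exported. A hedged sketch of typical use in a driver's mmap handler (the function and my_pfn are hypothetical):

#include <linux/fs.h>
#include <linux/mm.h>

static unsigned long my_pfn;    /* hypothetical: page frame to expose */

static int my_mmap(struct file *file, struct vm_area_struct *vma)
{
        vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
        return remap_pfn_range(vma, vma->vm_start, my_pfn,
                               vma->vm_end - vma->vm_start,
                               vma->vm_page_prot);
}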
index 2f4334d19ccd2eb475e06cea9d61d0b47a6fbcba..5d5813cec4c859a1dafd4dc2d38d45029adf91c9 100644 (file)
@@ -1,5 +1,5 @@
 menu "Apple Onboard Audio driver"
-       depends on SND!=n && PPC
+       depends on SND!=n && PPC_PMAC
 
 config SND_AOA
        tristate "Apple Onboard Audio driver"