#include <linux/err.h>
#include <linux/export.h>
#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
#include <linux/seq_file.h>
#include <linux/pci.h>
#include <linux/msi.h>
+#include <asm/isc.h>
+#include <asm/airq.h>
#include <asm/facility.h>
#include <asm/pci_insn.h>
#include <asm/pci_clp.h>
#define DEBUG /* enable pr_debug */
+#define SIC_IRQ_MODE_ALL 0
+#define SIC_IRQ_MODE_SINGLE 1
+
#define ZPCI_NR_DMA_SPACES 1
+#define ZPCI_MSI_VEC_BITS 6
#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
/* list of all detected zpci devices */
static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
static DEFINE_SPINLOCK(zpci_domain_lock);
+struct callback {
+ irq_handler_t handler;
+ void *data;
+};
+
+struct zdev_irq_map {
+ unsigned long aibv; /* AI bit vector */
+ int msi_vecs; /* consecutive MSI-vectors used */
+ int __unused;
+ struct callback cb[ZPCI_NR_MSI_VECS]; /* callback handler array */
+ spinlock_t lock; /* protect callbacks against de-reg */
+};
+
+struct intr_bucket {
+ /* amap of adapters, one bit per dev, corresponds to one irq nr */
+ unsigned long *alloc;
+ /* AI summary bit, global page for all devices */
+ unsigned long *aisb;
+ /* pointer to aibv and callback data in zdev */
+ struct zdev_irq_map *imap[ZPCI_NR_DEVICES];
+ /* protects the whole bucket struct */
+ spinlock_t lock;
+};
+
+static struct intr_bucket *bucket;
+
+/* Adapter local summary indicator */
+static u8 *zpci_irq_si;
+
+static atomic_t irq_retries = ATOMIC_INIT(0);
+
/* I/O Map */
static DEFINE_SPINLOCK(zpci_iomap_lock);
static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
struct zpci_iomap_entry *zpci_iomap_start;
EXPORT_SYMBOL_GPL(zpci_iomap_start);
+/* highest irq summary bit */
+static int __read_mostly aisb_max;
+
+static struct kmem_cache *zdev_irq_cache;
+
+static inline int irq_to_msi_nr(unsigned int irq)
+{
+ return irq & ZPCI_MSI_MASK;
+}
+
+static inline int irq_to_dev_nr(unsigned int irq)
+{
+ return irq >> ZPCI_MSI_VEC_BITS;
+}
+
+static inline struct zdev_irq_map *get_imap(unsigned int irq)
+{
+ return bucket->imap[irq_to_dev_nr(irq)];
+}
+
struct zpci_dev *get_zdev(struct pci_dev *pdev)
{
return (struct zpci_dev *) pdev->sysdata;
return (cc) ? -EIO : 0;
}
+/* Modify PCI: Register adapter interruptions */
+static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb,
+ u64 aibv)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+ struct zpci_fib *fib;
+ int rc;
+
+ fib = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!fib)
+ return -ENOMEM;
+
+ fib->isc = PCI_ISC;
+ fib->noi = zdev->irq_map->msi_vecs;
+ fib->sum = 1; /* enable summary notifications */
+ fib->aibv = aibv;
+ fib->aibvo = 0; /* every function has its own page */
+ fib->aisb = (u64) bucket->aisb + aisb / 8;
+ fib->aisbo = aisb & ZPCI_MSI_MASK;
+
+ rc = mpcifc_instr(req, fib);
+ pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi);
+
+ free_page((unsigned long) fib);
+ return rc;
+}
+
+struct mod_pci_args {
+ u64 base;
+ u64 limit;
+ u64 iota;
+};
+
+static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args *args)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, fn);
+ struct zpci_fib *fib;
+ int rc;
+
+ /* The FIB must be available even if it's not used */
+ fib = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!fib)
+ return -ENOMEM;
+
+ fib->pba = args->base;
+ fib->pal = args->limit;
+ fib->iota = args->iota;
+
+ rc = mpcifc_instr(req, fib);
+ free_page((unsigned long) fib);
+ return rc;
+}
+
+/* Modify PCI: Unregister adapter interruptions */
+static int zpci_unregister_airq(struct zpci_dev *zdev)
+{
+ struct mod_pci_args args = { 0, 0, 0 };
+
+ return mod_pci(zdev, ZPCI_MOD_FC_DEREG_INT, 0, &args);
+}
+
#define ZPCI_PCIAS_CFGSPC 15
static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
return rc;
}
+void synchronize_irq(unsigned int irq)
+{
+ /*
+ * Not needed, the handler is protected by a lock and IRQs that occur
+ * after the handler is deleted are just NOPs.
+ */
+}
+EXPORT_SYMBOL_GPL(synchronize_irq);
+
+void enable_irq(unsigned int irq)
+{
+ struct msi_desc *msi = irq_get_msi_desc(irq);
+
+ zpci_msi_set_mask_bits(msi, 1, 0);
+}
+EXPORT_SYMBOL_GPL(enable_irq);
+
+void disable_irq(unsigned int irq)
+{
+ struct msi_desc *msi = irq_get_msi_desc(irq);
+
+ zpci_msi_set_mask_bits(msi, 1, 1);
+}
+EXPORT_SYMBOL_GPL(disable_irq);
+
+void disable_irq_nosync(unsigned int irq)
+{
+ disable_irq(irq);
+}
+EXPORT_SYMBOL_GPL(disable_irq_nosync);
+
+unsigned long probe_irq_on(void)
+{
+ return 0;
+}
+EXPORT_SYMBOL_GPL(probe_irq_on);
+
+int probe_irq_off(unsigned long val)
+{
+ return 0;
+}
+EXPORT_SYMBOL_GPL(probe_irq_off);
+
+unsigned int probe_irq_mask(unsigned long val)
+{
+ return val;
+}
+EXPORT_SYMBOL_GPL(probe_irq_mask);
+
void __devinit pcibios_fixup_bus(struct pci_bus *bus)
{
}
.write = pci_write,
};
+/* store the last handled bit to implement fair scheduling of devices */
+static DEFINE_PER_CPU(unsigned long, next_sbit);
+
+static void zpci_irq_handler(void *dont, void *need)
+{
+ unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit);
+ int rescan = 0, max = aisb_max;
+ struct zdev_irq_map *imap;
+
+ kstat_cpu(smp_processor_id()).irqs[IOINT_PCI]++;
+ sbit = start;
+
+scan:
+ /* find summary_bit */
+ for_each_set_bit_left_cont(sbit, bucket->aisb, max) {
+ clear_bit(63 - (sbit & 63), bucket->aisb + (sbit >> 6));
+ last = sbit;
+
+ /* find vector bit */
+ imap = bucket->imap[sbit];
+ for_each_set_bit_left(mbit, &imap->aibv, imap->msi_vecs) {
+ kstat_cpu(smp_processor_id()).irqs[IOINT_MSI]++;
+ clear_bit(63 - mbit, &imap->aibv);
+
+ spin_lock(&imap->lock);
+ if (imap->cb[mbit].handler)
+ imap->cb[mbit].handler(mbit,
+ imap->cb[mbit].data);
+ spin_unlock(&imap->lock);
+ }
+ }
+
+ if (rescan)
+ goto out;
+
+ /* scan the skipped bits */
+ if (start > 0) {
+ sbit = 0;
+ max = start;
+ start = 0;
+ goto scan;
+ }
+
+ /* enable interrupts again */
+ sic_instr(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+
+ /* check again to not lose initiative */
+ rmb();
+ max = aisb_max;
+ sbit = find_first_bit_left(bucket->aisb, max);
+ if (sbit != max) {
+ atomic_inc(&irq_retries);
+ rescan++;
+ goto scan;
+ }
+out:
+ /* store next device bit to scan */
+ __get_cpu_var(next_sbit) = (++last >= aisb_max) ? 0 : last;
+}
+
+/* msi_vecs - number of requested interrupts, 0 place function to error state */
+static int zpci_setup_msi(struct pci_dev *pdev, int msi_vecs)
+{
+ struct zpci_dev *zdev = get_zdev(pdev);
+ unsigned int aisb, msi_nr;
+ struct msi_desc *msi;
+ int rc;
+
+ /* store the number of used MSI vectors */
+ zdev->irq_map->msi_vecs = min(msi_vecs, ZPCI_NR_MSI_VECS);
+
+ spin_lock(&bucket->lock);
+ aisb = find_first_zero_bit(bucket->alloc, PAGE_SIZE);
+ /* alloc map exhausted? */
+ if (aisb == PAGE_SIZE) {
+ spin_unlock(&bucket->lock);
+ return -EIO;
+ }
+ set_bit(aisb, bucket->alloc);
+ spin_unlock(&bucket->lock);
+
+ zdev->aisb = aisb;
+ if (aisb + 1 > aisb_max)
+ aisb_max = aisb + 1;
+
+ /* wire up IRQ shortcut pointer */
+ bucket->imap[zdev->aisb] = zdev->irq_map;
+ pr_debug("%s: imap[%u] linked to %p\n", __func__, zdev->aisb, zdev->irq_map);
+
+ /* TODO: irq number 0 wont be found if we return less than requested MSIs.
+ * ignore it for now and fix in common code.
+ */
+ msi_nr = aisb << ZPCI_MSI_VEC_BITS;
+
+ list_for_each_entry(msi, &pdev->msi_list, list) {
+ rc = zpci_setup_msi_irq(zdev, msi, msi_nr,
+ aisb << ZPCI_MSI_VEC_BITS);
+ if (rc)
+ return rc;
+ msi_nr++;
+ }
+
+ rc = zpci_register_airq(zdev, aisb, (u64) &zdev->irq_map->aibv);
+ if (rc) {
+ clear_bit(aisb, bucket->alloc);
+ dev_err(&pdev->dev, "register MSI failed with: %d\n", rc);
+ return rc;
+ }
+ return (zdev->irq_map->msi_vecs == msi_vecs) ?
+ 0 : zdev->irq_map->msi_vecs;
+}
+
+static void zpci_teardown_msi(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = get_zdev(pdev);
+ struct msi_desc *msi;
+ int aisb, rc;
+
+ rc = zpci_unregister_airq(zdev);
+ if (rc) {
+ dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc);
+ return;
+ }
+
+ msi = list_first_entry(&pdev->msi_list, struct msi_desc, list);
+ aisb = irq_to_dev_nr(msi->irq);
+
+ list_for_each_entry(msi, &pdev->msi_list, list)
+ zpci_teardown_msi_irq(zdev, msi);
+
+ clear_bit(aisb, bucket->alloc);
+ if (aisb + 1 == aisb_max)
+ aisb_max--;
+}
+
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+ pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
+ if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
+ return -EINVAL;
+ return zpci_setup_msi(pdev, nvec);
+}
+
+void arch_teardown_msi_irqs(struct pci_dev *pdev)
+{
+ pr_info("%s: on pdev: %p\n", __func__, pdev);
+ zpci_teardown_msi(pdev);
+}
+
static void zpci_map_resources(struct zpci_dev *zdev)
{
struct pci_dev *pdev = zdev->pdev;
zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
if (!zdev)
return ERR_PTR(-ENOMEM);
+
+ /* Alloc aibv & callback space */
+ zdev->irq_map = kmem_cache_alloc(zdev_irq_cache, GFP_KERNEL);
+ if (!zdev->irq_map)
+ goto error;
+ memset(zdev->irq_map, 0, sizeof(*zdev->irq_map));
+ WARN_ON((u64) zdev->irq_map & 0xff);
return zdev;
+
+error:
+ kfree(zdev);
+ return ERR_PTR(-ENOMEM);
}
void zpci_free_device(struct zpci_dev *zdev)
{
+ kmem_cache_free(zdev_irq_cache, zdev->irq_map);
kfree(zdev);
}
pdev->sysdata = NULL;
}
+int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data)
+{
+ int msi_nr = irq_to_msi_nr(irq);
+ struct zdev_irq_map *imap;
+ struct msi_desc *msi;
+
+ msi = irq_get_msi_desc(irq);
+ if (!msi)
+ return -EIO;
+
+ imap = get_imap(irq);
+ spin_lock_init(&imap->lock);
+
+ pr_debug("%s: register handler for IRQ:MSI %d:%d\n", __func__, irq >> 6, msi_nr);
+ imap->cb[msi_nr].handler = handler;
+ imap->cb[msi_nr].data = data;
+
+ /*
+ * The generic MSI code returns with the interrupt disabled on the
+ * card, using the MSI mask bits. Firmware doesn't appear to unmask
+ * at that level, so we do it here by hand.
+ */
+ zpci_msi_set_mask_bits(msi, 1, 0);
+ return 0;
+}
+
+void zpci_free_irq(unsigned int irq)
+{
+ struct zdev_irq_map *imap = get_imap(irq);
+ int msi_nr = irq_to_msi_nr(irq);
+ unsigned long flags;
+
+ pr_debug("%s: for irq: %d\n", __func__, irq);
+
+ spin_lock_irqsave(&imap->lock, flags);
+ imap->cb[msi_nr].handler = NULL;
+ imap->cb[msi_nr].data = NULL;
+ spin_unlock_irqrestore(&imap->lock, flags);
+}
+
+int request_irq(unsigned int irq, irq_handler_t handler,
+ unsigned long irqflags, const char *devname, void *dev_id)
+{
+ pr_debug("%s: irq: %d handler: %p flags: %lx dev: %s\n",
+ __func__, irq, handler, irqflags, devname);
+
+ return zpci_request_irq(irq, handler, dev_id);
+}
+EXPORT_SYMBOL_GPL(request_irq);
+
+void free_irq(unsigned int irq, void *dev_id)
+{
+ zpci_free_irq(irq);
+}
+EXPORT_SYMBOL_GPL(free_irq);
+
+static int __init zpci_irq_init(void)
+{
+ int cpu, rc;
+
+ bucket = kzalloc(sizeof(*bucket), GFP_KERNEL);
+ if (!bucket)
+ return -ENOMEM;
+
+ bucket->aisb = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+ if (!bucket->aisb) {
+ rc = -ENOMEM;
+ goto out_aisb;
+ }
+
+ bucket->alloc = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+ if (!bucket->alloc) {
+ rc = -ENOMEM;
+ goto out_alloc;
+ }
+
+ isc_register(PCI_ISC);
+ zpci_irq_si = s390_register_adapter_interrupt(&zpci_irq_handler, NULL, PCI_ISC);
+ if (IS_ERR(zpci_irq_si)) {
+ rc = PTR_ERR(zpci_irq_si);
+ zpci_irq_si = NULL;
+ goto out_ai;
+ }
+
+ for_each_online_cpu(cpu)
+ per_cpu(next_sbit, cpu) = 0;
+
+ spin_lock_init(&bucket->lock);
+ /* set summary to 1 to be called every time for the ISC */
+ *zpci_irq_si = 1;
+ sic_instr(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+ return 0;
+
+out_ai:
+ isc_unregister(PCI_ISC);
+ free_page((unsigned long) bucket->alloc);
+out_alloc:
+ free_page((unsigned long) bucket->aisb);
+out_aisb:
+ kfree(bucket);
+ return rc;
+}
+
+static void zpci_irq_exit(void)
+{
+ free_page((unsigned long) bucket->alloc);
+ free_page((unsigned long) bucket->aisb);
+ s390_unregister_adapter_interrupt(zpci_irq_si, PCI_ISC);
+ isc_unregister(PCI_ISC);
+ kfree(bucket);
+}
+
static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size,
unsigned long flags, int domain)
{
static int zpci_mem_init(void)
{
+ zdev_irq_cache = kmem_cache_create("PCI_IRQ_cache", sizeof(struct zdev_irq_map),
+ L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, NULL);
+ if (!zdev_irq_cache)
+ goto error_zdev;
+
/* TODO: use realloc */
zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
GFP_KERNEL);
if (!zpci_iomap_start)
- goto error_zdev;
+ goto error_iomap;
return 0;
+error_iomap:
+ kmem_cache_destroy(zdev_irq_cache);
error_zdev:
return -ENOMEM;
}
static void zpci_mem_exit(void)
{
kfree(zpci_iomap_start);
+ kmem_cache_destroy(zdev_irq_cache);
}
unsigned int pci_probe = 1;
if (rc)
goto out_mem;
+ rc = zpci_msihash_init();
+ if (rc)
+ goto out_hash;
+
+ rc = zpci_irq_init();
+ if (rc)
+ goto out_irq;
+
rc = clp_find_pci_devices();
if (rc)
goto out_find;
return 0;
out_find:
+ zpci_irq_exit();
+out_irq:
+ zpci_msihash_exit();
+out_hash:
zpci_mem_exit();
out_mem:
return rc;
--- /dev/null
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define COMPONENT "zPCI"
+#define pr_fmt(fmt) COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/rculist.h>
+#include <linux/hash.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <asm/hw_irq.h>
+
+/* mapping of irq numbers to msi_desc */
+static struct hlist_head *msi_hash;
+static unsigned int msihash_shift = 6;
+#define msi_hashfn(nr) hash_long(nr, msihash_shift)
+
+static DEFINE_SPINLOCK(msi_map_lock);
+
+struct msi_desc *__irq_get_msi_desc(unsigned int irq)
+{
+ struct hlist_node *entry;
+ struct msi_map *map;
+
+ hlist_for_each_entry_rcu(map, entry,
+ &msi_hash[msi_hashfn(irq)], msi_chain)
+ if (map->irq == irq)
+ return map->msi;
+ return NULL;
+}
+
+int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag)
+{
+ if (msi->msi_attrib.is_msix) {
+ int offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+ PCI_MSIX_ENTRY_VECTOR_CTRL;
+ msi->masked = readl(msi->mask_base + offset);
+ writel(flag, msi->mask_base + offset);
+ } else {
+ if (msi->msi_attrib.maskbit) {
+ int pos;
+ u32 mask_bits;
+
+ pos = (long) msi->mask_base;
+ pci_read_config_dword(msi->dev, pos, &mask_bits);
+ mask_bits &= ~(mask);
+ mask_bits |= flag & mask;
+ pci_write_config_dword(msi->dev, pos, mask_bits);
+ } else {
+ return 0;
+ }
+ }
+
+ msi->msi_attrib.maskbit = !!flag;
+ return 1;
+}
+
+int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi,
+ unsigned int nr, int offset)
+{
+ struct msi_map *map;
+ struct msi_msg msg;
+ int rc;
+
+ map = kmalloc(sizeof(*map), GFP_KERNEL);
+ if (map == NULL)
+ return -ENOMEM;
+
+ map->irq = nr;
+ map->msi = msi;
+ zdev->msi_map[nr & ZPCI_MSI_MASK] = map;
+
+ pr_debug("%s hashing irq: %u to bucket nr: %llu\n",
+ __func__, nr, msi_hashfn(nr));
+ hlist_add_head_rcu(&map->msi_chain, &msi_hash[msi_hashfn(nr)]);
+
+ spin_lock(&msi_map_lock);
+ rc = irq_set_msi_desc(nr, msi);
+ if (rc) {
+ spin_unlock(&msi_map_lock);
+ hlist_del_rcu(&map->msi_chain);
+ kfree(map);
+ zdev->msi_map[nr & ZPCI_MSI_MASK] = NULL;
+ return rc;
+ }
+ spin_unlock(&msi_map_lock);
+
+ msg.data = nr - offset;
+ msg.address_lo = zdev->msi_addr & 0xffffffff;
+ msg.address_hi = zdev->msi_addr >> 32;
+ write_msi_msg(nr, &msg);
+ return 0;
+}
+
+void zpci_teardown_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi)
+{
+ int irq = msi->irq & ZPCI_MSI_MASK;
+ struct msi_map *map;
+
+ msi->msg.address_lo = 0;
+ msi->msg.address_hi = 0;
+ msi->msg.data = 0;
+ msi->irq = 0;
+ zpci_msi_set_mask_bits(msi, 1, 1);
+
+ spin_lock(&msi_map_lock);
+ map = zdev->msi_map[irq];
+ hlist_del_rcu(&map->msi_chain);
+ kfree(map);
+ zdev->msi_map[irq] = NULL;
+ spin_unlock(&msi_map_lock);
+}
+
+/*
+ * The msi hash table has 256 entries which is good for 4..20
+ * devices (a typical device allocates 10 + CPUs MSI's). Maybe make
+ * the hash table size adjustable later.
+ */
+int __init zpci_msihash_init(void)
+{
+ unsigned int i;
+
+ msi_hash = kmalloc(256 * sizeof(*msi_hash), GFP_KERNEL);
+ if (!msi_hash)
+ return -ENOMEM;
+
+ for (i = 0; i < (1U << msihash_shift); i++)
+ INIT_HLIST_HEAD(&msi_hash[i]);
+ return 0;
+}
+
+void __init zpci_msihash_exit(void)
+{
+ kfree(msi_hash);
+}