From b8dd786f9417e5885929bfe33a235c76a9c1c569 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Mon, 22 Dec 2008 07:15:03 -0800 Subject: [PATCH] mlx4_core: Add support for multiple completion event vectors When using MSI-X mode, create a completion event queue for each CPU. Report the number of completion EQs in a new struct mlx4_caps member, num_comp_vectors, and extend the mlx4_cq_alloc() interface with a vector parameter so that consumers can specify which completion EQ should be used to report events for the CQ being created. Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 2 +- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/net/mlx4/cq.c | 11 ++- drivers/net/mlx4/en_cq.c | 9 ++- drivers/net/mlx4/en_main.c | 4 +- drivers/net/mlx4/eq.c | 117 ++++++++++++++++++++++-------- drivers/net/mlx4/main.c | 53 ++++++++++---- drivers/net/mlx4/mlx4.h | 14 ++-- drivers/net/mlx4/profile.c | 4 +- include/linux/mlx4/device.h | 4 +- 10 files changed, 157 insertions(+), 63 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 18308494a195..2198753bf13d 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -222,7 +222,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector } err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq, 0); + cq->db.dma, &cq->mcq, vector, 0); if (err) goto err_dbmap; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2e80f8f47b02..dcefe1fceb5c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -578,7 +578,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) ibdev->num_ports++; ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; - ibdev->ib_dev.num_comp_vectors = 1; + ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dma_device = &dev->pdev->dev; ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c index b7ad2829d67e..ac57b6a42c6e 100644 --- a/drivers/net/mlx4/cq.c +++ b/drivers/net/mlx4/cq.c @@ -189,7 +189,7 @@ EXPORT_SYMBOL_GPL(mlx4_cq_resize); int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, - int collapsed) + unsigned vector, int collapsed) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cq_table *cq_table = &priv->cq_table; @@ -198,6 +198,11 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, u64 mtt_addr; int err; + if (vector >= dev->caps.num_comp_vectors) + return -EINVAL; + + cq->vector = vector; + cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap); if (cq->cqn == -1) return -ENOMEM; @@ -227,7 +232,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, cq_context->flags = cpu_to_be32(!!collapsed << 18); cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); - cq_context->comp_eqn = priv->eq_table.eq[MLX4_EQ_COMP].eqn; + cq_context->comp_eqn = priv->eq_table.eq[vector].eqn; cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; mtt_addr = mlx4_mtt_addr(dev, mtt); @@ -276,7 +281,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) if (err) mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); - synchronize_irq(priv->eq_table.eq[MLX4_EQ_COMP].irq); + synchronize_irq(priv->eq_table.eq[cq->vector].irq); spin_lock_irq(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); diff --git a/drivers/net/mlx4/en_cq.c b/drivers/net/mlx4/en_cq.c index 1368a8010af4..674f836e225b 100644 --- a/drivers/net/mlx4/en_cq.c +++ b/drivers/net/mlx4/en_cq.c @@ -51,10 +51,13 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, int err; cq->size = entries; - if (mode == RX) + if (mode == RX) { cq->buf_size = cq->size * sizeof(struct mlx4_cqe); - else + cq->vector = ring % mdev->dev->caps.num_comp_vectors; + } else { cq->buf_size = sizeof(struct mlx4_cqe); + cq->vector = 0; + } cq->ring = ring; cq->is_tx = mode; @@ -86,7 +89,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) memset(cq->buf, 0, cq->buf_size); err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar, - cq->wqres.db.dma, &cq->mcq, cq->is_tx); + cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx); if (err) return err; diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c index 4b9794e97a79..c1c05852a95e 100644 --- a/drivers/net/mlx4/en_main.c +++ b/drivers/net/mlx4/en_main.c @@ -170,9 +170,9 @@ static void *mlx4_en_add(struct mlx4_dev *dev) mlx4_info(mdev, "Using %d tx rings for port:%d\n", mdev->profile.prof[i].tx_ring_num, i); if (!mdev->profile.prof[i].rx_ring_num) { - mdev->profile.prof[i].rx_ring_num = 1; + mdev->profile.prof[i].rx_ring_num = dev->caps.num_comp_vectors; mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n", - 1, i); + mdev->profile.prof[i].rx_ring_num, i); } else mlx4_info(mdev, "Using %d rx rings for port:%d\n", mdev->profile.prof[i].rx_ring_num, i); diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index de169338cd90..5d867ebe6a4d 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -266,7 +266,7 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr) writel(priv->eq_table.clr_mask, priv->eq_table.clr_int); - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]); return IRQ_RETVAL(work); @@ -304,6 +304,17 @@ static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, MLX4_CMD_TIME_CLASS_A); } +static int mlx4_num_eq_uar(struct mlx4_dev *dev) +{ + /* + * Each UAR holds 4 EQ doorbells. To figure out how many UARs + * we need to map, take the difference of highest index and + * the lowest index we'll use and add 1. + */ + return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 - + dev->caps.reserved_eqs / 4 + 1; +} + static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -483,9 +494,11 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) if (eq_table->have_irq) free_irq(dev->pdev->irq, dev); - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) if (eq_table->eq[i].have_irq) free_irq(eq_table->eq[i].irq, eq_table->eq + i); + + kfree(eq_table->irq_names); } static int mlx4_map_clr_int(struct mlx4_dev *dev) @@ -551,57 +564,93 @@ void mlx4_unmap_eq_icm(struct mlx4_dev *dev) __free_page(priv->eq_table.icm_page); } +int mlx4_alloc_eq_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs, + sizeof *priv->eq_table.eq, GFP_KERNEL); + if (!priv->eq_table.eq) + return -ENOMEM; + + return 0; +} + +void mlx4_free_eq_table(struct mlx4_dev *dev) +{ + kfree(mlx4_priv(dev)->eq_table.eq); +} + int mlx4_init_eq_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; int i; + priv->eq_table.uar_map = kcalloc(sizeof *priv->eq_table.uar_map, + mlx4_num_eq_uar(dev), GFP_KERNEL); + if (!priv->eq_table.uar_map) { + err = -ENOMEM; + goto err_out_free; + } + err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs, dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0); if (err) - return err; + goto err_out_free; - for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i) + for (i = 0; i < mlx4_num_eq_uar(dev); ++i) priv->eq_table.uar_map[i] = NULL; err = mlx4_map_clr_int(dev); if (err) - goto err_out_free; + goto err_out_bitmap; priv->eq_table.clr_mask = swab32(1 << (priv->eq_table.inta_pin & 31)); priv->eq_table.clr_int = priv->clr_base + (priv->eq_table.inta_pin < 32 ? 4 : 0); - err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_COMP : 0, - &priv->eq_table.eq[MLX4_EQ_COMP]); - if (err) - goto err_out_unmap; + priv->eq_table.irq_names = kmalloc(16 * dev->caps.num_comp_vectors, GFP_KERNEL); + if (!priv->eq_table.irq_names) { + err = -ENOMEM; + goto err_out_bitmap; + } + + for (i = 0; i < dev->caps.num_comp_vectors; ++i) { + err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE, + (dev->flags & MLX4_FLAG_MSI_X) ? i : 0, + &priv->eq_table.eq[i]); + if (err) + goto err_out_unmap; + } err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_ASYNC : 0, - &priv->eq_table.eq[MLX4_EQ_ASYNC]); + (dev->flags & MLX4_FLAG_MSI_X) ? dev->caps.num_comp_vectors : 0, + &priv->eq_table.eq[dev->caps.num_comp_vectors]); if (err) goto err_out_comp; if (dev->flags & MLX4_FLAG_MSI_X) { - static const char *eq_name[] = { - [MLX4_EQ_COMP] = DRV_NAME " (comp)", - [MLX4_EQ_ASYNC] = DRV_NAME " (async)" - }; + static const char async_eq_name[] = "mlx4-async"; + const char *eq_name; + + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) { + if (i < dev->caps.num_comp_vectors) { + snprintf(priv->eq_table.irq_names + i * 16, 16, + "mlx4-comp-%d", i); + eq_name = priv->eq_table.irq_names + i * 16; + } else + eq_name = async_eq_name; - for (i = 0; i < MLX4_NUM_EQ; ++i) { err = request_irq(priv->eq_table.eq[i].irq, - mlx4_msi_x_interrupt, - 0, eq_name[i], priv->eq_table.eq + i); + mlx4_msi_x_interrupt, 0, eq_name, + priv->eq_table.eq + i); if (err) goto err_out_async; priv->eq_table.eq[i].have_irq = 1; } - } else { err = request_irq(dev->pdev->irq, mlx4_interrupt, IRQF_SHARED, DRV_NAME, dev); @@ -612,28 +661,36 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) } err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, - priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); + priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); if (err) mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", - priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err); + priv->eq_table.eq[dev->caps.num_comp_vectors].eqn, err); - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) eq_set_ci(&priv->eq_table.eq[i], 1); return 0; err_out_async: - mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]); + mlx4_free_eq(dev, &priv->eq_table.eq[dev->caps.num_comp_vectors]); err_out_comp: - mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP]); + i = dev->caps.num_comp_vectors - 1; err_out_unmap: + while (i >= 0) { + mlx4_free_eq(dev, &priv->eq_table.eq[i]); + --i; + } mlx4_unmap_clr_int(dev); mlx4_free_irqs(dev); -err_out_free: +err_out_bitmap: mlx4_bitmap_cleanup(&priv->eq_table.bitmap); + +err_out_free: + kfree(priv->eq_table.uar_map); + return err; } @@ -643,18 +700,20 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) int i; mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1, - priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); + priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); mlx4_free_irqs(dev); - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) mlx4_free_eq(dev, &priv->eq_table.eq[i]); mlx4_unmap_clr_int(dev); - for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i) + for (i = 0; i < mlx4_num_eq_uar(dev); ++i) if (priv->eq_table.uar_map[i]) iounmap(priv->eq_table.uar_map[i]); mlx4_bitmap_cleanup(&priv->eq_table.bitmap); + + kfree(priv->eq_table.uar_map); } diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 90a0281d15ea..710c79e7a2db 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -421,9 +421,7 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, ((u64) (MLX4_CMPT_TYPE_EQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, - roundup_pow_of_two(MLX4_NUM_EQ + - dev->caps.reserved_eqs), - MLX4_NUM_EQ + dev->caps.reserved_eqs, 0, 0); + dev->caps.num_eqs, dev->caps.num_eqs, 0, 0); if (err) goto err_cq; @@ -810,12 +808,12 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) if (dev->flags & MLX4_FLAG_MSI_X) { mlx4_warn(dev, "NOP command failed to generate MSI-X " "interrupt IRQ %d).\n", - priv->eq_table.eq[MLX4_EQ_ASYNC].irq); + priv->eq_table.eq[dev->caps.num_comp_vectors].irq); mlx4_warn(dev, "Trying again without MSI-X.\n"); } else { mlx4_err(dev, "NOP command failed to generate interrupt " "(IRQ %d), aborting.\n", - priv->eq_table.eq[MLX4_EQ_ASYNC].irq); + priv->eq_table.eq[dev->caps.num_comp_vectors].irq); mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); } @@ -908,31 +906,50 @@ err_uar_table_free: static void mlx4_enable_msi_x(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - struct msix_entry entries[MLX4_NUM_EQ]; + struct msix_entry *entries; + int nreq; int err; int i; if (msi_x) { - for (i = 0; i < MLX4_NUM_EQ; ++i) + nreq = min(dev->caps.num_eqs - dev->caps.reserved_eqs, + num_possible_cpus() + 1); + entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); + if (!entries) + goto no_msi; + + for (i = 0; i < nreq; ++i) entries[i].entry = i; - err = pci_enable_msix(dev->pdev, entries, ARRAY_SIZE(entries)); + retry: + err = pci_enable_msix(dev->pdev, entries, nreq); if (err) { - if (err > 0) - mlx4_info(dev, "Only %d MSI-X vectors available, " - "not using MSI-X\n", err); + /* Try again if at least 2 vectors are available */ + if (err > 1) { + mlx4_info(dev, "Requested %d vectors, " + "but only %d MSI-X vectors available, " + "trying again\n", nreq, err); + nreq = err; + goto retry; + } + goto no_msi; } - for (i = 0; i < MLX4_NUM_EQ; ++i) + dev->caps.num_comp_vectors = nreq - 1; + for (i = 0; i < nreq; ++i) priv->eq_table.eq[i].irq = entries[i].vector; dev->flags |= MLX4_FLAG_MSI_X; + + kfree(entries); return; } no_msi: - for (i = 0; i < MLX4_NUM_EQ; ++i) + dev->caps.num_comp_vectors = 1; + + for (i = 0; i < 2; ++i) priv->eq_table.eq[i].irq = dev->pdev->irq; } @@ -1074,6 +1091,10 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_cmd; + err = mlx4_alloc_eq_table(dev); + if (err) + goto err_close; + mlx4_enable_msi_x(dev); err = mlx4_setup_hca(dev); @@ -1084,7 +1105,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) } if (err) - goto err_close; + goto err_free_eq; for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); @@ -1114,6 +1135,9 @@ err_port: mlx4_cleanup_pd_table(dev); mlx4_cleanup_uar_table(dev); +err_free_eq: + mlx4_free_eq_table(dev); + err_close: if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); @@ -1177,6 +1201,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) iounmap(priv->kar); mlx4_uar_free(dev, &priv->driver_uar); mlx4_cleanup_uar_table(dev); + mlx4_free_eq_table(dev); mlx4_close_hca(dev); mlx4_cmd_cleanup(dev); diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 34c909deaff3..e0213bad61c7 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -62,12 +62,6 @@ enum { MLX4_MTT_ENTRY_PER_SEG = 8 }; -enum { - MLX4_EQ_ASYNC, - MLX4_EQ_COMP, - MLX4_NUM_EQ -}; - enum { MLX4_NUM_PDS = 1 << 15 }; @@ -205,10 +199,11 @@ struct mlx4_cq_table { struct mlx4_eq_table { struct mlx4_bitmap bitmap; + char *irq_names; void __iomem *clr_int; - void __iomem *uar_map[(MLX4_NUM_EQ + 6) / 4]; + void __iomem **uar_map; u32 clr_mask; - struct mlx4_eq eq[MLX4_NUM_EQ]; + struct mlx4_eq *eq; u64 icm_virt; struct page *icm_page; dma_addr_t icm_dma; @@ -328,6 +323,9 @@ void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap); int mlx4_reset(struct mlx4_dev *dev); +int mlx4_alloc_eq_table(struct mlx4_dev *dev); +void mlx4_free_eq_table(struct mlx4_dev *dev); + int mlx4_init_pd_table(struct mlx4_dev *dev); int mlx4_init_uar_table(struct mlx4_dev *dev); int mlx4_init_mr_table(struct mlx4_dev *dev); diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c index 9ca42b213d54..919fb9eb1b62 100644 --- a/drivers/net/mlx4/profile.c +++ b/drivers/net/mlx4/profile.c @@ -107,7 +107,9 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, profile[MLX4_RES_AUXC].num = request->num_qp; profile[MLX4_RES_SRQ].num = request->num_srq; profile[MLX4_RES_CQ].num = request->num_cq; - profile[MLX4_RES_EQ].num = MLX4_NUM_EQ + dev_cap->reserved_eqs; + profile[MLX4_RES_EQ].num = min(dev_cap->max_eqs, + dev_cap->reserved_eqs + + num_possible_cpus() + 1); profile[MLX4_RES_DMPT].num = request->num_mpt; profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS; profile[MLX4_RES_MTT].num = request->num_mtt; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 371086fd946f..8f659cc29960 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -206,6 +206,7 @@ struct mlx4_caps { int reserved_cqs; int num_eqs; int reserved_eqs; + int num_comp_vectors; int num_mpts; int num_mtt_segs; int fmr_reserved_mtts; @@ -328,6 +329,7 @@ struct mlx4_cq { int arm_sn; int cqn; + unsigned vector; atomic_t refcount; struct completion free; @@ -437,7 +439,7 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres, int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, - int collapsed); + unsigned vector, int collapsed); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base); -- 2.34.1