net/mlx4_core: Enforce irq affinity changes immediately
author	Yuval Atias <yuvala@mellanox.com>
Wed, 14 May 2014 09:15:10 +0000 (12:15 +0300)
committer	David S. Miller <davem@davemloft.net>
Wed, 14 May 2014 19:40:32 +0000 (15:40 -0400)
During heavy traffic, napi constantly polls the completion queue and
no interrupt is fired. Because of that, changes to irq affinity are
ignored until traffic is stopped and resumed.

By registering with the irq affinity notifier mechanism and forcing an
interrupt when the affinity is changed, irq affinity changes are
enforced immediately.
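
For reference, below is a minimal sketch of the irq affinity notifier
registration pattern this patch relies on. Names such as my_priv,
my_irq_notify and the affinity_changed flag are illustrative
placeholders, not identifiers from the mlx4 code.

	#include <linux/interrupt.h>
	#include <linux/slab.h>

	/* Illustrative driver-private state (placeholder, not mlx4). */
	struct my_priv {
		bool affinity_changed;
	};

	struct my_irq_notify {
		void *arg;				/* driver private data */
		struct irq_affinity_notify notify;	/* kernel notifier handle */
	};

	/* Called by the irq core whenever the affinity mask is rewritten. */
	static void my_affinity_notify(struct irq_affinity_notify *notify,
				       const cpumask_t *mask)
	{
		struct my_irq_notify *n = container_of(notify,
						       struct my_irq_notify,
						       notify);
		struct my_priv *priv = n->arg;

		/* Flag the queue; the napi poll loop checks the flag and
		 * releases the CPU so the next interrupt fires on the new
		 * mask.
		 */
		priv->affinity_changed = true;
	}

	static void my_affinity_release(struct kref *ref)
	{
		kfree(container_of(ref, struct my_irq_notify, notify.kref));
	}

	static int my_register_affinity_notifier(struct my_priv *priv, int irq)
	{
		struct my_irq_notify *n;
		int err;

		n = kzalloc(sizeof(*n), GFP_KERNEL);
		if (!n)
			return -ENOMEM;

		n->arg = priv;
		n->notify.irq = irq;
		n->notify.notify = my_affinity_notify;
		n->notify.release = my_affinity_release;

		err = irq_set_affinity_notifier(irq, &n->notify);
		if (err)
			kfree(n);
		return err;
	}

The notifier fires when the affinity mask is changed, for example via
"echo <mask> > /proc/irq/<N>/smp_affinity"; the poll loop can then
complete napi and re-arm the CQ so the next interrupt lands on the new
CPU.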

Signed-off-by: Yuval Atias <yuvala@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/mellanox/mlx4/cq.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/eq.c
include/linux/mlx4/device.h

diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 0487121e4a0fe495d4252f01b24d16bdb2fefb06..8542030b89cf5b5d0d0c60eaa9ecc96076d72341 100644
@@ -293,6 +293,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
        atomic_set(&cq->refcount, 1);
        init_completion(&cq->free);
 
+       cq->irq = priv->eq_table.eq[cq->vector].irq;
+       cq->irq_affinity_change = false;
+
        return 0;
 
 err_radix:
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index a1512450816db8a4dd66566b1a7451a0866c5f17..e8c0d2b832b79f4f46b82cf52bca646e9b13008b 100644
@@ -895,10 +895,17 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
        mlx4_en_cq_unlock_napi(cq);
 
        /* If we used up all the quota - we're probably not done yet... */
-       if (done == budget)
+       if (done == budget) {
                INC_PERF_COUNTER(priv->pstats.napi_quota);
-       else {
+               if (unlikely(cq->mcq.irq_affinity_change)) {
+                       cq->mcq.irq_affinity_change = false;
+                       napi_complete(napi);
+                       mlx4_en_arm_cq(priv, cq);
+                       return 0;
+               }
+       } else {
                /* Done for now */
+               cq->mcq.irq_affinity_change = false;
                napi_complete(napi);
                mlx4_en_arm_cq(priv, cq);
        }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 89585c6311c341ec27e26bcf1c4c86d01887d8e0..cb964056d71023a40ff5f68b7ceb3a367607494a 100644
@@ -474,9 +474,15 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
        /* If we used up all the quota - we're probably not done yet... */
        if (done < budget) {
                /* Done for now */
+               cq->mcq.irq_affinity_change = false;
                napi_complete(napi);
                mlx4_en_arm_cq(priv, cq);
                return done;
+       } else if (unlikely(cq->mcq.irq_affinity_change)) {
+               cq->mcq.irq_affinity_change = false;
+               napi_complete(napi);
+               mlx4_en_arm_cq(priv, cq);
+               return 0;
        }
        return budget;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 6c088bc1845b3b9d14900e0e280c472d332dd37f..d954ec1eac173752e23e57653ccd4d2cae2de944 100644
@@ -53,6 +53,11 @@ enum {
        MLX4_EQ_ENTRY_SIZE      = 0x20
 };
 
+struct mlx4_irq_notify {
+       void *arg;
+       struct irq_affinity_notify notify;
+};
+
 #define MLX4_EQ_STATUS_OK         ( 0 << 28)
 #define MLX4_EQ_STATUS_WRITE_FAIL  (10 << 28)
 #define MLX4_EQ_OWNER_SW          ( 0 << 24)
@@ -1083,6 +1088,57 @@ static void mlx4_unmap_clr_int(struct mlx4_dev *dev)
        iounmap(priv->clr_base);
 }
 
+static void mlx4_irq_notifier_notify(struct irq_affinity_notify *notify,
+                                    const cpumask_t *mask)
+{
+       struct mlx4_irq_notify *n = container_of(notify,
+                                                struct mlx4_irq_notify,
+                                                notify);
+       struct mlx4_priv *priv = (struct mlx4_priv *)n->arg;
+       struct radix_tree_iter iter;
+       void **slot;
+
+       radix_tree_for_each_slot(slot, &priv->cq_table.tree, &iter, 0) {
+               struct mlx4_cq *cq = (struct mlx4_cq *)(*slot);
+
+               if (cq->irq == notify->irq)
+                       cq->irq_affinity_change = true;
+       }
+}
+
+static void mlx4_release_irq_notifier(struct kref *ref)
+{
+       struct mlx4_irq_notify *n = container_of(ref, struct mlx4_irq_notify,
+                                                notify.kref);
+       kfree(n);
+}
+
+static void mlx4_assign_irq_notifier(struct mlx4_priv *priv,
+                                    struct mlx4_dev *dev, int irq)
+{
+       struct mlx4_irq_notify *irq_notifier = NULL;
+       int err = 0;
+
+       irq_notifier = kzalloc(sizeof(*irq_notifier), GFP_KERNEL);
+       if (!irq_notifier) {
+               mlx4_warn(dev, "Failed to allocate irq notifier. irq %d\n",
+                         irq);
+               return;
+       }
+
+       irq_notifier->notify.irq = irq;
+       irq_notifier->notify.notify = mlx4_irq_notifier_notify;
+       irq_notifier->notify.release = mlx4_release_irq_notifier;
+       irq_notifier->arg = priv;
+       err = irq_set_affinity_notifier(irq, &irq_notifier->notify);
+       if (err) {
+               kfree(irq_notifier);
+               irq_notifier = NULL;
+               mlx4_warn(dev, "Failed to set irq notifier. irq %d\n", irq);
+       }
+}
+
+
 int mlx4_alloc_eq_table(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1353,6 +1409,9 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
                                continue;
                                /*we dont want to break here*/
                        }
+                       mlx4_assign_irq_notifier(priv, dev,
+                                                priv->eq_table.eq[vec].irq);
+
                        eq_set_ci(&priv->eq_table.eq[vec], 1);
                }
        }
@@ -1379,6 +1438,9 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec)
                  Belonging to a legacy EQ*/
                mutex_lock(&priv->msix_ctl.pool_lock);
                if (priv->msix_ctl.pool_bm & 1ULL << i) {
+                       irq_set_affinity_notifier(
+                               priv->eq_table.eq[vec].irq,
+                               NULL);
                        free_irq(priv->eq_table.eq[vec].irq,
                                 &priv->eq_table.eq[vec]);
                        priv->msix_ctl.pool_bm &= ~(1ULL << i);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ba87bd21295a533c8d6941bc4c11d29bceba0112..c0468e6f0442a9d92be3509e3228b9bb4e078d06 100644
@@ -577,6 +577,9 @@ struct mlx4_cq {
 
        u32                     cons_index;
 
+       u16                     irq;
+       bool                    irq_affinity_change;
+
        __be32                 *set_ci_db;
        __be32                 *arm_db;
        int                     arm_sn;