IPoIB: Use separate CQ for UD send completions
authorEli Cohen <eli@dev.mellanox.co.il>
Tue, 29 Apr 2008 20:46:53 +0000 (13:46 -0700)
committerRoland Dreier <rolandd@cisco.com>
Tue, 29 Apr 2008 20:46:53 +0000 (13:46 -0700)
Use a dedicated CQ for UD send completions. Also, do not arm the UD
send CQ, which reduces the number of interrupts generated.  This patch
farther reduces overhead by not calling poll CQ for every posted send
WR -- it does polls only when there 16 or more outstanding work requests.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_verbs.c

index f1f142dc64b17e8608f0f668345a13af3b65fb65..9044f8803532e035e264e2b166644b05d84563e4 100644 (file)
@@ -95,6 +95,8 @@ enum {
        IPOIB_MCAST_FLAG_SENDONLY = 1,
        IPOIB_MCAST_FLAG_BUSY     = 2,  /* joining or already joined */
        IPOIB_MCAST_FLAG_ATTACHED = 3,
+
+       MAX_SEND_CQE              = 16,
 };
 
 #define        IPOIB_OP_RECV   (1ul << 31)
@@ -285,7 +287,8 @@ struct ipoib_dev_priv {
        u16               pkey_index;
        struct ib_pd     *pd;
        struct ib_mr     *mr;
-       struct ib_cq     *cq;
+       struct ib_cq     *recv_cq;
+       struct ib_cq     *send_cq;
        struct ib_qp     *qp;
        u32               qkey;
 
@@ -305,6 +308,7 @@ struct ipoib_dev_priv {
        struct ib_sge        tx_sge[MAX_SKB_FRAGS + 1];
        struct ib_send_wr    tx_wr;
        unsigned             tx_outstanding;
+       struct ib_wc         send_wc[MAX_SEND_CQE];
 
        struct ib_recv_wr    rx_wr;
        struct ib_sge        rx_sge[IPOIB_UD_RX_SG];
@@ -662,7 +666,6 @@ static inline int ipoib_register_debugfs(void) { return 0; }
 static inline void ipoib_unregister_debugfs(void) { }
 #endif
 
-
 #define ipoib_printk(level, priv, format, arg...)      \
        printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg)
 #define ipoib_warn(priv, format, arg...)               \
index 9db7b0bd9134b350befa605a40b122081fe861a4..97e67d36378fa1532cb2224624ae20ea53acefe2 100644 (file)
@@ -249,8 +249,8 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_qp_init_attr attr = {
                .event_handler = ipoib_cm_rx_event_handler,
-               .send_cq = priv->cq, /* For drain WR */
-               .recv_cq = priv->cq,
+               .send_cq = priv->recv_cq, /* For drain WR */
+               .recv_cq = priv->recv_cq,
                .srq = priv->cm.srq,
                .cap.max_send_wr = 1, /* For drain WR */
                .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
@@ -951,8 +951,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_qp_init_attr attr = {
-               .send_cq                = priv->cq,
-               .recv_cq                = priv->cq,
+               .send_cq                = priv->recv_cq,
+               .recv_cq                = priv->recv_cq,
                .srq                    = priv->cm.srq,
                .cap.max_send_wr        = ipoib_sendq_size,
                .cap.max_send_sge       = 1,
index 9a47428366c91d0f35884ff2cecb0bbf49c41846..10279b79c44d92b9f2384f653fcf9204cf77159d 100644 (file)
@@ -71,7 +71,7 @@ static int ipoib_set_coalesce(struct net_device *dev,
            coal->rx_max_coalesced_frames > 0xffff)
                return -EINVAL;
 
-       ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames,
+       ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames,
                           coal->rx_coalesce_usecs);
        if (ret && ret != -ENOSYS) {
                ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
index 7cf1fa7074abfa28ca31e298c8c2c04a53b75ab8..97b815c1a3fc0988a33129e47ace54748d9d9dbb 100644 (file)
@@ -364,7 +364,6 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        unsigned int wr_id = wc->wr_id;
        struct ipoib_tx_buf *tx_req;
-       unsigned long flags;
 
        ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
                       wr_id, wc->status);
@@ -384,13 +383,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
        dev_kfree_skb_any(tx_req->skb);
 
-       spin_lock_irqsave(&priv->tx_lock, flags);
        ++priv->tx_tail;
        if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
            netif_queue_stopped(dev) &&
            test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
                netif_wake_queue(dev);
-       spin_unlock_irqrestore(&priv->tx_lock, flags);
 
        if (wc->status != IB_WC_SUCCESS &&
            wc->status != IB_WC_WR_FLUSH_ERR)
@@ -399,6 +396,17 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
                           wc->status, wr_id, wc->vendor_err);
 }
 
+static int poll_tx(struct ipoib_dev_priv *priv)
+{
+       int n, i;
+
+       n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
+       for (i = 0; i < n; ++i)
+               ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i);
+
+       return n == MAX_SEND_CQE;
+}
+
 int ipoib_poll(struct napi_struct *napi, int budget)
 {
        struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
@@ -414,7 +422,7 @@ poll_more:
                int max = (budget - done);
 
                t = min(IPOIB_NUM_WC, max);
-               n = ib_poll_cq(priv->cq, t, priv->ibwc);
+               n = ib_poll_cq(priv->recv_cq, t, priv->ibwc);
 
                for (i = 0; i < n; i++) {
                        struct ib_wc *wc = priv->ibwc + i;
@@ -425,12 +433,8 @@ poll_more:
                                        ipoib_cm_handle_rx_wc(dev, wc);
                                else
                                        ipoib_ib_handle_rx_wc(dev, wc);
-                       } else {
-                               if (wc->wr_id & IPOIB_OP_CM)
-                                       ipoib_cm_handle_tx_wc(dev, wc);
-                               else
-                                       ipoib_ib_handle_tx_wc(dev, wc);
-                       }
+                       } else
+                               ipoib_cm_handle_tx_wc(priv->dev, wc);
                }
 
                if (n != t)
@@ -439,7 +443,7 @@ poll_more:
 
        if (done < budget) {
                netif_rx_complete(dev, napi);
-               if (unlikely(ib_req_notify_cq(priv->cq,
+               if (unlikely(ib_req_notify_cq(priv->recv_cq,
                                              IB_CQ_NEXT_COMP |
                                              IB_CQ_REPORT_MISSED_EVENTS)) &&
                    netif_rx_reschedule(dev, napi))
@@ -562,12 +566,16 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 
                address->last_send = priv->tx_head;
                ++priv->tx_head;
+               skb_orphan(skb);
 
                if (++priv->tx_outstanding == ipoib_sendq_size) {
                        ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
                        netif_stop_queue(dev);
                }
        }
+
+       if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
+               poll_tx(priv);
 }
 
 static void __ipoib_reap_ah(struct net_device *dev)
@@ -714,7 +722,7 @@ void ipoib_drain_cq(struct net_device *dev)
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int i, n;
        do {
-               n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
+               n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
                for (i = 0; i < n; ++i) {
                        /*
                         * Convert any successful completions to flush
@@ -729,14 +737,13 @@ void ipoib_drain_cq(struct net_device *dev)
                                        ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
                                else
                                        ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
-                       } else {
-                               if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
-                                       ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
-                               else
-                                       ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
-                       }
+                       } else
+                               ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
                }
        } while (n == IPOIB_NUM_WC);
+
+       while (poll_tx(priv))
+               ; /* nothing */
 }
 
 int ipoib_ib_dev_stop(struct net_device *dev, int flush)
@@ -826,7 +833,7 @@ timeout:
                msleep(1);
        }
 
-       ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP);
+       ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
 
        return 0;
 }
index 7a4ed9d3d8447368466aad077abea153c5b776c0..2442090ac8d1dbfe083bfca69d46adc2b484c157 100644 (file)
@@ -1298,7 +1298,8 @@ static int __init ipoib_init_module(void)
 
        ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
        ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
-       ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
+       ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE,
+                                                    IPOIB_MIN_QUEUE_SIZE));
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
        ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
 #endif
index 07c03f178a498b0ff6488a32a1cfe45057864c5b..c1e7ece1fd440ffd61efc2260ade377cc87df47e 100644 (file)
@@ -171,26 +171,33 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
                goto out_free_pd;
        }
 
-       size = ipoib_sendq_size + ipoib_recvq_size + 1;
+       size = ipoib_recvq_size + 1;
        ret = ipoib_cm_dev_init(dev);
        if (!ret) {
+               size += ipoib_sendq_size;
                if (ipoib_cm_has_srq(dev))
                        size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
                else
                        size += ipoib_recvq_size * ipoib_max_conn_qp;
        }
 
-       priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
-       if (IS_ERR(priv->cq)) {
-               printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
+       priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
+       if (IS_ERR(priv->recv_cq)) {
+               printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
                goto out_free_mr;
        }
 
-       if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP))
-               goto out_free_cq;
+       priv->send_cq = ib_create_cq(priv->ca, NULL, NULL, dev, ipoib_sendq_size, 0);
+       if (IS_ERR(priv->send_cq)) {
+               printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
+               goto out_free_recv_cq;
+       }
+
+       if (ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP))
+               goto out_free_send_cq;
 
-       init_attr.send_cq = priv->cq;
-       init_attr.recv_cq = priv->cq;
+       init_attr.send_cq = priv->send_cq;
+       init_attr.recv_cq = priv->recv_cq;
 
        if (priv->hca_caps & IB_DEVICE_UD_TSO)
                init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO;
@@ -201,7 +208,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
        priv->qp = ib_create_qp(priv->pd, &init_attr);
        if (IS_ERR(priv->qp)) {
                printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
-               goto out_free_cq;
+               goto out_free_send_cq;
        }
 
        priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
@@ -230,8 +237,11 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 
        return 0;
 
-out_free_cq:
-       ib_destroy_cq(priv->cq);
+out_free_send_cq:
+       ib_destroy_cq(priv->send_cq);
+
+out_free_recv_cq:
+       ib_destroy_cq(priv->recv_cq);
 
 out_free_mr:
        ib_dereg_mr(priv->mr);
@@ -254,8 +264,11 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
                clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
        }
 
-       if (ib_destroy_cq(priv->cq))
-               ipoib_warn(priv, "ib_cq_destroy failed\n");
+       if (ib_destroy_cq(priv->send_cq))
+               ipoib_warn(priv, "ib_cq_destroy (send) failed\n");
+
+       if (ib_destroy_cq(priv->recv_cq))
+               ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
 
        ipoib_cm_dev_cleanup(dev);