net: implement mechanism for HW based QOS

author John Fastabend <john.r.fastabend@intel.com>

Mon, 17 Jan 2011 08:06:04 +0000 (08:06 +0000)

committer David S. Miller <davem@davemloft.net>

Thu, 20 Jan 2011 07:31:10 +0000 (23:31 -0800)
author John Fastabend <john.r.fastabend@intel.com>
Mon, 17 Jan 2011 08:06:04 +0000 (08:06 +0000)
committer David S. Miller <davem@davemloft.net>
Thu, 20 Jan 2011 07:31:10 +0000 (23:31 -0800)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h

index 68a4627b74f5319373b0a3877f5e0872e73e3783..371fa8839d51f78c7a73d700da25e29f6035b546 100644 (file)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -646,6 +646,14 @@ struct xps_dev_maps {
      (nr_cpu_ids * sizeof(struct xps_map *)))
  #endif /* CONFIG_XPS */
  
+#define TC_MAX_QUEUE   16
+#define TC_BITMASK     15
+/* HW offloaded queuing disciplines txq count and offset maps.
+ * One entry describes the contiguous range of tx queues assigned to a
+ * single traffic class: queues offset .. offset + count - 1.
+ */
+struct netdev_tc_txq {
+       u16 count;      /* number of tx queues in the range */
+       u16 offset;     /* index of the first tx queue in the range */
+};
+
  /*
   * This structure defines the management hooks for network devices.
   * The following hooks can be defined; unless noted otherwise, they are
@@ -756,6 +764,11 @@ struct xps_dev_maps {
   * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
   *                       struct nlattr *port[]);
   * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
+ * int (*ndo_setup_tc)(struct net_device *dev, u8 tc)
+ *     Called to setup 'tc' number of traffic classes in the net device. This
+ *     is always called from the stack with the rtnl lock held and netif tx
+ *     queues stopped. This allows the netdevice to perform queue management
+ *     safely.
   */
  #define HAVE_NET_DEVICE_OPS
  struct net_device_ops {
@@ -814,6 +827,7 @@ struct net_device_ops {
                                                    struct nlattr *port[]);
         int                     (*ndo_get_vf_port)(struct net_device *dev,
                                                    int vf, struct sk_buff *skb);
+       int                     (*ndo_setup_tc)(struct net_device *dev, u8 tc);
  #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
         int                     (*ndo_fcoe_enable)(struct net_device *dev);
         int                     (*ndo_fcoe_disable)(struct net_device *dev);
@@ -1146,6 +1160,9 @@ struct net_device {
         /* Data Center Bridging netlink ops */
         const struct dcbnl_rtnl_ops *dcbnl_ops;
  #endif
+       u8 num_tc;
+       struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
+       u8 prio_tc_map[TC_BITMASK + 1];
  
  #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
         /* max exchange id for FCoE LRO by ddp */
@@ -1164,6 +1181,57 @@ struct net_device {
  
  #define        NETDEV_ALIGN            32
  
+/*
+ * netdev_get_prio_tc_map - look up the traffic class recorded for a priority
+ * @dev: network device whose prio_tc_map[] is read
+ * @prio: priority; only the bits selected by TC_BITMASK are used, so the
+ *        index always stays inside prio_tc_map[]
+ *
+ * Returns the traffic class stored for the masked priority.
+ */
+static inline
+int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio)
+{
+       return dev->prio_tc_map[prio & TC_BITMASK];
+}
+/*
+ * netdev_set_prio_tc_map - map a priority onto a traffic class
+ * @dev: network device whose prio_tc_map[] is updated
+ * @prio: priority; masked with TC_BITMASK before being used as the index
+ * @tc: traffic class to store; must be below dev->num_tc
+ *
+ * Returns 0 on success, or -EINVAL when @tc is not below dev->num_tc
+ * (which includes every @tc while num_tc is 0). Nothing is written on
+ * failure.
+ */
+static inline
+int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
+{
+       if (tc >= dev->num_tc)
+               return -EINVAL;
+
+       dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK;
+       return 0;
+}
+/*
+ * netdev_reset_tc - clear all traffic class state on a device
+ * @dev: network device to reset
+ *
+ * Sets num_tc to 0 and zeroes both the tc_to_txq[] and prio_tc_map[]
+ * tables.
+ */
+static inline
+void netdev_reset_tc(struct net_device *dev)
+{
+       dev->num_tc = 0;
+       memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
+       memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
+}
+/*
+ * netdev_set_tc_queue - assign a range of tx queues to a traffic class
+ * @dev: network device whose tc_to_txq[] is updated
+ * @tc: traffic class to configure; must be below dev->num_tc
+ * @count: number of tx queues in the range
+ * @offset: index of the first tx queue in the range
+ *
+ * Returns 0 on success, or -EINVAL when @tc is not below dev->num_tc.
+ * @count and @offset are stored as given; this helper does not compare
+ * them against the number of tx queues on the device.
+ */
+static inline
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
+{
+       if (tc >= dev->num_tc)
+               return -EINVAL;
+
+       dev->tc_to_txq[tc].count = count;
+       dev->tc_to_txq[tc].offset = offset;
+       return 0;
+}
+/*
+ * netdev_set_num_tc - set the number of traffic classes in use
+ * @dev: network device to update
+ * @num_tc: new number of traffic classes; at most TC_MAX_QUEUE
+ *
+ * Returns 0 on success, or -EINVAL when @num_tc exceeds TC_MAX_QUEUE.
+ * Only dev->num_tc is written; the tc_to_txq[] and prio_tc_map[] tables
+ * are left as they are.
+ */
+static inline
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
+{
+       if (num_tc > TC_MAX_QUEUE)
+               return -EINVAL;
+
+       dev->num_tc = num_tc;
+       return 0;
+}
+/*
+ * netdev_get_num_tc - read the number of traffic classes in use
+ * @dev: network device to query
+ *
+ * Returns dev->num_tc; 0 means no traffic classes are configured.
+ */
+static inline
+int netdev_get_num_tc(struct net_device *dev)
+{
+       return dev->num_tc;
+}
+
  static inline
  struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
                                          unsigned int index)
diff --git a/net/core/dev.c b/net/core/dev.c

index 2b85d4ae981f4985256b6db8af1bc95d9b2b0212..8b1d886ed23b6b4bbf1f32a17c985058cfb2ab64 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1593,6 +1593,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
         rcu_read_unlock();
  }
  
+/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+ * @dev: Network device
+ * @txq: number of queues available
+ *
+ * If real_num_tx_queues is changed the tc mappings may no longer be
+ * valid. To resolve this, verify that each tc mapping remains valid and,
+ * if it does not, point the affected priority back at TC0. With no
+ * priorities mapping to that offset/count pair it will no longer be
+ * used. In the worst case TC0 itself is invalid and nothing can be
+ * done, so priority mappings are disabled by setting num_tc to 0. It is
+ * expected that drivers will fix this mapping if they can before
+ * calling netif_set_real_num_tx_queues.
+ */
+void netif_setup_tc(struct net_device *dev, unsigned int txq)
+{
+       int i;
+       struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+
+       /* If TC0 is invalidated disable TC mapping: its queue range
+        * (offset + count) no longer fits in txq, and TC0 is the class
+        * every other priority would be remapped to.
+        */
+       if (tc->offset + tc->count > txq) {
+               pr_warning("Number of in use tx queues changed "
+                          "invalidating tc mappings. Priority "
+                          "traffic classification disabled!\n");
+               dev->num_tc = 0;
+               return;
+       }
+
+       /* Invalidated prio to tc mappings set to TC0: any priority whose
+        * traffic class has a queue range extending past txq is remapped.
+        * NOTE(review): the loop starts at priority 1, so the mapping for
+        * priority 0 is never revalidated here - confirm this is intended.
+        */
+       for (i = 1; i < TC_BITMASK + 1; i++) {
+               int q = netdev_get_prio_tc_map(dev, i);
+
+               tc = &dev->tc_to_txq[q];
+               if (tc->offset + tc->count > txq) {
+                       pr_warning("Number of in use tx queues "
+                                  "changed. Priority %i to tc "
+                                  "mapping %i is no longer valid "
+                                  "setting map to 0\n",
+                                  i, q);
+                       netdev_set_prio_tc_map(dev, i, 0);
+               }
+       }
+}
+
  /*
   * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
   * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -1612,6 +1654,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
                 if (rc)
                         return rc;
  
+               if (dev->num_tc)
+                       netif_setup_tc(dev, txq);
+
                 if (txq < dev->real_num_tx_queues)
                         qdisc_reset_all_tx_gt(dev, txq);
         }
@@ -2161,6 +2206,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
                   unsigned int num_tx_queues)
  {
         u32 hash;
+       u16 qoffset = 0;
+       u16 qcount = num_tx_queues;
  
         if (skb_rx_queue_recorded(skb)) {
                 hash = skb_get_rx_queue(skb);
@@ -2169,13 +2216,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
                 return hash;
         }
  
+       if (dev->num_tc) {
+               u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+               qoffset = dev->tc_to_txq[tc].offset;
+               qcount = dev->tc_to_txq[tc].count;
+       }
+
         if (skb->sk && skb->sk->sk_hash)
                 hash = skb->sk->sk_hash;
         else
                 hash = (__force u16) skb->protocol ^ skb->rxhash;
         hash = jhash_1word(hash, hashrnd);
  
-       return (u16) (((u64) hash * num_tx_queues) >> 32);
+       return (u16) (((u64) hash * qcount) >> 32) + qoffset;
  }
  EXPORT_SYMBOL(__skb_tx_hash);
author	John Fastabend <john.r.fastabend@intel.com>
	Mon, 17 Jan 2011 08:06:04 +0000 (08:06 +0000)
committer	David S. Miller <davem@davemloft.net>
	Thu, 20 Jan 2011 07:31:10 +0000 (23:31 -0800)
include/linux/netdevice.h		patch \| blob \| history
net/core/dev.c		patch \| blob \| history