Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[firefly-linux-kernel-4.4.55.git] net/core/dev.c
index 1e0a1847c3bbee7fd5aeb8654bf0d9da08ce5ca9..974199daa911488df13c8902344ee857f23440e4 100644 (file)
 #define PTYPE_HASH_MASK        (PTYPE_HASH_SIZE - 1)
 
 static DEFINE_SPINLOCK(ptype_lock);
+static DEFINE_SPINLOCK(offload_lock);
 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 static struct list_head ptype_all __read_mostly;       /* Taps */
+static struct list_head offload_base __read_mostly;
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -470,6 +472,82 @@ void dev_remove_pack(struct packet_type *pt)
 }
 EXPORT_SYMBOL(dev_remove_pack);
 
+
+/**
+ *     dev_add_offload - register offload handlers
+ *     @po: protocol offload declaration
+ *
+ *     Add protocol offload handlers to the networking stack. The passed
+ *     &packet_offload is linked into kernel lists and may not be freed until
+ *     it has been removed from the kernel lists.
+ *
+ *     This call does not sleep, therefore it cannot guarantee that
+ *     all CPUs that are in the middle of receiving packets will see
+ *     the new offload handlers (until the next received packet).
+ */
+void dev_add_offload(struct packet_offload *po)
+{
+       struct list_head *head = &offload_base;
+
+       spin_lock(&offload_lock);
+       list_add_rcu(&po->list, head);
+       spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(dev_add_offload);
+
+/**
+ *     __dev_remove_offload - remove offload handler
+ *     @po: packet offload declaration
+ *
+ *     Remove a protocol offload handler that was previously added to the
+ *     kernel offload handlers by dev_add_offload(). The passed &packet_offload
+ *     is removed from the kernel lists and can be freed or reused once this
+ *     function returns.
+ *
+ *     The offload handler might still be in use by receivers and must
+ *     not be freed until after all the CPUs have gone through a
+ *     quiescent state.
+ */
+void __dev_remove_offload(struct packet_offload *po)
+{
+       struct list_head *head = &offload_base;
+       struct packet_offload *po1;
+
+       spin_lock(&offload_lock);
+
+       list_for_each_entry(po1, head, list) {
+               if (po == po1) {
+                       list_del_rcu(&po->list);
+                       goto out;
+               }
+       }
+
+       pr_warn("dev_remove_offload: %p not found\n", po);
+out:
+       spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(__dev_remove_offload);
+
+/**
+ *     dev_remove_offload - remove packet offload handler
+ *     @po: packet offload declaration
+ *
+ *     Remove a packet offload handler that was previously added to the kernel
+ *     offload handlers by dev_add_offload(). The passed &packet_offload is
+ *     removed from the kernel lists and can be freed or reused once this
+ *     function returns.
+ *
+ *     This call sleeps to guarantee that no CPU is still looking at the
+ *     offload handler after return.
+ */
+void dev_remove_offload(struct packet_offload *po)
+{
+       __dev_remove_offload(po);
+
+       synchronize_net();
+}
+EXPORT_SYMBOL(dev_remove_offload);
+
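A usage sketch (hypothetical module, not part of this patch): a protocol embeds
its handlers in a struct packet_offload and pairs the add/remove calls across
module init/exit. Only the two dev_*_offload() calls come from this commit; the
names and the stub callback are illustrative.

	/* Hypothetical offload user; my_gro_receive is a stub that
	 * declines to coalesce anything. */
	static struct sk_buff **my_gro_receive(struct sk_buff **head,
					       struct sk_buff *skb)
	{
		NAPI_GRO_CB(skb)->flush = 1;	/* pass straight through */
		return NULL;
	}

	static struct packet_offload my_offload __read_mostly = {
		.type = cpu_to_be16(ETH_P_IP),
		.callbacks = {
			.gro_receive = my_gro_receive,
		},
	};

	static int __init my_offload_init(void)
	{
		dev_add_offload(&my_offload);	/* spinlock only, never sleeps */
		return 0;
	}

	static void __exit my_offload_exit(void)
	{
		dev_remove_offload(&my_offload);	/* sleeps in synchronize_net() */
	}

	module_init(my_offload_init);
	module_exit(my_offload_exit);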
 /******************************************************************************
 
                      Device Boot-time Settings Routines
@@ -1666,7 +1744,7 @@ static inline int deliver_skb(struct sk_buff *skb,
 
 static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
 {
-       if (ptype->af_packet_priv == NULL)
+       if (!ptype->af_packet_priv || !skb->sk)
                return false;
 
        if (ptype->id_match)
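The hunk ends mid-function; reconstructed from this vintage of the tree (treat
as a sketch), the full helper shows why the added !skb->sk test matters — both
branches below compare against or dereference skb->sk, which is NULL for
packets that have no originating socket:

	static inline bool skb_loop_sk(struct packet_type *ptype,
				       struct sk_buff *skb)
	{
		if (!ptype->af_packet_priv || !skb->sk)
			return false;

		if (ptype->id_match)
			return ptype->id_match(ptype, skb->sk);
		else if ((struct sock *)ptype->af_packet_priv == skb->sk)
			return true;

		return false;
	}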
@@ -1994,7 +2072,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
        netdev_features_t features)
 {
        struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
-       struct packet_type *ptype;
+       struct packet_offload *ptype;
        __be16 type = skb->protocol;
        int vlan_depth = ETH_HLEN;
        int err;
@@ -2023,18 +2101,17 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
        }
 
        rcu_read_lock();
-       list_for_each_entry_rcu(ptype,
-                       &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-               if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+       list_for_each_entry_rcu(ptype, &offload_base, list) {
+               if (ptype->type == type && ptype->callbacks.gso_segment) {
                        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-                               err = ptype->gso_send_check(skb);
+                               err = ptype->callbacks.gso_send_check(skb);
                                segs = ERR_PTR(err);
                                if (err || skb_gso_ok(skb, features))
                                        break;
                                __skb_push(skb, (skb->data -
                                                 skb_network_header(skb)));
                        }
-                       segs = ptype->gso_segment(skb, features);
+                       segs = ptype->callbacks.gso_segment(skb, features);
                        break;
                }
        }
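The ptype->callbacks accesses assume the layout introduced by the companion
include/linux/netdevice.h hunk of this series; its shape, reproduced here as a
sketch for readability, is roughly:

	struct offload_callbacks {
		struct sk_buff	*(*gso_segment)(struct sk_buff *skb,
						netdev_features_t features);
		int		(*gso_send_check)(struct sk_buff *skb);
		struct sk_buff	**(*gro_receive)(struct sk_buff **head,
						 struct sk_buff *skb);
		int		(*gro_complete)(struct sk_buff *skb);
	};

	struct packet_offload {
		__be16			 type;	/* really htons(ether_type) */
		struct offload_callbacks callbacks;
		struct list_head	 list;
	};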
@@ -2818,8 +2895,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                if (unlikely(tcpu != next_cpu) &&
                    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
                     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
-                     rflow->last_qtail)) >= 0))
+                     rflow->last_qtail)) >= 0)) {
+                       tcpu = next_cpu;
                        rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+               }
 
                if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
                        *rflowp = rflow;
@@ -3300,18 +3379,18 @@ ncls:
                                && !skb_pfmemalloc_protocol(skb))
                goto drop;
 
-       rx_handler = rcu_dereference(skb->dev->rx_handler);
        if (vlan_tx_tag_present(skb)) {
                if (pt_prev) {
                        ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = NULL;
                }
-               if (vlan_do_receive(&skb, !rx_handler))
+               if (vlan_do_receive(&skb))
                        goto another_round;
                else if (unlikely(!skb))
                        goto unlock;
        }
 
+       rx_handler = rcu_dereference(skb->dev->rx_handler);
        if (rx_handler) {
                if (pt_prev) {
                        ret = deliver_skb(skb, pt_prev, orig_dev);
@@ -3331,6 +3410,9 @@ ncls:
                }
        }
 
+       if (vlan_tx_nonzero_tag_present(skb))
+               skb->pkt_type = PACKET_OTHERHOST;
+
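vlan_tx_nonzero_tag_present() comes from the companion include/linux/if_vlan.h
change (quoted here as a sketch): it is true when the skb carries a tag whose
VID bits are nonzero. Frames on VLANs with no configured vlan device thus get
flagged PACKET_OTHERHOST and are dropped by the L3 protocols, while taps on
ptype_all have already seen them earlier in this function.

	#define vlan_tx_nonzero_tag_present(__skb) \
		(vlan_tx_tag_present(__skb) && \
		 ((__skb)->vlan_tci & VLAN_VID_MASK))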
        /* deliver only exact match when indicated */
        null_or_dev = deliver_exact ? skb->dev : NULL;
 
@@ -3441,9 +3523,9 @@ static void flush_backlog(void *arg)
 
 static int napi_gro_complete(struct sk_buff *skb)
 {
-       struct packet_type *ptype;
+       struct packet_offload *ptype;
        __be16 type = skb->protocol;
-       struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+       struct list_head *head = &offload_base;
        int err = -ENOENT;
 
        if (NAPI_GRO_CB(skb)->count == 1) {
@@ -3453,10 +3535,10 @@ static int napi_gro_complete(struct sk_buff *skb)
 
        rcu_read_lock();
        list_for_each_entry_rcu(ptype, head, list) {
-               if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+               if (ptype->type != type || !ptype->callbacks.gro_complete)
                        continue;
 
-               err = ptype->gro_complete(skb);
+               err = ptype->callbacks.gro_complete(skb);
                break;
        }
        rcu_read_unlock();
@@ -3471,17 +3553,31 @@ out:
        return netif_receive_skb(skb);
 }
 
-inline void napi_gro_flush(struct napi_struct *napi)
+/* napi->gro_list contains packets ordered by age, with the
+ * youngest packets at the head of the list.
+ * Complete skbs in reverse order to reduce latencies.
+ */
+void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 {
-       struct sk_buff *skb, *next;
+       struct sk_buff *skb, *prev = NULL;
 
-       for (skb = napi->gro_list; skb; skb = next) {
-               next = skb->next;
+       /* scan list and build reverse chain */
+       for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
+               skb->prev = prev;
+               prev = skb;
+       }
+
+       for (skb = prev; skb; skb = prev) {
                skb->next = NULL;
+
+               if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
+                       return;
+
+               prev = skb->prev;
                napi_gro_complete(skb);
+               napi->gro_count--;
        }
 
-       napi->gro_count = 0;
        napi->gro_list = NULL;
 }
 EXPORT_SYMBOL(napi_gro_flush);
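The rewrite walks the next-linked gro_list once to record each skb's
predecessor, then completes skbs oldest-first so old flows are not held back
behind young ones; gro_count is now decremented per skb because an early
return may leave packets queued. A standalone sketch of the reverse-flush
idiom (plain C, hypothetical node type):

	#include <stdio.h>

	struct node {
		struct node *next;
		struct node *prev;	/* filled in by the forward scan */
		int id;
	};

	/* Visit a next-linked list (youngest first) in oldest-first order. */
	static void flush_reverse(struct node *head)
	{
		struct node *n, *prev = NULL;

		for (n = head; n; n = n->next) {	/* build reverse chain */
			n->prev = prev;
			prev = n;
		}
		for (n = prev; n; n = prev) {		/* oldest to youngest */
			prev = n->prev;
			printf("completing node %d\n", n->id);
		}
	}

	int main(void)
	{
		struct node c = { NULL, NULL, 3 };	/* oldest (tail) */
		struct node b = { &c, NULL, 2 };
		struct node a = { &b, NULL, 1 };	/* youngest (head) */

		flush_reverse(&a);	/* prints 3, then 2, then 1 */
		return 0;
	}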
@@ -3489,9 +3585,9 @@ EXPORT_SYMBOL(napi_gro_flush);
 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
        struct sk_buff **pp = NULL;
-       struct packet_type *ptype;
+       struct packet_offload *ptype;
        __be16 type = skb->protocol;
-       struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+       struct list_head *head = &offload_base;
        int same_flow;
        int mac_len;
        enum gro_result ret;
@@ -3504,7 +3600,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 
        rcu_read_lock();
        list_for_each_entry_rcu(ptype, head, list) {
-               if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+               if (ptype->type != type || !ptype->callbacks.gro_receive)
                        continue;
 
                skb_set_network_header(skb, skb_gro_offset(skb));
@@ -3514,7 +3610,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
                NAPI_GRO_CB(skb)->flush = 0;
                NAPI_GRO_CB(skb)->free = 0;
 
-               pp = ptype->gro_receive(&napi->gro_list, skb);
+               pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
                break;
        }
        rcu_read_unlock();
@@ -3542,6 +3638,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 
        napi->gro_count++;
        NAPI_GRO_CB(skb)->count = 1;
+       NAPI_GRO_CB(skb)->age = jiffies;
        skb_shinfo(skb)->gso_size = skb_gro_len(skb);
        skb->next = napi->gro_list;
        napi->gro_list = skb;
@@ -3631,20 +3728,22 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(napi_skb_finish);
 
-void skb_gro_reset_offset(struct sk_buff *skb)
+static void skb_gro_reset_offset(struct sk_buff *skb)
 {
+       const struct skb_shared_info *pinfo = skb_shinfo(skb);
+       const skb_frag_t *frag0 = &pinfo->frags[0];
+
        NAPI_GRO_CB(skb)->data_offset = 0;
        NAPI_GRO_CB(skb)->frag0 = NULL;
        NAPI_GRO_CB(skb)->frag0_len = 0;
 
        if (skb->mac_header == skb->tail &&
-           !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
-               NAPI_GRO_CB(skb)->frag0 =
-                       skb_frag_address(&skb_shinfo(skb)->frags[0]);
-               NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
+           pinfo->nr_frags &&
+           !PageHighMem(skb_frag_page(frag0))) {
+               NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
+               NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
        }
 }
-EXPORT_SYMBOL(skb_gro_reset_offset);
 
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
@@ -3876,7 +3975,7 @@ void napi_complete(struct napi_struct *n)
        if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
                return;
 
-       napi_gro_flush(n);
+       napi_gro_flush(n, false);
        local_irq_save(flags);
        __napi_complete(n);
        local_irq_restore(flags);
@@ -3981,8 +4080,17 @@ static void net_rx_action(struct softirq_action *h)
                                local_irq_enable();
                                napi_complete(n);
                                local_irq_disable();
-                       } else
+                       } else {
+                               if (n->gro_list) {
+                                       /* Flush packets that are too old.
+                                        * If HZ < 1000, flush all packets.
+                                        */
+                                       local_irq_enable();
+                                       napi_gro_flush(n, HZ >= 1000);
+                                       local_irq_disable();
+                               }
                                list_move_tail(&n->poll_list, &sd->poll_list);
+                       }
                }
 
                netpoll_poll_unlock(have);
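The HZ >= 1000 argument encodes a latency bound: with flush_old true,
napi_gro_flush() keeps skbs whose NAPI_GRO_CB age matches the current jiffy,
which is only acceptable when a jiffy is short; napi_complete() above passes
false and flushes everything. A hypothetical helper making the heuristic
explicit:

	/* One jiffy lasts 1000/HZ ms; only hold young skbs back when
	 * doing so adds at most ~1 ms of extra latency. */
	static inline bool napi_gro_keep_young(void)
	{
		return HZ >= 1000;
	}

	/* equivalent call: napi_gro_flush(n, napi_gro_keep_young()); */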
@@ -6235,7 +6343,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
                goto out;
 
        /* Ensure the device has been registered */
-       err = -EINVAL;
        if (dev->reg_state != NETREG_REGISTERED)
                goto out;
 
@@ -6633,6 +6740,8 @@ static int __init net_dev_init(void)
        for (i = 0; i < PTYPE_HASH_SIZE; i++)
                INIT_LIST_HEAD(&ptype_base[i]);
 
+       INIT_LIST_HEAD(&offload_base);
+
        if (register_pernet_subsys(&netdev_net_ops))
                goto out;