#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
static DEFINE_SPINLOCK(ptype_lock);
+static DEFINE_SPINLOCK(offload_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly; /* Taps */
+static struct list_head offload_base __read_mostly;
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
}
EXPORT_SYMBOL(dev_remove_pack);
+
+/**
+ * dev_add_offload - register offload handlers
+ * @po: protocol offload declaration
+ *
+ * Add protocol offload handlers to the networking stack. The passed
+ * &proto_offload is linked into kernel lists and may not be freed until
+ * it has been removed from the kernel lists.
+ *
+ * This call does not sleep therefore it can not
+ * guarantee all CPU's that are in middle of receiving packets
+ * will see the new offload handlers (until the next received packet).
+ */
+void dev_add_offload(struct packet_offload *po)
+{
+ struct list_head *head = &offload_base;
+
+ spin_lock(&offload_lock);
+ list_add_rcu(&po->list, head);
+ spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(dev_add_offload);
+
+/**
+ * __dev_remove_offload - remove offload handler
+ * @po: packet offload declaration
+ *
+ * Remove a protocol offload handler that was previously added to the
+ * kernel offload handlers by dev_add_offload(). The passed &offload_type
+ * is removed from the kernel lists and can be freed or reused once this
+ * function returns.
+ *
+ * The packet type might still be in use by receivers
+ * and must not be freed until after all the CPU's have gone
+ * through a quiescent state.
+ */
+void __dev_remove_offload(struct packet_offload *po)
+{
+ struct list_head *head = &offload_base;
+ struct packet_offload *po1;
+
+ spin_lock(&offload_lock);
+
+ list_for_each_entry(po1, head, list) {
+ if (po == po1) {
+ list_del_rcu(&po->list);
+ goto out;
+ }
+ }
+
+ pr_warn("dev_remove_offload: %p not found\n", po);
+out:
+ spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(__dev_remove_offload);
+
+/**
+ * dev_remove_offload - remove packet offload handler
+ * @po: packet offload declaration
+ *
+ * Remove a packet offload handler that was previously added to the kernel
+ * offload handlers by dev_add_offload(). The passed &offload_type is
+ * removed from the kernel lists and can be freed or reused once this
+ * function returns.
+ *
+ * This call sleeps to guarantee that no CPU is looking at the packet
+ * type after return.
+ */
+void dev_remove_offload(struct packet_offload *po)
+{
+ __dev_remove_offload(po);
+
+ synchronize_net();
+}
+EXPORT_SYMBOL(dev_remove_offload);
+
/******************************************************************************
Device Boot-time Settings Routines
static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
{
- if (ptype->af_packet_priv == NULL)
+ if (!ptype->af_packet_priv || !skb->sk)
return false;
if (ptype->id_match)
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
- struct packet_type *ptype;
+ struct packet_offload *ptype;
__be16 type = skb->protocol;
int vlan_depth = ETH_HLEN;
int err;
}
rcu_read_lock();
- list_for_each_entry_rcu(ptype,
- &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
- if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+ list_for_each_entry_rcu(ptype, &offload_base, list) {
+ if (ptype->type == type && ptype->callbacks.gso_segment) {
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
- err = ptype->gso_send_check(skb);
+ err = ptype->callbacks.gso_send_check(skb);
segs = ERR_PTR(err);
if (err || skb_gso_ok(skb, features))
break;
__skb_push(skb, (skb->data -
skb_network_header(skb)));
}
- segs = ptype->gso_segment(skb, features);
+ segs = ptype->callbacks.gso_segment(skb, features);
break;
}
}
if (unlikely(tcpu != next_cpu) &&
(tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
((int)(per_cpu(softnet_data, tcpu).input_queue_head -
- rflow->last_qtail)) >= 0))
+ rflow->last_qtail)) >= 0)) {
+ tcpu = next_cpu;
rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+ }
if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
*rflowp = rflow;
&& !skb_pfmemalloc_protocol(skb))
goto drop;
- rx_handler = rcu_dereference(skb->dev->rx_handler);
if (vlan_tx_tag_present(skb)) {
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL;
}
- if (vlan_do_receive(&skb, !rx_handler))
+ if (vlan_do_receive(&skb))
goto another_round;
else if (unlikely(!skb))
goto unlock;
}
+ rx_handler = rcu_dereference(skb->dev->rx_handler);
if (rx_handler) {
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
}
}
+ if (vlan_tx_nonzero_tag_present(skb))
+ skb->pkt_type = PACKET_OTHERHOST;
+
/* deliver only exact match when indicated */
null_or_dev = deliver_exact ? skb->dev : NULL;
static int napi_gro_complete(struct sk_buff *skb)
{
- struct packet_type *ptype;
+ struct packet_offload *ptype;
__be16 type = skb->protocol;
- struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ struct list_head *head = &offload_base;
int err = -ENOENT;
if (NAPI_GRO_CB(skb)->count == 1) {
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
- if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+ if (ptype->type != type || !ptype->callbacks.gro_complete)
continue;
- err = ptype->gro_complete(skb);
+ err = ptype->callbacks.gro_complete(skb);
break;
}
rcu_read_unlock();
return netif_receive_skb(skb);
}
-inline void napi_gro_flush(struct napi_struct *napi)
+/* napi->gro_list contains packets ordered by age.
+ * youngest packets at the head of it.
+ * Complete skbs in reverse order to reduce latencies.
+ */
+void napi_gro_flush(struct napi_struct *napi, bool flush_old)
{
- struct sk_buff *skb, *next;
+ struct sk_buff *skb, *prev = NULL;
- for (skb = napi->gro_list; skb; skb = next) {
- next = skb->next;
+ /* scan list and build reverse chain */
+ for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
+ skb->prev = prev;
+ prev = skb;
+ }
+
+ for (skb = prev; skb; skb = prev) {
skb->next = NULL;
+
+ if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
+ return;
+
+ prev = skb->prev;
napi_gro_complete(skb);
+ napi->gro_count--;
}
- napi->gro_count = 0;
napi->gro_list = NULL;
}
EXPORT_SYMBOL(napi_gro_flush);
enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff **pp = NULL;
- struct packet_type *ptype;
+ struct packet_offload *ptype;
__be16 type = skb->protocol;
- struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ struct list_head *head = &offload_base;
int same_flow;
int mac_len;
enum gro_result ret;
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
- if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+ if (ptype->type != type || !ptype->callbacks.gro_receive)
continue;
skb_set_network_header(skb, skb_gro_offset(skb));
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
- pp = ptype->gro_receive(&napi->gro_list, skb);
+ pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
break;
}
rcu_read_unlock();
napi->gro_count++;
NAPI_GRO_CB(skb)->count = 1;
+ NAPI_GRO_CB(skb)->age = jiffies;
skb_shinfo(skb)->gso_size = skb_gro_len(skb);
skb->next = napi->gro_list;
napi->gro_list = skb;
}
EXPORT_SYMBOL(napi_skb_finish);
-void skb_gro_reset_offset(struct sk_buff *skb)
+static void skb_gro_reset_offset(struct sk_buff *skb)
{
+ const struct skb_shared_info *pinfo = skb_shinfo(skb);
+ const skb_frag_t *frag0 = &pinfo->frags[0];
+
NAPI_GRO_CB(skb)->data_offset = 0;
NAPI_GRO_CB(skb)->frag0 = NULL;
NAPI_GRO_CB(skb)->frag0_len = 0;
if (skb->mac_header == skb->tail &&
- !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
- NAPI_GRO_CB(skb)->frag0 =
- skb_frag_address(&skb_shinfo(skb)->frags[0]);
- NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
+ pinfo->nr_frags &&
+ !PageHighMem(skb_frag_page(frag0))) {
+ NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
+ NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
}
}
-EXPORT_SYMBOL(skb_gro_reset_offset);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
return;
- napi_gro_flush(n);
+ napi_gro_flush(n, false);
local_irq_save(flags);
__napi_complete(n);
local_irq_restore(flags);
local_irq_enable();
napi_complete(n);
local_irq_disable();
- } else
+ } else {
+ if (n->gro_list) {
+ /* flush too old packets
+ * If HZ < 1000, flush all packets.
+ */
+ local_irq_enable();
+ napi_gro_flush(n, HZ >= 1000);
+ local_irq_disable();
+ }
list_move_tail(&n->poll_list, &sd->poll_list);
+ }
}
netpoll_poll_unlock(have);
goto out;
/* Ensure the device has been registrered */
- err = -EINVAL;
if (dev->reg_state != NETREG_REGISTERED)
goto out;
for (i = 0; i < PTYPE_HASH_SIZE; i++)
INIT_LIST_HEAD(&ptype_base[i]);
+ INIT_LIST_HEAD(&offload_base);
+
if (register_pernet_subsys(&netdev_net_ops))
goto out;