xen-netback: disable rogue vif in kthread context
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 8518a0d1f6f9e7a6fb47cc5037366e880fb0e49e..3f021e054ba1cc004dacde3f82df06a4e90e03a2 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -56,7 +56,7 @@ bool separate_tx_rx_irq = 1;
 module_param(separate_tx_rx_irq, bool, 0644);
 
 /* When guest ring is filled up, qdisc queues the packets for us, but we have
- * to timeout them, otherwise other guests' packets can get stucked there
+ * to timeout them, otherwise other guests' packets can get stuck there
  */
 unsigned int rx_drain_timeout_msecs = 10000;
 module_param(rx_drain_timeout_msecs, uint, 0444);
@@ -99,6 +99,9 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif,
        return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
 }
 
+#define callback_param(vif, pending_idx) \
+       (vif->pending_tx_info[pending_idx].callback_struct)
+
 /* Find the containing VIF's structure from a pointer in pending_tx_info array
  */
 static inline struct xenvif* ubuf_to_vif(struct ubuf_info *ubuf)
@@ -188,8 +191,8 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
         * into multiple copies tend to give large frags their
         * own buffers as before.
         */
-       if ((offset + size > MAX_BUFFER_OFFSET) &&
-           (size <= MAX_BUFFER_OFFSET) && offset && !head)
+       BUG_ON(size > MAX_BUFFER_OFFSET);
+       if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head)
                return true;
 
        return false;
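
For illustration only, here is a standalone rendering of the simplified test above: the new BUG_ON() documents that callers never pass a chunk larger than one buffer, so the old (size <= MAX_BUFFER_OFFSET) clause becomes an assertion rather than part of the decision. The 4096 value and the ex_ names below merely stand in for MAX_BUFFER_OFFSET and the driver function; none of this is driver code.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define EX_MAX_BUFFER_OFFSET 4096ul     /* stand-in for MAX_BUFFER_OFFSET */

static bool ex_start_new_rx_buffer(unsigned long offset,
                                   unsigned long size, bool head)
{
        assert(size <= EX_MAX_BUFFER_OFFSET);   /* mirrors the new BUG_ON() */

        /* Start a fresh buffer only when the chunk would spill past the
         * end of the current one, and we are neither at the start of a
         * buffer nor copying the packet head.
         */
        return offset + size > EX_MAX_BUFFER_OFFSET && offset && !head;
}

int main(void)
{
        printf("%d\n", ex_start_new_rx_buffer(3000, 2000, false)); /* 1: spills over */
        printf("%d\n", ex_start_new_rx_buffer(0, 4096, false));    /* 0: fits exactly */
        return 0;
}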
@@ -238,7 +241,7 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
        struct gnttab_copy *copy_gop;
        struct xenvif_rx_meta *meta;
        unsigned long bytes;
-       int gso_type;
+       int gso_type = XEN_NETIF_GSO_TYPE_NONE;
 
        /* Data must not cross a page boundary. */
        BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));
@@ -304,12 +307,12 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
                }
 
                /* Leave a gap for the GSO descriptor. */
-               if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
-                       gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
-               else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
-                       gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
-               else
-                       gso_type = XEN_NETIF_GSO_TYPE_NONE;
+               if (skb_is_gso(skb)) {
+                       if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+                               gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+                       else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+                               gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
+               }
 
                if (*head && ((1 << gso_type) & vif->gso_mask))
                        vif->rx.req_cons++;
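
This hunk and the next two apply the same pattern: gso_type now starts out as XEN_NETIF_GSO_TYPE_NONE and the per-protocol bits are only inspected when skb_is_gso() is true, i.e. when gso_size is non-zero; that is also why the cached gso_size local can be dropped further down in favour of reading skb_shinfo(skb)->gso_size directly. A minimal userspace sketch of the pattern follows; every ex_ name and constant is a stand-in, not a kernel definition.

#include <stdio.h>

enum ex_gso_type { EX_GSO_NONE, EX_GSO_TCPV4, EX_GSO_TCPV6 };

struct ex_skb {
        unsigned short gso_size;        /* 0 means "not a GSO skb" */
        unsigned int gso_type_bits;     /* bit 0 ~ TCPv4, bit 1 ~ TCPv6 */
};

/* Default to "none"; only map the type bits when the skb is GSO at all. */
static enum ex_gso_type ex_map_gso(const struct ex_skb *skb)
{
        enum ex_gso_type type = EX_GSO_NONE;

        if (skb->gso_size) {            /* analogue of skb_is_gso() */
                if (skb->gso_type_bits & 1)
                        type = EX_GSO_TCPV4;
                else if (skb->gso_type_bits & 2)
                        type = EX_GSO_TCPV6;
        }
        return type;
}

int main(void)
{
        struct ex_skb gso = { .gso_size = 1448, .gso_type_bits = 1 };
        struct ex_skb plain = { .gso_size = 0, .gso_type_bits = 1 };

        /* Prints "1 0": type bits alone no longer select a GSO type. */
        printf("%d %d\n", ex_map_gso(&gso), ex_map_gso(&plain));
        return 0;
}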
@@ -343,22 +346,18 @@ static int xenvif_gop_skb(struct sk_buff *skb,
        int head = 1;
        int old_meta_prod;
        int gso_type;
-       int gso_size;
        struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg;
        grant_ref_t foreign_grefs[MAX_SKB_FRAGS];
        struct xenvif *foreign_vif = NULL;
 
        old_meta_prod = npo->meta_prod;
 
-       if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
-               gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
-               gso_size = skb_shinfo(skb)->gso_size;
-       } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
-               gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
-               gso_size = skb_shinfo(skb)->gso_size;
-       } else {
-               gso_type = XEN_NETIF_GSO_TYPE_NONE;
-               gso_size = 0;
+       gso_type = XEN_NETIF_GSO_TYPE_NONE;
+       if (skb_is_gso(skb)) {
+               if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+                       gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+               else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+                       gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
        }
 
        /* Set up a GSO prefix descriptor, if necessary */
@@ -366,7 +365,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
                req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
                meta = npo->meta + npo->meta_prod++;
                meta->gso_type = gso_type;
-               meta->gso_size = gso_size;
+               meta->gso_size = skb_shinfo(skb)->gso_size;
                meta->size = 0;
                meta->id = req->id;
        }
@@ -376,7 +375,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 
        if ((1 << gso_type) & vif->gso_mask) {
                meta->gso_type = gso_type;
-               meta->gso_size = gso_size;
+               meta->gso_size = skb_shinfo(skb)->gso_size;
        } else {
                meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
                meta->gso_size = 0;
@@ -512,6 +511,8 @@ static void xenvif_rx_action(struct xenvif *vif)
 
        while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
                RING_IDX max_slots_needed;
+               RING_IDX old_req_cons;
+               RING_IDX ring_slots_used;
                int i;
 
                /* We need a cheap worse case estimate for the number of
@@ -523,11 +524,31 @@ static void xenvif_rx_action(struct xenvif *vif)
                                                PAGE_SIZE);
                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                        unsigned int size;
+                       unsigned int offset;
+
                        size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
-                       max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE);
+                       offset = skb_shinfo(skb)->frags[i].page_offset;
+
+                       /* For a worse-case estimate we need to factor in
+                        * the fragment page offset as this will affect the
+                        * number of times xenvif_gop_frag_copy() will
+                        * call start_new_rx_buffer().
+                        */
+                       max_slots_needed += DIV_ROUND_UP(offset + size,
+                                                        PAGE_SIZE);
                }
-               if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
-                   skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+
+               /* To avoid the estimate becoming too pessimal for some
+                * frontends that limit posted rx requests, cap the estimate
+                * at MAX_SKB_FRAGS.
+                */
+               if (max_slots_needed > MAX_SKB_FRAGS)
+                       max_slots_needed = MAX_SKB_FRAGS;
+
+               /* We may need one more slot for GSO metadata */
+               if (skb_is_gso(skb) &&
+                  (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
+                   skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
                        max_slots_needed++;
 
                /* If the skb may not fit then bail out now */
@@ -539,8 +560,11 @@ static void xenvif_rx_action(struct xenvif *vif)
                } else
                        vif->rx_last_skb_slots = 0;
 
+               old_req_cons = vif->rx.req_cons;
                XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo);
-               BUG_ON(XENVIF_RX_CB(skb)->meta_slots_used > max_slots_needed);
+               ring_slots_used = vif->rx.req_cons - old_req_cons;
+
+               BUG_ON(ring_slots_used > max_slots_needed);
 
                __skb_queue_tail(&rxq, skb);
        }
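
The reworked estimate above matters because a fragment smaller than a page can still straddle a page boundary once its page_offset is taken into account, and the BUG_ON() now compares the ring slots actually consumed (the req_cons delta) against that estimate rather than the meta slots. A worked example with made-up numbers, assuming 4096-byte pages and a 3000-byte fragment that starts at page_offset 3000:

#include <stdio.h>

#define EX_PAGE_SIZE 4096u
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned int size = 3000, offset = 3000;

        /* Old formula: the fragment is smaller than a page, so one slot... */
        unsigned int old_est = DIV_ROUND_UP(size, EX_PAGE_SIZE);

        /* ...but starting at offset 3000 it actually spans two pages. */
        unsigned int new_est = DIV_ROUND_UP(offset + size, EX_PAGE_SIZE);

        printf("old estimate: %u slot(s), offset-aware estimate: %u slot(s)\n",
               old_est, new_est);       /* prints 1 vs. 2 */
        return 0;
}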
@@ -687,7 +711,8 @@ static void xenvif_tx_err(struct xenvif *vif,
 static void xenvif_fatal_tx_err(struct xenvif *vif)
 {
        netdev_err(vif->dev, "fatal error; disabling device\n");
-       xenvif_carrier_off(vif);
+       vif->disabled = true;
+       xenvif_kick_thread(vif);
 }
 
 static int xenvif_count_requests(struct xenvif *vif,
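
Per the comment added in the final hunk of this patch, the interface cannot be disabled from softirq context, so xenvif_fatal_tx_err() now only records the fact (vif->disabled) and wakes the RX kthread, which performs xenvif_carrier_off() later. Below is a rough userspace analogue of that "set a flag and wake a worker" pattern, written with pthreads; all names are illustrative and none of it is driver code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wq = PTHREAD_COND_INITIALIZER;
static bool disabled;   /* analogue of vif->disabled */
static bool stop;

/* Called from the context that must not do the teardown itself. */
static void ex_fatal_error(void)
{
        pthread_mutex_lock(&lock);
        disabled = true;                /* just record the request... */
        pthread_cond_signal(&wq);       /* ...and kick the worker */
        pthread_mutex_unlock(&lock);
}

/* Analogue of the kthread: the only place the teardown happens. */
static void *ex_worker(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&lock);
        for (;;) {
                while (!disabled && !stop)
                        pthread_cond_wait(&wq, &lock);
                if (disabled) {
                        disabled = false;
                        printf("carrier off\n");  /* safe to sleep/lock here */
                }
                if (stop)
                        break;
        }
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, ex_worker, NULL);
        ex_fatal_error();

        pthread_mutex_lock(&lock);
        stop = true;
        pthread_cond_signal(&wq);
        pthread_mutex_unlock(&lock);
        pthread_join(t, NULL);
        return 0;
}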
@@ -1023,12 +1048,12 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
                /* If this is not the first frag, chain it to the previous*/
                if (unlikely(prev_pending_idx == INVALID_PENDING_IDX))
                        skb_shinfo(skb)->destructor_arg =
-                               &vif->pending_tx_info[pending_idx].callback_struct;
+                               &callback_param(vif, pending_idx);
                else if (likely(pending_idx != prev_pending_idx))
-                       vif->pending_tx_info[prev_pending_idx].callback_struct.ctx =
-                               &(vif->pending_tx_info[pending_idx].callback_struct);
+                       callback_param(vif, prev_pending_idx).ctx =
+                               &callback_param(vif, pending_idx);
 
-               vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
+               callback_param(vif, pending_idx).ctx = NULL;
                prev_pending_idx = pending_idx;
 
                txp = &vif->pending_tx_info[pending_idx].req;
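
Besides switching to the callback_param() shorthand, this hunk is building a singly linked list: the skb's destructor_arg points at the first pending slot's callback struct, each struct's ctx field points to the next one, and the tail's ctx stays NULL. A toy, self-contained version of that chaining follows; the struct below is a stand-in for the example, not the kernel's struct ubuf_info.

#include <stddef.h>
#include <stdio.h>

struct toy_ubuf {
        void *ctx;              /* next node in the chain, or NULL */
        unsigned int slot;      /* which pending slot this represents */
};

int main(void)
{
        struct toy_ubuf slots[3] = { { NULL, 11 }, { NULL, 42 }, { NULL, 7 } };
        struct toy_ubuf *destructor_arg = NULL; /* head of the chain */
        struct toy_ubuf *prev = NULL;

        /* Mirror the "first frag vs. later frag" logic above. */
        for (int i = 0; i < 3; i++) {
                if (!prev)
                        destructor_arg = &slots[i];     /* first frag */
                else
                        prev->ctx = &slots[i];          /* chain to previous */
                slots[i].ctx = NULL;                    /* current tail */
                prev = &slots[i];
        }

        /* Walking the chain visits the slots in frag order: 11, 42, 7. */
        for (struct toy_ubuf *u = destructor_arg; u; u = u->ctx)
                printf("slot %u\n", u->slot);
        return 0;
}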
@@ -1170,8 +1195,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
        struct sk_buff *skb;
        int ret;
 
-       while (xenvif_tx_pending_slots_available(vif) &&
-              (skb_queue_len(&vif->tx_queue) < budget)) {
+       while (skb_queue_len(&vif->tx_queue) < budget) {
                struct xen_netif_tx_request txreq;
                struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
                struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
@@ -1189,7 +1213,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
                                   vif->tx.sring->req_prod, vif->tx.req_cons,
                                   XEN_NETIF_TX_RING_SIZE);
                        xenvif_fatal_tx_err(vif);
-                       continue;
+                       break;
                }
 
                work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx);
@@ -1399,13 +1423,13 @@ static int xenvif_tx_submit(struct xenvif *vif)
                memcpy(skb->data,
                       (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
                       data_len);
-               vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
+               callback_param(vif, pending_idx).ctx = NULL;
                if (data_len < txp->size) {
                        /* Append the packet payload as a fragment. */
                        txp->offset += data_len;
                        txp->size -= data_len;
                        skb_shinfo(skb)->destructor_arg =
-                               &vif->pending_tx_info[pending_idx].callback_struct;
+                               &callback_param(vif, pending_idx);
                } else {
                        /* Schedule a response immediately. */
                        xenvif_idx_unmap(vif, pending_idx);
@@ -1511,13 +1535,6 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
        wake_up(&vif->dealloc_wq);
        spin_unlock_irqrestore(&vif->callback_lock, flags);
 
-       if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx) &&
-           xenvif_tx_pending_slots_available(vif)) {
-               local_bh_disable();
-               napi_schedule(&vif->napi);
-               local_bh_enable();
-       }
-
        if (likely(zerocopy_success))
                vif->tx_zerocopy_success++;
        else
@@ -1556,7 +1573,6 @@ static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
                                            idx_to_kaddr(vif, pending_idx),
                                            GNTMAP_host_map,
                                            vif->grant_tx_handle[pending_idx]);
-                       /* Btw. already unmapped? */
                        xenvif_grant_handle_reset(vif, pending_idx);
                        ++gop;
                }
@@ -1572,7 +1588,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
                                        vif->pages_to_unmap,
                                        gop - vif->tx_unmap_ops);
                if (ret) {
-                       netdev_err(vif->dev, "Unmap fail: nr_ops %x ret %d\n",
+                       netdev_err(vif->dev, "Unmap fail: nr_ops %tx ret %d\n",
                                   gop - vif->tx_unmap_ops, ret);
                        for (i = 0; i < gop - vif->tx_unmap_ops; ++i) {
                                if (gop[i].status != GNTST_okay)
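
The format-string change above is a type fix: gop - vif->tx_unmap_ops is a pointer difference, i.e. a ptrdiff_t, and C's 't' length modifier (%td, %tx) is the matching qualifier, whereas plain %x expects an unsigned int. A trivial userspace illustration of the same idiom:

#include <stddef.h>
#include <stdio.h>

int main(void)
{
        int ops[8];
        int *first = &ops[0];
        int *cur = &ops[5];
        ptrdiff_t nr_ops = cur - first; /* pointer subtraction yields ptrdiff_t */

        /* The 't' modifier sizes the conversion correctly for ptrdiff_t. */
        printf("nr_ops: %td (hex %tx)\n", nr_ops, nr_ops);
        return 0;
}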
@@ -1689,12 +1705,20 @@ void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx)
                            idx_to_kaddr(vif, pending_idx),
                            GNTMAP_host_map,
                            vif->grant_tx_handle[pending_idx]);
-       /* Btw. already unmapped? */
        xenvif_grant_handle_reset(vif, pending_idx);
 
        ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
                                &vif->mmap_pages[pending_idx], 1);
-       BUG_ON(ret);
+       if (ret) {
+               netdev_err(vif->dev,
+                          "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: %x status: %d\n",
+                          ret,
+                          pending_idx,
+                          tx_unmap_op.host_addr,
+                          tx_unmap_op.handle,
+                          tx_unmap_op.status);
+               BUG();
+       }
 
        xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
 }
@@ -1709,8 +1733,7 @@ static inline int rx_work_todo(struct xenvif *vif)
 static inline int tx_work_todo(struct xenvif *vif)
 {
 
-       if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) &&
-           xenvif_tx_pending_slots_available(vif))
+       if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)))
                return 1;
 
        return 0;
@@ -1786,7 +1809,18 @@ int xenvif_kthread_guest_rx(void *data)
        while (!kthread_should_stop()) {
                wait_event_interruptible(vif->wq,
                                         rx_work_todo(vif) ||
+                                        vif->disabled ||
                                         kthread_should_stop());
+
+               /* This frontend is found to be rogue, disable it in
+                * kthread context. Currently this is only set when
+                * netback finds out frontend sends malformed packet,
+                * but we cannot disable the interface in softirq
+                * context so we defer it here.
+                */
+               if (unlikely(vif->disabled && netif_carrier_ok(vif->dev)))
+                       xenvif_carrier_off(vif);
+
                if (kthread_should_stop())
                        break;