[firefly-linux-kernel-4.4.55.git] drivers/net/vmxnet3/vmxnet3_drv.c
1 /*
2  * Linux driver for VMware's vmxnet3 ethernet NIC.
3  *
4  * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation; version 2 of the License and no later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13  * NON INFRINGEMENT. See the GNU General Public License for more
14  * details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19  *
20  * The full GNU General Public License is included in this distribution in
21  * the file called "COPYING".
22  *
23  * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com>
24  *
25  */
26
27 #include <linux/module.h>
28 #include <net/ip6_checksum.h>
29
30 #include "vmxnet3_int.h"
31
32 char vmxnet3_driver_name[] = "vmxnet3";
33 #define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
34
35 /*
36  * PCI Device ID Table
37  * Last entry must be all 0s
38  */
39 static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = {
40         {PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
41         {0}
42 };
43
44 MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
45
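/* non-zero allows the driver to use multiple tx/rx queues (multi-queue) */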
46 static int enable_mq = 1;
47
48 static void
49 vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
50
51 /*
52  *    Enable/Disable the given intr
53  */
54 static void
55 vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
56 {
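        /* a vector is unmasked by writing 0 to its IMR; the per-vector mask
         * registers are laid out 8 bytes apart in BAR0
         */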
57         VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
58 }
59
60
61 static void
62 vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
63 {
64         VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
65 }
66
67
68 /*
69  *    Enable/Disable all intrs used by the device
70  */
71 static void
72 vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
73 {
74         int i;
75
76         for (i = 0; i < adapter->intr.num_intrs; i++)
77                 vmxnet3_enable_intr(adapter, i);
78         adapter->shared->devRead.intrConf.intrCtrl &=
79                                         cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
80 }
81
82
83 static void
84 vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
85 {
86         int i;
87
88         adapter->shared->devRead.intrConf.intrCtrl |=
89                                         cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
90         for (i = 0; i < adapter->intr.num_intrs; i++)
91                 vmxnet3_disable_intr(adapter, i);
92 }
93
94
95 static void
96 vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
97 {
98         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
99 }
100
101
102 static bool
103 vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
104 {
105         return tq->stopped;
106 }
107
108
109 static void
110 vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
111 {
112         tq->stopped = false;
113         netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
114 }
115
116
117 static void
118 vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
119 {
120         tq->stopped = false;
121         netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
122 }
123
124
125 static void
126 vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
127 {
128         tq->stopped = true;
129         tq->num_stop++;
130         netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
131 }
132
133
134 /*
135  * Check the link state. This may start or stop the tx queue.
136  */
137 static void
138 vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
139 {
140         u32 ret;
141         int i;
142         unsigned long flags;
143
144         spin_lock_irqsave(&adapter->cmd_lock, flags);
145         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
146         ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
147         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
148
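        /* GET_LINK returns the link state in bit 0 and the link speed, in
         * Mbps, in the upper 16 bits
         */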
149         adapter->link_speed = ret >> 16;
150         if (ret & 1) { /* Link is up. */
151                 netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
152                             adapter->link_speed);
153                 if (!netif_carrier_ok(adapter->netdev))
154                         netif_carrier_on(adapter->netdev);
155
156                 if (affectTxQueue) {
157                         for (i = 0; i < adapter->num_tx_queues; i++)
158                                 vmxnet3_tq_start(&adapter->tx_queue[i],
159                                                  adapter);
160                 }
161         } else {
162                 netdev_info(adapter->netdev, "NIC Link is Down\n");
163                 if (netif_carrier_ok(adapter->netdev))
164                         netif_carrier_off(adapter->netdev);
165
166                 if (affectTxQueue) {
167                         for (i = 0; i < adapter->num_tx_queues; i++)
168                                 vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
169                 }
170         }
171 }
172
173 static void
174 vmxnet3_process_events(struct vmxnet3_adapter *adapter)
175 {
176         int i;
177         unsigned long flags;
178         u32 events = le32_to_cpu(adapter->shared->ecr);
179         if (!events)
180                 return;
181
182         vmxnet3_ack_events(adapter, events);
183
184         /* Check if link state has changed */
185         if (events & VMXNET3_ECR_LINK)
186                 vmxnet3_check_link(adapter, true);
187
188         /* Check if there is an error on xmit/recv queues */
189         if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
190                 spin_lock_irqsave(&adapter->cmd_lock, flags);
191                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
192                                        VMXNET3_CMD_GET_QUEUE_STATUS);
193                 spin_unlock_irqrestore(&adapter->cmd_lock, flags);
194
195                 for (i = 0; i < adapter->num_tx_queues; i++)
196                         if (adapter->tqd_start[i].status.stopped)
197                                 dev_err(&adapter->netdev->dev,
198                                         "%s: tq[%d] error 0x%x\n",
199                                         adapter->netdev->name, i, le32_to_cpu(
200                                         adapter->tqd_start[i].status.error));
201                 for (i = 0; i < adapter->num_rx_queues; i++)
202                         if (adapter->rqd_start[i].status.stopped)
203                                 dev_err(&adapter->netdev->dev,
204                                         "%s: rq[%d] error 0x%x\n",
205                                         adapter->netdev->name, i,
206                                         le32_to_cpu(
207                                         adapter->rqd_start[i].status.error));
207
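                /* defer recovery of the stopped queue(s) to the adapter's work item */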
208                 schedule_work(&adapter->work);
209         }
210 }
211
212 #ifdef __BIG_ENDIAN_BITFIELD
213 /*
214  * The device expects the bitfields in shared structures to be written in
215  * little endian. When the CPU is big endian, the following routines are used
216  * to read from and write to the device ABI correctly.
217  * The general technique used here is: double word bitfields are defined in
218  * the opposite order for big endian architectures. Before reading them, the
219  * driver translates the complete double word using le32_to_cpu. Similarly,
220  * after the driver writes into the bitfields, cpu_to_le32 is used to
221  * translate the double words into the required format.
222  * To avoid touching bits in the shared structure more than once, temporary
223  * descriptors are used. These are passed as srcDesc to the following functions.
224  */
225 static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
226                                 struct Vmxnet3_RxDesc *dstDesc)
227 {
228         u32 *src = (u32 *)srcDesc + 2;
229         u32 *dst = (u32 *)dstDesc + 2;
230         dstDesc->addr = le64_to_cpu(srcDesc->addr);
231         *dst = le32_to_cpu(*src);
232         dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
233 }
234
235 static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
236                                struct Vmxnet3_TxDesc *dstDesc)
237 {
238         int i;
239         u32 *src = (u32 *)(srcDesc + 1);
240         u32 *dst = (u32 *)(dstDesc + 1);
241
242         /* Working backwards so that the gen bit is set at the end. */
243         for (i = 2; i > 0; i--) {
244                 src--;
245                 dst--;
246                 *dst = cpu_to_le32(*src);
247         }
248 }
249
250
251 static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
252                                 struct Vmxnet3_RxCompDesc *dstDesc)
253 {
254         int i = 0;
255         u32 *src = (u32 *)srcDesc;
256         u32 *dst = (u32 *)dstDesc;
257         for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
258                 *dst = le32_to_cpu(*src);
259                 src++;
260                 dst++;
261         }
262 }
263
264
265 /* Used to read bitfield values from double words. */
266 static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
267 {
268         u32 temp = le32_to_cpu(*bitfield);
269         u32 mask = ((1 << size) - 1) << pos;
270         temp &= mask;
271         temp >>= pos;
272         return temp;
273 }
274
275
276
277 #endif  /* __BIG_ENDIAN_BITFIELD */
278
279 #ifdef __BIG_ENDIAN_BITFIELD
280
281 #   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
282                         txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
283                         VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
284 #   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
285                         txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
286                         VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
287 #   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
288                         VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
289                         VMXNET3_TCD_GEN_SIZE)
290 #   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
291                         VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
292 #   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
293                         (dstrcd) = (tmp); \
294                         vmxnet3_RxCompToCPU((rcd), (tmp)); \
295                 } while (0)
296 #   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
297                         (dstrxd) = (tmp); \
298                         vmxnet3_RxDescToCPU((rxd), (tmp)); \
299                 } while (0)
300
301 #else
302
303 #   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
304 #   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
305 #   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
306 #   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
307 #   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
308 #   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)
309
310 #endif /* __BIG_ENDIAN_BITFIELD  */
311
312
313 static void
314 vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
315                      struct pci_dev *pdev)
316 {
317         if (tbi->map_type == VMXNET3_MAP_SINGLE)
318                 pci_unmap_single(pdev, tbi->dma_addr, tbi->len,
319                                  PCI_DMA_TODEVICE);
320         else if (tbi->map_type == VMXNET3_MAP_PAGE)
321                 pci_unmap_page(pdev, tbi->dma_addr, tbi->len,
322                                PCI_DMA_TODEVICE);
323         else
324                 BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
325
326         tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
327 }
328
329
330 static int
331 vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
332                   struct pci_dev *pdev, struct vmxnet3_adapter *adapter)
333 {
334         struct sk_buff *skb;
335         int entries = 0;
336
337         /* no out of order completion */
338         BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
339         BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
340
341         skb = tq->buf_info[eop_idx].skb;
342         BUG_ON(skb == NULL);
343         tq->buf_info[eop_idx].skb = NULL;
344
345         VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
346
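        /* walk from the SOP (the current next2comp) up to the EOP descriptor,
         * unmapping each buffer that made up the packet
         */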
347         while (tq->tx_ring.next2comp != eop_idx) {
348                 vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
349                                      pdev);
350
351                 /* update next2comp w/o tx_lock. Since we are marking more,
352                  * not fewer, tx ring entries as available, the worst case is
353                  * that the tx routine incorrectly re-queues a pkt due to
354                  * insufficient tx ring entries.
355                  */
356                 vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
357                 entries++;
358         }
359
360         dev_kfree_skb_any(skb);
361         return entries;
362 }
363
364
365 static int
366 vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
367                         struct vmxnet3_adapter *adapter)
368 {
369         int completed = 0;
370         union Vmxnet3_GenericDesc *gdesc;
371
372         gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
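        /* completion descriptors whose gen bit matches the ring's current gen
         * have been written back by the device and are ready to be processed
         */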
373         while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
374                 completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
375                                                &gdesc->tcd), tq, adapter->pdev,
376                                                adapter);
377
378                 vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
379                 gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
380         }
381
382         if (completed) {
383                 spin_lock(&tq->tx_lock);
384                 if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
385                              vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
386                              VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
387                              netif_carrier_ok(adapter->netdev))) {
388                         vmxnet3_tq_wake(tq, adapter);
389                 }
390                 spin_unlock(&tq->tx_lock);
391         }
392         return completed;
393 }
394
395
396 static void
397 vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
398                    struct vmxnet3_adapter *adapter)
399 {
400         int i;
401
402         while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
403                 struct vmxnet3_tx_buf_info *tbi;
404
405                 tbi = tq->buf_info + tq->tx_ring.next2comp;
406
407                 vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
408                 if (tbi->skb) {
409                         dev_kfree_skb_any(tbi->skb);
410                         tbi->skb = NULL;
411                 }
412                 vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
413         }
414
415         /* sanity check, verify all buffers are indeed unmapped and freed */
416         for (i = 0; i < tq->tx_ring.size; i++) {
417                 BUG_ON(tq->buf_info[i].skb != NULL ||
418                        tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
419         }
420
421         tq->tx_ring.gen = VMXNET3_INIT_GEN;
422         tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
423
424         tq->comp_ring.gen = VMXNET3_INIT_GEN;
425         tq->comp_ring.next2proc = 0;
426 }
427
428
429 static void
430 vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
431                    struct vmxnet3_adapter *adapter)
432 {
433         if (tq->tx_ring.base) {
434                 pci_free_consistent(adapter->pdev, tq->tx_ring.size *
435                                     sizeof(struct Vmxnet3_TxDesc),
436                                     tq->tx_ring.base, tq->tx_ring.basePA);
437                 tq->tx_ring.base = NULL;
438         }
439         if (tq->data_ring.base) {
440                 pci_free_consistent(adapter->pdev, tq->data_ring.size *
441                                     sizeof(struct Vmxnet3_TxDataDesc),
442                                     tq->data_ring.base, tq->data_ring.basePA);
443                 tq->data_ring.base = NULL;
444         }
445         if (tq->comp_ring.base) {
446                 pci_free_consistent(adapter->pdev, tq->comp_ring.size *
447                                     sizeof(struct Vmxnet3_TxCompDesc),
448                                     tq->comp_ring.base, tq->comp_ring.basePA);
449                 tq->comp_ring.base = NULL;
450         }
451         kfree(tq->buf_info);
452         tq->buf_info = NULL;
453 }
454
455
456 /* Destroy all tx queues */
457 void
458 vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
459 {
460         int i;
461
462         for (i = 0; i < adapter->num_tx_queues; i++)
463                 vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
464 }
465
466
467 static void
468 vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
469                 struct vmxnet3_adapter *adapter)
470 {
471         int i;
472
473         /* reset the tx ring contents to 0 and reset the tx ring states */
474         memset(tq->tx_ring.base, 0, tq->tx_ring.size *
475                sizeof(struct Vmxnet3_TxDesc));
476         tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
477         tq->tx_ring.gen = VMXNET3_INIT_GEN;
478
479         memset(tq->data_ring.base, 0, tq->data_ring.size *
480                sizeof(struct Vmxnet3_TxDataDesc));
481
482         /* reset the tx comp ring contents to 0 and reset comp ring states */
483         memset(tq->comp_ring.base, 0, tq->comp_ring.size *
484                sizeof(struct Vmxnet3_TxCompDesc));
485         tq->comp_ring.next2proc = 0;
486         tq->comp_ring.gen = VMXNET3_INIT_GEN;
487
488         /* reset the bookkeeping data */
489         memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
490         for (i = 0; i < tq->tx_ring.size; i++)
491                 tq->buf_info[i].map_type = VMXNET3_MAP_NONE;
492
493         /* stats are not reset */
494 }
495
496
497 static int
498 vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
499                   struct vmxnet3_adapter *adapter)
500 {
501         BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
502                tq->comp_ring.base || tq->buf_info);
503
504         tq->tx_ring.base = pci_alloc_consistent(adapter->pdev, tq->tx_ring.size
505                            * sizeof(struct Vmxnet3_TxDesc),
506                            &tq->tx_ring.basePA);
507         if (!tq->tx_ring.base) {
508                 netdev_err(adapter->netdev, "failed to allocate tx ring\n");
509                 goto err;
510         }
511
512         tq->data_ring.base = pci_alloc_consistent(adapter->pdev,
513                              tq->data_ring.size *
514                              sizeof(struct Vmxnet3_TxDataDesc),
515                              &tq->data_ring.basePA);
516         if (!tq->data_ring.base) {
517                 netdev_err(adapter->netdev, "failed to allocate data ring\n");
518                 goto err;
519         }
520
521         tq->comp_ring.base = pci_alloc_consistent(adapter->pdev,
522                              tq->comp_ring.size *
523                              sizeof(struct Vmxnet3_TxCompDesc),
524                              &tq->comp_ring.basePA);
525         if (!tq->comp_ring.base) {
526                 netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
527                 goto err;
528         }
529
530         tq->buf_info = kcalloc(tq->tx_ring.size, sizeof(tq->buf_info[0]),
531                                GFP_KERNEL);
532         if (!tq->buf_info)
533                 goto err;
534
535         return 0;
536
537 err:
538         vmxnet3_tq_destroy(tq, adapter);
539         return -ENOMEM;
540 }
541
542 static void
543 vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
544 {
545         int i;
546
547         for (i = 0; i < adapter->num_tx_queues; i++)
548                 vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
549 }
550
551 /*
552  *    Starting from ring->next2fill, allocate rx buffers for the given ring
553  *    of the rx queue and update the rx desc. Stop after @num_to_alloc buffers
554  *    are allocated or allocation fails.
555  */
556
557 static int
558 vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
559                         int num_to_alloc, struct vmxnet3_adapter *adapter)
560 {
561         int num_allocated = 0;
562         struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
563         struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
564         u32 val;
565
566         while (num_allocated <= num_to_alloc) {
567                 struct vmxnet3_rx_buf_info *rbi;
568                 union Vmxnet3_GenericDesc *gd;
569
570                 rbi = rbi_base + ring->next2fill;
571                 gd = ring->base + ring->next2fill;
572
573                 if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
574                         if (rbi->skb == NULL) {
575                                 rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
576                                                                        rbi->len,
577                                                                        GFP_KERNEL);
578                                 if (unlikely(rbi->skb == NULL)) {
579                                         rq->stats.rx_buf_alloc_failure++;
580                                         break;
581                                 }
582
583                                 rbi->dma_addr = pci_map_single(adapter->pdev,
584                                                 rbi->skb->data, rbi->len,
585                                                 PCI_DMA_FROMDEVICE);
586                         } else {
587                                 /* rx buffer skipped by the device */
588                         }
589                         val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
590                 } else {
591                         BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
592                                rbi->len  != PAGE_SIZE);
593
594                         if (rbi->page == NULL) {
595                                 rbi->page = alloc_page(GFP_ATOMIC);
596                                 if (unlikely(rbi->page == NULL)) {
597                                         rq->stats.rx_buf_alloc_failure++;
598                                         break;
599                                 }
600                                 rbi->dma_addr = pci_map_page(adapter->pdev,
601                                                 rbi->page, 0, PAGE_SIZE,
602                                                 PCI_DMA_FROMDEVICE);
603                         } else {
604                                 /* rx buffers skipped by the device */
605                         }
606                         val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
607                 }
608
609                 BUG_ON(rbi->dma_addr == 0);
610                 gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
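                /* populate the descriptor with the inverted gen bit first; it
                 * is handed to the device only when the gen bit is set below
                 */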
611                 gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
612                                            | val | rbi->len);
613
614                 /* Fill the last buffer but don't mark it ready, or else the
615                  * device will think that the queue is full */
616                 if (num_allocated == num_to_alloc)
617                         break;
618
619                 gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
620                 num_allocated++;
621                 vmxnet3_cmd_ring_adv_next2fill(ring);
622         }
623
624         netdev_dbg(adapter->netdev,
625                 "alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
626                 num_allocated, ring->next2fill, ring->next2comp);
627
628         /* so that the device can distinguish a full ring from an empty ring */
629         BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);
630
631         return num_allocated;
632 }
633
634
635 static void
636 vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
637                     struct vmxnet3_rx_buf_info *rbi)
638 {
639         struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
640                 skb_shinfo(skb)->nr_frags;
641
642         BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
643
644         __skb_frag_set_page(frag, rbi->page);
645         frag->page_offset = 0;
646         skb_frag_size_set(frag, rcd->len);
647         skb->data_len += rcd->len;
648         skb->truesize += PAGE_SIZE;
649         skb_shinfo(skb)->nr_frags++;
650 }
651
652
653 static void
654 vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
655                 struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
656                 struct vmxnet3_adapter *adapter)
657 {
658         u32 dw2, len;
659         unsigned long buf_offset;
660         int i;
661         union Vmxnet3_GenericDesc *gdesc;
662         struct vmxnet3_tx_buf_info *tbi = NULL;
663
664         BUG_ON(ctx->copy_size > skb_headlen(skb));
665
666         /* use the previous gen bit for the SOP desc */
667         dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
668
669         ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
670         gdesc = ctx->sop_txd; /* both loops below can be skipped */
671
672         /* no need to map the buffer if headers are copied */
673         if (ctx->copy_size) {
674                 ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
675                                         tq->tx_ring.next2fill *
676                                         sizeof(struct Vmxnet3_TxDataDesc));
677                 ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
678                 ctx->sop_txd->dword[3] = 0;
679
680                 tbi = tq->buf_info + tq->tx_ring.next2fill;
681                 tbi->map_type = VMXNET3_MAP_NONE;
682
683                 netdev_dbg(adapter->netdev,
684                         "txd[%u]: 0x%Lx 0x%x 0x%x\n",
685                         tq->tx_ring.next2fill,
686                         le64_to_cpu(ctx->sop_txd->txd.addr),
687                         ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
688                 vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
689
690                 /* use the right gen for non-SOP desc */
691                 dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
692         }
693
694         /* linear part can use multiple tx desc if it's big */
695         len = skb_headlen(skb) - ctx->copy_size;
696         buf_offset = ctx->copy_size;
697         while (len) {
698                 u32 buf_size;
699
700                 if (len < VMXNET3_MAX_TX_BUF_SIZE) {
701                         buf_size = len;
702                         dw2 |= len;
703                 } else {
704                         buf_size = VMXNET3_MAX_TX_BUF_SIZE;
705                         /* spec says that for TxDesc.len, 0 == 2^14 */
706                 }
707
708                 tbi = tq->buf_info + tq->tx_ring.next2fill;
709                 tbi->map_type = VMXNET3_MAP_SINGLE;
710                 tbi->dma_addr = pci_map_single(adapter->pdev,
711                                 skb->data + buf_offset, buf_size,
712                                 PCI_DMA_TODEVICE);
713
714                 tbi->len = buf_size;
715
716                 gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
717                 BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
718
719                 gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
720                 gdesc->dword[2] = cpu_to_le32(dw2);
721                 gdesc->dword[3] = 0;
722
723                 netdev_dbg(adapter->netdev,
724                         "txd[%u]: 0x%Lx 0x%x 0x%x\n",
725                         tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
726                         le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
727                 vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
728                 dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
729
730                 len -= buf_size;
731                 buf_offset += buf_size;
732         }
733
734         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
735                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
736                 u32 buf_size;
737
738                 buf_offset = 0;
739                 len = skb_frag_size(frag);
740                 while (len) {
741                         tbi = tq->buf_info + tq->tx_ring.next2fill;
742                         if (len < VMXNET3_MAX_TX_BUF_SIZE) {
743                                 buf_size = len;
744                                 dw2 |= len;
745                         } else {
746                                 buf_size = VMXNET3_MAX_TX_BUF_SIZE;
747                                 /* spec says that for TxDesc.len, 0 == 2^14 */
748                         }
749                         tbi->map_type = VMXNET3_MAP_PAGE;
750                         tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
751                                                          buf_offset, buf_size,
752                                                          DMA_TO_DEVICE);
753
754                         tbi->len = buf_size;
755
756                         gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
757                         BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
758
759                         gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
760                         gdesc->dword[2] = cpu_to_le32(dw2);
761                         gdesc->dword[3] = 0;
762
763                         netdev_dbg(adapter->netdev,
764                                 "txd[%u]: 0x%llu %u %u\n",
765                                 tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
766                                 le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
767                         vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
768                         dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
769
770                         len -= buf_size;
771                         buf_offset += buf_size;
772                 }
773         }
774
775         ctx->eop_txd = gdesc;
776
777         /* set the last buf_info for the pkt */
778         tbi->skb = skb;
779         tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
780 }
781
782
783 /* Init all tx queues */
784 static void
785 vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
786 {
787         int i;
788
789         for (i = 0; i < adapter->num_tx_queues; i++)
790                 vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
791 }
792
793
794 /*
795  *    parse and copy relevant protocol headers:
796  *      For a tso pkt, relevant headers are L2/3/4 including options
797  *      For a pkt requesting csum offloading, they are L2/3 and may include L4
798  *      if it's a TCP/UDP pkt
799  *
800  * Returns:
801  *    -1:  error happens during parsing
802  *     0:  protocol headers parsed, but too big to be copied
803  *     1:  protocol headers parsed and copied
804  *
805  * Other effects:
806  *    1. related *ctx fields are updated.
807  *    2. ctx->copy_size is # of bytes copied
808  *    3. the portion copied is guaranteed to be in the linear part
809  *
810  */
811 static int
812 vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
813                            struct vmxnet3_tx_ctx *ctx,
814                            struct vmxnet3_adapter *adapter)
815 {
816         struct Vmxnet3_TxDataDesc *tdd;
817
818         if (ctx->mss) { /* TSO */
819                 ctx->eth_ip_hdr_size = skb_transport_offset(skb);
820                 ctx->l4_hdr_size = tcp_hdrlen(skb);
821                 ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
822         } else {
823                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
824                         ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
825
826                         if (ctx->ipv4) {
827                                 const struct iphdr *iph = ip_hdr(skb);
828
829                                 if (iph->protocol == IPPROTO_TCP)
830                                         ctx->l4_hdr_size = tcp_hdrlen(skb);
831                                 else if (iph->protocol == IPPROTO_UDP)
832                                         ctx->l4_hdr_size = sizeof(struct udphdr);
833                                 else
834                                         ctx->l4_hdr_size = 0;
835                         } else {
836                                 /* for simplicity, don't copy L4 headers */
837                                 ctx->l4_hdr_size = 0;
838                         }
839                         ctx->copy_size = min(ctx->eth_ip_hdr_size +
840                                          ctx->l4_hdr_size, skb->len);
841                 } else {
842                         ctx->eth_ip_hdr_size = 0;
843                         ctx->l4_hdr_size = 0;
844                         /* copy as much as allowed */
845                         ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE
846                                              , skb_headlen(skb));
847                 }
848
849                 /* make sure headers are accessible directly */
850                 if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
851                         goto err;
852         }
853
854         if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) {
855                 tq->stats.oversized_hdr++;
856                 ctx->copy_size = 0;
857                 return 0;
858         }
859
860         tdd = tq->data_ring.base + tq->tx_ring.next2fill;
861
862         memcpy(tdd->data, skb->data, ctx->copy_size);
863         netdev_dbg(adapter->netdev,
864                 "copy %u bytes to dataRing[%u]\n",
865                 ctx->copy_size, tq->tx_ring.next2fill);
866         return 1;
867
868 err:
869         return -1;
870 }
871
872
873 static void
874 vmxnet3_prepare_tso(struct sk_buff *skb,
875                     struct vmxnet3_tx_ctx *ctx)
876 {
877         struct tcphdr *tcph = tcp_hdr(skb);
878
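        /* seed tcph->check with the pseudo-header checksum (length omitted) so
         * the device can finalize the checksum of each generated segment
         */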
879         if (ctx->ipv4) {
880                 struct iphdr *iph = ip_hdr(skb);
881
882                 iph->check = 0;
883                 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
884                                                  IPPROTO_TCP, 0);
885         } else {
886                 struct ipv6hdr *iph = ipv6_hdr(skb);
887
888                 tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
889                                                IPPROTO_TCP, 0);
890         }
891 }
892
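/* Worst-case estimate of the number of tx descriptors needed for this skb:
 * one per VMXNET3_MAX_TX_BUF_SIZE chunk of the linear part and of each page
 * frag, plus one extra for the header descriptor that may point into the
 * data ring.
 */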
893 static int txd_estimate(const struct sk_buff *skb)
894 {
895         int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
896         int i;
897
898         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
899                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
900
901                 count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
902         }
903         return count;
904 }
905
906 /*
907  * Transmits a pkt through a given tq
908  * Returns:
909  *    NETDEV_TX_OK:      descriptors are set up successfully
910  *    NETDEV_TX_OK:      error occurred, the pkt is dropped
911  *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
912  *
913  * Side-effects:
914  *    1. tx ring may be changed
915  *    2. tq stats may be updated accordingly
916  *    3. shared->txNumDeferred may be updated
917  */
918
919 static int
920 vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
921                 struct vmxnet3_adapter *adapter, struct net_device *netdev)
922 {
923         int ret;
924         u32 count;
925         unsigned long flags;
926         struct vmxnet3_tx_ctx ctx;
927         union Vmxnet3_GenericDesc *gdesc;
928 #ifdef __BIG_ENDIAN_BITFIELD
929         /* Use temporary descriptor to avoid touching bits multiple times */
930         union Vmxnet3_GenericDesc tempTxDesc;
931 #endif
932
933         count = txd_estimate(skb);
934
935         ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
936
937         ctx.mss = skb_shinfo(skb)->gso_size;
938         if (ctx.mss) {
939                 if (skb_header_cloned(skb)) {
940                         if (unlikely(pskb_expand_head(skb, 0, 0,
941                                                       GFP_ATOMIC) != 0)) {
942                                 tq->stats.drop_tso++;
943                                 goto drop_pkt;
944                         }
945                         tq->stats.copy_skb_header++;
946                 }
947                 vmxnet3_prepare_tso(skb, &ctx);
948         } else {
949                 if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
950
951                         /* non-tso pkts must not use more than
952                          * VMXNET3_MAX_TXD_PER_PKT entries
953                          */
954                         if (skb_linearize(skb) != 0) {
955                                 tq->stats.drop_too_many_frags++;
956                                 goto drop_pkt;
957                         }
958                         tq->stats.linearized++;
959
960                         /* recalculate the # of descriptors to use */
961                         count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
962                 }
963         }
964
965         spin_lock_irqsave(&tq->tx_lock, flags);
966
967         if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
968                 tq->stats.tx_ring_full++;
969                 netdev_dbg(adapter->netdev,
970                         "tx queue stopped on %s, next2comp %u"
971                         " next2fill %u\n", adapter->netdev->name,
972                         tq->tx_ring.next2comp, tq->tx_ring.next2fill);
973
974                 vmxnet3_tq_stop(tq, adapter);
975                 spin_unlock_irqrestore(&tq->tx_lock, flags);
976                 return NETDEV_TX_BUSY;
977         }
978
979
980         ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
981         if (ret >= 0) {
982                 BUG_ON(ret <= 0 && ctx.copy_size != 0);
983                 /* hdrs parsed, check against other limits */
984                 if (ctx.mss) {
985                         if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
986                                      VMXNET3_MAX_TX_BUF_SIZE)) {
987                                 goto hdr_too_big;
988                         }
989                 } else {
990                         if (skb->ip_summed == CHECKSUM_PARTIAL) {
991                                 if (unlikely(ctx.eth_ip_hdr_size +
992                                              skb->csum_offset >
993                                              VMXNET3_MAX_CSUM_OFFSET)) {
994                                         goto hdr_too_big;
995                                 }
996                         }
997                 }
998         } else {
999                 tq->stats.drop_hdr_inspect_err++;
1000                 goto unlock_drop_pkt;
1001         }
1002
1003         /* fill tx descs related to addr & len */
1004         vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);
1005
1006         /* setup the EOP desc */
1007         ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
1008
1009         /* setup the SOP desc */
1010 #ifdef __BIG_ENDIAN_BITFIELD
1011         gdesc = &tempTxDesc;
1012         gdesc->dword[2] = ctx.sop_txd->dword[2];
1013         gdesc->dword[3] = ctx.sop_txd->dword[3];
1014 #else
1015         gdesc = ctx.sop_txd;
1016 #endif
1017         if (ctx.mss) {
1018                 gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
1019                 gdesc->txd.om = VMXNET3_OM_TSO;
1020                 gdesc->txd.msscof = ctx.mss;
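                /* account one deferred packet per segment the device will
                 * generate from this TSO skb
                 */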
1021                 le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
1022                              gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
1023         } else {
1024                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1025                         gdesc->txd.hlen = ctx.eth_ip_hdr_size;
1026                         gdesc->txd.om = VMXNET3_OM_CSUM;
1027                         gdesc->txd.msscof = ctx.eth_ip_hdr_size +
1028                                             skb->csum_offset;
1029                 } else {
1030                         gdesc->txd.om = 0;
1031                         gdesc->txd.msscof = 0;
1032                 }
1033                 le32_add_cpu(&tq->shared->txNumDeferred, 1);
1034         }
1035
1036         if (vlan_tx_tag_present(skb)) {
1037                 gdesc->txd.ti = 1;
1038                 gdesc->txd.tci = vlan_tx_tag_get(skb);
1039         }
1040
1041         /* finally flips the GEN bit of the SOP desc. */
1042         gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
1043                                                   VMXNET3_TXD_GEN);
1044 #ifdef __BIG_ENDIAN_BITFIELD
1045         /* Finished updating in bitfields of Tx Desc, so write them in original
1046          * place.
1047          */
1048         vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
1049                            (struct Vmxnet3_TxDesc *)ctx.sop_txd);
1050         gdesc = ctx.sop_txd;
1051 #endif
1052         netdev_dbg(adapter->netdev,
1053                 "txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
1054                 (u32)(ctx.sop_txd -
1055                 tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
1056                 le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
1057
1058         spin_unlock_irqrestore(&tq->tx_lock, flags);
1059
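        /* ring the TXPROD doorbell only once txThreshold descriptors have been
         * queued since the last kick, to batch register writes
         */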
1060         if (le32_to_cpu(tq->shared->txNumDeferred) >=
1061                                         le32_to_cpu(tq->shared->txThreshold)) {
1062                 tq->shared->txNumDeferred = 0;
1063                 VMXNET3_WRITE_BAR0_REG(adapter,
1064                                        VMXNET3_REG_TXPROD + tq->qid * 8,
1065                                        tq->tx_ring.next2fill);
1066         }
1067
1068         return NETDEV_TX_OK;
1069
1070 hdr_too_big:
1071         tq->stats.drop_oversized_hdr++;
1072 unlock_drop_pkt:
1073         spin_unlock_irqrestore(&tq->tx_lock, flags);
1074 drop_pkt:
1075         tq->stats.drop_total++;
1076         dev_kfree_skb(skb);
1077         return NETDEV_TX_OK;
1078 }
1079
1080
1081 static netdev_tx_t
1082 vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1083 {
1084         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1085
1086         BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
1087         return vmxnet3_tq_xmit(skb,
1088                                &adapter->tx_queue[skb->queue_mapping],
1089                                adapter, netdev);
1090 }
1091
1092
1093 static void
1094 vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
1095                 struct sk_buff *skb,
1096                 union Vmxnet3_GenericDesc *gdesc)
1097 {
1098         if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
1099                 /* typical case: TCP/UDP over IP and both csums are correct */
1100                 if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
1101                                                         VMXNET3_RCD_CSUM_OK) {
1102                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1103                         BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
1104                         BUG_ON(!(gdesc->rcd.v4  || gdesc->rcd.v6));
1105                         BUG_ON(gdesc->rcd.frg);
1106                 } else {
1107                         if (gdesc->rcd.csum) {
1108                                 skb->csum = htons(gdesc->rcd.csum);
1109                                 skb->ip_summed = CHECKSUM_PARTIAL;
1110                         } else {
1111                                 skb_checksum_none_assert(skb);
1112                         }
1113                 }
1114         } else {
1115                 skb_checksum_none_assert(skb);
1116         }
1117 }
1118
1119
1120 static void
1121 vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
1122                  struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
1123 {
1124         rq->stats.drop_err++;
1125         if (!rcd->fcs)
1126                 rq->stats.drop_fcs++;
1127
1128         rq->stats.drop_total++;
1129
1130         /*
1131          * We do not unmap and chain the rx buffer to the skb.
1132          * We basically pretend this buffer is not used and will be recycled
1133          * by vmxnet3_rq_alloc_rx_buf()
1134          */
1135
1136         /*
1137          * ctx->skb may be NULL if this is the first and the only one
1138          * desc for the pkt
1139          */
1140         if (ctx->skb)
1141                 dev_kfree_skb_irq(ctx->skb);
1142
1143         ctx->skb = NULL;
1144 }
1145
1146
1147 static int
1148 vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1149                        struct vmxnet3_adapter *adapter, int quota)
1150 {
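        /* BAR0 rx producer registers, one per rx ring of the queue */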
1151         static const u32 rxprod_reg[2] = {
1152                 VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1153         };
1154         u32 num_rxd = 0;
1155         bool skip_page_frags = false;
1156         struct Vmxnet3_RxCompDesc *rcd;
1157         struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1158 #ifdef __BIG_ENDIAN_BITFIELD
1159         struct Vmxnet3_RxDesc rxCmdDesc;
1160         struct Vmxnet3_RxCompDesc rxComp;
1161 #endif
1162         vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
1163                           &rxComp);
1164         while (rcd->gen == rq->comp_ring.gen) {
1165                 struct vmxnet3_rx_buf_info *rbi;
1166                 struct sk_buff *skb, *new_skb = NULL;
1167                 struct page *new_page = NULL;
1168                 int num_to_alloc;
1169                 struct Vmxnet3_RxDesc *rxd;
1170                 u32 idx, ring_idx;
1171                 struct vmxnet3_cmd_ring *ring = NULL;
1172                 if (num_rxd >= quota) {
1173                         /* we may stop even before we see the EOP desc of
1174                          * the current pkt
1175                          */
1176                         break;
1177                 }
1178                 num_rxd++;
1179                 BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
1180                 idx = rcd->rxdIdx;
1181                 ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
1182                 ring = rq->rx_ring + ring_idx;
1183                 vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1184                                   &rxCmdDesc);
1185                 rbi = rq->buf_info[ring_idx] + idx;
1186
1187                 BUG_ON(rxd->addr != rbi->dma_addr ||
1188                        rxd->len != rbi->len);
1189
1190                 if (unlikely(rcd->eop && rcd->err)) {
1191                         vmxnet3_rx_error(rq, rcd, ctx, adapter);
1192                         goto rcd_done;
1193                 }
1194
1195                 if (rcd->sop) { /* first buf of the pkt */
1196                         BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
1197                                rcd->rqID != rq->qid);
1198
1199                         BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
1200                         BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
1201
1202                         if (unlikely(rcd->len == 0)) {
1203                                 /* Pretend the rx buffer is skipped. */
1204                                 BUG_ON(!(rcd->sop && rcd->eop));
1205                                 netdev_dbg(adapter->netdev,
1206                                         "rxRing[%u][%u] 0 length\n",
1207                                         ring_idx, idx);
1208                                 goto rcd_done;
1209                         }
1210
1211                         skip_page_frags = false;
1212                         ctx->skb = rbi->skb;
1213                         new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
1214                                                             rbi->len);
1215                         if (new_skb == NULL) {
1216                                 /* Skb allocation failed, do not hand this
1217                                  * skb over to the stack. Reuse it. Drop the existing pkt.
1218                                  */
1219                                 rq->stats.rx_buf_alloc_failure++;
1220                                 ctx->skb = NULL;
1221                                 rq->stats.drop_total++;
1222                                 skip_page_frags = true;
1223                                 goto rcd_done;
1224                         }
1225
1226                         pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len,
1227                                          PCI_DMA_FROMDEVICE);
1228
1229 #ifdef VMXNET3_RSS
1230                         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
1231                             (adapter->netdev->features & NETIF_F_RXHASH))
1232                                 ctx->skb->rxhash = le32_to_cpu(rcd->rssHash);
1233 #endif
1234                         skb_put(ctx->skb, rcd->len);
1235
1236                         /* Immediate refill */
1237                         rbi->skb = new_skb;
1238                         rbi->dma_addr = pci_map_single(adapter->pdev,
1239                                                        rbi->skb->data, rbi->len,
1240                                                        PCI_DMA_FROMDEVICE);
1241                         rxd->addr = cpu_to_le64(rbi->dma_addr);
1242                         rxd->len = rbi->len;
1243
1244                 } else {
1245                         BUG_ON(ctx->skb == NULL && !skip_page_frags);
1246
1247                         /* non SOP buffer must be type 1 in most cases */
1248                         BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1249                         BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1250
1251                         /* If an sop buffer was dropped, skip all
1252                          * following non-sop fragments. They will be reused.
1253                          */
1254                         if (skip_page_frags)
1255                                 goto rcd_done;
1256
1257                         new_page = alloc_page(GFP_ATOMIC);
1258                         if (unlikely(new_page == NULL)) {
1259                                 /* Replacement page frag could not be allocated.
1260                                  * Reuse this page. Drop the pkt and free the
1261                                  * skb which contained this page as a frag. Skip
1262                                  * processing all the following non-sop frags.
1263                                  */
1264                                 rq->stats.rx_buf_alloc_failure++;
1265                                 dev_kfree_skb(ctx->skb);
1266                                 ctx->skb = NULL;
1267                                 skip_page_frags = true;
1268                                 goto rcd_done;
1269                         }
1270
1271                         if (rcd->len) {
1272                                 pci_unmap_page(adapter->pdev,
1273                                                rbi->dma_addr, rbi->len,
1274                                                PCI_DMA_FROMDEVICE);
1275
1276                                 vmxnet3_append_frag(ctx->skb, rcd, rbi);
1277                         }
1278
1279                         /* Immediate refill */
1280                         rbi->page = new_page;
1281                         rbi->dma_addr = pci_map_page(adapter->pdev, rbi->page,
1282                                                      0, PAGE_SIZE,
1283                                                      PCI_DMA_FROMDEVICE);
1284                         rxd->addr = cpu_to_le64(rbi->dma_addr);
1285                         rxd->len = rbi->len;
1286                 }
1287
1288
1289                 skb = ctx->skb;
1290                 if (rcd->eop) {
1291                         skb->len += skb->data_len;
1292
1293                         vmxnet3_rx_csum(adapter, skb,
1294                                         (union Vmxnet3_GenericDesc *)rcd);
1295                         skb->protocol = eth_type_trans(skb, adapter->netdev);
1296
1297                         if (unlikely(rcd->ts))
1298                                 __vlan_hwaccel_put_tag(skb, rcd->tci);
1299
1300                         if (adapter->netdev->features & NETIF_F_LRO)
1301                                 netif_receive_skb(skb);
1302                         else
1303                                 napi_gro_receive(&rq->napi, skb);
1304
1305                         ctx->skb = NULL;
1306                 }
1307
1308 rcd_done:
1309                 /* device may have skipped some rx descs */
1310                 ring->next2comp = idx;
1311                 num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1312                 ring = rq->rx_ring + ring_idx;
1313                 while (num_to_alloc) {
1314                         vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1315                                           &rxCmdDesc);
1316                         BUG_ON(!rxd->addr);
1317
1318                         /* Recv desc is ready to be used by the device */
1319                         rxd->gen = ring->gen;
1320                         vmxnet3_cmd_ring_adv_next2fill(ring);
1321                         num_to_alloc--;
1322                 }
1323
1324                 /* if needed, update the register */
1325                 if (unlikely(rq->shared->updateRxProd)) {
1326                         VMXNET3_WRITE_BAR0_REG(adapter,
1327                                                rxprod_reg[ring_idx] + rq->qid * 8,
1328                                                ring->next2fill);
1329                 }
1330
1331                 vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1332                 vmxnet3_getRxComp(rcd,
1333                                   &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1334         }
1335
1336         return num_rxd;
1337 }
1338
1339
1340 static void
1341 vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1342                    struct vmxnet3_adapter *adapter)
1343 {
1344         u32 i, ring_idx;
1345         struct Vmxnet3_RxDesc *rxd;
1346
1347         for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1348                 for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1349 #ifdef __BIG_ENDIAN_BITFIELD
1350                         struct Vmxnet3_RxDesc rxDesc;
1351 #endif
1352                         vmxnet3_getRxDesc(rxd,
1353                                 &rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1354
1355                         if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1356                                         rq->buf_info[ring_idx][i].skb) {
1357                                 pci_unmap_single(adapter->pdev, rxd->addr,
1358                                                  rxd->len, PCI_DMA_FROMDEVICE);
1359                                 dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1360                                 rq->buf_info[ring_idx][i].skb = NULL;
1361                         } else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1362                                         rq->buf_info[ring_idx][i].page) {
1363                                 pci_unmap_page(adapter->pdev, rxd->addr,
1364                                                rxd->len, PCI_DMA_FROMDEVICE);
1365                                 put_page(rq->buf_info[ring_idx][i].page);
1366                                 rq->buf_info[ring_idx][i].page = NULL;
1367                         }
1368                 }
1369
1370                 rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1371                 rq->rx_ring[ring_idx].next2fill =
1372                                         rq->rx_ring[ring_idx].next2comp = 0;
1373         }
1374
1375         rq->comp_ring.gen = VMXNET3_INIT_GEN;
1376         rq->comp_ring.next2proc = 0;
1377 }
1378
1379
1380 static void
1381 vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1382 {
1383         int i;
1384
1385         for (i = 0; i < adapter->num_rx_queues; i++)
1386                 vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1387 }
1388
1389
1390 void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1391                         struct vmxnet3_adapter *adapter)
1392 {
1393         int i;
1394         int j;
1395
1396         /* all rx buffers must have already been freed */
1397         for (i = 0; i < 2; i++) {
1398                 if (rq->buf_info[i]) {
1399                         for (j = 0; j < rq->rx_ring[i].size; j++)
1400                                 BUG_ON(rq->buf_info[i][j].page != NULL);
1401                 }
1402         }
1403
1404
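        /* buf_info for both rings was allocated as a single block in
         * vmxnet3_rq_create(), so freeing buf_info[0] releases both.
         */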
1405         kfree(rq->buf_info[0]);
1406
1407         for (i = 0; i < 2; i++) {
1408                 if (rq->rx_ring[i].base) {
1409                         pci_free_consistent(adapter->pdev, rq->rx_ring[i].size
1410                                             * sizeof(struct Vmxnet3_RxDesc),
1411                                             rq->rx_ring[i].base,
1412                                             rq->rx_ring[i].basePA);
1413                         rq->rx_ring[i].base = NULL;
1414                 }
1415                 rq->buf_info[i] = NULL;
1416         }
1417
1418         if (rq->comp_ring.base) {
1419                 pci_free_consistent(adapter->pdev, rq->comp_ring.size *
1420                                     sizeof(struct Vmxnet3_RxCompDesc),
1421                                     rq->comp_ring.base, rq->comp_ring.basePA);
1422                 rq->comp_ring.base = NULL;
1423         }
1424 }
1425
1426
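/*
 * Set up buf_info, reset both rx rings and the completion ring, and
 * pre-populate the rings with rx buffers.
 */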
1427 static int
1428 vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1429                 struct vmxnet3_adapter  *adapter)
1430 {
1431         int i;
1432
1433         /* initialize buf_info */
1434         for (i = 0; i < rq->rx_ring[0].size; i++) {
1435
1436                 /* 1st buf for a pkt is skbuff */
1437                 if (i % adapter->rx_buf_per_pkt == 0) {
1438                         rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1439                         rq->buf_info[0][i].len = adapter->skb_buf_size;
1440                 } else { /* subsequent bufs for a pkt are frags */
1441                         rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1442                         rq->buf_info[0][i].len = PAGE_SIZE;
1443                 }
1444         }
1445         for (i = 0; i < rq->rx_ring[1].size; i++) {
1446                 rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1447                 rq->buf_info[1][i].len = PAGE_SIZE;
1448         }
1449
1450         /* reset internal state and allocate buffers for both rings */
1451         for (i = 0; i < 2; i++) {
1452                 rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1453
1454                 memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1455                        sizeof(struct Vmxnet3_RxDesc));
1456                 rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1457         }
1458         if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1459                                     adapter) == 0) {
1460                 /* the 1st ring must have at least 1 rx buffer */
1461                 return -ENOMEM;
1462         }
1463         vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1464
1465         /* reset the comp ring */
1466         rq->comp_ring.next2proc = 0;
1467         memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1468                sizeof(struct Vmxnet3_RxCompDesc));
1469         rq->comp_ring.gen = VMXNET3_INIT_GEN;
1470
1471         /* reset rxctx */
1472         rq->rx_ctx.skb = NULL;
1473
1474         /* stats are not reset */
1475         return 0;
1476 }
1477
1478
1479 static int
1480 vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1481 {
1482         int i, err = 0;
1483
1484         for (i = 0; i < adapter->num_rx_queues; i++) {
1485                 err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1486                 if (unlikely(err)) {
1487                         dev_err(&adapter->netdev->dev, "%s: failed to "
1488                                 "initialize rx queue %i\n",
1489                                 adapter->netdev->name, i);
1490                         break;
1491                 }
1492         }
1493         return err;
1494
1495 }
1496
1497
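/*
 * Allocate the DMA-coherent descriptor rings and the per-buffer
 * bookkeeping (buf_info) for one rx queue.
 */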
1498 static int
1499 vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1500 {
1501         int i;
1502         size_t sz;
1503         struct vmxnet3_rx_buf_info *bi;
1504
1505         for (i = 0; i < 2; i++) {
1506
1507                 sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1508                 rq->rx_ring[i].base = pci_alloc_consistent(adapter->pdev, sz,
1509                                                         &rq->rx_ring[i].basePA);
1510                 if (!rq->rx_ring[i].base) {
1511                         netdev_err(adapter->netdev,
1512                                    "failed to allocate rx ring %d\n", i);
1513                         goto err;
1514                 }
1515         }
1516
1517         sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1518         rq->comp_ring.base = pci_alloc_consistent(adapter->pdev, sz,
1519                                                   &rq->comp_ring.basePA);
1520         if (!rq->comp_ring.base) {
1521                 netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1522                 goto err;
1523         }
1524
1525         sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1526                                                    rq->rx_ring[1].size);
1527         bi = kzalloc(sz, GFP_KERNEL);
1528         if (!bi)
1529                 goto err;
1530
1531         rq->buf_info[0] = bi;
1532         rq->buf_info[1] = bi + rq->rx_ring[0].size;
1533
1534         return 0;
1535
1536 err:
1537         vmxnet3_rq_destroy(rq, adapter);
1538         return -ENOMEM;
1539 }
1540
1541
1542 static int
1543 vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1544 {
1545         int i, err = 0;
1546
1547         for (i = 0; i < adapter->num_rx_queues; i++) {
1548                 err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1549                 if (unlikely(err)) {
1550                         dev_err(&adapter->netdev->dev,
1551                                 "%s: failed to create rx queue%i\n",
1552                                 "%s: failed to create rx queue %i\n",
1553                         goto err_out;
1554                 }
1555         }
1556         return err;
1557 err_out:
1558         vmxnet3_rq_destroy_all(adapter);
1559         return err;
1560
1561 }
1562
1563 /* Multiple queue aware polling function for tx and rx */
1564
1565 static int
1566 vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1567 {
1568         int rcd_done = 0, i;
1569         if (unlikely(adapter->shared->ecr))
1570                 vmxnet3_process_events(adapter);
1571         for (i = 0; i < adapter->num_tx_queues; i++)
1572                 vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1573
1574         for (i = 0; i < adapter->num_rx_queues; i++)
1575                 rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1576                                                    adapter, budget);
1577         return rcd_done;
1578 }
1579
1580
1581 static int
1582 vmxnet3_poll(struct napi_struct *napi, int budget)
1583 {
1584         struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1585                                           struct vmxnet3_rx_queue, napi);
1586         int rxd_done;
1587
1588         rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1589
1590         if (rxd_done < budget) {
1591                 napi_complete(napi);
1592                 vmxnet3_enable_all_intrs(rx_queue->adapter);
1593         }
1594         return rxd_done;
1595 }
1596
1597 /*
1598  * NAPI polling function for MSI-X mode with multiple Rx queues
1599  * Returns the number of NAPI credits consumed (# of rx descriptors processed)
1600  */
1601
1602 static int
1603 vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1604 {
1605         struct vmxnet3_rx_queue *rq = container_of(napi,
1606                                                 struct vmxnet3_rx_queue, napi);
1607         struct vmxnet3_adapter *adapter = rq->adapter;
1608         int rxd_done;
1609
1610         /* When sharing interrupt with corresponding tx queue, process
1611          * tx completions in that queue as well
1612          */
1613         if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1614                 struct vmxnet3_tx_queue *tq =
1615                                 &adapter->tx_queue[rq - adapter->rx_queue];
1616                 vmxnet3_tq_tx_complete(tq, adapter);
1617         }
1618
1619         rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1620
1621         if (rxd_done < budget) {
1622                 napi_complete(napi);
1623                 vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1624         }
1625         return rxd_done;
1626 }
1627
1628
1629 #ifdef CONFIG_PCI_MSI
1630
1631 /*
1632  * Handle completion interrupts on tx queues
1633  * Returns whether or not the intr is handled
1634  */
1635
1636 static irqreturn_t
1637 vmxnet3_msix_tx(int irq, void *data)
1638 {
1639         struct vmxnet3_tx_queue *tq = data;
1640         struct vmxnet3_adapter *adapter = tq->adapter;
1641
1642         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1643                 vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1644
1645         /* Handle the case where only one irq is allocated for all tx queues */
1646         if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1647                 int i;
1648                 for (i = 0; i < adapter->num_tx_queues; i++) {
1649                         struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1650                         vmxnet3_tq_tx_complete(txq, adapter);
1651                 }
1652         } else {
1653                 vmxnet3_tq_tx_complete(tq, adapter);
1654         }
1655         vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1656
1657         return IRQ_HANDLED;
1658 }
1659
1660
1661 /*
1662  * Handle completion interrupts on rx queues. Returns whether or not the
1663  * intr is handled
1664  */
1665
1666 static irqreturn_t
1667 vmxnet3_msix_rx(int irq, void *data)
1668 {
1669         struct vmxnet3_rx_queue *rq = data;
1670         struct vmxnet3_adapter *adapter = rq->adapter;
1671
1672         /* disable intr if needed */
1673         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1674                 vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1675         napi_schedule(&rq->napi);
1676
1677         return IRQ_HANDLED;
1678 }
1679
1680 /*
1681  *----------------------------------------------------------------------------
1682  *
1683  * vmxnet3_msix_event --
1684  *
1685  *    vmxnet3 msix event intr handler
1686  *
1687  * Result:
1688  *    whether or not the intr is handled
1689  *
1690  *----------------------------------------------------------------------------
1691  */
1692
1693 static irqreturn_t
1694 vmxnet3_msix_event(int irq, void *data)
1695 {
1696         struct net_device *dev = data;
1697         struct vmxnet3_adapter *adapter = netdev_priv(dev);
1698
1699         /* disable intr if needed */
1700         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1701                 vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
1702
1703         if (adapter->shared->ecr)
1704                 vmxnet3_process_events(adapter);
1705
1706         vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
1707
1708         return IRQ_HANDLED;
1709 }
1710
1711 #endif /* CONFIG_PCI_MSI  */
1712
1713
1714 /* Interrupt handler for vmxnet3  */
1715 static irqreturn_t
1716 vmxnet3_intr(int irq, void *dev_id)
1717 {
1718         struct net_device *dev = dev_id;
1719         struct vmxnet3_adapter *adapter = netdev_priv(dev);
1720
1721         if (adapter->intr.type == VMXNET3_IT_INTX) {
1722                 u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
1723                 if (unlikely(icr == 0))
1724                         /* not ours */
1725                         return IRQ_NONE;
1726         }
1727
1728
1729         /* disable intr if needed */
1730         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1731                 vmxnet3_disable_all_intrs(adapter);
1732
1733         napi_schedule(&adapter->rx_queue[0].napi);
1734
1735         return IRQ_HANDLED;
1736 }
1737
1738 #ifdef CONFIG_NET_POLL_CONTROLLER
1739
1740 /* netpoll callback. */
1741 static void
1742 vmxnet3_netpoll(struct net_device *netdev)
1743 {
1744         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1745
1746         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1747                 vmxnet3_disable_all_intrs(adapter);
1748
1749         vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size);
1750         vmxnet3_enable_all_intrs(adapter);
1751
1752 }
1753 #endif  /* CONFIG_NET_POLL_CONTROLLER */
1754
1755 static int
1756 vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
1757 {
1758         struct vmxnet3_intr *intr = &adapter->intr;
1759         int err = 0, i;
1760         int vector = 0;
1761
1762 #ifdef CONFIG_PCI_MSI
1763         if (adapter->intr.type == VMXNET3_IT_MSIX) {
1764                 for (i = 0; i < adapter->num_tx_queues; i++) {
1765                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1766                                 sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
1767                                         adapter->netdev->name, vector);
1768                                 err = request_irq(
1769                                               intr->msix_entries[vector].vector,
1770                                               vmxnet3_msix_tx, 0,
1771                                               adapter->tx_queue[i].name,
1772                                               &adapter->tx_queue[i]);
1773                         } else {
1774                                 sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
1775                                         adapter->netdev->name, vector);
1776                         }
1777                         if (err) {
1778                                 dev_err(&adapter->netdev->dev,
1779                                         "Failed to request irq for MSIX, %s, "
1780                                         "error %d\n",
1781                                         adapter->tx_queue[i].name, err);
1782                                 return err;
1783                         }
1784
1785                         /* Handle the case where only one MSI-X vector was
1786                          * allocated for all tx queues */
1787                         if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1788                                 for (; i < adapter->num_tx_queues; i++)
1789                                         adapter->tx_queue[i].comp_ring.intr_idx
1790                                                                 = vector;
1791                                 vector++;
1792                                 break;
1793                         } else {
1794                                 adapter->tx_queue[i].comp_ring.intr_idx
1795                                                                 = vector++;
1796                         }
1797                 }
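                /* With BUDDYSHARE each rx queue reuses the vector of its
                 * corresponding tx queue, so restart the vector count here.
                 */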
1798                 if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
1799                         vector = 0;
1800
1801                 for (i = 0; i < adapter->num_rx_queues; i++) {
1802                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
1803                                 sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
1804                                         adapter->netdev->name, vector);
1805                         else
1806                                 sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
1807                                         adapter->netdev->name, vector);
1808                         err = request_irq(intr->msix_entries[vector].vector,
1809                                           vmxnet3_msix_rx, 0,
1810                                           adapter->rx_queue[i].name,
1811                                           &(adapter->rx_queue[i]));
1812                         if (err) {
1813                                 netdev_err(adapter->netdev,
1814                                            "Failed to request irq for MSIX, "
1815                                            "%s, error %d\n",
1816                                            adapter->rx_queue[i].name, err);
1817                                 return err;
1818                         }
1819
1820                         adapter->rx_queue[i].comp_ring.intr_idx = vector++;
1821                 }
1822
1823                 sprintf(intr->event_msi_vector_name, "%s-event-%d",
1824                         adapter->netdev->name, vector);
1825                 err = request_irq(intr->msix_entries[vector].vector,
1826                                   vmxnet3_msix_event, 0,
1827                                   intr->event_msi_vector_name, adapter->netdev);
1828                 intr->event_intr_idx = vector;
1829
1830         } else if (intr->type == VMXNET3_IT_MSI) {
1831                 adapter->num_rx_queues = 1;
1832                 err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
1833                                   adapter->netdev->name, adapter->netdev);
1834         } else {
1835 #endif
1836                 adapter->num_rx_queues = 1;
1837                 err = request_irq(adapter->pdev->irq, vmxnet3_intr,
1838                                   IRQF_SHARED, adapter->netdev->name,
1839                                   adapter->netdev);
1840 #ifdef CONFIG_PCI_MSI
1841         }
1842 #endif
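        /* 'vector' now indexes the event interrupt (it stays 0 for MSI/INTx) */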
1843         intr->num_intrs = vector + 1;
1844         if (err) {
1845                 netdev_err(adapter->netdev,
1846                            "Failed to request irq (intr type:%d), error %d\n",
1847                            intr->type, err);
1848         } else {
1849                 /* Number of rx queues will not change after this */
1850                 for (i = 0; i < adapter->num_rx_queues; i++) {
1851                         struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1852                         rq->qid = i;
1853                         rq->qid2 = i + adapter->num_rx_queues;
1854                 }
1855
1856
1857
1858                 /* init our intr settings */
1859                 for (i = 0; i < intr->num_intrs; i++)
1860                         intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
1861                 if (adapter->intr.type != VMXNET3_IT_MSIX) {
1862                         adapter->intr.event_intr_idx = 0;
1863                         for (i = 0; i < adapter->num_tx_queues; i++)
1864                                 adapter->tx_queue[i].comp_ring.intr_idx = 0;
1865                         adapter->rx_queue[0].comp_ring.intr_idx = 0;
1866                 }
1867
1868                 netdev_info(adapter->netdev,
1869                             "intr type %u, mode %u, %u vectors allocated\n",
1870                             intr->type, intr->mask_mode, intr->num_intrs);
1871         }
1872
1873         return err;
1874 }
1875
1876
1877 static void
1878 vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
1879 {
1880         struct vmxnet3_intr *intr = &adapter->intr;
1881         BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
1882
1883         switch (intr->type) {
1884 #ifdef CONFIG_PCI_MSI
1885         case VMXNET3_IT_MSIX:
1886         {
1887                 int i, vector = 0;
1888
1889                 if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1890                         for (i = 0; i < adapter->num_tx_queues; i++) {
1891                                 free_irq(intr->msix_entries[vector++].vector,
1892                                          &(adapter->tx_queue[i]));
1893                                 if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
1894                                         break;
1895                         }
1896                 }
1897
1898                 for (i = 0; i < adapter->num_rx_queues; i++) {
1899                         free_irq(intr->msix_entries[vector++].vector,
1900                                  &(adapter->rx_queue[i]));
1901                 }
1902
1903                 free_irq(intr->msix_entries[vector].vector,
1904                          adapter->netdev);
1905                 BUG_ON(vector >= intr->num_intrs);
1906                 break;
1907         }
1908 #endif
1909         case VMXNET3_IT_MSI:
1910                 free_irq(adapter->pdev->irq, adapter->netdev);
1911                 break;
1912         case VMXNET3_IT_INTX:
1913                 free_irq(adapter->pdev->irq, adapter->netdev);
1914                 break;
1915         default:
1916                 BUG();
1917         }
1918 }
1919
1920
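/* Rebuild the device's VLAN filter table from the driver's active_vlans bitmap. */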
1921 static void
1922 vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
1923 {
1924         u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1925         u16 vid;
1926
1927         /* allow untagged pkts */
1928         VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
1929
1930         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1931                 VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1932 }
1933
1934
1935 static int
1936 vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
1937 {
1938         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1939
1940         if (!(netdev->flags & IFF_PROMISC)) {
1941                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1942                 unsigned long flags;
1943
1944                 VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1945                 spin_lock_irqsave(&adapter->cmd_lock, flags);
1946                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1947                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1948                 spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1949         }
1950
1951         set_bit(vid, adapter->active_vlans);
1952
1953         return 0;
1954 }
1955
1956
1957 static int
1958 vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
1959 {
1960         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1961
1962         if (!(netdev->flags & IFF_PROMISC)) {
1963                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1964                 unsigned long flags;
1965
1966                 VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
1967                 spin_lock_irqsave(&adapter->cmd_lock, flags);
1968                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1969                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1970                 spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1971         }
1972
1973         clear_bit(vid, adapter->active_vlans);
1974
1975         return 0;
1976 }
1977
1978
1979 static u8 *
1980 vmxnet3_copy_mc(struct net_device *netdev)
1981 {
1982         u8 *buf = NULL;
1983         u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
1984
1985         /* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
1986         if (sz <= 0xffff) {
1987                 /* We may be called with BH disabled */
1988                 buf = kmalloc(sz, GFP_ATOMIC);
1989                 if (buf) {
1990                         struct netdev_hw_addr *ha;
1991                         int i = 0;
1992
1993                         netdev_for_each_mc_addr(ha, netdev)
1994                                 memcpy(buf + i++ * ETH_ALEN, ha->addr,
1995                                        ETH_ALEN);
1996                 }
1997         }
1998         return buf;
1999 }
2000
2001
2002 static void
2003 vmxnet3_set_mc(struct net_device *netdev)
2004 {
2005         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2006         unsigned long flags;
2007         struct Vmxnet3_RxFilterConf *rxConf =
2008                                         &adapter->shared->devRead.rxFilterConf;
2009         u8 *new_table = NULL;
2010         u32 new_mode = VMXNET3_RXM_UCAST;
2011
2012         if (netdev->flags & IFF_PROMISC) {
2013                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2014                 memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2015
2016                 new_mode |= VMXNET3_RXM_PROMISC;
2017         } else {
2018                 vmxnet3_restore_vlan(adapter);
2019         }
2020
2021         if (netdev->flags & IFF_BROADCAST)
2022                 new_mode |= VMXNET3_RXM_BCAST;
2023
2024         if (netdev->flags & IFF_ALLMULTI)
2025                 new_mode |= VMXNET3_RXM_ALL_MULTI;
2026         else
2027                 if (!netdev_mc_empty(netdev)) {
2028                         new_table = vmxnet3_copy_mc(netdev);
2029                         if (new_table) {
2030                                 new_mode |= VMXNET3_RXM_MCAST;
2031                                 rxConf->mfTableLen = cpu_to_le16(
2032                                         netdev_mc_count(netdev) * ETH_ALEN);
2033                                 rxConf->mfTablePA = cpu_to_le64(virt_to_phys(
2034                                                     new_table));
2035                         } else {
2036                                 netdev_info(netdev, "failed to copy mcast list"
2037                                             ", setting ALL_MULTI\n");
2038                                 new_mode |= VMXNET3_RXM_ALL_MULTI;
2039                         }
2040                 }
2041
2042
2043         if (!(new_mode & VMXNET3_RXM_MCAST)) {
2044                 rxConf->mfTableLen = 0;
2045                 rxConf->mfTablePA = 0;
2046         }
2047
2048         spin_lock_irqsave(&adapter->cmd_lock, flags);
2049         if (new_mode != rxConf->rxMode) {
2050                 rxConf->rxMode = cpu_to_le32(new_mode);
2051                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2052                                        VMXNET3_CMD_UPDATE_RX_MODE);
2053                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2054                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2055         }
2056
2057         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2058                                VMXNET3_CMD_UPDATE_MAC_FILTERS);
2059         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2060
2061         kfree(new_table);
2062 }
2063
2064 void
2065 vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2066 {
2067         int i;
2068
2069         for (i = 0; i < adapter->num_rx_queues; i++)
2070                 vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2071 }
2072
2073
2074 /*
2075  *   Set up driver_shared based on settings in adapter.
2076  */
2077
2078 static void
2079 vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2080 {
2081         struct Vmxnet3_DriverShared *shared = adapter->shared;
2082         struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2083         struct Vmxnet3_TxQueueConf *tqc;
2084         struct Vmxnet3_RxQueueConf *rqc;
2085         int i;
2086
2087         memset(shared, 0, sizeof(*shared));
2088
2089         /* driver settings */
2090         shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2091         devRead->misc.driverInfo.version = cpu_to_le32(
2092                                                 VMXNET3_DRIVER_VERSION_NUM);
2093         devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2094                                 VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2095         devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
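        /* gos is a set of bit-fields packed into one 32-bit word; convert
         * the whole word to little endian in place.
         */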
2096         *((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2097                                 *((u32 *)&devRead->misc.driverInfo.gos));
2098         devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2099         devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2100
2101         devRead->misc.ddPA = cpu_to_le64(virt_to_phys(adapter));
2102         devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2103
2104         /* set up feature flags */
2105         if (adapter->netdev->features & NETIF_F_RXCSUM)
2106                 devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2107
2108         if (adapter->netdev->features & NETIF_F_LRO) {
2109                 devRead->misc.uptFeatures |= UPT1_F_LRO;
2110                 devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2111         }
2112         if (adapter->netdev->features & NETIF_F_HW_VLAN_RX)
2113                 devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2114
2115         devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2116         devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2117         devRead->misc.queueDescLen = cpu_to_le32(
2118                 adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2119                 adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2120
2121         /* tx queue settings */
2122         devRead->misc.numTxQueues =  adapter->num_tx_queues;
2123         for (i = 0; i < adapter->num_tx_queues; i++) {
2124                 struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2125                 BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2126                 tqc = &adapter->tqd_start[i].conf;
2127                 tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2128                 tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2129                 tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2130                 tqc->ddPA           = cpu_to_le64(virt_to_phys(tq->buf_info));
2131                 tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2132                 tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2133                 tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2134                 tqc->ddLen          = cpu_to_le32(
2135                                         sizeof(struct vmxnet3_tx_buf_info) *
2136                                         tqc->txRingSize);
2137                 tqc->intrIdx        = tq->comp_ring.intr_idx;
2138         }
2139
2140         /* rx queue settings */
2141         devRead->misc.numRxQueues = adapter->num_rx_queues;
2142         for (i = 0; i < adapter->num_rx_queues; i++) {
2143                 struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2144                 rqc = &adapter->rqd_start[i].conf;
2145                 rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2146                 rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2147                 rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2148                 rqc->ddPA            = cpu_to_le64(virt_to_phys(
2149                                                         rq->buf_info));
2150                 rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2151                 rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2152                 rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2153                 rqc->ddLen           = cpu_to_le32(
2154                                         sizeof(struct vmxnet3_rx_buf_info) *
2155                                         (rqc->rxRingSize[0] +
2156                                          rqc->rxRingSize[1]));
2157                 rqc->intrIdx         = rq->comp_ring.intr_idx;
2158         }
2159
2160 #ifdef VMXNET3_RSS
2161         memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2162
2163         if (adapter->rss) {
2164                 struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2165                 static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
2166                         0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
2167                         0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
2168                         0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
2169                         0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
2170                         0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
2171                 };
2172
2173                 devRead->misc.uptFeatures |= UPT1_F_RSS;
2174                 devRead->misc.numRxQueues = adapter->num_rx_queues;
2175                 rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2176                                     UPT1_RSS_HASH_TYPE_IPV4 |
2177                                     UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2178                                     UPT1_RSS_HASH_TYPE_IPV6;
2179                 rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2180                 rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2181                 rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2182                 memcpy(rssConf->hashKey, rss_key, sizeof(rss_key));
2183
2184                 for (i = 0; i < rssConf->indTableSize; i++)
2185                         rssConf->indTable[i] = ethtool_rxfh_indir_default(
2186                                 i, adapter->num_rx_queues);
2187
2188                 devRead->rssConfDesc.confVer = 1;
2189                 devRead->rssConfDesc.confLen = sizeof(*rssConf);
2190                 devRead->rssConfDesc.confPA  = virt_to_phys(rssConf);
2191         }
2192
2193 #endif /* VMXNET3_RSS */
2194
2195         /* intr settings */
2196         devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2197                                      VMXNET3_IMM_AUTO;
2198         devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2199         for (i = 0; i < adapter->intr.num_intrs; i++)
2200                 devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2201
2202         devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
2203         devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2204
2205         /* rx filter settings */
2206         devRead->rxFilterConf.rxMode = 0;
2207         vmxnet3_restore_vlan(adapter);
2208         vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2209
2210         /* the rest are already zeroed */
2211 }
2212
2213
2214 int
2215 vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2216 {
2217         int err, i;
2218         u32 ret;
2219         unsigned long flags;
2220
2221         netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2222                 " ring sizes %u %u %u\n", adapter->netdev->name,
2223                 adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2224                 adapter->tx_queue[0].tx_ring.size,
2225                 adapter->rx_queue[0].rx_ring[0].size,
2226                 adapter->rx_queue[0].rx_ring[1].size);
2227
2228         vmxnet3_tq_init_all(adapter);
2229         err = vmxnet3_rq_init_all(adapter);
2230         if (err) {
2231                 netdev_err(adapter->netdev,
2232                            "Failed to init rx queue error %d\n", err);
2233                 goto rq_err;
2234         }
2235
2236         err = vmxnet3_request_irqs(adapter);
2237         if (err) {
2238                 netdev_err(adapter->netdev,
2239                            "Failed to set up irqs, error %d\n", err);
2240                 goto irq_err;
2241         }
2242
2243         vmxnet3_setup_driver_shared(adapter);
2244
2245         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2246                                adapter->shared_pa));
2247         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2248                                adapter->shared_pa));
2249         spin_lock_irqsave(&adapter->cmd_lock, flags);
2250         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2251                                VMXNET3_CMD_ACTIVATE_DEV);
2252         ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2253         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2254
2255         if (ret != 0) {
2256                 netdev_err(adapter->netdev,
2257                            "Failed to activate dev: error %u\n", ret);
2258                 err = -EINVAL;
2259                 goto activate_err;
2260         }
2261
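        /* pass the initial rx producer indices (one per ring) to the device */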
2262         for (i = 0; i < adapter->num_rx_queues; i++) {
2263                 VMXNET3_WRITE_BAR0_REG(adapter,
2264                                 VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2265                                 adapter->rx_queue[i].rx_ring[0].next2fill);
2266                 VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2267                                 (i * VMXNET3_REG_ALIGN)),
2268                                 adapter->rx_queue[i].rx_ring[1].next2fill);
2269         }
2270
2271         /* Apply the rx filter settings last. */
2272         vmxnet3_set_mc(adapter->netdev);
2273
2274         /*
2275          * Check link state when first activating device. It will start the
2276          * tx queue if the link is up.
2277          */
2278         vmxnet3_check_link(adapter, true);
2279         for (i = 0; i < adapter->num_rx_queues; i++)
2280                 napi_enable(&adapter->rx_queue[i].napi);
2281         vmxnet3_enable_all_intrs(adapter);
2282         clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2283         return 0;
2284
2285 activate_err:
2286         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2287         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2288         vmxnet3_free_irqs(adapter);
2289 irq_err:
2290 rq_err:
2291         /* free up buffers we allocated */
2292         vmxnet3_rq_cleanup_all(adapter);
2293         return err;
2294 }
2295
2296
2297 void
2298 vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2299 {
2300         unsigned long flags;
2301         spin_lock_irqsave(&adapter->cmd_lock, flags);
2302         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2303         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2304 }
2305
2306
2307 int
2308 vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2309 {
2310         int i;
2311         unsigned long flags;
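        /* nothing to do if the device is already quiesced */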
2312         if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2313                 return 0;
2314
2315
2316         spin_lock_irqsave(&adapter->cmd_lock, flags);
2317         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2318                                VMXNET3_CMD_QUIESCE_DEV);
2319         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2320         vmxnet3_disable_all_intrs(adapter);
2321
2322         for (i = 0; i < adapter->num_rx_queues; i++)
2323                 napi_disable(&adapter->rx_queue[i].napi);
2324         netif_tx_disable(adapter->netdev);
2325         adapter->link_speed = 0;
2326         netif_carrier_off(adapter->netdev);
2327
2328         vmxnet3_tq_cleanup_all(adapter);
2329         vmxnet3_rq_cleanup_all(adapter);
2330         vmxnet3_free_irqs(adapter);
2331         return 0;
2332 }
2333
2334
2335 static void
2336 vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2337 {
2338         u32 tmp;
2339
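        /* the low 4 bytes of the MAC go to MACL, the high 2 bytes to MACH */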
2340         tmp = *(u32 *)mac;
2341         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2342
2343         tmp = (mac[5] << 8) | mac[4];
2344         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2345 }
2346
2347
2348 static int
2349 vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2350 {
2351         struct sockaddr *addr = p;
2352         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2353
2354         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2355         vmxnet3_write_mac_addr(adapter, addr->sa_data);
2356
2357         return 0;
2358 }
2359
2360
2361 /* ==================== initialization and cleanup routines ============ */
2362
2363 static int
2364 vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
2365 {
2366         int err;
2367         unsigned long mmio_start, mmio_len;
2368         struct pci_dev *pdev = adapter->pdev;
2369
2370         err = pci_enable_device(pdev);
2371         if (err) {
2372                 dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2373                 return err;
2374         }
2375
2376         if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
2377                 if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
2378                         dev_err(&pdev->dev,
2379                                 "pci_set_consistent_dma_mask failed\n");
2380                         err = -EIO;
2381                         goto err_set_mask;
2382                 }
2383                 *dma64 = true;
2384         } else {
2385                 if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
2386                         dev_err(&pdev->dev,
2387                                 "pci_set_dma_mask failed\n");
2388                         err = -EIO;
2389                         goto err_set_mask;
2390                 }
2391                 *dma64 = false;
2392         }
2393
2394         err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2395                                            vmxnet3_driver_name);
2396         if (err) {
2397                 dev_err(&pdev->dev,
2398                         "Failed to request region for adapter: error %d\n", err);
2399                 goto err_set_mask;
2400         }
2401
2402         pci_set_master(pdev);
2403
2404         mmio_start = pci_resource_start(pdev, 0);
2405         mmio_len = pci_resource_len(pdev, 0);
2406         adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2407         if (!adapter->hw_addr0) {
2408                 dev_err(&pdev->dev, "Failed to map bar0\n");
2409                 err = -EIO;
2410                 goto err_ioremap;
2411         }
2412
2413         mmio_start = pci_resource_start(pdev, 1);
2414         mmio_len = pci_resource_len(pdev, 1);
2415         adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2416         if (!adapter->hw_addr1) {
2417                 dev_err(&pdev->dev, "Failed to map bar1\n");
2418                 err = -EIO;
2419                 goto err_bar1;
2420         }
2421         return 0;
2422
2423 err_bar1:
2424         iounmap(adapter->hw_addr0);
2425 err_ioremap:
2426         pci_release_selected_regions(pdev, (1 << 2) - 1);
2427 err_set_mask:
2428         pci_disable_device(pdev);
2429         return err;
2430 }
2431
2432
2433 static void
2434 vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2435 {
2436         BUG_ON(!adapter->pdev);
2437
2438         iounmap(adapter->hw_addr0);
2439         iounmap(adapter->hw_addr1);
2440         pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2441         pci_disable_device(adapter->pdev);
2442 }
2443
2444
2445 static void
2446 vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2447 {
2448         size_t sz, i, ring0_size, ring1_size, comp_size;
2449         struct vmxnet3_rx_queue *rq = &adapter->rx_queue[0];
2450
2451
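        /* A packet that fits in one skb buffer needs a single rx buffer;
         * larger MTUs use one skb buffer plus enough page-sized buffers
         * to cover the rest.
         */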
2452         if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2453                                     VMXNET3_MAX_ETH_HDR_SIZE) {
2454                 adapter->skb_buf_size = adapter->netdev->mtu +
2455                                         VMXNET3_MAX_ETH_HDR_SIZE;
2456                 if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2457                         adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2458
2459                 adapter->rx_buf_per_pkt = 1;
2460         } else {
2461                 adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2462                 sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2463                                             VMXNET3_MAX_ETH_HDR_SIZE;
2464                 adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2465         }
2466
2467         /*
2468          * for simplicity, force the ring0 size to be a multiple of
2469          * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2470          */
2471         sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2472         ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2473         ring0_size = (ring0_size + sz - 1) / sz * sz;
2474         ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2475                            sz * sz);
2476         ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2477         comp_size = ring0_size + ring1_size;
2478
2479         for (i = 0; i < adapter->num_rx_queues; i++) {
2480                 rq = &adapter->rx_queue[i];
2481                 rq->rx_ring[0].size = ring0_size;
2482                 rq->rx_ring[1].size = ring1_size;
2483                 rq->comp_ring.size = comp_size;
2484         }
2485 }
2486
2487
2488 int
2489 vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2490                       u32 rx_ring_size, u32 rx_ring2_size)
2491 {
2492         int err = 0, i;
2493
2494         for (i = 0; i < adapter->num_tx_queues; i++) {
2495                 struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2496                 tq->tx_ring.size   = tx_ring_size;
2497                 tq->data_ring.size = tx_ring_size;
2498                 tq->comp_ring.size = tx_ring_size;
2499                 tq->shared = &adapter->tqd_start[i].ctrl;
2500                 tq->stopped = true;
2501                 tq->adapter = adapter;
2502                 tq->qid = i;
2503                 err = vmxnet3_tq_create(tq, adapter);
2504                 /*
2505                  * Too late to change num_tx_queues. We cannot make do with
2506                  * fewer queues than we asked for.
2507                  */
2508                 if (err)
2509                         goto queue_err;
2510         }
2511
2512         adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2513         adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2514         vmxnet3_adjust_rx_ring_size(adapter);
2515         for (i = 0; i < adapter->num_rx_queues; i++) {
2516                 struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2517                 /* qid and qid2 for rx queues will be assigned later when num
2518                  * of rx queues is finalized after allocating intrs */
2519                 rq->shared = &adapter->rqd_start[i].ctrl;
2520                 rq->adapter = adapter;
2521                 err = vmxnet3_rq_create(rq, adapter);
2522                 if (err) {
2523                         if (i == 0) {
2524                                 netdev_err(adapter->netdev,
2525                                            "Could not allocate any rx queues. "
2526                                            "Aborting.\n");
2527                                 goto queue_err;
2528                         } else {
2529                                 netdev_info(adapter->netdev,
2530                                             "Number of rx queues changed "
2531                                             "to : %d.\n", i);
2532                                 adapter->num_rx_queues = i;
2533                                 err = 0;
2534                                 break;
2535                         }
2536                 }
2537         }
2538         return err;
2539 queue_err:
2540         vmxnet3_tq_destroy_all(adapter);
2541         return err;
2542 }
2543
2544 static int
2545 vmxnet3_open(struct net_device *netdev)
2546 {
2547         struct vmxnet3_adapter *adapter;
2548         int err, i;
2549
2550         adapter = netdev_priv(netdev);
2551
2552         for (i = 0; i < adapter->num_tx_queues; i++)
2553                 spin_lock_init(&adapter->tx_queue[i].tx_lock);
2554
2555         err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE,
2556                                     VMXNET3_DEF_RX_RING_SIZE,
2557                                     VMXNET3_DEF_RX_RING_SIZE);
2558         if (err)
2559                 goto queue_err;
2560
2561         err = vmxnet3_activate_dev(adapter);
2562         if (err)
2563                 goto activate_err;
2564
2565         return 0;
2566
2567 activate_err:
2568         vmxnet3_rq_destroy_all(adapter);
2569         vmxnet3_tq_destroy_all(adapter);
2570 queue_err:
2571         return err;
2572 }
2573
2574
2575 static int
2576 vmxnet3_close(struct net_device *netdev)
2577 {
2578         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2579
2580         /*
2581          * Reset_work may be in the middle of resetting the device, wait for its
2582          * completion.
2583          */
2584         while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2585                 msleep(1);
2586
2587         vmxnet3_quiesce_dev(adapter);
2588
2589         vmxnet3_rq_destroy_all(adapter);
2590         vmxnet3_tq_destroy_all(adapter);
2591
2592         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2593
2594
2595         return 0;
2596 }
2597
2598
2599 void
2600 vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2601 {
2602         int i;
2603
2604         /*
2605          * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
2606          * vmxnet3_close() will deadlock.
2607          */
2608         BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2609
2610         /* we need to enable NAPI, otherwise dev_close will deadlock */
2611         for (i = 0; i < adapter->num_rx_queues; i++)
2612                 napi_enable(&adapter->rx_queue[i].napi);
2613         dev_close(adapter->netdev);
2614 }
2615
2616
2617 static int
2618 vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2619 {
2620         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2621         int err = 0;
2622
2623         if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
2624                 return -EINVAL;
2625
2626         netdev->mtu = new_mtu;
2627
2628         /*
2629          * Reset_work may be in the middle of resetting the device, wait for its
2630          * completion.
2631          */
2632         while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2633                 msleep(1);
2634
2635         if (netif_running(netdev)) {
2636                 vmxnet3_quiesce_dev(adapter);
2637                 vmxnet3_reset_dev(adapter);
2638
2639                 /* we need to re-create the rx queue based on the new mtu */
2640                 vmxnet3_rq_destroy_all(adapter);
2641                 vmxnet3_adjust_rx_ring_size(adapter);
2642                 err = vmxnet3_rq_create_all(adapter);
2643                 if (err) {
2644                         netdev_err(netdev,
2645                                    "failed to re-create rx queues, "
2646                                    " error %d. Closing it.\n", err);
2647                         goto out;
2648                 }
2649
2650                 err = vmxnet3_activate_dev(adapter);
2651                 if (err) {
2652                         netdev_err(netdev,
2653                                    "failed to re-activate, error %d. "
2654                                    "Closing it\n", err);
2655                         goto out;
2656                 }
2657         }
2658
2659 out:
2660         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2661         if (err)
2662                 vmxnet3_force_close(adapter);
2663
2664         return err;
2665 }
2666
2667
2668 static void
2669 vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
2670 {
2671         struct net_device *netdev = adapter->netdev;
2672
2673         netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
2674                 NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_TX |
2675                 NETIF_F_HW_VLAN_RX | NETIF_F_TSO | NETIF_F_TSO6 |
2676                 NETIF_F_LRO;
2677         if (dma64)
2678                 netdev->hw_features |= NETIF_F_HIGHDMA;
2679         netdev->vlan_features = netdev->hw_features &
2680                                 ~(NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX);
2681         netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_FILTER;
2682 }
2683
2684
2685 static void
2686 vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2687 {
2688         u32 tmp;
2689
2690         tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
2691         *(u32 *)mac = tmp;
2692
2693         tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
2694         mac[4] = tmp & 0xff;
2695         mac[5] = (tmp >> 8) & 0xff;
2696 }
2697
2698 #ifdef CONFIG_PCI_MSI
2699
2700 /*
2701  * Enable MSIx vectors.
2702  * Returns :
2703  *      0 on successful enabling of required vectors,
2704  *      VMXNET3_LINUX_MIN_MSIX_VECT when only the minimum number of required
2705  *       vectors could be enabled,
2706  *      otherwise the number of vectors which could be enabled (this number is
2707  *       smaller than VMXNET3_LINUX_MIN_MSIX_VECT).
2708  */
2709
2710 static int
2711 vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter,
2712                              int vectors)
2713 {
2714         int err = 0, vector_threshold;
2715         vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT;
2716
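        /* pci_enable_msix() returns 0 on success, a negative errno on failure,
         * or a positive count of vectors that could have been enabled.
         */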
2717         while (vectors >= vector_threshold) {
2718                 err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
2719                                       vectors);
2720                 if (!err) {
2721                         adapter->intr.num_intrs = vectors;
2722                         return 0;
2723                 } else if (err < 0) {
2724                         dev_err(&adapter->netdev->dev,
2725                                    "Failed to enable MSI-X, error: %d\n", err);
2726                         vectors = 0;
2727                 } else if (err < vector_threshold) {
2728                         break;
2729                 } else {
2730                         /* If we fail to enable the required number of MSI-X
2731                          * vectors, try enabling the minimum number required.
2732                          */
2733                         dev_err(&adapter->netdev->dev,
2734                                 "Failed to enable %d MSI-X, trying %d instead\n",
2735                                     vectors, vector_threshold);
2736                         vectors = vector_threshold;
2737                 }
2738         }
2739
2740         dev_info(&adapter->pdev->dev,
2741                  "Number of MSI-X interrupts which can be allocated "
2742                  "is lower than the minimum required.\n");
2743         return err;
2744 }
2745
2746
2747 #endif /* CONFIG_PCI_MSI */
2748
2749 static void
2750 vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
2751 {
2752         u32 cfg;
2753         unsigned long flags;
2754
2755         /* intr settings */
2756         spin_lock_irqsave(&adapter->cmd_lock, flags);
2757         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2758                                VMXNET3_CMD_GET_CONF_INTR);
2759         cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2760         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
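        /* bits 0-1 of cfg carry the interrupt type, bits 2-3 the mask mode */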
2761         adapter->intr.type = cfg & 0x3;
2762         adapter->intr.mask_mode = (cfg >> 2) & 0x3;
2763
2764         if (adapter->intr.type == VMXNET3_IT_AUTO) {
2765                 adapter->intr.type = VMXNET3_IT_MSIX;
2766         }
2767
2768 #ifdef CONFIG_PCI_MSI
2769         if (adapter->intr.type == VMXNET3_IT_MSIX) {
2770                 int vector, err = 0;
2771
2772                 adapter->intr.num_intrs = (adapter->share_intr ==
2773                                            VMXNET3_INTR_TXSHARE) ? 1 :
2774                                            adapter->num_tx_queues;
2775                 adapter->intr.num_intrs += (adapter->share_intr ==
2776                                            VMXNET3_INTR_BUDDYSHARE) ? 0 :
2777                                            adapter->num_rx_queues;
2778                 adapter->intr.num_intrs += 1;           /* for link event */
2779
2780                 adapter->intr.num_intrs = (adapter->intr.num_intrs >
2781                                            VMXNET3_LINUX_MIN_MSIX_VECT
2782                                            ? adapter->intr.num_intrs :
2783                                            VMXNET3_LINUX_MIN_MSIX_VECT);
2784
2785                 for (vector = 0; vector < adapter->intr.num_intrs; vector++)
2786                         adapter->intr.msix_entries[vector].entry = vector;
2787
2788                 err = vmxnet3_acquire_msix_vectors(adapter,
2789                                                    adapter->intr.num_intrs);
2790                 /* If we cannot allocate one MSI-X vector per queue,
2791                  * limit the number of rx queues to 1.
2792                  */
2793                 if (err == VMXNET3_LINUX_MIN_MSIX_VECT) {
2794                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
2795                             || adapter->num_rx_queues != 1) {
2796                                 adapter->share_intr = VMXNET3_INTR_TXSHARE;
2797                                 netdev_err(adapter->netdev,
2798                                            "Number of rx queues : 1\n");
2799                                 adapter->num_rx_queues = 1;
2800                                 adapter->intr.num_intrs =
2801                                                 VMXNET3_LINUX_MIN_MSIX_VECT;
2802                         }
2803                         return;
2804                 }
2805                 if (!err)
2806                         return;
2807
2808                 /* If we cannot allocate MSI-X vectors, use only one rx queue */
2809                 dev_info(&adapter->pdev->dev,
2810                          "Failed to enable MSI-X, error %d. "
2811                          "Limiting #rx queues to 1, try MSI.\n", err);
2812
2813                 adapter->intr.type = VMXNET3_IT_MSI;
2814         }
2815
2816         if (adapter->intr.type == VMXNET3_IT_MSI) {
2817                 int err;
2818                 err = pci_enable_msi(adapter->pdev);
2819                 if (!err) {
2820                         adapter->num_rx_queues = 1;
2821                         adapter->intr.num_intrs = 1;
2822                         return;
2823                 }
2824         }
2825 #endif /* CONFIG_PCI_MSI */
2826
2827         adapter->num_rx_queues = 1;
2828         dev_info(&adapter->netdev->dev,
2829                  "Using INTx interrupt, #Rx queues: 1.\n");
2830         adapter->intr.type = VMXNET3_IT_INTX;
2831
2832         /* INTx-related settings */
2833         adapter->intr.num_intrs = 1;
2834 }
2835
2836
2837 static void
2838 vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
2839 {
2840         if (adapter->intr.type == VMXNET3_IT_MSIX)
2841                 pci_disable_msix(adapter->pdev);
2842         else if (adapter->intr.type == VMXNET3_IT_MSI)
2843                 pci_disable_msi(adapter->pdev);
2844         else
2845                 BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
2846 }
2847
2848
2849 static void
2850 vmxnet3_tx_timeout(struct net_device *netdev)
2851 {
2852         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2853         adapter->tx_timeout_count++;
2854
2855         netdev_err(adapter->netdev, "tx hang\n");
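             /* The reset path sleeps (it takes rtnl_lock), so defer it to the
              * work queue instead of resetting from this context.
              */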
2856         schedule_work(&adapter->work);
2857         netif_wake_queue(adapter->netdev);
2858 }
2859
2860
2861 static void
2862 vmxnet3_reset_work(struct work_struct *data)
2863 {
2864         struct vmxnet3_adapter *adapter;
2865
2866         adapter = container_of(data, struct vmxnet3_adapter, work);
2867
2868         /* if another thread is resetting the device, no need to proceed */
2869         if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2870                 return;
2871
2872         /* if the device is closed, we must leave it alone */
2873         rtnl_lock();
2874         if (netif_running(adapter->netdev)) {
2875                 netdev_notice(adapter->netdev, "resetting\n");
2876                 vmxnet3_quiesce_dev(adapter);
2877                 vmxnet3_reset_dev(adapter);
2878                 vmxnet3_activate_dev(adapter);
2879         } else {
2880                 netdev_info(adapter->netdev, "already closed\n");
2881         }
2882         rtnl_unlock();
2883
2884         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2885 }
2886
2887
2888 static int
2889 vmxnet3_probe_device(struct pci_dev *pdev,
2890                      const struct pci_device_id *id)
2891 {
2892         static const struct net_device_ops vmxnet3_netdev_ops = {
2893                 .ndo_open = vmxnet3_open,
2894                 .ndo_stop = vmxnet3_close,
2895                 .ndo_start_xmit = vmxnet3_xmit_frame,
2896                 .ndo_set_mac_address = vmxnet3_set_mac_addr,
2897                 .ndo_change_mtu = vmxnet3_change_mtu,
2898                 .ndo_set_features = vmxnet3_set_features,
2899                 .ndo_get_stats64 = vmxnet3_get_stats64,
2900                 .ndo_tx_timeout = vmxnet3_tx_timeout,
2901                 .ndo_set_rx_mode = vmxnet3_set_mc,
2902                 .ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
2903                 .ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
2904 #ifdef CONFIG_NET_POLL_CONTROLLER
2905                 .ndo_poll_controller = vmxnet3_netpoll,
2906 #endif
2907         };
2908         int err;
2909         bool dma64 = false; /* silence gcc uninitialized-use warning */
2910         u32 ver;
2911         struct net_device *netdev;
2912         struct vmxnet3_adapter *adapter;
2913         u8 mac[ETH_ALEN];
2914         int size;
2915         int num_tx_queues;
2916         int num_rx_queues;
2917
2918         if (!pci_msi_enabled())
2919                 enable_mq = 0;
2920
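             /* Cap the queue counts at the device limits and the number of
              * online CPUs, then round down to a power of two.
              */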
2921 #ifdef VMXNET3_RSS
2922         if (enable_mq)
2923                 num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
2924                                     (int)num_online_cpus());
2925         else
2926 #endif
2927                 num_rx_queues = 1;
2928         num_rx_queues = rounddown_pow_of_two(num_rx_queues);
2929
2930         if (enable_mq)
2931                 num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
2932                                     (int)num_online_cpus());
2933         else
2934                 num_tx_queues = 1;
2935
2936         num_tx_queues = rounddown_pow_of_two(num_tx_queues);
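             /* Size the net_device for the larger of the two queue counts;
              * the real tx/rx queue counts are set below with
              * netif_set_real_num_tx_queues()/netif_set_real_num_rx_queues().
              */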
2937         netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
2938                                    max(num_tx_queues, num_rx_queues));
2939         dev_info(&pdev->dev,
2940                  "# of Tx queues : %d, # of Rx queues : %d\n",
2941                  num_tx_queues, num_rx_queues);
2942
2943         if (!netdev)
2944                 return -ENOMEM;
2945
2946         pci_set_drvdata(pdev, netdev);
2947         adapter = netdev_priv(netdev);
2948         adapter->netdev = netdev;
2949         adapter->pdev = pdev;
2950
2951         spin_lock_init(&adapter->cmd_lock);
2952         adapter->shared = pci_alloc_consistent(adapter->pdev,
2953                                                sizeof(struct Vmxnet3_DriverShared),
2954                                                &adapter->shared_pa);
2955         if (!adapter->shared) {
2956                 dev_err(&pdev->dev, "Failed to allocate memory\n");
2957                 err = -ENOMEM;
2958                 goto err_alloc_shared;
2959         }
2960
2961         adapter->num_rx_queues = num_rx_queues;
2962         adapter->num_tx_queues = num_tx_queues;
2963
2964         size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
2965         size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
2966         adapter->tqd_start = pci_alloc_consistent(adapter->pdev, size,
2967                                                   &adapter->queue_desc_pa);
2968
2969         if (!adapter->tqd_start) {
2970                 dev_err(&pdev->dev, "Failed to allocate memory\n");
2971                 err = -ENOMEM;
2972                 goto err_alloc_queue_desc;
2973         }
2974         adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
2975                                                             adapter->num_tx_queues);
2976
2977         adapter->pm_conf = kmalloc(sizeof(struct Vmxnet3_PMConf), GFP_KERNEL);
2978         if (adapter->pm_conf == NULL) {
2979                 err = -ENOMEM;
2980                 goto err_alloc_pm;
2981         }
2982
2983 #ifdef VMXNET3_RSS
2984
2985         adapter->rss_conf = kmalloc(sizeof(struct UPT1_RSSConf), GFP_KERNEL);
2986         if (adapter->rss_conf == NULL) {
2987                 err = -ENOMEM;
2988                 goto err_alloc_rss;
2989         }
2990 #endif /* VMXNET3_RSS */
2991
2992         err = vmxnet3_alloc_pci_resources(adapter, &dma64);
2993         if (err < 0)
2994                 goto err_alloc_pci;
2995
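             /* Version negotiation: bit 0 of the VRRS (device) and UVRS (UPT)
              * registers indicates that revision 1 is supported; writing 1
              * back selects that revision.
              */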
2996         ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
2997         if (ver & 1) {
2998                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1);
2999         } else {
3000                 dev_err(&pdev->dev,
3001                         "Incompatible h/w version (0x%x) for adapter\n", ver);
3002                 err = -EBUSY;
3003                 goto err_ver;
3004         }
3005
3006         ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3007         if (ver & 1) {
3008                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3009         } else {
3010                 dev_err(&pdev->dev,
3011                         "Incompatible upt version (0x%x) for adapter\n", ver);
3012                 err = -EBUSY;
3013                 goto err_ver;
3014         }
3015
3016         SET_NETDEV_DEV(netdev, &pdev->dev);
3017         vmxnet3_declare_features(adapter, dma64);
3018
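             /* With equal tx and rx queue counts, each tx queue can share
              * ("buddy") an interrupt vector with its corresponding rx queue;
              * otherwise keep tx and rx vectors separate.
              */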
3019         if (adapter->num_tx_queues == adapter->num_rx_queues)
3020                 adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3021         else
3022                 adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3023
3024         vmxnet3_alloc_intr_resources(adapter);
3025
3026 #ifdef VMXNET3_RSS
3027         if (adapter->num_rx_queues > 1 &&
3028             adapter->intr.type == VMXNET3_IT_MSIX) {
3029                 adapter->rss = true;
3030                 netdev->hw_features |= NETIF_F_RXHASH;
3031                 netdev->features |= NETIF_F_RXHASH;
3032                 dev_dbg(&pdev->dev, "RSS is enabled.\n");
3033         } else {
3034                 adapter->rss = false;
3035         }
3036 #endif
3037
3038         vmxnet3_read_mac_addr(adapter, mac);
3039         memcpy(netdev->dev_addr,  mac, netdev->addr_len);
3040
3041         netdev->netdev_ops = &vmxnet3_netdev_ops;
3042         vmxnet3_set_ethtool_ops(netdev);
3043         netdev->watchdog_timeo = 5 * HZ;
3044
3045         INIT_WORK(&adapter->work, vmxnet3_reset_work);
3046         set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3047
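             /* With MSI-X, every rx queue gets its own NAPI context that polls
              * rx completions only; otherwise a single NAPI context on
              * rx_queue[0] services the whole device.
              */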
3048         if (adapter->intr.type == VMXNET3_IT_MSIX) {
3049                 int i;
3050                 for (i = 0; i < adapter->num_rx_queues; i++) {
3051                         netif_napi_add(adapter->netdev,
3052                                        &adapter->rx_queue[i].napi,
3053                                        vmxnet3_poll_rx_only, 64);
3054                 }
3055         } else {
3056                 netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3057                                vmxnet3_poll, 64);
3058         }
3059
3060         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3061         netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3062
3063         err = register_netdev(netdev);
3064
3065         if (err) {
3066                 dev_err(&pdev->dev, "Failed to register adapter\n");
3067                 goto err_register;
3068         }
3069
3070         vmxnet3_check_link(adapter, false);
3071         return 0;
3072
3073 err_register:
3074         vmxnet3_free_intr_resources(adapter);
3075 err_ver:
3076         vmxnet3_free_pci_resources(adapter);
3077 err_alloc_pci:
3078 #ifdef VMXNET3_RSS
3079         kfree(adapter->rss_conf);
3080 err_alloc_rss:
3081 #endif
3082         kfree(adapter->pm_conf);
3083 err_alloc_pm:
3084         pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3085                             adapter->queue_desc_pa);
3086 err_alloc_queue_desc:
3087         pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3088                             adapter->shared, adapter->shared_pa);
3089 err_alloc_shared:
3090         pci_set_drvdata(pdev, NULL);
3091         free_netdev(netdev);
3092         return err;
3093 }
3094
3095
3096 static void
3097 vmxnet3_remove_device(struct pci_dev *pdev)
3098 {
3099         struct net_device *netdev = pci_get_drvdata(pdev);
3100         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3101         int size = 0;
3102         int num_rx_queues;
3103
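             /* Recompute the rx queue count that was used when the queue
              * descriptors were allocated at probe time; adapter->num_rx_queues
              * may have been reduced since (e.g. when MSI-X was unavailable),
              * and the allocation must be freed with its original size.
              */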
3104 #ifdef VMXNET3_RSS
3105         if (enable_mq)
3106                 num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3107                                     (int)num_online_cpus());
3108         else
3109 #endif
3110                 num_rx_queues = 1;
3111         num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3112
3113         cancel_work_sync(&adapter->work);
3114
3115         unregister_netdev(netdev);
3116
3117         vmxnet3_free_intr_resources(adapter);
3118         vmxnet3_free_pci_resources(adapter);
3119 #ifdef VMXNET3_RSS
3120         kfree(adapter->rss_conf);
3121 #endif
3122         kfree(adapter->pm_conf);
3123
3124         size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3125         size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3126         pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3127                             adapter->queue_desc_pa);
3128         pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3129                             adapter->shared, adapter->shared_pa);
3130         free_netdev(netdev);
3131 }
3132
3133
3134 #ifdef CONFIG_PM
3135
3136 static int
3137 vmxnet3_suspend(struct device *device)
3138 {
3139         struct pci_dev *pdev = to_pci_dev(device);
3140         struct net_device *netdev = pci_get_drvdata(pdev);
3141         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3142         struct Vmxnet3_PMConf *pmConf;
3143         struct ethhdr *ehdr;
3144         struct arphdr *ahdr;
3145         u8 *arpreq;
3146         struct in_device *in_dev;
3147         struct in_ifaddr *ifa;
3148         unsigned long flags;
3149         int i = 0;
3150
3151         if (!netif_running(netdev))
3152                 return 0;
3153
3154         for (i = 0; i < adapter->num_rx_queues; i++)
3155                 napi_disable(&adapter->rx_queue[i].napi);
3156
3157         vmxnet3_disable_all_intrs(adapter);
3158         vmxnet3_free_irqs(adapter);
3159         vmxnet3_free_intr_resources(adapter);
3160
3161         netif_device_detach(netdev);
3162         netif_tx_stop_all_queues(netdev);
3163
3164         /* Create wake-up filters. */
3165         pmConf = adapter->pm_conf;
3166         memset(pmConf, 0, sizeof(*pmConf));
3167
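             /* Each wake-up filter is a byte pattern plus a bitmask: bit N of
              * mask byte M selects pattern byte M * 8 + N for comparison, so
              * maskSize is patternSize rounded up to a whole number of mask
              * bytes.
              */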
3168         if (adapter->wol & WAKE_UCAST) {
3169                 pmConf->filters[i].patternSize = ETH_ALEN;
3170                 pmConf->filters[i].maskSize = 1;
3171                 memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3172                 pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3173
3174                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3175                 i++;
3176         }
3177
3178         if (adapter->wol & WAKE_ARP) {
3179                 in_dev = in_dev_get(netdev);
3180                 if (!in_dev)
3181                         goto skip_arp;
3182
3183                 ifa = (struct in_ifaddr *)in_dev->ifa_list;
3184                 if (!ifa)
3185                         goto skip_arp;
3186
3187                 pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header */
3188                         sizeof(struct arphdr) +         /* ARP header */
3189                         2 * ETH_ALEN +          /* 2 Ethernet addresses */
3190                         2 * sizeof(u32);        /* 2 IPv4 addresses */
3191                 pmConf->filters[i].maskSize =
3192                         (pmConf->filters[i].patternSize - 1) / 8 + 1;
3193
3194                 /* ETH_P_ARP in Ethernet header. */
3195                 ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3196                 ehdr->h_proto = htons(ETH_P_ARP);
3197
3198                 /* ARPOP_REQUEST in ARP header. */
3199                 ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3200                 ahdr->ar_op = htons(ARPOP_REQUEST);
3201                 arpreq = (u8 *)(ahdr + 1);
3202
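                     /* The ARP body is sender MAC, sender IP, target MAC,
                      * target IP; skip the first three fields
                      * (2 * ETH_ALEN + sizeof(u32) bytes) to reach the
                      * target IP.
                      */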
3203                 /* The Unicast IPv4 address in 'tip' field. */
3204                 arpreq += 2 * ETH_ALEN + sizeof(u32);
3205                 *(u32 *)arpreq = ifa->ifa_address;
3206
3207                 /* The mask for the relevant bits. */
3208                 pmConf->filters[i].mask[0] = 0x00;
3209                 pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3210                 pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3211                 pmConf->filters[i].mask[3] = 0x00;
3212                 pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3213                 pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3214                 in_dev_put(in_dev);
3215
3216                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3217                 i++;
3218         }
3219
3220 skip_arp:
3221         if (adapter->wol & WAKE_MAGIC)
3222                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3223
3224         pmConf->numFilters = i;
3225
3226         adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3227         adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3228                                                                   *pmConf));
3229         adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3230                                                                  pmConf));
3231
3232         spin_lock_irqsave(&adapter->cmd_lock, flags);
3233         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3234                                VMXNET3_CMD_UPDATE_PMCFG);
3235         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3236
3237         pci_save_state(pdev);
3238         pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3239                         adapter->wol);
3240         pci_disable_device(pdev);
3241         pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3242
3243         return 0;
3244 }
3245
3246
3247 static int
3248 vmxnet3_resume(struct device *device)
3249 {
3250         int err, i = 0;
3251         unsigned long flags;
3252         struct pci_dev *pdev = to_pci_dev(device);
3253         struct net_device *netdev = pci_get_drvdata(pdev);
3254         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3255         struct Vmxnet3_PMConf *pmConf;
3256
3257         if (!netif_running(netdev))
3258                 return 0;
3259
3260         /* Destroy wake-up filters. */
3261         pmConf = adapter->pm_conf;
3262         memset(pmConf, 0, sizeof(*pmConf));
3263
3264         adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3265         adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3266                                                                   *pmConf));
3267         adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3268                                                                  pmConf));
3269
3270         netif_device_attach(netdev);
3271         pci_set_power_state(pdev, PCI_D0);
3272         pci_restore_state(pdev);
3273         err = pci_enable_device_mem(pdev);
3274         if (err != 0)
3275                 return err;
3276
3277         pci_enable_wake(pdev, PCI_D0, 0);
3278
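             /* Push the cleared PM configuration to the device, then rebuild
              * interrupt resources, re-request IRQs and restart NAPI polling.
              */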
3279         spin_lock_irqsave(&adapter->cmd_lock, flags);
3280         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3281                                VMXNET3_CMD_UPDATE_PMCFG);
3282         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3283         vmxnet3_alloc_intr_resources(adapter);
3284         vmxnet3_request_irqs(adapter);
3285         for (i = 0; i < adapter->num_rx_queues; i++)
3286                 napi_enable(&adapter->rx_queue[i].napi);
3287         vmxnet3_enable_all_intrs(adapter);
3288
3289         return 0;
3290 }
3291
3292 static const struct dev_pm_ops vmxnet3_pm_ops = {
3293         .suspend = vmxnet3_suspend,
3294         .resume = vmxnet3_resume,
3295 };
3296 #endif
3297
3298 static struct pci_driver vmxnet3_driver = {
3299         .name           = vmxnet3_driver_name,
3300         .id_table       = vmxnet3_pciid_table,
3301         .probe          = vmxnet3_probe_device,
3302         .remove         = vmxnet3_remove_device,
3303 #ifdef CONFIG_PM
3304         .driver.pm      = &vmxnet3_pm_ops,
3305 #endif
3306 };
3307
3308
3309 static int __init
3310 vmxnet3_init_module(void)
3311 {
3312         pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3313                 VMXNET3_DRIVER_VERSION_REPORT);
3314         return pci_register_driver(&vmxnet3_driver);
3315 }
3316
3317 module_init(vmxnet3_init_module);
3318
3319
3320 static void
3321 vmxnet3_exit_module(void)
3322 {
3323         pci_unregister_driver(&vmxnet3_driver);
3324 }
3325
3326 module_exit(vmxnet3_exit_module);
3327
3328 MODULE_AUTHOR("VMware, Inc.");
3329 MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3330 MODULE_LICENSE("GPL v2");
3331 MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);