igb: Prevent dropped Tx timestamps via work items and interrupts.
drivers/net/ethernet/intel/igb/igb_main.c (firefly-linux-kernel-4.4.55.git)
/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2012 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#include <linux/pm_runtime.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"

#define MAJ 4
#define MIN 0
#define BUILD 1
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
__stringify(BUILD) "-k"
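/* With the MAJ/MIN/BUILD values above, DRV_VERSION expands to "4.0.1-k". */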
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
                                "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
        [board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
        /* required last entry */
        {0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
                                                 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
static int igb_vlan_rx_add_vid(struct net_device *, u16);
static int igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
                               int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
                                 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);

#ifdef CONFIG_PCI_IOV
static int igb_vf_configure(struct igb_adapter *adapter, int vf);
static int igb_find_enabled_vfs(struct igb_adapter *adapter);
static int igb_check_vf_assignment(struct igb_adapter *adapter);
#endif

#ifdef CONFIG_PM
#ifdef CONFIG_PM_SLEEP
static int igb_suspend(struct device *);
#endif
static int igb_resume(struct device *);
#ifdef CONFIG_PM_RUNTIME
static int igb_runtime_suspend(struct device *dev);
static int igb_runtime_resume(struct device *dev);
static int igb_runtime_idle(struct device *dev);
#endif
static const struct dev_pm_ops igb_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
        SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
                        igb_runtime_idle)
};
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
        .notifier_call  = igb_notify_dca,
        .next           = NULL,
        .priority       = 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};

static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);

static struct pci_driver igb_driver = {
        .name     = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe    = igb_probe,
        .remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
        .driver.pm = &igb_pm_ops,
#endif
        .shutdown = igb_shutdown,
        .err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
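/* Note: this value is typically handed to netif_msg_init() during probe;
 * the default of -1 is out of range, so netif_msg_init() falls back to the
 * DEFAULT_MSG_ENABLE mask above (driver, probe and link messages).
 */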

struct igb_reg_info {
        u32 ofs;
        char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

        /* General Registers */
        {E1000_CTRL, "CTRL"},
        {E1000_STATUS, "STATUS"},
        {E1000_CTRL_EXT, "CTRL_EXT"},

        /* Interrupt Registers */
        {E1000_ICR, "ICR"},

        /* RX Registers */
        {E1000_RCTL, "RCTL"},
        {E1000_RDLEN(0), "RDLEN"},
        {E1000_RDH(0), "RDH"},
        {E1000_RDT(0), "RDT"},
        {E1000_RXDCTL(0), "RXDCTL"},
        {E1000_RDBAL(0), "RDBAL"},
        {E1000_RDBAH(0), "RDBAH"},

        /* TX Registers */
        {E1000_TCTL, "TCTL"},
        {E1000_TDBAL(0), "TDBAL"},
        {E1000_TDBAH(0), "TDBAH"},
        {E1000_TDLEN(0), "TDLEN"},
        {E1000_TDH(0), "TDH"},
        {E1000_TDT(0), "TDT"},
        {E1000_TXDCTL(0), "TXDCTL"},
        {E1000_TDFH, "TDFH"},
        {E1000_TDFT, "TDFT"},
        {E1000_TDFHS, "TDFHS"},
        {E1000_TDFPC, "TDFPC"},

        /* List Terminator */
        {}
};

/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
        int n = 0;
        char rname[16];
        u32 regs[8];

        switch (reginfo->ofs) {
        case E1000_RDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDLEN(n));
                break;
        case E1000_RDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDH(n));
                break;
        case E1000_RDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDT(n));
                break;
        case E1000_RXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RXDCTL(n));
                break;
        case E1000_RDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAL(n));
                break;
        case E1000_RDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAH(n));
                break;
        case E1000_TDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAL(n));
                break;
        case E1000_TDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAH(n));
                break;
        case E1000_TDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDLEN(n));
                break;
        case E1000_TDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDH(n));
                break;
        case E1000_TDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDT(n));
                break;
        case E1000_TXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TXDCTL(n));
                break;
        default:
                pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
                return;
        }

        snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
        pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
                regs[2], regs[3]);
}

/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        struct igb_reg_info *reginfo;
        struct igb_ring *tx_ring;
        union e1000_adv_tx_desc *tx_desc;
        struct my_u0 { u64 a; u64 b; } *u0;
        struct igb_ring *rx_ring;
        union e1000_adv_rx_desc *rx_desc;
        u32 staterr;
        u16 i, n;

        if (!netif_msg_hw(adapter))
                return;

        /* Print netdevice Info */
        if (netdev) {
                dev_info(&adapter->pdev->dev, "Net device Info\n");
                pr_info("Device Name     state            trans_start      "
                        "last_rx\n");
                pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
                        netdev->state, netdev->trans_start, netdev->last_rx);
        }

        /* Print Registers */
        dev_info(&adapter->pdev->dev, "Register Dump\n");
        pr_info(" Register Name   Value\n");
        for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
             reginfo->name; reginfo++) {
                igb_regdump(hw, reginfo);
        }

        /* Print TX Ring Summary */
        if (!netdev || !netif_running(netdev))
                goto exit;

        dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
        pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
        for (n = 0; n < adapter->num_tx_queues; n++) {
                struct igb_tx_buffer *buffer_info;
                tx_ring = adapter->tx_ring[n];
                buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
                pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
                        n, tx_ring->next_to_use, tx_ring->next_to_clean,
                        (u64)buffer_info->dma,
                        buffer_info->length,
                        buffer_info->next_to_watch,
                        (u64)buffer_info->time_stamp);
        }

        /* Print TX Rings */
        if (!netif_msg_tx_done(adapter))
                goto rx_ring_summary;

        dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

        /* Transmit Descriptor Formats
         *
         * Advanced Transmit Descriptor
         *   +--------------------------------------------------------------+
         * 0 |         Buffer Address [63:0]                                |
         *   +--------------------------------------------------------------+
         * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
         *   +--------------------------------------------------------------+
         *   63      46 45    40 39 38 36 35 32 31   24             15       0
         */

        for (n = 0; n < adapter->num_tx_queues; n++) {
                tx_ring = adapter->tx_ring[n];
                pr_info("------------------------------------\n");
                pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
                pr_info("------------------------------------\n");
                pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
                        "[bi->dma       ] leng  ntw timestamp        "
                        "bi->skb\n");

                for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
                        const char *next_desc;
                        struct igb_tx_buffer *buffer_info;
                        tx_desc = IGB_TX_DESC(tx_ring, i);
                        buffer_info = &tx_ring->tx_buffer_info[i];
                        u0 = (struct my_u0 *)tx_desc;
                        if (i == tx_ring->next_to_use &&
                            i == tx_ring->next_to_clean)
                                next_desc = " NTC/U";
                        else if (i == tx_ring->next_to_use)
                                next_desc = " NTU";
                        else if (i == tx_ring->next_to_clean)
                                next_desc = " NTC";
                        else
                                next_desc = "";

                        pr_info("T [0x%03X]    %016llX %016llX %016llX"
                                " %04X  %p %016llX %p%s\n", i,
                                le64_to_cpu(u0->a),
                                le64_to_cpu(u0->b),
                                (u64)buffer_info->dma,
                                buffer_info->length,
                                buffer_info->next_to_watch,
                                (u64)buffer_info->time_stamp,
                                buffer_info->skb, next_desc);

                        if (netif_msg_pktdata(adapter) && buffer_info->skb)
                                print_hex_dump(KERN_INFO, "",
                                        DUMP_PREFIX_ADDRESS,
                                        16, 1, buffer_info->skb->data,
                                        buffer_info->length, true);
                }
        }

        /* Print RX Rings Summary */
rx_ring_summary:
        dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
        pr_info("Queue [NTU] [NTC]\n");
        for (n = 0; n < adapter->num_rx_queues; n++) {
                rx_ring = adapter->rx_ring[n];
                pr_info(" %5d %5X %5X\n",
                        n, rx_ring->next_to_use, rx_ring->next_to_clean);
        }

        /* Print RX Rings */
        if (!netif_msg_rx_status(adapter))
                goto exit;

        dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

        /* Advanced Receive Descriptor (Read) Format
         *    63                                           1        0
         *    +-----------------------------------------------------+
         *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
         *    +----------------------------------------------+------+
         *  8 |       Header Buffer Address [63:1]           |  DD  |
         *    +-----------------------------------------------------+
         *
         *
         * Advanced Receive Descriptor (Write-Back) Format
         *
         *   63       48 47    32 31  30      21 20 17 16   4 3     0
         *   +------------------------------------------------------+
         * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
         *   | Checksum   Ident  |   |           |    | Type | Type |
         *   +------------------------------------------------------+
         * 8 | VLAN Tag | Length | Extended Error | Extended Status |
         *   +------------------------------------------------------+
         *   63       48 47    32 31            20 19               0
         */

        for (n = 0; n < adapter->num_rx_queues; n++) {
                rx_ring = adapter->rx_ring[n];
                pr_info("------------------------------------\n");
                pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
                pr_info("------------------------------------\n");
                pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
                        "[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
                pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
                        "----------- [bi->skb] <-- Adv Rx Write-Back format\n");

                for (i = 0; i < rx_ring->count; i++) {
                        const char *next_desc;
                        struct igb_rx_buffer *buffer_info;
                        buffer_info = &rx_ring->rx_buffer_info[i];
                        rx_desc = IGB_RX_DESC(rx_ring, i);
                        u0 = (struct my_u0 *)rx_desc;
                        staterr = le32_to_cpu(rx_desc->wb.upper.status_error);

                        if (i == rx_ring->next_to_use)
                                next_desc = " NTU";
                        else if (i == rx_ring->next_to_clean)
                                next_desc = " NTC";
                        else
                                next_desc = "";

                        if (staterr & E1000_RXD_STAT_DD) {
                                /* Descriptor Done */
                                pr_info("%s[0x%03X]     %016llX %016llX -------"
                                        "--------- %p%s\n", "RWB", i,
                                        le64_to_cpu(u0->a),
                                        le64_to_cpu(u0->b),
                                        buffer_info->skb, next_desc);
                        } else {
                                pr_info("%s[0x%03X]     %016llX %016llX %016llX"
                                        " %p%s\n", "R  ", i,
                                        le64_to_cpu(u0->a),
                                        le64_to_cpu(u0->b),
                                        (u64)buffer_info->dma,
                                        buffer_info->skb, next_desc);

                                if (netif_msg_pktdata(adapter) &&
                                    buffer_info->dma && buffer_info->skb) {
                                        print_hex_dump(KERN_INFO, "",
                                                  DUMP_PREFIX_ADDRESS,
                                                  16, 1, buffer_info->skb->data,
                                                  IGB_RX_HDR_LEN, true);
                                        print_hex_dump(KERN_INFO, "",
                                          DUMP_PREFIX_ADDRESS,
                                          16, 1,
                                          page_address(buffer_info->page) +
                                                      buffer_info->page_offset,
                                          PAGE_SIZE/2, true);
                                }
                        }
                }
        }

exit:
        return;
}

/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
        struct igb_adapter *adapter = hw->back;
        return adapter->netdev;
}

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
        int ret;
        pr_info("%s - version %s\n",
               igb_driver_string, igb_driver_version);

        pr_info("%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
        dca_register_notify(&dca_notifier);
#endif
        ret = pci_register_driver(&igb_driver);
        return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
        dca_unregister_notify(&dca_notifier);
#endif
        pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);

#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
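/* Example of the mapping above: Q_IDX_82576(0) = 0, Q_IDX_82576(1) = 8,
 * Q_IDX_82576(2) = 1, Q_IDX_82576(3) = 9, ...  Successive indices
 * alternate between the low (0-7) and high (8-15) halves of the queue
 * space, stepping through the queue pairs the VFs own (see below).
 */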
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
        int i = 0, j = 0;
        u32 rbase_offset = adapter->vfs_allocated_count;

        switch (adapter->hw.mac.type) {
        case e1000_82576:
                /* The queues are allocated for virtualization such that VF 0
                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
                 * In order to avoid collision we start at the first free queue
                 * and continue consuming queues in the same sequence
                 */
                if (adapter->vfs_allocated_count) {
                        for (; i < adapter->rss_queues; i++)
                                adapter->rx_ring[i]->reg_idx = rbase_offset +
                                                               Q_IDX_82576(i);
                }
                /* Fall through */
        case e1000_82575:
        case e1000_82580:
        case e1000_i350:
        case e1000_i210:
        case e1000_i211:
        default:
                for (; i < adapter->num_rx_queues; i++)
                        adapter->rx_ring[i]->reg_idx = rbase_offset + i;
                for (; j < adapter->num_tx_queues; j++)
                        adapter->tx_ring[j]->reg_idx = rbase_offset + j;
                break;
        }
}

static void igb_free_queues(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                kfree(adapter->tx_ring[i]);
                adapter->tx_ring[i] = NULL;
        }
        for (i = 0; i < adapter->num_rx_queues; i++) {
                kfree(adapter->rx_ring[i]);
                adapter->rx_ring[i] = NULL;
        }
        adapter->num_rx_queues = 0;
        adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
        struct igb_ring *ring;
        int i;
        int orig_node = adapter->node;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                                    adapter->node);
                if (!ring)
                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->tx_ring_count;
                ring->queue_index = i;
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
                ring->numa_node = adapter->node;
                /* For 82575, context index must be unique per ring. */
                if (adapter->hw.mac.type == e1000_82575)
                        set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
                adapter->tx_ring[i] = ring;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        for (i = 0; i < adapter->num_rx_queues; i++) {
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                                    adapter->node);
                if (!ring)
                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->rx_ring_count;
                ring->queue_index = i;
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
                ring->numa_node = adapter->node;
                /* set flag indicating ring supports SCTP checksum offload */
                if (adapter->hw.mac.type >= e1000_82576)
                        set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);

                /*
                 * On i350, i210, and i211, loopback VLAN packets
                 * have the tag byte-swapped.
                 */
                if (adapter->hw.mac.type >= e1000_i350)
                        set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);

                adapter->rx_ring[i] = ring;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        igb_cache_ring_register(adapter);

        return 0;

err:
        /* Restore the adapter's original node */
        adapter->node = orig_node;
        igb_free_queues(adapter);

        return -ENOMEM;
}
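/* Note on the node handling above: when adapter->node is -1 (no preferred
 * NUMA node), each pass through the loops advances to the next online node,
 * so the Tx and Rx rings end up spread round-robin across NUMA nodes.
 */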

/**
 *  igb_write_ivar - configure ivar for given MSI-X vector
 *  @hw: pointer to the HW structure
 *  @msix_vector: vector number we are allocating to a given ring
 *  @index: row index of IVAR register to write within IVAR table
 *  @offset: column offset within IVAR, should be multiple of 8
 *
 *  This function is intended to handle the writing of the IVAR register
 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 *  each containing a cause allocation for an Rx and Tx ring, and a
 *  variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
                           int index, int offset)
{
        u32 ivar = array_rd32(E1000_IVAR0, index);

        /* clear any bits that are currently set */
        ivar &= ~((u32)0xFF << offset);

        /* write vector and valid bit */
        ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

        array_wr32(E1000_IVAR0, index, ivar);
}
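/* Worked example of the index/offset math used by igb_assign_vector()
 * below: on the column-major 82576, Rx queue 5 lands at index 5 & 0x7 = 5
 * with offset (5 & 0x8) << 1 = 0, while on the row-major 82580-class parts
 * it lands at index 5 >> 1 = 2 with offset (5 & 0x1) << 4 = 16.  In both
 * schemes the matching Tx queue uses the same index with offset + 8.
 */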

#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
        struct igb_adapter *adapter = q_vector->adapter;
        struct e1000_hw *hw = &adapter->hw;
        int rx_queue = IGB_N0_QUEUE;
        int tx_queue = IGB_N0_QUEUE;
        u32 msixbm = 0;

        if (q_vector->rx.ring)
                rx_queue = q_vector->rx.ring->reg_idx;
        if (q_vector->tx.ring)
                tx_queue = q_vector->tx.ring->reg_idx;

        switch (hw->mac.type) {
        case e1000_82575:
                /* The 82575 assigns vectors using a bitmask, which matches the
                   bitmask for the EICR/EIMS/EIMC registers.  To assign one
                   or more queues to a vector, we write the appropriate bits
                   into the MSIXBM register for that vector. */
                if (rx_queue > IGB_N0_QUEUE)
                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
                if (tx_queue > IGB_N0_QUEUE)
                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
                if (!adapter->msix_entries && msix_vector == 0)
                        msixbm |= E1000_EIMS_OTHER;
                array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
                q_vector->eims_value = msixbm;
                break;
        case e1000_82576:
                /*
                 * 82576 uses a table that essentially consists of 2 columns
                 * with 8 rows.  The ordering is column-major so we use the
                 * lower 3 bits as the row index, and the 4th bit as the
                 * column offset.
                 */
                if (rx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       rx_queue & 0x7,
                                       (rx_queue & 0x8) << 1);
                if (tx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       tx_queue & 0x7,
                                       ((tx_queue & 0x8) << 1) + 8);
                q_vector->eims_value = 1 << msix_vector;
                break;
        case e1000_82580:
        case e1000_i350:
        case e1000_i210:
        case e1000_i211:
                /*
                 * On 82580 and newer adapters the scheme is similar to 82576
                 * however instead of ordering column-major we have things
                 * ordered row-major.  So we traverse the table by using
                 * bit 0 as the column offset, and the remaining bits as the
                 * row index.
                 */
                if (rx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       rx_queue >> 1,
                                       (rx_queue & 0x1) << 4);
                if (tx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       tx_queue >> 1,
                                       ((tx_queue & 0x1) << 4) + 8);
                q_vector->eims_value = 1 << msix_vector;
                break;
        default:
                BUG();
                break;
        }

        /* add q_vector eims value to global eims_enable_mask */
        adapter->eims_enable_mask |= q_vector->eims_value;

        /* configure q_vector to set itr on first interrupt */
        q_vector->set_itr = 1;
}
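/* Example for the 82575 bitmask scheme above: Rx queue 2 plus Tx queue 2
 * on MSI-X vector 1 yields msixbm = (E1000_EICR_RX_QUEUE0 << 2) |
 * (E1000_EICR_TX_QUEUE0 << 2), which is written to MSIXBM entry 1 and
 * also becomes that q_vector's eims_value.
 */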

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
        u32 tmp;
        int i, vector = 0;
        struct e1000_hw *hw = &adapter->hw;

        adapter->eims_enable_mask = 0;

        /* set vector for other causes, i.e. link changes */
        switch (hw->mac.type) {
        case e1000_82575:
                tmp = rd32(E1000_CTRL_EXT);
                /* enable MSI-X PBA support*/
                tmp |= E1000_CTRL_EXT_PBA_CLR;

                /* Auto-Mask interrupts upon ICR read. */
                tmp |= E1000_CTRL_EXT_EIAME;
                tmp |= E1000_CTRL_EXT_IRCA;

                wr32(E1000_CTRL_EXT, tmp);

                /* enable msix_other interrupt */
                array_wr32(E1000_MSIXBM(0), vector++,
                                      E1000_EIMS_OTHER);
                adapter->eims_other = E1000_EIMS_OTHER;

                break;

        case e1000_82576:
        case e1000_82580:
        case e1000_i350:
        case e1000_i210:
        case e1000_i211:
                /* Turn on MSI-X capability first, or our settings
                 * won't stick.  And it will take days to debug. */
                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
                                E1000_GPIE_PBA | E1000_GPIE_EIAME |
                                E1000_GPIE_NSICR);

                /* enable msix_other interrupt */
                adapter->eims_other = 1 << vector;
                tmp = (vector++ | E1000_IVAR_VALID) << 8;

                wr32(E1000_IVAR_MISC, tmp);
                break;
        default:
                /* do nothing, since nothing else supports MSI-X */
                break;
        } /* switch (hw->mac.type) */

        adapter->eims_enable_mask |= adapter->eims_other;

        for (i = 0; i < adapter->num_q_vectors; i++)
                igb_assign_vector(adapter->q_vector[i], vector++);

        wrfl();
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        int i, err = 0, vector = 0;

        err = request_irq(adapter->msix_entries[vector].vector,
                          igb_msix_other, 0, netdev->name, adapter);
        if (err)
                goto err_out;
        vector++;

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];

                q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

                if (q_vector->rx.ring && q_vector->tx.ring)
                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
                                q_vector->rx.ring->queue_index);
                else if (q_vector->tx.ring)
                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
                                q_vector->tx.ring->queue_index);
                else if (q_vector->rx.ring)
                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
                                q_vector->rx.ring->queue_index);
                else
                        sprintf(q_vector->name, "%s-unused", netdev->name);

                err = request_irq(adapter->msix_entries[vector].vector,
                                  igb_msix_ring, 0, q_vector->name,
                                  q_vector);
                if (err)
                        goto err_free;
                vector++;
        }

        igb_configure_msix(adapter);
        return 0;

err_free:
        /* free any queue IRQs already requested, newest first */
        while (--vector > 0)
                free_irq(adapter->msix_entries[vector].vector,
                         adapter->q_vector[vector - 1]);
        /* then the "other" causes vector */
        free_irq(adapter->msix_entries[0].vector, adapter);
err_out:
        return err;
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                pci_disable_msix(adapter->pdev);
                kfree(adapter->msix_entries);
                adapter->msix_entries = NULL;
        } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
                pci_disable_msi(adapter->pdev);
        }
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
                adapter->q_vector[v_idx] = NULL;
                if (!q_vector)
                        continue;
                netif_napi_del(&q_vector->napi);
                kfree(q_vector);
        }
        adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
        igb_free_queues(adapter);
        igb_free_q_vectors(adapter);
        igb_reset_interrupt_capability(adapter);
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
        int err;
        int numvecs, i;

        /* Number of supported queues. */
        adapter->num_rx_queues = adapter->rss_queues;
        if (adapter->vfs_allocated_count)
                adapter->num_tx_queues = 1;
        else
                adapter->num_tx_queues = adapter->rss_queues;

        /* start with one vector for every rx queue */
        numvecs = adapter->num_rx_queues;

        /* if tx handler is separate add 1 for every tx queue */
        if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
                numvecs += adapter->num_tx_queues;

        /* store the number of vectors reserved for queues */
        adapter->num_q_vectors = numvecs;

        /* add 1 vector for link status interrupts */
        numvecs++;
        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
                                        GFP_KERNEL);

        if (!adapter->msix_entries)
                goto msi_only;

        for (i = 0; i < numvecs; i++)
                adapter->msix_entries[i].entry = i;

        err = pci_enable_msix(adapter->pdev,
                              adapter->msix_entries,
                              numvecs);
        if (err == 0)
                goto out;

        igb_reset_interrupt_capability(adapter);

        /* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
        /* disable SR-IOV for non MSI-X configurations */
        if (adapter->vf_data) {
                struct e1000_hw *hw = &adapter->hw;
                /* disable iov and allow time for transactions to clear */
                pci_disable_sriov(adapter->pdev);
                msleep(500);

                kfree(adapter->vf_data);
                adapter->vf_data = NULL;
                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
                wrfl();
                msleep(100);
                dev_info(&adapter->pdev->dev, "IOV Disabled\n");
        }
#endif
        adapter->vfs_allocated_count = 0;
        adapter->rss_queues = 1;
        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        adapter->num_q_vectors = 1;
        if (!pci_enable_msi(adapter->pdev))
                adapter->flags |= IGB_FLAG_HAS_MSI;
out:
        /* Notify the stack of the (possibly) reduced queue counts. */
        rtnl_lock();
        netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
        err = netif_set_real_num_rx_queues(adapter->netdev,
                adapter->num_rx_queues);
        rtnl_unlock();
        return err;
}
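/* Example vector budget from the logic above: with rss_queues = 4 and
 * queue pairing disabled, numvecs = 4 Rx + 4 Tx + 1 link = 9 MSI-X
 * vectors; with IGB_FLAG_QUEUE_PAIRS set it drops to 4 + 1 = 5.
 */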

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
        struct igb_q_vector *q_vector;
        struct e1000_hw *hw = &adapter->hw;
        int v_idx;
        int orig_node = adapter->node;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                if ((adapter->num_q_vectors == (adapter->num_rx_queues +
                                                adapter->num_tx_queues)) &&
                    (adapter->num_rx_queues == v_idx))
                        adapter->node = orig_node;
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
                                        adapter->node);
                if (!q_vector)
                        q_vector = kzalloc(sizeof(struct igb_q_vector),
                                           GFP_KERNEL);
                if (!q_vector)
                        goto err_out;
                q_vector->adapter = adapter;
                q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
                q_vector->itr_val = IGB_START_ITR;
                netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
                adapter->q_vector[v_idx] = q_vector;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        return 0;

err_out:
        /* Restore the adapter's original node */
        adapter->node = orig_node;
        igb_free_q_vectors(adapter);
        return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->rx.ring = adapter->rx_ring[ring_idx];
        q_vector->rx.ring->q_vector = q_vector;
        q_vector->rx.count++;
        q_vector->itr_val = adapter->rx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->tx.ring = adapter->tx_ring[ring_idx];
        q_vector->tx.ring->q_vector = q_vector;
        q_vector->tx.count++;
        q_vector->itr_val = adapter->tx_itr_setting;
        q_vector->tx.work_limit = adapter->tx_work_limit;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
        int i;
        int v_idx = 0;

        if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
            (adapter->num_q_vectors < adapter->num_tx_queues))
                return -ENOMEM;

        if (adapter->num_q_vectors >=
            (adapter->num_rx_queues + adapter->num_tx_queues)) {
                for (i = 0; i < adapter->num_rx_queues; i++)
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                for (i = 0; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        } else {
                for (i = 0; i < adapter->num_rx_queues; i++) {
                        if (i < adapter->num_tx_queues)
                                igb_map_tx_ring_to_vector(adapter, i, v_idx);
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                }
                for (; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        }
        return 0;
}
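/* Example: with 4 Rx and 4 Tx queues and 8 q_vectors, Rx rings take
 * vectors 0-3 and Tx rings take vectors 4-7; with only 4 q_vectors the
 * else branch pairs Tx ring i and Rx ring i on the same vector i.
 */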

/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        int err;

        err = igb_set_interrupt_capability(adapter);
        if (err)
                return err;

        err = igb_alloc_q_vectors(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
                goto err_alloc_q_vectors;
        }

        err = igb_alloc_queues(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
                goto err_alloc_queues;
        }

        err = igb_map_ring_to_vector(adapter);
        if (err) {
                dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
                goto err_map_queues;
        }

        return 0;
err_map_queues:
        igb_free_queues(adapter);
err_alloc_queues:
        igb_free_q_vectors(adapter);
err_alloc_q_vectors:
        igb_reset_interrupt_capability(adapter);
        return err;
}

/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
        int err = 0;

        if (adapter->msix_entries) {
                err = igb_request_msix(adapter);
                if (!err)
                        goto request_done;
                /* fall back to MSI */
                igb_clear_interrupt_scheme(adapter);
                if (!pci_enable_msi(pdev))
                        adapter->flags |= IGB_FLAG_HAS_MSI;
                igb_free_all_tx_resources(adapter);
                igb_free_all_rx_resources(adapter);
                adapter->num_tx_queues = 1;
                adapter->num_rx_queues = 1;
                adapter->num_q_vectors = 1;
                err = igb_alloc_q_vectors(adapter);
                if (err) {
                        dev_err(&pdev->dev,
                                "Unable to allocate memory for vectors\n");
                        goto request_done;
                }
                err = igb_alloc_queues(adapter);
                if (err) {
                        dev_err(&pdev->dev,
                                "Unable to allocate memory for queues\n");
                        igb_free_q_vectors(adapter);
                        goto request_done;
                }
                igb_setup_all_tx_resources(adapter);
                igb_setup_all_rx_resources(adapter);
        }

        igb_assign_vector(adapter->q_vector[0], 0);

        if (adapter->flags & IGB_FLAG_HAS_MSI) {
                err = request_irq(pdev->irq, igb_intr_msi, 0,
                                  netdev->name, adapter);
                if (!err)
                        goto request_done;

                /* fall back to legacy interrupts */
                igb_reset_interrupt_capability(adapter);
                adapter->flags &= ~IGB_FLAG_HAS_MSI;
        }

        err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
                          netdev->name, adapter);

        if (err)
                dev_err(&pdev->dev, "Error %d getting interrupt\n",
                        err);

request_done:
        return err;
}
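/* The fallback order above is: MSI-X (one vector per queue plus one for
 * link/other causes), then MSI with a single combined vector, then legacy
 * shared INTx; each step tears down the previous interrupt scheme and
 * reallocates a single q_vector/queue pair before retrying.
 */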
1325
1326 static void igb_free_irq(struct igb_adapter *adapter)
1327 {
1328         if (adapter->msix_entries) {
1329                 int vector = 0, i;
1330
1331                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1332
1333                 for (i = 0; i < adapter->num_q_vectors; i++)
1334                         free_irq(adapter->msix_entries[vector++].vector,
1335                                  adapter->q_vector[i]);
1336         } else {
1337                 free_irq(adapter->pdev->irq, adapter);
1338         }
1339 }
1340
1341 /**
1342  * igb_irq_disable - Mask off interrupt generation on the NIC
1343  * @adapter: board private structure
1344  **/
1345 static void igb_irq_disable(struct igb_adapter *adapter)
1346 {
1347         struct e1000_hw *hw = &adapter->hw;
1348
1349         /*
1350          * we need to be careful when disabling interrupts.  The VFs are
1351          * also mapped into these registers, so clearing bits blindly can
1352          * break the VF drivers; only clear the bits we set ourselves
1353          */
1354         if (adapter->msix_entries) {
1355                 u32 regval = rd32(E1000_EIAM);
1356                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1357                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1358                 regval = rd32(E1000_EIAC);
1359                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1360         }
1361
1362         wr32(E1000_IAM, 0);
1363         wr32(E1000_IMC, ~0);
1364         wrfl();
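        /* wait for any interrupt handlers still running on other CPUs to
         * finish before returning, so callers may safely free the
         * resources those handlers touch */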
1365         if (adapter->msix_entries) {
1366                 int i;
1367                 for (i = 0; i < adapter->num_q_vectors; i++)
1368                         synchronize_irq(adapter->msix_entries[i].vector);
1369         } else {
1370                 synchronize_irq(adapter->pdev->irq);
1371         }
1372 }
1373
1374 /**
1375  * igb_irq_enable - Enable default interrupt generation settings
1376  * @adapter: board private structure
1377  **/
1378 static void igb_irq_enable(struct igb_adapter *adapter)
1379 {
1380         struct e1000_hw *hw = &adapter->hw;
1381
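        /* for MSI-X, arm auto-clear (EIAC) and auto-mask (EIAM) for the
         * queue vectors and unmask them via EIMS; link, DMA out-of-sync
         * and device-reset-asserted causes are delivered through IMS */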
1382         if (adapter->msix_entries) {
1383                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1384                 u32 regval = rd32(E1000_EIAC);
1385                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1386                 regval = rd32(E1000_EIAM);
1387                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1388                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1389                 if (adapter->vfs_allocated_count) {
1390                         wr32(E1000_MBVFIMR, 0xFF);
1391                         ims |= E1000_IMS_VMMB;
1392                 }
1393                 wr32(E1000_IMS, ims);
1394         } else {
1395                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1396                                 E1000_IMS_DRSTA);
1397                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1398                                 E1000_IMS_DRSTA);
1399         }
1400 }
1401
1402 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1403 {
1404         struct e1000_hw *hw = &adapter->hw;
1405         u16 vid = adapter->hw.mng_cookie.vlan_id;
1406         u16 old_vid = adapter->mng_vlan_id;
1407
1408         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1409                 /* add VID to filter table */
1410                 igb_vfta_set(hw, vid, true);
1411                 adapter->mng_vlan_id = vid;
1412         } else {
1413                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1414         }
1415
1416         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1417             (vid != old_vid) &&
1418             !test_bit(old_vid, adapter->active_vlans)) {
1419                 /* remove VID from filter table */
1420                 igb_vfta_set(hw, old_vid, false);
1421         }
1422 }
1423
1424 /**
1425  * igb_release_hw_control - release control of the h/w to f/w
1426  * @adapter: address of board private structure
1427  *
1428  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1429  * For ASF and Pass Through versions of f/w this means that the
1430  * driver is no longer loaded.
1431  *
1432  **/
1433 static void igb_release_hw_control(struct igb_adapter *adapter)
1434 {
1435         struct e1000_hw *hw = &adapter->hw;
1436         u32 ctrl_ext;
1437
1438         /* Let firmware take over control of h/w */
1439         ctrl_ext = rd32(E1000_CTRL_EXT);
1440         wr32(E1000_CTRL_EXT,
1441                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1442 }
1443
1444 /**
1445  * igb_get_hw_control - get control of the h/w from f/w
1446  * @adapter: address of board private structure
1447  *
1448  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1449  * For ASF and Pass Through versions of f/w this means that
1450  * the driver is loaded.
1451  *
1452  **/
1453 static void igb_get_hw_control(struct igb_adapter *adapter)
1454 {
1455         struct e1000_hw *hw = &adapter->hw;
1456         u32 ctrl_ext;
1457
1458         /* Let firmware know the driver has taken over */
1459         ctrl_ext = rd32(E1000_CTRL_EXT);
1460         wr32(E1000_CTRL_EXT,
1461                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1462 }
1463
1464 /**
1465  * igb_configure - configure the hardware for RX and TX
1466  * @adapter: private board structure
1467  **/
1468 static void igb_configure(struct igb_adapter *adapter)
1469 {
1470         struct net_device *netdev = adapter->netdev;
1471         int i;
1472
1473         igb_get_hw_control(adapter);
1474         igb_set_rx_mode(netdev);
1475
1476         igb_restore_vlan(adapter);
1477
1478         igb_setup_tctl(adapter);
1479         igb_setup_mrqc(adapter);
1480         igb_setup_rctl(adapter);
1481
1482         igb_configure_tx(adapter);
1483         igb_configure_rx(adapter);
1484
1485         igb_rx_fifo_flush_82575(&adapter->hw);
1486
1487         /* call igb_desc_unused which always leaves
1488          * at least 1 descriptor unused to make sure
1489          * next_to_use != next_to_clean */
1490         for (i = 0; i < adapter->num_rx_queues; i++) {
1491                 struct igb_ring *ring = adapter->rx_ring[i];
1492                 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1493         }
1494 }
1495
1496 /**
1497  * igb_power_up_link - Power up the phy/serdes link
1498  * @adapter: address of board private structure
1499  **/
1500 void igb_power_up_link(struct igb_adapter *adapter)
1501 {
1502         igb_reset_phy(&adapter->hw);
1503
1504         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1505                 igb_power_up_phy_copper(&adapter->hw);
1506         else
1507                 igb_power_up_serdes_link_82575(&adapter->hw);
1508 }
1509
1510 /**
1511  * igb_power_down_link - Power down the phy/serdes link
1512  * @adapter: address of board private structure
1513  */
1514 static void igb_power_down_link(struct igb_adapter *adapter)
1515 {
1516         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1517                 igb_power_down_phy_copper_82575(&adapter->hw);
1518         else
1519                 igb_shutdown_serdes_link_82575(&adapter->hw);
1520 }
1521
1522 /**
1523  * igb_up - Open the interface and prepare it to handle traffic
1524  * @adapter: board private structure
1525  **/
1526 int igb_up(struct igb_adapter *adapter)
1527 {
1528         struct e1000_hw *hw = &adapter->hw;
1529         int i;
1530
1531         /* hardware has been reset, we need to reload some things */
1532         igb_configure(adapter);
1533
1534         clear_bit(__IGB_DOWN, &adapter->state);
1535
1536         for (i = 0; i < adapter->num_q_vectors; i++)
1537                 napi_enable(&(adapter->q_vector[i]->napi));
1538
1539         if (adapter->msix_entries)
1540                 igb_configure_msix(adapter);
1541         else
1542                 igb_assign_vector(adapter->q_vector[0], 0);
1543
1544         /* Clear any pending interrupts. */
1545         rd32(E1000_ICR);
1546         igb_irq_enable(adapter);
1547
1548         /* notify VFs that reset has been completed */
1549         if (adapter->vfs_allocated_count) {
1550                 u32 reg_data = rd32(E1000_CTRL_EXT);
1551                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1552                 wr32(E1000_CTRL_EXT, reg_data);
1553         }
1554
1555         netif_tx_start_all_queues(adapter->netdev);
1556
1557         /* start the watchdog. */
1558         hw->mac.get_link_status = 1;
1559         schedule_work(&adapter->watchdog_task);
1560
1561         return 0;
1562 }
1563
1564 void igb_down(struct igb_adapter *adapter)
1565 {
1566         struct net_device *netdev = adapter->netdev;
1567         struct e1000_hw *hw = &adapter->hw;
1568         u32 tctl, rctl;
1569         int i;
1570
1571         /* signal that we're down so the interrupt handler does not
1572          * reschedule our watchdog timer */
1573         set_bit(__IGB_DOWN, &adapter->state);
1574
1575         /* disable receives in the hardware */
1576         rctl = rd32(E1000_RCTL);
1577         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1578         /* flush and sleep below */
1579
1580         netif_tx_stop_all_queues(netdev);
1581
1582         /* disable transmits in the hardware */
1583         tctl = rd32(E1000_TCTL);
1584         tctl &= ~E1000_TCTL_EN;
1585         wr32(E1000_TCTL, tctl);
1586         /* flush both disables and wait for them to finish */
1587         wrfl();
1588         msleep(10);
1589
1590         for (i = 0; i < adapter->num_q_vectors; i++)
1591                 napi_disable(&(adapter->q_vector[i]->napi));
1592
1593         igb_irq_disable(adapter);
1594
1595         del_timer_sync(&adapter->watchdog_timer);
1596         del_timer_sync(&adapter->phy_info_timer);
1597
1598         netif_carrier_off(netdev);
1599
1600         /* record the stats before reset */
1601         spin_lock(&adapter->stats64_lock);
1602         igb_update_stats(adapter, &adapter->stats64);
1603         spin_unlock(&adapter->stats64_lock);
1604
1605         adapter->link_speed = 0;
1606         adapter->link_duplex = 0;
1607
1608         if (!pci_channel_offline(adapter->pdev))
1609                 igb_reset(adapter);
1610         igb_clean_all_tx_rings(adapter);
1611         igb_clean_all_rx_rings(adapter);
1612 #ifdef CONFIG_IGB_DCA
1613
1614         /* since we reset the hardware DCA settings were cleared */
1615         igb_setup_dca(adapter);
1616 #endif
1617 }
1618
1619 void igb_reinit_locked(struct igb_adapter *adapter)
1620 {
1621         WARN_ON(in_interrupt());
1622         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1623                 msleep(1);
1624         igb_down(adapter);
1625         igb_up(adapter);
1626         clear_bit(__IGB_RESETTING, &adapter->state);
1627 }
1628
1629 void igb_reset(struct igb_adapter *adapter)
1630 {
1631         struct pci_dev *pdev = adapter->pdev;
1632         struct e1000_hw *hw = &adapter->hw;
1633         struct e1000_mac_info *mac = &hw->mac;
1634         struct e1000_fc_info *fc = &hw->fc;
1635         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1636         u16 hwm;
1637
1638         /* Repartition PBA for MTUs greater than 9k.
1639          * CTRL.RST is required for the change to take effect.
1640          */
1641         switch (mac->type) {
1642         case e1000_i350:
1643         case e1000_82580:
1644                 pba = rd32(E1000_RXPBS);
1645                 pba = igb_rxpbs_adjust_82580(pba);
1646                 break;
1647         case e1000_82576:
1648                 pba = rd32(E1000_RXPBS);
1649                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1650                 break;
1651         case e1000_82575:
1652         case e1000_i210:
1653         case e1000_i211:
1654         default:
1655                 pba = E1000_PBA_34K;
1656                 break;
1657         }
1658
1659         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1660             (mac->type < e1000_82576)) {
1661                 /* adjust PBA for jumbo frames */
1662                 wr32(E1000_PBA, pba);
1663
1664                 /* To maintain wire speed transmits, the Tx FIFO should be
1665                  * large enough to accommodate two full transmit packets,
1666                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1667                  * the Rx FIFO should be large enough to accommodate at least
1668                  * one full receive packet and is similarly rounded up and
1669                  * expressed in KB. */
1670                 pba = rd32(E1000_PBA);
1671                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1672                 tx_space = pba >> 16;
1673                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1674                 pba &= 0xffff;
1675                 /* the Tx FIFO also stores 16 bytes of descriptor information
1676                  * per packet, but not the Ethernet FCS, since hardware appends it */
1677                 min_tx_space = (adapter->max_frame_size +
1678                                 sizeof(union e1000_adv_tx_desc) -
1679                                 ETH_FCS_LEN) * 2;
1680                 min_tx_space = ALIGN(min_tx_space, 1024);
1681                 min_tx_space >>= 10;
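                /* e.g. a 1522-byte max frame plus a 16-byte descriptor,
                 * less the 4-byte FCS, gives 2 * 1534 = 3068 bytes, which
                 * rounds up to 3KB */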
1682                 /* software strips receive CRC, so leave room for it */
1683                 min_rx_space = adapter->max_frame_size;
1684                 min_rx_space = ALIGN(min_rx_space, 1024);
1685                 min_rx_space >>= 10;
1686
1687                 /* If current Tx allocation is less than the min Tx FIFO size,
1688                  * and the min Tx FIFO size is less than the current Rx FIFO
1689                  * allocation, take space away from current Rx allocation */
1690                 if (tx_space < min_tx_space &&
1691                     ((min_tx_space - tx_space) < pba)) {
1692                         pba = pba - (min_tx_space - tx_space);
1693
1694                         /* if short on rx space, rx wins and must trump tx
1695                          * adjustment */
1696                         if (pba < min_rx_space)
1697                                 pba = min_rx_space;
1698                 }
1699                 wr32(E1000_PBA, pba);
1700         }
1701
1702         /* flow control settings */
1703         /* The high water mark must be low enough to fit one full frame
1704          * (or the size used for early receive) above it in the Rx FIFO.
1705          * Set it to the lower of:
1706          * - 90% of the Rx FIFO size, or
1707          * - the full Rx FIFO size minus one full frame */
1708         hwm = min(((pba << 10) * 9 / 10),
1709                         ((pba << 10) - 2 * adapter->max_frame_size));
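        /* e.g. a 34KB Rx PBA with 1522-byte max frames yields
         * min(34816 * 9 / 10, 34816 - 2 * 1522) = min(31334, 31772) = 31334,
         * which the 16-byte granularity below truncates to 31328 */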
1710
1711         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1712         fc->low_water = fc->high_water - 16;
1713         fc->pause_time = 0xFFFF;
1714         fc->send_xon = 1;
1715         fc->current_mode = fc->requested_mode;
1716
1717         /* reset state for all VFs and disable their transmits and receives */
1718         if (adapter->vfs_allocated_count) {
1719                 int i;
1720                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1721                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1722
1723                 /* ping all the active vfs to let them know we are going down */
1724                 igb_ping_all_vfs(adapter);
1725
1726                 /* disable transmits and receives */
1727                 wr32(E1000_VFRE, 0);
1728                 wr32(E1000_VFTE, 0);
1729         }
1730
1731         /* Allow time for pending master requests to run */
1732         hw->mac.ops.reset_hw(hw);
1733         wr32(E1000_WUC, 0);
1734
1735         if (hw->mac.ops.init_hw(hw))
1736                 dev_err(&pdev->dev, "Hardware Error\n");
1737
1738         /*
1739          * Flow control settings reset on hardware reset, so guarantee flow
1740          * control is off when forcing speed.
1741          */
1742         if (!hw->mac.autoneg)
1743                 igb_force_mac_fc(hw);
1744
1745         igb_init_dmac(adapter, pba);
1746         if (!netif_running(adapter->netdev))
1747                 igb_power_down_link(adapter);
1748
1749         igb_update_mng_vlan(adapter);
1750
1751         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1752         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1753
1754 #ifdef CONFIG_IGB_PTP
1755         /* Re-enable PTP, where applicable. */
1756         igb_ptp_reset(adapter);
1757 #endif /* CONFIG_IGB_PTP */
1758
1759         igb_get_phy_info(hw);
1760 }
1761
1762 static netdev_features_t igb_fix_features(struct net_device *netdev,
1763         netdev_features_t features)
1764 {
1765         /*
1766          * Since there is no support for separate rx/tx vlan accel
1767          * enable/disable, make sure the tx flag is always in the same
         * state as the rx flag.
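         * Toggling either one via ethtool (e.g. "ethtool -K <dev>
         * rxvlan off") therefore toggles both.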
1768          */
1769         if (features & NETIF_F_HW_VLAN_RX)
1770                 features |= NETIF_F_HW_VLAN_TX;
1771         else
1772                 features &= ~NETIF_F_HW_VLAN_TX;
1773
1774         return features;
1775 }
1776
1777 static int igb_set_features(struct net_device *netdev,
1778         netdev_features_t features)
1779 {
1780         netdev_features_t changed = netdev->features ^ features;
1781         struct igb_adapter *adapter = netdev_priv(netdev);
1782
1783         if (changed & NETIF_F_HW_VLAN_RX)
1784                 igb_vlan_mode(netdev, features);
1785
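        /* VLAN offload changes take effect immediately above; only an
         * RXALL change requires the reset/reinit below */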
1786         if (!(changed & NETIF_F_RXALL))
1787                 return 0;
1788
1789         netdev->features = features;
1790
1791         if (netif_running(netdev))
1792                 igb_reinit_locked(adapter);
1793         else
1794                 igb_reset(adapter);
1795
1796         return 0;
1797 }
1798
1799 static const struct net_device_ops igb_netdev_ops = {
1800         .ndo_open               = igb_open,
1801         .ndo_stop               = igb_close,
1802         .ndo_start_xmit         = igb_xmit_frame,
1803         .ndo_get_stats64        = igb_get_stats64,
1804         .ndo_set_rx_mode        = igb_set_rx_mode,
1805         .ndo_set_mac_address    = igb_set_mac,
1806         .ndo_change_mtu         = igb_change_mtu,
1807         .ndo_do_ioctl           = igb_ioctl,
1808         .ndo_tx_timeout         = igb_tx_timeout,
1809         .ndo_validate_addr      = eth_validate_addr,
1810         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1811         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1812         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1813         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1814         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1815         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1816 #ifdef CONFIG_NET_POLL_CONTROLLER
1817         .ndo_poll_controller    = igb_netpoll,
1818 #endif
1819         .ndo_fix_features       = igb_fix_features,
1820         .ndo_set_features       = igb_set_features,
1821 };
1822
1823 /**
1824  * igb_set_fw_version - Configure version string for ethtool
1825  * @adapter: adapter struct
1826  *
1827  **/
1828 void igb_set_fw_version(struct igb_adapter *adapter)
1829 {
1830         struct e1000_hw *hw = &adapter->hw;
1831         u16 eeprom_verh, eeprom_verl, comb_verh, comb_verl, comb_offset;
1832         u16 major, build, patch, fw_version;
1833         u32 etrack_id;
1834
1835         hw->nvm.ops.read(hw, 5, 1, &fw_version);
1836         if (adapter->hw.mac.type != e1000_i211) {
1837                 hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verh);
1838                 hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verl);
1839                 etrack_id = (eeprom_verh << IGB_ETRACK_SHIFT) | eeprom_verl;
1840
1841                 /* combo image version needs to be found */
1842                 hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset);
1843                 if ((comb_offset != 0x0) &&
1844                     (comb_offset != IGB_NVM_VER_INVALID)) {
1845                         hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset
1846                                          + 1), 1, &comb_verh);
1847                         hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset),
1848                                          1, &comb_verl);
1849
1850                         /* Only display Option Rom if it exists and is valid */
1851                         if ((comb_verh && comb_verl) &&
1852                             ((comb_verh != IGB_NVM_VER_INVALID) &&
1853                              (comb_verl != IGB_NVM_VER_INVALID))) {
1854                                 major = comb_verl >> IGB_COMB_VER_SHFT;
1855                                 build = (comb_verl << IGB_COMB_VER_SHFT) |
1856                                         (comb_verh >> IGB_COMB_VER_SHFT);
1857                                 patch = comb_verh & IGB_COMB_VER_MASK;
1858                                 snprintf(adapter->fw_version,
1859                                          sizeof(adapter->fw_version),
1860                                          "%d.%d%d, 0x%08x, %d.%d.%d",
1861                                          (fw_version & IGB_MAJOR_MASK) >>
1862                                          IGB_MAJOR_SHIFT,
1863                                          (fw_version & IGB_MINOR_MASK) >>
1864                                          IGB_MINOR_SHIFT,
1865                                          (fw_version & IGB_BUILD_MASK),
1866                                          etrack_id, major, build, patch);
1867                                 goto out;
1868                         }
1869                 }
1870                 snprintf(adapter->fw_version, sizeof(adapter->fw_version),
1871                          "%d.%d%d, 0x%08x",
1872                          (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT,
1873                          (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT,
1874                          (fw_version & IGB_BUILD_MASK), etrack_id);
1875         } else {
1876                 snprintf(adapter->fw_version, sizeof(adapter->fw_version),
1877                          "%d.%d%d",
1878                          (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT,
1879                          (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT,
1880                          (fw_version & IGB_BUILD_MASK));
1881         }
1882 out:
1883         return;
1884 }
1885
1886 /**
1887  * igb_probe - Device Initialization Routine
1888  * @pdev: PCI device information struct
1889  * @ent: entry in igb_pci_tbl
1890  *
1891  * Returns 0 on success, negative on failure
1892  *
1893  * igb_probe initializes an adapter identified by a pci_dev structure.
1894  * The OS initialization, configuring of the adapter private structure,
1895  * and a hardware reset occur.
1896  **/
1897 static int __devinit igb_probe(struct pci_dev *pdev,
1898                                const struct pci_device_id *ent)
1899 {
1900         struct net_device *netdev;
1901         struct igb_adapter *adapter;
1902         struct e1000_hw *hw;
1903         u16 eeprom_data = 0;
1904         s32 ret_val;
1905         static int global_quad_port_a; /* global quad port a indication */
1906         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1907         unsigned long mmio_start, mmio_len;
1908         int err, pci_using_dac;
1909         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1910         u8 part_str[E1000_PBANUM_LENGTH];
1911
1912         /* Catch broken hardware that put the wrong VF device ID in
1913          * the PCIe SR-IOV capability.
1914          */
1915         if (pdev->is_virtfn) {
1916                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1917                         pci_name(pdev), pdev->vendor, pdev->device);
1918                 return -EINVAL;
1919         }
1920
1921         err = pci_enable_device_mem(pdev);
1922         if (err)
1923                 return err;
1924
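        /* prefer 64-bit DMA and only fall back to a 32-bit mask (and no
         * NETIF_F_HIGHDMA) if the platform cannot provide it */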
1925         pci_using_dac = 0;
1926         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1927         if (!err) {
1928                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1929                 if (!err)
1930                         pci_using_dac = 1;
1931         } else {
1932                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1933                 if (err) {
1934                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1935                         if (err) {
1936                                 dev_err(&pdev->dev, "No usable DMA "
1937                                         "configuration, aborting\n");
1938                                 goto err_dma;
1939                         }
1940                 }
1941         }
1942
1943         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1944                                            IORESOURCE_MEM),
1945                                            igb_driver_name);
1946         if (err)
1947                 goto err_pci_reg;
1948
1949         pci_enable_pcie_error_reporting(pdev);
1950
1951         pci_set_master(pdev);
1952         pci_save_state(pdev);
1953
1954         err = -ENOMEM;
1955         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1956                                    IGB_MAX_TX_QUEUES);
1957         if (!netdev)
1958                 goto err_alloc_etherdev;
1959
1960         SET_NETDEV_DEV(netdev, &pdev->dev);
1961
1962         pci_set_drvdata(pdev, netdev);
1963         adapter = netdev_priv(netdev);
1964         adapter->netdev = netdev;
1965         adapter->pdev = pdev;
1966         hw = &adapter->hw;
1967         hw->back = adapter;
1968         adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1969
1970         mmio_start = pci_resource_start(pdev, 0);
1971         mmio_len = pci_resource_len(pdev, 0);
1972
1973         err = -EIO;
1974         hw->hw_addr = ioremap(mmio_start, mmio_len);
1975         if (!hw->hw_addr)
1976                 goto err_ioremap;
1977
1978         netdev->netdev_ops = &igb_netdev_ops;
1979         igb_set_ethtool_ops(netdev);
1980         netdev->watchdog_timeo = 5 * HZ;
1981
1982         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1983
1984         netdev->mem_start = mmio_start;
1985         netdev->mem_end = mmio_start + mmio_len;
1986
1987         /* PCI config space info */
1988         hw->vendor_id = pdev->vendor;
1989         hw->device_id = pdev->device;
1990         hw->revision_id = pdev->revision;
1991         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1992         hw->subsystem_device_id = pdev->subsystem_device;
1993
1994         /* Copy the default MAC, PHY and NVM function pointers */
1995         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1996         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1997         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1998         /* Initialize skew-specific constants */
1999         err = ei->get_invariants(hw);
2000         if (err)
2001                 goto err_sw_init;
2002
2003         /* setup the private structure */
2004         err = igb_sw_init(adapter);
2005         if (err)
2006                 goto err_sw_init;
2007
2008         igb_get_bus_info_pcie(hw);
2009
2010         hw->phy.autoneg_wait_to_complete = false;
2011
2012         /* Copper options */
2013         if (hw->phy.media_type == e1000_media_type_copper) {
2014                 hw->phy.mdix = AUTO_ALL_MODES;
2015                 hw->phy.disable_polarity_correction = false;
2016                 hw->phy.ms_type = e1000_ms_hw_default;
2017         }
2018
2019         if (igb_check_reset_block(hw))
2020                 dev_info(&pdev->dev,
2021                         "PHY reset is blocked due to SOL/IDER session.\n");
2022
2023         /*
2024          * features is zeroed at allocation but may already have bits
2025          * set by igb_sw_init, so we should OR in new flags instead of
2026          * assigning.
2027          */
2028         netdev->features |= NETIF_F_SG |
2029                             NETIF_F_IP_CSUM |
2030                             NETIF_F_IPV6_CSUM |
2031                             NETIF_F_TSO |
2032                             NETIF_F_TSO6 |
2033                             NETIF_F_RXHASH |
2034                             NETIF_F_RXCSUM |
2035                             NETIF_F_HW_VLAN_RX |
2036                             NETIF_F_HW_VLAN_TX;
2037
2038         /* copy netdev features into list of user selectable features */
2039         netdev->hw_features |= netdev->features;
2040         netdev->hw_features |= NETIF_F_RXALL;
2041
2042         /* set this bit last since it cannot be part of hw_features */
2043         netdev->features |= NETIF_F_HW_VLAN_FILTER;
2044
2045         netdev->vlan_features |= NETIF_F_TSO |
2046                                  NETIF_F_TSO6 |
2047                                  NETIF_F_IP_CSUM |
2048                                  NETIF_F_IPV6_CSUM |
2049                                  NETIF_F_SG;
2050
2051         netdev->priv_flags |= IFF_SUPP_NOFCS;
2052
2053         if (pci_using_dac) {
2054                 netdev->features |= NETIF_F_HIGHDMA;
2055                 netdev->vlan_features |= NETIF_F_HIGHDMA;
2056         }
2057
2058         if (hw->mac.type >= e1000_82576) {
2059                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
2060                 netdev->features |= NETIF_F_SCTP_CSUM;
2061         }
2062
2063         netdev->priv_flags |= IFF_UNICAST_FLT;
2064
2065         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2066
2067         /* before reading the NVM, reset the controller to put the device in a
2068          * known good starting state */
2069         hw->mac.ops.reset_hw(hw);
2070
2071         /*
2072          * make sure the NVM is good; i211 parts have special NVM that
2073          * doesn't contain a checksum
2074          */
2075         if (hw->mac.type != e1000_i211) {
2076                 if (hw->nvm.ops.validate(hw) < 0) {
2077                         dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2078                         err = -EIO;
2079                         goto err_eeprom;
2080                 }
2081         }
2082
2083         /* copy the MAC address out of the NVM */
2084         if (hw->mac.ops.read_mac_addr(hw))
2085                 dev_err(&pdev->dev, "NVM Read Error\n");
2086
2087         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2088         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2089
2090         if (!is_valid_ether_addr(netdev->perm_addr)) {
2091                 dev_err(&pdev->dev, "Invalid MAC Address\n");
2092                 err = -EIO;
2093                 goto err_eeprom;
2094         }
2095
2096         /* get firmware version for ethtool -i */
2097         igb_set_fw_version(adapter);
2098
2099         setup_timer(&adapter->watchdog_timer, igb_watchdog,
2100                     (unsigned long) adapter);
2101         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2102                     (unsigned long) adapter);
2103
2104         INIT_WORK(&adapter->reset_task, igb_reset_task);
2105         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2106
2107         /* Initialize link properties that are user-changeable */
2108         adapter->fc_autoneg = true;
2109         hw->mac.autoneg = true;
2110         hw->phy.autoneg_advertised = 0x2f;
2111
2112         hw->fc.requested_mode = e1000_fc_default;
2113         hw->fc.current_mode = e1000_fc_default;
2114
2115         igb_validate_mdi_setting(hw);
2116
2117         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2118          * enable the ACPI Magic Packet filter
2119          */
2120
2121         if (hw->bus.func == 0)
2122                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2123         else if (hw->mac.type >= e1000_82580)
2124                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2125                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2126                                  &eeprom_data);
2127         else if (hw->bus.func == 1)
2128                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2129
2130         if (eeprom_data & eeprom_apme_mask)
2131                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2132
2133         /* now that we have the eeprom settings, apply the special cases where
2134          * the eeprom may be wrong or the board simply won't support wake on
2135          * lan on a particular port */
2136         switch (pdev->device) {
2137         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2138                 adapter->eeprom_wol = 0;
2139                 break;
2140         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2141         case E1000_DEV_ID_82576_FIBER:
2142         case E1000_DEV_ID_82576_SERDES:
2143                 /* Wake events are only supported on port A for dual fiber
2144                  * adapters, regardless of the eeprom setting */
2145                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2146                         adapter->eeprom_wol = 0;
2147                 break;
2148         case E1000_DEV_ID_82576_QUAD_COPPER:
2149         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2150                 /* if quad port adapter, disable WoL on all but port A */
2151                 if (global_quad_port_a != 0)
2152                         adapter->eeprom_wol = 0;
2153                 else
2154                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2155                 /* Reset for multiple quad port adapters */
2156                 if (++global_quad_port_a == 4)
2157                         global_quad_port_a = 0;
2158                 break;
2159         }
2160
2161         /* initialize the wol settings based on the eeprom settings */
2162         adapter->wol = adapter->eeprom_wol;
2163         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2164
2165         /* reset the hardware with the new settings */
2166         igb_reset(adapter);
2167
2168         /* let the f/w know that the h/w is now under the control of the
2169          * driver. */
2170         igb_get_hw_control(adapter);
2171
2172         strcpy(netdev->name, "eth%d");
2173         err = register_netdev(netdev);
2174         if (err)
2175                 goto err_register;
2176
2177         /* carrier off reporting is important to ethtool even BEFORE open */
2178         netif_carrier_off(netdev);
2179
2180 #ifdef CONFIG_IGB_DCA
2181         if (dca_add_requester(&pdev->dev) == 0) {
2182                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2183                 dev_info(&pdev->dev, "DCA enabled\n");
2184                 igb_setup_dca(adapter);
2185         }
2186
2187 #endif
2188
2189 #ifdef CONFIG_IGB_PTP
2190         /* do hw tstamp init after resetting */
2191         igb_ptp_init(adapter);
2192 #endif /* CONFIG_IGB_PTP */
2193
2194         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2195         /* print bus type/speed/width info */
2196         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2197                  netdev->name,
2198                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2199                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2200                                                             "unknown"),
2201                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2202                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2203                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2204                    "unknown"),
2205                  netdev->dev_addr);
2206
2207         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2208         if (ret_val)
2209                 strcpy(part_str, "Unknown");
2210         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2211         dev_info(&pdev->dev,
2212                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2213                 adapter->msix_entries ? "MSI-X" :
2214                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2215                 adapter->num_rx_queues, adapter->num_tx_queues);
2216         switch (hw->mac.type) {
2217         case e1000_i350:
2218         case e1000_i210:
2219         case e1000_i211:
2220                 igb_set_eee_i350(hw);
2221                 break;
2222         default:
2223                 break;
2224         }
2225
2226         pm_runtime_put_noidle(&pdev->dev);
2227         return 0;
2228
2229 err_register:
2230         igb_release_hw_control(adapter);
2231 err_eeprom:
2232         if (!igb_check_reset_block(hw))
2233                 igb_reset_phy(hw);
2234
2235         if (hw->flash_address)
2236                 iounmap(hw->flash_address);
2237 err_sw_init:
2238         igb_clear_interrupt_scheme(adapter);
2239         iounmap(hw->hw_addr);
2240 err_ioremap:
2241         free_netdev(netdev);
2242 err_alloc_etherdev:
2243         pci_release_selected_regions(pdev,
2244                                      pci_select_bars(pdev, IORESOURCE_MEM));
2245 err_pci_reg:
2246 err_dma:
2247         pci_disable_device(pdev);
2248         return err;
2249 }
2250
2251 /**
2252  * igb_remove - Device Removal Routine
2253  * @pdev: PCI device information struct
2254  *
2255  * igb_remove is called by the PCI subsystem to alert the driver
2256  * that it should release a PCI device.  This could be caused by a
2257  * Hot-Plug event, or because the driver is going to be removed from
2258  * memory.
2259  **/
2260 static void __devexit igb_remove(struct pci_dev *pdev)
2261 {
2262         struct net_device *netdev = pci_get_drvdata(pdev);
2263         struct igb_adapter *adapter = netdev_priv(netdev);
2264         struct e1000_hw *hw = &adapter->hw;
2265
2266         pm_runtime_get_noresume(&pdev->dev);
2267 #ifdef CONFIG_IGB_PTP
2268         igb_ptp_stop(adapter);
2269 #endif /* CONFIG_IGB_PTP */
2270
2271         /*
2272          * Mark the adapter down first so the watchdog timer cannot be
2273          * rescheduled while we tear it down.
2274          */
2275         set_bit(__IGB_DOWN, &adapter->state);
2276         del_timer_sync(&adapter->watchdog_timer);
2277         del_timer_sync(&adapter->phy_info_timer);
2278
2279         cancel_work_sync(&adapter->reset_task);
2280         cancel_work_sync(&adapter->watchdog_task);
2281
2282 #ifdef CONFIG_IGB_DCA
2283         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2284                 dev_info(&pdev->dev, "DCA disabled\n");
2285                 dca_remove_requester(&pdev->dev);
2286                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2287                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2288         }
2289 #endif
2290
2291         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2292          * would have already happened in close and is redundant. */
2293         igb_release_hw_control(adapter);
2294
2295         unregister_netdev(netdev);
2296
2297         igb_clear_interrupt_scheme(adapter);
2298
2299 #ifdef CONFIG_PCI_IOV
2300         /* reclaim resources allocated to VFs */
2301         if (adapter->vf_data) {
2302                 /* disable iov and allow time for transactions to clear */
2303                 if (!igb_check_vf_assignment(adapter)) {
2304                         pci_disable_sriov(pdev);
2305                         msleep(500);
2306                 } else {
2307                         dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2308                 }
2309
2310                 kfree(adapter->vf_data);
2311                 adapter->vf_data = NULL;
2312                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2313                 wrfl();
2314                 msleep(100);
2315                 dev_info(&pdev->dev, "IOV Disabled\n");
2316         }
2317 #endif
2318
2319         iounmap(hw->hw_addr);
2320         if (hw->flash_address)
2321                 iounmap(hw->flash_address);
2322         pci_release_selected_regions(pdev,
2323                                      pci_select_bars(pdev, IORESOURCE_MEM));
2324
2325         kfree(adapter->shadow_vfta);
2326         free_netdev(netdev);
2327
2328         pci_disable_pcie_error_reporting(pdev);
2329
2330         pci_disable_device(pdev);
2331 }
2332
2333 /**
2334  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2335  * @adapter: board private structure to initialize
2336  *
2337  * This function initializes the vf specific data storage and then attempts to
2338  * allocate the VFs.  It is ordered this way because it is much
2339  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2340  * the memory for the VFs.
2341  **/
2342 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2343 {
2344 #ifdef CONFIG_PCI_IOV
2345         struct pci_dev *pdev = adapter->pdev;
2346         struct e1000_hw *hw = &adapter->hw;
2347         int old_vfs = igb_find_enabled_vfs(adapter);
2348         int i;
2349
2350         /* Virtualization features not supported on i210 family. */
2351         if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
2352                 return;
2353
2354         if (old_vfs) {
2355                 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2356                          "max_vfs setting of %d\n", old_vfs, max_vfs);
2357                 adapter->vfs_allocated_count = old_vfs;
2358         }
2359
2360         if (!adapter->vfs_allocated_count)
2361                 return;
2362
2363         adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2364                                 sizeof(struct vf_data_storage), GFP_KERNEL);
2365
2366         /* if allocation failed then we do not support SR-IOV */
2367         if (!adapter->vf_data) {
2368                 adapter->vfs_allocated_count = 0;
2369                 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2370                         "Data Storage\n");
2371                 goto out;
2372         }
2373
2374         if (!old_vfs) {
2375                 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2376                         goto err_out;
2377         }
2378         dev_info(&pdev->dev, "%d VFs allocated\n",
2379                  adapter->vfs_allocated_count);
2380         for (i = 0; i < adapter->vfs_allocated_count; i++)
2381                 igb_vf_configure(adapter, i);
2382
2383         /* DMA Coalescing is not supported in IOV mode. */
2384         adapter->flags &= ~IGB_FLAG_DMAC;
2385         goto out;
2386 err_out:
2387         kfree(adapter->vf_data);
2388         adapter->vf_data = NULL;
2389         adapter->vfs_allocated_count = 0;
2390 out:
2391         return;
2392 #endif /* CONFIG_PCI_IOV */
2393 }
2394
2395 /**
2396  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2397  * @adapter: board private structure to initialize
2398  *
2399  * igb_sw_init initializes the Adapter private data structure.
2400  * Fields are initialized based on PCI device information and
2401  * OS network device settings (MTU size).
2402  **/
2403 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2404 {
2405         struct e1000_hw *hw = &adapter->hw;
2406         struct net_device *netdev = adapter->netdev;
2407         struct pci_dev *pdev = adapter->pdev;
2408         u32 max_rss_queues;
2409
2410         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2411
2412         /* set default ring sizes */
2413         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2414         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2415
2416         /* set default ITR values */
2417         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2418         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2419
2420         /* set default work limits */
2421         adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2422
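        /* max frame = MTU plus Ethernet header, FCS and one VLAN tag */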
2423         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2424                                   VLAN_HLEN;
2425         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2426
2427         adapter->node = -1;
2428
2429         spin_lock_init(&adapter->stats64_lock);
2430 #ifdef CONFIG_PCI_IOV
2431         switch (hw->mac.type) {
2432         case e1000_82576:
2433         case e1000_i350:
2434                 if (max_vfs > 7) {
2435                         dev_warn(&pdev->dev,
2436                                  "Maximum of 7 VFs per PF, using max\n");
2437                         adapter->vfs_allocated_count = 7;
2438                 } else
2439                         adapter->vfs_allocated_count = max_vfs;
2440                 break;
2441         default:
2442                 break;
2443         }
2444 #endif /* CONFIG_PCI_IOV */
2445
2446         /* Determine the maximum number of RSS queues supported. */
2447         switch (hw->mac.type) {
2448         case e1000_i211:
2449                 max_rss_queues = IGB_MAX_RX_QUEUES_I211;
2450                 break;
2451         case e1000_82575:
2452         case e1000_i210:
2453                 max_rss_queues = IGB_MAX_RX_QUEUES_82575;
2454                 break;
2455         case e1000_i350:
2456                 /* I350 cannot do RSS and SR-IOV at the same time */
2457                 if (!!adapter->vfs_allocated_count) {
2458                         max_rss_queues = 1;
2459                         break;
2460                 }
2461                 /* fall through */
2462         case e1000_82576:
2463                 if (!!adapter->vfs_allocated_count) {
2464                         max_rss_queues = 2;
2465                         break;
2466                 }
2467                 /* fall through */
2468         case e1000_82580:
2469         default:
2470                 max_rss_queues = IGB_MAX_RX_QUEUES;
2471                 break;
2472         }
2473
2474         adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
2475
2476         /* Determine if we need to pair queues. */
2477         switch (hw->mac.type) {
2478         case e1000_82575:
2479         case e1000_i211:
2480                 /* Device supports enough interrupts without queue pairing. */
2481                 break;
2482         case e1000_82576:
2483                 /*
2484                  * If VFs are going to be allocated with RSS queues then we
2485                  * should pair the queues in order to conserve interrupts due
2486                  * to limited supply.
2487                  */
2488                 if ((adapter->rss_queues > 1) &&
2489                     (adapter->vfs_allocated_count > 6))
2490                         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2491                 /* fall through */
2492         case e1000_82580:
2493         case e1000_i350:
2494         case e1000_i210:
2495         default:
2496                 /*
2497                  * If rss_queues > half of max_rss_queues, pair the queues in
2498                  * order to conserve interrupts due to limited supply.
2499                  */
2500                 if (adapter->rss_queues > (max_rss_queues / 2))
2501                         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2502                 break;
2503         }
2504
2505         /* Setup and initialize a copy of the hw vlan table array */
2506         adapter->shadow_vfta = kzalloc(sizeof(u32) *
2507                                 E1000_VLAN_FILTER_TBL_SIZE,
2508                                 GFP_ATOMIC);
         if (!adapter->shadow_vfta)
                 return -ENOMEM;
2509
2510         /* This call may decrease the number of queues */
2511         if (igb_init_interrupt_scheme(adapter)) {
2512                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2513                 return -ENOMEM;
2514         }
2515
2516         igb_probe_vfs(adapter);
2517
2518         /* Explicitly disable IRQ since the NIC can be in any state. */
2519         igb_irq_disable(adapter);
2520
2521         if (hw->mac.type >= e1000_i350)
2522                 adapter->flags &= ~IGB_FLAG_DMAC;
2523
2524         set_bit(__IGB_DOWN, &adapter->state);
2525         return 0;
2526 }
2527
2528 /**
2529  * igb_open - Called when a network interface is made active
2530  * @netdev: network interface device structure
2531  *
2532  * Returns 0 on success, negative value on failure
2533  *
2534  * The open entry point is called when a network interface is made
2535  * active by the system (IFF_UP).  At this point all resources needed
2536  * for transmit and receive operations are allocated, the interrupt
2537  * handler is registered with the OS, the watchdog timer is started,
2538  * and the stack is notified that the interface is ready.
2539  **/
2540 static int __igb_open(struct net_device *netdev, bool resuming)
2541 {
2542         struct igb_adapter *adapter = netdev_priv(netdev);
2543         struct e1000_hw *hw = &adapter->hw;
2544         struct pci_dev *pdev = adapter->pdev;
2545         int err;
2546         int i;
2547
2548         /* disallow open during test */
2549         if (test_bit(__IGB_TESTING, &adapter->state)) {
2550                 WARN_ON(resuming);
2551                 return -EBUSY;
2552         }
2553
2554         if (!resuming)
2555                 pm_runtime_get_sync(&pdev->dev);
2556
2557         netif_carrier_off(netdev);
2558
2559         /* allocate transmit descriptors */
2560         err = igb_setup_all_tx_resources(adapter);
2561         if (err)
2562                 goto err_setup_tx;
2563
2564         /* allocate receive descriptors */
2565         err = igb_setup_all_rx_resources(adapter);
2566         if (err)
2567                 goto err_setup_rx;
2568
2569         igb_power_up_link(adapter);
2570
2571         /* before we allocate an interrupt, we must be ready to handle it.
2572          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2573          * as soon as we call request_irq, so we have to setup our
2574          * clean_rx handler before we do so.  */
2575         igb_configure(adapter);
2576
2577         err = igb_request_irq(adapter);
2578         if (err)
2579                 goto err_req_irq;
2580
2581         /* From here on the code is the same as igb_up() */
2582         clear_bit(__IGB_DOWN, &adapter->state);
2583
2584         for (i = 0; i < adapter->num_q_vectors; i++)
2585                 napi_enable(&(adapter->q_vector[i]->napi));
2586
2587         /* Clear any pending interrupts. */
2588         rd32(E1000_ICR);
2589
2590         igb_irq_enable(adapter);
2591
2592         /* notify VFs that reset has been completed */
2593         if (adapter->vfs_allocated_count) {
2594                 u32 reg_data = rd32(E1000_CTRL_EXT);
2595                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2596                 wr32(E1000_CTRL_EXT, reg_data);
2597         }
2598
2599         netif_tx_start_all_queues(netdev);
2600
2601         if (!resuming)
2602                 pm_runtime_put(&pdev->dev);
2603
2604         /* start the watchdog. */
2605         hw->mac.get_link_status = 1;
2606         schedule_work(&adapter->watchdog_task);
2607
2608         return 0;
2609
2610 err_req_irq:
2611         igb_release_hw_control(adapter);
2612         igb_power_down_link(adapter);
2613         igb_free_all_rx_resources(adapter);
2614 err_setup_rx:
2615         igb_free_all_tx_resources(adapter);
2616 err_setup_tx:
2617         igb_reset(adapter);
2618         if (!resuming)
2619                 pm_runtime_put(&pdev->dev);
2620
2621         return err;
2622 }
2623
2624 static int igb_open(struct net_device *netdev)
2625 {
2626         return __igb_open(netdev, false);
2627 }
2628
2629 /**
2630  * igb_close - Disables a network interface
2631  * @netdev: network interface device structure
2632  *
2633  * Returns 0, this is not allowed to fail
2634  *
2635  * The close entry point is called when an interface is de-activated
2636  * by the OS.  The hardware is still under the driver's control, but
2637  * needs to be disabled.  A global MAC reset is issued to stop the
2638  * hardware, and all transmit and receive resources are freed.
2639  **/
2640 static int __igb_close(struct net_device *netdev, bool suspending)
2641 {
2642         struct igb_adapter *adapter = netdev_priv(netdev);
2643         struct pci_dev *pdev = adapter->pdev;
2644
2645         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2646
2647         if (!suspending)
2648                 pm_runtime_get_sync(&pdev->dev);
2649
2650         igb_down(adapter);
2651         igb_free_irq(adapter);
2652
2653         igb_free_all_tx_resources(adapter);
2654         igb_free_all_rx_resources(adapter);
2655
2656         if (!suspending)
2657                 pm_runtime_put_sync(&pdev->dev);
2658         return 0;
2659 }
2660
2661 static int igb_close(struct net_device *netdev)
2662 {
2663         return __igb_close(netdev, false);
2664 }
2665
2666 /**
2667  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2668  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2669  *
2670  * Return 0 on success, negative on failure
2671  **/
2672 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2673 {
2674         struct device *dev = tx_ring->dev;
2675         int orig_node = dev_to_node(dev);
2676         int size;
2677
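        /* try the ring's preferred NUMA node first and fall back to any
         * node if that allocation fails */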
2678         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2679         tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2680         if (!tx_ring->tx_buffer_info)
2681                 tx_ring->tx_buffer_info = vzalloc(size);
2682         if (!tx_ring->tx_buffer_info)
2683                 goto err;
2684
2685         /* round up to nearest 4K */
2686         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2687         tx_ring->size = ALIGN(tx_ring->size, 4096);
2688
2689         set_dev_node(dev, tx_ring->numa_node);
2690         tx_ring->desc = dma_alloc_coherent(dev,
2691                                            tx_ring->size,
2692                                            &tx_ring->dma,
2693                                            GFP_KERNEL);
2694         set_dev_node(dev, orig_node);
2695         if (!tx_ring->desc)
2696                 tx_ring->desc = dma_alloc_coherent(dev,
2697                                                    tx_ring->size,
2698                                                    &tx_ring->dma,
2699                                                    GFP_KERNEL);
2700
2701         if (!tx_ring->desc)
2702                 goto err;
2703
2704         tx_ring->next_to_use = 0;
2705         tx_ring->next_to_clean = 0;
2706
2707         return 0;
2708
2709 err:
2710         vfree(tx_ring->tx_buffer_info);
         tx_ring->tx_buffer_info = NULL;
2711         dev_err(dev,
2712                 "Unable to allocate memory for the transmit descriptor ring\n");
2713         return -ENOMEM;
2714 }
2715
2716 /**
2717  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2718  *                                (Descriptors) for all queues
2719  * @adapter: board private structure
2720  *
2721  * Return 0 on success, negative on failure
2722  **/
2723 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2724 {
2725         struct pci_dev *pdev = adapter->pdev;
2726         int i, err = 0;
2727
2728         for (i = 0; i < adapter->num_tx_queues; i++) {
2729                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2730                 if (err) {
2731                         dev_err(&pdev->dev,
2732                                 "Allocation for Tx Queue %u failed\n", i);
2733                         for (i--; i >= 0; i--)
2734                                 igb_free_tx_resources(adapter->tx_ring[i]);
2735                         break;
2736                 }
2737         }
2738
2739         return err;
2740 }
2741
2742 /**
2743  * igb_setup_tctl - configure the transmit control registers
2744  * @adapter: Board private structure
2745  **/
2746 void igb_setup_tctl(struct igb_adapter *adapter)
2747 {
2748         struct e1000_hw *hw = &adapter->hw;
2749         u32 tctl;
2750
2751         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2752         wr32(E1000_TXDCTL(0), 0);
2753
2754         /* Program the Transmit Control Register */
2755         tctl = rd32(E1000_TCTL);
2756         tctl &= ~E1000_TCTL_CT;
2757         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2758                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2759
2760         igb_config_collision_dist(hw);
2761
2762         /* Enable transmits */
2763         tctl |= E1000_TCTL_EN;
2764
2765         wr32(E1000_TCTL, tctl);
2766 }
2767
2768 /**
2769  * igb_configure_tx_ring - Configure transmit ring after Reset
2770  * @adapter: board private structure
2771  * @ring: tx ring to configure
2772  *
2773  * Configure a transmit ring after a reset.
2774  **/
2775 void igb_configure_tx_ring(struct igb_adapter *adapter,
2776                            struct igb_ring *ring)
2777 {
2778         struct e1000_hw *hw = &adapter->hw;
2779         u32 txdctl = 0;
2780         u64 tdba = ring->dma;
2781         int reg_idx = ring->reg_idx;
2782
2783         /* disable the queue */
2784         wr32(E1000_TXDCTL(reg_idx), 0);
2785         wrfl();
2786         mdelay(10);
2787
2788         wr32(E1000_TDLEN(reg_idx),
2789                         ring->count * sizeof(union e1000_adv_tx_desc));
2790         wr32(E1000_TDBAL(reg_idx),
2791                         tdba & 0x00000000ffffffffULL);
2792         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2793
2794         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2795         wr32(E1000_TDH(reg_idx), 0);
2796         writel(0, ring->tail);
2797
2798         txdctl |= IGB_TX_PTHRESH;
2799         txdctl |= IGB_TX_HTHRESH << 8;
2800         txdctl |= IGB_TX_WTHRESH << 16;
2801
2802         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2803         wr32(E1000_TXDCTL(reg_idx), txdctl);
2804 }
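/*
 * Illustration of the TXDCTL packing above (the thresholds come from
 * the IGB_TX_* defaults in igb.h; 8/1/16 is assumed here purely as an
 * example): the prefetch, host and write-back thresholds occupy
 * separate byte lanes, so
 *
 *	txdctl = 8 | (1 << 8) | (16 << 16) = 0x00100108
 *
 * and E1000_TXDCTL_QUEUE_ENABLE is OR'd on top to start descriptor
 * fetching for the queue.
 */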
2805
2806 /**
2807  * igb_configure_tx - Configure transmit Unit after Reset
2808  * @adapter: board private structure
2809  *
2810  * Configure the Tx unit of the MAC after a reset.
2811  **/
2812 static void igb_configure_tx(struct igb_adapter *adapter)
2813 {
2814         int i;
2815
2816         for (i = 0; i < adapter->num_tx_queues; i++)
2817                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2818 }
2819
2820 /**
2821  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2822  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2823  *
2824  * Returns 0 on success, negative on failure
2825  **/
2826 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2827 {
2828         struct device *dev = rx_ring->dev;
2829         int orig_node = dev_to_node(dev);
2830         int size, desc_len;
2831
2832         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2833         rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2834         if (!rx_ring->rx_buffer_info)
2835                 rx_ring->rx_buffer_info = vzalloc(size);
2836         if (!rx_ring->rx_buffer_info)
2837                 goto err;
2838
2839         desc_len = sizeof(union e1000_adv_rx_desc);
2840
2841         /* Round up to nearest 4K */
2842         rx_ring->size = rx_ring->count * desc_len;
2843         rx_ring->size = ALIGN(rx_ring->size, 4096);
2844
2845         set_dev_node(dev, rx_ring->numa_node);
2846         rx_ring->desc = dma_alloc_coherent(dev,
2847                                            rx_ring->size,
2848                                            &rx_ring->dma,
2849                                            GFP_KERNEL);
2850         set_dev_node(dev, orig_node);
2851         if (!rx_ring->desc)
2852                 rx_ring->desc = dma_alloc_coherent(dev,
2853                                                    rx_ring->size,
2854                                                    &rx_ring->dma,
2855                                                    GFP_KERNEL);
2856
2857         if (!rx_ring->desc)
2858                 goto err;
2859
2860         rx_ring->next_to_clean = 0;
2861         rx_ring->next_to_use = 0;
2862
2863         return 0;
2864
2865 err:
2866         vfree(rx_ring->rx_buffer_info);
2867         rx_ring->rx_buffer_info = NULL;
2868         dev_err(dev,
2869                 "Unable to allocate memory for the receive descriptor ring\n");
2870         return -ENOMEM;
2871 }
2872
2873 /**
2874  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2875  *                                (Descriptors) for all queues
2876  * @adapter: board private structure
2877  *
2878  * Return 0 on success, negative on failure
2879  **/
2880 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2881 {
2882         struct pci_dev *pdev = adapter->pdev;
2883         int i, err = 0;
2884
2885         for (i = 0; i < adapter->num_rx_queues; i++) {
2886                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2887                 if (err) {
2888                         dev_err(&pdev->dev,
2889                                 "Allocation for Rx Queue %u failed\n", i);
2890                         for (i--; i >= 0; i--)
2891                                 igb_free_rx_resources(adapter->rx_ring[i]);
2892                         break;
2893                 }
2894         }
2895
2896         return err;
2897 }
2898
2899 /**
2900  * igb_setup_mrqc - configure the multiple receive queue control registers
2901  * @adapter: Board private structure
2902  **/
2903 static void igb_setup_mrqc(struct igb_adapter *adapter)
2904 {
2905         struct e1000_hw *hw = &adapter->hw;
2906         u32 mrqc, rxcsum;
2907         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2908         union e1000_reta {
2909                 u32 dword;
2910                 u8  bytes[4];
2911         } reta;
2912         static const u8 rsshash[40] = {
2913                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2914                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2915                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2916                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2917
2918         /* Fill out hash function seeds */
2919         for (j = 0; j < 10; j++) {
2920                 u32 rsskey = rsshash[(j * 4)];
2921                 rsskey |= rsshash[(j * 4) + 1] << 8;
2922                 rsskey |= rsshash[(j * 4) + 2] << 16;
2923                 rsskey |= rsshash[(j * 4) + 3] << 24;
2924                 array_wr32(E1000_RSSRK(0), j, rsskey);
2925         }
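        /*
         * Example of the packing done by the loop above: the first four
         * key bytes 0x6d, 0x5a, 0x56, 0xda form the little-endian dword
         *
         *	rsskey = 0x6d | (0x5a << 8) | (0x56 << 16) | (0xda << 24)
         *	       = 0xda565a6d
         *
         * written to RSSRK[0]; ten such dwords cover the 40-byte key.
         */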
2926
2927         num_rx_queues = adapter->rss_queues;
2928
2929         if (adapter->vfs_allocated_count) {
2930                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2931                 switch (hw->mac.type) {
2932                 case e1000_i350:
2933                 case e1000_82580:
2934                         num_rx_queues = 1;
2935                         shift = 0;
2936                         break;
2937                 case e1000_82576:
2938                         shift = 3;
2939                         num_rx_queues = 2;
2940                         break;
2941                 case e1000_82575:
2942                         shift = 2;
2943                         shift2 = 6;
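                        /* fall through */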
2944                 default:
2945                         break;
2946                 }
2947         } else {
2948                 if (hw->mac.type == e1000_82575)
2949                         shift = 6;
2950         }
2951
2952         for (j = 0; j < (32 * 4); j++) {
2953                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2954                 if (shift2)
2955                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2956                 if ((j & 3) == 3)
2957                         wr32(E1000_RETA(j >> 2), reta.dword);
2958         }
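        /*
         * Example of the redirection-table fill above with four RSS
         * queues and no VFs (shift == 0, shift2 == 0): queue indices
         * cycle 0, 1, 2, 3, so on a little-endian CPU every dword
         * written to RETA is 0x03020100, spreading hash results evenly
         * across the four queues.
         */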
2959
2960         /*
2961          * Disable raw packet checksumming so that RSS hash is placed in
2962          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2963          * offloads as they are enabled by default
2964          */
2965         rxcsum = rd32(E1000_RXCSUM);
2966         rxcsum |= E1000_RXCSUM_PCSD;
2967
2968         if (adapter->hw.mac.type >= e1000_82576)
2969                 /* Enable Receive Checksum Offload for SCTP */
2970                 rxcsum |= E1000_RXCSUM_CRCOFL;
2971
2972         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2973         wr32(E1000_RXCSUM, rxcsum);
2974         /*
2975          * Generate RSS hash based on TCP port numbers and/or
2976          * IPv4/v6 src and dst addresses since UDP cannot be
2977          * hashed reliably due to IP fragmentation
2978          */
2979
2980         mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
2981                E1000_MRQC_RSS_FIELD_IPV4_TCP |
2982                E1000_MRQC_RSS_FIELD_IPV6 |
2983                E1000_MRQC_RSS_FIELD_IPV6_TCP |
2984                E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2985
2986         /* If VMDq is enabled then we set the appropriate mode for that, else
2987          * we default to RSS so that an RSS hash is calculated per packet even
2988          * if we are only using one queue */
2989         if (adapter->vfs_allocated_count) {
2990                 if (hw->mac.type > e1000_82575) {
2991                         /* Set the default pool for the PF's first queue */
2992                         u32 vtctl = rd32(E1000_VT_CTL);
2993                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2994                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2995                         vtctl |= adapter->vfs_allocated_count <<
2996                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2997                         wr32(E1000_VT_CTL, vtctl);
2998                 }
2999                 if (adapter->rss_queues > 1)
3000                         mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
3001                 else
3002                         mrqc |= E1000_MRQC_ENABLE_VMDQ;
3003         } else {
3004                 if (hw->mac.type != e1000_i211)
3005                         mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
3006         }
3007         igb_vmm_control(adapter);
3008
3009         wr32(E1000_MRQC, mrqc);
3010 }
3011
3012 /**
3013  * igb_setup_rctl - configure the receive control registers
3014  * @adapter: Board private structure
3015  **/
3016 void igb_setup_rctl(struct igb_adapter *adapter)
3017 {
3018         struct e1000_hw *hw = &adapter->hw;
3019         u32 rctl;
3020
3021         rctl = rd32(E1000_RCTL);
3022
3023         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3024         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
3025
3026         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
3027                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3028
3029         /*
3030          * enable stripping of CRC. It's unlikely this will break BMC
3031          * redirection as it did with e1000. Newer features require
3032          * that the HW strips the CRC.
3033          */
3034         rctl |= E1000_RCTL_SECRC;
3035
3036         /* disable store bad packets and clear size bits. */
3037         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3038
3039         /* enable LPE to prevent packets larger than max_frame_size */
3040         rctl |= E1000_RCTL_LPE;
3041
3042         /* disable queue 0 to prevent tail write w/o re-config */
3043         wr32(E1000_RXDCTL(0), 0);
3044
3045         /* Attention!!!  For SR-IOV PF driver operations you must enable
3046          * queue drop for all VF and PF queues to prevent head of line blocking
3047          * if an un-trusted VF does not provide descriptors to hardware.
3048          */
3049         if (adapter->vfs_allocated_count) {
3050                 /* set all queue drop enable bits */
3051                 wr32(E1000_QDE, ALL_QUEUES);
3052         }
3053
3054         /* This is useful for sniffing bad packets. */
3055         if (adapter->netdev->features & NETIF_F_RXALL) {
3056                 /* UPE and MPE will be handled by normal PROMISC logic
3057                  * in igb_set_rx_mode */
3058                 rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3059                          E1000_RCTL_BAM | /* RX All Bcast Pkts */
3060                          E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3061
3062                 rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
3063                           E1000_RCTL_DPF | /* Allow filtered pause */
3064                           E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3065                 /* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3066                  * and that breaks VLANs.
3067                  */
3068         }
3069
3070         wr32(E1000_RCTL, rctl);
3071 }
3072
3073 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3074                                    int vfn)
3075 {
3076         struct e1000_hw *hw = &adapter->hw;
3077         u32 vmolr;
3078
3079         /* if this is a VF rather than the PF, check whether it has VLANs
3080          * enabled and, if so, increase the size to allow for a vlan tag */
3081         if (vfn < adapter->vfs_allocated_count &&
3082             adapter->vf_data[vfn].vlans_enabled)
3083                 size += VLAN_TAG_SIZE;
3084
3085         vmolr = rd32(E1000_VMOLR(vfn));
3086         vmolr &= ~E1000_VMOLR_RLPML_MASK;
3087         vmolr |= size | E1000_VMOLR_LPE;
3088         wr32(E1000_VMOLR(vfn), vmolr);
3089
3090         return 0;
3091 }
3092
3093 /**
3094  * igb_rlpml_set - set maximum receive packet size
3095  * @adapter: board private structure
3096  *
3097  * Configure maximum receivable packet size.
3098  **/
3099 static void igb_rlpml_set(struct igb_adapter *adapter)
3100 {
3101         u32 max_frame_size = adapter->max_frame_size;
3102         struct e1000_hw *hw = &adapter->hw;
3103         u16 pf_id = adapter->vfs_allocated_count;
3104
3105         if (pf_id) {
3106                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3107                 /*
3108                  * If we're in VMDQ or SR-IOV mode, then set global RLPML
3109                  * to our max jumbo frame size, in case we need to enable
3110                  * jumbo frames on one of the rings later.
3111                  * This will not pass over-length frames into the default
3112                  * queue because it's gated by the VMOLR.RLPML.
3113                  */
3114                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3115         }
3116
3117         wr32(E1000_RLPML, max_frame_size);
3118 }
3119
3120 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3121                                  int vfn, bool aupe)
3122 {
3123         struct e1000_hw *hw = &adapter->hw;
3124         u32 vmolr;
3125
3126         /*
3127          * This register exists only on 82576 and newer, so on older
3128          * hardware there is nothing to do and we simply return
3129          */
3130         if (hw->mac.type < e1000_82576)
3131                 return;
3132
3133         vmolr = rd32(E1000_VMOLR(vfn));
3134         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3135         if (aupe)
3136                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3137         else
3138                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3139
3140         /* clear all bits that might not be set */
3141         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3142
3143         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3144                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3145         /*
3146          * for VMDq only allow the VFs and pool 0 to accept broadcast and
3147          * multicast packets
3148          */
3149         if (vfn <= adapter->vfs_allocated_count)
3150                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3151
3152         wr32(E1000_VMOLR(vfn), vmolr);
3153 }
3154
3155 /**
3156  * igb_configure_rx_ring - Configure a receive ring after Reset
3157  * @adapter: board private structure
3158  * @ring: receive ring to be configured
3159  *
3160  * Configure the Rx unit of the MAC after a reset.
3161  **/
3162 void igb_configure_rx_ring(struct igb_adapter *adapter,
3163                            struct igb_ring *ring)
3164 {
3165         struct e1000_hw *hw = &adapter->hw;
3166         u64 rdba = ring->dma;
3167         int reg_idx = ring->reg_idx;
3168         u32 srrctl = 0, rxdctl = 0;
3169
3170         /* disable the queue */
3171         wr32(E1000_RXDCTL(reg_idx), 0);
3172
3173         /* Set DMA base address registers */
3174         wr32(E1000_RDBAL(reg_idx),
3175              rdba & 0x00000000ffffffffULL);
3176         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3177         wr32(E1000_RDLEN(reg_idx),
3178                        ring->count * sizeof(union e1000_adv_rx_desc));
3179
3180         /* initialize head and tail */
3181         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3182         wr32(E1000_RDH(reg_idx), 0);
3183         writel(0, ring->tail);
3184
3185         /* set descriptor configuration */
3186         srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3187 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3188         srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3189 #else
3190         srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3191 #endif
3192         srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3193 #ifdef CONFIG_IGB_PTP
3194         if (hw->mac.type >= e1000_82580)
3195                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3196 #endif /* CONFIG_IGB_PTP */
3197         /* Only set Drop Enable if we are supporting multiple queues */
3198         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3199                 srrctl |= E1000_SRRCTL_DROP_EN;
3200
3201         wr32(E1000_SRRCTL(reg_idx), srrctl);
3202
3203         /* set filtering for VMDQ pools */
3204         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3205
3206         rxdctl |= IGB_RX_PTHRESH;
3207         rxdctl |= IGB_RX_HTHRESH << 8;
3208         rxdctl |= IGB_RX_WTHRESH << 16;
3209
3210         /* enable receive descriptor fetching */
3211         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3212         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3213 }
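/*
 * Illustration of the SRRCTL sizing above on a 4K-page system:
 * BSIZEPKT is programmed in 1 KB units (E1000_SRRCTL_BSIZEPKT_SHIFT is
 * 10), so (PAGE_SIZE / 2) >> 10 = 2, i.e. 2 KB packet buffers, while
 * the header buffer length for the always-split descriptor type is
 * carried in the BSIZEHDRSIZE field derived from IGB_RX_HDR_LEN.
 */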
3214
3215 /**
3216  * igb_configure_rx - Configure receive Unit after Reset
3217  * @adapter: board private structure
3218  *
3219  * Configure the Rx unit of the MAC after a reset.
3220  **/
3221 static void igb_configure_rx(struct igb_adapter *adapter)
3222 {
3223         int i;
3224
3225         /* set UTA to appropriate mode */
3226         igb_set_uta(adapter);
3227
3228         /* set the correct pool for the PF default MAC address in entry 0 */
3229         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3230                          adapter->vfs_allocated_count);
3231
3232         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3233          * the Base and Length of the Rx Descriptor Ring */
3234         for (i = 0; i < adapter->num_rx_queues; i++)
3235                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3236 }
3237
3238 /**
3239  * igb_free_tx_resources - Free Tx Resources per Queue
3240  * @tx_ring: Tx descriptor ring for a specific queue
3241  *
3242  * Free all transmit software resources
3243  **/
3244 void igb_free_tx_resources(struct igb_ring *tx_ring)
3245 {
3246         igb_clean_tx_ring(tx_ring);
3247
3248         vfree(tx_ring->tx_buffer_info);
3249         tx_ring->tx_buffer_info = NULL;
3250
3251         /* if not set, then don't free */
3252         if (!tx_ring->desc)
3253                 return;
3254
3255         dma_free_coherent(tx_ring->dev, tx_ring->size,
3256                           tx_ring->desc, tx_ring->dma);
3257
3258         tx_ring->desc = NULL;
3259 }
3260
3261 /**
3262  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3263  * @adapter: board private structure
3264  *
3265  * Free all transmit software resources
3266  **/
3267 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3268 {
3269         int i;
3270
3271         for (i = 0; i < adapter->num_tx_queues; i++)
3272                 igb_free_tx_resources(adapter->tx_ring[i]);
3273 }
3274
3275 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3276                                     struct igb_tx_buffer *tx_buffer)
3277 {
3278         if (tx_buffer->skb) {
3279                 dev_kfree_skb_any(tx_buffer->skb);
3280                 if (tx_buffer->dma)
3281                         dma_unmap_single(ring->dev,
3282                                          tx_buffer->dma,
3283                                          tx_buffer->length,
3284                                          DMA_TO_DEVICE);
3285         } else if (tx_buffer->dma) {
3286                 dma_unmap_page(ring->dev,
3287                                tx_buffer->dma,
3288                                tx_buffer->length,
3289                                DMA_TO_DEVICE);
3290         }
3291         tx_buffer->next_to_watch = NULL;
3292         tx_buffer->skb = NULL;
3293         tx_buffer->dma = 0;
3294         /* buffer_info must be completely set up in the transmit path */
3295 }
3296
3297 /**
3298  * igb_clean_tx_ring - Free Tx Buffers
3299  * @tx_ring: ring to be cleaned
3300  **/
3301 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3302 {
3303         struct igb_tx_buffer *buffer_info;
3304         unsigned long size;
3305         u16 i;
3306
3307         if (!tx_ring->tx_buffer_info)
3308                 return;
3309         /* Free all the Tx ring sk_buffs */
3310
3311         for (i = 0; i < tx_ring->count; i++) {
3312                 buffer_info = &tx_ring->tx_buffer_info[i];
3313                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3314         }
3315
3316         netdev_tx_reset_queue(txring_txq(tx_ring));
3317
3318         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3319         memset(tx_ring->tx_buffer_info, 0, size);
3320
3321         /* Zero out the descriptor ring */
3322         memset(tx_ring->desc, 0, tx_ring->size);
3323
3324         tx_ring->next_to_use = 0;
3325         tx_ring->next_to_clean = 0;
3326 }
3327
3328 /**
3329  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3330  * @adapter: board private structure
3331  **/
3332 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3333 {
3334         int i;
3335
3336         for (i = 0; i < adapter->num_tx_queues; i++)
3337                 igb_clean_tx_ring(adapter->tx_ring[i]);
3338 }
3339
3340 /**
3341  * igb_free_rx_resources - Free Rx Resources
3342  * @rx_ring: ring to clean the resources from
3343  *
3344  * Free all receive software resources
3345  **/
3346 void igb_free_rx_resources(struct igb_ring *rx_ring)
3347 {
3348         igb_clean_rx_ring(rx_ring);
3349
3350         vfree(rx_ring->rx_buffer_info);
3351         rx_ring->rx_buffer_info = NULL;
3352
3353         /* if not set, then don't free */
3354         if (!rx_ring->desc)
3355                 return;
3356
3357         dma_free_coherent(rx_ring->dev, rx_ring->size,
3358                           rx_ring->desc, rx_ring->dma);
3359
3360         rx_ring->desc = NULL;
3361 }
3362
3363 /**
3364  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3365  * @adapter: board private structure
3366  *
3367  * Free all receive software resources
3368  **/
3369 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3370 {
3371         int i;
3372
3373         for (i = 0; i < adapter->num_rx_queues; i++)
3374                 igb_free_rx_resources(adapter->rx_ring[i]);
3375 }
3376
3377 /**
3378  * igb_clean_rx_ring - Free Rx Buffers per Queue
3379  * @rx_ring: ring to free buffers from
3380  **/
3381 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3382 {
3383         unsigned long size;
3384         u16 i;
3385
3386         if (!rx_ring->rx_buffer_info)
3387                 return;
3388
3389         /* Free all the Rx ring sk_buffs */
3390         for (i = 0; i < rx_ring->count; i++) {
3391                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3392                 if (buffer_info->dma) {
3393                         dma_unmap_single(rx_ring->dev,
3394                                          buffer_info->dma,
3395                                          IGB_RX_HDR_LEN,
3396                                          DMA_FROM_DEVICE);
3397                         buffer_info->dma = 0;
3398                 }
3399
3400                 if (buffer_info->skb) {
3401                         dev_kfree_skb(buffer_info->skb);
3402                         buffer_info->skb = NULL;
3403                 }
3404                 if (buffer_info->page_dma) {
3405                         dma_unmap_page(rx_ring->dev,
3406                                        buffer_info->page_dma,
3407                                        PAGE_SIZE / 2,
3408                                        DMA_FROM_DEVICE);
3409                         buffer_info->page_dma = 0;
3410                 }
3411                 if (buffer_info->page) {
3412                         put_page(buffer_info->page);
3413                         buffer_info->page = NULL;
3414                         buffer_info->page_offset = 0;
3415                 }
3416         }
3417
3418         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3419         memset(rx_ring->rx_buffer_info, 0, size);
3420
3421         /* Zero out the descriptor ring */
3422         memset(rx_ring->desc, 0, rx_ring->size);
3423
3424         rx_ring->next_to_clean = 0;
3425         rx_ring->next_to_use = 0;
3426 }
3427
3428 /**
3429  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3430  * @adapter: board private structure
3431  **/
3432 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3433 {
3434         int i;
3435
3436         for (i = 0; i < adapter->num_rx_queues; i++)
3437                 igb_clean_rx_ring(adapter->rx_ring[i]);
3438 }
3439
3440 /**
3441  * igb_set_mac - Change the Ethernet Address of the NIC
3442  * @netdev: network interface device structure
3443  * @p: pointer to an address structure
3444  *
3445  * Returns 0 on success, negative on failure
3446  **/
3447 static int igb_set_mac(struct net_device *netdev, void *p)
3448 {
3449         struct igb_adapter *adapter = netdev_priv(netdev);
3450         struct e1000_hw *hw = &adapter->hw;
3451         struct sockaddr *addr = p;
3452
3453         if (!is_valid_ether_addr(addr->sa_data))
3454                 return -EADDRNOTAVAIL;
3455
3456         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3457         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3458
3459         /* set the correct pool for the new PF MAC address in entry 0 */
3460         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3461                          adapter->vfs_allocated_count);
3462
3463         return 0;
3464 }
3465
3466 /**
3467  * igb_write_mc_addr_list - write multicast addresses to MTA
3468  * @netdev: network interface device structure
3469  *
3470  * Writes multicast address list to the MTA hash table.
3471  * Returns: -ENOMEM on failure
3472  *          0 on no addresses written
3473  *          X on writing X addresses to MTA
3474  **/
3475 static int igb_write_mc_addr_list(struct net_device *netdev)
3476 {
3477         struct igb_adapter *adapter = netdev_priv(netdev);
3478         struct e1000_hw *hw = &adapter->hw;
3479         struct netdev_hw_addr *ha;
3480         u8  *mta_list;
3481         int i;
3482
3483         if (netdev_mc_empty(netdev)) {
3484                 /* nothing to program, so clear mc list */
3485                 igb_update_mc_addr_list(hw, NULL, 0);
3486                 igb_restore_vf_multicasts(adapter);
3487                 return 0;
3488         }
3489
3490         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3491         if (!mta_list)
3492                 return -ENOMEM;
3493
3494         /* The shared function expects a packed array of only addresses. */
3495         i = 0;
3496         netdev_for_each_mc_addr(ha, netdev)
3497                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3498
3499         igb_update_mc_addr_list(hw, mta_list, i);
3500         kfree(mta_list);
3501
3502         return netdev_mc_count(netdev);
3503 }
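/*
 * Example of the flattening above: three multicast addresses occupy one
 * packed 18-byte buffer (3 * ETH_ALEN) with no gaps between entries,
 * which is the packed layout igb_update_mc_addr_list() expects before
 * hashing each entry into the MTA.
 */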
3504
3505 /**
3506  * igb_write_uc_addr_list - write unicast addresses to RAR table
3507  * @netdev: network interface device structure
3508  *
3509  * Writes unicast address list to the RAR table.
3510  * Returns: -ENOMEM on failure/insufficient address space
3511  *          0 on no addresses written
3512  *          X on writing X addresses to the RAR table
3513  **/
3514 static int igb_write_uc_addr_list(struct net_device *netdev)
3515 {
3516         struct igb_adapter *adapter = netdev_priv(netdev);
3517         struct e1000_hw *hw = &adapter->hw;
3518         unsigned int vfn = adapter->vfs_allocated_count;
3519         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3520         int count = 0;
3521
3522         /* return -ENOMEM to indicate insufficient RAR space for the addresses */
3523         if (netdev_uc_count(netdev) > rar_entries)
3524                 return -ENOMEM;
3525
3526         if (!netdev_uc_empty(netdev) && rar_entries) {
3527                 struct netdev_hw_addr *ha;
3528
3529                 netdev_for_each_uc_addr(ha, netdev) {
3530                         if (!rar_entries)
3531                                 break;
3532                         igb_rar_set_qsel(adapter, ha->addr,
3533                                          rar_entries--,
3534                                          vfn);
3535                         count++;
3536                 }
3537         }
3538         /* write the addresses in reverse order to avoid write combining */
3539         for (; rar_entries > 0 ; rar_entries--) {
3540                 wr32(E1000_RAH(rar_entries), 0);
3541                 wr32(E1000_RAL(rar_entries), 0);
3542         }
3543         wrfl();
3544
3545         return count;
3546 }
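/*
 * Worked example of the RAR budgeting above, assuming a 24-entry RAR
 * table and 7 allocated VFs: one entry is reserved per VF plus one for
 * the PF default MAC, leaving rar_entries = 24 - (7 + 1) = 16 slots for
 * secondary unicast addresses; a longer uc list returns -ENOMEM and
 * igb_set_rx_mode() falls back to unicast promiscuous mode.
 */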
3547
3548 /**
3549  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3550  * @netdev: network interface device structure
3551  *
3552  * The set_rx_mode entry point is called whenever the unicast or multicast
3553  * address lists or the network interface flags are updated.  This routine is
3554  * responsible for configuring the hardware for proper unicast, multicast,
3555  * promiscuous mode, and all-multi behavior.
3556  **/
3557 static void igb_set_rx_mode(struct net_device *netdev)
3558 {
3559         struct igb_adapter *adapter = netdev_priv(netdev);
3560         struct e1000_hw *hw = &adapter->hw;
3561         unsigned int vfn = adapter->vfs_allocated_count;
3562         u32 rctl, vmolr = 0;
3563         int count;
3564
3565         /* Check for Promiscuous and All Multicast modes */
3566         rctl = rd32(E1000_RCTL);
3567
3568         /* clear the affected bits */
3569         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3570
3571         if (netdev->flags & IFF_PROMISC) {
3572                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3573                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3574         } else {
3575                 if (netdev->flags & IFF_ALLMULTI) {
3576                         rctl |= E1000_RCTL_MPE;
3577                         vmolr |= E1000_VMOLR_MPME;
3578                 } else {
3579                         /*
3580                          * Write addresses to the MTA, if the attempt fails
3581                          * then we should just turn on promiscuous mode so
3582                          * that we can at least receive multicast traffic
3583                          */
3584                         count = igb_write_mc_addr_list(netdev);
3585                         if (count < 0) {
3586                                 rctl |= E1000_RCTL_MPE;
3587                                 vmolr |= E1000_VMOLR_MPME;
3588                         } else if (count) {
3589                                 vmolr |= E1000_VMOLR_ROMPE;
3590                         }
3591                 }
3592                 /*
3593                  * Write addresses to available RAR registers, if there is not
3594                  * sufficient space to store all the addresses then enable
3595                  * unicast promiscuous mode
3596                  */
3597                 count = igb_write_uc_addr_list(netdev);
3598                 if (count < 0) {
3599                         rctl |= E1000_RCTL_UPE;
3600                         vmolr |= E1000_VMOLR_ROPE;
3601                 }
3602                 rctl |= E1000_RCTL_VFE;
3603         }
3604         wr32(E1000_RCTL, rctl);
3605
3606         /*
3607          * In order to support SR-IOV and eventually VMDq it is necessary to
3608          * set the VMOLR to enable the appropriate modes.  Without this
3609          * workaround, VLAN tag stripping is not performed for frames that
3610          * arrive only because we are the default pool
3611          */
3612         if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
3613                 return;
3614
3615         vmolr |= rd32(E1000_VMOLR(vfn)) &
3616                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3617         wr32(E1000_VMOLR(vfn), vmolr);
3618         igb_restore_vf_multicasts(adapter);
3619 }
3620
3621 static void igb_check_wvbr(struct igb_adapter *adapter)
3622 {
3623         struct e1000_hw *hw = &adapter->hw;
3624         u32 wvbr = 0;
3625
3626         switch (hw->mac.type) {
3627         case e1000_82576:
3628         case e1000_i350:
3629                 if (!(wvbr = rd32(E1000_WVBR)))
3630                         return;
3631                 break;
3632         default:
3633                 break;
3634         }
3635
3636         adapter->wvbr |= wvbr;
3637 }
3638
3639 #define IGB_STAGGERED_QUEUE_OFFSET 8
3640
3641 static void igb_spoof_check(struct igb_adapter *adapter)
3642 {
3643         int j;
3644
3645         if (!adapter->wvbr)
3646                 return;
3647
3648         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3649                 if (adapter->wvbr & (1 << j) ||
3650                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3651                         dev_warn(&adapter->pdev->dev,
3652                                 "Spoof event(s) detected on VF %d\n", j);
3653                         adapter->wvbr &=
3654                                 ~((1 << j) |
3655                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3656                 }
3657         }
3658 }
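/*
 * Example of the WVBR decoding above: with the staggered queue offset
 * of 8, a spoof event on VF 2 can be reported in bit 2 (first queue)
 * or bit 10 (second queue), so both bits are tested and then cleared
 * together once the warning has been logged.
 */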
3659
3660 /* Need to wait a few seconds after link up to get diagnostic information from
3661  * the phy */
3662 static void igb_update_phy_info(unsigned long data)
3663 {
3664         struct igb_adapter *adapter = (struct igb_adapter *) data;
3665         igb_get_phy_info(&adapter->hw);
3666 }
3667
3668 /**
3669  * igb_has_link - check shared code for link and determine up/down
3670  * @adapter: pointer to driver private info
3671  **/
3672 bool igb_has_link(struct igb_adapter *adapter)
3673 {
3674         struct e1000_hw *hw = &adapter->hw;
3675         bool link_active = false;
3676         s32 ret_val = 0;
3677
3678         /* get_link_status is set on LSC (link status) interrupt or
3679          * rx sequence error interrupt.  get_link_status will stay
3680          * false until the e1000_check_for_link establishes link
3681          * for copper adapters ONLY
3682          */
3683         switch (hw->phy.media_type) {
3684         case e1000_media_type_copper:
3685                 if (hw->mac.get_link_status) {
3686                         ret_val = hw->mac.ops.check_for_link(hw);
3687                         link_active = !hw->mac.get_link_status;
3688                 } else {
3689                         link_active = true;
3690                 }
3691                 break;
3692         case e1000_media_type_internal_serdes:
3693                 ret_val = hw->mac.ops.check_for_link(hw);
3694                 link_active = hw->mac.serdes_has_link;
3695                 break;
3696         default:
3697         case e1000_media_type_unknown:
3698                 break;
3699         }
3700
3701         return link_active;
3702 }
3703
3704 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3705 {
3706         bool ret = false;
3707         u32 ctrl_ext, thstat;
3708
3709         /* check for thermal sensor event on i350 copper only */
3710         if (hw->mac.type == e1000_i350) {
3711                 thstat = rd32(E1000_THSTAT);
3712                 ctrl_ext = rd32(E1000_CTRL_EXT);
3713
3714                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3715                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3716                         ret = !!(thstat & event);
3717                 }
3718         }
3719
3720         return ret;
3721 }
3722
3723 /**
3724  * igb_watchdog - Timer Call-back
3725  * @data: pointer to adapter cast into an unsigned long
3726  **/
3727 static void igb_watchdog(unsigned long data)
3728 {
3729         struct igb_adapter *adapter = (struct igb_adapter *)data;
3730         /* Do the rest outside of interrupt context */
3731         schedule_work(&adapter->watchdog_task);
3732 }
3733
3734 static void igb_watchdog_task(struct work_struct *work)
3735 {
3736         struct igb_adapter *adapter = container_of(work,
3737                                                    struct igb_adapter,
3738                                                    watchdog_task);
3739         struct e1000_hw *hw = &adapter->hw;
3740         struct net_device *netdev = adapter->netdev;
3741         u32 link;
3742         int i;
3743
3744         link = igb_has_link(adapter);
3745         if (link) {
3746                 /* Cancel scheduled suspend requests. */
3747                 pm_runtime_resume(netdev->dev.parent);
3748
3749                 if (!netif_carrier_ok(netdev)) {
3750                         u32 ctrl;
3751                         hw->mac.ops.get_speed_and_duplex(hw,
3752                                                          &adapter->link_speed,
3753                                                          &adapter->link_duplex);
3754
3755                         ctrl = rd32(E1000_CTRL);
3756                         /* Link status message must follow this format */
3757                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3758                                "Duplex, Flow Control: %s\n",
3759                                netdev->name,
3760                                adapter->link_speed,
3761                                adapter->link_duplex == FULL_DUPLEX ?
3762                                "Full" : "Half",
3763                                (ctrl & E1000_CTRL_TFCE) &&
3764                                (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3765                                (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3766                                (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3767
3768                         /* check for thermal sensor event */
3769                         if (igb_thermal_sensor_event(hw,
3770                             E1000_THSTAT_LINK_THROTTLE)) {
3771                                 netdev_info(netdev, "The network adapter link "
3772                                             "speed was downshifted because it "
3773                                             "overheated\n");
3774                         }
3775
3776                         /* adjust timeout factor according to speed/duplex */
3777                         adapter->tx_timeout_factor = 1;
3778                         switch (adapter->link_speed) {
3779                         case SPEED_10:
3780                                 adapter->tx_timeout_factor = 14;
3781                                 break;
3782                         case SPEED_100:
3783                                 /* maybe add some timeout factor ? */
3784                                 break;
3785                         }
3786
3787                         netif_carrier_on(netdev);
3788
3789                         igb_ping_all_vfs(adapter);
3790                         igb_check_vf_rate_limit(adapter);
3791
3792                         /* link state has changed, schedule phy info update */
3793                         if (!test_bit(__IGB_DOWN, &adapter->state))
3794                                 mod_timer(&adapter->phy_info_timer,
3795                                           round_jiffies(jiffies + 2 * HZ));
3796                 }
3797         } else {
3798                 if (netif_carrier_ok(netdev)) {
3799                         adapter->link_speed = 0;
3800                         adapter->link_duplex = 0;
3801
3802                         /* check for thermal sensor event */
3803                         if (igb_thermal_sensor_event(hw,
3804                             E1000_THSTAT_PWR_DOWN)) {
3805                                 netdev_err(netdev, "The network adapter was "
3806                                            "stopped because it overheated\n");
3807                         }
3808
3809                         /* Link status message must follow this format */
3810                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3811                                netdev->name);
3812                         netif_carrier_off(netdev);
3813
3814                         igb_ping_all_vfs(adapter);
3815
3816                         /* link state has changed, schedule phy info update */
3817                         if (!test_bit(__IGB_DOWN, &adapter->state))
3818                                 mod_timer(&adapter->phy_info_timer,
3819                                           round_jiffies(jiffies + 2 * HZ));
3820
3821                         pm_schedule_suspend(netdev->dev.parent,
3822                                             MSEC_PER_SEC * 5);
3823                 }
3824         }
3825
3826         spin_lock(&adapter->stats64_lock);
3827         igb_update_stats(adapter, &adapter->stats64);
3828         spin_unlock(&adapter->stats64_lock);
3829
3830         for (i = 0; i < adapter->num_tx_queues; i++) {
3831                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3832                 if (!netif_carrier_ok(netdev)) {
3833                         /* We've lost link, so the controller stops DMA,
3834                          * but we've got queued Tx work that's never going
3835                          * to get done, so reset controller to flush Tx.
3836                          * (Do the reset outside of interrupt context). */
3837                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3838                                 adapter->tx_timeout_count++;
3839                                 schedule_work(&adapter->reset_task);
3840                                 /* return immediately since reset is imminent */
3841                                 return;
3842                         }
3843                 }
3844
3845                 /* Force detection of hung controller every watchdog period */
3846                 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3847         }
3848
3849         /* Cause software interrupt to ensure rx ring is cleaned */
3850         if (adapter->msix_entries) {
3851                 u32 eics = 0;
3852                 for (i = 0; i < adapter->num_q_vectors; i++)
3853                         eics |= adapter->q_vector[i]->eims_value;
3854                 wr32(E1000_EICS, eics);
3855         } else {
3856                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3857         }
3858
3859         igb_spoof_check(adapter);
3860
3861         /* Reset the timer */
3862         if (!test_bit(__IGB_DOWN, &adapter->state))
3863                 mod_timer(&adapter->watchdog_timer,
3864                           round_jiffies(jiffies + 2 * HZ));
3865 }
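/*
 * On the EICS kick at the end of the watchdog: with MSI-X, OR-ing each
 * q_vector's eims_value (say four vectors with bits 0x1, 0x2, 0x4 and
 * 0x8, giving eics = 0xf) raises a software interrupt on every vector,
 * so each rx ring is cleaned at least once per watchdog period even if
 * a hardware interrupt was missed; without MSI-X the single ICS write
 * serves the same purpose.
 */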
3866
3867 enum latency_range {
3868         lowest_latency = 0,
3869         low_latency = 1,
3870         bulk_latency = 2,
3871         latency_invalid = 255
3872 };
3873
3874 /**
3875  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3876  * @q_vector: pointer to q_vector
3877  *
3878  *      Stores a new ITR value based strictly on packet size.  This
3879  *      algorithm is less sophisticated than that used in igb_update_itr,
3880  *      due to the difficulty of synchronizing statistics across multiple
3881  *      receive rings.  The divisors and thresholds used by this function
3882  *      were determined based on theoretical maximum wire speed and testing
3883  *      data, in order to minimize response time while increasing bulk
3884  *      throughput.
3885  *      This functionality is controlled by the InterruptThrottleRate module
3886  *      parameter (see igb_param.c)
3887  *      NOTE:  This function is called only when operating in a multiqueue
3888  *             receive environment.
3889  **/
3890 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3891 {
3892         int new_val = q_vector->itr_val;
3893         int avg_wire_size = 0;
3894         struct igb_adapter *adapter = q_vector->adapter;
3895         unsigned int packets;
3896
3897         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3898          * ints/sec - ITR timer value of 120 ticks.
3899          */
3900         if (adapter->link_speed != SPEED_1000) {
3901                 new_val = IGB_4K_ITR;
3902                 goto set_itr_val;
3903         }
3904
3905         packets = q_vector->rx.total_packets;
3906         if (packets)
3907                 avg_wire_size = q_vector->rx.total_bytes / packets;
3908
3909         packets = q_vector->tx.total_packets;
3910         if (packets)
3911                 avg_wire_size = max_t(u32, avg_wire_size,
3912                                       q_vector->tx.total_bytes / packets);
3913
3914         /* if avg_wire_size isn't set no work was done */
3915         if (!avg_wire_size)
3916                 goto clear_counts;
3917
3918         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3919         avg_wire_size += 24;
3920
3921         /* Don't starve jumbo frames */
3922         avg_wire_size = min(avg_wire_size, 3000);
3923
3924         /* Give a little boost to mid-size frames */
3925         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3926                 new_val = avg_wire_size / 3;
3927         else
3928                 new_val = avg_wire_size / 2;
3929
3930         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3931         if (new_val < IGB_20K_ITR &&
3932             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3933              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3934                 new_val = IGB_20K_ITR;
3935
3936 set_itr_val:
3937         if (new_val != q_vector->itr_val) {
3938                 q_vector->itr_val = new_val;
3939                 q_vector->set_itr = 1;
3940         }
3941 clear_counts:
3942         q_vector->rx.total_bytes = 0;
3943         q_vector->rx.total_packets = 0;
3944         q_vector->tx.total_bytes = 0;
3945         q_vector->tx.total_packets = 0;
3946 }
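/*
 * Worked example of the sizing heuristic above: frames averaging
 * 1000 bytes give avg_wire_size = 1000 + 24 = 1024, which falls in the
 * mid-size boost range (300..1200), so new_val = 1024 / 3 = 341; a
 * 64-byte average gives (64 + 24) / 2 = 44, a much smaller ITR value
 * and therefore a much higher interrupt rate for latency-sensitive
 * traffic (subject to the conservative-mode clamp above).
 */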
3947
3948 /**
3949  * igb_update_itr - update the dynamic ITR value based on statistics
3950  * @q_vector: pointer to q_vector
3951  * @ring_container: ring info to update the itr for
3952  *      Stores a new ITR value based on packets and byte
3953  *      counts during the last interrupt.  The advantage of per interrupt
3954  *      computation is faster updates and more accurate ITR for the current
3955  *      traffic pattern.  Constants in this function were computed
3956  *      based on theoretical maximum wire speed and thresholds were set based
3957  *      on testing data as well as attempting to minimize response time
3958  *      while increasing bulk throughput.
3959  *      This functionality is controlled by the InterruptThrottleRate module
3960  *      parameter (see igb_param.c)
3961  *      NOTE:  These calculations are only valid when operating in a single-
3962  *             queue environment.
3963  **/
3964 static void igb_update_itr(struct igb_q_vector *q_vector,
3965                            struct igb_ring_container *ring_container)
3966 {
3967         unsigned int packets = ring_container->total_packets;
3968         unsigned int bytes = ring_container->total_bytes;
3969         u8 itrval = ring_container->itr;
3970
3971         /* no packets, exit with status unchanged */
3972         if (packets == 0)
3973                 return;
3974
3975         switch (itrval) {
3976         case lowest_latency:
3977                 /* handle TSO and jumbo frames */
3978                 if (bytes/packets > 8000)
3979                         itrval = bulk_latency;
3980                 else if ((packets < 5) && (bytes > 512))
3981                         itrval = low_latency;
3982                 break;
3983         case low_latency:  /* 50 usec aka 20000 ints/s */
3984                 if (bytes > 10000) {
3985                         /* this if handles the TSO accounting */
3986                         if (bytes/packets > 8000) {
3987                                 itrval = bulk_latency;
3988                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3989                                 itrval = bulk_latency;
3990                         } else if (packets > 35) {
3991                                 itrval = lowest_latency;
3992                         }
3993                 } else if (bytes/packets > 2000) {
3994                         itrval = bulk_latency;
3995                 } else if (packets <= 2 && bytes < 512) {
3996                         itrval = lowest_latency;
3997                 }
3998                 break;
3999         case bulk_latency: /* 250 usec aka 4000 ints/s */
4000                 if (bytes > 25000) {
4001                         if (packets > 35)
4002                                 itrval = low_latency;
4003                 } else if (bytes < 1500) {
4004                         itrval = low_latency;
4005                 }
4006                 break;
4007         }
4008
4009         /* clear work counters since we have the values we need */
4010         ring_container->total_bytes = 0;
4011         ring_container->total_packets = 0;
4012
4013         /* write updated itr to ring container */
4014         ring_container->itr = itrval;
4015 }
4016
4017 static void igb_set_itr(struct igb_q_vector *q_vector)
4018 {
4019         struct igb_adapter *adapter = q_vector->adapter;
4020         u32 new_itr = q_vector->itr_val;
4021         u8 current_itr = 0;
4022
4023         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4024         if (adapter->link_speed != SPEED_1000) {
4025                 current_itr = 0;
4026                 new_itr = IGB_4K_ITR;
4027                 goto set_itr_now;
4028         }
4029
4030         igb_update_itr(q_vector, &q_vector->tx);
4031         igb_update_itr(q_vector, &q_vector->rx);
4032
4033         current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4034
4035         /* conservative mode (itr 3) eliminates the lowest_latency setting */
4036         if (current_itr == lowest_latency &&
4037             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4038              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4039                 current_itr = low_latency;
4040
4041         switch (current_itr) {
4042         /* counts and packets in update_itr are dependent on these numbers */
4043         case lowest_latency:
4044                 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4045                 break;
4046         case low_latency:
4047                 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4048                 break;
4049         case bulk_latency:
4050                 new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4051                 break;
4052         default:
4053                 break;
4054         }
4055
4056 set_itr_now:
4057         if (new_itr != q_vector->itr_val) {
4058                 /* this attempts to bias the interrupt rate towards Bulk
4059                  * by adding intermediate steps when interrupt rate is
4060                  * increasing */
4061                 new_itr = new_itr > q_vector->itr_val ?
4062                              max((new_itr * q_vector->itr_val) /
4063                                  (new_itr + (q_vector->itr_val >> 2)),
4064                                  new_itr) :
4065                              new_itr;
4066                 /* Don't write the value here; it resets the adapter's
4067                  * internal timer, and causes us to delay far longer than
4068                  * we should between interrupts.  Instead, we write the ITR
4069                  * value at the beginning of the next interrupt so the timing
4070                  * ends up being correct.
4071                  */
4072                 q_vector->itr_val = new_itr;
4073                 q_vector->set_itr = 1;
4074         }
4075 }
4076
4077 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4078                             u32 type_tucmd, u32 mss_l4len_idx)
4079 {
4080         struct e1000_adv_tx_context_desc *context_desc;
4081         u16 i = tx_ring->next_to_use;
4082
4083         context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4084
4085         i++;
4086         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4087
4088         /* set bits to identify this as an advanced context descriptor */
4089         type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4090
4091         /* For 82575, context index must be unique per ring. */
4092         if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4093                 mss_l4len_idx |= tx_ring->reg_idx << 4;
4094
4095         context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
4096         context_desc->seqnum_seed       = 0;
4097         context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
4098         context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
4099 }
4100
4101 static int igb_tso(struct igb_ring *tx_ring,
4102                    struct igb_tx_buffer *first,
4103                    u8 *hdr_len)
4104 {
4105         struct sk_buff *skb = first->skb;
4106         u32 vlan_macip_lens, type_tucmd;
4107         u32 mss_l4len_idx, l4len;
4108
4109         if (!skb_is_gso(skb))
4110                 return 0;
4111
4112         if (skb_header_cloned(skb)) {
4113                 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4114                 if (err)
4115                         return err;
4116         }
4117
4118         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4119         type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4120
4121         if (first->protocol == __constant_htons(ETH_P_IP)) {
4122                 struct iphdr *iph = ip_hdr(skb);
4123                 iph->tot_len = 0;
4124                 iph->check = 0;
4125                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4126                                                          iph->daddr, 0,
4127                                                          IPPROTO_TCP,
4128                                                          0);
4129                 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4130                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4131                                    IGB_TX_FLAGS_CSUM |
4132                                    IGB_TX_FLAGS_IPV4;
4133         } else if (skb_is_gso_v6(skb)) {
4134                 ipv6_hdr(skb)->payload_len = 0;
4135                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4136                                                        &ipv6_hdr(skb)->daddr,
4137                                                        0, IPPROTO_TCP, 0);
4138                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4139                                    IGB_TX_FLAGS_CSUM;
4140         }
4141
4142         /* compute header lengths */
4143         l4len = tcp_hdrlen(skb);
4144         *hdr_len = skb_transport_offset(skb) + l4len;
4145
4146         /* update gso size and bytecount with header size */
4147         first->gso_segs = skb_shinfo(skb)->gso_segs;
4148         first->bytecount += (first->gso_segs - 1) * *hdr_len;
4149
4150         /* MSS L4LEN IDX */
4151         mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4152         mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4153
4154         /* VLAN MACLEN IPLEN */
4155         vlan_macip_lens = skb_network_header_len(skb);
4156         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4157         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4158
4159         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4160
4161         return 1;
4162 }
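/*
 * Example of the MSS/L4LEN packing above for a TSO frame with a 20-byte
 * TCP header and an MSS of 1448 (shift values per the E1000_ADVTXD
 * definitions, L4LEN at bit 8 and MSS at bit 16):
 *
 *	mss_l4len_idx = (20 << E1000_ADVTXD_L4LEN_SHIFT) |
 *			(1448 << E1000_ADVTXD_MSS_SHIFT)
 *
 * while vlan_macip_lens combines the IP header length, the MAC header
 * length shifted by E1000_ADVTXD_MACLEN_SHIFT and the VLAN tag bits.
 */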
4163
4164 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4165 {
4166         struct sk_buff *skb = first->skb;
4167         u32 vlan_macip_lens = 0;
4168         u32 mss_l4len_idx = 0;
4169         u32 type_tucmd = 0;
4170
4171         if (skb->ip_summed != CHECKSUM_PARTIAL) {
4172                 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4173                         return;
4174         } else {
4175                 u8 l4_hdr = 0;
4176                 switch (first->protocol) {
4177                 case __constant_htons(ETH_P_IP):
4178                         vlan_macip_lens |= skb_network_header_len(skb);
4179                         type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4180                         l4_hdr = ip_hdr(skb)->protocol;
4181                         break;
4182                 case __constant_htons(ETH_P_IPV6):
4183                         vlan_macip_lens |= skb_network_header_len(skb);
4184                         l4_hdr = ipv6_hdr(skb)->nexthdr;
4185                         break;
4186                 default:
4187                         if (unlikely(net_ratelimit())) {
4188                                 dev_warn(tx_ring->dev,
4189                                  "partial checksum but proto=%x!\n",
4190                                  first->protocol);
4191                         }
4192                         break;
4193                 }
4194
4195                 switch (l4_hdr) {
4196                 case IPPROTO_TCP:
4197                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4198                         mss_l4len_idx = tcp_hdrlen(skb) <<
4199                                         E1000_ADVTXD_L4LEN_SHIFT;
4200                         break;
4201                 case IPPROTO_SCTP:
4202                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4203                         mss_l4len_idx = sizeof(struct sctphdr) <<
4204                                         E1000_ADVTXD_L4LEN_SHIFT;
4205                         break;
4206                 case IPPROTO_UDP:
4207                         mss_l4len_idx = sizeof(struct udphdr) <<
4208                                         E1000_ADVTXD_L4LEN_SHIFT;
4209                         break;
4210                 default:
4211                         if (unlikely(net_ratelimit())) {
4212                                 dev_warn(tx_ring->dev,
4213                                  "partial checksum but l4 proto=%x!\n",
4214                                  l4_hdr);
4215                         }
4216                         break;
4217                 }
4218
4219                 /* update TX checksum flag */
4220                 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4221         }
4222
4223         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4224         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4225
4226         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4227 }
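
/*
 * Both igb_tso() and igb_tx_csum() describe the frame layout through the
 * VLAN/MACLEN/IPLEN dword: the IP header length fills the low bits, the
 * MAC header length sits above it, and an 802.1Q tag (when present)
 * occupies the upper half.  A minimal sketch for an untagged IPv4 frame
 * with a 14 byte Ethernet header and a 20 byte IP header; illustrative
 * only, the driver computes these from the skb offsets.
 */
static inline u32 igb_example_vlan_macip_lens(void)
{
        u32 vlan_macip_lens = 20;                       /* IPLEN */

        vlan_macip_lens |= 14 << E1000_ADVTXD_MACLEN_SHIFT; /* MACLEN */

        return vlan_macip_lens;
}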
4228
4229 static __le32 igb_tx_cmd_type(u32 tx_flags)
4230 {
4231         /* set type for advanced descriptor with frame checksum insertion */
4232         __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4233                                       E1000_ADVTXD_DCMD_IFCS |
4234                                       E1000_ADVTXD_DCMD_DEXT);
4235
4236         /* set HW vlan bit if vlan is present */
4237         if (tx_flags & IGB_TX_FLAGS_VLAN)
4238                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4239
4240 #ifdef CONFIG_IGB_PTP
4241         /* set timestamp bit if present */
4242         if (unlikely(tx_flags & IGB_TX_FLAGS_TSTAMP))
4243                 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4244 #endif /* CONFIG_IGB_PTP */
4245
4246         /* set segmentation bits for TSO */
4247         if (tx_flags & IGB_TX_FLAGS_TSO)
4248                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4249
4250         return cmd_type;
4251 }
4252
4253 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4254                                  union e1000_adv_tx_desc *tx_desc,
4255                                  u32 tx_flags, unsigned int paylen)
4256 {
4257         u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4258
4259         /* 82575 requires a unique index per ring if any offload is enabled */
4260         if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4261             test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4262                 olinfo_status |= tx_ring->reg_idx << 4;
4263
4264         /* insert L4 checksum */
4265         if (tx_flags & IGB_TX_FLAGS_CSUM) {
4266                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4267
4268                 /* insert IPv4 checksum */
4269                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4270                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4271         }
4272
4273         tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4274 }
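
/*
 * For a checksummed IPv4 TCP frame the resulting dword is just the
 * payload length shifted into place plus the two "insert checksum"
 * POPTS bits (ignoring the 82575 per-ring index), e.g. for a 1514 byte
 * frame:
 *
 *	olinfo_status = (1514 << E1000_ADVTXD_PAYLEN_SHIFT) |
 *			(E1000_TXD_POPTS_TXSM << 8) |
 *			(E1000_TXD_POPTS_IXSM << 8);
 */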
4275
4276 /*
4277  * The largest size we can write to the descriptor is 65535.  In order to
4278  * maintain a power of two alignment we have to limit ourselves to 32K.
4279  */
4280 #define IGB_MAX_TXD_PWR 15
4281 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
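
/*
 * igb_tx_map() below chops any buffer larger than this limit into 32K
 * slices: a single 45000 byte fragment, for instance, becomes one
 * descriptor of IGB_MAX_DATA_PER_TXD (32768) bytes followed by one of
 * the remaining 12232 bytes.
 */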
4282
4283 static void igb_tx_map(struct igb_ring *tx_ring,
4284                        struct igb_tx_buffer *first,
4285                        const u8 hdr_len)
4286 {
4287         struct sk_buff *skb = first->skb;
4288         struct igb_tx_buffer *tx_buffer_info;
4289         union e1000_adv_tx_desc *tx_desc;
4290         dma_addr_t dma;
4291         struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4292         unsigned int data_len = skb->data_len;
4293         unsigned int size = skb_headlen(skb);
4294         unsigned int paylen = skb->len - hdr_len;
4295         __le32 cmd_type;
4296         u32 tx_flags = first->tx_flags;
4297         u16 i = tx_ring->next_to_use;
4298
4299         tx_desc = IGB_TX_DESC(tx_ring, i);
4300
4301         igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4302         cmd_type = igb_tx_cmd_type(tx_flags);
4303
4304         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4305         if (dma_mapping_error(tx_ring->dev, dma))
4306                 goto dma_error;
4307
4308         /* record length, and DMA address */
4309         first->length = size;
4310         first->dma = dma;
4311         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4312
4313         for (;;) {
4314                 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4315                         tx_desc->read.cmd_type_len =
4316                                 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4317
4318                         i++;
4319                         tx_desc++;
4320                         if (i == tx_ring->count) {
4321                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
4322                                 i = 0;
4323                         }
4324
4325                         dma += IGB_MAX_DATA_PER_TXD;
4326                         size -= IGB_MAX_DATA_PER_TXD;
4327
4328                         tx_desc->read.olinfo_status = 0;
4329                         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4330                 }
4331
4332                 if (likely(!data_len))
4333                         break;
4334
4335                 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4336
4337                 i++;
4338                 tx_desc++;
4339                 if (i == tx_ring->count) {
4340                         tx_desc = IGB_TX_DESC(tx_ring, 0);
4341                         i = 0;
4342                 }
4343
4344                 size = skb_frag_size(frag);
4345                 data_len -= size;
4346
4347                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4348                                    size, DMA_TO_DEVICE);
4349                 if (dma_mapping_error(tx_ring->dev, dma))
4350                         goto dma_error;
4351
4352                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4353                 tx_buffer_info->length = size;
4354                 tx_buffer_info->dma = dma;
4355
4356                 tx_desc->read.olinfo_status = 0;
4357                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4358
4359                 frag++;
4360         }
4361
4362         netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4363
4364         /* write last descriptor with RS and EOP bits */
4365         cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4366         if (unlikely(skb->no_fcs))
4367                 cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4368         tx_desc->read.cmd_type_len = cmd_type;
4369
4370         /* record the software timestamp used for Tx hang detection */
4371         first->time_stamp = jiffies;
4372
4373         /*
4374          * Force memory writes to complete before letting h/w know there
4375          * are new descriptors to fetch.  (Only applicable for weak-ordered
4376          * memory model archs, such as IA-64).
4377          *
4378          * We also need this memory barrier to make certain all of the
4379          * status bits have been updated before next_to_watch is written.
4380          */
4381         wmb();
4382
4383         /* set next_to_watch value indicating a packet is present */
4384         first->next_to_watch = tx_desc;
4385
4386         i++;
4387         if (i == tx_ring->count)
4388                 i = 0;
4389
4390         tx_ring->next_to_use = i;
4391
4392         writel(i, tx_ring->tail);
4393
4394         /* we need this if more than one processor can write to our tail
4395          * at a time; it synchronizes IO on IA64/Altix systems */
4396         mmiowb();
4397
4398         return;
4399
4400 dma_error:
4401         dev_err(tx_ring->dev, "TX DMA map failed\n");
4402
4403         /* clear dma mappings for failed tx_buffer_info map */
4404         for (;;) {
4405                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4406                 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4407                 if (tx_buffer_info == first)
4408                         break;
4409                 if (i == 0)
4410                         i = tx_ring->count;
4411                 i--;
4412         }
4413
4414         tx_ring->next_to_use = i;
4415 }
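
/*
 * Note the unwind order in the error path above: on a mapping failure
 * we walk backwards from the failing descriptor to "first", unmapping
 * and freeing each buffer, then rewind next_to_use so the hardware
 * never sees the partially built chain.
 */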
4416
4417 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4418 {
4419         struct net_device *netdev = tx_ring->netdev;
4420
4421         netif_stop_subqueue(netdev, tx_ring->queue_index);
4422
4423         /* Herbert's original patch had:
4424          *  smp_mb__after_netif_stop_queue();
4425          * but since that doesn't exist yet, just open code it. */
4426         smp_mb();
4427
4428         /* We need to check again in case another CPU has just
4429          * made room available. */
4430         if (igb_desc_unused(tx_ring) < size)
4431                 return -EBUSY;
4432
4433         /* A reprieve! */
4434         netif_wake_subqueue(netdev, tx_ring->queue_index);
4435
4436         u64_stats_update_begin(&tx_ring->tx_syncp2);
4437         tx_ring->tx_stats.restart_queue2++;
4438         u64_stats_update_end(&tx_ring->tx_syncp2);
4439
4440         return 0;
4441 }
4442
4443 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4444 {
4445         if (igb_desc_unused(tx_ring) >= size)
4446                 return 0;
4447         return __igb_maybe_stop_tx(tx_ring, size);
4448 }
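
/*
 * The stop/wake dance above is the usual lockless Tx flow control
 * pattern: stop the queue, issue a full barrier, then re-check the
 * free count so we cannot race with the cleanup path freeing
 * descriptors and waking the queue at the same moment.  Callers just
 * reserve their worst case up front, e.g.:
 *
 *	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4))
 *		return NETDEV_TX_BUSY;
 */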
4449
4450 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4451                                 struct igb_ring *tx_ring)
4452 {
4453 #ifdef CONFIG_IGB_PTP
4454         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
4455 #endif /* CONFIG_IGB_PTP */
4456         struct igb_tx_buffer *first;
4457         int tso;
4458         u32 tx_flags = 0;
4459         __be16 protocol = vlan_get_protocol(skb);
4460         u8 hdr_len = 0;
4461
4462         /* need: 1 descriptor per page,
4463          *       + 2 desc gap to keep tail from touching head,
4464          *       + 1 desc for skb->data,
4465          *       + 1 desc for context descriptor,
4466          * otherwise try next time */
4467         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4468                 /* this is a hard error */
4469                 return NETDEV_TX_BUSY;
4470         }
4471
4472         /* record the location of the first descriptor for this packet */
4473         first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4474         first->skb = skb;
4475         first->bytecount = skb->len;
4476         first->gso_segs = 1;
4477
4478 #ifdef CONFIG_IGB_PTP
4479         if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
4480                      !(adapter->ptp_tx_skb))) {
4481                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4482                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4483
4484                 adapter->ptp_tx_skb = skb_get(skb);
4485                 if (adapter->hw.mac.type == e1000_82576)
4486                         schedule_work(&adapter->ptp_tx_work);
4487         }
4488 #endif /* CONFIG_IGB_PTP */
4489
4490         if (vlan_tx_tag_present(skb)) {
4491                 tx_flags |= IGB_TX_FLAGS_VLAN;
4492                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4493         }
4494
4495         /* record initial flags and protocol */
4496         first->tx_flags = tx_flags;
4497         first->protocol = protocol;
4498
4499         tso = igb_tso(tx_ring, first, &hdr_len);
4500         if (tso < 0)
4501                 goto out_drop;
4502         else if (!tso)
4503                 igb_tx_csum(tx_ring, first);
4504
4505         igb_tx_map(tx_ring, first, hdr_len);
4506
4507         /* Make sure there is space in the ring for the next send. */
4508         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4509
4510         return NETDEV_TX_OK;
4511
4512 out_drop:
4513         igb_unmap_and_free_tx_resource(tx_ring, first);
4514
4515         return NETDEV_TX_OK;
4516 }
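
/*
 * The "nr_frags + 4" reservation above covers the worst case when every
 * fragment fits under IGB_MAX_DATA_PER_TXD: one descriptor for
 * skb->data, one per page fragment, one context descriptor, and a two
 * descriptor gap to keep the tail from touching the head.  An skb with
 * 17 fragments therefore reserves 21 descriptors.
 */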
4517
4518 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4519                                                     struct sk_buff *skb)
4520 {
4521         unsigned int r_idx = skb->queue_mapping;
4522
4523         if (r_idx >= adapter->num_tx_queues)
4524                 r_idx = r_idx % adapter->num_tx_queues;
4525
4526         return adapter->tx_ring[r_idx];
4527 }
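
/*
 * Out-of-range queue indices simply wrap: with four Tx queues an skb
 * stamped with queue_mapping 6 lands on tx_ring[2].
 */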
4528
4529 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4530                                   struct net_device *netdev)
4531 {
4532         struct igb_adapter *adapter = netdev_priv(netdev);
4533
4534         if (test_bit(__IGB_DOWN, &adapter->state)) {
4535                 dev_kfree_skb_any(skb);
4536                 return NETDEV_TX_OK;
4537         }
4538
4539         if (skb->len <= 0) {
4540                 dev_kfree_skb_any(skb);
4541                 return NETDEV_TX_OK;
4542         }
4543
4544         /*
4545          * The minimum packet size with TCTL.PSP set is 17, so pad the skb
4546          * to meet this minimum size requirement.
4547          */
4548         if (skb->len < 17) {
4549                 if (skb_padto(skb, 17))
4550                         return NETDEV_TX_OK;
4551                 skb->len = 17;
4552         }
4553
4554         return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4555 }
4556
4557 /**
4558  * igb_tx_timeout - Respond to a Tx Hang
4559  * @netdev: network interface device structure
4560  **/
4561 static void igb_tx_timeout(struct net_device *netdev)
4562 {
4563         struct igb_adapter *adapter = netdev_priv(netdev);
4564         struct e1000_hw *hw = &adapter->hw;
4565
4566         /* Do the reset outside of interrupt context */
4567         adapter->tx_timeout_count++;
4568
4569         if (hw->mac.type >= e1000_82580)
4570                 hw->dev_spec._82575.global_device_reset = true;
4571
4572         schedule_work(&adapter->reset_task);
4573         wr32(E1000_EICS,
4574              (adapter->eims_enable_mask & ~adapter->eims_other));
4575 }
4576
4577 static void igb_reset_task(struct work_struct *work)
4578 {
4579         struct igb_adapter *adapter;
4580         adapter = container_of(work, struct igb_adapter, reset_task);
4581
4582         igb_dump(adapter);
4583         netdev_err(adapter->netdev, "Reset adapter\n");
4584         igb_reinit_locked(adapter);
4585 }
4586
4587 /**
4588  * igb_get_stats64 - Get System Network Statistics
4589  * @netdev: network interface device structure
4590  * @stats: rtnl_link_stats64 pointer
4591  *
4592  **/
4593 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4594                                                  struct rtnl_link_stats64 *stats)
4595 {
4596         struct igb_adapter *adapter = netdev_priv(netdev);
4597
4598         spin_lock(&adapter->stats64_lock);
4599         igb_update_stats(adapter, &adapter->stats64);
4600         memcpy(stats, &adapter->stats64, sizeof(*stats));
4601         spin_unlock(&adapter->stats64_lock);
4602
4603         return stats;
4604 }
4605
4606 /**
4607  * igb_change_mtu - Change the Maximum Transfer Unit
4608  * @netdev: network interface device structure
4609  * @new_mtu: new value for maximum frame size
4610  *
4611  * Returns 0 on success, negative on failure
4612  **/
4613 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4614 {
4615         struct igb_adapter *adapter = netdev_priv(netdev);
4616         struct pci_dev *pdev = adapter->pdev;
4617         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4618
4619         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4620                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4621                 return -EINVAL;
4622         }
4623
4624 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4625         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4626                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4627                 return -EINVAL;
4628         }
4629
4630         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4631                 msleep(1);
4632
4633         /* igb_down has a dependency on max_frame_size */
4634         adapter->max_frame_size = max_frame;
4635
4636         if (netif_running(netdev))
4637                 igb_down(adapter);
4638
4639         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4640                  netdev->mtu, new_mtu);
4641         netdev->mtu = new_mtu;
4642
4643         if (netif_running(netdev))
4644                 igb_up(adapter);
4645         else
4646                 igb_reset(adapter);
4647
4648         clear_bit(__IGB_RESETTING, &adapter->state);
4649
4650         return 0;
4651 }
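
/*
 * max_frame adds the layer 2 overhead to the MTU: 14 bytes of Ethernet
 * header, 4 bytes of FCS and 4 bytes of VLAN tag, i.e. MTU + 22.  The
 * 9238 byte cap above is therefore the 9216 byte MTU quoted in the
 * error message.
 */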
4652
4653 /**
4654  * igb_update_stats - Update the board statistics counters
4655  * @adapter: board private structure
4656  * @net_stats: rtnl_link_stats64 structure to fill in
4657  **/
4658 void igb_update_stats(struct igb_adapter *adapter,
4659                       struct rtnl_link_stats64 *net_stats)
4660 {
4661         struct e1000_hw *hw = &adapter->hw;
4662         struct pci_dev *pdev = adapter->pdev;
4663         u32 reg, mpc;
4664         u16 phy_tmp;
4665         int i;
4666         u64 bytes, packets;
4667         unsigned int start;
4668         u64 _bytes, _packets;
4669
4670 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4671
4672         /*
4673          * Prevent stats update while adapter is being reset, or if the pci
4674          * connection is down.
4675          */
4676         if (adapter->link_speed == 0)
4677                 return;
4678         if (pci_channel_offline(pdev))
4679                 return;
4680
4681         bytes = 0;
4682         packets = 0;
4683         for (i = 0; i < adapter->num_rx_queues; i++) {
4684                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4685                 struct igb_ring *ring = adapter->rx_ring[i];
4686
4687                 ring->rx_stats.drops += rqdpc_tmp;
4688                 net_stats->rx_fifo_errors += rqdpc_tmp;
4689
4690                 do {
4691                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4692                         _bytes = ring->rx_stats.bytes;
4693                         _packets = ring->rx_stats.packets;
4694                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4695                 bytes += _bytes;
4696                 packets += _packets;
4697         }
4698
4699         net_stats->rx_bytes = bytes;
4700         net_stats->rx_packets = packets;
4701
4702         bytes = 0;
4703         packets = 0;
4704         for (i = 0; i < adapter->num_tx_queues; i++) {
4705                 struct igb_ring *ring = adapter->tx_ring[i];
4706                 do {
4707                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4708                         _bytes = ring->tx_stats.bytes;
4709                         _packets = ring->tx_stats.packets;
4710                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4711                 bytes += _bytes;
4712                 packets += _packets;
4713         }
4714         net_stats->tx_bytes = bytes;
4715         net_stats->tx_packets = packets;
4716
4717         /* read stats registers */
4718         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4719         adapter->stats.gprc += rd32(E1000_GPRC);
4720         adapter->stats.gorc += rd32(E1000_GORCL);
4721         rd32(E1000_GORCH); /* clear GORCL */
4722         adapter->stats.bprc += rd32(E1000_BPRC);
4723         adapter->stats.mprc += rd32(E1000_MPRC);
4724         adapter->stats.roc += rd32(E1000_ROC);
4725
4726         adapter->stats.prc64 += rd32(E1000_PRC64);
4727         adapter->stats.prc127 += rd32(E1000_PRC127);
4728         adapter->stats.prc255 += rd32(E1000_PRC255);
4729         adapter->stats.prc511 += rd32(E1000_PRC511);
4730         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4731         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4732         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4733         adapter->stats.sec += rd32(E1000_SEC);
4734
4735         mpc = rd32(E1000_MPC);
4736         adapter->stats.mpc += mpc;
4737         net_stats->rx_fifo_errors += mpc;
4738         adapter->stats.scc += rd32(E1000_SCC);
4739         adapter->stats.ecol += rd32(E1000_ECOL);
4740         adapter->stats.mcc += rd32(E1000_MCC);
4741         adapter->stats.latecol += rd32(E1000_LATECOL);
4742         adapter->stats.dc += rd32(E1000_DC);
4743         adapter->stats.rlec += rd32(E1000_RLEC);
4744         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4745         adapter->stats.xontxc += rd32(E1000_XONTXC);
4746         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4747         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4748         adapter->stats.fcruc += rd32(E1000_FCRUC);
4749         adapter->stats.gptc += rd32(E1000_GPTC);
4750         adapter->stats.gotc += rd32(E1000_GOTCL);
4751         rd32(E1000_GOTCH); /* clear GOTCL */
4752         adapter->stats.rnbc += rd32(E1000_RNBC);
4753         adapter->stats.ruc += rd32(E1000_RUC);
4754         adapter->stats.rfc += rd32(E1000_RFC);
4755         adapter->stats.rjc += rd32(E1000_RJC);
4756         adapter->stats.tor += rd32(E1000_TORH);
4757         adapter->stats.tot += rd32(E1000_TOTH);
4758         adapter->stats.tpr += rd32(E1000_TPR);
4759
4760         adapter->stats.ptc64 += rd32(E1000_PTC64);
4761         adapter->stats.ptc127 += rd32(E1000_PTC127);
4762         adapter->stats.ptc255 += rd32(E1000_PTC255);
4763         adapter->stats.ptc511 += rd32(E1000_PTC511);
4764         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4765         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4766
4767         adapter->stats.mptc += rd32(E1000_MPTC);
4768         adapter->stats.bptc += rd32(E1000_BPTC);
4769
4770         adapter->stats.tpt += rd32(E1000_TPT);
4771         adapter->stats.colc += rd32(E1000_COLC);
4772
4773         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4774         /* read internal phy specific stats */
4775         reg = rd32(E1000_CTRL_EXT);
4776         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4777                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4778                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4779         }
4780
4781         adapter->stats.tsctc += rd32(E1000_TSCTC);
4782         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4783
4784         adapter->stats.iac += rd32(E1000_IAC);
4785         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4786         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4787         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4788         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4789         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4790         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4791         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4792         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4793
4794         /* Fill out the OS statistics structure */
4795         net_stats->multicast = adapter->stats.mprc;
4796         net_stats->collisions = adapter->stats.colc;
4797
4798         /* Rx Errors */
4799
4800         /* RLEC on some newer hardware can be incorrect, so build
4801          * our own version based on RUC and ROC */
4802         net_stats->rx_errors = adapter->stats.rxerrc +
4803                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4804                 adapter->stats.ruc + adapter->stats.roc +
4805                 adapter->stats.cexterr;
4806         net_stats->rx_length_errors = adapter->stats.ruc +
4807                                       adapter->stats.roc;
4808         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4809         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4810         net_stats->rx_missed_errors = adapter->stats.mpc;
4811
4812         /* Tx Errors */
4813         net_stats->tx_errors = adapter->stats.ecol +
4814                                adapter->stats.latecol;
4815         net_stats->tx_aborted_errors = adapter->stats.ecol;
4816         net_stats->tx_window_errors = adapter->stats.latecol;
4817         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4818
4819         /* Tx Dropped needs to be maintained elsewhere */
4820
4821         /* Phy Stats */
4822         if (hw->phy.media_type == e1000_media_type_copper) {
4823                 if ((adapter->link_speed == SPEED_1000) &&
4824                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4825                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4826                         adapter->phy_stats.idle_errors += phy_tmp;
4827                 }
4828         }
4829
4830         /* Management Stats */
4831         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4832         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4833         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4834
4835         /* OS2BMC Stats */
4836         reg = rd32(E1000_MANC);
4837         if (reg & E1000_MANC_EN_BMC2OS) {
4838                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4839                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4840                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4841                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4842         }
4843 }
4844
4845 static irqreturn_t igb_msix_other(int irq, void *data)
4846 {
4847         struct igb_adapter *adapter = data;
4848         struct e1000_hw *hw = &adapter->hw;
4849         u32 icr = rd32(E1000_ICR);
4850         /* reading ICR causes bit 31 of EICR to be cleared */
4851
4852         if (icr & E1000_ICR_DRSTA)
4853                 schedule_work(&adapter->reset_task);
4854
4855         if (icr & E1000_ICR_DOUTSYNC) {
4856                 /* HW is reporting DMA is out of sync */
4857                 adapter->stats.doosync++;
4858                 /* The DMA Out of Sync is also an indication of a spoof event
4859                  * in IOV mode. Check the Wrong VM Behavior register to
4860                  * see if it is really a spoof event. */
4861                 igb_check_wvbr(adapter);
4862         }
4863
4864         /* Check for a mailbox event */
4865         if (icr & E1000_ICR_VMMB)
4866                 igb_msg_task(adapter);
4867
4868         if (icr & E1000_ICR_LSC) {
4869                 hw->mac.get_link_status = 1;
4870                 /* guard against interrupt when we're going down */
4871                 if (!test_bit(__IGB_DOWN, &adapter->state))
4872                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4873         }
4874
4875 #ifdef CONFIG_IGB_PTP
4876         if (icr & E1000_ICR_TS) {
4877                 u32 tsicr = rd32(E1000_TSICR);
4878
4879                 if (tsicr & E1000_TSICR_TXTS) {
4880                         /* acknowledge the interrupt */
4881                         wr32(E1000_TSICR, E1000_TSICR_TXTS);
4882                         /* retrieve hardware timestamp */
4883                         schedule_work(&adapter->ptp_tx_work);
4884                 }
4885         }
4886 #endif /* CONFIG_IGB_PTP */
4887
4888         wr32(E1000_EIMS, adapter->eims_other);
4889
4890         return IRQ_HANDLED;
4891 }
4892
4893 static void igb_write_itr(struct igb_q_vector *q_vector)
4894 {
4895         struct igb_adapter *adapter = q_vector->adapter;
4896         u32 itr_val = q_vector->itr_val & 0x7FFC;
4897
4898         if (!q_vector->set_itr)
4899                 return;
4900
4901         if (!itr_val)
4902                 itr_val = 0x4;
4903
4904         if (adapter->hw.mac.type == e1000_82575)
4905                 itr_val |= itr_val << 16;
4906         else
4907                 itr_val |= E1000_EITR_CNT_IGNR;
4908
4909         writel(itr_val, q_vector->itr_register);
4910         q_vector->set_itr = 0;
4911 }
4912
4913 static irqreturn_t igb_msix_ring(int irq, void *data)
4914 {
4915         struct igb_q_vector *q_vector = data;
4916
4917         /* Write the ITR value calculated from the previous interrupt. */
4918         igb_write_itr(q_vector);
4919
4920         napi_schedule(&q_vector->napi);
4921
4922         return IRQ_HANDLED;
4923 }
4924
4925 #ifdef CONFIG_IGB_DCA
4926 static void igb_update_dca(struct igb_q_vector *q_vector)
4927 {
4928         struct igb_adapter *adapter = q_vector->adapter;
4929         struct e1000_hw *hw = &adapter->hw;
4930         int cpu = get_cpu();
4931
4932         if (q_vector->cpu == cpu)
4933                 goto out_no_update;
4934
4935         if (q_vector->tx.ring) {
4936                 int q = q_vector->tx.ring->reg_idx;
4937                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4938                 if (hw->mac.type == e1000_82575) {
4939                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4940                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4941                 } else {
4942                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4943                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4944                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4945                 }
4946                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4947                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4948         }
4949         if (q_vector->rx.ring) {
4950                 int q = q_vector->rx.ring->reg_idx;
4951                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4952                 if (hw->mac.type == e1000_82575) {
4953                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4954                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4955                 } else {
4956                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4957                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4958                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4959                 }
4960                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4961                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4962                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4963                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4964         }
4965         q_vector->cpu = cpu;
4966 out_no_update:
4967         put_cpu();
4968 }
4969
4970 static void igb_setup_dca(struct igb_adapter *adapter)
4971 {
4972         struct e1000_hw *hw = &adapter->hw;
4973         int i;
4974
4975         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4976                 return;
4977
4978         /* Always use CB2 mode, difference is masked in the CB driver. */
4979         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4980
4981         for (i = 0; i < adapter->num_q_vectors; i++) {
4982                 adapter->q_vector[i]->cpu = -1;
4983                 igb_update_dca(adapter->q_vector[i]);
4984         }
4985 }
4986
4987 static int __igb_notify_dca(struct device *dev, void *data)
4988 {
4989         struct net_device *netdev = dev_get_drvdata(dev);
4990         struct igb_adapter *adapter = netdev_priv(netdev);
4991         struct pci_dev *pdev = adapter->pdev;
4992         struct e1000_hw *hw = &adapter->hw;
4993         unsigned long event = *(unsigned long *)data;
4994
4995         switch (event) {
4996         case DCA_PROVIDER_ADD:
4997                 /* if already enabled, don't do it again */
4998                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4999                         break;
5000                 if (dca_add_requester(dev) == 0) {
5001                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
5002                         dev_info(&pdev->dev, "DCA enabled\n");
5003                         igb_setup_dca(adapter);
5004                         break;
5005                 }
5006                 /* Fall Through since DCA is disabled. */
5007         case DCA_PROVIDER_REMOVE:
5008                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
5009                         /* without this a class_device is left
5010                          * hanging around in the sysfs model */
5011                         dca_remove_requester(dev);
5012                         dev_info(&pdev->dev, "DCA disabled\n");
5013                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
5014                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
5015                 }
5016                 break;
5017         }
5018
5019         return 0;
5020 }
5021
5022 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
5023                           void *p)
5024 {
5025         int ret_val;
5026
5027         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
5028                                          __igb_notify_dca);
5029
5030         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
5031 }
5032 #endif /* CONFIG_IGB_DCA */
5033
5034 #ifdef CONFIG_PCI_IOV
5035 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
5036 {
5037         unsigned char mac_addr[ETH_ALEN];
5038         struct pci_dev *pdev = adapter->pdev;
5039         struct e1000_hw *hw = &adapter->hw;
5040         struct pci_dev *pvfdev;
5041         unsigned int device_id;
5042         u16 thisvf_devfn;
5043
5044         eth_random_addr(mac_addr);
5045         igb_set_vf_mac(adapter, vf, mac_addr);
5046
5047         switch (adapter->hw.mac.type) {
5048         case e1000_82576:
5049                 device_id = IGB_82576_VF_DEV_ID;
5050                 /* VF Stride for 82576 is 2 */
5051                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
5052                         (pdev->devfn & 1);
5053                 break;
5054         case e1000_i350:
5055                 device_id = IGB_I350_VF_DEV_ID;
5056                 /* VF Stride for I350 is 4 */
5057                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5058                                 (pdev->devfn & 3);
5059                 break;
5060         default:
5061                 device_id = 0;
5062                 thisvf_devfn = 0;
5063                 break;
5064         }
5065
5066         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5067         while (pvfdev) {
5068                 if (pvfdev->devfn == thisvf_devfn)
5069                         break;
5070                 pvfdev = pci_get_device(hw->vendor_id,
5071                                         device_id, pvfdev);
5072         }
5073
5074         if (pvfdev)
5075                 adapter->vf_data[vf].vfdev = pvfdev;
5076         else
5077                 dev_err(&pdev->dev,
5078                         "Couldn't find pci dev ptr for VF %4.4x\n",
5079                         thisvf_devfn);
5080         return pvfdev != NULL;
5081 }
5082
5083 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5084 {
5085         struct e1000_hw *hw = &adapter->hw;
5086         struct pci_dev *pdev = adapter->pdev;
5087         struct pci_dev *pvfdev;
5088         u16 vf_devfn = 0;
5089         u16 vf_stride;
5090         unsigned int device_id;
5091         int vfs_found = 0;
5092
5093         switch (adapter->hw.mac.type) {
5094         case e1000_82576:
5095                 device_id = IGB_82576_VF_DEV_ID;
5096                 /* VF Stride for 82576 is 2 */
5097                 vf_stride = 2;
5098                 break;
5099         case e1000_i350:
5100                 device_id = IGB_I350_VF_DEV_ID;
5101                 /* VF Stride for I350 is 4 */
5102                 vf_stride = 4;
5103                 break;
5104         default:
5105                 device_id = 0;
5106                 vf_stride = 0;
5107                 break;
5108         }
5109
5110         vf_devfn = pdev->devfn + 0x80;
5111         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5112         while (pvfdev) {
5113                 if (pvfdev->devfn == vf_devfn &&
5114                     (pvfdev->bus->number >= pdev->bus->number))
5115                         vfs_found++;
5116                 vf_devfn += vf_stride;
5117                 pvfdev = pci_get_device(hw->vendor_id,
5118                                         device_id, pvfdev);
5119         }
5120
5121         return vfs_found;
5122 }
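
/*
 * VF routing IDs sit 0x80 above the PF and are spaced by the per-device
 * stride, so an 82576 PF at devfn 0x00 exposes its VFs at devfn 0x80,
 * 0x82, 0x84, ... while an i350 PF exposes them at 0x80, 0x84, 0x88, ...
 */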
5123
5124 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5125 {
5126         int i;
5127         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5128                 if (adapter->vf_data[i].vfdev) {
5129                         if (adapter->vf_data[i].vfdev->dev_flags &
5130                             PCI_DEV_FLAGS_ASSIGNED)
5131                                 return true;
5132                 }
5133         }
5134         return false;
5135 }
5136
5137 #endif
5138 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5139 {
5140         struct e1000_hw *hw = &adapter->hw;
5141         u32 ping;
5142         int i;
5143
5144         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5145                 ping = E1000_PF_CONTROL_MSG;
5146                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5147                         ping |= E1000_VT_MSGTYPE_CTS;
5148                 igb_write_mbx(hw, &ping, 1, i);
5149         }
5150 }
5151
5152 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5153 {
5154         struct e1000_hw *hw = &adapter->hw;
5155         u32 vmolr = rd32(E1000_VMOLR(vf));
5156         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5157
5158         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5159                             IGB_VF_FLAG_MULTI_PROMISC);
5160         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5161
5162         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5163                 vmolr |= E1000_VMOLR_MPME;
5164                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5165                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5166         } else {
5167                 /*
5168                  * if we have hashes and we are clearing a multicast promisc
5169                  * flag we need to write the hashes to the MTA as this step
5170                  * was previously skipped
5171                  */
5172                 if (vf_data->num_vf_mc_hashes > 30) {
5173                         vmolr |= E1000_VMOLR_MPME;
5174                 } else if (vf_data->num_vf_mc_hashes) {
5175                         int j;
5176                         vmolr |= E1000_VMOLR_ROMPE;
5177                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5178                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5179                 }
5180         }
5181
5182         wr32(E1000_VMOLR(vf), vmolr);
5183
5184         /* there are flags left unprocessed, likely not supported */
5185         if (*msgbuf & E1000_VT_MSGINFO_MASK)
5186                 return -EINVAL;
5187
5188         return 0;
5190 }
5191
5192 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5193                                   u32 *msgbuf, u32 vf)
5194 {
5195         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5196         u16 *hash_list = (u16 *)&msgbuf[1];
5197         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5198         int i;
5199
5200         /* salt away the number of multicast addresses assigned
5201          * to this VF for later use to restore when the PF multicast
5202          * list changes
5203          */
5204         vf_data->num_vf_mc_hashes = n;
5205
5206         /* only up to 30 hash values supported */
5207         if (n > 30)
5208                 n = 30;
5209
5210         /* store the hashes for later use */
5211         for (i = 0; i < n; i++)
5212                 vf_data->vf_mc_hashes[i] = hash_list[i];
5213
5214         /* Flush and reset the mta with the new values */
5215         igb_set_rx_mode(adapter->netdev);
5216
5217         return 0;
5218 }
5219
5220 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5221 {
5222         struct e1000_hw *hw = &adapter->hw;
5223         struct vf_data_storage *vf_data;
5224         int i, j;
5225
5226         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5227                 u32 vmolr = rd32(E1000_VMOLR(i));
5228                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5229
5230                 vf_data = &adapter->vf_data[i];
5231
5232                 if ((vf_data->num_vf_mc_hashes > 30) ||
5233                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5234                         vmolr |= E1000_VMOLR_MPME;
5235                 } else if (vf_data->num_vf_mc_hashes) {
5236                         vmolr |= E1000_VMOLR_ROMPE;
5237                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5238                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5239                 }
5240                 wr32(E1000_VMOLR(i), vmolr);
5241         }
5242 }
5243
5244 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5245 {
5246         struct e1000_hw *hw = &adapter->hw;
5247         u32 pool_mask, reg, vid;
5248         int i;
5249
5250         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5251
5252         /* Find the vlan filter for this id */
5253         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5254                 reg = rd32(E1000_VLVF(i));
5255
5256                 /* remove the vf from the pool */
5257                 reg &= ~pool_mask;
5258
5259                 /* if pool is empty then remove entry from vfta */
5260                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5261                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5262                         vid = reg & E1000_VLVF_VLANID_MASK;
5263                         igb_vfta_set(hw, vid, false);
5264                         reg = 0;
5265                 }
5266
5267                 wr32(E1000_VLVF(i), reg);
5268         }
5269
5270         adapter->vf_data[vf].vlans_enabled = 0;
5271 }
5272
5273 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5274 {
5275         struct e1000_hw *hw = &adapter->hw;
5276         u32 reg, i;
5277
5278         /* The vlvf table only exists on 82576 hardware and newer */
5279         if (hw->mac.type < e1000_82576)
5280                 return -1;
5281
5282         /* we only need to do this if VMDq is enabled */
5283         if (!adapter->vfs_allocated_count)
5284                 return -1;
5285
5286         /* Find the vlan filter for this id */
5287         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5288                 reg = rd32(E1000_VLVF(i));
5289                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5290                     vid == (reg & E1000_VLVF_VLANID_MASK))
5291                         break;
5292         }
5293
5294         if (add) {
5295                 if (i == E1000_VLVF_ARRAY_SIZE) {
5296                         /* Did not find a matching VLAN ID entry that was
5297                          * enabled.  Search for a free filter entry, i.e.
5298                          * one without the enable bit set
5299                          */
5300                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5301                                 reg = rd32(E1000_VLVF(i));
5302                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5303                                         break;
5304                         }
5305                 }
5306                 if (i < E1000_VLVF_ARRAY_SIZE) {
5307                         /* Found an enabled/available entry */
5308                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5309
5310                         /* if !enabled we need to set this up in vfta */
5311                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5312                                 /* add VID to filter table */
5313                                 igb_vfta_set(hw, vid, true);
5314                                 reg |= E1000_VLVF_VLANID_ENABLE;
5315                         }
5316                         reg &= ~E1000_VLVF_VLANID_MASK;
5317                         reg |= vid;
5318                         wr32(E1000_VLVF(i), reg);
5319
5320                         /* do not modify RLPML for PF devices */
5321                         if (vf >= adapter->vfs_allocated_count)
5322                                 return 0;
5323
5324                         if (!adapter->vf_data[vf].vlans_enabled) {
5325                                 u32 size;
5326                                 reg = rd32(E1000_VMOLR(vf));
5327                                 size = reg & E1000_VMOLR_RLPML_MASK;
5328                                 size += 4;
5329                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5330                                 reg |= size;
5331                                 wr32(E1000_VMOLR(vf), reg);
5332                         }
5333
5334                         adapter->vf_data[vf].vlans_enabled++;
5335                 }
5336         } else {
5337                 if (i < E1000_VLVF_ARRAY_SIZE) {
5338                         /* remove vf from the pool */
5339                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5340                         /* if pool is empty then remove entry from vfta */
5341                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5342                                 reg = 0;
5343                                 igb_vfta_set(hw, vid, false);
5344                         }
5345                         wr32(E1000_VLVF(i), reg);
5346
5347                         /* do not modify RLPML for PF devices */
5348                         if (vf >= adapter->vfs_allocated_count)
5349                                 return 0;
5350
5351                         adapter->vf_data[vf].vlans_enabled--;
5352                         if (!adapter->vf_data[vf].vlans_enabled) {
5353                                 u32 size;
5354                                 reg = rd32(E1000_VMOLR(vf));
5355                                 size = reg & E1000_VMOLR_RLPML_MASK;
5356                                 size -= 4;
5357                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5358                                 reg |= size;
5359                                 wr32(E1000_VMOLR(vf), reg);
5360                         }
5361                 }
5362         }
5363         return 0;
5364 }
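
/*
 * Each VLVF entry pairs a VLAN ID with a bitmap of the pools (VFs plus
 * the PF) that are members of it.  A minimal sketch of the final value
 * for VLAN 100 shared by VF 0 and VF 2; illustrative only, the driver
 * builds the entry incrementally in igb_vlvf_set() above.
 */
static inline u32 igb_example_vlvf_entry(void)
{
        return E1000_VLVF_VLANID_ENABLE | 100 |
               (1 << (E1000_VLVF_POOLSEL_SHIFT + 0)) |
               (1 << (E1000_VLVF_POOLSEL_SHIFT + 2));
}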
5365
5366 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5367 {
5368         struct e1000_hw *hw = &adapter->hw;
5369
5370         if (vid)
5371                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5372         else
5373                 wr32(E1000_VMVIR(vf), 0);
5374 }
5375
5376 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5377                                int vf, u16 vlan, u8 qos)
5378 {
5379         int err = 0;
5380         struct igb_adapter *adapter = netdev_priv(netdev);
5381
5382         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5383                 return -EINVAL;
5384         if (vlan || qos) {
5385                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5386                 if (err)
5387                         goto out;
5388                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5389                 igb_set_vmolr(adapter, vf, !vlan);
5390                 adapter->vf_data[vf].pf_vlan = vlan;
5391                 adapter->vf_data[vf].pf_qos = qos;
5392                 dev_info(&adapter->pdev->dev,
5393                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5394                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5395                         dev_warn(&adapter->pdev->dev,
5396                                  "The VF VLAN has been set,"
5397                                  " but the PF device is not up.\n");
5398                         dev_warn(&adapter->pdev->dev,
5399                                  "Bring the PF device up before"
5400                                  " attempting to use the VF device.\n");
5401                 }
5402         } else {
5403                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5404                                    false, vf);
5405                 igb_set_vmvir(adapter, vlan, vf);
5406                 igb_set_vmolr(adapter, vf, true);
5407                 adapter->vf_data[vf].pf_vlan = 0;
5408                 adapter->vf_data[vf].pf_qos = 0;
5409         }
5410 out:
5411         return err;
5412 }
5413
5414 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5415 {
5416         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5417         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5418
5419         return igb_vlvf_set(adapter, vid, add, vf);
5420 }
5421
5422 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5423 {
5424         /* clear flags - except flag that indicates PF has set the MAC */
5425         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5426         adapter->vf_data[vf].last_nack = jiffies;
5427
5428         /* reset offloads to defaults */
5429         igb_set_vmolr(adapter, vf, true);
5430
5431         /* reset vlans for device */
5432         igb_clear_vf_vfta(adapter, vf);
5433         if (adapter->vf_data[vf].pf_vlan)
5434                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5435                                     adapter->vf_data[vf].pf_vlan,
5436                                     adapter->vf_data[vf].pf_qos);
5437         else
5438                 igb_clear_vf_vfta(adapter, vf);
5439
5440         /* reset multicast table array for vf */
5441         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5442
5443         /* Flush and reset the mta with the new values */
5444         igb_set_rx_mode(adapter->netdev);
5445 }
5446
5447 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5448 {
5449         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5450
5451         /* generate a new mac address as we were hotplug removed/added */
5452         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5453                 eth_random_addr(vf_mac);
5454
5455         /* process remaining reset events */
5456         igb_vf_reset(adapter, vf);
5457 }
5458
5459 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5460 {
5461         struct e1000_hw *hw = &adapter->hw;
5462         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5463         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5464         u32 reg, msgbuf[3];
5465         u8 *addr = (u8 *)(&msgbuf[1]);
5466
5467         /* process all the same items cleared in a function level reset */
5468         igb_vf_reset(adapter, vf);
5469
5470         /* set vf mac address */
5471         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5472
5473         /* enable transmit and receive for vf */
5474         reg = rd32(E1000_VFTE);
5475         wr32(E1000_VFTE, reg | (1 << vf));
5476         reg = rd32(E1000_VFRE);
5477         wr32(E1000_VFRE, reg | (1 << vf));
5478
5479         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5480
5481         /* reply to reset with ack and vf mac address */
5482         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5483         memcpy(addr, vf_mac, ETH_ALEN);
5484         igb_write_mbx(hw, msgbuf, 3, vf);
5485 }
5486
5487 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5488 {
5489         /*
5490          * The VF MAC Address is stored in a packed array of bytes
5491          * starting at the second 32 bit word of the msg array
5492          */
5493         unsigned char *addr = (unsigned char *)&msg[1];
5494         int err = -1;
5495
5496         if (is_valid_ether_addr(addr))
5497                 err = igb_set_vf_mac(adapter, vf, addr);
5498
5499         return err;
5500 }
5501
5502 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5503 {
5504         struct e1000_hw *hw = &adapter->hw;
5505         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5506         u32 msg = E1000_VT_MSGTYPE_NACK;
5507
5508         /* if device isn't clear to send it shouldn't be reading either */
5509         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5510             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5511                 igb_write_mbx(hw, &msg, 1, vf);
5512                 vf_data->last_nack = jiffies;
5513         }
5514 }
5515
5516 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5517 {
5518         struct pci_dev *pdev = adapter->pdev;
5519         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5520         struct e1000_hw *hw = &adapter->hw;
5521         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5522         s32 retval;
5523
5524         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5525
5526         if (retval) {
5527                 /* if receive failed revoke VF CTS status and restart init */
5528                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5529                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5530                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5531                         return;
5532                 goto out;
5533         }
5534
5535         /* this is a message we already processed, do nothing */
5536         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5537                 return;
5538
5539         /*
5540          * until the vf completes a reset it should not be
5541          * allowed to start any configuration.
5542          */
5543
5544         if (msgbuf[0] == E1000_VF_RESET) {
5545                 igb_vf_reset_msg(adapter, vf);
5546                 return;
5547         }
5548
5549         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5550                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5551                         return;
5552                 retval = -1;
5553                 goto out;
5554         }
5555
5556         switch ((msgbuf[0] & 0xFFFF)) {
5557         case E1000_VF_SET_MAC_ADDR:
5558                 retval = -EINVAL;
5559                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5560                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5561                 else
5562                         dev_warn(&pdev->dev,
5563                                  "VF %d attempted to override administratively "
5564                                  "set MAC address\nReload the VF driver to "
5565                                  "resume operations\n", vf);
5566                 break;
5567         case E1000_VF_SET_PROMISC:
5568                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5569                 break;
5570         case E1000_VF_SET_MULTICAST:
5571                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5572                 break;
5573         case E1000_VF_SET_LPE:
5574                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5575                 break;
5576         case E1000_VF_SET_VLAN:
5577                 retval = -1;
5578                 if (vf_data->pf_vlan)
5579                         dev_warn(&pdev->dev,
5580                                  "VF %d attempted to override administratively "
5581                                  "set VLAN tag\nReload the VF driver to "
5582                                  "resume operations\n", vf);
5583                 else
5584                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5585                 break;
5586         default:
5587                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5588                 retval = -1;
5589                 break;
5590         }
5591
5592         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5593 out:
5594         /* notify the VF of the results of what it sent us */
5595         if (retval)
5596                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5597         else
5598                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5599
5600         igb_write_mbx(hw, msgbuf, 1, vf);
5601 }
5602
5603 static void igb_msg_task(struct igb_adapter *adapter)
5604 {
5605         struct e1000_hw *hw = &adapter->hw;
5606         u32 vf;
5607
5608         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5609                 /* process any reset requests */
5610                 if (!igb_check_for_rst(hw, vf))
5611                         igb_vf_reset_event(adapter, vf);
5612
5613                 /* process any messages pending */
5614                 if (!igb_check_for_msg(hw, vf))
5615                         igb_rcv_msg_from_vf(adapter, vf);
5616
5617                 /* process any acks */
5618                 if (!igb_check_for_ack(hw, vf))
5619                         igb_rcv_ack_from_vf(adapter, vf);
5620         }
5621 }
5622
5623 /**
5624  *  igb_set_uta - Set unicast filter table address
5625  *  @adapter: board private structure
5626  *
5627  *  The unicast table address is a register array of 32-bit registers.
5628  *  The table is meant to be used in a way similar to the MTA;
5629  *  however, due to hardware limitations it is necessary to set all
5630  *  of the hash bits to 1 and to use the VMOLR ROPE bit as a promiscuous
5631  *  enable bit so that VLAN tag stripping works when promiscuous mode is enabled.
5632  **/
5633 static void igb_set_uta(struct igb_adapter *adapter)
5634 {
5635         struct e1000_hw *hw = &adapter->hw;
5636         int i;
5637
5638         /* The UTA table only exists on 82576 hardware and newer */
5639         if (hw->mac.type < e1000_82576)
5640                 return;
5641
5642         /* we only need to do this if VMDq is enabled */
5643         if (!adapter->vfs_allocated_count)
5644                 return;
5645
5646         for (i = 0; i < hw->mac.uta_reg_count; i++)
5647                 array_wr32(E1000_UTA, i, ~0);
5648 }
5649
5650 /**
5651  * igb_intr_msi - MSI Interrupt Handler
5652  * @irq: interrupt number
5653  * @data: pointer to a network interface device structure
5654  **/
5655 static irqreturn_t igb_intr_msi(int irq, void *data)
5656 {
5657         struct igb_adapter *adapter = data;
5658         struct igb_q_vector *q_vector = adapter->q_vector[0];
5659         struct e1000_hw *hw = &adapter->hw;
5660         /* read ICR disables interrupts using IAM */
5661         u32 icr = rd32(E1000_ICR);
5662
5663         igb_write_itr(q_vector);
5664
5665         if (icr & E1000_ICR_DRSTA)
5666                 schedule_work(&adapter->reset_task);
5667
5668         if (icr & E1000_ICR_DOUTSYNC) {
5669                 /* HW is reporting DMA is out of sync */
5670                 adapter->stats.doosync++;
5671         }
5672
5673         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5674                 hw->mac.get_link_status = 1;
5675                 if (!test_bit(__IGB_DOWN, &adapter->state))
5676                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5677         }
5678
5679 #ifdef CONFIG_IGB_PTP
5680         if (icr & E1000_ICR_TS) {
5681                 u32 tsicr = rd32(E1000_TSICR);
5682
5683                 if (tsicr & E1000_TSICR_TXTS) {
5684                         /* acknowledge the interrupt */
5685                         wr32(E1000_TSICR, E1000_TSICR_TXTS);
5686                         /* retrieve hardware timestamp */
5687                         schedule_work(&adapter->ptp_tx_work);
5688                 }
5689         }
5690 #endif /* CONFIG_IGB_PTP */
5691
5692         napi_schedule(&q_vector->napi);
5693
5694         return IRQ_HANDLED;
5695 }
5696
5697 /**
5698  * igb_intr - Legacy Interrupt Handler
5699  * @irq: interrupt number
5700  * @data: pointer to a network interface device structure
5701  **/
5702 static irqreturn_t igb_intr(int irq, void *data)
5703 {
5704         struct igb_adapter *adapter = data;
5705         struct igb_q_vector *q_vector = adapter->q_vector[0];
5706         struct e1000_hw *hw = &adapter->hw;
5707         /* Interrupt Auto-Mask: upon reading ICR, interrupts are masked,
5708          * so no IMC write is needed */
5709         u32 icr = rd32(E1000_ICR);
5710
5711         /* IMS will not auto-mask unless INT_ASSERTED is set; if it is
5712          * not set, then this adapter did not generate the interrupt */
5713         if (!(icr & E1000_ICR_INT_ASSERTED))
5714                 return IRQ_NONE;
5715
5716         igb_write_itr(q_vector);
5717
5718         if (icr & E1000_ICR_DRSTA)
5719                 schedule_work(&adapter->reset_task);
5720
5721         if (icr & E1000_ICR_DOUTSYNC) {
5722                 /* HW is reporting DMA is out of sync */
5723                 adapter->stats.doosync++;
5724         }
5725
5726         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5727                 hw->mac.get_link_status = 1;
5728                 /* guard against interrupt when we're going down */
5729                 if (!test_bit(__IGB_DOWN, &adapter->state))
5730                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5731         }
5732
5733 #ifdef CONFIG_IGB_PTP
5734         if (icr & E1000_ICR_TS) {
5735                 u32 tsicr = rd32(E1000_TSICR);
5736
5737                 if (tsicr & E1000_TSICR_TXTS) {
5738                         /* acknowledge the interrupt */
5739                         wr32(E1000_TSICR, E1000_TSICR_TXTS);
5740                         /* retrieve hardware timestamp */
5741                         schedule_work(&adapter->ptp_tx_work);
5742                 }
5743         }
5744 #endif /* CONFIG_IGB_PTP */
5745
5746         napi_schedule(&q_vector->napi);
5747
5748         return IRQ_HANDLED;
5749 }
5750
5751 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5752 {
5753         struct igb_adapter *adapter = q_vector->adapter;
5754         struct e1000_hw *hw = &adapter->hw;
5755
5756         if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5757             (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5758                 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5759                         igb_set_itr(q_vector);
5760                 else
5761                         igb_update_ring_itr(q_vector);
5762         }
5763
5764         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5765                 if (adapter->msix_entries)
5766                         wr32(E1000_EIMS, q_vector->eims_value);
5767                 else
5768                         igb_irq_enable(adapter);
5769         }
5770 }
5771
5772 /**
5773  * igb_poll - NAPI Rx polling callback
5774  * @napi: napi polling structure
5775  * @budget: count of how many packets we should handle
5776  **/
5777 static int igb_poll(struct napi_struct *napi, int budget)
5778 {
5779         struct igb_q_vector *q_vector = container_of(napi,
5780                                                      struct igb_q_vector,
5781                                                      napi);
5782         bool clean_complete = true;
5783
5784 #ifdef CONFIG_IGB_DCA
5785         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5786                 igb_update_dca(q_vector);
5787 #endif
5788         if (q_vector->tx.ring)
5789                 clean_complete = igb_clean_tx_irq(q_vector);
5790
5791         if (q_vector->rx.ring)
5792                 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5793
5794         /* If all work not completed, return budget and keep polling */
5795         if (!clean_complete)
5796                 return budget;
5797
5798         /* all work completed; exit polling mode and re-enable interrupts */
5799         napi_complete(napi);
5800         igb_ring_irq_enable(q_vector);
5801
5802         return 0;
5803 }
5804
5805 /**
5806  * igb_clean_tx_irq - Reclaim resources after transmit completes
5807  * @q_vector: pointer to q_vector containing needed info
5808  *
5809  * returns true if ring is completely cleaned
5810  **/
5811 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5812 {
5813         struct igb_adapter *adapter = q_vector->adapter;
5814         struct igb_ring *tx_ring = q_vector->tx.ring;
5815         struct igb_tx_buffer *tx_buffer;
5816         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5817         unsigned int total_bytes = 0, total_packets = 0;
5818         unsigned int budget = q_vector->tx.work_limit;
5819         unsigned int i = tx_ring->next_to_clean;
5820
5821         if (test_bit(__IGB_DOWN, &adapter->state))
5822                 return true;
5823
5824         tx_buffer = &tx_ring->tx_buffer_info[i];
5825         tx_desc = IGB_TX_DESC(tx_ring, i);
5826         i -= tx_ring->count;
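        /* bias i by -count so the "if (unlikely(!i))" tests below detect a
         * ring wrap without comparing against tx_ring->count; the bias is
         * removed again after the loop completes
         */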
5827
5828         for (; budget; budget--) {
5829                 eop_desc = tx_buffer->next_to_watch;
5830
5831                 /* prevent any other reads prior to eop_desc */
5832                 rmb();
5833
5834                 /* if next_to_watch is not set then there is no work pending */
5835                 if (!eop_desc)
5836                         break;
5837
5838                 /* if DD is not set pending work has not been completed */
5839                 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5840                         break;
5841
5842                 /* clear next_to_watch to prevent false hangs */
5843                 tx_buffer->next_to_watch = NULL;
5844
5845                 /* update the statistics for this packet */
5846                 total_bytes += tx_buffer->bytecount;
5847                 total_packets += tx_buffer->gso_segs;
5848
5849                 /* free the skb */
5850                 dev_kfree_skb_any(tx_buffer->skb);
5851                 tx_buffer->skb = NULL;
5852
5853                 /* unmap skb header data */
5854                 dma_unmap_single(tx_ring->dev,
5855                                  tx_buffer->dma,
5856                                  tx_buffer->length,
5857                                  DMA_TO_DEVICE);
5858
5859                 /* clear last DMA location and unmap remaining buffers */
5860                 while (tx_desc != eop_desc) {
5861                         tx_buffer->dma = 0;
5862
5863                         tx_buffer++;
5864                         tx_desc++;
5865                         i++;
5866                         if (unlikely(!i)) {
5867                                 i -= tx_ring->count;
5868                                 tx_buffer = tx_ring->tx_buffer_info;
5869                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
5870                         }
5871
5872                         /* unmap any remaining paged data */
5873                         if (tx_buffer->dma) {
5874                                 dma_unmap_page(tx_ring->dev,
5875                                                tx_buffer->dma,
5876                                                tx_buffer->length,
5877                                                DMA_TO_DEVICE);
5878                         }
5879                 }
5880
5881                 /* clear last DMA location */
5882                 tx_buffer->dma = 0;
5883
5884                 /* move us one more past the eop_desc for start of next pkt */
5885                 tx_buffer++;
5886                 tx_desc++;
5887                 i++;
5888                 if (unlikely(!i)) {
5889                         i -= tx_ring->count;
5890                         tx_buffer = tx_ring->tx_buffer_info;
5891                         tx_desc = IGB_TX_DESC(tx_ring, 0);
5892                 }
5893         }
5894
5895         netdev_tx_completed_queue(txring_txq(tx_ring),
5896                                   total_packets, total_bytes);
5897         i += tx_ring->count;
5898         tx_ring->next_to_clean = i;
5899         u64_stats_update_begin(&tx_ring->tx_syncp);
5900         tx_ring->tx_stats.bytes += total_bytes;
5901         tx_ring->tx_stats.packets += total_packets;
5902         u64_stats_update_end(&tx_ring->tx_syncp);
5903         q_vector->tx.total_bytes += total_bytes;
5904         q_vector->tx.total_packets += total_packets;
5905
5906         if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5907                 struct e1000_hw *hw = &adapter->hw;
5908
5909                 eop_desc = tx_buffer->next_to_watch;
5910
5911                 /* Detect a transmit hang in hardware; this serializes the
5912                  * check with the clearing of time_stamp and the movement of i */
5913                 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5914                 if (eop_desc &&
5915                     time_after(jiffies, tx_buffer->time_stamp +
5916                                (adapter->tx_timeout_factor * HZ)) &&
5917                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5918
5919                         /* detected Tx unit hang */
5920                         dev_err(tx_ring->dev,
5921                                 "Detected Tx Unit Hang\n"
5922                                 "  Tx Queue             <%d>\n"
5923                                 "  TDH                  <%x>\n"
5924                                 "  TDT                  <%x>\n"
5925                                 "  next_to_use          <%x>\n"
5926                                 "  next_to_clean        <%x>\n"
5927                                 "buffer_info[next_to_clean]\n"
5928                                 "  time_stamp           <%lx>\n"
5929                                 "  next_to_watch        <%p>\n"
5930                                 "  jiffies              <%lx>\n"
5931                                 "  desc.status          <%x>\n",
5932                                 tx_ring->queue_index,
5933                                 rd32(E1000_TDH(tx_ring->reg_idx)),
5934                                 readl(tx_ring->tail),
5935                                 tx_ring->next_to_use,
5936                                 tx_ring->next_to_clean,
5937                                 tx_buffer->time_stamp,
5938                                 eop_desc,
5939                                 jiffies,
5940                                 eop_desc->wb.status);
5941                         netif_stop_subqueue(tx_ring->netdev,
5942                                             tx_ring->queue_index);
5943
5944                         /* we are about to reset, no point in enabling stuff */
5945                         return true;
5946                 }
5947         }
5948
5949         if (unlikely(total_packets &&
5950                      netif_carrier_ok(tx_ring->netdev) &&
5951                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5952                 /* Make sure that anybody stopping the queue after this
5953                  * sees the new next_to_clean.
5954                  */
5955                 smp_mb();
5956                 if (__netif_subqueue_stopped(tx_ring->netdev,
5957                                              tx_ring->queue_index) &&
5958                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5959                         netif_wake_subqueue(tx_ring->netdev,
5960                                             tx_ring->queue_index);
5961
5962                         u64_stats_update_begin(&tx_ring->tx_syncp);
5963                         tx_ring->tx_stats.restart_queue++;
5964                         u64_stats_update_end(&tx_ring->tx_syncp);
5965                 }
5966         }
5967
5968         return !!budget;
5969 }
5970
5971 static inline void igb_rx_checksum(struct igb_ring *ring,
5972                                    union e1000_adv_rx_desc *rx_desc,
5973                                    struct sk_buff *skb)
5974 {
5975         skb_checksum_none_assert(skb);
5976
5977         /* Ignore Checksum bit is set */
5978         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5979                 return;
5980
5981         /* Rx checksum disabled via ethtool */
5982         if (!(ring->netdev->features & NETIF_F_RXCSUM))
5983                 return;
5984
5985         /* TCP/UDP checksum error bit is set */
5986         if (igb_test_staterr(rx_desc,
5987                              E1000_RXDEXT_STATERR_TCPE |
5988                              E1000_RXDEXT_STATERR_IPE)) {
5989                 /*
5990                  * work around an erratum with SCTP packets where the TCPE
5991                  * (aka L4E) bit is set incorrectly on 64-byte (60 bytes
5992                  * without CRC) packets; let the stack check the crc32c
5993                  */
5994                 if (!((skb->len == 60) &&
5995                       test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5996                         u64_stats_update_begin(&ring->rx_syncp);
5997                         ring->rx_stats.csum_err++;
5998                         u64_stats_update_end(&ring->rx_syncp);
5999                 }
6000                 /* let the stack verify checksum errors */
6001                 return;
6002         }
6003         /* It must be a TCP or UDP packet with a valid checksum */
6004         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
6005                                       E1000_RXD_STAT_UDPCS))
6006                 skb->ip_summed = CHECKSUM_UNNECESSARY;
6007
6008         dev_dbg(ring->dev, "cksum success: bits %08X\n",
6009                 le32_to_cpu(rx_desc->wb.upper.status_error));
6010 }
6011
6012 static inline void igb_rx_hash(struct igb_ring *ring,
6013                                union e1000_adv_rx_desc *rx_desc,
6014                                struct sk_buff *skb)
6015 {
6016         if (ring->netdev->features & NETIF_F_RXHASH)
6017                 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
6018 }
6019
6020 static void igb_rx_vlan(struct igb_ring *ring,
6021                         union e1000_adv_rx_desc *rx_desc,
6022                         struct sk_buff *skb)
6023 {
6024         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6025                 u16 vid;
6026                 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6027                     test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6028                         vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6029                 else
6030                         vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6031
6032                 __vlan_hwaccel_put_tag(skb, vid);
6033         }
6034 }
6035
6036 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6037 {
6038         /* HW will not DMA in data larger than the given buffer, even if it
6039          * parses the (NFS, of course) header to be larger.  In that case, it
6040          * fills the header buffer and spills the rest into the page.
6041          */
6042         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6043                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6044         if (hlen > IGB_RX_HDR_LEN)
6045                 hlen = IGB_RX_HDR_LEN;
6046         return hlen;
6047 }
6048
6049 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6050 {
6051         struct igb_ring *rx_ring = q_vector->rx.ring;
6052         union e1000_adv_rx_desc *rx_desc;
6053         const int current_node = numa_node_id();
6054         unsigned int total_bytes = 0, total_packets = 0;
6055         u16 cleaned_count = igb_desc_unused(rx_ring);
6056         u16 i = rx_ring->next_to_clean;
6057
6058         rx_desc = IGB_RX_DESC(rx_ring, i);
6059
6060         while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6061                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6062                 struct sk_buff *skb = buffer_info->skb;
6063                 union e1000_adv_rx_desc *next_rxd;
6064
6065                 buffer_info->skb = NULL;
6066                 prefetch(skb->data);
6067
6068                 i++;
6069                 if (i == rx_ring->count)
6070                         i = 0;
6071
6072                 next_rxd = IGB_RX_DESC(rx_ring, i);
6073                 prefetch(next_rxd);
6074
6075                 /*
6076                  * This memory barrier is needed to keep us from reading
6077                  * any other fields out of the rx_desc until we know the
6078                  * RXD_STAT_DD bit is set
6079                  */
6080                 rmb();
6081
6082                 if (!skb_is_nonlinear(skb)) {
6083                         __skb_put(skb, igb_get_hlen(rx_desc));
6084                         dma_unmap_single(rx_ring->dev, buffer_info->dma,
6085                                          IGB_RX_HDR_LEN,
6086                                          DMA_FROM_DEVICE);
6087                         buffer_info->dma = 0;
6088                 }
6089
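                /* any payload beyond the header buffer was DMAed into the
                 * half-page buffer; attach it to the skb as a page fragment
                 * instead of copying it
                 */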
6090                 if (rx_desc->wb.upper.length) {
6091                         u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6092
6093                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6094                                                 buffer_info->page,
6095                                                 buffer_info->page_offset,
6096                                                 length);
6097
6098                         skb->len += length;
6099                         skb->data_len += length;
6100                         skb->truesize += PAGE_SIZE / 2;
6101
6102                         if ((page_count(buffer_info->page) != 1) ||
6103                             (page_to_nid(buffer_info->page) != current_node))
6104                                 buffer_info->page = NULL;
6105                         else
6106                                 get_page(buffer_info->page);
6107
6108                         dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6109                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
6110                         buffer_info->page_dma = 0;
6111                 }
6112
6113                 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6114                         struct igb_rx_buffer *next_buffer;
6115                         next_buffer = &rx_ring->rx_buffer_info[i];
6116                         buffer_info->skb = next_buffer->skb;
6117                         buffer_info->dma = next_buffer->dma;
6118                         next_buffer->skb = skb;
6119                         next_buffer->dma = 0;
6120                         goto next_desc;
6121                 }
6122
6123                 if (unlikely((igb_test_staterr(rx_desc,
6124                                                E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6125                              && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6126                         dev_kfree_skb_any(skb);
6127                         goto next_desc;
6128                 }
6129
6130 #ifdef CONFIG_IGB_PTP
6131                 igb_ptp_rx_hwtstamp(q_vector, rx_desc, skb);
6132 #endif /* CONFIG_IGB_PTP */
6133                 igb_rx_hash(rx_ring, rx_desc, skb);
6134                 igb_rx_checksum(rx_ring, rx_desc, skb);
6135                 igb_rx_vlan(rx_ring, rx_desc, skb);
6136
6137                 total_bytes += skb->len;
6138                 total_packets++;
6139
6140                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6141
6142                 napi_gro_receive(&q_vector->napi, skb);
6143
6144                 budget--;
6145 next_desc:
6146                 if (!budget)
6147                         break;
6148
6149                 cleaned_count++;
6150                 /* return some buffers to hardware; one at a time is too slow */
6151                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6152                         igb_alloc_rx_buffers(rx_ring, cleaned_count);
6153                         cleaned_count = 0;
6154                 }
6155
6156                 /* use prefetched values */
6157                 rx_desc = next_rxd;
6158         }
6159
6160         rx_ring->next_to_clean = i;
6161         u64_stats_update_begin(&rx_ring->rx_syncp);
6162         rx_ring->rx_stats.packets += total_packets;
6163         rx_ring->rx_stats.bytes += total_bytes;
6164         u64_stats_update_end(&rx_ring->rx_syncp);
6165         q_vector->rx.total_packets += total_packets;
6166         q_vector->rx.total_bytes += total_bytes;
6167
6168         if (cleaned_count)
6169                 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6170
6171         return !!budget;
6172 }
6173
6174 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6175                                  struct igb_rx_buffer *bi)
6176 {
6177         struct sk_buff *skb = bi->skb;
6178         dma_addr_t dma = bi->dma;
6179
6180         if (dma)
6181                 return true;
6182
6183         if (likely(!skb)) {
6184                 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6185                                                 IGB_RX_HDR_LEN);
6186                 bi->skb = skb;
6187                 if (!skb) {
6188                         rx_ring->rx_stats.alloc_failed++;
6189                         return false;
6190                 }
6191
6192                 /* initialize skb for ring */
6193                 skb_record_rx_queue(skb, rx_ring->queue_index);
6194         }
6195
6196         dma = dma_map_single(rx_ring->dev, skb->data,
6197                              IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6198
6199         if (dma_mapping_error(rx_ring->dev, dma)) {
6200                 rx_ring->rx_stats.alloc_failed++;
6201                 return false;
6202         }
6203
6204         bi->dma = dma;
6205         return true;
6206 }
6207
6208 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6209                                   struct igb_rx_buffer *bi)
6210 {
6211         struct page *page = bi->page;
6212         dma_addr_t page_dma = bi->page_dma;
6213         unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
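        /* XOR-ing with PAGE_SIZE / 2 flips between the two halves of the
         * page, so each half can be mapped to a descriptor and recycled
         * independently of the other
         */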
6214
6215         if (page_dma)
6216                 return true;
6217
6218         if (!page) {
6219                 page = __skb_alloc_page(GFP_ATOMIC, bi->skb);
6220                 bi->page = page;
6221                 if (unlikely(!page)) {
6222                         rx_ring->rx_stats.alloc_failed++;
6223                         return false;
6224                 }
6225         }
6226
6227         page_dma = dma_map_page(rx_ring->dev, page,
6228                                 page_offset, PAGE_SIZE / 2,
6229                                 DMA_FROM_DEVICE);
6230
6231         if (dma_mapping_error(rx_ring->dev, page_dma)) {
6232                 rx_ring->rx_stats.alloc_failed++;
6233                 return false;
6234         }
6235
6236         bi->page_dma = page_dma;
6237         bi->page_offset = page_offset;
6238         return true;
6239 }
6240
6241 /**
6242  * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6243  * @rx_ring: Rx descriptor ring to place buffers on
 * @cleaned_count: number of buffers to allocate and map
6244  **/
6245 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6246 {
6247         union e1000_adv_rx_desc *rx_desc;
6248         struct igb_rx_buffer *bi;
6249         u16 i = rx_ring->next_to_use;
6250
6251         rx_desc = IGB_RX_DESC(rx_ring, i);
6252         bi = &rx_ring->rx_buffer_info[i];
6253         i -= rx_ring->count;
6254
6255         while (cleaned_count--) {
6256                 if (!igb_alloc_mapped_skb(rx_ring, bi))
6257                         break;
6258
6259                 /* Refresh the desc even if buffer_addrs didn't change
6260                  * because each write-back erases this info. */
6261                 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6262
6263                 if (!igb_alloc_mapped_page(rx_ring, bi))
6264                         break;
6265
6266                 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6267
6268                 rx_desc++;
6269                 bi++;
6270                 i++;
6271                 if (unlikely(!i)) {
6272                         rx_desc = IGB_RX_DESC(rx_ring, 0);
6273                         bi = rx_ring->rx_buffer_info;
6274                         i -= rx_ring->count;
6275                 }
6276
6277                 /* clear the hdr_addr for the next_to_use descriptor */
6278                 rx_desc->read.hdr_addr = 0;
6279         }
6280
6281         i += rx_ring->count;
6282
6283         if (rx_ring->next_to_use != i) {
6284                 rx_ring->next_to_use = i;
6285
6286                 /* Force memory writes to complete before letting h/w
6287                  * know there are new descriptors to fetch.  (Only
6288                  * applicable for weak-ordered memory model archs,
6289                  * such as IA-64). */
6290                 wmb();
6291                 writel(i, rx_ring->tail);
6292         }
6293 }
6294
6295 /**
6296  * igb_mii_ioctl - handle MII ioctls
6297  * @netdev: pointer to the net device structure
6298  * @ifr: interface request structure holding the MII data
6299  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6300  **/
6301 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6302 {
6303         struct igb_adapter *adapter = netdev_priv(netdev);
6304         struct mii_ioctl_data *data = if_mii(ifr);
6305
6306         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6307                 return -EOPNOTSUPP;
6308
6309         switch (cmd) {
6310         case SIOCGMIIPHY:
6311                 data->phy_id = adapter->hw.phy.addr;
6312                 break;
6313         case SIOCGMIIREG:
6314                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6315                                      &data->val_out))
6316                         return -EIO;
6317                 break;
6318         case SIOCSMIIREG:
6319         default:
6320                 return -EOPNOTSUPP;
6321         }
6322         return 0;
6323 }
6324
6325 /**
6326  * igb_ioctl - handle device-specific ioctls
6327  * @netdev: pointer to the net device structure
6328  * @ifr: interface request structure
6329  * @cmd: ioctl command to dispatch
6330  **/
6331 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6332 {
6333         switch (cmd) {
6334         case SIOCGMIIPHY:
6335         case SIOCGMIIREG:
6336         case SIOCSMIIREG:
6337                 return igb_mii_ioctl(netdev, ifr, cmd);
6338 #ifdef CONFIG_IGB_PTP
6339         case SIOCSHWTSTAMP:
6340                 return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd);
6341 #endif /* CONFIG_IGB_PTP */
6342         default:
6343                 return -EOPNOTSUPP;
6344         }
6345 }
6346
6347 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6348 {
6349         struct igb_adapter *adapter = hw->back;
6350         u16 cap_offset;
6351
6352         cap_offset = adapter->pdev->pcie_cap;
6353         if (!cap_offset)
6354                 return -E1000_ERR_CONFIG;
6355
6356         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6357
6358         return 0;
6359 }
6360
6361 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6362 {
6363         struct igb_adapter *adapter = hw->back;
6364         u16 cap_offset;
6365
6366         cap_offset = adapter->pdev->pcie_cap;
6367         if (!cap_offset)
6368                 return -E1000_ERR_CONFIG;
6369
6370         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6371
6372         return 0;
6373 }
6374
6375 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6376 {
6377         struct igb_adapter *adapter = netdev_priv(netdev);
6378         struct e1000_hw *hw = &adapter->hw;
6379         u32 ctrl, rctl;
6380         bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6381
6382         if (enable) {
6383                 /* enable VLAN tag insert/strip */
6384                 ctrl = rd32(E1000_CTRL);
6385                 ctrl |= E1000_CTRL_VME;
6386                 wr32(E1000_CTRL, ctrl);
6387
6388                 /* Disable CFI check */
6389                 rctl = rd32(E1000_RCTL);
6390                 rctl &= ~E1000_RCTL_CFIEN;
6391                 wr32(E1000_RCTL, rctl);
6392         } else {
6393                 /* disable VLAN tag insert/strip */
6394                 ctrl = rd32(E1000_CTRL);
6395                 ctrl &= ~E1000_CTRL_VME;
6396                 wr32(E1000_CTRL, ctrl);
6397         }
6398
6399         igb_rlpml_set(adapter);
6400 }
6401
6402 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6403 {
6404         struct igb_adapter *adapter = netdev_priv(netdev);
6405         struct e1000_hw *hw = &adapter->hw;
6406         int pf_id = adapter->vfs_allocated_count;
6407
6408         /* attempt to add filter to vlvf array */
6409         igb_vlvf_set(adapter, vid, true, pf_id);
6410
6411         /* add the filter since PF can receive vlans w/o entry in vlvf */
6412         igb_vfta_set(hw, vid, true);
6413
6414         set_bit(vid, adapter->active_vlans);
6415
6416         return 0;
6417 }
6418
6419 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6420 {
6421         struct igb_adapter *adapter = netdev_priv(netdev);
6422         struct e1000_hw *hw = &adapter->hw;
6423         int pf_id = adapter->vfs_allocated_count;
6424         s32 err;
6425
6426         /* remove vlan from VLVF table array */
6427         err = igb_vlvf_set(adapter, vid, false, pf_id);
6428
6429         /* if vid was not present in VLVF just remove it from table */
6430         if (err)
6431                 igb_vfta_set(hw, vid, false);
6432
6433         clear_bit(vid, adapter->active_vlans);
6434
6435         return 0;
6436 }
6437
6438 static void igb_restore_vlan(struct igb_adapter *adapter)
6439 {
6440         u16 vid;
6441
6442         igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6443
6444         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6445                 igb_vlan_rx_add_vid(adapter->netdev, vid);
6446 }
6447
6448 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6449 {
6450         struct pci_dev *pdev = adapter->pdev;
6451         struct e1000_mac_info *mac = &adapter->hw.mac;
6452
6453         mac->autoneg = 0;
6454
6455         /* Make sure dplx is at most 1 bit and the lsb of speed is not set
6456          * so that the switch() below works */
6457         if ((spd & 1) || (dplx & ~1))
6458                 goto err_inval;
6459
6460         /* Fiber NICs only allow 1000 Mbps full duplex */
6461         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6462             (spd != SPEED_1000 ||
6463              dplx != DUPLEX_FULL))
6464                 goto err_inval;
6465
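        /* DUPLEX_HALF is 0 and DUPLEX_FULL is 1, while SPEED_10/100/1000 are
         * all even, so spd + dplx produces a unique case label for every
         * valid speed/duplex combination below
         */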
6466         switch (spd + dplx) {
6467         case SPEED_10 + DUPLEX_HALF:
6468                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6469                 break;
6470         case SPEED_10 + DUPLEX_FULL:
6471                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6472                 break;
6473         case SPEED_100 + DUPLEX_HALF:
6474                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6475                 break;
6476         case SPEED_100 + DUPLEX_FULL:
6477                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6478                 break;
6479         case SPEED_1000 + DUPLEX_FULL:
6480                 mac->autoneg = 1;
6481                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6482                 break;
6483         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6484         default:
6485                 goto err_inval;
6486         }
6487
6488         /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
6489         adapter->hw.phy.mdix = AUTO_ALL_MODES;
6490         /* clear the MDI/MDI-X override; it is only allowed when autoneg is enabled */
6491         return 0;
6492
6493 err_inval:
6494         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6495         return -EINVAL;
6496 }
6497
6498 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6499                           bool runtime)
6500 {
6501         struct net_device *netdev = pci_get_drvdata(pdev);
6502         struct igb_adapter *adapter = netdev_priv(netdev);
6503         struct e1000_hw *hw = &adapter->hw;
6504         u32 ctrl, rctl, status;
6505         u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6506 #ifdef CONFIG_PM
6507         int retval = 0;
6508 #endif
6509
6510         netif_device_detach(netdev);
6511
6512         if (netif_running(netdev))
6513                 __igb_close(netdev, true);
6514
6515         igb_clear_interrupt_scheme(adapter);
6516
6517 #ifdef CONFIG_PM
6518         retval = pci_save_state(pdev);
6519         if (retval)
6520                 return retval;
6521 #endif
6522
6523         status = rd32(E1000_STATUS);
6524         if (status & E1000_STATUS_LU)
6525                 wufc &= ~E1000_WUFC_LNKC;
6526
6527         if (wufc) {
6528                 igb_setup_rctl(adapter);
6529                 igb_set_rx_mode(netdev);
6530
6531                 /* turn on all-multi mode if wake on multicast is enabled */
6532                 if (wufc & E1000_WUFC_MC) {
6533                         rctl = rd32(E1000_RCTL);
6534                         rctl |= E1000_RCTL_MPE;
6535                         wr32(E1000_RCTL, rctl);
6536                 }
6537
6538                 ctrl = rd32(E1000_CTRL);
6539                 /* advertise wake from D3Cold */
6540                 #define E1000_CTRL_ADVD3WUC 0x00100000
6541                 /* phy power management enable */
6542                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6543                 ctrl |= E1000_CTRL_ADVD3WUC;
6544                 wr32(E1000_CTRL, ctrl);
6545
6546                 /* Allow time for pending master requests to run */
6547                 igb_disable_pcie_master(hw);
6548
6549                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6550                 wr32(E1000_WUFC, wufc);
6551         } else {
6552                 wr32(E1000_WUC, 0);
6553                 wr32(E1000_WUFC, 0);
6554         }
6555
6556         *enable_wake = wufc || adapter->en_mng_pt;
6557         if (!*enable_wake)
6558                 igb_power_down_link(adapter);
6559         else
6560                 igb_power_up_link(adapter);
6561
6562         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6563          * would have already happened in close and is redundant. */
6564         igb_release_hw_control(adapter);
6565
6566         pci_disable_device(pdev);
6567
6568         return 0;
6569 }
6570
6571 #ifdef CONFIG_PM
6572 #ifdef CONFIG_PM_SLEEP
6573 static int igb_suspend(struct device *dev)
6574 {
6575         int retval;
6576         bool wake;
6577         struct pci_dev *pdev = to_pci_dev(dev);
6578
6579         retval = __igb_shutdown(pdev, &wake, 0);
6580         if (retval)
6581                 return retval;
6582
6583         if (wake) {
6584                 pci_prepare_to_sleep(pdev);
6585         } else {
6586                 pci_wake_from_d3(pdev, false);
6587                 pci_set_power_state(pdev, PCI_D3hot);
6588         }
6589
6590         return 0;
6591 }
6592 #endif /* CONFIG_PM_SLEEP */
6593
6594 static int igb_resume(struct device *dev)
6595 {
6596         struct pci_dev *pdev = to_pci_dev(dev);
6597         struct net_device *netdev = pci_get_drvdata(pdev);
6598         struct igb_adapter *adapter = netdev_priv(netdev);
6599         struct e1000_hw *hw = &adapter->hw;
6600         int err;
6601
6602         pci_set_power_state(pdev, PCI_D0);
6603         pci_restore_state(pdev);
6604         pci_save_state(pdev);
6605
6606         err = pci_enable_device_mem(pdev);
6607         if (err) {
6608                 dev_err(&pdev->dev,
6609                         "igb: Cannot enable PCI device from suspend\n");
6610                 return err;
6611         }
6612         pci_set_master(pdev);
6613
6614         pci_enable_wake(pdev, PCI_D3hot, 0);
6615         pci_enable_wake(pdev, PCI_D3cold, 0);
6616
6617         if (igb_init_interrupt_scheme(adapter)) {
6618                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6619                 return -ENOMEM;
6620         }
6621
6622         igb_reset(adapter);
6623
6624         /* let the f/w know that the h/w is now under the control of the
6625          * driver. */
6626         igb_get_hw_control(adapter);
6627
6628         wr32(E1000_WUS, ~0);
6629
6630         if (netdev->flags & IFF_UP) {
6631                 err = __igb_open(netdev, true);
6632                 if (err)
6633                         return err;
6634         }
6635
6636         netif_device_attach(netdev);
6637         return 0;
6638 }
6639
6640 #ifdef CONFIG_PM_RUNTIME
6641 static int igb_runtime_idle(struct device *dev)
6642 {
6643         struct pci_dev *pdev = to_pci_dev(dev);
6644         struct net_device *netdev = pci_get_drvdata(pdev);
6645         struct igb_adapter *adapter = netdev_priv(netdev);
6646
6647         if (!igb_has_link(adapter))
6648                 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6649
6650         return -EBUSY;
6651 }
6652
6653 static int igb_runtime_suspend(struct device *dev)
6654 {
6655         struct pci_dev *pdev = to_pci_dev(dev);
6656         int retval;
6657         bool wake;
6658
6659         retval = __igb_shutdown(pdev, &wake, 1);
6660         if (retval)
6661                 return retval;
6662
6663         if (wake) {
6664                 pci_prepare_to_sleep(pdev);
6665         } else {
6666                 pci_wake_from_d3(pdev, false);
6667                 pci_set_power_state(pdev, PCI_D3hot);
6668         }
6669
6670         return 0;
6671 }
6672
6673 static int igb_runtime_resume(struct device *dev)
6674 {
6675         return igb_resume(dev);
6676 }
6677 #endif /* CONFIG_PM_RUNTIME */
6678 #endif
6679
6680 static void igb_shutdown(struct pci_dev *pdev)
6681 {
6682         bool wake;
6683
6684         __igb_shutdown(pdev, &wake, 0);
6685
6686         if (system_state == SYSTEM_POWER_OFF) {
6687                 pci_wake_from_d3(pdev, wake);
6688                 pci_set_power_state(pdev, PCI_D3hot);
6689         }
6690 }
6691
6692 #ifdef CONFIG_NET_POLL_CONTROLLER
6693 /*
6694  * Polling 'interrupt' - used by things like netconsole to send skbs
6695  * without having to re-enable interrupts. It's not called while
6696  * the interrupt routine is executing.
6697  */
6698 static void igb_netpoll(struct net_device *netdev)
6699 {
6700         struct igb_adapter *adapter = netdev_priv(netdev);
6701         struct e1000_hw *hw = &adapter->hw;
6702         struct igb_q_vector *q_vector;
6703         int i;
6704
6705         for (i = 0; i < adapter->num_q_vectors; i++) {
6706                 q_vector = adapter->q_vector[i];
6707                 if (adapter->msix_entries)
6708                         wr32(E1000_EIMC, q_vector->eims_value);
6709                 else
6710                         igb_irq_disable(adapter);
6711                 napi_schedule(&q_vector->napi);
6712         }
6713 }
6714 #endif /* CONFIG_NET_POLL_CONTROLLER */
6715
6716 /**
6717  * igb_io_error_detected - called when PCI error is detected
6718  * @pdev: Pointer to PCI device
6719  * @state: The current pci connection state
6720  *
6721  * This function is called after a PCI bus error affecting
6722  * this device has been detected.
6723  */
6724 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6725                                               pci_channel_state_t state)
6726 {
6727         struct net_device *netdev = pci_get_drvdata(pdev);
6728         struct igb_adapter *adapter = netdev_priv(netdev);
6729
6730         netif_device_detach(netdev);
6731
6732         if (state == pci_channel_io_perm_failure)
6733                 return PCI_ERS_RESULT_DISCONNECT;
6734
6735         if (netif_running(netdev))
6736                 igb_down(adapter);
6737         pci_disable_device(pdev);
6738
6739         /* Request a slot reset. */
6740         return PCI_ERS_RESULT_NEED_RESET;
6741 }
6742
6743 /**
6744  * igb_io_slot_reset - called after the pci bus has been reset.
6745  * @pdev: Pointer to PCI device
6746  *
6747  * Restart the card from scratch, as if from a cold-boot. Implementation
6748  * resembles the first-half of the igb_resume routine.
6749  */
6750 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6751 {
6752         struct net_device *netdev = pci_get_drvdata(pdev);
6753         struct igb_adapter *adapter = netdev_priv(netdev);
6754         struct e1000_hw *hw = &adapter->hw;
6755         pci_ers_result_t result;
6756         int err;
6757
6758         if (pci_enable_device_mem(pdev)) {
6759                 dev_err(&pdev->dev,
6760                         "Cannot re-enable PCI device after reset.\n");
6761                 result = PCI_ERS_RESULT_DISCONNECT;
6762         } else {
6763                 pci_set_master(pdev);
6764                 pci_restore_state(pdev);
6765                 pci_save_state(pdev);
6766
6767                 pci_enable_wake(pdev, PCI_D3hot, 0);
6768                 pci_enable_wake(pdev, PCI_D3cold, 0);
6769
6770                 igb_reset(adapter);
6771                 wr32(E1000_WUS, ~0);
6772                 result = PCI_ERS_RESULT_RECOVERED;
6773         }
6774
6775         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6776         if (err) {
6777                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6778                         "failed 0x%0x\n", err);
6779                 /* non-fatal, continue */
6780         }
6781
6782         return result;
6783 }
6784
6785 /**
6786  * igb_io_resume - called when traffic can start flowing again.
6787  * @pdev: Pointer to PCI device
6788  *
6789  * This callback is called when the error recovery driver tells us that
6790  * its OK to resume normal operation. Implementation resembles the
6791  * second-half of the igb_resume routine.
6792  */
6793 static void igb_io_resume(struct pci_dev *pdev)
6794 {
6795         struct net_device *netdev = pci_get_drvdata(pdev);
6796         struct igb_adapter *adapter = netdev_priv(netdev);
6797
6798         if (netif_running(netdev)) {
6799                 if (igb_up(adapter)) {
6800                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6801                         return;
6802                 }
6803         }
6804
6805         netif_device_attach(netdev);
6806
6807         /* let the f/w know that the h/w is now under the control of the
6808          * driver. */
6809         igb_get_hw_control(adapter);
6810 }
6811
6812 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6813                              u8 qsel)
6814 {
6815         u32 rar_low, rar_high;
6816         struct e1000_hw *hw = &adapter->hw;
6817
6818         /* HW expects these in little endian so we reverse the byte order
6819          * from network order (big endian) to little endian
6820          */
6821         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6822                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6823         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
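        /* e.g. for the (made-up) address 00:1b:21:aa:bb:cc this yields
         * rar_low = 0xaa211b00 and rar_high = 0x0000ccbb before the
         * valid/pool bits are OR'ed in below
         */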
6824
6825         /* Indicate to hardware the Address is Valid. */
6826         rar_high |= E1000_RAH_AV;
6827
6828         if (hw->mac.type == e1000_82575)
6829                 rar_high |= E1000_RAH_POOL_1 * qsel;
6830         else
6831                 rar_high |= E1000_RAH_POOL_1 << qsel;
6832
6833         wr32(E1000_RAL(index), rar_low);
6834         wrfl();
6835         wr32(E1000_RAH(index), rar_high);
6836         wrfl();
6837 }
6838
6839 static int igb_set_vf_mac(struct igb_adapter *adapter,
6840                           int vf, unsigned char *mac_addr)
6841 {
6842         struct e1000_hw *hw = &adapter->hw;
6843         /* VF MAC addresses start at the end of the receive address array
6844          * and move towards the first; as a result a collision should not be possible */
6845         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6846
6847         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6848
6849         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6850
6851         return 0;
6852 }
6853
6854 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6855 {
6856         struct igb_adapter *adapter = netdev_priv(netdev);
6857         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6858                 return -EINVAL;
6859         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6860         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6861         dev_info(&adapter->pdev->dev,
6862                  "Reload the VF driver to make this change effective.\n");
6863         if (test_bit(__IGB_DOWN, &adapter->state)) {
6864                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6865                          " but the PF device is not up.\n");
6866                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6867                          " attempting to use the VF device.\n");
6868         }
6869         return igb_set_vf_mac(adapter, vf, mac);
6870 }
6871
6872 static int igb_link_mbps(int internal_link_speed)
6873 {
6874         switch (internal_link_speed) {
6875         case SPEED_100:
6876                 return 100;
6877         case SPEED_1000:
6878                 return 1000;
6879         default:
6880                 return 0;
6881         }
6882 }
6883
6884 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6885                                   int link_speed)
6886 {
6887         int rf_dec, rf_int;
6888         u32 bcnrc_val;
6889
6890         if (tx_rate != 0) {
6891                 /* Calculate the rate factor values to set */
6892                 rf_int = link_speed / tx_rate;
6893                 rf_dec = (link_speed - (rf_int * tx_rate));
6894                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
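                /* illustrative numbers (not from the original source): with
                 * link_speed = 1000 and tx_rate = 300, rf_int = 3 and rf_dec
                 * encodes the remaining 100/300 as a binary fraction, giving
                 * a rate factor of roughly 3.333 = link_speed / tx_rate
                 */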
6895
6896                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6897                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6898                                E1000_RTTBCNRC_RF_INT_MASK);
6899                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6900         } else {
6901                 bcnrc_val = 0;
6902         }
6903
6904         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6905         /*
6906          * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
6907          * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
6908          */
6909         wr32(E1000_RTTBCNRM, 0x14);
6910         wr32(E1000_RTTBCNRC, bcnrc_val);
6911 }
6912
6913 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6914 {
6915         int actual_link_speed, i;
6916         bool reset_rate = false;
6917
6918         /* VF TX rate limit was not set or not supported */
6919         if ((adapter->vf_rate_link_speed == 0) ||
6920             (adapter->hw.mac.type != e1000_82576))
6921                 return;
6922
6923         actual_link_speed = igb_link_mbps(adapter->link_speed);
6924         if (actual_link_speed != adapter->vf_rate_link_speed) {
6925                 reset_rate = true;
6926                 adapter->vf_rate_link_speed = 0;
6927                 dev_info(&adapter->pdev->dev,
6928                          "Link speed has been changed. VF Transmit "
6929                          "rate is disabled\n");
6930         }
6931
6932         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6933                 if (reset_rate)
6934                         adapter->vf_data[i].tx_rate = 0;
6935
6936                 igb_set_vf_rate_limit(&adapter->hw, i,
6937                                       adapter->vf_data[i].tx_rate,
6938                                       actual_link_speed);
6939         }
6940 }
6941
6942 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6943 {
6944         struct igb_adapter *adapter = netdev_priv(netdev);
6945         struct e1000_hw *hw = &adapter->hw;
6946         int actual_link_speed;
6947
6948         if (hw->mac.type != e1000_82576)
6949                 return -EOPNOTSUPP;
6950
6951         actual_link_speed = igb_link_mbps(adapter->link_speed);
6952         if ((vf >= adapter->vfs_allocated_count) ||
6953             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6954             (tx_rate < 0) || (tx_rate > actual_link_speed))
6955                 return -EINVAL;
6956
6957         adapter->vf_rate_link_speed = actual_link_speed;
6958         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6959         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6960
6961         return 0;
6962 }
6963
6964 static int igb_ndo_get_vf_config(struct net_device *netdev,
6965                                  int vf, struct ifla_vf_info *ivi)
6966 {
6967         struct igb_adapter *adapter = netdev_priv(netdev);
6968         if (vf >= adapter->vfs_allocated_count)
6969                 return -EINVAL;
6970         ivi->vf = vf;
6971         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6972         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6973         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6974         ivi->qos = adapter->vf_data[vf].pf_qos;
6975         return 0;
6976 }
6977
6978 static void igb_vmm_control(struct igb_adapter *adapter)
6979 {
6980         struct e1000_hw *hw = &adapter->hw;
6981         u32 reg;
6982
6983         switch (hw->mac.type) {
6984         case e1000_82575:
6985         case e1000_i210:
6986         case e1000_i211:
6987         default:
6988                 /* replication is not supported on 82575, i210 or i211 */
6989                 return;
6990         case e1000_82576:
6991                 /* notify HW that the MAC is adding vlan tags */
6992                 reg = rd32(E1000_DTXCTL);
6993                 reg |= E1000_DTXCTL_VLAN_ADDED;
6994                 wr32(E1000_DTXCTL, reg);
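                /* fall through */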
6995         case e1000_82580:
6996                 /* enable replication vlan tag stripping */
6997                 reg = rd32(E1000_RPLOLR);
6998                 reg |= E1000_RPLOLR_STRVLAN;
6999                 wr32(E1000_RPLOLR, reg);
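                /* fall through */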
7000         case e1000_i350:
7001                 /* none of the above registers are supported by i350 */
7002                 break;
7003         }
7004
7005         if (adapter->vfs_allocated_count) {
7006                 igb_vmdq_set_loopback_pf(hw, true);
7007                 igb_vmdq_set_replication_pf(hw, true);
7008                 igb_vmdq_set_anti_spoofing_pf(hw, true,
7009                                                 adapter->vfs_allocated_count);
7010         } else {
7011                 igb_vmdq_set_loopback_pf(hw, false);
7012                 igb_vmdq_set_replication_pf(hw, false);
7013         }
7014 }
7015
7016 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7017 {
7018         struct e1000_hw *hw = &adapter->hw;
7019         u32 dmac_thr;
7020         u16 hwm;
7021
7022         if (hw->mac.type > e1000_82580) {
7023                 if (adapter->flags & IGB_FLAG_DMAC) {
7024                         u32 reg;
7025
7026                         /* force threshold to 0. */
7027                         wr32(E1000_DMCTXTH, 0);
7028
7029                         /*
7030                          * DMA Coalescing high water mark needs to be greater
7031                          * than the Rx threshold. Set hwm to PBA - max frame
7032                          * size in 16B units, but no lower than PBA - 6KB.
7033                          */
7034                         hwm = 64 * pba - adapter->max_frame_size / 16;
7035                         if (hwm < 64 * (pba - 6))
7036                                 hwm = 64 * (pba - 6);
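                        /* e.g. (illustrative) with pba = 34 (KB) and a
                         * 1522-byte max frame: hwm = 64 * 34 - 1522 / 16 =
                         * 2081 16-byte units, above the 64 * (34 - 6) = 1792
                         * floor, so it is left as-is
                         */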
7037                         reg = rd32(E1000_FCRTC);
7038                         reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7039                         reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7040                                 & E1000_FCRTC_RTH_COAL_MASK);
7041                         wr32(E1000_FCRTC, reg);
7042
7043                         /*
7044                          * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7045                          * frame size, but no lower than PBA - 10KB.
7046                          */
7047                         dmac_thr = pba - adapter->max_frame_size / 512;
7048                         if (dmac_thr < pba - 10)
7049                                 dmac_thr = pba - 10;
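                        /* with the same illustrative numbers: dmac_thr =
                         * 34 - 1522 / 512 = 32 (KB), above the 34 - 10 = 24
                         * floor
                         */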
7050                         reg = rd32(E1000_DMACR);
7051                         reg &= ~E1000_DMACR_DMACTHR_MASK;
7052                         reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7053                                 & E1000_DMACR_DMACTHR_MASK);
7054
7055                         /* transition to L0s or L1 if available */
7056                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7057
7058                         /* watchdog timer = +-1000 usec in 32 usec intervals */
7059                         reg |= (1000 >> 5);
7060
7061                         /* Disable BMC-to-OS Watchdog Enable */
7062                         reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
7063                         wr32(E1000_DMACR, reg);
7064
7065                         /*
7066                          * no lower threshold to disable
7067                          * coalescing (smart fifb) - UTRESH=0
7068                          */
7069                         wr32(E1000_DMCRTRH, 0);
7070
7071                         reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7072
7073                         wr32(E1000_DMCTLX, reg);
7074
7075                         /*
7076                          * free space in the Tx packet buffer needed to wake
7077                          * from DMA coalescing
7078                          */
7079                         wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7080                              (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7081
7082                         /*
7083                          * make the low power state decision controlled
7084                          * by DMA coalescing
7085                          */
7086                         reg = rd32(E1000_PCIEMISC);
7087                         reg &= ~E1000_PCIEMISC_LX_DECISION;
7088                         wr32(E1000_PCIEMISC, reg);
7089                         } /* end if IGB_FLAG_DMAC set */
7090         } else if (hw->mac.type == e1000_82580) {
7091                 u32 reg = rd32(E1000_PCIEMISC);
7092                 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7093                 wr32(E1000_DMACR, 0);
7094         }
7095 }
7096
7097 /* igb_main.c */