1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2012 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
56 #include <linux/pm_runtime.h>
58 #include <linux/dca.h>
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66 __stringify(BUILD) "-k"
67 char igb_driver_name[] = "igb";
68 char igb_driver_version[] = DRV_VERSION;
69 static const char igb_driver_string[] =
70 "Intel(R) Gigabit Ethernet Network Driver";
71 static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
73 static const struct e1000_info *igb_info_tbl[] = {
74 [board_82575] = &e1000_82575_info,
77 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
97 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
98 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
99 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
100 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
101 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
102 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
103 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
104 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
105 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
106 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
107 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
108 /* required last entry */
112 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
114 void igb_reset(struct igb_adapter *);
115 static int igb_setup_all_tx_resources(struct igb_adapter *);
116 static int igb_setup_all_rx_resources(struct igb_adapter *);
117 static void igb_free_all_tx_resources(struct igb_adapter *);
118 static void igb_free_all_rx_resources(struct igb_adapter *);
119 static void igb_setup_mrqc(struct igb_adapter *);
120 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
121 static void __devexit igb_remove(struct pci_dev *pdev);
122 static int igb_sw_init(struct igb_adapter *);
123 static int igb_open(struct net_device *);
124 static int igb_close(struct net_device *);
125 static void igb_configure_tx(struct igb_adapter *);
126 static void igb_configure_rx(struct igb_adapter *);
127 static void igb_clean_all_tx_rings(struct igb_adapter *);
128 static void igb_clean_all_rx_rings(struct igb_adapter *);
129 static void igb_clean_tx_ring(struct igb_ring *);
130 static void igb_clean_rx_ring(struct igb_ring *);
131 static void igb_set_rx_mode(struct net_device *);
132 static void igb_update_phy_info(unsigned long);
133 static void igb_watchdog(unsigned long);
134 static void igb_watchdog_task(struct work_struct *);
135 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
136 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
137 struct rtnl_link_stats64 *stats);
138 static int igb_change_mtu(struct net_device *, int);
139 static int igb_set_mac(struct net_device *, void *);
140 static void igb_set_uta(struct igb_adapter *adapter);
141 static irqreturn_t igb_intr(int irq, void *);
142 static irqreturn_t igb_intr_msi(int irq, void *);
143 static irqreturn_t igb_msix_other(int irq, void *);
144 static irqreturn_t igb_msix_ring(int irq, void *);
145 #ifdef CONFIG_IGB_DCA
146 static void igb_update_dca(struct igb_q_vector *);
147 static void igb_setup_dca(struct igb_adapter *);
148 #endif /* CONFIG_IGB_DCA */
149 static int igb_poll(struct napi_struct *, int);
150 static bool igb_clean_tx_irq(struct igb_q_vector *);
151 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
152 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
153 static void igb_tx_timeout(struct net_device *);
154 static void igb_reset_task(struct work_struct *);
155 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
156 static int igb_vlan_rx_add_vid(struct net_device *, u16);
157 static int igb_vlan_rx_kill_vid(struct net_device *, u16);
158 static void igb_restore_vlan(struct igb_adapter *);
159 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
160 static void igb_ping_all_vfs(struct igb_adapter *);
161 static void igb_msg_task(struct igb_adapter *);
162 static void igb_vmm_control(struct igb_adapter *);
163 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
164 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
165 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
166 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
167 int vf, u16 vlan, u8 qos);
168 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
169 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
170 struct ifla_vf_info *ivi);
171 static void igb_check_vf_rate_limit(struct igb_adapter *);
173 #ifdef CONFIG_PCI_IOV
174 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
175 static int igb_find_enabled_vfs(struct igb_adapter *adapter);
176 static int igb_check_vf_assignment(struct igb_adapter *adapter);
180 #ifdef CONFIG_PM_SLEEP
181 static int igb_suspend(struct device *);
183 static int igb_resume(struct device *);
184 #ifdef CONFIG_PM_RUNTIME
185 static int igb_runtime_suspend(struct device *dev);
186 static int igb_runtime_resume(struct device *dev);
187 static int igb_runtime_idle(struct device *dev);
189 static const struct dev_pm_ops igb_pm_ops = {
190 SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
191 SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
195 static void igb_shutdown(struct pci_dev *);
196 #ifdef CONFIG_IGB_DCA
197 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
198 static struct notifier_block dca_notifier = {
199 .notifier_call = igb_notify_dca,
204 #ifdef CONFIG_NET_POLL_CONTROLLER
205 /* for netdump / net console */
206 static void igb_netpoll(struct net_device *);
208 #ifdef CONFIG_PCI_IOV
209 static unsigned int max_vfs = 0;
210 module_param(max_vfs, uint, 0);
211 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
212 "per physical function");
213 #endif /* CONFIG_PCI_IOV */
215 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
216 pci_channel_state_t);
217 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
218 static void igb_io_resume(struct pci_dev *);
220 static struct pci_error_handlers igb_err_handler = {
221 .error_detected = igb_io_error_detected,
222 .slot_reset = igb_io_slot_reset,
223 .resume = igb_io_resume,
226 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
228 static struct pci_driver igb_driver = {
229 .name = igb_driver_name,
230 .id_table = igb_pci_tbl,
232 .remove = __devexit_p(igb_remove),
234 .driver.pm = &igb_pm_ops,
236 .shutdown = igb_shutdown,
237 .err_handler = &igb_err_handler
240 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
241 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
242 MODULE_LICENSE("GPL");
243 MODULE_VERSION(DRV_VERSION);
245 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
246 static int debug = -1;
247 module_param(debug, int, 0);
248 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
250 struct igb_reg_info {
255 static const struct igb_reg_info igb_reg_info_tbl[] = {
257 /* General Registers */
258 {E1000_CTRL, "CTRL"},
259 {E1000_STATUS, "STATUS"},
260 {E1000_CTRL_EXT, "CTRL_EXT"},
262 /* Interrupt Registers */
266 {E1000_RCTL, "RCTL"},
267 {E1000_RDLEN(0), "RDLEN"},
268 {E1000_RDH(0), "RDH"},
269 {E1000_RDT(0), "RDT"},
270 {E1000_RXDCTL(0), "RXDCTL"},
271 {E1000_RDBAL(0), "RDBAL"},
272 {E1000_RDBAH(0), "RDBAH"},
275 {E1000_TCTL, "TCTL"},
276 {E1000_TDBAL(0), "TDBAL"},
277 {E1000_TDBAH(0), "TDBAH"},
278 {E1000_TDLEN(0), "TDLEN"},
279 {E1000_TDH(0), "TDH"},
280 {E1000_TDT(0), "TDT"},
281 {E1000_TXDCTL(0), "TXDCTL"},
282 {E1000_TDFH, "TDFH"},
283 {E1000_TDFT, "TDFT"},
284 {E1000_TDFHS, "TDFHS"},
285 {E1000_TDFPC, "TDFPC"},
287 /* List Terminator */
292 * igb_regdump - register printout routine
294 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
300 switch (reginfo->ofs) {
302 for (n = 0; n < 4; n++)
303 regs[n] = rd32(E1000_RDLEN(n));
306 for (n = 0; n < 4; n++)
307 regs[n] = rd32(E1000_RDH(n));
310 for (n = 0; n < 4; n++)
311 regs[n] = rd32(E1000_RDT(n));
313 case E1000_RXDCTL(0):
314 for (n = 0; n < 4; n++)
315 regs[n] = rd32(E1000_RXDCTL(n));
318 for (n = 0; n < 4; n++)
319 regs[n] = rd32(E1000_RDBAL(n));
322 for (n = 0; n < 4; n++)
323 regs[n] = rd32(E1000_RDBAH(n));
326 for (n = 0; n < 4; n++)
327 regs[n] = rd32(E1000_RDBAL(n));
330 for (n = 0; n < 4; n++)
331 regs[n] = rd32(E1000_TDBAH(n));
334 for (n = 0; n < 4; n++)
335 regs[n] = rd32(E1000_TDLEN(n));
338 for (n = 0; n < 4; n++)
339 regs[n] = rd32(E1000_TDH(n));
342 for (n = 0; n < 4; n++)
343 regs[n] = rd32(E1000_TDT(n));
345 case E1000_TXDCTL(0):
346 for (n = 0; n < 4; n++)
347 regs[n] = rd32(E1000_TXDCTL(n));
350 pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
354 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
355 pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
360 * igb_dump - Print registers, tx-rings and rx-rings
362 static void igb_dump(struct igb_adapter *adapter)
364 struct net_device *netdev = adapter->netdev;
365 struct e1000_hw *hw = &adapter->hw;
366 struct igb_reg_info *reginfo;
367 struct igb_ring *tx_ring;
368 union e1000_adv_tx_desc *tx_desc;
369 struct my_u0 { u64 a; u64 b; } *u0;
370 struct igb_ring *rx_ring;
371 union e1000_adv_rx_desc *rx_desc;
375 if (!netif_msg_hw(adapter))
378 /* Print netdevice Info */
380 dev_info(&adapter->pdev->dev, "Net device Info\n");
381 pr_info("Device Name state trans_start "
383 pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
384 netdev->state, netdev->trans_start, netdev->last_rx);
387 /* Print Registers */
388 dev_info(&adapter->pdev->dev, "Register Dump\n");
389 pr_info(" Register Name Value\n");
390 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
391 reginfo->name; reginfo++) {
392 igb_regdump(hw, reginfo);
395 /* Print TX Ring Summary */
396 if (!netdev || !netif_running(netdev))
399 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
400 pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n");
401 for (n = 0; n < adapter->num_tx_queues; n++) {
402 struct igb_tx_buffer *buffer_info;
403 tx_ring = adapter->tx_ring[n];
404 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
405 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
406 n, tx_ring->next_to_use, tx_ring->next_to_clean,
407 (u64)buffer_info->dma,
409 buffer_info->next_to_watch,
410 (u64)buffer_info->time_stamp);
414 if (!netif_msg_tx_done(adapter))
415 goto rx_ring_summary;
417 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
419 /* Transmit Descriptor Formats
421 * Advanced Transmit Descriptor
422 * +--------------------------------------------------------------+
423 * 0 | Buffer Address [63:0] |
424 * +--------------------------------------------------------------+
425 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
426 * +--------------------------------------------------------------+
427 * 63 46 45 40 39 38 36 35 32 31 24 15 0
430 for (n = 0; n < adapter->num_tx_queues; n++) {
431 tx_ring = adapter->tx_ring[n];
432 pr_info("------------------------------------\n");
433 pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
434 pr_info("------------------------------------\n");
435 pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] "
436 "[bi->dma ] leng ntw timestamp "
439 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
440 const char *next_desc;
441 struct igb_tx_buffer *buffer_info;
442 tx_desc = IGB_TX_DESC(tx_ring, i);
443 buffer_info = &tx_ring->tx_buffer_info[i];
444 u0 = (struct my_u0 *)tx_desc;
445 if (i == tx_ring->next_to_use &&
446 i == tx_ring->next_to_clean)
447 next_desc = " NTC/U";
448 else if (i == tx_ring->next_to_use)
450 else if (i == tx_ring->next_to_clean)
455 pr_info("T [0x%03X] %016llX %016llX %016llX"
456 " %04X %p %016llX %p%s\n", i,
459 (u64)buffer_info->dma,
461 buffer_info->next_to_watch,
462 (u64)buffer_info->time_stamp,
463 buffer_info->skb, next_desc);
465 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
466 print_hex_dump(KERN_INFO, "",
468 16, 1, phys_to_virt(buffer_info->dma),
469 buffer_info->length, true);
473 /* Print RX Rings Summary */
475 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
476 pr_info("Queue [NTU] [NTC]\n");
477 for (n = 0; n < adapter->num_rx_queues; n++) {
478 rx_ring = adapter->rx_ring[n];
479 pr_info(" %5d %5X %5X\n",
480 n, rx_ring->next_to_use, rx_ring->next_to_clean);
484 if (!netif_msg_rx_status(adapter))
487 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
489 /* Advanced Receive Descriptor (Read) Format
491 * +-----------------------------------------------------+
492 * 0 | Packet Buffer Address [63:1] |A0/NSE|
493 * +----------------------------------------------+------+
494 * 8 | Header Buffer Address [63:1] | DD |
495 * +-----------------------------------------------------+
498 * Advanced Receive Descriptor (Write-Back) Format
500 * 63 48 47 32 31 30 21 20 17 16 4 3 0
501 * +------------------------------------------------------+
502 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
503 * | Checksum Ident | | | | Type | Type |
504 * +------------------------------------------------------+
505 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
506 * +------------------------------------------------------+
507 * 63 48 47 32 31 20 19 0
510 for (n = 0; n < adapter->num_rx_queues; n++) {
511 rx_ring = adapter->rx_ring[n];
512 pr_info("------------------------------------\n");
513 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
514 pr_info("------------------------------------\n");
515 pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] "
516 "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n");
517 pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----"
518 "----------- [bi->skb] <-- Adv Rx Write-Back format\n");
520 for (i = 0; i < rx_ring->count; i++) {
521 const char *next_desc;
522 struct igb_rx_buffer *buffer_info;
523 buffer_info = &rx_ring->rx_buffer_info[i];
524 rx_desc = IGB_RX_DESC(rx_ring, i);
525 u0 = (struct my_u0 *)rx_desc;
526 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
528 if (i == rx_ring->next_to_use)
530 else if (i == rx_ring->next_to_clean)
535 if (staterr & E1000_RXD_STAT_DD) {
536 /* Descriptor Done */
537 pr_info("%s[0x%03X] %016llX %016llX -------"
538 "--------- %p%s\n", "RWB", i,
541 buffer_info->skb, next_desc);
543 pr_info("%s[0x%03X] %016llX %016llX %016llX"
547 (u64)buffer_info->dma,
548 buffer_info->skb, next_desc);
550 if (netif_msg_pktdata(adapter)) {
551 print_hex_dump(KERN_INFO, "",
554 phys_to_virt(buffer_info->dma),
555 IGB_RX_HDR_LEN, true);
556 print_hex_dump(KERN_INFO, "",
560 buffer_info->page_dma +
561 buffer_info->page_offset),
573 * igb_get_hw_dev - return device
574 * used by hardware layer to print debugging information
576 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
578 struct igb_adapter *adapter = hw->back;
579 return adapter->netdev;
583 * igb_init_module - Driver Registration Routine
585 * igb_init_module is the first routine called when the driver is
586 * loaded. All it does is register with the PCI subsystem.
588 static int __init igb_init_module(void)
591 pr_info("%s - version %s\n",
592 igb_driver_string, igb_driver_version);
594 pr_info("%s\n", igb_copyright);
596 #ifdef CONFIG_IGB_DCA
597 dca_register_notify(&dca_notifier);
599 ret = pci_register_driver(&igb_driver);
603 module_init(igb_init_module);
606 * igb_exit_module - Driver Exit Cleanup Routine
608 * igb_exit_module is called just before the driver is removed
611 static void __exit igb_exit_module(void)
613 #ifdef CONFIG_IGB_DCA
614 dca_unregister_notify(&dca_notifier);
616 pci_unregister_driver(&igb_driver);
619 module_exit(igb_exit_module);
621 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
623 * igb_cache_ring_register - Descriptor ring to register mapping
624 * @adapter: board private structure to initialize
626 * Once we know the feature-set enabled for the device, we'll cache
627 * the register offset the descriptor ring is assigned to.
629 static void igb_cache_ring_register(struct igb_adapter *adapter)
632 u32 rbase_offset = adapter->vfs_allocated_count;
634 switch (adapter->hw.mac.type) {
636 /* The queues are allocated for virtualization such that VF 0
637 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
638 * In order to avoid collision we start at the first free queue
639 * and continue consuming queues in the same sequence
641 if (adapter->vfs_allocated_count) {
642 for (; i < adapter->rss_queues; i++)
643 adapter->rx_ring[i]->reg_idx = rbase_offset +
652 for (; i < adapter->num_rx_queues; i++)
653 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
654 for (; j < adapter->num_tx_queues; j++)
655 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
660 static void igb_free_queues(struct igb_adapter *adapter)
664 for (i = 0; i < adapter->num_tx_queues; i++) {
665 kfree(adapter->tx_ring[i]);
666 adapter->tx_ring[i] = NULL;
668 for (i = 0; i < adapter->num_rx_queues; i++) {
669 kfree(adapter->rx_ring[i]);
670 adapter->rx_ring[i] = NULL;
672 adapter->num_rx_queues = 0;
673 adapter->num_tx_queues = 0;
677 * igb_alloc_queues - Allocate memory for all rings
678 * @adapter: board private structure to initialize
680 * We allocate one ring per queue at run-time since we don't know the
681 * number of queues at compile-time.
683 static int igb_alloc_queues(struct igb_adapter *adapter)
685 struct igb_ring *ring;
687 int orig_node = adapter->node;
689 for (i = 0; i < adapter->num_tx_queues; i++) {
690 if (orig_node == -1) {
691 int cur_node = next_online_node(adapter->node);
692 if (cur_node == MAX_NUMNODES)
693 cur_node = first_online_node;
694 adapter->node = cur_node;
696 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
699 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702 ring->count = adapter->tx_ring_count;
703 ring->queue_index = i;
704 ring->dev = &adapter->pdev->dev;
705 ring->netdev = adapter->netdev;
706 ring->numa_node = adapter->node;
707 /* For 82575, context index must be unique per ring. */
708 if (adapter->hw.mac.type == e1000_82575)
709 set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
710 adapter->tx_ring[i] = ring;
712 /* Restore the adapter's original node */
713 adapter->node = orig_node;
715 for (i = 0; i < adapter->num_rx_queues; i++) {
716 if (orig_node == -1) {
717 int cur_node = next_online_node(adapter->node);
718 if (cur_node == MAX_NUMNODES)
719 cur_node = first_online_node;
720 adapter->node = cur_node;
722 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
725 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
728 ring->count = adapter->rx_ring_count;
729 ring->queue_index = i;
730 ring->dev = &adapter->pdev->dev;
731 ring->netdev = adapter->netdev;
732 ring->numa_node = adapter->node;
733 /* set flag indicating ring supports SCTP checksum offload */
734 if (adapter->hw.mac.type >= e1000_82576)
735 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
738 * On i350, i210, and i211, loopback VLAN packets
739 * have the tag byte-swapped.
741 if (adapter->hw.mac.type >= e1000_i350)
742 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
744 adapter->rx_ring[i] = ring;
746 /* Restore the adapter's original node */
747 adapter->node = orig_node;
749 igb_cache_ring_register(adapter);
754 /* Restore the adapter's original node */
755 adapter->node = orig_node;
756 igb_free_queues(adapter);
762 * igb_write_ivar - configure ivar for given MSI-X vector
763 * @hw: pointer to the HW structure
764 * @msix_vector: vector number we are allocating to a given ring
765 * @index: row index of IVAR register to write within IVAR table
766 * @offset: column offset of in IVAR, should be multiple of 8
768 * This function is intended to handle the writing of the IVAR register
769 * for adapters 82576 and newer. The IVAR table consists of 2 columns,
770 * each containing an cause allocation for an Rx and Tx ring, and a
771 * variable number of rows depending on the number of queues supported.
773 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
774 int index, int offset)
776 u32 ivar = array_rd32(E1000_IVAR0, index);
778 /* clear any bits that are currently set */
779 ivar &= ~((u32)0xFF << offset);
781 /* write vector and valid bit */
782 ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
784 array_wr32(E1000_IVAR0, index, ivar);
787 #define IGB_N0_QUEUE -1
788 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
790 struct igb_adapter *adapter = q_vector->adapter;
791 struct e1000_hw *hw = &adapter->hw;
792 int rx_queue = IGB_N0_QUEUE;
793 int tx_queue = IGB_N0_QUEUE;
796 if (q_vector->rx.ring)
797 rx_queue = q_vector->rx.ring->reg_idx;
798 if (q_vector->tx.ring)
799 tx_queue = q_vector->tx.ring->reg_idx;
801 switch (hw->mac.type) {
803 /* The 82575 assigns vectors using a bitmask, which matches the
804 bitmask for the EICR/EIMS/EIMC registers. To assign one
805 or more queues to a vector, we write the appropriate bits
806 into the MSIXBM register for that vector. */
807 if (rx_queue > IGB_N0_QUEUE)
808 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
809 if (tx_queue > IGB_N0_QUEUE)
810 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
811 if (!adapter->msix_entries && msix_vector == 0)
812 msixbm |= E1000_EIMS_OTHER;
813 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
814 q_vector->eims_value = msixbm;
818 * 82576 uses a table that essentially consists of 2 columns
819 * with 8 rows. The ordering is column-major so we use the
820 * lower 3 bits as the row index, and the 4th bit as the
823 if (rx_queue > IGB_N0_QUEUE)
824 igb_write_ivar(hw, msix_vector,
826 (rx_queue & 0x8) << 1);
827 if (tx_queue > IGB_N0_QUEUE)
828 igb_write_ivar(hw, msix_vector,
830 ((tx_queue & 0x8) << 1) + 8);
831 q_vector->eims_value = 1 << msix_vector;
838 * On 82580 and newer adapters the scheme is similar to 82576
839 * however instead of ordering column-major we have things
840 * ordered row-major. So we traverse the table by using
841 * bit 0 as the column offset, and the remaining bits as the
844 if (rx_queue > IGB_N0_QUEUE)
845 igb_write_ivar(hw, msix_vector,
847 (rx_queue & 0x1) << 4);
848 if (tx_queue > IGB_N0_QUEUE)
849 igb_write_ivar(hw, msix_vector,
851 ((tx_queue & 0x1) << 4) + 8);
852 q_vector->eims_value = 1 << msix_vector;
859 /* add q_vector eims value to global eims_enable_mask */
860 adapter->eims_enable_mask |= q_vector->eims_value;
862 /* configure q_vector to set itr on first interrupt */
863 q_vector->set_itr = 1;
867 * igb_configure_msix - Configure MSI-X hardware
869 * igb_configure_msix sets up the hardware to properly
870 * generate MSI-X interrupts.
872 static void igb_configure_msix(struct igb_adapter *adapter)
876 struct e1000_hw *hw = &adapter->hw;
878 adapter->eims_enable_mask = 0;
880 /* set vector for other causes, i.e. link changes */
881 switch (hw->mac.type) {
883 tmp = rd32(E1000_CTRL_EXT);
884 /* enable MSI-X PBA support*/
885 tmp |= E1000_CTRL_EXT_PBA_CLR;
887 /* Auto-Mask interrupts upon ICR read. */
888 tmp |= E1000_CTRL_EXT_EIAME;
889 tmp |= E1000_CTRL_EXT_IRCA;
891 wr32(E1000_CTRL_EXT, tmp);
893 /* enable msix_other interrupt */
894 array_wr32(E1000_MSIXBM(0), vector++,
896 adapter->eims_other = E1000_EIMS_OTHER;
905 /* Turn on MSI-X capability first, or our settings
906 * won't stick. And it will take days to debug. */
907 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
908 E1000_GPIE_PBA | E1000_GPIE_EIAME |
911 /* enable msix_other interrupt */
912 adapter->eims_other = 1 << vector;
913 tmp = (vector++ | E1000_IVAR_VALID) << 8;
915 wr32(E1000_IVAR_MISC, tmp);
918 /* do nothing, since nothing else supports MSI-X */
920 } /* switch (hw->mac.type) */
922 adapter->eims_enable_mask |= adapter->eims_other;
924 for (i = 0; i < adapter->num_q_vectors; i++)
925 igb_assign_vector(adapter->q_vector[i], vector++);
931 * igb_request_msix - Initialize MSI-X interrupts
933 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
936 static int igb_request_msix(struct igb_adapter *adapter)
938 struct net_device *netdev = adapter->netdev;
939 struct e1000_hw *hw = &adapter->hw;
940 int i, err = 0, vector = 0;
942 err = request_irq(adapter->msix_entries[vector].vector,
943 igb_msix_other, 0, netdev->name, adapter);
948 for (i = 0; i < adapter->num_q_vectors; i++) {
949 struct igb_q_vector *q_vector = adapter->q_vector[i];
951 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
953 if (q_vector->rx.ring && q_vector->tx.ring)
954 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
955 q_vector->rx.ring->queue_index);
956 else if (q_vector->tx.ring)
957 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
958 q_vector->tx.ring->queue_index);
959 else if (q_vector->rx.ring)
960 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
961 q_vector->rx.ring->queue_index);
963 sprintf(q_vector->name, "%s-unused", netdev->name);
965 err = request_irq(adapter->msix_entries[vector].vector,
966 igb_msix_ring, 0, q_vector->name,
973 igb_configure_msix(adapter);
979 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
981 if (adapter->msix_entries) {
982 pci_disable_msix(adapter->pdev);
983 kfree(adapter->msix_entries);
984 adapter->msix_entries = NULL;
985 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
986 pci_disable_msi(adapter->pdev);
991 * igb_free_q_vectors - Free memory allocated for interrupt vectors
992 * @adapter: board private structure to initialize
994 * This function frees the memory allocated to the q_vectors. In addition if
995 * NAPI is enabled it will delete any references to the NAPI struct prior
996 * to freeing the q_vector.
998 static void igb_free_q_vectors(struct igb_adapter *adapter)
1002 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1003 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1004 adapter->q_vector[v_idx] = NULL;
1007 netif_napi_del(&q_vector->napi);
1010 adapter->num_q_vectors = 0;
1014 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1016 * This function resets the device so that it has 0 rx queues, tx queues, and
1017 * MSI-X interrupts allocated.
1019 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1021 igb_free_queues(adapter);
1022 igb_free_q_vectors(adapter);
1023 igb_reset_interrupt_capability(adapter);
1027 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1029 * Attempt to configure interrupts using the best available
1030 * capabilities of the hardware and kernel.
1032 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1037 /* Number of supported queues. */
1038 adapter->num_rx_queues = adapter->rss_queues;
1039 if (adapter->vfs_allocated_count)
1040 adapter->num_tx_queues = 1;
1042 adapter->num_tx_queues = adapter->rss_queues;
1044 /* start with one vector for every rx queue */
1045 numvecs = adapter->num_rx_queues;
1047 /* if tx handler is separate add 1 for every tx queue */
1048 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1049 numvecs += adapter->num_tx_queues;
1051 /* i210 and i211 can only have 4 MSIX vectors for rx/tx queues. */
1052 if ((adapter->hw.mac.type == e1000_i210)
1053 || (adapter->hw.mac.type == e1000_i211))
1056 /* store the number of vectors reserved for queues */
1057 adapter->num_q_vectors = numvecs;
1059 /* add 1 vector for link status interrupts */
1061 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1064 if (!adapter->msix_entries)
1067 for (i = 0; i < numvecs; i++)
1068 adapter->msix_entries[i].entry = i;
1070 err = pci_enable_msix(adapter->pdev,
1071 adapter->msix_entries,
1076 igb_reset_interrupt_capability(adapter);
1078 /* If we can't do MSI-X, try MSI */
1080 #ifdef CONFIG_PCI_IOV
1081 /* disable SR-IOV for non MSI-X configurations */
1082 if (adapter->vf_data) {
1083 struct e1000_hw *hw = &adapter->hw;
1084 /* disable iov and allow time for transactions to clear */
1085 pci_disable_sriov(adapter->pdev);
1088 kfree(adapter->vf_data);
1089 adapter->vf_data = NULL;
1090 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1093 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1096 adapter->vfs_allocated_count = 0;
1097 adapter->rss_queues = 1;
1098 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1099 adapter->num_rx_queues = 1;
1100 adapter->num_tx_queues = 1;
1101 adapter->num_q_vectors = 1;
1102 if (!pci_enable_msi(adapter->pdev))
1103 adapter->flags |= IGB_FLAG_HAS_MSI;
1105 /* Notify the stack of the (possibly) reduced queue counts. */
1107 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1108 err = netif_set_real_num_rx_queues(adapter->netdev,
1109 adapter->num_rx_queues);
1115 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1116 * @adapter: board private structure to initialize
1118 * We allocate one q_vector per queue interrupt. If allocation fails we
1121 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1123 struct igb_q_vector *q_vector;
1124 struct e1000_hw *hw = &adapter->hw;
1126 int orig_node = adapter->node;
1128 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1129 if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1130 adapter->num_tx_queues)) &&
1131 (adapter->num_rx_queues == v_idx))
1132 adapter->node = orig_node;
1133 if (orig_node == -1) {
1134 int cur_node = next_online_node(adapter->node);
1135 if (cur_node == MAX_NUMNODES)
1136 cur_node = first_online_node;
1137 adapter->node = cur_node;
1139 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1142 q_vector = kzalloc(sizeof(struct igb_q_vector),
1146 q_vector->adapter = adapter;
1147 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1148 q_vector->itr_val = IGB_START_ITR;
1149 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1150 adapter->q_vector[v_idx] = q_vector;
1152 /* Restore the adapter's original node */
1153 adapter->node = orig_node;
1158 /* Restore the adapter's original node */
1159 adapter->node = orig_node;
1160 igb_free_q_vectors(adapter);
1164 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1165 int ring_idx, int v_idx)
1167 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1169 q_vector->rx.ring = adapter->rx_ring[ring_idx];
1170 q_vector->rx.ring->q_vector = q_vector;
1171 q_vector->rx.count++;
1172 q_vector->itr_val = adapter->rx_itr_setting;
1173 if (q_vector->itr_val && q_vector->itr_val <= 3)
1174 q_vector->itr_val = IGB_START_ITR;
1177 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1178 int ring_idx, int v_idx)
1180 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1182 q_vector->tx.ring = adapter->tx_ring[ring_idx];
1183 q_vector->tx.ring->q_vector = q_vector;
1184 q_vector->tx.count++;
1185 q_vector->itr_val = adapter->tx_itr_setting;
1186 q_vector->tx.work_limit = adapter->tx_work_limit;
1187 if (q_vector->itr_val && q_vector->itr_val <= 3)
1188 q_vector->itr_val = IGB_START_ITR;
1192 * igb_map_ring_to_vector - maps allocated queues to vectors
1194 * This function maps the recently allocated queues to vectors.
1196 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1201 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1202 (adapter->num_q_vectors < adapter->num_tx_queues))
1205 if (adapter->num_q_vectors >=
1206 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1207 for (i = 0; i < adapter->num_rx_queues; i++)
1208 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1209 for (i = 0; i < adapter->num_tx_queues; i++)
1210 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1212 for (i = 0; i < adapter->num_rx_queues; i++) {
1213 if (i < adapter->num_tx_queues)
1214 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1215 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1217 for (; i < adapter->num_tx_queues; i++)
1218 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1224 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1226 * This function initializes the interrupts and allocates all of the queues.
1228 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1230 struct pci_dev *pdev = adapter->pdev;
1233 err = igb_set_interrupt_capability(adapter);
1237 err = igb_alloc_q_vectors(adapter);
1239 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1240 goto err_alloc_q_vectors;
1243 err = igb_alloc_queues(adapter);
1245 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1246 goto err_alloc_queues;
1249 err = igb_map_ring_to_vector(adapter);
1251 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1252 goto err_map_queues;
1258 igb_free_queues(adapter);
1260 igb_free_q_vectors(adapter);
1261 err_alloc_q_vectors:
1262 igb_reset_interrupt_capability(adapter);
1267 * igb_request_irq - initialize interrupts
1269 * Attempts to configure interrupts using the best available
1270 * capabilities of the hardware and kernel.
1272 static int igb_request_irq(struct igb_adapter *adapter)
1274 struct net_device *netdev = adapter->netdev;
1275 struct pci_dev *pdev = adapter->pdev;
1278 if (adapter->msix_entries) {
1279 err = igb_request_msix(adapter);
1282 /* fall back to MSI */
1283 igb_clear_interrupt_scheme(adapter);
1284 if (!pci_enable_msi(pdev))
1285 adapter->flags |= IGB_FLAG_HAS_MSI;
1286 igb_free_all_tx_resources(adapter);
1287 igb_free_all_rx_resources(adapter);
1288 adapter->num_tx_queues = 1;
1289 adapter->num_rx_queues = 1;
1290 adapter->num_q_vectors = 1;
1291 err = igb_alloc_q_vectors(adapter);
1294 "Unable to allocate memory for vectors\n");
1297 err = igb_alloc_queues(adapter);
1300 "Unable to allocate memory for queues\n");
1301 igb_free_q_vectors(adapter);
1304 igb_setup_all_tx_resources(adapter);
1305 igb_setup_all_rx_resources(adapter);
1308 igb_assign_vector(adapter->q_vector[0], 0);
1310 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1311 err = request_irq(pdev->irq, igb_intr_msi, 0,
1312 netdev->name, adapter);
1316 /* fall back to legacy interrupts */
1317 igb_reset_interrupt_capability(adapter);
1318 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1321 err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1322 netdev->name, adapter);
1325 dev_err(&pdev->dev, "Error %d getting interrupt\n",
1332 static void igb_free_irq(struct igb_adapter *adapter)
1334 if (adapter->msix_entries) {
1337 free_irq(adapter->msix_entries[vector++].vector, adapter);
1339 for (i = 0; i < adapter->num_q_vectors; i++)
1340 free_irq(adapter->msix_entries[vector++].vector,
1341 adapter->q_vector[i]);
1343 free_irq(adapter->pdev->irq, adapter);
1348 * igb_irq_disable - Mask off interrupt generation on the NIC
1349 * @adapter: board private structure
1351 static void igb_irq_disable(struct igb_adapter *adapter)
1353 struct e1000_hw *hw = &adapter->hw;
1356 * we need to be careful when disabling interrupts. The VFs are also
1357 * mapped into these registers and so clearing the bits can cause
1358 * issues on the VF drivers so we only need to clear what we set
1360 if (adapter->msix_entries) {
1361 u32 regval = rd32(E1000_EIAM);
1362 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1363 wr32(E1000_EIMC, adapter->eims_enable_mask);
1364 regval = rd32(E1000_EIAC);
1365 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1369 wr32(E1000_IMC, ~0);
1371 if (adapter->msix_entries) {
1373 for (i = 0; i < adapter->num_q_vectors; i++)
1374 synchronize_irq(adapter->msix_entries[i].vector);
1376 synchronize_irq(adapter->pdev->irq);
1381 * igb_irq_enable - Enable default interrupt generation settings
1382 * @adapter: board private structure
1384 static void igb_irq_enable(struct igb_adapter *adapter)
1386 struct e1000_hw *hw = &adapter->hw;
1388 if (adapter->msix_entries) {
1389 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1390 u32 regval = rd32(E1000_EIAC);
1391 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1392 regval = rd32(E1000_EIAM);
1393 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1394 wr32(E1000_EIMS, adapter->eims_enable_mask);
1395 if (adapter->vfs_allocated_count) {
1396 wr32(E1000_MBVFIMR, 0xFF);
1397 ims |= E1000_IMS_VMMB;
1399 wr32(E1000_IMS, ims);
1401 wr32(E1000_IMS, IMS_ENABLE_MASK |
1403 wr32(E1000_IAM, IMS_ENABLE_MASK |
1408 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1410 struct e1000_hw *hw = &adapter->hw;
1411 u16 vid = adapter->hw.mng_cookie.vlan_id;
1412 u16 old_vid = adapter->mng_vlan_id;
1414 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1415 /* add VID to filter table */
1416 igb_vfta_set(hw, vid, true);
1417 adapter->mng_vlan_id = vid;
1419 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1422 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1424 !test_bit(old_vid, adapter->active_vlans)) {
1425 /* remove VID from filter table */
1426 igb_vfta_set(hw, old_vid, false);
1431 * igb_release_hw_control - release control of the h/w to f/w
1432 * @adapter: address of board private structure
1434 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1435 * For ASF and Pass Through versions of f/w this means that the
1436 * driver is no longer loaded.
1439 static void igb_release_hw_control(struct igb_adapter *adapter)
1441 struct e1000_hw *hw = &adapter->hw;
1444 /* Let firmware take over control of h/w */
1445 ctrl_ext = rd32(E1000_CTRL_EXT);
1446 wr32(E1000_CTRL_EXT,
1447 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1451 * igb_get_hw_control - get control of the h/w from f/w
1452 * @adapter: address of board private structure
1454 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1455 * For ASF and Pass Through versions of f/w this means that
1456 * the driver is loaded.
1459 static void igb_get_hw_control(struct igb_adapter *adapter)
1461 struct e1000_hw *hw = &adapter->hw;
1464 /* Let firmware know the driver has taken over */
1465 ctrl_ext = rd32(E1000_CTRL_EXT);
1466 wr32(E1000_CTRL_EXT,
1467 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1471 * igb_configure - configure the hardware for RX and TX
1472 * @adapter: private board structure
1474 static void igb_configure(struct igb_adapter *adapter)
1476 struct net_device *netdev = adapter->netdev;
1479 igb_get_hw_control(adapter);
1480 igb_set_rx_mode(netdev);
1482 igb_restore_vlan(adapter);
1484 igb_setup_tctl(adapter);
1485 igb_setup_mrqc(adapter);
1486 igb_setup_rctl(adapter);
1488 igb_configure_tx(adapter);
1489 igb_configure_rx(adapter);
1491 igb_rx_fifo_flush_82575(&adapter->hw);
1493 /* call igb_desc_unused which always leaves
1494 * at least 1 descriptor unused to make sure
1495 * next_to_use != next_to_clean */
1496 for (i = 0; i < adapter->num_rx_queues; i++) {
1497 struct igb_ring *ring = adapter->rx_ring[i];
1498 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1503 * igb_power_up_link - Power up the phy/serdes link
1504 * @adapter: address of board private structure
1506 void igb_power_up_link(struct igb_adapter *adapter)
1508 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1509 igb_power_up_phy_copper(&adapter->hw);
1511 igb_power_up_serdes_link_82575(&adapter->hw);
1512 igb_reset_phy(&adapter->hw);
1516 * igb_power_down_link - Power down the phy/serdes link
1517 * @adapter: address of board private structure
1519 static void igb_power_down_link(struct igb_adapter *adapter)
1521 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1522 igb_power_down_phy_copper_82575(&adapter->hw);
1524 igb_shutdown_serdes_link_82575(&adapter->hw);
1528 * igb_up - Open the interface and prepare it to handle traffic
1529 * @adapter: board private structure
1531 int igb_up(struct igb_adapter *adapter)
1533 struct e1000_hw *hw = &adapter->hw;
1536 /* hardware has been reset, we need to reload some things */
1537 igb_configure(adapter);
1539 clear_bit(__IGB_DOWN, &adapter->state);
1541 for (i = 0; i < adapter->num_q_vectors; i++)
1542 napi_enable(&(adapter->q_vector[i]->napi));
1544 if (adapter->msix_entries)
1545 igb_configure_msix(adapter);
1547 igb_assign_vector(adapter->q_vector[0], 0);
1549 /* Clear any pending interrupts. */
1551 igb_irq_enable(adapter);
1553 /* notify VFs that reset has been completed */
1554 if (adapter->vfs_allocated_count) {
1555 u32 reg_data = rd32(E1000_CTRL_EXT);
1556 reg_data |= E1000_CTRL_EXT_PFRSTD;
1557 wr32(E1000_CTRL_EXT, reg_data);
1560 netif_tx_start_all_queues(adapter->netdev);
1562 /* start the watchdog. */
1563 hw->mac.get_link_status = 1;
1564 schedule_work(&adapter->watchdog_task);
1569 void igb_down(struct igb_adapter *adapter)
1571 struct net_device *netdev = adapter->netdev;
1572 struct e1000_hw *hw = &adapter->hw;
1576 /* signal that we're down so the interrupt handler does not
1577 * reschedule our watchdog timer */
1578 set_bit(__IGB_DOWN, &adapter->state);
1580 /* disable receives in the hardware */
1581 rctl = rd32(E1000_RCTL);
1582 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1583 /* flush and sleep below */
1585 netif_tx_stop_all_queues(netdev);
1587 /* disable transmits in the hardware */
1588 tctl = rd32(E1000_TCTL);
1589 tctl &= ~E1000_TCTL_EN;
1590 wr32(E1000_TCTL, tctl);
1591 /* flush both disables and wait for them to finish */
1595 for (i = 0; i < adapter->num_q_vectors; i++)
1596 napi_disable(&(adapter->q_vector[i]->napi));
1598 igb_irq_disable(adapter);
1600 del_timer_sync(&adapter->watchdog_timer);
1601 del_timer_sync(&adapter->phy_info_timer);
1603 netif_carrier_off(netdev);
1605 /* record the stats before reset*/
1606 spin_lock(&adapter->stats64_lock);
1607 igb_update_stats(adapter, &adapter->stats64);
1608 spin_unlock(&adapter->stats64_lock);
1610 adapter->link_speed = 0;
1611 adapter->link_duplex = 0;
1613 if (!pci_channel_offline(adapter->pdev))
1615 igb_clean_all_tx_rings(adapter);
1616 igb_clean_all_rx_rings(adapter);
1617 #ifdef CONFIG_IGB_DCA
1619 /* since we reset the hardware DCA settings were cleared */
1620 igb_setup_dca(adapter);
1624 void igb_reinit_locked(struct igb_adapter *adapter)
1626 WARN_ON(in_interrupt());
1627 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1631 clear_bit(__IGB_RESETTING, &adapter->state);
1634 void igb_reset(struct igb_adapter *adapter)
1636 struct pci_dev *pdev = adapter->pdev;
1637 struct e1000_hw *hw = &adapter->hw;
1638 struct e1000_mac_info *mac = &hw->mac;
1639 struct e1000_fc_info *fc = &hw->fc;
1640 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1643 /* Repartition Pba for greater than 9k mtu
1644 * To take effect CTRL.RST is required.
1646 switch (mac->type) {
1649 pba = rd32(E1000_RXPBS);
1650 pba = igb_rxpbs_adjust_82580(pba);
1653 pba = rd32(E1000_RXPBS);
1654 pba &= E1000_RXPBS_SIZE_MASK_82576;
1660 pba = E1000_PBA_34K;
1664 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1665 (mac->type < e1000_82576)) {
1666 /* adjust PBA for jumbo frames */
1667 wr32(E1000_PBA, pba);
1669 /* To maintain wire speed transmits, the Tx FIFO should be
1670 * large enough to accommodate two full transmit packets,
1671 * rounded up to the next 1KB and expressed in KB. Likewise,
1672 * the Rx FIFO should be large enough to accommodate at least
1673 * one full receive packet and is similarly rounded up and
1674 * expressed in KB. */
1675 pba = rd32(E1000_PBA);
1676 /* upper 16 bits has Tx packet buffer allocation size in KB */
1677 tx_space = pba >> 16;
1678 /* lower 16 bits has Rx packet buffer allocation size in KB */
1680 /* the tx fifo also stores 16 bytes of information about the tx
1681 * but don't include ethernet FCS because hardware appends it */
1682 min_tx_space = (adapter->max_frame_size +
1683 sizeof(union e1000_adv_tx_desc) -
1685 min_tx_space = ALIGN(min_tx_space, 1024);
1686 min_tx_space >>= 10;
1687 /* software strips receive CRC, so leave room for it */
1688 min_rx_space = adapter->max_frame_size;
1689 min_rx_space = ALIGN(min_rx_space, 1024);
1690 min_rx_space >>= 10;
1692 /* If current Tx allocation is less than the min Tx FIFO size,
1693 * and the min Tx FIFO size is less than the current Rx FIFO
1694 * allocation, take space away from current Rx allocation */
1695 if (tx_space < min_tx_space &&
1696 ((min_tx_space - tx_space) < pba)) {
1697 pba = pba - (min_tx_space - tx_space);
1699 /* if short on rx space, rx wins and must trump tx
1701 if (pba < min_rx_space)
1704 wr32(E1000_PBA, pba);
1707 /* flow control settings */
1708 /* The high water mark must be low enough to fit one full frame
1709 * (or the size used for early receive) above it in the Rx FIFO.
1710 * Set it to the lower of:
1711 * - 90% of the Rx FIFO size, or
1712 * - the full Rx FIFO size minus one full frame */
1713 hwm = min(((pba << 10) * 9 / 10),
1714 ((pba << 10) - 2 * adapter->max_frame_size));
1716 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1717 fc->low_water = fc->high_water - 16;
1718 fc->pause_time = 0xFFFF;
1720 fc->current_mode = fc->requested_mode;
1722 /* disable receive for all VFs and wait one second */
1723 if (adapter->vfs_allocated_count) {
1725 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1726 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1728 /* ping all the active vfs to let them know we are going down */
1729 igb_ping_all_vfs(adapter);
1731 /* disable transmits and receives */
1732 wr32(E1000_VFRE, 0);
1733 wr32(E1000_VFTE, 0);
1736 /* Allow time for pending master requests to run */
1737 hw->mac.ops.reset_hw(hw);
1740 if (hw->mac.ops.init_hw(hw))
1741 dev_err(&pdev->dev, "Hardware Error\n");
1744 * Flow control settings reset on hardware reset, so guarantee flow
1745 * control is off when forcing speed.
1747 if (!hw->mac.autoneg)
1748 igb_force_mac_fc(hw);
1750 igb_init_dmac(adapter, pba);
1751 if (!netif_running(adapter->netdev))
1752 igb_power_down_link(adapter);
1754 igb_update_mng_vlan(adapter);
1756 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1757 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1759 igb_get_phy_info(hw);
1762 static netdev_features_t igb_fix_features(struct net_device *netdev,
1763 netdev_features_t features)
1766 * Since there is no support for separate rx/tx vlan accel
1767 * enable/disable make sure tx flag is always in same state as rx.
1769 if (features & NETIF_F_HW_VLAN_RX)
1770 features |= NETIF_F_HW_VLAN_TX;
1772 features &= ~NETIF_F_HW_VLAN_TX;
1777 static int igb_set_features(struct net_device *netdev,
1778 netdev_features_t features)
1780 netdev_features_t changed = netdev->features ^ features;
1781 struct igb_adapter *adapter = netdev_priv(netdev);
1783 if (changed & NETIF_F_HW_VLAN_RX)
1784 igb_vlan_mode(netdev, features);
1786 if (!(changed & NETIF_F_RXALL))
1789 netdev->features = features;
1791 if (netif_running(netdev))
1792 igb_reinit_locked(adapter);
1799 static const struct net_device_ops igb_netdev_ops = {
1800 .ndo_open = igb_open,
1801 .ndo_stop = igb_close,
1802 .ndo_start_xmit = igb_xmit_frame,
1803 .ndo_get_stats64 = igb_get_stats64,
1804 .ndo_set_rx_mode = igb_set_rx_mode,
1805 .ndo_set_mac_address = igb_set_mac,
1806 .ndo_change_mtu = igb_change_mtu,
1807 .ndo_do_ioctl = igb_ioctl,
1808 .ndo_tx_timeout = igb_tx_timeout,
1809 .ndo_validate_addr = eth_validate_addr,
1810 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1811 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1812 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1813 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1814 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1815 .ndo_get_vf_config = igb_ndo_get_vf_config,
1816 #ifdef CONFIG_NET_POLL_CONTROLLER
1817 .ndo_poll_controller = igb_netpoll,
1819 .ndo_fix_features = igb_fix_features,
1820 .ndo_set_features = igb_set_features,
1824 * igb_probe - Device Initialization Routine
1825 * @pdev: PCI device information struct
1826 * @ent: entry in igb_pci_tbl
1828 * Returns 0 on success, negative on failure
1830 * igb_probe initializes an adapter identified by a pci_dev structure.
1831 * The OS initialization, configuring of the adapter private structure,
1832 * and a hardware reset occur.
1834 static int __devinit igb_probe(struct pci_dev *pdev,
1835 const struct pci_device_id *ent)
1837 struct net_device *netdev;
1838 struct igb_adapter *adapter;
1839 struct e1000_hw *hw;
1840 u16 eeprom_data = 0;
1842 static int global_quad_port_a; /* global quad port a indication */
1843 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1844 unsigned long mmio_start, mmio_len;
1845 int err, pci_using_dac;
1846 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1847 u8 part_str[E1000_PBANUM_LENGTH];
1849 /* Catch broken hardware that put the wrong VF device ID in
1850 * the PCIe SR-IOV capability.
1852 if (pdev->is_virtfn) {
1853 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1854 pci_name(pdev), pdev->vendor, pdev->device);
1858 err = pci_enable_device_mem(pdev);
1863 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1865 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1869 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1871 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1873 dev_err(&pdev->dev, "No usable DMA "
1874 "configuration, aborting\n");
1880 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1886 pci_enable_pcie_error_reporting(pdev);
1888 pci_set_master(pdev);
1889 pci_save_state(pdev);
1892 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1895 goto err_alloc_etherdev;
1897 SET_NETDEV_DEV(netdev, &pdev->dev);
1899 pci_set_drvdata(pdev, netdev);
1900 adapter = netdev_priv(netdev);
1901 adapter->netdev = netdev;
1902 adapter->pdev = pdev;
1905 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1907 mmio_start = pci_resource_start(pdev, 0);
1908 mmio_len = pci_resource_len(pdev, 0);
1911 hw->hw_addr = ioremap(mmio_start, mmio_len);
1915 netdev->netdev_ops = &igb_netdev_ops;
1916 igb_set_ethtool_ops(netdev);
1917 netdev->watchdog_timeo = 5 * HZ;
1919 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1921 netdev->mem_start = mmio_start;
1922 netdev->mem_end = mmio_start + mmio_len;
1924 /* PCI config space info */
1925 hw->vendor_id = pdev->vendor;
1926 hw->device_id = pdev->device;
1927 hw->revision_id = pdev->revision;
1928 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1929 hw->subsystem_device_id = pdev->subsystem_device;
1931 /* Copy the default MAC, PHY and NVM function pointers */
1932 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1933 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1934 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1935 /* Initialize skew-specific constants */
1936 err = ei->get_invariants(hw);
1940 /* setup the private structure */
1941 err = igb_sw_init(adapter);
1945 igb_get_bus_info_pcie(hw);
1947 hw->phy.autoneg_wait_to_complete = false;
1949 /* Copper options */
1950 if (hw->phy.media_type == e1000_media_type_copper) {
1951 hw->phy.mdix = AUTO_ALL_MODES;
1952 hw->phy.disable_polarity_correction = false;
1953 hw->phy.ms_type = e1000_ms_hw_default;
1956 if (igb_check_reset_block(hw))
1957 dev_info(&pdev->dev,
1958 "PHY reset is blocked due to SOL/IDER session.\n");
1961 * features is initialized to 0 in allocation, it might have bits
1962 * set by igb_sw_init so we should use an or instead of an
1965 netdev->features |= NETIF_F_SG |
1972 NETIF_F_HW_VLAN_RX |
1975 /* copy netdev features into list of user selectable features */
1976 netdev->hw_features |= netdev->features;
1977 netdev->hw_features |= NETIF_F_RXALL;
1979 /* set this bit last since it cannot be part of hw_features */
1980 netdev->features |= NETIF_F_HW_VLAN_FILTER;
1982 netdev->vlan_features |= NETIF_F_TSO |
1988 netdev->priv_flags |= IFF_SUPP_NOFCS;
1990 if (pci_using_dac) {
1991 netdev->features |= NETIF_F_HIGHDMA;
1992 netdev->vlan_features |= NETIF_F_HIGHDMA;
1995 if (hw->mac.type >= e1000_82576) {
1996 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1997 netdev->features |= NETIF_F_SCTP_CSUM;
2000 netdev->priv_flags |= IFF_UNICAST_FLT;
2002 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2004 /* before reading the NVM, reset the controller to put the device in a
2005 * known good starting state */
2006 hw->mac.ops.reset_hw(hw);
2009 * make sure the NVM is good , i211 parts have special NVM that
2010 * doesn't contain a checksum
2012 if (hw->mac.type != e1000_i211) {
2013 if (hw->nvm.ops.validate(hw) < 0) {
2014 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2020 /* copy the MAC address out of the NVM */
2021 if (hw->mac.ops.read_mac_addr(hw))
2022 dev_err(&pdev->dev, "NVM Read Error\n");
2024 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2025 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2027 if (!is_valid_ether_addr(netdev->perm_addr)) {
2028 dev_err(&pdev->dev, "Invalid MAC Address\n");
2033 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2034 (unsigned long) adapter);
2035 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2036 (unsigned long) adapter);
2038 INIT_WORK(&adapter->reset_task, igb_reset_task);
2039 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2041 /* Initialize link properties that are user-changeable */
2042 adapter->fc_autoneg = true;
2043 hw->mac.autoneg = true;
2044 hw->phy.autoneg_advertised = 0x2f;
2046 hw->fc.requested_mode = e1000_fc_default;
2047 hw->fc.current_mode = e1000_fc_default;
2049 igb_validate_mdi_setting(hw);
2051 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2052 * enable the ACPI Magic Packet filter
2055 if (hw->bus.func == 0)
2056 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2057 else if (hw->mac.type >= e1000_82580)
2058 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2059 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2061 else if (hw->bus.func == 1)
2062 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2064 if (eeprom_data & eeprom_apme_mask)
2065 adapter->eeprom_wol |= E1000_WUFC_MAG;
2067 /* now that we have the eeprom settings, apply the special cases where
2068 * the eeprom may be wrong or the board simply won't support wake on
2069 * lan on a particular port */
2070 switch (pdev->device) {
2071 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2072 adapter->eeprom_wol = 0;
2074 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2075 case E1000_DEV_ID_82576_FIBER:
2076 case E1000_DEV_ID_82576_SERDES:
2077 /* Wake events only supported on port A for dual fiber
2078 * regardless of eeprom setting */
2079 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2080 adapter->eeprom_wol = 0;
2082 case E1000_DEV_ID_82576_QUAD_COPPER:
2083 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2084 /* if quad port adapter, disable WoL on all but port A */
2085 if (global_quad_port_a != 0)
2086 adapter->eeprom_wol = 0;
2088 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2089 /* Reset for multiple quad port adapters */
2090 if (++global_quad_port_a == 4)
2091 global_quad_port_a = 0;
2095 /* initialize the wol settings based on the eeprom settings */
2096 adapter->wol = adapter->eeprom_wol;
2097 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2099 /* reset the hardware with the new settings */
2102 /* let the f/w know that the h/w is now under the control of the
2104 igb_get_hw_control(adapter);
2106 strcpy(netdev->name, "eth%d");
2107 err = register_netdev(netdev);
2111 /* carrier off reporting is important to ethtool even BEFORE open */
2112 netif_carrier_off(netdev);
2114 #ifdef CONFIG_IGB_DCA
2115 if (dca_add_requester(&pdev->dev) == 0) {
2116 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2117 dev_info(&pdev->dev, "DCA enabled\n");
2118 igb_setup_dca(adapter);
2122 #ifdef CONFIG_IGB_PTP
2123 /* do hw tstamp init after resetting */
2124 igb_ptp_init(adapter);
2127 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2128 /* print bus type/speed/width info */
2129 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2131 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2132 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2134 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2135 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2136 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2140 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2142 strcpy(part_str, "Unknown");
2143 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2144 dev_info(&pdev->dev,
2145 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2146 adapter->msix_entries ? "MSI-X" :
2147 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2148 adapter->num_rx_queues, adapter->num_tx_queues);
2149 switch (hw->mac.type) {
2153 igb_set_eee_i350(hw);
2159 pm_runtime_put_noidle(&pdev->dev);
2163 igb_release_hw_control(adapter);
2165 if (!igb_check_reset_block(hw))
2168 if (hw->flash_address)
2169 iounmap(hw->flash_address);
2171 igb_clear_interrupt_scheme(adapter);
2172 iounmap(hw->hw_addr);
2174 free_netdev(netdev);
2176 pci_release_selected_regions(pdev,
2177 pci_select_bars(pdev, IORESOURCE_MEM));
2180 pci_disable_device(pdev);
2185 * igb_remove - Device Removal Routine
2186 * @pdev: PCI device information struct
2188 * igb_remove is called by the PCI subsystem to alert the driver
2189 * that it should release a PCI device. The could be caused by a
2190 * Hot-Plug event, or because the driver is going to be removed from
2193 static void __devexit igb_remove(struct pci_dev *pdev)
2195 struct net_device *netdev = pci_get_drvdata(pdev);
2196 struct igb_adapter *adapter = netdev_priv(netdev);
2197 struct e1000_hw *hw = &adapter->hw;
2199 pm_runtime_get_noresume(&pdev->dev);
2200 #ifdef CONFIG_IGB_PTP
2201 igb_ptp_remove(adapter);
2205 * The watchdog timer may be rescheduled, so explicitly
2206 * disable watchdog from being rescheduled.
2208 set_bit(__IGB_DOWN, &adapter->state);
2209 del_timer_sync(&adapter->watchdog_timer);
2210 del_timer_sync(&adapter->phy_info_timer);
2212 cancel_work_sync(&adapter->reset_task);
2213 cancel_work_sync(&adapter->watchdog_task);
2215 #ifdef CONFIG_IGB_DCA
2216 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2217 dev_info(&pdev->dev, "DCA disabled\n");
2218 dca_remove_requester(&pdev->dev);
2219 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2220 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2224 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2225 * would have already happened in close and is redundant. */
2226 igb_release_hw_control(adapter);
2228 unregister_netdev(netdev);
2230 igb_clear_interrupt_scheme(adapter);
2232 #ifdef CONFIG_PCI_IOV
2233 /* reclaim resources allocated to VFs */
2234 if (adapter->vf_data) {
2235 /* disable iov and allow time for transactions to clear */
2236 if (!igb_check_vf_assignment(adapter)) {
2237 pci_disable_sriov(pdev);
2240 dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2243 kfree(adapter->vf_data);
2244 adapter->vf_data = NULL;
2245 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2248 dev_info(&pdev->dev, "IOV Disabled\n");
2252 iounmap(hw->hw_addr);
2253 if (hw->flash_address)
2254 iounmap(hw->flash_address);
2255 pci_release_selected_regions(pdev,
2256 pci_select_bars(pdev, IORESOURCE_MEM));
2258 kfree(adapter->shadow_vfta);
2259 free_netdev(netdev);
2261 pci_disable_pcie_error_reporting(pdev);
2263 pci_disable_device(pdev);
2267 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2268 * @adapter: board private structure to initialize
2270 * This function initializes the vf specific data storage and then attempts to
2271 * allocate the VFs. The reason for ordering it this way is because it is much
2272 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2273 * the memory for the VFs.
2275 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2277 #ifdef CONFIG_PCI_IOV
2278 struct pci_dev *pdev = adapter->pdev;
2279 struct e1000_hw *hw = &adapter->hw;
2280 int old_vfs = igb_find_enabled_vfs(adapter);
2283 /* Virtualization features not supported on i210 family. */
2284 if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
2288 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2289 "max_vfs setting of %d\n", old_vfs, max_vfs);
2290 adapter->vfs_allocated_count = old_vfs;
2293 if (!adapter->vfs_allocated_count)
2296 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2297 sizeof(struct vf_data_storage), GFP_KERNEL);
2299 /* if allocation failed then we do not support SR-IOV */
2300 if (!adapter->vf_data) {
2301 adapter->vfs_allocated_count = 0;
2302 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2308 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2311 dev_info(&pdev->dev, "%d VFs allocated\n",
2312 adapter->vfs_allocated_count);
2313 for (i = 0; i < adapter->vfs_allocated_count; i++)
2314 igb_vf_configure(adapter, i);
2316 /* DMA Coalescing is not supported in IOV mode. */
2317 adapter->flags &= ~IGB_FLAG_DMAC;
2320 kfree(adapter->vf_data);
2321 adapter->vf_data = NULL;
2322 adapter->vfs_allocated_count = 0;
2325 #endif /* CONFIG_PCI_IOV */
2329 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2330 * @adapter: board private structure to initialize
2332 * igb_sw_init initializes the Adapter private data structure.
2333 * Fields are initialized based on PCI device information and
2334 * OS network device settings (MTU size).
2336 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2338 struct e1000_hw *hw = &adapter->hw;
2339 struct net_device *netdev = adapter->netdev;
2340 struct pci_dev *pdev = adapter->pdev;
2342 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2344 /* set default ring sizes */
2345 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2346 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2348 /* set default ITR values */
2349 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2350 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2352 /* set default work limits */
2353 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2355 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2357 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2361 spin_lock_init(&adapter->stats64_lock);
2362 #ifdef CONFIG_PCI_IOV
2363 switch (hw->mac.type) {
2367 dev_warn(&pdev->dev,
2368 "Maximum of 7 VFs per PF, using max\n");
2369 adapter->vfs_allocated_count = 7;
2371 adapter->vfs_allocated_count = max_vfs;
2375 adapter->vfs_allocated_count = 0;
2380 #endif /* CONFIG_PCI_IOV */
2381 switch (hw->mac.type) {
2383 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES_I210,
2387 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES_I211,
2391 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES,
2395 /* i350 cannot do RSS and SR-IOV at the same time */
2396 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2397 adapter->rss_queues = 1;
2400 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2401 * then we should combine the queues into a queue pair in order to
2402 * conserve interrupts due to limited supply
2404 if ((adapter->rss_queues > 4) ||
2405 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2406 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2408 /* Setup and initialize a copy of the hw vlan table array */
2409 adapter->shadow_vfta = kzalloc(sizeof(u32) *
2410 E1000_VLAN_FILTER_TBL_SIZE,
2413 /* This call may decrease the number of queues */
2414 if (igb_init_interrupt_scheme(adapter)) {
2415 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2419 igb_probe_vfs(adapter);
2421 /* Explicitly disable IRQ since the NIC can be in any state. */
2422 igb_irq_disable(adapter);
2424 if (hw->mac.type >= e1000_i350)
2425 adapter->flags &= ~IGB_FLAG_DMAC;
2427 set_bit(__IGB_DOWN, &adapter->state);
2432 * igb_open - Called when a network interface is made active
2433 * @netdev: network interface device structure
2435 * Returns 0 on success, negative value on failure
2437 * The open entry point is called when a network interface is made
2438 * active by the system (IFF_UP). At this point all resources needed
2439 * for transmit and receive operations are allocated, the interrupt
2440 * handler is registered with the OS, the watchdog timer is started,
2441 * and the stack is notified that the interface is ready.
2443 static int __igb_open(struct net_device *netdev, bool resuming)
2445 struct igb_adapter *adapter = netdev_priv(netdev);
2446 struct e1000_hw *hw = &adapter->hw;
2447 struct pci_dev *pdev = adapter->pdev;
2451 /* disallow open during test */
2452 if (test_bit(__IGB_TESTING, &adapter->state)) {
2458 pm_runtime_get_sync(&pdev->dev);
2460 netif_carrier_off(netdev);
2462 /* allocate transmit descriptors */
2463 err = igb_setup_all_tx_resources(adapter);
2467 /* allocate receive descriptors */
2468 err = igb_setup_all_rx_resources(adapter);
2472 igb_power_up_link(adapter);
2474 /* before we allocate an interrupt, we must be ready to handle it.
2475 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2476 * as soon as we call pci_request_irq, so we have to setup our
2477 * clean_rx handler before we do so. */
2478 igb_configure(adapter);
2480 err = igb_request_irq(adapter);
2484 /* From here on the code is the same as igb_up() */
2485 clear_bit(__IGB_DOWN, &adapter->state);
2487 for (i = 0; i < adapter->num_q_vectors; i++)
2488 napi_enable(&(adapter->q_vector[i]->napi));
2490 /* Clear any pending interrupts. */
2493 igb_irq_enable(adapter);
2495 /* notify VFs that reset has been completed */
2496 if (adapter->vfs_allocated_count) {
2497 u32 reg_data = rd32(E1000_CTRL_EXT);
2498 reg_data |= E1000_CTRL_EXT_PFRSTD;
2499 wr32(E1000_CTRL_EXT, reg_data);
2502 netif_tx_start_all_queues(netdev);
2505 pm_runtime_put(&pdev->dev);
2507 /* start the watchdog. */
2508 hw->mac.get_link_status = 1;
2509 schedule_work(&adapter->watchdog_task);
2514 igb_release_hw_control(adapter);
2515 igb_power_down_link(adapter);
2516 igb_free_all_rx_resources(adapter);
2518 igb_free_all_tx_resources(adapter);
2522 pm_runtime_put(&pdev->dev);
2527 static int igb_open(struct net_device *netdev)
2529 return __igb_open(netdev, false);
2533 * igb_close - Disables a network interface
2534 * @netdev: network interface device structure
2536 * Returns 0, this is not allowed to fail
2538 * The close entry point is called when an interface is de-activated
2539 * by the OS. The hardware is still under the driver's control, but
2540 * needs to be disabled. A global MAC reset is issued to stop the
2541 * hardware, and all transmit and receive resources are freed.
2543 static int __igb_close(struct net_device *netdev, bool suspending)
2545 struct igb_adapter *adapter = netdev_priv(netdev);
2546 struct pci_dev *pdev = adapter->pdev;
2548 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2551 pm_runtime_get_sync(&pdev->dev);
2554 igb_free_irq(adapter);
2556 igb_free_all_tx_resources(adapter);
2557 igb_free_all_rx_resources(adapter);
2560 pm_runtime_put_sync(&pdev->dev);
2564 static int igb_close(struct net_device *netdev)
2566 return __igb_close(netdev, false);
2570 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2571 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2573 * Return 0 on success, negative on failure
2575 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2577 struct device *dev = tx_ring->dev;
2578 int orig_node = dev_to_node(dev);
2581 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2582 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2583 if (!tx_ring->tx_buffer_info)
2584 tx_ring->tx_buffer_info = vzalloc(size);
2585 if (!tx_ring->tx_buffer_info)
2588 /* round up to nearest 4K */
2589 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2590 tx_ring->size = ALIGN(tx_ring->size, 4096);
2592 set_dev_node(dev, tx_ring->numa_node);
2593 tx_ring->desc = dma_alloc_coherent(dev,
2597 set_dev_node(dev, orig_node);
2599 tx_ring->desc = dma_alloc_coherent(dev,
2607 tx_ring->next_to_use = 0;
2608 tx_ring->next_to_clean = 0;
2613 vfree(tx_ring->tx_buffer_info);
2615 "Unable to allocate memory for the transmit descriptor ring\n");
2620 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2621 * (Descriptors) for all queues
2622 * @adapter: board private structure
2624 * Return 0 on success, negative on failure
2626 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2628 struct pci_dev *pdev = adapter->pdev;
2631 for (i = 0; i < adapter->num_tx_queues; i++) {
2632 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2635 "Allocation for Tx Queue %u failed\n", i);
2636 for (i--; i >= 0; i--)
2637 igb_free_tx_resources(adapter->tx_ring[i]);
2646 * igb_setup_tctl - configure the transmit control registers
2647 * @adapter: Board private structure
2649 void igb_setup_tctl(struct igb_adapter *adapter)
2651 struct e1000_hw *hw = &adapter->hw;
2654 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2655 wr32(E1000_TXDCTL(0), 0);
2657 /* Program the Transmit Control Register */
2658 tctl = rd32(E1000_TCTL);
2659 tctl &= ~E1000_TCTL_CT;
2660 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2661 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2663 igb_config_collision_dist(hw);
2665 /* Enable transmits */
2666 tctl |= E1000_TCTL_EN;
2668 wr32(E1000_TCTL, tctl);
2672 * igb_configure_tx_ring - Configure transmit ring after Reset
2673 * @adapter: board private structure
2674 * @ring: tx ring to configure
2676 * Configure a transmit ring after a reset.
2678 void igb_configure_tx_ring(struct igb_adapter *adapter,
2679 struct igb_ring *ring)
2681 struct e1000_hw *hw = &adapter->hw;
2683 u64 tdba = ring->dma;
2684 int reg_idx = ring->reg_idx;
2686 /* disable the queue */
2687 wr32(E1000_TXDCTL(reg_idx), 0);
2691 wr32(E1000_TDLEN(reg_idx),
2692 ring->count * sizeof(union e1000_adv_tx_desc));
2693 wr32(E1000_TDBAL(reg_idx),
2694 tdba & 0x00000000ffffffffULL);
2695 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2697 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2698 wr32(E1000_TDH(reg_idx), 0);
2699 writel(0, ring->tail);
2701 txdctl |= IGB_TX_PTHRESH;
2702 txdctl |= IGB_TX_HTHRESH << 8;
2703 txdctl |= IGB_TX_WTHRESH << 16;
2705 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2706 wr32(E1000_TXDCTL(reg_idx), txdctl);
2710 * igb_configure_tx - Configure transmit Unit after Reset
2711 * @adapter: board private structure
2713 * Configure the Tx unit of the MAC after a reset.
2715 static void igb_configure_tx(struct igb_adapter *adapter)
2719 for (i = 0; i < adapter->num_tx_queues; i++)
2720 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2724 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2725 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2727 * Returns 0 on success, negative on failure
2729 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2731 struct device *dev = rx_ring->dev;
2732 int orig_node = dev_to_node(dev);
2735 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2736 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2737 if (!rx_ring->rx_buffer_info)
2738 rx_ring->rx_buffer_info = vzalloc(size);
2739 if (!rx_ring->rx_buffer_info)
2742 desc_len = sizeof(union e1000_adv_rx_desc);
2744 /* Round up to nearest 4K */
2745 rx_ring->size = rx_ring->count * desc_len;
2746 rx_ring->size = ALIGN(rx_ring->size, 4096);
2748 set_dev_node(dev, rx_ring->numa_node);
2749 rx_ring->desc = dma_alloc_coherent(dev,
2753 set_dev_node(dev, orig_node);
2755 rx_ring->desc = dma_alloc_coherent(dev,
2763 rx_ring->next_to_clean = 0;
2764 rx_ring->next_to_use = 0;
2769 vfree(rx_ring->rx_buffer_info);
2770 rx_ring->rx_buffer_info = NULL;
2771 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2777 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2778 * (Descriptors) for all queues
2779 * @adapter: board private structure
2781 * Return 0 on success, negative on failure
2783 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2785 struct pci_dev *pdev = adapter->pdev;
2788 for (i = 0; i < adapter->num_rx_queues; i++) {
2789 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2792 "Allocation for Rx Queue %u failed\n", i);
2793 for (i--; i >= 0; i--)
2794 igb_free_rx_resources(adapter->rx_ring[i]);
2803 * igb_setup_mrqc - configure the multiple receive queue control registers
2804 * @adapter: Board private structure
2806 static void igb_setup_mrqc(struct igb_adapter *adapter)
2808 struct e1000_hw *hw = &adapter->hw;
2810 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2815 static const u8 rsshash[40] = {
2816 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2817 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2818 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2819 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2821 /* Fill out hash function seeds */
2822 for (j = 0; j < 10; j++) {
2823 u32 rsskey = rsshash[(j * 4)];
2824 rsskey |= rsshash[(j * 4) + 1] << 8;
2825 rsskey |= rsshash[(j * 4) + 2] << 16;
2826 rsskey |= rsshash[(j * 4) + 3] << 24;
2827 array_wr32(E1000_RSSRK(0), j, rsskey);
2830 num_rx_queues = adapter->rss_queues;
2832 if (adapter->vfs_allocated_count) {
2833 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2834 switch (hw->mac.type) {
2851 if (hw->mac.type == e1000_82575)
2855 for (j = 0; j < (32 * 4); j++) {
2856 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2858 reta.bytes[j & 3] |= num_rx_queues << shift2;
2860 wr32(E1000_RETA(j >> 2), reta.dword);
2864 * Disable raw packet checksumming so that RSS hash is placed in
2865 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2866 * offloads as they are enabled by default
2868 rxcsum = rd32(E1000_RXCSUM);
2869 rxcsum |= E1000_RXCSUM_PCSD;
2871 if (adapter->hw.mac.type >= e1000_82576)
2872 /* Enable Receive Checksum Offload for SCTP */
2873 rxcsum |= E1000_RXCSUM_CRCOFL;
2875 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2876 wr32(E1000_RXCSUM, rxcsum);
2878 * Generate RSS hash based on TCP port numbers and/or
2879 * IPv4/v6 src and dst addresses since UDP cannot be
2880 * hashed reliably due to IP fragmentation
2883 mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
2884 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2885 E1000_MRQC_RSS_FIELD_IPV6 |
2886 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2887 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2889 /* If VMDq is enabled then we set the appropriate mode for that, else
2890 * we default to RSS so that an RSS hash is calculated per packet even
2891 * if we are only using one queue */
2892 if (adapter->vfs_allocated_count) {
2893 if (hw->mac.type > e1000_82575) {
2894 /* Set the default pool for the PF's first queue */
2895 u32 vtctl = rd32(E1000_VT_CTL);
2896 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2897 E1000_VT_CTL_DISABLE_DEF_POOL);
2898 vtctl |= adapter->vfs_allocated_count <<
2899 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2900 wr32(E1000_VT_CTL, vtctl);
2902 if (adapter->rss_queues > 1)
2903 mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2905 mrqc |= E1000_MRQC_ENABLE_VMDQ;
2907 if (hw->mac.type != e1000_i211)
2908 mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
2910 igb_vmm_control(adapter);
2912 wr32(E1000_MRQC, mrqc);
2916 * igb_setup_rctl - configure the receive control registers
2917 * @adapter: Board private structure
2919 void igb_setup_rctl(struct igb_adapter *adapter)
2921 struct e1000_hw *hw = &adapter->hw;
2924 rctl = rd32(E1000_RCTL);
2926 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2927 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2929 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2930 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2933 * enable stripping of CRC. It's unlikely this will break BMC
2934 * redirection as it did with e1000. Newer features require
2935 * that the HW strips the CRC.
2937 rctl |= E1000_RCTL_SECRC;
2939 /* disable store bad packets and clear size bits. */
2940 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2942 /* enable LPE to prevent packets larger than max_frame_size */
2943 rctl |= E1000_RCTL_LPE;
2945 /* disable queue 0 to prevent tail write w/o re-config */
2946 wr32(E1000_RXDCTL(0), 0);
2948 /* Attention!!! For SR-IOV PF driver operations you must enable
2949 * queue drop for all VF and PF queues to prevent head of line blocking
2950 * if an un-trusted VF does not provide descriptors to hardware.
2952 if (adapter->vfs_allocated_count) {
2953 /* set all queue drop enable bits */
2954 wr32(E1000_QDE, ALL_QUEUES);
2957 /* This is useful for sniffing bad packets. */
2958 if (adapter->netdev->features & NETIF_F_RXALL) {
2959 /* UPE and MPE will be handled by normal PROMISC logic
2960 * in e1000e_set_rx_mode */
2961 rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
2962 E1000_RCTL_BAM | /* RX All Bcast Pkts */
2963 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
2965 rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
2966 E1000_RCTL_DPF | /* Allow filtered pause */
2967 E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
2968 /* Do not mess with E1000_CTRL_VME, it affects transmit as well,
2969 * and that breaks VLANs.
2973 wr32(E1000_RCTL, rctl);
2976 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2979 struct e1000_hw *hw = &adapter->hw;
2982 /* if it isn't the PF check to see if VFs are enabled and
2983 * increase the size to support vlan tags */
2984 if (vfn < adapter->vfs_allocated_count &&
2985 adapter->vf_data[vfn].vlans_enabled)
2986 size += VLAN_TAG_SIZE;
2988 vmolr = rd32(E1000_VMOLR(vfn));
2989 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2990 vmolr |= size | E1000_VMOLR_LPE;
2991 wr32(E1000_VMOLR(vfn), vmolr);
2997 * igb_rlpml_set - set maximum receive packet size
2998 * @adapter: board private structure
3000 * Configure maximum receivable packet size.
3002 static void igb_rlpml_set(struct igb_adapter *adapter)
3004 u32 max_frame_size = adapter->max_frame_size;
3005 struct e1000_hw *hw = &adapter->hw;
3006 u16 pf_id = adapter->vfs_allocated_count;
3009 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3011 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3012 * to our max jumbo frame size, in case we need to enable
3013 * jumbo frames on one of the rings later.
3014 * This will not pass over-length frames into the default
3015 * queue because it's gated by the VMOLR.RLPML.
3017 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3020 wr32(E1000_RLPML, max_frame_size);
3023 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3026 struct e1000_hw *hw = &adapter->hw;
3030 * This register exists only on 82576 and newer so if we are older then
3031 * we should exit and do nothing
3033 if (hw->mac.type < e1000_82576)
3036 vmolr = rd32(E1000_VMOLR(vfn));
3037 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3039 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3041 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3043 /* clear all bits that might not be set */
3044 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3046 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3047 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3049 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3052 if (vfn <= adapter->vfs_allocated_count)
3053 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3055 wr32(E1000_VMOLR(vfn), vmolr);
3059 * igb_configure_rx_ring - Configure a receive ring after Reset
3060 * @adapter: board private structure
3061 * @ring: receive ring to be configured
3063 * Configure the Rx unit of the MAC after a reset.
3065 void igb_configure_rx_ring(struct igb_adapter *adapter,
3066 struct igb_ring *ring)
3068 struct e1000_hw *hw = &adapter->hw;
3069 u64 rdba = ring->dma;
3070 int reg_idx = ring->reg_idx;
3071 u32 srrctl = 0, rxdctl = 0;
3073 /* disable the queue */
3074 wr32(E1000_RXDCTL(reg_idx), 0);
3076 /* Set DMA base address registers */
3077 wr32(E1000_RDBAL(reg_idx),
3078 rdba & 0x00000000ffffffffULL);
3079 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3080 wr32(E1000_RDLEN(reg_idx),
3081 ring->count * sizeof(union e1000_adv_rx_desc));
3083 /* initialize head and tail */
3084 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3085 wr32(E1000_RDH(reg_idx), 0);
3086 writel(0, ring->tail);
3088 /* set descriptor configuration */
3089 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3090 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3091 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3093 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3095 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3096 if (hw->mac.type >= e1000_82580)
3097 srrctl |= E1000_SRRCTL_TIMESTAMP;
3098 /* Only set Drop Enable if we are supporting multiple queues */
3099 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3100 srrctl |= E1000_SRRCTL_DROP_EN;
3102 wr32(E1000_SRRCTL(reg_idx), srrctl);
3104 /* set filtering for VMDQ pools */
3105 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3107 rxdctl |= IGB_RX_PTHRESH;
3108 rxdctl |= IGB_RX_HTHRESH << 8;
3109 rxdctl |= IGB_RX_WTHRESH << 16;
3111 /* enable receive descriptor fetching */
3112 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3113 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3117 * igb_configure_rx - Configure receive Unit after Reset
3118 * @adapter: board private structure
3120 * Configure the Rx unit of the MAC after a reset.
3122 static void igb_configure_rx(struct igb_adapter *adapter)
3126 /* set UTA to appropriate mode */
3127 igb_set_uta(adapter);
3129 /* set the correct pool for the PF default MAC address in entry 0 */
3130 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3131 adapter->vfs_allocated_count);
3133 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3134 * the Base and Length of the Rx Descriptor Ring */
3135 for (i = 0; i < adapter->num_rx_queues; i++)
3136 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3140 * igb_free_tx_resources - Free Tx Resources per Queue
3141 * @tx_ring: Tx descriptor ring for a specific queue
3143 * Free all transmit software resources
3145 void igb_free_tx_resources(struct igb_ring *tx_ring)
3147 igb_clean_tx_ring(tx_ring);
3149 vfree(tx_ring->tx_buffer_info);
3150 tx_ring->tx_buffer_info = NULL;
3152 /* if not set, then don't free */
3156 dma_free_coherent(tx_ring->dev, tx_ring->size,
3157 tx_ring->desc, tx_ring->dma);
3159 tx_ring->desc = NULL;
3163 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3164 * @adapter: board private structure
3166 * Free all transmit software resources
3168 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3172 for (i = 0; i < adapter->num_tx_queues; i++)
3173 igb_free_tx_resources(adapter->tx_ring[i]);
3176 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3177 struct igb_tx_buffer *tx_buffer)
3179 if (tx_buffer->skb) {
3180 dev_kfree_skb_any(tx_buffer->skb);
3182 dma_unmap_single(ring->dev,
3186 } else if (tx_buffer->dma) {
3187 dma_unmap_page(ring->dev,
3192 tx_buffer->next_to_watch = NULL;
3193 tx_buffer->skb = NULL;
3195 /* buffer_info must be completely set up in the transmit path */
3199 * igb_clean_tx_ring - Free Tx Buffers
3200 * @tx_ring: ring to be cleaned
3202 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3204 struct igb_tx_buffer *buffer_info;
3208 if (!tx_ring->tx_buffer_info)
3210 /* Free all the Tx ring sk_buffs */
3212 for (i = 0; i < tx_ring->count; i++) {
3213 buffer_info = &tx_ring->tx_buffer_info[i];
3214 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3217 netdev_tx_reset_queue(txring_txq(tx_ring));
3219 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3220 memset(tx_ring->tx_buffer_info, 0, size);
3222 /* Zero out the descriptor ring */
3223 memset(tx_ring->desc, 0, tx_ring->size);
3225 tx_ring->next_to_use = 0;
3226 tx_ring->next_to_clean = 0;
3230 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3231 * @adapter: board private structure
3233 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3237 for (i = 0; i < adapter->num_tx_queues; i++)
3238 igb_clean_tx_ring(adapter->tx_ring[i]);
3242 * igb_free_rx_resources - Free Rx Resources
3243 * @rx_ring: ring to clean the resources from
3245 * Free all receive software resources
3247 void igb_free_rx_resources(struct igb_ring *rx_ring)
3249 igb_clean_rx_ring(rx_ring);
3251 vfree(rx_ring->rx_buffer_info);
3252 rx_ring->rx_buffer_info = NULL;
3254 /* if not set, then don't free */
3258 dma_free_coherent(rx_ring->dev, rx_ring->size,
3259 rx_ring->desc, rx_ring->dma);
3261 rx_ring->desc = NULL;
3265 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3266 * @adapter: board private structure
3268 * Free all receive software resources
3270 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3274 for (i = 0; i < adapter->num_rx_queues; i++)
3275 igb_free_rx_resources(adapter->rx_ring[i]);
3279 * igb_clean_rx_ring - Free Rx Buffers per Queue
3280 * @rx_ring: ring to free buffers from
3282 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3287 if (!rx_ring->rx_buffer_info)
3290 /* Free all the Rx ring sk_buffs */
3291 for (i = 0; i < rx_ring->count; i++) {
3292 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3293 if (buffer_info->dma) {
3294 dma_unmap_single(rx_ring->dev,
3298 buffer_info->dma = 0;
3301 if (buffer_info->skb) {
3302 dev_kfree_skb(buffer_info->skb);
3303 buffer_info->skb = NULL;
3305 if (buffer_info->page_dma) {
3306 dma_unmap_page(rx_ring->dev,
3307 buffer_info->page_dma,
3310 buffer_info->page_dma = 0;
3312 if (buffer_info->page) {
3313 put_page(buffer_info->page);
3314 buffer_info->page = NULL;
3315 buffer_info->page_offset = 0;
3319 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3320 memset(rx_ring->rx_buffer_info, 0, size);
3322 /* Zero out the descriptor ring */
3323 memset(rx_ring->desc, 0, rx_ring->size);
3325 rx_ring->next_to_clean = 0;
3326 rx_ring->next_to_use = 0;
3330 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3331 * @adapter: board private structure
3333 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3337 for (i = 0; i < adapter->num_rx_queues; i++)
3338 igb_clean_rx_ring(adapter->rx_ring[i]);
3342 * igb_set_mac - Change the Ethernet Address of the NIC
3343 * @netdev: network interface device structure
3344 * @p: pointer to an address structure
3346 * Returns 0 on success, negative on failure
3348 static int igb_set_mac(struct net_device *netdev, void *p)
3350 struct igb_adapter *adapter = netdev_priv(netdev);
3351 struct e1000_hw *hw = &adapter->hw;
3352 struct sockaddr *addr = p;
3354 if (!is_valid_ether_addr(addr->sa_data))
3355 return -EADDRNOTAVAIL;
3357 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3358 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3360 /* set the correct pool for the new PF MAC address in entry 0 */
3361 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3362 adapter->vfs_allocated_count);
3368 * igb_write_mc_addr_list - write multicast addresses to MTA
3369 * @netdev: network interface device structure
3371 * Writes multicast address list to the MTA hash table.
3372 * Returns: -ENOMEM on failure
3373 * 0 on no addresses written
3374 * X on writing X addresses to MTA
3376 static int igb_write_mc_addr_list(struct net_device *netdev)
3378 struct igb_adapter *adapter = netdev_priv(netdev);
3379 struct e1000_hw *hw = &adapter->hw;
3380 struct netdev_hw_addr *ha;
3384 if (netdev_mc_empty(netdev)) {
3385 /* nothing to program, so clear mc list */
3386 igb_update_mc_addr_list(hw, NULL, 0);
3387 igb_restore_vf_multicasts(adapter);
3391 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3395 /* The shared function expects a packed array of only addresses. */
3397 netdev_for_each_mc_addr(ha, netdev)
3398 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3400 igb_update_mc_addr_list(hw, mta_list, i);
3403 return netdev_mc_count(netdev);
3407 * igb_write_uc_addr_list - write unicast addresses to RAR table
3408 * @netdev: network interface device structure
3410 * Writes unicast address list to the RAR table.
3411 * Returns: -ENOMEM on failure/insufficient address space
3412 * 0 on no addresses written
3413 * X on writing X addresses to the RAR table
3415 static int igb_write_uc_addr_list(struct net_device *netdev)
3417 struct igb_adapter *adapter = netdev_priv(netdev);
3418 struct e1000_hw *hw = &adapter->hw;
3419 unsigned int vfn = adapter->vfs_allocated_count;
3420 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3423 /* return ENOMEM indicating insufficient memory for addresses */
3424 if (netdev_uc_count(netdev) > rar_entries)
3427 if (!netdev_uc_empty(netdev) && rar_entries) {
3428 struct netdev_hw_addr *ha;
3430 netdev_for_each_uc_addr(ha, netdev) {
3433 igb_rar_set_qsel(adapter, ha->addr,
3439 /* write the addresses in reverse order to avoid write combining */
3440 for (; rar_entries > 0 ; rar_entries--) {
3441 wr32(E1000_RAH(rar_entries), 0);
3442 wr32(E1000_RAL(rar_entries), 0);
3450 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3451 * @netdev: network interface device structure
3453 * The set_rx_mode entry point is called whenever the unicast or multicast
3454 * address lists or the network interface flags are updated. This routine is
3455 * responsible for configuring the hardware for proper unicast, multicast,
3456 * promiscuous mode, and all-multi behavior.
3458 static void igb_set_rx_mode(struct net_device *netdev)
3460 struct igb_adapter *adapter = netdev_priv(netdev);
3461 struct e1000_hw *hw = &adapter->hw;
3462 unsigned int vfn = adapter->vfs_allocated_count;
3463 u32 rctl, vmolr = 0;
3466 /* Check for Promiscuous and All Multicast modes */
3467 rctl = rd32(E1000_RCTL);
3469 /* clear the effected bits */
3470 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3472 if (netdev->flags & IFF_PROMISC) {
3473 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3474 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3476 if (netdev->flags & IFF_ALLMULTI) {
3477 rctl |= E1000_RCTL_MPE;
3478 vmolr |= E1000_VMOLR_MPME;
3481 * Write addresses to the MTA, if the attempt fails
3482 * then we should just turn on promiscuous mode so
3483 * that we can at least receive multicast traffic
3485 count = igb_write_mc_addr_list(netdev);
3487 rctl |= E1000_RCTL_MPE;
3488 vmolr |= E1000_VMOLR_MPME;
3490 vmolr |= E1000_VMOLR_ROMPE;
3494 * Write addresses to available RAR registers, if there is not
3495 * sufficient space to store all the addresses then enable
3496 * unicast promiscuous mode
3498 count = igb_write_uc_addr_list(netdev);
3500 rctl |= E1000_RCTL_UPE;
3501 vmolr |= E1000_VMOLR_ROPE;
3503 rctl |= E1000_RCTL_VFE;
3505 wr32(E1000_RCTL, rctl);
3508 * In order to support SR-IOV and eventually VMDq it is necessary to set
3509 * the VMOLR to enable the appropriate modes. Without this workaround
3510 * we will have issues with VLAN tag stripping not being done for frames
3511 * that are only arriving because we are the default pool
3513 if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
3516 vmolr |= rd32(E1000_VMOLR(vfn)) &
3517 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3518 wr32(E1000_VMOLR(vfn), vmolr);
3519 igb_restore_vf_multicasts(adapter);
3522 static void igb_check_wvbr(struct igb_adapter *adapter)
3524 struct e1000_hw *hw = &adapter->hw;
3527 switch (hw->mac.type) {
3530 if (!(wvbr = rd32(E1000_WVBR)))
3537 adapter->wvbr |= wvbr;
3540 #define IGB_STAGGERED_QUEUE_OFFSET 8
3542 static void igb_spoof_check(struct igb_adapter *adapter)
3549 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3550 if (adapter->wvbr & (1 << j) ||
3551 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3552 dev_warn(&adapter->pdev->dev,
3553 "Spoof event(s) detected on VF %d\n", j);
3556 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3561 /* Need to wait a few seconds after link up to get diagnostic information from
3563 static void igb_update_phy_info(unsigned long data)
3565 struct igb_adapter *adapter = (struct igb_adapter *) data;
3566 igb_get_phy_info(&adapter->hw);
3570 * igb_has_link - check shared code for link and determine up/down
3571 * @adapter: pointer to driver private info
3573 bool igb_has_link(struct igb_adapter *adapter)
3575 struct e1000_hw *hw = &adapter->hw;
3576 bool link_active = false;
3579 /* get_link_status is set on LSC (link status) interrupt or
3580 * rx sequence error interrupt. get_link_status will stay
3581 * false until the e1000_check_for_link establishes link
3582 * for copper adapters ONLY
3584 switch (hw->phy.media_type) {
3585 case e1000_media_type_copper:
3586 if (hw->mac.get_link_status) {
3587 ret_val = hw->mac.ops.check_for_link(hw);
3588 link_active = !hw->mac.get_link_status;
3593 case e1000_media_type_internal_serdes:
3594 ret_val = hw->mac.ops.check_for_link(hw);
3595 link_active = hw->mac.serdes_has_link;
3598 case e1000_media_type_unknown:
3605 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3608 u32 ctrl_ext, thstat;
3610 /* check for thermal sensor event on i350 copper only */
3611 if (hw->mac.type == e1000_i350) {
3612 thstat = rd32(E1000_THSTAT);
3613 ctrl_ext = rd32(E1000_CTRL_EXT);
3615 if ((hw->phy.media_type == e1000_media_type_copper) &&
3616 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3617 ret = !!(thstat & event);
3625 * igb_watchdog - Timer Call-back
3626 * @data: pointer to adapter cast into an unsigned long
3628 static void igb_watchdog(unsigned long data)
3630 struct igb_adapter *adapter = (struct igb_adapter *)data;
3631 /* Do the rest outside of interrupt context */
3632 schedule_work(&adapter->watchdog_task);
3635 static void igb_watchdog_task(struct work_struct *work)
3637 struct igb_adapter *adapter = container_of(work,
3640 struct e1000_hw *hw = &adapter->hw;
3641 struct net_device *netdev = adapter->netdev;
3645 link = igb_has_link(adapter);
3647 /* Cancel scheduled suspend requests. */
3648 pm_runtime_resume(netdev->dev.parent);
3650 if (!netif_carrier_ok(netdev)) {
3652 hw->mac.ops.get_speed_and_duplex(hw,
3653 &adapter->link_speed,
3654 &adapter->link_duplex);
3656 ctrl = rd32(E1000_CTRL);
3657 /* Links status message must follow this format */
3658 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3659 "Duplex, Flow Control: %s\n",
3661 adapter->link_speed,
3662 adapter->link_duplex == FULL_DUPLEX ?
3664 (ctrl & E1000_CTRL_TFCE) &&
3665 (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3666 (ctrl & E1000_CTRL_RFCE) ? "RX" :
3667 (ctrl & E1000_CTRL_TFCE) ? "TX" : "None");
3669 /* check for thermal sensor event */
3670 if (igb_thermal_sensor_event(hw,
3671 E1000_THSTAT_LINK_THROTTLE)) {
3672 netdev_info(netdev, "The network adapter link "
3673 "speed was downshifted because it "
3677 /* adjust timeout factor according to speed/duplex */
3678 adapter->tx_timeout_factor = 1;
3679 switch (adapter->link_speed) {
3681 adapter->tx_timeout_factor = 14;
3684 /* maybe add some timeout factor ? */
3688 netif_carrier_on(netdev);
3690 igb_ping_all_vfs(adapter);
3691 igb_check_vf_rate_limit(adapter);
3693 /* link state has changed, schedule phy info update */
3694 if (!test_bit(__IGB_DOWN, &adapter->state))
3695 mod_timer(&adapter->phy_info_timer,
3696 round_jiffies(jiffies + 2 * HZ));
3699 if (netif_carrier_ok(netdev)) {
3700 adapter->link_speed = 0;
3701 adapter->link_duplex = 0;
3703 /* check for thermal sensor event */
3704 if (igb_thermal_sensor_event(hw,
3705 E1000_THSTAT_PWR_DOWN)) {
3706 netdev_err(netdev, "The network adapter was "
3707 "stopped because it overheated\n");
3710 /* Links status message must follow this format */
3711 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3713 netif_carrier_off(netdev);
3715 igb_ping_all_vfs(adapter);
3717 /* link state has changed, schedule phy info update */
3718 if (!test_bit(__IGB_DOWN, &adapter->state))
3719 mod_timer(&adapter->phy_info_timer,
3720 round_jiffies(jiffies + 2 * HZ));
3722 pm_schedule_suspend(netdev->dev.parent,
3727 spin_lock(&adapter->stats64_lock);
3728 igb_update_stats(adapter, &adapter->stats64);
3729 spin_unlock(&adapter->stats64_lock);
3731 for (i = 0; i < adapter->num_tx_queues; i++) {
3732 struct igb_ring *tx_ring = adapter->tx_ring[i];
3733 if (!netif_carrier_ok(netdev)) {
3734 /* We've lost link, so the controller stops DMA,
3735 * but we've got queued Tx work that's never going
3736 * to get done, so reset controller to flush Tx.
3737 * (Do the reset outside of interrupt context). */
3738 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3739 adapter->tx_timeout_count++;
3740 schedule_work(&adapter->reset_task);
3741 /* return immediately since reset is imminent */
3746 /* Force detection of hung controller every watchdog period */
3747 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3750 /* Cause software interrupt to ensure rx ring is cleaned */
3751 if (adapter->msix_entries) {
3753 for (i = 0; i < adapter->num_q_vectors; i++)
3754 eics |= adapter->q_vector[i]->eims_value;
3755 wr32(E1000_EICS, eics);
3757 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3760 igb_spoof_check(adapter);
3762 /* Reset the timer */
3763 if (!test_bit(__IGB_DOWN, &adapter->state))
3764 mod_timer(&adapter->watchdog_timer,
3765 round_jiffies(jiffies + 2 * HZ));
3768 enum latency_range {
3772 latency_invalid = 255
3776 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3778 * Stores a new ITR value based on strictly on packet size. This
3779 * algorithm is less sophisticated than that used in igb_update_itr,
3780 * due to the difficulty of synchronizing statistics across multiple
3781 * receive rings. The divisors and thresholds used by this function
3782 * were determined based on theoretical maximum wire speed and testing
3783 * data, in order to minimize response time while increasing bulk
3785 * This functionality is controlled by the InterruptThrottleRate module
3786 * parameter (see igb_param.c)
3787 * NOTE: This function is called only when operating in a multiqueue
3788 * receive environment.
3789 * @q_vector: pointer to q_vector
3791 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3793 int new_val = q_vector->itr_val;
3794 int avg_wire_size = 0;
3795 struct igb_adapter *adapter = q_vector->adapter;
3796 unsigned int packets;
3798 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3799 * ints/sec - ITR timer value of 120 ticks.
3801 if (adapter->link_speed != SPEED_1000) {
3802 new_val = IGB_4K_ITR;
3806 packets = q_vector->rx.total_packets;
3808 avg_wire_size = q_vector->rx.total_bytes / packets;
3810 packets = q_vector->tx.total_packets;
3812 avg_wire_size = max_t(u32, avg_wire_size,
3813 q_vector->tx.total_bytes / packets);
3815 /* if avg_wire_size isn't set no work was done */
3819 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3820 avg_wire_size += 24;
3822 /* Don't starve jumbo frames */
3823 avg_wire_size = min(avg_wire_size, 3000);
3825 /* Give a little boost to mid-size frames */
3826 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3827 new_val = avg_wire_size / 3;
3829 new_val = avg_wire_size / 2;
3831 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3832 if (new_val < IGB_20K_ITR &&
3833 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3834 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3835 new_val = IGB_20K_ITR;
3838 if (new_val != q_vector->itr_val) {
3839 q_vector->itr_val = new_val;
3840 q_vector->set_itr = 1;
3843 q_vector->rx.total_bytes = 0;
3844 q_vector->rx.total_packets = 0;
3845 q_vector->tx.total_bytes = 0;
3846 q_vector->tx.total_packets = 0;
3850 * igb_update_itr - update the dynamic ITR value based on statistics
3851 * Stores a new ITR value based on packets and byte
3852 * counts during the last interrupt. The advantage of per interrupt
3853 * computation is faster updates and more accurate ITR for the current
3854 * traffic pattern. Constants in this function were computed
3855 * based on theoretical maximum wire speed and thresholds were set based
3856 * on testing data as well as attempting to minimize response time
3857 * while increasing bulk throughput.
3858 * this functionality is controlled by the InterruptThrottleRate module
3859 * parameter (see igb_param.c)
3860 * NOTE: These calculations are only valid when operating in a single-
3861 * queue environment.
3862 * @q_vector: pointer to q_vector
3863 * @ring_container: ring info to update the itr for
3865 static void igb_update_itr(struct igb_q_vector *q_vector,
3866 struct igb_ring_container *ring_container)
3868 unsigned int packets = ring_container->total_packets;
3869 unsigned int bytes = ring_container->total_bytes;
3870 u8 itrval = ring_container->itr;
3872 /* no packets, exit with status unchanged */
3877 case lowest_latency:
3878 /* handle TSO and jumbo frames */
3879 if (bytes/packets > 8000)
3880 itrval = bulk_latency;
3881 else if ((packets < 5) && (bytes > 512))
3882 itrval = low_latency;
3884 case low_latency: /* 50 usec aka 20000 ints/s */
3885 if (bytes > 10000) {
3886 /* this if handles the TSO accounting */
3887 if (bytes/packets > 8000) {
3888 itrval = bulk_latency;
3889 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3890 itrval = bulk_latency;
3891 } else if ((packets > 35)) {
3892 itrval = lowest_latency;
3894 } else if (bytes/packets > 2000) {
3895 itrval = bulk_latency;
3896 } else if (packets <= 2 && bytes < 512) {
3897 itrval = lowest_latency;
3900 case bulk_latency: /* 250 usec aka 4000 ints/s */
3901 if (bytes > 25000) {
3903 itrval = low_latency;
3904 } else if (bytes < 1500) {
3905 itrval = low_latency;
3910 /* clear work counters since we have the values we need */
3911 ring_container->total_bytes = 0;
3912 ring_container->total_packets = 0;
3914 /* write updated itr to ring container */
3915 ring_container->itr = itrval;
3918 static void igb_set_itr(struct igb_q_vector *q_vector)
3920 struct igb_adapter *adapter = q_vector->adapter;
3921 u32 new_itr = q_vector->itr_val;
3924 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3925 if (adapter->link_speed != SPEED_1000) {
3927 new_itr = IGB_4K_ITR;
3931 igb_update_itr(q_vector, &q_vector->tx);
3932 igb_update_itr(q_vector, &q_vector->rx);
3934 current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3936 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3937 if (current_itr == lowest_latency &&
3938 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3939 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3940 current_itr = low_latency;
3942 switch (current_itr) {
3943 /* counts and packets in update_itr are dependent on these numbers */
3944 case lowest_latency:
3945 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3948 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3951 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
3958 if (new_itr != q_vector->itr_val) {
3959 /* this attempts to bias the interrupt rate towards Bulk
3960 * by adding intermediate steps when interrupt rate is
3962 new_itr = new_itr > q_vector->itr_val ?
3963 max((new_itr * q_vector->itr_val) /
3964 (new_itr + (q_vector->itr_val >> 2)),
3967 /* Don't write the value here; it resets the adapter's
3968 * internal timer, and causes us to delay far longer than
3969 * we should between interrupts. Instead, we write the ITR
3970 * value at the beginning of the next interrupt so the timing
3971 * ends up being correct.
3973 q_vector->itr_val = new_itr;
3974 q_vector->set_itr = 1;
3978 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3979 u32 type_tucmd, u32 mss_l4len_idx)
3981 struct e1000_adv_tx_context_desc *context_desc;
3982 u16 i = tx_ring->next_to_use;
3984 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3987 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
3989 /* set bits to identify this as an advanced context descriptor */
3990 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3992 /* For 82575, context index must be unique per ring. */
3993 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
3994 mss_l4len_idx |= tx_ring->reg_idx << 4;
3996 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
3997 context_desc->seqnum_seed = 0;
3998 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
3999 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4002 static int igb_tso(struct igb_ring *tx_ring,
4003 struct igb_tx_buffer *first,
4006 struct sk_buff *skb = first->skb;
4007 u32 vlan_macip_lens, type_tucmd;
4008 u32 mss_l4len_idx, l4len;
4010 if (!skb_is_gso(skb))
4013 if (skb_header_cloned(skb)) {
4014 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4019 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4020 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4022 if (first->protocol == __constant_htons(ETH_P_IP)) {
4023 struct iphdr *iph = ip_hdr(skb);
4026 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4030 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4031 first->tx_flags |= IGB_TX_FLAGS_TSO |
4034 } else if (skb_is_gso_v6(skb)) {
4035 ipv6_hdr(skb)->payload_len = 0;
4036 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4037 &ipv6_hdr(skb)->daddr,
4039 first->tx_flags |= IGB_TX_FLAGS_TSO |
4043 /* compute header lengths */
4044 l4len = tcp_hdrlen(skb);
4045 *hdr_len = skb_transport_offset(skb) + l4len;
4047 /* update gso size and bytecount with header size */
4048 first->gso_segs = skb_shinfo(skb)->gso_segs;
4049 first->bytecount += (first->gso_segs - 1) * *hdr_len;
4052 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4053 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4055 /* VLAN MACLEN IPLEN */
4056 vlan_macip_lens = skb_network_header_len(skb);
4057 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4058 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4060 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4065 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4067 struct sk_buff *skb = first->skb;
4068 u32 vlan_macip_lens = 0;
4069 u32 mss_l4len_idx = 0;
4072 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4073 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4077 switch (first->protocol) {
4078 case __constant_htons(ETH_P_IP):
4079 vlan_macip_lens |= skb_network_header_len(skb);
4080 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4081 l4_hdr = ip_hdr(skb)->protocol;
4083 case __constant_htons(ETH_P_IPV6):
4084 vlan_macip_lens |= skb_network_header_len(skb);
4085 l4_hdr = ipv6_hdr(skb)->nexthdr;
4088 if (unlikely(net_ratelimit())) {
4089 dev_warn(tx_ring->dev,
4090 "partial checksum but proto=%x!\n",
4098 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4099 mss_l4len_idx = tcp_hdrlen(skb) <<
4100 E1000_ADVTXD_L4LEN_SHIFT;
4103 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4104 mss_l4len_idx = sizeof(struct sctphdr) <<
4105 E1000_ADVTXD_L4LEN_SHIFT;
4108 mss_l4len_idx = sizeof(struct udphdr) <<
4109 E1000_ADVTXD_L4LEN_SHIFT;
4112 if (unlikely(net_ratelimit())) {
4113 dev_warn(tx_ring->dev,
4114 "partial checksum but l4 proto=%x!\n",
4120 /* update TX checksum flag */
4121 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4124 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4125 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4127 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4130 static __le32 igb_tx_cmd_type(u32 tx_flags)
4132 /* set type for advanced descriptor with frame checksum insertion */
4133 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4134 E1000_ADVTXD_DCMD_IFCS |
4135 E1000_ADVTXD_DCMD_DEXT);
4137 /* set HW vlan bit if vlan is present */
4138 if (tx_flags & IGB_TX_FLAGS_VLAN)
4139 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4141 /* set timestamp bit if present */
4142 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4143 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4145 /* set segmentation bits for TSO */
4146 if (tx_flags & IGB_TX_FLAGS_TSO)
4147 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4152 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4153 union e1000_adv_tx_desc *tx_desc,
4154 u32 tx_flags, unsigned int paylen)
4156 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4158 /* 82575 requires a unique index per ring if any offload is enabled */
4159 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4160 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4161 olinfo_status |= tx_ring->reg_idx << 4;
4163 /* insert L4 checksum */
4164 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4165 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4167 /* insert IPv4 checksum */
4168 if (tx_flags & IGB_TX_FLAGS_IPV4)
4169 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4172 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4176 * The largest size we can write to the descriptor is 65535. In order to
4177 * maintain a power of two alignment we have to limit ourselves to 32K.
4179 #define IGB_MAX_TXD_PWR 15
4180 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
4182 static void igb_tx_map(struct igb_ring *tx_ring,
4183 struct igb_tx_buffer *first,
4186 struct sk_buff *skb = first->skb;
4187 struct igb_tx_buffer *tx_buffer_info;
4188 union e1000_adv_tx_desc *tx_desc;
4190 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4191 unsigned int data_len = skb->data_len;
4192 unsigned int size = skb_headlen(skb);
4193 unsigned int paylen = skb->len - hdr_len;
4195 u32 tx_flags = first->tx_flags;
4196 u16 i = tx_ring->next_to_use;
4198 tx_desc = IGB_TX_DESC(tx_ring, i);
4200 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4201 cmd_type = igb_tx_cmd_type(tx_flags);
4203 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4204 if (dma_mapping_error(tx_ring->dev, dma))
4207 /* record length, and DMA address */
4208 first->length = size;
4210 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4213 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4214 tx_desc->read.cmd_type_len =
4215 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4219 if (i == tx_ring->count) {
4220 tx_desc = IGB_TX_DESC(tx_ring, 0);
4224 dma += IGB_MAX_DATA_PER_TXD;
4225 size -= IGB_MAX_DATA_PER_TXD;
4227 tx_desc->read.olinfo_status = 0;
4228 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4231 if (likely(!data_len))
4234 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4238 if (i == tx_ring->count) {
4239 tx_desc = IGB_TX_DESC(tx_ring, 0);
4243 size = skb_frag_size(frag);
4246 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4247 size, DMA_TO_DEVICE);
4248 if (dma_mapping_error(tx_ring->dev, dma))
4251 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4252 tx_buffer_info->length = size;
4253 tx_buffer_info->dma = dma;
4255 tx_desc->read.olinfo_status = 0;
4256 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4261 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4263 /* write last descriptor with RS and EOP bits */
4264 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4265 if (unlikely(skb->no_fcs))
4266 cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4267 tx_desc->read.cmd_type_len = cmd_type;
4269 /* set the timestamp */
4270 first->time_stamp = jiffies;
4273 * Force memory writes to complete before letting h/w know there
4274 * are new descriptors to fetch. (Only applicable for weak-ordered
4275 * memory model archs, such as IA-64).
4277 * We also need this memory barrier to make certain all of the
4278 * status bits have been updated before next_to_watch is written.
4282 /* set next_to_watch value indicating a packet is present */
4283 first->next_to_watch = tx_desc;
4286 if (i == tx_ring->count)
4289 tx_ring->next_to_use = i;
4291 writel(i, tx_ring->tail);
4293 /* we need this if more than one processor can write to our tail
4294 * at a time, it syncronizes IO on IA64/Altix systems */
4300 dev_err(tx_ring->dev, "TX DMA map failed\n");
4302 /* clear dma mappings for failed tx_buffer_info map */
4304 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4305 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4306 if (tx_buffer_info == first)
4313 tx_ring->next_to_use = i;
4316 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4318 struct net_device *netdev = tx_ring->netdev;
4320 netif_stop_subqueue(netdev, tx_ring->queue_index);
4322 /* Herbert's original patch had:
4323 * smp_mb__after_netif_stop_queue();
4324 * but since that doesn't exist yet, just open code it. */
4327 /* We need to check again in a case another CPU has just
4328 * made room available. */
4329 if (igb_desc_unused(tx_ring) < size)
4333 netif_wake_subqueue(netdev, tx_ring->queue_index);
4335 u64_stats_update_begin(&tx_ring->tx_syncp2);
4336 tx_ring->tx_stats.restart_queue2++;
4337 u64_stats_update_end(&tx_ring->tx_syncp2);
4342 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4344 if (igb_desc_unused(tx_ring) >= size)
4346 return __igb_maybe_stop_tx(tx_ring, size);
4349 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4350 struct igb_ring *tx_ring)
4352 struct igb_tx_buffer *first;
4355 __be16 protocol = vlan_get_protocol(skb);
4358 /* need: 1 descriptor per page,
4359 * + 2 desc gap to keep tail from touching head,
4360 * + 1 desc for skb->data,
4361 * + 1 desc for context descriptor,
4362 * otherwise try next time */
4363 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4364 /* this is a hard error */
4365 return NETDEV_TX_BUSY;
4368 /* record the location of the first descriptor for this packet */
4369 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4371 first->bytecount = skb->len;
4372 first->gso_segs = 1;
4374 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4375 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4376 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4379 if (vlan_tx_tag_present(skb)) {
4380 tx_flags |= IGB_TX_FLAGS_VLAN;
4381 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4384 /* record initial flags and protocol */
4385 first->tx_flags = tx_flags;
4386 first->protocol = protocol;
4388 tso = igb_tso(tx_ring, first, &hdr_len);
4392 igb_tx_csum(tx_ring, first);
4394 igb_tx_map(tx_ring, first, hdr_len);
4396 /* Make sure there is space in the ring for the next send. */
4397 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4399 return NETDEV_TX_OK;
4402 igb_unmap_and_free_tx_resource(tx_ring, first);
4404 return NETDEV_TX_OK;
4407 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4408 struct sk_buff *skb)
4410 unsigned int r_idx = skb->queue_mapping;
4412 if (r_idx >= adapter->num_tx_queues)
4413 r_idx = r_idx % adapter->num_tx_queues;
4415 return adapter->tx_ring[r_idx];
4418 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4419 struct net_device *netdev)
4421 struct igb_adapter *adapter = netdev_priv(netdev);
4423 if (test_bit(__IGB_DOWN, &adapter->state)) {
4424 dev_kfree_skb_any(skb);
4425 return NETDEV_TX_OK;
4428 if (skb->len <= 0) {
4429 dev_kfree_skb_any(skb);
4430 return NETDEV_TX_OK;
4434 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4435 * in order to meet this minimum size requirement.
4437 if (skb->len < 17) {
4438 if (skb_padto(skb, 17))
4439 return NETDEV_TX_OK;
4443 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4447 * igb_tx_timeout - Respond to a Tx Hang
4448 * @netdev: network interface device structure
4450 static void igb_tx_timeout(struct net_device *netdev)
4452 struct igb_adapter *adapter = netdev_priv(netdev);
4453 struct e1000_hw *hw = &adapter->hw;
4455 /* Do the reset outside of interrupt context */
4456 adapter->tx_timeout_count++;
4458 if (hw->mac.type >= e1000_82580)
4459 hw->dev_spec._82575.global_device_reset = true;
4461 schedule_work(&adapter->reset_task);
4463 (adapter->eims_enable_mask & ~adapter->eims_other));
4466 static void igb_reset_task(struct work_struct *work)
4468 struct igb_adapter *adapter;
4469 adapter = container_of(work, struct igb_adapter, reset_task);
4472 netdev_err(adapter->netdev, "Reset adapter\n");
4473 igb_reinit_locked(adapter);
4477 * igb_get_stats64 - Get System Network Statistics
4478 * @netdev: network interface device structure
4479 * @stats: rtnl_link_stats64 pointer
4482 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4483 struct rtnl_link_stats64 *stats)
4485 struct igb_adapter *adapter = netdev_priv(netdev);
4487 spin_lock(&adapter->stats64_lock);
4488 igb_update_stats(adapter, &adapter->stats64);
4489 memcpy(stats, &adapter->stats64, sizeof(*stats));
4490 spin_unlock(&adapter->stats64_lock);
4496 * igb_change_mtu - Change the Maximum Transfer Unit
4497 * @netdev: network interface device structure
4498 * @new_mtu: new value for maximum frame size
4500 * Returns 0 on success, negative on failure
4502 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4504 struct igb_adapter *adapter = netdev_priv(netdev);
4505 struct pci_dev *pdev = adapter->pdev;
4506 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4508 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4509 dev_err(&pdev->dev, "Invalid MTU setting\n");
4513 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4514 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4515 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4519 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4522 /* igb_down has a dependency on max_frame_size */
4523 adapter->max_frame_size = max_frame;
4525 if (netif_running(netdev))
4528 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4529 netdev->mtu, new_mtu);
4530 netdev->mtu = new_mtu;
4532 if (netif_running(netdev))
4537 clear_bit(__IGB_RESETTING, &adapter->state);
4543 * igb_update_stats - Update the board statistics counters
4544 * @adapter: board private structure
4547 void igb_update_stats(struct igb_adapter *adapter,
4548 struct rtnl_link_stats64 *net_stats)
4550 struct e1000_hw *hw = &adapter->hw;
4551 struct pci_dev *pdev = adapter->pdev;
4557 u64 _bytes, _packets;
4559 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4562 * Prevent stats update while adapter is being reset, or if the pci
4563 * connection is down.
4565 if (adapter->link_speed == 0)
4567 if (pci_channel_offline(pdev))
4572 for (i = 0; i < adapter->num_rx_queues; i++) {
4573 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4574 struct igb_ring *ring = adapter->rx_ring[i];
4576 ring->rx_stats.drops += rqdpc_tmp;
4577 net_stats->rx_fifo_errors += rqdpc_tmp;
4580 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4581 _bytes = ring->rx_stats.bytes;
4582 _packets = ring->rx_stats.packets;
4583 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4585 packets += _packets;
4588 net_stats->rx_bytes = bytes;
4589 net_stats->rx_packets = packets;
4593 for (i = 0; i < adapter->num_tx_queues; i++) {
4594 struct igb_ring *ring = adapter->tx_ring[i];
4596 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4597 _bytes = ring->tx_stats.bytes;
4598 _packets = ring->tx_stats.packets;
4599 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4601 packets += _packets;
4603 net_stats->tx_bytes = bytes;
4604 net_stats->tx_packets = packets;
4606 /* read stats registers */
4607 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4608 adapter->stats.gprc += rd32(E1000_GPRC);
4609 adapter->stats.gorc += rd32(E1000_GORCL);
4610 rd32(E1000_GORCH); /* clear GORCL */
4611 adapter->stats.bprc += rd32(E1000_BPRC);
4612 adapter->stats.mprc += rd32(E1000_MPRC);
4613 adapter->stats.roc += rd32(E1000_ROC);
4615 adapter->stats.prc64 += rd32(E1000_PRC64);
4616 adapter->stats.prc127 += rd32(E1000_PRC127);
4617 adapter->stats.prc255 += rd32(E1000_PRC255);
4618 adapter->stats.prc511 += rd32(E1000_PRC511);
4619 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4620 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4621 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4622 adapter->stats.sec += rd32(E1000_SEC);
4624 mpc = rd32(E1000_MPC);
4625 adapter->stats.mpc += mpc;
4626 net_stats->rx_fifo_errors += mpc;
4627 adapter->stats.scc += rd32(E1000_SCC);
4628 adapter->stats.ecol += rd32(E1000_ECOL);
4629 adapter->stats.mcc += rd32(E1000_MCC);
4630 adapter->stats.latecol += rd32(E1000_LATECOL);
4631 adapter->stats.dc += rd32(E1000_DC);
4632 adapter->stats.rlec += rd32(E1000_RLEC);
4633 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4634 adapter->stats.xontxc += rd32(E1000_XONTXC);
4635 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4636 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4637 adapter->stats.fcruc += rd32(E1000_FCRUC);
4638 adapter->stats.gptc += rd32(E1000_GPTC);
4639 adapter->stats.gotc += rd32(E1000_GOTCL);
4640 rd32(E1000_GOTCH); /* clear GOTCL */
4641 adapter->stats.rnbc += rd32(E1000_RNBC);
4642 adapter->stats.ruc += rd32(E1000_RUC);
4643 adapter->stats.rfc += rd32(E1000_RFC);
4644 adapter->stats.rjc += rd32(E1000_RJC);
4645 adapter->stats.tor += rd32(E1000_TORH);
4646 adapter->stats.tot += rd32(E1000_TOTH);
4647 adapter->stats.tpr += rd32(E1000_TPR);
4649 adapter->stats.ptc64 += rd32(E1000_PTC64);
4650 adapter->stats.ptc127 += rd32(E1000_PTC127);
4651 adapter->stats.ptc255 += rd32(E1000_PTC255);
4652 adapter->stats.ptc511 += rd32(E1000_PTC511);
4653 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4654 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4656 adapter->stats.mptc += rd32(E1000_MPTC);
4657 adapter->stats.bptc += rd32(E1000_BPTC);
4659 adapter->stats.tpt += rd32(E1000_TPT);
4660 adapter->stats.colc += rd32(E1000_COLC);
4662 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4663 /* read internal phy specific stats */
4664 reg = rd32(E1000_CTRL_EXT);
4665 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4666 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4667 adapter->stats.tncrs += rd32(E1000_TNCRS);
4670 adapter->stats.tsctc += rd32(E1000_TSCTC);
4671 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4673 adapter->stats.iac += rd32(E1000_IAC);
4674 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4675 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4676 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4677 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4678 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4679 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4680 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4681 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4683 /* Fill out the OS statistics structure */
4684 net_stats->multicast = adapter->stats.mprc;
4685 net_stats->collisions = adapter->stats.colc;
4689 /* RLEC on some newer hardware can be incorrect so build
4690 * our own version based on RUC and ROC */
4691 net_stats->rx_errors = adapter->stats.rxerrc +
4692 adapter->stats.crcerrs + adapter->stats.algnerrc +
4693 adapter->stats.ruc + adapter->stats.roc +
4694 adapter->stats.cexterr;
4695 net_stats->rx_length_errors = adapter->stats.ruc +
4697 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4698 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4699 net_stats->rx_missed_errors = adapter->stats.mpc;
4702 net_stats->tx_errors = adapter->stats.ecol +
4703 adapter->stats.latecol;
4704 net_stats->tx_aborted_errors = adapter->stats.ecol;
4705 net_stats->tx_window_errors = adapter->stats.latecol;
4706 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4708 /* Tx Dropped needs to be maintained elsewhere */
4711 if (hw->phy.media_type == e1000_media_type_copper) {
4712 if ((adapter->link_speed == SPEED_1000) &&
4713 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4714 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4715 adapter->phy_stats.idle_errors += phy_tmp;
4719 /* Management Stats */
4720 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4721 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4722 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4725 reg = rd32(E1000_MANC);
4726 if (reg & E1000_MANC_EN_BMC2OS) {
4727 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4728 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4729 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4730 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4734 static irqreturn_t igb_msix_other(int irq, void *data)
4736 struct igb_adapter *adapter = data;
4737 struct e1000_hw *hw = &adapter->hw;
4738 u32 icr = rd32(E1000_ICR);
4739 /* reading ICR causes bit 31 of EICR to be cleared */
4741 if (icr & E1000_ICR_DRSTA)
4742 schedule_work(&adapter->reset_task);
4744 if (icr & E1000_ICR_DOUTSYNC) {
4745 /* HW is reporting DMA is out of sync */
4746 adapter->stats.doosync++;
4747 /* The DMA Out of Sync is also indication of a spoof event
4748 * in IOV mode. Check the Wrong VM Behavior register to
4749 * see if it is really a spoof event. */
4750 igb_check_wvbr(adapter);
4753 /* Check for a mailbox event */
4754 if (icr & E1000_ICR_VMMB)
4755 igb_msg_task(adapter);
4757 if (icr & E1000_ICR_LSC) {
4758 hw->mac.get_link_status = 1;
4759 /* guard against interrupt when we're going down */
4760 if (!test_bit(__IGB_DOWN, &adapter->state))
4761 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4764 wr32(E1000_EIMS, adapter->eims_other);
4769 static void igb_write_itr(struct igb_q_vector *q_vector)
4771 struct igb_adapter *adapter = q_vector->adapter;
4772 u32 itr_val = q_vector->itr_val & 0x7FFC;
4774 if (!q_vector->set_itr)
4780 if (adapter->hw.mac.type == e1000_82575)
4781 itr_val |= itr_val << 16;
4783 itr_val |= E1000_EITR_CNT_IGNR;
4785 writel(itr_val, q_vector->itr_register);
4786 q_vector->set_itr = 0;
4789 static irqreturn_t igb_msix_ring(int irq, void *data)
4791 struct igb_q_vector *q_vector = data;
4793 /* Write the ITR value calculated from the previous interrupt. */
4794 igb_write_itr(q_vector);
4796 napi_schedule(&q_vector->napi);
4801 #ifdef CONFIG_IGB_DCA
4802 static void igb_update_dca(struct igb_q_vector *q_vector)
4804 struct igb_adapter *adapter = q_vector->adapter;
4805 struct e1000_hw *hw = &adapter->hw;
4806 int cpu = get_cpu();
4808 if (q_vector->cpu == cpu)
4811 if (q_vector->tx.ring) {
4812 int q = q_vector->tx.ring->reg_idx;
4813 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4814 if (hw->mac.type == e1000_82575) {
4815 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4816 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4818 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4819 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4820 E1000_DCA_TXCTRL_CPUID_SHIFT;
4822 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4823 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4825 if (q_vector->rx.ring) {
4826 int q = q_vector->rx.ring->reg_idx;
4827 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4828 if (hw->mac.type == e1000_82575) {
4829 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4830 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4832 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4833 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4834 E1000_DCA_RXCTRL_CPUID_SHIFT;
4836 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4837 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4838 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4839 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4841 q_vector->cpu = cpu;
4846 static void igb_setup_dca(struct igb_adapter *adapter)
4848 struct e1000_hw *hw = &adapter->hw;
4851 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4854 /* Always use CB2 mode, difference is masked in the CB driver. */
4855 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4857 for (i = 0; i < adapter->num_q_vectors; i++) {
4858 adapter->q_vector[i]->cpu = -1;
4859 igb_update_dca(adapter->q_vector[i]);
4863 static int __igb_notify_dca(struct device *dev, void *data)
4865 struct net_device *netdev = dev_get_drvdata(dev);
4866 struct igb_adapter *adapter = netdev_priv(netdev);
4867 struct pci_dev *pdev = adapter->pdev;
4868 struct e1000_hw *hw = &adapter->hw;
4869 unsigned long event = *(unsigned long *)data;
4872 case DCA_PROVIDER_ADD:
4873 /* if already enabled, don't do it again */
4874 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4876 if (dca_add_requester(dev) == 0) {
4877 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4878 dev_info(&pdev->dev, "DCA enabled\n");
4879 igb_setup_dca(adapter);
4882 /* Fall Through since DCA is disabled. */
4883 case DCA_PROVIDER_REMOVE:
4884 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4885 /* without this a class_device is left
4886 * hanging around in the sysfs model */
4887 dca_remove_requester(dev);
4888 dev_info(&pdev->dev, "DCA disabled\n");
4889 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4890 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4898 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4903 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4906 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4908 #endif /* CONFIG_IGB_DCA */
4910 #ifdef CONFIG_PCI_IOV
4911 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4913 unsigned char mac_addr[ETH_ALEN];
4914 struct pci_dev *pdev = adapter->pdev;
4915 struct e1000_hw *hw = &adapter->hw;
4916 struct pci_dev *pvfdev;
4917 unsigned int device_id;
4920 random_ether_addr(mac_addr);
4921 igb_set_vf_mac(adapter, vf, mac_addr);
4923 switch (adapter->hw.mac.type) {
4925 device_id = IGB_82576_VF_DEV_ID;
4926 /* VF Stride for 82576 is 2 */
4927 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4931 device_id = IGB_I350_VF_DEV_ID;
4932 /* VF Stride for I350 is 4 */
4933 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4942 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4944 if (pvfdev->devfn == thisvf_devfn)
4946 pvfdev = pci_get_device(hw->vendor_id,
4951 adapter->vf_data[vf].vfdev = pvfdev;
4954 "Couldn't find pci dev ptr for VF %4.4x\n",
4956 return pvfdev != NULL;
4959 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4961 struct e1000_hw *hw = &adapter->hw;
4962 struct pci_dev *pdev = adapter->pdev;
4963 struct pci_dev *pvfdev;
4966 unsigned int device_id;
4969 switch (adapter->hw.mac.type) {
4971 device_id = IGB_82576_VF_DEV_ID;
4972 /* VF Stride for 82576 is 2 */
4976 device_id = IGB_I350_VF_DEV_ID;
4977 /* VF Stride for I350 is 4 */
4986 vf_devfn = pdev->devfn + 0x80;
4987 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4989 if (pvfdev->devfn == vf_devfn &&
4990 (pvfdev->bus->number >= pdev->bus->number))
4992 vf_devfn += vf_stride;
4993 pvfdev = pci_get_device(hw->vendor_id,
5000 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5003 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5004 if (adapter->vf_data[i].vfdev) {
5005 if (adapter->vf_data[i].vfdev->dev_flags &
5006 PCI_DEV_FLAGS_ASSIGNED)
5014 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5016 struct e1000_hw *hw = &adapter->hw;
5020 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5021 ping = E1000_PF_CONTROL_MSG;
5022 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5023 ping |= E1000_VT_MSGTYPE_CTS;
5024 igb_write_mbx(hw, &ping, 1, i);
5028 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5030 struct e1000_hw *hw = &adapter->hw;
5031 u32 vmolr = rd32(E1000_VMOLR(vf));
5032 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5034 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5035 IGB_VF_FLAG_MULTI_PROMISC);
5036 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5038 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5039 vmolr |= E1000_VMOLR_MPME;
5040 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5041 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5044 * if we have hashes and we are clearing a multicast promisc
5045 * flag we need to write the hashes to the MTA as this step
5046 * was previously skipped
5048 if (vf_data->num_vf_mc_hashes > 30) {
5049 vmolr |= E1000_VMOLR_MPME;
5050 } else if (vf_data->num_vf_mc_hashes) {
5052 vmolr |= E1000_VMOLR_ROMPE;
5053 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5054 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5058 wr32(E1000_VMOLR(vf), vmolr);
5060 /* there are flags left unprocessed, likely not supported */
5061 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5068 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5069 u32 *msgbuf, u32 vf)
5071 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5072 u16 *hash_list = (u16 *)&msgbuf[1];
5073 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5076 /* salt away the number of multicast addresses assigned
5077 * to this VF for later use to restore when the PF multi cast
5080 vf_data->num_vf_mc_hashes = n;
5082 /* only up to 30 hash values supported */
5086 /* store the hashes for later use */
5087 for (i = 0; i < n; i++)
5088 vf_data->vf_mc_hashes[i] = hash_list[i];
5090 /* Flush and reset the mta with the new values */
5091 igb_set_rx_mode(adapter->netdev);
5096 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5098 struct e1000_hw *hw = &adapter->hw;
5099 struct vf_data_storage *vf_data;
5102 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5103 u32 vmolr = rd32(E1000_VMOLR(i));
5104 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5106 vf_data = &adapter->vf_data[i];
5108 if ((vf_data->num_vf_mc_hashes > 30) ||
5109 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5110 vmolr |= E1000_VMOLR_MPME;
5111 } else if (vf_data->num_vf_mc_hashes) {
5112 vmolr |= E1000_VMOLR_ROMPE;
5113 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5114 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5116 wr32(E1000_VMOLR(i), vmolr);
5120 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5122 struct e1000_hw *hw = &adapter->hw;
5123 u32 pool_mask, reg, vid;
5126 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5128 /* Find the vlan filter for this id */
5129 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5130 reg = rd32(E1000_VLVF(i));
5132 /* remove the vf from the pool */
5135 /* if pool is empty then remove entry from vfta */
5136 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5137 (reg & E1000_VLVF_VLANID_ENABLE)) {
5139 vid = reg & E1000_VLVF_VLANID_MASK;
5140 igb_vfta_set(hw, vid, false);
5143 wr32(E1000_VLVF(i), reg);
5146 adapter->vf_data[vf].vlans_enabled = 0;
5149 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5151 struct e1000_hw *hw = &adapter->hw;
5154 /* The vlvf table only exists on 82576 hardware and newer */
5155 if (hw->mac.type < e1000_82576)
5158 /* we only need to do this if VMDq is enabled */
5159 if (!adapter->vfs_allocated_count)
5162 /* Find the vlan filter for this id */
5163 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5164 reg = rd32(E1000_VLVF(i));
5165 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5166 vid == (reg & E1000_VLVF_VLANID_MASK))
5171 if (i == E1000_VLVF_ARRAY_SIZE) {
5172 /* Did not find a matching VLAN ID entry that was
5173 * enabled. Search for a free filter entry, i.e.
5174 * one without the enable bit set
5176 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5177 reg = rd32(E1000_VLVF(i));
5178 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5182 if (i < E1000_VLVF_ARRAY_SIZE) {
5183 /* Found an enabled/available entry */
5184 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5186 /* if !enabled we need to set this up in vfta */
5187 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5188 /* add VID to filter table */
5189 igb_vfta_set(hw, vid, true);
5190 reg |= E1000_VLVF_VLANID_ENABLE;
5192 reg &= ~E1000_VLVF_VLANID_MASK;
5194 wr32(E1000_VLVF(i), reg);
5196 /* do not modify RLPML for PF devices */
5197 if (vf >= adapter->vfs_allocated_count)
5200 if (!adapter->vf_data[vf].vlans_enabled) {
5202 reg = rd32(E1000_VMOLR(vf));
5203 size = reg & E1000_VMOLR_RLPML_MASK;
5205 reg &= ~E1000_VMOLR_RLPML_MASK;
5207 wr32(E1000_VMOLR(vf), reg);
5210 adapter->vf_data[vf].vlans_enabled++;
5213 if (i < E1000_VLVF_ARRAY_SIZE) {
5214 /* remove vf from the pool */
5215 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5216 /* if pool is empty then remove entry from vfta */
5217 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5219 igb_vfta_set(hw, vid, false);
5221 wr32(E1000_VLVF(i), reg);
5223 /* do not modify RLPML for PF devices */
5224 if (vf >= adapter->vfs_allocated_count)
5227 adapter->vf_data[vf].vlans_enabled--;
5228 if (!adapter->vf_data[vf].vlans_enabled) {
5230 reg = rd32(E1000_VMOLR(vf));
5231 size = reg & E1000_VMOLR_RLPML_MASK;
5233 reg &= ~E1000_VMOLR_RLPML_MASK;
5235 wr32(E1000_VMOLR(vf), reg);
5242 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5244 struct e1000_hw *hw = &adapter->hw;
5247 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5249 wr32(E1000_VMVIR(vf), 0);
5252 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5253 int vf, u16 vlan, u8 qos)
5256 struct igb_adapter *adapter = netdev_priv(netdev);
5258 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5261 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5264 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5265 igb_set_vmolr(adapter, vf, !vlan);
5266 adapter->vf_data[vf].pf_vlan = vlan;
5267 adapter->vf_data[vf].pf_qos = qos;
5268 dev_info(&adapter->pdev->dev,
5269 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5270 if (test_bit(__IGB_DOWN, &adapter->state)) {
5271 dev_warn(&adapter->pdev->dev,
5272 "The VF VLAN has been set,"
5273 " but the PF device is not up.\n");
5274 dev_warn(&adapter->pdev->dev,
5275 "Bring the PF device up before"
5276 " attempting to use the VF device.\n");
5279 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5281 igb_set_vmvir(adapter, vlan, vf);
5282 igb_set_vmolr(adapter, vf, true);
5283 adapter->vf_data[vf].pf_vlan = 0;
5284 adapter->vf_data[vf].pf_qos = 0;
5290 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5292 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5293 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5295 return igb_vlvf_set(adapter, vid, add, vf);
5298 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5300 /* clear flags - except flag that indicates PF has set the MAC */
5301 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5302 adapter->vf_data[vf].last_nack = jiffies;
5304 /* reset offloads to defaults */
5305 igb_set_vmolr(adapter, vf, true);
5307 /* reset vlans for device */
5308 igb_clear_vf_vfta(adapter, vf);
5309 if (adapter->vf_data[vf].pf_vlan)
5310 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5311 adapter->vf_data[vf].pf_vlan,
5312 adapter->vf_data[vf].pf_qos);
5314 igb_clear_vf_vfta(adapter, vf);
5316 /* reset multicast table array for vf */
5317 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5319 /* Flush and reset the mta with the new values */
5320 igb_set_rx_mode(adapter->netdev);
5323 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5325 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5327 /* generate a new mac address as we were hotplug removed/added */
5328 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5329 random_ether_addr(vf_mac);
5331 /* process remaining reset events */
5332 igb_vf_reset(adapter, vf);
5335 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5337 struct e1000_hw *hw = &adapter->hw;
5338 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5339 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5341 u8 *addr = (u8 *)(&msgbuf[1]);
5343 /* process all the same items cleared in a function level reset */
5344 igb_vf_reset(adapter, vf);
5346 /* set vf mac address */
5347 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5349 /* enable transmit and receive for vf */
5350 reg = rd32(E1000_VFTE);
5351 wr32(E1000_VFTE, reg | (1 << vf));
5352 reg = rd32(E1000_VFRE);
5353 wr32(E1000_VFRE, reg | (1 << vf));
5355 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5357 /* reply to reset with ack and vf mac address */
5358 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5359 memcpy(addr, vf_mac, 6);
5360 igb_write_mbx(hw, msgbuf, 3, vf);
5363 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5366 * The VF MAC Address is stored in a packed array of bytes
5367 * starting at the second 32 bit word of the msg array
5369 unsigned char *addr = (char *)&msg[1];
5372 if (is_valid_ether_addr(addr))
5373 err = igb_set_vf_mac(adapter, vf, addr);
5378 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5380 struct e1000_hw *hw = &adapter->hw;
5381 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5382 u32 msg = E1000_VT_MSGTYPE_NACK;
5384 /* if device isn't clear to send it shouldn't be reading either */
5385 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5386 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5387 igb_write_mbx(hw, &msg, 1, vf);
5388 vf_data->last_nack = jiffies;
5392 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5394 struct pci_dev *pdev = adapter->pdev;
5395 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5396 struct e1000_hw *hw = &adapter->hw;
5397 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5400 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5403 /* if receive failed revoke VF CTS stats and restart init */
5404 dev_err(&pdev->dev, "Error receiving message from VF\n");
5405 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5406 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5411 /* this is a message we already processed, do nothing */
5412 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5416 * until the vf completes a reset it should not be
5417 * allowed to start any configuration.
5420 if (msgbuf[0] == E1000_VF_RESET) {
5421 igb_vf_reset_msg(adapter, vf);
5425 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5426 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5432 switch ((msgbuf[0] & 0xFFFF)) {
5433 case E1000_VF_SET_MAC_ADDR:
5435 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5436 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5438 dev_warn(&pdev->dev,
5439 "VF %d attempted to override administratively "
5440 "set MAC address\nReload the VF driver to "
5441 "resume operations\n", vf);
5443 case E1000_VF_SET_PROMISC:
5444 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5446 case E1000_VF_SET_MULTICAST:
5447 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5449 case E1000_VF_SET_LPE:
5450 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5452 case E1000_VF_SET_VLAN:
5454 if (vf_data->pf_vlan)
5455 dev_warn(&pdev->dev,
5456 "VF %d attempted to override administratively "
5457 "set VLAN tag\nReload the VF driver to "
5458 "resume operations\n", vf);
5460 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5463 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5468 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5470 /* notify the VF of the results of what it sent us */
5472 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5474 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5476 igb_write_mbx(hw, msgbuf, 1, vf);
5479 static void igb_msg_task(struct igb_adapter *adapter)
5481 struct e1000_hw *hw = &adapter->hw;
5484 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5485 /* process any reset requests */
5486 if (!igb_check_for_rst(hw, vf))
5487 igb_vf_reset_event(adapter, vf);
5489 /* process any messages pending */
5490 if (!igb_check_for_msg(hw, vf))
5491 igb_rcv_msg_from_vf(adapter, vf);
5493 /* process any acks */
5494 if (!igb_check_for_ack(hw, vf))
5495 igb_rcv_ack_from_vf(adapter, vf);
5500 * igb_set_uta - Set unicast filter table address
5501 * @adapter: board private structure
5503 * The unicast table address is a register array of 32-bit registers.
5504 * The table is meant to be used in a way similar to how the MTA is used
5505 * however due to certain limitations in the hardware it is necessary to
5506 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5507 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5509 static void igb_set_uta(struct igb_adapter *adapter)
5511 struct e1000_hw *hw = &adapter->hw;
5514 /* The UTA table only exists on 82576 hardware and newer */
5515 if (hw->mac.type < e1000_82576)
5518 /* we only need to do this if VMDq is enabled */
5519 if (!adapter->vfs_allocated_count)
5522 for (i = 0; i < hw->mac.uta_reg_count; i++)
5523 array_wr32(E1000_UTA, i, ~0);
5527 * igb_intr_msi - Interrupt Handler
5528 * @irq: interrupt number
5529 * @data: pointer to a network interface device structure
5531 static irqreturn_t igb_intr_msi(int irq, void *data)
5533 struct igb_adapter *adapter = data;
5534 struct igb_q_vector *q_vector = adapter->q_vector[0];
5535 struct e1000_hw *hw = &adapter->hw;
5536 /* read ICR disables interrupts using IAM */
5537 u32 icr = rd32(E1000_ICR);
5539 igb_write_itr(q_vector);
5541 if (icr & E1000_ICR_DRSTA)
5542 schedule_work(&adapter->reset_task);
5544 if (icr & E1000_ICR_DOUTSYNC) {
5545 /* HW is reporting DMA is out of sync */
5546 adapter->stats.doosync++;
5549 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5550 hw->mac.get_link_status = 1;
5551 if (!test_bit(__IGB_DOWN, &adapter->state))
5552 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5555 napi_schedule(&q_vector->napi);
5561 * igb_intr - Legacy Interrupt Handler
5562 * @irq: interrupt number
5563 * @data: pointer to a network interface device structure
5565 static irqreturn_t igb_intr(int irq, void *data)
5567 struct igb_adapter *adapter = data;
5568 struct igb_q_vector *q_vector = adapter->q_vector[0];
5569 struct e1000_hw *hw = &adapter->hw;
5570 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5571 * need for the IMC write */
5572 u32 icr = rd32(E1000_ICR);
5574 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5575 * not set, then the adapter didn't send an interrupt */
5576 if (!(icr & E1000_ICR_INT_ASSERTED))
5579 igb_write_itr(q_vector);
5581 if (icr & E1000_ICR_DRSTA)
5582 schedule_work(&adapter->reset_task);
5584 if (icr & E1000_ICR_DOUTSYNC) {
5585 /* HW is reporting DMA is out of sync */
5586 adapter->stats.doosync++;
5589 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5590 hw->mac.get_link_status = 1;
5591 /* guard against interrupt when we're going down */
5592 if (!test_bit(__IGB_DOWN, &adapter->state))
5593 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5596 napi_schedule(&q_vector->napi);
5601 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5603 struct igb_adapter *adapter = q_vector->adapter;
5604 struct e1000_hw *hw = &adapter->hw;
5606 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5607 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5608 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5609 igb_set_itr(q_vector);
5611 igb_update_ring_itr(q_vector);
5614 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5615 if (adapter->msix_entries)
5616 wr32(E1000_EIMS, q_vector->eims_value);
5618 igb_irq_enable(adapter);
5623 * igb_poll - NAPI Rx polling callback
5624 * @napi: napi polling structure
5625 * @budget: count of how many packets we should handle
5627 static int igb_poll(struct napi_struct *napi, int budget)
5629 struct igb_q_vector *q_vector = container_of(napi,
5630 struct igb_q_vector,
5632 bool clean_complete = true;
5634 #ifdef CONFIG_IGB_DCA
5635 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5636 igb_update_dca(q_vector);
5638 if (q_vector->tx.ring)
5639 clean_complete = igb_clean_tx_irq(q_vector);
5641 if (q_vector->rx.ring)
5642 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5644 /* If all work not completed, return budget and keep polling */
5645 if (!clean_complete)
5648 /* If not enough Rx work done, exit the polling mode */
5649 napi_complete(napi);
5650 igb_ring_irq_enable(q_vector);
5655 #ifdef CONFIG_IGB_PTP
5657 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5658 * @q_vector: pointer to q_vector containing needed info
5659 * @buffer: pointer to igb_tx_buffer structure
5661 * If we were asked to do hardware stamping and such a time stamp is
5662 * available, then it must have been for this skb here because we only
5663 * allow only one such packet into the queue.
5665 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5666 struct igb_tx_buffer *buffer_info)
5668 struct igb_adapter *adapter = q_vector->adapter;
5669 struct e1000_hw *hw = &adapter->hw;
5670 struct skb_shared_hwtstamps shhwtstamps;
5673 /* if skb does not support hw timestamp or TX stamp not valid exit */
5674 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5675 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5678 regval = rd32(E1000_TXSTMPL);
5679 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5681 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5682 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5687 * igb_clean_tx_irq - Reclaim resources after transmit completes
5688 * @q_vector: pointer to q_vector containing needed info
5689 * returns true if ring is completely cleaned
5691 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5693 struct igb_adapter *adapter = q_vector->adapter;
5694 struct igb_ring *tx_ring = q_vector->tx.ring;
5695 struct igb_tx_buffer *tx_buffer;
5696 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5697 unsigned int total_bytes = 0, total_packets = 0;
5698 unsigned int budget = q_vector->tx.work_limit;
5699 unsigned int i = tx_ring->next_to_clean;
5701 if (test_bit(__IGB_DOWN, &adapter->state))
5704 tx_buffer = &tx_ring->tx_buffer_info[i];
5705 tx_desc = IGB_TX_DESC(tx_ring, i);
5706 i -= tx_ring->count;
5708 for (; budget; budget--) {
5709 eop_desc = tx_buffer->next_to_watch;
5711 /* prevent any other reads prior to eop_desc */
5714 /* if next_to_watch is not set then there is no work pending */
5718 /* if DD is not set pending work has not been completed */
5719 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5722 /* clear next_to_watch to prevent false hangs */
5723 tx_buffer->next_to_watch = NULL;
5725 /* update the statistics for this packet */
5726 total_bytes += tx_buffer->bytecount;
5727 total_packets += tx_buffer->gso_segs;
5729 #ifdef CONFIG_IGB_PTP
5730 /* retrieve hardware timestamp */
5731 igb_tx_hwtstamp(q_vector, tx_buffer);
5735 dev_kfree_skb_any(tx_buffer->skb);
5736 tx_buffer->skb = NULL;
5738 /* unmap skb header data */
5739 dma_unmap_single(tx_ring->dev,
5744 /* clear last DMA location and unmap remaining buffers */
5745 while (tx_desc != eop_desc) {
5752 i -= tx_ring->count;
5753 tx_buffer = tx_ring->tx_buffer_info;
5754 tx_desc = IGB_TX_DESC(tx_ring, 0);
5757 /* unmap any remaining paged data */
5758 if (tx_buffer->dma) {
5759 dma_unmap_page(tx_ring->dev,
5766 /* clear last DMA location */
5769 /* move us one more past the eop_desc for start of next pkt */
5774 i -= tx_ring->count;
5775 tx_buffer = tx_ring->tx_buffer_info;
5776 tx_desc = IGB_TX_DESC(tx_ring, 0);
5780 netdev_tx_completed_queue(txring_txq(tx_ring),
5781 total_packets, total_bytes);
5782 i += tx_ring->count;
5783 tx_ring->next_to_clean = i;
5784 u64_stats_update_begin(&tx_ring->tx_syncp);
5785 tx_ring->tx_stats.bytes += total_bytes;
5786 tx_ring->tx_stats.packets += total_packets;
5787 u64_stats_update_end(&tx_ring->tx_syncp);
5788 q_vector->tx.total_bytes += total_bytes;
5789 q_vector->tx.total_packets += total_packets;
5791 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5792 struct e1000_hw *hw = &adapter->hw;
5794 eop_desc = tx_buffer->next_to_watch;
5796 /* Detect a transmit hang in hardware, this serializes the
5797 * check with the clearing of time_stamp and movement of i */
5798 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5800 time_after(jiffies, tx_buffer->time_stamp +
5801 (adapter->tx_timeout_factor * HZ)) &&
5802 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5804 /* detected Tx unit hang */
5805 dev_err(tx_ring->dev,
5806 "Detected Tx Unit Hang\n"
5810 " next_to_use <%x>\n"
5811 " next_to_clean <%x>\n"
5812 "buffer_info[next_to_clean]\n"
5813 " time_stamp <%lx>\n"
5814 " next_to_watch <%p>\n"
5816 " desc.status <%x>\n",
5817 tx_ring->queue_index,
5818 rd32(E1000_TDH(tx_ring->reg_idx)),
5819 readl(tx_ring->tail),
5820 tx_ring->next_to_use,
5821 tx_ring->next_to_clean,
5822 tx_buffer->time_stamp,
5825 eop_desc->wb.status);
5826 netif_stop_subqueue(tx_ring->netdev,
5827 tx_ring->queue_index);
5829 /* we are about to reset, no point in enabling stuff */
5834 if (unlikely(total_packets &&
5835 netif_carrier_ok(tx_ring->netdev) &&
5836 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5837 /* Make sure that anybody stopping the queue after this
5838 * sees the new next_to_clean.
5841 if (__netif_subqueue_stopped(tx_ring->netdev,
5842 tx_ring->queue_index) &&
5843 !(test_bit(__IGB_DOWN, &adapter->state))) {
5844 netif_wake_subqueue(tx_ring->netdev,
5845 tx_ring->queue_index);
5847 u64_stats_update_begin(&tx_ring->tx_syncp);
5848 tx_ring->tx_stats.restart_queue++;
5849 u64_stats_update_end(&tx_ring->tx_syncp);
5856 static inline void igb_rx_checksum(struct igb_ring *ring,
5857 union e1000_adv_rx_desc *rx_desc,
5858 struct sk_buff *skb)
5860 skb_checksum_none_assert(skb);
5862 /* Ignore Checksum bit is set */
5863 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5866 /* Rx checksum disabled via ethtool */
5867 if (!(ring->netdev->features & NETIF_F_RXCSUM))
5870 /* TCP/UDP checksum error bit is set */
5871 if (igb_test_staterr(rx_desc,
5872 E1000_RXDEXT_STATERR_TCPE |
5873 E1000_RXDEXT_STATERR_IPE)) {
5875 * work around errata with sctp packets where the TCPE aka
5876 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5877 * packets, (aka let the stack check the crc32c)
5879 if (!((skb->len == 60) &&
5880 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5881 u64_stats_update_begin(&ring->rx_syncp);
5882 ring->rx_stats.csum_err++;
5883 u64_stats_update_end(&ring->rx_syncp);
5885 /* let the stack verify checksum errors */
5888 /* It must be a TCP or UDP packet with a valid checksum */
5889 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5890 E1000_RXD_STAT_UDPCS))
5891 skb->ip_summed = CHECKSUM_UNNECESSARY;
5893 dev_dbg(ring->dev, "cksum success: bits %08X\n",
5894 le32_to_cpu(rx_desc->wb.upper.status_error));
5897 static inline void igb_rx_hash(struct igb_ring *ring,
5898 union e1000_adv_rx_desc *rx_desc,
5899 struct sk_buff *skb)
5901 if (ring->netdev->features & NETIF_F_RXHASH)
5902 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5905 #ifdef CONFIG_IGB_PTP
5906 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5907 union e1000_adv_rx_desc *rx_desc,
5908 struct sk_buff *skb)
5910 struct igb_adapter *adapter = q_vector->adapter;
5911 struct e1000_hw *hw = &adapter->hw;
5914 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5915 E1000_RXDADV_STAT_TS))
5919 * If this bit is set, then the RX registers contain the time stamp. No
5920 * other packet will be time stamped until we read these registers, so
5921 * read the registers to make them available again. Because only one
5922 * packet can be time stamped at a time, we know that the register
5923 * values must belong to this one here and therefore we don't need to
5924 * compare any of the additional attributes stored for it.
5926 * If nothing went wrong, then it should have a shared tx_flags that we
5927 * can turn into a skb_shared_hwtstamps.
5929 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5930 u32 *stamp = (u32 *)skb->data;
5931 regval = le32_to_cpu(*(stamp + 2));
5932 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5933 skb_pull(skb, IGB_TS_HDR_LEN);
5935 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5938 regval = rd32(E1000_RXSTMPL);
5939 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5942 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5946 static void igb_rx_vlan(struct igb_ring *ring,
5947 union e1000_adv_rx_desc *rx_desc,
5948 struct sk_buff *skb)
5950 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5952 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
5953 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
5954 vid = be16_to_cpu(rx_desc->wb.upper.vlan);
5956 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5958 __vlan_hwaccel_put_tag(skb, vid);
5962 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5964 /* HW will not DMA in data larger than the given buffer, even if it
5965 * parses the (NFS, of course) header to be larger. In that case, it
5966 * fills the header buffer and spills the rest into the page.
5968 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5969 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5970 if (hlen > IGB_RX_HDR_LEN)
5971 hlen = IGB_RX_HDR_LEN;
5975 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5977 struct igb_ring *rx_ring = q_vector->rx.ring;
5978 union e1000_adv_rx_desc *rx_desc;
5979 const int current_node = numa_node_id();
5980 unsigned int total_bytes = 0, total_packets = 0;
5981 u16 cleaned_count = igb_desc_unused(rx_ring);
5982 u16 i = rx_ring->next_to_clean;
5984 rx_desc = IGB_RX_DESC(rx_ring, i);
5986 while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
5987 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5988 struct sk_buff *skb = buffer_info->skb;
5989 union e1000_adv_rx_desc *next_rxd;
5991 buffer_info->skb = NULL;
5992 prefetch(skb->data);
5995 if (i == rx_ring->count)
5998 next_rxd = IGB_RX_DESC(rx_ring, i);
6002 * This memory barrier is needed to keep us from reading
6003 * any other fields out of the rx_desc until we know the
6004 * RXD_STAT_DD bit is set
6008 if (!skb_is_nonlinear(skb)) {
6009 __skb_put(skb, igb_get_hlen(rx_desc));
6010 dma_unmap_single(rx_ring->dev, buffer_info->dma,
6013 buffer_info->dma = 0;
6016 if (rx_desc->wb.upper.length) {
6017 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6019 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6021 buffer_info->page_offset,
6025 skb->data_len += length;
6026 skb->truesize += PAGE_SIZE / 2;
6028 if ((page_count(buffer_info->page) != 1) ||
6029 (page_to_nid(buffer_info->page) != current_node))
6030 buffer_info->page = NULL;
6032 get_page(buffer_info->page);
6034 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6035 PAGE_SIZE / 2, DMA_FROM_DEVICE);
6036 buffer_info->page_dma = 0;
6039 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6040 struct igb_rx_buffer *next_buffer;
6041 next_buffer = &rx_ring->rx_buffer_info[i];
6042 buffer_info->skb = next_buffer->skb;
6043 buffer_info->dma = next_buffer->dma;
6044 next_buffer->skb = skb;
6045 next_buffer->dma = 0;
6049 if (unlikely((igb_test_staterr(rx_desc,
6050 E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6051 && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6052 dev_kfree_skb_any(skb);
6056 #ifdef CONFIG_IGB_PTP
6057 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6059 igb_rx_hash(rx_ring, rx_desc, skb);
6060 igb_rx_checksum(rx_ring, rx_desc, skb);
6061 igb_rx_vlan(rx_ring, rx_desc, skb);
6063 total_bytes += skb->len;
6066 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6068 napi_gro_receive(&q_vector->napi, skb);
6076 /* return some buffers to hardware, one at a time is too slow */
6077 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6078 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6082 /* use prefetched values */
6086 rx_ring->next_to_clean = i;
6087 u64_stats_update_begin(&rx_ring->rx_syncp);
6088 rx_ring->rx_stats.packets += total_packets;
6089 rx_ring->rx_stats.bytes += total_bytes;
6090 u64_stats_update_end(&rx_ring->rx_syncp);
6091 q_vector->rx.total_packets += total_packets;
6092 q_vector->rx.total_bytes += total_bytes;
6095 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6100 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6101 struct igb_rx_buffer *bi)
6103 struct sk_buff *skb = bi->skb;
6104 dma_addr_t dma = bi->dma;
6110 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6114 rx_ring->rx_stats.alloc_failed++;
6118 /* initialize skb for ring */
6119 skb_record_rx_queue(skb, rx_ring->queue_index);
6122 dma = dma_map_single(rx_ring->dev, skb->data,
6123 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6125 if (dma_mapping_error(rx_ring->dev, dma)) {
6126 rx_ring->rx_stats.alloc_failed++;
6134 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6135 struct igb_rx_buffer *bi)
6137 struct page *page = bi->page;
6138 dma_addr_t page_dma = bi->page_dma;
6139 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6145 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6147 if (unlikely(!page)) {
6148 rx_ring->rx_stats.alloc_failed++;
6153 page_dma = dma_map_page(rx_ring->dev, page,
6154 page_offset, PAGE_SIZE / 2,
6157 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6158 rx_ring->rx_stats.alloc_failed++;
6162 bi->page_dma = page_dma;
6163 bi->page_offset = page_offset;
6168 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6169 * @adapter: address of board private structure
6171 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6173 union e1000_adv_rx_desc *rx_desc;
6174 struct igb_rx_buffer *bi;
6175 u16 i = rx_ring->next_to_use;
6177 rx_desc = IGB_RX_DESC(rx_ring, i);
6178 bi = &rx_ring->rx_buffer_info[i];
6179 i -= rx_ring->count;
6181 while (cleaned_count--) {
6182 if (!igb_alloc_mapped_skb(rx_ring, bi))
6185 /* Refresh the desc even if buffer_addrs didn't change
6186 * because each write-back erases this info. */
6187 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6189 if (!igb_alloc_mapped_page(rx_ring, bi))
6192 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6198 rx_desc = IGB_RX_DESC(rx_ring, 0);
6199 bi = rx_ring->rx_buffer_info;
6200 i -= rx_ring->count;
6203 /* clear the hdr_addr for the next_to_use descriptor */
6204 rx_desc->read.hdr_addr = 0;
6207 i += rx_ring->count;
6209 if (rx_ring->next_to_use != i) {
6210 rx_ring->next_to_use = i;
6212 /* Force memory writes to complete before letting h/w
6213 * know there are new descriptors to fetch. (Only
6214 * applicable for weak-ordered memory model archs,
6215 * such as IA-64). */
6217 writel(i, rx_ring->tail);
6227 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6229 struct igb_adapter *adapter = netdev_priv(netdev);
6230 struct mii_ioctl_data *data = if_mii(ifr);
6232 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6237 data->phy_id = adapter->hw.phy.addr;
6240 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6252 * igb_hwtstamp_ioctl - control hardware time stamping
6257 * Outgoing time stamping can be enabled and disabled. Play nice and
6258 * disable it when requested, although it shouldn't case any overhead
6259 * when no packet needs it. At most one packet in the queue may be
6260 * marked for time stamping, otherwise it would be impossible to tell
6261 * for sure to which packet the hardware time stamp belongs.
6263 * Incoming time stamping has to be configured via the hardware
6264 * filters. Not all combinations are supported, in particular event
6265 * type has to be specified. Matching the kind of event packet is
6266 * not supported, with the exception of "all V2 events regardless of
6270 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6271 struct ifreq *ifr, int cmd)
6273 struct igb_adapter *adapter = netdev_priv(netdev);
6274 struct e1000_hw *hw = &adapter->hw;
6275 struct hwtstamp_config config;
6276 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6277 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6278 u32 tsync_rx_cfg = 0;
6283 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6286 /* reserved for future extensions */
6290 switch (config.tx_type) {
6291 case HWTSTAMP_TX_OFF:
6293 case HWTSTAMP_TX_ON:
6299 switch (config.rx_filter) {
6300 case HWTSTAMP_FILTER_NONE:
6303 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6304 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6305 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6306 case HWTSTAMP_FILTER_ALL:
6308 * register TSYNCRXCFG must be set, therefore it is not
6309 * possible to time stamp both Sync and Delay_Req messages
6310 * => fall back to time stamping all packets
6312 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6313 config.rx_filter = HWTSTAMP_FILTER_ALL;
6315 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6316 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6317 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6320 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6321 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6322 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6325 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6326 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6327 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6328 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6331 config.rx_filter = HWTSTAMP_FILTER_SOME;
6333 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6334 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6335 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6336 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6339 config.rx_filter = HWTSTAMP_FILTER_SOME;
6341 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6342 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6343 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6344 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6345 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6353 if (hw->mac.type == e1000_82575) {
6354 if (tsync_rx_ctl | tsync_tx_ctl)
6360 * Per-packet timestamping only works if all packets are
6361 * timestamped, so enable timestamping in all packets as
6362 * long as one rx filter was configured.
6364 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6365 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6366 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6369 /* enable/disable TX */
6370 regval = rd32(E1000_TSYNCTXCTL);
6371 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6372 regval |= tsync_tx_ctl;
6373 wr32(E1000_TSYNCTXCTL, regval);
6375 /* enable/disable RX */
6376 regval = rd32(E1000_TSYNCRXCTL);
6377 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6378 regval |= tsync_rx_ctl;
6379 wr32(E1000_TSYNCRXCTL, regval);
6381 /* define which PTP packets are time stamped */
6382 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6384 /* define ethertype filter for timestamped packets */
6387 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6388 E1000_ETQF_1588 | /* enable timestamping */
6389 ETH_P_1588)); /* 1588 eth protocol type */
6391 wr32(E1000_ETQF(3), 0);
6393 #define PTP_PORT 319
6394 /* L4 Queue Filter[3]: filter by destination port and protocol */
6396 u32 ftqf = (IPPROTO_UDP /* UDP */
6397 | E1000_FTQF_VF_BP /* VF not compared */
6398 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6399 | E1000_FTQF_MASK); /* mask all inputs */
6400 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6402 wr32(E1000_IMIR(3), htons(PTP_PORT));
6403 wr32(E1000_IMIREXT(3),
6404 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6405 if (hw->mac.type == e1000_82576) {
6406 /* enable source port check */
6407 wr32(E1000_SPQF(3), htons(PTP_PORT));
6408 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6410 wr32(E1000_FTQF(3), ftqf);
6412 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6416 adapter->hwtstamp_config = config;
6418 /* clear TX/RX time stamp registers, just to be sure */
6419 regval = rd32(E1000_TXSTMPH);
6420 regval = rd32(E1000_RXSTMPH);
6422 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6432 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6438 return igb_mii_ioctl(netdev, ifr, cmd);
6440 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6446 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6448 struct igb_adapter *adapter = hw->back;
6451 cap_offset = adapter->pdev->pcie_cap;
6453 return -E1000_ERR_CONFIG;
6455 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6460 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6462 struct igb_adapter *adapter = hw->back;
6465 cap_offset = adapter->pdev->pcie_cap;
6467 return -E1000_ERR_CONFIG;
6469 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6474 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6476 struct igb_adapter *adapter = netdev_priv(netdev);
6477 struct e1000_hw *hw = &adapter->hw;
6479 bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6482 /* enable VLAN tag insert/strip */
6483 ctrl = rd32(E1000_CTRL);
6484 ctrl |= E1000_CTRL_VME;
6485 wr32(E1000_CTRL, ctrl);
6487 /* Disable CFI check */
6488 rctl = rd32(E1000_RCTL);
6489 rctl &= ~E1000_RCTL_CFIEN;
6490 wr32(E1000_RCTL, rctl);
6492 /* disable VLAN tag insert/strip */
6493 ctrl = rd32(E1000_CTRL);
6494 ctrl &= ~E1000_CTRL_VME;
6495 wr32(E1000_CTRL, ctrl);
6498 igb_rlpml_set(adapter);
6501 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6503 struct igb_adapter *adapter = netdev_priv(netdev);
6504 struct e1000_hw *hw = &adapter->hw;
6505 int pf_id = adapter->vfs_allocated_count;
6507 /* attempt to add filter to vlvf array */
6508 igb_vlvf_set(adapter, vid, true, pf_id);
6510 /* add the filter since PF can receive vlans w/o entry in vlvf */
6511 igb_vfta_set(hw, vid, true);
6513 set_bit(vid, adapter->active_vlans);
6518 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6520 struct igb_adapter *adapter = netdev_priv(netdev);
6521 struct e1000_hw *hw = &adapter->hw;
6522 int pf_id = adapter->vfs_allocated_count;
6525 /* remove vlan from VLVF table array */
6526 err = igb_vlvf_set(adapter, vid, false, pf_id);
6528 /* if vid was not present in VLVF just remove it from table */
6530 igb_vfta_set(hw, vid, false);
6532 clear_bit(vid, adapter->active_vlans);
6537 static void igb_restore_vlan(struct igb_adapter *adapter)
6541 igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6543 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6544 igb_vlan_rx_add_vid(adapter->netdev, vid);
6547 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6549 struct pci_dev *pdev = adapter->pdev;
6550 struct e1000_mac_info *mac = &adapter->hw.mac;
6554 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6555 * for the switch() below to work */
6556 if ((spd & 1) || (dplx & ~1))
6559 /* Fiber NIC's only allow 1000 Gbps Full duplex */
6560 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6561 spd != SPEED_1000 &&
6562 dplx != DUPLEX_FULL)
6565 switch (spd + dplx) {
6566 case SPEED_10 + DUPLEX_HALF:
6567 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6569 case SPEED_10 + DUPLEX_FULL:
6570 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6572 case SPEED_100 + DUPLEX_HALF:
6573 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6575 case SPEED_100 + DUPLEX_FULL:
6576 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6578 case SPEED_1000 + DUPLEX_FULL:
6580 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6582 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6589 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6593 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6596 struct net_device *netdev = pci_get_drvdata(pdev);
6597 struct igb_adapter *adapter = netdev_priv(netdev);
6598 struct e1000_hw *hw = &adapter->hw;
6599 u32 ctrl, rctl, status;
6600 u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6605 netif_device_detach(netdev);
6607 if (netif_running(netdev))
6608 __igb_close(netdev, true);
6610 igb_clear_interrupt_scheme(adapter);
6613 retval = pci_save_state(pdev);
6618 status = rd32(E1000_STATUS);
6619 if (status & E1000_STATUS_LU)
6620 wufc &= ~E1000_WUFC_LNKC;
6623 igb_setup_rctl(adapter);
6624 igb_set_rx_mode(netdev);
6626 /* turn on all-multi mode if wake on multicast is enabled */
6627 if (wufc & E1000_WUFC_MC) {
6628 rctl = rd32(E1000_RCTL);
6629 rctl |= E1000_RCTL_MPE;
6630 wr32(E1000_RCTL, rctl);
6633 ctrl = rd32(E1000_CTRL);
6634 /* advertise wake from D3Cold */
6635 #define E1000_CTRL_ADVD3WUC 0x00100000
6636 /* phy power management enable */
6637 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6638 ctrl |= E1000_CTRL_ADVD3WUC;
6639 wr32(E1000_CTRL, ctrl);
6641 /* Allow time for pending master requests to run */
6642 igb_disable_pcie_master(hw);
6644 wr32(E1000_WUC, E1000_WUC_PME_EN);
6645 wr32(E1000_WUFC, wufc);
6648 wr32(E1000_WUFC, 0);
6651 *enable_wake = wufc || adapter->en_mng_pt;
6653 igb_power_down_link(adapter);
6655 igb_power_up_link(adapter);
6657 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6658 * would have already happened in close and is redundant. */
6659 igb_release_hw_control(adapter);
6661 pci_disable_device(pdev);
6667 #ifdef CONFIG_PM_SLEEP
6668 static int igb_suspend(struct device *dev)
6672 struct pci_dev *pdev = to_pci_dev(dev);
6674 retval = __igb_shutdown(pdev, &wake, 0);
6679 pci_prepare_to_sleep(pdev);
6681 pci_wake_from_d3(pdev, false);
6682 pci_set_power_state(pdev, PCI_D3hot);
6687 #endif /* CONFIG_PM_SLEEP */
6689 static int igb_resume(struct device *dev)
6691 struct pci_dev *pdev = to_pci_dev(dev);
6692 struct net_device *netdev = pci_get_drvdata(pdev);
6693 struct igb_adapter *adapter = netdev_priv(netdev);
6694 struct e1000_hw *hw = &adapter->hw;
6697 pci_set_power_state(pdev, PCI_D0);
6698 pci_restore_state(pdev);
6699 pci_save_state(pdev);
6701 err = pci_enable_device_mem(pdev);
6704 "igb: Cannot enable PCI device from suspend\n");
6707 pci_set_master(pdev);
6709 pci_enable_wake(pdev, PCI_D3hot, 0);
6710 pci_enable_wake(pdev, PCI_D3cold, 0);
6712 if (igb_init_interrupt_scheme(adapter)) {
6713 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6719 /* let the f/w know that the h/w is now under the control of the
6721 igb_get_hw_control(adapter);
6723 wr32(E1000_WUS, ~0);
6725 if (netdev->flags & IFF_UP) {
6726 err = __igb_open(netdev, true);
6731 netif_device_attach(netdev);
6735 #ifdef CONFIG_PM_RUNTIME
6736 static int igb_runtime_idle(struct device *dev)
6738 struct pci_dev *pdev = to_pci_dev(dev);
6739 struct net_device *netdev = pci_get_drvdata(pdev);
6740 struct igb_adapter *adapter = netdev_priv(netdev);
6742 if (!igb_has_link(adapter))
6743 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6748 static int igb_runtime_suspend(struct device *dev)
6750 struct pci_dev *pdev = to_pci_dev(dev);
6754 retval = __igb_shutdown(pdev, &wake, 1);
6759 pci_prepare_to_sleep(pdev);
6761 pci_wake_from_d3(pdev, false);
6762 pci_set_power_state(pdev, PCI_D3hot);
6768 static int igb_runtime_resume(struct device *dev)
6770 return igb_resume(dev);
6772 #endif /* CONFIG_PM_RUNTIME */
6775 static void igb_shutdown(struct pci_dev *pdev)
6779 __igb_shutdown(pdev, &wake, 0);
6781 if (system_state == SYSTEM_POWER_OFF) {
6782 pci_wake_from_d3(pdev, wake);
6783 pci_set_power_state(pdev, PCI_D3hot);
6787 #ifdef CONFIG_NET_POLL_CONTROLLER
6789 * Polling 'interrupt' - used by things like netconsole to send skbs
6790 * without having to re-enable interrupts. It's not called while
6791 * the interrupt routine is executing.
6793 static void igb_netpoll(struct net_device *netdev)
6795 struct igb_adapter *adapter = netdev_priv(netdev);
6796 struct e1000_hw *hw = &adapter->hw;
6797 struct igb_q_vector *q_vector;
6800 for (i = 0; i < adapter->num_q_vectors; i++) {
6801 q_vector = adapter->q_vector[i];
6802 if (adapter->msix_entries)
6803 wr32(E1000_EIMC, q_vector->eims_value);
6805 igb_irq_disable(adapter);
6806 napi_schedule(&q_vector->napi);
6809 #endif /* CONFIG_NET_POLL_CONTROLLER */
6812 * igb_io_error_detected - called when PCI error is detected
6813 * @pdev: Pointer to PCI device
6814 * @state: The current pci connection state
6816 * This function is called after a PCI bus error affecting
6817 * this device has been detected.
6819 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6820 pci_channel_state_t state)
6822 struct net_device *netdev = pci_get_drvdata(pdev);
6823 struct igb_adapter *adapter = netdev_priv(netdev);
6825 netif_device_detach(netdev);
6827 if (state == pci_channel_io_perm_failure)
6828 return PCI_ERS_RESULT_DISCONNECT;
6830 if (netif_running(netdev))
6832 pci_disable_device(pdev);
6834 /* Request a slot slot reset. */
6835 return PCI_ERS_RESULT_NEED_RESET;
6839 * igb_io_slot_reset - called after the pci bus has been reset.
6840 * @pdev: Pointer to PCI device
6842 * Restart the card from scratch, as if from a cold-boot. Implementation
6843 * resembles the first-half of the igb_resume routine.
6845 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6847 struct net_device *netdev = pci_get_drvdata(pdev);
6848 struct igb_adapter *adapter = netdev_priv(netdev);
6849 struct e1000_hw *hw = &adapter->hw;
6850 pci_ers_result_t result;
6853 if (pci_enable_device_mem(pdev)) {
6855 "Cannot re-enable PCI device after reset.\n");
6856 result = PCI_ERS_RESULT_DISCONNECT;
6858 pci_set_master(pdev);
6859 pci_restore_state(pdev);
6860 pci_save_state(pdev);
6862 pci_enable_wake(pdev, PCI_D3hot, 0);
6863 pci_enable_wake(pdev, PCI_D3cold, 0);
6866 wr32(E1000_WUS, ~0);
6867 result = PCI_ERS_RESULT_RECOVERED;
6870 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6872 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6873 "failed 0x%0x\n", err);
6874 /* non-fatal, continue */
6881 * igb_io_resume - called when traffic can start flowing again.
6882 * @pdev: Pointer to PCI device
6884 * This callback is called when the error recovery driver tells us that
6885 * its OK to resume normal operation. Implementation resembles the
6886 * second-half of the igb_resume routine.
6888 static void igb_io_resume(struct pci_dev *pdev)
6890 struct net_device *netdev = pci_get_drvdata(pdev);
6891 struct igb_adapter *adapter = netdev_priv(netdev);
6893 if (netif_running(netdev)) {
6894 if (igb_up(adapter)) {
6895 dev_err(&pdev->dev, "igb_up failed after reset\n");
6900 netif_device_attach(netdev);
6902 /* let the f/w know that the h/w is now under the control of the
6904 igb_get_hw_control(adapter);
6907 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6910 u32 rar_low, rar_high;
6911 struct e1000_hw *hw = &adapter->hw;
6913 /* HW expects these in little endian so we reverse the byte order
6914 * from network order (big endian) to little endian
6916 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6917 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6918 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6920 /* Indicate to hardware the Address is Valid. */
6921 rar_high |= E1000_RAH_AV;
6923 if (hw->mac.type == e1000_82575)
6924 rar_high |= E1000_RAH_POOL_1 * qsel;
6926 rar_high |= E1000_RAH_POOL_1 << qsel;
6928 wr32(E1000_RAL(index), rar_low);
6930 wr32(E1000_RAH(index), rar_high);
6934 static int igb_set_vf_mac(struct igb_adapter *adapter,
6935 int vf, unsigned char *mac_addr)
6937 struct e1000_hw *hw = &adapter->hw;
6938 /* VF MAC addresses start at end of receive addresses and moves
6939 * torwards the first, as a result a collision should not be possible */
6940 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6942 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6944 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6949 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6951 struct igb_adapter *adapter = netdev_priv(netdev);
6952 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6954 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6955 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6956 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6957 " change effective.");
6958 if (test_bit(__IGB_DOWN, &adapter->state)) {
6959 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6960 " but the PF device is not up.\n");
6961 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6962 " attempting to use the VF device.\n");
6964 return igb_set_vf_mac(adapter, vf, mac);
6967 static int igb_link_mbps(int internal_link_speed)
6969 switch (internal_link_speed) {
6979 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6986 /* Calculate the rate factor values to set */
6987 rf_int = link_speed / tx_rate;
6988 rf_dec = (link_speed - (rf_int * tx_rate));
6989 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6991 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6992 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6993 E1000_RTTBCNRC_RF_INT_MASK);
6994 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6999 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7001 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
7002 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
7004 wr32(E1000_RTTBCNRM, 0x14);
7005 wr32(E1000_RTTBCNRC, bcnrc_val);
7008 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7010 int actual_link_speed, i;
7011 bool reset_rate = false;
7013 /* VF TX rate limit was not set or not supported */
7014 if ((adapter->vf_rate_link_speed == 0) ||
7015 (adapter->hw.mac.type != e1000_82576))
7018 actual_link_speed = igb_link_mbps(adapter->link_speed);
7019 if (actual_link_speed != adapter->vf_rate_link_speed) {
7021 adapter->vf_rate_link_speed = 0;
7022 dev_info(&adapter->pdev->dev,
7023 "Link speed has been changed. VF Transmit "
7024 "rate is disabled\n");
7027 for (i = 0; i < adapter->vfs_allocated_count; i++) {
7029 adapter->vf_data[i].tx_rate = 0;
7031 igb_set_vf_rate_limit(&adapter->hw, i,
7032 adapter->vf_data[i].tx_rate,
7037 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7039 struct igb_adapter *adapter = netdev_priv(netdev);
7040 struct e1000_hw *hw = &adapter->hw;
7041 int actual_link_speed;
7043 if (hw->mac.type != e1000_82576)
7046 actual_link_speed = igb_link_mbps(adapter->link_speed);
7047 if ((vf >= adapter->vfs_allocated_count) ||
7048 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7049 (tx_rate < 0) || (tx_rate > actual_link_speed))
7052 adapter->vf_rate_link_speed = actual_link_speed;
7053 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7054 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7059 static int igb_ndo_get_vf_config(struct net_device *netdev,
7060 int vf, struct ifla_vf_info *ivi)
7062 struct igb_adapter *adapter = netdev_priv(netdev);
7063 if (vf >= adapter->vfs_allocated_count)
7066 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7067 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7068 ivi->vlan = adapter->vf_data[vf].pf_vlan;
7069 ivi->qos = adapter->vf_data[vf].pf_qos;
7073 static void igb_vmm_control(struct igb_adapter *adapter)
7075 struct e1000_hw *hw = &adapter->hw;
7078 switch (hw->mac.type) {
7083 /* replication is not supported for 82575 */
7086 /* notify HW that the MAC is adding vlan tags */
7087 reg = rd32(E1000_DTXCTL);
7088 reg |= E1000_DTXCTL_VLAN_ADDED;
7089 wr32(E1000_DTXCTL, reg);
7091 /* enable replication vlan tag stripping */
7092 reg = rd32(E1000_RPLOLR);
7093 reg |= E1000_RPLOLR_STRVLAN;
7094 wr32(E1000_RPLOLR, reg);
7096 /* none of the above registers are supported by i350 */
7100 if (adapter->vfs_allocated_count) {
7101 igb_vmdq_set_loopback_pf(hw, true);
7102 igb_vmdq_set_replication_pf(hw, true);
7103 igb_vmdq_set_anti_spoofing_pf(hw, true,
7104 adapter->vfs_allocated_count);
7106 igb_vmdq_set_loopback_pf(hw, false);
7107 igb_vmdq_set_replication_pf(hw, false);
7111 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7113 struct e1000_hw *hw = &adapter->hw;
7117 if (hw->mac.type > e1000_82580) {
7118 if (adapter->flags & IGB_FLAG_DMAC) {
7121 /* force threshold to 0. */
7122 wr32(E1000_DMCTXTH, 0);
7125 * DMA Coalescing high water mark needs to be greater
7126 * than the Rx threshold. Set hwm to PBA - max frame
7127 * size in 16B units, capping it at PBA - 6KB.
7129 hwm = 64 * pba - adapter->max_frame_size / 16;
7130 if (hwm < 64 * (pba - 6))
7131 hwm = 64 * (pba - 6);
7132 reg = rd32(E1000_FCRTC);
7133 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7134 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7135 & E1000_FCRTC_RTH_COAL_MASK);
7136 wr32(E1000_FCRTC, reg);
7139 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7140 * frame size, capping it at PBA - 10KB.
7142 dmac_thr = pba - adapter->max_frame_size / 512;
7143 if (dmac_thr < pba - 10)
7144 dmac_thr = pba - 10;
7145 reg = rd32(E1000_DMACR);
7146 reg &= ~E1000_DMACR_DMACTHR_MASK;
7147 reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7148 & E1000_DMACR_DMACTHR_MASK);
7150 /* transition to L0x or L1 if available..*/
7151 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7153 /* watchdog timer= +-1000 usec in 32usec intervals */
7156 /* Disable BMC-to-OS Watchdog Enable */
7157 reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
7158 wr32(E1000_DMACR, reg);
7161 * no lower threshold to disable
7162 * coalescing(smart fifb)-UTRESH=0
7164 wr32(E1000_DMCRTRH, 0);
7166 reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7168 wr32(E1000_DMCTLX, reg);
7171 * free space in tx packet buffer to wake from
7174 wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7175 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7178 * make low power state decision controlled
7181 reg = rd32(E1000_PCIEMISC);
7182 reg &= ~E1000_PCIEMISC_LX_DECISION;
7183 wr32(E1000_PCIEMISC, reg);
7184 } /* endif adapter->dmac is not disabled */
7185 } else if (hw->mac.type == e1000_82580) {
7186 u32 reg = rd32(E1000_PCIEMISC);
7187 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7188 wr32(E1000_DMACR, 0);