/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2009 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"

#define MAJ 3
#define MIN 0
#define BUILD 6
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
	__stringify(BUILD) "-k"
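/*
 * With the MAJ/MIN/BUILD values restored above, the preprocessor expands
 * DRV_VERSION to the string "3.0.6-k"; __stringify() turns each numeric
 * macro into a string literal before the literals are concatenated.
 */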
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
		"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static bool igb_clean_tx_irq(struct igb_q_vector *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */
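/* For example, loading the driver with "modprobe igb max_vfs=7" asks each
 * physical function to create seven SR-IOV virtual functions, which is the
 * hardware maximum for 82576-class parts. */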
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};


static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend  = igb_suspend,
	.resume   = igb_resume,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
struct igb_reg_info {
	u32 ofs;
	char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

	/* General Registers */
	{E1000_CTRL, "CTRL"},
	{E1000_STATUS, "STATUS"},
	{E1000_CTRL_EXT, "CTRL_EXT"},

	/* Interrupt Registers */
	{E1000_ICR, "ICR"},

	/* RX Registers */
	{E1000_RCTL, "RCTL"},
	{E1000_RDLEN(0), "RDLEN"},
	{E1000_RDH(0), "RDH"},
	{E1000_RDT(0), "RDT"},
	{E1000_RXDCTL(0), "RXDCTL"},
	{E1000_RDBAL(0), "RDBAL"},
	{E1000_RDBAH(0), "RDBAH"},

	/* TX Registers */
	{E1000_TCTL, "TCTL"},
	{E1000_TDBAL(0), "TDBAL"},
	{E1000_TDBAH(0), "TDBAH"},
	{E1000_TDLEN(0), "TDLEN"},
	{E1000_TDH(0), "TDH"},
	{E1000_TDT(0), "TDT"},
	{E1000_TXDCTL(0), "TXDCTL"},
	{E1000_TDFH, "TDFH"},
	{E1000_TDFT, "TDFT"},
	{E1000_TDFHS, "TDFHS"},
	{E1000_TDFPC, "TDFPC"},

	/* List Terminator */
	{}
};
/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
	int n = 0;
	char rname[16];
	u32 regs[8];

	switch (reginfo->ofs) {
	case E1000_RDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDLEN(n));
		break;
	case E1000_RDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDH(n));
		break;
	case E1000_RDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDT(n));
		break;
	case E1000_RXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RXDCTL(n));
		break;
	case E1000_RDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_RDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAH(n));
		break;
	case E1000_TDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAL(n));
		break;
	case E1000_TDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAH(n));
		break;
	case E1000_TDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDLEN(n));
		break;
	case E1000_TDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDH(n));
		break;
	case E1000_TDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDT(n));
		break;
	case E1000_TXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TXDCTL(n));
		break;
	default:
		printk(KERN_INFO "%-15s %08x\n",
			reginfo->name, rd32(reginfo->ofs));
		return;
	}

	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
	printk(KERN_INFO "%-15s ", rname);
	for (n = 0; n < 4; n++)
		printk(KERN_CONT "%08x ", regs[n]);
	printk(KERN_CONT "\n");
}
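/* Example output for a per-queue register, assuming four rings with the
 * default descriptor count:
 *   RDLEN[0-3]      00001000 00001000 00001000 00001000
 */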
/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_reg_info *reginfo;
	int n = 0;
	struct igb_ring *tx_ring;
	union e1000_adv_tx_desc *tx_desc;
	struct my_u0 { u64 a; u64 b; } *u0;
	struct igb_buffer *buffer_info;
	struct igb_ring *rx_ring;
	union e1000_adv_rx_desc *rx_desc;
	u32 staterr;
	int i = 0;

	if (!netif_msg_hw(adapter))
		return;

	/* Print netdevice Info */
	if (netdev) {
		dev_info(&adapter->pdev->dev, "Net device Info\n");
		printk(KERN_INFO "Device Name     state            "
			"trans_start      last_rx\n");
		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
			netdev->name,
			netdev->state,
			netdev->trans_start,
			netdev->last_rx);
	}

	/* Print Registers */
	dev_info(&adapter->pdev->dev, "Register Dump\n");
	printk(KERN_INFO " Register Name   Value\n");
	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
	     reginfo->name; reginfo++) {
		igb_regdump(hw, reginfo);
	}

	/* Print TX Ring Summary */
	if (!netdev || !netif_running(netdev))
		goto exit;

	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
		" leng ntw timestamp\n");
	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
		printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
			n, tx_ring->next_to_use, tx_ring->next_to_clean,
			(u64)buffer_info->dma,
			buffer_info->length,
			buffer_info->next_to_watch,
			(u64)buffer_info->time_stamp);
	}

	/* Print TX Rings */
	if (!netif_msg_tx_done(adapter))
		goto rx_ring_summary;

	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

	/* Transmit Descriptor Formats
	 *
	 * Advanced Transmit Descriptor
	 *   +--------------------------------------------------------------+
	 * 0 |         Buffer Address [63:0]                                |
	 *   +--------------------------------------------------------------+
	 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN    |
	 *   +--------------------------------------------------------------+
	 *   63     46 45   40 39  38 36 35 32 31 24              15      0
	 */

	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "T [desc]     [address 63:0  ] "
			"[PlPOCIStDDM Ln] [bi->dma       ] "
			"leng ntw timestamp        bi->skb\n");

		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
			tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
			buffer_info = &tx_ring->buffer_info[i];
			u0 = (struct my_u0 *)tx_desc;
			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
				" %04X %3X %016llX %p", i,
				le64_to_cpu(u0->a),
				le64_to_cpu(u0->b),
				(u64)buffer_info->dma,
				buffer_info->length,
				buffer_info->next_to_watch,
				(u64)buffer_info->time_stamp,
				buffer_info->skb);
			if (i == tx_ring->next_to_use &&
			    i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC/U\n");
			else if (i == tx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");

			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
				print_hex_dump(KERN_INFO, "",
					DUMP_PREFIX_ADDRESS,
					16, 1, phys_to_virt(buffer_info->dma),
					buffer_info->length, true);
		}
	}

	/* Print RX Rings Summary */
rx_ring_summary:
	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC]\n");
	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO " %5d %5X %5X\n", n,
			rx_ring->next_to_use, rx_ring->next_to_clean);
	}

	/* Print RX Rings */
	if (!netif_msg_rx_status(adapter))
		goto exit;

	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

	/* Advanced Receive Descriptor (Read) Format
	 *    63                                           1        0
	 *    +-----------------------------------------------------+
	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
	 *    +----------------------------------------------+------+
	 *  8 |       Header Buffer Address [63:1]           |  DD  |
	 *    +-----------------------------------------------------+
	 *
	 *
	 * Advanced Receive Descriptor (Write-Back) Format
	 *
	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
	 *   +------------------------------------------------------+
	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
	 *   | Checksum   Ident  |   |           |    | Type | Type |
	 *   +------------------------------------------------------+
	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
	 *   +------------------------------------------------------+
	 *   63       48 47    32 31            20 19               0
	 */

	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
			"<-- Adv Rx Read format\n");
		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
			"[vl er S cks ln] ---------------- [bi->skb] "
			"<-- Adv Rx Write-Back format\n");

		for (i = 0; i < rx_ring->count; i++) {
			buffer_info = &rx_ring->buffer_info[i];
			rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
			u0 = (struct my_u0 *)rx_desc;
			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
			if (staterr & E1000_RXD_STAT_DD) {
				/* Descriptor Done */
				printk(KERN_INFO "RWB[0x%03X]     %016llX "
					"%016llX ---------------- %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					buffer_info->skb);
			} else {
				printk(KERN_INFO "R  [0x%03X]     %016llX "
					"%016llX %016llX %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					(u64)buffer_info->dma,
					buffer_info->skb);

				if (netif_msg_pktdata(adapter)) {
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(buffer_info->dma),
						rx_ring->rx_buffer_len, true);
					if (rx_ring->rx_buffer_len
					    < IGB_RXBUFFER_1024)
						print_hex_dump(KERN_INFO, "",
						  DUMP_PREFIX_ADDRESS,
						  16, 1,
						  phys_to_virt(
						    buffer_info->page_dma +
						    buffer_info->page_offset),
						  PAGE_SIZE/2, true);
				}
			}

			if (i == rx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == rx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");
		}
	}

exit:
	return;
}
/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 **/
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on the lowest register read.  For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
	 * adjusted TIMINCA, so SYSTIMR will just read as all 0s; ignore it.
	 */
	if (hw->mac.type == e1000_82580) {
		stamp = rd32(E1000_SYSTIMR) >> 8;
		shift = IGB_82580_TSYNC_SHIFT;
	}

	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
	return stamp;
}
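/*
 * On the 82580 the value returned above is therefore
 *   (SYSTIMR >> 8) | (SYSTIML << shift) | (SYSTIMH << (shift + 32)),
 * i.e. the sub-tick remainder bits are dropped and the SYSTIM value is
 * widened by IGB_82580_TSYNC_SHIFT bits for the timecounter.
 */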
/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev;
}

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded.  All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;
	printk(KERN_INFO "%s - version %s\n",
	       igb_driver_string, igb_driver_version);

	printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
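/* Q_IDX_82576 interleaves queue indices so that consecutive values of i map
 * to queue pairs (i, i + 8): i=0 -> 0, i=1 -> 8, i=2 -> 1, i=3 -> 9, ... */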
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
				                               Q_IDX_82576(i);
		}
	case e1000_82575:
	case e1000_82580:
	case e1000_i350:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}

static void igb_free_queues(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		kfree(adapter->tx_ring[i]);
		adapter->tx_ring[i] = NULL;
	}
	for (i = 0; i < adapter->num_rx_queues; i++) {
		kfree(adapter->rx_ring[i]);
		adapter->rx_ring[i] = NULL;
	}
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	struct igb_ring *ring;
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
		adapter->tx_ring[i] = ring;
	}

	for (i = 0; i < adapter->num_rx_queues; i++) {
		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
		ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
		adapter->rx_ring[i] = ring;
	}

	igb_cache_ring_register(adapter);

	return 0;

err:
	igb_free_queues(adapter);

	return -ENOMEM;
}
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	u32 msixbm = 0;
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	u32 ivar, index;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;

	if (q_vector->rx_ring)
		rx_queue = q_vector->rx_ring->reg_idx;
	if (q_vector->tx_ring)
		tx_queue = q_vector->tx_ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
		   or more queues to a vector, we write the appropriate bits
		   into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/* 82576 uses a table-based method for assigning vectors.
		   Each queue has a single entry in the table to which we write
		   a vector number along with a "valid" bit.  Sadly, the layout
		   of the table is somewhat counterintuitive. */
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue < 8) {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			} else {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue < 8) {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			} else {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
	case e1000_i350:
		/* The 82580 uses the same table-based approach as the 82576,
		   but has fewer entries; as a result we carry over for queues
		   greater than 4. */
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue & 0x1) {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			} else {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue & 0x1) {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			} else {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}
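/*
 * A sketch of the IVAR0 layout assumed by the 82576 case above, one 32-bit
 * register per index with one byte per queue/direction:
 *
 *   bits  7:0  - RX queue 'index'        bits 15:8  - TX queue 'index'
 *   bits 23:16 - RX queue 'index + 8'    bits 31:24 - TX queue 'index + 8'
 *
 * Bit 7 of each byte (E1000_IVAR_VALID) marks the entry as valid.
 */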
/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support*/
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
		                      E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
		                E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	wrfl();
}
/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
	                  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx_ring && q_vector->tx_ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
			        q_vector->rx_ring->queue_index);
		else if (q_vector->tx_ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
			        q_vector->tx_ring->queue_index);
		else if (q_vector->rx_ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
			        q_vector->rx_ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
		                  igb_msix_ring, 0, q_vector->name,
		                  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition, if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 **/
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
	                                GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
	                      adapter->msix_entries,
	                      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced queue counts. */
	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
	return netif_set_real_num_rx_queues(adapter->netdev,
	                                    adapter->num_rx_queues);
}
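/*
 * Worked example: with rss_queues = 4, no VFs, and queue pairing disabled,
 * the code above requests 4 (rx) + 4 (tx) + 1 (link status) = 9 MSI-X
 * vectors.  If pci_enable_msix() fails, we fall back to a single MSI (or
 * legacy) vector driving one rx and one tx queue.
 */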
/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}
	return 0;

err_out:
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx_ring = adapter->rx_ring[ring_idx];
	q_vector->rx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx_ring = adapter->tx_ring[ring_idx];
	q_vector->tx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->tx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
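/*
 * Worked example: with 4 rx queues, 2 tx queues and only 4 q_vectors, the
 * else-branch above pairs them as v0:{tx0,rx0}, v1:{tx1,rx1}, v2:{rx2},
 * v3:{rx3}.
 */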
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	err = igb_set_interrupt_capability(adapter);
	if (err)
		return err;

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}
/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(adapter->pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	} else {
		igb_assign_vector(adapter->q_vector[0], 0);
	}

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
		                  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
	                  netdev->name, adapter);

	if (err)
		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}

static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++) {
			struct igb_q_vector *q_vector = adapter->q_vector[i];
			free_irq(adapter->msix_entries[vector++].vector,
			         q_vector);
		}
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}
/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	if (adapter->msix_entries) {
		int i;
		for (i = 0; i < adapter->num_q_vectors; i++)
			synchronize_irq(adapter->msix_entries[i].vector);
	} else {
		synchronize_irq(adapter->pdev->irq);
	}
}

/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		if (adapter->hw.mac.type == e1000_82580)
			ims |= E1000_IMS_DRSTA;

		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}

static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !vlan_group_get_device(adapter->vlgrp, old_vid)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}
/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
	}
}

/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
}

/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}
	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}

void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_disable(&q_vector->napi);
	}

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset*/
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA
	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}

void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}
void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_i350:
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
1661 fc->current_mode = fc->requested_mode;
1663 /* disable receive for all VFs and wait one second */
1664 if (adapter->vfs_allocated_count) {
1666 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1667 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1669 /* ping all the active vfs to let them know we are going down */
1670 igb_ping_all_vfs(adapter);
1672 /* disable transmits and receives */
1673 wr32(E1000_VFRE, 0);
1674 wr32(E1000_VFTE, 0);
1677 /* Allow time for pending master requests to run */
1678 hw->mac.ops.reset_hw(hw);
1681 if (hw->mac.ops.init_hw(hw))
1682 dev_err(&pdev->dev, "Hardware Error\n");
1683 if (hw->mac.type > e1000_82580) {
1684 if (adapter->flags & IGB_FLAG_DMAC) {
1688 * DMA Coalescing high water mark needs to be higher
1689 * than * the * Rx threshold. The Rx threshold is
1690 * currently * pba - 6, so we * should use a high water
1691 * mark of pba * - 4. */
1692 hwm = (pba - 4) << 10;
1694 reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1695 & E1000_DMACR_DMACTHR_MASK);
1697 /* transition to L0x or L1 if available..*/
1698 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1700 /* watchdog timer= +-1000 usec in 32usec intervals */
1702 wr32(E1000_DMACR, reg);
1704 /* no lower threshold to disable coalescing(smart fifb)
1706 wr32(E1000_DMCRTRH, 0);
1708 /* set hwm to PBA - 2 * max frame size */
1709 wr32(E1000_FCRTC, hwm);
1712 * This sets the time to wait before requesting tran-
1713 * sition to * low power state to number of usecs needed
1714 * to receive 1 512 * byte frame at gigabit line rate
1716 reg = rd32(E1000_DMCTLX);
1717 reg |= IGB_DMCTLX_DCFLUSH_DIS;
1719 /* Delay 255 usec before entering Lx state. */
1721 wr32(E1000_DMCTLX, reg);
1723 /* free space in Tx packet buffer to wake from DMAC */
1726 (IGB_TX_BUF_4096 + adapter->max_frame_size))
1729 /* make low power state decision controlled by DMAC */
1730 reg = rd32(E1000_PCIEMISC);
1731 reg |= E1000_PCIEMISC_LX_DECISION;
1732 wr32(E1000_PCIEMISC, reg);
1733 } /* end if IGB_FLAG_DMAC set */
1735 if (hw->mac.type == e1000_82580) {
1736 u32 reg = rd32(E1000_PCIEMISC);
1737 wr32(E1000_PCIEMISC,
1738 reg & ~E1000_PCIEMISC_LX_DECISION);
1740 if (!netif_running(adapter->netdev))
1741 igb_power_down_link(adapter);
1743 igb_update_mng_vlan(adapter);
1745 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1746 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
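	/* (ETHERNET_IEEE_VLAN_TYPE is the 802.1Q EtherType, 0x8100; VET tells
	 * the MAC which tag protocol identifier to match when stripping and
	 * inserting VLAN tags.) */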
	igb_get_phy_info(hw);
}
static int igb_set_features(struct net_device *netdev, u32 features)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	int i;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		if (features & NETIF_F_RXCSUM)
			adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
		else
			adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
	}

	return 0;
}

static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame_adv,
	.ndo_get_stats64	= igb_get_stats64,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_multicast_list	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_register	= igb_vlan_rx_register,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
	.ndo_set_features	= igb_set_features,
};
/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
			       const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	s32 ret_val;
	static int global_quad_port_a; /* global quad port a indication */
	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
	unsigned long mmio_start, mmio_len;
	int err, pci_using_dac;
	u16 eeprom_apme_mask = IGB_EEPROM_APME;
	u8 part_str[E1000_PBANUM_LENGTH];

	/* Catch broken hardware that put the wrong VF device ID in
	 * the PCIe SR-IOV capability.
	 */
	if (pdev->is_virtfn) {
		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
		     pci_name(pdev), pdev->vendor, pdev->device);
		return -EINVAL;
	}

	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	pci_using_dac = 0;
	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
	if (!err) {
		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
		if (!err)
			pci_using_dac = 1;
	} else {
		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			err = dma_set_coherent_mask(&pdev->dev,
						    DMA_BIT_MASK(32));
			if (err) {
				dev_err(&pdev->dev, "No usable DMA "
					"configuration, aborting\n");
				goto err_dma;
			}
		}
	}

	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
	                                   IORESOURCE_MEM),
	                                   igb_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	pci_set_master(pdev);
	pci_save_state(pdev);

	err = -ENOMEM;
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
				   IGB_ABS_MAX_TX_QUEUES);
	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;

	mmio_start = pci_resource_start(pdev, 0);
	mmio_len = pci_resource_len(pdev, 0);

	err = -EIO;
	hw->hw_addr = ioremap(mmio_start, mmio_len);
	if (!hw->hw_addr)
		goto err_ioremap;

	netdev->netdev_ops = &igb_netdev_ops;
	igb_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	netdev->mem_start = mmio_start;
	netdev->mem_end = mmio_start + mmio_len;

	/* PCI config space info */
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->revision_id = pdev->revision;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;

	/* Copy the default MAC, PHY and NVM function pointers */
	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
	/* Initialize skew-specific constants */
	err = ei->get_invariants(hw);
	if (err)
		goto err_sw_init;

	/* setup the private structure */
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	igb_get_bus_info_pcie(hw);

	hw->phy.autoneg_wait_to_complete = false;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = false;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (igb_check_reset_block(hw))
		dev_info(&pdev->dev,
			"PHY reset is blocked due to SOL/IDER session.\n");

	netdev->hw_features = NETIF_F_SG |
			   NETIF_F_IP_CSUM |
			   NETIF_F_IPV6_CSUM |
			   NETIF_F_TSO |
			   NETIF_F_TSO6 |
			   NETIF_F_RXCSUM;

	netdev->features = netdev->hw_features |
			   NETIF_F_HW_VLAN_TX |
			   NETIF_F_HW_VLAN_RX |
			   NETIF_F_HW_VLAN_FILTER;

	netdev->vlan_features |= NETIF_F_TSO;
	netdev->vlan_features |= NETIF_F_TSO6;
	netdev->vlan_features |= NETIF_F_IP_CSUM;
	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
	netdev->vlan_features |= NETIF_F_SG;

	if (pci_using_dac) {
		netdev->features |= NETIF_F_HIGHDMA;
		netdev->vlan_features |= NETIF_F_HIGHDMA;
	}

	if (hw->mac.type >= e1000_82576) {
		netdev->hw_features |= NETIF_F_SCTP_CSUM;
		netdev->features |= NETIF_F_SCTP_CSUM;
	}

	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);

	/* before reading the NVM, reset the controller to put the device in a
	 * known good starting state */
	hw->mac.ops.reset_hw(hw);

	/* make sure the NVM is good */
	if (hw->nvm.ops.validate(hw) < 0) {
		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
		err = -EIO;
		goto err_eeprom;
	}

	/* copy the MAC address out of the NVM */
	if (hw->mac.ops.read_mac_addr(hw))
		dev_err(&pdev->dev, "NVM Read Error\n");

	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);

	if (!is_valid_ether_addr(netdev->perm_addr)) {
		dev_err(&pdev->dev, "Invalid MAC Address\n");
		err = -EIO;
		goto err_eeprom;
	}

	setup_timer(&adapter->watchdog_timer, igb_watchdog,
	            (unsigned long) adapter);
	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
	            (unsigned long) adapter);

	INIT_WORK(&adapter->reset_task, igb_reset_task);
	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);

	/* Initialize link properties that are user-changeable */
	adapter->fc_autoneg = true;
	hw->mac.autoneg = true;
	hw->phy.autoneg_advertised = 0x2f;

	hw->fc.requested_mode = e1000_fc_default;
	hw->fc.current_mode = e1000_fc_default;

	igb_validate_mdi_setting(hw);

	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
	 * enable the ACPI Magic Packet filter.
	 */
	if (hw->bus.func == 0)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
	else if (hw->mac.type == e1000_82580)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
		                 &eeprom_data);
	else if (hw->bus.func == 1)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);

	if (eeprom_data & eeprom_apme_mask)
		adapter->eeprom_wol |= E1000_WUFC_MAG;

	/* now that we have the eeprom settings, apply the special cases where
	 * the eeprom may be wrong or the board simply won't support wake on
	 * lan on a particular port */
	switch (pdev->device) {
	case E1000_DEV_ID_82575GB_QUAD_COPPER:
		adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82575EB_FIBER_SERDES:
	case E1000_DEV_ID_82576_FIBER:
	case E1000_DEV_ID_82576_SERDES:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
			adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82576_QUAD_COPPER:
	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->eeprom_wol = 0;
		else
			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}

	/* initialize the wol settings based on the eeprom settings */
	adapter->wol = adapter->eeprom_wol;
	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);

	/* reset the hardware with the new settings */
	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	strcpy(netdev->name, "eth%d");
	err = register_netdev(netdev);
	if (err)
		goto err_register;

	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);

#ifdef CONFIG_IGB_DCA
	if (dca_add_requester(&pdev->dev) == 0) {
		adapter->flags |= IGB_FLAG_DCA_ENABLED;
		dev_info(&pdev->dev, "DCA enabled\n");
		igb_setup_dca(adapter);
	}

#endif
	/* do hw tstamp init after resetting */
	igb_init_hw_timer(adapter);

	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
	/* print bus type/speed/width info */
	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
		 netdev->name,
		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
		                                            "unknown"),
		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
		   "unknown"),
		 netdev->dev_addr);

	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
	if (ret_val)
		strcpy(part_str, "Unknown");
	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
	dev_info(&pdev->dev,
		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
		adapter->msix_entries ? "MSI-X" :
		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
		adapter->num_rx_queues, adapter->num_tx_queues);
	switch (hw->mac.type) {
	case e1000_i350:
		igb_set_eee_i350(hw);
		break;
	default:
		break;
	}
	return 0;

err_register:
	igb_release_hw_control(adapter);
err_eeprom:
	if (!igb_check_reset_block(hw))
		igb_reset_phy(hw);

	if (hw->flash_address)
		iounmap(hw->flash_address);
err_sw_init:
	igb_clear_interrupt_scheme(adapter);
	iounmap(hw->hw_addr);
err_ioremap:
	free_netdev(netdev);
err_alloc_etherdev:
	pci_release_selected_regions(pdev,
	                             pci_select_bars(pdev, IORESOURCE_MEM));
err_pci_reg:
err_dma:
	pci_disable_device(pdev);
	return err;
}
2128 * igb_remove - Device Removal Routine
2129 * @pdev: PCI device information struct
2131 * igb_remove is called by the PCI subsystem to alert the driver
2132 * that it should release a PCI device. This could be caused by a
2133 * Hot-Plug event, or because the driver is going to be removed from memory.
2136 static void __devexit igb_remove(struct pci_dev *pdev)
2138 struct net_device *netdev = pci_get_drvdata(pdev);
2139 struct igb_adapter *adapter = netdev_priv(netdev);
2140 struct e1000_hw *hw = &adapter->hw;
2143 * The watchdog timer may be rescheduled, so explicitly
2144 * disable it from being rescheduled.
2146 set_bit(__IGB_DOWN, &adapter->state);
2147 del_timer_sync(&adapter->watchdog_timer);
2148 del_timer_sync(&adapter->phy_info_timer);
2150 cancel_work_sync(&adapter->reset_task);
2151 cancel_work_sync(&adapter->watchdog_task);
2153 #ifdef CONFIG_IGB_DCA
2154 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2155 dev_info(&pdev->dev, "DCA disabled\n");
2156 dca_remove_requester(&pdev->dev);
2157 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2158 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2162 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2163 * would have already happened in close and is redundant. */
2164 igb_release_hw_control(adapter);
2166 unregister_netdev(netdev);
2168 igb_clear_interrupt_scheme(adapter);
2170 #ifdef CONFIG_PCI_IOV
2171 /* reclaim resources allocated to VFs */
2172 if (adapter->vf_data) {
2173 /* disable iov and allow time for transactions to clear */
2174 pci_disable_sriov(pdev);
2177 kfree(adapter->vf_data);
2178 adapter->vf_data = NULL;
2179 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2181 dev_info(&pdev->dev, "IOV Disabled\n");
2185 iounmap(hw->hw_addr);
2186 if (hw->flash_address)
2187 iounmap(hw->flash_address);
2188 pci_release_selected_regions(pdev,
2189 pci_select_bars(pdev, IORESOURCE_MEM));
2191 free_netdev(netdev);
2193 pci_disable_pcie_error_reporting(pdev);
2195 pci_disable_device(pdev);
2199 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2200 * @adapter: board private structure to initialize
2202 * This function initializes the vf specific data storage and then attempts to
2203 * allocate the VFs. The reason for this ordering is that it is much
2204 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2205 * the memory for the VFs.
2207 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2209 #ifdef CONFIG_PCI_IOV
2210 struct pci_dev *pdev = adapter->pdev;
2212 if (adapter->vfs_allocated_count) {
2213 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2214 sizeof(struct vf_data_storage),
2216 /* if allocation failed then we do not support SR-IOV */
2217 if (!adapter->vf_data) {
2218 adapter->vfs_allocated_count = 0;
2219 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2224 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2225 kfree(adapter->vf_data);
2226 adapter->vf_data = NULL;
2227 #endif /* CONFIG_PCI_IOV */
2228 adapter->vfs_allocated_count = 0;
2229 #ifdef CONFIG_PCI_IOV
2231 unsigned char mac_addr[ETH_ALEN];
2233 dev_info(&pdev->dev, "%d vfs allocated\n",
2234 adapter->vfs_allocated_count);
2235 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2236 random_ether_addr(mac_addr);
2237 igb_set_vf_mac(adapter, i, mac_addr);
2239 /* DMA Coalescing is not supported in IOV mode. */
2240 if (adapter->flags & IGB_FLAG_DMAC)
2241 adapter->flags &= ~IGB_FLAG_DMAC;
2243 #endif /* CONFIG_PCI_IOV */
2248 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2249 * @adapter: board private structure to initialize
2251 * igb_init_hw_timer initializes the function pointer and values for the hw
2252 * timer found in hardware.
2254 static void igb_init_hw_timer(struct igb_adapter *adapter)
2256 struct e1000_hw *hw = &adapter->hw;
2258 switch (hw->mac.type) {
2261 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2262 adapter->cycles.read = igb_read_clock;
2263 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2264 adapter->cycles.mult = 1;
2266 * The 82580 timesync advances the system timer by 8 ns every 8 ns
2267 * and the value cannot be shifted. Instead we need to shift
2268 * the registers to generate a 64bit timer value. As a result
2269 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2270 * 24 in order to generate a larger value for synchronization.
2272 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2273 /* disable system timer temporarily by setting bit 31 */
2274 wr32(E1000_TSAUXC, 0x80000000);
2277 /* Set registers so that rollover occurs soon to test this. */
2278 wr32(E1000_SYSTIMR, 0x00000000);
2279 wr32(E1000_SYSTIML, 0x80000000);
2280 wr32(E1000_SYSTIMH, 0x000000FF);
2283 /* enable system timer by clearing bit 31 */
2284 wr32(E1000_TSAUXC, 0x0);
2287 timecounter_init(&adapter->clock,
2289 ktime_to_ns(ktime_get_real()));
2291 * Synchronize our NIC clock against system wall clock. NIC
2292 * time stamp reading requires ~3us per sample, each sample
2293 * was pretty stable even under load => only require 10
2294 * samples for each offset comparison.
2296 memset(&adapter->compare, 0, sizeof(adapter->compare));
2297 adapter->compare.source = &adapter->clock;
2298 adapter->compare.target = ktime_get_real;
2299 adapter->compare.num_samples = 10;
2300 timecompare_update(&adapter->compare, 0);
2304 * Initialize hardware timer: we keep it running just in case
2305 * that some program needs it later on.
2307 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2308 adapter->cycles.read = igb_read_clock;
2309 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2310 adapter->cycles.mult = 1;
2312 * Scale the NIC clock cycle by a large factor so that
2313 * relatively small clock corrections can be added or
2314 * subtracted at each clock tick. The drawbacks of a large
2315 * factor are a) that the clock register overflows more quickly
2316 * (not such a big deal) and b) that the increment per tick has
2317 * to fit into 24 bits. As a result we need to use a shift of
2318 * 19 so we can fit a value of 16 into the TIMINCA register.
2320 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2322 (1 << E1000_TIMINCA_16NS_SHIFT) |
2323 (16 << IGB_82576_TSYNC_SHIFT));
2325 /* Set registers so that rollover occurs soon to test this. */
2326 wr32(E1000_SYSTIML, 0x00000000);
2327 wr32(E1000_SYSTIMH, 0xFF800000);
2330 timecounter_init(&adapter->clock,
2332 ktime_to_ns(ktime_get_real()));
2334 * Synchronize our NIC clock against system wall clock. NIC
2335 * time stamp reading requires ~3us per sample, each sample
2336 * was pretty stable even under load => only require 10
2337 * samples for each offset comparison.
2339 memset(&adapter->compare, 0, sizeof(adapter->compare));
2340 adapter->compare.source = &adapter->clock;
2341 adapter->compare.target = ktime_get_real;
2342 adapter->compare.num_samples = 10;
2343 timecompare_update(&adapter->compare, 0);
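/*
 * Worked example of the scaling above: with the increment value set just
 * above, SYSTIM advances by (16 << 19) cycle units every 16 ns tick, and
 * the cyclecounter converts back with
 * ns = (cycles * mult) >> shift = ((16 << 19) * 1) >> 19 = 16 ns,
 * so the two factors cancel exactly.
 */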
2346 /* 82575 does not support timesync */
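/*
 * A minimal sketch of what the igb_read_clock callback installed above
 * (adapter->cycles.read) has to do, reconstructed from the register and
 * shift usage in this function; illustrative only, not necessarily the
 * driver's verbatim implementation:
 */
static cycle_t igb_read_clock_sketch(const struct cyclecounter *tc)
{
struct igb_adapter *adapter =
container_of(tc, struct igb_adapter, cycles);
struct e1000_hw *hw = &adapter->hw;
u64 stamp = 0;
int shift = 0;

/* SYSTIM latches on the lowest register read; on 82580 that is
 * SYSTIMR, and the whole value is shifted by 24 as described above */
if (hw->mac.type == e1000_82580) {
stamp = rd32(E1000_SYSTIMR) >> 8;
shift = IGB_82580_TSYNC_SHIFT;
}

stamp |= (u64)rd32(E1000_SYSTIML) << shift;
stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
return stamp;
}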
2354 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2355 * @adapter: board private structure to initialize
2357 * igb_sw_init initializes the Adapter private data structure.
2358 * Fields are initialized based on PCI device information and
2359 * OS network device settings (MTU size).
2361 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2363 struct e1000_hw *hw = &adapter->hw;
2364 struct net_device *netdev = adapter->netdev;
2365 struct pci_dev *pdev = adapter->pdev;
2367 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2369 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2370 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2371 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2372 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2374 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2375 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2377 spin_lock_init(&adapter->stats64_lock);
2378 #ifdef CONFIG_PCI_IOV
2379 switch (hw->mac.type) {
2383 dev_warn(&pdev->dev,
2384 "Maximum of 7 VFs per PF, using max\n");
2385 adapter->vfs_allocated_count = 7;
2387 adapter->vfs_allocated_count = max_vfs;
2392 #endif /* CONFIG_PCI_IOV */
2393 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2394 /* i350 cannot do RSS and SR-IOV at the same time */
2395 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2396 adapter->rss_queues = 1;
2399 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2400 * then we should combine the queues into a queue pair in order to
2401 * conserve interrupts due to limited supply
2403 if ((adapter->rss_queues > 4) ||
2404 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2405 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
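/*
 * Rough arithmetic behind the pairing decision: with rss_queues = 8 and
 * no pairing we would need 8 Rx + 8 Tx queue vectors plus one vector for
 * other causes; sharing one vector per Tx/Rx ring pair cuts that to
 * 8 + 1 (an illustrative count, not the exact MSI-X budget).
 */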
2407 /* This call may decrease the number of queues */
2408 if (igb_init_interrupt_scheme(adapter)) {
2409 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2413 igb_probe_vfs(adapter);
2415 /* Explicitly disable IRQ since the NIC can be in any state. */
2416 igb_irq_disable(adapter);
2418 if (hw->mac.type == e1000_i350)
2419 adapter->flags &= ~IGB_FLAG_DMAC;
2421 set_bit(__IGB_DOWN, &adapter->state);
2426 * igb_open - Called when a network interface is made active
2427 * @netdev: network interface device structure
2429 * Returns 0 on success, negative value on failure
2431 * The open entry point is called when a network interface is made
2432 * active by the system (IFF_UP). At this point all resources needed
2433 * for transmit and receive operations are allocated, the interrupt
2434 * handler is registered with the OS, the watchdog timer is started,
2435 * and the stack is notified that the interface is ready.
2437 static int igb_open(struct net_device *netdev)
2439 struct igb_adapter *adapter = netdev_priv(netdev);
2440 struct e1000_hw *hw = &adapter->hw;
2444 /* disallow open during test */
2445 if (test_bit(__IGB_TESTING, &adapter->state))
2448 netif_carrier_off(netdev);
2450 /* allocate transmit descriptors */
2451 err = igb_setup_all_tx_resources(adapter);
2455 /* allocate receive descriptors */
2456 err = igb_setup_all_rx_resources(adapter);
2460 igb_power_up_link(adapter);
2462 /* before we allocate an interrupt, we must be ready to handle it.
2463 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2464 * as soon as we call pci_request_irq, so we have to setup our
2465 * clean_rx handler before we do so. */
2466 igb_configure(adapter);
2468 err = igb_request_irq(adapter);
2472 /* From here on the code is the same as igb_up() */
2473 clear_bit(__IGB_DOWN, &adapter->state);
2475 for (i = 0; i < adapter->num_q_vectors; i++) {
2476 struct igb_q_vector *q_vector = adapter->q_vector[i];
2477 napi_enable(&q_vector->napi);
2480 /* Clear any pending interrupts. */
2483 igb_irq_enable(adapter);
2485 /* notify VFs that reset has been completed */
2486 if (adapter->vfs_allocated_count) {
2487 u32 reg_data = rd32(E1000_CTRL_EXT);
2488 reg_data |= E1000_CTRL_EXT_PFRSTD;
2489 wr32(E1000_CTRL_EXT, reg_data);
2492 netif_tx_start_all_queues(netdev);
2494 /* start the watchdog. */
2495 hw->mac.get_link_status = 1;
2496 schedule_work(&adapter->watchdog_task);
2501 igb_release_hw_control(adapter);
2502 igb_power_down_link(adapter);
2503 igb_free_all_rx_resources(adapter);
2505 igb_free_all_tx_resources(adapter);
2513 * igb_close - Disables a network interface
2514 * @netdev: network interface device structure
2516 * Returns 0, this is not allowed to fail
2518 * The close entry point is called when an interface is de-activated
2519 * by the OS. The hardware is still under the driver's control, but
2520 * needs to be disabled. A global MAC reset is issued to stop the
2521 * hardware, and all transmit and receive resources are freed.
2523 static int igb_close(struct net_device *netdev)
2525 struct igb_adapter *adapter = netdev_priv(netdev);
2527 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2530 igb_free_irq(adapter);
2532 igb_free_all_tx_resources(adapter);
2533 igb_free_all_rx_resources(adapter);
2539 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2540 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2542 * Return 0 on success, negative on failure
2544 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2546 struct device *dev = tx_ring->dev;
2549 size = sizeof(struct igb_buffer) * tx_ring->count;
2550 tx_ring->buffer_info = vzalloc(size);
2551 if (!tx_ring->buffer_info)
2554 /* round up to nearest 4K */
2555 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2556 tx_ring->size = ALIGN(tx_ring->size, 4096);
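/* e.g. 256 descriptors * 16 bytes each = 4096 is already aligned,
 * while 320 * 16 = 5120 would round up to 8192 */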
2558 tx_ring->desc = dma_alloc_coherent(dev,
2566 tx_ring->next_to_use = 0;
2567 tx_ring->next_to_clean = 0;
2571 vfree(tx_ring->buffer_info);
2573 "Unable to allocate memory for the transmit descriptor ring\n");
2578 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2579 * (Descriptors) for all queues
2580 * @adapter: board private structure
2582 * Return 0 on success, negative on failure
2584 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2586 struct pci_dev *pdev = adapter->pdev;
2589 for (i = 0; i < adapter->num_tx_queues; i++) {
2590 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2593 "Allocation for Tx Queue %u failed\n", i);
2594 for (i--; i >= 0; i--)
2595 igb_free_tx_resources(adapter->tx_ring[i]);
2600 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2601 int r_idx = i % adapter->num_tx_queues;
2602 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2608 * igb_setup_tctl - configure the transmit control registers
2609 * @adapter: Board private structure
2611 void igb_setup_tctl(struct igb_adapter *adapter)
2613 struct e1000_hw *hw = &adapter->hw;
2616 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2617 wr32(E1000_TXDCTL(0), 0);
2619 /* Program the Transmit Control Register */
2620 tctl = rd32(E1000_TCTL);
2621 tctl &= ~E1000_TCTL_CT;
2622 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2623 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2625 igb_config_collision_dist(hw);
2627 /* Enable transmits */
2628 tctl |= E1000_TCTL_EN;
2630 wr32(E1000_TCTL, tctl);
2634 * igb_configure_tx_ring - Configure transmit ring after Reset
2635 * @adapter: board private structure
2636 * @ring: tx ring to configure
2638 * Configure a transmit ring after a reset.
2640 void igb_configure_tx_ring(struct igb_adapter *adapter,
2641 struct igb_ring *ring)
2643 struct e1000_hw *hw = &adapter->hw;
2645 u64 tdba = ring->dma;
2646 int reg_idx = ring->reg_idx;
2648 /* disable the queue */
2649 txdctl = rd32(E1000_TXDCTL(reg_idx));
2650 wr32(E1000_TXDCTL(reg_idx),
2651 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2655 wr32(E1000_TDLEN(reg_idx),
2656 ring->count * sizeof(union e1000_adv_tx_desc));
2657 wr32(E1000_TDBAL(reg_idx),
2658 tdba & 0x00000000ffffffffULL);
2659 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2661 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2662 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2663 writel(0, ring->head);
2664 writel(0, ring->tail);
2666 txdctl |= IGB_TX_PTHRESH;
2667 txdctl |= IGB_TX_HTHRESH << 8;
2668 txdctl |= IGB_TX_WTHRESH << 16;
2670 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2671 wr32(E1000_TXDCTL(reg_idx), txdctl);
2675 * igb_configure_tx - Configure transmit Unit after Reset
2676 * @adapter: board private structure
2678 * Configure the Tx unit of the MAC after a reset.
2680 static void igb_configure_tx(struct igb_adapter *adapter)
2684 for (i = 0; i < adapter->num_tx_queues; i++)
2685 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2689 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2690 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2692 * Returns 0 on success, negative on failure
2694 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2696 struct device *dev = rx_ring->dev;
2699 size = sizeof(struct igb_buffer) * rx_ring->count;
2700 rx_ring->buffer_info = vzalloc(size);
2701 if (!rx_ring->buffer_info)
2704 desc_len = sizeof(union e1000_adv_rx_desc);
2706 /* Round up to nearest 4K */
2707 rx_ring->size = rx_ring->count * desc_len;
2708 rx_ring->size = ALIGN(rx_ring->size, 4096);
2710 rx_ring->desc = dma_alloc_coherent(dev,
2718 rx_ring->next_to_clean = 0;
2719 rx_ring->next_to_use = 0;
2724 vfree(rx_ring->buffer_info);
2725 rx_ring->buffer_info = NULL;
2726 dev_err(dev, "Unable to allocate memory for the receive descriptor ring\n");
2732 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2733 * (Descriptors) for all queues
2734 * @adapter: board private structure
2736 * Return 0 on success, negative on failure
2738 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2740 struct pci_dev *pdev = adapter->pdev;
2743 for (i = 0; i < adapter->num_rx_queues; i++) {
2744 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2747 "Allocation for Rx Queue %u failed\n", i);
2748 for (i--; i >= 0; i--)
2749 igb_free_rx_resources(adapter->rx_ring[i]);
2758 * igb_setup_mrqc - configure the multiple receive queue control registers
2759 * @adapter: Board private structure
2761 static void igb_setup_mrqc(struct igb_adapter *adapter)
2763 struct e1000_hw *hw = &adapter->hw;
2765 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2770 static const u8 rsshash[40] = {
2771 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2772 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2773 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2774 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2776 /* Fill out hash function seeds */
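/* e.g. j = 0 packs rsshash[0..3] little-endian: RSSRK(0) = 0xda565a6d */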
2777 for (j = 0; j < 10; j++) {
2778 u32 rsskey = rsshash[(j * 4)];
2779 rsskey |= rsshash[(j * 4) + 1] << 8;
2780 rsskey |= rsshash[(j * 4) + 2] << 16;
2781 rsskey |= rsshash[(j * 4) + 3] << 24;
2782 array_wr32(E1000_RSSRK(0), j, rsskey);
2785 num_rx_queues = adapter->rss_queues;
2787 if (adapter->vfs_allocated_count) {
2788 /* 82575 and 82576 support 2 RSS queues for VMDq */
2789 switch (hw->mac.type) {
2806 if (hw->mac.type == e1000_82575)
2810 for (j = 0; j < (32 * 4); j++) {
2811 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2813 reta.bytes[j & 3] |= num_rx_queues << shift2;
2815 wr32(E1000_RETA(j >> 2), reta.dword);
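/*
 * The hardware indexes the table written above as RETA[hash & 0x7f];
 * e.g. with num_rx_queues = 4 and shift = 0, successive hash values map
 * to queues 0, 1, 2, 3, 0, ... (shift/shift2 merely reposition the queue
 * number within each entry for 82575 numbering and VMDq pool offsets).
 */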
2819 * Disable raw packet checksumming so that RSS hash is placed in
2820 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2821 * offloads as they are enabled by default
2823 rxcsum = rd32(E1000_RXCSUM);
2824 rxcsum |= E1000_RXCSUM_PCSD;
2826 if (adapter->hw.mac.type >= e1000_82576)
2827 /* Enable Receive Checksum Offload for SCTP */
2828 rxcsum |= E1000_RXCSUM_CRCOFL;
2830 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2831 wr32(E1000_RXCSUM, rxcsum);
2833 /* If VMDq is enabled then we set the appropriate mode for that, else
2834 * we default to RSS so that an RSS hash is calculated per packet even
2835 * if we are only using one queue */
2836 if (adapter->vfs_allocated_count) {
2837 if (hw->mac.type > e1000_82575) {
2838 /* Set the default pool for the PF's first queue */
2839 u32 vtctl = rd32(E1000_VT_CTL);
2840 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2841 E1000_VT_CTL_DISABLE_DEF_POOL);
2842 vtctl |= adapter->vfs_allocated_count <<
2843 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2844 wr32(E1000_VT_CTL, vtctl);
2846 if (adapter->rss_queues > 1)
2847 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2849 mrqc = E1000_MRQC_ENABLE_VMDQ;
2851 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2853 igb_vmm_control(adapter);
2856 * Generate RSS hash based on TCP port numbers and/or
2857 * IPv4/v6 src and dst addresses since UDP cannot be
2858 * hashed reliably due to IP fragmentation
2860 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2861 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2862 E1000_MRQC_RSS_FIELD_IPV6 |
2863 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2864 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2866 wr32(E1000_MRQC, mrqc);
2870 * igb_setup_rctl - configure the receive control registers
2871 * @adapter: Board private structure
2873 void igb_setup_rctl(struct igb_adapter *adapter)
2875 struct e1000_hw *hw = &adapter->hw;
2878 rctl = rd32(E1000_RCTL);
2880 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2881 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2883 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2884 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2887 * enable stripping of CRC. It's unlikely this will break BMC
2888 * redirection as it did with e1000. Newer features require
2889 * that the HW strips the CRC.
2891 rctl |= E1000_RCTL_SECRC;
2893 /* disable store bad packets and clear size bits. */
2894 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2896 /* enable LPE to prevent packets larger than max_frame_size */
2897 rctl |= E1000_RCTL_LPE;
2899 /* disable queue 0 to prevent tail write w/o re-config */
2900 wr32(E1000_RXDCTL(0), 0);
2902 /* Attention!!! For SR-IOV PF driver operations you must enable
2903 * queue drop for all VF and PF queues to prevent head of line blocking
2904 * if an un-trusted VF does not provide descriptors to hardware.
2906 if (adapter->vfs_allocated_count) {
2907 /* set all queue drop enable bits */
2908 wr32(E1000_QDE, ALL_QUEUES);
2911 wr32(E1000_RCTL, rctl);
2914 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2917 struct e1000_hw *hw = &adapter->hw;
2920 /* if it isn't the PF check to see if VFs are enabled and
2921 * increase the size to support vlan tags */
2922 if (vfn < adapter->vfs_allocated_count &&
2923 adapter->vf_data[vfn].vlans_enabled)
2924 size += VLAN_TAG_SIZE;
2926 vmolr = rd32(E1000_VMOLR(vfn));
2927 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2928 vmolr |= size | E1000_VMOLR_LPE;
2929 wr32(E1000_VMOLR(vfn), vmolr);
2935 * igb_rlpml_set - set maximum receive packet size
2936 * @adapter: board private structure
2938 * Configure maximum receivable packet size.
2940 static void igb_rlpml_set(struct igb_adapter *adapter)
2942 u32 max_frame_size = adapter->max_frame_size;
2943 struct e1000_hw *hw = &adapter->hw;
2944 u16 pf_id = adapter->vfs_allocated_count;
2947 max_frame_size += VLAN_TAG_SIZE;
2949 /* if vfs are enabled we set RLPML to the largest possible request
2950 * size and set the VMOLR RLPML to the size we need */
2952 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2953 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2956 wr32(E1000_RLPML, max_frame_size);
2959 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2962 struct e1000_hw *hw = &adapter->hw;
2966 * This register exists only on 82576 and newer, so on older MACs
2967 * we should exit and do nothing
2969 if (hw->mac.type < e1000_82576)
2972 vmolr = rd32(E1000_VMOLR(vfn));
2973 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2975 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2977 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2979 /* clear all bits that might not be set */
2980 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2982 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2983 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2985 * for VMDq only allow the VFs and pool 0 to accept broadcast and
2988 if (vfn <= adapter->vfs_allocated_count)
2989 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2991 wr32(E1000_VMOLR(vfn), vmolr);
2995 * igb_configure_rx_ring - Configure a receive ring after Reset
2996 * @adapter: board private structure
2997 * @ring: receive ring to be configured
2999 * Configure the Rx unit of the MAC after a reset.
3001 void igb_configure_rx_ring(struct igb_adapter *adapter,
3002 struct igb_ring *ring)
3004 struct e1000_hw *hw = &adapter->hw;
3005 u64 rdba = ring->dma;
3006 int reg_idx = ring->reg_idx;
3009 /* disable the queue */
3010 rxdctl = rd32(E1000_RXDCTL(reg_idx));
3011 wr32(E1000_RXDCTL(reg_idx),
3012 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
3014 /* Set DMA base address registers */
3015 wr32(E1000_RDBAL(reg_idx),
3016 rdba & 0x00000000ffffffffULL);
3017 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3018 wr32(E1000_RDLEN(reg_idx),
3019 ring->count * sizeof(union e1000_adv_rx_desc));
3021 /* initialize head and tail */
3022 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3023 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3024 writel(0, ring->head);
3025 writel(0, ring->tail);
3027 /* set descriptor configuration */
3028 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3029 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3030 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3031 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3032 srrctl |= IGB_RXBUFFER_16384 >>
3033 E1000_SRRCTL_BSIZEPKT_SHIFT;
3035 srrctl |= (PAGE_SIZE / 2) >>
3036 E1000_SRRCTL_BSIZEPKT_SHIFT;
3038 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3040 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3041 E1000_SRRCTL_BSIZEPKT_SHIFT;
3042 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3044 if (hw->mac.type == e1000_82580)
3045 srrctl |= E1000_SRRCTL_TIMESTAMP;
3046 /* Only set Drop Enable if we are supporting multiple queues */
3047 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3048 srrctl |= E1000_SRRCTL_DROP_EN;
3050 wr32(E1000_SRRCTL(reg_idx), srrctl);
3052 /* set filtering for VMDQ pools */
3053 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3055 /* enable receive descriptor fetching */
3056 rxdctl = rd32(E1000_RXDCTL(reg_idx));
3057 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3058 rxdctl &= 0xFFF00000;
3059 rxdctl |= IGB_RX_PTHRESH;
3060 rxdctl |= IGB_RX_HTHRESH << 8;
3061 rxdctl |= IGB_RX_WTHRESH << 16;
3062 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3066 * igb_configure_rx - Configure receive Unit after Reset
3067 * @adapter: board private structure
3069 * Configure the Rx unit of the MAC after a reset.
3071 static void igb_configure_rx(struct igb_adapter *adapter)
3075 /* set UTA to appropriate mode */
3076 igb_set_uta(adapter);
3078 /* set the correct pool for the PF default MAC address in entry 0 */
3079 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3080 adapter->vfs_allocated_count);
3082 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3083 * the Base and Length of the Rx Descriptor Ring */
3084 for (i = 0; i < adapter->num_rx_queues; i++)
3085 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3089 * igb_free_tx_resources - Free Tx Resources per Queue
3090 * @tx_ring: Tx descriptor ring for a specific queue
3092 * Free all transmit software resources
3094 void igb_free_tx_resources(struct igb_ring *tx_ring)
3096 igb_clean_tx_ring(tx_ring);
3098 vfree(tx_ring->buffer_info);
3099 tx_ring->buffer_info = NULL;
3101 /* if not set, then don't free */
3105 dma_free_coherent(tx_ring->dev, tx_ring->size,
3106 tx_ring->desc, tx_ring->dma);
3108 tx_ring->desc = NULL;
3112 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3113 * @adapter: board private structure
3115 * Free all transmit software resources
3117 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3121 for (i = 0; i < adapter->num_tx_queues; i++)
3122 igb_free_tx_resources(adapter->tx_ring[i]);
3125 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3126 struct igb_buffer *buffer_info)
3128 if (buffer_info->dma) {
3129 if (buffer_info->mapped_as_page)
3130 dma_unmap_page(tx_ring->dev,
3132 buffer_info->length,
3135 dma_unmap_single(tx_ring->dev,
3137 buffer_info->length,
3139 buffer_info->dma = 0;
3141 if (buffer_info->skb) {
3142 dev_kfree_skb_any(buffer_info->skb);
3143 buffer_info->skb = NULL;
3145 buffer_info->time_stamp = 0;
3146 buffer_info->length = 0;
3147 buffer_info->next_to_watch = 0;
3148 buffer_info->mapped_as_page = false;
3152 * igb_clean_tx_ring - Free Tx Buffers
3153 * @tx_ring: ring to be cleaned
3155 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3157 struct igb_buffer *buffer_info;
3161 if (!tx_ring->buffer_info)
3163 /* Free all the Tx ring sk_buffs */
3165 for (i = 0; i < tx_ring->count; i++) {
3166 buffer_info = &tx_ring->buffer_info[i];
3167 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3170 size = sizeof(struct igb_buffer) * tx_ring->count;
3171 memset(tx_ring->buffer_info, 0, size);
3173 /* Zero out the descriptor ring */
3174 memset(tx_ring->desc, 0, tx_ring->size);
3176 tx_ring->next_to_use = 0;
3177 tx_ring->next_to_clean = 0;
3181 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3182 * @adapter: board private structure
3184 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3188 for (i = 0; i < adapter->num_tx_queues; i++)
3189 igb_clean_tx_ring(adapter->tx_ring[i]);
3193 * igb_free_rx_resources - Free Rx Resources
3194 * @rx_ring: ring to clean the resources from
3196 * Free all receive software resources
3198 void igb_free_rx_resources(struct igb_ring *rx_ring)
3200 igb_clean_rx_ring(rx_ring);
3202 vfree(rx_ring->buffer_info);
3203 rx_ring->buffer_info = NULL;
3205 /* if not set, then don't free */
3209 dma_free_coherent(rx_ring->dev, rx_ring->size,
3210 rx_ring->desc, rx_ring->dma);
3212 rx_ring->desc = NULL;
3216 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3217 * @adapter: board private structure
3219 * Free all receive software resources
3221 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3225 for (i = 0; i < adapter->num_rx_queues; i++)
3226 igb_free_rx_resources(adapter->rx_ring[i]);
3230 * igb_clean_rx_ring - Free Rx Buffers per Queue
3231 * @rx_ring: ring to free buffers from
3233 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3235 struct igb_buffer *buffer_info;
3239 if (!rx_ring->buffer_info)
3242 /* Free all the Rx ring sk_buffs */
3243 for (i = 0; i < rx_ring->count; i++) {
3244 buffer_info = &rx_ring->buffer_info[i];
3245 if (buffer_info->dma) {
3246 dma_unmap_single(rx_ring->dev,
3248 rx_ring->rx_buffer_len,
3250 buffer_info->dma = 0;
3253 if (buffer_info->skb) {
3254 dev_kfree_skb(buffer_info->skb);
3255 buffer_info->skb = NULL;
3257 if (buffer_info->page_dma) {
3258 dma_unmap_page(rx_ring->dev,
3259 buffer_info->page_dma,
3262 buffer_info->page_dma = 0;
3264 if (buffer_info->page) {
3265 put_page(buffer_info->page);
3266 buffer_info->page = NULL;
3267 buffer_info->page_offset = 0;
3271 size = sizeof(struct igb_buffer) * rx_ring->count;
3272 memset(rx_ring->buffer_info, 0, size);
3274 /* Zero out the descriptor ring */
3275 memset(rx_ring->desc, 0, rx_ring->size);
3277 rx_ring->next_to_clean = 0;
3278 rx_ring->next_to_use = 0;
3282 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3283 * @adapter: board private structure
3285 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3289 for (i = 0; i < adapter->num_rx_queues; i++)
3290 igb_clean_rx_ring(adapter->rx_ring[i]);
3294 * igb_set_mac - Change the Ethernet Address of the NIC
3295 * @netdev: network interface device structure
3296 * @p: pointer to an address structure
3298 * Returns 0 on success, negative on failure
3300 static int igb_set_mac(struct net_device *netdev, void *p)
3302 struct igb_adapter *adapter = netdev_priv(netdev);
3303 struct e1000_hw *hw = &adapter->hw;
3304 struct sockaddr *addr = p;
3306 if (!is_valid_ether_addr(addr->sa_data))
3307 return -EADDRNOTAVAIL;
3309 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3310 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3312 /* set the correct pool for the new PF MAC address in entry 0 */
3313 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3314 adapter->vfs_allocated_count);
3320 * igb_write_mc_addr_list - write multicast addresses to MTA
3321 * @netdev: network interface device structure
3323 * Writes multicast address list to the MTA hash table.
3324 * Returns: -ENOMEM on failure
3325 * 0 on no addresses written
3326 * X on writing X addresses to MTA
3328 static int igb_write_mc_addr_list(struct net_device *netdev)
3330 struct igb_adapter *adapter = netdev_priv(netdev);
3331 struct e1000_hw *hw = &adapter->hw;
3332 struct netdev_hw_addr *ha;
3336 if (netdev_mc_empty(netdev)) {
3337 /* nothing to program, so clear mc list */
3338 igb_update_mc_addr_list(hw, NULL, 0);
3339 igb_restore_vf_multicasts(adapter);
3343 mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3347 /* The shared function expects a packed array of only addresses. */
3349 netdev_for_each_mc_addr(ha, netdev)
3350 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3352 igb_update_mc_addr_list(hw, mta_list, i);
3355 return netdev_mc_count(netdev);
3359 * igb_write_uc_addr_list - write unicast addresses to RAR table
3360 * @netdev: network interface device structure
3362 * Writes unicast address list to the RAR table.
3363 * Returns: -ENOMEM on failure/insufficient address space
3364 * 0 on no addresses written
3365 * X on writing X addresses to the RAR table
3367 static int igb_write_uc_addr_list(struct net_device *netdev)
3369 struct igb_adapter *adapter = netdev_priv(netdev);
3370 struct e1000_hw *hw = &adapter->hw;
3371 unsigned int vfn = adapter->vfs_allocated_count;
3372 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3375 /* return ENOMEM indicating insufficient memory for addresses */
3376 if (netdev_uc_count(netdev) > rar_entries)
3379 if (!netdev_uc_empty(netdev) && rar_entries) {
3380 struct netdev_hw_addr *ha;
3382 netdev_for_each_uc_addr(ha, netdev) {
3385 igb_rar_set_qsel(adapter, ha->addr,
3391 /* write the addresses in reverse order to avoid write combining */
3392 for (; rar_entries > 0 ; rar_entries--) {
3393 wr32(E1000_RAH(rar_entries), 0);
3394 wr32(E1000_RAL(rar_entries), 0);
3402 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3403 * @netdev: network interface device structure
3405 * The set_rx_mode entry point is called whenever the unicast or multicast
3406 * address lists or the network interface flags are updated. This routine is
3407 * responsible for configuring the hardware for proper unicast, multicast,
3408 * promiscuous mode, and all-multi behavior.
3410 static void igb_set_rx_mode(struct net_device *netdev)
3412 struct igb_adapter *adapter = netdev_priv(netdev);
3413 struct e1000_hw *hw = &adapter->hw;
3414 unsigned int vfn = adapter->vfs_allocated_count;
3415 u32 rctl, vmolr = 0;
3418 /* Check for Promiscuous and All Multicast modes */
3419 rctl = rd32(E1000_RCTL);
3421 /* clear the affected bits */
3422 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3424 if (netdev->flags & IFF_PROMISC) {
3425 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3426 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3428 if (netdev->flags & IFF_ALLMULTI) {
3429 rctl |= E1000_RCTL_MPE;
3430 vmolr |= E1000_VMOLR_MPME;
3433 * Write addresses to the MTA; if the attempt fails
3434 * then we should just turn on promiscuous mode so
3435 * that we can at least receive multicast traffic
3437 count = igb_write_mc_addr_list(netdev);
3439 rctl |= E1000_RCTL_MPE;
3440 vmolr |= E1000_VMOLR_MPME;
3442 vmolr |= E1000_VMOLR_ROMPE;
3446 * Write addresses to available RAR registers, if there is not
3447 * sufficient space to store all the addresses then enable
3448 * unicast promiscuous mode
3450 count = igb_write_uc_addr_list(netdev);
3452 rctl |= E1000_RCTL_UPE;
3453 vmolr |= E1000_VMOLR_ROPE;
3455 rctl |= E1000_RCTL_VFE;
3457 wr32(E1000_RCTL, rctl);
3460 * In order to support SR-IOV and eventually VMDq it is necessary to set
3461 * the VMOLR to enable the appropriate modes. Without this workaround
3462 * we will have issues with VLAN tag stripping not being done for frames
3463 * that are only arriving because we are the default pool
3465 if (hw->mac.type < e1000_82576)
3468 vmolr |= rd32(E1000_VMOLR(vfn)) &
3469 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3470 wr32(E1000_VMOLR(vfn), vmolr);
3471 igb_restore_vf_multicasts(adapter);
3474 static void igb_check_wvbr(struct igb_adapter *adapter)
3476 struct e1000_hw *hw = &adapter->hw;
3479 switch (hw->mac.type) {
3482 if (!(wvbr = rd32(E1000_WVBR)))
3489 adapter->wvbr |= wvbr;
3492 #define IGB_STAGGERED_QUEUE_OFFSET 8
3494 static void igb_spoof_check(struct igb_adapter *adapter)
3501 for (j = 0; j < adapter->vfs_allocated_count; j++) {
3502 if (adapter->wvbr & (1 << j) ||
3503 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3504 dev_warn(&adapter->pdev->dev,
3505 "Spoof event(s) detected on VF %d\n", j);
3508 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3513 /* Need to wait a few seconds after link up to get diagnostic information from the PHY */
3515 static void igb_update_phy_info(unsigned long data)
3517 struct igb_adapter *adapter = (struct igb_adapter *) data;
3518 igb_get_phy_info(&adapter->hw);
3522 * igb_has_link - check shared code for link and determine up/down
3523 * @adapter: pointer to driver private info
3525 bool igb_has_link(struct igb_adapter *adapter)
3527 struct e1000_hw *hw = &adapter->hw;
3528 bool link_active = false;
3531 /* get_link_status is set on LSC (link status) interrupt or
3532 * rx sequence error interrupt, and stays set until
3533 * e1000_check_for_link re-establishes link; this applies to
3534 * copper adapters ONLY
3536 switch (hw->phy.media_type) {
3537 case e1000_media_type_copper:
3538 if (hw->mac.get_link_status) {
3539 ret_val = hw->mac.ops.check_for_link(hw);
3540 link_active = !hw->mac.get_link_status;
3545 case e1000_media_type_internal_serdes:
3546 ret_val = hw->mac.ops.check_for_link(hw);
3547 link_active = hw->mac.serdes_has_link;
3550 case e1000_media_type_unknown:
3557 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3560 u32 ctrl_ext, thstat;
3562 /* check for thermal sensor event on i350, copper only */
3563 if (hw->mac.type == e1000_i350) {
3564 thstat = rd32(E1000_THSTAT);
3565 ctrl_ext = rd32(E1000_CTRL_EXT);
3567 if ((hw->phy.media_type == e1000_media_type_copper) &&
3568 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3569 ret = !!(thstat & event);
3577 * igb_watchdog - Timer Call-back
3578 * @data: pointer to adapter cast into an unsigned long
3580 static void igb_watchdog(unsigned long data)
3582 struct igb_adapter *adapter = (struct igb_adapter *)data;
3583 /* Do the rest outside of interrupt context */
3584 schedule_work(&adapter->watchdog_task);
3587 static void igb_watchdog_task(struct work_struct *work)
3589 struct igb_adapter *adapter = container_of(work,
3592 struct e1000_hw *hw = &adapter->hw;
3593 struct net_device *netdev = adapter->netdev;
3597 link = igb_has_link(adapter);
3599 if (!netif_carrier_ok(netdev)) {
3601 hw->mac.ops.get_speed_and_duplex(hw,
3602 &adapter->link_speed,
3603 &adapter->link_duplex);
3605 ctrl = rd32(E1000_CTRL);
3606 /* Link status message must follow this format */
3607 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3608 "Flow Control: %s\n",
3610 adapter->link_speed,
3611 adapter->link_duplex == FULL_DUPLEX ?
3612 "Full Duplex" : "Half Duplex",
3613 ((ctrl & E1000_CTRL_TFCE) &&
3614 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3615 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3616 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3618 /* check for thermal sensor event */
3619 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3620 printk(KERN_INFO "igb: %s The network adapter "
3621 "link speed was downshifted "
3622 "because it overheated.\n",
3626 /* adjust timeout factor according to speed/duplex */
3627 adapter->tx_timeout_factor = 1;
3628 switch (adapter->link_speed) {
3630 adapter->tx_timeout_factor = 14;
3633 /* maybe add some timeout factor ? */
3637 netif_carrier_on(netdev);
3639 igb_ping_all_vfs(adapter);
3640 igb_check_vf_rate_limit(adapter);
3642 /* link state has changed, schedule phy info update */
3643 if (!test_bit(__IGB_DOWN, &adapter->state))
3644 mod_timer(&adapter->phy_info_timer,
3645 round_jiffies(jiffies + 2 * HZ));
3648 if (netif_carrier_ok(netdev)) {
3649 adapter->link_speed = 0;
3650 adapter->link_duplex = 0;
3652 /* check for thermal sensor event */
3653 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3654 printk(KERN_ERR "igb: %s The network adapter "
3655 "was stopped because it "
3660 /* Link status message must follow this format */
3661 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3663 netif_carrier_off(netdev);
3665 igb_ping_all_vfs(adapter);
3667 /* link state has changed, schedule phy info update */
3668 if (!test_bit(__IGB_DOWN, &adapter->state))
3669 mod_timer(&adapter->phy_info_timer,
3670 round_jiffies(jiffies + 2 * HZ));
3674 spin_lock(&adapter->stats64_lock);
3675 igb_update_stats(adapter, &adapter->stats64);
3676 spin_unlock(&adapter->stats64_lock);
3678 for (i = 0; i < adapter->num_tx_queues; i++) {
3679 struct igb_ring *tx_ring = adapter->tx_ring[i];
3680 if (!netif_carrier_ok(netdev)) {
3681 /* We've lost link, so the controller stops DMA,
3682 * but we've got queued Tx work that's never going
3683 * to get done, so reset controller to flush Tx.
3684 * (Do the reset outside of interrupt context). */
3685 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3686 adapter->tx_timeout_count++;
3687 schedule_work(&adapter->reset_task);
3688 /* return immediately since reset is imminent */
3693 /* Force detection of hung controller every watchdog period */
3694 tx_ring->detect_tx_hung = true;
3697 /* Cause software interrupt to ensure rx ring is cleaned */
3698 if (adapter->msix_entries) {
3700 for (i = 0; i < adapter->num_q_vectors; i++) {
3701 struct igb_q_vector *q_vector = adapter->q_vector[i];
3702 eics |= q_vector->eims_value;
3704 wr32(E1000_EICS, eics);
3706 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3709 igb_spoof_check(adapter);
3711 /* Reset the timer */
3712 if (!test_bit(__IGB_DOWN, &adapter->state))
3713 mod_timer(&adapter->watchdog_timer,
3714 round_jiffies(jiffies + 2 * HZ));
3717 enum latency_range {
3721 latency_invalid = 255
3725 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3727 * Stores a new ITR value based strictly on packet size. This
3728 * algorithm is less sophisticated than that used in igb_update_itr,
3729 * due to the difficulty of synchronizing statistics across multiple
3730 * receive rings. The divisors and thresholds used by this function
3731 * were determined based on theoretical maximum wire speed and testing
3732 * data, in order to minimize response time while increasing bulk
3734 * This functionality is controlled by the InterruptThrottleRate module
3735 * parameter (see igb_param.c)
3736 * NOTE: This function is called only when operating in a multiqueue
3737 * receive environment.
3738 * @q_vector: pointer to q_vector
3740 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3742 int new_val = q_vector->itr_val;
3743 int avg_wire_size = 0;
3744 struct igb_adapter *adapter = q_vector->adapter;
3745 struct igb_ring *ring;
3746 unsigned int packets;
3748 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3749 * ints/sec.
3751 if (adapter->link_speed != SPEED_1000) {
3756 ring = q_vector->rx_ring;
3758 packets = ACCESS_ONCE(ring->total_packets);
3761 avg_wire_size = ring->total_bytes / packets;
3764 ring = q_vector->tx_ring;
3766 packets = ACCESS_ONCE(ring->total_packets);
3769 avg_wire_size = max_t(u32, avg_wire_size,
3770 ring->total_bytes / packets);
3773 /* if avg_wire_size isn't set no work was done */
3777 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3778 avg_wire_size += 24;
3780 /* Don't starve jumbo frames */
3781 avg_wire_size = min(avg_wire_size, 3000);
3783 /* Give a little boost to mid-size frames */
3784 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3785 new_val = avg_wire_size / 3;
3787 new_val = avg_wire_size / 2;
3789 /* when in itr mode 3 do not exceed 20K ints/sec */
3790 if (adapter->rx_itr_setting == 3 && new_val < 196)
3794 if (new_val != q_vector->itr_val) {
3795 q_vector->itr_val = new_val;
3796 q_vector->set_itr = 1;
3799 if (q_vector->rx_ring) {
3800 q_vector->rx_ring->total_bytes = 0;
3801 q_vector->rx_ring->total_packets = 0;
3803 if (q_vector->tx_ring) {
3804 q_vector->tx_ring->total_bytes = 0;
3805 q_vector->tx_ring->total_packets = 0;
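/*
 * Worked example: 1000 packets totalling 180000 bytes gives
 * avg_wire_size = 180 + 24 = 204; that is outside the 300-1200 boost
 * window, so new_val = 204 / 2 = 102 before the itr-mode-3 clamp,
 * i.e. roughly 38k ints/sec assuming the family's ~256 ns ITR granularity.
 */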
3810 * igb_update_itr - update the dynamic ITR value based on statistics
3811 * Stores a new ITR value based on packets and byte
3812 * counts during the last interrupt. The advantage of per interrupt
3813 * computation is faster updates and more accurate ITR for the current
3814 * traffic pattern. Constants in this function were computed
3815 * based on theoretical maximum wire speed and thresholds were set based
3816 * on testing data as well as attempting to minimize response time
3817 * while increasing bulk throughput.
3818 * this functionality is controlled by the InterruptThrottleRate module
3819 * parameter (see igb_param.c)
3820 * NOTE: These calculations are only valid when operating in a single-
3821 * queue environment.
3822 * @adapter: pointer to adapter
3823 * @itr_setting: current q_vector->itr_val
3824 * @packets: the number of packets during this measurement interval
3825 * @bytes: the number of bytes during this measurement interval
3827 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3828 int packets, int bytes)
3830 unsigned int retval = itr_setting;
3833 goto update_itr_done;
3835 switch (itr_setting) {
3836 case lowest_latency:
3837 /* handle TSO and jumbo frames */
3838 if (bytes/packets > 8000)
3839 retval = bulk_latency;
3840 else if ((packets < 5) && (bytes > 512))
3841 retval = low_latency;
3843 case low_latency: /* 50 usec aka 20000 ints/s */
3844 if (bytes > 10000) {
3845 /* this if handles the TSO accounting */
3846 if (bytes/packets > 8000) {
3847 retval = bulk_latency;
3848 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3849 retval = bulk_latency;
3850 } else if ((packets > 35)) {
3851 retval = lowest_latency;
3853 } else if (bytes/packets > 2000) {
3854 retval = bulk_latency;
3855 } else if (packets <= 2 && bytes < 512) {
3856 retval = lowest_latency;
3859 case bulk_latency: /* 250 usec aka 4000 ints/s */
3860 if (bytes > 25000) {
3862 retval = low_latency;
3863 } else if (bytes < 1500) {
3864 retval = low_latency;
3873 static void igb_set_itr(struct igb_adapter *adapter)
3875 struct igb_q_vector *q_vector = adapter->q_vector[0];
3877 u32 new_itr = q_vector->itr_val;
3879 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3880 if (adapter->link_speed != SPEED_1000) {
3886 adapter->rx_itr = igb_update_itr(adapter,
3888 q_vector->rx_ring->total_packets,
3889 q_vector->rx_ring->total_bytes);
3891 adapter->tx_itr = igb_update_itr(adapter,
3893 q_vector->tx_ring->total_packets,
3894 q_vector->tx_ring->total_bytes);
3895 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3897 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3898 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3899 current_itr = low_latency;
3901 switch (current_itr) {
3902 /* counts and packets in update_itr are dependent on these numbers */
3903 case lowest_latency:
3904 new_itr = 56; /* aka 70,000 ints/sec */
3907 new_itr = 196; /* aka 20,000 ints/sec */
3910 new_itr = 980; /* aka 4,000 ints/sec */
3917 q_vector->rx_ring->total_bytes = 0;
3918 q_vector->rx_ring->total_packets = 0;
3919 q_vector->tx_ring->total_bytes = 0;
3920 q_vector->tx_ring->total_packets = 0;
3922 if (new_itr != q_vector->itr_val) {
3923 /* this attempts to bias the interrupt rate towards Bulk
3924 * by adding intermediate steps when the interrupt rate is increasing */
3926 new_itr = new_itr > q_vector->itr_val ?
3927 max((new_itr * q_vector->itr_val) /
3928 (new_itr + (q_vector->itr_val >> 2)),
3931 /* Don't write the value here; it resets the adapter's
3932 * internal timer, and causes us to delay far longer than
3933 * we should between interrupts. Instead, we write the ITR
3934 * value at the beginning of the next interrupt so the timing
3935 * ends up being correct.
3937 q_vector->itr_val = new_itr;
3938 q_vector->set_itr = 1;
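/*
 * The constants above are consistent with a ~256 ns ITR unit:
 * 56 * 256 ns ~= 14.3 us (~70k ints/sec), 196 ~= 50 us (20k) and
 * 980 ~= 251 us (4k); the unit size is inferred here from those
 * ratios, not taken from the datasheet.
 */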
3942 #define IGB_TX_FLAGS_CSUM 0x00000001
3943 #define IGB_TX_FLAGS_VLAN 0x00000002
3944 #define IGB_TX_FLAGS_TSO 0x00000004
3945 #define IGB_TX_FLAGS_IPV4 0x00000008
3946 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3947 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3948 #define IGB_TX_FLAGS_VLAN_SHIFT 16
3950 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3951 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3953 struct e1000_adv_tx_context_desc *context_desc;
3956 struct igb_buffer *buffer_info;
3957 u32 info = 0, tu_cmd = 0;
3961 if (skb_header_cloned(skb)) {
3962 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3967 l4len = tcp_hdrlen(skb);
3970 if (skb->protocol == htons(ETH_P_IP)) {
3971 struct iphdr *iph = ip_hdr(skb);
3974 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3978 } else if (skb_is_gso_v6(skb)) {
3979 ipv6_hdr(skb)->payload_len = 0;
3980 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3981 &ipv6_hdr(skb)->daddr,
3985 i = tx_ring->next_to_use;
3987 buffer_info = &tx_ring->buffer_info[i];
3988 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3989 /* VLAN MACLEN IPLEN */
3990 if (tx_flags & IGB_TX_FLAGS_VLAN)
3991 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3992 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3993 *hdr_len += skb_network_offset(skb);
3994 info |= skb_network_header_len(skb);
3995 *hdr_len += skb_network_header_len(skb);
3996 context_desc->vlan_macip_lens = cpu_to_le32(info);
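/* layout just written: bits 31:16 VLAN tag, 15:9 MAC header length,
 * 8:0 IP header length */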
3998 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3999 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4001 if (skb->protocol == htons(ETH_P_IP))
4002 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4003 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4005 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4008 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4009 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4011 /* For 82575, context index must be unique per ring. */
4012 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4013 mss_l4len_idx |= tx_ring->reg_idx << 4;
4015 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4016 context_desc->seqnum_seed = 0;
4018 buffer_info->time_stamp = jiffies;
4019 buffer_info->next_to_watch = i;
4020 buffer_info->dma = 0;
4022 if (i == tx_ring->count)
4025 tx_ring->next_to_use = i;
4030 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4031 struct sk_buff *skb, u32 tx_flags)
4033 struct e1000_adv_tx_context_desc *context_desc;
4034 struct device *dev = tx_ring->dev;
4035 struct igb_buffer *buffer_info;
4036 u32 info = 0, tu_cmd = 0;
4039 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4040 (tx_flags & IGB_TX_FLAGS_VLAN)) {
4041 i = tx_ring->next_to_use;
4042 buffer_info = &tx_ring->buffer_info[i];
4043 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4045 if (tx_flags & IGB_TX_FLAGS_VLAN)
4046 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4048 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4049 if (skb->ip_summed == CHECKSUM_PARTIAL)
4050 info |= skb_network_header_len(skb);
4052 context_desc->vlan_macip_lens = cpu_to_le32(info);
4054 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4056 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4059 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4060 const struct vlan_ethhdr *vhdr =
4061 (const struct vlan_ethhdr*)skb->data;
4063 protocol = vhdr->h_vlan_encapsulated_proto;
4065 protocol = skb->protocol;
4069 case cpu_to_be16(ETH_P_IP):
4070 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4071 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4072 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4073 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4074 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4076 case cpu_to_be16(ETH_P_IPV6):
4077 /* XXX what about other V6 headers?? */
4078 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4079 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4080 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4081 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4084 if (unlikely(net_ratelimit()))
4086 "partial checksum but proto=%x!\n",
4092 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4093 context_desc->seqnum_seed = 0;
4094 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4095 context_desc->mss_l4len_idx =
4096 cpu_to_le32(tx_ring->reg_idx << 4);
4098 buffer_info->time_stamp = jiffies;
4099 buffer_info->next_to_watch = i;
4100 buffer_info->dma = 0;
4103 if (i == tx_ring->count)
4105 tx_ring->next_to_use = i;
4112 #define IGB_MAX_TXD_PWR 16
4113 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
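/* i.e. a single data descriptor can carry at most 1 << 16 = 64KB; the
 * BUG_ON() length checks in igb_tx_map_adv() below enforce this */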
4115 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4118 struct igb_buffer *buffer_info;
4119 struct device *dev = tx_ring->dev;
4120 unsigned int hlen = skb_headlen(skb);
4121 unsigned int count = 0, i;
4123 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4125 i = tx_ring->next_to_use;
4127 buffer_info = &tx_ring->buffer_info[i];
4128 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4129 buffer_info->length = hlen;
4130 /* set time_stamp *before* dma to help avoid a possible race */
4131 buffer_info->time_stamp = jiffies;
4132 buffer_info->next_to_watch = i;
4133 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4135 if (dma_mapping_error(dev, buffer_info->dma))
4138 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4139 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4140 unsigned int len = frag->size;
4144 if (i == tx_ring->count)
4147 buffer_info = &tx_ring->buffer_info[i];
4148 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4149 buffer_info->length = len;
4150 buffer_info->time_stamp = jiffies;
4151 buffer_info->next_to_watch = i;
4152 buffer_info->mapped_as_page = true;
4153 buffer_info->dma = dma_map_page(dev,
4158 if (dma_mapping_error(dev, buffer_info->dma))
4163 tx_ring->buffer_info[i].skb = skb;
4164 tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4165 /* multiply data chunks by size of headers */
4166 tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
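/* e.g. gso_segs = 4, hlen = 66, skb->len = 4266: bytecount =
 * 3 * 66 + 4266 = 4464, the wire byte count including the headers the
 * hardware replicates for segments 2 through 4 */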
4167 tx_ring->buffer_info[i].gso_segs = gso_segs;
4168 tx_ring->buffer_info[first].next_to_watch = i;
4173 dev_err(dev, "TX DMA map failed\n");
4175 /* clear timestamp and dma mappings for failed buffer_info mapping */
4176 buffer_info->dma = 0;
4177 buffer_info->time_stamp = 0;
4178 buffer_info->length = 0;
4179 buffer_info->next_to_watch = 0;
4180 buffer_info->mapped_as_page = false;
4182 /* clear timestamp and dma mappings for remaining portion of packet */
4187 buffer_info = &tx_ring->buffer_info[i];
4188 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4194 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4195 u32 tx_flags, int count, u32 paylen,
4198 union e1000_adv_tx_desc *tx_desc;
4199 struct igb_buffer *buffer_info;
4200 u32 olinfo_status = 0, cmd_type_len;
4201 unsigned int i = tx_ring->next_to_use;
4203 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4204 E1000_ADVTXD_DCMD_DEXT);
4206 if (tx_flags & IGB_TX_FLAGS_VLAN)
4207 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4209 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4210 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4212 if (tx_flags & IGB_TX_FLAGS_TSO) {
4213 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4215 /* insert tcp checksum */
4216 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4218 /* insert ip checksum */
4219 if (tx_flags & IGB_TX_FLAGS_IPV4)
4220 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4222 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4223 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4226 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4227 (tx_flags & (IGB_TX_FLAGS_CSUM |
4229 IGB_TX_FLAGS_VLAN)))
4230 olinfo_status |= tx_ring->reg_idx << 4;
4232 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
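/* Example (editorial): in the TSO case PAYLEN is the L4 payload only,
 * e.g. skb->len = 2866 with hdr_len = 66 yields 2800 here; for non-TSO
 * frames hdr_len is 0 and PAYLEN covers the whole frame.
 */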
4234 do {
4235 buffer_info = &tx_ring->buffer_info[i];
4236 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4237 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4238 tx_desc->read.cmd_type_len =
4239 cpu_to_le32(cmd_type_len | buffer_info->length);
4240 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4241 count--;
4242 i++;
4243 if (i == tx_ring->count)
4244 i = 0;
4245 } while (count > 0);
4247 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4248 /* Force memory writes to complete before letting h/w
4249 * know there are new descriptors to fetch. (Only
4250 * applicable for weak-ordered memory model archs,
4251 * such as IA-64). */
4254 tx_ring->next_to_use = i;
4255 writel(i, tx_ring->tail);
4256 /* we need this if more than one processor can write to our tail
4257 * at a time, it synchronizes IO on IA64/Altix systems */
4258 mmiowb();
4261 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4263 struct net_device *netdev = tx_ring->netdev;
4265 netif_stop_subqueue(netdev, tx_ring->queue_index);
4267 /* Herbert's original patch had:
4268 * smp_mb__after_netif_stop_queue();
4269 * but since that doesn't exist yet, just open code it. */
4270 smp_mb();
4272 /* We need to check again in case another CPU has just
4273 * made room available. */
4274 if (igb_desc_unused(tx_ring) < size)
4275 return -EBUSY;
4277 /* A reprieve! */
4278 netif_wake_subqueue(netdev, tx_ring->queue_index);
4280 u64_stats_update_begin(&tx_ring->tx_syncp2);
4281 tx_ring->tx_stats.restart_queue2++;
4282 u64_stats_update_end(&tx_ring->tx_syncp2);
4287 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4289 if (igb_desc_unused(tx_ring) >= size)
4290 return 0;
4291 return __igb_maybe_stop_tx(tx_ring, size);
4294 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4295 struct igb_ring *tx_ring)
4302 /* need: 1 descriptor per page,
4303 * + 2 desc gap to keep tail from touching head,
4304 * + 1 desc for skb->data,
4305 * + 1 desc for context descriptor,
4306 * otherwise try next time */
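/* Example (editorial): an skb with nr_frags = 3 therefore needs at
 * least 3 + 4 = 7 unused descriptors; if fewer are free the queue is
 * stopped and NETDEV_TX_BUSY tells the stack to requeue the skb.
 */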
4307 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4308 /* this is a hard error */
4309 return NETDEV_TX_BUSY;
4312 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4313 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4314 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4317 if (vlan_tx_tag_present(skb)) {
4318 tx_flags |= IGB_TX_FLAGS_VLAN;
4319 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4322 if (skb->protocol == htons(ETH_P_IP))
4323 tx_flags |= IGB_TX_FLAGS_IPV4;
4325 first = tx_ring->next_to_use;
4326 if (skb_is_gso(skb)) {
4327 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4329 if (tso < 0) {
4330 dev_kfree_skb_any(skb);
4331 return NETDEV_TX_OK;
4332 }
4333 }
4335 if (tso)
4336 tx_flags |= IGB_TX_FLAGS_TSO;
4337 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4338 (skb->ip_summed == CHECKSUM_PARTIAL))
4339 tx_flags |= IGB_TX_FLAGS_CSUM;
4342 * count reflects descriptors mapped; if 0 or less, a mapping error
4343 * has occurred and we need to rewind the descriptor queue
4345 count = igb_tx_map_adv(tx_ring, skb, first);
4346 if (!count) {
4347 dev_kfree_skb_any(skb);
4348 tx_ring->buffer_info[first].time_stamp = 0;
4349 tx_ring->next_to_use = first;
4350 return NETDEV_TX_OK;
4351 }
4353 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4355 /* Make sure there is space in the ring for the next send. */
4356 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4358 return NETDEV_TX_OK;
4361 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4362 struct net_device *netdev)
4364 struct igb_adapter *adapter = netdev_priv(netdev);
4365 struct igb_ring *tx_ring;
4368 if (test_bit(__IGB_DOWN, &adapter->state)) {
4369 dev_kfree_skb_any(skb);
4370 return NETDEV_TX_OK;
4373 if (skb->len <= 0) {
4374 dev_kfree_skb_any(skb);
4375 return NETDEV_TX_OK;
4378 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4379 tx_ring = adapter->multi_tx_table[r_idx];
4381 /* This goes back to the question of how to logically map a tx queue
4382 * to a flow. Right now, performance suffers slightly when using
4383 * multiple tx queues. If the stack breaks away from a
4384 * single qdisc implementation, we can look at this again. */
4385 return igb_xmit_frame_ring_adv(skb, tx_ring);
4389 * igb_tx_timeout - Respond to a Tx Hang
4390 * @netdev: network interface device structure
4392 static void igb_tx_timeout(struct net_device *netdev)
4394 struct igb_adapter *adapter = netdev_priv(netdev);
4395 struct e1000_hw *hw = &adapter->hw;
4397 /* Do the reset outside of interrupt context */
4398 adapter->tx_timeout_count++;
4400 if (hw->mac.type == e1000_82580)
4401 hw->dev_spec._82575.global_device_reset = true;
4403 schedule_work(&adapter->reset_task);
4404 wr32(E1000_EICS,
4405 (adapter->eims_enable_mask & ~adapter->eims_other));
4408 static void igb_reset_task(struct work_struct *work)
4410 struct igb_adapter *adapter;
4411 adapter = container_of(work, struct igb_adapter, reset_task);
4414 netdev_err(adapter->netdev, "Reset adapter\n");
4415 igb_reinit_locked(adapter);
4419 * igb_get_stats64 - Get System Network Statistics
4420 * @netdev: network interface device structure
4421 * @stats: rtnl_link_stats64 pointer
4424 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4425 struct rtnl_link_stats64 *stats)
4427 struct igb_adapter *adapter = netdev_priv(netdev);
4429 spin_lock(&adapter->stats64_lock);
4430 igb_update_stats(adapter, &adapter->stats64);
4431 memcpy(stats, &adapter->stats64, sizeof(*stats));
4432 spin_unlock(&adapter->stats64_lock);
4434 return stats;
4438 * igb_change_mtu - Change the Maximum Transfer Unit
4439 * @netdev: network interface device structure
4440 * @new_mtu: new value for maximum frame size
4442 * Returns 0 on success, negative on failure
4444 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4446 struct igb_adapter *adapter = netdev_priv(netdev);
4447 struct pci_dev *pdev = adapter->pdev;
4448 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4449 u32 rx_buffer_len, i;
4451 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4452 dev_err(&pdev->dev, "Invalid MTU setting\n");
4456 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4457 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4461 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4464 /* igb_down has a dependency on max_frame_size */
4465 adapter->max_frame_size = max_frame;
4467 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4468 * means we reserve 2 more, this pushes us to allocate from the next
4469 * larger slab size.
4470 * i.e. RXBUFFER_2048 --> size-4096 slab
4473 if (adapter->hw.mac.type == e1000_82580)
4474 max_frame += IGB_TS_HDR_LEN;
4476 if (max_frame <= IGB_RXBUFFER_1024)
4477 rx_buffer_len = IGB_RXBUFFER_1024;
4478 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4479 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4481 rx_buffer_len = IGB_RXBUFFER_128;
4483 if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4484 (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4485 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4487 if ((adapter->hw.mac.type == e1000_82580) &&
4488 (rx_buffer_len == IGB_RXBUFFER_128))
4489 rx_buffer_len += IGB_RXBUFFER_64;
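/* Worked example (editorial): MTU 1500 gives max_frame = 1518, which
 * selects rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE (1522 bytes); a
 * 9000-byte jumbo MTU falls through to IGB_RXBUFFER_128, i.e.
 * header-split mode where payloads land in half-page fragments.
 */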
4491 if (netif_running(netdev))
4494 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4495 netdev->mtu, new_mtu);
4496 netdev->mtu = new_mtu;
4498 for (i = 0; i < adapter->num_rx_queues; i++)
4499 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4501 if (netif_running(netdev))
4506 clear_bit(__IGB_RESETTING, &adapter->state);
4512 * igb_update_stats - Update the board statistics counters
4513 * @adapter: board private structure
4516 void igb_update_stats(struct igb_adapter *adapter,
4517 struct rtnl_link_stats64 *net_stats)
4519 struct e1000_hw *hw = &adapter->hw;
4520 struct pci_dev *pdev = adapter->pdev;
4526 u64 _bytes, _packets;
4528 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4531 * Prevent stats update while adapter is being reset, or if the pci
4532 * connection is down.
4534 if (adapter->link_speed == 0)
4535 return;
4536 if (pci_channel_offline(pdev))
4537 return;
4541 for (i = 0; i < adapter->num_rx_queues; i++) {
4542 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4543 struct igb_ring *ring = adapter->rx_ring[i];
4545 ring->rx_stats.drops += rqdpc_tmp;
4546 net_stats->rx_fifo_errors += rqdpc_tmp;
4549 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4550 _bytes = ring->rx_stats.bytes;
4551 _packets = ring->rx_stats.packets;
4552 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
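/* Editorial note: the fetch_begin/fetch_retry loop above is the reader
 * side of the u64_stats seqcount -- on 32-bit SMP a 64-bit counter
 * could otherwise be observed half-updated. The writer side (the NAPI
 * cleanup paths) brackets its increments the same way:
 *
 *	u64_stats_update_begin(&ring->rx_syncp);
 *	ring->rx_stats.packets += total_packets;
 *	ring->rx_stats.bytes += total_bytes;
 *	u64_stats_update_end(&ring->rx_syncp);
 */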
4554 packets += _packets;
4557 net_stats->rx_bytes = bytes;
4558 net_stats->rx_packets = packets;
4562 for (i = 0; i < adapter->num_tx_queues; i++) {
4563 struct igb_ring *ring = adapter->tx_ring[i];
4565 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4566 _bytes = ring->tx_stats.bytes;
4567 _packets = ring->tx_stats.packets;
4568 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4570 packets += _packets;
4572 net_stats->tx_bytes = bytes;
4573 net_stats->tx_packets = packets;
4575 /* read stats registers */
4576 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4577 adapter->stats.gprc += rd32(E1000_GPRC);
4578 adapter->stats.gorc += rd32(E1000_GORCL);
4579 rd32(E1000_GORCH); /* clear GORCL */
4580 adapter->stats.bprc += rd32(E1000_BPRC);
4581 adapter->stats.mprc += rd32(E1000_MPRC);
4582 adapter->stats.roc += rd32(E1000_ROC);
4584 adapter->stats.prc64 += rd32(E1000_PRC64);
4585 adapter->stats.prc127 += rd32(E1000_PRC127);
4586 adapter->stats.prc255 += rd32(E1000_PRC255);
4587 adapter->stats.prc511 += rd32(E1000_PRC511);
4588 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4589 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4590 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4591 adapter->stats.sec += rd32(E1000_SEC);
4593 mpc = rd32(E1000_MPC);
4594 adapter->stats.mpc += mpc;
4595 net_stats->rx_fifo_errors += mpc;
4596 adapter->stats.scc += rd32(E1000_SCC);
4597 adapter->stats.ecol += rd32(E1000_ECOL);
4598 adapter->stats.mcc += rd32(E1000_MCC);
4599 adapter->stats.latecol += rd32(E1000_LATECOL);
4600 adapter->stats.dc += rd32(E1000_DC);
4601 adapter->stats.rlec += rd32(E1000_RLEC);
4602 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4603 adapter->stats.xontxc += rd32(E1000_XONTXC);
4604 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4605 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4606 adapter->stats.fcruc += rd32(E1000_FCRUC);
4607 adapter->stats.gptc += rd32(E1000_GPTC);
4608 adapter->stats.gotc += rd32(E1000_GOTCL);
4609 rd32(E1000_GOTCH); /* clear GOTCL */
4610 adapter->stats.rnbc += rd32(E1000_RNBC);
4611 adapter->stats.ruc += rd32(E1000_RUC);
4612 adapter->stats.rfc += rd32(E1000_RFC);
4613 adapter->stats.rjc += rd32(E1000_RJC);
4614 adapter->stats.tor += rd32(E1000_TORH);
4615 adapter->stats.tot += rd32(E1000_TOTH);
4616 adapter->stats.tpr += rd32(E1000_TPR);
4618 adapter->stats.ptc64 += rd32(E1000_PTC64);
4619 adapter->stats.ptc127 += rd32(E1000_PTC127);
4620 adapter->stats.ptc255 += rd32(E1000_PTC255);
4621 adapter->stats.ptc511 += rd32(E1000_PTC511);
4622 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4623 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4625 adapter->stats.mptc += rd32(E1000_MPTC);
4626 adapter->stats.bptc += rd32(E1000_BPTC);
4628 adapter->stats.tpt += rd32(E1000_TPT);
4629 adapter->stats.colc += rd32(E1000_COLC);
4631 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4632 /* read internal phy specific stats */
4633 reg = rd32(E1000_CTRL_EXT);
4634 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4635 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4636 adapter->stats.tncrs += rd32(E1000_TNCRS);
4639 adapter->stats.tsctc += rd32(E1000_TSCTC);
4640 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4642 adapter->stats.iac += rd32(E1000_IAC);
4643 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4644 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4645 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4646 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4647 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4648 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4649 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4650 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4652 /* Fill out the OS statistics structure */
4653 net_stats->multicast = adapter->stats.mprc;
4654 net_stats->collisions = adapter->stats.colc;
4658 /* RLEC on some newer hardware can be incorrect so build
4659 * our own version based on RUC and ROC */
4660 net_stats->rx_errors = adapter->stats.rxerrc +
4661 adapter->stats.crcerrs + adapter->stats.algnerrc +
4662 adapter->stats.ruc + adapter->stats.roc +
4663 adapter->stats.cexterr;
4664 net_stats->rx_length_errors = adapter->stats.ruc +
4666 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4667 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4668 net_stats->rx_missed_errors = adapter->stats.mpc;
4671 net_stats->tx_errors = adapter->stats.ecol +
4672 adapter->stats.latecol;
4673 net_stats->tx_aborted_errors = adapter->stats.ecol;
4674 net_stats->tx_window_errors = adapter->stats.latecol;
4675 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4677 /* Tx Dropped needs to be maintained elsewhere */
4680 if (hw->phy.media_type == e1000_media_type_copper) {
4681 if ((adapter->link_speed == SPEED_1000) &&
4682 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4683 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4684 adapter->phy_stats.idle_errors += phy_tmp;
4688 /* Management Stats */
4689 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4690 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4691 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4694 reg = rd32(E1000_MANC);
4695 if (reg & E1000_MANC_EN_BMC2OS) {
4696 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4697 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4698 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4699 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4703 static irqreturn_t igb_msix_other(int irq, void *data)
4705 struct igb_adapter *adapter = data;
4706 struct e1000_hw *hw = &adapter->hw;
4707 u32 icr = rd32(E1000_ICR);
4708 /* reading ICR causes bit 31 of EICR to be cleared */
4710 if (icr & E1000_ICR_DRSTA)
4711 schedule_work(&adapter->reset_task);
4713 if (icr & E1000_ICR_DOUTSYNC) {
4714 /* HW is reporting DMA is out of sync */
4715 adapter->stats.doosync++;
4716 /* The DMA Out of Sync is also an indication of a spoof event
4717 * in IOV mode. Check the Wrong VM Behavior register to
4718 * see if it is really a spoof event. */
4719 igb_check_wvbr(adapter);
4722 /* Check for a mailbox event */
4723 if (icr & E1000_ICR_VMMB)
4724 igb_msg_task(adapter);
4726 if (icr & E1000_ICR_LSC) {
4727 hw->mac.get_link_status = 1;
4728 /* guard against interrupt when we're going down */
4729 if (!test_bit(__IGB_DOWN, &adapter->state))
4730 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4733 if (adapter->vfs_allocated_count)
4734 wr32(E1000_IMS, E1000_IMS_LSC |
4736 E1000_IMS_DOUTSYNC);
4738 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4739 wr32(E1000_EIMS, adapter->eims_other);
4744 static void igb_write_itr(struct igb_q_vector *q_vector)
4746 struct igb_adapter *adapter = q_vector->adapter;
4747 u32 itr_val = q_vector->itr_val & 0x7FFC;
4749 if (!q_vector->set_itr)
4750 return;
4752 if (!itr_val)
4753 itr_val = 0x4;
4755 if (adapter->hw.mac.type == e1000_82575)
4756 itr_val |= itr_val << 16;
4758 itr_val |= 0x8000000;
4760 writel(itr_val, q_vector->itr_register);
4761 q_vector->set_itr = 0;
4764 static irqreturn_t igb_msix_ring(int irq, void *data)
4766 struct igb_q_vector *q_vector = data;
4768 /* Write the ITR value calculated from the previous interrupt. */
4769 igb_write_itr(q_vector);
4771 napi_schedule(&q_vector->napi);
4776 #ifdef CONFIG_IGB_DCA
4777 static void igb_update_dca(struct igb_q_vector *q_vector)
4779 struct igb_adapter *adapter = q_vector->adapter;
4780 struct e1000_hw *hw = &adapter->hw;
4781 int cpu = get_cpu();
4783 if (q_vector->cpu == cpu)
4784 goto out_no_update;
4786 if (q_vector->tx_ring) {
4787 int q = q_vector->tx_ring->reg_idx;
4788 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4789 if (hw->mac.type == e1000_82575) {
4790 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4791 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4793 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4794 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4795 E1000_DCA_TXCTRL_CPUID_SHIFT;
4797 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4798 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4800 if (q_vector->rx_ring) {
4801 int q = q_vector->rx_ring->reg_idx;
4802 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4803 if (hw->mac.type == e1000_82575) {
4804 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4805 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4807 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4808 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4809 E1000_DCA_RXCTRL_CPUID_SHIFT;
4811 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4812 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4813 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4814 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4816 q_vector->cpu = cpu;
4817 out_no_update:
4818 put_cpu();
4819 }
4821 static void igb_setup_dca(struct igb_adapter *adapter)
4823 struct e1000_hw *hw = &adapter->hw;
4826 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4829 /* Always use CB2 mode, difference is masked in the CB driver. */
4830 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4832 for (i = 0; i < adapter->num_q_vectors; i++) {
4833 adapter->q_vector[i]->cpu = -1;
4834 igb_update_dca(adapter->q_vector[i]);
4838 static int __igb_notify_dca(struct device *dev, void *data)
4840 struct net_device *netdev = dev_get_drvdata(dev);
4841 struct igb_adapter *adapter = netdev_priv(netdev);
4842 struct pci_dev *pdev = adapter->pdev;
4843 struct e1000_hw *hw = &adapter->hw;
4844 unsigned long event = *(unsigned long *)data;
4847 case DCA_PROVIDER_ADD:
4848 /* if already enabled, don't do it again */
4849 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4851 if (dca_add_requester(dev) == 0) {
4852 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4853 dev_info(&pdev->dev, "DCA enabled\n");
4854 igb_setup_dca(adapter);
4857 /* Fall Through since DCA is disabled. */
4858 case DCA_PROVIDER_REMOVE:
4859 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4860 /* without this a class_device is left
4861 * hanging around in the sysfs model */
4862 dca_remove_requester(dev);
4863 dev_info(&pdev->dev, "DCA disabled\n");
4864 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4865 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4873 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4878 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4881 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4883 #endif /* CONFIG_IGB_DCA */
4885 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4887 struct e1000_hw *hw = &adapter->hw;
4891 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4892 ping = E1000_PF_CONTROL_MSG;
4893 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4894 ping |= E1000_VT_MSGTYPE_CTS;
4895 igb_write_mbx(hw, &ping, 1, i);
4899 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4901 struct e1000_hw *hw = &adapter->hw;
4902 u32 vmolr = rd32(E1000_VMOLR(vf));
4903 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4905 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4906 IGB_VF_FLAG_MULTI_PROMISC);
4907 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4909 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4910 vmolr |= E1000_VMOLR_MPME;
4911 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4912 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4915 * if we have hashes and we are clearing a multicast promisc
4916 * flag we need to write the hashes to the MTA as this step
4917 * was previously skipped
4919 if (vf_data->num_vf_mc_hashes > 30) {
4920 vmolr |= E1000_VMOLR_MPME;
4921 } else if (vf_data->num_vf_mc_hashes) {
4923 vmolr |= E1000_VMOLR_ROMPE;
4924 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4925 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4929 wr32(E1000_VMOLR(vf), vmolr);
4931 /* there are flags left unprocessed, likely not supported */
4932 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4939 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4940 u32 *msgbuf, u32 vf)
4942 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4943 u16 *hash_list = (u16 *)&msgbuf[1];
4944 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4947 /* salt away the number of multicast addresses assigned
4948 * to this VF for later use to restore when the PF multicast
4949 * list changes
4950 */
4951 vf_data->num_vf_mc_hashes = n;
4953 /* only up to 30 hash values supported */
4954 if (n > 30)
4955 n = 30;
4957 /* store the hashes for later use */
4958 for (i = 0; i < n; i++)
4959 vf_data->vf_mc_hashes[i] = hash_list[i];
4961 /* Flush and reset the mta with the new values */
4962 igb_set_rx_mode(adapter->netdev);
4967 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4969 struct e1000_hw *hw = &adapter->hw;
4970 struct vf_data_storage *vf_data;
4973 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4974 u32 vmolr = rd32(E1000_VMOLR(i));
4975 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4977 vf_data = &adapter->vf_data[i];
4979 if ((vf_data->num_vf_mc_hashes > 30) ||
4980 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4981 vmolr |= E1000_VMOLR_MPME;
4982 } else if (vf_data->num_vf_mc_hashes) {
4983 vmolr |= E1000_VMOLR_ROMPE;
4984 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4985 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4987 wr32(E1000_VMOLR(i), vmolr);
4991 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4993 struct e1000_hw *hw = &adapter->hw;
4994 u32 pool_mask, reg, vid;
4997 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
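/* Example (editorial): each VLVF entry carries a per-pool membership
 * bitmap above the VLAN id field; assuming E1000_VLVF_POOLSEL_SHIFT is
 * 12, vf 2 owns bit 14, so clearing pool_mask below drops only that
 * VF's membership without disturbing other pools sharing the VLAN.
 */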
4999 /* Find the vlan filter for this id */
5000 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5001 reg = rd32(E1000_VLVF(i));
5003 /* remove the vf from the pool */
5006 /* if pool is empty then remove entry from vfta */
5007 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5008 (reg & E1000_VLVF_VLANID_ENABLE)) {
5010 vid = reg & E1000_VLVF_VLANID_MASK;
5011 igb_vfta_set(hw, vid, false);
5014 wr32(E1000_VLVF(i), reg);
5017 adapter->vf_data[vf].vlans_enabled = 0;
5020 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5022 struct e1000_hw *hw = &adapter->hw;
5025 /* The vlvf table only exists on 82576 hardware and newer */
5026 if (hw->mac.type < e1000_82576)
5029 /* we only need to do this if VMDq is enabled */
5030 if (!adapter->vfs_allocated_count)
5033 /* Find the vlan filter for this id */
5034 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5035 reg = rd32(E1000_VLVF(i));
5036 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5037 vid == (reg & E1000_VLVF_VLANID_MASK))
5042 if (i == E1000_VLVF_ARRAY_SIZE) {
5043 /* Did not find a matching VLAN ID entry that was
5044 * enabled. Search for a free filter entry, i.e.
5045 * one without the enable bit set
5047 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5048 reg = rd32(E1000_VLVF(i));
5049 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5053 if (i < E1000_VLVF_ARRAY_SIZE) {
5054 /* Found an enabled/available entry */
5055 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5057 /* if !enabled we need to set this up in vfta */
5058 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5059 /* add VID to filter table */
5060 igb_vfta_set(hw, vid, true);
5061 reg |= E1000_VLVF_VLANID_ENABLE;
5063 reg &= ~E1000_VLVF_VLANID_MASK;
5065 wr32(E1000_VLVF(i), reg);
5067 /* do not modify RLPML for PF devices */
5068 if (vf >= adapter->vfs_allocated_count)
5071 if (!adapter->vf_data[vf].vlans_enabled) {
5073 reg = rd32(E1000_VMOLR(vf));
5074 size = reg & E1000_VMOLR_RLPML_MASK;
5076 reg &= ~E1000_VMOLR_RLPML_MASK;
5078 wr32(E1000_VMOLR(vf), reg);
5081 adapter->vf_data[vf].vlans_enabled++;
5085 if (i < E1000_VLVF_ARRAY_SIZE) {
5086 /* remove vf from the pool */
5087 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5088 /* if pool is empty then remove entry from vfta */
5089 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5091 igb_vfta_set(hw, vid, false);
5093 wr32(E1000_VLVF(i), reg);
5095 /* do not modify RLPML for PF devices */
5096 if (vf >= adapter->vfs_allocated_count)
5099 adapter->vf_data[vf].vlans_enabled--;
5100 if (!adapter->vf_data[vf].vlans_enabled) {
5102 reg = rd32(E1000_VMOLR(vf));
5103 size = reg & E1000_VMOLR_RLPML_MASK;
5105 reg &= ~E1000_VMOLR_RLPML_MASK;
5107 wr32(E1000_VMOLR(vf), reg);
5114 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5116 struct e1000_hw *hw = &adapter->hw;
5119 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5121 wr32(E1000_VMVIR(vf), 0);
5124 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5125 int vf, u16 vlan, u8 qos)
5128 struct igb_adapter *adapter = netdev_priv(netdev);
5130 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5133 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5136 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5137 igb_set_vmolr(adapter, vf, !vlan);
5138 adapter->vf_data[vf].pf_vlan = vlan;
5139 adapter->vf_data[vf].pf_qos = qos;
5140 dev_info(&adapter->pdev->dev,
5141 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5142 if (test_bit(__IGB_DOWN, &adapter->state)) {
5143 dev_warn(&adapter->pdev->dev,
5144 "The VF VLAN has been set,"
5145 " but the PF device is not up.\n");
5146 dev_warn(&adapter->pdev->dev,
5147 "Bring the PF device up before"
5148 " attempting to use the VF device.\n");
5151 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5153 igb_set_vmvir(adapter, vlan, vf);
5154 igb_set_vmolr(adapter, vf, true);
5155 adapter->vf_data[vf].pf_vlan = 0;
5156 adapter->vf_data[vf].pf_qos = 0;
5162 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5164 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5165 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5167 return igb_vlvf_set(adapter, vid, add, vf);
5170 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5172 /* clear flags - except flag that indicates PF has set the MAC */
5173 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5174 adapter->vf_data[vf].last_nack = jiffies;
5176 /* reset offloads to defaults */
5177 igb_set_vmolr(adapter, vf, true);
5179 /* reset vlans for device */
5180 igb_clear_vf_vfta(adapter, vf);
5181 if (adapter->vf_data[vf].pf_vlan)
5182 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5183 adapter->vf_data[vf].pf_vlan,
5184 adapter->vf_data[vf].pf_qos);
5186 igb_clear_vf_vfta(adapter, vf);
5188 /* reset multicast table array for vf */
5189 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5191 /* Flush and reset the mta with the new values */
5192 igb_set_rx_mode(adapter->netdev);
5195 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5197 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5199 /* generate a new mac address as we were hotplug removed/added */
5200 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5201 random_ether_addr(vf_mac);
5203 /* process remaining reset events */
5204 igb_vf_reset(adapter, vf);
5207 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5209 struct e1000_hw *hw = &adapter->hw;
5210 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5211 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5213 u8 *addr = (u8 *)(&msgbuf[1]);
5215 /* process all the same items cleared in a function level reset */
5216 igb_vf_reset(adapter, vf);
5218 /* set vf mac address */
5219 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5221 /* enable transmit and receive for vf */
5222 reg = rd32(E1000_VFTE);
5223 wr32(E1000_VFTE, reg | (1 << vf));
5224 reg = rd32(E1000_VFRE);
5225 wr32(E1000_VFRE, reg | (1 << vf));
5227 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5229 /* reply to reset with ack and vf mac address */
5230 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5231 memcpy(addr, vf_mac, 6);
5232 igb_write_mbx(hw, msgbuf, 3, vf);
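/* Editorial note on the 3-dword reply layout: msgbuf[0] holds
 * E1000_VF_RESET | E1000_VT_MSGTYPE_ACK and the 6 MAC bytes are packed
 * into msgbuf[1..2] via the addr alias above; the VF driver unpacks
 * the address from the same offsets on its side of the mailbox.
 */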
5235 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5238 * The VF MAC Address is stored in a packed array of bytes
5239 * starting at the second 32 bit word of the msg array
5241 unsigned char *addr = (char *)&msg[1];
5244 if (is_valid_ether_addr(addr))
5245 err = igb_set_vf_mac(adapter, vf, addr);
5250 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5252 struct e1000_hw *hw = &adapter->hw;
5253 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5254 u32 msg = E1000_VT_MSGTYPE_NACK;
5256 /* if device isn't clear to send it shouldn't be reading either */
5257 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5258 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5259 igb_write_mbx(hw, &msg, 1, vf);
5260 vf_data->last_nack = jiffies;
5264 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5266 struct pci_dev *pdev = adapter->pdev;
5267 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5268 struct e1000_hw *hw = &adapter->hw;
5269 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5272 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5275 /* if receive failed revoke VF CTS stats and restart init */
5276 dev_err(&pdev->dev, "Error receiving message from VF\n");
5277 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5278 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5283 /* this is a message we already processed, do nothing */
5284 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5288 * until the vf completes a reset it should not be
5289 * allowed to start any configuration.
5292 if (msgbuf[0] == E1000_VF_RESET) {
5293 igb_vf_reset_msg(adapter, vf);
5297 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5298 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5304 switch ((msgbuf[0] & 0xFFFF)) {
5305 case E1000_VF_SET_MAC_ADDR:
5307 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5308 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5310 dev_warn(&pdev->dev,
5311 "VF %d attempted to override administratively "
5312 "set MAC address\nReload the VF driver to "
5313 "resume operations\n", vf);
5315 case E1000_VF_SET_PROMISC:
5316 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5318 case E1000_VF_SET_MULTICAST:
5319 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5321 case E1000_VF_SET_LPE:
5322 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5324 case E1000_VF_SET_VLAN:
5326 if (vf_data->pf_vlan)
5327 dev_warn(&pdev->dev,
5328 "VF %d attempted to override administratively "
5329 "set VLAN tag\nReload the VF driver to "
5330 "resume operations\n", vf);
5332 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5335 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5340 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5342 /* notify the VF of the results of what it sent us */
5344 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5346 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5348 igb_write_mbx(hw, msgbuf, 1, vf);
5351 static void igb_msg_task(struct igb_adapter *adapter)
5353 struct e1000_hw *hw = &adapter->hw;
5356 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5357 /* process any reset requests */
5358 if (!igb_check_for_rst(hw, vf))
5359 igb_vf_reset_event(adapter, vf);
5361 /* process any messages pending */
5362 if (!igb_check_for_msg(hw, vf))
5363 igb_rcv_msg_from_vf(adapter, vf);
5365 /* process any acks */
5366 if (!igb_check_for_ack(hw, vf))
5367 igb_rcv_ack_from_vf(adapter, vf);
5372 * igb_set_uta - Set unicast filter table address
5373 * @adapter: board private structure
5375 * The unicast table address is a register array of 32-bit registers.
5376 * The table is meant to be used in a way similar to how the MTA is used
5377 * however due to certain limitations in the hardware it is necessary to
5378 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5379 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5381 static void igb_set_uta(struct igb_adapter *adapter)
5383 struct e1000_hw *hw = &adapter->hw;
5386 /* The UTA table only exists on 82576 hardware and newer */
5387 if (hw->mac.type < e1000_82576)
5390 /* we only need to do this if VMDq is enabled */
5391 if (!adapter->vfs_allocated_count)
5394 for (i = 0; i < hw->mac.uta_reg_count; i++)
5395 array_wr32(E1000_UTA, i, ~0);
5399 * igb_intr_msi - Interrupt Handler
5400 * @irq: interrupt number
5401 * @data: pointer to a network interface device structure
5403 static irqreturn_t igb_intr_msi(int irq, void *data)
5405 struct igb_adapter *adapter = data;
5406 struct igb_q_vector *q_vector = adapter->q_vector[0];
5407 struct e1000_hw *hw = &adapter->hw;
5408 /* read ICR disables interrupts using IAM */
5409 u32 icr = rd32(E1000_ICR);
5411 igb_write_itr(q_vector);
5413 if (icr & E1000_ICR_DRSTA)
5414 schedule_work(&adapter->reset_task);
5416 if (icr & E1000_ICR_DOUTSYNC) {
5417 /* HW is reporting DMA is out of sync */
5418 adapter->stats.doosync++;
5421 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5422 hw->mac.get_link_status = 1;
5423 if (!test_bit(__IGB_DOWN, &adapter->state))
5424 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5427 napi_schedule(&q_vector->napi);
5433 * igb_intr - Legacy Interrupt Handler
5434 * @irq: interrupt number
5435 * @data: pointer to a network interface device structure
5437 static irqreturn_t igb_intr(int irq, void *data)
5439 struct igb_adapter *adapter = data;
5440 struct igb_q_vector *q_vector = adapter->q_vector[0];
5441 struct e1000_hw *hw = &adapter->hw;
5442 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5443 * need for the IMC write */
5444 u32 icr = rd32(E1000_ICR);
5445 if (!icr)
5446 return IRQ_NONE; /* Not our interrupt */
5448 igb_write_itr(q_vector);
5450 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5451 * not set, then the adapter didn't send an interrupt */
5452 if (!(icr & E1000_ICR_INT_ASSERTED))
5455 if (icr & E1000_ICR_DRSTA)
5456 schedule_work(&adapter->reset_task);
5458 if (icr & E1000_ICR_DOUTSYNC) {
5459 /* HW is reporting DMA is out of sync */
5460 adapter->stats.doosync++;
5463 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5464 hw->mac.get_link_status = 1;
5465 /* guard against interrupt when we're going down */
5466 if (!test_bit(__IGB_DOWN, &adapter->state))
5467 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5470 napi_schedule(&q_vector->napi);
5475 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5477 struct igb_adapter *adapter = q_vector->adapter;
5478 struct e1000_hw *hw = &adapter->hw;
5480 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5481 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5482 if (!adapter->msix_entries)
5483 igb_set_itr(adapter);
5485 igb_update_ring_itr(q_vector);
5488 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5489 if (adapter->msix_entries)
5490 wr32(E1000_EIMS, q_vector->eims_value);
5492 igb_irq_enable(adapter);
5497 * igb_poll - NAPI Rx polling callback
5498 * @napi: napi polling structure
5499 * @budget: count of how many packets we should handle
5501 static int igb_poll(struct napi_struct *napi, int budget)
5503 struct igb_q_vector *q_vector = container_of(napi,
5504 struct igb_q_vector,
5506 int tx_clean_complete = 1, work_done = 0;
5508 #ifdef CONFIG_IGB_DCA
5509 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5510 igb_update_dca(q_vector);
5512 if (q_vector->tx_ring)
5513 tx_clean_complete = igb_clean_tx_irq(q_vector);
5515 if (q_vector->rx_ring)
5516 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5518 if (!tx_clean_complete)
5521 /* If not enough Rx work done, exit the polling mode */
5522 if (work_done < budget) {
5523 napi_complete(napi);
5524 igb_ring_irq_enable(q_vector);
5531 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5532 * @adapter: board private structure
5533 * @shhwtstamps: timestamp structure to update
5534 * @regval: unsigned 64bit system time value.
5536 * We need to convert the system time value stored in the RX/TXSTMP registers
5537 * into a hwtstamp which can be used by the upper level timestamping functions
5539 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5540 struct skb_shared_hwtstamps *shhwtstamps,
5546 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5547 * bit 24 to match the clock shift we set up earlier.
5549 if (adapter->hw.mac.type == e1000_82580)
5550 regval <<= IGB_82580_TSYNC_SHIFT;
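/* Editorial note: the raw SYSTIM value is scaled so it matches the
 * cyclecounter shift (assumed 24, per IGB_82580_TSYNC_SHIFT) used when
 * adapter->clock was initialized; only then can timecounter_cyc2time()
 * below convert the cycle count into nanoseconds.
 */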
5552 ns = timecounter_cyc2time(&adapter->clock, regval);
5553 timecompare_update(&adapter->compare, ns);
5554 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5555 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5556 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5560 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5561 * @q_vector: pointer to q_vector containing needed info
5562 * @buffer_info: pointer to igb_buffer structure
5564 * If we were asked to do hardware stamping and such a time stamp is
5565 * available, then it must have been for this skb here because we
5566 * allow only one such packet into the queue.
5568 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5570 struct igb_adapter *adapter = q_vector->adapter;
5571 struct e1000_hw *hw = &adapter->hw;
5572 struct skb_shared_hwtstamps shhwtstamps;
5575 /* if skb does not support hw timestamp or TX stamp not valid exit */
5576 if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5577 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5580 regval = rd32(E1000_TXSTMPL);
5581 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5583 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5584 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5588 * igb_clean_tx_irq - Reclaim resources after transmit completes
5589 * @q_vector: pointer to q_vector containing needed info
5590 * returns true if ring is completely cleaned
5592 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5594 struct igb_adapter *adapter = q_vector->adapter;
5595 struct igb_ring *tx_ring = q_vector->tx_ring;
5596 struct net_device *netdev = tx_ring->netdev;
5597 struct e1000_hw *hw = &adapter->hw;
5598 struct igb_buffer *buffer_info;
5599 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5600 unsigned int total_bytes = 0, total_packets = 0;
5601 unsigned int i, eop, count = 0;
5602 bool cleaned = false;
5604 i = tx_ring->next_to_clean;
5605 eop = tx_ring->buffer_info[i].next_to_watch;
5606 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
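/* Editorial note: next_to_watch records, per submitted skb, the index
 * of its last (EOP) descriptor; once hardware sets the DD bit in that
 * descriptor's write-back status, every buffer in the skb's chain is
 * known to be done and can be unmapped and freed in the loop below.
 */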
5608 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5609 (count < tx_ring->count)) {
5610 rmb(); /* read buffer_info after eop_desc status */
5611 for (cleaned = false; !cleaned; count++) {
5612 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5613 buffer_info = &tx_ring->buffer_info[i];
5614 cleaned = (i == eop);
5616 if (buffer_info->skb) {
5617 total_bytes += buffer_info->bytecount;
5618 /* gso_segs is currently only valid for tcp */
5619 total_packets += buffer_info->gso_segs;
5620 igb_tx_hwtstamp(q_vector, buffer_info);
5623 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5624 tx_desc->wb.status = 0;
5626 i++;
5627 if (i == tx_ring->count)
5628 i = 0;
5629 }
5630 eop = tx_ring->buffer_info[i].next_to_watch;
5631 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5634 tx_ring->next_to_clean = i;
5636 if (unlikely(count &&
5637 netif_carrier_ok(netdev) &&
5638 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5639 /* Make sure that anybody stopping the queue after this
5640 * sees the new next_to_clean.
5643 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5644 !(test_bit(__IGB_DOWN, &adapter->state))) {
5645 netif_wake_subqueue(netdev, tx_ring->queue_index);
5647 u64_stats_update_begin(&tx_ring->tx_syncp);
5648 tx_ring->tx_stats.restart_queue++;
5649 u64_stats_update_end(&tx_ring->tx_syncp);
5653 if (tx_ring->detect_tx_hung) {
5654 /* Detect a transmit hang in hardware, this serializes the
5655 * check with the clearing of time_stamp and movement of i */
5656 tx_ring->detect_tx_hung = false;
5657 if (tx_ring->buffer_info[i].time_stamp &&
5658 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5659 (adapter->tx_timeout_factor * HZ)) &&
5660 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5662 /* detected Tx unit hang */
5663 dev_err(tx_ring->dev,
5664 "Detected Tx Unit Hang\n"
5668 " next_to_use <%x>\n"
5669 " next_to_clean <%x>\n"
5670 "buffer_info[next_to_clean]\n"
5671 " time_stamp <%lx>\n"
5672 " next_to_watch <%x>\n"
5674 " desc.status <%x>\n",
5675 tx_ring->queue_index,
5676 readl(tx_ring->head),
5677 readl(tx_ring->tail),
5678 tx_ring->next_to_use,
5679 tx_ring->next_to_clean,
5680 tx_ring->buffer_info[eop].time_stamp,
5683 eop_desc->wb.status);
5684 netif_stop_subqueue(netdev, tx_ring->queue_index);
5687 tx_ring->total_bytes += total_bytes;
5688 tx_ring->total_packets += total_packets;
5689 u64_stats_update_begin(&tx_ring->tx_syncp);
5690 tx_ring->tx_stats.bytes += total_bytes;
5691 tx_ring->tx_stats.packets += total_packets;
5692 u64_stats_update_end(&tx_ring->tx_syncp);
5693 return count < tx_ring->count;
5697 * igb_receive_skb - helper function to handle rx indications
5698 * @q_vector: structure containing interrupt and ring information
5699 * @skb: packet to send up
5700 * @vlan_tag: vlan tag for packet
5702 static void igb_receive_skb(struct igb_q_vector *q_vector,
5703 struct sk_buff *skb,
5706 struct igb_adapter *adapter = q_vector->adapter;
5708 if (vlan_tag && adapter->vlgrp)
5709 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5712 napi_gro_receive(&q_vector->napi, skb);
5715 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5716 u32 status_err, struct sk_buff *skb)
5718 skb_checksum_none_assert(skb);
5720 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5721 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5722 (status_err & E1000_RXD_STAT_IXSM))
5725 /* TCP/UDP checksum error bit is set */
5727 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5729 * work around errata with sctp packets where the TCPE aka
5730 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5731 * packets (i.e. let the stack check the crc32c)
5733 if ((skb->len == 60) &&
5734 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5735 u64_stats_update_begin(&ring->rx_syncp);
5736 ring->rx_stats.csum_err++;
5737 u64_stats_update_end(&ring->rx_syncp);
5739 /* let the stack verify checksum errors */
5742 /* It must be a TCP or UDP packet with a valid checksum */
5743 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5744 skb->ip_summed = CHECKSUM_UNNECESSARY;
5746 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5749 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5750 struct sk_buff *skb)
5752 struct igb_adapter *adapter = q_vector->adapter;
5753 struct e1000_hw *hw = &adapter->hw;
5757 * If this bit is set, then the RX registers contain the time stamp. No
5758 * other packet will be time stamped until we read these registers, so
5759 * read the registers to make them available again. Because only one
5760 * packet can be time stamped at a time, we know that the register
5761 * values must belong to this one here and therefore we don't need to
5762 * compare any of the additional attributes stored for it.
5764 * If nothing went wrong, then it should have a shared tx_flags that we
5765 * can turn into a skb_shared_hwtstamps.
5767 if (staterr & E1000_RXDADV_STAT_TSIP) {
5768 u32 *stamp = (u32 *)skb->data;
5769 regval = le32_to_cpu(*(stamp + 2));
5770 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5771 skb_pull(skb, IGB_TS_HDR_LEN);
5773 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5774 return;
5776 regval = rd32(E1000_RXSTMPL);
5777 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5780 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5782 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5783 union e1000_adv_rx_desc *rx_desc)
5785 /* HW will not DMA in data larger than the given buffer, even if it
5786 * parses the (NFS, of course) header to be larger. In that case, it
5787 * fills the header buffer and spills the rest into the page.
5789 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5790 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5791 if (hlen > rx_ring->rx_buffer_len)
5792 hlen = rx_ring->rx_buffer_len;
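/* Example (editorial): hdr_info carries the header length the NIC
 * actually DMAed, extracted with the HDRBUFLEN mask/shift above; the
 * clamp to rx_buffer_len guards against the hardware reporting a
 * header larger than the buffer we provided.
 */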
5796 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5797 int *work_done, int budget)
5799 struct igb_ring *rx_ring = q_vector->rx_ring;
5800 struct net_device *netdev = rx_ring->netdev;
5801 struct device *dev = rx_ring->dev;
5802 union e1000_adv_rx_desc *rx_desc, *next_rxd;
5803 struct igb_buffer *buffer_info, *next_buffer;
5804 struct sk_buff *skb;
5805 bool cleaned = false;
5806 int cleaned_count = 0;
5807 int current_node = numa_node_id();
5808 unsigned int total_bytes = 0, total_packets = 0;
5814 i = rx_ring->next_to_clean;
5815 buffer_info = &rx_ring->buffer_info[i];
5816 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5817 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5819 while (staterr & E1000_RXD_STAT_DD) {
5820 if (*work_done >= budget)
5823 rmb(); /* read descriptor and rx_buffer_info after status DD */
5825 skb = buffer_info->skb;
5826 prefetch(skb->data - NET_IP_ALIGN);
5827 buffer_info->skb = NULL;
5829 i++;
5830 if (i == rx_ring->count)
5831 i = 0;
5833 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5835 next_buffer = &rx_ring->buffer_info[i];
5837 length = le16_to_cpu(rx_desc->wb.upper.length);
5841 if (buffer_info->dma) {
5842 dma_unmap_single(dev, buffer_info->dma,
5843 rx_ring->rx_buffer_len,
5845 buffer_info->dma = 0;
5846 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5847 skb_put(skb, length);
5850 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5854 dma_unmap_page(dev, buffer_info->page_dma,
5855 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5856 buffer_info->page_dma = 0;
5858 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5860 buffer_info->page_offset,
5863 if ((page_count(buffer_info->page) != 1) ||
5864 (page_to_nid(buffer_info->page) != current_node))
5865 buffer_info->page = NULL;
5867 get_page(buffer_info->page);
5870 skb->data_len += length;
5871 skb->truesize += length;
5874 if (!(staterr & E1000_RXD_STAT_EOP)) {
5875 buffer_info->skb = next_buffer->skb;
5876 buffer_info->dma = next_buffer->dma;
5877 next_buffer->skb = skb;
5878 next_buffer->dma = 0;
5882 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5883 dev_kfree_skb_irq(skb);
5887 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5888 igb_rx_hwtstamp(q_vector, staterr, skb);
5889 total_bytes += skb->len;
5892 igb_rx_checksum_adv(rx_ring, staterr, skb);
5894 skb->protocol = eth_type_trans(skb, netdev);
5895 skb_record_rx_queue(skb, rx_ring->queue_index);
5897 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5898 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5900 igb_receive_skb(q_vector, skb, vlan_tag);
5903 rx_desc->wb.upper.status_error = 0;
5905 /* return some buffers to hardware, one at a time is too slow */
5906 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5907 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5911 /* use prefetched values */
5913 buffer_info = next_buffer;
5914 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5917 rx_ring->next_to_clean = i;
5918 cleaned_count = igb_desc_unused(rx_ring);
5921 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5923 rx_ring->total_packets += total_packets;
5924 rx_ring->total_bytes += total_bytes;
5925 u64_stats_update_begin(&rx_ring->rx_syncp);
5926 rx_ring->rx_stats.packets += total_packets;
5927 rx_ring->rx_stats.bytes += total_bytes;
5928 u64_stats_update_end(&rx_ring->rx_syncp);
5933 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5934 * @rx_ring: pointer to the ring to place buffers on
5936 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5938 struct net_device *netdev = rx_ring->netdev;
5939 union e1000_adv_rx_desc *rx_desc;
5940 struct igb_buffer *buffer_info;
5941 struct sk_buff *skb;
5945 i = rx_ring->next_to_use;
5946 buffer_info = &rx_ring->buffer_info[i];
5948 bufsz = rx_ring->rx_buffer_len;
5950 while (cleaned_count--) {
5951 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5953 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5954 if (!buffer_info->page) {
5955 buffer_info->page = netdev_alloc_page(netdev);
5956 if (unlikely(!buffer_info->page)) {
5957 u64_stats_update_begin(&rx_ring->rx_syncp);
5958 rx_ring->rx_stats.alloc_failed++;
5959 u64_stats_update_end(&rx_ring->rx_syncp);
5962 buffer_info->page_offset = 0;
5964 buffer_info->page_offset ^= PAGE_SIZE / 2;
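/* Editorial note: RX pages are consumed a half at a time; XORing
 * page_offset with PAGE_SIZE / 2 flips to the other half, so a page is
 * only remapped once both halves are in flight (see the page_count()
 * and page_to_nid() recycling checks in the cleanup path above).
 */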
5966 buffer_info->page_dma =
5967 dma_map_page(rx_ring->dev, buffer_info->page,
5968 buffer_info->page_offset,
5971 if (dma_mapping_error(rx_ring->dev,
5972 buffer_info->page_dma)) {
5973 buffer_info->page_dma = 0;
5974 u64_stats_update_begin(&rx_ring->rx_syncp);
5975 rx_ring->rx_stats.alloc_failed++;
5976 u64_stats_update_end(&rx_ring->rx_syncp);
5981 skb = buffer_info->skb;
5983 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5984 if (unlikely(!skb)) {
5985 u64_stats_update_begin(&rx_ring->rx_syncp);
5986 rx_ring->rx_stats.alloc_failed++;
5987 u64_stats_update_end(&rx_ring->rx_syncp);
5991 buffer_info->skb = skb;
5993 if (!buffer_info->dma) {
5994 buffer_info->dma = dma_map_single(rx_ring->dev,
5998 if (dma_mapping_error(rx_ring->dev,
5999 buffer_info->dma)) {
6000 buffer_info->dma = 0;
6001 u64_stats_update_begin(&rx_ring->rx_syncp);
6002 rx_ring->rx_stats.alloc_failed++;
6003 u64_stats_update_end(&rx_ring->rx_syncp);
6007 /* Refresh the desc even if buffer_addrs didn't change because
6008 * each write-back erases this info. */
6009 if (bufsz < IGB_RXBUFFER_1024) {
6010 rx_desc->read.pkt_addr =
6011 cpu_to_le64(buffer_info->page_dma);
6012 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
6014 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
6015 rx_desc->read.hdr_addr = 0;
6018 i++;
6019 if (i == rx_ring->count)
6020 i = 0;
6021 buffer_info = &rx_ring->buffer_info[i];
6025 if (rx_ring->next_to_use != i) {
6026 rx_ring->next_to_use = i;
6028 i = (rx_ring->count - 1);
6032 /* Force memory writes to complete before letting h/w
6033 * know there are new descriptors to fetch. (Only
6034 * applicable for weak-ordered memory model archs,
6035 * such as IA-64). */
6037 writel(i, rx_ring->tail);
6047 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6049 struct igb_adapter *adapter = netdev_priv(netdev);
6050 struct mii_ioctl_data *data = if_mii(ifr);
6052 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6057 data->phy_id = adapter->hw.phy.addr;
6060 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6061 &data->val_out))
6062 return -EIO;
6072 * igb_hwtstamp_ioctl - control hardware time stamping
6077 * Outgoing time stamping can be enabled and disabled. Play nice and
6078 * disable it when requested, although it shouldn't cause any overhead
6079 * when no packet needs it. At most one packet in the queue may be
6080 * marked for time stamping, otherwise it would be impossible to tell
6081 * for sure to which packet the hardware time stamp belongs.
6083 * Incoming time stamping has to be configured via the hardware
6084 * filters. Not all combinations are supported, in particular event
6085 * type has to be specified. Matching the kind of event packet is
6086 * not supported, with the exception of "all V2 events regardless of
6090 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6091 struct ifreq *ifr, int cmd)
6093 struct igb_adapter *adapter = netdev_priv(netdev);
6094 struct e1000_hw *hw = &adapter->hw;
6095 struct hwtstamp_config config;
6096 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6097 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6098 u32 tsync_rx_cfg = 0;
6103 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6106 /* reserved for future extensions */
6110 switch (config.tx_type) {
6111 case HWTSTAMP_TX_OFF:
6113 case HWTSTAMP_TX_ON:
6119 switch (config.rx_filter) {
6120 case HWTSTAMP_FILTER_NONE:
6123 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6124 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6125 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6126 case HWTSTAMP_FILTER_ALL:
6128 * register TSYNCRXCFG must be set, therefore it is not
6129 * possible to time stamp both Sync and Delay_Req messages
6130 * => fall back to time stamping all packets
6132 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6133 config.rx_filter = HWTSTAMP_FILTER_ALL;
6135 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6136 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6137 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6140 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6141 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6142 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6145 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6146 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6147 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6148 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6151 config.rx_filter = HWTSTAMP_FILTER_SOME;
6153 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6154 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6155 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6156 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6159 config.rx_filter = HWTSTAMP_FILTER_SOME;
6161 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6162 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6163 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6164 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6165 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6172 if (hw->mac.type == e1000_82575) {
6173 if (tsync_rx_ctl | tsync_tx_ctl)
6179 * Per-packet timestamping only works if all packets are
6180 * timestamped, so enable timestamping in all packets as
6181 * long as one rx filter was configured.
6183 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6184 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6185 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6188 /* enable/disable TX */
6189 regval = rd32(E1000_TSYNCTXCTL);
6190 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6191 regval |= tsync_tx_ctl;
6192 wr32(E1000_TSYNCTXCTL, regval);
6194 /* enable/disable RX */
6195 regval = rd32(E1000_TSYNCRXCTL);
6196 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6197 regval |= tsync_rx_ctl;
6198 wr32(E1000_TSYNCRXCTL, regval);
6200 /* define which PTP packets are time stamped */
6201 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6203 /* define ethertype filter for timestamped packets */
6206 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6207 E1000_ETQF_1588 | /* enable timestamping */
6208 ETH_P_1588)); /* 1588 eth protocol type */
6210 wr32(E1000_ETQF(3), 0);
6212 #define PTP_PORT 319
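/* Editorial note: UDP port 319 carries PTP event messages (Sync,
 * Delay_Req) per IEEE 1588; general messages use port 320 and need no
 * timestamp, which is why only port 319 is matched below.
 */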
6213 /* L4 Queue Filter[3]: filter by destination port and protocol */
6215 u32 ftqf = (IPPROTO_UDP /* UDP */
6216 | E1000_FTQF_VF_BP /* VF not compared */
6217 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6218 | E1000_FTQF_MASK); /* mask all inputs */
6219 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6221 wr32(E1000_IMIR(3), htons(PTP_PORT));
6222 wr32(E1000_IMIREXT(3),
6223 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6224 if (hw->mac.type == e1000_82576) {
6225 /* enable source port check */
6226 wr32(E1000_SPQF(3), htons(PTP_PORT));
6227 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6229 wr32(E1000_FTQF(3), ftqf);
6231 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6235 adapter->hwtstamp_config = config;
6237 /* clear TX/RX time stamp registers, just to be sure */
6238 regval = rd32(E1000_TXSTMPH);
6239 regval = rd32(E1000_RXSTMPH);
6241 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6242 -EFAULT : 0;
6251 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6257 return igb_mii_ioctl(netdev, ifr, cmd);
6259 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6265 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6267 struct igb_adapter *adapter = hw->back;
6270 cap_offset = adapter->pdev->pcie_cap;
6271 if (!cap_offset)
6272 return -E1000_ERR_CONFIG;
6274 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6279 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6281 struct igb_adapter *adapter = hw->back;
6284 cap_offset = adapter->pdev->pcie_cap;
6285 if (!cap_offset)
6286 return -E1000_ERR_CONFIG;
6288 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6293 static void igb_vlan_rx_register(struct net_device *netdev,
6294 struct vlan_group *grp)
6296 struct igb_adapter *adapter = netdev_priv(netdev);
6297 struct e1000_hw *hw = &adapter->hw;
6300 igb_irq_disable(adapter);
6301 adapter->vlgrp = grp;
6303 if (grp) {
6304 /* enable VLAN tag insert/strip */
6305 ctrl = rd32(E1000_CTRL);
6306 ctrl |= E1000_CTRL_VME;
6307 wr32(E1000_CTRL, ctrl);
6309 /* Disable CFI check */
6310 rctl = rd32(E1000_RCTL);
6311 rctl &= ~E1000_RCTL_CFIEN;
6312 wr32(E1000_RCTL, rctl);
6313 } else {
6314 /* disable VLAN tag insert/strip */
6315 ctrl = rd32(E1000_CTRL);
6316 ctrl &= ~E1000_CTRL_VME;
6317 wr32(E1000_CTRL, ctrl);
6320 igb_rlpml_set(adapter);
6322 if (!test_bit(__IGB_DOWN, &adapter->state))
6323 igb_irq_enable(adapter);
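
/* add a VLAN id to both the VLVF pool filter and the VFTA table */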
static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;

	/* attempt to add filter to vlvf array */
	igb_vlvf_set(adapter, vid, true, pf_id);

	/* add the filter since PF can receive vlans w/o entry in vlvf */
	igb_vfta_set(hw, vid, true);
}
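
/* remove a VLAN id from the VLVF pool filter, falling back to the VFTA */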
static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;
	s32 err;

	igb_irq_disable(adapter);
	vlan_group_set_device(adapter->vlgrp, vid, NULL);

	if (!test_bit(__IGB_DOWN, &adapter->state))
		igb_irq_enable(adapter);

	/* remove vlan from VLVF table array */
	err = igb_vlvf_set(adapter, vid, false, pf_id);

	/* if vid was not present in VLVF just remove it from table */
	if (err)
		igb_vfta_set(hw, vid, false);
}
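
/* re-register the vlan_group and replay all VLAN filters after a reset */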
static void igb_restore_vlan(struct igb_adapter *adapter)
{
	u16 vid;

	igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);

	if (adapter->vlgrp) {
		for (vid = 0; vid < VLAN_N_VID; vid++) {
			if (!vlan_group_get_device(adapter->vlgrp, vid))
				continue;
			igb_vlan_rx_add_vid(adapter->netdev, vid);
		}
	}
}
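
/**
 * igb_set_spd_dplx - force a specific speed/duplex setting
 * @adapter: board private structure
 * @spd: requested speed (SPEED_10/100/1000)
 * @dplx: requested duplex (DUPLEX_HALF or DUPLEX_FULL)
 *
 * Returns 0 on success, -EINVAL for unsupported combinations.
 **/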
int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_mac_info *mac = &adapter->hw.mac;

	mac->autoneg = 0;

	/* Make sure dplx is at most 1 bit and lsb of speed is not set
	 * for the switch() below to work */
	if ((spd & 1) || (dplx & ~1))
		goto err_inval;

	/* Fiber NICs only allow 1000 Mbps full duplex */
	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
	    spd != SPEED_1000 &&
	    dplx != DUPLEX_FULL)
		goto err_inval;

	switch (spd + dplx) {
	case SPEED_10 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	case SPEED_10 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_10_FULL;
		break;
	case SPEED_100 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case SPEED_100 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_100_FULL;
		break;
	case SPEED_1000 + DUPLEX_FULL:
		mac->autoneg = 1;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case SPEED_1000 + DUPLEX_HALF: /* not supported */
	default:
		goto err_inval;
	}
	return 0;

err_inval:
	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
	return -EINVAL;
}
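
/*
 * __igb_shutdown - common suspend/shutdown path: stop the interface,
 * program the wake-up filters (WUFC) if Wake-on-LAN is configured, and
 * report through @enable_wake whether the caller should arm PME.
 */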
static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	u32 wufc = adapter->wol;
#ifdef CONFIG_PM
	int retval = 0;
#endif

	netif_device_detach(netdev);

	if (netif_running(netdev))
		igb_close(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PM
	retval = pci_save_state(pdev);
	if (retval)
		return retval;
#endif

	status = rd32(E1000_STATUS);
	if (status & E1000_STATUS_LU)
		wufc &= ~E1000_WUFC_LNKC;

	if (wufc) {
		igb_setup_rctl(adapter);
		igb_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & E1000_WUFC_MC) {
			rctl = rd32(E1000_RCTL);
			rctl |= E1000_RCTL_MPE;
			wr32(E1000_RCTL, rctl);
		}

		ctrl = rd32(E1000_CTRL);
		/* advertise wake from D3Cold */
		#define E1000_CTRL_ADVD3WUC 0x00100000
		/* phy power management enable */
		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
		ctrl |= E1000_CTRL_ADVD3WUC;
		wr32(E1000_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igb_disable_pcie_master(hw);

		wr32(E1000_WUC, E1000_WUC_PME_EN);
		wr32(E1000_WUFC, wufc);
	} else {
		wr32(E1000_WUC, 0);
		wr32(E1000_WUFC, 0);
	}

	*enable_wake = wufc || adapter->en_mng_pt;
	if (!*enable_wake)
		igb_power_down_link(adapter);
	else
		igb_power_up_link(adapter);

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
{
	int retval;
	bool wake;

	retval = __igb_shutdown(pdev, &wake);
	if (retval)
		return retval;

	if (wake) {
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return 0;
}
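
/*
 * igb_resume - restore device state, re-enable the PCI device and bring
 * the interface back up after a system suspend.
 */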
static int igb_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int err;

	pci_set_power_state(pdev, PCI_D0);
	pci_restore_state(pdev);
	pci_save_state(pdev);

	err = pci_enable_device_mem(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"igb: Cannot enable PCI device from suspend\n");
		return err;
	}
	pci_set_master(pdev);

	pci_enable_wake(pdev, PCI_D3hot, 0);
	pci_enable_wake(pdev, PCI_D3cold, 0);

	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	wr32(E1000_WUS, ~0);

	if (netif_running(netdev)) {
		err = igb_open(netdev);
		if (err)
			return err;
	}

	netif_device_attach(netdev);

	return 0;
}
#endif /* CONFIG_PM */
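
/* igb_shutdown - reboot/power-off hook; arms PME only on real power-off */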
static void igb_shutdown(struct pci_dev *pdev)
{
	bool wake;

	__igb_shutdown(pdev, &wake);

	if (system_state == SYSTEM_POWER_OFF) {
		pci_wake_from_d3(pdev, wake);
		pci_set_power_state(pdev, PCI_D3hot);
	}
}
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts.  It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int i;

	if (!adapter->msix_entries) {
		struct igb_q_vector *q_vector = adapter->q_vector[0];
		igb_irq_disable(adapter);
		napi_schedule(&q_vector->napi);
		return;
	}

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		wr32(E1000_EIMC, q_vector->eims_value);
		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}
/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot.  Implementation
 * resembles the first half of the igb_resume routine.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	pci_ers_result_t result;
	int err;

	if (pci_enable_device_mem(pdev)) {
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);
		pci_save_state(pdev);

		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		igb_reset(adapter);
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"pci_cleanup_aer_uncorrect_error_status failed 0x%x\n",
			err);
		/* non-fatal, continue */
	}

	return result;
}
/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation.  Implementation resembles the
 * second half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev)) {
		if (igb_up(adapter)) {
			dev_err(&pdev->dev, "igb_up failed after reset\n");
			return;
		}
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
}
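
/*
 * igb_rar_set_qsel - write a MAC address into a Receive Address Register
 * pair (RAL/RAH) and bind the entry to VMDq pool @qsel.
 */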
static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
			     u8 qsel)
{
	u32 rar_low, rar_high;
	struct e1000_hw *hw = &adapter->hw;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

	/* Indicate to hardware the Address is Valid. */
	rar_high |= E1000_RAH_AV;

	if (hw->mac.type == e1000_82575)
		rar_high |= E1000_RAH_POOL_1 * qsel;
	else
		rar_high |= E1000_RAH_POOL_1 << qsel;

	wr32(E1000_RAL(index), rar_low);
	wrfl();
	wr32(E1000_RAH(index), rar_high);
	wrfl();
}
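
/* program the RAR entry reserved for VF @vf with @mac_addr */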
static int igb_set_vf_mac(struct igb_adapter *adapter,
			  int vf, unsigned char *mac_addr)
{
	struct e1000_hw *hw = &adapter->hw;
	/* VF MAC addresses start at the end of the receive addresses and
	 * move towards the first; as a result a collision should not be
	 * possible */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);

	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

	return 0;
}
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
		return -EINVAL;
	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
		 " change effective.\n");
	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
			 " but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
			 " attempting to use the VF device.\n");
	}
	return igb_set_vf_mac(adapter, vf, mac);
}
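
/* translate the internal SPEED_* link constant into Mbps (0 if no link) */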
static int igb_link_mbps(int internal_link_speed)
{
	switch (internal_link_speed) {
	case SPEED_100:
		return 100;
	case SPEED_1000:
		return 1000;
	default:
		return 0;
	}
}
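
/*
 * igb_set_vf_rate_limit - program the per-queue rate limiter (RTTBCNRC)
 * for VF @vf.  The cap is expressed as a rate factor link_speed/tx_rate,
 * split into an integer part (rf_int) and a fractional part (rf_dec)
 * scaled by 2^E1000_RTTBCNRC_RF_INT_SHIFT.  For example, capping a
 * 1000 Mbps link at 300 Mbps gives rf_int = 3 with rf_dec holding the
 * remaining 1/3 in fixed point.  A tx_rate of 0 disables the limiter.
 */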
static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
				  int link_speed)
{
	int rf_dec, rf_int;
	u32 bcnrc_val = 0;

	if (tx_rate != 0) {
		/* Calculate the rate factor values to set */
		rf_int = link_speed / tx_rate;
		rf_dec = (link_speed - (rf_int * tx_rate));
		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;

		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
			       E1000_RTTBCNRC_RF_INT_MASK);
		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
	}

	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
	wr32(E1000_RTTBCNRC, bcnrc_val);
}
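
/*
 * igb_check_vf_rate_limit - reprogram all VF rate limits after a link
 * event; the configured rates are dropped if the link speed changed,
 * since the rate factors were computed against the old speed.
 */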
static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
{
	int actual_link_speed, i;
	bool reset_rate = false;

	/* VF TX rate limit was not set or not supported */
	if ((adapter->vf_rate_link_speed == 0) ||
	    (adapter->hw.mac.type != e1000_82576))
		return;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if (actual_link_speed != adapter->vf_rate_link_speed) {
		reset_rate = true;
		adapter->vf_rate_link_speed = 0;
		dev_info(&adapter->pdev->dev,
			 "Link speed has been changed. VF Transmit "
			 "rate is disabled\n");
	}

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		if (reset_rate)
			adapter->vf_data[i].tx_rate = 0;

		igb_set_vf_rate_limit(&adapter->hw, i,
				      adapter->vf_data[i].tx_rate,
				      actual_link_speed);
	}
}
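
/* ndo callback: validate and apply a VF transmit rate cap (82576 only) */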
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int actual_link_speed;

	if (hw->mac.type != e1000_82576)
		return -EOPNOTSUPP;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if ((vf >= adapter->vfs_allocated_count) ||
	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
	    (tx_rate < 0) || (tx_rate > actual_link_speed))
		return -EINVAL;

	adapter->vf_rate_link_speed = actual_link_speed;
	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);

	return 0;
}
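
/* ndo callback: report a VF's MAC, rate cap, VLAN and QoS settings */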
static int igb_ndo_get_vf_config(struct net_device *netdev,
				 int vf, struct ifla_vf_info *ivi)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;
	ivi->vf = vf;
	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
	ivi->vlan = adapter->vf_data[vf].pf_vlan;
	ivi->qos = adapter->vf_data[vf].pf_qos;
	return 0;
}
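
/*
 * igb_vmm_control - configure VM-to-VM traffic handling.  The switch
 * below falls through deliberately: each newer MAC requires only a
 * subset of the programming needed by the one before it.
 */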
static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	switch (hw->mac.type) {
	case e1000_82575:
	default:
		/* replication is not supported for 82575 */
		return;
	case e1000_82576:
		/* notify HW that the MAC is adding vlan tags */
		reg = rd32(E1000_DTXCTL);
		reg |= E1000_DTXCTL_VLAN_ADDED;
		wr32(E1000_DTXCTL, reg);
		/* fall through */
	case e1000_82580:
		/* enable replication vlan tag stripping */
		reg = rd32(E1000_RPLOLR);
		reg |= E1000_RPLOLR_STRVLAN;
		wr32(E1000_RPLOLR, reg);
		/* fall through */
	case e1000_i350:
		/* none of the above registers are supported by i350 */
		break;
	}

	if (adapter->vfs_allocated_count) {
		igb_vmdq_set_loopback_pf(hw, true);
		igb_vmdq_set_replication_pf(hw, true);
		igb_vmdq_set_anti_spoofing_pf(hw, true,
					      adapter->vfs_allocated_count);
	} else {
		igb_vmdq_set_loopback_pf(hw, false);
		igb_vmdq_set_replication_pf(hw, false);