/*
 * This file is part of the Chelsio T4 Ethernet driver for Linux.
 *
 * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/crc32.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/etherdevice.h>
#include <linux/firmware.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/mdio.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/rtnetlink.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/sockios.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <net/neighbour.h>
#include <net/netevent.h>
#include <net/addrconf.h>
#include <net/bonding.h>
#include <asm/uaccess.h>
#include "cxgb4.h"
#include "t4_regs.h"
#include "t4_msg.h"
#include "t4fw_api.h"
#include "cxgb4_dcb.h"
#include "cxgb4_debugfs.h"
#include "l2t.h"
#define DRV_VERSION "2.0.0-ko"
#define DRV_DESC "Chelsio T4/T5 Network Driver"

/*
 * Max interrupt hold-off timer value in us.  Queues fall back to this value
 * under extreme memory pressure so it's largish to give the system time to
 * free resources.
 */
#define MAX_SGE_TIMERVAL 200U
enum {
        /*
         * Physical Function provisioning constants.
         */
        PFRES_NVI = 4,                  /* # of Virtual Interfaces */
        PFRES_NETHCTRL = 128,           /* # of EQs used for ETH or CTRL Qs */
        PFRES_NIQFLINT = 128,           /* # of ingress Qs/w Free List(s)/intr */
        PFRES_NEQ = 256,                /* # of egress queues */
        PFRES_NIQ = 0,                  /* # of ingress queues */
        PFRES_TC = 0,                   /* PCI-E traffic class */
        PFRES_NEXACTF = 128,            /* # of exact MPS filters */

        PFRES_R_CAPS = FW_CMD_CAP_PF,
        PFRES_WX_CAPS = FW_CMD_CAP_PF,

#ifdef CONFIG_PCI_IOV
        /*
         * Virtual Function provisioning constants.  We need two extra Ingress
         * Queues with Interrupt capability to serve as the VF's Firmware
         * Event Queue and Forwarded Interrupt Queue (when using MSI mode) --
         * neither will have Free Lists associated with them.  For each
         * Ethernet/Control Egress Queue and for each Free List, we need an
         * Egress Context.
         */
        VFRES_NPORTS = 1,               /* # of "ports" per VF */
        VFRES_NQSETS = 2,               /* # of "Queue Sets" per VF */

        VFRES_NVI = VFRES_NPORTS,       /* # of Virtual Interfaces */
        VFRES_NETHCTRL = VFRES_NQSETS,  /* # of EQs used for ETH or CTRL Qs */
        VFRES_NIQFLINT = VFRES_NQSETS+2,/* # of ingress Qs/w Free List(s)/intr */
        VFRES_NEQ = VFRES_NQSETS*2,     /* # of egress queues */
        VFRES_NIQ = 0,                  /* # of non-fl/int ingress queues */
        VFRES_TC = 0,                   /* PCI-E traffic class */
        VFRES_NEXACTF = 16,             /* # of exact MPS filters */

        VFRES_R_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF|FW_CMD_CAP_PORT,
        VFRES_WX_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF,
#endif
};
/*
 * Provide a Port Access Rights Mask for the specified PF/VF.  This is very
 * static and likely not to be useful in the long run.  We really need to
 * implement some form of persistent configuration which the firmware
 * controls.
 */
static unsigned int pfvfres_pmask(struct adapter *adapter,
                                  unsigned int pf, unsigned int vf)
{
        unsigned int portn, portvec;

        /*
         * Give PF's access to all of the ports.
         */
        if (vf == 0)
                return FW_PFVF_CMD_PMASK_M;

        /*
         * For VFs, we'll assign them access to the ports based purely on the
         * PF.  We assign active ports in order, wrapping around if there are
         * fewer active ports than PFs: e.g. active port[pf % nports].
         * Unfortunately the adapter's port_info structs haven't been
         * initialized yet, so we have to compute this.
         */
        if (adapter->params.nports == 0)
                return 0;

        portn = pf % adapter->params.nports;
        portvec = adapter->params.portvec;
        while (1) {
                /*
                 * Isolate the lowest set bit in the port vector.  If we're at
                 * the port number that we want, return that as the pmask;
                 * otherwise mask that bit out of the port vector and
                 * decrement our port number ...
                 */
                unsigned int pmask = portvec ^ (portvec & (portvec - 1));
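                /* Example: portvec = 0b0110 gives portvec & (portvec - 1) =
                 * 0b0100, so the XOR leaves exactly the lowest set bit:
                 * 0b0110 ^ 0b0100 = 0b0010.
                 */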
enum {
        MAX_TXQ_ENTRIES      = 16384,
        MAX_CTRL_TXQ_ENTRIES = 1024,
        MAX_RSPQ_ENTRIES     = 16384,
        MAX_RX_BUFFERS       = 16384,
        MIN_TXQ_ENTRIES      = 32,
        MIN_CTRL_TXQ_ENTRIES = 32,
        MIN_RSPQ_ENTRIES     = 128,
        MIN_FL_ENTRIES       = 16
};
/* Host shadow copy of ingress filter entry.  This is in host native format
 * and doesn't match the ordering or bit order, etc. of the hardware or the
 * firmware command.  The use of bit-field structure elements is purely to
 * remind ourselves of the field size limitations and save memory in the case
 * where the filter table is large.
 */
struct filter_entry {
        /* Administrative fields for filter. */
        u32 valid:1;            /* filter allocated and valid */
        u32 locked:1;           /* filter is administratively locked */

        u32 pending:1;          /* filter action is pending firmware reply */
        u32 smtidx:8;           /* Source MAC Table index for smac */
        struct l2t_entry *l2t;  /* Layer Two Table entry for dmac */

        /* The filter itself.  Most of this is a straight copy of information
         * provided by the extended ioctl().  Some fields are translated to
         * internal forms -- for instance the Ingress Queue ID passed in from
         * the ioctl() is translated into the Absolute Ingress Queue ID.
         */
        struct ch_filter_specification fs;
};
#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
                         NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
                         NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)

#define CH_DEVICE(devid, data) { PCI_VDEVICE(CHELSIO, devid), (data) }
static const struct pci_device_id cxgb4_pci_tbl[] = {
        CH_DEVICE(0xa000, 0),  /* PE10K */
        CH_DEVICE(0x4001, -1),
        CH_DEVICE(0x4002, -1),
        CH_DEVICE(0x4003, -1),
        CH_DEVICE(0x4004, -1),
        CH_DEVICE(0x4005, -1),
        CH_DEVICE(0x4006, -1),
        CH_DEVICE(0x4007, -1),
        CH_DEVICE(0x4008, -1),
        CH_DEVICE(0x4009, -1),
        CH_DEVICE(0x400a, -1),
        CH_DEVICE(0x400d, -1),
        CH_DEVICE(0x400e, -1),
        CH_DEVICE(0x4080, -1),
        CH_DEVICE(0x4081, -1),
        CH_DEVICE(0x4082, -1),
        CH_DEVICE(0x4083, -1),
        CH_DEVICE(0x4084, -1),
        CH_DEVICE(0x4085, -1),
        CH_DEVICE(0x4086, -1),
        CH_DEVICE(0x4087, -1),
        CH_DEVICE(0x4088, -1),
        CH_DEVICE(0x4401, 4),
        CH_DEVICE(0x4402, 4),
        CH_DEVICE(0x4403, 4),
        CH_DEVICE(0x4404, 4),
        CH_DEVICE(0x4405, 4),
        CH_DEVICE(0x4406, 4),
        CH_DEVICE(0x4407, 4),
        CH_DEVICE(0x4408, 4),
        CH_DEVICE(0x4409, 4),
        CH_DEVICE(0x440a, 4),
        CH_DEVICE(0x440d, 4),
        CH_DEVICE(0x440e, 4),
        CH_DEVICE(0x4480, 4),
        CH_DEVICE(0x4481, 4),
        CH_DEVICE(0x4482, 4),
        CH_DEVICE(0x4483, 4),
        CH_DEVICE(0x4484, 4),
        CH_DEVICE(0x4485, 4),
        CH_DEVICE(0x4486, 4),
        CH_DEVICE(0x4487, 4),
        CH_DEVICE(0x4488, 4),
        CH_DEVICE(0x5001, 4),
        CH_DEVICE(0x5002, 4),
        CH_DEVICE(0x5003, 4),
        CH_DEVICE(0x5004, 4),
        CH_DEVICE(0x5005, 4),
        CH_DEVICE(0x5006, 4),
        CH_DEVICE(0x5007, 4),
        CH_DEVICE(0x5008, 4),
        CH_DEVICE(0x5009, 4),
        CH_DEVICE(0x500A, 4),
        CH_DEVICE(0x500B, 4),
        CH_DEVICE(0x500C, 4),
        CH_DEVICE(0x500D, 4),
        CH_DEVICE(0x500E, 4),
        CH_DEVICE(0x500F, 4),
        CH_DEVICE(0x5010, 4),
        CH_DEVICE(0x5011, 4),
        CH_DEVICE(0x5012, 4),
        CH_DEVICE(0x5013, 4),
        CH_DEVICE(0x5014, 4),
        CH_DEVICE(0x5015, 4),
        CH_DEVICE(0x5080, 4),
        CH_DEVICE(0x5081, 4),
        CH_DEVICE(0x5082, 4),
        CH_DEVICE(0x5083, 4),
        CH_DEVICE(0x5084, 4),
        CH_DEVICE(0x5085, 4),
        CH_DEVICE(0x5086, 4),
        CH_DEVICE(0x5087, 4),
        CH_DEVICE(0x5088, 4),
        CH_DEVICE(0x5401, 4),
        CH_DEVICE(0x5402, 4),
        CH_DEVICE(0x5403, 4),
        CH_DEVICE(0x5404, 4),
        CH_DEVICE(0x5405, 4),
        CH_DEVICE(0x5406, 4),
        CH_DEVICE(0x5407, 4),
        CH_DEVICE(0x5408, 4),
        CH_DEVICE(0x5409, 4),
        CH_DEVICE(0x540A, 4),
        CH_DEVICE(0x540B, 4),
        CH_DEVICE(0x540C, 4),
        CH_DEVICE(0x540D, 4),
        CH_DEVICE(0x540E, 4),
        CH_DEVICE(0x540F, 4),
        CH_DEVICE(0x5410, 4),
        CH_DEVICE(0x5411, 4),
        CH_DEVICE(0x5412, 4),
        CH_DEVICE(0x5413, 4),
        CH_DEVICE(0x5414, 4),
        CH_DEVICE(0x5415, 4),
        CH_DEVICE(0x5480, 4),
        CH_DEVICE(0x5481, 4),
        CH_DEVICE(0x5482, 4),
        CH_DEVICE(0x5483, 4),
        CH_DEVICE(0x5484, 4),
        CH_DEVICE(0x5485, 4),
        CH_DEVICE(0x5486, 4),
        CH_DEVICE(0x5487, 4),
        CH_DEVICE(0x5488, 4),
        { 0, }
};
#define FW4_FNAME "cxgb4/t4fw.bin"
#define FW5_FNAME "cxgb4/t5fw.bin"
#define FW4_CFNAME "cxgb4/t4-config.txt"
#define FW5_CFNAME "cxgb4/t5-config.txt"

MODULE_DESCRIPTION(DRV_DESC);
MODULE_AUTHOR("Chelsio Communications");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, cxgb4_pci_tbl);
MODULE_FIRMWARE(FW4_FNAME);
MODULE_FIRMWARE(FW5_FNAME);

/*
 * Normally we're willing to become the firmware's Master PF but will be happy
 * if another PF has already become the Master and initialized the adapter.
 * Setting "force_init" will cause this driver to forcibly establish itself as
 * the Master PF and initialize the adapter.
 */
static uint force_init;

module_param(force_init, uint, 0644);
MODULE_PARM_DESC(force_init, "Forcibly become Master PF and initialize adapter");

/*
 * Normally if the firmware we connect to has Configuration File support, we
 * use that and only fall back to the old Driver-based initialization if the
 * Configuration File fails for some reason.  If force_old_init is set, then
 * we'll always use the old Driver-based initialization sequence.
 */
static uint force_old_init;

module_param(force_old_init, uint, 0644);
MODULE_PARM_DESC(force_old_init, "Force old initialization sequence");

static int dflt_msg_enable = DFLT_MSG_ENABLE;

module_param(dflt_msg_enable, int, 0644);
MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T4 default message enable bitmap");
/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy INTx interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and INTx interrupts
 * msi = 0: force INTx interrupts
 */
static int msi = 2;

module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use INTx (0), MSI (1) or MSI-X (2)");

/*
 * Queue interrupt hold-off timer values.  Queues default to the first of
 * these upon creation.
 */
static unsigned int intr_holdoff[SGE_NTIMERS - 1] = { 5, 10, 20, 50, 100 };

module_param_array(intr_holdoff, uint, NULL, 0644);
MODULE_PARM_DESC(intr_holdoff, "values for queue interrupt hold-off timers "
                 "0..4 in microseconds");

static unsigned int intr_cnt[SGE_NCOUNTERS - 1] = { 4, 8, 16 };

module_param_array(intr_cnt, uint, NULL, 0644);
MODULE_PARM_DESC(intr_cnt,
                 "thresholds 1..3 for queue interrupt packet counters");

/*
 * Normally we tell the chip to deliver Ingress Packets into our DMA buffers
 * offset by 2 bytes in order to have the IP headers line up on 4-byte
 * boundaries.  This is a requirement for many architectures which will throw
 * a machine check fault if an attempt is made to access one of the 4-byte IP
 * header fields on a non-4-byte boundary.  And it's a major performance issue
 * even on some architectures which allow it, like some implementations of the
 * x86 ISA.  However, some architectures don't mind this and for some very
 * edge-case performance sensitive applications (like forwarding large volumes
 * of small packets), setting this DMA offset to 0 will decrease the number of
 * PCI-E Bus transfers enough to measurably affect performance.
 */
static int rx_dma_offset = 2;
#ifdef CONFIG_PCI_IOV
static bool vf_acls;

module_param(vf_acls, bool, 0644);
MODULE_PARM_DESC(vf_acls, "if set enable virtualization L2 ACL enforcement");

/* Configure the number of PCI-E Virtual Functions which are to be instantiated
 * on SR-IOV Capable Physical Functions.
 */
static unsigned int num_vf[NUM_OF_PF_WITH_SRIOV];

module_param_array(num_vf, uint, NULL, 0644);
MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3");
#endif

/* TX Queue select used to determine what algorithm to use for selecting TX
 * queue.  Select between the kernel provided function (select_queue=0) or the
 * driver's cxgb_select_queue function (select_queue=1).
 *
 * Default: select_queue=0
 */
static int select_queue;
module_param(select_queue, int, 0644);
MODULE_PARM_DESC(select_queue,
                 "Select between kernel provided method of selecting or driver method of selecting TX queue. Default is kernel method.");
/*
 * The filter TCAM has a fixed portion and a variable portion.  The fixed
 * portion can match on source/destination IP IPv4/IPv6 addresses and TCP/UDP
 * ports.  The variable portion is 36 bits which can include things like Exact
 * Match MAC Index (9 bits), Ether Type (16 bits), IP Protocol (8 bits),
 * [Inner] VLAN Tag (17 bits), etc. which, if all were somehow selected, would
 * far exceed the 36-bit budget for this "compressed" header portion of the
 * filter.  Thus, we have a scarce resource which must be carefully managed.
 *
 * By default we set this up to mostly match the set of filter matching
 * capabilities of T3 but with accommodations for some of T4's more
 * interesting features:
 *
 * { IP Fragment (1), MPS Match Type (3), IP Protocol (8),
 *   [Inner] VLAN (17), Port (3), FCoE (1) }
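 *
 * (With the default set above that's 1 + 3 + 8 + 17 + 3 + 1 = 33 bits,
 * comfortably within the 36-bit budget.)
 */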
enum {
        TP_VLAN_PRI_MAP_DEFAULT = HW_TPL_FR_MT_PR_IV_P_FC,
        TP_VLAN_PRI_MAP_FIRST = FCOE_SHIFT,
        TP_VLAN_PRI_MAP_LAST = FRAGMENTATION_SHIFT,
};

static unsigned int tp_vlan_pri_map = TP_VLAN_PRI_MAP_DEFAULT;

module_param(tp_vlan_pri_map, uint, 0644);
MODULE_PARM_DESC(tp_vlan_pri_map, "global compressed filter configuration");
static struct dentry *cxgb4_debugfs_root;

static LIST_HEAD(adapter_list);
static DEFINE_MUTEX(uld_mutex);
/* Adapter list to be accessed from atomic context */
static LIST_HEAD(adap_rcu_list);
static DEFINE_SPINLOCK(adap_rcu_lock);
static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
static const char *uld_str[] = { "RDMA", "iSCSI" };

static void link_report(struct net_device *dev)
{
        if (!netif_carrier_ok(dev))
                netdev_info(dev, "link down\n");
        else {
                static const char *fc[] = { "no", "Rx", "Tx", "Tx/Rx" };

                const char *s = "10Mbps";
                const struct port_info *p = netdev_priv(dev);

                switch (p->link_cfg.speed) {
                case 10000:
                        s = "10Gbps";
                        break;
                case 1000:
                        s = "1000Mbps";
                        break;
                case 100:
                        s = "100Mbps";
                        break;
                case 40000:
                        s = "40Gbps";
                        break;
                }

                netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s,
                            fc[p->link_cfg.fc]);
        }
}
#ifdef CONFIG_CHELSIO_T4_DCB
/* Set up/tear down Data Center Bridging Priority mapping for a net device. */
static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable)
{
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adap = pi->adapter;
        struct sge_eth_txq *txq = &adap->sge.ethtxq[pi->first_qset];
        int i;

        /* We use a simple mapping of Port TX Queue Index to DCB
         * Priority when we're enabling DCB.
         */
        for (i = 0; i < pi->nqsets; i++, txq++) {
                u32 name, value;
                int err;

                name = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
                        FW_PARAMS_PARAM_X_V(
                                FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH) |
                        FW_PARAMS_PARAM_YZ_V(txq->q.cntxt_id));
                value = enable ? i : 0xffffffff;

                /* Since we can be called while atomic (from "interrupt
                 * level") we need to issue the Set Parameters Command
                 * without sleeping (timeout < 0).
                 */
                err = t4_set_params_nosleep(adap, adap->mbox, adap->fn, 0, 1,
                                            &name, &value,
                                            -FW_CMD_MAX_TIMEOUT);
                if (err)
                        dev_err(adap->pdev_dev,
                                "Can't %s DCB Priority on port %d, TX Queue %d: err=%d\n",
                                enable ? "set" : "unset", pi->port_id, i, -err);
                else
                        txq->dcb_prio = value;
        }
}
#endif /* CONFIG_CHELSIO_T4_DCB */
void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat)
{
        struct net_device *dev = adapter->port[port_id];

        /* Skip changes from disabled ports. */
        if (netif_running(dev) && link_stat != netif_carrier_ok(dev)) {
                if (link_stat)
                        netif_carrier_on(dev);
                else {
#ifdef CONFIG_CHELSIO_T4_DCB
                        cxgb4_dcb_state_init(dev);
                        dcb_tx_queue_prio_enable(dev, false);
#endif /* CONFIG_CHELSIO_T4_DCB */
                        netif_carrier_off(dev);
                }

                link_report(dev);
        }
}

void t4_os_portmod_changed(const struct adapter *adap, int port_id)
{
        static const char *mod_str[] = {
                NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
        };

        const struct net_device *dev = adap->port[port_id];
        const struct port_info *pi = netdev_priv(dev);

        if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
                netdev_info(dev, "port module unplugged\n");
        else if (pi->mod_type < ARRAY_SIZE(mod_str))
                netdev_info(dev, "%s module inserted\n", mod_str[pi->mod_type]);
}

/*
 * Configure the exact and hash address filters to handle a port's multicast
 * and secondary unicast MAC addresses.
 */
static int set_addr_filters(const struct net_device *dev, bool sleep)
{
        u64 mhash = 0;
        u64 uhash = 0;
        bool free = true;
        u16 filt_idx[7];
        const u8 *addr[7];
        int ret, naddr = 0;
        const struct netdev_hw_addr *ha;
        int uc_cnt = netdev_uc_count(dev);
        int mc_cnt = netdev_mc_count(dev);
        const struct port_info *pi = netdev_priv(dev);
        unsigned int mb = pi->adapter->fn;

        /* first do the secondary unicast addresses */
        netdev_for_each_uc_addr(ha, dev) {
                addr[naddr++] = ha->addr;
                if (--uc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
                        ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
                                        naddr, addr, filt_idx, &uhash, sleep);
                        if (ret < 0)
                                return ret;

                        free = false;
                        naddr = 0;
                }
        }

        /* next set up the multicast addresses */
        netdev_for_each_mc_addr(ha, dev) {
                addr[naddr++] = ha->addr;
                if (--mc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
                        ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
                                        naddr, addr, filt_idx, &mhash, sleep);
                        if (ret < 0)
                                return ret;

                        free = false;
                        naddr = 0;
                }
        }

        return t4_set_addr_hash(pi->adapter, mb, pi->viid, uhash != 0,
                                uhash | mhash, sleep);
}

int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */
module_param(dbfifo_int_thresh, int, 0644);
MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold");

/*
 * usecs to sleep while draining the dbfifo
 */
static int dbfifo_drain_delay = 1000;
module_param(dbfifo_drain_delay, int, 0644);
MODULE_PARM_DESC(dbfifo_drain_delay,
                 "usecs to sleep while draining the dbfifo");

/*
 * Set Rx properties of a port, such as promiscuity, address filters, and MTU.
 * If @mtu is -1 it is left unchanged.
 */
static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
{
        int ret;
        struct port_info *pi = netdev_priv(dev);

        ret = set_addr_filters(dev, sleep_ok);
        if (ret == 0)
                ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, mtu,
                                    (dev->flags & IFF_PROMISC) ? 1 : 0,
                                    (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1,
                                    sleep_ok);
        return ret;
}
/*
 *	link_start - enable a port
 *	@dev: the port to enable
 *
 *	Performs the MAC and PHY actions needed to enable a port.
 */
static int link_start(struct net_device *dev)
{
        int ret;
        struct port_info *pi = netdev_priv(dev);
        unsigned int mb = pi->adapter->fn;

        /*
         * We do not set address filters and promiscuity here, the stack does
         * that step explicitly.
         */
        ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1,
                            !!(dev->features & NETIF_F_HW_VLAN_CTAG_RX), true);
        if (ret == 0) {
                ret = t4_change_mac(pi->adapter, mb, pi->viid,
                                    pi->xact_addr_filt, dev->dev_addr, true,
                                    true);
                if (ret >= 0) {
                        pi->xact_addr_filt = ret;
                        ret = 0;
                }
        }
        if (ret == 0)
                ret = t4_link_start(pi->adapter, mb, pi->tx_chan,
                                    &pi->link_cfg);
        if (ret == 0) {
                local_bh_disable();
                ret = t4_enable_vi_params(pi->adapter, mb, pi->viid, true,
                                          true, CXGB4_DCB_ENABLED);
                local_bh_enable();
        }

        return ret;
}

int cxgb4_dcb_enabled(const struct net_device *dev)
{
#ifdef CONFIG_CHELSIO_T4_DCB
        struct port_info *pi = netdev_priv(dev);

        if (!pi->dcb.enabled)
                return 0;

        return ((pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED) ||
                (pi->dcb.state == CXGB4_DCB_STATE_HOST));
#else
        return 0;
#endif
}
EXPORT_SYMBOL(cxgb4_dcb_enabled);

#ifdef CONFIG_CHELSIO_T4_DCB
/* Handle a Data Center Bridging update message from the firmware. */
static void dcb_rpl(struct adapter *adap, const struct fw_port_cmd *pcmd)
{
        int port = FW_PORT_CMD_PORTID_G(ntohl(pcmd->op_to_portid));
        struct net_device *dev = adap->port[port];
        int old_dcb_enabled = cxgb4_dcb_enabled(dev);
        int new_dcb_enabled;

        cxgb4_dcb_handle_fw_update(adap, pcmd);
        new_dcb_enabled = cxgb4_dcb_enabled(dev);

        /* If the DCB has become enabled or disabled on the port then we're
         * going to need to set up/tear down DCB Priority parameters for the
         * TX Queues associated with the port.
         */
        if (new_dcb_enabled != old_dcb_enabled)
                dcb_tx_queue_prio_enable(dev, new_dcb_enabled);
}
#endif /* CONFIG_CHELSIO_T4_DCB */

/* Clear a filter and release any of its resources that we own.  This also
 * clears the filter's "pending" status.
 */
static void clear_filter(struct adapter *adap, struct filter_entry *f)
{
        /* If the new or old filter have loopback rewriting rules then we'll
         * need to free any existing Layer Two Table (L2T) entries of the old
         * filter rule.  The firmware will handle freeing up any Source MAC
         * Table (SMT) entries used for rewriting Source MAC Addresses in
         * loopback rules.
         */
        if (f->l2t)
                cxgb4_l2t_release(f->l2t);

        /* The zeroing of the filter rule below clears the filter valid,
         * pending, locked flags, l2t pointer, etc. so it's all we need for
         * this operation.
         */
        memset(f, 0, sizeof(*f));
}
/* Handle a filter write/deletion reply.
 */
static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
{
        unsigned int idx = GET_TID(rpl);
        unsigned int nidx = idx - adap->tids.ftid_base;
        unsigned int ret;
        struct filter_entry *f;

        if (idx >= adap->tids.ftid_base && nidx <
           (adap->tids.nftids + adap->tids.nsftids)) {
                idx = nidx;
                ret = GET_TCB_COOKIE(rpl->cookie);
                f = &adap->tids.ftid_tab[idx];

                if (ret == FW_FILTER_WR_FLT_DELETED) {
                        /* Clear the filter when we get confirmation from the
                         * hardware that the filter has been deleted.
                         */
                        clear_filter(adap, f);
                } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
                        dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
                                idx);
                        clear_filter(adap, f);
                } else if (ret == FW_FILTER_WR_FLT_ADDED) {
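                        /* The firmware returns the Source MAC Table index it
                         * allocated for the filter; the shift and mask below
                         * assume the index sits in bits 31:24 of oldval.
                         */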
                        f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
                        f->pending = 0;  /* asynchronous setup completed */
                        f->valid = 1;
                } else {
                        /* Something went wrong.  Issue a warning about the
                         * problem and clear everything out.
                         */
                        dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n",
                                idx, ret);
                        clear_filter(adap, f);
                }
        }
}
/* Response queue handler for the FW event queue.
 */
static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
                          const struct pkt_gl *gl)
{
        u8 opcode = ((const struct rss_header *)rsp)->opcode;

        rsp++;                        /* skip RSS header */

        /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
         */
        if (unlikely(opcode == CPL_FW4_MSG &&
           ((const struct cpl_fw4_msg *)rsp)->type == FW_TYPE_RSSCPL)) {
                rsp++;
                opcode = ((const struct rss_header *)rsp)->opcode;
                rsp++;
                if (opcode != CPL_SGE_EGR_UPDATE) {
                        dev_err(q->adap->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n",
                                opcode);
                        goto out;
                }
        }

        if (likely(opcode == CPL_SGE_EGR_UPDATE)) {
                const struct cpl_sge_egr_update *p = (void *)rsp;
                unsigned int qid = EGR_QID(ntohl(p->opcode_qid));
                struct sge_txq *txq;

                txq = q->adap->sge.egr_map[qid - q->adap->sge.egr_start];
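                /* egr_map[] is indexed by absolute egress qid relative to
                 * egr_start.  The address comparison below assumes the
                 * Ethernet TX queues are laid out before the offload TX
                 * queues within struct sge, so any pointer below
                 * sge.ofldtxq must be an Ethernet queue.
                 */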
                txq->restarts++;
                if ((u8 *)txq < (u8 *)q->adap->sge.ofldtxq) {
                        struct sge_eth_txq *eq;

                        eq = container_of(txq, struct sge_eth_txq, q);
                        netif_tx_wake_queue(eq->txq);
                } else {
                        struct sge_ofld_txq *oq;

                        oq = container_of(txq, struct sge_ofld_txq, q);
                        tasklet_schedule(&oq->qresume_tsk);
                }
        } else if (opcode == CPL_FW6_MSG || opcode == CPL_FW4_MSG) {
                const struct cpl_fw6_msg *p = (void *)rsp;

#ifdef CONFIG_CHELSIO_T4_DCB
                const struct fw_port_cmd *pcmd = (const void *)p->data;
                unsigned int cmd = FW_CMD_OP_G(ntohl(pcmd->op_to_portid));
                unsigned int action =
                        FW_PORT_CMD_ACTION_G(ntohl(pcmd->action_to_len16));

                if (cmd == FW_PORT_CMD &&
                    action == FW_PORT_ACTION_GET_PORT_INFO) {
                        int port = FW_PORT_CMD_PORTID_G(
                                        be32_to_cpu(pcmd->op_to_portid));
                        struct net_device *dev = q->adap->port[port];
                        int state_input = ((pcmd->u.info.dcbxdis_pkd &
                                            FW_PORT_CMD_DCBXDIS_F)
                                           ? CXGB4_DCB_INPUT_FW_DISABLED
                                           : CXGB4_DCB_INPUT_FW_ENABLED);

                        cxgb4_dcb_state_fsm(dev, state_input);
                }

                if (cmd == FW_PORT_CMD &&
                    action == FW_PORT_ACTION_L2_DCB_CFG)
                        dcb_rpl(q->adap, pcmd);
                else
#endif
                        if (p->type == 0)
                                t4_handle_fw_rpl(q->adap, p->data);
        } else if (opcode == CPL_L2T_WRITE_RPL) {
                const struct cpl_l2t_write_rpl *p = (void *)rsp;

                do_l2t_write_rpl(q->adap, p);
        } else if (opcode == CPL_SET_TCB_RPL) {
                const struct cpl_set_tcb_rpl *p = (void *)rsp;

                filter_rpl(q->adap, p);
        } else
                dev_err(q->adap->pdev_dev,
                        "unexpected CPL %#x on FW event queue\n", opcode);
out:
        return 0;
}

/**
 *	uldrx_handler - response queue handler for ULD queues
 *	@q: the response queue that received the packet
 *	@rsp: the response queue descriptor holding the offload message
 *	@gl: the gather list of packet fragments
 *
 *	Deliver an ingress offload packet to a ULD.  All processing is done by
 *	the ULD, we just maintain statistics.
 */
static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
                         const struct pkt_gl *gl)
{
        struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);

        /* FW can send CPLs encapsulated in a CPL_FW4_MSG.
         */
        if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG &&
            ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL)
                rsp += 2;

        if (ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], rsp, gl)) {
                rxq->stats.nomem++;
                return -1;
        }
        if (gl == NULL)
                rxq->stats.imm++;
        else if (gl == CXGB4_MSG_AN)
                rxq->stats.an++;
        else
                rxq->stats.pkts++;
        return 0;
}
static void disable_msi(struct adapter *adapter)
{
        if (adapter->flags & USING_MSIX) {
                pci_disable_msix(adapter->pdev);
                adapter->flags &= ~USING_MSIX;
        } else if (adapter->flags & USING_MSI) {
                pci_disable_msi(adapter->pdev);
                adapter->flags &= ~USING_MSI;
        }
}

/*
 * Interrupt handler for non-data events used with MSI-X.
 */
static irqreturn_t t4_nondata_intr(int irq, void *cookie)
{
        struct adapter *adap = cookie;

        u32 v = t4_read_reg(adap, MYPF_REG(PL_PF_INT_CAUSE));
        if (v & PFSW) {
                adap->swintr = 1;
                t4_write_reg(adap, MYPF_REG(PL_PF_INT_CAUSE), v);
        }
        t4_slow_intr_handler(adap);
        return IRQ_HANDLED;
}
/*
 * Name the MSI-X interrupts.
 */
static void name_msix_vecs(struct adapter *adap)
{
        int i, j, msi_idx = 2, n = sizeof(adap->msix_info[0].desc);

        /* non-data interrupts */
        snprintf(adap->msix_info[0].desc, n, "%s", adap->port[0]->name);

        /* FW events */
        snprintf(adap->msix_info[1].desc, n, "%s-FWeventq",
                 adap->port[0]->name);

        /* Ethernet queues */
        for_each_port(adap, j) {
                struct net_device *d = adap->port[j];
                const struct port_info *pi = netdev_priv(d);

                for (i = 0; i < pi->nqsets; i++, msi_idx++)
                        snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
                                 d->name, i);
        }

        for_each_ofldrxq(&adap->sge, i)
                snprintf(adap->msix_info[msi_idx++].desc, n, "%s-ofld%d",
                         adap->port[0]->name, i);

        for_each_rdmarxq(&adap->sge, i)
                snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d",
                         adap->port[0]->name, i);

        for_each_rdmaciq(&adap->sge, i)
                snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d",
                         adap->port[0]->name, i);
}

static int request_msix_queue_irqs(struct adapter *adap)
{
        struct sge *s = &adap->sge;
        int err, ethqidx, ofldqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0;
        int msi_index = 2;

        err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
                          adap->msix_info[1].desc, &s->fw_evtq);
        if (err)
                return err;

        for_each_ethrxq(s, ethqidx) {
                err = request_irq(adap->msix_info[msi_index].vec,
                                  t4_sge_intr_msix, 0,
                                  adap->msix_info[msi_index].desc,
                                  &s->ethrxq[ethqidx].rspq);
                if (err)
                        goto unwind;
                msi_index++;
        }
        for_each_ofldrxq(s, ofldqidx) {
                err = request_irq(adap->msix_info[msi_index].vec,
                                  t4_sge_intr_msix, 0,
                                  adap->msix_info[msi_index].desc,
                                  &s->ofldrxq[ofldqidx].rspq);
                if (err)
                        goto unwind;
                msi_index++;
        }
        for_each_rdmarxq(s, rdmaqidx) {
                err = request_irq(adap->msix_info[msi_index].vec,
                                  t4_sge_intr_msix, 0,
                                  adap->msix_info[msi_index].desc,
                                  &s->rdmarxq[rdmaqidx].rspq);
                if (err)
                        goto unwind;
                msi_index++;
        }
        for_each_rdmaciq(s, rdmaciqqidx) {
                err = request_irq(adap->msix_info[msi_index].vec,
                                  t4_sge_intr_msix, 0,
                                  adap->msix_info[msi_index].desc,
                                  &s->rdmaciq[rdmaciqqidx].rspq);
                if (err)
                        goto unwind;
                msi_index++;
        }
        return 0;

unwind:
        while (--rdmaciqqidx >= 0)
                free_irq(adap->msix_info[--msi_index].vec,
                         &s->rdmaciq[rdmaciqqidx].rspq);
        while (--rdmaqidx >= 0)
                free_irq(adap->msix_info[--msi_index].vec,
                         &s->rdmarxq[rdmaqidx].rspq);
        while (--ofldqidx >= 0)
                free_irq(adap->msix_info[--msi_index].vec,
                         &s->ofldrxq[ofldqidx].rspq);
        while (--ethqidx >= 0)
                free_irq(adap->msix_info[--msi_index].vec,
                         &s->ethrxq[ethqidx].rspq);
        free_irq(adap->msix_info[1].vec, &s->fw_evtq);
        return err;
}

static void free_msix_queue_irqs(struct adapter *adap)
{
        int i, msi_index = 2;
        struct sge *s = &adap->sge;

        free_irq(adap->msix_info[1].vec, &s->fw_evtq);
        for_each_ethrxq(s, i)
                free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq);
        for_each_ofldrxq(s, i)
                free_irq(adap->msix_info[msi_index++].vec, &s->ofldrxq[i].rspq);
        for_each_rdmarxq(s, i)
                free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq);
        for_each_rdmaciq(s, i)
                free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq);
}

/**
 *	write_rss - write the RSS table for a given port
 *	@pi: the port
 *	@queues: array of queue indices for RSS
 *
 *	Sets up the portion of the HW RSS table for the port's VI to distribute
 *	packets to the Rx queues in @queues.
 */
static int write_rss(const struct port_info *pi, const u16 *queues)
{
        u16 *rss;
        int i, err;
        const struct sge_eth_rxq *q = &pi->adapter->sge.ethrxq[pi->first_qset];

        rss = kmalloc(pi->rss_size * sizeof(u16), GFP_KERNEL);
        if (!rss)
                return -ENOMEM;

        /* map the queue indices to queue ids */
        for (i = 0; i < pi->rss_size; i++, queues++)
                rss[i] = q[*queues].rspq.abs_id;

        err = t4_config_rss_range(pi->adapter, pi->adapter->fn, pi->viid, 0,
                                  pi->rss_size, rss, pi->rss_size);
        kfree(rss);
        return err;
}

/**
 *	setup_rss - configure RSS
 *	@adap: the adapter
 *
 *	Sets up RSS for each port.
 */
static int setup_rss(struct adapter *adap)
{
        int i, err;

        for_each_port(adap, i) {
                const struct port_info *pi = adap2pinfo(adap, i);

                err = write_rss(pi, pi->rss);
                if (err)
                        return err;
        }
        return 0;
}

/*
 * Return the channel of the ingress queue with the given qid.
 */
static unsigned int rxq_to_chan(const struct sge *p, unsigned int qid)
{
        qid -= p->ingr_start;
        return netdev2pinfo(p->ingr_map[qid]->netdev)->tx_chan;
}

/*
 * Wait until all NAPI handlers are descheduled.
 */
static void quiesce_rx(struct adapter *adap)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
                struct sge_rspq *q = adap->sge.ingr_map[i];

                if (q && q->handler)
                        napi_disable(&q->napi);
        }
}

/*
 * Enable NAPI scheduling and interrupt generation for all Rx queues.
 */
static void enable_rx(struct adapter *adap)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
                struct sge_rspq *q = adap->sge.ingr_map[i];

                if (!q)
                        continue;
                if (q->handler)
                        napi_enable(&q->napi);
                /* 0-increment GTS to start the timer and enable interrupts */
                t4_write_reg(adap, MYPF_REG(SGE_PF_GTS),
                             SEINTARM(q->intr_params) |
                             INGRESSQID(q->cntxt_id));
        }
}
/**
 *	setup_sge_queues - configure SGE Tx/Rx/response queues
 *	@adap: the adapter
 *
 *	Determines how many sets of SGE queues to use and initializes them.
 *	We support multiple queue sets per port if we have MSI-X, otherwise
 *	just one queue set per port.
 */
static int setup_sge_queues(struct adapter *adap)
{
        int err, msi_idx, i, j;
        struct sge *s = &adap->sge;

        bitmap_zero(s->starving_fl, MAX_EGRQ);
        bitmap_zero(s->txq_maperr, MAX_EGRQ);

        if (adap->flags & USING_MSIX)
                msi_idx = 1;         /* vector 0 is for non-queue interrupts */
        else {
                err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
                                       NULL, NULL);
                if (err)
                        return err;
                msi_idx = -((int)s->intrq.abs_id + 1);
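                /* With INTx/MSI there is no per-queue vector: a negative
                 * msi_idx appears to act as a sentinel telling the queue
                 * allocation calls below to forward interrupts through the
                 * interrupt queue whose absolute id is encoded here.
                 */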
        }

        err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
                               msi_idx, NULL, fwevtq_handler);
        if (err) {
freeout:        t4_free_sge_resources(adap);
                return err;
        }

        for_each_port(adap, i) {
                struct net_device *dev = adap->port[i];
                struct port_info *pi = netdev_priv(dev);
                struct sge_eth_rxq *q = &s->ethrxq[pi->first_qset];
                struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];

                for (j = 0; j < pi->nqsets; j++, q++) {
                        if (msi_idx > 0)
                                msi_idx++;
                        err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
                                               msi_idx, &q->fl,
                                               t4_ethrx_handler);
                        if (err)
                                goto freeout;
                        memset(&q->stats, 0, sizeof(q->stats));
                }
                for (j = 0; j < pi->nqsets; j++, t++) {
                        err = t4_sge_alloc_eth_txq(adap, t, dev,
                                        netdev_get_tx_queue(dev, j),
                                        s->fw_evtq.cntxt_id);
                        if (err)
                                goto freeout;
                }
        }

        j = s->ofldqsets / adap->params.nports; /* ofld queues per channel */
        for_each_ofldrxq(s, i) {
                struct sge_ofld_rxq *q = &s->ofldrxq[i];
                struct net_device *dev = adap->port[i / j];

                if (msi_idx > 0)
                        msi_idx++;
                err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx,
                                       q->fl.size ? &q->fl : NULL,
                                       uldrx_handler);
                if (err)
                        goto freeout;
                memset(&q->stats, 0, sizeof(q->stats));
                s->ofld_rxq[i] = q->rspq.abs_id;
                err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], dev,
                                            s->fw_evtq.cntxt_id);
                if (err)
                        goto freeout;
        }

        for_each_rdmarxq(s, i) {
                struct sge_ofld_rxq *q = &s->rdmarxq[i];

                if (msi_idx > 0)
                        msi_idx++;
                err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i],
                                       msi_idx, q->fl.size ? &q->fl : NULL,
                                       uldrx_handler);
                if (err)
                        goto freeout;
                memset(&q->stats, 0, sizeof(q->stats));
                s->rdma_rxq[i] = q->rspq.abs_id;
        }

        for_each_rdmaciq(s, i) {
                struct sge_ofld_rxq *q = &s->rdmaciq[i];

                if (msi_idx > 0)
                        msi_idx++;
                err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i],
                                       msi_idx, q->fl.size ? &q->fl : NULL,
                                       uldrx_handler);
                if (err)
                        goto freeout;
                memset(&q->stats, 0, sizeof(q->stats));
                s->rdma_ciq[i] = q->rspq.abs_id;
        }

        for_each_port(adap, i) {
                /*
                 * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't
                 * have RDMA queues, and that's the right value.
                 */
                err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i],
                                            s->fw_evtq.cntxt_id,
                                            s->rdmarxq[i].rspq.cntxt_id);
                if (err)
                        goto freeout;
        }

        t4_write_reg(adap, is_t4(adap->params.chip) ?
                                MPS_TRC_RSS_CONTROL :
                                MPS_T5_TRC_RSS_CONTROL,
                     RSSCONTROL(netdev2pinfo(adap->port[0])->tx_chan) |
                     QUEUENUMBER(s->ethrxq[0].rspq.abs_id));
        return 0;
}
/*
 * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc.
 * The allocated memory is cleared.
 */
void *t4_alloc_mem(size_t size)
{
        void *p = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);

        if (!p)
                p = vzalloc(size);
        return p;
}

/*
 * Free memory allocated through alloc_mem().
 */
void t4_free_mem(void *addr)
{
        if (is_vmalloc_addr(addr))
                vfree(addr);
        else
                kfree(addr);
}
/* Send a Work Request to write the filter at a specified index.  We construct
 * a Firmware Filter Work Request to have the work done and put the indicated
 * filter into "pending" mode which will prevent any further actions against
 * it till we get a reply from the firmware on the completion status of the
 * request.
 */
static int set_filter_wr(struct adapter *adapter, int fidx)
{
        struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
        struct sk_buff *skb;
        struct fw_filter_wr *fwr;
        unsigned int ftid;

        /* If the new filter requires loopback Destination MAC and/or VLAN
         * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
         * the filter.
         */
        if (f->fs.newdmac || f->fs.newvlan) {
                /* allocate L2T entry for new filter */
                f->l2t = t4_l2t_alloc_switching(adapter->l2t);
                if (f->l2t == NULL)
                        return -EAGAIN;
                if (t4_l2t_set_switching(adapter, f->l2t, f->fs.vlan,
                                        f->fs.eport, f->fs.dmac)) {
                        cxgb4_l2t_release(f->l2t);
                        f->l2t = NULL;
                        return -ENOMEM;
                }
        }

        ftid = adapter->tids.ftid_base + fidx;
        skb = alloc_skb(sizeof(*fwr), GFP_KERNEL | __GFP_NOFAIL);
        fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr));
        memset(fwr, 0, sizeof(*fwr));

        /* It would be nice to put most of the following in t4_hw.c but most
         * of the work is translating the cxgbtool ch_filter_specification
         * into the Work Request and the definition of that structure is
         * currently in cxgbtool.h which isn't appropriate to pull into the
         * common code.  We may eventually try to come up with a more neutral
         * filter specification structure but for now it's easiest to simply
         * put this fairly direct code in line ...
         */
        fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
        fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16));
        fwr->tid_to_iq =
                htonl(FW_FILTER_WR_TID_V(ftid) |
                      FW_FILTER_WR_RQTYPE_V(f->fs.type) |
                      FW_FILTER_WR_NOREPLY_V(0) |
                      FW_FILTER_WR_IQ_V(f->fs.iq));
        fwr->del_filter_to_l2tix =
                htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) |
                      FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) |
                      FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) |
                      FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) |
                      FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
                      FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
                      FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
                      FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
                      FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
                                             f->fs.newvlan == VLAN_REWRITE) |
                      FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
                                            f->fs.newvlan == VLAN_REWRITE) |
                      FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) |
                      FW_FILTER_WR_TXCHAN_V(f->fs.eport) |
                      FW_FILTER_WR_PRIO_V(f->fs.prio) |
                      FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0));
        fwr->ethtype = htons(f->fs.val.ethtype);
        fwr->ethtypem = htons(f->fs.mask.ethtype);
        fwr->frag_to_ovlan_vldm =
                (FW_FILTER_WR_FRAG_V(f->fs.val.frag) |
                 FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) |
                 FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) |
                 FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
                 FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
                 FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
        fwr->smac_sel = 0;
        fwr->rx_chan_rx_rpl_iq =
                htons(FW_FILTER_WR_RX_CHAN_V(0) |
                      FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
        fwr->maci_to_matchtypem =
                htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) |
                      FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) |
                      FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) |
                      FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) |
                      FW_FILTER_WR_PORT_V(f->fs.val.iport) |
                      FW_FILTER_WR_PORTM_V(f->fs.mask.iport) |
                      FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) |
                      FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype));
        fwr->ptcl = f->fs.val.proto;
        fwr->ptclm = f->fs.mask.proto;
        fwr->ttyp = f->fs.val.tos;
        fwr->ttypm = f->fs.mask.tos;
        fwr->ivlan = htons(f->fs.val.ivlan);
        fwr->ivlanm = htons(f->fs.mask.ivlan);
        fwr->ovlan = htons(f->fs.val.ovlan);
        fwr->ovlanm = htons(f->fs.mask.ovlan);
        memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip));
        memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm));
        memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip));
        memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm));
        fwr->lp = htons(f->fs.val.lport);
        fwr->lpm = htons(f->fs.mask.lport);
        fwr->fp = htons(f->fs.val.fport);
        fwr->fpm = htons(f->fs.mask.fport);
        if (f->fs.newsmac)
                memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));

        /* Mark the filter as "pending" and ship off the Filter Work Request.
         * When we get the Work Request Reply we'll clear the pending status.
         */
        f->pending = 1;
        set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
        t4_ofld_send(adapter, skb);
        return 0;
}
/* Delete the filter at a specified index.
 */
static int del_filter_wr(struct adapter *adapter, int fidx)
{
        struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
        struct sk_buff *skb;
        struct fw_filter_wr *fwr;
        unsigned int len, ftid;

        len = sizeof(*fwr);
        ftid = adapter->tids.ftid_base + fidx;

        skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
        fwr = (struct fw_filter_wr *)__skb_put(skb, len);
        t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id);

        /* Mark the filter as "pending" and ship off the Filter Work Request.
         * When we get the Work Request Reply we'll clear the pending status.
         */
        f->pending = 1;
        t4_mgmt_tx(adapter, skb);
        return 0;
}
static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
                             void *accel_priv, select_queue_fallback_t fallback)
{
        int txq;

#ifdef CONFIG_CHELSIO_T4_DCB
        /* If Data Center Bridging has been successfully negotiated on this
         * link then we'll use the skb's priority to map it to a TX Queue.
         * The skb's priority is determined via the VLAN Tag Priority Code
         * Point field.
         */
        if (cxgb4_dcb_enabled(dev)) {
                u16 vlan_tci;
                int err;

                err = vlan_get_tag(skb, &vlan_tci);
                if (unlikely(err)) {
                        if (net_ratelimit())
                                netdev_warn(dev,
                                            "TX Packet without VLAN Tag on DCB Link\n");
                        txq = 0;
                } else {
                        txq = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
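                        /* The 3-bit Priority Code Point lives in bits 15:13
                         * of the VLAN TCI, yielding a priority of 0..7 that
                         * maps one-to-one onto the port's DCB TX queues (see
                         * dcb_tx_queue_prio_enable() above).
                         */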
                }
                return txq;
        }
#endif /* CONFIG_CHELSIO_T4_DCB */

        if (select_queue) {
                txq = (skb_rx_queue_recorded(skb)
                        ? skb_get_rx_queue(skb)
                        : smp_processor_id());

                while (unlikely(txq >= dev->real_num_tx_queues))
                        txq -= dev->real_num_tx_queues;

                return txq;
        }

        return fallback(dev, skb) % dev->real_num_tx_queues;
}
static inline int is_offload(const struct adapter *adap)
{
        return adap->params.offload;
}

/*
 * Implementation of ethtool operations.
 */

static u32 get_msglevel(struct net_device *dev)
{
        return netdev2adap(dev)->msg_enable;
}

static void set_msglevel(struct net_device *dev, u32 val)
{
        netdev2adap(dev)->msg_enable = val;
}
static char stats_strings[][ETH_GSTRING_LEN] = {
        "TxBroadcastFrames ",
        "TxMulticastFrames ",
        "TxFrames128To255 ",
        "TxFrames256To511 ",
        "TxFrames512To1023 ",
        "TxFrames1024To1518 ",
        "TxFrames1519ToMax ",
        "RxBroadcastFrames ",
        "RxMulticastFrames ",
        "RxFrames128To255 ",
        "RxFrames256To511 ",
        "RxFrames512To1023 ",
        "RxFrames1024To1518 ",
        "RxFrames1519ToMax ",
        "RxBG0FramesDropped ",
        "RxBG1FramesDropped ",
        "RxBG2FramesDropped ",
        "RxBG3FramesDropped ",
        "RxBG0FramesTrunc ",
        "RxBG1FramesTrunc ",
        "RxBG2FramesTrunc ",
        "RxBG3FramesTrunc ",
        "WriteCoalSuccess ",
};

static int get_sset_count(struct net_device *dev, int sset)
{
        switch (sset) {
        case ETH_SS_STATS:
                return ARRAY_SIZE(stats_strings);
        default:
                return -EOPNOTSUPP;
        }
}
#define T4_REGMAP_SIZE (160 * 1024)
#define T5_REGMAP_SIZE (332 * 1024)

static int get_regs_len(struct net_device *dev)
{
        struct adapter *adap = netdev2adap(dev);

        if (is_t4(adap->params.chip))
                return T4_REGMAP_SIZE;
        else
                return T5_REGMAP_SIZE;
}

static int get_eeprom_len(struct net_device *dev)
{
        return EEPROMSIZE;
}

static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
        struct adapter *adapter = netdev2adap(dev);

        strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
        strlcpy(info->version, DRV_VERSION, sizeof(info->version));
        strlcpy(info->bus_info, pci_name(adapter->pdev),
                sizeof(info->bus_info));

        if (adapter->params.fw_vers)
                snprintf(info->fw_version, sizeof(info->fw_version),
                         "%u.%u.%u.%u, TP %u.%u.%u.%u",
                         FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers),
                         FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers),
                         FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers),
                         FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers),
                         FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers),
                         FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers),
                         FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers),
                         FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers));
}

static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
        if (stringset == ETH_SS_STATS)
                memcpy(data, stats_strings, sizeof(stats_strings));
}
/*
 * port stats maintained per queue of the port.  They should be in the same
 * order as in stats_strings above.
 */
struct queue_port_stats {
        u64 tso;
        u64 tx_csum;
        u64 rx_csum;
        u64 vlan_ex;
        u64 vlan_ins;
        u64 gro_pkts;
        u64 gro_merged;
};

static void collect_sge_port_stats(const struct adapter *adap,
                const struct port_info *p, struct queue_port_stats *s)
{
        int i;
        const struct sge_eth_txq *tx = &adap->sge.ethtxq[p->first_qset];
        const struct sge_eth_rxq *rx = &adap->sge.ethrxq[p->first_qset];

        memset(s, 0, sizeof(*s));
        for (i = 0; i < p->nqsets; i++, rx++, tx++) {
                s->tso += tx->tso;
                s->tx_csum += tx->tx_cso;
                s->rx_csum += rx->stats.rx_cso;
                s->vlan_ex += rx->stats.vlan_ex;
                s->vlan_ins += tx->vlan_ins;
                s->gro_pkts += rx->stats.lro_pkts;
                s->gro_merged += rx->stats.lro_merged;
        }
}
static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
                      u64 *data)
{
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;
        u32 val1, val2;

        t4_get_port_stats(adapter, pi->tx_chan, (struct port_stats *)data);

        data += sizeof(struct port_stats) / sizeof(u64);
        collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
        data += sizeof(struct queue_port_stats) / sizeof(u64);
        if (!is_t4(adapter->params.chip)) {
                t4_write_reg(adapter, SGE_STAT_CFG, STATSOURCE_T5(7));
                val1 = t4_read_reg(adapter, SGE_STAT_TOTAL);
                val2 = t4_read_reg(adapter, SGE_STAT_MATCH);
                *data = val1 - val2;
                data++;
                *data = val2;
        } else {
                memset(data, 0, 2 * sizeof(u64));
        }
}
/*
 * Return a version number to identify the type of adapter.  The scheme is:
 * - bits 0..9: chip version
 * - bits 10..15: chip revision
 * - bits 16..23: register dump version
 */
static inline unsigned int mk_adap_vers(const struct adapter *ap)
{
        return CHELSIO_CHIP_VERSION(ap->params.chip) |
                (CHELSIO_CHIP_RELEASE(ap->params.chip) << 10) | (1 << 16);
}

static void reg_block_dump(struct adapter *ap, void *buf, unsigned int start,
                           unsigned int end)
{
        u32 *p = buf + start;

        for ( ; start <= end; start += sizeof(u32))
                *p++ = t4_read_reg(ap, start);
}

static void get_regs(struct net_device *dev, struct ethtool_regs *regs,
                     void *buf)
{
        static const unsigned int t4_reg_ranges[] = {
                /* (long table of T4 register ranges elided) */
        };

        static const unsigned int t5_reg_ranges[] = {
                /* (long table of T5 register ranges elided) */
        };

        int i;
        struct adapter *ap = netdev2adap(dev);
        static const unsigned int *reg_ranges;
        int arr_size = 0, buf_size = 0;

        if (is_t4(ap->params.chip)) {
                reg_ranges = &t4_reg_ranges[0];
                arr_size = ARRAY_SIZE(t4_reg_ranges);
                buf_size = T4_REGMAP_SIZE;
        } else {
                reg_ranges = &t5_reg_ranges[0];
                arr_size = ARRAY_SIZE(t5_reg_ranges);
                buf_size = T5_REGMAP_SIZE;
        }

        regs->version = mk_adap_vers(ap);

        memset(buf, 0, buf_size);
        for (i = 0; i < arr_size; i += 2)
                reg_block_dump(ap, buf, reg_ranges[i], reg_ranges[i + 1]);
}
static int restart_autoneg(struct net_device *dev)
{
        struct port_info *p = netdev_priv(dev);

        if (!netif_running(dev))
                return -EAGAIN;
        if (p->link_cfg.autoneg != AUTONEG_ENABLE)
                return -EINVAL;
        t4_restart_aneg(p->adapter, p->adapter->fn, p->tx_chan);
        return 0;
}

static int identify_port(struct net_device *dev,
                         enum ethtool_phys_id_state state)
{
        unsigned int val;
        struct adapter *adap = netdev2adap(dev);

        if (state == ETHTOOL_ID_ACTIVE)
                val = 0xffff;
        else if (state == ETHTOOL_ID_INACTIVE)
                val = 0;
        else
                return -EINVAL;

        return t4_identify_port(adap, adap->fn, netdev2pinfo(dev)->viid, val);
}
static unsigned int from_fw_linkcaps(unsigned int type, unsigned int caps)
{
        unsigned int v = 0;

        if (type == FW_PORT_TYPE_BT_SGMII || type == FW_PORT_TYPE_BT_XFI ||
            type == FW_PORT_TYPE_BT_XAUI) {
                v |= SUPPORTED_TP;
                if (caps & FW_PORT_CAP_SPEED_100M)
                        v |= SUPPORTED_100baseT_Full;
                if (caps & FW_PORT_CAP_SPEED_1G)
                        v |= SUPPORTED_1000baseT_Full;
                if (caps & FW_PORT_CAP_SPEED_10G)
                        v |= SUPPORTED_10000baseT_Full;
        } else if (type == FW_PORT_TYPE_KX4 || type == FW_PORT_TYPE_KX) {
                v |= SUPPORTED_Backplane;
                if (caps & FW_PORT_CAP_SPEED_1G)
                        v |= SUPPORTED_1000baseKX_Full;
                if (caps & FW_PORT_CAP_SPEED_10G)
                        v |= SUPPORTED_10000baseKX4_Full;
        } else if (type == FW_PORT_TYPE_KR)
                v |= SUPPORTED_Backplane | SUPPORTED_10000baseKR_Full;
        else if (type == FW_PORT_TYPE_BP_AP)
                v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
                     SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full;
        else if (type == FW_PORT_TYPE_BP4_AP)
                v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
                     SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full |
                     SUPPORTED_10000baseKX4_Full;
        else if (type == FW_PORT_TYPE_FIBER_XFI ||
                 type == FW_PORT_TYPE_FIBER_XAUI || type == FW_PORT_TYPE_SFP)
                v |= SUPPORTED_FIBRE;
        else if (type == FW_PORT_TYPE_BP40_BA)
                v |= SUPPORTED_40000baseSR4_Full;

        if (caps & FW_PORT_CAP_ANEG)
                v |= SUPPORTED_Autoneg;
        return v;
}

static unsigned int to_fw_linkcaps(unsigned int caps)
{
        unsigned int v = 0;

        if (caps & ADVERTISED_100baseT_Full)
                v |= FW_PORT_CAP_SPEED_100M;
        if (caps & ADVERTISED_1000baseT_Full)
                v |= FW_PORT_CAP_SPEED_1G;
        if (caps & ADVERTISED_10000baseT_Full)
                v |= FW_PORT_CAP_SPEED_10G;
        if (caps & ADVERTISED_40000baseSR4_Full)
                v |= FW_PORT_CAP_SPEED_40G;
        return v;
}
static int get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
        const struct port_info *p = netdev_priv(dev);

        if (p->port_type == FW_PORT_TYPE_BT_SGMII ||
            p->port_type == FW_PORT_TYPE_BT_XFI ||
            p->port_type == FW_PORT_TYPE_BT_XAUI)
                cmd->port = PORT_TP;
        else if (p->port_type == FW_PORT_TYPE_FIBER_XFI ||
                 p->port_type == FW_PORT_TYPE_FIBER_XAUI)
                cmd->port = PORT_FIBRE;
        else if (p->port_type == FW_PORT_TYPE_SFP ||
                 p->port_type == FW_PORT_TYPE_QSFP_10G ||
                 p->port_type == FW_PORT_TYPE_QSFP) {
                if (p->mod_type == FW_PORT_MOD_TYPE_LR ||
                    p->mod_type == FW_PORT_MOD_TYPE_SR ||
                    p->mod_type == FW_PORT_MOD_TYPE_ER ||
                    p->mod_type == FW_PORT_MOD_TYPE_LRM)
                        cmd->port = PORT_FIBRE;
                else if (p->mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
                         p->mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
                        cmd->port = PORT_DA;
                else
                        cmd->port = PORT_OTHER;
        } else
                cmd->port = PORT_OTHER;

        if (p->mdio_addr >= 0) {
                cmd->phy_address = p->mdio_addr;
                cmd->transceiver = XCVR_EXTERNAL;
                cmd->mdio_support = p->port_type == FW_PORT_TYPE_BT_SGMII ?
                        MDIO_SUPPORTS_C22 : MDIO_SUPPORTS_C45;
        } else {
                cmd->phy_address = 0;  /* not really, but no better option */
                cmd->transceiver = XCVR_INTERNAL;
                cmd->mdio_support = 0;
        }

        cmd->supported = from_fw_linkcaps(p->port_type, p->link_cfg.supported);
        cmd->advertising = from_fw_linkcaps(p->port_type,
                                            p->link_cfg.advertising);
        ethtool_cmd_speed_set(cmd,
                              netif_carrier_ok(dev) ? p->link_cfg.speed : 0);
        cmd->duplex = DUPLEX_FULL;
        cmd->autoneg = p->link_cfg.autoneg;
        cmd->maxtxpkt = 0;
        cmd->maxrxpkt = 0;
        return 0;
}
static unsigned int speed_to_caps(int speed)
{
        if (speed == 100)
                return FW_PORT_CAP_SPEED_100M;
        if (speed == 1000)
                return FW_PORT_CAP_SPEED_1G;
        if (speed == 10000)
                return FW_PORT_CAP_SPEED_10G;
        if (speed == 40000)
                return FW_PORT_CAP_SPEED_40G;
        return 0;
}
static int set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
        unsigned int cap;
        struct port_info *p = netdev_priv(dev);
        struct link_config *lc = &p->link_cfg;
        u32 speed = ethtool_cmd_speed(cmd);

        if (cmd->duplex != DUPLEX_FULL)  /* only full-duplex supported */
                return -EINVAL;

        if (!(lc->supported & FW_PORT_CAP_ANEG)) {
                /*
                 * PHY offers a single speed.  See if that's what's
                 * being requested.
                 */
                if (cmd->autoneg == AUTONEG_DISABLE &&
                    (lc->supported & speed_to_caps(speed)))
                        return 0;
                return -EINVAL;
        }

        if (cmd->autoneg == AUTONEG_DISABLE) {
                cap = speed_to_caps(speed);

                if (!(lc->supported & cap) ||
                    (speed == 1000) ||
                    (speed == 10000) ||
                    (speed == 40000))
                        return -EINVAL;
                lc->requested_speed = cap;
                lc->advertising = 0;
        } else {
                cap = to_fw_linkcaps(cmd->advertising);
                if (!(lc->supported & cap))
                        return -EINVAL;
                lc->requested_speed = 0;
                lc->advertising = cap | FW_PORT_CAP_ANEG;
        }
        lc->autoneg = cmd->autoneg;

        if (netif_running(dev))
                return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
                                     lc);
        return 0;
}
static void get_pauseparam(struct net_device *dev,
                           struct ethtool_pauseparam *epause)
{
        struct port_info *p = netdev_priv(dev);

        epause->autoneg = (p->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
        epause->rx_pause = (p->link_cfg.fc & PAUSE_RX) != 0;
        epause->tx_pause = (p->link_cfg.fc & PAUSE_TX) != 0;
}

static int set_pauseparam(struct net_device *dev,
                          struct ethtool_pauseparam *epause)
{
        struct port_info *p = netdev_priv(dev);
        struct link_config *lc = &p->link_cfg;

        if (epause->autoneg == AUTONEG_DISABLE)
                lc->requested_fc = 0;
        else if (lc->supported & FW_PORT_CAP_ANEG)
                lc->requested_fc = PAUSE_AUTONEG;
        else
                return -EINVAL;

        if (epause->rx_pause)
                lc->requested_fc |= PAUSE_RX;
        if (epause->tx_pause)
                lc->requested_fc |= PAUSE_TX;
        if (netif_running(dev))
                return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
                                     lc);
        return 0;
}

static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
{
        const struct port_info *pi = netdev_priv(dev);
        const struct sge *s = &pi->adapter->sge;

        e->rx_max_pending = MAX_RX_BUFFERS;
        e->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
        e->rx_jumbo_max_pending = 0;
        e->tx_max_pending = MAX_TXQ_ENTRIES;

        e->rx_pending = s->ethrxq[pi->first_qset].fl.size - 8;
        e->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
        e->rx_jumbo_pending = 0;
        e->tx_pending = s->ethtxq[pi->first_qset].q.size;
}
static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
{
        int i;
        const struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;
        struct sge *s = &adapter->sge;

        if (e->rx_pending > MAX_RX_BUFFERS || e->rx_jumbo_pending ||
            e->tx_pending > MAX_TXQ_ENTRIES ||
            e->rx_mini_pending > MAX_RSPQ_ENTRIES ||
            e->rx_mini_pending < MIN_RSPQ_ENTRIES ||
            e->rx_pending < MIN_FL_ENTRIES || e->tx_pending < MIN_TXQ_ENTRIES)
                return -EINVAL;

        if (adapter->flags & FULL_INIT_DONE)
                return -EBUSY;

        for (i = 0; i < pi->nqsets; ++i) {
                s->ethtxq[pi->first_qset + i].q.size = e->tx_pending;
                s->ethrxq[pi->first_qset + i].fl.size = e->rx_pending + 8;
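                /* Note the +8 slack here and the matching -8 in
                 * get_sge_param() above: the Free List is sized slightly
                 * larger than what is reported to (and requested via)
                 * ethtool, presumably to keep a few entries in reserve.
                 */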
                s->ethrxq[pi->first_qset + i].rspq.size = e->rx_mini_pending;
        }
        return 0;
}
static int closest_timer(const struct sge *s, int time)
{
        int i, delta, match = 0, min_delta = INT_MAX;

        for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
                delta = time - s->timer_val[i];
                if (delta < 0)
                        delta = -delta;
                if (delta < min_delta) {
                        min_delta = delta;
                        match = i;
                }
        }
        return match;
}

static int closest_thres(const struct sge *s, int thres)
{
        int i, delta, match = 0, min_delta = INT_MAX;

        for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
                delta = thres - s->counter_val[i];
                if (delta < 0)
                        delta = -delta;
                if (delta < min_delta) {
                        min_delta = delta;
                        match = i;
                }
        }
        return match;
}

/*
 * Return a queue's interrupt hold-off time in us.  0 means no timer.
 */
static unsigned int qtimer_val(const struct adapter *adap,
                               const struct sge_rspq *q)
{
        unsigned int idx = q->intr_params >> 1;

        return idx < SGE_NTIMERS ? adap->sge.timer_val[idx] : 0;
}
/**
 *	set_rspq_intr_params - set a queue's interrupt holdoff parameters
 *	@q: the Rx queue
 *	@us: the hold-off time in us, or 0 to disable timer
 *	@cnt: the hold-off packet count, or 0 to disable counter
 *
 *	Sets an Rx queue's interrupt hold-off time and packet count.  At least
 *	one of the two needs to be enabled for the queue to generate interrupts.
 */
static int set_rspq_intr_params(struct sge_rspq *q,
                                unsigned int us, unsigned int cnt)
{
        struct adapter *adap = q->adap;

        if ((us | cnt) == 0)
                cnt = 1;

        if (cnt) {
                int err;
                u32 v, new_idx;

                new_idx = closest_thres(&adap->sge, cnt);
                if (q->desc && q->pktcnt_idx != new_idx) {
                        /* the queue has already been created, update it */
                        v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
                            FW_PARAMS_PARAM_X_V(
                                        FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
                            FW_PARAMS_PARAM_YZ_V(q->cntxt_id);
                        err = t4_set_params(adap, adap->fn, adap->fn, 0, 1, &v,
                                            &new_idx);
                        if (err)
                                return err;
                }
                q->pktcnt_idx = new_idx;
        }

        us = us == 0 ? 6 : closest_timer(&adap->sge, us);
        q->intr_params = QINTR_TIMER_IDX(us) | (cnt > 0 ? QINTR_CNT_EN : 0);
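        /* intr_params appears to pack the hold-off timer index into the
         * upper bits, with bit 0 as the packet-count enable flag --
         * qtimer_val() above decodes it with a ">> 1".  A timer index of 6
         * is outside the valid SGE_NTIMERS range, which qtimer_val()
         * reports as "no timer".
         */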
        return 0;
}

/**
 *	set_rx_intr_params - set a net device's Rx interrupt holdoff parameters
 *	@dev: the network device
 *	@us: the hold-off time in us, or 0 to disable timer
 *	@cnt: the hold-off packet count, or 0 to disable counter
 *
 *	Set the RX interrupt hold-off parameters for a network device.
 */
static int set_rx_intr_params(struct net_device *dev,
                              unsigned int us, unsigned int cnt)
{
        int i, err;
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adap = pi->adapter;
        struct sge_eth_rxq *q = &adap->sge.ethrxq[pi->first_qset];

        for (i = 0; i < pi->nqsets; i++, q++) {
                err = set_rspq_intr_params(&q->rspq, us, cnt);
                if (err)
                        return err;
        }
        return 0;
}

static int set_adaptive_rx_setting(struct net_device *dev, int adaptive_rx)
{
        int i;
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adap = pi->adapter;
        struct sge_eth_rxq *q = &adap->sge.ethrxq[pi->first_qset];

        for (i = 0; i < pi->nqsets; i++, q++)
                q->rspq.adaptive_rx = adaptive_rx;

        return 0;
}

static int get_adaptive_rx_setting(struct net_device *dev)
{
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adap = pi->adapter;
        struct sge_eth_rxq *q = &adap->sge.ethrxq[pi->first_qset];

        return q->rspq.adaptive_rx;
}

static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
{
        set_adaptive_rx_setting(dev, c->use_adaptive_rx_coalesce);
        return set_rx_intr_params(dev, c->rx_coalesce_usecs,
                                  c->rx_max_coalesced_frames);
}

static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
{
        const struct port_info *pi = netdev_priv(dev);
        const struct adapter *adap = pi->adapter;
        const struct sge_rspq *rq = &adap->sge.ethrxq[pi->first_qset].rspq;

        c->rx_coalesce_usecs = qtimer_val(adap, rq);
        c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN) ?
                adap->sge.counter_val[rq->pktcnt_idx] : 0;
        c->use_adaptive_rx_coalesce = get_adaptive_rx_setting(dev);
        return 0;
}
2805 * eeprom_ptov - translate a physical EEPROM address to virtual
2806 * @phys_addr: the physical EEPROM address
2807 * @fn: the PCI function number
2808 * @sz: size of function-specific area
2810 * Translate a physical EEPROM address to virtual. The first 1K is
2811 * accessed through virtual addresses starting at 31K, the rest is
2812 * accessed through virtual addresses starting at 0.
2814 * The mapping is as follows:
2815 * [0..1K) -> [31K..32K)
2816 * [1K..1K+A) -> [31K-A..31K)
2817 * [1K+A..ES) -> [0..ES-A-1K)
2819 * where A = @fn * @sz, and ES = EEPROM size.
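*
* A worked example with illustrative values: for @fn = 1 and @sz = 1K, so
* that A = 1K, physical address 0 maps to virtual 31744 (31K), physical
* 1024 maps to virtual 30720 (31K - A), and physical 2048 maps to virtual 0.
*/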
2821 static int eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz)
2824 if (phys_addr < 1024)
2825 return phys_addr + (31 << 10);
2826 if (phys_addr < 1024 + fn)
2827 return 31744 - fn + phys_addr - 1024;
2828 if (phys_addr < EEPROMSIZE)
2829 return phys_addr - 1024 - fn;
2834 * The next two routines implement eeprom read/write from physical addresses.
2836 static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v)
2838 int vaddr = eeprom_ptov(phys_addr, adap->fn, EEPROMPFSIZE);
2841 vaddr = pci_read_vpd(adap->pdev, vaddr, sizeof(u32), v);
2842 return vaddr < 0 ? vaddr : 0;
2845 static int eeprom_wr_phys(struct adapter *adap, unsigned int phys_addr, u32 v)
2847 int vaddr = eeprom_ptov(phys_addr, adap->fn, EEPROMPFSIZE);
2850 vaddr = pci_write_vpd(adap->pdev, vaddr, sizeof(u32), &v);
2851 return vaddr < 0 ? vaddr : 0;
2854 #define EEPROM_MAGIC 0x38E2F10C
2856 static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e,
2860 struct adapter *adapter = netdev2adap(dev);
2862 u8 *buf = kmalloc(EEPROMSIZE, GFP_KERNEL);
2866 e->magic = EEPROM_MAGIC;
2867 for (i = e->offset & ~3; !err && i < e->offset + e->len; i += 4)
2868 err = eeprom_rd_phys(adapter, i, (u32 *)&buf[i]);
2871 memcpy(data, buf + e->offset, e->len);
2876 static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
2881 u32 aligned_offset, aligned_len, *p;
2882 struct adapter *adapter = netdev2adap(dev);
2884 if (eeprom->magic != EEPROM_MAGIC)
2887 aligned_offset = eeprom->offset & ~3;
2888 aligned_len = (eeprom->len + (eeprom->offset & 3) + 3) & ~3;
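/* A worked example of the alignment arithmetic above (illustrative values):
* offset = 5 and len = 6 give aligned_offset = 4 and
* aligned_len = (6 + 1 + 3) & ~3 = 8, i.e. the two 32-bit words covering
* bytes 4..11, which is why the partial first/last words may need the
* read-modify-write handling further below.
*/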
2890 if (adapter->fn > 0) {
2891 u32 start = 1024 + adapter->fn * EEPROMPFSIZE;
2893 if (aligned_offset < start ||
2894 aligned_offset + aligned_len > start + EEPROMPFSIZE)
2898 if (aligned_offset != eeprom->offset || aligned_len != eeprom->len) {
2900 * RMW possibly needed for first or last words.
2902 buf = kmalloc(aligned_len, GFP_KERNEL);
2905 err = eeprom_rd_phys(adapter, aligned_offset, (u32 *)buf);
2906 if (!err && aligned_len > 4)
2907 err = eeprom_rd_phys(adapter,
2908 aligned_offset + aligned_len - 4,
2909 (u32 *)&buf[aligned_len - 4]);
2912 memcpy(buf + (eeprom->offset & 3), data, eeprom->len);
2916 err = t4_seeprom_wp(adapter, false);
2920 for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2921 err = eeprom_wr_phys(adapter, aligned_offset, *p);
2922 aligned_offset += 4;
2926 err = t4_seeprom_wp(adapter, true);
2933 static int set_flash(struct net_device *netdev, struct ethtool_flash *ef)
2936 const struct firmware *fw;
2937 struct adapter *adap = netdev2adap(netdev);
2938 unsigned int mbox = PCIE_FW_MASTER_M + 1;
2940 ef->data[sizeof(ef->data) - 1] = '\0';
2941 ret = request_firmware(&fw, ef->data, adap->pdev_dev);
2945 /* If the adapter has been fully initialized then we'll go ahead and
2946 * try to get the firmware's cooperation in upgrading to the new
2947 * firmware image; otherwise we'll try to do the entire job from the
2948 * host ... and we always "force" the operation in this path.
2950 if (adap->flags & FULL_INIT_DONE)
2953 ret = t4_fw_upgrade(adap, mbox, fw->data, fw->size, 1);
2954 release_firmware(fw);
2956 dev_info(adap->pdev_dev, "loaded firmware %s,"
2957 " reload cxgb4 driver\n", ef->data);
2961 #define WOL_SUPPORTED (WAKE_BCAST | WAKE_MAGIC)
2962 #define BCAST_CRC 0xa0ccc1a6
2964 static void get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
2966 wol->supported = WAKE_BCAST | WAKE_MAGIC;
2967 wol->wolopts = netdev2adap(dev)->wol;
2968 memset(&wol->sopass, 0, sizeof(wol->sopass));
2971 static int set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
2974 struct port_info *pi = netdev_priv(dev);
2976 if (wol->wolopts & ~WOL_SUPPORTED)
2978 t4_wol_magic_enable(pi->adapter, pi->tx_chan,
2979 (wol->wolopts & WAKE_MAGIC) ? dev->dev_addr : NULL);
2980 if (wol->wolopts & WAKE_BCAST) {
2981 err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0xfe, ~0ULL,
2984 err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 1,
2985 ~6ULL, ~0ULL, BCAST_CRC, true);
2987 t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0, 0, 0, 0, false);
2991 static int cxgb_set_features(struct net_device *dev, netdev_features_t features)
2993 const struct port_info *pi = netdev_priv(dev);
2994 netdev_features_t changed = dev->features ^ features;
2997 if (!(changed & NETIF_F_HW_VLAN_CTAG_RX))
3000 err = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, -1,
3002 !!(features & NETIF_F_HW_VLAN_CTAG_RX), true);
3004 dev->features = features ^ NETIF_F_HW_VLAN_CTAG_RX;
3008 static u32 get_rss_table_size(struct net_device *dev)
3010 const struct port_info *pi = netdev_priv(dev);
3012 return pi->rss_size;
3015 static int get_rss_table(struct net_device *dev, u32 *p, u8 *key)
3017 const struct port_info *pi = netdev_priv(dev);
3018 unsigned int n = pi->rss_size;
3025 static int set_rss_table(struct net_device *dev, const u32 *p, const u8 *key)
3028 struct port_info *pi = netdev_priv(dev);
3030 for (i = 0; i < pi->rss_size; i++)
3032 if (pi->adapter->flags & FULL_INIT_DONE)
3033 return write_rss(pi, pi->rss);
3037 static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
3040 const struct port_info *pi = netdev_priv(dev);
3042 switch (info->cmd) {
3043 case ETHTOOL_GRXFH: {
3044 unsigned int v = pi->rss_mode;
3047 switch (info->flow_type) {
3049 if (v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN_F)
3050 info->data = RXH_IP_SRC | RXH_IP_DST |
3051 RXH_L4_B_0_1 | RXH_L4_B_2_3;
3052 else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN_F)
3053 info->data = RXH_IP_SRC | RXH_IP_DST;
3056 if ((v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN_F) &&
3057 (v & FW_RSS_VI_CONFIG_CMD_UDPEN_F))
3058 info->data = RXH_IP_SRC | RXH_IP_DST |
3059 RXH_L4_B_0_1 | RXH_L4_B_2_3;
3060 else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN_F)
3061 info->data = RXH_IP_SRC | RXH_IP_DST;
3064 case AH_ESP_V4_FLOW:
3066 if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN_F)
3067 info->data = RXH_IP_SRC | RXH_IP_DST;
3070 if (v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN_F)
3071 info->data = RXH_IP_SRC | RXH_IP_DST |
3072 RXH_L4_B_0_1 | RXH_L4_B_2_3;
3073 else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN_F)
3074 info->data = RXH_IP_SRC | RXH_IP_DST;
3077 if ((v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN_F) &&
3078 (v & FW_RSS_VI_CONFIG_CMD_UDPEN_F))
3079 info->data = RXH_IP_SRC | RXH_IP_DST |
3080 RXH_L4_B_0_1 | RXH_L4_B_2_3;
3081 else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN_F)
3082 info->data = RXH_IP_SRC | RXH_IP_DST;
3085 case AH_ESP_V6_FLOW:
3087 if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN_F)
3088 info->data = RXH_IP_SRC | RXH_IP_DST;
3093 case ETHTOOL_GRXRINGS:
3094 info->data = pi->nqsets;
3100 static const struct ethtool_ops cxgb_ethtool_ops = {
3101 .get_settings = get_settings,
3102 .set_settings = set_settings,
3103 .get_drvinfo = get_drvinfo,
3104 .get_msglevel = get_msglevel,
3105 .set_msglevel = set_msglevel,
3106 .get_ringparam = get_sge_param,
3107 .set_ringparam = set_sge_param,
3108 .get_coalesce = get_coalesce,
3109 .set_coalesce = set_coalesce,
3110 .get_eeprom_len = get_eeprom_len,
3111 .get_eeprom = get_eeprom,
3112 .set_eeprom = set_eeprom,
3113 .get_pauseparam = get_pauseparam,
3114 .set_pauseparam = set_pauseparam,
3115 .get_link = ethtool_op_get_link,
3116 .get_strings = get_strings,
3117 .set_phys_id = identify_port,
3118 .nway_reset = restart_autoneg,
3119 .get_sset_count = get_sset_count,
3120 .get_ethtool_stats = get_stats,
3121 .get_regs_len = get_regs_len,
3122 .get_regs = get_regs,
3125 .get_rxnfc = get_rxnfc,
3126 .get_rxfh_indir_size = get_rss_table_size,
3127 .get_rxfh = get_rss_table,
3128 .set_rxfh = set_rss_table,
3129 .flash_device = set_flash,
3132 static int setup_debugfs(struct adapter *adap)
3134 if (IS_ERR_OR_NULL(adap->debugfs_root))
3137 #ifdef CONFIG_DEBUG_FS
3138 t4_setup_debugfs(adap);
3144 * upper-layer driver support
3148 * Allocate an active-open TID and set it to the supplied value.
3150 int cxgb4_alloc_atid(struct tid_info *t, void *data)
3154 spin_lock_bh(&t->atid_lock);
3156 union aopen_entry *p = t->afree;
3158 atid = (p - t->atid_tab) + t->atid_base;
3163 spin_unlock_bh(&t->atid_lock);
3166 EXPORT_SYMBOL(cxgb4_alloc_atid);
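/* A minimal usage sketch for a ULD (illustrative only; "my_ctx" is a
* hypothetical caller-owned cookie, not something defined in this driver):
*
*	atid = cxgb4_alloc_atid(t, my_ctx);
*	if (atid < 0)
*		return atid;	(the table is exhausted)
*	... send the active open; look my_ctx up again on the reply ...
*	cxgb4_free_atid(t, atid);
*/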
3169 * Release an active-open TID.
3171 void cxgb4_free_atid(struct tid_info *t, unsigned int atid)
3173 union aopen_entry *p = &t->atid_tab[atid - t->atid_base];
3175 spin_lock_bh(&t->atid_lock);
3179 spin_unlock_bh(&t->atid_lock);
3181 EXPORT_SYMBOL(cxgb4_free_atid);
3184 * Allocate a server TID and set it to the supplied value.
3186 int cxgb4_alloc_stid(struct tid_info *t, int family, void *data)
3190 spin_lock_bh(&t->stid_lock);
3191 if (family == PF_INET) {
3192 stid = find_first_zero_bit(t->stid_bmap, t->nstids);
3193 if (stid < t->nstids)
3194 __set_bit(stid, t->stid_bmap);
3198 stid = bitmap_find_free_region(t->stid_bmap, t->nstids, 2);
3203 t->stid_tab[stid].data = data;
3204 stid += t->stid_base;
3205 /* IPv6 requires max of 520 bits or 16 cells in TCAM
3206 * This is equivalent to 4 TIDs. With CLIP enabled it
3207 * needs 2 TIDs.
3208 */
3209 if (family == PF_INET)
3212 t->stids_in_use += 4;
3214 spin_unlock_bh(&t->stid_lock);
3217 EXPORT_SYMBOL(cxgb4_alloc_stid);
3219 /* Allocate a server filter TID and set it to the supplied value.
3221 int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data)
3225 spin_lock_bh(&t->stid_lock);
3226 if (family == PF_INET) {
3227 stid = find_next_zero_bit(t->stid_bmap,
3228 t->nstids + t->nsftids, t->nstids);
3229 if (stid < (t->nstids + t->nsftids))
3230 __set_bit(stid, t->stid_bmap);
3237 t->stid_tab[stid].data = data;
3239 stid += t->sftid_base;
3242 spin_unlock_bh(&t->stid_lock);
3245 EXPORT_SYMBOL(cxgb4_alloc_sftid);
3247 /* Release a server TID.
3249 void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family)
3251 /* Is it a server filter TID? */
3252 if (t->nsftids && (stid >= t->sftid_base)) {
3253 stid -= t->sftid_base;
3256 stid -= t->stid_base;
3259 spin_lock_bh(&t->stid_lock);
3260 if (family == PF_INET)
3261 __clear_bit(stid, t->stid_bmap);
3263 bitmap_release_region(t->stid_bmap, stid, 2);
3264 t->stid_tab[stid].data = NULL;
3265 if (family == PF_INET)
3268 t->stids_in_use -= 4;
3269 spin_unlock_bh(&t->stid_lock);
3271 EXPORT_SYMBOL(cxgb4_free_stid);
3274 * Populate a TID_RELEASE WR. Caller must properly size the skb.
3276 static void mk_tid_release(struct sk_buff *skb, unsigned int chan,
3279 struct cpl_tid_release *req;
3281 set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
3282 req = (struct cpl_tid_release *)__skb_put(skb, sizeof(*req));
3283 INIT_TP_WR(req, tid);
3284 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
3288 * Queue a TID release request and if necessary schedule a work queue to
3289 * process it.
3290 */
3291 static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan,
3294 void **p = &t->tid_tab[tid];
3295 struct adapter *adap = container_of(t, struct adapter, tids);
3297 spin_lock_bh(&adap->tid_release_lock);
3298 *p = adap->tid_release_head;
3299 /* Low 2 bits encode the Tx channel number */
3300 adap->tid_release_head = (void **)((uintptr_t)p | chan);
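/* This tagging is safe because tid_tab entries are pointer-sized and
* therefore at least 4-byte aligned, leaving the low 2 bits of the entry
* address free; process_tid_release_list() recovers the channel below via
* "(uintptr_t)p & 3".
*/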
3301 if (!adap->tid_release_task_busy) {
3302 adap->tid_release_task_busy = true;
3303 queue_work(adap->workq, &adap->tid_release_task);
3305 spin_unlock_bh(&adap->tid_release_lock);
3309 * Process the list of pending TID release requests.
3311 static void process_tid_release_list(struct work_struct *work)
3313 struct sk_buff *skb;
3314 struct adapter *adap;
3316 adap = container_of(work, struct adapter, tid_release_task);
3318 spin_lock_bh(&adap->tid_release_lock);
3319 while (adap->tid_release_head) {
3320 void **p = adap->tid_release_head;
3321 unsigned int chan = (uintptr_t)p & 3;
3322 p = (void *)p - chan;
3324 adap->tid_release_head = *p;
3326 spin_unlock_bh(&adap->tid_release_lock);
3328 while (!(skb = alloc_skb(sizeof(struct cpl_tid_release),
3330 schedule_timeout_uninterruptible(1);
3332 mk_tid_release(skb, chan, p - adap->tids.tid_tab);
3333 t4_ofld_send(adap, skb);
3334 spin_lock_bh(&adap->tid_release_lock);
3336 adap->tid_release_task_busy = false;
3337 spin_unlock_bh(&adap->tid_release_lock);
3341 * Release a TID and inform HW. If we are unable to allocate the release
3342 * message we defer to a work queue.
3344 void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid)
3347 struct sk_buff *skb;
3348 struct adapter *adap = container_of(t, struct adapter, tids);
3350 old = t->tid_tab[tid];
3351 skb = alloc_skb(sizeof(struct cpl_tid_release), GFP_ATOMIC);
3353 t->tid_tab[tid] = NULL;
3354 mk_tid_release(skb, chan, tid);
3355 t4_ofld_send(adap, skb);
3357 cxgb4_queue_tid_release(t, chan, tid);
3359 atomic_dec(&t->tids_in_use);
3361 EXPORT_SYMBOL(cxgb4_remove_tid);
3364 * Allocate and initialize the TID tables. Returns 0 on success.
3366 static int tid_init(struct tid_info *t)
3369 unsigned int stid_bmap_size;
3370 unsigned int natids = t->natids;
3371 struct adapter *adap = container_of(t, struct adapter, tids);
3373 stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids);
3374 size = t->ntids * sizeof(*t->tid_tab) +
3375 natids * sizeof(*t->atid_tab) +
3376 t->nstids * sizeof(*t->stid_tab) +
3377 t->nsftids * sizeof(*t->stid_tab) +
3378 stid_bmap_size * sizeof(long) +
3379 t->nftids * sizeof(*t->ftid_tab) +
3380 t->nsftids * sizeof(*t->ftid_tab);
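/* The single allocation below is carved up, in order, into: tid_tab,
* atid_tab, stid_tab (servers followed by server filters), stid_bmap and
* ftid_tab (filters followed by server filters), matching the pointer
* arithmetic that follows.
*/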
3382 t->tid_tab = t4_alloc_mem(size);
3386 t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
3387 t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
3388 t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids];
3389 t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
3390 spin_lock_init(&t->stid_lock);
3391 spin_lock_init(&t->atid_lock);
3393 t->stids_in_use = 0;
3395 t->atids_in_use = 0;
3396 atomic_set(&t->tids_in_use, 0);
3398 /* Setup the free list for atid_tab and clear the stid bitmap. */
3401 t->atid_tab[natids - 1].next = &t->atid_tab[natids];
3402 t->afree = t->atid_tab;
3404 bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
3405 /* Reserve stid 0 for T4/T5 adapters */
3406 if (!t->stid_base &&
3407 (is_t4(adap->params.chip) || is_t5(adap->params.chip)))
3408 __set_bit(0, t->stid_bmap);
3413 int cxgb4_clip_get(const struct net_device *dev,
3414 const struct in6_addr *lip)
3416 struct adapter *adap;
3417 struct fw_clip_cmd c;
3419 adap = netdev2adap(dev);
3420 memset(&c, 0, sizeof(c));
3421 c.op_to_write = htonl(FW_CMD_OP_V(FW_CLIP_CMD) |
3422 FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
3423 c.alloc_to_len16 = htonl(FW_CLIP_CMD_ALLOC_F | FW_LEN16(c));
3424 c.ip_hi = *(__be64 *)(lip->s6_addr);
3425 c.ip_lo = *(__be64 *)(lip->s6_addr + 8);
3426 return t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, false);
3428 EXPORT_SYMBOL(cxgb4_clip_get);
3430 int cxgb4_clip_release(const struct net_device *dev,
3431 const struct in6_addr *lip)
3433 struct adapter *adap;
3434 struct fw_clip_cmd c;
3436 adap = netdev2adap(dev);
3437 memset(&c, 0, sizeof(c));
3438 c.op_to_write = htonl(FW_CMD_OP_V(FW_CLIP_CMD) |
3439 FW_CMD_REQUEST_F | FW_CMD_READ_F);
3440 c.alloc_to_len16 = htonl(FW_CLIP_CMD_FREE_F | FW_LEN16(c));
3441 c.ip_hi = *(__be64 *)(lip->s6_addr);
3442 c.ip_lo = *(__be64 *)(lip->s6_addr + 8);
3443 return t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, false);
3445 EXPORT_SYMBOL(cxgb4_clip_release);
3448 * cxgb4_create_server - create an IP server
3450 * @stid: the server TID
3451 * @sip: local IP address to bind server to
3452 * @sport: the server's TCP port
3453 * @queue: queue to direct messages from this server to
3455 * Create an IP server for the given port and address.
3456 * Returns <0 on error and one of the %NET_XMIT_* values on success.
3458 int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
3459 __be32 sip, __be16 sport, __be16 vlan,
3463 struct sk_buff *skb;
3464 struct adapter *adap;
3465 struct cpl_pass_open_req *req;
3468 skb = alloc_skb(sizeof(*req), GFP_KERNEL);
3472 adap = netdev2adap(dev);
3473 req = (struct cpl_pass_open_req *)__skb_put(skb, sizeof(*req));
3475 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
3476 req->local_port = sport;
3477 req->peer_port = htons(0);
3478 req->local_ip = sip;
3479 req->peer_ip = htonl(0);
3480 chan = rxq_to_chan(&adap->sge, queue);
3481 req->opt0 = cpu_to_be64(TX_CHAN_V(chan));
3482 req->opt1 = cpu_to_be64(CONN_POLICY_ASK |
3483 SYN_RSS_ENABLE | SYN_RSS_QUEUE(queue));
3484 ret = t4_mgmt_tx(adap, skb);
3485 return net_xmit_eval(ret);
3487 EXPORT_SYMBOL(cxgb4_create_server);
3489 /* cxgb4_create_server6 - create an IPv6 server
3491 * @stid: the server TID
3492 * @sip: local IPv6 address to bind server to
3493 * @sport: the server's TCP port
3494 * @queue: queue to direct messages from this server to
3496 * Create an IPv6 server for the given port and address.
3497 * Returns <0 on error and one of the %NET_XMIT_* values on success.
3499 int cxgb4_create_server6(const struct net_device *dev, unsigned int stid,
3500 const struct in6_addr *sip, __be16 sport,
3504 struct sk_buff *skb;
3505 struct adapter *adap;
3506 struct cpl_pass_open_req6 *req;
3509 skb = alloc_skb(sizeof(*req), GFP_KERNEL);
3513 adap = netdev2adap(dev);
3514 req = (struct cpl_pass_open_req6 *)__skb_put(skb, sizeof(*req));
3516 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, stid));
3517 req->local_port = sport;
3518 req->peer_port = htons(0);
3519 req->local_ip_hi = *(__be64 *)(sip->s6_addr);
3520 req->local_ip_lo = *(__be64 *)(sip->s6_addr + 8);
3521 req->peer_ip_hi = cpu_to_be64(0);
3522 req->peer_ip_lo = cpu_to_be64(0);
3523 chan = rxq_to_chan(&adap->sge, queue);
3524 req->opt0 = cpu_to_be64(TX_CHAN_V(chan));
3525 req->opt1 = cpu_to_be64(CONN_POLICY_ASK |
3526 SYN_RSS_ENABLE | SYN_RSS_QUEUE(queue));
3527 ret = t4_mgmt_tx(adap, skb);
3528 return net_xmit_eval(ret);
3530 EXPORT_SYMBOL(cxgb4_create_server6);
3532 int cxgb4_remove_server(const struct net_device *dev, unsigned int stid,
3533 unsigned int queue, bool ipv6)
3535 struct sk_buff *skb;
3536 struct adapter *adap;
3537 struct cpl_close_listsvr_req *req;
3540 adap = netdev2adap(dev);
3542 skb = alloc_skb(sizeof(*req), GFP_KERNEL);
3546 req = (struct cpl_close_listsvr_req *)__skb_put(skb, sizeof(*req));
3548 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, stid));
3549 req->reply_ctrl = htons(NO_REPLY(0) | (ipv6 ? LISTSVR_IPV6(1) :
3550 LISTSVR_IPV6(0)) | QUEUENO(queue));
3551 ret = t4_mgmt_tx(adap, skb);
3552 return net_xmit_eval(ret);
3554 EXPORT_SYMBOL(cxgb4_remove_server);
3557 * cxgb4_best_mtu - find the entry in the MTU table closest to an MTU
3558 * @mtus: the HW MTU table
3559 * @mtu: the target MTU
3560 * @idx: index of selected entry in the MTU table
3562 * Returns the index and the value in the HW MTU table that is closest to
3563 * but does not exceed @mtu, unless @mtu is smaller than any value in the
3564 * table, in which case that smallest available value is selected.
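*
* For example, with a hypothetical table containing 576, 1500 and 9000,
* @mtu = 1600 selects 1500 (with @idx returning 1) while @mtu = 500 falls
* back to the smallest entry, 576 (@idx returning 0).
*/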
3566 unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
3571 while (i < NMTUS - 1 && mtus[i + 1] <= mtu)
3577 EXPORT_SYMBOL(cxgb4_best_mtu);
3580 * cxgb4_best_aligned_mtu - find best MTU, [hopefully] data size aligned
3581 * @mtus: the HW MTU table
3582 * @header_size: Header Size
3583 * @data_size_max: maximum Data Segment Size
3584 * @data_size_align: desired Data Segment Size Alignment (2^N)
3585 * @mtu_idxp: HW MTU Table Index return value pointer (possibly NULL)
3587 * Similar to cxgb4_best_mtu() but instead of searching the Hardware
3588 * MTU Table based solely on a Maximum MTU parameter, we break that
3589 * parameter up into a Header Size and Maximum Data Segment Size, and
3590 * provide a desired Data Segment Size Alignment. If we find an MTU in
3591 * the Hardware MTU Table which will result in a Data Segment Size with
3592 * the requested alignment _and_ that MTU isn't "too far" from the
3593 * closest MTU, then we'll return that rather than the closest MTU.
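*
* As an illustrative example: with @header_size = 40, @data_size_align = 16,
* and consecutive table entries 1400 and 1500 both at or below the Maximum
* MTU, 1400 yields a 16-byte-aligned Data Segment Size of 1360 while 1500
* yields an unaligned 1460; since the aligned entry is within one slot of
* the closest MTU, 1400 is the value returned.
*/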
3595 unsigned int cxgb4_best_aligned_mtu(const unsigned short *mtus,
3596 unsigned short header_size,
3597 unsigned short data_size_max,
3598 unsigned short data_size_align,
3599 unsigned int *mtu_idxp)
3601 unsigned short max_mtu = header_size + data_size_max;
3602 unsigned short data_size_align_mask = data_size_align - 1;
3603 int mtu_idx, aligned_mtu_idx;
3605 /* Scan the MTU Table till we find an MTU which is larger than our
3606 * Maximum MTU or we reach the end of the table. Along the way,
3607 * record the last MTU found, if any, which will result in a Data
3608 * Segment Length matching the requested alignment.
3610 for (mtu_idx = 0, aligned_mtu_idx = -1; mtu_idx < NMTUS; mtu_idx++) {
3611 unsigned short data_size = mtus[mtu_idx] - header_size;
3613 /* If this MTU minus the Header Size would result in a
3614 * Data Segment Size of the desired alignment, remember it.
3616 if ((data_size & data_size_align_mask) == 0)
3617 aligned_mtu_idx = mtu_idx;
3619 /* If we're not at the end of the Hardware MTU Table and the
3620 * next element is larger than our Maximum MTU, drop out of
3623 if (mtu_idx+1 < NMTUS && mtus[mtu_idx+1] > max_mtu)
3627 /* If we fell out of the loop because we ran to the end of the table,
3628 * then we just have to use the last [largest] entry.
3630 if (mtu_idx == NMTUS)
3633 /* If we found an MTU which resulted in the requested Data Segment
3634 * Length alignment and that's "not far" from the largest MTU which is
3635 * less than or equal to the maximum MTU, then use that.
3637 if (aligned_mtu_idx >= 0 &&
3638 mtu_idx - aligned_mtu_idx <= 1)
3639 mtu_idx = aligned_mtu_idx;
3641 /* If the caller has passed in an MTU Index pointer, pass the
3642 * MTU Index back. Return the MTU value.
3645 *mtu_idxp = mtu_idx;
3646 return mtus[mtu_idx];
3648 EXPORT_SYMBOL(cxgb4_best_aligned_mtu);
3651 * cxgb4_port_chan - get the HW channel of a port
3652 * @dev: the net device for the port
3654 * Return the HW Tx channel of the given port.
3656 unsigned int cxgb4_port_chan(const struct net_device *dev)
3658 return netdev2pinfo(dev)->tx_chan;
3660 EXPORT_SYMBOL(cxgb4_port_chan);
3662 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo)
3664 struct adapter *adap = netdev2adap(dev);
3665 u32 v1, v2, lp_count, hp_count;
3667 v1 = t4_read_reg(adap, A_SGE_DBFIFO_STATUS);
3668 v2 = t4_read_reg(adap, SGE_DBFIFO_STATUS2);
3669 if (is_t4(adap->params.chip)) {
3670 lp_count = G_LP_COUNT(v1);
3671 hp_count = G_HP_COUNT(v1);
3673 lp_count = G_LP_COUNT_T5(v1);
3674 hp_count = G_HP_COUNT_T5(v2);
3676 return lpfifo ? lp_count : hp_count;
3678 EXPORT_SYMBOL(cxgb4_dbfifo_count);
3681 * cxgb4_port_viid - get the VI id of a port
3682 * @dev: the net device for the port
3684 * Return the VI id of the given port.
3686 unsigned int cxgb4_port_viid(const struct net_device *dev)
3688 return netdev2pinfo(dev)->viid;
3690 EXPORT_SYMBOL(cxgb4_port_viid);
3693 * cxgb4_port_idx - get the index of a port
3694 * @dev: the net device for the port
3696 * Return the index of the given port.
3698 unsigned int cxgb4_port_idx(const struct net_device *dev)
3700 return netdev2pinfo(dev)->port_id;
3702 EXPORT_SYMBOL(cxgb4_port_idx);
3704 void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4,
3705 struct tp_tcp_stats *v6)
3707 struct adapter *adap = pci_get_drvdata(pdev);
3709 spin_lock(&adap->stats_lock);
3710 t4_tp_get_tcp_stats(adap, v4, v6);
3711 spin_unlock(&adap->stats_lock);
3713 EXPORT_SYMBOL(cxgb4_get_tcp_stats);
3715 void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask,
3716 const unsigned int *pgsz_order)
3718 struct adapter *adap = netdev2adap(dev);
3720 t4_write_reg(adap, ULP_RX_ISCSI_TAGMASK, tag_mask);
3721 t4_write_reg(adap, ULP_RX_ISCSI_PSZ, HPZ0(pgsz_order[0]) |
3722 HPZ1(pgsz_order[1]) | HPZ2(pgsz_order[2]) |
3723 HPZ3(pgsz_order[3]));
3725 EXPORT_SYMBOL(cxgb4_iscsi_init);
3727 int cxgb4_flush_eq_cache(struct net_device *dev)
3729 struct adapter *adap = netdev2adap(dev);
3732 ret = t4_fwaddrspace_write(adap, adap->mbox,
3733 0xe1000000 + A_SGE_CTXT_CMD, 0x20000000);
3736 EXPORT_SYMBOL(cxgb4_flush_eq_cache);
3738 static int read_eq_indices(struct adapter *adap, u16 qid, u16 *pidx, u16 *cidx)
3740 u32 addr = t4_read_reg(adap, A_SGE_DBQ_CTXT_BADDR) + 24 * qid + 8;
3744 spin_lock(&adap->win0_lock);
3745 ret = t4_memory_rw(adap, 0, MEM_EDC0, addr,
3746 sizeof(indices), (__be32 *)&indices,
3748 spin_unlock(&adap->win0_lock);
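/* The 64-bit EQ context word fetched from EDC0 appears to carry the
* consumer index in bits 40:25 and the producer index in bits 24:9, hence
* the shifts below.
*/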
3750 *cidx = (be64_to_cpu(indices) >> 25) & 0xffff;
3751 *pidx = (be64_to_cpu(indices) >> 9) & 0xffff;
3756 int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx,
3759 struct adapter *adap = netdev2adap(dev);
3760 u16 hw_pidx, hw_cidx;
3763 ret = read_eq_indices(adap, qid, &hw_pidx, &hw_cidx);
3767 if (pidx != hw_pidx) {
3770 if (pidx >= hw_pidx)
3771 delta = pidx - hw_pidx;
3773 delta = size - hw_pidx + pidx;
3775 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
3776 QID(qid) | PIDX(delta));
3781 EXPORT_SYMBOL(cxgb4_sync_txq_pidx);
3783 void cxgb4_disable_db_coalescing(struct net_device *dev)
3785 struct adapter *adap;
3787 adap = netdev2adap(dev);
3788 t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_NOCOALESCE,
3791 EXPORT_SYMBOL(cxgb4_disable_db_coalescing);
3793 void cxgb4_enable_db_coalescing(struct net_device *dev)
3795 struct adapter *adap;
3797 adap = netdev2adap(dev);
3798 t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_NOCOALESCE, 0);
3800 EXPORT_SYMBOL(cxgb4_enable_db_coalescing);
3802 int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
3804 struct adapter *adap;
3805 u32 offset, memtype, memaddr;
3806 u32 edc0_size, edc1_size, mc0_size, mc1_size, size;
3807 u32 edc0_end, edc1_end, mc0_end, mc1_end;
3810 adap = netdev2adap(dev);
3812 offset = ((stag >> 8) * 32) + adap->vres.stag.start;
3814 /* Figure out where the offset lands in the Memory Type/Address scheme.
3815 * This code assumes that the memory is laid out starting at offset 0
3816 * with no breaks as: EDC0, EDC1, MC0, MC1. All cards have both EDC0
3817 * and EDC1. Some cards will have neither MC0 nor MC1, most cards have
3818 * MC0, and some have both MC0 and MC1.
3820 size = t4_read_reg(adap, MA_EDRAM0_BAR_A);
3821 edc0_size = EDRAM0_SIZE_G(size) << 20;
3822 size = t4_read_reg(adap, MA_EDRAM1_BAR_A);
3823 edc1_size = EDRAM1_SIZE_G(size) << 20;
3824 size = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A);
3825 mc0_size = EXT_MEM0_SIZE_G(size) << 20;
3827 edc0_end = edc0_size;
3828 edc1_end = edc0_end + edc1_size;
3829 mc0_end = edc1_end + mc0_size;
3831 if (offset < edc0_end) {
3834 } else if (offset < edc1_end) {
3836 memaddr = offset - edc0_end;
3838 if (offset < mc0_end) {
3840 memaddr = offset - edc1_end;
3841 } else if (is_t4(adap->params.chip)) {
3842 /* T4 only has a single memory channel */
3845 size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
3846 mc1_size = EXT_MEM1_SIZE_G(size) << 20;
3847 mc1_end = mc0_end + mc1_size;
3848 if (offset < mc1_end) {
3850 memaddr = offset - mc0_end;
3852 /* offset beyond the end of any memory */
3858 spin_lock(&adap->win0_lock);
3859 ret = t4_memory_rw(adap, 0, memtype, memaddr, 32, tpte, T4_MEMORY_READ);
3860 spin_unlock(&adap->win0_lock);
3864 dev_err(adap->pdev_dev, "stag %#x, offset %#x out of range\n",
3868 EXPORT_SYMBOL(cxgb4_read_tpte);
3870 u64 cxgb4_read_sge_timestamp(struct net_device *dev)
3873 struct adapter *adap;
3875 adap = netdev2adap(dev);
3876 lo = t4_read_reg(adap, SGE_TIMESTAMP_LO);
3877 hi = GET_TSVAL(t4_read_reg(adap, SGE_TIMESTAMP_HI));
3879 return ((u64)hi << 32) | (u64)lo;
3881 EXPORT_SYMBOL(cxgb4_read_sge_timestamp);
3883 static struct pci_driver cxgb4_driver;
3885 static void check_neigh_update(struct neighbour *neigh)
3887 const struct device *parent;
3888 const struct net_device *netdev = neigh->dev;
3890 if (netdev->priv_flags & IFF_802_1Q_VLAN)
3891 netdev = vlan_dev_real_dev(netdev);
3892 parent = netdev->dev.parent;
3893 if (parent && parent->driver == &cxgb4_driver.driver)
3894 t4_l2t_update(dev_get_drvdata(parent), neigh);
3897 static int netevent_cb(struct notifier_block *nb, unsigned long event,
3901 case NETEVENT_NEIGH_UPDATE:
3902 check_neigh_update(data);
3904 case NETEVENT_REDIRECT:
3911 static bool netevent_registered;
3912 static struct notifier_block cxgb4_netevent_nb = {
3913 .notifier_call = netevent_cb
3916 static void drain_db_fifo(struct adapter *adap, int usecs)
3918 u32 v1, v2, lp_count, hp_count;
3921 v1 = t4_read_reg(adap, A_SGE_DBFIFO_STATUS);
3922 v2 = t4_read_reg(adap, SGE_DBFIFO_STATUS2);
3923 if (is_t4(adap->params.chip)) {
3924 lp_count = G_LP_COUNT(v1);
3925 hp_count = G_HP_COUNT(v1);
3927 lp_count = G_LP_COUNT_T5(v1);
3928 hp_count = G_HP_COUNT_T5(v2);
3931 if (lp_count == 0 && hp_count == 0)
3933 set_current_state(TASK_UNINTERRUPTIBLE);
3934 schedule_timeout(usecs_to_jiffies(usecs));
3938 static void disable_txq_db(struct sge_txq *q)
3940 unsigned long flags;
3942 spin_lock_irqsave(&q->db_lock, flags);
3944 spin_unlock_irqrestore(&q->db_lock, flags);
3947 static void enable_txq_db(struct adapter *adap, struct sge_txq *q)
3949 spin_lock_irq(&q->db_lock);
3950 if (q->db_pidx_inc) {
3951 /* Make sure that all writes to the TX descriptors
3952 * are committed before we tell HW about them.
3955 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
3956 QID(q->cntxt_id) | PIDX(q->db_pidx_inc));
3960 spin_unlock_irq(&q->db_lock);
3963 static void disable_dbs(struct adapter *adap)
3967 for_each_ethrxq(&adap->sge, i)
3968 disable_txq_db(&adap->sge.ethtxq[i].q);
3969 for_each_ofldrxq(&adap->sge, i)
3970 disable_txq_db(&adap->sge.ofldtxq[i].q);
3971 for_each_port(adap, i)
3972 disable_txq_db(&adap->sge.ctrlq[i].q);
3975 static void enable_dbs(struct adapter *adap)
3979 for_each_ethrxq(&adap->sge, i)
3980 enable_txq_db(adap, &adap->sge.ethtxq[i].q);
3981 for_each_ofldrxq(&adap->sge, i)
3982 enable_txq_db(adap, &adap->sge.ofldtxq[i].q);
3983 for_each_port(adap, i)
3984 enable_txq_db(adap, &adap->sge.ctrlq[i].q);
3987 static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
3989 if (adap->uld_handle[CXGB4_ULD_RDMA])
3990 ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA],
3994 static void process_db_full(struct work_struct *work)
3996 struct adapter *adap;
3998 adap = container_of(work, struct adapter, db_full_task);
4000 drain_db_fifo(adap, dbfifo_drain_delay);
4002 notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY);
4003 t4_set_reg_field(adap, SGE_INT_ENABLE3,
4004 DBFIFO_HP_INT | DBFIFO_LP_INT,
4005 DBFIFO_HP_INT | DBFIFO_LP_INT);
4008 static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q)
4010 u16 hw_pidx, hw_cidx;
4013 spin_lock_irq(&q->db_lock);
4014 ret = read_eq_indices(adap, (u16)q->cntxt_id, &hw_pidx, &hw_cidx);
4017 if (q->db_pidx != hw_pidx) {
4020 if (q->db_pidx >= hw_pidx)
4021 delta = q->db_pidx - hw_pidx;
4023 delta = q->size - hw_pidx + q->db_pidx;
4025 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
4026 QID(q->cntxt_id) | PIDX(delta));
4031 spin_unlock_irq(&q->db_lock);
4033 CH_WARN(adap, "DB drop recovery failed.\n");
4035 static void recover_all_queues(struct adapter *adap)
4039 for_each_ethrxq(&adap->sge, i)
4040 sync_txq_pidx(adap, &adap->sge.ethtxq[i].q);
4041 for_each_ofldrxq(&adap->sge, i)
4042 sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q);
4043 for_each_port(adap, i)
4044 sync_txq_pidx(adap, &adap->sge.ctrlq[i].q);
4047 static void process_db_drop(struct work_struct *work)
4049 struct adapter *adap;
4051 adap = container_of(work, struct adapter, db_drop_task);
4053 if (is_t4(adap->params.chip)) {
4054 drain_db_fifo(adap, dbfifo_drain_delay);
4055 notify_rdma_uld(adap, CXGB4_CONTROL_DB_DROP);
4056 drain_db_fifo(adap, dbfifo_drain_delay);
4057 recover_all_queues(adap);
4058 drain_db_fifo(adap, dbfifo_drain_delay);
4060 notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY);
4062 u32 dropped_db = t4_read_reg(adap, 0x010ac);
4063 u16 qid = (dropped_db >> 15) & 0x1ffff;
4064 u16 pidx_inc = dropped_db & 0x1fff;
4066 unsigned short udb_density;
4067 unsigned long qpshift;
4071 dev_warn(adap->pdev_dev,
4072 "Dropped DB 0x%x qid %d bar2 %d coalesce %d pidx %d\n",
4074 (dropped_db >> 14) & 1,
4075 (dropped_db >> 13) & 1,
4078 drain_db_fifo(adap, 1);
4080 s_qpp = QUEUESPERPAGEPF1 * adap->fn;
4081 udb_density = 1 << QUEUESPERPAGEPF0_GET(t4_read_reg(adap,
4082 SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp);
4083 qpshift = PAGE_SHIFT - ilog2(udb_density);
4084 udb = qid << qpshift;
4086 page = udb / PAGE_SIZE;
4087 udb += (qid - (page * udb_density)) * 128;
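/* Illustrative arithmetic: with 4KB pages and udb_density = 2, qpshift is
* 12 - 1 = 11, so qid 5 yields udb = 5 << 11 = 10240, i.e. page 2 of the
* doorbell BAR plus 128 bytes for being the second queue in that page.
*/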
4089 writel(PIDX(pidx_inc), adap->bar2 + udb + 8);
4091 /* Re-enable BAR2 WC */
4092 t4_set_reg_field(adap, 0x10b0, 1<<15, 1<<15);
4095 t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_DROPPED_DB, 0);
4098 void t4_db_full(struct adapter *adap)
4100 if (is_t4(adap->params.chip)) {
4102 notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL);
4103 t4_set_reg_field(adap, SGE_INT_ENABLE3,
4104 DBFIFO_HP_INT | DBFIFO_LP_INT, 0);
4105 queue_work(adap->workq, &adap->db_full_task);
4109 void t4_db_dropped(struct adapter *adap)
4111 if (is_t4(adap->params.chip)) {
4113 notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL);
4115 queue_work(adap->workq, &adap->db_drop_task);
4118 static void uld_attach(struct adapter *adap, unsigned int uld)
4121 struct cxgb4_lld_info lli;
4124 lli.pdev = adap->pdev;
4126 lli.l2t = adap->l2t;
4127 lli.tids = &adap->tids;
4128 lli.ports = adap->port;
4129 lli.vr = &adap->vres;
4130 lli.mtus = adap->params.mtus;
4131 if (uld == CXGB4_ULD_RDMA) {
4132 lli.rxq_ids = adap->sge.rdma_rxq;
4133 lli.ciq_ids = adap->sge.rdma_ciq;
4134 lli.nrxq = adap->sge.rdmaqs;
4135 lli.nciq = adap->sge.rdmaciqs;
4136 } else if (uld == CXGB4_ULD_ISCSI) {
4137 lli.rxq_ids = adap->sge.ofld_rxq;
4138 lli.nrxq = adap->sge.ofldqsets;
4140 lli.ntxq = adap->sge.ofldqsets;
4141 lli.nchan = adap->params.nports;
4142 lli.nports = adap->params.nports;
4143 lli.wr_cred = adap->params.ofldq_wr_cred;
4144 lli.adapter_type = adap->params.chip;
4145 lli.iscsi_iolen = MAXRXDATA_GET(t4_read_reg(adap, TP_PARA_REG2));
4146 lli.cclk_ps = 1000000000 / adap->params.vpd.cclk;
4147 lli.udb_density = 1 << QUEUESPERPAGEPF0_GET(
4148 t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF) >>
4150 lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET(
4151 t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >>
4153 lli.filt_mode = adap->params.tp.vlan_pri_map;
4154 /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */
4155 for (i = 0; i < NCHAN; i++)
4157 lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS);
4158 lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL);
4159 lli.fw_vers = adap->params.fw_vers;
4160 lli.dbfifo_int_thresh = dbfifo_int_thresh;
4161 lli.sge_ingpadboundary = adap->sge.fl_align;
4162 lli.sge_egrstatuspagesize = adap->sge.stat_len;
4163 lli.sge_pktshift = adap->sge.pktshift;
4164 lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN;
4165 lli.max_ordird_qp = adap->params.max_ordird_qp;
4166 lli.max_ird_adapter = adap->params.max_ird_adapter;
4167 lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
4169 handle = ulds[uld].add(&lli);
4170 if (IS_ERR(handle)) {
4171 dev_warn(adap->pdev_dev,
4172 "could not attach to the %s driver, error %ld\n",
4173 uld_str[uld], PTR_ERR(handle));
4177 adap->uld_handle[uld] = handle;
4179 if (!netevent_registered) {
4180 register_netevent_notifier(&cxgb4_netevent_nb);
4181 netevent_registered = true;
4184 if (adap->flags & FULL_INIT_DONE)
4185 ulds[uld].state_change(handle, CXGB4_STATE_UP);
4188 static void attach_ulds(struct adapter *adap)
4192 spin_lock(&adap_rcu_lock);
4193 list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list);
4194 spin_unlock(&adap_rcu_lock);
4196 mutex_lock(&uld_mutex);
4197 list_add_tail(&adap->list_node, &adapter_list);
4198 for (i = 0; i < CXGB4_ULD_MAX; i++)
4200 uld_attach(adap, i);
4201 mutex_unlock(&uld_mutex);
4204 static void detach_ulds(struct adapter *adap)
4208 mutex_lock(&uld_mutex);
4209 list_del(&adap->list_node);
4210 for (i = 0; i < CXGB4_ULD_MAX; i++)
4211 if (adap->uld_handle[i]) {
4212 ulds[i].state_change(adap->uld_handle[i],
4213 CXGB4_STATE_DETACH);
4214 adap->uld_handle[i] = NULL;
4216 if (netevent_registered && list_empty(&adapter_list)) {
4217 unregister_netevent_notifier(&cxgb4_netevent_nb);
4218 netevent_registered = false;
4220 mutex_unlock(&uld_mutex);
4222 spin_lock(&adap_rcu_lock);
4223 list_del_rcu(&adap->rcu_node);
4224 spin_unlock(&adap_rcu_lock);
4227 static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
4231 mutex_lock(&uld_mutex);
4232 for (i = 0; i < CXGB4_ULD_MAX; i++)
4233 if (adap->uld_handle[i])
4234 ulds[i].state_change(adap->uld_handle[i], new_state);
4235 mutex_unlock(&uld_mutex);
4239 * cxgb4_register_uld - register an upper-layer driver
4240 * @type: the ULD type
4241 * @p: the ULD methods
4243 * Registers an upper-layer driver with this driver and notifies the ULD
4244 * about any presently available devices that support its type. Returns
4245 * %-EBUSY if a ULD of the same type is already registered.
4247 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p)
4250 struct adapter *adap;
4252 if (type >= CXGB4_ULD_MAX)
4254 mutex_lock(&uld_mutex);
4255 if (ulds[type].add) {
4260 list_for_each_entry(adap, &adapter_list, list_node)
4261 uld_attach(adap, type);
4262 out: mutex_unlock(&uld_mutex);
4265 EXPORT_SYMBOL(cxgb4_register_uld);
4268 * cxgb4_unregister_uld - unregister an upper-layer driver
4269 * @type: the ULD type
4271 * Unregisters an existing upper-layer driver.
4273 int cxgb4_unregister_uld(enum cxgb4_uld type)
4275 struct adapter *adap;
4277 if (type >= CXGB4_ULD_MAX)
4279 mutex_lock(&uld_mutex);
4280 list_for_each_entry(adap, &adapter_list, list_node)
4281 adap->uld_handle[type] = NULL;
4282 ulds[type].add = NULL;
4283 mutex_unlock(&uld_mutex);
4286 EXPORT_SYMBOL(cxgb4_unregister_uld);
4288 /* Check if the netdev on which the event occurred belongs to us or not.
4289 * Return success (true) if it belongs, otherwise failure (false).
4290 * Called with rcu_read_lock() held.
4292 #if IS_ENABLED(CONFIG_IPV6)
4293 static bool cxgb4_netdev(const struct net_device *netdev)
4295 struct adapter *adap;
4298 list_for_each_entry_rcu(adap, &adap_rcu_list, rcu_node)
4299 for (i = 0; i < MAX_NPORTS; i++)
4300 if (adap->port[i] == netdev)
4305 static int clip_add(struct net_device *event_dev, struct inet6_ifaddr *ifa,
4306 unsigned long event)
4308 int ret = NOTIFY_DONE;
4311 if (cxgb4_netdev(event_dev)) {
4314 ret = cxgb4_clip_get(event_dev, &ifa->addr);
4322 cxgb4_clip_release(event_dev, &ifa->addr);
4333 static int cxgb4_inet6addr_handler(struct notifier_block *this,
4334 unsigned long event, void *data)
4336 struct inet6_ifaddr *ifa = data;
4337 struct net_device *event_dev;
4338 int ret = NOTIFY_DONE;
4339 struct bonding *bond = netdev_priv(ifa->idev->dev);
4340 struct list_head *iter;
4341 struct slave *slave;
4342 struct pci_dev *first_pdev = NULL;
4344 if (ifa->idev->dev->priv_flags & IFF_802_1Q_VLAN) {
4345 event_dev = vlan_dev_real_dev(ifa->idev->dev);
4346 ret = clip_add(event_dev, ifa, event);
4347 } else if (ifa->idev->dev->flags & IFF_MASTER) {
4348 /* It is possible that two different adapters are bonded in one
4349 * bond. We need to find each such adapter and add the CLIP
4350 * entry in all of them exactly once.
4352 bond_for_each_slave(bond, slave, iter) {
4354 ret = clip_add(slave->dev, ifa, event);
4355 /* Initialize first_pdev only if clip_add() succeeds,
4356 * since that means the device is ours.
4358 if (ret == NOTIFY_OK)
4359 first_pdev = to_pci_dev(
4360 slave->dev->dev.parent);
4361 } else if (first_pdev !=
4362 to_pci_dev(slave->dev->dev.parent))
4363 ret = clip_add(slave->dev, ifa, event);
4366 ret = clip_add(ifa->idev->dev, ifa, event);
4371 static struct notifier_block cxgb4_inet6addr_notifier = {
4372 .notifier_call = cxgb4_inet6addr_handler
4375 /* Retrieves IPv6 addresses from a root device (bond, vlan) associated with
4376 * a physical device.
4377 * The physical device reference is needed to send the actual CLIP command.
4379 static int update_dev_clip(struct net_device *root_dev, struct net_device *dev)
4381 struct inet6_dev *idev = NULL;
4382 struct inet6_ifaddr *ifa;
4385 idev = __in6_dev_get(root_dev);
4389 read_lock_bh(&idev->lock);
4390 list_for_each_entry(ifa, &idev->addr_list, if_list) {
4391 ret = cxgb4_clip_get(dev, &ifa->addr);
4395 read_unlock_bh(&idev->lock);
4400 static int update_root_dev_clip(struct net_device *dev)
4402 struct net_device *root_dev = NULL;
4405 /* First populate the real net device's IPv6 addresses */
4406 ret = update_dev_clip(dev, dev);
4410 /* Parse all bond and vlan devices layered on top of the physical dev */
4411 root_dev = netdev_master_upper_dev_get_rcu(dev);
4413 ret = update_dev_clip(root_dev, dev);
4418 for (i = 0; i < VLAN_N_VID; i++) {
4419 root_dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), i);
4423 ret = update_dev_clip(root_dev, dev);
4430 static void update_clip(const struct adapter *adap)
4433 struct net_device *dev;
4438 for (i = 0; i < MAX_NPORTS; i++) {
4439 dev = adap->port[i];
4443 ret = update_root_dev_clip(dev);
4450 #endif /* IS_ENABLED(CONFIG_IPV6) */
4453 * cxgb_up - enable the adapter
4454 * @adap: adapter being enabled
4456 * Called when the first port is enabled, this function performs the
4457 * actions necessary to make an adapter operational, such as completing
4458 * the initialization of HW modules, and enabling interrupts.
4460 * Must be called with the rtnl lock held.
4462 static int cxgb_up(struct adapter *adap)
4466 err = setup_sge_queues(adap);
4469 err = setup_rss(adap);
4473 if (adap->flags & USING_MSIX) {
4474 name_msix_vecs(adap);
4475 err = request_irq(adap->msix_info[0].vec, t4_nondata_intr, 0,
4476 adap->msix_info[0].desc, adap);
4480 err = request_msix_queue_irqs(adap);
4482 free_irq(adap->msix_info[0].vec, adap);
4486 err = request_irq(adap->pdev->irq, t4_intr_handler(adap),
4487 (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
4488 adap->port[0]->name, adap);
4494 t4_intr_enable(adap);
4495 adap->flags |= FULL_INIT_DONE;
4496 notify_ulds(adap, CXGB4_STATE_UP);
4497 #if IS_ENABLED(CONFIG_IPV6)
4503 dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
4505 t4_free_sge_resources(adap);
4509 static void cxgb_down(struct adapter *adapter)
4511 t4_intr_disable(adapter);
4512 cancel_work_sync(&adapter->tid_release_task);
4513 cancel_work_sync(&adapter->db_full_task);
4514 cancel_work_sync(&adapter->db_drop_task);
4515 adapter->tid_release_task_busy = false;
4516 adapter->tid_release_head = NULL;
4518 if (adapter->flags & USING_MSIX) {
4519 free_msix_queue_irqs(adapter);
4520 free_irq(adapter->msix_info[0].vec, adapter);
4522 free_irq(adapter->pdev->irq, adapter);
4523 quiesce_rx(adapter);
4524 t4_sge_stop(adapter);
4525 t4_free_sge_resources(adapter);
4526 adapter->flags &= ~FULL_INIT_DONE;
4530 * net_device operations
4532 static int cxgb_open(struct net_device *dev)
4535 struct port_info *pi = netdev_priv(dev);
4536 struct adapter *adapter = pi->adapter;
4538 netif_carrier_off(dev);
4540 if (!(adapter->flags & FULL_INIT_DONE)) {
4541 err = cxgb_up(adapter);
4546 err = link_start(dev);
4548 netif_tx_start_all_queues(dev);
4552 static int cxgb_close(struct net_device *dev)
4554 struct port_info *pi = netdev_priv(dev);
4555 struct adapter *adapter = pi->adapter;
4557 netif_tx_stop_all_queues(dev);
4558 netif_carrier_off(dev);
4559 return t4_enable_vi(adapter, adapter->fn, pi->viid, false, false);
4562 /* Return an error number if the indicated filter isn't writable ...
4564 static int writable_filter(struct filter_entry *f)
4574 /* Delete the filter at the specified index (if valid), checking for all
4575 * the common problems with doing this, such as the filter being locked or
4576 * currently pending in another operation.
4578 static int delete_filter(struct adapter *adapter, unsigned int fidx)
4580 struct filter_entry *f;
4583 if (fidx >= adapter->tids.nftids + adapter->tids.nsftids)
4586 f = &adapter->tids.ftid_tab[fidx];
4587 ret = writable_filter(f);
4591 return del_filter_wr(adapter, fidx);
4596 int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
4597 __be32 sip, __be16 sport, __be16 vlan,
4598 unsigned int queue, unsigned char port, unsigned char mask)
4601 struct filter_entry *f;
4602 struct adapter *adap;
4606 adap = netdev2adap(dev);
4608 /* Adjust stid to correct filter index */
4609 stid -= adap->tids.sftid_base;
4610 stid += adap->tids.nftids;
4612 /* Check to make sure the filter requested is writable ...
4614 f = &adap->tids.ftid_tab[stid];
4615 ret = writable_filter(f);
4619 /* Clear out any old resources being used by the filter before
4620 * we start constructing the new filter.
4623 clear_filter(adap, f);
4625 /* Clear out filter specifications */
4626 memset(&f->fs, 0, sizeof(struct ch_filter_specification));
4627 f->fs.val.lport = cpu_to_be16(sport);
4628 f->fs.mask.lport = ~0;
4630 if ((val[0] | val[1] | val[2] | val[3]) != 0) {
4631 for (i = 0; i < 4; i++) {
4632 f->fs.val.lip[i] = val[i];
4633 f->fs.mask.lip[i] = ~0;
4635 if (adap->params.tp.vlan_pri_map & F_PORT) {
4636 f->fs.val.iport = port;
4637 f->fs.mask.iport = mask;
4641 if (adap->params.tp.vlan_pri_map & F_PROTOCOL) {
4642 f->fs.val.proto = IPPROTO_TCP;
4643 f->fs.mask.proto = ~0;
4648 /* Mark filter as locked */
4652 ret = set_filter_wr(adap, stid);
4654 clear_filter(adap, f);
4660 EXPORT_SYMBOL(cxgb4_create_server_filter);
4662 int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
4663 unsigned int queue, bool ipv6)
4666 struct filter_entry *f;
4667 struct adapter *adap;
4669 adap = netdev2adap(dev);
4671 /* Adjust stid to correct filter index */
4672 stid -= adap->tids.sftid_base;
4673 stid += adap->tids.nftids;
4675 f = &adap->tids.ftid_tab[stid];
4676 /* Unlock the filter */
4679 ret = delete_filter(adap, stid);
4685 EXPORT_SYMBOL(cxgb4_remove_server_filter);
4687 static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev,
4688 struct rtnl_link_stats64 *ns)
4690 struct port_stats stats;
4691 struct port_info *p = netdev_priv(dev);
4692 struct adapter *adapter = p->adapter;
4694 /* Block retrieving statistics during EEH error
4695 * recovery. Otherwise, the recovery might fail
4696 * and the PCI device will be removed permanently
4698 spin_lock(&adapter->stats_lock);
4699 if (!netif_device_present(dev)) {
4700 spin_unlock(&adapter->stats_lock);
4703 t4_get_port_stats(adapter, p->tx_chan, &stats);
4704 spin_unlock(&adapter->stats_lock);
4706 ns->tx_bytes = stats.tx_octets;
4707 ns->tx_packets = stats.tx_frames;
4708 ns->rx_bytes = stats.rx_octets;
4709 ns->rx_packets = stats.rx_frames;
4710 ns->multicast = stats.rx_mcast_frames;
4712 /* detailed rx_errors */
4713 ns->rx_length_errors = stats.rx_jabber + stats.rx_too_long +
4715 ns->rx_over_errors = 0;
4716 ns->rx_crc_errors = stats.rx_fcs_err;
4717 ns->rx_frame_errors = stats.rx_symbol_err;
4718 ns->rx_fifo_errors = stats.rx_ovflow0 + stats.rx_ovflow1 +
4719 stats.rx_ovflow2 + stats.rx_ovflow3 +
4720 stats.rx_trunc0 + stats.rx_trunc1 +
4721 stats.rx_trunc2 + stats.rx_trunc3;
4722 ns->rx_missed_errors = 0;
4724 /* detailed tx_errors */
4725 ns->tx_aborted_errors = 0;
4726 ns->tx_carrier_errors = 0;
4727 ns->tx_fifo_errors = 0;
4728 ns->tx_heartbeat_errors = 0;
4729 ns->tx_window_errors = 0;
4731 ns->tx_errors = stats.tx_error_frames;
4732 ns->rx_errors = stats.rx_symbol_err + stats.rx_fcs_err +
4733 ns->rx_length_errors + stats.rx_len_err + ns->rx_fifo_errors;
4737 static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
4740 int ret = 0, prtad, devad;
4741 struct port_info *pi = netdev_priv(dev);
4742 struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data;
4746 if (pi->mdio_addr < 0)
4748 data->phy_id = pi->mdio_addr;
4752 if (mdio_phy_id_is_c45(data->phy_id)) {
4753 prtad = mdio_phy_id_prtad(data->phy_id);
4754 devad = mdio_phy_id_devad(data->phy_id);
4755 } else if (data->phy_id < 32) {
4756 prtad = data->phy_id;
4758 data->reg_num &= 0x1f;
4762 mbox = pi->adapter->fn;
4763 if (cmd == SIOCGMIIREG)
4764 ret = t4_mdio_rd(pi->adapter, mbox, prtad, devad,
4765 data->reg_num, &data->val_out);
4767 ret = t4_mdio_wr(pi->adapter, mbox, prtad, devad,
4768 data->reg_num, data->val_in);
4776 static void cxgb_set_rxmode(struct net_device *dev)
4778 /* unfortunately we can't return errors to the stack */
4779 set_rxmode(dev, -1, false);
4782 static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
4785 struct port_info *pi = netdev_priv(dev);
4787 if (new_mtu < 81 || new_mtu > MAX_MTU) /* accommodate SACK */
4789 ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, new_mtu, -1,
4796 static int cxgb_set_mac_addr(struct net_device *dev, void *p)
4799 struct sockaddr *addr = p;
4800 struct port_info *pi = netdev_priv(dev);
4802 if (!is_valid_ether_addr(addr->sa_data))
4803 return -EADDRNOTAVAIL;
4805 ret = t4_change_mac(pi->adapter, pi->adapter->fn, pi->viid,
4806 pi->xact_addr_filt, addr->sa_data, true, true);
4810 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
4811 pi->xact_addr_filt = ret;
4815 #ifdef CONFIG_NET_POLL_CONTROLLER
4816 static void cxgb_netpoll(struct net_device *dev)
4818 struct port_info *pi = netdev_priv(dev);
4819 struct adapter *adap = pi->adapter;
4821 if (adap->flags & USING_MSIX) {
4823 struct sge_eth_rxq *rx = &adap->sge.ethrxq[pi->first_qset];
4825 for (i = pi->nqsets; i; i--, rx++)
4826 t4_sge_intr_msix(0, &rx->rspq);
4828 t4_intr_handler(adap)(0, adap);
4832 static const struct net_device_ops cxgb4_netdev_ops = {
4833 .ndo_open = cxgb_open,
4834 .ndo_stop = cxgb_close,
4835 .ndo_start_xmit = t4_eth_xmit,
4836 .ndo_select_queue = cxgb_select_queue,
4837 .ndo_get_stats64 = cxgb_get_stats,
4838 .ndo_set_rx_mode = cxgb_set_rxmode,
4839 .ndo_set_mac_address = cxgb_set_mac_addr,
4840 .ndo_set_features = cxgb_set_features,
4841 .ndo_validate_addr = eth_validate_addr,
4842 .ndo_do_ioctl = cxgb_ioctl,
4843 .ndo_change_mtu = cxgb_change_mtu,
4844 #ifdef CONFIG_NET_POLL_CONTROLLER
4845 .ndo_poll_controller = cxgb_netpoll,
4849 void t4_fatal_err(struct adapter *adap)
4851 t4_set_reg_field(adap, SGE_CONTROL, GLOBALENABLE, 0);
4852 t4_intr_disable(adap);
4853 dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n");
4856 /* Return the specified PCI-E Configuration Space register from our Physical
4857 * Function. We try first via a Firmware LDST Command since we prefer to let
4858 * the firmware own all of these registers, but if that fails we go for it
4859 * directly ourselves.
4861 static u32 t4_read_pcie_cfg4(struct adapter *adap, int reg)
4863 struct fw_ldst_cmd ldst_cmd;
4867 /* Construct and send the Firmware LDST Command to retrieve the
4868 * specified PCI-E Configuration Space register.
4870 memset(&ldst_cmd, 0, sizeof(ldst_cmd));
4871 ldst_cmd.op_to_addrspace =
4872 htonl(FW_CMD_OP_V(FW_LDST_CMD) |
4875 FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_FUNC_PCIE));
4876 ldst_cmd.cycles_to_len16 = htonl(FW_LEN16(ldst_cmd));
4877 ldst_cmd.u.pcie.select_naccess = FW_LDST_CMD_NACCESS_V(1);
4878 ldst_cmd.u.pcie.ctrl_to_fn =
4879 (FW_LDST_CMD_LC_F | FW_LDST_CMD_FN_V(adap->fn));
4880 ldst_cmd.u.pcie.r = reg;
4881 ret = t4_wr_mbox(adap, adap->mbox, &ldst_cmd, sizeof(ldst_cmd),
4884 /* If the LDST Command succeeded, extract the returned register
4885 * value. Otherwise read it directly ourselves.
4888 val = ntohl(ldst_cmd.u.pcie.data[0]);
4890 t4_hw_pci_read_cfg4(adap, reg, &val);
4895 static void setup_memwin(struct adapter *adap)
4897 u32 mem_win0_base, mem_win1_base, mem_win2_base, mem_win2_aperture;
4899 if (is_t4(adap->params.chip)) {
4902 /* Truncation intentional: we only read the bottom 32-bits of
4903 * the 64-bit BAR0/BAR1 ... We use the hardware backdoor
4904 * mechanism to read BAR0 instead of using
4905 * pci_resource_start() because we could be operating from
4906 * within a Virtual Machine which is trapping our accesses to
4907 * our Configuration Space and we need to set up the PCI-E
4908 * Memory Window decoders with the actual addresses which will
4909 * be coming across the PCI-E link.
4911 bar0 = t4_read_pcie_cfg4(adap, PCI_BASE_ADDRESS_0);
4912 bar0 &= PCI_BASE_ADDRESS_MEM_MASK;
4913 adap->t4_bar0 = bar0;
4915 mem_win0_base = bar0 + MEMWIN0_BASE;
4916 mem_win1_base = bar0 + MEMWIN1_BASE;
4917 mem_win2_base = bar0 + MEMWIN2_BASE;
4918 mem_win2_aperture = MEMWIN2_APERTURE;
4920 /* For T5, only relative offset inside the PCIe BAR is passed */
4921 mem_win0_base = MEMWIN0_BASE;
4922 mem_win1_base = MEMWIN1_BASE;
4923 mem_win2_base = MEMWIN2_BASE_T5;
4924 mem_win2_aperture = MEMWIN2_APERTURE_T5;
4926 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 0),
4927 mem_win0_base | BIR(0) |
4928 WINDOW(ilog2(MEMWIN0_APERTURE) - 10));
4929 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 1),
4930 mem_win1_base | BIR(0) |
4931 WINDOW(ilog2(MEMWIN1_APERTURE) - 10));
4932 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2),
4933 mem_win2_base | BIR(0) |
4934 WINDOW(ilog2(mem_win2_aperture) - 10));
4935 t4_read_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2));
4938 static void setup_memwin_rdma(struct adapter *adap)
4940 if (adap->vres.ocq.size) {
4944 start = t4_read_pcie_cfg4(adap, PCI_BASE_ADDRESS_2);
4945 start &= PCI_BASE_ADDRESS_MEM_MASK;
4946 start += OCQ_WIN_OFFSET(adap->pdev, &adap->vres);
4947 sz_kb = roundup_pow_of_two(adap->vres.ocq.size) >> 10;
4949 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 3),
4950 start | BIR(1) | WINDOW(ilog2(sz_kb)));
4952 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, 3),
4953 adap->vres.ocq.start);
4955 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, 3));
4959 static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
4964 /* get device capabilities */
4965 memset(c, 0, sizeof(*c));
4966 c->op_to_write = htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
4967 FW_CMD_REQUEST_F | FW_CMD_READ_F);
4968 c->cfvalid_to_len16 = htonl(FW_LEN16(*c));
4969 ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), c);
4973 /* select capabilities we'll be using */
4974 if (c->niccaps & htons(FW_CAPS_CONFIG_NIC_VM)) {
4976 c->niccaps ^= htons(FW_CAPS_CONFIG_NIC_VM);
4978 c->niccaps = htons(FW_CAPS_CONFIG_NIC_VM);
4979 } else if (vf_acls) {
4980 dev_err(adap->pdev_dev, "virtualization ACLs not supported");
4983 c->op_to_write = htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
4984 FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
4985 ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), NULL);
4989 ret = t4_config_glbl_rss(adap, adap->fn,
4990 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL,
4991 FW_RSS_GLB_CONFIG_CMD_TNLMAPEN_F |
4992 FW_RSS_GLB_CONFIG_CMD_TNLALLLKP_F);
4996 ret = t4_cfg_pfvf(adap, adap->fn, adap->fn, 0, MAX_EGRQ, 64, MAX_INGQ,
4997 0, 0, 4, 0xf, 0xf, 16, FW_CMD_CAP_PF, FW_CMD_CAP_PF);
5003 /* tweak some settings */
5004 t4_write_reg(adap, TP_SHIFT_CNT, 0x64f8849);
5005 t4_write_reg(adap, ULP_RX_TDDP_PSZ, HPZ0(PAGE_SHIFT - 12));
5006 t4_write_reg(adap, TP_PIO_ADDR, TP_INGRESS_CONFIG);
5007 v = t4_read_reg(adap, TP_PIO_DATA);
5008 t4_write_reg(adap, TP_PIO_DATA, v & ~CSUM_HAS_PSEUDO_HDR);
5010 /* first 4 Tx modulation queues point to consecutive Tx channels */
5011 adap->params.tp.tx_modq_map = 0xE4;
5012 t4_write_reg(adap, A_TP_TX_MOD_QUEUE_REQ_MAP,
5013 V_TX_MOD_QUEUE_REQ_MAP(adap->params.tp.tx_modq_map));
5015 /* associate each Tx modulation queue with consecutive Tx channels */
5017 t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA,
5018 &v, 1, A_TP_TX_SCHED_HDR);
5019 t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA,
5020 &v, 1, A_TP_TX_SCHED_FIFO);
5021 t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA,
5022 &v, 1, A_TP_TX_SCHED_PCMD);
5024 #define T4_TX_MODQ_10G_WEIGHT_DEFAULT 16 /* in KB units */
5025 if (is_offload(adap)) {
5026 t4_write_reg(adap, A_TP_TX_MOD_QUEUE_WEIGHT0,
5027 V_TX_MODQ_WEIGHT0(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
5028 V_TX_MODQ_WEIGHT1(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
5029 V_TX_MODQ_WEIGHT2(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
5030 V_TX_MODQ_WEIGHT3(T4_TX_MODQ_10G_WEIGHT_DEFAULT));
5031 t4_write_reg(adap, A_TP_TX_MOD_CHANNEL_WEIGHT,
5032 V_TX_MODQ_WEIGHT0(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
5033 V_TX_MODQ_WEIGHT1(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
5034 V_TX_MODQ_WEIGHT2(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
5035 V_TX_MODQ_WEIGHT3(T4_TX_MODQ_10G_WEIGHT_DEFAULT));
5038 /* get basic stuff going */
5039 return t4_early_init(adap, adap->fn);
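/*
 * A small worked example (not compiled into the driver) of the 0xE4 Tx
 * modulation queue map programmed above: consecutive 2-bit fields
 * select channels 0, 1, 2, 3 for mod queues 0..3.
 */
#if 0
#include <assert.h>

static void tx_modq_map_example(void)
{
	unsigned int map = 0xE4;		/* 0b11100100 */

	assert(((map >> 0) & 3) == 0);		/* mod queue 0 -> channel 0 */
	assert(((map >> 2) & 3) == 1);		/* mod queue 1 -> channel 1 */
	assert(((map >> 4) & 3) == 2);		/* mod queue 2 -> channel 2 */
	assert(((map >> 6) & 3) == 3);		/* mod queue 3 -> channel 3 */
}
#endif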
5043 * Max # of ATIDs. The absolute HW max is 16K but we keep it lower.
5045 #define MAX_ATIDS 8192U
5048 * Phase 0 of initialization: contact FW, obtain config, perform basic init.
5050 * If the firmware we're dealing with has Configuration File support, then
5051 * we use that to perform all configuration.
5055 * Tweak configuration based on module parameters, etc. Most of these have
5056 * defaults assigned to them by Firmware Configuration Files (if we're using
5057 * them) but need to be explicitly set if we're using hard-coded
5058 * initialization. But even in the case of using Firmware Configuration
5059 * Files, we'd like to expose the ability to change these via module
5060 * parameters so these are essentially common tweaks/settings for
5061 * Configuration Files and hard-coded initialization ...
5063 static int adap_init0_tweaks(struct adapter *adapter)
5066 * Fix up various Host-Dependent Parameters like Page Size, Cache
5067 * Line Size, etc. The firmware default is for a 4KB Page Size and
5068 * 64B Cache Line Size ...
5070 t4_fixup_host_params(adapter, PAGE_SIZE, L1_CACHE_BYTES);
5073 * Process module parameters which affect early initialization.
5075 if (rx_dma_offset != 2 && rx_dma_offset != 0) {
5076 dev_err(&adapter->pdev->dev,
5077 "Ignoring illegal rx_dma_offset=%d, using 2\n",
5078 rx_dma_offset);
5079 rx_dma_offset = 2;
5080 }
5081 t4_set_reg_field(adapter, SGE_CONTROL,
5082 PKTSHIFT_MASK,
5083 PKTSHIFT(rx_dma_offset));
5086 * Don't include the "IP Pseudo Header" in CPL_RX_PKT checksums: Linux
5087 * adds the pseudo header itself.
5089 t4_tp_wr_bits_indirect(adapter, TP_INGRESS_CONFIG,
5090 CSUM_HAS_PSEUDO_HDR, 0);
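/*
 * Illustrative check (not compiled into the driver) of why 2 is the
 * useful non-zero rx_dma_offset: padding the 14-byte Ethernet header
 * by 2 bytes leaves the IP header 4-byte aligned in the Rx buffer.
 */
#if 0
#include <assert.h>

static void rx_dma_offset_alignment_example(void)
{
	const unsigned int eth_hdr_len = 14;

	assert((eth_hdr_len + 2) % 4 == 0);	/* offset 2: IP header aligned */
	assert((eth_hdr_len + 0) % 4 != 0);	/* offset 0: IP header unaligned */
}
#endif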
5096 * Attempt to initialize the adapter via a Firmware Configuration File.
5098 static int adap_init0_config(struct adapter *adapter, int reset)
5100 struct fw_caps_config_cmd caps_cmd;
5101 const struct firmware *cf;
5102 unsigned long mtype = 0, maddr = 0;
5103 u32 finiver, finicsum, cfcsum;
5105 int config_issued = 0;
5106 char *fw_config_file, fw_config_file_path[256];
5107 char *config_name = NULL;
5110 * Reset device if necessary.
5113 ret = t4_fw_reset(adapter, adapter->mbox,
5114 PIORSTMODE | PIORST);
5120 * If we have a T4 configuration file under /lib/firmware/cxgb4/,
5121 * then use that. Otherwise, use the configuration file stored
5122 * in the adapter flash ...
5124 switch (CHELSIO_CHIP_VERSION(adapter->params.chip)) {
5126 fw_config_file = FW4_CFNAME;
5129 fw_config_file = FW5_CFNAME;
5132 dev_err(adapter->pdev_dev, "Device %d is not supported\n",
5133 adapter->pdev->device);
5138 ret = request_firmware(&cf, fw_config_file, adapter->pdev_dev);
5140 config_name = "On FLASH";
5141 mtype = FW_MEMTYPE_CF_FLASH;
5142 maddr = t4_flash_cfg_addr(adapter);
5144 u32 params[7], val[7];
5146 sprintf(fw_config_file_path,
5147 "/lib/firmware/%s", fw_config_file);
5148 config_name = fw_config_file_path;
5150 if (cf->size >= FLASH_CFG_MAX_SIZE)
5153 params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
5154 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_CF));
5155 ret = t4_query_params(adapter, adapter->mbox,
5156 adapter->fn, 0, 1, params, val);
5159 * For t4_memory_rw() below addresses and
5160 * sizes have to be in terms of multiples of 4
5161 * bytes. So, if the Configuration File isn't
5162 * a multiple of 4 bytes in length we'll have
5163 * to write that out separately since we can't
5164 * guarantee that the bytes following the
5165 * residual byte in the buffer returned by
5166 * request_firmware() are zeroed out ...
5168 size_t resid = cf->size & 0x3;
5169 size_t size = cf->size & ~0x3;
5170 __be32 *data = (__be32 *)cf->data;
5172 mtype = FW_PARAMS_PARAM_Y_G(val[0]);
5173 maddr = FW_PARAMS_PARAM_Z_G(val[0]) << 16;
5175 spin_lock(&adapter->win0_lock);
5176 ret = t4_memory_rw(adapter, 0, mtype, maddr,
5177 size, data, T4_MEMORY_WRITE);
5178 if (ret == 0 && resid != 0) {
5185 last.word = data[size >> 2];
5186 for (i = resid; i < 4; i++)
5187 last.data[i] = 0;
5188 ret = t4_memory_rw(adapter, 0, mtype,
5193 spin_unlock(&adapter->win0_lock);
5197 release_firmware(cf);
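/*
 * Sketch of the 4-byte alignment split used for t4_memory_rw() above
 * (illustrative only, not compiled into the driver): the body is the
 * length rounded down to a multiple of 4 and the residue is written
 * separately.
 */
#if 0
#include <assert.h>
#include <stddef.h>

static void memory_rw_split_example(void)
{
	size_t len = 1027;			/* hypothetical cf->size */
	size_t size  = len & ~(size_t)0x3;	/* aligned body: 1024 */
	size_t resid = len &  (size_t)0x3;	/* trailing bytes: 3 */

	assert(size == 1024 && resid == 3);
	assert(size + resid == len);
}
#endif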
5203 * Issue a Capability Configuration command to the firmware to get it
5204 * to parse the Configuration File. We don't use t4_fw_config_file()
5205 * because we want the ability to modify various features after we've
5206 * processed the configuration file ...
5208 memset(&caps_cmd, 0, sizeof(caps_cmd));
5209 caps_cmd.op_to_write =
5210 htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
5213 caps_cmd.cfvalid_to_len16 =
5214 htonl(FW_CAPS_CONFIG_CMD_CFVALID_F |
5215 FW_CAPS_CONFIG_CMD_MEMTYPE_CF_V(mtype) |
5216 FW_CAPS_CONFIG_CMD_MEMADDR64K_CF_V(maddr >> 16) |
5217 FW_LEN16(caps_cmd));
5218 ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
5221 /* If the CAPS_CONFIG failed with an ENOENT (for a Firmware
5222 * Configuration File in FLASH), our last gasp effort is to use the
5223 * Firmware Configuration File which is embedded in the firmware. A
5224 * very few early versions of the firmware didn't have one embedded
5225 * but we can ignore those.
5227 if (ret == -ENOENT) {
5228 memset(&caps_cmd, 0, sizeof(caps_cmd));
5229 caps_cmd.op_to_write =
5230 htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
5233 caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
5234 ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd,
5235 sizeof(caps_cmd), &caps_cmd);
5236 config_name = "Firmware Default";
5243 finiver = ntohl(caps_cmd.finiver);
5244 finicsum = ntohl(caps_cmd.finicsum);
5245 cfcsum = ntohl(caps_cmd.cfcsum);
5246 if (finicsum != cfcsum)
5247 dev_warn(adapter->pdev_dev, "Configuration File checksum "
5248 "mismatch: [fini] csum=%#x, computed csum=%#x\n",
5252 * And now tell the firmware to use the configuration we just loaded.
5254 caps_cmd.op_to_write =
5255 htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
5258 caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
5259 ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
5265 * Tweak configuration based on system architecture, module
5268 ret = adap_init0_tweaks(adapter);
5273 * And finally tell the firmware to initialize itself using the
5274 * parameters from the Configuration File.
5276 ret = t4_fw_initialize(adapter, adapter->mbox);
5281 * Return successfully and note that we're operating with parameters
5282 * not supplied by the driver, rather than from hard-wired
5283 * initialization constants buried in the driver.
5285 adapter->flags |= USING_SOFT_PARAMS;
5286 dev_info(adapter->pdev_dev, "Successfully configured using Firmware "
5287 "Configuration File \"%s\", version %#x, computed checksum %#x\n",
5288 config_name, finiver, cfcsum);
5292 * Something bad happened. Return the error ... (If the "error"
5293 * is that there's no Configuration File on the adapter we don't
5294 * want to issue a warning since this is fairly common.)
5297 if (config_issued && ret != -ENOENT)
5298 dev_warn(adapter->pdev_dev, "\"%s\" configuration file error %d\n",
5304 * Attempt to initialize the adapter via hard-coded, driver supplied
5305 * parameters.
5306 */
5307 static int adap_init0_no_config(struct adapter *adapter, int reset)
5309 struct sge *s = &adapter->sge;
5310 struct fw_caps_config_cmd caps_cmd;
5315 * Reset device if necessary
5318 ret = t4_fw_reset(adapter, adapter->mbox,
5319 PIORSTMODE | PIORST);
5325 * Get device capabilities and select which we'll be using.
5327 memset(&caps_cmd, 0, sizeof(caps_cmd));
5328 caps_cmd.op_to_write = htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
5329 FW_CMD_REQUEST_F | FW_CMD_READ_F);
5330 caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
5331 ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
5336 if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_VM)) {
5338 caps_cmd.niccaps ^= htons(FW_CAPS_CONFIG_NIC_VM);
5340 caps_cmd.niccaps = htons(FW_CAPS_CONFIG_NIC_VM);
5341 } else if (vf_acls) {
5342 dev_err(adapter->pdev_dev, "virtualization ACLs not supported\n");
5345 caps_cmd.op_to_write = htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
5346 FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
5347 ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
5353 * Tweak configuration based on system architecture, module
5356 ret = adap_init0_tweaks(adapter);
5361 * Select RSS Global Mode we want to use. We use "Basic Virtual"
5362 * mode which maps each Virtual Interface to its own section of
5363 * the RSS Table and we turn on all map and hash enables ...
5365 adapter->flags |= RSS_TNLALLLOOKUP;
5366 ret = t4_config_glbl_rss(adapter, adapter->mbox,
5367 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL,
5368 FW_RSS_GLB_CONFIG_CMD_TNLMAPEN_F |
5369 FW_RSS_GLB_CONFIG_CMD_HASHTOEPLITZ_F |
5370 ((adapter->flags & RSS_TNLALLLOOKUP) ?
5371 FW_RSS_GLB_CONFIG_CMD_TNLALLLKP_F : 0));
5376 * Set up our own fundamental resource provisioning ...
5378 ret = t4_cfg_pfvf(adapter, adapter->mbox, adapter->fn, 0,
5379 PFRES_NEQ, PFRES_NETHCTRL,
5380 PFRES_NIQFLINT, PFRES_NIQ,
5381 PFRES_TC, PFRES_NVI,
5382 FW_PFVF_CMD_CMASK_M,
5383 pfvfres_pmask(adapter, adapter->fn, 0),
5385 PFRES_R_CAPS, PFRES_WX_CAPS);
5390 * Perform low level SGE initialization. We need to do this before we
5391 * send the firmware the INITIALIZE command because that will cause
5392 * any other PF Drivers which are waiting for the Master
5393 * Initialization to proceed forward.
5395 for (i = 0; i < SGE_NTIMERS - 1; i++)
5396 s->timer_val[i] = min(intr_holdoff[i], MAX_SGE_TIMERVAL);
5397 s->timer_val[SGE_NTIMERS - 1] = MAX_SGE_TIMERVAL;
5398 s->counter_val[0] = 1;
5399 for (i = 1; i < SGE_NCOUNTERS; i++)
5400 s->counter_val[i] = min(intr_cnt[i - 1],
5401 THRESHOLD_0_GET(THRESHOLD_0_MASK));
5402 t4_sge_init(adapter);
5404 #ifdef CONFIG_PCI_IOV
5406 * Provision resource limits for Virtual Functions. We currently
5407 * grant them all the same static resource limits except for the Port
5408 * Access Rights Mask which we're assigning based on the PF. All of
5409 * the static provisioning stuff for both the PF and VF really needs
5410 * to be managed in a persistent manner for each device which the
5411 * firmware controls.
5416 for (pf = 0; pf < ARRAY_SIZE(num_vf); pf++) {
5417 if (num_vf[pf] <= 0)
5420 /* VF numbering starts at 1! */
5421 for (vf = 1; vf <= num_vf[pf]; vf++) {
5422 ret = t4_cfg_pfvf(adapter, adapter->mbox,
5424 VFRES_NEQ, VFRES_NETHCTRL,
5425 VFRES_NIQFLINT, VFRES_NIQ,
5426 VFRES_TC, VFRES_NVI,
5427 FW_PFVF_CMD_CMASK_M,
5431 VFRES_R_CAPS, VFRES_WX_CAPS);
5433 dev_warn(adapter->pdev_dev,
5435 "provision pf/vf=%d/%d; "
5436 "err=%d\n", pf, vf, ret);
5443 * Set up the default filter mode. Later we'll want to implement this
5444 * via a firmware command, etc. ... This needs to be done before the
5445 * firmware initialization command ... If the selected set of fields
5446 * isn't equal to the default value, we'll need to make sure that the
5447 * field selections will fit in the 36-bit budget.
5449 if (tp_vlan_pri_map != TP_VLAN_PRI_MAP_DEFAULT) {
5452 for (j = TP_VLAN_PRI_MAP_FIRST; j <= TP_VLAN_PRI_MAP_LAST; j++)
5453 switch (tp_vlan_pri_map & (1 << j)) {
5455 /* compressed filter field not enabled */
5475 case ETHERTYPE_MASK:
5481 case MPSHITTYPE_MASK:
5484 case FRAGMENTATION_MASK:
5490 dev_err(adapter->pdev_dev,
5491 "tp_vlan_pri_map=%#x needs %d bits > 36;"
5492 " using %#x\n", tp_vlan_pri_map, bits,
5493 TP_VLAN_PRI_MAP_DEFAULT);
5494 tp_vlan_pri_map = TP_VLAN_PRI_MAP_DEFAULT;
5497 v = tp_vlan_pri_map;
5498 t4_write_indirect(adapter, TP_PIO_ADDR, TP_PIO_DATA,
5499 &v, 1, TP_VLAN_PRI_MAP);
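/*
 * Sketch of the 36-bit budget check above with hypothetical field
 * widths (the real per-field widths come from the switch statement);
 * not compiled into the driver.
 */
#if 0
#include <assert.h>

static void filter_bits_budget_example(void)
{
	const unsigned int widths[] = { 16, 16, 3, 1 };	/* hypothetical */
	unsigned int bits = 0, i;

	for (i = 0; i < sizeof(widths) / sizeof(widths[0]); i++)
		bits += widths[i];
	assert(bits <= 36);	/* otherwise fall back to the default map */
}
#endif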
5502 * We need Five Tuple Lookup mode to be set in TP_GLOBAL_CONFIG in order
5503 * to support any of the compressed filter fields above. Newer
5504 * versions of the firmware do this automatically but it doesn't hurt
5505 * to set it here. Meanwhile, we do _not_ need to set Lookup Every
5506 * Packet in TP_INGRESS_CONFIG to support matching non-TCP packets
5507 * since the firmware automatically turns this on and off when we have
5508 * a non-zero number of filters active (since it does have a
5509 * performance impact).
5511 if (tp_vlan_pri_map)
5512 t4_set_reg_field(adapter, TP_GLOBAL_CONFIG,
5513 FIVETUPLELOOKUP_MASK,
5514 FIVETUPLELOOKUP_MASK);
5517 * Tweak some settings.
5519 t4_write_reg(adapter, TP_SHIFT_CNT, SYNSHIFTMAX(6) |
5520 RXTSHIFTMAXR1(4) | RXTSHIFTMAXR2(15) |
5521 PERSHIFTBACKOFFMAX(8) | PERSHIFTMAX(8) |
5522 KEEPALIVEMAXR1(4) | KEEPALIVEMAXR2(9));
5525 * Get basic stuff going by issuing the Firmware Initialize command.
5526 * Note that this _must_ be after all PFVF commands ...
5528 ret = t4_fw_initialize(adapter, adapter->mbox);
5533 * Return successfully!
5535 dev_info(adapter->pdev_dev, "Successfully configured using built-in "
5536 "driver parameters\n");
5540 * Something bad happened. Return the error ...
5546 static struct fw_info fw_info_array[] = {
5549 .fs_name = FW4_CFNAME,
5550 .fw_mod_name = FW4_FNAME,
5552 .chip = FW_HDR_CHIP_T4,
5553 .fw_ver = __cpu_to_be32(FW_VERSION(T4)),
5554 .intfver_nic = FW_INTFVER(T4, NIC),
5555 .intfver_vnic = FW_INTFVER(T4, VNIC),
5556 .intfver_ri = FW_INTFVER(T4, RI),
5557 .intfver_iscsi = FW_INTFVER(T4, ISCSI),
5558 .intfver_fcoe = FW_INTFVER(T4, FCOE),
5562 .fs_name = FW5_CFNAME,
5563 .fw_mod_name = FW5_FNAME,
5565 .chip = FW_HDR_CHIP_T5,
5566 .fw_ver = __cpu_to_be32(FW_VERSION(T5)),
5567 .intfver_nic = FW_INTFVER(T5, NIC),
5568 .intfver_vnic = FW_INTFVER(T5, VNIC),
5569 .intfver_ri = FW_INTFVER(T5, RI),
5570 .intfver_iscsi = FW_INTFVER(T5, ISCSI),
5571 .intfver_fcoe = FW_INTFVER(T5, FCOE),
5576 static struct fw_info *find_fw_info(int chip)
5580 for (i = 0; i < ARRAY_SIZE(fw_info_array); i++) {
5581 if (fw_info_array[i].chip == chip)
5582 return &fw_info_array[i];
5588 * Phase 0 of initialization: contact FW, obtain config, perform basic init.
5590 static int adap_init0(struct adapter *adap)
5594 enum dev_state state;
5595 u32 params[7], val[7];
5596 struct fw_caps_config_cmd caps_cmd;
5600 * Contact FW, advertising Master capability (and potentially forcing
5601 * ourselves as the Master PF if our module parameter force_init is
5602 * set).
5603 */
5604 ret = t4_fw_hello(adap, adap->mbox, adap->fn,
5605 force_init ? MASTER_MUST : MASTER_MAY,
5608 dev_err(adap->pdev_dev, "could not connect to FW, error %d\n",
5612 if (ret == adap->mbox)
5613 adap->flags |= MASTER_PF;
5614 if (force_init && state == DEV_STATE_INIT)
5615 state = DEV_STATE_UNINIT;
5618 * If we're the Master PF Driver and the device is uninitialized,
5619 * then let's consider upgrading the firmware ... (We always want
5620 * to check the firmware version number in order to A. get it for
5621 * later reporting and B. to warn if the currently loaded firmware
5622 * is excessively mismatched relative to the driver.)
5624 t4_get_fw_version(adap, &adap->params.fw_vers);
5625 t4_get_tp_version(adap, &adap->params.tp_vers);
5626 if ((adap->flags & MASTER_PF) && state != DEV_STATE_INIT) {
5627 struct fw_info *fw_info;
5628 struct fw_hdr *card_fw;
5629 const struct firmware *fw;
5630 const u8 *fw_data = NULL;
5631 unsigned int fw_size = 0;
5633 /* This is the firmware whose headers the driver was compiled
5634 * against.
5635 */
5636 fw_info = find_fw_info(CHELSIO_CHIP_VERSION(adap->params.chip));
5637 if (fw_info == NULL) {
5638 dev_err(adap->pdev_dev,
5639 "unable to get firmware info for chip %d.\n",
5640 CHELSIO_CHIP_VERSION(adap->params.chip));
5644 /* allocate memory to read the header of the firmware on the
5645 * card.
5646 */
5647 card_fw = t4_alloc_mem(sizeof(*card_fw));
5649 /* Get FW from /lib/firmware/ */
5650 ret = request_firmware(&fw, fw_info->fw_mod_name,
5653 dev_err(adap->pdev_dev,
5654 "unable to load firmware image %s, error %d\n",
5655 fw_info->fw_mod_name, ret);
5661 /* upgrade FW logic */
5662 ret = t4_prep_fw(adap, fw_info, fw_data, fw_size, card_fw,
5667 release_firmware(fw);
5668 t4_free_mem(card_fw);
5675 * Grab VPD parameters. This should be done after we establish a
5676 * connection to the firmware since some of the VPD parameters
5677 * (notably the Core Clock frequency) are retrieved via requests to
5678 * the firmware. On the other hand, we need these fairly early on
5679 * so we do this right after getting ahold of the firmware.
5681 ret = get_vpd_params(adap, &adap->params.vpd);
5686 * Find out what ports are available to us. Note that we need to do
5687 * this before calling adap_init0_no_config() since it needs nports
5688 * and portvec.
5689 */
5690 v =
5691 FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
5692 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_PORTVEC);
5693 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 1, &v, &port_vec);
5697 adap->params.nports = hweight32(port_vec);
5698 adap->params.portvec = port_vec;
5701 * If the firmware is initialized already (and we're not forcing a
5702 * master initialization), note that we're living with existing
5703 * adapter parameters. Otherwise, it's time to try initializing the
5704 * adapter.
5705 */
5706 if (state == DEV_STATE_INIT) {
5707 dev_info(adap->pdev_dev, "Coming up as %s: "
5708 "Adapter already initialized\n",
5709 adap->flags & MASTER_PF ? "MASTER" : "SLAVE");
5710 adap->flags |= USING_SOFT_PARAMS;
5712 dev_info(adap->pdev_dev, "Coming up as MASTER: "
5713 "Initializing adapter\n");
5715 * If the firmware doesn't support Configuration
5716 * Files, warn the user and exit.
5719 dev_warn(adap->pdev_dev, "Firmware doesn't support "
5720 "configuration file.\n");
5722 ret = adap_init0_no_config(adap, reset);
5725 * Find out whether we're dealing with a version of
5726 * the firmware which has configuration file support.
5728 params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
5729 FW_PARAMS_PARAM_X_V(
5730 FW_PARAMS_PARAM_DEV_CF));
5731 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 1,
5735 * If the firmware doesn't support Configuration
5736 * Files, use the old Driver-based, hard-wired
5737 * initialization. Otherwise, try using the
5738 * Configuration File support and fall back to the
5739 * Driver-based initialization if there's no
5740 * Configuration File found.
5743 ret = adap_init0_no_config(adap, reset);
5746 * The firmware provides us with a memory
5747 * buffer where we can load a Configuration
5748 * File from the host if we want to override
5749 * the Configuration File in flash.
5752 ret = adap_init0_config(adap, reset);
5753 if (ret == -ENOENT) {
5754 dev_info(adap->pdev_dev,
5755 "No Configuration File present "
5756 "on adapter. Using hard-wired "
5757 "configuration parameters.\n");
5758 ret = adap_init0_no_config(adap, reset);
5763 dev_err(adap->pdev_dev,
5764 "could not initialize adapter, error %d\n",
5771 * If we're living with non-hard-coded parameters (either from a
5772 * Firmware Configuration File or values programmed by a different PF
5773 * Driver), give the SGE code a chance to pull in anything that it
5774 * needs ... Note that this must be called after we retrieve our VPD
5775 * parameters in order to know how to convert core ticks to seconds.
5777 if (adap->flags & USING_SOFT_PARAMS) {
5778 ret = t4_sge_init(adap);
5783 if (is_bypass_device(adap->pdev->device))
5784 adap->params.bypass = 1;
5787 * Grab some of our basic fundamental operating parameters.
5789 #define FW_PARAM_DEV(param) \
5790 (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | \
5791 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_##param))
5793 #define FW_PARAM_PFVF(param) \
5794 FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \
5795 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param)| \
5796 FW_PARAMS_PARAM_Y_V(0) | \
5797 FW_PARAMS_PARAM_Z_V(0)
5799 params[0] = FW_PARAM_PFVF(EQ_START);
5800 params[1] = FW_PARAM_PFVF(L2T_START);
5801 params[2] = FW_PARAM_PFVF(L2T_END);
5802 params[3] = FW_PARAM_PFVF(FILTER_START);
5803 params[4] = FW_PARAM_PFVF(FILTER_END);
5804 params[5] = FW_PARAM_PFVF(IQFLINT_START);
5805 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6, params, val);
5808 adap->sge.egr_start = val[0];
5809 adap->l2t_start = val[1];
5810 adap->l2t_end = val[2];
5811 adap->tids.ftid_base = val[3];
5812 adap->tids.nftids = val[4] - val[3] + 1;
5813 adap->sge.ingr_start = val[5];
5815 /* query params related to active filter region */
5816 params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START);
5817 params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END);
5818 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params, val);
5819 /* If the active filter region is non-empty, we enable establishing
5820 * offload connections through firmware work requests.
5821 */
5822 if ((val[0] != val[1]) && (ret >= 0)) {
5823 adap->flags |= FW_OFLD_CONN;
5824 adap->tids.aftid_base = val[0];
5825 adap->tids.aftid_end = val[1];
5828 /* If we're running on newer firmware, let it know that we're
5829 * prepared to deal with encapsulated CPL messages. Older
5830 * firmware won't understand this and we'll just get
5831 * unencapsulated messages ...
5833 params[0] = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
5835 (void) t4_set_params(adap, adap->mbox, adap->fn, 0, 1, params, val);
5838 * Find out whether we're allowed to use the T5+ ULPTX MEMWRITE DSGL
5839 * capability. Earlier versions of the firmware didn't have the
5840 * ULPTX_MEMWRITE_DSGL so we'll interpret a query failure as no
5841 * permission to use ULPTX MEMWRITE DSGL.
5843 if (is_t4(adap->params.chip)) {
5844 adap->params.ulptx_memwrite_dsgl = false;
5846 params[0] = FW_PARAM_DEV(ULPTX_MEMWRITE_DSGL);
5847 ret = t4_query_params(adap, adap->mbox, adap->fn, 0,
5849 adap->params.ulptx_memwrite_dsgl = (ret == 0 && val[0] != 0);
5853 * Get device capabilities so we can determine what resources we need
5854 * to manage.
5855 */
5856 memset(&caps_cmd, 0, sizeof(caps_cmd));
5857 caps_cmd.op_to_write = htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
5858 FW_CMD_REQUEST_F | FW_CMD_READ_F);
5859 caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
5860 ret = t4_wr_mbox(adap, adap->mbox, &caps_cmd, sizeof(caps_cmd),
5865 if (caps_cmd.ofldcaps) {
5866 /* query offload-related parameters */
5867 params[0] = FW_PARAM_DEV(NTID);
5868 params[1] = FW_PARAM_PFVF(SERVER_START);
5869 params[2] = FW_PARAM_PFVF(SERVER_END);
5870 params[3] = FW_PARAM_PFVF(TDDP_START);
5871 params[4] = FW_PARAM_PFVF(TDDP_END);
5872 params[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
5873 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6,
5877 adap->tids.ntids = val[0];
5878 adap->tids.natids = min(adap->tids.ntids / 2, MAX_ATIDS);
5879 adap->tids.stid_base = val[1];
5880 adap->tids.nstids = val[2] - val[1] + 1;
5882 * Set up the server filter region. Divide the available filter
5883 * region into two parts: regular filters get 1/3rd and server
5884 * filters get 2/3rd. This is only enabled if the workaround
5885 * path is enabled.
5886 * 1. Regular filters.
5887 * 2. Server filters: these are special filters used to redirect
5888 * SYN packets to the offload queue.
5890 if (adap->flags & FW_OFLD_CONN && !is_bypass(adap)) {
5891 adap->tids.sftid_base = adap->tids.ftid_base +
5892 DIV_ROUND_UP(adap->tids.nftids, 3);
5893 adap->tids.nsftids = adap->tids.nftids -
5894 DIV_ROUND_UP(adap->tids.nftids, 3);
5895 adap->tids.nftids = adap->tids.sftid_base -
5896 adap->tids.ftid_base;
5898 adap->vres.ddp.start = val[3];
5899 adap->vres.ddp.size = val[4] - val[3] + 1;
5900 adap->params.ofldq_wr_cred = val[5];
5902 adap->params.offload = 1;
5904 if (caps_cmd.rdmacaps) {
5905 params[0] = FW_PARAM_PFVF(STAG_START);
5906 params[1] = FW_PARAM_PFVF(STAG_END);
5907 params[2] = FW_PARAM_PFVF(RQ_START);
5908 params[3] = FW_PARAM_PFVF(RQ_END);
5909 params[4] = FW_PARAM_PFVF(PBL_START);
5910 params[5] = FW_PARAM_PFVF(PBL_END);
5911 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6,
5915 adap->vres.stag.start = val[0];
5916 adap->vres.stag.size = val[1] - val[0] + 1;
5917 adap->vres.rq.start = val[2];
5918 adap->vres.rq.size = val[3] - val[2] + 1;
5919 adap->vres.pbl.start = val[4];
5920 adap->vres.pbl.size = val[5] - val[4] + 1;
5922 params[0] = FW_PARAM_PFVF(SQRQ_START);
5923 params[1] = FW_PARAM_PFVF(SQRQ_END);
5924 params[2] = FW_PARAM_PFVF(CQ_START);
5925 params[3] = FW_PARAM_PFVF(CQ_END);
5926 params[4] = FW_PARAM_PFVF(OCQ_START);
5927 params[5] = FW_PARAM_PFVF(OCQ_END);
5928 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6, params,
5932 adap->vres.qp.start = val[0];
5933 adap->vres.qp.size = val[1] - val[0] + 1;
5934 adap->vres.cq.start = val[2];
5935 adap->vres.cq.size = val[3] - val[2] + 1;
5936 adap->vres.ocq.start = val[4];
5937 adap->vres.ocq.size = val[5] - val[4] + 1;
5939 params[0] = FW_PARAM_DEV(MAXORDIRD_QP);
5940 params[1] = FW_PARAM_DEV(MAXIRD_ADAPTER);
5941 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params,
5944 adap->params.max_ordird_qp = 8;
5945 adap->params.max_ird_adapter = 32 * adap->tids.ntids;
5948 adap->params.max_ordird_qp = val[0];
5949 adap->params.max_ird_adapter = val[1];
5951 dev_info(adap->pdev_dev,
5952 "max_ordird_qp %d max_ird_adapter %d\n",
5953 adap->params.max_ordird_qp,
5954 adap->params.max_ird_adapter);
5956 if (caps_cmd.iscsicaps) {
5957 params[0] = FW_PARAM_PFVF(ISCSI_START);
5958 params[1] = FW_PARAM_PFVF(ISCSI_END);
5959 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2,
5963 adap->vres.iscsi.start = val[0];
5964 adap->vres.iscsi.size = val[1] - val[0] + 1;
5966 #undef FW_PARAM_PFVF
5967 #undef FW_PARAM_DEV
5969 /* The MTU/MSS Table is initialized by now, so load their values. If
5970 * we're initializing the adapter, then we'll make any modifications
5971 * we want to the MTU/MSS Table and also initialize the congestion
5972 * parameters.
5973 */
5974 t4_read_mtu_tbl(adap, adap->params.mtus, NULL);
5975 if (state != DEV_STATE_INIT) {
5978 /* The default MTU Table contains values 1492 and 1500.
5979 * However, for TCP, it's better to have two values which are
5980 * a multiple of 8 +/- 4 bytes apart near this popular MTU.
5981 * This allows us to have a TCP Data Payload which is a
5982 * multiple of 8 regardless of what combination of TCP Options
5983 * are in use (always a multiple of 4 bytes) which is
5984 * important for performance reasons. For instance, if no
5985 * options are in use, then we have a 20-byte IP header and a
5986 * 20-byte TCP header. In this case, a 1500-byte MSS would
5987 * result in a TCP Data Payload of 1500 - 40 == 1460 bytes
5988 * which is not a multiple of 8. So using an MSS of 1488 in
5989 * this case results in a TCP Data Payload of 1448 bytes which
5990 * is a multiple of 8. On the other hand, if 12-byte TCP Time
5991 * Stamps have been negotiated, then an MTU of 1500 bytes
5992 * results in a TCP Data Payload of 1448 bytes which, as
5993 * above, is a multiple of 8 bytes ...
5995 for (i = 0; i < NMTUS; i++)
5996 if (adap->params.mtus[i] == 1492) {
5997 adap->params.mtus[i] = 1488;
5998 break;
5999 }
6001 t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
6002 adap->params.b_wnd);
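/*
 * Worked check (not compiled into the driver) of the MTU comment
 * above: with 20B IP + 20B TCP headers, an MTU of 1488 yields a
 * payload that is a multiple of 8 while 1500 does not; with 12B TCP
 * timestamps, 1500 works out as well.
 */
#if 0
#include <assert.h>

static void mtu_mss_multiple_of_8_example(void)
{
	assert((1500 - 40) % 8 != 0);		/* 1460: not a multiple of 8 */
	assert((1488 - 40) % 8 == 0);		/* 1448 = 8 * 181 */
	assert((1500 - 40 - 12) % 8 == 0);	/* timestamps: also 1448 */
}
#endif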
6004 t4_init_tp_params(adap);
6005 adap->flags |= FW_OK;
6009 * Something bad happened. If a command timed out or failed with EIO,
6010 * the FW is not operating within its spec or something catastrophic
6011 * happened to the HW/FW; stop issuing commands.
6014 if (ret != -ETIMEDOUT && ret != -EIO)
6015 t4_fw_bye(adap, adap->mbox);
6021 static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
6022 pci_channel_state_t state)
6025 struct adapter *adap = pci_get_drvdata(pdev);
6031 adap->flags &= ~FW_OK;
6032 notify_ulds(adap, CXGB4_STATE_START_RECOVERY);
6033 spin_lock(&adap->stats_lock);
6034 for_each_port(adap, i) {
6035 struct net_device *dev = adap->port[i];
6037 netif_device_detach(dev);
6038 netif_carrier_off(dev);
6040 spin_unlock(&adap->stats_lock);
6041 if (adap->flags & FULL_INIT_DONE)
6042 cxgb_down(adap);
6044 if ((adap->flags & DEV_ENABLED)) {
6045 pci_disable_device(pdev);
6046 adap->flags &= ~DEV_ENABLED;
6048 out: return state == pci_channel_io_perm_failure ?
6049 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
6052 static pci_ers_result_t eeh_slot_reset(struct pci_dev *pdev)
6055 struct fw_caps_config_cmd c;
6056 struct adapter *adap = pci_get_drvdata(pdev);
6059 pci_restore_state(pdev);
6060 pci_save_state(pdev);
6061 return PCI_ERS_RESULT_RECOVERED;
6064 if (!(adap->flags & DEV_ENABLED)) {
6065 if (pci_enable_device(pdev)) {
6066 dev_err(&pdev->dev, "Cannot reenable PCI "
6067 "device after reset\n");
6068 return PCI_ERS_RESULT_DISCONNECT;
6070 adap->flags |= DEV_ENABLED;
6073 pci_set_master(pdev);
6074 pci_restore_state(pdev);
6075 pci_save_state(pdev);
6076 pci_cleanup_aer_uncorrect_error_status(pdev);
6078 if (t4_wait_dev_ready(adap->regs) < 0)
6079 return PCI_ERS_RESULT_DISCONNECT;
6080 if (t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, NULL) < 0)
6081 return PCI_ERS_RESULT_DISCONNECT;
6082 adap->flags |= FW_OK;
6083 if (adap_init1(adap, &c))
6084 return PCI_ERS_RESULT_DISCONNECT;
6086 for_each_port(adap, i) {
6087 struct port_info *p = adap2pinfo(adap, i);
6089 ret = t4_alloc_vi(adap, adap->fn, p->tx_chan, adap->fn, 0, 1,
6092 return PCI_ERS_RESULT_DISCONNECT;
6094 p->xact_addr_filt = -1;
6097 t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
6098 adap->params.b_wnd);
6100 if (cxgb_up(adap))
6101 return PCI_ERS_RESULT_DISCONNECT;
6102 return PCI_ERS_RESULT_RECOVERED;
6105 static void eeh_resume(struct pci_dev *pdev)
6108 struct adapter *adap = pci_get_drvdata(pdev);
6114 for_each_port(adap, i) {
6115 struct net_device *dev = adap->port[i];
6117 if (netif_running(dev)) {
6118 link_start(dev);
6119 cxgb_set_rxmode(dev);
6121 netif_device_attach(dev);
6126 static const struct pci_error_handlers cxgb4_eeh = {
6127 .error_detected = eeh_err_detected,
6128 .slot_reset = eeh_slot_reset,
6129 .resume = eeh_resume,
6132 static inline bool is_x_10g_port(const struct link_config *lc)
6134 return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
6135 (lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
6138 static inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
6139 unsigned int us, unsigned int cnt,
6140 unsigned int size, unsigned int iqe_size)
6143 set_rspq_intr_params(q, us, cnt);
6144 q->iqe_len = iqe_size;
6145 q->size = size;
6149 * Perform default configuration of DMA queues depending on the number and type
6150 * of ports we found and the number of available CPUs. Most settings can be
6151 * modified by the admin prior to actual use.
6153 static void cfg_queues(struct adapter *adap)
6155 struct sge *s = &adap->sge;
6156 int i, n10g = 0, qidx = 0;
6157 #ifndef CONFIG_CHELSIO_T4_DCB
6162 for_each_port(adap, i)
6163 n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
6164 #ifdef CONFIG_CHELSIO_T4_DCB
6165 /* For Data Center Bridging support we need to be able to support up
6166 * to 8 Traffic Priorities; each of which will be assigned to its
6167 * own TX Queue in order to prevent Head-Of-Line Blocking.
6169 if (adap->params.nports * 8 > MAX_ETH_QSETS) {
6170 dev_err(adap->pdev_dev, "MAX_ETH_QSETS=%d < %d!\n",
6171 MAX_ETH_QSETS, adap->params.nports * 8);
6175 for_each_port(adap, i) {
6176 struct port_info *pi = adap2pinfo(adap, i);
6178 pi->first_qset = qidx;
6182 #else /* !CONFIG_CHELSIO_T4_DCB */
6184 * We default to 1 queue per non-10G port and up to # of cores queues
6185 * per 10G port.
6186 */
6187 if (n10g)
6188 q10g = (MAX_ETH_QSETS - (adap->params.nports - n10g)) / n10g;
6189 if (q10g > netif_get_num_default_rss_queues())
6190 q10g = netif_get_num_default_rss_queues();
6192 for_each_port(adap, i) {
6193 struct port_info *pi = adap2pinfo(adap, i);
6195 pi->first_qset = qidx;
6196 pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
6199 #endif /* !CONFIG_CHELSIO_T4_DCB */
6202 s->max_ethqsets = qidx; /* MSI-X may lower it later */
6204 if (is_offload(adap)) {
6206 * For offload we use 1 queue/channel if all ports are up to 1G,
6207 * otherwise we divide all available queues amongst the channels
6208 * capped by the number of available cores.
6211 i = min_t(int, ARRAY_SIZE(s->ofldrxq),
6212 num_online_cpus());
6213 s->ofldqsets = roundup(i, adap->params.nports);
6215 s->ofldqsets = adap->params.nports;
6216 /* For RDMA one Rx queue per channel suffices */
6217 s->rdmaqs = adap->params.nports;
6218 s->rdmaciqs = adap->params.nports;
6221 for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
6222 struct sge_eth_rxq *r = &s->ethrxq[i];
6224 init_rspq(adap, &r->rspq, 5, 10, 1024, 64);
6228 for (i = 0; i < ARRAY_SIZE(s->ethtxq); i++)
6229 s->ethtxq[i].q.size = 1024;
6231 for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++)
6232 s->ctrlq[i].q.size = 512;
6234 for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
6235 s->ofldtxq[i].q.size = 1024;
6237 for (i = 0; i < ARRAY_SIZE(s->ofldrxq); i++) {
6238 struct sge_ofld_rxq *r = &s->ofldrxq[i];
6240 init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
6241 r->rspq.uld = CXGB4_ULD_ISCSI;
6245 for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
6246 struct sge_ofld_rxq *r = &s->rdmarxq[i];
6248 init_rspq(adap, &r->rspq, 5, 1, 511, 64);
6249 r->rspq.uld = CXGB4_ULD_RDMA;
6253 ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids;
6254 if (ciq_size > SGE_MAX_IQ_SIZE) {
6255 CH_WARN(adap, "CIQ size too small for available IQs\n");
6256 ciq_size = SGE_MAX_IQ_SIZE;
6259 for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) {
6260 struct sge_ofld_rxq *r = &s->rdmaciq[i];
6262 init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64);
6263 r->rspq.uld = CXGB4_ULD_RDMA;
6266 init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64);
6267 init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64);
6271 * Reduce the number of Ethernet queues across all ports to at most n.
6272 * n provides at least one queue per port.
6274 static void reduce_ethqs(struct adapter *adap, int n)
6277 struct port_info *pi;
6279 while (n < adap->sge.ethqsets)
6280 for_each_port(adap, i) {
6281 pi = adap2pinfo(adap, i);
6282 if (pi->nqsets > 1) {
6284 adap->sge.ethqsets--;
6285 if (adap->sge.ethqsets <= n)
6291 for_each_port(adap, i) {
6292 pi = adap2pinfo(adap, i);
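/*
 * Sketch of the round-robin reduction above with hypothetical port
 * counts (not compiled into the driver): each pass trims one queue set
 * per port until the total fits, and no port drops below one.
 */
#if 0
#include <assert.h>

static void reduce_ethqs_example(void)
{
	int nq[4] = { 8, 8, 1, 1 };	/* per-port queue sets */
	int total = 18, target = 10, i;

	while (total > target)
		for (i = 0; i < 4; i++)
			if (nq[i] > 1 && total > target) {
				nq[i]--;
				total--;
			}
	assert(total == target);
	assert(nq[0] == 4 && nq[1] == 4);	/* trimmed evenly */
	assert(nq[2] == 1 && nq[3] == 1);	/* never below one */
}
#endif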
6298 /* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
6299 #define EXTRA_VECS 2
6301 static int enable_msix(struct adapter *adap)
6305 struct sge *s = &adap->sge;
6306 unsigned int nchan = adap->params.nports;
6307 struct msix_entry entries[MAX_INGQ + 1];
6309 for (i = 0; i < ARRAY_SIZE(entries); ++i)
6310 entries[i].entry = i;
6312 want = s->max_ethqsets + EXTRA_VECS;
6313 if (is_offload(adap)) {
6314 want += s->rdmaqs + s->rdmaciqs + s->ofldqsets;
6315 /* need nchan for each possible ULD */
6316 ofld_need = 3 * nchan;
6318 #ifdef CONFIG_CHELSIO_T4_DCB
6319 /* For Data Center Bridging we need 8 Ethernet TX Priority Queues for
6322 need = 8 * adap->params.nports + EXTRA_VECS + ofld_need;
6324 need = adap->params.nports + EXTRA_VECS + ofld_need;
6326 want = pci_enable_msix_range(adap->pdev, entries, need, want);
6331 * Distribute available vectors to the various queue groups.
6332 * Every group gets its minimum requirement and NIC gets top
6333 * priority for leftovers.
6335 i = want - EXTRA_VECS - ofld_need;
6336 if (i < s->max_ethqsets) {
6337 s->max_ethqsets = i;
6338 if (i < s->ethqsets)
6339 reduce_ethqs(adap, i);
6341 if (is_offload(adap)) {
6342 i = want - EXTRA_VECS - s->max_ethqsets;
6343 i -= ofld_need - nchan;
6344 s->ofldqsets = (i / nchan) * nchan; /* round down */
6346 for (i = 0; i < want; ++i)
6347 adap->msix_info[i].vec = entries[i].vector;
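/*
 * Sketch of the MSI-X vector accounting above with hypothetical
 * numbers for a 2-port offload-capable adapter (not compiled into the
 * driver): "want" covers every queue plus EXTRA_VECS, "need" is the
 * floor passed to pci_enable_msix_range().
 */
#if 0
#include <assert.h>

static void msix_accounting_example(void)
{
	int nchan = 2, extra = 2;	/* EXTRA_VECS */
	int max_ethqsets = 16, rdmaqs = 2, rdmaciqs = 2, ofldqsets = 8;
	int want = max_ethqsets + extra + rdmaqs + rdmaciqs + ofldqsets;
	int need = nchan + extra + 3 * nchan;	/* nchan per possible ULD */

	assert(want == 30);
	assert(need == 10);
	assert(need <= want);	/* range handed to pci_enable_msix_range() */
}
#endif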
6354 static int init_rss(struct adapter *adap)
6358 for_each_port(adap, i) {
6359 struct port_info *pi = adap2pinfo(adap, i);
6361 pi->rss = kcalloc(pi->rss_size, sizeof(u16), GFP_KERNEL);
6362 if (!pi->rss)
6363 return -ENOMEM;
6364 for (j = 0; j < pi->rss_size; j++)
6365 pi->rss[j] = ethtool_rxfh_indir_default(j, pi->nqsets);
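/*
 * Illustrative expansion (not compiled into the driver) of what
 * ethtool_rxfh_indir_default() produces above: a round-robin spread of
 * queue indices across the RSS indirection table.
 */
#if 0
#include <assert.h>

static void rss_indir_default_example(void)
{
	unsigned int rss_size = 8, nqsets = 3, j;
	unsigned int rss[8];

	for (j = 0; j < rss_size; j++)
		rss[j] = j % nqsets;	/* 0,1,2,0,1,2,0,1 */
	assert(rss[3] == 0 && rss[7] == 1);
}
#endif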
6370 static void print_port_info(const struct net_device *dev)
6372 char buf[80];
6373 char *bufp = buf;
6374 const char *spd = "";
6375 const struct port_info *pi = netdev_priv(dev);
6376 const struct adapter *adap = pi->adapter;
6378 if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_2_5GB)
6379 spd = " 2.5 GT/s";
6380 else if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_5_0GB)
6381 spd = " 5 GT/s";
6382 else if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_8_0GB)
6383 spd = " 8 GT/s";
6385 if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100M)
6386 bufp += sprintf(bufp, "100/");
6387 if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_1G)
6388 bufp += sprintf(bufp, "1000/");
6389 if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
6390 bufp += sprintf(bufp, "10G/");
6391 if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G)
6392 bufp += sprintf(bufp, "40G/");
6393 if (bufp != buf)
6394 --bufp;
6395 sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type));
6397 netdev_info(dev, "Chelsio %s rev %d %s %sNIC PCIe x%d%s%s\n",
6398 adap->params.vpd.id,
6399 CHELSIO_CHIP_RELEASE(adap->params.chip), buf,
6400 is_offload(adap) ? "R" : "", adap->params.pci.width, spd,
6401 (adap->flags & USING_MSIX) ? " MSI-X" :
6402 (adap->flags & USING_MSI) ? " MSI" : "");
6403 netdev_info(dev, "S/N: %s, P/N: %s\n",
6404 adap->params.vpd.sn, adap->params.vpd.pn);
6407 static void enable_pcie_relaxed_ordering(struct pci_dev *dev)
6409 pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN);
6413 * Free the following resources:
6414 * - memory used for tables
6415 * - MSI/MSI-X
6416 * - net devices
6417 * - resources FW is holding for us
6418 */
6419 static void free_some_resources(struct adapter *adapter)
6423 t4_free_mem(adapter->l2t);
6424 t4_free_mem(adapter->tids.tid_tab);
6425 disable_msi(adapter);
6427 for_each_port(adapter, i)
6428 if (adapter->port[i]) {
6429 kfree(adap2pinfo(adapter, i)->rss);
6430 free_netdev(adapter->port[i]);
6432 if (adapter->flags & FW_OK)
6433 t4_fw_bye(adapter, adapter->fn);
6436 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
6437 #define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
6438 NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
6439 #define SEGMENT_SIZE 128
6441 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
6443 int func, i, err, s_qpp, qpp, num_seg;
6444 struct port_info *pi;
6445 bool highdma = false;
6446 struct adapter *adapter = NULL;
6447 void __iomem *regs;
6449 printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
6451 err = pci_request_regions(pdev, KBUILD_MODNAME);
6453 /* Just info, some other driver may have claimed the device. */
6454 dev_info(&pdev->dev, "cannot obtain PCI resources\n");
6458 err = pci_enable_device(pdev);
6460 dev_err(&pdev->dev, "cannot enable PCI device\n");
6461 goto out_release_regions;
6464 regs = pci_ioremap_bar(pdev, 0);
6466 dev_err(&pdev->dev, "cannot map device registers\n");
6468 goto out_disable_device;
6471 err = t4_wait_dev_ready(regs);
6472 if (err < 0)
6473 goto out_unmap_bar0;
6475 /* We control everything through one PF */
6476 func = SOURCEPF_GET(readl(regs + PL_WHOAMI));
6477 if (func != ent->driver_data) {
6478 iounmap(regs);
6479 pci_disable_device(pdev);
6480 pci_save_state(pdev); /* to restore SR-IOV later */
6484 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
6485 highdma = true;
6486 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
6488 dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
6489 "coherent allocations\n");
6490 goto out_unmap_bar0;
6492 } else {
6493 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
6495 dev_err(&pdev->dev, "no usable DMA configuration\n");
6496 goto out_unmap_bar0;
6500 pci_enable_pcie_error_reporting(pdev);
6501 enable_pcie_relaxed_ordering(pdev);
6502 pci_set_master(pdev);
6503 pci_save_state(pdev);
6505 adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
6508 goto out_unmap_bar0;
6511 adapter->workq = create_singlethread_workqueue("cxgb4");
6512 if (!adapter->workq) {
6514 goto out_free_adapter;
6517 /* PCI device has been enabled */
6518 adapter->flags |= DEV_ENABLED;
6520 adapter->regs = regs;
6521 adapter->pdev = pdev;
6522 adapter->pdev_dev = &pdev->dev;
6523 adapter->mbox = func;
6525 adapter->msg_enable = dflt_msg_enable;
6526 memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
6528 spin_lock_init(&adapter->stats_lock);
6529 spin_lock_init(&adapter->tid_release_lock);
6530 spin_lock_init(&adapter->win0_lock);
6532 INIT_WORK(&adapter->tid_release_task, process_tid_release_list);
6533 INIT_WORK(&adapter->db_full_task, process_db_full);
6534 INIT_WORK(&adapter->db_drop_task, process_db_drop);
6536 err = t4_prep_adapter(adapter);
6538 goto out_free_adapter;
6541 if (!is_t4(adapter->params.chip)) {
6542 s_qpp = QUEUESPERPAGEPF1 * adapter->fn;
6543 qpp = 1 << QUEUESPERPAGEPF0_GET(t4_read_reg(adapter,
6544 SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp);
6545 num_seg = PAGE_SIZE / SEGMENT_SIZE;
6547 /* Each segment size is 128B. Write coalescing is enabled only
6548 * when the SGE_EGRESS_QUEUES_PER_PAGE_PF value for the
6549 * queue is less than the number of segments that can be
6550 * accommodated in a page.
6551 */
6552 if (qpp > num_seg) {
6554 "Incorrect number of egress queues per page\n");
6556 goto out_free_adapter;
6558 adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
6559 pci_resource_len(pdev, 2));
6560 if (!adapter->bar2) {
6561 dev_err(&pdev->dev, "cannot map device bar2 region\n");
6563 goto out_free_adapter;
6567 setup_memwin(adapter);
6568 err = adap_init0(adapter);
6569 setup_memwin_rdma(adapter);
6573 for_each_port(adapter, i) {
6574 struct net_device *netdev;
6576 netdev = alloc_etherdev_mq(sizeof(struct port_info),
6583 SET_NETDEV_DEV(netdev, &pdev->dev);
6585 adapter->port[i] = netdev;
6586 pi = netdev_priv(netdev);
6587 pi->adapter = adapter;
6588 pi->xact_addr_filt = -1;
6590 netdev->irq = pdev->irq;
6592 netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
6593 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
6594 NETIF_F_RXCSUM | NETIF_F_RXHASH |
6595 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
6596 if (highdma)
6597 netdev->hw_features |= NETIF_F_HIGHDMA;
6598 netdev->features |= netdev->hw_features;
6599 netdev->vlan_features = netdev->features & VLAN_FEAT;
6601 netdev->priv_flags |= IFF_UNICAST_FLT;
6603 netdev->netdev_ops = &cxgb4_netdev_ops;
6604 #ifdef CONFIG_CHELSIO_T4_DCB
6605 netdev->dcbnl_ops = &cxgb4_dcb_ops;
6606 cxgb4_dcb_state_init(netdev);
6608 netdev->ethtool_ops = &cxgb_ethtool_ops;
6611 pci_set_drvdata(pdev, adapter);
6613 if (adapter->flags & FW_OK) {
6614 err = t4_port_init(adapter, func, func, 0);
6620 * Configure queues and allocate tables now, they can be needed as
6621 * soon as the first register_netdev completes.
6623 cfg_queues(adapter);
6625 adapter->l2t = t4_init_l2t();
6626 if (!adapter->l2t) {
6627 /* We tolerate a lack of L2T, giving up some functionality */
6628 dev_warn(&pdev->dev, "could not allocate L2T, continuing\n");
6629 adapter->params.offload = 0;
6632 if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
6633 dev_warn(&pdev->dev, "could not allocate TID table, "
6634 "continuing\n");
6635 adapter->params.offload = 0;
6638 /* See what interrupts we'll be using */
6639 if (msi > 1 && enable_msix(adapter) == 0)
6640 adapter->flags |= USING_MSIX;
6641 else if (msi > 0 && pci_enable_msi(pdev) == 0)
6642 adapter->flags |= USING_MSI;
6644 err = init_rss(adapter);
6649 * The card is now ready to go. If any errors occur during device
6650 * registration we do not fail the whole card but rather proceed only
6651 * with the ports we manage to register successfully. However we must
6652 * register at least one net device.
6654 for_each_port(adapter, i) {
6655 pi = adap2pinfo(adapter, i);
6656 netif_set_real_num_tx_queues(adapter->port[i], pi->nqsets);
6657 netif_set_real_num_rx_queues(adapter->port[i], pi->nqsets);
6659 err = register_netdev(adapter->port[i]);
6662 adapter->chan_map[pi->tx_chan] = i;
6663 print_port_info(adapter->port[i]);
6666 dev_err(&pdev->dev, "could not register any net devices\n");
6670 dev_warn(&pdev->dev, "only %d net devices registered\n", i);
6674 if (cxgb4_debugfs_root) {
6675 adapter->debugfs_root = debugfs_create_dir(pci_name(pdev),
6676 cxgb4_debugfs_root);
6677 setup_debugfs(adapter);
6680 /* PCIe EEH recovery on powerpc platforms needs fundamental reset */
6681 pdev->needs_freset = 1;
6683 if (is_offload(adapter))
6684 attach_ulds(adapter);
6687 #ifdef CONFIG_PCI_IOV
6688 if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0)
6689 if (pci_enable_sriov(pdev, num_vf[func]) == 0)
6690 dev_info(&pdev->dev,
6691 "instantiated %u virtual functions\n",
6697 free_some_resources(adapter);
6699 if (!is_t4(adapter->params.chip))
6700 iounmap(adapter->bar2);
6703 destroy_workqueue(adapter->workq);
6709 pci_disable_pcie_error_reporting(pdev);
6710 pci_disable_device(pdev);
6711 out_release_regions:
6712 pci_release_regions(pdev);
6716 static void remove_one(struct pci_dev *pdev)
6718 struct adapter *adapter = pci_get_drvdata(pdev);
6720 #ifdef CONFIG_PCI_IOV
6721 pci_disable_sriov(pdev);
6728 /* Tear down per-adapter Work Queue first since it can contain
6729 * references to our adapter data structure.
6731 destroy_workqueue(adapter->workq);
6733 if (is_offload(adapter))
6734 detach_ulds(adapter);
6736 for_each_port(adapter, i)
6737 if (adapter->port[i]->reg_state == NETREG_REGISTERED)
6738 unregister_netdev(adapter->port[i]);
6740 debugfs_remove_recursive(adapter->debugfs_root);
6742 /* If we allocated filters, free up state associated with any
6745 if (adapter->tids.ftid_tab) {
6746 struct filter_entry *f = &adapter->tids.ftid_tab[0];
6747 for (i = 0; i < (adapter->tids.nftids +
6748 adapter->tids.nsftids); i++, f++)
6750 clear_filter(adapter, f);
6753 if (adapter->flags & FULL_INIT_DONE)
6756 free_some_resources(adapter);
6757 iounmap(adapter->regs);
6758 if (!is_t4(adapter->params.chip))
6759 iounmap(adapter->bar2);
6760 pci_disable_pcie_error_reporting(pdev);
6761 if ((adapter->flags & DEV_ENABLED)) {
6762 pci_disable_device(pdev);
6763 adapter->flags &= ~DEV_ENABLED;
6765 pci_release_regions(pdev);
6769 pci_release_regions(pdev);
6772 static struct pci_driver cxgb4_driver = {
6773 .name = KBUILD_MODNAME,
6774 .id_table = cxgb4_pci_tbl,
6776 .remove = remove_one,
6777 .shutdown = remove_one,
6778 .err_handler = &cxgb4_eeh,
6781 static int __init cxgb4_init_module(void)
6785 /* Debugfs support is optional, just warn if this fails */
6786 cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
6787 if (!cxgb4_debugfs_root)
6788 pr_warn("could not create debugfs entry, continuing\n");
6790 ret = pci_register_driver(&cxgb4_driver);
6791 if (ret < 0)
6792 debugfs_remove(cxgb4_debugfs_root);
6794 #if IS_ENABLED(CONFIG_IPV6)
6795 register_inet6addr_notifier(&cxgb4_inet6addr_notifier);
6801 static void __exit cxgb4_cleanup_module(void)
6803 #if IS_ENABLED(CONFIG_IPV6)
6804 unregister_inet6addr_notifier(&cxgb4_inet6addr_notifier);
6806 pci_unregister_driver(&cxgb4_driver);
6807 debugfs_remove(cxgb4_debugfs_root); /* NULL ok */
6810 module_init(cxgb4_init_module);
6811 module_exit(cxgb4_cleanup_module);