1 /*******************************************************************************
3 * Intel Ethernet Controller XL710 Family Linux Driver
4 * Copyright(c) 2013 - 2014 Intel Corporation.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program. If not, see <http://www.gnu.org/licenses/>.
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
21 * Contact Information:
22 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25 ******************************************************************************/
27 #include <linux/prefetch.h>
28 #include <net/busy_poll.h>
30 #include "i40e_prototype.h"
static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
				u32 td_tag)
{
35 return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
36 ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
37 ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
38 ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
			   ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
}
42 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
43 #define I40E_FD_CLEAN_DELAY 10
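
/* Illustration (not driver code): build_ctob() packs the entire second quad
 * word of a data descriptor.  A minimal sketch of the usual call, assuming a
 * 1514-byte buffer with no checksum offsets and no VLAN tag:
 *
 *	tx_desc->buffer_addr = cpu_to_le64(dma);
 *	tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TXD_CMD, 0, 1514, 0);
 *
 * I40E_TXD_CMD supplies EOP|RS, so the hardware both ends the packet at this
 * descriptor and reports its completion.
 */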
45 * i40e_program_fdir_filter - Program a Flow Director filter
46 * @fdir_data: Packet data that will be filter parameters
47 * @raw_packet: the pre-allocated packet buffer for FDir
 * @pf: the PF pointer
 * @add: True for add/update, False for remove
51 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
52 struct i40e_pf *pf, bool add)
54 struct i40e_filter_program_desc *fdir_desc;
55 struct i40e_tx_buffer *tx_buf, *first;
56 struct i40e_tx_desc *tx_desc;
57 struct i40e_ring *tx_ring;
58 unsigned int fpt, dcc;
66 /* find existing FDIR VSI */
68 for (i = 0; i < pf->num_alloc_vsi; i++)
69 if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
74 tx_ring = vsi->tx_rings[0];
77 /* we need two descriptors to add/del a filter and we can wait */
79 if (I40E_DESC_UNUSED(tx_ring) > 1)
81 msleep_interruptible(1);
83 } while (delay < I40E_FD_CLEAN_DELAY);
85 if (!(I40E_DESC_UNUSED(tx_ring) > 1))
88 dma = dma_map_single(dev, raw_packet,
89 I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
90 if (dma_mapping_error(dev, dma))
93 /* grab the next descriptor */
94 i = tx_ring->next_to_use;
95 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
96 first = &tx_ring->tx_bi[i];
97 memset(first, 0, sizeof(struct i40e_tx_buffer));
99 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
101 fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
102 I40E_TXD_FLTR_QW0_QINDEX_MASK;
104 fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
105 I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
107 fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
108 I40E_TXD_FLTR_QW0_PCTYPE_MASK;
110 /* Use LAN VSI Id if not programmed by user */
111 if (fdir_data->dest_vsi == 0)
112 fpt |= (pf->vsi[pf->lan_vsi]->id) <<
113 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
115 fpt |= ((u32)fdir_data->dest_vsi <<
116 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
117 I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
119 dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
122 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
123 I40E_TXD_FLTR_QW1_PCMD_SHIFT;
125 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
126 I40E_TXD_FLTR_QW1_PCMD_SHIFT;
128 dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
129 I40E_TXD_FLTR_QW1_DEST_MASK;
131 dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
132 I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
134 if (fdir_data->cnt_index != 0) {
135 dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
136 dcc |= ((u32)fdir_data->cnt_index <<
137 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
138 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
141 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
142 fdir_desc->rsvd = cpu_to_le32(0);
143 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
144 fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
146 /* Now program a dummy descriptor */
147 i = tx_ring->next_to_use;
148 tx_desc = I40E_TX_DESC(tx_ring, i);
149 tx_buf = &tx_ring->tx_bi[i];
151 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
153 memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
155 /* record length, and DMA address */
156 dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
157 dma_unmap_addr_set(tx_buf, dma, dma);
159 tx_desc->buffer_addr = cpu_to_le64(dma);
160 td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
162 tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
163 tx_buf->raw_buf = (void *)raw_packet;
165 tx_desc->cmd_type_offset_bsz =
166 build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
168 /* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.
	 */
	wmb();
173 /* Mark the data descriptor to be watched */
174 first->next_to_watch = tx_desc;
176 writel(tx_ring->next_to_use, tx_ring->tail);
183 #define IP_HEADER_OFFSET 14
184 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
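
/* The dummy packet lengths (I40E_*_DUMMY_PACKET_LEN) are simple sums of the
 * fixed headers in the pre-built templates: 14 bytes of Ethernet header
 * (hence IP_HEADER_OFFSET), plus a 20-byte IPv4 header, plus the L4 header:
 *
 *	UDP:  14 + 20 + 8  = 42  (I40E_UDPIP_DUMMY_PACKET_LEN)
 *	TCP:  14 + 20 + 20 = 54  (I40E_TCPIP_DUMMY_PACKET_LEN, defined below)
 *	IP:   14 + 20      = 34  (I40E_IP_DUMMY_PACKET_LEN, defined below)
 */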
186 * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
187 * @vsi: pointer to the targeted VSI
188 * @fd_data: the flow director data required for the FDir descriptor
189 * @add: true adds a filter, false removes it
191 * Returns 0 if the filters were successfully added or removed
193 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
194 struct i40e_fdir_filter *fd_data,
197 struct i40e_pf *pf = vsi->back;
203 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
204 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
205 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
207 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
210 memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
212 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
213 udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
214 + sizeof(struct iphdr));
216 ip->daddr = fd_data->dst_ip[0];
217 udp->dest = fd_data->dst_port;
218 ip->saddr = fd_data->src_ip[0];
219 udp->source = fd_data->src_port;
221 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
222 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
224 dev_info(&pf->pdev->dev,
225 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
226 fd_data->pctype, fd_data->fd_id, ret);
228 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
230 dev_info(&pf->pdev->dev,
231 "Filter OK for PCTYPE %d loc = %d\n",
232 fd_data->pctype, fd_data->fd_id);
234 dev_info(&pf->pdev->dev,
235 "Filter deleted for PCTYPE %d loc = %d\n",
236 fd_data->pctype, fd_data->fd_id);
238 return err ? -EOPNOTSUPP : 0;
241 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
243 * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
244 * @vsi: pointer to the targeted VSI
245 * @fd_data: the flow director data required for the FDir descriptor
246 * @add: true adds a filter, false removes it
248 * Returns 0 if the filters were successfully added or removed
250 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
251 struct i40e_fdir_filter *fd_data,
254 struct i40e_pf *pf = vsi->back;
261 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
262 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
263 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
264 0x0, 0x72, 0, 0, 0, 0};
266 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
269 memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
271 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
272 tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
273 + sizeof(struct iphdr));
275 ip->daddr = fd_data->dst_ip[0];
276 tcp->dest = fd_data->dst_port;
277 ip->saddr = fd_data->src_ip[0];
278 tcp->source = fd_data->src_port;
282 if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
283 if (I40E_DEBUG_FD & pf->hw.debug_mask)
284 dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
285 pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
288 pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
289 (pf->fd_tcp_rule - 1) : 0;
290 if (pf->fd_tcp_rule == 0) {
291 pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
292 if (I40E_DEBUG_FD & pf->hw.debug_mask)
293 dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
297 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
298 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
301 dev_info(&pf->pdev->dev,
302 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
303 fd_data->pctype, fd_data->fd_id, ret);
305 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
			dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
308 fd_data->pctype, fd_data->fd_id);
310 dev_info(&pf->pdev->dev,
311 "Filter deleted for PCTYPE %d loc = %d\n",
312 fd_data->pctype, fd_data->fd_id);
315 return err ? -EOPNOTSUPP : 0;
319 * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
320 * a specific flow spec
321 * @vsi: pointer to the targeted VSI
322 * @fd_data: the flow director data required for the FDir descriptor
323 * @add: true adds a filter, false removes it
325 * Always returns -EOPNOTSUPP
327 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
328 struct i40e_fdir_filter *fd_data,
334 #define I40E_IP_DUMMY_PACKET_LEN 34
336 * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
337 * a specific flow spec
338 * @vsi: pointer to the targeted VSI
339 * @fd_data: the flow director data required for the FDir descriptor
340 * @add: true adds a filter, false removes it
342 * Returns 0 if the filters were successfully added or removed
344 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
345 struct i40e_fdir_filter *fd_data,
348 struct i40e_pf *pf = vsi->back;
354 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
355 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
358 for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
359 i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
360 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
363 memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
364 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
366 ip->saddr = fd_data->src_ip[0];
367 ip->daddr = fd_data->dst_ip[0];
371 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
374 dev_info(&pf->pdev->dev,
375 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
376 fd_data->pctype, fd_data->fd_id, ret);
378 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
380 dev_info(&pf->pdev->dev,
381 "Filter OK for PCTYPE %d loc = %d\n",
382 fd_data->pctype, fd_data->fd_id);
384 dev_info(&pf->pdev->dev,
385 "Filter deleted for PCTYPE %d loc = %d\n",
386 fd_data->pctype, fd_data->fd_id);
390 return err ? -EOPNOTSUPP : 0;
394 * i40e_add_del_fdir - Build raw packets to add/del fdir filter
395 * @vsi: pointer to the targeted VSI
 * @input: the flow director filter spec to add or delete
397 * @add: true adds a filter, false removes it
400 int i40e_add_del_fdir(struct i40e_vsi *vsi,
401 struct i40e_fdir_filter *input, bool add)
403 struct i40e_pf *pf = vsi->back;
406 switch (input->flow_type & ~FLOW_EXT) {
408 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
411 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
414 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
417 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
420 switch (input->ip4_proto) {
422 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
425 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
428 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
431 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
436 dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
441 /* The buffer allocated here is freed by the i40e_clean_tx_ring() */
446 * i40e_fd_handle_status - check the Programming Status for FD
447 * @rx_ring: the Rx ring for this descriptor
448 * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
449 * @prog_id: the id originally used for programming
 * This is used to check whether the Flow Director programming or invalidation
 * requested by software completed successfully in hardware, and to take
 * corrective action when it did not.
454 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
455 union i40e_rx_desc *rx_desc, u8 prog_id)
457 struct i40e_pf *pf = rx_ring->vsi->back;
458 struct pci_dev *pdev = pf->pdev;
459 u32 fcnt_prog, fcnt_avail;
463 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
464 error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
465 I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
467 if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
468 if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
469 (I40E_DEBUG_FD & pf->hw.debug_mask))
470 dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
471 rx_desc->wb.qword0.hi_dword.fd_id);
473 /* Check if the programming error is for ATR.
474 * If so, auto disable ATR and set a state for
475 * flush in progress. Next time we come here if flush is in
		 * progress do nothing, once flush is complete the state will
		 * be cleared.
		 */
479 if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
483 /* store the current atr filter count */
484 pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
486 if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
487 (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
488 pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
489 set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
492 /* filter programming failed most likely due to table full */
493 fcnt_prog = i40e_get_global_fd_count(pf);
494 fcnt_avail = pf->fdir_pf_filter_count;
495 /* If ATR is running fcnt_prog can quickly change,
496 * if we are very close to full, it makes sense to disable
497 * FD ATR/SB and then re-enable it when there is room.
499 if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
500 if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
501 !(pf->auto_disable_flags &
502 I40E_FLAG_FD_SB_ENABLED)) {
503 if (I40E_DEBUG_FD & pf->hw.debug_mask)
504 dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
505 pf->auto_disable_flags |=
506 I40E_FLAG_FD_SB_ENABLED;
510 "FD filter programming failed due to incorrect filter parameters\n");
512 } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
513 if (I40E_DEBUG_FD & pf->hw.debug_mask)
514 dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
515 rx_desc->wb.qword0.hi_dword.fd_id);
520 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
521 * @ring: the ring that owns the buffer
522 * @tx_buffer: the buffer to free
524 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
525 struct i40e_tx_buffer *tx_buffer)
527 if (tx_buffer->skb) {
528 if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
529 kfree(tx_buffer->raw_buf);
531 dev_kfree_skb_any(tx_buffer->skb);
533 if (dma_unmap_len(tx_buffer, len))
534 dma_unmap_single(ring->dev,
535 dma_unmap_addr(tx_buffer, dma),
536 dma_unmap_len(tx_buffer, len),
538 } else if (dma_unmap_len(tx_buffer, len)) {
539 dma_unmap_page(ring->dev,
540 dma_unmap_addr(tx_buffer, dma),
541 dma_unmap_len(tx_buffer, len),
544 tx_buffer->next_to_watch = NULL;
545 tx_buffer->skb = NULL;
546 dma_unmap_len_set(tx_buffer, len, 0);
547 /* tx_buffer must be completely set up in the transmit path */
 * i40e_clean_tx_ring - Free all Tx buffers in a ring
552 * @tx_ring: ring to be cleaned
554 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
556 unsigned long bi_size;
559 /* ring already cleared, nothing to do */
563 /* Free all the Tx ring sk_buffs */
564 for (i = 0; i < tx_ring->count; i++)
565 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
567 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
568 memset(tx_ring->tx_bi, 0, bi_size);
570 /* Zero out the descriptor ring */
571 memset(tx_ring->desc, 0, tx_ring->size);
573 tx_ring->next_to_use = 0;
574 tx_ring->next_to_clean = 0;
576 if (!tx_ring->netdev)
579 /* cleanup Tx queue statistics */
580 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
581 tx_ring->queue_index));
585 * i40e_free_tx_resources - Free Tx resources per queue
586 * @tx_ring: Tx descriptor ring for a specific queue
588 * Free all transmit software resources
590 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
592 i40e_clean_tx_ring(tx_ring);
593 kfree(tx_ring->tx_bi);
594 tx_ring->tx_bi = NULL;
597 dma_free_coherent(tx_ring->dev, tx_ring->size,
598 tx_ring->desc, tx_ring->dma);
599 tx_ring->desc = NULL;
604 * i40e_get_head - Retrieve head from head writeback
605 * @tx_ring: tx ring to fetch head of
607 * Returns value of Tx ring head based on value stored
608 * in head write-back location
610 static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
612 void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
614 return le32_to_cpu(*(volatile __le32 *)head);
618 * i40e_get_tx_pending - how many tx descriptors not processed
619 * @tx_ring: the ring of descriptors
621 * Since there is no access to the ring head register
622 * in XL710, we need to use our local copies
624 static u32 i40e_get_tx_pending(struct i40e_ring *ring)
628 head = i40e_get_head(ring);
629 tail = readl(ring->tail);
	if (head != tail)
		return (head < tail) ?
			tail - head : (tail + ring->count - head);

	return 0;
}
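
/* Worked example (illustration only): with a 512-descriptor ring, the math
 * above handles wraparound of the write-back head versus the tail:
 *
 *	head = 500, tail = 10   =>   10 + 512 - 500 = 22 pending descriptors
 *	head = 10,  tail = 500  =>   500 - 10       = 490 pending descriptors
 */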
639 * i40e_check_tx_hang - Is there a hang in the Tx queue
640 * @tx_ring: the ring of descriptors
642 static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
644 u32 tx_done = tx_ring->stats.packets;
645 u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
646 u32 tx_pending = i40e_get_tx_pending(tx_ring);
647 struct i40e_pf *pf = tx_ring->vsi->back;
650 clear_check_for_tx_hang(tx_ring);
652 /* Check for a hung queue, but be thorough. This verifies
653 * that a transmit has been completed since the previous
654 * check AND there is at least one packet pending. The
655 * ARMED bit is set to indicate a potential hang. The
656 * bit is cleared if a pause frame is received to remove
657 * false hang detection due to PFC or 802.3x frames. By
658 * requiring this to fail twice we avoid races with
659 * PFC clearing the ARMED bit and conditions where we
660 * run the check_tx_hang logic with a transmit completion
661 * pending but without time to complete it yet.
663 if ((tx_done_old == tx_done) && tx_pending) {
664 /* make sure it is true for two checks in a row */
665 ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
667 } else if (tx_done_old == tx_done &&
668 (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) {
669 if (I40E_DEBUG_FLOW & pf->hw.debug_mask)
670 dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d",
671 tx_pending, tx_ring->queue_index);
672 pf->tx_sluggish_count++;
674 /* update completed stats and disarm the hang check */
675 tx_ring->tx_stats.tx_done_old = tx_done;
676 clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
682 #define WB_STRIDE 0x3
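
/* Illustration of WB_STRIDE: in the clean routine below, a write-back is
 * forced only when next_to_clean did not stop on a stride boundary, i.e.
 * when (i & WB_STRIDE) != WB_STRIDE.  With WB_STRIDE == 0x3, stopping at
 * index 3, 7, 11, ... is left to the hardware's normal cache-line
 * write-back, while any other stopping point arms an explicit write-back so
 * straggling descriptors are not left waiting.
 */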
685 * i40e_clean_tx_irq - Reclaim resources after transmit completes
686 * @tx_ring: tx ring to clean
687 * @budget: how many cleans we're allowed
 * Returns true if there's any budget left (i.e. the clean is finished)
691 static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
693 u16 i = tx_ring->next_to_clean;
694 struct i40e_tx_buffer *tx_buf;
695 struct i40e_tx_desc *tx_head;
696 struct i40e_tx_desc *tx_desc;
697 unsigned int total_packets = 0;
698 unsigned int total_bytes = 0;
700 tx_buf = &tx_ring->tx_bi[i];
701 tx_desc = I40E_TX_DESC(tx_ring, i);
704 tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
707 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
709 /* if next_to_watch is not set then there is no work pending */
713 /* prevent any other reads prior to eop_desc */
714 read_barrier_depends();
716 /* we have caught up to head, no work left to do */
717 if (tx_head == tx_desc)
720 /* clear next_to_watch to prevent false hangs */
721 tx_buf->next_to_watch = NULL;
723 /* update the statistics for this packet */
724 total_bytes += tx_buf->bytecount;
725 total_packets += tx_buf->gso_segs;
728 dev_consume_skb_any(tx_buf->skb);
730 /* unmap skb header data */
731 dma_unmap_single(tx_ring->dev,
732 dma_unmap_addr(tx_buf, dma),
733 dma_unmap_len(tx_buf, len),
736 /* clear tx_buffer data */
738 dma_unmap_len_set(tx_buf, len, 0);
740 /* unmap remaining buffers */
741 while (tx_desc != eop_desc) {
748 tx_buf = tx_ring->tx_bi;
749 tx_desc = I40E_TX_DESC(tx_ring, 0);
752 /* unmap any remaining paged data */
753 if (dma_unmap_len(tx_buf, len)) {
754 dma_unmap_page(tx_ring->dev,
755 dma_unmap_addr(tx_buf, dma),
756 dma_unmap_len(tx_buf, len),
758 dma_unmap_len_set(tx_buf, len, 0);
762 /* move us one more past the eop_desc for start of next pkt */
768 tx_buf = tx_ring->tx_bi;
769 tx_desc = I40E_TX_DESC(tx_ring, 0);
774 /* update budget accounting */
776 } while (likely(budget));
779 tx_ring->next_to_clean = i;
780 u64_stats_update_begin(&tx_ring->syncp);
781 tx_ring->stats.bytes += total_bytes;
782 tx_ring->stats.packets += total_packets;
783 u64_stats_update_end(&tx_ring->syncp);
784 tx_ring->q_vector->tx.total_bytes += total_bytes;
785 tx_ring->q_vector->tx.total_packets += total_packets;
787 /* check to see if there are any non-cache aligned descriptors
788 * waiting to be written back, and kick the hardware to force
789 * them to be written back in case of napi polling
	if (budget &&
	    !((i & WB_STRIDE) == WB_STRIDE) &&
	    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
	    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
		tx_ring->arm_wb = true;
	else
		tx_ring->arm_wb = false;
799 if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
800 /* schedule immediate reset if we believe we hung */
801 dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
804 " next_to_use <%x>\n"
805 " next_to_clean <%x>\n",
807 tx_ring->queue_index,
808 tx_ring->next_to_use, i);
810 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
812 dev_info(tx_ring->dev,
813 "tx hang detected on queue %d, reset requested\n",
814 tx_ring->queue_index);
816 /* do not fire the reset immediately, wait for the stack to
817 * decide we are truly stuck, also prevents every queue from
818 * simultaneously requesting a reset
821 /* the adapter is about to reset, no point in enabling polling */
825 netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
826 tx_ring->queue_index),
827 total_packets, total_bytes);
829 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
830 if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
831 (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
832 /* Make sure that anybody stopping the queue after this
833 * sees the new next_to_clean.
836 if (__netif_subqueue_stopped(tx_ring->netdev,
837 tx_ring->queue_index) &&
838 !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
839 netif_wake_subqueue(tx_ring->netdev,
840 tx_ring->queue_index);
841 ++tx_ring->tx_stats.restart_queue;
849 * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
850 * @vsi: the VSI we care about
851 * @q_vector: the vector on which to force writeback
854 static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
856 u16 flags = q_vector->tx.ring[0].flags;
858 if (flags & I40E_TXR_FLAGS_WB_ON_ITR) {
861 if (q_vector->arm_wb_state)
864 val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK;
867 I40E_PFINT_DYN_CTLN(q_vector->v_idx +
868 vsi->base_vector - 1),
870 q_vector->arm_wb_state = true;
871 } else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
872 u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
873 I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
874 I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
875 I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
876 /* allow 00 to be written to the index */
879 I40E_PFINT_DYN_CTLN(q_vector->v_idx +
880 vsi->base_vector - 1), val);
882 u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
883 I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
884 I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
885 I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
886 /* allow 00 to be written to the index */
888 wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
893 * i40e_set_new_dynamic_itr - Find new ITR level
894 * @rc: structure containing ring performance data
896 * Stores a new ITR value based on packets and byte counts during
897 * the last interrupt. The advantage of per interrupt computation
898 * is faster updates and more accurate ITR for the current traffic
899 * pattern. Constants in this function were computed based on
900 * theoretical maximum wire speed and thresholds were set based on
901 * testing data as well as attempting to minimize response time
902 * while increasing bulk throughput.
904 static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
906 enum i40e_latency_range new_latency_range = rc->latency_range;
907 u32 new_itr = rc->itr;
910 if (rc->total_packets == 0 || !rc->itr)
	/* simple throttle rate management
914 * 0-10MB/s lowest (100000 ints/s)
915 * 10-20MB/s low (20000 ints/s)
916 * 20-1249MB/s bulk (8000 ints/s)
918 bytes_per_int = rc->total_bytes / rc->itr;
919 switch (new_latency_range) {
920 case I40E_LOWEST_LATENCY:
921 if (bytes_per_int > 10)
922 new_latency_range = I40E_LOW_LATENCY;
924 case I40E_LOW_LATENCY:
925 if (bytes_per_int > 20)
926 new_latency_range = I40E_BULK_LATENCY;
927 else if (bytes_per_int <= 10)
928 new_latency_range = I40E_LOWEST_LATENCY;
930 case I40E_BULK_LATENCY:
931 if (bytes_per_int <= 20)
932 new_latency_range = I40E_LOW_LATENCY;
935 if (bytes_per_int <= 20)
936 new_latency_range = I40E_LOW_LATENCY;
939 rc->latency_range = new_latency_range;
941 switch (new_latency_range) {
942 case I40E_LOWEST_LATENCY:
943 new_itr = I40E_ITR_100K;
945 case I40E_LOW_LATENCY:
946 new_itr = I40E_ITR_20K;
948 case I40E_BULK_LATENCY:
949 new_itr = I40E_ITR_8K;
955 if (new_itr != rc->itr)
959 rc->total_packets = 0;
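
/* Worked example (illustration only): if the last interrupt moved
 * rc->total_bytes = 60000 bytes at rc->itr = 1000, then bytes_per_int = 60.
 * That is above the "> 20" threshold, so a ring sitting in I40E_LOW_LATENCY
 * steps up to I40E_BULK_LATENCY and the new ITR becomes I40E_ITR_8K
 * (roughly 8000 interrupts per second).
 */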
963 * i40e_clean_programming_status - clean the programming status descriptor
964 * @rx_ring: the rx ring that has this descriptor
965 * @rx_desc: the rx descriptor written back by HW
 * Flow Director uses FD_FILTER_STATUS to check whether its filter programming
 * or invalidation succeeded and takes action accordingly.  FCoE handles its
 * own context/filter programming and invalidation status in the same way.
972 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
973 union i40e_rx_desc *rx_desc)
978 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
979 id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
980 I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
982 if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
983 i40e_fd_handle_status(rx_ring, rx_desc, id);
985 else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
986 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
987 i40e_fcoe_handle_status(rx_ring, rx_desc, id);
992 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
993 * @tx_ring: the tx ring to set up
995 * Return 0 on success, negative on error
997 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
999 struct device *dev = tx_ring->dev;
1005 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
1006 tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
1007 if (!tx_ring->tx_bi)
1010 /* round up to nearest 4K */
1011 tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
	/* add a u32 for the head write-back location; the 4K alignment below
	 * guarantees the extra space amounts to at least one full cache line
	 */
1015 tx_ring->size += sizeof(u32);
1016 tx_ring->size = ALIGN(tx_ring->size, 4096);
1017 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
1018 &tx_ring->dma, GFP_KERNEL);
1019 if (!tx_ring->desc) {
1020 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
1025 tx_ring->next_to_use = 0;
1026 tx_ring->next_to_clean = 0;
1030 kfree(tx_ring->tx_bi);
1031 tx_ring->tx_bi = NULL;
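
/* Sizing example for i40e_setup_tx_descriptors() (illustration only): each
 * Tx descriptor is 16 bytes, so a 512-entry ring needs 512 * 16 = 8192 bytes,
 * plus 4 bytes for the head write-back word = 8196, which ALIGN(..., 4096)
 * rounds up to 12288 bytes of DMA-coherent memory.
 */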
1036 * i40e_clean_rx_ring - Free Rx buffers
1037 * @rx_ring: ring to be cleaned
1039 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
1041 struct device *dev = rx_ring->dev;
1042 struct i40e_rx_buffer *rx_bi;
1043 unsigned long bi_size;
1046 /* ring already cleared, nothing to do */
1047 if (!rx_ring->rx_bi)
1050 if (ring_is_ps_enabled(rx_ring)) {
1051 int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
1053 rx_bi = &rx_ring->rx_bi[0];
1054 if (rx_bi->hdr_buf) {
1055 dma_free_coherent(dev,
1059 for (i = 0; i < rx_ring->count; i++) {
1060 rx_bi = &rx_ring->rx_bi[i];
1062 rx_bi->hdr_buf = NULL;
1066 /* Free all the Rx ring sk_buffs */
1067 for (i = 0; i < rx_ring->count; i++) {
1068 rx_bi = &rx_ring->rx_bi[i];
1070 dma_unmap_single(dev,
1072 rx_ring->rx_buf_len,
1077 dev_kfree_skb(rx_bi->skb);
1081 if (rx_bi->page_dma) {
1086 rx_bi->page_dma = 0;
1088 __free_page(rx_bi->page);
1090 rx_bi->page_offset = 0;
1094 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1095 memset(rx_ring->rx_bi, 0, bi_size);
1097 /* Zero out the descriptor ring */
1098 memset(rx_ring->desc, 0, rx_ring->size);
1100 rx_ring->next_to_clean = 0;
1101 rx_ring->next_to_use = 0;
1105 * i40e_free_rx_resources - Free Rx resources
1106 * @rx_ring: ring to clean the resources from
1108 * Free all receive software resources
1110 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1112 i40e_clean_rx_ring(rx_ring);
1113 kfree(rx_ring->rx_bi);
1114 rx_ring->rx_bi = NULL;
1116 if (rx_ring->desc) {
1117 dma_free_coherent(rx_ring->dev, rx_ring->size,
1118 rx_ring->desc, rx_ring->dma);
1119 rx_ring->desc = NULL;
1124 * i40e_alloc_rx_headers - allocate rx header buffers
1125 * @rx_ring: ring to alloc buffers
1127 * Allocate rx header buffers for the entire ring. As these are static,
1128 * this is only called when setting up a new ring.
1130 void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
1132 struct device *dev = rx_ring->dev;
1133 struct i40e_rx_buffer *rx_bi;
1139 if (rx_ring->rx_bi[0].hdr_buf)
1141 /* Make sure the buffers don't cross cache line boundaries. */
1142 buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
1143 buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
1147 for (i = 0; i < rx_ring->count; i++) {
1148 rx_bi = &rx_ring->rx_bi[i];
1149 rx_bi->dma = dma + (i * buf_size);
1150 rx_bi->hdr_buf = buffer + (i * buf_size);
1155 * i40e_setup_rx_descriptors - Allocate Rx descriptors
1156 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1158 * Returns 0 on success, negative on failure
1160 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1162 struct device *dev = rx_ring->dev;
1165 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1166 rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1167 if (!rx_ring->rx_bi)
1170 u64_stats_init(&rx_ring->syncp);
1172 /* Round up to nearest 4K */
1173 rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1174 ? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1175 : rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1176 rx_ring->size = ALIGN(rx_ring->size, 4096);
1177 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1178 &rx_ring->dma, GFP_KERNEL);
1180 if (!rx_ring->desc) {
1181 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1186 rx_ring->next_to_clean = 0;
1187 rx_ring->next_to_use = 0;
1191 kfree(rx_ring->rx_bi);
1192 rx_ring->rx_bi = NULL;
1197 * i40e_release_rx_desc - Store the new tail and head values
1198 * @rx_ring: ring to bump
1199 * @val: new head index
1201 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1203 rx_ring->next_to_use = val;
1204 /* Force memory writes to complete before letting h/w
1205 * know there are new descriptors to fetch. (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();
	writel(val, rx_ring->tail);
1214 * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
1215 * @rx_ring: ring to place buffers on
1216 * @cleaned_count: number of buffers to replace
1218 void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
1220 u16 i = rx_ring->next_to_use;
1221 union i40e_rx_desc *rx_desc;
1222 struct i40e_rx_buffer *bi;
1224 /* do nothing if no valid netdev defined */
1225 if (!rx_ring->netdev || !cleaned_count)
1228 while (cleaned_count--) {
1229 rx_desc = I40E_RX_DESC(rx_ring, i);
1230 bi = &rx_ring->rx_bi[i];
1232 if (bi->skb) /* desc is in use */
1235 bi->page = alloc_page(GFP_ATOMIC);
1237 rx_ring->rx_stats.alloc_page_failed++;
1242 if (!bi->page_dma) {
1243 /* use a half page if we're re-using */
1244 bi->page_offset ^= PAGE_SIZE / 2;
1245 bi->page_dma = dma_map_page(rx_ring->dev,
1250 if (dma_mapping_error(rx_ring->dev,
1252 rx_ring->rx_stats.alloc_page_failed++;
1258 dma_sync_single_range_for_device(rx_ring->dev,
1261 rx_ring->rx_hdr_len,
1263 /* Refresh the desc even if buffer_addrs didn't change
1264 * because each write-back erases this info.
1266 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
1267 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1269 if (i == rx_ring->count)
1274 if (rx_ring->next_to_use != i)
1275 i40e_release_rx_desc(rx_ring, i);
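
/* Illustration of the page re-use above: page_offset is toggled with an XOR
 * of PAGE_SIZE/2, so on a 4K page the half-page receive buffer alternates
 * between offset 0 and offset 2048 each time the page is remapped:
 *
 *	bi->page_offset ^= PAGE_SIZE / 2;	offset: 0 -> 2048 -> 0 -> ...
 */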
1279 * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
1280 * @rx_ring: ring to place buffers on
1281 * @cleaned_count: number of buffers to replace
1283 void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
1285 u16 i = rx_ring->next_to_use;
1286 union i40e_rx_desc *rx_desc;
1287 struct i40e_rx_buffer *bi;
1288 struct sk_buff *skb;
1290 /* do nothing if no valid netdev defined */
1291 if (!rx_ring->netdev || !cleaned_count)
1294 while (cleaned_count--) {
1295 rx_desc = I40E_RX_DESC(rx_ring, i);
1296 bi = &rx_ring->rx_bi[i];
1300 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1301 rx_ring->rx_buf_len);
1303 rx_ring->rx_stats.alloc_buff_failed++;
1306 /* initialize queue mapping */
1307 skb_record_rx_queue(skb, rx_ring->queue_index);
1312 bi->dma = dma_map_single(rx_ring->dev,
1314 rx_ring->rx_buf_len,
1316 if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1317 rx_ring->rx_stats.alloc_buff_failed++;
1323 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1324 rx_desc->read.hdr_addr = 0;
1326 if (i == rx_ring->count)
1331 if (rx_ring->next_to_use != i)
1332 i40e_release_rx_desc(rx_ring, i);
1336 * i40e_receive_skb - Send a completed packet up the stack
1337 * @rx_ring: rx ring in play
1338 * @skb: packet to send up
1339 * @vlan_tag: vlan tag for packet
1341 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1342 struct sk_buff *skb, u16 vlan_tag)
1344 struct i40e_q_vector *q_vector = rx_ring->q_vector;
1345 struct i40e_vsi *vsi = rx_ring->vsi;
1346 u64 flags = vsi->back->flags;
1348 if (vlan_tag & VLAN_VID_MASK)
1349 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1351 if (flags & I40E_FLAG_IN_NETPOLL)
1354 napi_gro_receive(&q_vector->napi, skb);
1358 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1359 * @vsi: the VSI we care about
1360 * @skb: skb currently being received and modified
1361 * @rx_status: status value of last descriptor in packet
1362 * @rx_error: error value of last descriptor in packet
1363 * @rx_ptype: ptype value of last descriptor in packet
1365 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1366 struct sk_buff *skb,
1371 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1372 bool ipv4 = false, ipv6 = false;
1373 bool ipv4_tunnel, ipv6_tunnel;
1378 ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1379 (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1380 ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1381 (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1383 skb->ip_summed = CHECKSUM_NONE;
1385 /* Rx csum enabled and ip headers found? */
1386 if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1389 /* did the hardware decode the packet and checksum? */
1390 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1393 /* both known and outer_ip must be set for the below code to work */
1394 if (!(decoded.known && decoded.outer_ip))
1397 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1398 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
1400 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1401 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1405 (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
1406 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1409 /* likely incorrect csum if alternate IP extension headers found */
1411 rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1412 /* don't increment checksum err here, non-fatal err */
1415 /* there was some L4 error, count error and punt packet to the stack */
1416 if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
1419 /* handle packets that were not able to be checksummed due
	 * to arrival speed, in this case the stack can compute
	 * the checksum.
	 */
1423 if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
1426 /* If VXLAN traffic has an outer UDPv4 checksum we need to check
1427 * it in the driver, hardware does not do it for us.
1428 * Since L3L4P bit was set we assume a valid IHL value (>=5)
1429 * so the total length of IPv4 header is IHL*4 bytes
	 * The UDP_0 bit *may* be set if the *inner* header is UDP
1432 if (!(vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) &&
1434 skb->transport_header = skb->mac_header +
1435 sizeof(struct ethhdr) +
1436 (ip_hdr(skb)->ihl * 4);
1438 /* Add 4 bytes for VLAN tagged packets */
1439 skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
1440 skb->protocol == htons(ETH_P_8021AD))
1443 if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
1444 (udp_hdr(skb)->check != 0)) {
1445 rx_udp_csum = udp_csum(skb);
1447 csum = csum_tcpudp_magic(
1448 iph->saddr, iph->daddr,
1449 (skb->len - skb_transport_offset(skb)),
1450 IPPROTO_UDP, rx_udp_csum);
1452 if (udp_hdr(skb)->check != csum)
	} /* else it's GRE and so no outer UDP header */
1458 skb->ip_summed = CHECKSUM_UNNECESSARY;
1459 skb->csum_level = ipv4_tunnel || ipv6_tunnel;
1464 vsi->back->hw_csum_rx_error++;
1468 * i40e_rx_hash - returns the hash value from the Rx descriptor
1469 * @ring: descriptor ring
1470 * @rx_desc: specific descriptor
1472 static inline u32 i40e_rx_hash(struct i40e_ring *ring,
1473 union i40e_rx_desc *rx_desc)
1475 const __le64 rss_mask =
1476 cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1477 I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1479 if ((ring->netdev->features & NETIF_F_RXHASH) &&
1480 (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
1481 return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1487 * i40e_ptype_to_hash - get a hash type
1488 * @ptype: the ptype value from the descriptor
1490 * Returns a hash type to be used by skb_set_hash
1492 static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
1494 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1497 return PKT_HASH_TYPE_NONE;
1499 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1500 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1501 return PKT_HASH_TYPE_L4;
1502 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1503 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1504 return PKT_HASH_TYPE_L3;
1506 return PKT_HASH_TYPE_L2;
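
/* Typical use of the two helpers above when a packet completes (this is the
 * call made in the clean routines below):
 *
 *	skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
 *		     i40e_ptype_to_hash(rx_ptype));
 *
 * i40e_rx_hash() supplies the 32-bit RSS hash (or 0 if RSS/RXHASH is off)
 * and i40e_ptype_to_hash() tells the stack how deep that hash goes (L2/L3/L4).
 */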
1510 * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
1511 * @rx_ring: rx ring to clean
1512 * @budget: how many cleans we're allowed
 * Returns the number of packets cleaned
1516 static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
1518 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1519 u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1520 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1521 const int current_node = numa_node_id();
1522 struct i40e_vsi *vsi = rx_ring->vsi;
1523 u16 i = rx_ring->next_to_clean;
1524 union i40e_rx_desc *rx_desc;
1525 u32 rx_error, rx_status;
1533 struct i40e_rx_buffer *rx_bi;
1534 struct sk_buff *skb;
1536 /* return some buffers to hardware, one at a time is too slow */
1537 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1538 i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count);
1542 i = rx_ring->next_to_clean;
1543 rx_desc = I40E_RX_DESC(rx_ring, i);
1544 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1545 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1546 I40E_RXD_QW1_STATUS_SHIFT;
1548 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1551 /* This memory barrier is needed to keep us from reading
1552 * any other fields out of the rx_desc until we know the
1556 if (i40e_rx_is_programming_status(qword)) {
1557 i40e_clean_programming_status(rx_ring, rx_desc);
1558 I40E_RX_INCREMENT(rx_ring, i);
1561 rx_bi = &rx_ring->rx_bi[i];
1564 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1565 rx_ring->rx_hdr_len);
1567 rx_ring->rx_stats.alloc_buff_failed++;
1571 /* initialize queue mapping */
1572 skb_record_rx_queue(skb, rx_ring->queue_index);
1573 /* we are reusing so sync this buffer for CPU use */
1574 dma_sync_single_range_for_cpu(rx_ring->dev,
1577 rx_ring->rx_hdr_len,
1580 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1581 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1582 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1583 I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1584 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1585 I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1587 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1588 I40E_RXD_QW1_ERROR_SHIFT;
1589 rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1590 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1592 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1593 I40E_RXD_QW1_PTYPE_SHIFT;
1594 prefetch(rx_bi->page);
1597 if (rx_hbo || rx_sph) {
1600 len = I40E_RX_HDR_SIZE;
1602 len = rx_header_len;
1603 memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
1604 } else if (skb->len == 0) {
1607 len = (rx_packet_len > skb_headlen(skb) ?
1608 skb_headlen(skb) : rx_packet_len);
1609 memcpy(__skb_put(skb, len),
1610 rx_bi->page + rx_bi->page_offset,
1612 rx_bi->page_offset += len;
1613 rx_packet_len -= len;
1616 /* Get the rest of the data if this was a header split */
1617 if (rx_packet_len) {
1618 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
1623 skb->len += rx_packet_len;
1624 skb->data_len += rx_packet_len;
1625 skb->truesize += rx_packet_len;
1627 if ((page_count(rx_bi->page) == 1) &&
1628 (page_to_nid(rx_bi->page) == current_node))
1629 get_page(rx_bi->page);
1633 dma_unmap_page(rx_ring->dev,
1637 rx_bi->page_dma = 0;
1639 I40E_RX_INCREMENT(rx_ring, i);
1642 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1643 struct i40e_rx_buffer *next_buffer;
1645 next_buffer = &rx_ring->rx_bi[i];
1646 next_buffer->skb = skb;
1647 rx_ring->rx_stats.non_eop_descs++;
1651 /* ERR_MASK will only have valid bits if EOP set */
1652 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1653 dev_kfree_skb_any(skb);
1657 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1658 i40e_ptype_to_hash(rx_ptype));
1659 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1660 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1661 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1662 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1663 rx_ring->last_rx_timestamp = jiffies;
1666 /* probably a little skewed due to removing CRC */
1667 total_rx_bytes += skb->len;
1670 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1672 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1674 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1675 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1678 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1679 dev_kfree_skb_any(skb);
1683 skb_mark_napi_id(skb, &rx_ring->q_vector->napi);
1684 i40e_receive_skb(rx_ring, skb, vlan_tag);
1686 rx_desc->wb.qword1.status_error_len = 0;
1688 } while (likely(total_rx_packets < budget));
1690 u64_stats_update_begin(&rx_ring->syncp);
1691 rx_ring->stats.packets += total_rx_packets;
1692 rx_ring->stats.bytes += total_rx_bytes;
1693 u64_stats_update_end(&rx_ring->syncp);
1694 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1695 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1697 return total_rx_packets;
1701 * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
1702 * @rx_ring: rx ring to clean
1703 * @budget: how many cleans we're allowed
1705 * Returns number of packets cleaned
1707 static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
1709 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1710 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1711 struct i40e_vsi *vsi = rx_ring->vsi;
1712 union i40e_rx_desc *rx_desc;
1713 u32 rx_error, rx_status;
1720 struct i40e_rx_buffer *rx_bi;
1721 struct sk_buff *skb;
1723 /* return some buffers to hardware, one at a time is too slow */
1724 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1725 i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count);
1729 i = rx_ring->next_to_clean;
1730 rx_desc = I40E_RX_DESC(rx_ring, i);
1731 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1732 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1733 I40E_RXD_QW1_STATUS_SHIFT;
1735 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1738 /* This memory barrier is needed to keep us from reading
1739 * any other fields out of the rx_desc until we know the
1744 if (i40e_rx_is_programming_status(qword)) {
1745 i40e_clean_programming_status(rx_ring, rx_desc);
1746 I40E_RX_INCREMENT(rx_ring, i);
1749 rx_bi = &rx_ring->rx_bi[i];
1751 prefetch(skb->data);
1753 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1754 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1756 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1757 I40E_RXD_QW1_ERROR_SHIFT;
1758 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1760 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1761 I40E_RXD_QW1_PTYPE_SHIFT;
1765 /* Get the header and possibly the whole packet
1766 * If this is an skb from previous receive dma will be 0
1768 skb_put(skb, rx_packet_len);
1769 dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
1773 I40E_RX_INCREMENT(rx_ring, i);
1776 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1777 rx_ring->rx_stats.non_eop_descs++;
1781 /* ERR_MASK will only have valid bits if EOP set */
1782 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1783 dev_kfree_skb_any(skb);
1784 /* TODO: shouldn't we increment a counter indicating the
1790 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1791 i40e_ptype_to_hash(rx_ptype));
1792 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1793 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1794 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1795 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1796 rx_ring->last_rx_timestamp = jiffies;
1799 /* probably a little skewed due to removing CRC */
1800 total_rx_bytes += skb->len;
1803 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1805 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1807 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1808 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1811 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1812 dev_kfree_skb_any(skb);
1816 i40e_receive_skb(rx_ring, skb, vlan_tag);
1818 rx_desc->wb.qword1.status_error_len = 0;
1819 } while (likely(total_rx_packets < budget));
1821 u64_stats_update_begin(&rx_ring->syncp);
1822 rx_ring->stats.packets += total_rx_packets;
1823 rx_ring->stats.bytes += total_rx_bytes;
1824 u64_stats_update_end(&rx_ring->syncp);
1825 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1826 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1828 return total_rx_packets;
1832 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
1833 * @vsi: the VSI we care about
1834 * @q_vector: q_vector for which itr is being updated and interrupt enabled
1837 static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1838 struct i40e_q_vector *q_vector)
1840 struct i40e_hw *hw = &vsi->back->hw;
1845 vector = (q_vector->v_idx + vsi->base_vector);
1846 if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
1847 old_itr = q_vector->rx.itr;
1848 i40e_set_new_dynamic_itr(&q_vector->rx);
1849 if (old_itr != q_vector->rx.itr) {
1850 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1851 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1853 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1854 (q_vector->rx.itr <<
1855 I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1857 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1858 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1860 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
1862 if (!test_bit(__I40E_DOWN, &vsi->state))
1863 wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val);
1865 i40e_irq_dynamic_enable(vsi,
1866 q_vector->v_idx + vsi->base_vector);
1868 if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
1869 old_itr = q_vector->tx.itr;
1870 i40e_set_new_dynamic_itr(&q_vector->tx);
1871 if (old_itr != q_vector->tx.itr) {
1872 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1873 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1875 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1876 (q_vector->tx.itr <<
1877 I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1879 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1880 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1882 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
1884 if (!test_bit(__I40E_DOWN, &vsi->state))
1885 wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->v_idx +
1886 vsi->base_vector - 1), val);
1888 i40e_irq_dynamic_enable(vsi,
1889 q_vector->v_idx + vsi->base_vector);
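
/* Worked example (illustration only): the MSI-X cause register for a queue
 * vector is indexed by (v_idx + base_vector - 1) because vector 0 of the
 * function is the "other causes"/admin-queue vector that uses
 * I40E_PFINT_DYN_CTL0.  With base_vector = 1 and q_vector->v_idx = 3, the
 * writes above land in I40E_PFINT_DYN_CTLN(3).
 */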
1894 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1895 * @napi: napi struct with our devices info in it
1896 * @budget: amount of work driver is allowed to do this pass, in packets
1898 * This function will clean all queues associated with a q_vector.
1900 * Returns the amount of work done
1902 int i40e_napi_poll(struct napi_struct *napi, int budget)
1904 struct i40e_q_vector *q_vector =
1905 container_of(napi, struct i40e_q_vector, napi);
1906 struct i40e_vsi *vsi = q_vector->vsi;
1907 struct i40e_ring *ring;
1908 bool clean_complete = true;
1909 bool arm_wb = false;
1910 int budget_per_ring;
1913 if (test_bit(__I40E_DOWN, &vsi->state)) {
1914 napi_complete(napi);
1918 /* Since the actual Tx work is minimal, we can give the Tx a larger
1919 * budget and be more aggressive about cleaning up the Tx descriptors.
1921 i40e_for_each_ring(ring, q_vector->tx) {
1922 clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
1923 arm_wb |= ring->arm_wb;
1926 /* We attempt to distribute budget to each Rx queue fairly, but don't
1927 * allow the budget to go below 1 because that would exit polling early.
1929 budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
1931 i40e_for_each_ring(ring, q_vector->rx) {
1932 if (ring_is_ps_enabled(ring))
1933 cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
1935 cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
1936 /* if we didn't clean as many as budgeted, we must be done */
1937 clean_complete &= (budget_per_ring != cleaned);
1940 /* If work not completed, return budget and polling will return */
1941 if (!clean_complete) {
1943 i40e_force_wb(vsi, q_vector);
1947 if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
1948 q_vector->arm_wb_state = false;
1950 /* Work is done so exit the polling mode and re-enable the interrupt */
1951 napi_complete(napi);
1952 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
1953 i40e_update_enable_itr(vsi, q_vector);
1954 } else { /* Legacy mode */
1955 struct i40e_hw *hw = &vsi->back->hw;
		/* Re-enable the queue 0 interrupt cause; dynamic_enable is not
		 * a concern here because it was left on for the other possible
		 * interrupts during napi.
		 */
1961 u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) |
1962 I40E_QINT_RQCTL_CAUSE_ENA_MASK;
1964 wr32(hw, I40E_QINT_RQCTL(0), qval);
1965 qval = rd32(hw, I40E_QINT_TQCTL(0)) |
1966 I40E_QINT_TQCTL_CAUSE_ENA_MASK;
1967 wr32(hw, I40E_QINT_TQCTL(0), qval);
1968 i40e_irq_dynamic_enable_icr0(vsi->back);
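
/* Worked example (illustration only): with a NAPI budget of 64 and
 * q_vector->num_ringpairs = 4, each Rx ring above gets
 * budget_per_ring = max(64 / 4, 1) = 16.  Any ring that consumes its full
 * 16-packet share clears clean_complete, so the poller returns the whole
 * budget and NAPI keeps polling instead of re-enabling the interrupt.
 */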
1974 * i40e_atr - Add a Flow Director ATR filter
1975 * @tx_ring: ring to add programming descriptor to
1977 * @tx_flags: send tx flags
1978 * @protocol: wire protocol
1980 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
1981 u32 tx_flags, __be16 protocol)
1983 struct i40e_filter_program_desc *fdir_desc;
1984 struct i40e_pf *pf = tx_ring->vsi->back;
1986 unsigned char *network;
1988 struct ipv6hdr *ipv6;
1992 u32 flex_ptype, dtype_cmd;
1995 /* make sure ATR is enabled */
1996 if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
1999 if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2002 /* if sampling is disabled do nothing */
2003 if (!tx_ring->atr_sample_rate)
2006 if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
2009 if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) {
2010 /* snag network header to get L4 type and address */
2011 hdr.network = skb_network_header(skb);
2013 /* Currently only IPv4/IPv6 with TCP is supported
2014 * access ihl as u8 to avoid unaligned access on ia64
2016 if (tx_flags & I40E_TX_FLAGS_IPV4)
2017 hlen = (hdr.network[0] & 0x0F) << 2;
2018 else if (protocol == htons(ETH_P_IPV6))
2019 hlen = sizeof(struct ipv6hdr);
2023 hdr.network = skb_inner_network_header(skb);
2024 hlen = skb_inner_network_header_len(skb);
2027 /* Currently only IPv4/IPv6 with TCP is supported
2028 * Note: tx_flags gets modified to reflect inner protocols in
2029 * tx_enable_csum function if encap is enabled.
2031 if ((tx_flags & I40E_TX_FLAGS_IPV4) &&
2032 (hdr.ipv4->protocol != IPPROTO_TCP))
2034 else if ((tx_flags & I40E_TX_FLAGS_IPV6) &&
2035 (hdr.ipv6->nexthdr != IPPROTO_TCP))
2038 th = (struct tcphdr *)(hdr.network + hlen);
2040 /* Due to lack of space, no more new filters can be programmed */
2041 if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2043 if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
2044 /* HW ATR eviction will take care of removing filters on FIN
2047 if (th->fin || th->rst)
2051 tx_ring->atr_count++;
2053 /* sample on all syn/fin/rst packets or once every atr sample rate */
2057 (tx_ring->atr_count < tx_ring->atr_sample_rate))
2060 tx_ring->atr_count = 0;
2062 /* grab the next descriptor */
2063 i = tx_ring->next_to_use;
2064 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
2067 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2069 flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
2070 I40E_TXD_FLTR_QW0_QINDEX_MASK;
2071 flex_ptype |= (protocol == htons(ETH_P_IP)) ?
2072 (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
2073 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
2074 (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
2075 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
2077 flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
2079 dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
2081 dtype_cmd |= (th->fin || th->rst) ?
2082 (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2083 I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2084 (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2085 I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2087 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2088 I40E_TXD_FLTR_QW1_DEST_SHIFT;
2090 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2091 I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2093 dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
2094 if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL))
2096 ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2097 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2098 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2101 ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2102 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2103 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2105 if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
2106 dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
2108 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
2109 fdir_desc->rsvd = cpu_to_le32(0);
2110 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
2111 fdir_desc->fd_id = cpu_to_le32(0);
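
/* Illustration of the sampling logic above: with tx_ring->atr_sample_rate
 * set to 20, an ATR filter descriptor is queued immediately for every
 * SYN/FIN/RST segment, and for roughly every 20th ordinary TCP segment on
 * the ring (atr_count is reset to 0 each time a sample is taken).
 */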
2115 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2117 * @tx_ring: ring to send buffer on
2118 * @flags: the tx flags to be set
2120 * Checks the skb and set up correspondingly several generic transmit flags
2121 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
 * Returns a negative error code if the frame should be dropped,
 * otherwise returns 0 to indicate the flags have been set properly.
2127 inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2128 struct i40e_ring *tx_ring,
2131 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2132 struct i40e_ring *tx_ring,
2136 __be16 protocol = skb->protocol;
2139 if (protocol == htons(ETH_P_8021Q) &&
2140 !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2141 /* When HW VLAN acceleration is turned off by the user the
2142 * stack sets the protocol to 8021q so that the driver
2143 * can take any steps required to support the SW only
2144 * VLAN handling. In our case the driver doesn't need
2145 * to take any further steps so just set the protocol
2146 * to the encapsulated ethertype.
2148 skb->protocol = vlan_get_protocol(skb);
2152 /* if we have a HW VLAN tag being added, default to the HW one */
2153 if (skb_vlan_tag_present(skb)) {
2154 tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
2155 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2156 /* else if it is a SW VLAN, check the next protocol and store the tag */
2157 } else if (protocol == htons(ETH_P_8021Q)) {
2158 struct vlan_hdr *vhdr, _vhdr;
2159 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2163 protocol = vhdr->h_vlan_encapsulated_proto;
2164 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2165 tx_flags |= I40E_TX_FLAGS_SW_VLAN;
2168 if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2171 /* Insert 802.1p priority into VLAN header */
2172 if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2173 (skb->priority != TC_PRIO_CONTROL)) {
2174 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2175 tx_flags |= (skb->priority & 0x7) <<
2176 I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2177 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2178 struct vlan_ethhdr *vhdr;
2181 rc = skb_cow_head(skb, 0);
2184 vhdr = (struct vlan_ethhdr *)skb->data;
2185 vhdr->h_vlan_TCI = htons(tx_flags >>
2186 I40E_TX_FLAGS_VLAN_SHIFT);
2188 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
/**
 * i40e_tso - set up the tso context descriptor
 * @tx_ring:  ptr to the ring to send
 * @skb:      ptr to the skb we're sending
 * @hdr_len:  ptr to the size of the packet header
 * @cd_tunneling: ptr to context descriptor bits
 *
 * Returns 0 if no TSO can happen, 1 if TSO is going, or a negative error code
 **/
static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
		    u8 *hdr_len, u64 *cd_type_cmd_tso_mss,
		    u32 *cd_tunneling)
{
	u32 cd_cmd, cd_tso_len, cd_mss;
	struct ipv6hdr *ipv6h;
	struct tcphdr *tcph;
	struct iphdr *iph;
	u32 l4len;
	int err;

	if (!skb_is_gso(skb))
		return 0;

	err = skb_cow_head(skb, 0);
	if (err < 0)
		return err;

	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
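
	/* for TSO the stack hands us one unsegmented super-packet: zero the
	 * IP length fields and seed the TCP checksum with the pseudo-header
	 * checksum so the hardware can finish it for every segment
	 */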
	if (iph->version == 4) {
		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
		iph->tot_len = 0;
		iph->check = 0;
		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
						 0, IPPROTO_TCP, 0);
	} else if (ipv6h->version == 6) {
		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
		ipv6h->payload_len = 0;
		tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
					       0, IPPROTO_TCP, 0);
	}

	l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
	*hdr_len = (skb->encapsulation
		    ? (skb_inner_transport_header(skb) - skb->data)
		    : skb_transport_offset(skb)) + l4len;

	/* find the field values */
	cd_cmd = I40E_TX_CTX_DESC_TSO;
	cd_tso_len = skb->len - *hdr_len;
	cd_mss = skb_shinfo(skb)->gso_size;
	*cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
				((u64)cd_tso_len <<
				 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
				((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	return 1;
}

/**
 * i40e_tsyn - set up the tsyn context descriptor
 * @tx_ring:  ptr to the ring to send
 * @skb:      ptr to the skb we're sending
 * @tx_flags: the collected send information
 *
 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
 **/
static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
		     u32 tx_flags, u64 *cd_type_cmd_tso_mss)
{
	struct i40e_pf *pf;

	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
		return 0;

	/* Tx timestamps cannot be sampled when doing TSO */
	if (tx_flags & I40E_TX_FLAGS_TSO)
		return 0;

	/* only timestamp the outbound packet if the user has requested it and
	 * we are not already transmitting a packet to be timestamped
	 */
	pf = i40e_netdev_to_pf(tx_ring->netdev);
	if (!(pf->flags & I40E_FLAG_PTP))
		return 0;

	if (pf->ptp_tx &&
	    !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
		pf->ptp_tx_skb = skb_get(skb);
	} else {
		return 0;
	}

	*cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
				I40E_TXD_CTX_QW1_CMD_SHIFT;

	return 1;
}

/**
 * i40e_tx_enable_csum - Enable Tx checksum offloads
 * @skb: send buffer
 * @tx_flags: pointer to Tx flags currently set
 * @td_cmd: Tx descriptor command bits to set
 * @td_offset: Tx descriptor header offsets to set
 * @tx_ring: Tx descriptor ring
 * @cd_tunneling: ptr to context desc bits
 **/
static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
				u32 *td_cmd, u32 *td_offset,
				struct i40e_ring *tx_ring,
				u32 *cd_tunneling)
{
	struct ipv6hdr *this_ipv6_hdr;
	unsigned int this_tcp_hdrlen;
	struct iphdr *this_ip_hdr;
	u32 network_hdr_len;
	u8 l4_hdr = 0;
	struct udphdr *oudph;
	struct iphdr *oiph;
	u32 l4_tunnel = 0;

	if (skb->encapsulation) {
		switch (ip_hdr(skb)->protocol) {
		case IPPROTO_UDP:
			oudph = udp_hdr(skb);
			oiph = ip_hdr(skb);
			l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
			*tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
			break;
		default:
			return;
		}
		network_hdr_len = skb_inner_network_header_len(skb);
		this_ip_hdr = inner_ip_hdr(skb);
		this_ipv6_hdr = inner_ipv6_hdr(skb);
		this_tcp_hdrlen = inner_tcp_hdrlen(skb);

		if (*tx_flags & I40E_TX_FLAGS_IPV4) {
			if (*tx_flags & I40E_TX_FLAGS_TSO) {
				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
				ip_hdr(skb)->check = 0;
			} else {
				*cd_tunneling |=
					 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
			}
		} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
			if (*tx_flags & I40E_TX_FLAGS_TSO)
				ip_hdr(skb)->check = 0;
		}
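
		/* the tunneling context expects the outer IP header length in
		 * 4-byte words and the tunnel (NAT) header length in 2-byte
		 * words, hence the >> 2 and >> 1 conversions below
		 */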
		/* Now set the ctx descriptor fields */
		*cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
				   I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT      |
				   l4_tunnel                             |
				   ((skb_inner_network_offset(skb) -
					skb_transport_offset(skb)) >> 1) <<
				   I40E_TXD_CTX_QW0_NATLEN_SHIFT;
		if (this_ip_hdr->version == 6) {
			*tx_flags &= ~I40E_TX_FLAGS_IPV4;
			*tx_flags |= I40E_TX_FLAGS_IPV6;
		}
		if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) &&
		    (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING)        &&
		    (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) {
			oudph->check = ~csum_tcpudp_magic(oiph->saddr,
					oiph->daddr,
					(skb->len - skb_transport_offset(skb)),
					IPPROTO_UDP, 0);
			*cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
		}
	} else {
		network_hdr_len = skb_network_header_len(skb);
		this_ip_hdr = ip_hdr(skb);
		this_ipv6_hdr = ipv6_hdr(skb);
		this_tcp_hdrlen = tcp_hdrlen(skb);
	}

	/* Enable IP checksum offloads */
	if (*tx_flags & I40E_TX_FLAGS_IPV4) {
		l4_hdr = this_ip_hdr->protocol;
		/* the stack computes the IP header already, the only time we
		 * need the hardware to recompute it is in the case of TSO.
		 */
		if (*tx_flags & I40E_TX_FLAGS_TSO) {
			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
			this_ip_hdr->check = 0;
		} else {
			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
		}
		/* Now set the td_offset for IP header length */
		*td_offset = (network_hdr_len >> 2) <<
			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
	} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
		l4_hdr = this_ipv6_hdr->nexthdr;
		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
		/* Now set the td_offset for IP header length */
		*td_offset = (network_hdr_len >> 2) <<
			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
	}
	/* words in MACLEN + dwords in IPLEN + dwords in L4Len */
	*td_offset |= (skb_network_offset(skb) >> 1) <<
		       I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
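
	/* e.g. untagged Ethernet + IPv4 + TCP with no options: MACLEN is
	 * 14 bytes = 7 words, IPLEN is 20 bytes = 5 dwords and L4LEN is
	 * 20 bytes = 5 dwords
	 */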

	/* Enable L4 checksum offloads */
	switch (l4_hdr) {
	case IPPROTO_TCP:
		/* enable checksum offloads */
		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
		*td_offset |= (this_tcp_hdrlen >> 2) <<
			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	case IPPROTO_SCTP:
		/* enable SCTP checksum offload */
		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
		*td_offset |= (sizeof(struct sctphdr) >> 2) <<
			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	case IPPROTO_UDP:
		/* enable UDP checksum offload */
		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
		*td_offset |= (sizeof(struct udphdr) >> 2) <<
			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		break;
	default:
		break;
	}
}

/**
 * i40e_create_tx_ctx - Build the Tx context descriptor
 * @tx_ring:  ring to create the descriptor on
 * @cd_type_cmd_tso_mss: Quad Word 1
 * @cd_tunneling: Quad Word 0 - bits 0-31
 * @cd_l2tag2: Quad Word 0 - bits 32-63
 **/
static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
			       const u64 cd_type_cmd_tso_mss,
			       const u32 cd_tunneling, const u32 cd_l2tag2)
{
	struct i40e_tx_context_desc *context_desc;
	int i = tx_ring->next_to_use;
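
	/* a context descriptor is only needed when TSO, timestamping,
	 * tunneling or an L2 tag 2 actually set bits in it; otherwise skip
	 * writing one and save the descriptor slot for data
	 */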
	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
	    !cd_tunneling && !cd_l2tag2)
		return;

	/* grab the next descriptor */
	context_desc = I40E_TX_CTXTDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	/* cpu_to_le32 and assign to struct fields */
	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
	context_desc->rsvd = cpu_to_le16(0);
	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
}

/**
 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
 * @tx_ring: the ring to be checked
 * @size:    the size buffer we want to assure is available
 *
 * Returns -EBUSY if a stop is needed, else 0
 **/
static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
{
	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
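	/* stop the queue before the final free-descriptor check: a concurrent
	 * completion will then either observe the stopped queue and wake it,
	 * or we will see the descriptors it freed in the re-check below
	 */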
	/* Memory barrier before checking head and tail */
	smp_mb();

	/* Check again in case another CPU has just made room available. */
	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
		return -EBUSY;

	/* A reprieve! - use start_queue because it doesn't call schedule */
	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
	++tx_ring->tx_stats.restart_queue;
	return 0;
}

/**
 * i40e_maybe_stop_tx - 1st level check for tx stop conditions
 * @tx_ring: the ring to be checked
 * @size:    the size buffer we want to assure is available
 *
 * Returns 0 if stop is not needed
 **/
#ifdef I40E_FCOE
inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
#else
static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
#endif
{
	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
		return 0;
	return __i40e_maybe_stop_tx(tx_ring, size);
}

/**
 * i40e_chk_linearize - Check if there are more than 8 fragments per packet
 * @skb:      send buffer
 * @tx_flags: collected send information
 *
 * Note: Our HW can't scatter-gather more than 8 fragments to build
 * a packet on the wire and so we need to figure out the cases where we
 * need to linearize the skb.
 **/
static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
{
	struct skb_frag_struct *frag;
	bool linearize = false;
	unsigned int size = 0;
	u16 num_frags;
	u16 gso_segs;

	num_frags = skb_shinfo(skb)->nr_frags;
	gso_segs = skb_shinfo(skb)->gso_segs;

	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
		u16 j = 0;

		if (num_frags < (I40E_MAX_BUFFER_TXD))
			goto linearize_chk_done;
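		/* e.g. a TSO skb with 18 frags split into 2 segments averages
		 * DIV_ROUND_UP(18 + 2, 2) = 10 frags per segment, more than
		 * I40E_MAX_BUFFER_TXD, so the skb has to be linearized
		 */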
		/* try the simple math, if we have too many frags per segment */
		if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
		    I40E_MAX_BUFFER_TXD) {
			linearize = true;
			goto linearize_chk_done;
		}
		frag = &skb_shinfo(skb)->frags[0];
		/* we might still have more fragments per segment */
		do {
			size += skb_frag_size(frag);
			frag++; j++;
			if ((size >= skb_shinfo(skb)->gso_size) &&
			    (j < I40E_MAX_BUFFER_TXD)) {
				size = (size % skb_shinfo(skb)->gso_size);
				j = (size) ? 1 : 0;
			}
			if (j == I40E_MAX_BUFFER_TXD) {
				linearize = true;
				break;
			}
			num_frags--;
		} while (num_frags);
	} else {
		if (num_frags >= I40E_MAX_BUFFER_TXD)
			linearize = true;
	}

linearize_chk_done:
	return linearize;
}

/**
 * i40e_tx_map - Build the Tx descriptor
 * @tx_ring:  ring to send buffer on
 * @skb:      send buffer
 * @first:    first buffer info buffer to use
 * @tx_flags: collected send information
 * @hdr_len:  size of the packet header
 * @td_cmd:   the command field in the descriptor
 * @td_offset: offset for checksum or crc
 **/
#ifdef I40E_FCOE
inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
			struct i40e_tx_buffer *first, u32 tx_flags,
			const u8 hdr_len, u32 td_cmd, u32 td_offset)
#else
static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
			       struct i40e_tx_buffer *first, u32 tx_flags,
			       const u8 hdr_len, u32 td_cmd, u32 td_offset)
#endif
{
	unsigned int data_len = skb->data_len;
	unsigned int size = skb_headlen(skb);
	struct skb_frag_struct *frag;
	struct i40e_tx_buffer *tx_bi;
	struct i40e_tx_desc *tx_desc;
	u16 i = tx_ring->next_to_use;
	u32 td_tag = 0;
	dma_addr_t dma;
	u16 gso_segs;

	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
			 I40E_TX_FLAGS_VLAN_SHIFT;
	}

	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
		gso_segs = skb_shinfo(skb)->gso_segs;
	else
		gso_segs = 1;

	/* multiply data chunks by size of headers */
	first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
	first->gso_segs = gso_segs;
	first->skb = skb;
	first->tx_flags = tx_flags;

	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);

	tx_desc = I40E_TX_DESC(tx_ring, i);
	tx_bi = first;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_bi, len, size);
		dma_unmap_addr_set(tx_bi, dma, dma);

		tx_desc->buffer_addr = cpu_to_le64(dma);
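
		/* a buffer larger than I40E_MAX_DATA_PER_TXD must be split
		 * across several data descriptors that reuse the same DMA
		 * mapping at increasing offsets
		 */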
		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
			tx_desc->cmd_type_offset_bsz =
				build_ctob(td_cmd, td_offset,
					   I40E_MAX_DATA_PER_TXD, td_tag);

			tx_desc++;
			i++;
			if (i == tx_ring->count) {
				tx_desc = I40E_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += I40E_MAX_DATA_PER_TXD;
			size -= I40E_MAX_DATA_PER_TXD;

			tx_desc->buffer_addr = cpu_to_le64(dma);
		}

		if (likely(!data_len))
			break;

		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
							  size, td_tag);

		tx_desc++;
		i++;
		if (i == tx_ring->count) {
			tx_desc = I40E_TX_DESC(tx_ring, 0);
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_bi = &tx_ring->tx_bi[i];
	}

	/* Place RS bit on last descriptor of any packet that spans across the
	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
	 */
	if (((i & WB_STRIDE) != WB_STRIDE) &&
	    (first <= &tx_ring->tx_bi[i]) &&
	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
		tx_desc->cmd_type_offset_bsz =
			build_ctob(td_cmd, td_offset, size, td_tag) |
			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
					 I40E_TXD_QW1_CMD_SHIFT);
	} else {
		tx_desc->cmd_type_offset_bsz =
			build_ctob(td_cmd, td_offset, size, td_tag) |
			cpu_to_le64((u64)I40E_TXD_CMD <<
					 I40E_TXD_QW1_CMD_SHIFT);
	}

	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
						 tx_ring->queue_index),
			     first->bytecount);

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.  (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	i++;
	if (i == tx_ring->count)
		i = 0;

	tx_ring->next_to_use = i;

	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
	/* notify HW of packet */
	if (!skb->xmit_more ||
	    netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
						   tx_ring->queue_index)))
		writel(i, tx_ring->tail);
	else
		prefetchw(tx_desc + 1);

	return;

dma_error:
	dev_info(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_bi map */
	for (;;) {
		tx_bi = &tx_ring->tx_bi[i];
		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
		if (tx_bi == first)
			break;
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	tx_ring->next_to_use = i;
}

/**
 * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
 * @skb:     send buffer
 * @tx_ring: ring to send buffer on
 *
 * Returns number of data descriptors needed for this skb. Returns 0 to
 * indicate there are not enough descriptors available in this ring since
 * we need at least one.
 **/
#ifdef I40E_FCOE
inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
				      struct i40e_ring *tx_ring)
#else
static inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
					     struct i40e_ring *tx_ring)
#endif
{
	unsigned int f;
	int count = 0;

	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
	 *       + 4 desc gap to avoid the cache line where head is,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time
	 */
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);

	count += TXD_USE_COUNT(skb_headlen(skb));
	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
		tx_ring->tx_stats.tx_busy++;
		return 0;
	}
	return count;
}

/**
 * i40e_xmit_frame_ring - Sends buffer on Tx ring
 * @skb:     send buffer
 * @tx_ring: ring to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 **/
static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
					struct i40e_ring *tx_ring)
{
	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
	u32 cd_tunneling = 0, cd_l2tag2 = 0;
	struct i40e_tx_buffer *first;
	u32 td_offset = 0;
	u32 tx_flags = 0;
	__be16 protocol;
	u32 td_cmd = 0;
	u8 hdr_len = 0;
	int tsyn;
	int tso;

	if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
		return NETDEV_TX_BUSY;

	/* prepare the xmit flags */
	if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
		goto out_drop;

	/* obtain protocol of skb */
	protocol = vlan_get_protocol(skb);

	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_bi[tx_ring->next_to_use];

	/* setup IPv4/IPv6 offloads */
	if (protocol == htons(ETH_P_IP))
		tx_flags |= I40E_TX_FLAGS_IPV4;
	else if (protocol == htons(ETH_P_IPV6))
		tx_flags |= I40E_TX_FLAGS_IPV6;

	tso = i40e_tso(tx_ring, skb, &hdr_len,
		       &cd_type_cmd_tso_mss, &cd_tunneling);

	if (tso < 0)
		goto out_drop;
	else if (tso)
		tx_flags |= I40E_TX_FLAGS_TSO;

	tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);

	if (tsyn)
		tx_flags |= I40E_TX_FLAGS_TSYN;

	if (i40e_chk_linearize(skb, tx_flags))
		if (skb_linearize(skb))
			goto out_drop;

	skb_tx_timestamp(skb);

	/* always enable CRC insertion offload */
	td_cmd |= I40E_TX_DESC_CMD_ICRC;

	/* Always offload the checksum, since it's in the data descriptor */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		tx_flags |= I40E_TX_FLAGS_CSUM;

		i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
				    tx_ring, &cd_tunneling);
	}

	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
			   cd_tunneling, cd_l2tag2);

	/* Add Flow Director ATR if it's enabled.
	 *
	 * NOTE: this must always be directly before the data descriptor.
	 */
	i40e_atr(tx_ring, skb, tx_flags, protocol);

	i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
		    td_cmd, td_offset);

	return NETDEV_TX_OK;

out_drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

/**
 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
 * @skb:    send buffer
 * @netdev: network interface device structure
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 **/
netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
	struct i40e_netdev_priv *np = netdev_priv(netdev);
	struct i40e_vsi *vsi = np->vsi;
	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];

	/* hardware can't handle really short frames, hardware padding works
	 * beyond this point
	 */
	if (skb_put_padto(skb, I40E_MIN_TX_LEN))
		return NETDEV_TX_OK;

	return i40e_xmit_frame_ring(skb, tx_ring);