xref: /linux/drivers/net/ethernet/chelsio/cxgb4vf/sge.c (revision 622c62b52fae7c1367f0fd55442d5e162c052d5f)
1c6e0d914SCasey Leedom /*
2c6e0d914SCasey Leedom  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3c6e0d914SCasey Leedom  * driver for Linux.
4c6e0d914SCasey Leedom  *
5c6e0d914SCasey Leedom  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6c6e0d914SCasey Leedom  *
7c6e0d914SCasey Leedom  * This software is available to you under a choice of one of two
8c6e0d914SCasey Leedom  * licenses.  You may choose to be licensed under the terms of the GNU
9c6e0d914SCasey Leedom  * General Public License (GPL) Version 2, available from the file
10c6e0d914SCasey Leedom  * COPYING in the main directory of this source tree, or the
11c6e0d914SCasey Leedom  * OpenIB.org BSD license below:
12c6e0d914SCasey Leedom  *
13c6e0d914SCasey Leedom  *     Redistribution and use in source and binary forms, with or
14c6e0d914SCasey Leedom  *     without modification, are permitted provided that the following
15c6e0d914SCasey Leedom  *     conditions are met:
16c6e0d914SCasey Leedom  *
17c6e0d914SCasey Leedom  *      - Redistributions of source code must retain the above
18c6e0d914SCasey Leedom  *        copyright notice, this list of conditions and the following
19c6e0d914SCasey Leedom  *        disclaimer.
20c6e0d914SCasey Leedom  *
21c6e0d914SCasey Leedom  *      - Redistributions in binary form must reproduce the above
22c6e0d914SCasey Leedom  *        copyright notice, this list of conditions and the following
23c6e0d914SCasey Leedom  *        disclaimer in the documentation and/or other materials
24c6e0d914SCasey Leedom  *        provided with the distribution.
25c6e0d914SCasey Leedom  *
26c6e0d914SCasey Leedom  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27c6e0d914SCasey Leedom  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28c6e0d914SCasey Leedom  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29c6e0d914SCasey Leedom  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30c6e0d914SCasey Leedom  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31c6e0d914SCasey Leedom  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32c6e0d914SCasey Leedom  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33c6e0d914SCasey Leedom  * SOFTWARE.
34c6e0d914SCasey Leedom  */
35c6e0d914SCasey Leedom 
36c6e0d914SCasey Leedom #include <linux/skbuff.h>
37c6e0d914SCasey Leedom #include <linux/netdevice.h>
38c6e0d914SCasey Leedom #include <linux/etherdevice.h>
39c6e0d914SCasey Leedom #include <linux/if_vlan.h>
40c6e0d914SCasey Leedom #include <linux/ip.h>
41c6e0d914SCasey Leedom #include <net/ipv6.h>
42c6e0d914SCasey Leedom #include <net/tcp.h>
43c6e0d914SCasey Leedom #include <linux/dma-mapping.h>
4470c71606SPaul Gortmaker #include <linux/prefetch.h>
45c6e0d914SCasey Leedom 
46c6e0d914SCasey Leedom #include "t4vf_common.h"
47c6e0d914SCasey Leedom #include "t4vf_defs.h"
48c6e0d914SCasey Leedom 
49c6e0d914SCasey Leedom #include "../cxgb4/t4_regs.h"
50c6e0d914SCasey Leedom #include "../cxgb4/t4fw_api.h"
51c6e0d914SCasey Leedom #include "../cxgb4/t4_msg.h"
52c6e0d914SCasey Leedom 
53c6e0d914SCasey Leedom /*
54c6e0d914SCasey Leedom  * Decoded Adapter Parameters.
55c6e0d914SCasey Leedom  */
56c6e0d914SCasey Leedom static u32 FL_PG_ORDER;		/* large page allocation size */
57c6e0d914SCasey Leedom static u32 STAT_LEN;		/* length of status page at ring end */
58c6e0d914SCasey Leedom static u32 PKTSHIFT;		/* padding between CPL and packet data */
59c6e0d914SCasey Leedom static u32 FL_ALIGN;		/* response queue message alignment */
60c6e0d914SCasey Leedom 
61c6e0d914SCasey Leedom /*
62c6e0d914SCasey Leedom  * Constants ...
63c6e0d914SCasey Leedom  */
64c6e0d914SCasey Leedom enum {
65c6e0d914SCasey Leedom 	/*
66c6e0d914SCasey Leedom 	 * Egress Queue sizes, producer and consumer indices are all in units
67c6e0d914SCasey Leedom 	 * of Egress Context Units bytes.  Note that as far as the hardware is
68c6e0d914SCasey Leedom 	 * concerned, the free list is an Egress Queue (the host produces free
69c6e0d914SCasey Leedom 	 * buffers which the hardware consumes) and free list entries are
70c6e0d914SCasey Leedom 	 * 64-bit PCI DMA addresses.
71c6e0d914SCasey Leedom 	 */
72c6e0d914SCasey Leedom 	EQ_UNIT = SGE_EQ_IDXSIZE,
73c6e0d914SCasey Leedom 	FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
74c6e0d914SCasey Leedom 	TXD_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
75c6e0d914SCasey Leedom 
76c6e0d914SCasey Leedom 	/*
77c6e0d914SCasey Leedom 	 * Max number of TX descriptors we clean up at a time.  Should be
78c6e0d914SCasey Leedom 	 * modest as freeing skbs isn't cheap and it happens while holding
79c6e0d914SCasey Leedom 	 * locks.  We just need to free packets faster than they arrive, we
80c6e0d914SCasey Leedom 	 * eventually catch up and keep the amortized cost reasonable.
81c6e0d914SCasey Leedom 	 */
82c6e0d914SCasey Leedom 	MAX_TX_RECLAIM = 16,
83c6e0d914SCasey Leedom 
84c6e0d914SCasey Leedom 	/*
85c6e0d914SCasey Leedom 	 * Max number of Rx buffers we replenish at a time.  Again keep this
86c6e0d914SCasey Leedom 	 * modest, allocating buffers isn't cheap either.
87c6e0d914SCasey Leedom 	 */
88c6e0d914SCasey Leedom 	MAX_RX_REFILL = 16,
89c6e0d914SCasey Leedom 
90c6e0d914SCasey Leedom 	/*
91c6e0d914SCasey Leedom 	 * Period of the Rx queue check timer.  This timer is infrequent as it
92c6e0d914SCasey Leedom 	 * has something to do only when the system experiences severe memory
93c6e0d914SCasey Leedom 	 * shortage.
94c6e0d914SCasey Leedom 	 */
95c6e0d914SCasey Leedom 	RX_QCHECK_PERIOD = (HZ / 2),
96c6e0d914SCasey Leedom 
97c6e0d914SCasey Leedom 	/*
98c6e0d914SCasey Leedom 	 * Period of the TX queue check timer and the maximum number of TX
99c6e0d914SCasey Leedom 	 * descriptors to be reclaimed by the TX timer.
100c6e0d914SCasey Leedom 	 */
101c6e0d914SCasey Leedom 	TX_QCHECK_PERIOD = (HZ / 2),
102c6e0d914SCasey Leedom 	MAX_TIMER_TX_RECLAIM = 100,
103c6e0d914SCasey Leedom 
104c6e0d914SCasey Leedom 	/*
105c6e0d914SCasey Leedom 	 * An FL with <= FL_STARVE_THRES buffers is starving and a periodic
106c6e0d914SCasey Leedom 	 * timer will attempt to refill it.
107c6e0d914SCasey Leedom 	 */
108c6e0d914SCasey Leedom 	FL_STARVE_THRES = 4,
109c6e0d914SCasey Leedom 
110c6e0d914SCasey Leedom 	/*
111c6e0d914SCasey Leedom 	 * Suspend an Ethernet TX queue with fewer available descriptors than
112c6e0d914SCasey Leedom 	 * this.  We always want to have room for a maximum sized packet:
113c6e0d914SCasey Leedom 	 * inline immediate data + MAX_SKB_FRAGS. This is the same as
114c6e0d914SCasey Leedom 	 * calc_tx_flits() for a TSO packet with nr_frags == MAX_SKB_FRAGS
115c6e0d914SCasey Leedom 	 * (see that function and its helpers for a description of the
116c6e0d914SCasey Leedom 	 * calculation).
117c6e0d914SCasey Leedom 	 */
118c6e0d914SCasey Leedom 	ETHTXQ_MAX_FRAGS = MAX_SKB_FRAGS + 1,
119c6e0d914SCasey Leedom 	ETHTXQ_MAX_SGL_LEN = ((3 * (ETHTXQ_MAX_FRAGS-1))/2 +
120c6e0d914SCasey Leedom 				   ((ETHTXQ_MAX_FRAGS-1) & 1) +
121c6e0d914SCasey Leedom 				   2),
122c6e0d914SCasey Leedom 	ETHTXQ_MAX_HDR = (sizeof(struct fw_eth_tx_pkt_vm_wr) +
123c6e0d914SCasey Leedom 			  sizeof(struct cpl_tx_pkt_lso_core) +
124c6e0d914SCasey Leedom 			  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64),
125c6e0d914SCasey Leedom 	ETHTXQ_MAX_FLITS = ETHTXQ_MAX_SGL_LEN + ETHTXQ_MAX_HDR,
126c6e0d914SCasey Leedom 
127c6e0d914SCasey Leedom 	ETHTXQ_STOP_THRES = 1 + DIV_ROUND_UP(ETHTXQ_MAX_FLITS, TXD_PER_EQ_UNIT),
128c6e0d914SCasey Leedom 
129c6e0d914SCasey Leedom 	/*
130c6e0d914SCasey Leedom 	 * Max TX descriptor space we allow for an Ethernet packet to be
131c6e0d914SCasey Leedom 	 * inlined into a WR.  This is limited by the maximum value which
132c6e0d914SCasey Leedom 	 * we can specify for immediate data in the firmware Ethernet TX
133c6e0d914SCasey Leedom 	 * Work Request.
134c6e0d914SCasey Leedom 	 */
135c6e0d914SCasey Leedom 	MAX_IMM_TX_PKT_LEN = FW_WR_IMMDLEN_MASK,
136c6e0d914SCasey Leedom 
137c6e0d914SCasey Leedom 	/*
138c6e0d914SCasey Leedom 	 * Max size of a WR sent through a control TX queue.
139c6e0d914SCasey Leedom 	 */
140c6e0d914SCasey Leedom 	MAX_CTRL_WR_LEN = 256,
141c6e0d914SCasey Leedom 
142c6e0d914SCasey Leedom 	/*
143c6e0d914SCasey Leedom 	 * Maximum amount of data which we'll ever need to inline into a
144c6e0d914SCasey Leedom 	 * TX ring: max(MAX_IMM_TX_PKT_LEN, MAX_CTRL_WR_LEN).
145c6e0d914SCasey Leedom 	 */
146c6e0d914SCasey Leedom 	MAX_IMM_TX_LEN = (MAX_IMM_TX_PKT_LEN > MAX_CTRL_WR_LEN
147c6e0d914SCasey Leedom 			  ? MAX_IMM_TX_PKT_LEN
148c6e0d914SCasey Leedom 			  : MAX_CTRL_WR_LEN),
149c6e0d914SCasey Leedom 
150c6e0d914SCasey Leedom 	/*
151c6e0d914SCasey Leedom 	 * For incoming packets less than RX_COPY_THRES, we copy the data into
152c6e0d914SCasey Leedom 	 * an skb rather than referencing the data.  We allocate enough
153c6e0d914SCasey Leedom 	 * in-line room in skb's to accommodate pulling in RX_PULL_LEN bytes
154c6e0d914SCasey Leedom 	 * of the data (header).
155c6e0d914SCasey Leedom 	 */
156c6e0d914SCasey Leedom 	RX_COPY_THRES = 256,
157c6e0d914SCasey Leedom 	RX_PULL_LEN = 128,
158c6e0d914SCasey Leedom 
159c6e0d914SCasey Leedom 	/*
160eb6c503dSCasey Leedom 	 * Main body length for sk_buffs used for RX Ethernet packets with
161eb6c503dSCasey Leedom 	 * fragments.  Should be >= RX_PULL_LEN but possibly bigger to give
162eb6c503dSCasey Leedom 	 * pskb_may_pull() some room.
163c6e0d914SCasey Leedom 	 */
164eb6c503dSCasey Leedom 	RX_SKB_LEN = 512,
165eb6c503dSCasey Leedom };
166c6e0d914SCasey Leedom 
/*
 * Per TX descriptor software state: the skb the data came from and the
 * location of its scatter/gather list inside the TX queue.
 */
struct tx_sw_desc {
	struct sk_buff *skb;		/* source socket buffer, if kept */
	struct ulptx_sgl *sgl;		/* its scatter/gather list in the ring */
};
174c6e0d914SCasey Leedom 
175c6e0d914SCasey Leedom /*
176c6e0d914SCasey Leedom  * Software state per RX Free List descriptor.  We keep track of the allocated
177c6e0d914SCasey Leedom  * FL page, its size, and its PCI DMA address (if the page is mapped).  The FL
178c6e0d914SCasey Leedom  * page size and its PCI DMA mapped state are stored in the low bits of the
179c6e0d914SCasey Leedom  * PCI DMA address as per below.
180c6e0d914SCasey Leedom  */
181c6e0d914SCasey Leedom struct rx_sw_desc {
182c6e0d914SCasey Leedom 	struct page *page;		/* Free List page buffer */
183c6e0d914SCasey Leedom 	dma_addr_t dma_addr;		/* PCI DMA address (if mapped) */
184c6e0d914SCasey Leedom 					/*   and flags (see below) */
185c6e0d914SCasey Leedom };
186c6e0d914SCasey Leedom 
/*
 * Flag bits kept in the low bits of rx_sw_desc.dma_addr.  The SGE itself
 * interprets the low 4 bits of the address as an index into the
 * SGE_FL_BUFFER_SIZE[index] register array to learn the buffer size.  We
 * only ever use SGE_FL_BUFFER_SIZE0 and SGE_FL_BUFFER_SIZE1, so the value
 * the hardware sees in those 4 bits must be 0 or 1.  For that reason the
 * software-only "mapped for DMA" state is stored inverted (as "unmapped"),
 * which keeps that bit low whenever the hardware sees the address.
 */
enum {
	RX_LARGE_BUF    = 1 << 0,	/* buffer uses SGE_FL_BUFFER_SIZE[1] */
	RX_UNMAPPED_BUF = 1 << 1,	/* buffer has no DMA mapping */
};
200c6e0d914SCasey Leedom 
201c6e0d914SCasey Leedom /**
202c6e0d914SCasey Leedom  *	get_buf_addr - return DMA buffer address of software descriptor
203c6e0d914SCasey Leedom  *	@sdesc: pointer to the software buffer descriptor
204c6e0d914SCasey Leedom  *
205c6e0d914SCasey Leedom  *	Return the DMA buffer address of a software descriptor (stripping out
206c6e0d914SCasey Leedom  *	our low-order flag bits).
207c6e0d914SCasey Leedom  */
208c6e0d914SCasey Leedom static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *sdesc)
209c6e0d914SCasey Leedom {
210c6e0d914SCasey Leedom 	return sdesc->dma_addr & ~(dma_addr_t)(RX_LARGE_BUF | RX_UNMAPPED_BUF);
211c6e0d914SCasey Leedom }
212c6e0d914SCasey Leedom 
213c6e0d914SCasey Leedom /**
214c6e0d914SCasey Leedom  *	is_buf_mapped - is buffer mapped for DMA?
215c6e0d914SCasey Leedom  *	@sdesc: pointer to the software buffer descriptor
216c6e0d914SCasey Leedom  *
217c6e0d914SCasey Leedom  *	Determine whether the buffer associated with a software descriptor in
218c6e0d914SCasey Leedom  *	mapped for DMA or not.
219c6e0d914SCasey Leedom  */
220c6e0d914SCasey Leedom static inline bool is_buf_mapped(const struct rx_sw_desc *sdesc)
221c6e0d914SCasey Leedom {
222c6e0d914SCasey Leedom 	return !(sdesc->dma_addr & RX_UNMAPPED_BUF);
223c6e0d914SCasey Leedom }
224c6e0d914SCasey Leedom 
/**
 *	need_skb_unmap - does the platform need unmapping of sk_buffs?
 *
 *	Returns 1 when CONFIG_NEED_DMA_MAP_STATE is defined and 0 otherwise.
 *	The result is a compile-time constant, so when it is 0 the compiler
 *	can discard the unmapping code guarded by it.
 */
static inline int need_skb_unmap(void)
{
	int needed = 0;

#ifdef CONFIG_NEED_DMA_MAP_STATE
	needed = 1;
#endif
	return needed;
}
239c6e0d914SCasey Leedom 
240c6e0d914SCasey Leedom /**
241c6e0d914SCasey Leedom  *	txq_avail - return the number of available slots in a TX queue
242c6e0d914SCasey Leedom  *	@tq: the TX queue
243c6e0d914SCasey Leedom  *
244c6e0d914SCasey Leedom  *	Returns the number of available descriptors in a TX queue.
245c6e0d914SCasey Leedom  */
246c6e0d914SCasey Leedom static inline unsigned int txq_avail(const struct sge_txq *tq)
247c6e0d914SCasey Leedom {
248c6e0d914SCasey Leedom 	return tq->size - 1 - tq->in_use;
249c6e0d914SCasey Leedom }
250c6e0d914SCasey Leedom 
251c6e0d914SCasey Leedom /**
252c6e0d914SCasey Leedom  *	fl_cap - return the capacity of a Free List
253c6e0d914SCasey Leedom  *	@fl: the Free List
254c6e0d914SCasey Leedom  *
255c6e0d914SCasey Leedom  *	Returns the capacity of a Free List.  The capacity is less than the
256c6e0d914SCasey Leedom  *	size because an Egress Queue Index Unit worth of descriptors needs to
257c6e0d914SCasey Leedom  *	be left unpopulated, otherwise the Producer and Consumer indices PIDX
258c6e0d914SCasey Leedom  *	and CIDX will match and the hardware will think the FL is empty.
259c6e0d914SCasey Leedom  */
260c6e0d914SCasey Leedom static inline unsigned int fl_cap(const struct sge_fl *fl)
261c6e0d914SCasey Leedom {
262c6e0d914SCasey Leedom 	return fl->size - FL_PER_EQ_UNIT;
263c6e0d914SCasey Leedom }
264c6e0d914SCasey Leedom 
265c6e0d914SCasey Leedom /**
266c6e0d914SCasey Leedom  *	fl_starving - return whether a Free List is starving.
267c6e0d914SCasey Leedom  *	@fl: the Free List
268c6e0d914SCasey Leedom  *
269c6e0d914SCasey Leedom  *	Tests specified Free List to see whether the number of buffers
270c6e0d914SCasey Leedom  *	available to the hardware has falled below our "starvation"
27125985edcSLucas De Marchi  *	threshold.
272c6e0d914SCasey Leedom  */
273c6e0d914SCasey Leedom static inline bool fl_starving(const struct sge_fl *fl)
274c6e0d914SCasey Leedom {
275c6e0d914SCasey Leedom 	return fl->avail - fl->pend_cred <= FL_STARVE_THRES;
276c6e0d914SCasey Leedom }
277c6e0d914SCasey Leedom 
278c6e0d914SCasey Leedom /**
279c6e0d914SCasey Leedom  *	map_skb -  map an skb for DMA to the device
280c6e0d914SCasey Leedom  *	@dev: the egress net device
281c6e0d914SCasey Leedom  *	@skb: the packet to map
282c6e0d914SCasey Leedom  *	@addr: a pointer to the base of the DMA mapping array
283c6e0d914SCasey Leedom  *
284c6e0d914SCasey Leedom  *	Map an skb for DMA to the device and return an array of DMA addresses.
285c6e0d914SCasey Leedom  */
286c6e0d914SCasey Leedom static int map_skb(struct device *dev, const struct sk_buff *skb,
287c6e0d914SCasey Leedom 		   dma_addr_t *addr)
288c6e0d914SCasey Leedom {
289c6e0d914SCasey Leedom 	const skb_frag_t *fp, *end;
290c6e0d914SCasey Leedom 	const struct skb_shared_info *si;
291c6e0d914SCasey Leedom 
292c6e0d914SCasey Leedom 	*addr = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
293c6e0d914SCasey Leedom 	if (dma_mapping_error(dev, *addr))
294c6e0d914SCasey Leedom 		goto out_err;
295c6e0d914SCasey Leedom 
296c6e0d914SCasey Leedom 	si = skb_shinfo(skb);
297c6e0d914SCasey Leedom 	end = &si->frags[si->nr_frags];
298c6e0d914SCasey Leedom 	for (fp = si->frags; fp < end; fp++) {
299a0006a86SIan Campbell 		*++addr = skb_frag_dma_map(dev, fp, 0, skb_frag_size(fp),
300a0006a86SIan Campbell 					   DMA_TO_DEVICE);
301c6e0d914SCasey Leedom 		if (dma_mapping_error(dev, *addr))
302c6e0d914SCasey Leedom 			goto unwind;
303c6e0d914SCasey Leedom 	}
304c6e0d914SCasey Leedom 	return 0;
305c6e0d914SCasey Leedom 
306c6e0d914SCasey Leedom unwind:
307c6e0d914SCasey Leedom 	while (fp-- > si->frags)
3089e903e08SEric Dumazet 		dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE);
309c6e0d914SCasey Leedom 	dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE);
310c6e0d914SCasey Leedom 
311c6e0d914SCasey Leedom out_err:
312c6e0d914SCasey Leedom 	return -ENOMEM;
313c6e0d914SCasey Leedom }
314c6e0d914SCasey Leedom 
315c6e0d914SCasey Leedom static void unmap_sgl(struct device *dev, const struct sk_buff *skb,
316c6e0d914SCasey Leedom 		      const struct ulptx_sgl *sgl, const struct sge_txq *tq)
317c6e0d914SCasey Leedom {
318c6e0d914SCasey Leedom 	const struct ulptx_sge_pair *p;
319c6e0d914SCasey Leedom 	unsigned int nfrags = skb_shinfo(skb)->nr_frags;
320c6e0d914SCasey Leedom 
321c6e0d914SCasey Leedom 	if (likely(skb_headlen(skb)))
322c6e0d914SCasey Leedom 		dma_unmap_single(dev, be64_to_cpu(sgl->addr0),
323c6e0d914SCasey Leedom 				 be32_to_cpu(sgl->len0), DMA_TO_DEVICE);
324c6e0d914SCasey Leedom 	else {
325c6e0d914SCasey Leedom 		dma_unmap_page(dev, be64_to_cpu(sgl->addr0),
326c6e0d914SCasey Leedom 			       be32_to_cpu(sgl->len0), DMA_TO_DEVICE);
327c6e0d914SCasey Leedom 		nfrags--;
328c6e0d914SCasey Leedom 	}
329c6e0d914SCasey Leedom 
330c6e0d914SCasey Leedom 	/*
331c6e0d914SCasey Leedom 	 * the complexity below is because of the possibility of a wrap-around
332c6e0d914SCasey Leedom 	 * in the middle of an SGL
333c6e0d914SCasey Leedom 	 */
334c6e0d914SCasey Leedom 	for (p = sgl->sge; nfrags >= 2; nfrags -= 2) {
335c6e0d914SCasey Leedom 		if (likely((u8 *)(p + 1) <= (u8 *)tq->stat)) {
336c6e0d914SCasey Leedom unmap:
337c6e0d914SCasey Leedom 			dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
338c6e0d914SCasey Leedom 				       be32_to_cpu(p->len[0]), DMA_TO_DEVICE);
339c6e0d914SCasey Leedom 			dma_unmap_page(dev, be64_to_cpu(p->addr[1]),
340c6e0d914SCasey Leedom 				       be32_to_cpu(p->len[1]), DMA_TO_DEVICE);
341c6e0d914SCasey Leedom 			p++;
342c6e0d914SCasey Leedom 		} else if ((u8 *)p == (u8 *)tq->stat) {
343c6e0d914SCasey Leedom 			p = (const struct ulptx_sge_pair *)tq->desc;
344c6e0d914SCasey Leedom 			goto unmap;
345c6e0d914SCasey Leedom 		} else if ((u8 *)p + 8 == (u8 *)tq->stat) {
346c6e0d914SCasey Leedom 			const __be64 *addr = (const __be64 *)tq->desc;
347c6e0d914SCasey Leedom 
348c6e0d914SCasey Leedom 			dma_unmap_page(dev, be64_to_cpu(addr[0]),
349c6e0d914SCasey Leedom 				       be32_to_cpu(p->len[0]), DMA_TO_DEVICE);
350c6e0d914SCasey Leedom 			dma_unmap_page(dev, be64_to_cpu(addr[1]),
351c6e0d914SCasey Leedom 				       be32_to_cpu(p->len[1]), DMA_TO_DEVICE);
352c6e0d914SCasey Leedom 			p = (const struct ulptx_sge_pair *)&addr[2];
353c6e0d914SCasey Leedom 		} else {
354c6e0d914SCasey Leedom 			const __be64 *addr = (const __be64 *)tq->desc;
355c6e0d914SCasey Leedom 
356c6e0d914SCasey Leedom 			dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
357c6e0d914SCasey Leedom 				       be32_to_cpu(p->len[0]), DMA_TO_DEVICE);
358c6e0d914SCasey Leedom 			dma_unmap_page(dev, be64_to_cpu(addr[0]),
359c6e0d914SCasey Leedom 				       be32_to_cpu(p->len[1]), DMA_TO_DEVICE);
360c6e0d914SCasey Leedom 			p = (const struct ulptx_sge_pair *)&addr[1];
361c6e0d914SCasey Leedom 		}
362c6e0d914SCasey Leedom 	}
363c6e0d914SCasey Leedom 	if (nfrags) {
364c6e0d914SCasey Leedom 		__be64 addr;
365c6e0d914SCasey Leedom 
366c6e0d914SCasey Leedom 		if ((u8 *)p == (u8 *)tq->stat)
367c6e0d914SCasey Leedom 			p = (const struct ulptx_sge_pair *)tq->desc;
368c6e0d914SCasey Leedom 		addr = ((u8 *)p + 16 <= (u8 *)tq->stat
369c6e0d914SCasey Leedom 			? p->addr[0]
370c6e0d914SCasey Leedom 			: *(const __be64 *)tq->desc);
371c6e0d914SCasey Leedom 		dma_unmap_page(dev, be64_to_cpu(addr), be32_to_cpu(p->len[0]),
372c6e0d914SCasey Leedom 			       DMA_TO_DEVICE);
373c6e0d914SCasey Leedom 	}
374c6e0d914SCasey Leedom }
375c6e0d914SCasey Leedom 
376c6e0d914SCasey Leedom /**
377c6e0d914SCasey Leedom  *	free_tx_desc - reclaims TX descriptors and their buffers
378c6e0d914SCasey Leedom  *	@adapter: the adapter
379c6e0d914SCasey Leedom  *	@tq: the TX queue to reclaim descriptors from
380c6e0d914SCasey Leedom  *	@n: the number of descriptors to reclaim
381c6e0d914SCasey Leedom  *	@unmap: whether the buffers should be unmapped for DMA
382c6e0d914SCasey Leedom  *
383c6e0d914SCasey Leedom  *	Reclaims TX descriptors from an SGE TX queue and frees the associated
384c6e0d914SCasey Leedom  *	TX buffers.  Called with the TX queue lock held.
385c6e0d914SCasey Leedom  */
386c6e0d914SCasey Leedom static void free_tx_desc(struct adapter *adapter, struct sge_txq *tq,
387c6e0d914SCasey Leedom 			 unsigned int n, bool unmap)
388c6e0d914SCasey Leedom {
389c6e0d914SCasey Leedom 	struct tx_sw_desc *sdesc;
390c6e0d914SCasey Leedom 	unsigned int cidx = tq->cidx;
391c6e0d914SCasey Leedom 	struct device *dev = adapter->pdev_dev;
392c6e0d914SCasey Leedom 
393c6e0d914SCasey Leedom 	const int need_unmap = need_skb_unmap() && unmap;
394c6e0d914SCasey Leedom 
395c6e0d914SCasey Leedom 	sdesc = &tq->sdesc[cidx];
396c6e0d914SCasey Leedom 	while (n--) {
397c6e0d914SCasey Leedom 		/*
398c6e0d914SCasey Leedom 		 * If we kept a reference to the original TX skb, we need to
399c6e0d914SCasey Leedom 		 * unmap it from PCI DMA space (if required) and free it.
400c6e0d914SCasey Leedom 		 */
401c6e0d914SCasey Leedom 		if (sdesc->skb) {
402c6e0d914SCasey Leedom 			if (need_unmap)
403c6e0d914SCasey Leedom 				unmap_sgl(dev, sdesc->skb, sdesc->sgl, tq);
404c6e0d914SCasey Leedom 			kfree_skb(sdesc->skb);
405c6e0d914SCasey Leedom 			sdesc->skb = NULL;
406c6e0d914SCasey Leedom 		}
407c6e0d914SCasey Leedom 
408c6e0d914SCasey Leedom 		sdesc++;
409c6e0d914SCasey Leedom 		if (++cidx == tq->size) {
410c6e0d914SCasey Leedom 			cidx = 0;
411c6e0d914SCasey Leedom 			sdesc = tq->sdesc;
412c6e0d914SCasey Leedom 		}
413c6e0d914SCasey Leedom 	}
414c6e0d914SCasey Leedom 	tq->cidx = cidx;
415c6e0d914SCasey Leedom }
416c6e0d914SCasey Leedom 
417c6e0d914SCasey Leedom /*
418c6e0d914SCasey Leedom  * Return the number of reclaimable descriptors in a TX queue.
419c6e0d914SCasey Leedom  */
420c6e0d914SCasey Leedom static inline int reclaimable(const struct sge_txq *tq)
421c6e0d914SCasey Leedom {
422c6e0d914SCasey Leedom 	int hw_cidx = be16_to_cpu(tq->stat->cidx);
423c6e0d914SCasey Leedom 	int reclaimable = hw_cidx - tq->cidx;
424c6e0d914SCasey Leedom 	if (reclaimable < 0)
425c6e0d914SCasey Leedom 		reclaimable += tq->size;
426c6e0d914SCasey Leedom 	return reclaimable;
427c6e0d914SCasey Leedom }
428c6e0d914SCasey Leedom 
429c6e0d914SCasey Leedom /**
430c6e0d914SCasey Leedom  *	reclaim_completed_tx - reclaims completed TX descriptors
431c6e0d914SCasey Leedom  *	@adapter: the adapter
432c6e0d914SCasey Leedom  *	@tq: the TX queue to reclaim completed descriptors from
433c6e0d914SCasey Leedom  *	@unmap: whether the buffers should be unmapped for DMA
434c6e0d914SCasey Leedom  *
435c6e0d914SCasey Leedom  *	Reclaims TX descriptors that the SGE has indicated it has processed,
436c6e0d914SCasey Leedom  *	and frees the associated buffers if possible.  Called with the TX
437c6e0d914SCasey Leedom  *	queue locked.
438c6e0d914SCasey Leedom  */
439c6e0d914SCasey Leedom static inline void reclaim_completed_tx(struct adapter *adapter,
440c6e0d914SCasey Leedom 					struct sge_txq *tq,
441c6e0d914SCasey Leedom 					bool unmap)
442c6e0d914SCasey Leedom {
443c6e0d914SCasey Leedom 	int avail = reclaimable(tq);
444c6e0d914SCasey Leedom 
445c6e0d914SCasey Leedom 	if (avail) {
446c6e0d914SCasey Leedom 		/*
447c6e0d914SCasey Leedom 		 * Limit the amount of clean up work we do at a time to keep
448c6e0d914SCasey Leedom 		 * the TX lock hold time O(1).
449c6e0d914SCasey Leedom 		 */
450c6e0d914SCasey Leedom 		if (avail > MAX_TX_RECLAIM)
451c6e0d914SCasey Leedom 			avail = MAX_TX_RECLAIM;
452c6e0d914SCasey Leedom 
453c6e0d914SCasey Leedom 		free_tx_desc(adapter, tq, avail, unmap);
454c6e0d914SCasey Leedom 		tq->in_use -= avail;
455c6e0d914SCasey Leedom 	}
456c6e0d914SCasey Leedom }
457c6e0d914SCasey Leedom 
458c6e0d914SCasey Leedom /**
459c6e0d914SCasey Leedom  *	get_buf_size - return the size of an RX Free List buffer.
460c6e0d914SCasey Leedom  *	@sdesc: pointer to the software buffer descriptor
461c6e0d914SCasey Leedom  */
462c6e0d914SCasey Leedom static inline int get_buf_size(const struct rx_sw_desc *sdesc)
463c6e0d914SCasey Leedom {
464c6e0d914SCasey Leedom 	return FL_PG_ORDER > 0 && (sdesc->dma_addr & RX_LARGE_BUF)
465c6e0d914SCasey Leedom 		? (PAGE_SIZE << FL_PG_ORDER)
466c6e0d914SCasey Leedom 		: PAGE_SIZE;
467c6e0d914SCasey Leedom }
468c6e0d914SCasey Leedom 
469c6e0d914SCasey Leedom /**
470c6e0d914SCasey Leedom  *	free_rx_bufs - free RX buffers on an SGE Free List
471c6e0d914SCasey Leedom  *	@adapter: the adapter
472c6e0d914SCasey Leedom  *	@fl: the SGE Free List to free buffers from
473c6e0d914SCasey Leedom  *	@n: how many buffers to free
474c6e0d914SCasey Leedom  *
475c6e0d914SCasey Leedom  *	Release the next @n buffers on an SGE Free List RX queue.   The
476c6e0d914SCasey Leedom  *	buffers must be made inaccessible to hardware before calling this
477c6e0d914SCasey Leedom  *	function.
478c6e0d914SCasey Leedom  */
479c6e0d914SCasey Leedom static void free_rx_bufs(struct adapter *adapter, struct sge_fl *fl, int n)
480c6e0d914SCasey Leedom {
481c6e0d914SCasey Leedom 	while (n--) {
482c6e0d914SCasey Leedom 		struct rx_sw_desc *sdesc = &fl->sdesc[fl->cidx];
483c6e0d914SCasey Leedom 
484c6e0d914SCasey Leedom 		if (is_buf_mapped(sdesc))
485c6e0d914SCasey Leedom 			dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc),
486c6e0d914SCasey Leedom 				       get_buf_size(sdesc), PCI_DMA_FROMDEVICE);
487c6e0d914SCasey Leedom 		put_page(sdesc->page);
488c6e0d914SCasey Leedom 		sdesc->page = NULL;
489c6e0d914SCasey Leedom 		if (++fl->cidx == fl->size)
490c6e0d914SCasey Leedom 			fl->cidx = 0;
491c6e0d914SCasey Leedom 		fl->avail--;
492c6e0d914SCasey Leedom 	}
493c6e0d914SCasey Leedom }
494c6e0d914SCasey Leedom 
495c6e0d914SCasey Leedom /**
496c6e0d914SCasey Leedom  *	unmap_rx_buf - unmap the current RX buffer on an SGE Free List
497c6e0d914SCasey Leedom  *	@adapter: the adapter
498c6e0d914SCasey Leedom  *	@fl: the SGE Free List
499c6e0d914SCasey Leedom  *
500c6e0d914SCasey Leedom  *	Unmap the current buffer on an SGE Free List RX queue.   The
501c6e0d914SCasey Leedom  *	buffer must be made inaccessible to HW before calling this function.
502c6e0d914SCasey Leedom  *
503c6e0d914SCasey Leedom  *	This is similar to @free_rx_bufs above but does not free the buffer.
504c6e0d914SCasey Leedom  *	Do note that the FL still loses any further access to the buffer.
505c6e0d914SCasey Leedom  *	This is used predominantly to "transfer ownership" of an FL buffer
506c6e0d914SCasey Leedom  *	to another entity (typically an skb's fragment list).
507c6e0d914SCasey Leedom  */
508c6e0d914SCasey Leedom static void unmap_rx_buf(struct adapter *adapter, struct sge_fl *fl)
509c6e0d914SCasey Leedom {
510c6e0d914SCasey Leedom 	struct rx_sw_desc *sdesc = &fl->sdesc[fl->cidx];
511c6e0d914SCasey Leedom 
512c6e0d914SCasey Leedom 	if (is_buf_mapped(sdesc))
513c6e0d914SCasey Leedom 		dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc),
514c6e0d914SCasey Leedom 			       get_buf_size(sdesc), PCI_DMA_FROMDEVICE);
515c6e0d914SCasey Leedom 	sdesc->page = NULL;
516c6e0d914SCasey Leedom 	if (++fl->cidx == fl->size)
517c6e0d914SCasey Leedom 		fl->cidx = 0;
518c6e0d914SCasey Leedom 	fl->avail--;
519c6e0d914SCasey Leedom }
520c6e0d914SCasey Leedom 
521c6e0d914SCasey Leedom /**
522c6e0d914SCasey Leedom  *	ring_fl_db - righ doorbell on free list
523c6e0d914SCasey Leedom  *	@adapter: the adapter
524c6e0d914SCasey Leedom  *	@fl: the Free List whose doorbell should be rung ...
525c6e0d914SCasey Leedom  *
526c6e0d914SCasey Leedom  *	Tell the Scatter Gather Engine that there are new free list entries
527c6e0d914SCasey Leedom  *	available.
528c6e0d914SCasey Leedom  */
529c6e0d914SCasey Leedom static inline void ring_fl_db(struct adapter *adapter, struct sge_fl *fl)
530c6e0d914SCasey Leedom {
531*622c62b5SSantosh Rastapur 	u32 val;
532*622c62b5SSantosh Rastapur 
533c6e0d914SCasey Leedom 	/*
534c6e0d914SCasey Leedom 	 * The SGE keeps track of its Producer and Consumer Indices in terms
535c6e0d914SCasey Leedom 	 * of Egress Queue Units so we can only tell it about integral numbers
536c6e0d914SCasey Leedom 	 * of multiples of Free List Entries per Egress Queue Units ...
537c6e0d914SCasey Leedom 	 */
538c6e0d914SCasey Leedom 	if (fl->pend_cred >= FL_PER_EQ_UNIT) {
539*622c62b5SSantosh Rastapur 		val = PIDX(fl->pend_cred / FL_PER_EQ_UNIT);
540*622c62b5SSantosh Rastapur 		if (!is_t4(adapter->chip))
541*622c62b5SSantosh Rastapur 			val |= DBTYPE(1);
542c6e0d914SCasey Leedom 		wmb();
543c6e0d914SCasey Leedom 		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
544ce91a923SNaresh Kumar Inna 			     DBPRIO(1) |
545*622c62b5SSantosh Rastapur 			     QID(fl->cntxt_id) | val);
546c6e0d914SCasey Leedom 		fl->pend_cred %= FL_PER_EQ_UNIT;
547c6e0d914SCasey Leedom 	}
548c6e0d914SCasey Leedom }
549c6e0d914SCasey Leedom 
550c6e0d914SCasey Leedom /**
551c6e0d914SCasey Leedom  *	set_rx_sw_desc - initialize software RX buffer descriptor
552c6e0d914SCasey Leedom  *	@sdesc: pointer to the softwore RX buffer descriptor
553c6e0d914SCasey Leedom  *	@page: pointer to the page data structure backing the RX buffer
554c6e0d914SCasey Leedom  *	@dma_addr: PCI DMA address (possibly with low-bit flags)
555c6e0d914SCasey Leedom  */
556c6e0d914SCasey Leedom static inline void set_rx_sw_desc(struct rx_sw_desc *sdesc, struct page *page,
557c6e0d914SCasey Leedom 				  dma_addr_t dma_addr)
558c6e0d914SCasey Leedom {
559c6e0d914SCasey Leedom 	sdesc->page = page;
560c6e0d914SCasey Leedom 	sdesc->dma_addr = dma_addr;
561c6e0d914SCasey Leedom }
562c6e0d914SCasey Leedom 
/*
 * RX buffer poisoning.  When POISON_BUF_VAL is non-negative, poison_buf()
 * fills the first @sz bytes of the page with that value; with the negative
 * value defined here the fill is compiled out and poison_buf() does nothing.
 */
#define POISON_BUF_VAL -1

static inline void poison_buf(struct page *page, size_t sz)
{
#if POISON_BUF_VAL >= 0
	memset(page_address(page), POISON_BUF_VAL, sz);
#endif
}
574c6e0d914SCasey Leedom 
575c6e0d914SCasey Leedom /**
576c6e0d914SCasey Leedom  *	refill_fl - refill an SGE RX buffer ring
577c6e0d914SCasey Leedom  *	@adapter: the adapter
578c6e0d914SCasey Leedom  *	@fl: the Free List ring to refill
579c6e0d914SCasey Leedom  *	@n: the number of new buffers to allocate
580c6e0d914SCasey Leedom  *	@gfp: the gfp flags for the allocations
581c6e0d914SCasey Leedom  *
582c6e0d914SCasey Leedom  *	(Re)populate an SGE free-buffer queue with up to @n new packet buffers,
583c6e0d914SCasey Leedom  *	allocated with the supplied gfp flags.  The caller must assure that
584c6e0d914SCasey Leedom  *	@n does not exceed the queue's capacity -- i.e. (cidx == pidx) _IN
585c6e0d914SCasey Leedom  *	EGRESS QUEUE UNITS_ indicates an empty Free List!  Returns the number
586c6e0d914SCasey Leedom  *	of buffers allocated.  If afterwards the queue is found critically low,
587c6e0d914SCasey Leedom  *	mark it as starving in the bitmap of starving FLs.
588c6e0d914SCasey Leedom  */
589c6e0d914SCasey Leedom static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl,
590c6e0d914SCasey Leedom 			      int n, gfp_t gfp)
591c6e0d914SCasey Leedom {
592c6e0d914SCasey Leedom 	struct page *page;
593c6e0d914SCasey Leedom 	dma_addr_t dma_addr;
594c6e0d914SCasey Leedom 	unsigned int cred = fl->avail;
595c6e0d914SCasey Leedom 	__be64 *d = &fl->desc[fl->pidx];
596c6e0d914SCasey Leedom 	struct rx_sw_desc *sdesc = &fl->sdesc[fl->pidx];
597c6e0d914SCasey Leedom 
598c6e0d914SCasey Leedom 	/*
599c6e0d914SCasey Leedom 	 * Sanity: ensure that the result of adding n Free List buffers
600c6e0d914SCasey Leedom 	 * won't result in wrapping the SGE's Producer Index around to
601c6e0d914SCasey Leedom 	 * it's Consumer Index thereby indicating an empty Free List ...
602c6e0d914SCasey Leedom 	 */
603c6e0d914SCasey Leedom 	BUG_ON(fl->avail + n > fl->size - FL_PER_EQ_UNIT);
604c6e0d914SCasey Leedom 
605c6e0d914SCasey Leedom 	/*
606c6e0d914SCasey Leedom 	 * If we support large pages, prefer large buffers and fail over to
607c6e0d914SCasey Leedom 	 * small pages if we can't allocate large pages to satisfy the refill.
608c6e0d914SCasey Leedom 	 * If we don't support large pages, drop directly into the small page
609c6e0d914SCasey Leedom 	 * allocation code.
610c6e0d914SCasey Leedom 	 */
611c6e0d914SCasey Leedom 	if (FL_PG_ORDER == 0)
612c6e0d914SCasey Leedom 		goto alloc_small_pages;
613c6e0d914SCasey Leedom 
614c6e0d914SCasey Leedom 	while (n) {
615c6e0d914SCasey Leedom 		page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
616c6e0d914SCasey Leedom 				   FL_PG_ORDER);
617c6e0d914SCasey Leedom 		if (unlikely(!page)) {
618c6e0d914SCasey Leedom 			/*
619c6e0d914SCasey Leedom 			 * We've failed inour attempt to allocate a "large
620c6e0d914SCasey Leedom 			 * page".  Fail over to the "small page" allocation
621c6e0d914SCasey Leedom 			 * below.
622c6e0d914SCasey Leedom 			 */
623c6e0d914SCasey Leedom 			fl->large_alloc_failed++;
624c6e0d914SCasey Leedom 			break;
625c6e0d914SCasey Leedom 		}
626c6e0d914SCasey Leedom 		poison_buf(page, PAGE_SIZE << FL_PG_ORDER);
627c6e0d914SCasey Leedom 
628c6e0d914SCasey Leedom 		dma_addr = dma_map_page(adapter->pdev_dev, page, 0,
629c6e0d914SCasey Leedom 					PAGE_SIZE << FL_PG_ORDER,
630c6e0d914SCasey Leedom 					PCI_DMA_FROMDEVICE);
631c6e0d914SCasey Leedom 		if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) {
632c6e0d914SCasey Leedom 			/*
633c6e0d914SCasey Leedom 			 * We've run out of DMA mapping space.  Free up the
634c6e0d914SCasey Leedom 			 * buffer and return with what we've managed to put
635c6e0d914SCasey Leedom 			 * into the free list.  We don't want to fail over to
636c6e0d914SCasey Leedom 			 * the small page allocation below in this case
637c6e0d914SCasey Leedom 			 * because DMA mapping resources are typically
638c6e0d914SCasey Leedom 			 * critical resources once they become scarse.
639c6e0d914SCasey Leedom 			 */
640c6e0d914SCasey Leedom 			__free_pages(page, FL_PG_ORDER);
641c6e0d914SCasey Leedom 			goto out;
642c6e0d914SCasey Leedom 		}
643c6e0d914SCasey Leedom 		dma_addr |= RX_LARGE_BUF;
644c6e0d914SCasey Leedom 		*d++ = cpu_to_be64(dma_addr);
645c6e0d914SCasey Leedom 
646c6e0d914SCasey Leedom 		set_rx_sw_desc(sdesc, page, dma_addr);
647c6e0d914SCasey Leedom 		sdesc++;
648c6e0d914SCasey Leedom 
649c6e0d914SCasey Leedom 		fl->avail++;
650c6e0d914SCasey Leedom 		if (++fl->pidx == fl->size) {
651c6e0d914SCasey Leedom 			fl->pidx = 0;
652c6e0d914SCasey Leedom 			sdesc = fl->sdesc;
653c6e0d914SCasey Leedom 			d = fl->desc;
654c6e0d914SCasey Leedom 		}
655c6e0d914SCasey Leedom 		n--;
656c6e0d914SCasey Leedom 	}
657c6e0d914SCasey Leedom 
658c6e0d914SCasey Leedom alloc_small_pages:
659c6e0d914SCasey Leedom 	while (n--) {
6600614002bSMel Gorman 		page = __skb_alloc_page(gfp | __GFP_NOWARN, NULL);
661c6e0d914SCasey Leedom 		if (unlikely(!page)) {
662c6e0d914SCasey Leedom 			fl->alloc_failed++;
663c6e0d914SCasey Leedom 			break;
664c6e0d914SCasey Leedom 		}
665c6e0d914SCasey Leedom 		poison_buf(page, PAGE_SIZE);
666c6e0d914SCasey Leedom 
667c6e0d914SCasey Leedom 		dma_addr = dma_map_page(adapter->pdev_dev, page, 0, PAGE_SIZE,
668c6e0d914SCasey Leedom 				       PCI_DMA_FROMDEVICE);
669c6e0d914SCasey Leedom 		if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) {
6701f2149c1SEric Dumazet 			put_page(page);
671c6e0d914SCasey Leedom 			break;
672c6e0d914SCasey Leedom 		}
673c6e0d914SCasey Leedom 		*d++ = cpu_to_be64(dma_addr);
674c6e0d914SCasey Leedom 
675c6e0d914SCasey Leedom 		set_rx_sw_desc(sdesc, page, dma_addr);
676c6e0d914SCasey Leedom 		sdesc++;
677c6e0d914SCasey Leedom 
678c6e0d914SCasey Leedom 		fl->avail++;
679c6e0d914SCasey Leedom 		if (++fl->pidx == fl->size) {
680c6e0d914SCasey Leedom 			fl->pidx = 0;
681c6e0d914SCasey Leedom 			sdesc = fl->sdesc;
682c6e0d914SCasey Leedom 			d = fl->desc;
683c6e0d914SCasey Leedom 		}
684c6e0d914SCasey Leedom 	}
685c6e0d914SCasey Leedom 
686c6e0d914SCasey Leedom out:
687c6e0d914SCasey Leedom 	/*
688c6e0d914SCasey Leedom 	 * Update our accounting state to incorporate the new Free List
689c6e0d914SCasey Leedom 	 * buffers, tell the hardware about them and return the number of
69090802ed9SPaul Bolle 	 * buffers which we were able to allocate.
691c6e0d914SCasey Leedom 	 */
692c6e0d914SCasey Leedom 	cred = fl->avail - cred;
693c6e0d914SCasey Leedom 	fl->pend_cred += cred;
694c6e0d914SCasey Leedom 	ring_fl_db(adapter, fl);
695c6e0d914SCasey Leedom 
696c6e0d914SCasey Leedom 	if (unlikely(fl_starving(fl))) {
697c6e0d914SCasey Leedom 		smp_wmb();
698c6e0d914SCasey Leedom 		set_bit(fl->cntxt_id, adapter->sge.starving_fl);
699c6e0d914SCasey Leedom 	}
700c6e0d914SCasey Leedom 
701c6e0d914SCasey Leedom 	return cred;
702c6e0d914SCasey Leedom }
703c6e0d914SCasey Leedom 
704c6e0d914SCasey Leedom /*
705c6e0d914SCasey Leedom  * Refill a Free List to its capacity or the Maximum Refill Increment,
706c6e0d914SCasey Leedom  * whichever is smaller ...
707c6e0d914SCasey Leedom  */
708c6e0d914SCasey Leedom static inline void __refill_fl(struct adapter *adapter, struct sge_fl *fl)
709c6e0d914SCasey Leedom {
710c6e0d914SCasey Leedom 	refill_fl(adapter, fl,
711c6e0d914SCasey Leedom 		  min((unsigned int)MAX_RX_REFILL, fl_cap(fl) - fl->avail),
712c6e0d914SCasey Leedom 		  GFP_ATOMIC);
713c6e0d914SCasey Leedom }
714c6e0d914SCasey Leedom 
715c6e0d914SCasey Leedom /**
716c6e0d914SCasey Leedom  *	alloc_ring - allocate resources for an SGE descriptor ring
717c6e0d914SCasey Leedom  *	@dev: the PCI device's core device
718c6e0d914SCasey Leedom  *	@nelem: the number of descriptors
719c6e0d914SCasey Leedom  *	@hwsize: the size of each hardware descriptor
720c6e0d914SCasey Leedom  *	@swsize: the size of each software descriptor
721c6e0d914SCasey Leedom  *	@busaddrp: the physical PCI bus address of the allocated ring
722c6e0d914SCasey Leedom  *	@swringp: return address pointer for software ring
723c6e0d914SCasey Leedom  *	@stat_size: extra space in hardware ring for status information
724c6e0d914SCasey Leedom  *
725c6e0d914SCasey Leedom  *	Allocates resources for an SGE descriptor ring, such as TX queues,
726c6e0d914SCasey Leedom  *	free buffer lists, response queues, etc.  Each SGE ring requires
727c6e0d914SCasey Leedom  *	space for its hardware descriptors plus, optionally, space for software
728c6e0d914SCasey Leedom  *	state associated with each hardware entry (the metadata).  The function
729c6e0d914SCasey Leedom  *	returns three values: the virtual address for the hardware ring (the
730c6e0d914SCasey Leedom  *	return value of the function), the PCI bus address of the hardware
731c6e0d914SCasey Leedom  *	ring (in *busaddrp), and the address of the software ring (in swringp).
732c6e0d914SCasey Leedom  *	Both the hardware and software rings are returned zeroed out.
733c6e0d914SCasey Leedom  */
734c6e0d914SCasey Leedom static void *alloc_ring(struct device *dev, size_t nelem, size_t hwsize,
735c6e0d914SCasey Leedom 			size_t swsize, dma_addr_t *busaddrp, void *swringp,
736c6e0d914SCasey Leedom 			size_t stat_size)
737c6e0d914SCasey Leedom {
738c6e0d914SCasey Leedom 	/*
739c6e0d914SCasey Leedom 	 * Allocate the hardware ring and PCI DMA bus address space for said.
740c6e0d914SCasey Leedom 	 */
741c6e0d914SCasey Leedom 	size_t hwlen = nelem * hwsize + stat_size;
742c6e0d914SCasey Leedom 	void *hwring = dma_alloc_coherent(dev, hwlen, busaddrp, GFP_KERNEL);
743c6e0d914SCasey Leedom 
744c6e0d914SCasey Leedom 	if (!hwring)
745c6e0d914SCasey Leedom 		return NULL;
746c6e0d914SCasey Leedom 
747c6e0d914SCasey Leedom 	/*
748c6e0d914SCasey Leedom 	 * If the caller wants a software ring, allocate it and return a
749c6e0d914SCasey Leedom 	 * pointer to it in *swringp.
750c6e0d914SCasey Leedom 	 */
751c6e0d914SCasey Leedom 	BUG_ON((swsize != 0) != (swringp != NULL));
752c6e0d914SCasey Leedom 	if (swsize) {
753c6e0d914SCasey Leedom 		void *swring = kcalloc(nelem, swsize, GFP_KERNEL);
754c6e0d914SCasey Leedom 
755c6e0d914SCasey Leedom 		if (!swring) {
756c6e0d914SCasey Leedom 			dma_free_coherent(dev, hwlen, hwring, *busaddrp);
757c6e0d914SCasey Leedom 			return NULL;
758c6e0d914SCasey Leedom 		}
759c6e0d914SCasey Leedom 		*(void **)swringp = swring;
760c6e0d914SCasey Leedom 	}
761c6e0d914SCasey Leedom 
762c6e0d914SCasey Leedom 	/*
763c6e0d914SCasey Leedom 	 * Zero out the hardware ring and return its address as our function
764c6e0d914SCasey Leedom 	 * value.
765c6e0d914SCasey Leedom 	 */
766c6e0d914SCasey Leedom 	memset(hwring, 0, hwlen);
767c6e0d914SCasey Leedom 	return hwring;
768c6e0d914SCasey Leedom }
769c6e0d914SCasey Leedom 
/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits (8-byte units) needed for a Direct
 *	Scatter/Gather List that can hold the given number of entries.
 */
static inline unsigned int sgl_len(unsigned int n)
{
	/*
	 * A Direct Scatter Gather List uses 32-bit lengths and 64-bit PCI DMA
	 * addresses.  It starts with a 32-bit DSGL ULPTX header followed by
	 * Length0 (together one flit) and Address0 (a second flit).  The
	 * remaining entries are laid out in pairs as { Length[i],
	 * Length[i+1], Address[i], Address[i+1] }, which keeps every address
	 * on a 64-bit boundary and costs 3 flits per pair.  When one entry is
	 * left over, its partner length is set to 0 and the partner address
	 * is omitted, so it costs 2 flits.
	 *
	 * With rest = n - 1 entries after the first:
	 *   "2"            covers header + Length0 and Address0;
	 *   "(3 * rest)/2" covers 3 flits per full pair, and rounds down to
	 *                  1 extra flit for a left-over entry;
	 *   "(rest & 1)"   adds the second flit that left-over entry needs.
	 */
	unsigned int rest = n - 1;

	return 2 + (3 * rest) / 2 + (rest & 1);
}
799c6e0d914SCasey Leedom 
800c6e0d914SCasey Leedom /**
801c6e0d914SCasey Leedom  *	flits_to_desc - returns the num of TX descriptors for the given flits
802c6e0d914SCasey Leedom  *	@flits: the number of flits
803c6e0d914SCasey Leedom  *
804c6e0d914SCasey Leedom  *	Returns the number of TX descriptors needed for the supplied number
805c6e0d914SCasey Leedom  *	of flits.
806c6e0d914SCasey Leedom  */
807c6e0d914SCasey Leedom static inline unsigned int flits_to_desc(unsigned int flits)
808c6e0d914SCasey Leedom {
809c6e0d914SCasey Leedom 	BUG_ON(flits > SGE_MAX_WR_LEN / sizeof(__be64));
810c6e0d914SCasey Leedom 	return DIV_ROUND_UP(flits, TXD_PER_EQ_UNIT);
811c6e0d914SCasey Leedom }
812c6e0d914SCasey Leedom 
/**
 *	is_eth_imm - can an Ethernet packet be sent as immediate data?
 *	@skb: the packet
 *
 *	Returns whether an Ethernet packet is small enough to fit completely as
 *	immediate data.  In this driver the answer is always "no".
 */
static inline int is_eth_imm(const struct sk_buff *skb)
{
	/*
	 * The VF Driver uses the FW_ETH_TX_PKT_VM_WR firmware Work Request
	 * which does not accommodate immediate data.  We could dike out all
	 * of the support code for immediate data but that would tie our hands
	 * too much if we ever want to enhance the firmware.  It would also
	 * create more differences between the PF and VF Drivers.
	 */
	return 0;
}
831c6e0d914SCasey Leedom 
832c6e0d914SCasey Leedom /**
833c6e0d914SCasey Leedom  *	calc_tx_flits - calculate the number of flits for a packet TX WR
834c6e0d914SCasey Leedom  *	@skb: the packet
835c6e0d914SCasey Leedom  *
836c6e0d914SCasey Leedom  *	Returns the number of flits needed for a TX Work Request for the
837c6e0d914SCasey Leedom  *	given Ethernet packet, including the needed WR and CPL headers.
838c6e0d914SCasey Leedom  */
839c6e0d914SCasey Leedom static inline unsigned int calc_tx_flits(const struct sk_buff *skb)
840c6e0d914SCasey Leedom {
841c6e0d914SCasey Leedom 	unsigned int flits;
842c6e0d914SCasey Leedom 
843c6e0d914SCasey Leedom 	/*
844c6e0d914SCasey Leedom 	 * If the skb is small enough, we can pump it out as a work request
845c6e0d914SCasey Leedom 	 * with only immediate data.  In that case we just have to have the
846c6e0d914SCasey Leedom 	 * TX Packet header plus the skb data in the Work Request.
847c6e0d914SCasey Leedom 	 */
848c6e0d914SCasey Leedom 	if (is_eth_imm(skb))
849c6e0d914SCasey Leedom 		return DIV_ROUND_UP(skb->len + sizeof(struct cpl_tx_pkt),
850c6e0d914SCasey Leedom 				    sizeof(__be64));
851c6e0d914SCasey Leedom 
852c6e0d914SCasey Leedom 	/*
853c6e0d914SCasey Leedom 	 * Otherwise, we're going to have to construct a Scatter gather list
854c6e0d914SCasey Leedom 	 * of the skb body and fragments.  We also include the flits necessary
855c6e0d914SCasey Leedom 	 * for the TX Packet Work Request and CPL.  We always have a firmware
856c6e0d914SCasey Leedom 	 * Write Header (incorporated as part of the cpl_tx_pkt_lso and
857c6e0d914SCasey Leedom 	 * cpl_tx_pkt structures), followed by either a TX Packet Write CPL
858c6e0d914SCasey Leedom 	 * message or, if we're doing a Large Send Offload, an LSO CPL message
859c6e0d914SCasey Leedom 	 * with an embeded TX Packet Write CPL message.
860c6e0d914SCasey Leedom 	 */
861c6e0d914SCasey Leedom 	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
862c6e0d914SCasey Leedom 	if (skb_shinfo(skb)->gso_size)
863c6e0d914SCasey Leedom 		flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
864c6e0d914SCasey Leedom 			  sizeof(struct cpl_tx_pkt_lso_core) +
865c6e0d914SCasey Leedom 			  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
866c6e0d914SCasey Leedom 	else
867c6e0d914SCasey Leedom 		flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
868c6e0d914SCasey Leedom 			  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
869c6e0d914SCasey Leedom 	return flits;
870c6e0d914SCasey Leedom }
871c6e0d914SCasey Leedom 
872c6e0d914SCasey Leedom /**
873c6e0d914SCasey Leedom  *	write_sgl - populate a Scatter/Gather List for a packet
874c6e0d914SCasey Leedom  *	@skb: the packet
875c6e0d914SCasey Leedom  *	@tq: the TX queue we are writing into
876c6e0d914SCasey Leedom  *	@sgl: starting location for writing the SGL
877c6e0d914SCasey Leedom  *	@end: points right after the end of the SGL
878c6e0d914SCasey Leedom  *	@start: start offset into skb main-body data to include in the SGL
879c6e0d914SCasey Leedom  *	@addr: the list of DMA bus addresses for the SGL elements
880c6e0d914SCasey Leedom  *
881c6e0d914SCasey Leedom  *	Generates a Scatter/Gather List for the buffers that make up a packet.
882c6e0d914SCasey Leedom  *	The caller must provide adequate space for the SGL that will be written.
883c6e0d914SCasey Leedom  *	The SGL includes all of the packet's page fragments and the data in its
884c6e0d914SCasey Leedom  *	main body except for the first @start bytes.  @pos must be 16-byte
885c6e0d914SCasey Leedom  *	aligned and within a TX descriptor with available space.  @end points
886c6e0d914SCasey Leedom  *	write after the end of the SGL but does not account for any potential
887c6e0d914SCasey Leedom  *	wrap around, i.e., @end > @tq->stat.
888c6e0d914SCasey Leedom  */
889c6e0d914SCasey Leedom static void write_sgl(const struct sk_buff *skb, struct sge_txq *tq,
890c6e0d914SCasey Leedom 		      struct ulptx_sgl *sgl, u64 *end, unsigned int start,
891c6e0d914SCasey Leedom 		      const dma_addr_t *addr)
892c6e0d914SCasey Leedom {
893c6e0d914SCasey Leedom 	unsigned int i, len;
894c6e0d914SCasey Leedom 	struct ulptx_sge_pair *to;
895c6e0d914SCasey Leedom 	const struct skb_shared_info *si = skb_shinfo(skb);
896c6e0d914SCasey Leedom 	unsigned int nfrags = si->nr_frags;
897c6e0d914SCasey Leedom 	struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1];
898c6e0d914SCasey Leedom 
899c6e0d914SCasey Leedom 	len = skb_headlen(skb) - start;
900c6e0d914SCasey Leedom 	if (likely(len)) {
901c6e0d914SCasey Leedom 		sgl->len0 = htonl(len);
902c6e0d914SCasey Leedom 		sgl->addr0 = cpu_to_be64(addr[0] + start);
903c6e0d914SCasey Leedom 		nfrags++;
904c6e0d914SCasey Leedom 	} else {
9059e903e08SEric Dumazet 		sgl->len0 = htonl(skb_frag_size(&si->frags[0]));
906c6e0d914SCasey Leedom 		sgl->addr0 = cpu_to_be64(addr[1]);
907c6e0d914SCasey Leedom 	}
908c6e0d914SCasey Leedom 
909c6e0d914SCasey Leedom 	sgl->cmd_nsge = htonl(ULPTX_CMD(ULP_TX_SC_DSGL) |
910c6e0d914SCasey Leedom 			      ULPTX_NSGE(nfrags));
911c6e0d914SCasey Leedom 	if (likely(--nfrags == 0))
912c6e0d914SCasey Leedom 		return;
913c6e0d914SCasey Leedom 	/*
914c6e0d914SCasey Leedom 	 * Most of the complexity below deals with the possibility we hit the
915c6e0d914SCasey Leedom 	 * end of the queue in the middle of writing the SGL.  For this case
916c6e0d914SCasey Leedom 	 * only we create the SGL in a temporary buffer and then copy it.
917c6e0d914SCasey Leedom 	 */
918c6e0d914SCasey Leedom 	to = (u8 *)end > (u8 *)tq->stat ? buf : sgl->sge;
919c6e0d914SCasey Leedom 
920c6e0d914SCasey Leedom 	for (i = (nfrags != si->nr_frags); nfrags >= 2; nfrags -= 2, to++) {
9219e903e08SEric Dumazet 		to->len[0] = cpu_to_be32(skb_frag_size(&si->frags[i]));
9229e903e08SEric Dumazet 		to->len[1] = cpu_to_be32(skb_frag_size(&si->frags[++i]));
923c6e0d914SCasey Leedom 		to->addr[0] = cpu_to_be64(addr[i]);
924c6e0d914SCasey Leedom 		to->addr[1] = cpu_to_be64(addr[++i]);
925c6e0d914SCasey Leedom 	}
926c6e0d914SCasey Leedom 	if (nfrags) {
9279e903e08SEric Dumazet 		to->len[0] = cpu_to_be32(skb_frag_size(&si->frags[i]));
928c6e0d914SCasey Leedom 		to->len[1] = cpu_to_be32(0);
929c6e0d914SCasey Leedom 		to->addr[0] = cpu_to_be64(addr[i + 1]);
930c6e0d914SCasey Leedom 	}
931c6e0d914SCasey Leedom 	if (unlikely((u8 *)end > (u8 *)tq->stat)) {
932c6e0d914SCasey Leedom 		unsigned int part0 = (u8 *)tq->stat - (u8 *)sgl->sge, part1;
933c6e0d914SCasey Leedom 
934c6e0d914SCasey Leedom 		if (likely(part0))
935c6e0d914SCasey Leedom 			memcpy(sgl->sge, buf, part0);
936c6e0d914SCasey Leedom 		part1 = (u8 *)end - (u8 *)tq->stat;
937c6e0d914SCasey Leedom 		memcpy(tq->desc, (u8 *)buf + part0, part1);
938c6e0d914SCasey Leedom 		end = (void *)tq->desc + part1;
939c6e0d914SCasey Leedom 	}
940c6e0d914SCasey Leedom 	if ((uintptr_t)end & 8)           /* 0-pad to multiple of 16 */
94164699336SJoe Perches 		*end = 0;
942c6e0d914SCasey Leedom }
943c6e0d914SCasey Leedom 
944c6e0d914SCasey Leedom /**
945c6e0d914SCasey Leedom  *	check_ring_tx_db - check and potentially ring a TX queue's doorbell
946c6e0d914SCasey Leedom  *	@adapter: the adapter
947c6e0d914SCasey Leedom  *	@tq: the TX queue
948c6e0d914SCasey Leedom  *	@n: number of new descriptors to give to HW
949c6e0d914SCasey Leedom  *
950c6e0d914SCasey Leedom  *	Ring the doorbel for a TX queue.
951c6e0d914SCasey Leedom  */
952c6e0d914SCasey Leedom static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq,
953c6e0d914SCasey Leedom 			      int n)
954c6e0d914SCasey Leedom {
955c6e0d914SCasey Leedom 	/*
956c6e0d914SCasey Leedom 	 * Warn if we write doorbells with the wrong priority and write
957c6e0d914SCasey Leedom 	 * descriptors before telling HW.
958c6e0d914SCasey Leedom 	 */
959ce91a923SNaresh Kumar Inna 	WARN_ON((QID(tq->cntxt_id) | PIDX(n)) & DBPRIO(1));
960c6e0d914SCasey Leedom 	wmb();
961c6e0d914SCasey Leedom 	t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
962c6e0d914SCasey Leedom 		     QID(tq->cntxt_id) | PIDX(n));
963c6e0d914SCasey Leedom }
964c6e0d914SCasey Leedom 
965c6e0d914SCasey Leedom /**
966c6e0d914SCasey Leedom  *	inline_tx_skb - inline a packet's data into TX descriptors
967c6e0d914SCasey Leedom  *	@skb: the packet
968c6e0d914SCasey Leedom  *	@tq: the TX queue where the packet will be inlined
969c6e0d914SCasey Leedom  *	@pos: starting position in the TX queue to inline the packet
970c6e0d914SCasey Leedom  *
971c6e0d914SCasey Leedom  *	Inline a packet's contents directly into TX descriptors, starting at
972c6e0d914SCasey Leedom  *	the given position within the TX DMA ring.
973c6e0d914SCasey Leedom  *	Most of the complexity of this operation is dealing with wrap arounds
974c6e0d914SCasey Leedom  *	in the middle of the packet we want to inline.
975c6e0d914SCasey Leedom  */
976c6e0d914SCasey Leedom static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *tq,
977c6e0d914SCasey Leedom 			  void *pos)
978c6e0d914SCasey Leedom {
979c6e0d914SCasey Leedom 	u64 *p;
980c6e0d914SCasey Leedom 	int left = (void *)tq->stat - pos;
981c6e0d914SCasey Leedom 
982c6e0d914SCasey Leedom 	if (likely(skb->len <= left)) {
983c6e0d914SCasey Leedom 		if (likely(!skb->data_len))
984c6e0d914SCasey Leedom 			skb_copy_from_linear_data(skb, pos, skb->len);
985c6e0d914SCasey Leedom 		else
986c6e0d914SCasey Leedom 			skb_copy_bits(skb, 0, pos, skb->len);
987c6e0d914SCasey Leedom 		pos += skb->len;
988c6e0d914SCasey Leedom 	} else {
989c6e0d914SCasey Leedom 		skb_copy_bits(skb, 0, pos, left);
990c6e0d914SCasey Leedom 		skb_copy_bits(skb, left, tq->desc, skb->len - left);
991c6e0d914SCasey Leedom 		pos = (void *)tq->desc + (skb->len - left);
992c6e0d914SCasey Leedom 	}
993c6e0d914SCasey Leedom 
994c6e0d914SCasey Leedom 	/* 0-pad to multiple of 16 */
995c6e0d914SCasey Leedom 	p = PTR_ALIGN(pos, 8);
996c6e0d914SCasey Leedom 	if ((uintptr_t)p & 8)
997c6e0d914SCasey Leedom 		*p = 0;
998c6e0d914SCasey Leedom }
999c6e0d914SCasey Leedom 
1000c6e0d914SCasey Leedom /*
1001c6e0d914SCasey Leedom  * Figure out what HW csum a packet wants and return the appropriate control
1002c6e0d914SCasey Leedom  * bits.
1003c6e0d914SCasey Leedom  */
1004c6e0d914SCasey Leedom static u64 hwcsum(const struct sk_buff *skb)
1005c6e0d914SCasey Leedom {
1006c6e0d914SCasey Leedom 	int csum_type;
1007c6e0d914SCasey Leedom 	const struct iphdr *iph = ip_hdr(skb);
1008c6e0d914SCasey Leedom 
1009c6e0d914SCasey Leedom 	if (iph->version == 4) {
1010c6e0d914SCasey Leedom 		if (iph->protocol == IPPROTO_TCP)
1011c6e0d914SCasey Leedom 			csum_type = TX_CSUM_TCPIP;
1012c6e0d914SCasey Leedom 		else if (iph->protocol == IPPROTO_UDP)
1013c6e0d914SCasey Leedom 			csum_type = TX_CSUM_UDPIP;
1014c6e0d914SCasey Leedom 		else {
1015c6e0d914SCasey Leedom nocsum:
1016c6e0d914SCasey Leedom 			/*
1017c6e0d914SCasey Leedom 			 * unknown protocol, disable HW csum
1018c6e0d914SCasey Leedom 			 * and hope a bad packet is detected
1019c6e0d914SCasey Leedom 			 */
1020c6e0d914SCasey Leedom 			return TXPKT_L4CSUM_DIS;
1021c6e0d914SCasey Leedom 		}
1022c6e0d914SCasey Leedom 	} else {
1023c6e0d914SCasey Leedom 		/*
1024c6e0d914SCasey Leedom 		 * this doesn't work with extension headers
1025c6e0d914SCasey Leedom 		 */
1026c6e0d914SCasey Leedom 		const struct ipv6hdr *ip6h = (const struct ipv6hdr *)iph;
1027c6e0d914SCasey Leedom 
1028c6e0d914SCasey Leedom 		if (ip6h->nexthdr == IPPROTO_TCP)
1029c6e0d914SCasey Leedom 			csum_type = TX_CSUM_TCPIP6;
1030c6e0d914SCasey Leedom 		else if (ip6h->nexthdr == IPPROTO_UDP)
1031c6e0d914SCasey Leedom 			csum_type = TX_CSUM_UDPIP6;
1032c6e0d914SCasey Leedom 		else
1033c6e0d914SCasey Leedom 			goto nocsum;
1034c6e0d914SCasey Leedom 	}
1035c6e0d914SCasey Leedom 
1036c6e0d914SCasey Leedom 	if (likely(csum_type >= TX_CSUM_TCPIP))
1037c6e0d914SCasey Leedom 		return TXPKT_CSUM_TYPE(csum_type) |
1038c6e0d914SCasey Leedom 			TXPKT_IPHDR_LEN(skb_network_header_len(skb)) |
1039c6e0d914SCasey Leedom 			TXPKT_ETHHDR_LEN(skb_network_offset(skb) - ETH_HLEN);
1040c6e0d914SCasey Leedom 	else {
1041c6e0d914SCasey Leedom 		int start = skb_transport_offset(skb);
1042c6e0d914SCasey Leedom 
1043c6e0d914SCasey Leedom 		return TXPKT_CSUM_TYPE(csum_type) |
1044c6e0d914SCasey Leedom 			TXPKT_CSUM_START(start) |
1045c6e0d914SCasey Leedom 			TXPKT_CSUM_LOC(start + skb->csum_offset);
1046c6e0d914SCasey Leedom 	}
1047c6e0d914SCasey Leedom }
1048c6e0d914SCasey Leedom 
1049c6e0d914SCasey Leedom /*
1050c6e0d914SCasey Leedom  * Stop an Ethernet TX queue and record that state change.
1051c6e0d914SCasey Leedom  */
1052c6e0d914SCasey Leedom static void txq_stop(struct sge_eth_txq *txq)
1053c6e0d914SCasey Leedom {
1054c6e0d914SCasey Leedom 	netif_tx_stop_queue(txq->txq);
1055c6e0d914SCasey Leedom 	txq->q.stops++;
1056c6e0d914SCasey Leedom }
1057c6e0d914SCasey Leedom 
1058c6e0d914SCasey Leedom /*
1059c6e0d914SCasey Leedom  * Advance our software state for a TX queue by adding n in use descriptors.
1060c6e0d914SCasey Leedom  */
1061c6e0d914SCasey Leedom static inline void txq_advance(struct sge_txq *tq, unsigned int n)
1062c6e0d914SCasey Leedom {
1063c6e0d914SCasey Leedom 	tq->in_use += n;
1064c6e0d914SCasey Leedom 	tq->pidx += n;
1065c6e0d914SCasey Leedom 	if (tq->pidx >= tq->size)
1066c6e0d914SCasey Leedom 		tq->pidx -= tq->size;
1067c6e0d914SCasey Leedom }
1068c6e0d914SCasey Leedom 
1069c6e0d914SCasey Leedom /**
1070c6e0d914SCasey Leedom  *	t4vf_eth_xmit - add a packet to an Ethernet TX queue
1071c6e0d914SCasey Leedom  *	@skb: the packet
1072c6e0d914SCasey Leedom  *	@dev: the egress net device
1073c6e0d914SCasey Leedom  *
1074c6e0d914SCasey Leedom  *	Add a packet to an SGE Ethernet TX queue.  Runs with softirqs disabled.
1075c6e0d914SCasey Leedom  */
1076c6e0d914SCasey Leedom int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1077c6e0d914SCasey Leedom {
10787f9dd2faSCasey Leedom 	u32 wr_mid;
1079c6e0d914SCasey Leedom 	u64 cntrl, *end;
1080c6e0d914SCasey Leedom 	int qidx, credits;
1081c6e0d914SCasey Leedom 	unsigned int flits, ndesc;
1082c6e0d914SCasey Leedom 	struct adapter *adapter;
1083c6e0d914SCasey Leedom 	struct sge_eth_txq *txq;
1084c6e0d914SCasey Leedom 	const struct port_info *pi;
1085c6e0d914SCasey Leedom 	struct fw_eth_tx_pkt_vm_wr *wr;
1086c6e0d914SCasey Leedom 	struct cpl_tx_pkt_core *cpl;
1087c6e0d914SCasey Leedom 	const struct skb_shared_info *ssi;
1088c6e0d914SCasey Leedom 	dma_addr_t addr[MAX_SKB_FRAGS + 1];
1089c6e0d914SCasey Leedom 	const size_t fw_hdr_copy_len = (sizeof(wr->ethmacdst) +
1090c6e0d914SCasey Leedom 					sizeof(wr->ethmacsrc) +
1091c6e0d914SCasey Leedom 					sizeof(wr->ethtype) +
1092c6e0d914SCasey Leedom 					sizeof(wr->vlantci));
1093c6e0d914SCasey Leedom 
1094c6e0d914SCasey Leedom 	/*
1095c6e0d914SCasey Leedom 	 * The chip minimum packet length is 10 octets but the firmware
1096c6e0d914SCasey Leedom 	 * command that we are using requires that we copy the Ethernet header
1097c6e0d914SCasey Leedom 	 * (including the VLAN tag) into the header so we reject anything
1098c6e0d914SCasey Leedom 	 * smaller than that ...
1099c6e0d914SCasey Leedom 	 */
1100c6e0d914SCasey Leedom 	if (unlikely(skb->len < fw_hdr_copy_len))
1101c6e0d914SCasey Leedom 		goto out_free;
1102c6e0d914SCasey Leedom 
1103c6e0d914SCasey Leedom 	/*
1104c6e0d914SCasey Leedom 	 * Figure out which TX Queue we're going to use.
1105c6e0d914SCasey Leedom 	 */
1106c6e0d914SCasey Leedom 	pi = netdev_priv(dev);
1107c6e0d914SCasey Leedom 	adapter = pi->adapter;
1108c6e0d914SCasey Leedom 	qidx = skb_get_queue_mapping(skb);
1109c6e0d914SCasey Leedom 	BUG_ON(qidx >= pi->nqsets);
1110c6e0d914SCasey Leedom 	txq = &adapter->sge.ethtxq[pi->first_qset + qidx];
1111c6e0d914SCasey Leedom 
1112c6e0d914SCasey Leedom 	/*
1113c6e0d914SCasey Leedom 	 * Take this opportunity to reclaim any TX Descriptors whose DMA
1114c6e0d914SCasey Leedom 	 * transfers have completed.
1115c6e0d914SCasey Leedom 	 */
1116c6e0d914SCasey Leedom 	reclaim_completed_tx(adapter, &txq->q, true);
1117c6e0d914SCasey Leedom 
1118c6e0d914SCasey Leedom 	/*
1119c6e0d914SCasey Leedom 	 * Calculate the number of flits and TX Descriptors we're going to
1120c6e0d914SCasey Leedom 	 * need along with how many TX Descriptors will be left over after
1121c6e0d914SCasey Leedom 	 * we inject our Work Request.
1122c6e0d914SCasey Leedom 	 */
1123c6e0d914SCasey Leedom 	flits = calc_tx_flits(skb);
1124c6e0d914SCasey Leedom 	ndesc = flits_to_desc(flits);
1125c6e0d914SCasey Leedom 	credits = txq_avail(&txq->q) - ndesc;
1126c6e0d914SCasey Leedom 
1127c6e0d914SCasey Leedom 	if (unlikely(credits < 0)) {
1128c6e0d914SCasey Leedom 		/*
1129c6e0d914SCasey Leedom 		 * Not enough room for this packet's Work Request.  Stop the
1130c6e0d914SCasey Leedom 		 * TX Queue and return a "busy" condition.  The queue will get
1131c6e0d914SCasey Leedom 		 * started later on when the firmware informs us that space
1132c6e0d914SCasey Leedom 		 * has opened up.
1133c6e0d914SCasey Leedom 		 */
1134c6e0d914SCasey Leedom 		txq_stop(txq);
1135c6e0d914SCasey Leedom 		dev_err(adapter->pdev_dev,
1136c6e0d914SCasey Leedom 			"%s: TX ring %u full while queue awake!\n",
1137c6e0d914SCasey Leedom 			dev->name, qidx);
1138c6e0d914SCasey Leedom 		return NETDEV_TX_BUSY;
1139c6e0d914SCasey Leedom 	}
1140c6e0d914SCasey Leedom 
1141c6e0d914SCasey Leedom 	if (!is_eth_imm(skb) &&
1142c6e0d914SCasey Leedom 	    unlikely(map_skb(adapter->pdev_dev, skb, addr) < 0)) {
1143c6e0d914SCasey Leedom 		/*
1144c6e0d914SCasey Leedom 		 * We need to map the skb into PCI DMA space (because it can't
1145c6e0d914SCasey Leedom 		 * be in-lined directly into the Work Request) and the mapping
1146c6e0d914SCasey Leedom 		 * operation failed.  Record the error and drop the packet.
1147c6e0d914SCasey Leedom 		 */
1148c6e0d914SCasey Leedom 		txq->mapping_err++;
1149c6e0d914SCasey Leedom 		goto out_free;
1150c6e0d914SCasey Leedom 	}
1151c6e0d914SCasey Leedom 
11527f9dd2faSCasey Leedom 	wr_mid = FW_WR_LEN16(DIV_ROUND_UP(flits, 2));
1153c6e0d914SCasey Leedom 	if (unlikely(credits < ETHTXQ_STOP_THRES)) {
1154c6e0d914SCasey Leedom 		/*
1155c6e0d914SCasey Leedom 		 * After we're done injecting the Work Request for this
115625985edcSLucas De Marchi 		 * packet, we'll be below our "stop threshold" so stop the TX
11577f9dd2faSCasey Leedom 		 * Queue now and schedule a request for an SGE Egress Queue
11587f9dd2faSCasey Leedom 		 * Update message.  The queue will get started later on when
11597f9dd2faSCasey Leedom 		 * the firmware processes this Work Request and sends us an
11607f9dd2faSCasey Leedom 		 * Egress Queue Status Update message indicating that space
11617f9dd2faSCasey Leedom 		 * has opened up.
1162c6e0d914SCasey Leedom 		 */
1163c6e0d914SCasey Leedom 		txq_stop(txq);
11647f9dd2faSCasey Leedom 		wr_mid |= FW_WR_EQUEQ | FW_WR_EQUIQ;
1165c6e0d914SCasey Leedom 	}
1166c6e0d914SCasey Leedom 
1167c6e0d914SCasey Leedom 	/*
1168c6e0d914SCasey Leedom 	 * Start filling in our Work Request.  Note that we do _not_ handle
1169c6e0d914SCasey Leedom 	 * the WR Header wrapping around the TX Descriptor Ring.  If our
1170c6e0d914SCasey Leedom 	 * maximum header size ever exceeds one TX Descriptor, we'll need to
1171c6e0d914SCasey Leedom 	 * do something else here.
1172c6e0d914SCasey Leedom 	 */
1173c6e0d914SCasey Leedom 	BUG_ON(DIV_ROUND_UP(ETHTXQ_MAX_HDR, TXD_PER_EQ_UNIT) > 1);
1174c6e0d914SCasey Leedom 	wr = (void *)&txq->q.desc[txq->q.pidx];
11757f9dd2faSCasey Leedom 	wr->equiq_to_len16 = cpu_to_be32(wr_mid);
1176c6e0d914SCasey Leedom 	wr->r3[0] = cpu_to_be64(0);
1177c6e0d914SCasey Leedom 	wr->r3[1] = cpu_to_be64(0);
1178c6e0d914SCasey Leedom 	skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len);
1179c6e0d914SCasey Leedom 	end = (u64 *)wr + flits;
1180c6e0d914SCasey Leedom 
1181c6e0d914SCasey Leedom 	/*
1182c6e0d914SCasey Leedom 	 * If this is a Large Send Offload packet we'll put in an LSO CPL
1183c6e0d914SCasey Leedom 	 * message with an encapsulated TX Packet CPL message.  Otherwise we
1184c6e0d914SCasey Leedom 	 * just use a TX Packet CPL message.
1185c6e0d914SCasey Leedom 	 */
1186c6e0d914SCasey Leedom 	ssi = skb_shinfo(skb);
1187c6e0d914SCasey Leedom 	if (ssi->gso_size) {
1188c6e0d914SCasey Leedom 		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
1189c6e0d914SCasey Leedom 		bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
1190c6e0d914SCasey Leedom 		int l3hdr_len = skb_network_header_len(skb);
1191c6e0d914SCasey Leedom 		int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
1192c6e0d914SCasey Leedom 
1193c6e0d914SCasey Leedom 		wr->op_immdlen =
1194c6e0d914SCasey Leedom 			cpu_to_be32(FW_WR_OP(FW_ETH_TX_PKT_VM_WR) |
1195c6e0d914SCasey Leedom 				    FW_WR_IMMDLEN(sizeof(*lso) +
1196c6e0d914SCasey Leedom 						  sizeof(*cpl)));
1197c6e0d914SCasey Leedom 		/*
1198c6e0d914SCasey Leedom 		 * Fill in the LSO CPL message.
1199c6e0d914SCasey Leedom 		 */
1200c6e0d914SCasey Leedom 		lso->lso_ctrl =
1201c6e0d914SCasey Leedom 			cpu_to_be32(LSO_OPCODE(CPL_TX_PKT_LSO) |
1202c6e0d914SCasey Leedom 				    LSO_FIRST_SLICE |
1203c6e0d914SCasey Leedom 				    LSO_LAST_SLICE |
1204c6e0d914SCasey Leedom 				    LSO_IPV6(v6) |
1205c6e0d914SCasey Leedom 				    LSO_ETHHDR_LEN(eth_xtra_len/4) |
1206c6e0d914SCasey Leedom 				    LSO_IPHDR_LEN(l3hdr_len/4) |
1207c6e0d914SCasey Leedom 				    LSO_TCPHDR_LEN(tcp_hdr(skb)->doff));
1208c6e0d914SCasey Leedom 		lso->ipid_ofst = cpu_to_be16(0);
1209c6e0d914SCasey Leedom 		lso->mss = cpu_to_be16(ssi->gso_size);
1210c6e0d914SCasey Leedom 		lso->seqno_offset = cpu_to_be32(0);
1211c6e0d914SCasey Leedom 		lso->len = cpu_to_be32(skb->len);
1212c6e0d914SCasey Leedom 
1213c6e0d914SCasey Leedom 		/*
1214c6e0d914SCasey Leedom 		 * Set up TX Packet CPL pointer, control word and perform
1215c6e0d914SCasey Leedom 		 * accounting.
1216c6e0d914SCasey Leedom 		 */
1217c6e0d914SCasey Leedom 		cpl = (void *)(lso + 1);
1218c6e0d914SCasey Leedom 		cntrl = (TXPKT_CSUM_TYPE(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
1219c6e0d914SCasey Leedom 			 TXPKT_IPHDR_LEN(l3hdr_len) |
1220c6e0d914SCasey Leedom 			 TXPKT_ETHHDR_LEN(eth_xtra_len));
1221c6e0d914SCasey Leedom 		txq->tso++;
1222c6e0d914SCasey Leedom 		txq->tx_cso += ssi->gso_segs;
1223c6e0d914SCasey Leedom 	} else {
1224c6e0d914SCasey Leedom 		int len;
1225c6e0d914SCasey Leedom 
1226c6e0d914SCasey Leedom 		len = is_eth_imm(skb) ? skb->len + sizeof(*cpl) : sizeof(*cpl);
1227c6e0d914SCasey Leedom 		wr->op_immdlen =
1228c6e0d914SCasey Leedom 			cpu_to_be32(FW_WR_OP(FW_ETH_TX_PKT_VM_WR) |
1229c6e0d914SCasey Leedom 				    FW_WR_IMMDLEN(len));
1230c6e0d914SCasey Leedom 
1231c6e0d914SCasey Leedom 		/*
1232c6e0d914SCasey Leedom 		 * Set up TX Packet CPL pointer, control word and perform
1233c6e0d914SCasey Leedom 		 * accounting.
1234c6e0d914SCasey Leedom 		 */
1235c6e0d914SCasey Leedom 		cpl = (void *)(wr + 1);
1236c6e0d914SCasey Leedom 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
1237c6e0d914SCasey Leedom 			cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS;
1238c6e0d914SCasey Leedom 			txq->tx_cso++;
1239c6e0d914SCasey Leedom 		} else
1240c6e0d914SCasey Leedom 			cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS;
1241c6e0d914SCasey Leedom 	}
1242c6e0d914SCasey Leedom 
1243c6e0d914SCasey Leedom 	/*
1244c6e0d914SCasey Leedom 	 * If there's a VLAN tag present, add that to the list of things to
1245c6e0d914SCasey Leedom 	 * do in this Work Request.
1246c6e0d914SCasey Leedom 	 */
1247c6e0d914SCasey Leedom 	if (vlan_tx_tag_present(skb)) {
1248c6e0d914SCasey Leedom 		txq->vlan_ins++;
1249c6e0d914SCasey Leedom 		cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(vlan_tx_tag_get(skb));
1250c6e0d914SCasey Leedom 	}
1251c6e0d914SCasey Leedom 
1252c6e0d914SCasey Leedom 	/*
1253c6e0d914SCasey Leedom 	 * Fill in the TX Packet CPL message header.
1254c6e0d914SCasey Leedom 	 */
1255c6e0d914SCasey Leedom 	cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE(CPL_TX_PKT_XT) |
1256c6e0d914SCasey Leedom 				 TXPKT_INTF(pi->port_id) |
1257c6e0d914SCasey Leedom 				 TXPKT_PF(0));
1258c6e0d914SCasey Leedom 	cpl->pack = cpu_to_be16(0);
1259c6e0d914SCasey Leedom 	cpl->len = cpu_to_be16(skb->len);
1260c6e0d914SCasey Leedom 	cpl->ctrl1 = cpu_to_be64(cntrl);
1261c6e0d914SCasey Leedom 
1262c6e0d914SCasey Leedom #ifdef T4_TRACE
1263c6e0d914SCasey Leedom 	T4_TRACE5(adapter->tb[txq->q.cntxt_id & 7],
1264c6e0d914SCasey Leedom 		  "eth_xmit: ndesc %u, credits %u, pidx %u, len %u, frags %u",
1265c6e0d914SCasey Leedom 		  ndesc, credits, txq->q.pidx, skb->len, ssi->nr_frags);
1266c6e0d914SCasey Leedom #endif
1267c6e0d914SCasey Leedom 
1268c6e0d914SCasey Leedom 	/*
1269c6e0d914SCasey Leedom 	 * Fill in the body of the TX Packet CPL message with either in-lined
1270c6e0d914SCasey Leedom 	 * data or a Scatter/Gather List.
1271c6e0d914SCasey Leedom 	 */
1272c6e0d914SCasey Leedom 	if (is_eth_imm(skb)) {
1273c6e0d914SCasey Leedom 		/*
1274c6e0d914SCasey Leedom 		 * In-line the packet's data and free the skb since we don't
1275c6e0d914SCasey Leedom 		 * need it any longer.
1276c6e0d914SCasey Leedom 		 */
1277c6e0d914SCasey Leedom 		inline_tx_skb(skb, &txq->q, cpl + 1);
1278c6e0d914SCasey Leedom 		dev_kfree_skb(skb);
1279c6e0d914SCasey Leedom 	} else {
1280c6e0d914SCasey Leedom 		/*
1281c6e0d914SCasey Leedom 		 * Write the skb's Scatter/Gather list into the TX Packet CPL
1282c6e0d914SCasey Leedom 		 * message and retain a pointer to the skb so we can free it
1283c6e0d914SCasey Leedom 		 * later when its DMA completes.  (We store the skb pointer
1284c6e0d914SCasey Leedom 		 * in the Software Descriptor corresponding to the last TX
1285c6e0d914SCasey Leedom 		 * Descriptor used by the Work Request.)
1286c6e0d914SCasey Leedom 		 *
1287c6e0d914SCasey Leedom 		 * The retained skb will be freed when the corresponding TX
1288c6e0d914SCasey Leedom 		 * Descriptors are reclaimed after their DMAs complete.
1289c6e0d914SCasey Leedom 		 * However, this could take quite a while since, in general,
1290c6e0d914SCasey Leedom 		 * the hardware is set up to be lazy about sending DMA
1291c6e0d914SCasey Leedom 		 * completion notifications to us and we mostly perform TX
1292c6e0d914SCasey Leedom 		 * reclaims in the transmit routine.
1293c6e0d914SCasey Leedom 		 *
1294c6e0d914SCasey Leedom 		 * This is good for performance but means that we rely on new
1295c6e0d914SCasey Leedom 		 * TX packets arriving to run the destructors of completed
1296c6e0d914SCasey Leedom 		 * packets, which open up space in their sockets' send queues.
1297c6e0d914SCasey Leedom 		 * Sometimes we do not get such new packets causing TX to
1298c6e0d914SCasey Leedom 		 * stall.  A single UDP transmitter is a good example of this
1299c6e0d914SCasey Leedom 		 * situation.  We have a clean up timer that periodically
1300c6e0d914SCasey Leedom 		 * reclaims completed packets but it doesn't run often enough
1301c6e0d914SCasey Leedom 		 * (nor do we want it to) to prevent lengthy stalls.  A
1302c6e0d914SCasey Leedom 		 * solution to this problem is to run the destructor early,
1303c6e0d914SCasey Leedom 		 * after the packet is queued but before it's DMAd.  A con is
1304c6e0d914SCasey Leedom 		 * that we lie to socket memory accounting, but the amount of
1305c6e0d914SCasey Leedom 		 * extra memory is reasonable (limited by the number of TX
1306c6e0d914SCasey Leedom 		 * descriptors), the packets do actually get freed quickly by
1307c6e0d914SCasey Leedom 		 * new packets almost always, and for protocols like TCP that
1308c6e0d914SCasey Leedom 		 * wait for acks to really free up the data the extra memory
1309c6e0d914SCasey Leedom 		 * is even less.  On the positive side we run the destructors
1310c6e0d914SCasey Leedom 		 * on the sending CPU rather than on a potentially different
131164bb336cSCasey Leedom 		 * completing CPU, usually a good thing.
1312c6e0d914SCasey Leedom 		 *
1313c6e0d914SCasey Leedom 		 * Run the destructor before telling the DMA engine about the
1314c6e0d914SCasey Leedom 		 * packet to make sure it doesn't complete and get freed
1315c6e0d914SCasey Leedom 		 * prematurely.
1316c6e0d914SCasey Leedom 		 */
1317c6e0d914SCasey Leedom 		struct ulptx_sgl *sgl = (struct ulptx_sgl *)(cpl + 1);
1318c6e0d914SCasey Leedom 		struct sge_txq *tq = &txq->q;
1319c6e0d914SCasey Leedom 		int last_desc;
1320c6e0d914SCasey Leedom 
1321c6e0d914SCasey Leedom 		/*
1322c6e0d914SCasey Leedom 		 * If the Work Request header was an exact multiple of our TX
1323c6e0d914SCasey Leedom 		 * Descriptor length, then it's possible that the starting SGL
1324c6e0d914SCasey Leedom 		 * pointer lines up exactly with the end of our TX Descriptor
1325c6e0d914SCasey Leedom 		 * ring.  If that's the case, wrap around to the beginning
1326c6e0d914SCasey Leedom 		 * here ...
1327c6e0d914SCasey Leedom 		 */
1328c6e0d914SCasey Leedom 		if (unlikely((void *)sgl == (void *)tq->stat)) {
1329c6e0d914SCasey Leedom 			sgl = (void *)tq->desc;
133064699336SJoe Perches 			end = ((void *)tq->desc + ((void *)end - (void *)tq->stat));
1331c6e0d914SCasey Leedom 		}
1332c6e0d914SCasey Leedom 
1333c6e0d914SCasey Leedom 		write_sgl(skb, tq, sgl, end, 0, addr);
1334c6e0d914SCasey Leedom 		skb_orphan(skb);
1335c6e0d914SCasey Leedom 
1336c6e0d914SCasey Leedom 		last_desc = tq->pidx + ndesc - 1;
1337c6e0d914SCasey Leedom 		if (last_desc >= tq->size)
1338c6e0d914SCasey Leedom 			last_desc -= tq->size;
1339c6e0d914SCasey Leedom 		tq->sdesc[last_desc].skb = skb;
1340c6e0d914SCasey Leedom 		tq->sdesc[last_desc].sgl = sgl;
1341c6e0d914SCasey Leedom 	}
1342c6e0d914SCasey Leedom 
1343c6e0d914SCasey Leedom 	/*
1344c6e0d914SCasey Leedom 	 * Advance our internal TX Queue state, tell the hardware about
1345c6e0d914SCasey Leedom 	 * the new TX descriptors and return success.
1346c6e0d914SCasey Leedom 	 */
1347c6e0d914SCasey Leedom 	txq_advance(&txq->q, ndesc);
1348c6e0d914SCasey Leedom 	dev->trans_start = jiffies;
1349c6e0d914SCasey Leedom 	ring_tx_db(adapter, &txq->q, ndesc);
1350c6e0d914SCasey Leedom 	return NETDEV_TX_OK;
1351c6e0d914SCasey Leedom 
1352c6e0d914SCasey Leedom out_free:
1353c6e0d914SCasey Leedom 	/*
1354c6e0d914SCasey Leedom 	 * An error of some sort happened.  Free the TX skb and tell the
1355c6e0d914SCasey Leedom 	 * OS that we've "dealt" with the packet ...
1356c6e0d914SCasey Leedom 	 */
1357c6e0d914SCasey Leedom 	dev_kfree_skb(skb);
1358c6e0d914SCasey Leedom 	return NETDEV_TX_OK;
1359c6e0d914SCasey Leedom }
1360c6e0d914SCasey Leedom 
1361c6e0d914SCasey Leedom /**
1362a0006a86SIan Campbell  *	copy_frags - copy fragments from gather list into skb_shared_info
1363a0006a86SIan Campbell  *	@skb: destination skb
1364a0006a86SIan Campbell  *	@gl: source internal packet gather list
1365a0006a86SIan Campbell  *	@offset: packet start offset in first page
1366a0006a86SIan Campbell  *
1367a0006a86SIan Campbell  *	Copy an internal packet gather list into a Linux skb_shared_info
1368a0006a86SIan Campbell  *	structure.
1369a0006a86SIan Campbell  */
1370a0006a86SIan Campbell static inline void copy_frags(struct sk_buff *skb,
1371a0006a86SIan Campbell 			      const struct pkt_gl *gl,
1372a0006a86SIan Campbell 			      unsigned int offset)
1373a0006a86SIan Campbell {
1374a0006a86SIan Campbell 	int i;
1375a0006a86SIan Campbell 
1376a0006a86SIan Campbell 	/* usually there's just one frag */
1377a0006a86SIan Campbell 	__skb_fill_page_desc(skb, 0, gl->frags[0].page,
1378a0006a86SIan Campbell 			     gl->frags[0].offset + offset,
1379a0006a86SIan Campbell 			     gl->frags[0].size - offset);
1380a0006a86SIan Campbell 	skb_shinfo(skb)->nr_frags = gl->nfrags;
1381a0006a86SIan Campbell 	for (i = 1; i < gl->nfrags; i++)
1382a0006a86SIan Campbell 		__skb_fill_page_desc(skb, i, gl->frags[i].page,
1383a0006a86SIan Campbell 				     gl->frags[i].offset,
1384a0006a86SIan Campbell 				     gl->frags[i].size);
1385a0006a86SIan Campbell 
1386a0006a86SIan Campbell 	/* get a reference to the last page, we don't own it */
1387a0006a86SIan Campbell 	get_page(gl->frags[gl->nfrags - 1].page);
1388a0006a86SIan Campbell }
1389a0006a86SIan Campbell 
1390a0006a86SIan Campbell /**
1391eb6c503dSCasey Leedom  *	t4vf_pktgl_to_skb - build an sk_buff from a packet gather list
1392eb6c503dSCasey Leedom  *	@gl: the gather list
1393eb6c503dSCasey Leedom  *	@skb_len: size of sk_buff main body if it carries fragments
1394eb6c503dSCasey Leedom  *	@pull_len: amount of data to move to the sk_buff's main body
1395eb6c503dSCasey Leedom  *
1396eb6c503dSCasey Leedom  *	Builds an sk_buff from the given packet gather list.  Returns the
1397eb6c503dSCasey Leedom  *	sk_buff or %NULL if sk_buff allocation failed.
1398eb6c503dSCasey Leedom  */
1399eb6c503dSCasey Leedom struct sk_buff *t4vf_pktgl_to_skb(const struct pkt_gl *gl,
1400eb6c503dSCasey Leedom 				  unsigned int skb_len, unsigned int pull_len)
1401eb6c503dSCasey Leedom {
1402eb6c503dSCasey Leedom 	struct sk_buff *skb;
1403eb6c503dSCasey Leedom 
1404eb6c503dSCasey Leedom 	/*
1405eb6c503dSCasey Leedom 	 * If the ingress packet is small enough, allocate an skb large enough
1406eb6c503dSCasey Leedom 	 * for all of the data and copy it inline.  Otherwise, allocate an skb
1407eb6c503dSCasey Leedom 	 * with enough room to pull in the header and reference the rest of
1408eb6c503dSCasey Leedom 	 * the data via the skb fragment list.
1409eb6c503dSCasey Leedom 	 *
1410eb6c503dSCasey Leedom 	 * Below we rely on RX_COPY_THRES being less than the smallest Rx
1411eb6c503dSCasey Leedom 	 * buff!  size, which is expected since buffers are at least
1412eb6c503dSCasey Leedom 	 * PAGE_SIZEd.  In this case packets up to RX_COPY_THRES have only one
1413eb6c503dSCasey Leedom 	 * fragment.
1414eb6c503dSCasey Leedom 	 */
1415eb6c503dSCasey Leedom 	if (gl->tot_len <= RX_COPY_THRES) {
1416eb6c503dSCasey Leedom 		/* small packets have only one fragment */
1417eb6c503dSCasey Leedom 		skb = alloc_skb(gl->tot_len, GFP_ATOMIC);
1418eb6c503dSCasey Leedom 		if (unlikely(!skb))
1419eb6c503dSCasey Leedom 			goto out;
1420eb6c503dSCasey Leedom 		__skb_put(skb, gl->tot_len);
1421eb6c503dSCasey Leedom 		skb_copy_to_linear_data(skb, gl->va, gl->tot_len);
1422eb6c503dSCasey Leedom 	} else {
1423eb6c503dSCasey Leedom 		skb = alloc_skb(skb_len, GFP_ATOMIC);
1424eb6c503dSCasey Leedom 		if (unlikely(!skb))
1425eb6c503dSCasey Leedom 			goto out;
1426eb6c503dSCasey Leedom 		__skb_put(skb, pull_len);
1427eb6c503dSCasey Leedom 		skb_copy_to_linear_data(skb, gl->va, pull_len);
1428eb6c503dSCasey Leedom 
1429a0006a86SIan Campbell 		copy_frags(skb, gl, pull_len);
1430eb6c503dSCasey Leedom 		skb->len = gl->tot_len;
1431eb6c503dSCasey Leedom 		skb->data_len = skb->len - pull_len;
1432eb6c503dSCasey Leedom 		skb->truesize += skb->data_len;
1433eb6c503dSCasey Leedom 	}
1434eb6c503dSCasey Leedom 
1435eb6c503dSCasey Leedom out:
1436eb6c503dSCasey Leedom 	return skb;
1437eb6c503dSCasey Leedom }
1438eb6c503dSCasey Leedom 
1439eb6c503dSCasey Leedom /**
1440c6e0d914SCasey Leedom  *	t4vf_pktgl_free - free a packet gather list
1441c6e0d914SCasey Leedom  *	@gl: the gather list
1442c6e0d914SCasey Leedom  *
1443c6e0d914SCasey Leedom  *	Releases the pages of a packet gather list.  We do not own the last
1444c6e0d914SCasey Leedom  *	page on the list and do not free it.
1445c6e0d914SCasey Leedom  */
1446c6e0d914SCasey Leedom void t4vf_pktgl_free(const struct pkt_gl *gl)
1447c6e0d914SCasey Leedom {
1448c6e0d914SCasey Leedom 	int frag;
1449c6e0d914SCasey Leedom 
1450c6e0d914SCasey Leedom 	frag = gl->nfrags - 1;
1451c6e0d914SCasey Leedom 	while (frag--)
1452c6e0d914SCasey Leedom 		put_page(gl->frags[frag].page);
1453c6e0d914SCasey Leedom }
1454c6e0d914SCasey Leedom 
1455c6e0d914SCasey Leedom /**
1456c6e0d914SCasey Leedom  *	do_gro - perform Generic Receive Offload ingress packet processing
1457c6e0d914SCasey Leedom  *	@rxq: ingress RX Ethernet Queue
1458c6e0d914SCasey Leedom  *	@gl: gather list for ingress packet
1459c6e0d914SCasey Leedom  *	@pkt: CPL header for last packet fragment
1460c6e0d914SCasey Leedom  *
1461c6e0d914SCasey Leedom  *	Perform Generic Receive Offload (GRO) ingress packet processing.
1462c6e0d914SCasey Leedom  *	We use the standard Linux GRO interfaces for this.
1463c6e0d914SCasey Leedom  */
1464c6e0d914SCasey Leedom static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
1465c6e0d914SCasey Leedom 		   const struct cpl_rx_pkt *pkt)
1466c6e0d914SCasey Leedom {
1467c6e0d914SCasey Leedom 	int ret;
1468c6e0d914SCasey Leedom 	struct sk_buff *skb;
1469c6e0d914SCasey Leedom 
1470c6e0d914SCasey Leedom 	skb = napi_get_frags(&rxq->rspq.napi);
1471c6e0d914SCasey Leedom 	if (unlikely(!skb)) {
1472c6e0d914SCasey Leedom 		t4vf_pktgl_free(gl);
1473c6e0d914SCasey Leedom 		rxq->stats.rx_drops++;
1474c6e0d914SCasey Leedom 		return;
1475c6e0d914SCasey Leedom 	}
1476c6e0d914SCasey Leedom 
1477a0006a86SIan Campbell 	copy_frags(skb, gl, PKTSHIFT);
1478c6e0d914SCasey Leedom 	skb->len = gl->tot_len - PKTSHIFT;
1479c6e0d914SCasey Leedom 	skb->data_len = skb->len;
1480c6e0d914SCasey Leedom 	skb->truesize += skb->data_len;
1481c6e0d914SCasey Leedom 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1482c6e0d914SCasey Leedom 	skb_record_rx_queue(skb, rxq->rspq.idx);
1483c6e0d914SCasey Leedom 
1484af32de0eSVipul Pandya 	if (pkt->vlan_ex) {
148587737663SJiri Pirko 		__vlan_hwaccel_put_tag(skb, be16_to_cpu(pkt->vlan));
1486af32de0eSVipul Pandya 		rxq->stats.vlan_ex++;
1487af32de0eSVipul Pandya 	}
1488c6e0d914SCasey Leedom 	ret = napi_gro_frags(&rxq->rspq.napi);
1489c6e0d914SCasey Leedom 
1490c6e0d914SCasey Leedom 	if (ret == GRO_HELD)
1491c6e0d914SCasey Leedom 		rxq->stats.lro_pkts++;
1492c6e0d914SCasey Leedom 	else if (ret == GRO_MERGED || ret == GRO_MERGED_FREE)
1493c6e0d914SCasey Leedom 		rxq->stats.lro_merged++;
1494c6e0d914SCasey Leedom 	rxq->stats.pkts++;
1495c6e0d914SCasey Leedom 	rxq->stats.rx_cso++;
1496c6e0d914SCasey Leedom }
1497c6e0d914SCasey Leedom 
1498c6e0d914SCasey Leedom /**
1499c6e0d914SCasey Leedom  *	t4vf_ethrx_handler - process an ingress ethernet packet
1500c6e0d914SCasey Leedom  *	@rspq: the response queue that received the packet
1501c6e0d914SCasey Leedom  *	@rsp: the response queue descriptor holding the RX_PKT message
1502c6e0d914SCasey Leedom  *	@gl: the gather list of packet fragments
1503c6e0d914SCasey Leedom  *
1504c6e0d914SCasey Leedom  *	Process an ingress ethernet packet and deliver it to the stack.
1505c6e0d914SCasey Leedom  */
1506c6e0d914SCasey Leedom int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp,
1507c6e0d914SCasey Leedom 		       const struct pkt_gl *gl)
1508c6e0d914SCasey Leedom {
1509c6e0d914SCasey Leedom 	struct sk_buff *skb;
15108b9a4d56SVipul Pandya 	const struct cpl_rx_pkt *pkt = (void *)rsp;
1511c6e0d914SCasey Leedom 	bool csum_ok = pkt->csum_calc && !pkt->err_vec;
1512c6e0d914SCasey Leedom 	struct sge_eth_rxq *rxq = container_of(rspq, struct sge_eth_rxq, rspq);
1513c6e0d914SCasey Leedom 
1514c6e0d914SCasey Leedom 	/*
1515c6e0d914SCasey Leedom 	 * If this is a good TCP packet and we have Generic Receive Offload
1516c6e0d914SCasey Leedom 	 * enabled, handle the packet in the GRO path.
1517c6e0d914SCasey Leedom 	 */
1518c6e0d914SCasey Leedom 	if ((pkt->l2info & cpu_to_be32(RXF_TCP)) &&
1519c6e0d914SCasey Leedom 	    (rspq->netdev->features & NETIF_F_GRO) && csum_ok &&
1520c6e0d914SCasey Leedom 	    !pkt->ip_frag) {
1521c6e0d914SCasey Leedom 		do_gro(rxq, gl, pkt);
1522c6e0d914SCasey Leedom 		return 0;
1523c6e0d914SCasey Leedom 	}
1524c6e0d914SCasey Leedom 
1525c6e0d914SCasey Leedom 	/*
1526eb6c503dSCasey Leedom 	 * Convert the Packet Gather List into an skb.
1527c6e0d914SCasey Leedom 	 */
1528eb6c503dSCasey Leedom 	skb = t4vf_pktgl_to_skb(gl, RX_SKB_LEN, RX_PULL_LEN);
1529eb6c503dSCasey Leedom 	if (unlikely(!skb)) {
1530eb6c503dSCasey Leedom 		t4vf_pktgl_free(gl);
1531eb6c503dSCasey Leedom 		rxq->stats.rx_drops++;
1532eb6c503dSCasey Leedom 		return 0;
1533c6e0d914SCasey Leedom 	}
1534c6e0d914SCasey Leedom 	__skb_pull(skb, PKTSHIFT);
1535c6e0d914SCasey Leedom 	skb->protocol = eth_type_trans(skb, rspq->netdev);
1536c6e0d914SCasey Leedom 	skb_record_rx_queue(skb, rspq->idx);
1537c6e0d914SCasey Leedom 	rxq->stats.pkts++;
1538c6e0d914SCasey Leedom 
15392ed28baaSMichał Mirosław 	if (csum_ok && (rspq->netdev->features & NETIF_F_RXCSUM) &&
15402ed28baaSMichał Mirosław 	    !pkt->err_vec && (be32_to_cpu(pkt->l2info) & (RXF_UDP|RXF_TCP))) {
1541c6e0d914SCasey Leedom 		if (!pkt->ip_frag)
1542c6e0d914SCasey Leedom 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1543c6e0d914SCasey Leedom 		else {
1544c6e0d914SCasey Leedom 			__sum16 c = (__force __sum16)pkt->csum;
1545c6e0d914SCasey Leedom 			skb->csum = csum_unfold(c);
1546c6e0d914SCasey Leedom 			skb->ip_summed = CHECKSUM_COMPLETE;
1547c6e0d914SCasey Leedom 		}
1548c6e0d914SCasey Leedom 		rxq->stats.rx_cso++;
1549c6e0d914SCasey Leedom 	} else
1550bc8acf2cSEric Dumazet 		skb_checksum_none_assert(skb);
1551c6e0d914SCasey Leedom 
155287737663SJiri Pirko 	if (pkt->vlan_ex) {
1553c6e0d914SCasey Leedom 		rxq->stats.vlan_ex++;
155487737663SJiri Pirko 		__vlan_hwaccel_put_tag(skb, be16_to_cpu(pkt->vlan));
155587737663SJiri Pirko 	}
155687737663SJiri Pirko 
1557c6e0d914SCasey Leedom 	netif_receive_skb(skb);
1558c6e0d914SCasey Leedom 
1559c6e0d914SCasey Leedom 	return 0;
1560c6e0d914SCasey Leedom }
1561c6e0d914SCasey Leedom 
1562c6e0d914SCasey Leedom /**
1563c6e0d914SCasey Leedom  *	is_new_response - check if a response is newly written
1564c6e0d914SCasey Leedom  *	@rc: the response control descriptor
1565c6e0d914SCasey Leedom  *	@rspq: the response queue
1566c6e0d914SCasey Leedom  *
1567c6e0d914SCasey Leedom  *	Returns true if a response descriptor contains a yet unprocessed
1568c6e0d914SCasey Leedom  *	response.
1569c6e0d914SCasey Leedom  */
1570c6e0d914SCasey Leedom static inline bool is_new_response(const struct rsp_ctrl *rc,
1571c6e0d914SCasey Leedom 				   const struct sge_rspq *rspq)
1572c6e0d914SCasey Leedom {
1573c6e0d914SCasey Leedom 	return RSPD_GEN(rc->type_gen) == rspq->gen;
1574c6e0d914SCasey Leedom }
1575c6e0d914SCasey Leedom 
1576c6e0d914SCasey Leedom /**
1577c6e0d914SCasey Leedom  *	restore_rx_bufs - put back a packet's RX buffers
1578c6e0d914SCasey Leedom  *	@gl: the packet gather list
1579c6e0d914SCasey Leedom  *	@fl: the SGE Free List
1580c6e0d914SCasey Leedom  *	@nfrags: how many fragments in @si
1581c6e0d914SCasey Leedom  *
1582c6e0d914SCasey Leedom  *	Called when we find out that the current packet, @si, can't be
1583c6e0d914SCasey Leedom  *	processed right away for some reason.  This is a very rare event and
1584c6e0d914SCasey Leedom  *	there's no effort to make this suspension/resumption process
1585c6e0d914SCasey Leedom  *	particularly efficient.
1586c6e0d914SCasey Leedom  *
1587c6e0d914SCasey Leedom  *	We implement the suspension by putting all of the RX buffers associated
1588c6e0d914SCasey Leedom  *	with the current packet back on the original Free List.  The buffers
1589c6e0d914SCasey Leedom  *	have already been unmapped and are left unmapped, we mark them as
1590c6e0d914SCasey Leedom  *	unmapped in order to prevent further unmapping attempts.  (Effectively
1591c6e0d914SCasey Leedom  *	this function undoes the series of @unmap_rx_buf calls which were done
1592c6e0d914SCasey Leedom  *	to create the current packet's gather list.)  This leaves us ready to
1593c6e0d914SCasey Leedom  *	restart processing of the packet the next time we start processing the
1594c6e0d914SCasey Leedom  *	RX Queue ...
1595c6e0d914SCasey Leedom  */
1596c6e0d914SCasey Leedom static void restore_rx_bufs(const struct pkt_gl *gl, struct sge_fl *fl,
1597c6e0d914SCasey Leedom 			    int frags)
1598c6e0d914SCasey Leedom {
1599c6e0d914SCasey Leedom 	struct rx_sw_desc *sdesc;
1600c6e0d914SCasey Leedom 
1601c6e0d914SCasey Leedom 	while (frags--) {
1602c6e0d914SCasey Leedom 		if (fl->cidx == 0)
1603c6e0d914SCasey Leedom 			fl->cidx = fl->size - 1;
1604c6e0d914SCasey Leedom 		else
1605c6e0d914SCasey Leedom 			fl->cidx--;
1606c6e0d914SCasey Leedom 		sdesc = &fl->sdesc[fl->cidx];
1607c6e0d914SCasey Leedom 		sdesc->page = gl->frags[frags].page;
1608c6e0d914SCasey Leedom 		sdesc->dma_addr |= RX_UNMAPPED_BUF;
1609c6e0d914SCasey Leedom 		fl->avail++;
1610c6e0d914SCasey Leedom 	}
1611c6e0d914SCasey Leedom }
1612c6e0d914SCasey Leedom 
1613c6e0d914SCasey Leedom /**
1614c6e0d914SCasey Leedom  *	rspq_next - advance to the next entry in a response queue
1615c6e0d914SCasey Leedom  *	@rspq: the queue
1616c6e0d914SCasey Leedom  *
1617c6e0d914SCasey Leedom  *	Updates the state of a response queue to advance it to the next entry.
1618c6e0d914SCasey Leedom  */
1619c6e0d914SCasey Leedom static inline void rspq_next(struct sge_rspq *rspq)
1620c6e0d914SCasey Leedom {
1621c6e0d914SCasey Leedom 	rspq->cur_desc = (void *)rspq->cur_desc + rspq->iqe_len;
1622c6e0d914SCasey Leedom 	if (unlikely(++rspq->cidx == rspq->size)) {
1623c6e0d914SCasey Leedom 		rspq->cidx = 0;
1624c6e0d914SCasey Leedom 		rspq->gen ^= 1;
1625c6e0d914SCasey Leedom 		rspq->cur_desc = rspq->desc;
1626c6e0d914SCasey Leedom 	}
1627c6e0d914SCasey Leedom }
1628c6e0d914SCasey Leedom 
1629c6e0d914SCasey Leedom /**
1630c6e0d914SCasey Leedom  *	process_responses - process responses from an SGE response queue
1631c6e0d914SCasey Leedom  *	@rspq: the ingress response queue to process
1632c6e0d914SCasey Leedom  *	@budget: how many responses can be processed in this round
1633c6e0d914SCasey Leedom  *
1634c6e0d914SCasey Leedom  *	Process responses from a Scatter Gather Engine response queue up to
1635c6e0d914SCasey Leedom  *	the supplied budget.  Responses include received packets as well as
1636c6e0d914SCasey Leedom  *	control messages from firmware or hardware.
1637c6e0d914SCasey Leedom  *
1638c6e0d914SCasey Leedom  *	Additionally choose the interrupt holdoff time for the next interrupt
1639c6e0d914SCasey Leedom  *	on this queue.  If the system is under memory shortage use a fairly
1640c6e0d914SCasey Leedom  *	long delay to help recovery.
1641c6e0d914SCasey Leedom  */
1642c6e0d914SCasey Leedom int process_responses(struct sge_rspq *rspq, int budget)
1643c6e0d914SCasey Leedom {
1644c6e0d914SCasey Leedom 	struct sge_eth_rxq *rxq = container_of(rspq, struct sge_eth_rxq, rspq);
1645c6e0d914SCasey Leedom 	int budget_left = budget;
1646c6e0d914SCasey Leedom 
1647c6e0d914SCasey Leedom 	while (likely(budget_left)) {
1648c6e0d914SCasey Leedom 		int ret, rsp_type;
1649c6e0d914SCasey Leedom 		const struct rsp_ctrl *rc;
1650c6e0d914SCasey Leedom 
1651c6e0d914SCasey Leedom 		rc = (void *)rspq->cur_desc + (rspq->iqe_len - sizeof(*rc));
1652c6e0d914SCasey Leedom 		if (!is_new_response(rc, rspq))
1653c6e0d914SCasey Leedom 			break;
1654c6e0d914SCasey Leedom 
1655c6e0d914SCasey Leedom 		/*
1656c6e0d914SCasey Leedom 		 * Figure out what kind of response we've received from the
1657c6e0d914SCasey Leedom 		 * SGE.
1658c6e0d914SCasey Leedom 		 */
1659c6e0d914SCasey Leedom 		rmb();
1660c6e0d914SCasey Leedom 		rsp_type = RSPD_TYPE(rc->type_gen);
1661c6e0d914SCasey Leedom 		if (likely(rsp_type == RSP_TYPE_FLBUF)) {
1662a0006a86SIan Campbell 			struct page_frag *fp;
1663c6e0d914SCasey Leedom 			struct pkt_gl gl;
1664c6e0d914SCasey Leedom 			const struct rx_sw_desc *sdesc;
1665c6e0d914SCasey Leedom 			u32 bufsz, frag;
1666c6e0d914SCasey Leedom 			u32 len = be32_to_cpu(rc->pldbuflen_qid);
1667c6e0d914SCasey Leedom 
1668c6e0d914SCasey Leedom 			/*
1669c6e0d914SCasey Leedom 			 * If we get a "new buffer" message from the SGE we
1670c6e0d914SCasey Leedom 			 * need to move on to the next Free List buffer.
1671c6e0d914SCasey Leedom 			 */
1672c6e0d914SCasey Leedom 			if (len & RSPD_NEWBUF) {
1673c6e0d914SCasey Leedom 				/*
1674c6e0d914SCasey Leedom 				 * We get one "new buffer" message when we
1675c6e0d914SCasey Leedom 				 * first start up a queue so we need to ignore
1676c6e0d914SCasey Leedom 				 * it when our offset into the buffer is 0.
1677c6e0d914SCasey Leedom 				 */
1678c6e0d914SCasey Leedom 				if (likely(rspq->offset > 0)) {
1679c6e0d914SCasey Leedom 					free_rx_bufs(rspq->adapter, &rxq->fl,
1680c6e0d914SCasey Leedom 						     1);
1681c6e0d914SCasey Leedom 					rspq->offset = 0;
1682c6e0d914SCasey Leedom 				}
1683c6e0d914SCasey Leedom 				len = RSPD_LEN(len);
1684c6e0d914SCasey Leedom 			}
1685b94e72e2SCasey Leedom 			gl.tot_len = len;
1686c6e0d914SCasey Leedom 
1687c6e0d914SCasey Leedom 			/*
1688c6e0d914SCasey Leedom 			 * Gather packet fragments.
1689c6e0d914SCasey Leedom 			 */
1690c6e0d914SCasey Leedom 			for (frag = 0, fp = gl.frags; /**/; frag++, fp++) {
1691c6e0d914SCasey Leedom 				BUG_ON(frag >= MAX_SKB_FRAGS);
1692c6e0d914SCasey Leedom 				BUG_ON(rxq->fl.avail == 0);
1693c6e0d914SCasey Leedom 				sdesc = &rxq->fl.sdesc[rxq->fl.cidx];
1694c6e0d914SCasey Leedom 				bufsz = get_buf_size(sdesc);
1695c6e0d914SCasey Leedom 				fp->page = sdesc->page;
1696a0006a86SIan Campbell 				fp->offset = rspq->offset;
1697a0006a86SIan Campbell 				fp->size = min(bufsz, len);
1698a0006a86SIan Campbell 				len -= fp->size;
1699c6e0d914SCasey Leedom 				if (!len)
1700c6e0d914SCasey Leedom 					break;
1701c6e0d914SCasey Leedom 				unmap_rx_buf(rspq->adapter, &rxq->fl);
1702c6e0d914SCasey Leedom 			}
1703c6e0d914SCasey Leedom 			gl.nfrags = frag+1;
1704c6e0d914SCasey Leedom 
1705c6e0d914SCasey Leedom 			/*
1706c6e0d914SCasey Leedom 			 * Last buffer remains mapped so explicitly make it
1707c6e0d914SCasey Leedom 			 * coherent for CPU access and start preloading first
1708c6e0d914SCasey Leedom 			 * cache line ...
1709c6e0d914SCasey Leedom 			 */
1710c6e0d914SCasey Leedom 			dma_sync_single_for_cpu(rspq->adapter->pdev_dev,
1711c6e0d914SCasey Leedom 						get_buf_addr(sdesc),
1712a0006a86SIan Campbell 						fp->size, DMA_FROM_DEVICE);
1713c6e0d914SCasey Leedom 			gl.va = (page_address(gl.frags[0].page) +
1714a0006a86SIan Campbell 				 gl.frags[0].offset);
1715c6e0d914SCasey Leedom 			prefetch(gl.va);
1716c6e0d914SCasey Leedom 
1717c6e0d914SCasey Leedom 			/*
1718c6e0d914SCasey Leedom 			 * Hand the new ingress packet to the handler for
1719c6e0d914SCasey Leedom 			 * this Response Queue.
1720c6e0d914SCasey Leedom 			 */
1721c6e0d914SCasey Leedom 			ret = rspq->handler(rspq, rspq->cur_desc, &gl);
1722c6e0d914SCasey Leedom 			if (likely(ret == 0))
1723a0006a86SIan Campbell 				rspq->offset += ALIGN(fp->size, FL_ALIGN);
1724c6e0d914SCasey Leedom 			else
1725c6e0d914SCasey Leedom 				restore_rx_bufs(&gl, &rxq->fl, frag);
1726c6e0d914SCasey Leedom 		} else if (likely(rsp_type == RSP_TYPE_CPL)) {
1727c6e0d914SCasey Leedom 			ret = rspq->handler(rspq, rspq->cur_desc, NULL);
1728c6e0d914SCasey Leedom 		} else {
1729c6e0d914SCasey Leedom 			WARN_ON(rsp_type > RSP_TYPE_CPL);
1730c6e0d914SCasey Leedom 			ret = 0;
1731c6e0d914SCasey Leedom 		}
1732c6e0d914SCasey Leedom 
1733c6e0d914SCasey Leedom 		if (unlikely(ret)) {
1734c6e0d914SCasey Leedom 			/*
1735c6e0d914SCasey Leedom 			 * Couldn't process descriptor, back off for recovery.
1736c6e0d914SCasey Leedom 			 * We use the SGE's last timer which has the longest
1737c6e0d914SCasey Leedom 			 * interrupt coalescing value ...
1738c6e0d914SCasey Leedom 			 */
1739c6e0d914SCasey Leedom 			const int NOMEM_TIMER_IDX = SGE_NTIMERS-1;
1740c6e0d914SCasey Leedom 			rspq->next_intr_params =
1741c6e0d914SCasey Leedom 				QINTR_TIMER_IDX(NOMEM_TIMER_IDX);
1742c6e0d914SCasey Leedom 			break;
1743c6e0d914SCasey Leedom 		}
1744c6e0d914SCasey Leedom 
1745c6e0d914SCasey Leedom 		rspq_next(rspq);
1746c6e0d914SCasey Leedom 		budget_left--;
1747c6e0d914SCasey Leedom 	}
1748c6e0d914SCasey Leedom 
1749c6e0d914SCasey Leedom 	/*
1750c6e0d914SCasey Leedom 	 * If this is a Response Queue with an associated Free List and
1751c6e0d914SCasey Leedom 	 * at least two Egress Queue units available in the Free List
1752c6e0d914SCasey Leedom 	 * for new buffer pointers, refill the Free List.
1753c6e0d914SCasey Leedom 	 */
1754c6e0d914SCasey Leedom 	if (rspq->offset >= 0 &&
1755c6e0d914SCasey Leedom 	    rxq->fl.size - rxq->fl.avail >= 2*FL_PER_EQ_UNIT)
1756c6e0d914SCasey Leedom 		__refill_fl(rspq->adapter, &rxq->fl);
1757c6e0d914SCasey Leedom 	return budget - budget_left;
1758c6e0d914SCasey Leedom }
1759c6e0d914SCasey Leedom 
1760c6e0d914SCasey Leedom /**
1761c6e0d914SCasey Leedom  *	napi_rx_handler - the NAPI handler for RX processing
1762c6e0d914SCasey Leedom  *	@napi: the napi instance
1763c6e0d914SCasey Leedom  *	@budget: how many packets we can process in this round
1764c6e0d914SCasey Leedom  *
1765c6e0d914SCasey Leedom  *	Handler for new data events when using NAPI.  This does not need any
1766c6e0d914SCasey Leedom  *	locking or protection from interrupts as data interrupts are off at
1767c6e0d914SCasey Leedom  *	this point and other adapter interrupts do not interfere (the latter
1768c6e0d914SCasey Leedom  *	in not a concern at all with MSI-X as non-data interrupts then have
1769c6e0d914SCasey Leedom  *	a separate handler).
1770c6e0d914SCasey Leedom  */
1771c6e0d914SCasey Leedom static int napi_rx_handler(struct napi_struct *napi, int budget)
1772c6e0d914SCasey Leedom {
1773c6e0d914SCasey Leedom 	unsigned int intr_params;
1774c6e0d914SCasey Leedom 	struct sge_rspq *rspq = container_of(napi, struct sge_rspq, napi);
1775c6e0d914SCasey Leedom 	int work_done = process_responses(rspq, budget);
1776c6e0d914SCasey Leedom 
1777c6e0d914SCasey Leedom 	if (likely(work_done < budget)) {
1778c6e0d914SCasey Leedom 		napi_complete(napi);
1779c6e0d914SCasey Leedom 		intr_params = rspq->next_intr_params;
1780c6e0d914SCasey Leedom 		rspq->next_intr_params = rspq->intr_params;
1781c6e0d914SCasey Leedom 	} else
1782c6e0d914SCasey Leedom 		intr_params = QINTR_TIMER_IDX(SGE_TIMER_UPD_CIDX);
1783c6e0d914SCasey Leedom 
178468dc9d36SCasey Leedom 	if (unlikely(work_done == 0))
178568dc9d36SCasey Leedom 		rspq->unhandled_irqs++;
178668dc9d36SCasey Leedom 
1787c6e0d914SCasey Leedom 	t4_write_reg(rspq->adapter,
1788c6e0d914SCasey Leedom 		     T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
1789c6e0d914SCasey Leedom 		     CIDXINC(work_done) |
1790c6e0d914SCasey Leedom 		     INGRESSQID((u32)rspq->cntxt_id) |
1791c6e0d914SCasey Leedom 		     SEINTARM(intr_params));
1792c6e0d914SCasey Leedom 	return work_done;
1793c6e0d914SCasey Leedom }
1794c6e0d914SCasey Leedom 
1795c6e0d914SCasey Leedom /*
1796c6e0d914SCasey Leedom  * The MSI-X interrupt handler for an SGE response queue for the NAPI case
1797c6e0d914SCasey Leedom  * (i.e., response queue serviced by NAPI polling).
1798c6e0d914SCasey Leedom  */
1799c6e0d914SCasey Leedom irqreturn_t t4vf_sge_intr_msix(int irq, void *cookie)
1800c6e0d914SCasey Leedom {
1801c6e0d914SCasey Leedom 	struct sge_rspq *rspq = cookie;
1802c6e0d914SCasey Leedom 
1803c6e0d914SCasey Leedom 	napi_schedule(&rspq->napi);
1804c6e0d914SCasey Leedom 	return IRQ_HANDLED;
1805c6e0d914SCasey Leedom }
1806c6e0d914SCasey Leedom 
1807c6e0d914SCasey Leedom /*
1808c6e0d914SCasey Leedom  * Process the indirect interrupt entries in the interrupt queue and kick off
1809c6e0d914SCasey Leedom  * NAPI for each queue that has generated an entry.
1810c6e0d914SCasey Leedom  */
1811c6e0d914SCasey Leedom static unsigned int process_intrq(struct adapter *adapter)
1812c6e0d914SCasey Leedom {
1813c6e0d914SCasey Leedom 	struct sge *s = &adapter->sge;
1814c6e0d914SCasey Leedom 	struct sge_rspq *intrq = &s->intrq;
1815c6e0d914SCasey Leedom 	unsigned int work_done;
1816c6e0d914SCasey Leedom 
1817c6e0d914SCasey Leedom 	spin_lock(&adapter->sge.intrq_lock);
1818c6e0d914SCasey Leedom 	for (work_done = 0; ; work_done++) {
1819c6e0d914SCasey Leedom 		const struct rsp_ctrl *rc;
1820c6e0d914SCasey Leedom 		unsigned int qid, iq_idx;
1821c6e0d914SCasey Leedom 		struct sge_rspq *rspq;
1822c6e0d914SCasey Leedom 
1823c6e0d914SCasey Leedom 		/*
1824c6e0d914SCasey Leedom 		 * Grab the next response from the interrupt queue and bail
1825c6e0d914SCasey Leedom 		 * out if it's not a new response.
1826c6e0d914SCasey Leedom 		 */
1827c6e0d914SCasey Leedom 		rc = (void *)intrq->cur_desc + (intrq->iqe_len - sizeof(*rc));
1828c6e0d914SCasey Leedom 		if (!is_new_response(rc, intrq))
1829c6e0d914SCasey Leedom 			break;
1830c6e0d914SCasey Leedom 
1831c6e0d914SCasey Leedom 		/*
1832c6e0d914SCasey Leedom 		 * If the response isn't a forwarded interrupt message issue a
1833c6e0d914SCasey Leedom 		 * error and go on to the next response message.  This should
1834c6e0d914SCasey Leedom 		 * never happen ...
1835c6e0d914SCasey Leedom 		 */
1836c6e0d914SCasey Leedom 		rmb();
1837c6e0d914SCasey Leedom 		if (unlikely(RSPD_TYPE(rc->type_gen) != RSP_TYPE_INTR)) {
1838c6e0d914SCasey Leedom 			dev_err(adapter->pdev_dev,
1839c6e0d914SCasey Leedom 				"Unexpected INTRQ response type %d\n",
1840c6e0d914SCasey Leedom 				RSPD_TYPE(rc->type_gen));
1841c6e0d914SCasey Leedom 			continue;
1842c6e0d914SCasey Leedom 		}
1843c6e0d914SCasey Leedom 
1844c6e0d914SCasey Leedom 		/*
1845c6e0d914SCasey Leedom 		 * Extract the Queue ID from the interrupt message and perform
1846c6e0d914SCasey Leedom 		 * sanity checking to make sure it really refers to one of our
1847c6e0d914SCasey Leedom 		 * Ingress Queues which is active and matches the queue's ID.
1848c6e0d914SCasey Leedom 		 * None of these error conditions should ever happen so we may
1849c6e0d914SCasey Leedom 		 * want to either make them fatal and/or conditionalized under
1850c6e0d914SCasey Leedom 		 * DEBUG.
1851c6e0d914SCasey Leedom 		 */
1852c6e0d914SCasey Leedom 		qid = RSPD_QID(be32_to_cpu(rc->pldbuflen_qid));
1853c6e0d914SCasey Leedom 		iq_idx = IQ_IDX(s, qid);
1854c6e0d914SCasey Leedom 		if (unlikely(iq_idx >= MAX_INGQ)) {
1855c6e0d914SCasey Leedom 			dev_err(adapter->pdev_dev,
1856c6e0d914SCasey Leedom 				"Ingress QID %d out of range\n", qid);
1857c6e0d914SCasey Leedom 			continue;
1858c6e0d914SCasey Leedom 		}
1859c6e0d914SCasey Leedom 		rspq = s->ingr_map[iq_idx];
1860c6e0d914SCasey Leedom 		if (unlikely(rspq == NULL)) {
1861c6e0d914SCasey Leedom 			dev_err(adapter->pdev_dev,
1862c6e0d914SCasey Leedom 				"Ingress QID %d RSPQ=NULL\n", qid);
1863c6e0d914SCasey Leedom 			continue;
1864c6e0d914SCasey Leedom 		}
1865c6e0d914SCasey Leedom 		if (unlikely(rspq->abs_id != qid)) {
1866c6e0d914SCasey Leedom 			dev_err(adapter->pdev_dev,
1867c6e0d914SCasey Leedom 				"Ingress QID %d refers to RSPQ %d\n",
1868c6e0d914SCasey Leedom 				qid, rspq->abs_id);
1869c6e0d914SCasey Leedom 			continue;
1870c6e0d914SCasey Leedom 		}
1871c6e0d914SCasey Leedom 
1872c6e0d914SCasey Leedom 		/*
1873c6e0d914SCasey Leedom 		 * Schedule NAPI processing on the indicated Response Queue
1874c6e0d914SCasey Leedom 		 * and move on to the next entry in the Forwarded Interrupt
1875c6e0d914SCasey Leedom 		 * Queue.
1876c6e0d914SCasey Leedom 		 */
1877c6e0d914SCasey Leedom 		napi_schedule(&rspq->napi);
1878c6e0d914SCasey Leedom 		rspq_next(intrq);
1879c6e0d914SCasey Leedom 	}
1880c6e0d914SCasey Leedom 
1881c6e0d914SCasey Leedom 	t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
1882c6e0d914SCasey Leedom 		     CIDXINC(work_done) |
1883c6e0d914SCasey Leedom 		     INGRESSQID(intrq->cntxt_id) |
1884c6e0d914SCasey Leedom 		     SEINTARM(intrq->intr_params));
1885c6e0d914SCasey Leedom 
1886c6e0d914SCasey Leedom 	spin_unlock(&adapter->sge.intrq_lock);
1887c6e0d914SCasey Leedom 
1888c6e0d914SCasey Leedom 	return work_done;
1889c6e0d914SCasey Leedom }
1890c6e0d914SCasey Leedom 
1891c6e0d914SCasey Leedom /*
1892c6e0d914SCasey Leedom  * The MSI interrupt handler handles data events from SGE response queues as
1893c6e0d914SCasey Leedom  * well as error and other async events as they all use the same MSI vector.
1894c6e0d914SCasey Leedom  */
1895c6e0d914SCasey Leedom irqreturn_t t4vf_intr_msi(int irq, void *cookie)
1896c6e0d914SCasey Leedom {
1897c6e0d914SCasey Leedom 	struct adapter *adapter = cookie;
1898c6e0d914SCasey Leedom 
1899c6e0d914SCasey Leedom 	process_intrq(adapter);
1900c6e0d914SCasey Leedom 	return IRQ_HANDLED;
1901c6e0d914SCasey Leedom }
1902c6e0d914SCasey Leedom 
1903c6e0d914SCasey Leedom /**
1904c6e0d914SCasey Leedom  *	t4vf_intr_handler - select the top-level interrupt handler
1905c6e0d914SCasey Leedom  *	@adapter: the adapter
1906c6e0d914SCasey Leedom  *
1907c6e0d914SCasey Leedom  *	Selects the top-level interrupt handler based on the type of interrupts
1908c6e0d914SCasey Leedom  *	(MSI-X or MSI).
1909c6e0d914SCasey Leedom  */
1910c6e0d914SCasey Leedom irq_handler_t t4vf_intr_handler(struct adapter *adapter)
1911c6e0d914SCasey Leedom {
1912c6e0d914SCasey Leedom 	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
1913c6e0d914SCasey Leedom 	if (adapter->flags & USING_MSIX)
1914c6e0d914SCasey Leedom 		return t4vf_sge_intr_msix;
1915c6e0d914SCasey Leedom 	else
1916c6e0d914SCasey Leedom 		return t4vf_intr_msi;
1917c6e0d914SCasey Leedom }
1918c6e0d914SCasey Leedom 
1919c6e0d914SCasey Leedom /**
1920c6e0d914SCasey Leedom  *	sge_rx_timer_cb - perform periodic maintenance of SGE RX queues
1921c6e0d914SCasey Leedom  *	@data: the adapter
1922c6e0d914SCasey Leedom  *
1923c6e0d914SCasey Leedom  *	Runs periodically from a timer to perform maintenance of SGE RX queues.
1924c6e0d914SCasey Leedom  *
1925c6e0d914SCasey Leedom  *	a) Replenishes RX queues that have run out due to memory shortage.
1926c6e0d914SCasey Leedom  *	Normally new RX buffers are added when existing ones are consumed but
1927c6e0d914SCasey Leedom  *	when out of memory a queue can become empty.  We schedule NAPI to do
1928c6e0d914SCasey Leedom  *	the actual refill.
1929c6e0d914SCasey Leedom  */
1930c6e0d914SCasey Leedom static void sge_rx_timer_cb(unsigned long data)
1931c6e0d914SCasey Leedom {
1932c6e0d914SCasey Leedom 	struct adapter *adapter = (struct adapter *)data;
1933c6e0d914SCasey Leedom 	struct sge *s = &adapter->sge;
1934c6e0d914SCasey Leedom 	unsigned int i;
1935c6e0d914SCasey Leedom 
1936c6e0d914SCasey Leedom 	/*
1937c6e0d914SCasey Leedom 	 * Scan the "Starving Free Lists" flag array looking for any Free
1938c6e0d914SCasey Leedom 	 * Lists in need of more free buffers.  If we find one and it's not
1939c6e0d914SCasey Leedom 	 * being actively polled, then bump its "starving" counter and attempt
1940c6e0d914SCasey Leedom 	 * to refill it.  If we're successful in adding enough buffers to push
1941c6e0d914SCasey Leedom 	 * the Free List over the starving threshold, then we can clear its
1942c6e0d914SCasey Leedom 	 * "starving" status.
1943c6e0d914SCasey Leedom 	 */
1944c6e0d914SCasey Leedom 	for (i = 0; i < ARRAY_SIZE(s->starving_fl); i++) {
1945c6e0d914SCasey Leedom 		unsigned long m;
1946c6e0d914SCasey Leedom 
1947c6e0d914SCasey Leedom 		for (m = s->starving_fl[i]; m; m &= m - 1) {
1948c6e0d914SCasey Leedom 			unsigned int id = __ffs(m) + i * BITS_PER_LONG;
1949c6e0d914SCasey Leedom 			struct sge_fl *fl = s->egr_map[id];
1950c6e0d914SCasey Leedom 
1951c6e0d914SCasey Leedom 			clear_bit(id, s->starving_fl);
1952c6e0d914SCasey Leedom 			smp_mb__after_clear_bit();
1953c6e0d914SCasey Leedom 
1954c6e0d914SCasey Leedom 			/*
1955c6e0d914SCasey Leedom 			 * Since we are accessing fl without a lock there's a
1956c6e0d914SCasey Leedom 			 * small probability of a false positive where we
1957c6e0d914SCasey Leedom 			 * schedule napi but the FL is no longer starving.
1958c6e0d914SCasey Leedom 			 * No biggie.
1959c6e0d914SCasey Leedom 			 */
1960c6e0d914SCasey Leedom 			if (fl_starving(fl)) {
1961c6e0d914SCasey Leedom 				struct sge_eth_rxq *rxq;
1962c6e0d914SCasey Leedom 
1963c6e0d914SCasey Leedom 				rxq = container_of(fl, struct sge_eth_rxq, fl);
1964c6e0d914SCasey Leedom 				if (napi_reschedule(&rxq->rspq.napi))
1965c6e0d914SCasey Leedom 					fl->starving++;
1966c6e0d914SCasey Leedom 				else
1967c6e0d914SCasey Leedom 					set_bit(id, s->starving_fl);
1968c6e0d914SCasey Leedom 			}
1969c6e0d914SCasey Leedom 		}
1970c6e0d914SCasey Leedom 	}
1971c6e0d914SCasey Leedom 
1972c6e0d914SCasey Leedom 	/*
1973c6e0d914SCasey Leedom 	 * Reschedule the next scan for starving Free Lists ...
1974c6e0d914SCasey Leedom 	 */
1975c6e0d914SCasey Leedom 	mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
1976c6e0d914SCasey Leedom }
1977c6e0d914SCasey Leedom 
1978c6e0d914SCasey Leedom /**
1979c6e0d914SCasey Leedom  *	sge_tx_timer_cb - perform periodic maintenance of SGE Tx queues
1980c6e0d914SCasey Leedom  *	@data: the adapter
1981c6e0d914SCasey Leedom  *
1982c6e0d914SCasey Leedom  *	Runs periodically from a timer to perform maintenance of SGE TX queues.
1983c6e0d914SCasey Leedom  *
1984c6e0d914SCasey Leedom  *	b) Reclaims completed Tx packets for the Ethernet queues.  Normally
1985c6e0d914SCasey Leedom  *	packets are cleaned up by new Tx packets, this timer cleans up packets
1986c6e0d914SCasey Leedom  *	when no new packets are being submitted.  This is essential for pktgen,
1987c6e0d914SCasey Leedom  *	at least.
1988c6e0d914SCasey Leedom  */
1989c6e0d914SCasey Leedom static void sge_tx_timer_cb(unsigned long data)
1990c6e0d914SCasey Leedom {
1991c6e0d914SCasey Leedom 	struct adapter *adapter = (struct adapter *)data;
1992c6e0d914SCasey Leedom 	struct sge *s = &adapter->sge;
1993c6e0d914SCasey Leedom 	unsigned int i, budget;
1994c6e0d914SCasey Leedom 
1995c6e0d914SCasey Leedom 	budget = MAX_TIMER_TX_RECLAIM;
1996c6e0d914SCasey Leedom 	i = s->ethtxq_rover;
1997c6e0d914SCasey Leedom 	do {
1998c6e0d914SCasey Leedom 		struct sge_eth_txq *txq = &s->ethtxq[i];
1999c6e0d914SCasey Leedom 
2000c6e0d914SCasey Leedom 		if (reclaimable(&txq->q) && __netif_tx_trylock(txq->txq)) {
2001c6e0d914SCasey Leedom 			int avail = reclaimable(&txq->q);
2002c6e0d914SCasey Leedom 
2003c6e0d914SCasey Leedom 			if (avail > budget)
2004c6e0d914SCasey Leedom 				avail = budget;
2005c6e0d914SCasey Leedom 
2006c6e0d914SCasey Leedom 			free_tx_desc(adapter, &txq->q, avail, true);
2007c6e0d914SCasey Leedom 			txq->q.in_use -= avail;
2008c6e0d914SCasey Leedom 			__netif_tx_unlock(txq->txq);
2009c6e0d914SCasey Leedom 
2010c6e0d914SCasey Leedom 			budget -= avail;
2011c6e0d914SCasey Leedom 			if (!budget)
2012c6e0d914SCasey Leedom 				break;
2013c6e0d914SCasey Leedom 		}
2014c6e0d914SCasey Leedom 
2015c6e0d914SCasey Leedom 		i++;
2016c6e0d914SCasey Leedom 		if (i >= s->ethqsets)
2017c6e0d914SCasey Leedom 			i = 0;
2018c6e0d914SCasey Leedom 	} while (i != s->ethtxq_rover);
2019c6e0d914SCasey Leedom 	s->ethtxq_rover = i;
2020c6e0d914SCasey Leedom 
2021c6e0d914SCasey Leedom 	/*
2022c6e0d914SCasey Leedom 	 * If we found too many reclaimable packets schedule a timer in the
2023c6e0d914SCasey Leedom 	 * near future to continue where we left off.  Otherwise the next timer
2024c6e0d914SCasey Leedom 	 * will be at its normal interval.
2025c6e0d914SCasey Leedom 	 */
2026c6e0d914SCasey Leedom 	mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
2027c6e0d914SCasey Leedom }
2028c6e0d914SCasey Leedom 
2029c6e0d914SCasey Leedom /**
2030c6e0d914SCasey Leedom  *	t4vf_sge_alloc_rxq - allocate an SGE RX Queue
2031c6e0d914SCasey Leedom  *	@adapter: the adapter
2032c6e0d914SCasey Leedom  *	@rspq: pointer to to the new rxq's Response Queue to be filled in
2033c6e0d914SCasey Leedom  *	@iqasynch: if 0, a normal rspq; if 1, an asynchronous event queue
2034c6e0d914SCasey Leedom  *	@dev: the network device associated with the new rspq
2035c6e0d914SCasey Leedom  *	@intr_dest: MSI-X vector index (overriden in MSI mode)
2036c6e0d914SCasey Leedom  *	@fl: pointer to the new rxq's Free List to be filled in
2037c6e0d914SCasey Leedom  *	@hnd: the interrupt handler to invoke for the rspq
2038c6e0d914SCasey Leedom  */
2039c6e0d914SCasey Leedom int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
2040c6e0d914SCasey Leedom 		       bool iqasynch, struct net_device *dev,
2041c6e0d914SCasey Leedom 		       int intr_dest,
2042c6e0d914SCasey Leedom 		       struct sge_fl *fl, rspq_handler_t hnd)
2043c6e0d914SCasey Leedom {
2044c6e0d914SCasey Leedom 	struct port_info *pi = netdev_priv(dev);
2045c6e0d914SCasey Leedom 	struct fw_iq_cmd cmd, rpl;
2046c6e0d914SCasey Leedom 	int ret, iqandst, flsz = 0;
2047c6e0d914SCasey Leedom 
2048c6e0d914SCasey Leedom 	/*
2049c6e0d914SCasey Leedom 	 * If we're using MSI interrupts and we're not initializing the
2050c6e0d914SCasey Leedom 	 * Forwarded Interrupt Queue itself, then set up this queue for
2051c6e0d914SCasey Leedom 	 * indirect interrupts to the Forwarded Interrupt Queue.  Obviously
2052c6e0d914SCasey Leedom 	 * the Forwarded Interrupt Queue must be set up before any other
2053c6e0d914SCasey Leedom 	 * ingress queue ...
2054c6e0d914SCasey Leedom 	 */
2055c6e0d914SCasey Leedom 	if ((adapter->flags & USING_MSI) && rspq != &adapter->sge.intrq) {
2056c6e0d914SCasey Leedom 		iqandst = SGE_INTRDST_IQ;
2057c6e0d914SCasey Leedom 		intr_dest = adapter->sge.intrq.abs_id;
2058c6e0d914SCasey Leedom 	} else
2059c6e0d914SCasey Leedom 		iqandst = SGE_INTRDST_PCI;
2060c6e0d914SCasey Leedom 
2061c6e0d914SCasey Leedom 	/*
2062c6e0d914SCasey Leedom 	 * Allocate the hardware ring for the Response Queue.  The size needs
2063c6e0d914SCasey Leedom 	 * to be a multiple of 16 which includes the mandatory status entry
2064c6e0d914SCasey Leedom 	 * (regardless of whether the Status Page capabilities are enabled or
2065c6e0d914SCasey Leedom 	 * not).
2066c6e0d914SCasey Leedom 	 */
2067c6e0d914SCasey Leedom 	rspq->size = roundup(rspq->size, 16);
2068c6e0d914SCasey Leedom 	rspq->desc = alloc_ring(adapter->pdev_dev, rspq->size, rspq->iqe_len,
2069c6e0d914SCasey Leedom 				0, &rspq->phys_addr, NULL, 0);
2070c6e0d914SCasey Leedom 	if (!rspq->desc)
2071c6e0d914SCasey Leedom 		return -ENOMEM;
2072c6e0d914SCasey Leedom 
2073c6e0d914SCasey Leedom 	/*
2074c6e0d914SCasey Leedom 	 * Fill in the Ingress Queue Command.  Note: Ideally this code would
2075c6e0d914SCasey Leedom 	 * be in t4vf_hw.c but there are so many parameters and dependencies
2076c6e0d914SCasey Leedom 	 * on our Linux SGE state that we would end up having to pass tons of
2077c6e0d914SCasey Leedom 	 * parameters.  We'll have to think about how this might be migrated
2078c6e0d914SCasey Leedom 	 * into OS-independent common code ...
2079c6e0d914SCasey Leedom 	 */
2080c6e0d914SCasey Leedom 	memset(&cmd, 0, sizeof(cmd));
2081c6e0d914SCasey Leedom 	cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_IQ_CMD) |
2082c6e0d914SCasey Leedom 				    FW_CMD_REQUEST |
2083c6e0d914SCasey Leedom 				    FW_CMD_WRITE |
2084c6e0d914SCasey Leedom 				    FW_CMD_EXEC);
2085c6e0d914SCasey Leedom 	cmd.alloc_to_len16 = cpu_to_be32(FW_IQ_CMD_ALLOC |
2086c6e0d914SCasey Leedom 					 FW_IQ_CMD_IQSTART(1) |
2087c6e0d914SCasey Leedom 					 FW_LEN16(cmd));
2088c6e0d914SCasey Leedom 	cmd.type_to_iqandstindex =
2089c6e0d914SCasey Leedom 		cpu_to_be32(FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
2090c6e0d914SCasey Leedom 			    FW_IQ_CMD_IQASYNCH(iqasynch) |
2091c6e0d914SCasey Leedom 			    FW_IQ_CMD_VIID(pi->viid) |
2092c6e0d914SCasey Leedom 			    FW_IQ_CMD_IQANDST(iqandst) |
2093c6e0d914SCasey Leedom 			    FW_IQ_CMD_IQANUS(1) |
2094c6e0d914SCasey Leedom 			    FW_IQ_CMD_IQANUD(SGE_UPDATEDEL_INTR) |
2095c6e0d914SCasey Leedom 			    FW_IQ_CMD_IQANDSTINDEX(intr_dest));
2096c6e0d914SCasey Leedom 	cmd.iqdroprss_to_iqesize =
2097c6e0d914SCasey Leedom 		cpu_to_be16(FW_IQ_CMD_IQPCIECH(pi->port_id) |
2098c6e0d914SCasey Leedom 			    FW_IQ_CMD_IQGTSMODE |
2099c6e0d914SCasey Leedom 			    FW_IQ_CMD_IQINTCNTTHRESH(rspq->pktcnt_idx) |
2100c6e0d914SCasey Leedom 			    FW_IQ_CMD_IQESIZE(ilog2(rspq->iqe_len) - 4));
2101c6e0d914SCasey Leedom 	cmd.iqsize = cpu_to_be16(rspq->size);
2102c6e0d914SCasey Leedom 	cmd.iqaddr = cpu_to_be64(rspq->phys_addr);
2103c6e0d914SCasey Leedom 
2104c6e0d914SCasey Leedom 	if (fl) {
2105c6e0d914SCasey Leedom 		/*
2106c6e0d914SCasey Leedom 		 * Allocate the ring for the hardware free list (with space
2107c6e0d914SCasey Leedom 		 * for its status page) along with the associated software
2108c6e0d914SCasey Leedom 		 * descriptor ring.  The free list size needs to be a multiple
2109c6e0d914SCasey Leedom 		 * of the Egress Queue Unit.
2110c6e0d914SCasey Leedom 		 */
2111c6e0d914SCasey Leedom 		fl->size = roundup(fl->size, FL_PER_EQ_UNIT);
2112c6e0d914SCasey Leedom 		fl->desc = alloc_ring(adapter->pdev_dev, fl->size,
2113c6e0d914SCasey Leedom 				      sizeof(__be64), sizeof(struct rx_sw_desc),
2114c6e0d914SCasey Leedom 				      &fl->addr, &fl->sdesc, STAT_LEN);
2115c6e0d914SCasey Leedom 		if (!fl->desc) {
2116c6e0d914SCasey Leedom 			ret = -ENOMEM;
2117c6e0d914SCasey Leedom 			goto err;
2118c6e0d914SCasey Leedom 		}
2119c6e0d914SCasey Leedom 
2120c6e0d914SCasey Leedom 		/*
2121c6e0d914SCasey Leedom 		 * Calculate the size of the hardware free list ring plus
2122caedda35SCasey Leedom 		 * Status Page (which the SGE will place after the end of the
2123c6e0d914SCasey Leedom 		 * free list ring) in Egress Queue Units.
2124c6e0d914SCasey Leedom 		 */
2125c6e0d914SCasey Leedom 		flsz = (fl->size / FL_PER_EQ_UNIT +
2126c6e0d914SCasey Leedom 			STAT_LEN / EQ_UNIT);
2127c6e0d914SCasey Leedom 
2128c6e0d914SCasey Leedom 		/*
2129c6e0d914SCasey Leedom 		 * Fill in all the relevant firmware Ingress Queue Command
2130c6e0d914SCasey Leedom 		 * fields for the free list.
2131c6e0d914SCasey Leedom 		 */
2132c6e0d914SCasey Leedom 		cmd.iqns_to_fl0congen =
2133c6e0d914SCasey Leedom 			cpu_to_be32(
2134c6e0d914SCasey Leedom 				FW_IQ_CMD_FL0HOSTFCMODE(SGE_HOSTFCMODE_NONE) |
2135ce91a923SNaresh Kumar Inna 				FW_IQ_CMD_FL0PACKEN(1) |
2136ce91a923SNaresh Kumar Inna 				FW_IQ_CMD_FL0PADEN(1));
2137c6e0d914SCasey Leedom 		cmd.fl0dcaen_to_fl0cidxfthresh =
2138c6e0d914SCasey Leedom 			cpu_to_be16(
2139c6e0d914SCasey Leedom 				FW_IQ_CMD_FL0FBMIN(SGE_FETCHBURSTMIN_64B) |
2140c6e0d914SCasey Leedom 				FW_IQ_CMD_FL0FBMAX(SGE_FETCHBURSTMAX_512B));
2141c6e0d914SCasey Leedom 		cmd.fl0size = cpu_to_be16(flsz);
2142c6e0d914SCasey Leedom 		cmd.fl0addr = cpu_to_be64(fl->addr);
2143c6e0d914SCasey Leedom 	}
2144c6e0d914SCasey Leedom 
2145c6e0d914SCasey Leedom 	/*
2146c6e0d914SCasey Leedom 	 * Issue the firmware Ingress Queue Command and extract the results if
2147c6e0d914SCasey Leedom 	 * it completes successfully.
2148c6e0d914SCasey Leedom 	 */
2149c6e0d914SCasey Leedom 	ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl);
2150c6e0d914SCasey Leedom 	if (ret)
2151c6e0d914SCasey Leedom 		goto err;
2152c6e0d914SCasey Leedom 
2153c6e0d914SCasey Leedom 	netif_napi_add(dev, &rspq->napi, napi_rx_handler, 64);
2154c6e0d914SCasey Leedom 	rspq->cur_desc = rspq->desc;
2155c6e0d914SCasey Leedom 	rspq->cidx = 0;
2156c6e0d914SCasey Leedom 	rspq->gen = 1;
2157c6e0d914SCasey Leedom 	rspq->next_intr_params = rspq->intr_params;
2158c6e0d914SCasey Leedom 	rspq->cntxt_id = be16_to_cpu(rpl.iqid);
2159c6e0d914SCasey Leedom 	rspq->abs_id = be16_to_cpu(rpl.physiqid);
2160c6e0d914SCasey Leedom 	rspq->size--;			/* subtract status entry */
2161c6e0d914SCasey Leedom 	rspq->adapter = adapter;
2162c6e0d914SCasey Leedom 	rspq->netdev = dev;
2163c6e0d914SCasey Leedom 	rspq->handler = hnd;
2164c6e0d914SCasey Leedom 
2165c6e0d914SCasey Leedom 	/* set offset to -1 to distinguish ingress queues without FL */
2166c6e0d914SCasey Leedom 	rspq->offset = fl ? 0 : -1;
2167c6e0d914SCasey Leedom 
2168c6e0d914SCasey Leedom 	if (fl) {
2169c6e0d914SCasey Leedom 		fl->cntxt_id = be16_to_cpu(rpl.fl0id);
2170c6e0d914SCasey Leedom 		fl->avail = 0;
2171c6e0d914SCasey Leedom 		fl->pend_cred = 0;
2172c6e0d914SCasey Leedom 		fl->pidx = 0;
2173c6e0d914SCasey Leedom 		fl->cidx = 0;
2174c6e0d914SCasey Leedom 		fl->alloc_failed = 0;
2175c6e0d914SCasey Leedom 		fl->large_alloc_failed = 0;
2176c6e0d914SCasey Leedom 		fl->starving = 0;
2177c6e0d914SCasey Leedom 		refill_fl(adapter, fl, fl_cap(fl), GFP_KERNEL);
2178c6e0d914SCasey Leedom 	}
2179c6e0d914SCasey Leedom 
2180c6e0d914SCasey Leedom 	return 0;
2181c6e0d914SCasey Leedom 
2182c6e0d914SCasey Leedom err:
2183c6e0d914SCasey Leedom 	/*
2184c6e0d914SCasey Leedom 	 * An error occurred.  Clean up our partial allocation state and
2185c6e0d914SCasey Leedom 	 * return the error.
2186c6e0d914SCasey Leedom 	 */
2187c6e0d914SCasey Leedom 	if (rspq->desc) {
2188c6e0d914SCasey Leedom 		dma_free_coherent(adapter->pdev_dev, rspq->size * rspq->iqe_len,
2189c6e0d914SCasey Leedom 				  rspq->desc, rspq->phys_addr);
2190c6e0d914SCasey Leedom 		rspq->desc = NULL;
2191c6e0d914SCasey Leedom 	}
2192c6e0d914SCasey Leedom 	if (fl && fl->desc) {
2193c6e0d914SCasey Leedom 		kfree(fl->sdesc);
2194c6e0d914SCasey Leedom 		fl->sdesc = NULL;
2195c6e0d914SCasey Leedom 		dma_free_coherent(adapter->pdev_dev, flsz * EQ_UNIT,
2196c6e0d914SCasey Leedom 				  fl->desc, fl->addr);
2197c6e0d914SCasey Leedom 		fl->desc = NULL;
2198c6e0d914SCasey Leedom 	}
2199c6e0d914SCasey Leedom 	return ret;
2200c6e0d914SCasey Leedom }
2201c6e0d914SCasey Leedom 
2202c6e0d914SCasey Leedom /**
2203c6e0d914SCasey Leedom  *	t4vf_sge_alloc_eth_txq - allocate an SGE Ethernet TX Queue
2204c6e0d914SCasey Leedom  *	@adapter: the adapter
2205c6e0d914SCasey Leedom  *	@txq: pointer to the new txq to be filled in
2206c6e0d914SCasey Leedom  *	@devq: the network TX queue associated with the new txq
2207c6e0d914SCasey Leedom  *	@iqid: the relative ingress queue ID to which events relating to
2208c6e0d914SCasey Leedom  *		the new txq should be directed
2209c6e0d914SCasey Leedom  */
2210c6e0d914SCasey Leedom int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq,
2211c6e0d914SCasey Leedom 			   struct net_device *dev, struct netdev_queue *devq,
2212c6e0d914SCasey Leedom 			   unsigned int iqid)
2213c6e0d914SCasey Leedom {
2214c6e0d914SCasey Leedom 	int ret, nentries;
2215c6e0d914SCasey Leedom 	struct fw_eq_eth_cmd cmd, rpl;
2216c6e0d914SCasey Leedom 	struct port_info *pi = netdev_priv(dev);
2217c6e0d914SCasey Leedom 
2218c6e0d914SCasey Leedom 	/*
2219caedda35SCasey Leedom 	 * Calculate the size of the hardware TX Queue (including the Status
2220caedda35SCasey Leedom 	 * Page on the end of the TX Queue) in units of TX Descriptors.
2221c6e0d914SCasey Leedom 	 */
2222c6e0d914SCasey Leedom 	nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc);
2223c6e0d914SCasey Leedom 
2224c6e0d914SCasey Leedom 	/*
2225c6e0d914SCasey Leedom 	 * Allocate the hardware ring for the TX ring (with space for its
2226c6e0d914SCasey Leedom 	 * status page) along with the associated software descriptor ring.
2227c6e0d914SCasey Leedom 	 */
2228c6e0d914SCasey Leedom 	txq->q.desc = alloc_ring(adapter->pdev_dev, txq->q.size,
2229c6e0d914SCasey Leedom 				 sizeof(struct tx_desc),
2230c6e0d914SCasey Leedom 				 sizeof(struct tx_sw_desc),
2231c6e0d914SCasey Leedom 				 &txq->q.phys_addr, &txq->q.sdesc, STAT_LEN);
2232c6e0d914SCasey Leedom 	if (!txq->q.desc)
2233c6e0d914SCasey Leedom 		return -ENOMEM;
2234c6e0d914SCasey Leedom 
2235c6e0d914SCasey Leedom 	/*
2236c6e0d914SCasey Leedom 	 * Fill in the Egress Queue Command.  Note: As with the direct use of
2237c6e0d914SCasey Leedom 	 * the firmware Ingress Queue COmmand above in our RXQ allocation
2238c6e0d914SCasey Leedom 	 * routine, ideally, this code would be in t4vf_hw.c.  Again, we'll
2239c6e0d914SCasey Leedom 	 * have to see if there's some reasonable way to parameterize it
2240c6e0d914SCasey Leedom 	 * into the common code ...
2241c6e0d914SCasey Leedom 	 */
2242c6e0d914SCasey Leedom 	memset(&cmd, 0, sizeof(cmd));
2243c6e0d914SCasey Leedom 	cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_EQ_ETH_CMD) |
2244c6e0d914SCasey Leedom 				    FW_CMD_REQUEST |
2245c6e0d914SCasey Leedom 				    FW_CMD_WRITE |
2246c6e0d914SCasey Leedom 				    FW_CMD_EXEC);
2247c6e0d914SCasey Leedom 	cmd.alloc_to_len16 = cpu_to_be32(FW_EQ_ETH_CMD_ALLOC |
2248c6e0d914SCasey Leedom 					 FW_EQ_ETH_CMD_EQSTART |
2249c6e0d914SCasey Leedom 					 FW_LEN16(cmd));
2250c6e0d914SCasey Leedom 	cmd.viid_pkd = cpu_to_be32(FW_EQ_ETH_CMD_VIID(pi->viid));
2251c6e0d914SCasey Leedom 	cmd.fetchszm_to_iqid =
2252c6e0d914SCasey Leedom 		cpu_to_be32(FW_EQ_ETH_CMD_HOSTFCMODE(SGE_HOSTFCMODE_STPG) |
2253c6e0d914SCasey Leedom 			    FW_EQ_ETH_CMD_PCIECHN(pi->port_id) |
2254c6e0d914SCasey Leedom 			    FW_EQ_ETH_CMD_IQID(iqid));
2255c6e0d914SCasey Leedom 	cmd.dcaen_to_eqsize =
2256c6e0d914SCasey Leedom 		cpu_to_be32(FW_EQ_ETH_CMD_FBMIN(SGE_FETCHBURSTMIN_64B) |
2257c6e0d914SCasey Leedom 			    FW_EQ_ETH_CMD_FBMAX(SGE_FETCHBURSTMAX_512B) |
2258c6e0d914SCasey Leedom 			    FW_EQ_ETH_CMD_CIDXFTHRESH(SGE_CIDXFLUSHTHRESH_32) |
2259c6e0d914SCasey Leedom 			    FW_EQ_ETH_CMD_EQSIZE(nentries));
2260c6e0d914SCasey Leedom 	cmd.eqaddr = cpu_to_be64(txq->q.phys_addr);
2261c6e0d914SCasey Leedom 
2262c6e0d914SCasey Leedom 	/*
2263c6e0d914SCasey Leedom 	 * Issue the firmware Egress Queue Command and extract the results if
2264c6e0d914SCasey Leedom 	 * it completes successfully.
2265c6e0d914SCasey Leedom 	 */
2266c6e0d914SCasey Leedom 	ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl);
2267c6e0d914SCasey Leedom 	if (ret) {
2268c6e0d914SCasey Leedom 		/*
2269c6e0d914SCasey Leedom 		 * The girmware Ingress Queue Command failed for some reason.
2270c6e0d914SCasey Leedom 		 * Free up our partial allocation state and return the error.
2271c6e0d914SCasey Leedom 		 */
2272c6e0d914SCasey Leedom 		kfree(txq->q.sdesc);
2273c6e0d914SCasey Leedom 		txq->q.sdesc = NULL;
2274c6e0d914SCasey Leedom 		dma_free_coherent(adapter->pdev_dev,
2275c6e0d914SCasey Leedom 				  nentries * sizeof(struct tx_desc),
2276c6e0d914SCasey Leedom 				  txq->q.desc, txq->q.phys_addr);
2277c6e0d914SCasey Leedom 		txq->q.desc = NULL;
2278c6e0d914SCasey Leedom 		return ret;
2279c6e0d914SCasey Leedom 	}
2280c6e0d914SCasey Leedom 
2281c6e0d914SCasey Leedom 	txq->q.in_use = 0;
2282c6e0d914SCasey Leedom 	txq->q.cidx = 0;
2283c6e0d914SCasey Leedom 	txq->q.pidx = 0;
2284c6e0d914SCasey Leedom 	txq->q.stat = (void *)&txq->q.desc[txq->q.size];
2285c6e0d914SCasey Leedom 	txq->q.cntxt_id = FW_EQ_ETH_CMD_EQID_GET(be32_to_cpu(rpl.eqid_pkd));
2286c6e0d914SCasey Leedom 	txq->q.abs_id =
2287c6e0d914SCasey Leedom 		FW_EQ_ETH_CMD_PHYSEQID_GET(be32_to_cpu(rpl.physeqid_pkd));
2288c6e0d914SCasey Leedom 	txq->txq = devq;
2289c6e0d914SCasey Leedom 	txq->tso = 0;
2290c6e0d914SCasey Leedom 	txq->tx_cso = 0;
2291c6e0d914SCasey Leedom 	txq->vlan_ins = 0;
2292c6e0d914SCasey Leedom 	txq->q.stops = 0;
2293c6e0d914SCasey Leedom 	txq->q.restarts = 0;
2294c6e0d914SCasey Leedom 	txq->mapping_err = 0;
2295c6e0d914SCasey Leedom 	return 0;
2296c6e0d914SCasey Leedom }
2297c6e0d914SCasey Leedom 
2298c6e0d914SCasey Leedom /*
2299c6e0d914SCasey Leedom  * Free the DMA map resources associated with a TX queue.
2300c6e0d914SCasey Leedom  */
2301c6e0d914SCasey Leedom static void free_txq(struct adapter *adapter, struct sge_txq *tq)
2302c6e0d914SCasey Leedom {
2303c6e0d914SCasey Leedom 	dma_free_coherent(adapter->pdev_dev,
2304c6e0d914SCasey Leedom 			  tq->size * sizeof(*tq->desc) + STAT_LEN,
2305c6e0d914SCasey Leedom 			  tq->desc, tq->phys_addr);
2306c6e0d914SCasey Leedom 	tq->cntxt_id = 0;
2307c6e0d914SCasey Leedom 	tq->sdesc = NULL;
2308c6e0d914SCasey Leedom 	tq->desc = NULL;
2309c6e0d914SCasey Leedom }
2310c6e0d914SCasey Leedom 
2311c6e0d914SCasey Leedom /*
2312c6e0d914SCasey Leedom  * Free the resources associated with a response queue (possibly including a
2313c6e0d914SCasey Leedom  * free list).
2314c6e0d914SCasey Leedom  */
2315c6e0d914SCasey Leedom static void free_rspq_fl(struct adapter *adapter, struct sge_rspq *rspq,
2316c6e0d914SCasey Leedom 			 struct sge_fl *fl)
2317c6e0d914SCasey Leedom {
2318c6e0d914SCasey Leedom 	unsigned int flid = fl ? fl->cntxt_id : 0xffff;
2319c6e0d914SCasey Leedom 
2320c6e0d914SCasey Leedom 	t4vf_iq_free(adapter, FW_IQ_TYPE_FL_INT_CAP,
2321c6e0d914SCasey Leedom 		     rspq->cntxt_id, flid, 0xffff);
2322c6e0d914SCasey Leedom 	dma_free_coherent(adapter->pdev_dev, (rspq->size + 1) * rspq->iqe_len,
2323c6e0d914SCasey Leedom 			  rspq->desc, rspq->phys_addr);
2324c6e0d914SCasey Leedom 	netif_napi_del(&rspq->napi);
2325c6e0d914SCasey Leedom 	rspq->netdev = NULL;
2326c6e0d914SCasey Leedom 	rspq->cntxt_id = 0;
2327c6e0d914SCasey Leedom 	rspq->abs_id = 0;
2328c6e0d914SCasey Leedom 	rspq->desc = NULL;
2329c6e0d914SCasey Leedom 
2330c6e0d914SCasey Leedom 	if (fl) {
2331c6e0d914SCasey Leedom 		free_rx_bufs(adapter, fl, fl->avail);
2332c6e0d914SCasey Leedom 		dma_free_coherent(adapter->pdev_dev,
2333c6e0d914SCasey Leedom 				  fl->size * sizeof(*fl->desc) + STAT_LEN,
2334c6e0d914SCasey Leedom 				  fl->desc, fl->addr);
2335c6e0d914SCasey Leedom 		kfree(fl->sdesc);
2336c6e0d914SCasey Leedom 		fl->sdesc = NULL;
2337c6e0d914SCasey Leedom 		fl->cntxt_id = 0;
2338c6e0d914SCasey Leedom 		fl->desc = NULL;
2339c6e0d914SCasey Leedom 	}
2340c6e0d914SCasey Leedom }
2341c6e0d914SCasey Leedom 
2342c6e0d914SCasey Leedom /**
2343c6e0d914SCasey Leedom  *	t4vf_free_sge_resources - free SGE resources
2344c6e0d914SCasey Leedom  *	@adapter: the adapter
2345c6e0d914SCasey Leedom  *
2346c6e0d914SCasey Leedom  *	Frees resources used by the SGE queue sets.
2347c6e0d914SCasey Leedom  */
2348c6e0d914SCasey Leedom void t4vf_free_sge_resources(struct adapter *adapter)
2349c6e0d914SCasey Leedom {
2350c6e0d914SCasey Leedom 	struct sge *s = &adapter->sge;
2351c6e0d914SCasey Leedom 	struct sge_eth_rxq *rxq = s->ethrxq;
2352c6e0d914SCasey Leedom 	struct sge_eth_txq *txq = s->ethtxq;
2353c6e0d914SCasey Leedom 	struct sge_rspq *evtq = &s->fw_evtq;
2354c6e0d914SCasey Leedom 	struct sge_rspq *intrq = &s->intrq;
2355c6e0d914SCasey Leedom 	int qs;
2356c6e0d914SCasey Leedom 
2357b97d13a5SCasey Leedom 	for (qs = 0; qs < adapter->sge.ethqsets; qs++, rxq++, txq++) {
2358c6e0d914SCasey Leedom 		if (rxq->rspq.desc)
2359c6e0d914SCasey Leedom 			free_rspq_fl(adapter, &rxq->rspq, &rxq->fl);
2360c6e0d914SCasey Leedom 		if (txq->q.desc) {
2361c6e0d914SCasey Leedom 			t4vf_eth_eq_free(adapter, txq->q.cntxt_id);
2362c6e0d914SCasey Leedom 			free_tx_desc(adapter, &txq->q, txq->q.in_use, true);
2363c6e0d914SCasey Leedom 			kfree(txq->q.sdesc);
2364c6e0d914SCasey Leedom 			free_txq(adapter, &txq->q);
2365c6e0d914SCasey Leedom 		}
2366c6e0d914SCasey Leedom 	}
2367c6e0d914SCasey Leedom 	if (evtq->desc)
2368c6e0d914SCasey Leedom 		free_rspq_fl(adapter, evtq, NULL);
2369c6e0d914SCasey Leedom 	if (intrq->desc)
2370c6e0d914SCasey Leedom 		free_rspq_fl(adapter, intrq, NULL);
2371c6e0d914SCasey Leedom }
2372c6e0d914SCasey Leedom 
2373c6e0d914SCasey Leedom /**
2374c6e0d914SCasey Leedom  *	t4vf_sge_start - enable SGE operation
2375c6e0d914SCasey Leedom  *	@adapter: the adapter
2376c6e0d914SCasey Leedom  *
2377c6e0d914SCasey Leedom  *	Start tasklets and timers associated with the DMA engine.
2378c6e0d914SCasey Leedom  */
2379c6e0d914SCasey Leedom void t4vf_sge_start(struct adapter *adapter)
2380c6e0d914SCasey Leedom {
2381c6e0d914SCasey Leedom 	adapter->sge.ethtxq_rover = 0;
2382c6e0d914SCasey Leedom 	mod_timer(&adapter->sge.rx_timer, jiffies + RX_QCHECK_PERIOD);
2383c6e0d914SCasey Leedom 	mod_timer(&adapter->sge.tx_timer, jiffies + TX_QCHECK_PERIOD);
2384c6e0d914SCasey Leedom }
2385c6e0d914SCasey Leedom 
2386c6e0d914SCasey Leedom /**
2387c6e0d914SCasey Leedom  *	t4vf_sge_stop - disable SGE operation
2388c6e0d914SCasey Leedom  *	@adapter: the adapter
2389c6e0d914SCasey Leedom  *
2390c6e0d914SCasey Leedom  *	Stop tasklets and timers associated with the DMA engine.  Note that
2391c6e0d914SCasey Leedom  *	this is effective only if measures have been taken to disable any HW
2392c6e0d914SCasey Leedom  *	events that may restart them.
2393c6e0d914SCasey Leedom  */
2394c6e0d914SCasey Leedom void t4vf_sge_stop(struct adapter *adapter)
2395c6e0d914SCasey Leedom {
2396c6e0d914SCasey Leedom 	struct sge *s = &adapter->sge;
2397c6e0d914SCasey Leedom 
2398c6e0d914SCasey Leedom 	if (s->rx_timer.function)
2399c6e0d914SCasey Leedom 		del_timer_sync(&s->rx_timer);
2400c6e0d914SCasey Leedom 	if (s->tx_timer.function)
2401c6e0d914SCasey Leedom 		del_timer_sync(&s->tx_timer);
2402c6e0d914SCasey Leedom }
2403c6e0d914SCasey Leedom 
2404c6e0d914SCasey Leedom /**
2405c6e0d914SCasey Leedom  *	t4vf_sge_init - initialize SGE
2406c6e0d914SCasey Leedom  *	@adapter: the adapter
2407c6e0d914SCasey Leedom  *
2408c6e0d914SCasey Leedom  *	Performs SGE initialization needed every time after a chip reset.
2409c6e0d914SCasey Leedom  *	We do not initialize any of the queue sets here, instead the driver
2410c6e0d914SCasey Leedom  *	top-level must request those individually.  We also do not enable DMA
2411c6e0d914SCasey Leedom  *	here, that should be done after the queues have been set up.
2412c6e0d914SCasey Leedom  */
2413c6e0d914SCasey Leedom int t4vf_sge_init(struct adapter *adapter)
2414c6e0d914SCasey Leedom {
2415c6e0d914SCasey Leedom 	struct sge_params *sge_params = &adapter->params.sge;
2416c6e0d914SCasey Leedom 	u32 fl0 = sge_params->sge_fl_buffer_size[0];
2417c6e0d914SCasey Leedom 	u32 fl1 = sge_params->sge_fl_buffer_size[1];
2418c6e0d914SCasey Leedom 	struct sge *s = &adapter->sge;
2419c6e0d914SCasey Leedom 
2420c6e0d914SCasey Leedom 	/*
2421c6e0d914SCasey Leedom 	 * Start by vetting the basic SGE parameters which have been set up by
2422c6e0d914SCasey Leedom 	 * the Physical Function Driver.  Ideally we should be able to deal
2423c6e0d914SCasey Leedom 	 * with _any_ configuration.  Practice is different ...
2424c6e0d914SCasey Leedom 	 */
2425c6e0d914SCasey Leedom 	if (fl0 != PAGE_SIZE || (fl1 != 0 && fl1 <= fl0)) {
2426c6e0d914SCasey Leedom 		dev_err(adapter->pdev_dev, "bad SGE FL buffer sizes [%d, %d]\n",
2427c6e0d914SCasey Leedom 			fl0, fl1);
2428c6e0d914SCasey Leedom 		return -EINVAL;
2429c6e0d914SCasey Leedom 	}
243052367a76SVipul Pandya 	if ((sge_params->sge_control & RXPKTCPLMODE_MASK) == 0) {
2431c6e0d914SCasey Leedom 		dev_err(adapter->pdev_dev, "bad SGE CPL MODE\n");
2432c6e0d914SCasey Leedom 		return -EINVAL;
2433c6e0d914SCasey Leedom 	}
2434c6e0d914SCasey Leedom 
2435c6e0d914SCasey Leedom 	/*
2436c6e0d914SCasey Leedom 	 * Now translate the adapter parameters into our internal forms.
2437c6e0d914SCasey Leedom 	 */
2438c6e0d914SCasey Leedom 	if (fl1)
2439c6e0d914SCasey Leedom 		FL_PG_ORDER = ilog2(fl1) - PAGE_SHIFT;
244052367a76SVipul Pandya 	STAT_LEN = ((sge_params->sge_control & EGRSTATUSPAGESIZE_MASK)
244152367a76SVipul Pandya 		    ? 128 : 64);
2442c6e0d914SCasey Leedom 	PKTSHIFT = PKTSHIFT_GET(sge_params->sge_control);
2443c6e0d914SCasey Leedom 	FL_ALIGN = 1 << (INGPADBOUNDARY_GET(sge_params->sge_control) +
2444b3003be3SCasey Leedom 			 SGE_INGPADBOUNDARY_SHIFT);
2445c6e0d914SCasey Leedom 
2446c6e0d914SCasey Leedom 	/*
2447c6e0d914SCasey Leedom 	 * Set up tasklet timers.
2448c6e0d914SCasey Leedom 	 */
2449c6e0d914SCasey Leedom 	setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adapter);
2450c6e0d914SCasey Leedom 	setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adapter);
2451c6e0d914SCasey Leedom 
2452c6e0d914SCasey Leedom 	/*
2453c6e0d914SCasey Leedom 	 * Initialize Forwarded Interrupt Queue lock.
2454c6e0d914SCasey Leedom 	 */
2455c6e0d914SCasey Leedom 	spin_lock_init(&s->intrq_lock);
2456c6e0d914SCasey Leedom 
2457c6e0d914SCasey Leedom 	return 0;
2458c6e0d914SCasey Leedom }
2459