1 /*
2  * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  *
32  */
33 
34 #include <linux/mlx4/cq.h>
35 #include <linux/slab.h>
36 #include <linux/mlx4/qp.h>
37 #include <linux/skbuff.h>
38 #include <linux/if_ether.h>
39 #include <linux/if_vlan.h>
40 #include <linux/vmalloc.h>
41 
42 #include "mlx4_en.h"
43 
44 
mlx4_en_alloc_frag(struct mlx4_en_priv * priv,struct mlx4_en_rx_desc * rx_desc,struct page_frag * skb_frags,struct mlx4_en_rx_alloc * ring_alloc,int i)45 static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
46 			      struct mlx4_en_rx_desc *rx_desc,
47 			      struct page_frag *skb_frags,
48 			      struct mlx4_en_rx_alloc *ring_alloc,
49 			      int i)
50 {
51 	struct mlx4_en_dev *mdev = priv->mdev;
52 	struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
53 	struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i];
54 	struct page *page;
55 	dma_addr_t dma;
56 
57 	if (page_alloc->offset == frag_info->last_offset) {
58 		/* Allocate new page */
59 		page = alloc_pages(GFP_ATOMIC | __GFP_COMP, MLX4_EN_ALLOC_ORDER);
60 		if (!page)
61 			return -ENOMEM;
62 
63 		skb_frags[i].page = page_alloc->page;
64 		skb_frags[i].offset = page_alloc->offset;
65 		page_alloc->page = page;
66 		page_alloc->offset = frag_info->frag_align;
67 	} else {
68 		page = page_alloc->page;
69 		get_page(page);
70 
71 		skb_frags[i].page = page;
72 		skb_frags[i].offset = page_alloc->offset;
73 		page_alloc->offset += frag_info->frag_stride;
74 	}
75 	dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) +
76 			     skb_frags[i].offset, frag_info->frag_size,
77 			     PCI_DMA_FROMDEVICE);
78 	rx_desc->data[i].addr = cpu_to_be64(dma);
79 	return 0;
80 }
81 
mlx4_en_init_allocator(struct mlx4_en_priv * priv,struct mlx4_en_rx_ring * ring)82 static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
83 				  struct mlx4_en_rx_ring *ring)
84 {
85 	struct mlx4_en_rx_alloc *page_alloc;
86 	int i;
87 
88 	for (i = 0; i < priv->num_frags; i++) {
89 		page_alloc = &ring->page_alloc[i];
90 		page_alloc->page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
91 					       MLX4_EN_ALLOC_ORDER);
92 		if (!page_alloc->page)
93 			goto out;
94 
95 		page_alloc->offset = priv->frag_info[i].frag_align;
96 		en_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n",
97 		       i, page_alloc->page);
98 	}
99 	return 0;
100 
101 out:
102 	while (i--) {
103 		page_alloc = &ring->page_alloc[i];
104 		put_page(page_alloc->page);
105 		page_alloc->page = NULL;
106 	}
107 	return -ENOMEM;
108 }
109 
mlx4_en_destroy_allocator(struct mlx4_en_priv * priv,struct mlx4_en_rx_ring * ring)110 static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
111 				      struct mlx4_en_rx_ring *ring)
112 {
113 	struct mlx4_en_rx_alloc *page_alloc;
114 	int i;
115 
116 	for (i = 0; i < priv->num_frags; i++) {
117 		page_alloc = &ring->page_alloc[i];
118 		en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
119 		       i, page_count(page_alloc->page));
120 
121 		put_page(page_alloc->page);
122 		page_alloc->page = NULL;
123 	}
124 }
125 
126 
mlx4_en_init_rx_desc(struct mlx4_en_priv * priv,struct mlx4_en_rx_ring * ring,int index)127 static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
128 				 struct mlx4_en_rx_ring *ring, int index)
129 {
130 	struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index;
131 	struct skb_frag_struct *skb_frags = ring->rx_info +
132 					    (index << priv->log_rx_info);
133 	int possible_frags;
134 	int i;
135 
136 	/* Set size and memtype fields */
137 	for (i = 0; i < priv->num_frags; i++) {
138 		skb_frag_size_set(&skb_frags[i], priv->frag_info[i].frag_size);
139 		rx_desc->data[i].byte_count =
140 			cpu_to_be32(priv->frag_info[i].frag_size);
141 		rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key);
142 	}
143 
144 	/* If the number of used fragments does not fill up the ring stride,
145 	 * remaining (unused) fragments must be padded with null address/size
146 	 * and a special memory key */
147 	possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
148 	for (i = priv->num_frags; i < possible_frags; i++) {
149 		rx_desc->data[i].byte_count = 0;
150 		rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
151 		rx_desc->data[i].addr = 0;
152 	}
153 }
154 
155 
mlx4_en_prepare_rx_desc(struct mlx4_en_priv * priv,struct mlx4_en_rx_ring * ring,int index)156 static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
157 				   struct mlx4_en_rx_ring *ring, int index)
158 {
159 	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
160 	struct page_frag *skb_frags = ring->rx_info +
161 				      (index << priv->log_rx_info);
162 	int i;
163 
164 	for (i = 0; i < priv->num_frags; i++)
165 		if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, ring->page_alloc, i))
166 			goto err;
167 
168 	return 0;
169 
170 err:
171 	while (i--) {
172 		dma_addr_t dma = be64_to_cpu(rx_desc->data[i].addr);
173 		pci_unmap_single(priv->mdev->pdev, dma, skb_frags[i].size,
174 				 PCI_DMA_FROMDEVICE);
175 		put_page(skb_frags[i].page);
176 	}
177 	return -ENOMEM;
178 }
179 
mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring * ring)180 static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
181 {
182 	*ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
183 }
184 
mlx4_en_free_rx_desc(struct mlx4_en_priv * priv,struct mlx4_en_rx_ring * ring,int index)185 static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
186 				 struct mlx4_en_rx_ring *ring,
187 				 int index)
188 {
189 	struct mlx4_en_dev *mdev = priv->mdev;
190 	struct page_frag *skb_frags;
191 	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride);
192 	dma_addr_t dma;
193 	int nr;
194 
195 	skb_frags = ring->rx_info + (index << priv->log_rx_info);
196 	for (nr = 0; nr < priv->num_frags; nr++) {
197 		en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
198 		dma = be64_to_cpu(rx_desc->data[nr].addr);
199 
200 		en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma);
201 		pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
202 				 PCI_DMA_FROMDEVICE);
203 		put_page(skb_frags[nr].page);
204 	}
205 }
206 
mlx4_en_fill_rx_buffers(struct mlx4_en_priv * priv)207 static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
208 {
209 	struct mlx4_en_rx_ring *ring;
210 	int ring_ind;
211 	int buf_ind;
212 	int new_size;
213 
214 	for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
215 		for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
216 			ring = &priv->rx_ring[ring_ind];
217 
218 			if (mlx4_en_prepare_rx_desc(priv, ring,
219 						    ring->actual_size)) {
220 				if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
221 					en_err(priv, "Failed to allocate "
222 						     "enough rx buffers\n");
223 					return -ENOMEM;
224 				} else {
225 					new_size = rounddown_pow_of_two(ring->actual_size);
226 					en_warn(priv, "Only %d buffers allocated "
227 						      "reducing ring size to %d",
228 						ring->actual_size, new_size);
229 					goto reduce_rings;
230 				}
231 			}
232 			ring->actual_size++;
233 			ring->prod++;
234 		}
235 	}
236 	return 0;
237 
238 reduce_rings:
239 	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
240 		ring = &priv->rx_ring[ring_ind];
241 		while (ring->actual_size > new_size) {
242 			ring->actual_size--;
243 			ring->prod--;
244 			mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
245 		}
246 	}
247 
248 	return 0;
249 }
250 
mlx4_en_free_rx_buf(struct mlx4_en_priv * priv,struct mlx4_en_rx_ring * ring)251 static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
252 				struct mlx4_en_rx_ring *ring)
253 {
254 	int index;
255 
256 	en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
257 	       ring->cons, ring->prod);
258 
259 	/* Unmap and free Rx buffers */
260 	BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size);
261 	while (ring->cons != ring->prod) {
262 		index = ring->cons & ring->size_mask;
263 		en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
264 		mlx4_en_free_rx_desc(priv, ring, index);
265 		++ring->cons;
266 	}
267 }
268 
/*
 * Allocate the resources of an Rx ring: the rx_info bookkeeping array
 * (vmalloc) and the HW queue resources, whose buffer is then mapped and
 * recorded in ring->buf.
 *
 * Returns 0 on success.  On failure everything allocated here is released,
 * ring->rx_info is cleared and a negative errno is returned.
 */
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
			   struct mlx4_en_rx_ring *ring, u32 size, u16 stride)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	int info_bytes;
	int err;

	/* Ring geometry */
	ring->prod = 0;
	ring->cons = 0;
	ring->size = size;
	ring->size_mask = size - 1;
	ring->stride = stride;
	ring->log_stride = ffs(ring->stride) - 1;
	ring->buf_size = ring->size * ring->stride + TXBB_SIZE;

	/* Per-descriptor fragment bookkeeping */
	info_bytes = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
					       sizeof(struct skb_frag_struct));
	ring->rx_info = vmalloc(info_bytes);
	if (!ring->rx_info) {
		en_err(priv, "Failed allocating rx_info ring\n");
		return -ENOMEM;
	}
	en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
		 ring->rx_info, info_bytes);

	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
				 ring->buf_size, 2 * PAGE_SIZE);
	if (err)
		goto free_info;

	err = mlx4_en_map_buffer(&ring->wqres.buf);
	if (!err) {
		ring->buf = ring->wqres.buf.direct.buf;
		return 0;
	}

	en_err(priv, "Failed to map RX buffer\n");
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
free_info:
	vfree(ring->rx_info);
	ring->rx_info = NULL;
	return err;
}
316 
mlx4_en_activate_rx_rings(struct mlx4_en_priv * priv)317 int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
318 {
319 	struct mlx4_en_rx_ring *ring;
320 	int i;
321 	int ring_ind;
322 	int err;
323 	int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
324 					DS_SIZE * priv->num_frags);
325 
326 	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
327 		ring = &priv->rx_ring[ring_ind];
328 
329 		ring->prod = 0;
330 		ring->cons = 0;
331 		ring->actual_size = 0;
332 		ring->cqn = priv->rx_cq[ring_ind].mcq.cqn;
333 
334 		ring->stride = stride;
335 		if (ring->stride <= TXBB_SIZE)
336 			ring->buf += TXBB_SIZE;
337 
338 		ring->log_stride = ffs(ring->stride) - 1;
339 		ring->buf_size = ring->size * ring->stride;
340 
341 		memset(ring->buf, 0, ring->buf_size);
342 		mlx4_en_update_rx_prod_db(ring);
343 
344 		/* Initailize all descriptors */
345 		for (i = 0; i < ring->size; i++)
346 			mlx4_en_init_rx_desc(priv, ring, i);
347 
348 		/* Initialize page allocators */
349 		err = mlx4_en_init_allocator(priv, ring);
350 		if (err) {
351 			en_err(priv, "Failed initializing ring allocator\n");
352 			if (ring->stride <= TXBB_SIZE)
353 				ring->buf -= TXBB_SIZE;
354 			ring_ind--;
355 			goto err_allocator;
356 		}
357 	}
358 	err = mlx4_en_fill_rx_buffers(priv);
359 	if (err)
360 		goto err_buffers;
361 
362 	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
363 		ring = &priv->rx_ring[ring_ind];
364 
365 		ring->size_mask = ring->actual_size - 1;
366 		mlx4_en_update_rx_prod_db(ring);
367 	}
368 
369 	return 0;
370 
371 err_buffers:
372 	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
373 		mlx4_en_free_rx_buf(priv, &priv->rx_ring[ring_ind]);
374 
375 	ring_ind = priv->rx_ring_num - 1;
376 err_allocator:
377 	while (ring_ind >= 0) {
378 		if (priv->rx_ring[ring_ind].stride <= TXBB_SIZE)
379 			priv->rx_ring[ring_ind].buf -= TXBB_SIZE;
380 		mlx4_en_destroy_allocator(priv, &priv->rx_ring[ring_ind]);
381 		ring_ind--;
382 	}
383 	return err;
384 }
385 
/*
 * Release the resources of an Rx ring: unmap the ring buffer, free the HW
 * queue resources (sized from 'size' and 'stride' plus TXBB_SIZE) and free
 * the rx_info array.
 */
void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_rx_ring *ring, u32 size, u16 stride)
{
	mlx4_en_unmap_buffer(&ring->wqres.buf);
	mlx4_free_hwq_res(priv->mdev->dev, &ring->wqres,
			  size * stride + TXBB_SIZE);

	vfree(ring->rx_info);
	ring->rx_info = NULL;
}
396 
mlx4_en_deactivate_rx_ring(struct mlx4_en_priv * priv,struct mlx4_en_rx_ring * ring)397 void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
398 				struct mlx4_en_rx_ring *ring)
399 {
400 	mlx4_en_free_rx_buf(priv, ring);
401 	if (ring->stride <= TXBB_SIZE)
402 		ring->buf -= TXBB_SIZE;
403 	mlx4_en_destroy_allocator(priv, ring);
404 }
405 
406 
407 /* Unmap a completed descriptor and free unused pages */
mlx4_en_complete_rx_desc(struct mlx4_en_priv * priv,struct mlx4_en_rx_desc * rx_desc,struct page_frag * skb_frags,struct sk_buff * skb,struct mlx4_en_rx_alloc * page_alloc,int length)408 static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
409 				    struct mlx4_en_rx_desc *rx_desc,
410 				    struct page_frag *skb_frags,
411 				    struct sk_buff *skb,
412 				    struct mlx4_en_rx_alloc *page_alloc,
413 				    int length)
414 {
415 	struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
416 	struct mlx4_en_dev *mdev = priv->mdev;
417 	struct mlx4_en_frag_info *frag_info;
418 	int nr;
419 	dma_addr_t dma;
420 
421 	/* Collect used fragments while replacing them in the HW descirptors */
422 	for (nr = 0; nr < priv->num_frags; nr++) {
423 		frag_info = &priv->frag_info[nr];
424 		if (length <= frag_info->frag_prefix_size)
425 			break;
426 
427 		/* Save page reference in skb */
428 		__skb_frag_set_page(&skb_frags_rx[nr], skb_frags[nr].page);
429 		skb_frag_size_set(&skb_frags_rx[nr], skb_frags[nr].size);
430 		skb_frags_rx[nr].page_offset = skb_frags[nr].offset;
431 		skb->truesize += frag_info->frag_stride;
432 		dma = be64_to_cpu(rx_desc->data[nr].addr);
433 
434 		/* Allocate a replacement page */
435 		if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, page_alloc, nr))
436 			goto fail;
437 
438 		/* Unmap buffer */
439 		pci_unmap_single(mdev->pdev, dma, skb_frag_size(&skb_frags_rx[nr]),
440 				 PCI_DMA_FROMDEVICE);
441 	}
442 	/* Adjust size of last fragment to match actual length */
443 	if (nr > 0)
444 		skb_frag_size_set(&skb_frags_rx[nr - 1],
445 			length - priv->frag_info[nr - 1].frag_prefix_size);
446 	return nr;
447 
448 fail:
449 	/* Drop all accumulated fragments (which have already been replaced in
450 	 * the descriptor) of this packet; remaining fragments are reused... */
451 	while (nr > 0) {
452 		nr--;
453 		__skb_frag_unref(&skb_frags_rx[nr]);
454 	}
455 	return 0;
456 }
457 
458 
mlx4_en_rx_skb(struct mlx4_en_priv * priv,struct mlx4_en_rx_desc * rx_desc,struct page_frag * skb_frags,struct mlx4_en_rx_alloc * page_alloc,unsigned int length)459 static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
460 				      struct mlx4_en_rx_desc *rx_desc,
461 				      struct page_frag *skb_frags,
462 				      struct mlx4_en_rx_alloc *page_alloc,
463 				      unsigned int length)
464 {
465 	struct mlx4_en_dev *mdev = priv->mdev;
466 	struct sk_buff *skb;
467 	void *va;
468 	int used_frags;
469 	dma_addr_t dma;
470 
471 	skb = dev_alloc_skb(SMALL_PACKET_SIZE + NET_IP_ALIGN);
472 	if (!skb) {
473 		en_dbg(RX_ERR, priv, "Failed allocating skb\n");
474 		return NULL;
475 	}
476 	skb->dev = priv->dev;
477 	skb_reserve(skb, NET_IP_ALIGN);
478 	skb->len = length;
479 
480 	/* Get pointer to first fragment so we could copy the headers into the
481 	 * (linear part of the) skb */
482 	va = page_address(skb_frags[0].page) + skb_frags[0].offset;
483 
484 	if (length <= SMALL_PACKET_SIZE) {
485 		/* We are copying all relevant data to the skb - temporarily
486 		 * synch buffers for the copy */
487 		dma = be64_to_cpu(rx_desc->data[0].addr);
488 		dma_sync_single_for_cpu(&mdev->pdev->dev, dma, length,
489 					DMA_FROM_DEVICE);
490 		skb_copy_to_linear_data(skb, va, length);
491 		dma_sync_single_for_device(&mdev->pdev->dev, dma, length,
492 					   DMA_FROM_DEVICE);
493 		skb->tail += length;
494 	} else {
495 
496 		/* Move relevant fragments to skb */
497 		used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, skb_frags,
498 						      skb, page_alloc, length);
499 		if (unlikely(!used_frags)) {
500 			kfree_skb(skb);
501 			return NULL;
502 		}
503 		skb_shinfo(skb)->nr_frags = used_frags;
504 
505 		/* Copy headers into the skb linear buffer */
506 		memcpy(skb->data, va, HEADER_COPY_SIZE);
507 		skb->tail += HEADER_COPY_SIZE;
508 
509 		/* Skip headers in first fragment */
510 		skb_shinfo(skb)->frags[0].page_offset += HEADER_COPY_SIZE;
511 
512 		/* Adjust size of first fragment */
513 		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], HEADER_COPY_SIZE);
514 		skb->data_len = length - HEADER_COPY_SIZE;
515 	}
516 	return skb;
517 }
518 
validate_loopback(struct mlx4_en_priv * priv,struct sk_buff * skb)519 static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb)
520 {
521 	int i;
522 	int offset = ETH_HLEN;
523 
524 	for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) {
525 		if (*(skb->data + offset) != (unsigned char) (i & 0xff))
526 			goto out_loopback;
527 	}
528 	/* Loopback found */
529 	priv->loopback_ok = 1;
530 
531 out_loopback:
532 	dev_kfree_skb_any(skb);
533 }
534 
/*
 * Process completed entries of an Rx completion queue.
 *
 * Walks the CQ from its consumer index while the CQE ownership bit shows
 * the entry belongs to software, handling at most 'budget' entries.  Each
 * CQE is either dropped (error completion, bad FCS, frame carrying our own
 * source MAC outside of a loopback test, allocation failure) or delivered:
 * through GRO (napi_get_frags/napi_gro_frags) when the checksum was
 * validated and GRO is enabled, otherwise as a regular skb through
 * netif_receive_skb().  During a loopback self-test the skb is handed to
 * validate_loopback() instead.
 *
 * On exit the CQ consumer index is published, and the ring's consumer and
 * producer indices and producer doorbell are updated.
 *
 * Returns the number of CQEs consumed (dropped ones included), or 0 if the
 * port is down.
 */
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_cqe *cqe;
	struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring];
	struct page_frag *skb_frags;
	struct mlx4_en_rx_desc *rx_desc;
	struct sk_buff *skb;
	int index;
	int nr;
	unsigned int length;
	int polled = 0;
	int ip_summed;
	struct ethhdr *ethh;
	u64 s_mac;

	if (!priv->port_up)
		return 0;

	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
	 * descriptor offset can be deduced from the CQE index instead of
	 * reading 'cqe->index' */
	index = cq->mcq.cons_index & ring->size_mask;
	cqe = &cq->buf[index];

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
		    cq->mcq.cons_index & cq->size)) {

		/* Locate the fragment bookkeeping and HW descriptor that
		 * belong to this CQE */
		skb_frags = ring->rx_info + (index << priv->log_rx_info);
		rx_desc = ring->buf + (index << ring->log_stride);

		/*
		 * make sure we read the CQE after we read the ownership bit
		 */
		rmb();

		/* Drop packet on bad receive or bad checksum */
		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
						MLX4_CQE_OPCODE_ERROR)) {
			en_err(priv, "CQE completed in error - vendor "
				  "syndrom:%d syndrom:%d\n",
				  ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
				  ((struct mlx4_err_cqe *) cqe)->syndrome);
			goto next;
		}
		if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
			en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
			goto next;
		}

		/* Get pointer to first fragment since we haven't skb yet and
		 * cast it to ethhdr struct */
		ethh = (struct ethhdr *)(page_address(skb_frags[0].page) +
					 skb_frags[0].offset);
		s_mac = mlx4_en_mac_to_u64(ethh->h_source);

		/* If source MAC is equal to our own MAC and not performing
		 * the selftest or flb disabled - drop the packet */
		if (s_mac == priv->mac &&
			(!(dev->features & NETIF_F_LOOPBACK) ||
			 !priv->validate_loopback))
			goto next;

		/*
		 * Packet is OK - process it.
		 */
		length = be32_to_cpu(cqe->byte_cnt);
		/* fcs_del is ETH_FCS_LEN when the QP was configured to keep
		 * the FCS, 0 otherwise (see mlx4_en_config_rss_qp) */
		length -= ring->fcs_del;
		ring->bytes += length;
		ring->packets++;

		if (likely(dev->features & NETIF_F_RXCSUM)) {
			if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
			    (cqe->checksum == cpu_to_be16(0xffff))) {
				ring->csum_ok++;
				/* This packet is eligible for LRO if it is:
				 * - DIX Ethernet (type interpretation)
				 * - TCP/IP (v4)
				 * - without IP options
				 * - not an IP fragment */
				if (dev->features & NETIF_F_GRO) {
					struct sk_buff *gro_skb = napi_get_frags(&cq->napi);
					if (!gro_skb)
						goto next;

					/* Move the Rx fragments into the GRO skb;
					 * 0 means a replacement buffer could not
					 * be allocated and the packet is dropped */
					nr = mlx4_en_complete_rx_desc(
						priv, rx_desc,
						skb_frags, gro_skb,
						ring->page_alloc, length);
					if (!nr)
						goto next;

					skb_shinfo(gro_skb)->nr_frags = nr;
					gro_skb->len = length;
					gro_skb->data_len = length;
					gro_skb->ip_summed = CHECKSUM_UNNECESSARY;

					if (cqe->vlan_my_qpn &
					    cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) {
						u16 vid = be16_to_cpu(cqe->sl_vid);

						__vlan_hwaccel_put_tag(gro_skb, vid);
					}

					if (dev->features & NETIF_F_RXHASH)
						gro_skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid);

					skb_record_rx_queue(gro_skb, cq->ring);
					napi_gro_frags(&cq->napi);

					goto next;
				}

				/* LRO not possible, complete processing here */
				ip_summed = CHECKSUM_UNNECESSARY;
			} else {
				ip_summed = CHECKSUM_NONE;
				ring->csum_none++;
			}
		} else {
			ip_summed = CHECKSUM_NONE;
			ring->csum_none++;
		}

		/* Non-GRO path: build a regular skb for the packet */
		skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags,
				     ring->page_alloc, length);
		if (!skb) {
			priv->stats.rx_dropped++;
			goto next;
		}

		/* Self-test in progress: the skb is consumed by the loopback
		 * check instead of being passed up the stack */
                if (unlikely(priv->validate_loopback)) {
			validate_loopback(priv, skb);
			goto next;
		}

		skb->ip_summed = ip_summed;
		skb->protocol = eth_type_trans(skb, dev);
		skb_record_rx_queue(skb, cq->ring);

		if (dev->features & NETIF_F_RXHASH)
			skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid);

		if (be32_to_cpu(cqe->vlan_my_qpn) &
		    MLX4_CQE_VLAN_PRESENT_MASK)
			__vlan_hwaccel_put_tag(skb, be16_to_cpu(cqe->sl_vid));

		/* Push it up the stack */
		netif_receive_skb(skb);

next:
		/* Advance to the next CQE; dropped packets count as polled too */
		++cq->mcq.cons_index;
		index = (cq->mcq.cons_index) & ring->size_mask;
		cqe = &cq->buf[index];
		if (++polled == budget) {
			/* We are here because we reached the NAPI budget -
			 * stop processing and publish the progress so far */
			goto out;
		}
	}

out:
	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
	mlx4_cq_set_ci(&cq->mcq);
	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
	ring->cons = cq->mcq.cons_index;
	ring->prod += polled; /* Polled descriptors were reallocated in place */
	mlx4_en_update_rx_prod_db(ring);
	return polled;
}
706 
707 
mlx4_en_rx_irq(struct mlx4_cq * mcq)708 void mlx4_en_rx_irq(struct mlx4_cq *mcq)
709 {
710 	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
711 	struct mlx4_en_priv *priv = netdev_priv(cq->dev);
712 
713 	if (priv->port_up)
714 		napi_schedule(&cq->napi);
715 	else
716 		mlx4_en_arm_cq(priv, cq);
717 }
718 
719 /* Rx CQ polling - called by NAPI */
mlx4_en_poll_rx_cq(struct napi_struct * napi,int budget)720 int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
721 {
722 	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
723 	struct net_device *dev = cq->dev;
724 	struct mlx4_en_priv *priv = netdev_priv(dev);
725 	int done;
726 
727 	done = mlx4_en_process_rx_cq(dev, cq, budget);
728 
729 	/* If we used up all the quota - we're probably not done yet... */
730 	if (done == budget)
731 		INC_PERF_COUNTER(priv->pstats.napi_quota);
732 	else {
733 		/* Done for now */
734 		napi_complete(napi);
735 		mlx4_en_arm_cq(priv, cq);
736 	}
737 	return done;
738 }
739 
740 
741 /* Calculate the last offset position that accommodates a full fragment
742  * (assuming fagment size = stride-align) */
mlx4_en_last_alloc_offset(struct mlx4_en_priv * priv,u16 stride,u16 align)743 static int mlx4_en_last_alloc_offset(struct mlx4_en_priv *priv, u16 stride, u16 align)
744 {
745 	u16 res = MLX4_EN_ALLOC_SIZE % stride;
746 	u16 offset = MLX4_EN_ALLOC_SIZE - stride - res + align;
747 
748 	en_dbg(DRV, priv, "Calculated last offset for stride:%d align:%d "
749 			    "res:%d offset:%d\n", stride, align, res, offset);
750 	return offset;
751 }
752 
753 
754 static int frag_sizes[] = {
755 	FRAG_SZ0,
756 	FRAG_SZ1,
757 	FRAG_SZ2,
758 	FRAG_SZ3
759 };
760 
mlx4_en_calc_rx_buf(struct net_device * dev)761 void mlx4_en_calc_rx_buf(struct net_device *dev)
762 {
763 	struct mlx4_en_priv *priv = netdev_priv(dev);
764 	int eff_mtu = dev->mtu + ETH_HLEN + VLAN_HLEN + ETH_LLC_SNAP_SIZE;
765 	int buf_size = 0;
766 	int i = 0;
767 
768 	while (buf_size < eff_mtu) {
769 		priv->frag_info[i].frag_size =
770 			(eff_mtu > buf_size + frag_sizes[i]) ?
771 				frag_sizes[i] : eff_mtu - buf_size;
772 		priv->frag_info[i].frag_prefix_size = buf_size;
773 		if (!i)	{
774 			priv->frag_info[i].frag_align = NET_IP_ALIGN;
775 			priv->frag_info[i].frag_stride =
776 				ALIGN(frag_sizes[i] + NET_IP_ALIGN, SMP_CACHE_BYTES);
777 		} else {
778 			priv->frag_info[i].frag_align = 0;
779 			priv->frag_info[i].frag_stride =
780 				ALIGN(frag_sizes[i], SMP_CACHE_BYTES);
781 		}
782 		priv->frag_info[i].last_offset = mlx4_en_last_alloc_offset(
783 						priv, priv->frag_info[i].frag_stride,
784 						priv->frag_info[i].frag_align);
785 		buf_size += priv->frag_info[i].frag_size;
786 		i++;
787 	}
788 
789 	priv->num_frags = i;
790 	priv->rx_skb_size = eff_mtu;
791 	priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct skb_frag_struct));
792 
793 	en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
794 		  "num_frags:%d):\n", eff_mtu, priv->num_frags);
795 	for (i = 0; i < priv->num_frags; i++) {
796 		en_dbg(DRV, priv, "  frag:%d - size:%d prefix:%d align:%d "
797 				"stride:%d last_offset:%d\n", i,
798 				priv->frag_info[i].frag_size,
799 				priv->frag_info[i].frag_prefix_size,
800 				priv->frag_info[i].frag_align,
801 				priv->frag_info[i].frag_stride,
802 				priv->frag_info[i].last_offset);
803 	}
804 }
805 
806 /* RSS related functions */
807 
mlx4_en_config_rss_qp(struct mlx4_en_priv * priv,int qpn,struct mlx4_en_rx_ring * ring,enum mlx4_qp_state * state,struct mlx4_qp * qp)808 static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
809 				 struct mlx4_en_rx_ring *ring,
810 				 enum mlx4_qp_state *state,
811 				 struct mlx4_qp *qp)
812 {
813 	struct mlx4_en_dev *mdev = priv->mdev;
814 	struct mlx4_qp_context *context;
815 	int err = 0;
816 
817 	context = kmalloc(sizeof *context , GFP_KERNEL);
818 	if (!context) {
819 		en_err(priv, "Failed to allocate qp context\n");
820 		return -ENOMEM;
821 	}
822 
823 	err = mlx4_qp_alloc(mdev->dev, qpn, qp);
824 	if (err) {
825 		en_err(priv, "Failed to allocate qp #%x\n", qpn);
826 		goto out;
827 	}
828 	qp->event = mlx4_en_sqp_event;
829 
830 	memset(context, 0, sizeof *context);
831 	mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0,
832 				qpn, ring->cqn, context);
833 	context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);
834 
835 	/* Cancel FCS removal if FW allows */
836 	if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) {
837 		context->param3 |= cpu_to_be32(1 << 29);
838 		ring->fcs_del = ETH_FCS_LEN;
839 	} else
840 		ring->fcs_del = 0;
841 
842 	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state);
843 	if (err) {
844 		mlx4_qp_remove(mdev->dev, qp);
845 		mlx4_qp_free(mdev->dev, qp);
846 	}
847 	mlx4_en_update_rx_prod_db(ring);
848 out:
849 	kfree(context);
850 	return err;
851 }
852 
/*
 * Allocate rx qp's and configure them according to rss map.
 *
 * Reserves a range of rx_ring_num QP numbers, configures one QP per Rx
 * ring, then allocates the RSS indirection QP (priv->base_qpn), fills its
 * RSS context (ring count, default QP, hash flags, hash function and key)
 * and brings it to the ready state.
 *
 * Returns 0 on success or a negative errno.  On failure the QPs configured
 * so far are reset, removed and freed, and the reserved range is released.
 */
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	struct mlx4_qp_context context;
	struct mlx4_rss_context *rss_context;
	int rss_rings;
	void *ptr;
	u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 |
			MLX4_RSS_TCP_IPV6);
	int i, qpn;
	int err = 0;
	int good_qps = 0;
	/* Fixed hash key written into the RSS context below */
	static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B, 0x1983A2FC,
				0x943E1ADB, 0xD9389E6B, 0xD1039C2C, 0xA74499AD,
				0x593D56D9, 0xF3253C06, 0x2ADC1FFC};

	en_dbg(DRV, priv, "Configuring rss steering\n");
	err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
				    priv->rx_ring_num,
				    &rss_map->base_qpn);
	if (err) {
		en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
		return err;
	}

	/* One QP per Rx ring; good_qps counts those that need cleanup on
	 * error */
	for (i = 0; i < priv->rx_ring_num; i++) {
		qpn = rss_map->base_qpn + i;
		err = mlx4_en_config_rss_qp(priv, qpn, &priv->rx_ring[i],
					    &rss_map->state[i],
					    &rss_map->qps[i]);
		if (err)
			goto rss_err;

		++good_qps;
	}

	/* Configure RSS indirection qp */
	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
	if (err) {
		en_err(priv, "Failed to allocate RSS indirection QP\n");
		goto rss_err;
	}
	rss_map->indir_qp.event = mlx4_en_sqp_event;
	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
				priv->rx_ring[0].cqn, &context);

	/* Use the profile's RSS ring count when it is set and does not exceed
	 * the number of Rx rings; otherwise spread over all Rx rings */
	if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num)
		rss_rings = priv->rx_ring_num;
	else
		rss_rings = priv->prof->rss_rings;

	/* The RSS context is located inside the QP context, at a fixed offset
	 * within the primary path section */
	ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path)
					+ MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
	rss_context = ptr;
	/* log2 of the ring count goes in the top byte, base QP number below */
	rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 |
					    (rss_map->base_qpn));
	rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
	if (priv->mdev->profile.udp_rss) {
		rss_mask |=  MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6;
		rss_context->base_qpn_udp = rss_context->default_qpn;
	}
	rss_context->flags = rss_mask;
	rss_context->hash_fn = MLX4_RSS_HASH_TOP;
	for (i = 0; i < 10; i++)
		rss_context->rss_key[i] = rsskey[i];

	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
			       &rss_map->indir_qp, &rss_map->indir_state);
	if (err)
		goto indir_err;

	return 0;

indir_err:
	/* Tear down the indirection QP, then fall through to the ring QPs */
	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
rss_err:
	/* Only the first good_qps ring QPs were fully configured */
	for (i = 0; i < good_qps; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
	return err;
}
943 
mlx4_en_release_rss_steer(struct mlx4_en_priv * priv)944 void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
945 {
946 	struct mlx4_en_dev *mdev = priv->mdev;
947 	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
948 	int i;
949 
950 	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
951 		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
952 	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
953 	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
954 
955 	for (i = 0; i < priv->rx_ring_num; i++) {
956 		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
957 			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
958 		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
959 		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
960 	}
961 	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
962 }
963 
964 
965 
966 
967 
968