1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2021 Microsoft Corp.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/bus.h>
34 #include <sys/kernel.h>
35 #include <sys/kthread.h>
36 #include <sys/malloc.h>
37 #include <sys/mbuf.h>
38 #include <sys/smp.h>
39 #include <sys/socket.h>
40 #include <sys/sockio.h>
41 #include <sys/time.h>
42 #include <sys/eventhandler.h>
43
44 #include <machine/bus.h>
45 #include <machine/resource.h>
46 #include <machine/in_cksum.h>
47
48 #include <net/if.h>
49 #include <net/if_var.h>
50 #include <net/if_types.h>
51 #include <net/if_vlan_var.h>
52 #ifdef RSS
53 #include <net/rss_config.h>
54 #endif
55
56 #include <netinet/in_systm.h>
57 #include <netinet/in.h>
58 #include <netinet/if_ether.h>
59 #include <netinet/ip.h>
60 #include <netinet/ip6.h>
61 #include <netinet/tcp.h>
62 #include <netinet/udp.h>
63
64 #include "mana.h"
65 #include "mana_sysctl.h"
66
67 static int mana_up(struct mana_port_context *apc);
68 static int mana_down(struct mana_port_context *apc);
69
70 extern unsigned int mana_tx_req_size;
71 extern unsigned int mana_rx_req_size;
72 extern unsigned int mana_rx_refill_threshold;
73
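/*
 * Fill the caller's buffer with the RSS hash key. The key is generated
 * once with arc4random_buf() and reused afterwards, so every port on
 * the adapter ends up with the same key.
 */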
74 static void
75 mana_rss_key_fill(void *k, size_t size)
76 {
77 static bool rss_key_generated = false;
78 static uint8_t rss_key[MANA_HASH_KEY_SIZE];
79
80 KASSERT(size <= MANA_HASH_KEY_SIZE,
81 ("Request more buytes than MANA RSS key can hold"));
82
83 if (!rss_key_generated) {
84 arc4random_buf(rss_key, MANA_HASH_KEY_SIZE);
85 rss_key_generated = true;
86 }
87 memcpy(k, rss_key, size);
88 }
89
90 static int
91 mana_ifmedia_change(if_t ifp __unused)
92 {
93 return EOPNOTSUPP;
94 }
95
96 static void
97 mana_ifmedia_status(if_t ifp, struct ifmediareq *ifmr)
98 {
99 struct mana_port_context *apc = if_getsoftc(ifp);
100
101 if (!apc) {
102 if_printf(ifp, "Port not available\n");
103 return;
104 }
105
106 MANA_APC_LOCK_LOCK(apc);
107
108 ifmr->ifm_status = IFM_AVALID;
109 ifmr->ifm_active = IFM_ETHER;
110
111 if (!apc->port_is_up) {
112 MANA_APC_LOCK_UNLOCK(apc);
113 mana_dbg(NULL, "Port %u link is down\n", apc->port_idx);
114 return;
115 }
116
117 ifmr->ifm_status |= IFM_ACTIVE;
118 ifmr->ifm_active |= IFM_100G_DR | IFM_FDX;
119
120 MANA_APC_LOCK_UNLOCK(apc);
121 }
122
123 static uint64_t
124 mana_get_counter(if_t ifp, ift_counter cnt)
125 {
126 struct mana_port_context *apc = if_getsoftc(ifp);
127 struct mana_port_stats *stats = &apc->port_stats;
128
129 switch (cnt) {
130 case IFCOUNTER_IPACKETS:
131 return (counter_u64_fetch(stats->rx_packets));
132 case IFCOUNTER_OPACKETS:
133 return (counter_u64_fetch(stats->tx_packets));
134 case IFCOUNTER_IBYTES:
135 return (counter_u64_fetch(stats->rx_bytes));
136 case IFCOUNTER_OBYTES:
137 return (counter_u64_fetch(stats->tx_bytes));
138 case IFCOUNTER_IQDROPS:
139 return (counter_u64_fetch(stats->rx_drops));
140 case IFCOUNTER_OQDROPS:
141 return (counter_u64_fetch(stats->tx_drops));
142 default:
143 return (if_get_counter_default(ifp, cnt));
144 }
145 }
146
147 static void
148 mana_qflush(if_t ifp)
149 {
150 if_qflush(ifp);
151 }
152
153 int
154 mana_restart(struct mana_port_context *apc)
155 {
156 int rc = 0;
157
158 MANA_APC_LOCK_LOCK(apc);
159 if (apc->port_is_up)
160 mana_down(apc);
161
162 rc = mana_up(apc);
163 MANA_APC_LOCK_UNLOCK(apc);
164
165 return (rc);
166 }
167
168 static int
169 mana_ioctl(if_t ifp, u_long command, caddr_t data)
170 {
171 struct mana_port_context *apc = if_getsoftc(ifp);
172 struct ifrsskey *ifrk;
173 struct ifrsshash *ifrh;
174 struct ifreq *ifr;
175 uint16_t new_mtu;
176 int rc = 0, mask;
177
178 switch (command) {
179 case SIOCSIFMTU:
180 ifr = (struct ifreq *)data;
181 new_mtu = ifr->ifr_mtu;
182 if (if_getmtu(ifp) == new_mtu)
183 break;
184 if ((new_mtu > apc->max_mtu) ||
185 (new_mtu < apc->min_mtu)) {
186 if_printf(ifp, "Invalid MTU. new_mtu: %d, "
187 "max allowed: %d, min allowed: %d\n",
188 new_mtu, apc->max_mtu, apc->min_mtu);
189 return EINVAL;
190 }
191 MANA_APC_LOCK_LOCK(apc);
192 if (apc->port_is_up)
193 mana_down(apc);
194
195 apc->frame_size = new_mtu + ETHER_HDR_LEN;
196 apc->mtu = new_mtu;
197 if_setmtu(ifp, new_mtu);
198 mana_dbg(NULL, "Set MTU to %d\n", new_mtu);
199
200 rc = mana_up(apc);
201 MANA_APC_LOCK_UNLOCK(apc);
202 break;
203
204 case SIOCSIFFLAGS:
205 if (if_getflags(ifp) & IFF_UP) {
206 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
207 MANA_APC_LOCK_LOCK(apc);
208 if (!apc->port_is_up)
209 rc = mana_up(apc);
210 MANA_APC_LOCK_UNLOCK(apc);
211 }
212 } else {
213 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
214 MANA_APC_LOCK_LOCK(apc);
215 if (apc->port_is_up)
216 mana_down(apc);
217 MANA_APC_LOCK_UNLOCK(apc);
218 }
219 }
220 break;
221
222 case SIOCSIFCAP:
223 MANA_APC_LOCK_LOCK(apc);
224 ifr = (struct ifreq *)data;
225 /*
226 * Fix up requested capabilities w/ supported capabilities,
227 * since the supported capabilities could have been changed.
228 */
229 mask = (ifr->ifr_reqcap & if_getcapabilities(ifp)) ^
230 if_getcapenable(ifp);
231
232 if (mask & IFCAP_TXCSUM) {
233 if_togglecapenable(ifp, IFCAP_TXCSUM);
234 if_togglehwassist(ifp, (CSUM_TCP | CSUM_UDP | CSUM_IP));
235
236 if ((IFCAP_TSO4 & if_getcapenable(ifp)) &&
237 !(IFCAP_TXCSUM & if_getcapenable(ifp))) {
238 mask &= ~IFCAP_TSO4;
239 if_setcapenablebit(ifp, 0, IFCAP_TSO4);
240 if_sethwassistbits(ifp, 0, CSUM_IP_TSO);
241 mana_warn(NULL,
242 "Also disabled tso4 due to -txcsum.\n");
243 }
244 }
245
246 if (mask & IFCAP_TXCSUM_IPV6) {
247 if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6);
248 if_togglehwassist(ifp, (CSUM_UDP_IPV6 | CSUM_TCP_IPV6));
249
250 if ((IFCAP_TSO6 & if_getcapenable(ifp)) &&
251 !(IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp))) {
252 mask &= ~IFCAP_TSO6;
253 if_setcapenablebit(ifp, 0, IFCAP_TSO6);
254 if_sethwassistbits(ifp, 0, CSUM_IP6_TSO);
255 mana_warn(ifp,
256 "Also disabled tso6 due to -txcsum6.\n");
257 }
258 }
259
260 if (mask & IFCAP_RXCSUM)
261 if_togglecapenable(ifp, IFCAP_RXCSUM);
262 /* We can't distinguish IPv6 packets from IPv4 packets on the RX path. */
263 if (mask & IFCAP_RXCSUM_IPV6)
264 if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6);
265
266 if (mask & IFCAP_LRO)
267 if_togglecapenable(ifp, IFCAP_LRO);
268
269 if (mask & IFCAP_TSO4) {
270 if (!(IFCAP_TSO4 & if_getcapenable(ifp)) &&
271 !(IFCAP_TXCSUM & if_getcapenable(ifp))) {
272 MANA_APC_LOCK_UNLOCK(apc);
273 if_printf(ifp, "Enable txcsum first.\n");
274 rc = EAGAIN;
275 goto out;
276 }
277 if_togglecapenable(ifp, IFCAP_TSO4);
278 if_togglehwassist(ifp, CSUM_IP_TSO);
279 }
280
281 if (mask & IFCAP_TSO6) {
282 if (!(IFCAP_TSO6 & if_getcapenable(ifp)) &&
283 !(IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp))) {
284 MANA_APC_LOCK_UNLOCK(apc);
285 if_printf(ifp, "Enable txcsum6 first.\n");
286 rc = EAGAIN;
287 goto out;
288 }
289 if_togglecapenable(ifp, IFCAP_TSO6);
290 if_togglehwassist(ifp, CSUM_IP6_TSO);
291 }
292
293 MANA_APC_LOCK_UNLOCK(apc);
294 out:
295 break;
296
297 case SIOCSIFMEDIA:
298 case SIOCGIFMEDIA:
299 case SIOCGIFXMEDIA:
300 ifr = (struct ifreq *)data;
301 rc = ifmedia_ioctl(ifp, ifr, &apc->media, command);
302 break;
303
304 case SIOCGIFRSSKEY:
305 ifrk = (struct ifrsskey *)data;
306 ifrk->ifrk_func = RSS_FUNC_TOEPLITZ;
307 ifrk->ifrk_keylen = MANA_HASH_KEY_SIZE;
308 memcpy(ifrk->ifrk_key, apc->hashkey, MANA_HASH_KEY_SIZE);
309 break;
310
311 case SIOCGIFRSSHASH:
312 ifrh = (struct ifrsshash *)data;
313 ifrh->ifrh_func = RSS_FUNC_TOEPLITZ;
314 ifrh->ifrh_types =
315 RSS_TYPE_TCP_IPV4 |
316 RSS_TYPE_UDP_IPV4 |
317 RSS_TYPE_TCP_IPV6 |
318 RSS_TYPE_UDP_IPV6;
319 break;
320
321 default:
322 rc = ether_ioctl(ifp, command, data);
323 break;
324 }
325
326 return (rc);
327 }
328
329 static inline void
330 mana_alloc_counters(counter_u64_t *begin, int size)
331 {
332 counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
333
334 for (; begin < end; ++begin)
335 *begin = counter_u64_alloc(M_WAITOK);
336 }
337
338 static inline void
339 mana_free_counters(counter_u64_t *begin, int size)
340 {
341 counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
342
343 for (; begin < end; ++begin)
344 counter_u64_free(*begin);
345 }
346
347 static bool
348 mana_can_tx(struct gdma_queue *wq)
349 {
350 return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE;
351 }
352
353 static inline int
354 mana_tx_map_mbuf(struct mana_port_context *apc,
355 struct mana_send_buf_info *tx_info,
356 struct mbuf **m_head, struct mana_tx_package *tp,
357 struct mana_stats *tx_stats)
358 {
359 struct gdma_dev *gd = apc->ac->gdma_dev;
360 bus_dma_segment_t segs[MAX_MBUF_FRAGS];
361 struct mbuf *m = *m_head;
362 int err, nsegs, i;
363
364 err = bus_dmamap_load_mbuf_sg(apc->tx_buf_tag, tx_info->dma_map,
365 m, segs, &nsegs, BUS_DMA_NOWAIT);
366 if (err == EFBIG) {
367 struct mbuf *m_new;
368
369 counter_u64_add(tx_stats->collapse, 1);
370 m_new = m_collapse(m, M_NOWAIT, MAX_MBUF_FRAGS);
371 if (unlikely(m_new == NULL)) {
372 counter_u64_add(tx_stats->collapse_err, 1);
373 return ENOBUFS;
374 } else {
375 *m_head = m = m_new;
376 }
377
378 mana_warn(NULL,
379 "Too many segs in orig mbuf, m_collapse called\n");
380
381 err = bus_dmamap_load_mbuf_sg(apc->tx_buf_tag,
382 tx_info->dma_map, m, segs, &nsegs, BUS_DMA_NOWAIT);
383 }
384 if (!err) {
385 for (i = 0; i < nsegs; i++) {
386 tp->wqe_req.sgl[i].address = segs[i].ds_addr;
387 tp->wqe_req.sgl[i].mem_key = gd->gpa_mkey;
388 tp->wqe_req.sgl[i].size = segs[i].ds_len;
389 }
390 tp->wqe_req.num_sge = nsegs;
391
392 tx_info->mbuf = *m_head;
393
394 bus_dmamap_sync(apc->tx_buf_tag, tx_info->dma_map,
395 BUS_DMASYNC_PREWRITE);
396 }
397
398 return err;
399 }
400
401 static inline void
402 mana_tx_unmap_mbuf(struct mana_port_context *apc,
403 struct mana_send_buf_info *tx_info)
404 {
405 bus_dmamap_sync(apc->tx_buf_tag, tx_info->dma_map,
406 BUS_DMASYNC_POSTWRITE);
407 bus_dmamap_unload(apc->tx_buf_tag, tx_info->dma_map);
408 if (tx_info->mbuf) {
409 m_freem(tx_info->mbuf);
410 tx_info->mbuf = NULL;
411 }
412 }
413
414 static inline int
415 mana_load_rx_mbuf(struct mana_port_context *apc, struct mana_rxq *rxq,
416 struct mana_recv_buf_oob *rx_oob, bool alloc_mbuf)
417 {
418 bus_dma_segment_t segs[1];
419 struct mbuf *mbuf;
420 int nsegs, err;
421 uint32_t mlen;
422
423 if (alloc_mbuf) {
424 mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rxq->datasize);
425 if (unlikely(mbuf == NULL))
426 return ENOMEM;
427
428 mbuf->m_pkthdr.len = mbuf->m_len = rxq->datasize;
429 mlen = rxq->datasize;
430 } else {
431 if (rx_oob->mbuf) {
432 mbuf = rx_oob->mbuf;
433 mlen = rx_oob->mbuf->m_pkthdr.len;
434 } else {
435 return ENOMEM;
436 }
437 }
438
439 err = bus_dmamap_load_mbuf_sg(apc->rx_buf_tag, rx_oob->dma_map,
440 mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
441
442 if (unlikely((err != 0) || (nsegs != 1))) {
443 mana_warn(NULL, "Failed to map mbuf, error: %d, "
444 "nsegs: %d\n", err, nsegs);
445 counter_u64_add(rxq->stats.dma_mapping_err, 1);
446 goto error;
447 }
448
449 bus_dmamap_sync(apc->rx_buf_tag, rx_oob->dma_map,
450 BUS_DMASYNC_PREREAD);
451
452 rx_oob->mbuf = mbuf;
453 rx_oob->num_sge = 1;
454 rx_oob->sgl[0].address = segs[0].ds_addr;
455 rx_oob->sgl[0].size = mlen;
456 rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey;
457
458 return 0;
459
460 error:
461 m_freem(mbuf);
462 return EFAULT;
463 }
464
465 static inline void
466 mana_unload_rx_mbuf(struct mana_port_context *apc, struct mana_rxq *rxq,
467 struct mana_recv_buf_oob *rx_oob, bool free_mbuf)
468 {
469 bus_dmamap_sync(apc->rx_buf_tag, rx_oob->dma_map,
470 BUS_DMASYNC_POSTREAD);
471 bus_dmamap_unload(apc->rx_buf_tag, rx_oob->dma_map);
472
473 if (free_mbuf && rx_oob->mbuf) {
474 m_freem(rx_oob->mbuf);
475 rx_oob->mbuf = NULL;
476 }
477 }
478
479
480 /* Use a couple of mbuf PH_loc fields for the L3 and L4 protocol types */
481 #define MANA_L3_PROTO(_mbuf) ((_mbuf)->m_pkthdr.PH_loc.sixteen[0])
482 #define MANA_L4_PROTO(_mbuf) ((_mbuf)->m_pkthdr.PH_loc.sixteen[1])
483
484 #define MANA_TXQ_FULL (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)
485
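/*
 * Drain the txq buf ring and post the packets to the hardware send
 * queue. Expects the txq mutex to be held by the caller.
 */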
486 static void
487 mana_xmit(struct mana_txq *txq)
488 {
489 enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
490 struct mana_send_buf_info *tx_info;
491 if_t ndev = txq->ndev;
492 struct mbuf *mbuf;
493 struct mana_port_context *apc = if_getsoftc(ndev);
494 unsigned int tx_queue_size = apc->tx_queue_size;
495 struct mana_port_stats *port_stats = &apc->port_stats;
496 struct gdma_dev *gd = apc->ac->gdma_dev;
497 uint64_t packets, bytes;
498 uint16_t next_to_use;
499 struct mana_tx_package pkg = {};
500 struct mana_stats *tx_stats;
501 struct gdma_queue *gdma_sq;
502 struct mana_cq *cq;
503 int err, len;
504 bool is_tso;
505
506 gdma_sq = txq->gdma_sq;
507 cq = &apc->tx_qp[txq->idx].tx_cq;
508 tx_stats = &txq->stats;
509
510 packets = 0;
511 bytes = 0;
512 next_to_use = txq->next_to_use;
513
514 while ((mbuf = drbr_peek(ndev, txq->txq_br)) != NULL) {
515 if (!apc->port_is_up ||
516 (if_getdrvflags(ndev) & MANA_TXQ_FULL) != IFF_DRV_RUNNING) {
517 drbr_putback(ndev, txq->txq_br, mbuf);
518 break;
519 }
520
521 if (!mana_can_tx(gdma_sq)) {
522 /* SQ is full. Set the IFF_DRV_OACTIVE flag */
523 if_setdrvflagbits(apc->ndev, IFF_DRV_OACTIVE, 0);
524 counter_u64_add(tx_stats->stop, 1);
525 uint64_t stops = counter_u64_fetch(tx_stats->stop);
526 uint64_t wakeups = counter_u64_fetch(tx_stats->wakeup);
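/*
 * If this queue stops far more often than it wakes up, start
 * steering new packets to an alternate queue to spread the load.
 */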
527 #define MANA_TXQ_STOP_THRESHOLD 50
528 if (stops > MANA_TXQ_STOP_THRESHOLD && wakeups > 0 &&
529 stops > wakeups && txq->alt_txq_idx == txq->idx) {
530 txq->alt_txq_idx =
531 (txq->idx + (stops / wakeups))
532 % apc->num_queues;
533 counter_u64_add(tx_stats->alt_chg, 1);
534 }
535
536 drbr_putback(ndev, txq->txq_br, mbuf);
537
538 taskqueue_enqueue(cq->cleanup_tq, &cq->cleanup_task);
539 break;
540 }
541
542 tx_info = &txq->tx_buf_info[next_to_use];
543
544 memset(&pkg, 0, sizeof(struct mana_tx_package));
545 pkg.wqe_req.sgl = pkg.sgl_array;
546
547 err = mana_tx_map_mbuf(apc, tx_info, &mbuf, &pkg, tx_stats);
548 if (unlikely(err)) {
549 mana_dbg(NULL,
550 "Failed to map tx mbuf, err %d\n", err);
551
552 counter_u64_add(tx_stats->dma_mapping_err, 1);
553
554 /* The mbuf is still there. Free it */
555 m_freem(mbuf);
556 /* Advance the drbr queue */
557 drbr_advance(ndev, txq->txq_br);
558 continue;
559 }
560
561 pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
562 pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
563
564 if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) {
565 pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset;
566 pkt_fmt = MANA_LONG_PKT_FMT;
567 } else {
568 pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset;
569 }
570
571 pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt;
572
573 if (pkt_fmt == MANA_SHORT_PKT_FMT)
574 pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob);
575 else
576 pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob);
577
578 pkg.wqe_req.inline_oob_data = &pkg.tx_oob;
579 pkg.wqe_req.flags = 0;
580 pkg.wqe_req.client_data_unit = 0;
581
582 is_tso = false;
583 if (mbuf->m_pkthdr.csum_flags & CSUM_TSO) {
584 is_tso = true;
585
586 if (MANA_L3_PROTO(mbuf) == ETHERTYPE_IP)
587 pkg.tx_oob.s_oob.is_outer_ipv4 = 1;
588 else
589 pkg.tx_oob.s_oob.is_outer_ipv6 = 1;
590
591 pkg.tx_oob.s_oob.comp_iphdr_csum = 1;
592 pkg.tx_oob.s_oob.comp_tcp_csum = 1;
593 pkg.tx_oob.s_oob.trans_off = mbuf->m_pkthdr.l3hlen;
594
595 pkg.wqe_req.client_data_unit = mbuf->m_pkthdr.tso_segsz;
596 pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0;
597 } else if (mbuf->m_pkthdr.csum_flags &
598 (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) {
599 if (MANA_L3_PROTO(mbuf) == ETHERTYPE_IP) {
600 pkg.tx_oob.s_oob.is_outer_ipv4 = 1;
601 pkg.tx_oob.s_oob.comp_iphdr_csum = 1;
602 } else {
603 pkg.tx_oob.s_oob.is_outer_ipv6 = 1;
604 }
605
606 if (MANA_L4_PROTO(mbuf) == IPPROTO_TCP) {
607 pkg.tx_oob.s_oob.comp_tcp_csum = 1;
608 pkg.tx_oob.s_oob.trans_off =
609 mbuf->m_pkthdr.l3hlen;
610 } else {
611 pkg.tx_oob.s_oob.comp_udp_csum = 1;
612 }
613 } else if (mbuf->m_pkthdr.csum_flags & CSUM_IP) {
614 pkg.tx_oob.s_oob.is_outer_ipv4 = 1;
615 pkg.tx_oob.s_oob.comp_iphdr_csum = 1;
616 } else {
617 if (MANA_L3_PROTO(mbuf) == ETHERTYPE_IP)
618 pkg.tx_oob.s_oob.is_outer_ipv4 = 1;
619 else if (MANA_L3_PROTO(mbuf) == ETHERTYPE_IPV6)
620 pkg.tx_oob.s_oob.is_outer_ipv6 = 1;
621 }
622
623 len = mbuf->m_pkthdr.len;
624
625 err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req,
626 (struct gdma_posted_wqe_info *)&tx_info->wqe_inf);
627 if (unlikely(err)) {
628 /* Should not happen */
629 if_printf(ndev, "Failed to post TX OOB: %d\n", err);
630
631 mana_tx_unmap_mbuf(apc, tx_info);
632
633 drbr_advance(ndev, txq->txq_br);
634 continue;
635 }
636
637 next_to_use = MANA_IDX_NEXT(next_to_use, tx_queue_size);
638
639 (void)atomic_inc_return(&txq->pending_sends);
640
641 drbr_advance(ndev, txq->txq_br);
642
643 mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq);
644
645 packets++;
646 bytes += len;
647
648 if (is_tso) {
649 txq->tso_pkts++;
650 txq->tso_bytes += len;
651 }
652 }
653
654 counter_enter();
655 counter_u64_add_protected(tx_stats->packets, packets);
656 counter_u64_add_protected(port_stats->tx_packets, packets);
657 counter_u64_add_protected(tx_stats->bytes, bytes);
658 counter_u64_add_protected(port_stats->tx_bytes, bytes);
659 counter_exit();
660
661 txq->next_to_use = next_to_use;
662 }
663
664 static void
665 mana_xmit_taskfunc(void *arg, int pending)
666 {
667 struct mana_txq *txq = (struct mana_txq *)arg;
668 if_t ndev = txq->ndev;
669 struct mana_port_context *apc = if_getsoftc(ndev);
670
671 while (!drbr_empty(ndev, txq->txq_br) && apc->port_is_up &&
672 (if_getdrvflags(ndev) & MANA_TXQ_FULL) == IFF_DRV_RUNNING) {
673 mtx_lock(&txq->txq_mtx);
674 mana_xmit(txq);
675 mtx_unlock(&txq->txq_mtx);
676 }
677 }
678
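/*
 * Make sure the first 'len' bytes of the mbuf chain are contiguous;
 * on m_pullup() failure the enclosing function returns NULL.
 */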
679 #define PULLUP_HDR(m, len) \
680 do { \
681 if (unlikely((m)->m_len < (len))) { \
682 (m) = m_pullup((m), (len)); \
683 if ((m) == NULL) \
684 return (NULL); \
685 } \
686 } while (0)
687
688 /*
689 * If this function fails, the mbuf is freed before returning NULL.
690 */
691 static inline struct mbuf *
692 mana_tso_fixup(struct mbuf *mbuf)
693 {
694 struct ether_vlan_header *eh = mtod(mbuf, struct ether_vlan_header *);
695 struct tcphdr *th;
696 uint16_t etype;
697 int ehlen;
698
699 if (eh->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) {
700 etype = ntohs(eh->evl_proto);
701 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
702 } else {
703 etype = ntohs(eh->evl_encap_proto);
704 ehlen = ETHER_HDR_LEN;
705 }
706
707 if (etype == ETHERTYPE_IP) {
708 struct ip *ip;
709 int iphlen;
710
711 PULLUP_HDR(mbuf, ehlen + sizeof(*ip));
712 ip = mtodo(mbuf, ehlen);
713 iphlen = ip->ip_hl << 2;
714 mbuf->m_pkthdr.l3hlen = ehlen + iphlen;
715
716 PULLUP_HDR(mbuf, ehlen + iphlen + sizeof(*th));
717 th = mtodo(mbuf, ehlen + iphlen);
718
719 ip->ip_len = 0;
720 ip->ip_sum = 0;
721 th->th_sum = in_pseudo(ip->ip_src.s_addr,
722 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
723 } else if (etype == ETHERTYPE_IPV6) {
724 struct ip6_hdr *ip6;
725
726 PULLUP_HDR(mbuf, ehlen + sizeof(*ip6) + sizeof(*th));
727 ip6 = mtodo(mbuf, ehlen);
728 if (ip6->ip6_nxt != IPPROTO_TCP) {
729 /* Something is really wrong, just return */
730 mana_dbg(NULL, "TSO mbuf not TCP, freed.\n");
731 m_freem(mbuf);
732 return NULL;
733 }
734 mbuf->m_pkthdr.l3hlen = ehlen + sizeof(*ip6);
735
736 th = mtodo(mbuf, ehlen + sizeof(*ip6));
737
738 ip6->ip6_plen = 0;
739 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
740 } else {
741 /* CSUM_TSO is set but the packet is neither IPv4 nor IPv6. */
742 mana_warn(NULL, "TSO mbuf not right, freed.\n");
743 m_freem(mbuf);
744 return NULL;
745 }
746
747 MANA_L3_PROTO(mbuf) = etype;
748
749 return (mbuf);
750 }
751
752 /*
753 * If this function fails, the mbuf is freed before returning NULL.
754 */
755 static inline struct mbuf *
756 mana_mbuf_csum_check(struct mbuf *mbuf)
757 {
758 struct ether_vlan_header *eh = mtod(mbuf, struct ether_vlan_header *);
759 struct mbuf *mbuf_next;
760 uint16_t etype;
761 int offset;
762 int ehlen;
763
764 if (eh->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) {
765 etype = ntohs(eh->evl_proto);
766 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
767 } else {
768 etype = ntohs(eh->evl_encap_proto);
769 ehlen = ETHER_HDR_LEN;
770 }
771
772 mbuf_next = m_getptr(mbuf, ehlen, &offset);
773
774 MANA_L4_PROTO(mbuf) = 0;
775 if (etype == ETHERTYPE_IP) {
776 const struct ip *ip;
777 int iphlen;
778
779 ip = (struct ip *)(mtodo(mbuf_next, offset));
780 iphlen = ip->ip_hl << 2;
781 mbuf->m_pkthdr.l3hlen = ehlen + iphlen;
782
783 MANA_L4_PROTO(mbuf) = ip->ip_p;
784 } else if (etype == ETHERTYPE_IPV6) {
785 const struct ip6_hdr *ip6;
786
787 ip6 = (struct ip6_hdr *)(mtodo(mbuf_next, offset));
788 mbuf->m_pkthdr.l3hlen = ehlen + sizeof(*ip6);
789
790 MANA_L4_PROTO(mbuf) = ip6->ip6_nxt;
791 } else {
792 MANA_L4_PROTO(mbuf) = 0;
793 }
794
795 MANA_L3_PROTO(mbuf) = etype;
796
797 return (mbuf);
798 }
799
800 static int
801 mana_start_xmit(if_t ifp, struct mbuf *m)
802 {
803 struct mana_port_context *apc = if_getsoftc(ifp);
804 struct mana_txq *txq;
805 int is_drbr_empty;
806 uint16_t txq_id;
807 int err;
808
809 if (unlikely((!apc->port_is_up) ||
810 (if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
811 return ENODEV;
812
813 if (m->m_pkthdr.csum_flags & CSUM_TSO) {
814 m = mana_tso_fixup(m);
815 if (unlikely(m == NULL)) {
816 counter_enter();
817 counter_u64_add_protected(apc->port_stats.tx_drops, 1);
818 counter_exit();
819 return EIO;
820 }
821 } else {
822 m = mana_mbuf_csum_check(m);
823 if (unlikely(m == NULL)) {
824 counter_enter();
825 counter_u64_add_protected(apc->port_stats.tx_drops, 1);
826 counter_exit();
827 return EIO;
828 }
829 }
830
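/*
 * Pick a tx queue: mbufs with an RSS hash go through the indirection
 * table, others use the flow id directly, both taken modulo the
 * number of queues.
 */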
831 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
832 uint32_t hash = m->m_pkthdr.flowid;
833 txq_id = apc->indir_table[(hash) & MANA_INDIRECT_TABLE_MASK] %
834 apc->num_queues;
835 } else {
836 txq_id = m->m_pkthdr.flowid % apc->num_queues;
837 }
838
839 if (apc->enable_tx_altq)
840 txq_id = apc->tx_qp[txq_id].txq.alt_txq_idx;
841
842 txq = &apc->tx_qp[txq_id].txq;
843
844 is_drbr_empty = drbr_empty(ifp, txq->txq_br);
845 err = drbr_enqueue(ifp, txq->txq_br, m);
846 if (unlikely(err)) {
847 mana_warn(NULL, "txq %u failed to enqueue: %d\n",
848 txq_id, err);
849 taskqueue_enqueue(txq->enqueue_tq, &txq->enqueue_task);
850 return err;
851 }
852
853 if (is_drbr_empty && mtx_trylock(&txq->txq_mtx)) {
854 mana_xmit(txq);
855 mtx_unlock(&txq->txq_mtx);
856 } else {
857 taskqueue_enqueue(txq->enqueue_tq, &txq->enqueue_task);
858 }
859
860 return 0;
861 }
862
863 static void
864 mana_cleanup_port_context(struct mana_port_context *apc)
865 {
866 bus_dma_tag_destroy(apc->tx_buf_tag);
867 bus_dma_tag_destroy(apc->rx_buf_tag);
868 apc->rx_buf_tag = NULL;
869
870 free(apc->rxqs, M_DEVBUF);
871 apc->rxqs = NULL;
872
873 mana_free_counters((counter_u64_t *)&apc->port_stats,
874 sizeof(struct mana_port_stats));
875 }
876
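/*
 * Create the TX (TSO-sized) and RX DMA tags and allocate the per-port
 * array of rxq pointers.
 */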
877 static int
878 mana_init_port_context(struct mana_port_context *apc)
879 {
880 device_t dev = apc->ac->gdma_dev->gdma_context->dev;
881 uint32_t tso_maxsize;
882 int err;
883
884 tso_maxsize = MANA_TSO_MAX_SZ;
885
886 /* Create DMA tag for tx bufs */
887 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
888 1, 0, /* alignment, boundary */
889 BUS_SPACE_MAXADDR, /* lowaddr */
890 BUS_SPACE_MAXADDR, /* highaddr */
891 NULL, NULL, /* filter, filterarg */
892 tso_maxsize, /* maxsize */
893 MAX_MBUF_FRAGS, /* nsegments */
894 tso_maxsize, /* maxsegsize */
895 0, /* flags */
896 NULL, NULL, /* lockfunc, lockfuncarg*/
897 &apc->tx_buf_tag);
898 if (unlikely(err)) {
899 device_printf(dev, "Feiled to create TX DMA tag\n");
900 return err;
901 }
902
903 /* Create DMA tag for rx bufs */
904 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
905 64, 0, /* alignment, boundary */
906 BUS_SPACE_MAXADDR, /* lowaddr */
907 BUS_SPACE_MAXADDR, /* highaddr */
908 NULL, NULL, /* filter, filterarg */
909 MJUM16BYTES, /* maxsize */
910 1, /* nsegments */
911 MJUM16BYTES, /* maxsegsize */
912 0, /* flags */
913 NULL, NULL, /* lockfunc, lockfuncarg*/
914 &apc->rx_buf_tag);
915 if (unlikely(err)) {
916 device_printf(dev, "Feiled to create RX DMA tag\n");
917 return err;
918 }
919
920 apc->rxqs = mallocarray(apc->num_queues, sizeof(struct mana_rxq *),
921 M_DEVBUF, M_WAITOK | M_ZERO);
922
923 return 0;
924 }
925
926 static int
927 mana_send_request(struct mana_context *ac, void *in_buf,
928 uint32_t in_len, void *out_buf, uint32_t out_len)
929 {
930 struct gdma_context *gc = ac->gdma_dev->gdma_context;
931 struct gdma_resp_hdr *resp = out_buf;
932 struct gdma_req_hdr *req = in_buf;
933 device_t dev = gc->dev;
934 static atomic_t activity_id;
935 int err;
936
937 req->dev_id = gc->mana.dev_id;
938 req->activity_id = atomic_inc_return(&activity_id);
939
940 mana_dbg(NULL, "activity_id = %u\n", activity_id);
941
942 err = mana_gd_send_request(gc, in_len, in_buf, out_len,
943 out_buf);
944 if (err || resp->status) {
945 device_printf(dev, "Failed to send mana message: %d, 0x%x\n",
946 err, resp->status);
947 return err ? err : EPROTO;
948 }
949
950 if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 ||
951 req->activity_id != resp->activity_id) {
952 device_printf(dev,
953 "Unexpected mana message response: %x,%x,%x,%x\n",
954 req->dev_id.as_uint32, resp->dev_id.as_uint32,
955 req->activity_id, resp->activity_id);
956 return EPROTO;
957 }
958
959 return 0;
960 }
961
962 static int
963 mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr,
964 const enum mana_command_code expected_code,
965 const uint32_t min_size)
966 {
967 if (resp_hdr->response.msg_type != expected_code)
968 return EPROTO;
969
970 if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1)
971 return EPROTO;
972
973 if (resp_hdr->response.msg_size < min_size)
974 return EPROTO;
975
976 return 0;
977 }
978
979 static int
980 mana_query_device_cfg(struct mana_context *ac, uint32_t proto_major_ver,
981 uint32_t proto_minor_ver, uint32_t proto_micro_ver,
982 uint16_t *max_num_vports)
983 {
984 struct gdma_context *gc = ac->gdma_dev->gdma_context;
985 struct mana_query_device_cfg_resp resp = {};
986 struct mana_query_device_cfg_req req = {};
987 device_t dev = gc->dev;
988 int err = 0;
989
990 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG,
991 sizeof(req), sizeof(resp));
992
993 req.hdr.resp.msg_version = GDMA_MESSAGE_V2;
994
995 req.proto_major_ver = proto_major_ver;
996 req.proto_minor_ver = proto_minor_ver;
997 req.proto_micro_ver = proto_micro_ver;
998
999 err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp));
1000 if (err) {
1001 device_printf(dev, "Failed to query config: %d", err);
1002 return err;
1003 }
1004
1005 err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG,
1006 sizeof(resp));
1007 if (err || resp.hdr.status) {
1008 device_printf(dev, "Invalid query result: %d, 0x%x\n", err,
1009 resp.hdr.status);
1010 if (!err)
1011 err = EPROTO;
1012 return err;
1013 }
1014
1015 *max_num_vports = resp.max_num_vports;
1016
1017 if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2)
1018 gc->adapter_mtu = resp.adapter_mtu;
1019 else
1020 gc->adapter_mtu = ETHERMTU + ETHER_HDR_LEN;
1021
1022 mana_dbg(NULL, "mana max_num_vports from device = %d, "
1023 "adapter_mtu = %u\n",
1024 *max_num_vports, gc->adapter_mtu);
1025
1026 return 0;
1027 }
1028
1029 static int
1030 mana_query_vport_cfg(struct mana_port_context *apc, uint32_t vport_index,
1031 uint32_t *max_sq, uint32_t *max_rq, uint32_t *num_indir_entry)
1032 {
1033 struct mana_query_vport_cfg_resp resp = {};
1034 struct mana_query_vport_cfg_req req = {};
1035 int err;
1036
1037 mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG,
1038 sizeof(req), sizeof(resp));
1039
1040 req.vport_index = vport_index;
1041
1042 err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
1043 sizeof(resp));
1044 if (err)
1045 return err;
1046
1047 err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG,
1048 sizeof(resp));
1049 if (err)
1050 return err;
1051
1052 if (resp.hdr.status)
1053 return EPROTO;
1054
1055 *max_sq = resp.max_num_sq;
1056 *max_rq = resp.max_num_rq;
1057 *num_indir_entry = resp.num_indirection_ent;
1058
1059 apc->port_handle = resp.vport;
1060 memcpy(apc->mac_addr, resp.mac_addr, ETHER_ADDR_LEN);
1061
1062 return 0;
1063 }
1064
1065 void
1066 mana_uncfg_vport(struct mana_port_context *apc)
1067 {
1068 apc->vport_use_count--;
1069 if (apc->vport_use_count < 0) {
1070 mana_err(NULL,
1071 "WARNING: vport_use_count less than 0: %u\n",
1072 apc->vport_use_count);
1073 }
1074 }
1075
1076 int
1077 mana_cfg_vport(struct mana_port_context *apc, uint32_t protection_dom_id,
1078 uint32_t doorbell_pg_id)
1079 {
1080 struct mana_config_vport_resp resp = {};
1081 struct mana_config_vport_req req = {};
1082 int err;
1083
1084 /* This function is used to program the Ethernet port in the hardware
1085 * table. It can be called from the Ethernet driver or the RDMA driver.
1086 *
1087 * For Ethernet usage, the hardware supports only one active user on a
1088 * physical port. The driver checks the port usage before programming
1089 * the hardware when creating a RAW QP (RDMA driver) or exposing the
1090 * device to the kernel NET layer (Ethernet driver).
1091 *
1092 * Because the RDMA driver doesn't know in advance which QP type the
1093 * user will create, it exposes the device with all its ports. The user
1094 * may not be able to create a RAW QP on a port if this port is already
1095 * in use by the Ethernet driver in the kernel.
1096 *
1097 * This physical port limitation only applies to the RAW QP. For RC QP,
1098 * the hardware doesn't have this limitation. The user can create RC
1099 * QPs on a physical port up to the hardware limits independent of the
1100 * Ethernet usage on the same port.
1101 */
1102 if (apc->vport_use_count > 0) {
1103 return EBUSY;
1104 }
1105 apc->vport_use_count++;
1106
1107 mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX,
1108 sizeof(req), sizeof(resp));
1109 req.vport = apc->port_handle;
1110 req.pdid = protection_dom_id;
1111 req.doorbell_pageid = doorbell_pg_id;
1112
1113 err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
1114 sizeof(resp));
1115 if (err) {
1116 if_printf(apc->ndev, "Failed to configure vPort: %d\n", err);
1117 goto out;
1118 }
1119
1120 err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX,
1121 sizeof(resp));
1122 if (err || resp.hdr.status) {
1123 if_printf(apc->ndev, "Failed to configure vPort: %d, 0x%x\n",
1124 err, resp.hdr.status);
1125 if (!err)
1126 err = EPROTO;
1127
1128 goto out;
1129 }
1130
1131 apc->tx_shortform_allowed = resp.short_form_allowed;
1132 apc->tx_vp_offset = resp.tx_vport_offset;
1133
1134 if_printf(apc->ndev, "Configured vPort %ju PD %u DB %u\n",
1135 apc->port_handle, protection_dom_id, doorbell_pg_id);
1136
1137 out:
1138 if (err)
1139 mana_uncfg_vport(apc);
1140
1141 return err;
1142 }
1143
1144 static int
1145 mana_cfg_vport_steering(struct mana_port_context *apc,
1146 enum TRI_STATE rx,
1147 bool update_default_rxobj, bool update_key,
1148 bool update_tab)
1149 {
1150 uint16_t num_entries = MANA_INDIRECT_TABLE_SIZE;
1151 struct mana_cfg_rx_steer_req *req = NULL;
1152 struct mana_cfg_rx_steer_resp resp = {};
1153 if_t ndev = apc->ndev;
1154 mana_handle_t *req_indir_tab;
1155 uint32_t req_buf_size;
1156 int err;
1157
1158 req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries;
1159 req = malloc(req_buf_size, M_DEVBUF, M_WAITOK | M_ZERO);
1160
1161 mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
1162 sizeof(resp));
1163
1164 req->vport = apc->port_handle;
1165 req->num_indir_entries = num_entries;
1166 req->indir_tab_offset = sizeof(*req);
1167 req->rx_enable = rx;
1168 req->rss_enable = apc->rss_state;
1169 req->update_default_rxobj = update_default_rxobj;
1170 req->update_hashkey = update_key;
1171 req->update_indir_tab = update_tab;
1172 req->default_rxobj = apc->default_rxobj;
1173
1174 if (update_key)
1175 memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE);
1176
1177 if (update_tab) {
1178 req_indir_tab = (mana_handle_t *)(req + 1);
1179 memcpy(req_indir_tab, apc->rxobj_table,
1180 req->num_indir_entries * sizeof(mana_handle_t));
1181 }
1182
1183 err = mana_send_request(apc->ac, req, req_buf_size, &resp,
1184 sizeof(resp));
1185 if (err) {
1186 if_printf(ndev, "Failed to configure vPort RX: %d\n", err);
1187 goto out;
1188 }
1189
1190 err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX,
1191 sizeof(resp));
1192 if (err) {
1193 if_printf(ndev, "vPort RX configuration failed: %d\n", err);
1194 goto out;
1195 }
1196
1197 if (resp.hdr.status) {
1198 if_printf(ndev, "vPort RX configuration failed: 0x%x\n",
1199 resp.hdr.status);
1200 err = EPROTO;
1201 }
1202
1203 if_printf(ndev, "Configured steering vPort %ju entries %u\n",
1204 apc->port_handle, num_entries);
1205
1206 out:
1207 free(req, M_DEVBUF);
1208 return err;
1209 }
1210
1211 int
1212 mana_create_wq_obj(struct mana_port_context *apc,
1213 mana_handle_t vport,
1214 uint32_t wq_type, struct mana_obj_spec *wq_spec,
1215 struct mana_obj_spec *cq_spec,
1216 mana_handle_t *wq_obj)
1217 {
1218 struct mana_create_wqobj_resp resp = {};
1219 struct mana_create_wqobj_req req = {};
1220 if_t ndev = apc->ndev;
1221 int err;
1222
1223 mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ,
1224 sizeof(req), sizeof(resp));
1225 req.vport = vport;
1226 req.wq_type = wq_type;
1227 req.wq_gdma_region = wq_spec->gdma_region;
1228 req.cq_gdma_region = cq_spec->gdma_region;
1229 req.wq_size = wq_spec->queue_size;
1230 req.cq_size = cq_spec->queue_size;
1231 req.cq_moderation_ctx_id = cq_spec->modr_ctx_id;
1232 req.cq_parent_qid = cq_spec->attached_eq;
1233
1234 err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
1235 sizeof(resp));
1236 if (err) {
1237 if_printf(ndev, "Failed to create WQ object: %d\n", err);
1238 goto out;
1239 }
1240
1241 err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ,
1242 sizeof(resp));
1243 if (err || resp.hdr.status) {
1244 if_printf(ndev, "Failed to create WQ object: %d, 0x%x\n", err,
1245 resp.hdr.status);
1246 if (!err)
1247 err = EPROTO;
1248 goto out;
1249 }
1250
1251 if (resp.wq_obj == INVALID_MANA_HANDLE) {
1252 if_printf(ndev, "Got an invalid WQ object handle\n");
1253 err = EPROTO;
1254 goto out;
1255 }
1256
1257 *wq_obj = resp.wq_obj;
1258 wq_spec->queue_index = resp.wq_id;
1259 cq_spec->queue_index = resp.cq_id;
1260
1261 return 0;
1262 out:
1263 return err;
1264 }
1265
1266 void
1267 mana_destroy_wq_obj(struct mana_port_context *apc, uint32_t wq_type,
1268 mana_handle_t wq_obj)
1269 {
1270 struct mana_destroy_wqobj_resp resp = {};
1271 struct mana_destroy_wqobj_req req = {};
1272 if_t ndev = apc->ndev;
1273 int err;
1274
1275 mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ,
1276 sizeof(req), sizeof(resp));
1277 req.wq_type = wq_type;
1278 req.wq_obj_handle = wq_obj;
1279
1280 err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
1281 sizeof(resp));
1282 if (err) {
1283 if_printf(ndev, "Failed to destroy WQ object: %d\n", err);
1284 return;
1285 }
1286
1287 err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ,
1288 sizeof(resp));
1289 if (err || resp.hdr.status)
1290 if_printf(ndev, "Failed to destroy WQ object: %d, 0x%x\n",
1291 err, resp.hdr.status);
1292 }
1293
1294 static void
1295 mana_destroy_eq(struct mana_context *ac)
1296 {
1297 struct gdma_context *gc = ac->gdma_dev->gdma_context;
1298 struct gdma_queue *eq;
1299 int i;
1300
1301 if (!ac->eqs)
1302 return;
1303
1304 for (i = 0; i < gc->max_num_queues; i++) {
1305 eq = ac->eqs[i].eq;
1306 if (!eq)
1307 continue;
1308
1309 mana_gd_destroy_queue(gc, eq);
1310 }
1311
1312 free(ac->eqs, M_DEVBUF);
1313 ac->eqs = NULL;
1314 }
1315
1316 static int
1317 mana_create_eq(struct mana_context *ac)
1318 {
1319 struct gdma_dev *gd = ac->gdma_dev;
1320 struct gdma_context *gc = gd->gdma_context;
1321 struct gdma_queue_spec spec = {};
1322 int err;
1323 int i;
1324
1325 ac->eqs = mallocarray(gc->max_num_queues, sizeof(struct mana_eq),
1326 M_DEVBUF, M_WAITOK | M_ZERO);
1327
1328 spec.type = GDMA_EQ;
1329 spec.monitor_avl_buf = false;
1330 spec.queue_size = EQ_SIZE;
1331 spec.eq.callback = NULL;
1332 spec.eq.context = ac->eqs;
1333 spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
1334
1335 for (i = 0; i < gc->max_num_queues; i++) {
1336 err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
1337 if (err)
1338 goto out;
1339 }
1340
1341 return 0;
1342 out:
1343 mana_destroy_eq(ac);
1344 return err;
1345 }
1346
1347 static int
1348 mana_fence_rq(struct mana_port_context *apc, struct mana_rxq *rxq)
1349 {
1350 struct mana_fence_rq_resp resp = {};
1351 struct mana_fence_rq_req req = {};
1352 int err;
1353
1354 init_completion(&rxq->fence_event);
1355
1356 mana_gd_init_req_hdr(&req.hdr, MANA_FENCE_RQ,
1357 sizeof(req), sizeof(resp));
1358 req.wq_obj_handle = rxq->rxobj;
1359
1360 err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
1361 sizeof(resp));
1362 if (err) {
1363 if_printf(apc->ndev, "Failed to fence RQ %u: %d\n",
1364 rxq->rxq_idx, err);
1365 return err;
1366 }
1367
1368 err = mana_verify_resp_hdr(&resp.hdr, MANA_FENCE_RQ, sizeof(resp));
1369 if (err || resp.hdr.status) {
1370 if_printf(apc->ndev, "Failed to fence RQ %u: %d, 0x%x\n",
1371 rxq->rxq_idx, err, resp.hdr.status);
1372 if (!err)
1373 err = EPROTO;
1374
1375 return err;
1376 }
1377
1378 if (wait_for_completion_timeout(&rxq->fence_event, 10 * hz)) {
1379 if_printf(apc->ndev, "Failed to fence RQ %u: timed out\n",
1380 rxq->rxq_idx);
1381 return ETIMEDOUT;
1382 }
1383
1384 return 0;
1385 }
1386
1387 static void
1388 mana_fence_rqs(struct mana_port_context *apc)
1389 {
1390 unsigned int rxq_idx;
1391 struct mana_rxq *rxq;
1392 int err;
1393
1394 for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
1395 rxq = apc->rxqs[rxq_idx];
1396 err = mana_fence_rq(apc, rxq);
1397
1398 /* In case of any error, use sleep instead. */
1399 if (err)
1400 gdma_msleep(100);
1401 }
1402 }
1403
1404 static int
1405 mana_move_wq_tail(struct gdma_queue *wq, uint32_t num_units)
1406 {
1407 uint32_t used_space_old;
1408 uint32_t used_space_new;
1409
1410 used_space_old = wq->head - wq->tail;
1411 used_space_new = wq->head - (wq->tail + num_units);
1412
1413 if (used_space_new > used_space_old) {
1414 mana_err(NULL,
1415 "WARNING: new used space %u greater than old one %u\n",
1416 used_space_new, used_space_old);
1417 return ERANGE;
1418 }
1419
1420 wq->tail += num_units;
1421 return 0;
1422 }
1423
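/*
 * Process TX completions: unmap and free the sent mbufs, advance the
 * work queue tail, and restart a stopped queue once enough send queue
 * space is available again.
 */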
1424 static void
1425 mana_poll_tx_cq(struct mana_cq *cq)
1426 {
1427 struct gdma_comp *completions = cq->gdma_comp_buf;
1428 struct gdma_posted_wqe_info *wqe_info;
1429 struct mana_send_buf_info *tx_info;
1430 unsigned int pkt_transmitted = 0;
1431 unsigned int wqe_unit_cnt = 0;
1432 struct mana_txq *txq = cq->txq;
1433 struct mana_port_context *apc;
1434 unsigned int tx_queue_size;
1435 uint16_t next_to_complete;
1436 if_t ndev;
1437 int comp_read;
1438 int txq_idx = txq->idx;
1439 int i;
1440 int sa_drop = 0;
1441
1442 struct gdma_queue *gdma_wq;
1443 unsigned int avail_space;
1444 bool txq_full = false;
1445
1446 ndev = txq->ndev;
1447 apc = if_getsoftc(ndev);
1448 tx_queue_size = apc->tx_queue_size;
1449
1450 comp_read = mana_gd_poll_cq(cq->gdma_cq, completions,
1451 CQE_POLLING_BUFFER);
1452
1453 if (comp_read < 1)
1454 return;
1455
1456 next_to_complete = txq->next_to_complete;
1457
1458 for (i = 0; i < comp_read; i++) {
1459 struct mana_tx_comp_oob *cqe_oob;
1460
1461 if (!completions[i].is_sq) {
1462 mana_err(NULL, "WARNING: Not for SQ\n");
1463 return;
1464 }
1465
1466 cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data;
1467 if (cqe_oob->cqe_hdr.client_type !=
1468 MANA_CQE_COMPLETION) {
1469 mana_err(NULL,
1470 "WARNING: Invalid CQE client type %u\n",
1471 cqe_oob->cqe_hdr.client_type);
1472 return;
1473 }
1474
1475 switch (cqe_oob->cqe_hdr.cqe_type) {
1476 case CQE_TX_OKAY:
1477 break;
1478
1479 case CQE_TX_SA_DROP:
1480 case CQE_TX_MTU_DROP:
1481 case CQE_TX_INVALID_OOB:
1482 case CQE_TX_INVALID_ETH_TYPE:
1483 case CQE_TX_HDR_PROCESSING_ERROR:
1484 case CQE_TX_VF_DISABLED:
1485 case CQE_TX_VPORT_IDX_OUT_OF_RANGE:
1486 case CQE_TX_VPORT_DISABLED:
1487 case CQE_TX_VLAN_TAGGING_VIOLATION:
1488 sa_drop++;
1489 mana_dbg(NULL,
1490 "TX: txq %d CQE error %d, ntc = %d, "
1491 "pending sends = %d: err ignored.\n",
1492 txq_idx, cqe_oob->cqe_hdr.cqe_type,
1493 next_to_complete, txq->pending_sends);
1494 counter_u64_add(txq->stats.cqe_err, 1);
1495 break;
1496
1497 default:
1498 /* If the CQE type is unknown, log a debug msg,
1499 * and still free the mbuf, etc.
1500 */
1501 mana_dbg(NULL,
1502 "ERROR: TX: Unknown CQE type %d\n",
1503 cqe_oob->cqe_hdr.cqe_type);
1504 counter_u64_add(txq->stats.cqe_unknown_type, 1);
1505 break;
1506 }
1507 if (txq->gdma_txq_id != completions[i].wq_num) {
1508 mana_dbg(NULL,
1509 "txq gdma id not match completion wq num: "
1510 "%d != %d\n",
1511 txq->gdma_txq_id, completions[i].wq_num);
1512 break;
1513 }
1514
1515 tx_info = &txq->tx_buf_info[next_to_complete];
1516 if (!tx_info->mbuf) {
1517 mana_err(NULL,
1518 "WARNING: txq %d Empty mbuf on tx_info: %u, "
1519 "ntu = %u, pending_sends = %d, "
1520 "transmitted = %d, sa_drop = %d, i = %d, comp_read = %d\n",
1521 txq_idx, next_to_complete, txq->next_to_use,
1522 txq->pending_sends, pkt_transmitted, sa_drop,
1523 i, comp_read);
1524 break;
1525 }
1526
1527 wqe_info = &tx_info->wqe_inf;
1528 wqe_unit_cnt += wqe_info->wqe_size_in_bu;
1529
1530 mana_tx_unmap_mbuf(apc, tx_info);
1531 mb();
1532
1533 next_to_complete =
1534 MANA_IDX_NEXT(next_to_complete, tx_queue_size);
1535
1536 pkt_transmitted++;
1537 }
1538
1539 txq->next_to_complete = next_to_complete;
1540
1541 if (wqe_unit_cnt == 0) {
1542 mana_err(NULL,
1543 "WARNING: TX ring not proceeding!\n");
1544 return;
1545 }
1546
1547 mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt);
1548
1549 /* Ensure tail updated before checking q stop */
1550 wmb();
1551
1552 gdma_wq = txq->gdma_sq;
1553 avail_space = mana_gd_wq_avail_space(gdma_wq);
1554
1555
1556 if ((if_getdrvflags(ndev) & MANA_TXQ_FULL) == MANA_TXQ_FULL) {
1557 txq_full = true;
1558 }
1559
1560 /* Ensure checking txq_full before apc->port_is_up. */
1561 rmb();
1562
1563 if (txq_full && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) {
1564 /* Grab the txq lock and re-test */
1565 mtx_lock(&txq->txq_mtx);
1566 avail_space = mana_gd_wq_avail_space(gdma_wq);
1567
1568 if ((if_getdrvflags(ndev) & MANA_TXQ_FULL) == MANA_TXQ_FULL &&
1569 apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) {
1570 /* Clear the Q full flag */
1571 if_setdrvflagbits(apc->ndev, IFF_DRV_RUNNING,
1572 IFF_DRV_OACTIVE);
1573 counter_u64_add(txq->stats.wakeup, 1);
1574 if (txq->alt_txq_idx != txq->idx) {
1575 uint64_t stops = counter_u64_fetch(txq->stats.stop);
1576 uint64_t wakeups = counter_u64_fetch(txq->stats.wakeup);
1577 /* Reset alt_txq_idx back if it is not overloaded */
1578 if (stops < wakeups) {
1579 txq->alt_txq_idx = txq->idx;
1580 counter_u64_add(txq->stats.alt_reset, 1);
1581 }
1582 }
1583 rmb();
1584 /* Schedule a tx enqueue task */
1585 taskqueue_enqueue(txq->enqueue_tq, &txq->enqueue_task);
1586 }
1587 mtx_unlock(&txq->txq_mtx);
1588 }
1589
1590 if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0)
1591 mana_err(NULL,
1592 "WARNING: TX %d pending_sends error: %d\n",
1593 txq->idx, txq->pending_sends);
1594
1595 cq->work_done = pkt_transmitted;
1596 }
1597
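/*
 * Repost a receive buffer's work request to the RQ and sanity-check
 * that it consumed exactly one WQE buffer unit.
 */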
1598 static void
1599 mana_post_pkt_rxq(struct mana_rxq *rxq,
1600 struct mana_recv_buf_oob *recv_buf_oob)
1601 {
1602 int err;
1603
1604 err = mana_gd_post_work_request(rxq->gdma_rq, &recv_buf_oob->wqe_req,
1605 &recv_buf_oob->wqe_inf);
1606 if (err) {
1607 mana_err(NULL, "WARNING: rxq %u post pkt err %d\n",
1608 rxq->rxq_idx, err);
1609 return;
1610 }
1611
1612 if (recv_buf_oob->wqe_inf.wqe_size_in_bu != 1) {
1613 mana_err(NULL, "WARNING: rxq %u wqe_size_in_bu %u\n",
1614 rxq->rxq_idx, recv_buf_oob->wqe_inf.wqe_size_in_bu);
1615 }
1616 }
1617
1618 static void
1619 mana_rx_mbuf(struct mbuf *mbuf, struct mana_rxcomp_oob *cqe,
1620 struct mana_rxq *rxq)
1621 {
1622 struct mana_stats *rx_stats = &rxq->stats;
1623 if_t ndev = rxq->ndev;
1624 uint32_t pkt_len = cqe->ppi[0].pkt_len;
1625 uint16_t rxq_idx = rxq->rxq_idx;
1626 struct mana_port_context *apc;
1627 bool do_lro = false;
1628 bool do_if_input;
1629
1630 apc = if_getsoftc(ndev);
1631 rxq->rx_cq.work_done++;
1632
1633 if (!mbuf) {
1634 return;
1635 }
1636
1637 mbuf->m_flags |= M_PKTHDR;
1638 mbuf->m_pkthdr.len = pkt_len;
1639 mbuf->m_len = pkt_len;
1640 mbuf->m_pkthdr.rcvif = ndev;
1641
1642 if ((if_getcapenable(ndev) & IFCAP_RXCSUM ||
1643 if_getcapenable(ndev) & IFCAP_RXCSUM_IPV6) &&
1644 (cqe->rx_iphdr_csum_succeed)) {
1645 mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1646 mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1647 if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed) {
1648 mbuf->m_pkthdr.csum_flags |=
1649 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1650 mbuf->m_pkthdr.csum_data = 0xffff;
1651
1652 if (cqe->rx_tcp_csum_succeed)
1653 do_lro = true;
1654 }
1655 }
1656
1657 if (cqe->rx_hashtype != 0) {
1658 mbuf->m_pkthdr.flowid = cqe->ppi[0].pkt_hash;
1659
1660 uint16_t hashtype = cqe->rx_hashtype;
1661 if (hashtype & NDIS_HASH_IPV4_MASK) {
1662 hashtype &= NDIS_HASH_IPV4_MASK;
1663 switch (hashtype) {
1664 case NDIS_HASH_TCP_IPV4:
1665 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
1666 break;
1667 case NDIS_HASH_UDP_IPV4:
1668 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
1669 break;
1670 default:
1671 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
1672 }
1673 } else if (hashtype & NDIS_HASH_IPV6_MASK) {
1674 hashtype &= NDIS_HASH_IPV6_MASK;
1675 switch (hashtype) {
1676 case NDIS_HASH_TCP_IPV6:
1677 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
1678 break;
1679 case NDIS_HASH_TCP_IPV6_EX:
1680 M_HASHTYPE_SET(mbuf,
1681 M_HASHTYPE_RSS_TCP_IPV6_EX);
1682 break;
1683 case NDIS_HASH_UDP_IPV6:
1684 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
1685 break;
1686 case NDIS_HASH_UDP_IPV6_EX:
1687 M_HASHTYPE_SET(mbuf,
1688 M_HASHTYPE_RSS_UDP_IPV6_EX);
1689 break;
1690 default:
1691 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
1692 }
1693 } else {
1694 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
1695 }
1696 } else {
1697 mbuf->m_pkthdr.flowid = rxq_idx;
1698 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
1699 }
1700
1701 do_if_input = true;
1702 if ((if_getcapenable(ndev) & IFCAP_LRO) && do_lro) {
1703 rxq->lro_tried++;
1704 if (rxq->lro.lro_cnt != 0 &&
1705 tcp_lro_rx(&rxq->lro, mbuf, 0) == 0)
1706 do_if_input = false;
1707 else
1708 rxq->lro_failed++;
1709 }
1710 if (do_if_input) {
1711 if_input(ndev, mbuf);
1712 }
1713
1714 counter_enter();
1715 counter_u64_add_protected(rx_stats->packets, 1);
1716 counter_u64_add_protected(apc->port_stats.rx_packets, 1);
1717 counter_u64_add_protected(rx_stats->bytes, pkt_len);
1718 counter_u64_add_protected(apc->port_stats.rx_bytes, pkt_len);
1719 counter_exit();
1720 }
1721
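/*
 * Post up to 'num' fresh receive buffers to the RQ. Stops early if an
 * mbuf cannot be allocated or mapped; returns the number actually
 * posted.
 */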
1722 static int
1723 mana_refill_rx_mbufs(struct mana_port_context *apc,
1724 struct mana_rxq *rxq, uint32_t num)
1725 {
1726 struct mana_recv_buf_oob *rxbuf_oob;
1727 uint32_t next_to_refill;
1728 uint32_t i;
1729 int err;
1730
1731 next_to_refill = rxq->next_to_refill;
1732
1733 for (i = 0; i < num; i++) {
1734 if (next_to_refill == rxq->buf_index) {
1735 mana_warn(NULL, "refilling index reached current, "
1736 "aborted! rxq %u, oob idx %u\n",
1737 rxq->rxq_idx, next_to_refill);
1738 break;
1739 }
1740
1741 rxbuf_oob = &rxq->rx_oobs[next_to_refill];
1742
1743 if (likely(rxbuf_oob->mbuf == NULL)) {
1744 err = mana_load_rx_mbuf(apc, rxq, rxbuf_oob, true);
1745 } else {
1746 mana_warn(NULL, "mbuf not null when refilling, "
1747 "rxq %u, oob idx %u, reusing\n",
1748 rxq->rxq_idx, next_to_refill);
1749 err = mana_load_rx_mbuf(apc, rxq, rxbuf_oob, false);
1750 }
1751
1752 if (unlikely(err != 0)) {
1753 mana_dbg(NULL,
1754 "failed to load rx mbuf, err = %d, rxq = %u\n",
1755 err, rxq->rxq_idx);
1756 counter_u64_add(rxq->stats.mbuf_alloc_fail, 1);
1757 break;
1758 }
1759
1760 mana_post_pkt_rxq(rxq, rxbuf_oob);
1761
1762 next_to_refill = MANA_IDX_NEXT(next_to_refill,
1763 rxq->num_rx_buf);
1764 }
1765
1766 if (likely(i != 0)) {
1767 struct gdma_context *gc =
1768 rxq->gdma_rq->gdma_dev->gdma_context;
1769
1770 mana_gd_wq_ring_doorbell(gc, rxq->gdma_rq);
1771 }
1772
1773 if (unlikely(i < num)) {
1774 counter_u64_add(rxq->stats.partial_refill, 1);
1775 mana_dbg(NULL,
1776 "refilled rxq %u with only %u mbufs (%u requested)\n",
1777 rxq->rxq_idx, i, num);
1778 }
1779
1780 rxq->next_to_refill = next_to_refill;
1781 return (i);
1782 }
1783
1784 static void
1785 mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
1786 struct gdma_comp *cqe)
1787 {
1788 struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data;
1789 struct mana_recv_buf_oob *rxbuf_oob;
1790 if_t ndev = rxq->ndev;
1791 struct mana_port_context *apc;
1792 struct mbuf *old_mbuf;
1793 uint32_t refill_required;
1794 uint32_t curr, pktlen;
1795
1796 switch (oob->cqe_hdr.cqe_type) {
1797 case CQE_RX_OKAY:
1798 break;
1799
1800 case CQE_RX_TRUNCATED:
1801 apc = if_getsoftc(ndev);
1802 counter_u64_add(apc->port_stats.rx_drops, 1);
1803 rxbuf_oob = &rxq->rx_oobs[rxq->buf_index];
1804 if_printf(ndev, "Dropped a truncated packet\n");
1805 goto drop;
1806
1807 case CQE_RX_COALESCED_4:
1808 if_printf(ndev, "RX coalescing is unsupported\n");
1809 return;
1810
1811 case CQE_RX_OBJECT_FENCE:
1812 complete(&rxq->fence_event);
1813 return;
1814
1815 default:
1816 if_printf(ndev, "Unknown RX CQE type = %d\n",
1817 oob->cqe_hdr.cqe_type);
1818 return;
1819 }
1820
1821 if (oob->cqe_hdr.cqe_type != CQE_RX_OKAY)
1822 return;
1823
1824 pktlen = oob->ppi[0].pkt_len;
1825
1826 if (pktlen == 0) {
1827 /* Data packets should never have a packet length of zero */
1828 if_printf(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%jx\n",
1829 rxq->gdma_id, cq->gdma_id, rxq->rxobj);
1830 return;
1831 }
1832
1833 curr = rxq->buf_index;
1834 rxbuf_oob = &rxq->rx_oobs[curr];
1835 if (rxbuf_oob->wqe_inf.wqe_size_in_bu != 1) {
1836 mana_err(NULL, "WARNING: Rx Incorrect complete "
1837 "WQE size %u\n",
1838 rxbuf_oob->wqe_inf.wqe_size_in_bu);
1839 }
1840
1841 apc = if_getsoftc(ndev);
1842
1843 old_mbuf = rxbuf_oob->mbuf;
1844
1845 /* Unload DMA map for the old mbuf */
1846 mana_unload_rx_mbuf(apc, rxq, rxbuf_oob, false);
1847 /* Clear the mbuf pointer to avoid reuse */
1848 rxbuf_oob->mbuf = NULL;
1849
1850 mana_rx_mbuf(old_mbuf, oob, rxq);
1851
1852 drop:
1853 mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
1854
1855 rxq->buf_index = MANA_IDX_NEXT(rxq->buf_index, rxq->num_rx_buf);
1856
1857 /* Check if refill is needed */
1858 refill_required = MANA_GET_SPACE(rxq->next_to_refill,
1859 rxq->buf_index, rxq->num_rx_buf);
1860
1861 if (refill_required >= rxq->refill_thresh) {
1862 /* Refill empty rx_oobs with new mbufs */
1863 mana_refill_rx_mbufs(apc, rxq, refill_required);
1864 }
1865 }
1866
1867 static void
1868 mana_poll_rx_cq(struct mana_cq *cq)
1869 {
1870 struct gdma_comp *comp = cq->gdma_comp_buf;
1871 int comp_read, i;
1872
1873 comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
1874 KASSERT(comp_read <= CQE_POLLING_BUFFER,
1875 ("comp_read %d great than buf size %d",
1876 comp_read, CQE_POLLING_BUFFER));
1877
1878 for (i = 0; i < comp_read; i++) {
1879 if (comp[i].is_sq == true) {
1880 mana_err(NULL,
1881 "WARNING: CQE not for receive queue\n");
1882 return;
1883 }
1884
1885 /* verify recv cqe references the right rxq */
1886 if (comp[i].wq_num != cq->rxq->gdma_id) {
1887 mana_err(NULL,
1888 "WARNING: Received CQE %d not for "
1889 "this receive queue %d\n",
1890 comp[i].wq_num, cq->rxq->gdma_id);
1891 return;
1892 }
1893
1894 mana_process_rx_cqe(cq->rxq, cq, &comp[i]);
1895 }
1896
1897 tcp_lro_flush_all(&cq->rxq->lro);
1898 }
1899
1900 static void
1901 mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
1902 {
1903 struct mana_cq *cq = context;
1904 uint8_t arm_bit;
1905
1906 KASSERT(cq->gdma_cq == gdma_queue,
1907 ("cq do not match %p, %p", cq->gdma_cq, gdma_queue));
1908
1909 if (cq->type == MANA_CQ_TYPE_RX) {
1910 mana_poll_rx_cq(cq);
1911 } else {
1912 mana_poll_tx_cq(cq);
1913 }
1914
1915 if (cq->work_done < cq->budget && cq->do_not_ring_db == false)
1916 arm_bit = SET_ARM_BIT;
1917 else
1918 arm_bit = 0;
1919
1920 mana_gd_ring_cq(gdma_queue, arm_bit);
1921 }
1922
1923 #define MANA_POLL_BUDGET 256
1924 #define MANA_RX_BUDGET 8
1925 #define MANA_TX_BUDGET 8
1926
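/*
 * Taskqueue handler: poll the CQ repeatedly until it completes less
 * than its budget, then let mana_cq_handler() re-arm the CQ.
 */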
1927 static void
1928 mana_poll(void *arg, int pending)
1929 {
1930 struct mana_cq *cq = arg;
1931 int i;
1932
1933 cq->work_done = 0;
1934 if (cq->type == MANA_CQ_TYPE_RX) {
1935 cq->budget = MANA_RX_BUDGET;
1936 } else {
1937 cq->budget = MANA_TX_BUDGET;
1938 }
1939
1940 for (i = 0; i < MANA_POLL_BUDGET; i++) {
1941 /*
1942 		 * If this is the last iteration, set the budget large
1943 		 * enough so that the CQ will be armed in any case.
1944 */
1945 if (i == (MANA_POLL_BUDGET - 1))
1946 cq->budget = CQE_POLLING_BUFFER + 1;
1947
1948 mana_cq_handler(cq, cq->gdma_cq);
1949
1950 if (cq->work_done < cq->budget)
1951 break;
1952
1953 cq->work_done = 0;
1954 }
1955 }
1956
1957 static void
1958 mana_schedule_task(void *arg, struct gdma_queue *gdma_queue)
1959 {
1960 struct mana_cq *cq = arg;
1961
1962 taskqueue_enqueue(cq->cleanup_tq, &cq->cleanup_task);
1963 }
1964
1965 static void
1966 mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq)
1967 {
1968 struct gdma_dev *gd = apc->ac->gdma_dev;
1969
1970 if (!cq->gdma_cq)
1971 return;
1972
1973 /* Drain cleanup taskqueue */
1974 if (cq->cleanup_tq) {
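		/*
		 * taskqueue_cancel() returns non-zero while the task is
		 * still running; keep draining and retrying until it can
		 * be cancelled, then free the taskqueue.
		 */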
1975 while (taskqueue_cancel(cq->cleanup_tq,
1976 &cq->cleanup_task, NULL)) {
1977 taskqueue_drain(cq->cleanup_tq,
1978 &cq->cleanup_task);
1979 }
1980
1981 taskqueue_free(cq->cleanup_tq);
1982 }
1983
1984 mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq);
1985 }
1986
1987 static void
1988 mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
1989 {
1990 struct gdma_dev *gd = apc->ac->gdma_dev;
1991 struct mana_send_buf_info *txbuf_info;
1992 uint32_t pending_sends;
1993 int i;
1994
1995 if (!txq->gdma_sq)
1996 return;
1997
1998 if ((pending_sends = atomic_read(&txq->pending_sends)) > 0) {
1999 mana_err(NULL,
2000 "WARNING: txq pending sends not zero: %u\n",
2001 pending_sends);
2002 }
2003
2004 if (txq->next_to_use != txq->next_to_complete) {
2005 mana_err(NULL,
2006 "WARNING: txq buf not completed, "
2007 "next use %u, next complete %u\n",
2008 txq->next_to_use, txq->next_to_complete);
2009 }
2010
2011 /* Flush buf ring. Grab txq mtx lock */
2012 if (txq->txq_br) {
2013 mtx_lock(&txq->txq_mtx);
2014 drbr_flush(apc->ndev, txq->txq_br);
2015 mtx_unlock(&txq->txq_mtx);
2016 buf_ring_free(txq->txq_br, M_DEVBUF);
2017 }
2018
2019 /* Drain taskqueue */
2020 if (txq->enqueue_tq) {
2021 while (taskqueue_cancel(txq->enqueue_tq,
2022 &txq->enqueue_task, NULL)) {
2023 taskqueue_drain(txq->enqueue_tq,
2024 &txq->enqueue_task);
2025 }
2026
2027 taskqueue_free(txq->enqueue_tq);
2028 }
2029
2030 if (txq->tx_buf_info) {
2031 /* Free all mbufs which are still in-flight */
2032 for (i = 0; i < apc->tx_queue_size; i++) {
2033 txbuf_info = &txq->tx_buf_info[i];
2034 if (txbuf_info->mbuf) {
2035 mana_tx_unmap_mbuf(apc, txbuf_info);
2036 }
2037 }
2038
2039 free(txq->tx_buf_info, M_DEVBUF);
2040 }
2041
2042 mana_free_counters((counter_u64_t *)&txq->stats,
2043 sizeof(txq->stats));
2044
2045 mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq);
2046
2047 mtx_destroy(&txq->txq_mtx);
2048 }
2049
2050 static void
2051 mana_destroy_txq(struct mana_port_context *apc)
2052 {
2053 int i;
2054
2055 if (!apc->tx_qp)
2056 return;
2057
2058 for (i = 0; i < apc->num_queues; i++) {
2059 mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
2060
2061 mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
2062
2063 mana_deinit_txq(apc, &apc->tx_qp[i].txq);
2064 }
2065
2066 free(apc->tx_qp, M_DEVBUF);
2067 apc->tx_qp = NULL;
2068 }
2069
2070 static int
2071 mana_create_txq(struct mana_port_context *apc, if_t net)
2072 {
2073 struct mana_context *ac = apc->ac;
2074 struct gdma_dev *gd = ac->gdma_dev;
2075 struct mana_obj_spec wq_spec;
2076 struct mana_obj_spec cq_spec;
2077 struct gdma_queue_spec spec;
2078 struct gdma_context *gc;
2079 struct mana_txq *txq;
2080 struct mana_cq *cq;
2081 uint32_t txq_size;
2082 uint32_t cq_size;
2083 int err;
2084 int i;
2085
2086 apc->tx_qp = mallocarray(apc->num_queues, sizeof(struct mana_tx_qp),
2087 M_DEVBUF, M_WAITOK | M_ZERO);
2088
2089 	/* The minimum size of a WQE is 32 bytes, hence
2090 	 * apc->tx_queue_size represents the maximum number of WQEs
2091 	 * the SQ can store. This value is then used to size other queues
2092 	 * to prevent overflow.
2093 	 * Also note that txq_size is always page aligned: the minimum
2094 	 * value of apc->tx_queue_size is 128, which makes
2095 	 * txq_size 128 * 32 = 4096, and all larger values of
2096 	 * apc->tx_queue_size are powers of two.
2097 	 */
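	/*
	 * For instance, apc->tx_queue_size = 512 gives
	 * txq_size = 512 * 32 = 16384 bytes, i.e. four 4 KB pages;
	 * cq_size below is sized the same way from COMP_ENTRY_SIZE and
	 * then rounded up to a PAGE_SIZE multiple.
	 */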
2098 txq_size = apc->tx_queue_size * 32;
2099 KASSERT(IS_ALIGNED(txq_size, PAGE_SIZE),
2100 ("txq size not page aligned"));
2101
2102 cq_size = apc->tx_queue_size * COMP_ENTRY_SIZE;
2103 cq_size = ALIGN(cq_size, PAGE_SIZE);
2104
2105 gc = gd->gdma_context;
2106
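	/*
	 * For each queue pair: create the SQ and its completion queue in
	 * the GDMA layer, bind them into a hardware TX WQ object, then set
	 * up the per-queue buf_ring, the deferred-send taskqueue and the
	 * CQ cleanup task before arming the CQ.
	 */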
2107 for (i = 0; i < apc->num_queues; i++) {
2108 apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE;
2109
2110 /* Create SQ */
2111 txq = &apc->tx_qp[i].txq;
2112
2113 txq->ndev = net;
2114 txq->vp_offset = apc->tx_vp_offset;
2115 txq->idx = i;
2116 txq->alt_txq_idx = i;
2117
2118 memset(&spec, 0, sizeof(spec));
2119 spec.type = GDMA_SQ;
2120 spec.monitor_avl_buf = true;
2121 spec.queue_size = txq_size;
2122 err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq);
2123 if (err)
2124 goto out;
2125
2126 /* Create SQ's CQ */
2127 cq = &apc->tx_qp[i].tx_cq;
2128 cq->type = MANA_CQ_TYPE_TX;
2129
2130 cq->txq = txq;
2131
2132 memset(&spec, 0, sizeof(spec));
2133 spec.type = GDMA_CQ;
2134 spec.monitor_avl_buf = false;
2135 spec.queue_size = cq_size;
2136 spec.cq.callback = mana_schedule_task;
2137 spec.cq.parent_eq = ac->eqs[i].eq;
2138 spec.cq.context = cq;
2139 err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
2140 if (err)
2141 goto out;
2142
2143 memset(&wq_spec, 0, sizeof(wq_spec));
2144 memset(&cq_spec, 0, sizeof(cq_spec));
2145
2146 wq_spec.gdma_region = txq->gdma_sq->mem_info.dma_region_handle;
2147 wq_spec.queue_size = txq->gdma_sq->queue_size;
2148
2149 cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle;
2150 cq_spec.queue_size = cq->gdma_cq->queue_size;
2151 cq_spec.modr_ctx_id = 0;
2152 cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;
2153
2154 err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ,
2155 &wq_spec, &cq_spec, &apc->tx_qp[i].tx_object);
2156
2157 if (err)
2158 goto out;
2159
2160 txq->gdma_sq->id = wq_spec.queue_index;
2161 cq->gdma_cq->id = cq_spec.queue_index;
2162
2163 txq->gdma_sq->mem_info.dma_region_handle =
2164 GDMA_INVALID_DMA_REGION;
2165 cq->gdma_cq->mem_info.dma_region_handle =
2166 GDMA_INVALID_DMA_REGION;
2167
2168 txq->gdma_txq_id = txq->gdma_sq->id;
2169
2170 cq->gdma_id = cq->gdma_cq->id;
2171
2172 mana_dbg(NULL,
2173 "txq %d, txq gdma id %d, txq cq gdma id %d\n",
2174 i, txq->gdma_txq_id, cq->gdma_id);
2175
2176 if (cq->gdma_id >= gc->max_num_cqs) {
2177 if_printf(net, "CQ id %u too large.\n", cq->gdma_id);
2178 err = EINVAL;
2179 goto out;
2180 }
2181
2182 gc->cq_table[cq->gdma_id] = cq->gdma_cq;
2183
2184 /* Initialize tx specific data */
2185 txq->tx_buf_info = malloc(apc->tx_queue_size *
2186 sizeof(struct mana_send_buf_info),
2187 M_DEVBUF, M_WAITOK | M_ZERO);
2188
2189 snprintf(txq->txq_mtx_name, nitems(txq->txq_mtx_name),
2190 "mana:tx(%d)", i);
2191 mtx_init(&txq->txq_mtx, txq->txq_mtx_name, NULL, MTX_DEF);
2192
2193 txq->txq_br = buf_ring_alloc(4 * apc->tx_queue_size,
2194 M_DEVBUF, M_WAITOK, &txq->txq_mtx);
2195
2196 /* Allocate taskqueue for deferred send */
2197 TASK_INIT(&txq->enqueue_task, 0, mana_xmit_taskfunc, txq);
2198 txq->enqueue_tq = taskqueue_create_fast("mana_tx_enque",
2199 M_NOWAIT, taskqueue_thread_enqueue, &txq->enqueue_tq);
2200 if (unlikely(txq->enqueue_tq == NULL)) {
2201 if_printf(net,
2202 "Unable to create tx %d enqueue task queue\n", i);
2203 err = ENOMEM;
2204 goto out;
2205 }
2206 taskqueue_start_threads(&txq->enqueue_tq, 1, PI_NET,
2207 "mana txq p%u-tx%d", apc->port_idx, i);
2208
2209 mana_alloc_counters((counter_u64_t *)&txq->stats,
2210 sizeof(txq->stats));
2211
2212 /* Allocate and start the cleanup task on CQ */
2213 cq->do_not_ring_db = false;
2214
2215 NET_TASK_INIT(&cq->cleanup_task, 0, mana_poll, cq);
2216 cq->cleanup_tq =
2217 taskqueue_create_fast("mana tx cq cleanup",
2218 M_WAITOK, taskqueue_thread_enqueue,
2219 &cq->cleanup_tq);
2220
2221 if (apc->last_tx_cq_bind_cpu < 0)
2222 apc->last_tx_cq_bind_cpu = CPU_FIRST();
2223 cq->cpu = apc->last_tx_cq_bind_cpu;
2224 apc->last_tx_cq_bind_cpu = CPU_NEXT(apc->last_tx_cq_bind_cpu);
2225
2226 if (apc->bind_cleanup_thread_cpu) {
2227 cpuset_t cpu_mask;
2228 CPU_SETOF(cq->cpu, &cpu_mask);
2229 taskqueue_start_threads_cpuset(&cq->cleanup_tq,
2230 1, PI_NET, &cpu_mask,
2231 "mana cq p%u-tx%u-cpu%d",
2232 apc->port_idx, txq->idx, cq->cpu);
2233 } else {
2234 taskqueue_start_threads(&cq->cleanup_tq, 1,
2235 PI_NET, "mana cq p%u-tx%u",
2236 apc->port_idx, txq->idx);
2237 }
2238
2239 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
2240 }
2241
2242 return 0;
2243 out:
2244 mana_destroy_txq(apc);
2245 return err;
2246 }
2247
2248 static void
2249 mana_destroy_rxq(struct mana_port_context *apc, struct mana_rxq *rxq,
2250 bool validate_state)
2251 {
2252 struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
2253 struct mana_recv_buf_oob *rx_oob;
2254 int i;
2255
2256 if (!rxq)
2257 return;
2258
2259 if (validate_state) {
2260 /*
2261 * XXX Cancel and drain cleanup task queue here.
2262 */
2263 ;
2264 }
2265
2266 mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
2267
2268 mana_deinit_cq(apc, &rxq->rx_cq);
2269
2270 mana_free_counters((counter_u64_t *)&rxq->stats,
2271 sizeof(rxq->stats));
2272
2273 /* Free LRO resources */
2274 tcp_lro_free(&rxq->lro);
2275
2276 for (i = 0; i < rxq->num_rx_buf; i++) {
2277 rx_oob = &rxq->rx_oobs[i];
2278
2279 if (rx_oob->mbuf)
2280 mana_unload_rx_mbuf(apc, rxq, rx_oob, true);
2281
2282 bus_dmamap_destroy(apc->rx_buf_tag, rx_oob->dma_map);
2283 }
2284
2285 if (rxq->gdma_rq)
2286 mana_gd_destroy_queue(gc, rxq->gdma_rq);
2287
2288 free(rxq, M_DEVBUF);
2289 }
2290
2291 #define MANA_WQE_HEADER_SIZE 16
2292 #define MANA_WQE_SGE_SIZE 16
2293
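/*
 * Create a DMA map and a receive WQE request for every rx buffer, and
 * report how much RQ and CQ space the posted WQEs will consume so the
 * caller can size the GDMA queues.
 */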
2294 static int
2295 mana_alloc_rx_wqe(struct mana_port_context *apc,
2296 struct mana_rxq *rxq, uint32_t *rxq_size, uint32_t *cq_size)
2297 {
2298 struct mana_recv_buf_oob *rx_oob;
2299 uint32_t buf_idx;
2300 int err;
2301
2302 if (rxq->datasize == 0) {
2303 mana_err(NULL,
2304 "WARNING: Invalid rxq datasize %u\n", rxq->datasize);
2305 }
2306
2307 *rxq_size = 0;
2308 *cq_size = 0;
2309
2310 for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
2311 rx_oob = &rxq->rx_oobs[buf_idx];
2312 memset(rx_oob, 0, sizeof(*rx_oob));
2313
2314 err = bus_dmamap_create(apc->rx_buf_tag, 0,
2315 &rx_oob->dma_map);
2316 if (err) {
2317 mana_err(NULL,
2318 "Failed to create rx DMA map for buf %d\n",
2319 buf_idx);
2320 return err;
2321 }
2322
2323 err = mana_load_rx_mbuf(apc, rxq, rx_oob, true);
2324 if (err) {
2325 mana_err(NULL,
2326 			    "Failed to load rx mbuf for buf %d\n",
2327 buf_idx);
2328 bus_dmamap_destroy(apc->rx_buf_tag, rx_oob->dma_map);
2329 return err;
2330 }
2331
2332 rx_oob->wqe_req.sgl = rx_oob->sgl;
2333 rx_oob->wqe_req.num_sge = rx_oob->num_sge;
2334 rx_oob->wqe_req.inline_oob_size = 0;
2335 rx_oob->wqe_req.inline_oob_data = NULL;
2336 rx_oob->wqe_req.flags = 0;
2337 rx_oob->wqe_req.client_data_unit = 0;
2338
2339 *rxq_size += ALIGN(MANA_WQE_HEADER_SIZE +
2340 MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32);
2341 *cq_size += COMP_ENTRY_SIZE;
2342 }
2343
2344 return 0;
2345 }
2346
2347 static int
2348 mana_push_wqe(struct mana_rxq *rxq)
2349 {
2350 struct mana_recv_buf_oob *rx_oob;
2351 uint32_t buf_idx;
2352 int err;
2353
2354 for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
2355 rx_oob = &rxq->rx_oobs[buf_idx];
2356
2357 err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req,
2358 &rx_oob->wqe_inf);
2359 if (err)
2360 return ENOSPC;
2361 }
2362
2363 return 0;
2364 }
2365
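/*
 * Pick the smallest mbuf cluster size (MCLBYTES, MJUMPAGESIZE, MJUM9BYTES
 * or MJUM16BYTES) that can hold the configured frame size; anything larger
 * than a 16 KB jumbo cluster is clamped to MJUM16BYTES with a warning.
 */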
2366 static uint32_t
2367 mana_calc_rx_datasize(struct mana_port_context *apc)
2368 {
2369 uint32_t effective_mtu = 0;
2370
2371 if (apc->frame_size > MJUM16BYTES) {
2372 mana_err(NULL, "mana frame_size %u is too big\n",
2373 apc->frame_size);
2374 effective_mtu = MJUM16BYTES;
2375 } else if (apc->frame_size > MJUM9BYTES) {
2376 effective_mtu = MJUM16BYTES;
2377 } else if (apc->frame_size > MJUMPAGESIZE) {
2378 effective_mtu = MJUM9BYTES;
2379 } else if (apc->frame_size > MCLBYTES) {
2380 effective_mtu = MJUMPAGESIZE;
2381 } else {
2382 effective_mtu = MCLBYTES;
2383 }
2384
2385 return effective_mtu;
2386 }
2387
2388 static struct mana_rxq *
2389 mana_create_rxq(struct mana_port_context *apc, uint32_t rxq_idx,
2390 struct mana_eq *eq, if_t ndev)
2391 {
2392 struct gdma_dev *gd = apc->ac->gdma_dev;
2393 struct mana_obj_spec wq_spec;
2394 struct mana_obj_spec cq_spec;
2395 struct gdma_queue_spec spec;
2396 struct mana_cq *cq = NULL;
2397 uint32_t cq_size, rq_size;
2398 struct gdma_context *gc;
2399 struct mana_rxq *rxq;
2400 int err;
2401
2402 gc = gd->gdma_context;
2403
2404 rxq = malloc(sizeof(*rxq) +
2405 apc->rx_queue_size * sizeof(struct mana_recv_buf_oob),
2406 M_DEVBUF, M_WAITOK | M_ZERO);
2407 rxq->ndev = ndev;
2408 rxq->num_rx_buf = apc->rx_queue_size;
2409 rxq->rxq_idx = rxq_idx;
2410
2411 rxq->datasize = mana_calc_rx_datasize(apc);
2412 mana_dbg(NULL, "Setting rxq %d datasize %d\n",
2413 rxq_idx, rxq->datasize);
2414
2415 /*
2416 * Two steps to set the mbuf refill_thresh.
2417 * 1) If mana_rx_refill_threshold is set, honor it.
2418 * Set to default value otherwise.
2419 	 * 2) Select the smaller of 1) above and 1/4 of the
2420 	 *    number of rx buffers.
2421 */
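	/*
	 * For example, with 1024 rx buffers the threshold can never exceed
	 * 1024 / 4 = 256, regardless of the tunable.
	 */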
2422 if (mana_rx_refill_threshold != 0)
2423 rxq->refill_thresh = mana_rx_refill_threshold;
2424 else
2425 rxq->refill_thresh = MANA_RX_REFILL_THRESH;
2426 rxq->refill_thresh = min_t(uint32_t,
2427 rxq->num_rx_buf / 4, rxq->refill_thresh);
2428
2429 mana_dbg(NULL, "Setting rxq %d refill thresh %u\n",
2430 rxq_idx, rxq->refill_thresh);
2431
2432 rxq->rxobj = INVALID_MANA_HANDLE;
2433
2434 err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
2435 if (err)
2436 goto out;
2437
2438 /* Create LRO for the RQ */
2439 if (if_getcapenable(ndev) & IFCAP_LRO) {
2440 err = tcp_lro_init(&rxq->lro);
2441 if (err) {
2442 if_printf(ndev, "Failed to create LRO for rxq %d\n",
2443 rxq_idx);
2444 } else {
2445 rxq->lro.ifp = ndev;
2446 }
2447 }
2448
2449 mana_alloc_counters((counter_u64_t *)&rxq->stats,
2450 sizeof(rxq->stats));
2451
2452 rq_size = ALIGN(rq_size, PAGE_SIZE);
2453 cq_size = ALIGN(cq_size, PAGE_SIZE);
2454
2455 /* Create RQ */
2456 memset(&spec, 0, sizeof(spec));
2457 spec.type = GDMA_RQ;
2458 spec.monitor_avl_buf = true;
2459 spec.queue_size = rq_size;
2460 err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq);
2461 if (err)
2462 goto out;
2463
2464 /* Create RQ's CQ */
2465 cq = &rxq->rx_cq;
2466 cq->type = MANA_CQ_TYPE_RX;
2467 cq->rxq = rxq;
2468
2469 memset(&spec, 0, sizeof(spec));
2470 spec.type = GDMA_CQ;
2471 spec.monitor_avl_buf = false;
2472 spec.queue_size = cq_size;
2473 spec.cq.callback = mana_schedule_task;
2474 spec.cq.parent_eq = eq->eq;
2475 spec.cq.context = cq;
2476 err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
2477 if (err)
2478 goto out;
2479
2480 memset(&wq_spec, 0, sizeof(wq_spec));
2481 memset(&cq_spec, 0, sizeof(cq_spec));
2482 wq_spec.gdma_region = rxq->gdma_rq->mem_info.dma_region_handle;
2483 wq_spec.queue_size = rxq->gdma_rq->queue_size;
2484
2485 cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle;
2486 cq_spec.queue_size = cq->gdma_cq->queue_size;
2487 cq_spec.modr_ctx_id = 0;
2488 cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;
2489
2490 err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ,
2491 &wq_spec, &cq_spec, &rxq->rxobj);
2492 if (err)
2493 goto out;
2494
2495 rxq->gdma_rq->id = wq_spec.queue_index;
2496 cq->gdma_cq->id = cq_spec.queue_index;
2497
2498 rxq->gdma_rq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
2499 cq->gdma_cq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
2500
2501 rxq->gdma_id = rxq->gdma_rq->id;
2502 cq->gdma_id = cq->gdma_cq->id;
2503
2504 err = mana_push_wqe(rxq);
2505 if (err)
2506 goto out;
2507
2508 if (cq->gdma_id >= gc->max_num_cqs) {
2509 err = EINVAL;
2510 goto out;
2511 }
2512
2513 gc->cq_table[cq->gdma_id] = cq->gdma_cq;
2514
2515 /* Allocate and start the cleanup task on CQ */
2516 cq->do_not_ring_db = false;
2517
2518 NET_TASK_INIT(&cq->cleanup_task, 0, mana_poll, cq);
2519 cq->cleanup_tq =
2520 taskqueue_create_fast("mana rx cq cleanup",
2521 M_WAITOK, taskqueue_thread_enqueue,
2522 &cq->cleanup_tq);
2523
2524 if (apc->last_rx_cq_bind_cpu < 0)
2525 apc->last_rx_cq_bind_cpu = CPU_FIRST();
2526 cq->cpu = apc->last_rx_cq_bind_cpu;
2527 apc->last_rx_cq_bind_cpu = CPU_NEXT(apc->last_rx_cq_bind_cpu);
2528
2529 if (apc->bind_cleanup_thread_cpu) {
2530 cpuset_t cpu_mask;
2531 CPU_SETOF(cq->cpu, &cpu_mask);
2532 taskqueue_start_threads_cpuset(&cq->cleanup_tq,
2533 1, PI_NET, &cpu_mask,
2534 "mana cq p%u-rx%u-cpu%d",
2535 apc->port_idx, rxq->rxq_idx, cq->cpu);
2536 } else {
2537 taskqueue_start_threads(&cq->cleanup_tq, 1,
2538 PI_NET, "mana cq p%u-rx%u",
2539 apc->port_idx, rxq->rxq_idx);
2540 }
2541
2542 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
2543 out:
2544 if (!err)
2545 return rxq;
2546
2547 if_printf(ndev, "Failed to create RXQ: err = %d\n", err);
2548
2549 mana_destroy_rxq(apc, rxq, false);
2550
2551 if (cq)
2552 mana_deinit_cq(apc, cq);
2553
2554 return NULL;
2555 }
2556
2557 static int
2558 mana_add_rx_queues(struct mana_port_context *apc, if_t ndev)
2559 {
2560 struct mana_context *ac = apc->ac;
2561 struct mana_rxq *rxq;
2562 int err = 0;
2563 int i;
2564
2565 for (i = 0; i < apc->num_queues; i++) {
2566 rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev);
2567 if (!rxq) {
2568 err = ENOMEM;
2569 goto out;
2570 }
2571
2572 apc->rxqs[i] = rxq;
2573 }
2574
2575 apc->default_rxobj = apc->rxqs[0]->rxobj;
2576 out:
2577 return err;
2578 }
2579
2580 static void
2581 mana_destroy_vport(struct mana_port_context *apc)
2582 {
2583 struct mana_rxq *rxq;
2584 uint32_t rxq_idx;
2585
2586 for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
2587 rxq = apc->rxqs[rxq_idx];
2588 if (!rxq)
2589 continue;
2590
2591 mana_destroy_rxq(apc, rxq, true);
2592 apc->rxqs[rxq_idx] = NULL;
2593 }
2594
2595 mana_destroy_txq(apc);
2596
2597 mana_uncfg_vport(apc);
2598 }
2599
2600 static int
2601 mana_create_vport(struct mana_port_context *apc, if_t net)
2602 {
2603 struct gdma_dev *gd = apc->ac->gdma_dev;
2604 int err;
2605
2606 apc->default_rxobj = INVALID_MANA_HANDLE;
2607
2608 err = mana_cfg_vport(apc, gd->pdid, gd->doorbell);
2609 if (err)
2610 return err;
2611
2612 return mana_create_txq(apc, net);
2613 }
2614
2615
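/* Spread the RSS indirection table entries round-robin across the RX queues. */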
2616 static void mana_rss_table_init(struct mana_port_context *apc)
2617 {
2618 int i;
2619
2620 for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
2621 apc->indir_table[i] = i % apc->num_queues;
2622 }
2623
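/*
 * Push the RSS state to the hardware: refresh the per-entry RXQ object
 * table when requested, reprogram vPort steering, and fence the RQs
 * before returning.
 */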
2624 int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
2625 bool update_hash, bool update_tab)
2626 {
2627 uint32_t queue_idx;
2628 int err;
2629 int i;
2630
2631 if (update_tab) {
2632 for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
2633 queue_idx = apc->indir_table[i];
2634 apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj;
2635 }
2636 }
2637
2638 err = mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab);
2639 if (err)
2640 return err;
2641
2642 mana_fence_rqs(apc);
2643
2644 return 0;
2645 }
2646
2647 static int
2648 mana_init_port(if_t ndev)
2649 {
2650 struct mana_port_context *apc = if_getsoftc(ndev);
2651 uint32_t max_txq, max_rxq, max_queues;
2652 int port_idx = apc->port_idx;
2653 uint32_t num_indirect_entries;
2654 int err;
2655
2656 err = mana_init_port_context(apc);
2657 if (err)
2658 return err;
2659
2660 err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq,
2661 &num_indirect_entries);
2662 if (err) {
2663 if_printf(ndev, "Failed to query info for vPort %d\n",
2664 port_idx);
2665 goto reset_apc;
2666 }
2667
2668 max_queues = min_t(uint32_t, max_txq, max_rxq);
2669 if (apc->max_queues > max_queues)
2670 apc->max_queues = max_queues;
2671
2672 if (apc->num_queues > apc->max_queues)
2673 apc->num_queues = apc->max_queues;
2674
2675 return 0;
2676
2677 reset_apc:
2678 bus_dma_tag_destroy(apc->rx_buf_tag);
2679 apc->rx_buf_tag = NULL;
2680 free(apc->rxqs, M_DEVBUF);
2681 apc->rxqs = NULL;
2682 return err;
2683 }
2684
2685 int
2686 mana_alloc_queues(if_t ndev)
2687 {
2688 struct mana_port_context *apc = if_getsoftc(ndev);
2689 int err;
2690
2691 err = mana_create_vport(apc, ndev);
2692 if (err)
2693 return err;
2694
2695 err = mana_add_rx_queues(apc, ndev);
2696 if (err)
2697 goto destroy_vport;
2698
2699 apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE;
2700
2701 mana_rss_table_init(apc);
2702
2703 err = mana_config_rss(apc, TRI_STATE_TRUE, true, true);
2704 if (err)
2705 goto destroy_vport;
2706
2707 return 0;
2708
2709 destroy_vport:
2710 mana_destroy_vport(apc);
2711 return err;
2712 }
2713
2714 static int
2715 mana_up(struct mana_port_context *apc)
2716 {
2717 int err;
2718
2719 mana_dbg(NULL, "mana_up called\n");
2720
2721 err = mana_alloc_queues(apc->ndev);
2722 if (err) {
2723 		mana_err(NULL, "Failed to alloc mana queues: %d\n", err);
2724 return err;
2725 }
2726
2727 /* Add queue specific sysctl */
2728 mana_sysctl_add_queues(apc);
2729
2730 apc->port_is_up = true;
2731
2732 /* Ensure port state updated before txq state */
2733 wmb();
2734
2735 if_link_state_change(apc->ndev, LINK_STATE_UP);
2736 if_setdrvflagbits(apc->ndev, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
2737
2738 return 0;
2739 }
2740
2741
2742 static void
2743 mana_init(void *arg)
2744 {
2745 struct mana_port_context *apc = (struct mana_port_context *)arg;
2746
2747 MANA_APC_LOCK_LOCK(apc);
2748 if (!apc->port_is_up) {
2749 mana_up(apc);
2750 }
2751 MANA_APC_LOCK_UNLOCK(apc);
2752 }
2753
2754 static int
2755 mana_dealloc_queues(if_t ndev)
2756 {
2757 struct mana_port_context *apc = if_getsoftc(ndev);
2758 struct mana_txq *txq;
2759 int i, err;
2760
2761 if (apc->port_is_up)
2762 return EINVAL;
2763
2764 /* No packet can be transmitted now since apc->port_is_up is false.
2765 * There is still a tiny chance that mana_poll_tx_cq() can re-enable
2766 	 * a txq because it may not see apc->port_is_up being cleared to
2767 	 * false in time, but that doesn't matter since mana_start_xmit()
2768 	 * drops any new packets due to apc->port_is_up being false.
2769 *
2770 * Drain all the in-flight TX packets
2771 */
2772 for (i = 0; i < apc->num_queues; i++) {
2773 txq = &apc->tx_qp[i].txq;
2774
2775 struct mana_cq *tx_cq = &apc->tx_qp[i].tx_cq;
2776 struct mana_cq *rx_cq = &(apc->rxqs[i]->rx_cq);
2777
2778 tx_cq->do_not_ring_db = true;
2779 rx_cq->do_not_ring_db = true;
2780
2781 /* Schedule a cleanup task */
2782 taskqueue_enqueue(tx_cq->cleanup_tq, &tx_cq->cleanup_task);
2783
2784 while (atomic_read(&txq->pending_sends) > 0)
2785 usleep_range(1000, 2000);
2786 }
2787
2788 	/* At this point the queues can no longer be woken up, because
2789 	 * mana_poll_tx_cq() is guaranteed not to be running anymore.
2790 	 */
2791
2792 apc->rss_state = TRI_STATE_FALSE;
2793 err = mana_config_rss(apc, TRI_STATE_FALSE, false, false);
2794 if (err) {
2795 if_printf(ndev, "Failed to disable vPort: %d\n", err);
2796 return err;
2797 }
2798
2799 mana_destroy_vport(apc);
2800
2801 return 0;
2802 }
2803
2804 static int
2805 mana_down(struct mana_port_context *apc)
2806 {
2807 int err = 0;
2808
2809 apc->port_st_save = apc->port_is_up;
2810 apc->port_is_up = false;
2811
2812 /* Ensure port state updated before txq state */
2813 wmb();
2814
2815 if (apc->port_st_save) {
2816 if_setdrvflagbits(apc->ndev, IFF_DRV_OACTIVE,
2817 IFF_DRV_RUNNING);
2818 if_link_state_change(apc->ndev, LINK_STATE_DOWN);
2819
2820 mana_sysctl_free_queues(apc);
2821
2822 err = mana_dealloc_queues(apc->ndev);
2823 if (err) {
2824 if_printf(apc->ndev,
2825 "Failed to bring down mana interface: %d\n", err);
2826 }
2827 }
2828
2829 return err;
2830 }
2831
2832 int
2833 mana_detach(if_t ndev)
2834 {
2835 struct mana_port_context *apc = if_getsoftc(ndev);
2836 int err;
2837
2838 ether_ifdetach(ndev);
2839
2840 if (!apc)
2841 return 0;
2842
2843 MANA_APC_LOCK_LOCK(apc);
2844 err = mana_down(apc);
2845 MANA_APC_LOCK_UNLOCK(apc);
2846
2847 mana_cleanup_port_context(apc);
2848
2849 MANA_APC_LOCK_DESTROY(apc);
2850
2851 free(apc, M_DEVBUF);
2852
2853 return err;
2854 }
2855
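/*
 * The tx/rx queue size helpers round a requested buffer count up to the
 * next power of two (e.g. a request of 300 becomes 512) and fall back to
 * the default when the request is zero or outside the supported range.
 */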
2856 static unsigned int
2857 mana_get_tx_queue_size(int port_idx, unsigned int request_size)
2858 {
2859 unsigned int new_size;
2860
2861 if (request_size == 0)
2862 /* Uninitialized */
2863 new_size = DEF_SEND_BUFFERS_PER_QUEUE;
2864 else
2865 new_size = roundup_pow_of_two(request_size);
2866
2867 if (new_size < MIN_SEND_BUFFERS_PER_QUEUE ||
2868 new_size > MAX_SEND_BUFFERS_PER_QUEUE) {
2869 mana_info(NULL, "mana port %d: requested tx buffer "
2870 "size %u out of allowable range (%u - %u), "
2871 "setting to default\n",
2872 port_idx, request_size,
2873 MIN_SEND_BUFFERS_PER_QUEUE,
2874 MAX_SEND_BUFFERS_PER_QUEUE);
2875 new_size = DEF_SEND_BUFFERS_PER_QUEUE;
2876 }
2877 mana_info(NULL, "mana port %d: tx buffer size %u "
2878 "(%u requested)\n",
2879 port_idx, new_size, request_size);
2880
2881 return (new_size);
2882 }
2883
2884 static unsigned int
2885 mana_get_rx_queue_size(int port_idx, unsigned int request_size)
2886 {
2887 unsigned int new_size;
2888
2889 if (request_size == 0)
2890 /* Uninitialized */
2891 new_size = DEF_RX_BUFFERS_PER_QUEUE;
2892 else
2893 new_size = roundup_pow_of_two(request_size);
2894
2895 if (new_size < MIN_RX_BUFFERS_PER_QUEUE ||
2896 new_size > MAX_RX_BUFFERS_PER_QUEUE) {
2897 mana_info(NULL, "mana port %d: requested rx buffer "
2898 "size %u out of allowable range (%u - %u), "
2899 "setting to default\n",
2900 port_idx, request_size,
2901 MIN_RX_BUFFERS_PER_QUEUE,
2902 MAX_RX_BUFFERS_PER_QUEUE);
2903 new_size = DEF_RX_BUFFERS_PER_QUEUE;
2904 }
2905 mana_info(NULL, "mana port %d: rx buffer size %u "
2906 "(%u requested)\n",
2907 port_idx, new_size, request_size);
2908
2909 return (new_size);
2910 }
2911
2912 static int
2913 mana_probe_port(struct mana_context *ac, int port_idx,
2914 if_t *ndev_storage)
2915 {
2916 struct gdma_context *gc = ac->gdma_dev->gdma_context;
2917 struct mana_port_context *apc;
2918 uint32_t hwassist;
2919 if_t ndev;
2920 int err;
2921
2922 ndev = if_alloc_dev(IFT_ETHER, gc->dev);
2923 *ndev_storage = ndev;
2924
2925 apc = malloc(sizeof(*apc), M_DEVBUF, M_WAITOK | M_ZERO);
2926 apc->ac = ac;
2927 apc->ndev = ndev;
2928 apc->max_queues = gc->max_num_queues;
2929 apc->num_queues = min_t(unsigned int,
2930 gc->max_num_queues, MANA_MAX_NUM_QUEUES);
2931 apc->tx_queue_size = mana_get_tx_queue_size(port_idx,
2932 mana_tx_req_size);
2933 apc->rx_queue_size = mana_get_rx_queue_size(port_idx,
2934 mana_rx_req_size);
2935 apc->port_handle = INVALID_MANA_HANDLE;
2936 apc->port_idx = port_idx;
2937 apc->last_tx_cq_bind_cpu = -1;
2938 apc->last_rx_cq_bind_cpu = -1;
2939 apc->vport_use_count = 0;
2940 apc->max_mtu = gc->adapter_mtu - ETHER_HDR_LEN;
2941 apc->min_mtu = MIN_FRAME_SIZE;
2942 apc->mtu = ETHERMTU;
2943 apc->frame_size = apc->mtu + ETHER_HDR_LEN;
2944
2945 MANA_APC_LOCK_INIT(apc);
2946
2947 if_initname(ndev, device_get_name(gc->dev), port_idx);
2948 	if_setdev(ndev, gc->dev);
2949 if_setsoftc(ndev, apc);
2950
2951 if_setflags(ndev, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2952 if_setinitfn(ndev, mana_init);
2953 if_settransmitfn(ndev, mana_start_xmit);
2954 if_setqflushfn(ndev, mana_qflush);
2955 if_setioctlfn(ndev, mana_ioctl);
2956 if_setgetcounterfn(ndev, mana_get_counter);
2957
2958 if_setmtu(ndev, apc->mtu);
2959 if_setbaudrate(ndev, IF_Gbps(100));
2960
2961 mana_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE);
2962
2963 err = mana_init_port(ndev);
2964 if (err)
2965 goto reset_apc;
2966
2967 if_setcapabilitiesbit(ndev,
2968 IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
2969 IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
2970 IFCAP_TSO4 | IFCAP_TSO6 |
2971 IFCAP_LRO | IFCAP_LINKSTATE, 0);
2972
2973 /* Enable all available capabilities by default. */
2974 if_setcapenable(ndev, if_getcapabilities(ndev));
2975
2976 /* TSO parameters */
2977 if_sethwtsomax(ndev, MANA_TSO_MAX_SZ -
2978 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
2979 if_sethwtsomaxsegcount(ndev, MAX_MBUF_FRAGS);
2980 if_sethwtsomaxsegsize(ndev, PAGE_SIZE);
2981
2982 hwassist = 0;
2983 if (if_getcapenable(ndev) & (IFCAP_TSO4 | IFCAP_TSO6))
2984 hwassist |= CSUM_TSO;
2985 if (if_getcapenable(ndev) & IFCAP_TXCSUM)
2986 hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2987 if (if_getcapenable(ndev) & IFCAP_TXCSUM_IPV6)
2988 hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2989 mana_dbg(NULL, "set hwassist 0x%x\n", hwassist);
2990 if_sethwassist(ndev, hwassist);
2991
2992 ifmedia_init(&apc->media, IFM_IMASK,
2993 mana_ifmedia_change, mana_ifmedia_status);
2994 ifmedia_add(&apc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2995 ifmedia_set(&apc->media, IFM_ETHER | IFM_AUTO);
2996
2997 ether_ifattach(ndev, apc->mac_addr);
2998
2999 /* Initialize statistics */
3000 mana_alloc_counters((counter_u64_t *)&apc->port_stats,
3001 sizeof(struct mana_port_stats));
3002 mana_sysctl_add_port(apc);
3003
3004 /* Tell the stack that the interface is not active */
3005 if_setdrvflagbits(ndev, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
3006
3007 return 0;
3008
3009 reset_apc:
3010 free(apc, M_DEVBUF);
3011 *ndev_storage = NULL;
3012 if_printf(ndev, "Failed to probe vPort %d: %d\n", port_idx, err);
3013 if_free(ndev);
3014 return err;
3015 }
3016
3017 int mana_probe(struct gdma_dev *gd)
3018 {
3019 struct gdma_context *gc = gd->gdma_context;
3020 device_t dev = gc->dev;
3021 struct mana_context *ac;
3022 int err;
3023 int i;
3024
3025 device_printf(dev, "%s protocol version: %d.%d.%d\n", DEVICE_NAME,
3026 MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION);
3027
3028 err = mana_gd_register_device(gd);
3029 if (err)
3030 return err;
3031
3032 ac = malloc(sizeof(*ac), M_DEVBUF, M_WAITOK | M_ZERO);
3033 ac->gdma_dev = gd;
3034 ac->num_ports = 1;
3035 gd->driver_data = ac;
3036
3037 err = mana_create_eq(ac);
3038 if (err)
3039 goto out;
3040
3041 err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
3042 MANA_MICRO_VERSION, &ac->num_ports);
3043 if (err)
3044 goto out;
3045
3046 if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
3047 ac->num_ports = MAX_PORTS_IN_MANA_DEV;
3048
3049 for (i = 0; i < ac->num_ports; i++) {
3050 err = mana_probe_port(ac, i, &ac->ports[i]);
3051 if (err) {
3052 device_printf(dev,
3053 "Failed to probe mana port %d\n", i);
3054 break;
3055 }
3056 }
3057
3058 out:
3059 if (err)
3060 mana_remove(gd);
3061
3062 return err;
3063 }
3064
3065 void
3066 mana_remove(struct gdma_dev *gd)
3067 {
3068 struct gdma_context *gc = gd->gdma_context;
3069 struct mana_context *ac = gd->driver_data;
3070 device_t dev = gc->dev;
3071 if_t ndev;
3072 int i;
3073
3074 for (i = 0; i < ac->num_ports; i++) {
3075 ndev = ac->ports[i];
3076 if (!ndev) {
3077 if (i == 0)
3078 device_printf(dev, "No net device to remove\n");
3079 goto out;
3080 }
3081
3082 mana_detach(ndev);
3083
3084 if_free(ndev);
3085 }
3086
3087 mana_destroy_eq(ac);
3088
3089 out:
3090 mana_gd_deregister_device(gd);
3091 gd->driver_data = NULL;
3092 gd->gdma_context = NULL;
3093 free(ac, M_DEVBUF);
3094 }
3095