xref: /linux/drivers/net/wireless/mediatek/mt76/dma.c (revision 91a4855d6c03e770e42f17c798a36a3c46e63de2)
1 // SPDX-License-Identifier: BSD-3-Clause-Clear
2 /*
3  * Copyright (C) 2016 Felix Fietkau <nbd@nbd.name>
4  */
5 
6 #include <linux/dma-mapping.h>
7 #include "mt76.h"
8 #include "dma.h"
9 #include "mt76_connac.h"
10 
/* Allocate one txwi buffer plus its trailing cache entry in a single
 * L1-cache-aligned kzalloc, and DMA-map the txwi portion for device reads.
 * Returns the cache entry (which lives after the txwi bytes), or NULL on
 * allocation/mapping failure.  GFP_ATOMIC: callable from atomic context.
 */
static struct mt76_txwi_cache *
mt76_alloc_txwi(struct mt76_dev *dev)
{
	struct mt76_txwi_cache *t;
	dma_addr_t addr;
	u8 *txwi;
	int size;

	/* layout: [txwi_size bytes for HW descriptor][struct mt76_txwi_cache] */
	size = L1_CACHE_ALIGN(dev->drv->txwi_size + sizeof(*t));
	txwi = kzalloc(size, GFP_ATOMIC);
	if (!txwi)
		return NULL;

	/* only the txwi header is mapped; the cache struct stays CPU-only */
	addr = dma_map_single(dev->dma_dev, txwi, dev->drv->txwi_size,
			      DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(dev->dma_dev, addr))) {
		kfree(txwi);
		return NULL;
	}

	t = (struct mt76_txwi_cache *)(txwi + dev->drv->txwi_size);
	t->dma_addr = addr;

	return t;
}
36 
37 static struct mt76_txwi_cache *
38 mt76_alloc_rxwi(struct mt76_dev *dev)
39 {
40 	struct mt76_txwi_cache *t;
41 
42 	t = kzalloc(L1_CACHE_ALIGN(sizeof(*t)), GFP_ATOMIC);
43 	if (!t)
44 		return NULL;
45 
46 	t->ptr = NULL;
47 	return t;
48 }
49 
50 static struct mt76_txwi_cache *
51 __mt76_get_txwi(struct mt76_dev *dev)
52 {
53 	struct mt76_txwi_cache *t = NULL;
54 
55 	spin_lock(&dev->lock);
56 	if (!list_empty(&dev->txwi_cache)) {
57 		t = list_first_entry(&dev->txwi_cache, struct mt76_txwi_cache,
58 				     list);
59 		list_del(&t->list);
60 	}
61 	spin_unlock(&dev->lock);
62 
63 	return t;
64 }
65 
66 static struct mt76_txwi_cache *
67 __mt76_get_rxwi(struct mt76_dev *dev)
68 {
69 	struct mt76_txwi_cache *t = NULL;
70 
71 	spin_lock_bh(&dev->wed_lock);
72 	if (!list_empty(&dev->rxwi_cache)) {
73 		t = list_first_entry(&dev->rxwi_cache, struct mt76_txwi_cache,
74 				     list);
75 		list_del(&t->list);
76 	}
77 	spin_unlock_bh(&dev->wed_lock);
78 
79 	return t;
80 }
81 
/* Get a txwi cache entry: reuse one from the free list if available,
 * otherwise allocate a fresh one.  May return NULL on allocation failure.
 */
static struct mt76_txwi_cache *
mt76_get_txwi(struct mt76_dev *dev)
{
	struct mt76_txwi_cache *entry;

	entry = __mt76_get_txwi(dev);
	if (!entry)
		entry = mt76_alloc_txwi(dev);

	return entry;
}
92 
/* Get an rxwi cache entry: reuse one from the free list if available,
 * otherwise allocate a fresh one.  May return NULL on allocation failure.
 */
struct mt76_txwi_cache *
mt76_get_rxwi(struct mt76_dev *dev)
{
	struct mt76_txwi_cache *entry;

	entry = __mt76_get_rxwi(dev);
	if (!entry)
		entry = mt76_alloc_rxwi(dev);

	return entry;
}
EXPORT_SYMBOL_GPL(mt76_get_rxwi);
104 
/* Return a txwi cache entry to the free list for reuse.
 * NULL-safe; the entry is pushed to the list head under dev->lock.
 */
void
mt76_put_txwi(struct mt76_dev *dev, struct mt76_txwi_cache *t)
{
	if (!t)
		return;

	spin_lock(&dev->lock);
	list_add(&t->list, &dev->txwi_cache);
	spin_unlock(&dev->lock);
}
EXPORT_SYMBOL_GPL(mt76_put_txwi);
116 
/* Return an rxwi cache entry to the free list for reuse.
 * NULL-safe; uses dev->wed_lock with BHs disabled, matching
 * __mt76_get_rxwi().
 */
void
mt76_put_rxwi(struct mt76_dev *dev, struct mt76_txwi_cache *t)
{
	if (!t)
		return;

	spin_lock_bh(&dev->wed_lock);
	list_add(&t->list, &dev->rxwi_cache);
	spin_unlock_bh(&dev->wed_lock);
}
EXPORT_SYMBOL_GPL(mt76_put_rxwi);
128 
/* Drain the txwi free list, unmapping and freeing every cached entry.
 * local_bh_disable() keeps lock usage consistent with other txwi-cache
 * users while the list is torn down.  Called from mt76_dma_cleanup().
 */
static void
mt76_free_pending_txwi(struct mt76_dev *dev)
{
	struct mt76_txwi_cache *t;

	local_bh_disable();
	while ((t = __mt76_get_txwi(dev)) != NULL) {
		/* undo the mapping done in mt76_alloc_txwi() */
		dma_unmap_single(dev->dma_dev, t->dma_addr, dev->drv->txwi_size,
				 DMA_TO_DEVICE);
		/* the txwi buffer is the kzalloc base; freeing it frees t too */
		kfree(mt76_get_txwi_ptr(dev, t));
	}
	local_bh_enable();
}
142 
/* Drain the rxwi free list, releasing any page-pool buffer still attached
 * to an entry before freeing the entry itself.
 */
void
mt76_free_pending_rxwi(struct mt76_dev *dev)
{
	struct mt76_txwi_cache *t;

	local_bh_disable();
	while ((t = __mt76_get_rxwi(dev)) != NULL) {
		if (t->ptr)
			mt76_put_page_pool_buf(t->ptr, false);
		kfree(t);
	}
	local_bh_enable();
}
EXPORT_SYMBOL_GPL(mt76_free_pending_rxwi);
157 
/* Initialize the magic/sequence counters used by WED RRO queues.
 *
 * The queue counter restarts at 0, while every descriptor is pre-seeded
 * with the *maximum* counter value (CNT - 1) so that no descriptor looks
 * valid until hardware writes a matching count; see the comparisons in
 * mt76_dma_dequeue().  No-op for non-RRO queues.
 */
static void
mt76_dma_queue_magic_cnt_init(struct mt76_dev *dev, struct mt76_queue *q)
{
	if (!mt76_queue_is_wed_rro(q))
		return;

	q->magic_cnt = 0;
	if (mt76_queue_is_wed_rro_ind(q)) {
		struct mt76_wed_rro_desc *rro_desc;
		u32 data1 = FIELD_PREP(RRO_IND_DATA1_MAGIC_CNT_MASK,
				       MT_DMA_WED_IND_CMD_CNT - 1);
		int i;

		rro_desc = (struct mt76_wed_rro_desc *)q->desc;
		for (i = 0; i < q->ndesc; i++) {
			struct mt76_wed_rro_ind *cmd;

			cmd = (struct mt76_wed_rro_ind *)&rro_desc[i];
			cmd->data1 = cpu_to_le32(data1);
		}
	} else if (mt76_queue_is_wed_rro_rxdmad_c(q)) {
		struct mt76_rro_rxdmad_c *dmad = (void *)q->desc;
		u32 data3 = FIELD_PREP(RRO_RXDMAD_DATA3_MAGIC_CNT_MASK,
				       MT_DMA_MAGIC_CNT - 1);
		int i;

		for (i = 0; i < q->ndesc; i++)
			dmad[i].data3 = cpu_to_le32(data3);
	}
}
188 
/* Re-program the queue's hardware registers (ring size, descriptor base)
 * and resynchronize the software head/tail with the hardware dma index.
 * For RRO-enabled queues (except mt7992 with an active NPU) the ring-size
 * register also carries the MT_DMA_RRO_EN flag.
 */
static void
mt76_dma_sync_idx(struct mt76_dev *dev, struct mt76_queue *q)
{
	if ((q->flags & MT_QFLAG_WED_RRO_EN) &&
	    (!is_mt7992(dev) || !mt76_npu_device_active(dev)))
		Q_WRITE(q, ring_size, MT_DMA_RRO_EN | q->ndesc);
	else
		Q_WRITE(q, ring_size, q->ndesc);

	/* NPU tx queues additionally program their own register block */
	if (mt76_queue_is_npu_tx(q)) {
		writel(q->ndesc, &q->regs->ring_size);
		writel(q->desc_dma, &q->regs->desc_base);
	}

	Q_WRITE(q, desc_base, q->desc_dma);
	/* adopt the hardware's current position as both head and tail */
	q->head = Q_READ(q, dma_idx);
	q->tail = q->head;
}
207 
/* Reset a DMA queue: mark all descriptors done (for classic rings),
 * reinitialize RRO magic counters, optionally zero the cpu/dma indices,
 * and resync software state with hardware.  Safe to call with a NULL or
 * unallocated queue.
 */
void mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q,
			  bool reset_idx)
{
	if (!q || !q->ndesc)
		return;

	/* RRO ind/rxdmad_c and NPU rings use different descriptor layouts,
	 * so the MT_DMA_CTL_DMA_DONE pre-fill only applies to classic rings
	 */
	if (!mt76_queue_is_wed_rro_ind(q) &&
	    !mt76_queue_is_wed_rro_rxdmad_c(q) && !mt76_queue_is_npu(q)) {
		int i;

		/* clear descriptors */
		for (i = 0; i < q->ndesc; i++)
			q->desc[i].ctrl = cpu_to_le32(MT_DMA_CTL_DMA_DONE);
	}

	mt76_dma_queue_magic_cnt_init(dev, q);
	if (reset_idx) {
		/* EMI queues keep the cpu index in shared memory instead of
		 * a register
		 */
		if (mt76_queue_is_emi(q))
			*q->emi_cpu_idx = 0;
		else
			Q_WRITE(q, cpu_idx, 0);
		Q_WRITE(q, dma_idx, 0);
	}
	mt76_dma_sync_idx(dev, q);
}
233 
/* Queue one rx buffer at q->head and fill the matching hardware
 * descriptor.  For RRO ind/rxdmad_c queues the descriptor itself is the
 * "buffer" (hardware writes commands into the ring), so no descriptor
 * fields are programmed.  WED rx queues additionally allocate an rxwi
 * and an rx token so the buffer can be reclaimed later by token id.
 *
 * Returns the ring index used, or -ENOMEM on token/rxwi/page failure.
 * Caller must hold the queue lock.
 */
static int
mt76_dma_add_rx_buf(struct mt76_dev *dev, struct mt76_queue *q,
		    struct mt76_queue_buf *buf, void *data)
{
	struct mt76_queue_entry *entry = &q->entry[q->head];
	struct mt76_txwi_cache *txwi = NULL;
	u32 buf1 = 0, ctrl, info = 0;
	struct mt76_desc *desc;
	int idx = q->head;
	int rx_token;

	if (mt76_queue_is_wed_rro_ind(q)) {
		struct mt76_wed_rro_desc *rro_desc;

		/* the ring slot itself is handed out as the entry buffer */
		rro_desc = (struct mt76_wed_rro_desc *)q->desc;
		data = &rro_desc[q->head];
		goto done;
	} else if (mt76_queue_is_wed_rro_rxdmad_c(q)) {
		data = &q->desc[q->head];
		goto done;
	}

	desc = &q->desc[q->head];
	ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len);
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
	/* high 32 bits of the DMA address go into buf1 on 64-bit DMA */
	buf1 = FIELD_PREP(MT_DMA_CTL_SDP0_H, buf->addr >> 32);
#endif

	if (mt76_queue_is_wed_rx(q) || mt76_queue_is_wed_rro_data(q)) {
		txwi = mt76_get_rxwi(dev);
		if (!txwi)
			return -ENOMEM;

		rx_token = mt76_rx_token_consume(dev, data, txwi, buf->addr);
		if (rx_token < 0) {
			mt76_put_rxwi(dev, txwi);
			return -ENOMEM;
		}

		buf1 |= FIELD_PREP(MT_DMA_CTL_TOKEN, rx_token);
		ctrl |= MT_DMA_CTL_TO_HOST;

		/* remember which rx queue this came from for token release */
		txwi->qid = q - dev->q_rx;
	}

	if (mt76_queue_is_wed_rro_msdu_pg(q) &&
	    dev->drv->rx_rro_add_msdu_page) {
		if (dev->drv->rx_rro_add_msdu_page(dev, q, buf->addr, data))
			return -ENOMEM;
	}

	if (q->flags & MT_QFLAG_WED_RRO_EN) {
		info |= FIELD_PREP(MT_DMA_MAGIC_MASK, q->magic_cnt);
		/* bump the magic count every time the ring wraps */
		if ((q->head + 1) == q->ndesc)
			q->magic_cnt = (q->magic_cnt + 1) % MT_DMA_MAGIC_CNT;
	}

	WRITE_ONCE(desc->buf0, cpu_to_le32(buf->addr));
	WRITE_ONCE(desc->buf1, cpu_to_le32(buf1));
	WRITE_ONCE(desc->ctrl, cpu_to_le32(ctrl));
	WRITE_ONCE(desc->info, cpu_to_le32(info));

done:
	entry->dma_addr[0] = buf->addr;
	entry->dma_len[0] = buf->len;
	entry->txwi = txwi;
	entry->buf = data;
	entry->wcid = 0xffff;
	entry->skip_buf1 = true;
	q->head = (q->head + 1) % q->ndesc;
	q->queued++;

	return idx;
}
308 
/* Queue tx buffers into the ring, packing two buffers per hardware
 * descriptor (buf0/buf1).  The final descriptor records the skb and txwi
 * so they can be released on completion.  Returns the index of the last
 * descriptor written.  Caller must hold the queue lock.
 *
 * When txwi is set, the first entry is tagged with DMA_DUMMY_DATA and
 * skip_buf0 so cleanup neither unmaps nor frees the txwi via this slot.
 */
static int
mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q,
		 struct mt76_queue_buf *buf, int nbufs, u32 info,
		 struct sk_buff *skb, void *txwi)
{
	struct mt76_queue_entry *entry;
	struct mt76_desc *desc;
	int i, idx = -1;
	u32 ctrl, next;

	if (txwi) {
		q->entry[q->head].txwi = DMA_DUMMY_DATA;
		q->entry[q->head].skip_buf0 = true;
	}

	/* consume two queue_bufs per descriptor */
	for (i = 0; i < nbufs; i += 2, buf += 2) {
		u32 buf0 = buf[0].addr, buf1 = 0;

		idx = q->head;
		next = (q->head + 1) % q->ndesc;

		desc = &q->desc[idx];
		entry = &q->entry[idx];

		if (buf[0].skip_unmap)
			entry->skip_buf0 = true;
		/* no second buffer when this is the last (odd) one */
		entry->skip_buf1 = i == nbufs - 1;

		entry->dma_addr[0] = buf[0].addr;
		entry->dma_len[0] = buf[0].len;

		ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len);
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
		info |= FIELD_PREP(MT_DMA_CTL_SDP0_H, buf[0].addr >> 32);
#endif
		if (i < nbufs - 1) {
			entry->dma_addr[1] = buf[1].addr;
			entry->dma_len[1] = buf[1].len;
			buf1 = buf[1].addr;
			ctrl |= FIELD_PREP(MT_DMA_CTL_SD_LEN1, buf[1].len);
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
			info |= FIELD_PREP(MT_DMA_CTL_SDP1_H,
					   buf[1].addr >> 32);
#endif
			if (buf[1].skip_unmap)
				entry->skip_buf1 = true;
		}

		/* mark which buffer slot carries the last segment */
		if (i == nbufs - 1)
			ctrl |= MT_DMA_CTL_LAST_SEC0;
		else if (i == nbufs - 2)
			ctrl |= MT_DMA_CTL_LAST_SEC1;

		WRITE_ONCE(desc->buf0, cpu_to_le32(buf0));
		WRITE_ONCE(desc->buf1, cpu_to_le32(buf1));
		WRITE_ONCE(desc->info, cpu_to_le32(info));
		WRITE_ONCE(desc->ctrl, cpu_to_le32(ctrl));

		q->head = next;
		q->queued++;
	}

	/* completion metadata lives on the last descriptor of the frame */
	q->entry[idx].txwi = txwi;
	q->entry[idx].skb = skb;
	q->entry[idx].wcid = 0xffff;

	return idx;
}
377 
/* Unmap the (up to two) tx buffers of ring slot idx, copy the entry out
 * to prev_e for the caller to complete, and clear the slot.  The
 * DMA_DUMMY_DATA marker (set in mt76_dma_add_buf for txwi slots) is
 * translated back to a NULL txwi so nothing is freed for it.
 */
static void
mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
			struct mt76_queue_entry *prev_e)
{
	struct mt76_queue_entry *e = &q->entry[idx];

	if (!e->skip_buf0)
		dma_unmap_single(dev->dma_dev, e->dma_addr[0], e->dma_len[0],
				 DMA_TO_DEVICE);

	if (!e->skip_buf1)
		dma_unmap_single(dev->dma_dev, e->dma_addr[1], e->dma_len[1],
				 DMA_TO_DEVICE);

	if (e->txwi == DMA_DUMMY_DATA)
		e->txwi = NULL;

	*prev_e = *e;
	memset(e, 0, sizeof(*e));
}
398 
/* Publish q->head to hardware so DMA starts processing new descriptors.
 * The wmb() orders all prior descriptor writes before the index update;
 * EMI queues write the index to shared memory, others to a register.
 */
static void
mt76_dma_kick_queue(struct mt76_dev *dev, struct mt76_queue *q)
{
	wmb();
	if (mt76_queue_is_emi(q))
		*q->emi_cpu_idx = cpu_to_le16(q->head);
	else
		Q_WRITE(q, cpu_idx, q->head);
}
408 
/* Reap completed tx descriptors from q->tail up to the hardware dma
 * index (or the whole ring when flush is set), completing each frame and
 * recycling its txwi.  With flush, the queue is also resynced and
 * re-kicked.  Wakes tx waiters once the queue drains.
 */
static void
mt76_dma_tx_cleanup(struct mt76_dev *dev, struct mt76_queue *q, bool flush)
{
	struct mt76_queue_entry entry;
	int last;

	if (!q || !q->ndesc)
		return;

	spin_lock_bh(&q->cleanup_lock);
	if (flush)
		last = -1;	/* -1 never matches q->tail: drain everything */
	else
		last = Q_READ(q, dma_idx);

	while (q->queued > 0 && q->tail != last) {
		mt76_dma_tx_cleanup_idx(dev, q, q->tail, &entry);
		mt76_npu_txdesc_cleanup(q, q->tail);
		mt76_queue_tx_complete(dev, q, &entry);

		if (entry.txwi) {
			if (!(dev->drv->drv_flags & MT_DRV_TXWI_NO_FREE))
				mt76_put_txwi(dev, entry.txwi);
		}

		/* re-read the hw index in case more frames completed */
		if (!flush && q->tail == last)
			last = Q_READ(q, dma_idx);
	}
	spin_unlock_bh(&q->cleanup_lock);

	if (flush) {
		spin_lock_bh(&q->lock);
		mt76_dma_sync_idx(dev, q);
		mt76_dma_kick_queue(dev, q);
		spin_unlock_bh(&q->lock);
	}

	if (!q->queued)
		wake_up(&dev->tx_wait);
}
449 
/* Resolve an rxdmad_c completion descriptor into the rx data buffer it
 * refers to.  The descriptor carries an rx token id; releasing the token
 * yields the rxwi that owns the buffer and remembers the original rx
 * queue (t->qid) whose page pool mapped it.
 *
 * Returns the buffer pointer, or ERR_PTR(-EAGAIN) when the token is gone
 * or hardware flagged the frame as a repeat/old packet (buffer recycled
 * here).  Note: on success *len/*more are taken from the descriptor, not
 * the ring ctrl word.
 */
static void *
mt76_dma_get_rxdmad_c_buf(struct mt76_dev *dev, struct mt76_queue *q,
			  int idx, int *len, bool *more)
{
	struct mt76_queue_entry *e = &q->entry[idx];
	struct mt76_rro_rxdmad_c *dmad = e->buf;
	u32 data1 = le32_to_cpu(dmad->data1);
	u32 data2 = le32_to_cpu(dmad->data2);
	struct mt76_txwi_cache *t;
	u16 rx_token_id;
	u8 ind_reason;
	void *buf;

	rx_token_id = FIELD_GET(RRO_RXDMAD_DATA2_RX_TOKEN_ID_MASK, data2);
	t = mt76_rx_token_release(dev, rx_token_id);
	if (!t)
		return ERR_PTR(-EAGAIN);

	/* sync against the queue that originally posted the buffer */
	q = &dev->q_rx[t->qid];
	dma_sync_single_for_cpu(dev->dma_dev, t->dma_addr,
				SKB_WITH_OVERHEAD(q->buf_size),
				page_pool_get_dma_dir(q->page_pool));

	if (len)
		*len = FIELD_GET(RRO_RXDMAD_DATA1_SDL0_MASK, data1);
	if (more)
		*more = !FIELD_GET(RRO_RXDMAD_DATA1_LS_MASK, data1);

	buf = t->ptr;
	ind_reason = FIELD_GET(RRO_RXDMAD_DATA2_IND_REASON_MASK, data2);
	if (ind_reason == MT_DMA_WED_IND_REASON_REPEAT ||
	    ind_reason == MT_DMA_WED_IND_REASON_OLDPKT) {
		mt76_put_page_pool_buf(buf, false);
		buf = ERR_PTR(-EAGAIN);
	}
	t->ptr = NULL;
	t->dma_addr = 0;

	mt76_put_rxwi(dev, t);

	return buf;
}
492 
/* Extract the data buffer for ring slot idx and report its length/flags.
 *
 * Three paths:
 *  - rxdmad_c queues (non-flush): delegate to mt76_dma_get_rxdmad_c_buf();
 *  - other WED RRO queues: hand back the stored entry buffer as-is;
 *  - classic rings: parse the descriptor, release the rx token for WED rx
 *    queues, and CPU-sync the page-pool buffer.
 *
 * May return NULL (lost token) or ERR_PTR(-EAGAIN) (see rxdmad_c path).
 * Clears e->buf so the slot can be refilled.
 */
static void *
mt76_dma_get_buf(struct mt76_dev *dev, struct mt76_queue *q, int idx,
		 int *len, u32 *info, bool *more, bool *drop, bool flush)
{
	struct mt76_queue_entry *e = &q->entry[idx];
	struct mt76_desc *desc = &q->desc[idx];
	u32 ctrl, desc_info, buf1;
	void *buf = e->buf;

	if (mt76_queue_is_wed_rro_rxdmad_c(q) && !flush)
		buf = mt76_dma_get_rxdmad_c_buf(dev, q, idx, len, more);

	if (mt76_queue_is_wed_rro(q))
		goto done;

	ctrl = le32_to_cpu(READ_ONCE(desc->ctrl));
	if (len) {
		*len = FIELD_GET(MT_DMA_CTL_SD_LEN0, ctrl);
		*more = !(ctrl & MT_DMA_CTL_LAST_SEC0);
	}

	desc_info = le32_to_cpu(desc->info);
	if (info)
		*info = desc_info;

	buf1 = le32_to_cpu(desc->buf1);
	mt76_dma_should_drop_buf(drop, ctrl, buf1, desc_info);

	if (mt76_queue_is_wed_rx(q)) {
		/* buffer is owned by the rxwi referenced by the token */
		u32 token = FIELD_GET(MT_DMA_CTL_TOKEN, buf1);
		struct mt76_txwi_cache *t = mt76_rx_token_release(dev, token);

		if (!t)
			return NULL;

		dma_sync_single_for_cpu(dev->dma_dev, t->dma_addr,
				SKB_WITH_OVERHEAD(q->buf_size),
				page_pool_get_dma_dir(q->page_pool));

		buf = t->ptr;
		t->dma_addr = 0;
		t->ptr = NULL;

		mt76_put_rxwi(dev, t);
		if (drop)
			*drop |= !!(buf1 & MT_DMA_CTL_WO_DROP);
	} else {
		dma_sync_single_for_cpu(dev->dma_dev, e->dma_addr[0],
				SKB_WITH_OVERHEAD(q->buf_size),
				page_pool_get_dma_dir(q->page_pool));
	}

done:
	e->buf = NULL;
	return buf;
}
549 
/* Pop the next completed buffer from an rx queue, or NULL when nothing
 * is ready.  Completion detection depends on the queue type:
 *  - RRO data / msdu_pg: always consumable (paced by the caller);
 *  - RRO ind / rxdmad_c: a slot is ready when its magic count matches the
 *    queue's expected count (count advances on every ring wrap);
 *  - classic rings: ready when hardware set MT_DMA_CTL_DMA_DONE.
 * With flush set, slots are consumed unconditionally for teardown.
 */
static void *
mt76_dma_dequeue(struct mt76_dev *dev, struct mt76_queue *q, bool flush,
		 int *len, u32 *info, bool *more, bool *drop)
{
	int idx = q->tail;

	*more = false;
	if (!q->queued)
		return NULL;

	if (mt76_queue_is_wed_rro_data(q) || mt76_queue_is_wed_rro_msdu_pg(q))
		goto done;

	if (mt76_queue_is_wed_rro_ind(q)) {
		struct mt76_wed_rro_ind *cmd;
		u8 magic_cnt;

		if (flush)
			goto done;

		cmd = q->entry[idx].buf;
		magic_cnt = FIELD_GET(RRO_IND_DATA1_MAGIC_CNT_MASK,
				      le32_to_cpu(cmd->data1));
		/* stale count: hardware hasn't filled this slot yet */
		if (magic_cnt != q->magic_cnt)
			return NULL;

		if (q->tail == q->ndesc - 1)
			q->magic_cnt = (q->magic_cnt + 1) % MT_DMA_WED_IND_CMD_CNT;
	} else if (mt76_queue_is_wed_rro_rxdmad_c(q)) {
		struct mt76_rro_rxdmad_c *dmad;
		u16 magic_cnt;

		if (flush)
			goto done;

		dmad = q->entry[idx].buf;
		magic_cnt = FIELD_GET(RRO_RXDMAD_DATA3_MAGIC_CNT_MASK,
				      le32_to_cpu(dmad->data3));
		if (magic_cnt != q->magic_cnt)
			return NULL;

		if (q->tail == q->ndesc - 1)
			q->magic_cnt = (q->magic_cnt + 1) % MT_DMA_MAGIC_CNT;
	} else {
		if (flush)
			q->desc[idx].ctrl |= cpu_to_le32(MT_DMA_CTL_DMA_DONE);
		else if (!(q->desc[idx].ctrl & cpu_to_le32(MT_DMA_CTL_DMA_DONE)))
			return NULL;
	}
done:
	q->tail = (q->tail + 1) % q->ndesc;
	q->queued--;

	return mt76_dma_get_buf(dev, q, idx, len, info, more, drop, flush);
}
605 
/* Queue a raw (pre-built, e.g. MCU) skb on a tx ring without a txwi.
 * The skb is consumed in all cases: freed on error, handed to the ring
 * on success.  Returns 0 or -ENOMEM (also used for the reset/ring-full
 * bail-outs).
 */
static int
mt76_dma_tx_queue_skb_raw(struct mt76_dev *dev, struct mt76_queue *q,
			  struct sk_buff *skb, u32 tx_info)
{
	struct mt76_queue_buf buf = {};
	dma_addr_t addr;

	/* don't touch the hardware while the MCU is resetting */
	if (test_bit(MT76_MCU_RESET, &dev->phy.state))
		goto error;

	if (q->queued + 1 >= q->ndesc - 1)
		goto error;

	addr = dma_map_single(dev->dma_dev, skb->data, skb->len,
			      DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(dev->dma_dev, addr)))
		goto error;

	buf.addr = addr;
	buf.len = skb->len;

	spin_lock_bh(&q->lock);
	mt76_dma_add_buf(dev, q, &buf, 1, tx_info, skb, NULL);
	mt76_dma_kick_queue(dev, q);
	spin_unlock_bh(&q->lock);

	return 0;

error:
	dev_kfree_skb(skb);
	return -ENOMEM;
}
638 
/* Map and queue a data frame: allocate a txwi, map the skb head and all
 * fragments, let the driver fill the txwi via tx_prepare_skb(), then
 * either hand the frame to the NPU path or push it onto the DMA ring.
 *
 * On any failure the mappings are undone, the txwi is returned to the
 * cache, and mac80211 is notified of the (failed) tx status so the skb
 * is accounted for.  Returns the ring index on success or a negative
 * errno.
 */
static int
mt76_dma_tx_queue_skb(struct mt76_phy *phy, struct mt76_queue *q,
		      enum mt76_txq_id qid, struct sk_buff *skb,
		      struct mt76_wcid *wcid, struct ieee80211_sta *sta)
{
	struct ieee80211_tx_status status = {
		.sta = sta,
	};
	struct mt76_tx_info tx_info = {
		.skb = skb,
	};
	struct mt76_dev *dev = phy->dev;
	struct ieee80211_hw *hw;
	int len, n = 0, ret = -ENOMEM;
	struct mt76_txwi_cache *t;
	struct sk_buff *iter;
	dma_addr_t addr;
	u8 *txwi;

	if (test_bit(MT76_RESET, &phy->state))
		goto free_skb;

	/* TODO: Take into account unlinear skbs */
	if (mt76_npu_device_active(dev) && skb_linearize(skb))
		goto free_skb;

	t = mt76_get_txwi(dev);
	if (!t)
		goto free_skb;

	t->phy_idx = phy->band_idx;
	t->qid = qid;
	txwi = mt76_get_txwi_ptr(dev, t);

	skb->prev = skb->next = NULL;
	if (dev->drv->drv_flags & MT_DRV_TX_ALIGNED4_SKBS)
		mt76_insert_hdr_pad(skb);

	len = skb_headlen(skb);
	addr = dma_map_single(dev->dma_dev, skb->data, len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(dev->dma_dev, addr)))
		goto free;

	/* buf[0] is the txwi (never unmapped here), buf[1] the skb head */
	tx_info.buf[n].addr = t->dma_addr;
	tx_info.buf[n++].len = dev->drv->txwi_size;
	tx_info.buf[n].addr = addr;
	tx_info.buf[n++].len = len;

	skb_walk_frags(skb, iter) {
		if (n == ARRAY_SIZE(tx_info.buf))
			goto unmap;

		addr = dma_map_single(dev->dma_dev, iter->data, iter->len,
				      DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(dev->dma_dev, addr)))
			goto unmap;

		tx_info.buf[n].addr = addr;
		tx_info.buf[n++].len = iter->len;
	}
	tx_info.nbuf = n;

	/* two buffers share one descriptor; keep one slot of headroom */
	if (q->queued + (tx_info.nbuf + 1) / 2 >= q->ndesc - 1) {
		ret = -ENOMEM;
		goto unmap;
	}

	/* give the CPU ownership of the txwi while the driver fills it */
	dma_sync_single_for_cpu(dev->dma_dev, t->dma_addr, dev->drv->txwi_size,
				DMA_TO_DEVICE);
	ret = dev->drv->tx_prepare_skb(dev, txwi, qid, wcid, sta, &tx_info);
	dma_sync_single_for_device(dev->dma_dev, t->dma_addr, dev->drv->txwi_size,
				   DMA_TO_DEVICE);
	if (ret < 0)
		goto unmap;

	if (mt76_npu_device_active(dev))
		return mt76_npu_dma_add_buf(phy, q, skb, &tx_info.buf[1], txwi);

	return mt76_dma_add_buf(dev, q, tx_info.buf, tx_info.nbuf,
				tx_info.info, tx_info.skb, t);

unmap:
	/* skip index 0: the txwi mapping is owned by the txwi cache */
	for (n--; n > 0; n--)
		dma_unmap_single(dev->dma_dev, tx_info.buf[n].addr,
				 tx_info.buf[n].len, DMA_TO_DEVICE);

free:
#ifdef CONFIG_NL80211_TESTMODE
	/* fix tx_done accounting on queue overflow */
	if (mt76_is_testmode_skb(dev, skb, &hw)) {
		struct mt76_phy *phy = hw->priv;

		if (tx_info.skb == phy->test.tx_skb)
			phy->test.tx_done--;
	}
#endif

	mt76_put_txwi(dev, t);

free_skb:
	/* report the dropped frame to mac80211 (frees the skb) */
	status.skb = tx_info.skb;
	hw = mt76_tx_status_get_hw(dev, tx_info.skb);
	spin_lock_bh(&dev->rx_lock);
	ieee80211_tx_status_ext(hw, &status);
	spin_unlock_bh(&dev->rx_lock);

	return ret;
}
747 
/* Refill an rx queue with page-pool buffers until it is full (one slot
 * kept free).  RRO ind/rxdmad_c queues don't own data buffers — their
 * ring slots are posted directly via the done label.  Kicks the queue if
 * anything was added (WED rx queues are always kicked).  Returns the
 * number of buffers queued.  Caller must hold the queue lock.
 */
static int
mt76_dma_rx_fill_buf(struct mt76_dev *dev, struct mt76_queue *q,
		     bool allow_direct)
{
	int len = SKB_WITH_OVERHEAD(q->buf_size);
	int frames = 0;

	if (!q->ndesc)
		return 0;

	while (q->queued < q->ndesc - 1) {
		struct mt76_queue_buf qbuf = {};
		void *buf = NULL;
		int offset;

		if (mt76_queue_is_wed_rro_ind(q) ||
		    mt76_queue_is_wed_rro_rxdmad_c(q))
			goto done;

		buf = mt76_get_page_pool_buf(q, &offset, q->buf_size);
		if (!buf)
			break;

		qbuf.addr = page_pool_get_dma_addr(virt_to_head_page(buf)) +
			    offset + q->buf_offset;
done:
		qbuf.len = len - q->buf_offset;
		qbuf.skip_unmap = false;
		if (mt76_dma_add_rx_buf(dev, q, &qbuf, buf) < 0) {
			/* return the unused buffer to the page pool */
			mt76_put_page_pool_buf(buf, allow_direct);
			break;
		}
		frames++;
	}

	if (frames || mt76_queue_is_wed_rx(q))
		mt76_dma_kick_queue(dev, q);

	return frames;
}
788 
/* Locked wrapper around mt76_dma_rx_fill_buf(); returns the number of
 * rx buffers queued.
 */
int mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q,
		     bool allow_direct)
{
	int frames;

	spin_lock_bh(&q->lock);
	frames = mt76_dma_rx_fill_buf(dev, q, allow_direct);
	spin_unlock_bh(&q->lock);

	return frames;
}
800 
/* Allocate and initialize one DMA queue: coherent descriptor ring (size
 * depends on queue type), software entry array, page pool, NPU/WED
 * setup, and an initial hardware reset.  Memory is devm/dmam managed, so
 * error paths simply return.  Returns 0 or a negative errno.
 */
static int
mt76_dma_alloc_queue(struct mt76_dev *dev, struct mt76_queue *q,
		     int idx, int n_desc, int bufsize,
		     u32 ring_base)
{
	int ret, size;

	spin_lock_init(&q->lock);
	spin_lock_init(&q->cleanup_lock);

	q->regs = dev->mmio.regs + ring_base + idx * MT_RING_SIZE;
	q->ndesc = n_desc;
	q->buf_size = bufsize;
	q->hw_idx = idx;
	q->dev = dev;

	/* descriptor layout differs per queue type */
	if (mt76_queue_is_wed_rro_ind(q))
		size = sizeof(struct mt76_wed_rro_desc);
	else if (mt76_queue_is_npu_tx(q))
		size = sizeof(struct airoha_npu_tx_dma_desc);
	else if (mt76_queue_is_npu_rx(q))
		size = sizeof(struct airoha_npu_rx_dma_desc);
	else
		size = sizeof(struct mt76_desc);

	q->desc = dmam_alloc_coherent(dev->dma_dev, q->ndesc * size,
				      &q->desc_dma, GFP_KERNEL);
	if (!q->desc)
		return -ENOMEM;

	mt76_dma_queue_magic_cnt_init(dev, q);
	size = q->ndesc * sizeof(*q->entry);
	q->entry = devm_kzalloc(dev->dev, size, GFP_KERNEL);
	if (!q->entry)
		return -ENOMEM;

	ret = mt76_create_page_pool(dev, q);
	if (ret)
		return ret;

	mt76_npu_queue_setup(dev, q);
	ret = mt76_wed_dma_setup(dev, q, false);
	if (ret)
		return ret;

	/* WED-managed rings are reset by the WED core, not here */
	if (mtk_wed_device_active(&dev->mmio.wed)) {
		if ((mtk_wed_get_rx_capa(&dev->mmio.wed) && mt76_queue_is_wed_rro(q)) ||
		    mt76_queue_is_wed_tx_free(q))
			return 0;
	}

	/* HW specific driver is supposed to reset brand-new EMI queues since
	 * it needs to set cpu index pointer.
	 */
	mt76_dma_queue_reset(dev, q, !mt76_queue_is_emi(q));

	return 0;
}
859 
/* Drain all buffers from an rx queue during teardown.  NPU queues have
 * their own cleanup helper.  Buffers from WED/NPU RRO queues are managed
 * elsewhere and are only dequeued, not recycled here; any partially
 * assembled frame (q->rx_head) is dropped at the end.
 */
static void
mt76_dma_rx_cleanup(struct mt76_dev *dev, struct mt76_queue *q)
{
	void *buf;
	bool more;

	if (!q->ndesc)
		return;

	if (mt76_queue_is_npu(q)) {
		mt76_npu_queue_cleanup(dev, q);
		return;
	}

	do {
		spin_lock_bh(&q->lock);
		/* flush=true consumes slots regardless of DMA-done state */
		buf = mt76_dma_dequeue(dev, q, true, NULL, NULL, &more, NULL);
		spin_unlock_bh(&q->lock);

		if (!buf)
			break;

		if (mtk_wed_device_active(&dev->mmio.wed) &&
		    mt76_queue_is_wed_rro(q))
			continue;

		if (mt76_npu_device_active(dev) &&
		    mt76_queue_is_wed_rro(q))
			continue;

		/* ind/rxdmad_c "buffers" are ring slots, not pool pages */
		if (!mt76_queue_is_wed_rro_rxdmad_c(q) &&
		    !mt76_queue_is_wed_rro_ind(q))
			mt76_put_page_pool_buf(buf, false);
	} while (1);

	spin_lock_bh(&q->lock);
	if (q->rx_head) {
		dev_kfree_skb(q->rx_head);
		q->rx_head = NULL;
	}

	spin_unlock_bh(&q->lock);
}
903 
/* Reset one rx queue: mark classic-ring descriptors done, drain all
 * pending buffers, redo the WED setup, and — unless the queue is managed
 * by WED/NPU — resync indices and refill it with fresh buffers.
 */
static void
mt76_dma_rx_reset(struct mt76_dev *dev, enum mt76_rxq_id qid)
{
	struct mt76_queue *q = &dev->q_rx[qid];

	if (!q->ndesc)
		return;

	if (!mt76_queue_is_wed_rro_ind(q) &&
	    !mt76_queue_is_wed_rro_rxdmad_c(q) && !mt76_queue_is_npu(q)) {
		int i;

		for (i = 0; i < q->ndesc; i++)
			q->desc[i].ctrl = cpu_to_le32(MT_DMA_CTL_DMA_DONE);
	}

	mt76_dma_rx_cleanup(dev, q);

	/* reset WED rx queues */
	mt76_wed_dma_setup(dev, q, true);

	/* queues below are owned by WED/NPU; nothing more to do here */
	if (mt76_queue_is_wed_tx_free(q))
		return;

	if (mtk_wed_device_active(&dev->mmio.wed) &&
	    mt76_queue_is_wed_rro(q))
		return;

	if (mt76_npu_device_active(dev) &&
	    mt76_queue_is_wed_rro(q))
		return;

	if (mt76_queue_is_npu_txfree(q))
		return;

	mt76_dma_sync_idx(dev, q);
	if (mt76_queue_is_npu(q))
		mt76_npu_fill_rx_queue(dev, q);
	else
		mt76_dma_rx_fill(dev, q, false);
}
945 
/* Append an rx buffer as a page fragment to the frame being assembled in
 * q->rx_head.  If the skb already has the maximum number of fragments,
 * the buffer is recycled and — once the last fragment arrives — the
 * whole truncated frame is dropped instead of being passed up.
 */
static void
mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data,
		  int len, bool more, u32 info, bool allow_direct)
{
	struct sk_buff *skb = q->rx_head;
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;

	if (nr_frags < ARRAY_SIZE(shinfo->frags)) {
		struct page *page = virt_to_head_page(data);
		int offset = data - page_address(page) + q->buf_offset;

		skb_add_rx_frag(skb, nr_frags, page, offset, len, q->buf_size);
	} else {
		mt76_put_page_pool_buf(data, allow_direct);
	}

	/* wait for the final fragment before completing the frame */
	if (more)
		return;

	q->rx_head = NULL;
	if (nr_frags < ARRAY_SIZE(shinfo->frags))
		dev->drv->rx_skb(dev, q - dev->q_rx, skb, &info);
	else
		dev_kfree_skb(skb);
}
972 
/* NAPI-style rx loop: dequeue up to budget completed buffers, build skbs
 * (handling multi-fragment frames via q->rx_head), hand them to the
 * driver's rx_skb callback, and refill the ring.  Returns the number of
 * frames processed.
 *
 * RRO/tx-free queues additionally bound the loop by the hardware dma
 * index (check_ddone) instead of relying on per-descriptor done bits.
 */
static int
mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
{
	int len, data_len, done = 0, dma_idx;
	struct sk_buff *skb;
	unsigned char *data;
	bool check_ddone = false;
	/* WED-owned buffers must not be returned via the direct pool path */
	bool allow_direct = !mt76_queue_is_wed_rx(q);
	bool more;

	if ((q->flags & MT_QFLAG_WED_RRO_EN) ||
	    (IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED) &&
	     mt76_queue_is_wed_tx_free(q))) {
		dma_idx = Q_READ(q, dma_idx);
		check_ddone = true;
	}

	while (done < budget) {
		bool drop = false;
		u32 info;

		if (check_ddone) {
			/* caught up: re-read the hw index once before stopping */
			if (q->tail == dma_idx)
				dma_idx = Q_READ(q, dma_idx);

			if (q->tail == dma_idx)
				break;
		}

		data = mt76_dma_dequeue(dev, q, false, &len, &info, &more,
					&drop);
		if (!data)
			break;

		/* ERR_PTR(-EAGAIN): slot consumed but frame not usable */
		if (PTR_ERR(data) == -EAGAIN) {
			done++;
			continue;
		}

		if (mt76_queue_is_wed_rro_ind(q) && dev->drv->rx_rro_ind_process)
			dev->drv->rx_rro_ind_process(dev, data);

		/* non-rxdmad_c RRO slots carry no skb payload to build */
		if (mt76_queue_is_wed_rro(q) &&
		    !mt76_queue_is_wed_rro_rxdmad_c(q)) {
			done++;
			continue;
		}

		if (drop)
			goto free_frag;

		if (q->rx_head)
			data_len = q->buf_size;
		else
			data_len = SKB_WITH_OVERHEAD(q->buf_size);

		/* length exceeds the buffer: drop the whole frame */
		if (data_len < len + q->buf_offset) {
			dev_kfree_skb(q->rx_head);
			q->rx_head = NULL;
			goto free_frag;
		}

		if (q->rx_head) {
			mt76_add_fragment(dev, q, data, len, more, info,
					  allow_direct);
			continue;
		}

		if (!more && dev->drv->rx_check &&
		    !(dev->drv->rx_check(dev, data, len)))
			goto free_frag;

		skb = napi_build_skb(data, q->buf_size);
		if (!skb)
			goto free_frag;

		skb_reserve(skb, q->buf_offset);
		skb_mark_for_recycle(skb);

		*(u32 *)skb->cb = info;

		__skb_put(skb, len);
		done++;

		/* first fragment of a multi-buffer frame: keep assembling */
		if (more) {
			q->rx_head = skb;
			continue;
		}

		dev->drv->rx_skb(dev, q - dev->q_rx, skb, &info);
		continue;

free_frag:
		mt76_put_page_pool_buf(data, allow_direct);
	}

	mt76_dma_rx_fill(dev, q, true);
	return done;
}
1072 
/* NAPI poll handler for rx queues.  The mt76_dev is recovered from the
 * dummy napi netdev's private data (set up in mt76_dma_init), and the
 * queue id from the napi context's position in dev->napi.  Processes up
 * to budget frames and completes NAPI when the queue is drained.
 */
int mt76_dma_rx_poll(struct napi_struct *napi, int budget)
{
	struct mt76_dev *dev;
	int qid, done = 0, cur;

	dev = mt76_priv(napi->dev);
	qid = napi - dev->napi;

	rcu_read_lock();

	do {
		cur = mt76_dma_rx_process(dev, &dev->q_rx[qid], budget - done);
		mt76_rx_poll_complete(dev, qid, napi);
		done += cur;
	} while (cur && done < budget);

	rcu_read_unlock();

	/* budget not exhausted: stop polling and re-enable interrupts */
	if (done < budget && napi_complete(napi))
		dev->drv->rx_poll_complete(dev, qid);

	return done;
}
EXPORT_SYMBOL_GPL(mt76_dma_rx_poll);
1097 
/* Register a NAPI context for an rx queue, pre-fill the ring with
 * buffers, and enable polling.
 */
static void
mt76_dma_rx_queue_init(struct mt76_dev *dev, enum mt76_rxq_id qid,
		       int (*poll)(struct napi_struct *napi, int budget))
{
	netif_napi_add(dev->napi_dev, &dev->napi[qid], poll);
	mt76_dma_rx_fill_buf(dev, &dev->q_rx[qid], false);
	napi_enable(&dev->napi[qid]);
}
1106 
/* One-time DMA init: create dummy netdevs to host the rx and tx NAPI
 * contexts (their private data points back to the mt76_dev so handlers
 * can recover it), then initialize every non-RRO rx queue.  Returns 0 or
 * -ENOMEM.
 */
static int
mt76_dma_init(struct mt76_dev *dev,
	      int (*poll)(struct napi_struct *napi, int budget))
{
	struct mt76_dev **priv;
	int i;

	dev->napi_dev = alloc_netdev_dummy(sizeof(struct mt76_dev *));
	if (!dev->napi_dev)
		return -ENOMEM;

	/* napi_dev private data points to mt76_dev parent, so, mt76_dev
	 * can be retrieved given napi_dev
	 */
	priv = netdev_priv(dev->napi_dev);
	*priv = dev;

	dev->tx_napi_dev = alloc_netdev_dummy(sizeof(struct mt76_dev *));
	if (!dev->tx_napi_dev) {
		free_netdev(dev->napi_dev);
		return -ENOMEM;
	}
	priv = netdev_priv(dev->tx_napi_dev);
	*priv = dev;

	snprintf(dev->napi_dev->name, sizeof(dev->napi_dev->name), "%s",
		 wiphy_name(dev->hw->wiphy));
	dev->napi_dev->threaded = 1;
	init_completion(&dev->mmio.wed_reset);
	init_completion(&dev->mmio.wed_reset_complete);

	mt76_for_each_q_rx(dev, i) {
		/* RRO queues are serviced by WED, not by these NAPI handlers */
		if (mt76_queue_is_wed_rro(&dev->q_rx[i]))
			continue;

		mt76_dma_rx_queue_init(dev, i, poll);
	}

	return 0;
}
1147 
/* Queue-ops vtable installed by mt76_dma_attach(); maps the generic
 * mt76 queue API onto the DMA implementations in this file.
 */
static const struct mt76_queue_ops mt76_dma_ops = {
	.init = mt76_dma_init,
	.alloc = mt76_dma_alloc_queue,
	.reset_q = mt76_dma_queue_reset,
	.tx_queue_skb_raw = mt76_dma_tx_queue_skb_raw,
	.tx_queue_skb = mt76_dma_tx_queue_skb,
	.tx_cleanup = mt76_dma_tx_cleanup,
	.rx_queue_init = mt76_dma_rx_queue_init,
	.rx_cleanup = mt76_dma_rx_cleanup,
	.rx_reset = mt76_dma_rx_reset,
	.kick = mt76_dma_kick_queue,
};
1160 
/* Install the DMA queue-ops vtable on the device. */
void mt76_dma_attach(struct mt76_dev *dev)
{
	dev->queue_ops = &mt76_dma_ops;
}
EXPORT_SYMBOL_GPL(mt76_dma_attach);
1166 
/* Full DMA teardown: stop the tx worker and NAPI, flush every tx/mcu
 * queue, drain and destroy every rx queue and its page pool, detach WED
 * devices, release the txwi/rxwi caches, and free the dummy NAPI
 * netdevs.
 */
void mt76_dma_cleanup(struct mt76_dev *dev)
{
	int i;

	mt76_worker_disable(&dev->tx_worker);
	napi_disable(&dev->tx_napi);
	netif_napi_del(&dev->tx_napi);

	for (i = 0; i < ARRAY_SIZE(dev->phys); i++) {
		struct mt76_phy *phy = dev->phys[i];
		int j;

		if (!phy)
			continue;

		/* flush=true: reap all pending frames regardless of hw state */
		for (j = 0; j < ARRAY_SIZE(phy->q_tx); j++)
			mt76_dma_tx_cleanup(dev, phy->q_tx[j], true);
	}

	for (i = 0; i < ARRAY_SIZE(dev->q_mcu); i++)
		mt76_dma_tx_cleanup(dev, dev->q_mcu[i], true);

	mt76_for_each_q_rx(dev, i) {
		struct mt76_queue *q = &dev->q_rx[i];

		netif_napi_del(&dev->napi[i]);
		mt76_dma_rx_cleanup(dev, q);

		page_pool_destroy(q->page_pool);
	}

	if (mtk_wed_device_active(&dev->mmio.wed))
		mtk_wed_device_detach(&dev->mmio.wed);

	if (mtk_wed_device_active(&dev->mmio.wed_hif2))
		mtk_wed_device_detach(&dev->mmio.wed_hif2);

	mt76_free_pending_txwi(dev);
	mt76_free_pending_rxwi(dev);
	free_netdev(dev->napi_dev);
	free_netdev(dev->tx_napi_dev);
}
EXPORT_SYMBOL_GPL(mt76_dma_cleanup);
1210