xref: /linux/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c (revision 189f164e573e18d9f8876dbd3ad8fcbe11f93037)
1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
3 
4 #include <linux/if_vlan.h>
5 #include <linux/iopoll.h>
6 #include <net/ip6_checksum.h>
7 #include <net/ipv6.h>
8 #include <net/netdev_queues.h>
9 
10 #include "hinic3_hwdev.h"
11 #include "hinic3_nic_cfg.h"
12 #include "hinic3_nic_dev.h"
13 #include "hinic3_nic_io.h"
14 #include "hinic3_tx.h"
15 #include "hinic3_wq.h"
16 
17 #define MIN_SKB_LEN                32
18 
hinic3_txq_clean_stats(struct hinic3_txq_stats * txq_stats)19 static void hinic3_txq_clean_stats(struct hinic3_txq_stats *txq_stats)
20 {
21 	u64_stats_update_begin(&txq_stats->syncp);
22 	txq_stats->bytes = 0;
23 	txq_stats->packets = 0;
24 	txq_stats->busy = 0;
25 	txq_stats->dropped = 0;
26 
27 	txq_stats->skb_pad_err = 0;
28 	txq_stats->frag_len_overflow = 0;
29 	txq_stats->offload_cow_skb_err = 0;
30 	txq_stats->map_frag_err = 0;
31 	txq_stats->unknown_tunnel_pkt = 0;
32 	txq_stats->frag_size_err = 0;
33 	u64_stats_update_end(&txq_stats->syncp);
34 }
35 
hinic3_txq_stats_init(struct hinic3_txq * txq)36 static void hinic3_txq_stats_init(struct hinic3_txq *txq)
37 {
38 	struct hinic3_txq_stats *txq_stats = &txq->txq_stats;
39 
40 	u64_stats_init(&txq_stats->syncp);
41 	hinic3_txq_clean_stats(txq_stats);
42 }
43 
hinic3_alloc_txqs(struct net_device * netdev)44 int hinic3_alloc_txqs(struct net_device *netdev)
45 {
46 	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
47 	u16 q_id, num_txqs = nic_dev->max_qps;
48 	struct pci_dev *pdev = nic_dev->pdev;
49 	struct hinic3_txq *txq;
50 
51 	nic_dev->txqs = kzalloc_objs(*nic_dev->txqs, num_txqs);
52 	if (!nic_dev->txqs)
53 		return -ENOMEM;
54 
55 	for (q_id = 0; q_id < num_txqs; q_id++) {
56 		txq = &nic_dev->txqs[q_id];
57 		txq->netdev = netdev;
58 		txq->q_id = q_id;
59 		txq->q_depth = nic_dev->q_params.sq_depth;
60 		txq->q_mask = nic_dev->q_params.sq_depth - 1;
61 		txq->dev = &pdev->dev;
62 
63 		hinic3_txq_stats_init(txq);
64 	}
65 
66 	return 0;
67 }
68 
/* Release the TX queue array allocated by hinic3_alloc_txqs(). */
void hinic3_free_txqs(struct net_device *netdev)
{
	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);

	kfree(nic_dev->txqs);
}
75 
/* Fill one SQ buffer descriptor with a 64-bit DMA address and length,
 * converting all fields to little-endian as the hardware expects.
 */
static void hinic3_set_buf_desc(struct hinic3_sq_bufdesc *bd,
				dma_addr_t addr, u32 len)
{
	bd->hi_addr = cpu_to_le32(upper_32_bits(addr));
	bd->lo_addr = cpu_to_le32(lower_32_bits(addr));
	bd->len = cpu_to_le32(len);
}
83 
/* DMA-map the skb linear data and all page fragments and fill the buffer
 * descriptors of the SQ WQE.
 *
 * dma_info[0] always describes the linear part (dma_map_single());
 * entries 1..nr_frags describe the page fragments (skb_frag_dma_map()).
 * Fragment descriptors are written sequentially, switching to the second
 * descriptor area once first_bds_num entries have been consumed (the WQE
 * wrapped inside the ring).
 *
 * Return: 0 on success, -EFAULT on a DMA mapping error; all mappings made
 * so far are unwound before returning.
 */
static int hinic3_tx_map_skb(struct net_device *netdev, struct sk_buff *skb,
			     struct hinic3_txq *txq,
			     struct hinic3_tx_info *tx_info,
			     struct hinic3_sq_wqe_combo *wqe_combo)
{
	struct hinic3_sq_wqe_desc *wqe_desc = wqe_combo->ctrl_bd0;
	struct hinic3_sq_bufdesc *buf_desc = wqe_combo->bds_head;
	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
	struct hinic3_dma_info *dma_info = tx_info->dma_info;
	struct pci_dev *pdev = nic_dev->pdev;
	skb_frag_t *frag;
	u32 i, idx;
	int err;

	/* linear part goes directly into the control descriptor (bd0) */
	dma_info[0].dma = dma_map_single(&pdev->dev, skb->data,
					 skb_headlen(skb), DMA_TO_DEVICE);
	if (dma_mapping_error(&pdev->dev, dma_info[0].dma))
		return -EFAULT;

	dma_info[0].len = skb_headlen(skb);

	wqe_desc->hi_addr = cpu_to_le32(upper_32_bits(dma_info[0].dma));
	wqe_desc->lo_addr = cpu_to_le32(lower_32_bits(dma_info[0].dma));

	wqe_desc->ctrl_len = cpu_to_le32(dma_info[0].len);

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		frag = &(skb_shinfo(skb)->frags[i]);
		/* first descriptor area exhausted - continue in second one */
		if (unlikely(i == wqe_combo->first_bds_num))
			buf_desc = wqe_combo->bds_sec2;

		idx = i + 1;
		dma_info[idx].dma = skb_frag_dma_map(&pdev->dev, frag, 0,
						     skb_frag_size(frag),
						     DMA_TO_DEVICE);
		if (dma_mapping_error(&pdev->dev, dma_info[idx].dma)) {
			err = -EFAULT;
			goto err_unmap_page;
		}
		dma_info[idx].len = skb_frag_size(frag);

		hinic3_set_buf_desc(buf_desc, dma_info[idx].dma,
				    dma_info[idx].len);
		buf_desc++;
	}

	return 0;

err_unmap_page:
	/* idx points at the entry that failed; unwind fragments in reverse
	 * order, then the linear mapping
	 */
	while (idx > 1) {
		idx--;
		dma_unmap_page(&pdev->dev, dma_info[idx].dma,
			       dma_info[idx].len, DMA_TO_DEVICE);
	}
	dma_unmap_single(&pdev->dev, dma_info[0].dma, dma_info[0].len,
			 DMA_TO_DEVICE);

	return err;
}
143 
hinic3_tx_unmap_skb(struct net_device * netdev,struct sk_buff * skb,struct hinic3_dma_info * dma_info)144 static void hinic3_tx_unmap_skb(struct net_device *netdev,
145 				struct sk_buff *skb,
146 				struct hinic3_dma_info *dma_info)
147 {
148 	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
149 	struct pci_dev *pdev = nic_dev->pdev;
150 	int i;
151 
152 	for (i = 0; i < skb_shinfo(skb)->nr_frags;) {
153 		i++;
154 		dma_unmap_page(&pdev->dev,
155 			       dma_info[i].dma,
156 			       dma_info[i].len, DMA_TO_DEVICE);
157 	}
158 
159 	dma_unmap_single(&pdev->dev, dma_info[0].dma,
160 			 dma_info[0].len, DMA_TO_DEVICE);
161 }
162 
/* Release every outstanding skb of a TX ring: unmap its DMA buffers,
 * free the skb and clear the slot.  Used when tearing down resources.
 */
static void free_all_tx_skbs(struct net_device *netdev, u32 sq_depth,
			     struct hinic3_tx_info *tx_info_arr)
{
	struct hinic3_tx_info *info;
	u32 i;

	for (i = 0; i < sq_depth; i++) {
		info = &tx_info_arr[i];
		if (!info->skb)
			continue;

		hinic3_tx_unmap_skb(netdev, info->skb, info->dma_info);
		dev_kfree_skb_any(info->skb);
		info->skb = NULL;
	}
}
179 
/* Overlay views of a network (L3) header pointer. */
union hinic3_ip {
	struct iphdr   *v4;
	struct ipv6hdr *v6;
	unsigned char  *hdr;
};

/* Overlay views of a transport (L4) header pointer. */
union hinic3_l4 {
	struct tcphdr *tcp;
	struct udphdr *udp;
	unsigned char *hdr;
};

/* L3 packet type encoding written into the SQ task section. */
enum hinic3_l3_type {
	HINIC3_L3_UNKNOWN         = 0,
	HINIC3_L3_IP6_PKT         = 1,
	HINIC3_L3_IP4_PKT_NO_CSUM = 2,
	HINIC3_L3_IP4_PKT_CSUM    = 3,
};

/* L4 offload type encoding written into the SQ task section. */
enum hinic3_l4_offload_type {
	HINIC3_L4_OFFLOAD_DISABLE = 0,
	HINIC3_L4_OFFLOAD_TCP     = 1,
	HINIC3_L4_OFFLOAD_STCP    = 2,
	HINIC3_L4_OFFLOAD_UDP     = 3,
};
205 
206 /* initialize l4 offset and offload */
get_inner_l4_info(struct sk_buff * skb,union hinic3_l4 * l4,u8 l4_proto,u32 * offset,enum hinic3_l4_offload_type * l4_offload)207 static void get_inner_l4_info(struct sk_buff *skb, union hinic3_l4 *l4,
208 			      u8 l4_proto, u32 *offset,
209 			      enum hinic3_l4_offload_type *l4_offload)
210 {
211 	switch (l4_proto) {
212 	case IPPROTO_TCP:
213 		*l4_offload = HINIC3_L4_OFFLOAD_TCP;
214 		/* To be same with TSO, payload offset begins from payload */
215 		*offset = (l4->tcp->doff << TCP_HDR_DATA_OFF_UNIT_SHIFT) +
216 			   TRANSPORT_OFFSET(l4->hdr, skb);
217 		break;
218 
219 	case IPPROTO_UDP:
220 		*l4_offload = HINIC3_L4_OFFLOAD_UDP;
221 		*offset = TRANSPORT_OFFSET(l4->hdr, skb);
222 		break;
223 	default:
224 		*l4_offload = HINIC3_L4_OFFLOAD_DISABLE;
225 		*offset = 0;
226 	}
227 }
228 
/* Configure L4 checksum offload bits in the SQ task section for @skb.
 *
 * Return: 0 when no hardware csum offload is used (not needed, or an
 * unsupported tunnel whose checksum was computed in software), 1 when
 * inner-L4 checksum offload was enabled.
 */
static int hinic3_tx_csum(struct hinic3_txq *txq, struct hinic3_sq_task *task,
			  struct sk_buff *skb)
{
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	if (skb->encapsulation) {
		union hinic3_ip ip;
		u8 l4_proto;

		task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1,
								 TUNNEL_FLAG));

		ip.hdr = skb_network_header(skb);
		/* the version nibble sits at the same offset in IPv4 and
		 * IPv6 headers, so reading it through the v4 view is safe
		 */
		if (ip.v4->version == 4) {
			l4_proto = ip.v4->protocol;
		} else if (ip.v4->version == 6) {
			union hinic3_l4 l4;
			unsigned char *exthdr;
			__be16 frag_off;

			/* skip IPv6 extension headers to find the real L4
			 * protocol
			 */
			exthdr = ip.hdr + sizeof(*ip.v6);
			l4_proto = ip.v6->nexthdr;
			l4.hdr = skb_transport_header(skb);
			if (l4.hdr != exthdr)
				ipv6_skip_exthdr(skb, exthdr - skb->data,
						 &l4_proto, &frag_off);
		} else {
			l4_proto = IPPROTO_RAW;
		}

		/* only VXLAN (by destination port) tunnels are offloadable */
		if (l4_proto != IPPROTO_UDP ||
		    ((struct udphdr *)skb_transport_header(skb))->dest !=
		    VXLAN_OFFLOAD_PORT_LE) {
			/* Unsupported tunnel packet, disable csum offload */
			skb_checksum_help(skb);
			return 0;
		}
	}

	task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1, INNER_L4_EN));

	return 1;
}
273 
/* Classify the (inner) L3 header and extract the L4 protocol number.
 * For IPv6, extension headers are skipped to reach the real transport
 * protocol.  Unknown L3 versions yield HINIC3_L3_UNKNOWN / protocol 0.
 */
static void get_inner_l3_l4_type(struct sk_buff *skb, union hinic3_ip *ip,
				 union hinic3_l4 *l4,
				 enum hinic3_l3_type *l3_type, u8 *l4_proto)
{
	unsigned char *ext;
	__be16 frag_off;

	switch (ip->v4->version) {
	case 4:
		*l3_type = HINIC3_L3_IP4_PKT_CSUM;
		*l4_proto = ip->v4->protocol;
		break;
	case 6:
		*l3_type = HINIC3_L3_IP6_PKT;
		*l4_proto = ip->v6->nexthdr;
		ext = ip->hdr + sizeof(*ip->v6);
		if (ext != l4->hdr)
			ipv6_skip_exthdr(skb, ext - skb->data, l4_proto,
					 &frag_off);
		break;
	default:
		*l3_type = HINIC3_L3_UNKNOWN;
		*l4_proto = 0;
		break;
	}
}
297 
/* Encode TSO/UFO parameters into the task section and queue_info word:
 * segmentation flag, inner L3/L4 enables, payload offset and MSS.
 */
static void hinic3_set_tso_info(struct hinic3_sq_task *task, __le32 *queue_info,
				enum hinic3_l4_offload_type l4_offload,
				u32 offset, u32 mss)
{
	switch (l4_offload) {
	case HINIC3_L4_OFFLOAD_TCP:
		*queue_info |= cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(1, TSO));
		task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1,
								 INNER_L4_EN));
		break;
	case HINIC3_L4_OFFLOAD_UDP:
		*queue_info |= cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(1, UFO));
		task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1,
								 INNER_L4_EN));
		break;
	default:
		break;
	}

	/* enable L3 calculation */
	task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1, INNER_L3_EN));

	/* payload offset is programmed in 2-byte units */
	*queue_info |= cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(offset >> 1, PLDOFF));

	/* clear any previous MSS, then set the requested one */
	*queue_info &= cpu_to_le32(~SQ_CTRL_QUEUE_INFO_MSS_MASK);
	*queue_info |= cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(mss, MSS));
}
321 
csum_magic(union hinic3_ip * ip,unsigned short proto)322 static __sum16 csum_magic(union hinic3_ip *ip, unsigned short proto)
323 {
324 	return (ip->v4->version == 4) ?
325 		csum_tcpudp_magic(ip->v4->saddr, ip->v4->daddr, 0, proto, 0) :
326 		csum_ipv6_magic(&ip->v6->saddr, &ip->v6->daddr, 0, proto, 0);
327 }
328 
/* Configure TSO/UFO segmentation offload for a GSO skb.
 *
 * For encapsulated packets the outer L3 (and, for UDP tunnels with
 * checksum, outer L4) offload bits are enabled and the inner headers are
 * used for segmentation.  The inner TCP checksum is seeded with the
 * pseudo-header checksum before handing the packet to hardware.
 *
 * Return: 0 when the skb is not GSO, 1 when TSO info was set, or a
 * negative errno from skb_cow_head().
 */
static int hinic3_tso(struct hinic3_sq_task *task, __le32 *queue_info,
		      struct sk_buff *skb)
{
	enum hinic3_l4_offload_type l4_offload;
	enum hinic3_l3_type l3_type;
	union hinic3_ip ip;
	union hinic3_l4 l4;
	u8 l4_proto;
	u32 offset;
	int err;

	if (!skb_is_gso(skb))
		return 0;

	/* headers are written below; make sure they are writable */
	err = skb_cow_head(skb, 0);
	if (err < 0)
		return err;

	if (skb->encapsulation) {
		u32 gso_type = skb_shinfo(skb)->gso_type;
		/* L3 checksum is always enabled */
		task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1, OUT_L3_EN));
		task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1,
								 TUNNEL_FLAG));

		l4.hdr = skb_transport_header(skb);
		ip.hdr = skb_network_header(skb);

		if (gso_type & SKB_GSO_UDP_TUNNEL_CSUM) {
			/* seed the outer UDP checksum with the
			 * pseudo-header checksum
			 */
			l4.udp->check = ~csum_magic(&ip, IPPROTO_UDP);
			task->pkt_info0 |=
				cpu_to_le32(SQ_TASK_INFO0_SET(1, OUT_L4_EN));
		}

		/* segmentation operates on the inner headers */
		ip.hdr = skb_inner_network_header(skb);
		l4.hdr = skb_inner_transport_header(skb);
	} else {
		ip.hdr = skb_network_header(skb);
		l4.hdr = skb_transport_header(skb);
	}

	get_inner_l3_l4_type(skb, &ip, &l4, &l3_type, &l4_proto);

	if (l4_proto == IPPROTO_TCP)
		l4.tcp->check = ~csum_magic(&ip, IPPROTO_TCP);

	get_inner_l4_info(skb, &l4, l4_proto, &offset, &l4_offload);

	hinic3_set_tso_info(task, queue_info, l4_offload, offset,
			    skb_shinfo(skb)->gso_size);

	return 1;
}
382 
/* Program hardware VLAN tag insertion into the SQ task section. */
static void hinic3_set_vlan_tx_offload(struct hinic3_sq_task *task,
				       u16 vlan_tag, u8 vlan_tpid)
{
	u32 vlan_info;

	/* vlan_tpid: 0=select TPID0 in IPSU, 1=select TPID1 in IPSU
	 * 2=select TPID2 in IPSU, 3=select TPID3 in IPSU,
	 * 4=select TPID4 in IPSU
	 */
	vlan_info = SQ_TASK_INFO3_SET(vlan_tag, VLAN_TAG) |
		    SQ_TASK_INFO3_SET(vlan_tpid, VLAN_TPID) |
		    SQ_TASK_INFO3_SET(1, VLAN_TAG_VALID);

	task->vlan_offload = cpu_to_le32(vlan_info);
}
395 
hinic3_tx_offload(struct sk_buff * skb,struct hinic3_sq_task * task,__le32 * queue_info,struct hinic3_txq * txq)396 static u32 hinic3_tx_offload(struct sk_buff *skb, struct hinic3_sq_task *task,
397 			     __le32 *queue_info, struct hinic3_txq *txq)
398 {
399 	u32 offload = 0;
400 	int tso_cs_en;
401 
402 	task->pkt_info0 = 0;
403 	task->ip_identify = 0;
404 	task->rsvd = 0;
405 	task->vlan_offload = 0;
406 
407 	tso_cs_en = hinic3_tso(task, queue_info, skb);
408 	if (tso_cs_en < 0) {
409 		offload = HINIC3_TX_OFFLOAD_INVALID;
410 		return offload;
411 	} else if (tso_cs_en) {
412 		offload |= HINIC3_TX_OFFLOAD_TSO;
413 	} else {
414 		tso_cs_en = hinic3_tx_csum(txq, task, skb);
415 		if (tso_cs_en)
416 			offload |= HINIC3_TX_OFFLOAD_CSUM;
417 	}
418 
419 #define VLAN_INSERT_MODE_MAX 5
420 	if (unlikely(skb_vlan_tag_present(skb))) {
421 		/* select vlan insert mode by qid, default 802.1Q Tag type */
422 		hinic3_set_vlan_tx_offload(task, skb_vlan_tag_get(skb),
423 					   txq->q_id % VLAN_INSERT_MODE_MAX);
424 		offload |= HINIC3_TX_OFFLOAD_VLAN;
425 	}
426 
427 	if (unlikely(SQ_CTRL_QUEUE_INFO_GET(*queue_info, PLDOFF) >
428 		     SQ_CTRL_MAX_PLDOFF)) {
429 		offload = HINIC3_TX_OFFLOAD_INVALID;
430 		return offload;
431 	}
432 
433 	return offload;
434 }
435 
/* Return the current SQ owner bit, toggling the stored bit when the
 * requested WQEBBs wrap past the end of the work queue.
 */
static u16 hinic3_get_and_update_sq_owner(struct hinic3_io_queue *sq,
					  u16 curr_pi, u16 wqebb_cnt)
{
	u16 cur_owner = sq->owner;

	if (unlikely(curr_pi + wqebb_cnt >= sq->wq.q_depth))
		sq->owner = !sq->owner;

	return cur_owner;
}
446 
/* Reserve WQEBBs for one send WQE and fill @wqe_combo with pointers into
 * the work queue.
 *
 * A compact WQE (single WQEBB) is used when there is no offload and only
 * one SGE.  Otherwise an extended WQE is built: the ctrl/bd0 WQEBB first,
 * then an optional 16-byte task section when offload is needed, then
 * (num_sge - 1) buffer descriptors, possibly split in two parts when the
 * ring wraps.
 *
 * Return: the owner bit value to encode into the WQE control word.
 */
static u16 hinic3_set_wqe_combo(struct hinic3_txq *txq,
				struct hinic3_sq_wqe_combo *wqe_combo,
				u32 offload, u16 num_sge, u16 *curr_pi)
{
	struct hinic3_sq_bufdesc *first_part_wqebbs, *second_part_wqebbs;
	u16 first_part_wqebbs_num, tmp_pi;

	wqe_combo->ctrl_bd0 = hinic3_wq_get_one_wqebb(&txq->sq->wq, curr_pi);
	if (!offload && num_sge == 1) {
		wqe_combo->wqe_type = SQ_WQE_COMPACT_TYPE;
		return hinic3_get_and_update_sq_owner(txq->sq, *curr_pi, 1);
	}

	wqe_combo->wqe_type = SQ_WQE_EXTENDED_TYPE;

	if (offload) {
		wqe_combo->task = hinic3_wq_get_one_wqebb(&txq->sq->wq,
							  &tmp_pi);
		wqe_combo->task_type = SQ_WQE_TASKSECT_16BYTES;
	} else {
		wqe_combo->task_type = SQ_WQE_TASKSECT_46BITS;
	}

	if (num_sge > 1) {
		/* first wqebb contain bd0, and bd size is equal to sq wqebb
		 * size, so we use (num_sge - 1) as wanted weqbb_cnt
		 */
		hinic3_wq_get_multi_wqebbs(&txq->sq->wq, num_sge - 1, &tmp_pi,
					   &first_part_wqebbs,
					   &second_part_wqebbs,
					   &first_part_wqebbs_num);
		wqe_combo->bds_head = first_part_wqebbs;
		wqe_combo->bds_sec2 = second_part_wqebbs;
		wqe_combo->first_bds_num = first_part_wqebbs_num;
	}

	/* total reserved wqebbs: bd0 + optional task + (num_sge - 1) bds */
	return hinic3_get_and_update_sq_owner(txq->sq, *curr_pi,
					      num_sge + !!offload);
}
486 
/* Fill the WQE control word and queue_info for a prepared send WQE.
 *
 * Compact WQEs carry no queue_info, so it is cleared to avoid sending a
 * stale value to the chip.  Extended WQEs get the buffer-descriptor
 * count, task-section length and the caller's queue_info; the MSS field
 * is set to HINIC3_TX_MSS_DEFAULT when zero and raised to
 * HINIC3_TX_MSS_MIN when below the hardware minimum.
 */
static void hinic3_prepare_sq_ctrl(struct hinic3_sq_wqe_combo *wqe_combo,
				   __le32 queue_info, int nr_descs, u16 owner)
{
	struct hinic3_sq_wqe_desc *wqe_desc = wqe_combo->ctrl_bd0;

	if (wqe_combo->wqe_type == SQ_WQE_COMPACT_TYPE) {
		wqe_desc->ctrl_len |=
			cpu_to_le32(SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
				    SQ_CTRL_SET(wqe_combo->wqe_type, EXTENDED) |
				    SQ_CTRL_SET(owner, OWNER));

		/* compact wqe queue_info will transfer to chip */
		wqe_desc->queue_info = 0;
		return;
	}

	wqe_desc->ctrl_len |=
		cpu_to_le32(SQ_CTRL_SET(nr_descs, BUFDESC_NUM) |
			    SQ_CTRL_SET(wqe_combo->task_type, TASKSECT_LEN) |
			    SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
			    SQ_CTRL_SET(wqe_combo->wqe_type, EXTENDED) |
			    SQ_CTRL_SET(owner, OWNER));

	wqe_desc->queue_info = queue_info;
	wqe_desc->queue_info |= cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(1, UC));

	if (!SQ_CTRL_QUEUE_INFO_GET(wqe_desc->queue_info, MSS)) {
		/* no MSS supplied - use the driver default */
		wqe_desc->queue_info |=
		    cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(HINIC3_TX_MSS_DEFAULT, MSS));
	} else if (SQ_CTRL_QUEUE_INFO_GET(wqe_desc->queue_info, MSS) <
		   HINIC3_TX_MSS_MIN) {
		/* mss should not be less than 80 */
		wqe_desc->queue_info &=
		    cpu_to_le32(~SQ_CTRL_QUEUE_INFO_MSS_MASK);
		wqe_desc->queue_info |=
		    cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(HINIC3_TX_MSS_MIN, MSS));
	}
}
525 
/* Transmit a single skb on @txq: reserve WQEBBs, fill the task section
 * and buffer descriptors, DMA-map the data and ring the doorbell.
 *
 * On a DMA mapping failure the work-queue producer index and owner bit
 * are restored so the reserved WQEBBs are reclaimed.  Undersized skbs are
 * padded up to MIN_SKB_LEN first.
 *
 * Return: NETDEV_TX_OK (also when the packet is dropped) or
 * NETDEV_TX_BUSY when the ring is temporarily full.
 */
static netdev_tx_t hinic3_send_one_skb(struct sk_buff *skb,
				       struct net_device *netdev,
				       struct hinic3_txq *txq)
{
	struct hinic3_sq_wqe_combo wqe_combo = {};
	struct hinic3_tx_info *tx_info;
	struct hinic3_sq_task task;
	u16 wqebb_cnt, num_sge;
	__le32 queue_info = 0;
	u16 saved_wq_prod_idx;
	u16 owner, pi = 0;
	u8 saved_sq_owner;
	u32 offload;
	int err;

	if (unlikely(skb->len < MIN_SKB_LEN)) {
		if (skb_pad(skb, MIN_SKB_LEN - skb->len))
			goto err_out;

		skb->len = MIN_SKB_LEN;
	}

	num_sge = skb_shinfo(skb)->nr_frags + 1;
	/* assume normal wqe format + 1 wqebb for task info */
	wqebb_cnt = num_sge + 1;

	if (unlikely(hinic3_wq_free_wqebbs(&txq->sq->wq) < wqebb_cnt)) {
		/* remember the largest WQE that did not fit so the queue is
		 * only restarted once that much room is available
		 */
		if (likely(wqebb_cnt > txq->tx_stop_thrs))
			txq->tx_stop_thrs = min(wqebb_cnt, txq->tx_start_thrs);

		netif_subqueue_try_stop(netdev, txq->sq->q_id,
					hinic3_wq_free_wqebbs(&txq->sq->wq),
					txq->tx_start_thrs);

		return NETDEV_TX_BUSY;
	}

	offload = hinic3_tx_offload(skb, &task, &queue_info, txq);
	if (unlikely(offload == HINIC3_TX_OFFLOAD_INVALID)) {
		goto err_drop_pkt;
	} else if (!offload) {
		/* no task section needed, so one less wqebb */
		wqebb_cnt -= 1;
		if (unlikely(num_sge == 1 &&
			     skb->len > HINIC3_COMPACT_WQEE_SKB_MAX_LEN))
			goto err_drop_pkt;
	}

	/* save ring state so a mapping failure can be rolled back */
	saved_wq_prod_idx = txq->sq->wq.prod_idx;
	saved_sq_owner = txq->sq->owner;

	owner = hinic3_set_wqe_combo(txq, &wqe_combo, offload, num_sge, &pi);
	if (offload)
		*wqe_combo.task = task;

	tx_info = &txq->tx_info[pi];
	tx_info->skb = skb;
	tx_info->wqebb_cnt = wqebb_cnt;

	err = hinic3_tx_map_skb(netdev, skb, txq, tx_info, &wqe_combo);
	if (err) {
		/* Rollback work queue to reclaim the wqebb we did not use */
		txq->sq->wq.prod_idx = saved_wq_prod_idx;
		txq->sq->owner = saved_sq_owner;
		goto err_drop_pkt;
	}

	netif_subqueue_sent(netdev, txq->sq->q_id, skb->len);
	netif_subqueue_maybe_stop(netdev, txq->sq->q_id,
				  hinic3_wq_free_wqebbs(&txq->sq->wq),
				  txq->tx_stop_thrs,
				  txq->tx_start_thrs);

	hinic3_prepare_sq_ctrl(&wqe_combo, queue_info, num_sge, owner);
	hinic3_write_db(txq->sq, 0, DB_CFLAG_DP_SQ,
			hinic3_get_sq_local_pi(txq->sq));

	return NETDEV_TX_OK;

err_drop_pkt:
	dev_kfree_skb_any(skb);
err_out:
	return NETDEV_TX_OK;
}
609 
hinic3_xmit_frame(struct sk_buff * skb,struct net_device * netdev)610 netdev_tx_t hinic3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
611 {
612 	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
613 	u16 q_id = skb_get_queue_mapping(skb);
614 
615 	if (unlikely(!netif_carrier_ok(netdev)))
616 		goto err_drop_pkt;
617 
618 	if (unlikely(q_id >= nic_dev->q_params.num_qps))
619 		goto err_drop_pkt;
620 
621 	return hinic3_send_one_skb(skb, netdev, &nic_dev->txqs[q_id]);
622 
623 err_drop_pkt:
624 	dev_kfree_skb_any(skb);
625 
626 	return NETDEV_TX_OK;
627 }
628 
is_hw_complete_sq_process(struct hinic3_io_queue * sq)629 static bool is_hw_complete_sq_process(struct hinic3_io_queue *sq)
630 {
631 	u16 sw_pi, hw_ci;
632 
633 	sw_pi = hinic3_get_sq_local_pi(sq);
634 	hw_ci = hinic3_get_sq_hw_ci(sq);
635 
636 	return sw_pi == hw_ci;
637 }
638 
639 #define HINIC3_FLUSH_QUEUE_POLL_SLEEP_US   10000
640 #define HINIC3_FLUSH_QUEUE_POLL_TIMEOUT_US 10000000
hinic3_stop_sq(struct hinic3_txq * txq)641 static int hinic3_stop_sq(struct hinic3_txq *txq)
642 {
643 	struct hinic3_nic_dev *nic_dev = netdev_priv(txq->netdev);
644 	int err, rc;
645 
646 	err = read_poll_timeout(hinic3_force_drop_tx_pkt, rc,
647 				is_hw_complete_sq_process(txq->sq) || rc,
648 				HINIC3_FLUSH_QUEUE_POLL_SLEEP_US,
649 				HINIC3_FLUSH_QUEUE_POLL_TIMEOUT_US,
650 				true, nic_dev->hwdev);
651 	if (rc)
652 		return rc;
653 	else
654 		return err;
655 }
656 
657 /* packet transmission should be stopped before calling this function */
hinic3_flush_txqs(struct net_device * netdev)658 void hinic3_flush_txqs(struct net_device *netdev)
659 {
660 	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
661 	u16 qid;
662 	int err;
663 
664 	for (qid = 0; qid < nic_dev->q_params.num_qps; qid++) {
665 		err = hinic3_stop_sq(&nic_dev->txqs[qid]);
666 		netdev_tx_reset_subqueue(netdev, qid);
667 		if (err)
668 			netdev_err(netdev, "Failed to stop sq%u\n", qid);
669 	}
670 }
671 
672 #define HINIC3_BDS_PER_SQ_WQEBB \
673 	(HINIC3_SQ_WQEBB_SIZE / sizeof(struct hinic3_sq_bufdesc))
674 
/* Allocate per-queue tx_info and buffer-descriptor arrays for @num_sq
 * queues.  On failure, everything allocated so far is released.
 *
 * Return: 0 on success, -ENOMEM on allocation failure.
 */
int hinic3_alloc_txqs_res(struct net_device *netdev, u16 num_sq,
			  u32 sq_depth, struct hinic3_dyna_txq_res *txqs_res)
{
	struct hinic3_dyna_txq_res *res;
	int i;

	for (i = 0; i < num_sq; i++) {
		res = &txqs_res[i];

		res->tx_info = kzalloc_objs(*res->tx_info, sq_depth);
		if (!res->tx_info)
			goto err_rollback;

		res->bds = kzalloc_objs(*res->bds,
					sq_depth * HINIC3_BDS_PER_SQ_WQEBB +
					HINIC3_MAX_SQ_SGE);
		if (!res->bds) {
			kfree(res->tx_info);
			goto err_rollback;
		}
	}

	return 0;

err_rollback:
	/* free the fully-allocated queues before the one that failed */
	while (--i >= 0) {
		res = &txqs_res[i];
		kfree(res->bds);
		kfree(res->tx_info);
	}

	return -ENOMEM;
}
709 
/* Free the per-queue resources allocated by hinic3_alloc_txqs_res(),
 * releasing any skbs still held by the rings first.
 */
void hinic3_free_txqs_res(struct net_device *netdev, u16 num_sq,
			  u32 sq_depth, struct hinic3_dyna_txq_res *txqs_res)
{
	struct hinic3_dyna_txq_res *res;
	int i;

	for (i = 0; i < num_sq; i++) {
		res = &txqs_res[i];

		free_all_tx_skbs(netdev, sq_depth, res->tx_info);
		kfree(res->bds);
		kfree(res->tx_info);
	}
}
724 
/* Wire the pre-allocated resources from @txqs_res into the driver's TX
 * queue structures and set depth-dependent flow-control thresholds.
 *
 * Return: 0 (always succeeds).
 */
int hinic3_configure_txqs(struct net_device *netdev, u16 num_sq,
			  u32 sq_depth, struct hinic3_dyna_txq_res *txqs_res)
{
	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
	struct hinic3_dyna_txq_res *res;
	struct hinic3_txq *txq;
	u32 i;
	u16 qid;

	for (qid = 0; qid < num_sq; qid++) {
		txq = &nic_dev->txqs[qid];
		res = &txqs_res[qid];

		txq->q_depth = sq_depth;
		txq->q_mask = sq_depth - 1;

		/* thresholds scale with ring size, capped at the defaults */
		txq->tx_stop_thrs = min(HINIC3_DEFAULT_STOP_THRS,
					sq_depth / 20);
		txq->tx_start_thrs = min(HINIC3_DEFAULT_START_THRS,
					 sq_depth / 10);

		txq->tx_info = res->tx_info;
		/* each slot gets its own slice of the shared bds array */
		for (i = 0; i < sq_depth; i++)
			txq->tx_info[i].dma_info =
				&res->bds[i * HINIC3_BDS_PER_SQ_WQEBB];

		txq->sq = &nic_dev->nic_io->sq[qid];
	}

	return 0;
}
756 
/* Reclaim completed TX descriptors for @txq: unmap and free their skbs,
 * return the WQEBBs to the work queue, and wake the subqueue when enough
 * room becomes available.
 *
 * Return: true when a full budget of HINIC3_TX_POLL_WEIGHT packets was
 * reclaimed (more work may remain), false otherwise.
 */
bool hinic3_tx_poll(struct hinic3_txq *txq, int budget)
{
	struct net_device *netdev = txq->netdev;
	u16 hw_ci, sw_ci, q_id = txq->sq->q_id;
	struct hinic3_tx_info *tx_info;
	unsigned int bytes_compl = 0;
	unsigned int pkts = 0;
	u16 wqebb_cnt = 0;

	hw_ci = hinic3_get_sq_hw_ci(txq->sq);
	/* order the HW CI read before reading the completed entries */
	dma_rmb();
	sw_ci = hinic3_get_sq_local_ci(txq->sq);

	do {
		tx_info = &txq->tx_info[sw_ci];

		/* Did all wqebb of this wqe complete? */
		if (hw_ci == sw_ci ||
		    ((hw_ci - sw_ci) & txq->q_mask) < tx_info->wqebb_cnt)
			break;

		sw_ci = (sw_ci + tx_info->wqebb_cnt) & txq->q_mask;
		net_prefetch(&txq->tx_info[sw_ci]);

		wqebb_cnt += tx_info->wqebb_cnt;
		bytes_compl += tx_info->skb->len;
		pkts++;

		hinic3_tx_unmap_skb(netdev, tx_info->skb, tx_info->dma_info);
		napi_consume_skb(tx_info->skb, budget);
		tx_info->skb = NULL;
	} while (likely(pkts < HINIC3_TX_POLL_WEIGHT));

	hinic3_wq_put_wqebbs(&txq->sq->wq, wqebb_cnt);

	netif_subqueue_completed_wake(netdev, q_id, pkts, bytes_compl,
				      hinic3_wq_free_wqebbs(&txq->sq->wq),
				      txq->tx_start_thrs);

	return pkts == HINIC3_TX_POLL_WEIGHT;
}
798