1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 /*
3 * Copyright (C) 2017 Intel Deutschland GmbH
4 * Copyright (C) 2018-2020, 2023-2025 Intel Corporation
5 */
6 #include <net/tso.h>
7 #include <linux/tcp.h>
8
9 #include "iwl-debug.h"
10 #include "iwl-csr.h"
11 #include "iwl-io.h"
12 #include "internal.h"
13 #include "fw/api/tx.h"
14 #include "fw/api/commands.h"
15 #include "fw/api/datapath.h"
16 #include "iwl-scd.h"
17
18 static struct page *get_workaround_page(struct iwl_trans *trans,
19 struct sk_buff *skb)
20 {
21 struct iwl_tso_page_info *info;
22 struct page **page_ptr;
23 struct page *ret;
24 dma_addr_t phys;
25
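/* the head of this skb's chain of TSO/workaround pages lives in skb->cb at cb_data_offs */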
26 page_ptr = (void *)((u8 *)skb->cb + trans->conf.cb_data_offs);
27
28 ret = alloc_page(GFP_ATOMIC);
29 if (!ret)
30 return NULL;
31
32 info = IWL_TSO_PAGE_INFO(page_address(ret));
33
34 /* Create a DMA mapping for the page */
35 phys = dma_map_page_attrs(trans->dev, ret, 0, PAGE_SIZE,
36 DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
37 if (unlikely(dma_mapping_error(trans->dev, phys))) {
38 __free_page(ret);
39 return NULL;
40 }
41
42 /* Store physical address and set use count */
43 info->dma_addr = phys;
44 refcount_set(&info->use_count, 1);
45
46 /* set the chaining pointer to the previous page, if there is one */
47 info->next = *page_ptr;
48 *page_ptr = ret;
49
50 return ret;
51 }
52
53 /*
54 * Add a TB and if needed apply the FH HW bug workaround;
55 * meta != NULL indicates that it's a page mapping and we
56 * need to dma_unmap_page() and set the meta->tbs bit in
57 * this case.
58 */
59 static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
60 struct sk_buff *skb,
61 struct iwl_tfh_tfd *tfd,
62 dma_addr_t phys, void *virt,
63 u16 len, struct iwl_cmd_meta *meta,
64 bool unmap)
65 {
66 dma_addr_t oldphys = phys;
67 struct page *page;
68 int ret;
69
70 if (unlikely(dma_mapping_error(trans->dev, phys)))
71 return -ENOMEM;
72
73 if (likely(!iwl_txq_crosses_4g_boundary(phys, len))) {
74 ret = iwl_txq_gen2_set_tb(trans, tfd, phys, len);
75
76 if (ret < 0)
77 goto unmap;
78
79 if (meta)
80 meta->tbs |= BIT(ret);
81
82 ret = 0;
83 goto trace;
84 }
85
86 /*
87 * Work around a hardware bug. If (as expressed in the
88 * condition above) the TB crosses a 2^32 address boundary,
89 * then the next TB may be accessed with the wrong
90 * address.
91 * To work around it, copy the data elsewhere and make
92 * a new mapping for it so the device will not fail.
93 */
94
95 if (WARN_ON(len > IWL_TSO_PAGE_DATA_SIZE)) {
96 ret = -ENOBUFS;
97 goto unmap;
98 }
99
100 page = get_workaround_page(trans, skb);
101 if (!page) {
102 ret = -ENOMEM;
103 goto unmap;
104 }
105
106 memcpy(page_address(page), virt, len);
107
108 /*
109 * This is a bit odd, but performance does not matter here; what
110 * matters are the expectations of the calling code and TB cleanup
111 * function.
112 *
113 * As such, if unmap is set, then create another mapping for the TB
114 * entry as it will be unmapped later. On the other hand, if it is not
115 * set, then the TB entry will not be unmapped and instead we simply
116 * reference and sync the mapping that get_workaround_page() created.
117 */
118 if (unmap) {
119 phys = dma_map_single(trans->dev, page_address(page), len,
120 DMA_TO_DEVICE);
121 if (unlikely(dma_mapping_error(trans->dev, phys)))
122 return -ENOMEM;
123 } else {
124 phys = iwl_pcie_get_tso_page_phys(page_address(page));
125 dma_sync_single_for_device(trans->dev, phys, len,
126 DMA_TO_DEVICE);
127 }
128
129 ret = iwl_txq_gen2_set_tb(trans, tfd, phys, len);
130 if (ret < 0) {
131 /* unmap the new allocation as single */
132 oldphys = phys;
133 meta = NULL;
134 goto unmap;
135 }
136
137 IWL_DEBUG_TX(trans,
138 "TB bug workaround: copied %d bytes from 0x%llx to 0x%llx\n",
139 len, (unsigned long long)oldphys,
140 (unsigned long long)phys);
141
142 ret = 0;
143 unmap:
144 if (!unmap)
145 goto trace;
146
147 if (meta)
148 dma_unmap_page(trans->dev, oldphys, len, DMA_TO_DEVICE);
149 else
150 dma_unmap_single(trans->dev, oldphys, len, DMA_TO_DEVICE);
151 trace:
152 trace_iwlwifi_dev_tx_tb(trans->dev, skb, virt, phys, len);
153
154 return ret;
155 }
156
157 static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
158 struct sk_buff *skb,
159 struct iwl_tfh_tfd *tfd,
160 struct iwl_cmd_meta *out_meta,
161 int start_len,
162 u8 hdr_len,
163 struct iwl_device_tx_cmd *dev_cmd)
164 {
165 #ifdef CONFIG_INET
166 struct iwl_tx_cmd_v9 *tx_cmd = (void *)dev_cmd->payload;
167 struct ieee80211_hdr *hdr = (void *)skb->data;
168 unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
169 unsigned int mss = skb_shinfo(skb)->gso_size;
170 unsigned int data_offset = 0;
171 dma_addr_t start_hdr_phys;
172 u16 length, amsdu_pad;
173 u8 *start_hdr;
174 struct sg_table *sgt;
175 struct tso_t tso;
176
177 trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd),
178 &dev_cmd->hdr, start_len, 0);
179
180 ip_hdrlen = skb_network_header_len(skb);
181 snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
182 total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len;
183 amsdu_pad = 0;
184
185 /* total amount of header we may need for this A-MSDU */
186 hdr_room = DIV_ROUND_UP(total_len, mss) *
187 (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr));
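/* (the 3 covers the worst-case per-subframe alignment padding computed below) */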
188
189 /* Our device supports 9 segments at most, so it will fit in one page */
190 sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room,
191 snap_ip_tcp_hdrlen + hdr_len);
192 if (!sgt)
193 return -ENOMEM;
194
195 start_hdr_phys = iwl_pcie_get_tso_page_phys(start_hdr);
196
197 /*
198 * Pull the ieee80211 header to be able to use TSO core,
199 * we will restore it for the tx_status flow.
200 */
201 skb_pull(skb, hdr_len);
202
203 /*
204 * Remove the length of all the headers that we don't actually
205 * have in the MPDU by themselves, but that we duplicate into
206 * all the different MSDUs inside the A-MSDU.
207 */
208 le16_add_cpu(&tx_cmd->len, -snap_ip_tcp_hdrlen);
209
210 tso_start(skb, &tso);
211
212 while (total_len) {
213 /* this is the data left for this subframe */
214 unsigned int data_left = min_t(unsigned int, mss, total_len);
215 unsigned int tb_len;
216 dma_addr_t tb_phys;
217 u8 *pos_hdr = start_hdr;
218
219 total_len -= data_left;
220
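/* consume the padding owed by the previous subframe, then work out how much padding the next one will need for 4-byte alignment */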
221 memset(pos_hdr, 0, amsdu_pad);
222 pos_hdr += amsdu_pad;
223 amsdu_pad = (4 - (sizeof(struct ethhdr) + snap_ip_tcp_hdrlen +
224 data_left)) & 0x3;
225 ether_addr_copy(pos_hdr, ieee80211_get_DA(hdr));
226 pos_hdr += ETH_ALEN;
227 ether_addr_copy(pos_hdr, ieee80211_get_SA(hdr));
228 pos_hdr += ETH_ALEN;
229
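/* the A-MSDU subframe length field is big endian */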
230 length = snap_ip_tcp_hdrlen + data_left;
231 *((__be16 *)pos_hdr) = cpu_to_be16(length);
232 pos_hdr += sizeof(length);
233
234 /*
235 * This will copy the SNAP as well, which will be considered
236 * part of the MAC header.
237 */
238 tso_build_hdr(skb, pos_hdr, &tso, data_left, !total_len);
239
240 pos_hdr += snap_ip_tcp_hdrlen;
241
242 tb_len = pos_hdr - start_hdr;
243 tb_phys = iwl_pcie_get_tso_page_phys(start_hdr);
244
245 /*
246 * No need for _with_wa, this is from the TSO page and
247 * we leave some space at the end of it so we can't hit
248 * the buggy scenario.
249 */
250 iwl_txq_gen2_set_tb(trans, tfd, tb_phys, tb_len);
251 trace_iwlwifi_dev_tx_tb(trans->dev, skb, start_hdr,
252 tb_phys, tb_len);
253 /* add this subframe's headers' length to the tx_cmd */
254 le16_add_cpu(&tx_cmd->len, tb_len);
255
256 /* prepare the start_hdr for the next subframe */
257 start_hdr = pos_hdr;
258
259 /* put the payload */
260 while (data_left) {
261 int ret;
262
263 tb_len = min_t(unsigned int, tso.size, data_left);
264 tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset,
265 tb_len);
266 /* Not a real mapping error, use direct comparison */
267 if (unlikely(tb_phys == DMA_MAPPING_ERROR))
268 goto out_err;
269
270 ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd,
271 tb_phys, tso.data,
272 tb_len, NULL, false);
273 if (ret)
274 goto out_err;
275
276 data_left -= tb_len;
277 data_offset += tb_len;
278 tso_build_data(skb, &tso, tb_len);
279 }
280 }
281
282 dma_sync_single_for_device(trans->dev, start_hdr_phys, hdr_room,
283 DMA_TO_DEVICE);
284
285 /* re-add the WiFi header */
286 skb_push(skb, hdr_len);
287
288 return 0;
289
290 out_err:
291 #endif
292 return -EINVAL;
293 }
294
295 static struct
296 iwl_tfh_tfd *iwl_txq_gen2_build_tx_amsdu(struct iwl_trans *trans,
297 struct iwl_txq *txq,
298 struct iwl_device_tx_cmd *dev_cmd,
299 struct sk_buff *skb,
300 struct iwl_cmd_meta *out_meta,
301 int hdr_len,
302 int tx_cmd_len)
303 {
304 int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
305 struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
306 dma_addr_t tb_phys;
307 int len;
308 void *tb1_addr;
309
310 tb_phys = iwl_txq_get_first_tb_dma(txq, idx);
311
312 /*
313 * No need for _with_wa, the first TB allocation is aligned up
314 * to a 64-byte boundary and thus can't be at the end or cross
315 * a page boundary (much less a 2^32 boundary).
316 */
317 iwl_txq_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE);
318
319 /*
320 * The second TB (tb1) points to the remainder of the TX command
321 * and the 802.11 header - dword aligned size
322 * (This calculation modifies the TX command, so do it before the
323 * setup of the first TB)
324 */
325 len = tx_cmd_len + sizeof(struct iwl_cmd_header) + hdr_len -
326 IWL_FIRST_TB_SIZE;
327
328 /* do not align A-MSDU to dword as the subframe header aligns it */
329
330 /* map the data for TB1 */
331 tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
332 tb_phys = dma_map_single(trans->dev, tb1_addr, len, DMA_TO_DEVICE);
333 if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
334 goto out_err;
335 /*
336 * No need for _with_wa(), we ensure (via alignment) that the data
337 * here can never cross or end at a page boundary.
338 */
339 iwl_txq_gen2_set_tb(trans, tfd, tb_phys, len);
340
341 if (iwl_txq_gen2_build_amsdu(trans, skb, tfd, out_meta,
342 len + IWL_FIRST_TB_SIZE, hdr_len, dev_cmd))
343 goto out_err;
344
345 /* building the A-MSDU might have changed this data, memcpy it now */
346 memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE);
347 return tfd;
348
349 out_err:
350 iwl_pcie_free_tso_pages(trans, skb, out_meta);
351 iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
352 return NULL;
353 }
354
355 static int iwl_txq_gen2_tx_add_frags(struct iwl_trans *trans,
356 struct sk_buff *skb,
357 struct iwl_tfh_tfd *tfd,
358 struct iwl_cmd_meta *out_meta)
359 {
360 int i;
361
362 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
363 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
364 dma_addr_t tb_phys;
365 unsigned int fragsz = skb_frag_size(frag);
366 int ret;
367
368 if (!fragsz)
369 continue;
370
371 tb_phys = skb_frag_dma_map(trans->dev, frag, 0,
372 fragsz, DMA_TO_DEVICE);
373 ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
374 skb_frag_address(frag),
375 fragsz, out_meta, true);
376 if (ret)
377 return ret;
378 }
379
380 return 0;
381 }
382
383 static struct
384 iwl_tfh_tfd *iwl_txq_gen2_build_tx(struct iwl_trans *trans,
385 struct iwl_txq *txq,
386 struct iwl_device_tx_cmd *dev_cmd,
387 struct sk_buff *skb,
388 struct iwl_cmd_meta *out_meta,
389 int hdr_len,
390 int tx_cmd_len,
391 bool pad)
392 {
393 int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
394 struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
395 dma_addr_t tb_phys;
396 int len, tb1_len, tb2_len;
397 void *tb1_addr;
398 struct sk_buff *frag;
399
400 tb_phys = iwl_txq_get_first_tb_dma(txq, idx);
401
402 /* The first TB points to bi-directional DMA data */
403 memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE);
404
405 /*
406 * No need for _with_wa, the first TB allocation is aligned up
407 * to a 64-byte boundary and thus can't be at the end or cross
408 * a page boundary (much less a 2^32 boundary).
409 */
410 iwl_txq_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE);
411
412 /*
413 * The second TB (tb1) points to the remainder of the TX command
414 * and the 802.11 header - dword aligned size
415 * (This calculation modifies the TX command, so do it before the
416 * setup of the first TB)
417 */
418 len = tx_cmd_len + sizeof(struct iwl_cmd_header) + hdr_len -
419 IWL_FIRST_TB_SIZE;
420
421 if (pad)
422 tb1_len = ALIGN(len, 4);
423 else
424 tb1_len = len;
425
426 /* map the data for TB1 */
427 tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
428 tb_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
429 if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
430 goto out_err;
431 /*
432 * No need for _with_wa(), we ensure (via alignment) that the data
433 * here can never cross or end at a page boundary.
434 */
435 iwl_txq_gen2_set_tb(trans, tfd, tb_phys, tb1_len);
436 trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd), &dev_cmd->hdr,
437 IWL_FIRST_TB_SIZE + tb1_len, hdr_len);
438
439 /* set up TFD's third entry to point to remainder of skb's head */
440 tb2_len = skb_headlen(skb) - hdr_len;
441
442 if (tb2_len > 0) {
443 int ret;
444
445 tb_phys = dma_map_single(trans->dev, skb->data + hdr_len,
446 tb2_len, DMA_TO_DEVICE);
447 ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
448 skb->data + hdr_len, tb2_len,
449 NULL, true);
450 if (ret)
451 goto out_err;
452 }
453
454 if (iwl_txq_gen2_tx_add_frags(trans, skb, tfd, out_meta))
455 goto out_err;
456
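/* also map the head and page fragments of any skbs chained on the frag list */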
457 skb_walk_frags(skb, frag) {
458 int ret;
459
460 tb_phys = dma_map_single(trans->dev, frag->data,
461 skb_headlen(frag), DMA_TO_DEVICE);
462 ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
463 frag->data,
464 skb_headlen(frag), NULL,
465 true);
466 if (ret)
467 goto out_err;
468 if (iwl_txq_gen2_tx_add_frags(trans, frag, tfd, out_meta))
469 goto out_err;
470 }
471
472 return tfd;
473
474 out_err:
475 iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
476 return NULL;
477 }
478
479 static
480 struct iwl_tfh_tfd *iwl_txq_gen2_build_tfd(struct iwl_trans *trans,
481 struct iwl_txq *txq,
482 struct iwl_device_tx_cmd *dev_cmd,
483 struct sk_buff *skb,
484 struct iwl_cmd_meta *out_meta)
485 {
486 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
487 int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
488 struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
489 int len, hdr_len;
490 bool amsdu;
491
492 /* There must be data left over for TB1 or this code must be changed */
493 BUILD_BUG_ON(sizeof(struct iwl_tx_cmd_v9) < IWL_FIRST_TB_SIZE);
494 BUILD_BUG_ON(sizeof(struct iwl_cmd_header) +
495 offsetofend(struct iwl_tx_cmd_v9, dram_info) >
496 IWL_FIRST_TB_SIZE);
497 BUILD_BUG_ON(sizeof(struct iwl_tx_cmd) < IWL_FIRST_TB_SIZE);
498 BUILD_BUG_ON(sizeof(struct iwl_cmd_header) +
499 offsetofend(struct iwl_tx_cmd, dram_info) >
500 IWL_FIRST_TB_SIZE);
501
502 memset(tfd, 0, sizeof(*tfd));
503
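/* pre-AX210 devices use the v9 TX command layout */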
504 if (trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
505 len = sizeof(struct iwl_tx_cmd_v9);
506 else
507 len = sizeof(struct iwl_tx_cmd);
508
509 amsdu = ieee80211_is_data_qos(hdr->frame_control) &&
510 (*ieee80211_get_qos_ctl(hdr) &
511 IEEE80211_QOS_CTL_A_MSDU_PRESENT);
512
513 hdr_len = ieee80211_hdrlen(hdr->frame_control);
514
515 /*
516 * Only build A-MSDUs here if doing so by GSO, otherwise it may be
517 * an A-MSDU for other reasons, e.g. NAN or an A-MSDU having been
518 * built in the higher layers already.
519 */
520 if (amsdu && skb_shinfo(skb)->gso_size)
521 return iwl_txq_gen2_build_tx_amsdu(trans, txq, dev_cmd, skb,
522 out_meta, hdr_len, len);
523 return iwl_txq_gen2_build_tx(trans, txq, dev_cmd, skb, out_meta,
524 hdr_len, len, !amsdu);
525 }
526
527 int iwl_txq_space(struct iwl_trans *trans, const struct iwl_txq *q)
528 {
529 unsigned int max;
530 unsigned int used;
531
532 /*
533 * To avoid ambiguity between empty and completely full queues, there
534 * should always be fewer than max_tfd_queue_size elements in the queue.
535 * If q->n_window is smaller than max_tfd_queue_size, there is no need
536 * to reserve any queue entries for this purpose.
537 */
538 if (q->n_window < trans->mac_cfg->base->max_tfd_queue_size)
539 max = q->n_window;
540 else
541 max = trans->mac_cfg->base->max_tfd_queue_size - 1;
542
543 /*
544 * max_tfd_queue_size is a power of 2, so the following is equivalent to
545 * modulo by max_tfd_queue_size and is well defined.
546 */
547 used = (q->write_ptr - q->read_ptr) &
548 (trans->mac_cfg->base->max_tfd_queue_size - 1);
549
550 if (WARN_ON(used > max))
551 return 0;
552
553 return max - used;
554 }
555
556 /*
557 * iwl_pcie_gen2_update_byte_tbl - Set up entry in Tx byte-count array
558 */
559 static void iwl_pcie_gen2_update_byte_tbl(struct iwl_trans *trans,
560 struct iwl_txq *txq, u16 byte_cnt,
561 int num_tbs)
562 {
563 int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
564 struct iwl_bc_tbl_entry *scd_bc_tbl = txq->bc_tbl.addr;
565 u8 filled_tfd_size, num_fetch_chunks;
566 u16 len = byte_cnt;
567 __le16 bc_ent;
568
569 if (WARN(idx >= txq->n_window, "%d >= %d\n", idx, txq->n_window))
570 return;
571
572 filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) +
573 num_tbs * sizeof(struct iwl_tfh_tb);
574 /*
575 * filled_tfd_size contains the number of filled bytes in the TFD.
576 * Dividing it by 64 will give the number of chunks to fetch
577 * to SRAM: 0 for one chunk, 1 for 2, and so on.
578 * If, for example, TFD contains only 3 TBs then 32 bytes
579 * of the TFD are used, and only one chunk of 64 bytes should
580 * be fetched
581 */
582 num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;
583
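/* AX210 and later use a 14-bit byte count with the fetch-chunk count above it; older gen2 devices use a 12-bit count in dwords */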
584 if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
585 WARN_ON(len > 0x3FFF);
586 bc_ent = cpu_to_le16(len | (num_fetch_chunks << 14));
587 } else {
588 len = DIV_ROUND_UP(len, 4);
589 WARN_ON(len > 0xFFF);
590 bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12));
591 }
592
593 scd_bc_tbl[idx].tfd_offset = bc_ent;
594 }
595
596 static u8 iwl_txq_gen2_get_num_tbs(struct iwl_tfh_tfd *tfd)
597 {
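/* the TB count is in the low 5 bits of num_tbs */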
598 return le16_to_cpu(tfd->num_tbs) & 0x1f;
599 }
600
601 int iwl_txq_gen2_set_tb(struct iwl_trans *trans, struct iwl_tfh_tfd *tfd,
602 dma_addr_t addr, u16 len)
603 {
604 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
605 int idx = iwl_txq_gen2_get_num_tbs(tfd);
606 struct iwl_tfh_tb *tb;
607
608 /* Only WARN here so we know about the issue, but we mess up our
609 * unmap path because not every place currently checks for errors
610 * returned from this function - it can only return an error if
611 * there's no more space, and so when we know there is enough we
612 * don't always check ...
613 */
614 WARN(iwl_txq_crosses_4g_boundary(addr, len),
615 "possible DMA problem with iova:0x%llx, len:%d\n",
616 (unsigned long long)addr, len);
617
618 if (WARN_ON(idx >= IWL_TFH_NUM_TBS))
619 return -EINVAL;
620 tb = &tfd->tbs[idx];
621
622 /* Each TFD can point to a maximum of max_tbs Tx buffers */
623 if (le16_to_cpu(tfd->num_tbs) >= trans_pcie->txqs.tfd.max_tbs) {
624 IWL_ERR(trans, "Error can not send more than %d chunks\n",
625 trans_pcie->txqs.tfd.max_tbs);
626 return -EINVAL;
627 }
628
629 put_unaligned_le64(addr, &tb->addr);
630 tb->tb_len = cpu_to_le16(len);
631
632 tfd->num_tbs = cpu_to_le16(idx + 1);
633
634 return idx;
635 }
636
637 void iwl_txq_gen2_tfd_unmap(struct iwl_trans *trans,
638 struct iwl_cmd_meta *meta,
639 struct iwl_tfh_tfd *tfd)
640 {
641 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
642 int i, num_tbs;
643
644 /* Sanity check on number of chunks */
645 num_tbs = iwl_txq_gen2_get_num_tbs(tfd);
646
647 if (num_tbs > trans_pcie->txqs.tfd.max_tbs) {
648 IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
649 return;
650 }
651
652 /* TB1 is mapped directly, the rest is the TSO page and SG list. */
653 if (meta->sg_offset)
654 num_tbs = 2;
655
656 /* first TB is never freed - it's the bidirectional DMA data */
657 for (i = 1; i < num_tbs; i++) {
658 if (meta->tbs & BIT(i))
659 dma_unmap_page(trans->dev,
660 le64_to_cpu(tfd->tbs[i].addr),
661 le16_to_cpu(tfd->tbs[i].tb_len),
662 DMA_TO_DEVICE);
663 else
664 dma_unmap_single(trans->dev,
665 le64_to_cpu(tfd->tbs[i].addr),
666 le16_to_cpu(tfd->tbs[i].tb_len),
667 DMA_TO_DEVICE);
668 }
669
670 iwl_txq_set_tfd_invalid_gen2(trans, tfd);
671 }
672
673 static void iwl_txq_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
674 {
675 /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
676 * idx is bounded by n_window
677 */
678 int idx = iwl_txq_get_cmd_index(txq, txq->read_ptr);
679 struct sk_buff *skb;
680
681 lockdep_assert_held(&txq->lock);
682
683 if (!txq->entries)
684 return;
685
686 iwl_txq_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
687 iwl_txq_get_tfd(trans, txq, idx));
688
689 skb = txq->entries[idx].skb;
690
691 /* Can be called from irqs-disabled context
692 * If skb is not NULL, it means that the whole queue is being
693 * freed and that the queue is not empty - free the skb
694 */
695 if (skb) {
696 iwl_op_mode_free_skb(trans->op_mode, skb);
697 txq->entries[idx].skb = NULL;
698 }
699 }
700
701 /*
702 * iwl_txq_inc_wr_ptr - Send new write index to hardware
703 */
704 static void iwl_txq_inc_wr_ptr(struct iwl_trans *trans, struct iwl_txq *txq)
705 {
706 lockdep_assert_held(&txq->lock);
707
708 IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq->id, txq->write_ptr);
709
710 /*
711 * if not in power-save mode, uCode will never sleep when we're
712 * trying to tx (during RFKILL, we're not trying to tx).
713 */
714 iwl_write32(trans, HBUS_TARG_WRPTR, txq->write_ptr | (txq->id << 16));
715 }
716
717 int iwl_txq_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb,
718 struct iwl_device_tx_cmd *dev_cmd, int txq_id)
719 {
720 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
721 struct iwl_cmd_meta *out_meta;
722 struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
723 u16 cmd_len;
724 int idx;
725 void *tfd;
726
727 if (WARN_ONCE(txq_id >= IWL_MAX_TVQM_QUEUES,
728 "queue %d out of range", txq_id))
729 return -EINVAL;
730
731 if (WARN_ONCE(!test_bit(txq_id, trans_pcie->txqs.queue_used),
732 "TX on unused queue %d\n", txq_id))
733 return -EINVAL;
734
735 if (skb_is_nonlinear(skb) &&
736 skb_shinfo(skb)->nr_frags > IWL_TRANS_PCIE_MAX_FRAGS(trans_pcie) &&
737 __skb_linearize(skb))
738 return -ENOMEM;
739
740 spin_lock(&txq->lock);
741
742 if (iwl_txq_space(trans, txq) < txq->high_mark) {
743 iwl_txq_stop(trans, txq);
744
745 /* don't put the packet on the ring, if there is no room */
746 if (unlikely(iwl_txq_space(trans, txq) < 3)) {
747 struct iwl_device_tx_cmd **dev_cmd_ptr;
748
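/* stash the command pointer next to the page pointer in skb->cb so the packet can be sent later from the overflow queue */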
749 dev_cmd_ptr = (void *)((u8 *)skb->cb +
750 trans->conf.cb_data_offs +
751 sizeof(void *));
752
753 *dev_cmd_ptr = dev_cmd;
754 __skb_queue_tail(&txq->overflow_q, skb);
755 spin_unlock(&txq->lock);
756 return 0;
757 }
758 }
759
760 idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
761
762 /* Set up driver data for this TFD */
763 txq->entries[idx].skb = skb;
764 txq->entries[idx].cmd = dev_cmd;
765
766 dev_cmd->hdr.sequence =
767 cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
768 INDEX_TO_SEQ(idx)));
769
770 /* Set up first empty entry in queue's array of Tx/cmd buffers */
771 out_meta = &txq->entries[idx].meta;
772 memset(out_meta, 0, sizeof(*out_meta));
773
774 tfd = iwl_txq_gen2_build_tfd(trans, txq, dev_cmd, skb, out_meta);
775 if (!tfd) {
776 spin_unlock(&txq->lock);
777 return -1;
778 }
779
780 if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
781 struct iwl_tx_cmd *tx_cmd =
782 (void *)dev_cmd->payload;
783
784 cmd_len = le16_to_cpu(tx_cmd->len);
785 } else {
786 struct iwl_tx_cmd_v9 *tx_cmd_v9 =
787 (void *)dev_cmd->payload;
788
789 cmd_len = le16_to_cpu(tx_cmd_v9->len);
790 }
791
792 /* Set up entry for this TFD in Tx byte-count array */
793 iwl_pcie_gen2_update_byte_tbl(trans, txq, cmd_len,
794 iwl_txq_gen2_get_num_tbs(tfd));
795
796 /* start timer if queue currently empty */
797 if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
798 mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
799
800 /* Tell device the write index *just past* this latest filled TFD */
801 txq->write_ptr = iwl_txq_inc_wrap(trans, txq->write_ptr);
802 iwl_txq_inc_wr_ptr(trans, txq);
803 /*
804 * At this point the frame is "transmitted" successfully
805 * and we will get a TX status notification eventually.
806 */
807 spin_unlock(&txq->lock);
808 return 0;
809 }
810
811 /*************** HOST COMMAND QUEUE FUNCTIONS *****/
812
813 /*
814 * iwl_txq_gen2_unmap - Unmap any remaining DMA mappings and free skb's
815 */
816 static void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id)
817 {
818 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
819 struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
820
821 spin_lock_bh(&txq->reclaim_lock);
822 spin_lock(&txq->lock);
823 while (txq->write_ptr != txq->read_ptr) {
824 IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
825 txq_id, txq->read_ptr);
826
827 if (txq_id != trans->conf.cmd_queue) {
828 int idx = iwl_txq_get_cmd_index(txq, txq->read_ptr);
829 struct iwl_cmd_meta *cmd_meta = &txq->entries[idx].meta;
830 struct sk_buff *skb = txq->entries[idx].skb;
831
832 if (!WARN_ON_ONCE(!skb))
833 iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
834 }
835 iwl_txq_gen2_free_tfd(trans, txq);
836 txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
837 }
838
839 while (!skb_queue_empty(&txq->overflow_q)) {
840 struct sk_buff *skb = __skb_dequeue(&txq->overflow_q);
841
842 iwl_op_mode_free_skb(trans->op_mode, skb);
843 }
844
845 spin_unlock(&txq->lock);
846 spin_unlock_bh(&txq->reclaim_lock);
847
848 /* just in case - this queue may have been stopped */
849 iwl_trans_pcie_wake_queue(trans, txq);
850 }
851
852 static void iwl_txq_gen2_free_memory(struct iwl_trans *trans,
853 struct iwl_txq *txq)
854 {
855 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
856 struct device *dev = trans->dev;
857
858 /* De-alloc circular buffer of TFDs */
859 if (txq->tfds) {
860 dma_free_coherent(dev,
861 trans_pcie->txqs.tfd.size * txq->n_window,
862 txq->tfds, txq->dma_addr);
863 dma_free_coherent(dev,
864 sizeof(*txq->first_tb_bufs) * txq->n_window,
865 txq->first_tb_bufs, txq->first_tb_dma);
866 }
867
868 kfree(txq->entries);
869 if (txq->bc_tbl.addr)
870 dma_pool_free(trans_pcie->txqs.bc_pool,
871 txq->bc_tbl.addr, txq->bc_tbl.dma);
872 kfree(txq);
873 }
874
875 /*
876 * iwl_txq_gen2_free - Deallocate DMA queue.
877 * @txq: Transmit queue to deallocate.
878 *
879 * Empty queue by removing and destroying all BD's.
880 * Free all buffers.
881 * The "txq" descriptor structure itself is also freed.
882 */
883 static void iwl_txq_gen2_free(struct iwl_trans *trans, int txq_id)
884 {
885 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
886 struct iwl_txq *txq;
887 int i;
888
889 if (WARN_ONCE(txq_id >= IWL_MAX_TVQM_QUEUES,
890 "queue %d out of range", txq_id))
891 return;
892
893 txq = trans_pcie->txqs.txq[txq_id];
894
895 if (WARN_ON(!txq))
896 return;
897
898 iwl_txq_gen2_unmap(trans, txq_id);
899
900 /* De-alloc array of command/tx buffers */
901 if (txq_id == trans->conf.cmd_queue)
902 for (i = 0; i < txq->n_window; i++) {
903 kfree_sensitive(txq->entries[i].cmd);
904 kfree_sensitive(txq->entries[i].free_buf);
905 }
906 timer_delete_sync(&txq->stuck_timer);
907
908 iwl_txq_gen2_free_memory(trans, txq);
909
910 trans_pcie->txqs.txq[txq_id] = NULL;
911
912 clear_bit(txq_id, trans_pcie->txqs.queue_used);
913 }
914
915 static struct iwl_txq *
916 iwl_txq_dyn_alloc_dma(struct iwl_trans *trans, int size, unsigned int timeout)
917 {
918 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
919 size_t bc_tbl_size, bc_tbl_entries;
920 struct iwl_txq *txq;
921 int ret;
922
923 WARN_ON(!trans_pcie->txqs.bc_tbl_size);
924
925 bc_tbl_size = trans_pcie->txqs.bc_tbl_size;
926 bc_tbl_entries = bc_tbl_size / sizeof(u16);
927
928 if (WARN_ON(size > bc_tbl_entries))
929 return ERR_PTR(-EINVAL);
930
931 txq = kzalloc(sizeof(*txq), GFP_KERNEL);
932 if (!txq)
933 return ERR_PTR(-ENOMEM);
934
935 txq->bc_tbl.addr = dma_pool_alloc(trans_pcie->txqs.bc_pool, GFP_KERNEL,
936 &txq->bc_tbl.dma);
937 if (!txq->bc_tbl.addr) {
938 IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
939 kfree(txq);
940 return ERR_PTR(-ENOMEM);
941 }
942
943 ret = iwl_pcie_txq_alloc(trans, txq, size, false);
944 if (ret) {
945 IWL_ERR(trans, "Tx queue alloc failed\n");
946 goto error;
947 }
948 ret = iwl_txq_init(trans, txq, size, false);
949 if (ret) {
950 IWL_ERR(trans, "Tx queue init failed\n");
951 goto error;
952 }
953
954 txq->wd_timeout = msecs_to_jiffies(timeout);
955
956 return txq;
957
958 error:
959 iwl_txq_gen2_free_memory(trans, txq);
960 return ERR_PTR(ret);
961 }
962
963 static int iwl_pcie_txq_alloc_response(struct iwl_trans *trans,
964 struct iwl_txq *txq,
965 struct iwl_host_cmd *hcmd)
966 {
967 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
968 struct iwl_tx_queue_cfg_rsp *rsp;
969 int ret, qid;
970 u32 wr_ptr;
971
972 if (WARN_ON(iwl_rx_packet_payload_len(hcmd->resp_pkt) !=
973 sizeof(*rsp))) {
974 ret = -EINVAL;
975 goto error_free_resp;
976 }
977
978 rsp = (void *)hcmd->resp_pkt->data;
979 qid = le16_to_cpu(rsp->queue_number);
980 wr_ptr = le16_to_cpu(rsp->write_pointer);
981
982 if (qid >= ARRAY_SIZE(trans_pcie->txqs.txq)) {
983 WARN_ONCE(1, "queue index %d unsupported", qid);
984 ret = -EIO;
985 goto error_free_resp;
986 }
987
988 if (test_and_set_bit(qid, trans_pcie->txqs.queue_used)) {
989 WARN_ONCE(1, "queue %d already used", qid);
990 ret = -EIO;
991 goto error_free_resp;
992 }
993
994 if (WARN_ONCE(trans_pcie->txqs.txq[qid],
995 "queue %d already allocated\n", qid)) {
996 ret = -EIO;
997 goto error_free_resp;
998 }
999
1000 txq->id = qid;
1001 trans_pcie->txqs.txq[qid] = txq;
1002 wr_ptr &= (trans->mac_cfg->base->max_tfd_queue_size - 1);
1003
1004 /* Place first TFD at index corresponding to start sequence number */
1005 txq->read_ptr = wr_ptr;
1006 txq->write_ptr = wr_ptr;
1007
1008 IWL_DEBUG_TX_QUEUES(trans, "Activate queue %d\n", qid);
1009
1010 iwl_free_resp(hcmd);
1011 return qid;
1012
1013 error_free_resp:
1014 iwl_free_resp(hcmd);
1015 iwl_txq_gen2_free_memory(trans, txq);
1016 return ret;
1017 }
1018
1019 int iwl_txq_dyn_alloc(struct iwl_trans *trans, u32 flags, u32 sta_mask,
1020 u8 tid, int size, unsigned int timeout)
1021 {
1022 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1023 struct iwl_txq *txq;
1024 union {
1025 struct iwl_tx_queue_cfg_cmd old;
1026 struct iwl_scd_queue_cfg_cmd new;
1027 } cmd;
1028 struct iwl_host_cmd hcmd = {
1029 .flags = CMD_WANT_SKB,
1030 };
1031 int ret;
1032
1033 /* take the min with bytecount table entries allowed */
1034 size = min_t(u32, size, trans_pcie->txqs.bc_tbl_size / sizeof(u16));
1035 /* but must be power of 2 values for calculating read/write pointers */
1036 size = rounddown_pow_of_two(size);
1037
1038 if (trans->mac_cfg->device_family == IWL_DEVICE_FAMILY_BZ &&
1039 trans->info.hw_rev_step == SILICON_A_STEP) {
1040 size = 4096;
1041 txq = iwl_txq_dyn_alloc_dma(trans, size, timeout);
1042 } else {
1043 do {
1044 txq = iwl_txq_dyn_alloc_dma(trans, size, timeout);
1045 if (!IS_ERR(txq))
1046 break;
1047
1048 IWL_DEBUG_TX_QUEUES(trans,
1049 "Failed allocating TXQ of size %d for sta mask %x tid %d, ret: %ld\n",
1050 size, sta_mask, tid,
1051 PTR_ERR(txq));
1052 size /= 2;
1053 } while (size >= 16);
1054 }
1055
1056 if (IS_ERR(txq))
1057 return PTR_ERR(txq);
1058
1059 if (trans->conf.queue_alloc_cmd_ver == 0) {
1060 memset(&cmd.old, 0, sizeof(cmd.old));
1061 cmd.old.tfdq_addr = cpu_to_le64(txq->dma_addr);
1062 cmd.old.byte_cnt_addr = cpu_to_le64(txq->bc_tbl.dma);
1063 cmd.old.cb_size = cpu_to_le32(TFD_QUEUE_CB_SIZE(size));
1064 cmd.old.flags = cpu_to_le16(flags | TX_QUEUE_CFG_ENABLE_QUEUE);
1065 cmd.old.tid = tid;
1066
1067 if (hweight32(sta_mask) != 1) {
1068 ret = -EINVAL;
1069 goto error;
1070 }
1071 cmd.old.sta_id = ffs(sta_mask) - 1;
1072
1073 hcmd.id = SCD_QUEUE_CFG;
1074 hcmd.len[0] = sizeof(cmd.old);
1075 hcmd.data[0] = &cmd.old;
1076 } else if (trans->conf.queue_alloc_cmd_ver == 3) {
1077 memset(&cmd.new, 0, sizeof(cmd.new));
1078 cmd.new.operation = cpu_to_le32(IWL_SCD_QUEUE_ADD);
1079 cmd.new.u.add.tfdq_dram_addr = cpu_to_le64(txq->dma_addr);
1080 cmd.new.u.add.bc_dram_addr = cpu_to_le64(txq->bc_tbl.dma);
1081 cmd.new.u.add.cb_size = cpu_to_le32(TFD_QUEUE_CB_SIZE(size));
1082 cmd.new.u.add.flags = cpu_to_le32(flags);
1083 cmd.new.u.add.sta_mask = cpu_to_le32(sta_mask);
1084 cmd.new.u.add.tid = tid;
1085
1086 hcmd.id = WIDE_ID(DATA_PATH_GROUP, SCD_QUEUE_CONFIG_CMD);
1087 hcmd.len[0] = sizeof(cmd.new);
1088 hcmd.data[0] = &cmd.new;
1089 } else {
1090 ret = -EOPNOTSUPP;
1091 goto error;
1092 }
1093
1094 ret = iwl_trans_send_cmd(trans, &hcmd);
1095 if (ret)
1096 goto error;
1097
1098 return iwl_pcie_txq_alloc_response(trans, txq, &hcmd);
1099
1100 error:
1101 iwl_txq_gen2_free_memory(trans, txq);
1102 return ret;
1103 }
1104
1105 void iwl_txq_dyn_free(struct iwl_trans *trans, int queue)
1106 {
1107 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1108
1109 if (WARN(queue >= IWL_MAX_TVQM_QUEUES,
1110 "queue %d out of range", queue))
1111 return;
1112
1113 /*
1114 * Upon HW Rfkill - we stop the device, and then stop the queues
1115 * in the op_mode. Just for the sake of the simplicity of the op_mode,
1116 * allow the op_mode to call txq_disable after it already called
1117 * stop_device.
1118 */
1119 if (!test_and_clear_bit(queue, trans_pcie->txqs.queue_used)) {
1120 WARN_ONCE(test_bit(STATUS_DEVICE_ENABLED, &trans->status),
1121 "queue %d not used", queue);
1122 return;
1123 }
1124
1125 iwl_txq_gen2_free(trans, queue);
1126
1127 IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", queue);
1128 }
1129
1130 void iwl_txq_gen2_tx_free(struct iwl_trans *trans)
1131 {
1132 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1133 int i;
1134
1135 memset(trans_pcie->txqs.queue_used, 0,
1136 sizeof(trans_pcie->txqs.queue_used));
1137
1138 /* Free all TX queues */
1139 for (i = 0; i < ARRAY_SIZE(trans_pcie->txqs.txq); i++) {
1140 if (!trans_pcie->txqs.txq[i])
1141 continue;
1142
1143 iwl_txq_gen2_free(trans, i);
1144 }
1145 }
1146
1147 int iwl_txq_gen2_init(struct iwl_trans *trans, int txq_id, int queue_size)
1148 {
1149 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1150 struct iwl_txq *queue;
1151 int ret;
1152
1153 /* alloc and init the tx queue */
1154 if (!trans_pcie->txqs.txq[txq_id]) {
1155 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
1156 if (!queue) {
1157 IWL_ERR(trans, "Not enough memory for tx queue\n");
1158 return -ENOMEM;
1159 }
1160 trans_pcie->txqs.txq[txq_id] = queue;
1161 ret = iwl_pcie_txq_alloc(trans, queue, queue_size, true);
1162 if (ret) {
1163 IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
1164 goto error;
1165 }
1166 } else {
1167 queue = trans_pcie->txqs.txq[txq_id];
1168 }
1169
1170 ret = iwl_txq_init(trans, queue, queue_size,
1171 (txq_id == trans->conf.cmd_queue));
1172 if (ret) {
1173 IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
1174 goto error;
1175 }
1176 trans_pcie->txqs.txq[txq_id]->id = txq_id;
1177 set_bit(txq_id, trans_pcie->txqs.queue_used);
1178
1179 return 0;
1180
1181 error:
1182 iwl_txq_gen2_tx_free(trans);
1183 return ret;
1184 }
1185
1186 /*************** HOST COMMAND QUEUE FUNCTIONS *****/
1187
1188 /*
1189 * iwl_pcie_gen2_enqueue_hcmd - enqueue a uCode command
1190 * @trans: the transport
1191 * @cmd: a pointer to the ucode command structure
1192 *
1193 * The function returns < 0 values to indicate the operation
1194 * failed. On success, it returns the index (>= 0) of the command in
1195 * the command queue.
1196 */
1197 int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
1198 struct iwl_host_cmd *cmd)
1199 {
1200 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1201 struct iwl_txq *txq = trans_pcie->txqs.txq[trans->conf.cmd_queue];
1202 struct iwl_device_cmd *out_cmd;
1203 struct iwl_cmd_meta *out_meta;
1204 void *dup_buf = NULL;
1205 dma_addr_t phys_addr;
1206 int i, cmd_pos, idx;
1207 u16 copy_size, cmd_size, tb0_size;
1208 bool had_nocopy = false;
1209 u8 group_id = iwl_cmd_groupid(cmd->id);
1210 const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
1211 u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
1212 struct iwl_tfh_tfd *tfd;
1213 unsigned long flags;
1214
1215 if (WARN_ON(cmd->flags & CMD_BLOCK_TXQS))
1216 return -EINVAL;
1217
1218 copy_size = sizeof(struct iwl_cmd_header_wide);
1219 cmd_size = sizeof(struct iwl_cmd_header_wide);
1220
1221 for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1222 cmddata[i] = cmd->data[i];
1223 cmdlen[i] = cmd->len[i];
1224
1225 if (!cmd->len[i])
1226 continue;
1227
1228 /* need at least IWL_FIRST_TB_SIZE copied */
1229 if (copy_size < IWL_FIRST_TB_SIZE) {
1230 int copy = IWL_FIRST_TB_SIZE - copy_size;
1231
1232 if (copy > cmdlen[i])
1233 copy = cmdlen[i];
1234 cmdlen[i] -= copy;
1235 cmddata[i] += copy;
1236 copy_size += copy;
1237 }
1238
1239 if (cmd->dataflags[i] & IWL_HCMD_DFL_NOCOPY) {
1240 had_nocopy = true;
1241 if (WARN_ON(cmd->dataflags[i] & IWL_HCMD_DFL_DUP)) {
1242 idx = -EINVAL;
1243 goto free_dup_buf;
1244 }
1245 } else if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP) {
1246 /*
1247 * This is also a chunk that isn't copied
1248 * to the static buffer so set had_nocopy.
1249 */
1250 had_nocopy = true;
1251
1252 /* only allowed once */
1253 if (WARN_ON(dup_buf)) {
1254 idx = -EINVAL;
1255 goto free_dup_buf;
1256 }
1257
1258 dup_buf = kmemdup(cmddata[i], cmdlen[i],
1259 GFP_ATOMIC);
1260 if (!dup_buf)
1261 return -ENOMEM;
1262 } else {
1263 /* NOCOPY must not be followed by normal! */
1264 if (WARN_ON(had_nocopy)) {
1265 idx = -EINVAL;
1266 goto free_dup_buf;
1267 }
1268 copy_size += cmdlen[i];
1269 }
1270 cmd_size += cmd->len[i];
1271 }
1272
1273 /*
1274 * If any of the command structures end up being larger than the
1275 * TFD_MAX_PAYLOAD_SIZE and they aren't dynamically allocated into
1276 * separate TFDs, then we will need to increase the size of the buffers
1277 */
1278 if (WARN(copy_size > TFD_MAX_PAYLOAD_SIZE,
1279 "Command %s (%#x) is too large (%d bytes)\n",
1280 iwl_get_cmd_string(trans, cmd->id), cmd->id, copy_size)) {
1281 idx = -EINVAL;
1282 goto free_dup_buf;
1283 }
1284
1285 spin_lock_irqsave(&txq->lock, flags);
1286
1287 idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
1288 tfd = iwl_txq_get_tfd(trans, txq, txq->write_ptr);
1289 memset(tfd, 0, sizeof(*tfd));
1290
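/* async commands need an extra free slot here, presumably so a sync command can still be enqueued when the queue is nearly full */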
1291 if (iwl_txq_space(trans, txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
1292 spin_unlock_irqrestore(&txq->lock, flags);
1293
1294 IWL_ERR(trans, "No space in command queue\n");
1295 iwl_op_mode_nic_error(trans->op_mode,
1296 IWL_ERR_TYPE_CMD_QUEUE_FULL);
1297 iwl_trans_schedule_reset(trans, IWL_ERR_TYPE_CMD_QUEUE_FULL);
1298 idx = -ENOSPC;
1299 goto free_dup_buf;
1300 }
1301
1302 out_cmd = txq->entries[idx].cmd;
1303 out_meta = &txq->entries[idx].meta;
1304
1305 /* re-initialize, this also marks the SG list as unused */
1306 memset(out_meta, 0, sizeof(*out_meta));
1307 if (cmd->flags & CMD_WANT_SKB)
1308 out_meta->source = cmd;
1309
1310 /* set up the header */
1311 out_cmd->hdr_wide.cmd = iwl_cmd_opcode(cmd->id);
1312 out_cmd->hdr_wide.group_id = group_id;
1313 out_cmd->hdr_wide.version = iwl_cmd_version(cmd->id);
1314 out_cmd->hdr_wide.length =
1315 cpu_to_le16(cmd_size - sizeof(struct iwl_cmd_header_wide));
1316 out_cmd->hdr_wide.reserved = 0;
1317 out_cmd->hdr_wide.sequence =
1318 cpu_to_le16(QUEUE_TO_SEQ(trans->conf.cmd_queue) |
1319 INDEX_TO_SEQ(txq->write_ptr));
1320
1321 cmd_pos = sizeof(struct iwl_cmd_header_wide);
1322 copy_size = sizeof(struct iwl_cmd_header_wide);
1323
1324 /* and copy the data that needs to be copied */
1325 for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1326 int copy;
1327
1328 if (!cmd->len[i])
1329 continue;
1330
1331 /* copy everything if not nocopy/dup */
1332 if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
1333 IWL_HCMD_DFL_DUP))) {
1334 copy = cmd->len[i];
1335
1336 memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
1337 cmd_pos += copy;
1338 copy_size += copy;
1339 continue;
1340 }
1341
1342 /*
1343 * Otherwise we need at least IWL_FIRST_TB_SIZE copied
1344 * in total (for bi-directional DMA), but copy up to what
1345 * we can fit into the payload for debug dump purposes.
1346 */
1347 copy = min_t(int, TFD_MAX_PAYLOAD_SIZE - cmd_pos, cmd->len[i]);
1348
1349 memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
1350 cmd_pos += copy;
1351
1352 /* However, treat copy_size the proper way - we need it below */
1353 if (copy_size < IWL_FIRST_TB_SIZE) {
1354 copy = IWL_FIRST_TB_SIZE - copy_size;
1355
1356 if (copy > cmd->len[i])
1357 copy = cmd->len[i];
1358 copy_size += copy;
1359 }
1360 }
1361
1362 IWL_DEBUG_HC(trans,
1363 "Sending command %s (%.2x.%.2x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
1364 iwl_get_cmd_string(trans, cmd->id), group_id,
1365 out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence),
1366 cmd_size, txq->write_ptr, idx, trans->conf.cmd_queue);
1367
1368 /* start the TFD with the minimum copy bytes */
1369 tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
1370 memcpy(&txq->first_tb_bufs[idx], out_cmd, tb0_size);
1371 iwl_txq_gen2_set_tb(trans, tfd, iwl_txq_get_first_tb_dma(txq, idx),
1372 tb0_size);
1373
1374 /* map first command fragment, if any remains */
1375 if (copy_size > tb0_size) {
1376 phys_addr = dma_map_single(trans->dev,
1377 (u8 *)out_cmd + tb0_size,
1378 copy_size - tb0_size,
1379 DMA_TO_DEVICE);
1380 if (dma_mapping_error(trans->dev, phys_addr)) {
1381 idx = -ENOMEM;
1382 iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
1383 goto out;
1384 }
1385 iwl_txq_gen2_set_tb(trans, tfd, phys_addr,
1386 copy_size - tb0_size);
1387 }
1388
1389 /* map the remaining (adjusted) nocopy/dup fragments */
1390 for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1391 void *data = (void *)(uintptr_t)cmddata[i];
1392
1393 if (!cmdlen[i])
1394 continue;
1395 if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
1396 IWL_HCMD_DFL_DUP)))
1397 continue;
1398 if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP)
1399 data = dup_buf;
1400 phys_addr = dma_map_single(trans->dev, data,
1401 cmdlen[i], DMA_TO_DEVICE);
1402 if (dma_mapping_error(trans->dev, phys_addr)) {
1403 idx = -ENOMEM;
1404 iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
1405 goto out;
1406 }
1407 iwl_txq_gen2_set_tb(trans, tfd, phys_addr, cmdlen[i]);
1408 }
1409
1410 BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE);
1411 out_meta->flags = cmd->flags;
1412 if (WARN_ON_ONCE(txq->entries[idx].free_buf))
1413 kfree_sensitive(txq->entries[idx].free_buf);
1414 txq->entries[idx].free_buf = dup_buf;
1415
1416 trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide);
1417
1418 /* start timer if queue currently empty */
1419 if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
1420 mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
1421
1422 spin_lock(&trans_pcie->reg_lock);
1423 /* Increment and update queue's write index */
1424 txq->write_ptr = iwl_txq_inc_wrap(trans, txq->write_ptr);
1425 iwl_txq_inc_wr_ptr(trans, txq);
1426 spin_unlock(&trans_pcie->reg_lock);
1427
1428 out:
1429 spin_unlock_irqrestore(&txq->lock, flags);
1430 free_dup_buf:
1431 if (idx < 0)
1432 kfree(dup_buf);
1433 return idx;
1434 }
1435