// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2021, Microsoft Corporation. */

#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sizes.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <linux/msi.h>
#include <linux/irqdomain.h>
#include <linux/export.h>

#include <net/mana/mana.h>
#include <net/mana/hw_channel.h>

struct dentry *mana_debugfs_root;

struct mana_dev_recovery {
	struct list_head list;
	struct pci_dev *pdev;
	enum gdma_eqe_type type;
};

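/* Devices pending recovery are linked on dev_list below and drained by
 * mana_recovery_delayed_func().
 */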
static struct mana_dev_recovery_work {
	struct list_head dev_list;
	struct delayed_work work;

	/* Lock for dev_list above */
	spinlock_t lock;
} mana_dev_recovery_work;

static u32 mana_gd_r32(struct gdma_context *g, u64 offset)
{
	return readl(g->bar0_va + offset);
}

static u64 mana_gd_r64(struct gdma_context *g, u64 offset)
{
	return readq(g->bar0_va + offset);
}

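/* Read the PF register layout from BAR0: the doorbell page size and
 * offset, plus the SR-IOV config base and the shared memory (SHM)
 * channel base.
 */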
static int mana_gd_init_pf_regs(struct pci_dev *pdev)
{
	struct gdma_context *gc = pci_get_drvdata(pdev);
	void __iomem *sriov_base_va;
	u64 sriov_base_off;

	gc->db_page_size = mana_gd_r32(gc, GDMA_PF_REG_DB_PAGE_SIZE) & 0xFFFF;

	/* mana_gd_ring_doorbell() accesses offsets up to DOORBELL_OFFSET_EQ
	 * (0xFF8) + 8 bytes = 4KB within each doorbell page, so the page
	 * size must be at least SZ_4K.
	 */
	if (gc->db_page_size < SZ_4K) {
		dev_err(gc->dev,
			"Doorbell page size %u too small (min %u)\n",
			gc->db_page_size, SZ_4K);
		return -EPROTO;
	}

	gc->db_page_off = mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF);

	/* Validate that the doorbell offset is within BAR0 */
	if (gc->db_page_off >= gc->bar0_size) {
		dev_err(gc->dev,
			"Doorbell offset 0x%llx exceeds BAR0 size 0x%llx\n",
			gc->db_page_off, (u64)gc->bar0_size);
		return -EPROTO;
	}

	gc->db_page_base = gc->bar0_va + gc->db_page_off;
	gc->phys_db_page_base = gc->bar0_pa + gc->db_page_off;

	sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF);

	sriov_base_va = gc->bar0_va + sriov_base_off;
	gc->shm_base = sriov_base_va +
		       mana_gd_r64(gc, sriov_base_off + GDMA_PF_REG_SHM_OFF);

	return 0;
}

static int mana_gd_init_vf_regs(struct pci_dev *pdev)
{
	struct gdma_context *gc = pci_get_drvdata(pdev);

	gc->db_page_size = mana_gd_r32(gc, GDMA_REG_DB_PAGE_SIZE) & 0xFFFF;

	/* mana_gd_ring_doorbell() accesses offsets up to DOORBELL_OFFSET_EQ
	 * (0xFF8) + 8 bytes = 4KB within each doorbell page, so the page
	 * size must be at least SZ_4K.
	 */
	if (gc->db_page_size < SZ_4K) {
		dev_err(gc->dev,
			"Doorbell page size %u too small (min %u)\n",
			gc->db_page_size, SZ_4K);
		return -EPROTO;
	}

	gc->db_page_off = mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET);

	/* Validate that the doorbell offset is within BAR0 */
	if (gc->db_page_off >= gc->bar0_size) {
		dev_err(gc->dev,
			"Doorbell offset 0x%llx exceeds BAR0 size 0x%llx\n",
			gc->db_page_off, (u64)gc->bar0_size);
		return -EPROTO;
	}

	gc->db_page_base = gc->bar0_va + gc->db_page_off;
	gc->phys_db_page_base = gc->bar0_pa + gc->db_page_off;

	gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET);

	return 0;
}

static int mana_gd_init_registers(struct pci_dev *pdev)
{
	struct gdma_context *gc = pci_get_drvdata(pdev);

	if (gc->is_pf)
		return mana_gd_init_pf_regs(pdev);
	else
		return mana_gd_init_vf_regs(pdev);
}

/* Suppress -ETIMEDOUT logging when the HWC timeout has been set to zero
 * (i.e. during a reset cycle, when the HWC is known to be unresponsive).
 */
bool mana_need_log(struct gdma_context *gc, int err)
{
	struct hw_channel_context *hwc;

	if (err != -ETIMEDOUT)
		return true;

	if (!gc)
		return true;

	hwc = gc->hwc.driver_data;
	if (hwc && hwc->hwc_timeout == 0)
		return false;

	return true;
}

static int mana_gd_query_max_resources(struct pci_dev *pdev)
{
	struct gdma_context *gc = pci_get_drvdata(pdev);
	struct gdma_query_max_resources_resp resp = {};
	struct gdma_general_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES,
			     sizeof(req), sizeof(resp));

	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
	if (err || resp.hdr.status) {
		dev_err(gc->dev, "Failed to query resource info: %d, 0x%x\n",
			err, resp.hdr.status);
		return err ? err : -EPROTO;
	}

	if (!pci_msix_can_alloc_dyn(pdev)) {
		if (gc->num_msix_usable > resp.max_msix)
			gc->num_msix_usable = resp.max_msix;
	} else {
		/* If dynamic allocation is enabled, the HWC MSI-X has
		 * already been allocated.
		 */
		gc->num_msix_usable = min(resp.max_msix, num_online_cpus() + 1);
	}

	if (gc->num_msix_usable <= 1)
		return -ENOSPC;

	gc->max_num_queues = num_online_cpus();
	if (gc->max_num_queues > MANA_MAX_NUM_QUEUES)
		gc->max_num_queues = MANA_MAX_NUM_QUEUES;

	if (gc->max_num_queues > resp.max_eq)
		gc->max_num_queues = resp.max_eq;

	if (gc->max_num_queues > resp.max_cq)
		gc->max_num_queues = resp.max_cq;

	if (gc->max_num_queues > resp.max_sq)
		gc->max_num_queues = resp.max_sq;

	if (gc->max_num_queues > resp.max_rq)
		gc->max_num_queues = resp.max_rq;

	/* The Hardware Channel (HWC) uses 1 MSI-X interrupt */
	if (gc->max_num_queues > gc->num_msix_usable - 1)
		gc->max_num_queues = gc->num_msix_usable - 1;

	return 0;
}

static int mana_gd_query_hwc_timeout(struct pci_dev *pdev, u32 *timeout_val)
{
	struct gdma_context *gc = pci_get_drvdata(pdev);
	struct gdma_query_hwc_timeout_resp resp = {};
	struct gdma_query_hwc_timeout_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_HWC_TIMEOUT,
			     sizeof(req), sizeof(resp));
	req.timeout_ms = *timeout_val;
	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
	if (err || resp.hdr.status)
		return err ? err : -EPROTO;

	*timeout_val = resp.timeout_ms;

	return 0;
}

static int mana_gd_detect_devices(struct pci_dev *pdev)
{
	struct gdma_context *gc = pci_get_drvdata(pdev);
	struct gdma_list_devices_resp resp = {};
	struct gdma_general_req req = {};
	struct gdma_dev_id dev;
	int found_dev = 0;
	u16 dev_type;
	int err;
	u32 i;

	mana_gd_init_req_hdr(&req.hdr, GDMA_LIST_DEVICES, sizeof(req),
			     sizeof(resp));

	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
	if (err || resp.hdr.status) {
		dev_err(gc->dev, "Failed to detect devices: %d, 0x%x\n", err,
			resp.hdr.status);
		return err ? err : -EPROTO;
	}

	for (i = 0; i < GDMA_DEV_LIST_SIZE &&
		    found_dev < resp.num_of_devs; i++) {
		dev = resp.devs[i];
		dev_type = dev.type;

		/* Skip empty devices */
		if (dev.as_uint32 == 0)
			continue;

		found_dev++;

		/* HWC is already detected in mana_hwc_create_channel(). */
		if (dev_type == GDMA_DEVICE_HWC)
			continue;

		if (dev_type == GDMA_DEVICE_MANA) {
			gc->mana.gdma_context = gc;
			gc->mana.dev_id = dev;
		} else if (dev_type == GDMA_DEVICE_MANA_IB) {
			gc->mana_ib.dev_id = dev;
			gc->mana_ib.gdma_context = gc;
		}
	}

	return gc->mana.dev_id.type == 0 ? -ENODEV : 0;
}

int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req,
			 u32 resp_len, void *resp)
{
	struct hw_channel_context *hwc = gc->hwc.driver_data;

	return mana_hwc_send_request(hwc, req_len, req, resp_len, resp);
}
EXPORT_SYMBOL_NS(mana_gd_send_request, "NET_MANA");

int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length,
			 struct gdma_mem_info *gmi)
{
	dma_addr_t dma_handle;
	void *buf;

	if (length < MANA_PAGE_SIZE || !is_power_of_2(length))
		return -EINVAL;

	gmi->dev = gc->dev;
	buf = dma_alloc_coherent(gmi->dev, length, &dma_handle, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	gmi->dma_handle = dma_handle;
	gmi->virt_addr = buf;
	gmi->length = length;

	return 0;
}

void mana_gd_free_memory(struct gdma_mem_info *gmi)
{
	dma_free_coherent(gmi->dev, gmi->length, gmi->virt_addr,
			  gmi->dma_handle);
}

static int mana_gd_create_hw_eq(struct gdma_context *gc,
				struct gdma_queue *queue)
{
	struct gdma_create_queue_resp resp = {};
	struct gdma_create_queue_req req = {};
	int err;

	if (queue->type != GDMA_EQ)
		return -EINVAL;

	mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_QUEUE,
			     sizeof(req), sizeof(resp));

	req.hdr.dev_id = queue->gdma_dev->dev_id;
	req.type = queue->type;
	req.pdid = queue->gdma_dev->pdid;
	req.doolbell_id = queue->gdma_dev->doorbell;
	req.gdma_region = queue->mem_info.dma_region_handle;
	req.queue_size = queue->queue_size;
	req.log2_throttle_limit = queue->eq.log2_throttle_limit;
	req.eq_pci_msix_index = queue->eq.msix_index;

	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
	if (err || resp.hdr.status) {
		dev_err(gc->dev, "Failed to create queue: %d, 0x%x\n", err,
			resp.hdr.status);
		return err ? err : -EPROTO;
	}

	queue->id = resp.queue_index;
	queue->eq.disable_needed = true;
	queue->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
	return 0;
}

static int mana_gd_disable_queue(struct gdma_queue *queue)
{
	struct gdma_context *gc = queue->gdma_dev->gdma_context;
	struct gdma_disable_queue_req req = {};
	struct gdma_general_resp resp = {};
	int err;

	WARN_ON(queue->type != GDMA_EQ);

	mana_gd_init_req_hdr(&req.hdr, GDMA_DISABLE_QUEUE,
			     sizeof(req), sizeof(resp));

	req.hdr.dev_id = queue->gdma_dev->dev_id;
	req.type = queue->type;
	req.queue_index = queue->id;
	req.alloc_res_id_on_creation = 1;

	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
	if (err || resp.hdr.status) {
		if (mana_need_log(gc, err))
			dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n",
				err, resp.hdr.status);
		return err ? err : -EPROTO;
	}

	return 0;
}

#define DOORBELL_OFFSET_SQ	0x0
#define DOORBELL_OFFSET_RQ	0x400
#define DOORBELL_OFFSET_CQ	0x800
#define DOORBELL_OFFSET_EQ	0xFF8

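/* Each doorbell page is carved into per-queue-type slots at the offsets
 * above. For example, with a 4K doorbell page and db_index 2, an EQ
 * doorbell write lands at db_page_base + 2 * SZ_4K + DOORBELL_OFFSET_EQ.
 * The EQ slot ends at 0xFF8 + 8 bytes, which is why the register init
 * paths above reject doorbell pages smaller than SZ_4K.
 */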
static void mana_gd_ring_doorbell(struct gdma_context *gc, u32 db_index,
				  enum gdma_queue_type q_type, u32 qid,
				  u32 tail_ptr, u8 num_req)
{
	void __iomem *addr = gc->db_page_base + gc->db_page_size * db_index;
	union gdma_doorbell_entry e = {};

	switch (q_type) {
	case GDMA_EQ:
		e.eq.id = qid;
		e.eq.tail_ptr = tail_ptr;
		e.eq.arm = num_req;

		addr += DOORBELL_OFFSET_EQ;
		break;

	case GDMA_CQ:
		e.cq.id = qid;
		e.cq.tail_ptr = tail_ptr;
		e.cq.arm = num_req;

		addr += DOORBELL_OFFSET_CQ;
		break;

	case GDMA_RQ:
		e.rq.id = qid;
		e.rq.tail_ptr = tail_ptr;
		e.rq.wqe_cnt = num_req;

		addr += DOORBELL_OFFSET_RQ;
		break;

	case GDMA_SQ:
		e.sq.id = qid;
		e.sq.tail_ptr = tail_ptr;

		addr += DOORBELL_OFFSET_SQ;
		break;

	default:
		WARN_ON(1);
		return;
	}

	/* Ensure all writes are done before ringing the doorbell */
	wmb();

	writeq(e.as_uint64, addr);
}

void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue)
{
	/* The hardware spec specifies that the software client should set
	 * wqe_cnt to 0 for receive queues. This value is not used for send
	 * queues.
	 */
	mana_gd_ring_doorbell(gc, queue->gdma_dev->doorbell, queue->type,
			      queue->id, queue->head * GDMA_WQE_BU_SIZE, 0);
}
EXPORT_SYMBOL_NS(mana_gd_wq_ring_doorbell, "NET_MANA");

void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
{
	struct gdma_context *gc = cq->gdma_dev->gdma_context;
	u32 num_cqe = cq->queue_size / GDMA_CQE_SIZE;
	u32 head = cq->head % (num_cqe << GDMA_CQE_OWNER_BITS);

	mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id,
			      head, arm_bit);
}
EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA");

#define MANA_SERVICE_PERIOD 10 /* seconds */

static void mana_serv_rescan(struct pci_dev *pdev)
{
	struct pci_bus *parent;

	pci_lock_rescan_remove();

	parent = pdev->bus;
	if (!parent) {
		dev_err(&pdev->dev, "MANA service: no parent bus\n");
		goto out;
	}

	pci_stop_and_remove_bus_device(pdev);
	pci_rescan_bus(parent);

out:
	pci_unlock_rescan_remove();
}

static void mana_serv_fpga(struct pci_dev *pdev)
{
	struct pci_bus *bus, *parent;

	pci_lock_rescan_remove();

	bus = pdev->bus;
	if (!bus) {
		dev_err(&pdev->dev, "MANA service: no bus\n");
		goto out;
	}

	parent = bus->parent;
	if (!parent) {
		dev_err(&pdev->dev, "MANA service: no parent bus\n");
		goto out;
	}

	pci_stop_and_remove_bus_device(bus->self);

	msleep(MANA_SERVICE_PERIOD * 1000);

	pci_rescan_bus(parent);

out:
	pci_unlock_rescan_remove();
}

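/* Reset the device by suspending and resuming it around a
 * MANA_SERVICE_PERIOD wait. If the HWC does not come back on resume
 * (-ETIMEDOUT or -EPROTO), fall back to a full PCI remove-and-rescan
 * of the device.
 */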
static void mana_serv_reset(struct pci_dev *pdev)
{
	struct gdma_context *gc = pci_get_drvdata(pdev);
	struct hw_channel_context *hwc;
	int ret;

	if (!gc) {
		/* Perform PCI rescan on device if GC is not set up */
		dev_err(&pdev->dev, "MANA service: GC not setup, rescanning\n");
		mana_serv_rescan(pdev);
		return;
	}

	hwc = gc->hwc.driver_data;
	if (!hwc) {
		dev_err(&pdev->dev, "MANA service: no HWC\n");
		goto out;
	}

	/* HWC is not responding in this case, so don't wait */
	hwc->hwc_timeout = 0;

	dev_info(&pdev->dev, "MANA reset cycle start\n");

	mana_gd_suspend(pdev, PMSG_SUSPEND);

	msleep(MANA_SERVICE_PERIOD * 1000);

	ret = mana_gd_resume(pdev);
	if (ret == -ETIMEDOUT || ret == -EPROTO) {
		/* Perform PCI rescan on device if we failed on HWC */
		dev_err(&pdev->dev, "MANA service: resume failed, rescanning\n");
		mana_serv_rescan(pdev);
		return;
	}

	if (ret)
		dev_info(&pdev->dev, "MANA reset cycle failed err %d\n", ret);
	else
		dev_info(&pdev->dev, "MANA reset cycle completed\n");

out:
	clear_bit(GC_IN_SERVICE, &gc->flags);
}

static void mana_do_service(enum gdma_eqe_type type, struct pci_dev *pdev)
{
	switch (type) {
	case GDMA_EQE_HWC_FPGA_RECONFIG:
		mana_serv_fpga(pdev);
		break;

	case GDMA_EQE_HWC_RESET_REQUEST:
		mana_serv_reset(pdev);
		break;

	default:
		dev_err(&pdev->dev, "MANA service: unknown type %d\n", type);
		break;
	}
}

static void mana_recovery_delayed_func(struct work_struct *w)
{
	struct mana_dev_recovery_work *work;
	struct mana_dev_recovery *dev;
	unsigned long flags;

	work = container_of(w, struct mana_dev_recovery_work, work.work);

	spin_lock_irqsave(&work->lock, flags);

	while (!list_empty(&work->dev_list)) {
		dev = list_first_entry(&work->dev_list,
				       struct mana_dev_recovery, list);
		list_del(&dev->list);
		spin_unlock_irqrestore(&work->lock, flags);

		mana_do_service(dev->type, dev->pdev);
		pci_dev_put(dev->pdev);
		kfree(dev);

		spin_lock_irqsave(&work->lock, flags);
	}

	spin_unlock_irqrestore(&work->lock, flags);
}

static void mana_serv_func(struct work_struct *w)
{
	struct mana_serv_work *mns_wk;
	struct pci_dev *pdev;

	mns_wk = container_of(w, struct mana_serv_work, serv_work);
	pdev = mns_wk->pdev;

	if (pdev)
		mana_do_service(mns_wk->type, pdev);

	pci_dev_put(pdev);
	kfree(mns_wk);
	module_put(THIS_MODULE);
}

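/* Schedule a service work item for the given event type. This can be
 * called from the EQ interrupt path, hence the GFP_ATOMIC allocation;
 * a module reference and a PCI device reference are taken so both stay
 * alive until the work item has run. Returns -EBUSY if a service cycle
 * is already in flight.
 */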
int mana_schedule_serv_work(struct gdma_context *gc, enum gdma_eqe_type type)
{
	struct mana_serv_work *mns_wk;

	if (test_and_set_bit(GC_IN_SERVICE, &gc->flags)) {
		dev_info(gc->dev, "Already in service\n");
		return -EBUSY;
	}

	if (!try_module_get(THIS_MODULE)) {
		dev_info(gc->dev, "Module is unloading\n");
		clear_bit(GC_IN_SERVICE, &gc->flags);
		return -ENODEV;
	}

	mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
	if (!mns_wk) {
		module_put(THIS_MODULE);
		clear_bit(GC_IN_SERVICE, &gc->flags);
		return -ENOMEM;
	}

	dev_info(gc->dev, "Start MANA service type:%d\n", type);
	mns_wk->pdev = to_pci_dev(gc->dev);
	mns_wk->type = type;
	pci_dev_get(mns_wk->pdev);
	INIT_WORK(&mns_wk->serv_work, mana_serv_func);
	schedule_work(&mns_wk->serv_work);
	return 0;
}

static void mana_gd_process_eqe(struct gdma_queue *eq)
{
	u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE);
	struct gdma_context *gc = eq->gdma_dev->gdma_context;
	struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr;
	union gdma_eqe_info eqe_info;
	enum gdma_eqe_type type;
	struct gdma_event event;
	struct gdma_queue *cq;
	struct gdma_eqe *eqe;
	u32 cq_id;

	eqe = &eq_eqe_ptr[head];
	eqe_info.as_uint32 = eqe->eqe_info;
	type = eqe_info.type;

	switch (type) {
	case GDMA_EQE_COMPLETION:
		cq_id = eqe->details[0] & 0xFFFFFF;
		if (WARN_ON_ONCE(cq_id >= gc->max_num_cqs))
			break;

		cq = gc->cq_table[cq_id];
		if (WARN_ON_ONCE(!cq || cq->type != GDMA_CQ || cq->id != cq_id))
			break;

		if (cq->cq.callback)
			cq->cq.callback(cq->cq.context, cq);

		break;

	case GDMA_EQE_TEST_EVENT:
		gc->test_event_eq_id = eq->id;
		complete(&gc->eq_test_event);
		break;

	case GDMA_EQE_HWC_INIT_EQ_ID_DB:
	case GDMA_EQE_HWC_INIT_DATA:
	case GDMA_EQE_HWC_INIT_DONE:
	case GDMA_EQE_HWC_SOC_SERVICE:
	case GDMA_EQE_RNIC_QP_FATAL:
	case GDMA_EQE_HWC_SOC_RECONFIG_DATA:
		if (!eq->eq.callback)
			break;

		event.type = type;
		memcpy(&event.details, &eqe->details, GDMA_EVENT_DATA_SIZE);
		eq->eq.callback(eq->eq.context, eq, &event);
		break;

	case GDMA_EQE_HWC_FPGA_RECONFIG:
	case GDMA_EQE_HWC_RESET_REQUEST:
		dev_info(gc->dev, "Recv MANA service type:%d\n", type);

		if (!test_and_set_bit(GC_PROBE_SUCCEEDED, &gc->flags)) {
			/* The device is still in probe and we received a
			 * hardware reset event; the probe function will
			 * detect that the flag has changed and perform the
			 * service procedure.
			 */
			dev_info(gc->dev,
				 "Service is to be processed in probe\n");
			break;
		}
		mana_schedule_serv_work(gc, type);
		break;

	default:
		break;
	}
}

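/* EQEs carry owner bits that advance by one each time the producer wraps
 * the queue: the entry at index eq->head % num_eqe is new only if its
 * owner bits match the current pass number (eq->head / num_eqe), masked
 * by GDMA_EQE_OWNER_MASK. Owner bits from the previous pass mean the
 * queue is empty; anything else indicates the EQ has overflowed.
 */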
static void mana_gd_process_eq_events(void *arg)
{
	u32 owner_bits, new_bits, old_bits;
	union gdma_eqe_info eqe_info;
	struct gdma_eqe *eq_eqe_ptr;
	struct gdma_queue *eq = arg;
	struct gdma_context *gc;
	struct gdma_eqe *eqe;
	u32 head, num_eqe;
	int i;

	gc = eq->gdma_dev->gdma_context;

	num_eqe = eq->queue_size / GDMA_EQE_SIZE;
	eq_eqe_ptr = eq->queue_mem_ptr;

	/* Process up to 5 EQEs at a time, and update the HW head. */
	for (i = 0; i < 5; i++) {
		eqe = &eq_eqe_ptr[eq->head % num_eqe];
		eqe_info.as_uint32 = eqe->eqe_info;
		owner_bits = eqe_info.owner_bits;

		old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK;
		/* No more entries */
		if (owner_bits == old_bits) {
			/* Return here without ringing the doorbell */
			if (i == 0)
				return;
			break;
		}

		new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK;
		if (owner_bits != new_bits) {
			dev_err(gc->dev, "EQ %d: overflow detected\n", eq->id);
			break;
		}

		/* Per GDMA spec, rmb is necessary after checking owner_bits,
		 * before reading eqe.
		 */
		rmb();

		mana_gd_process_eqe(eq);

		eq->head++;
	}

	head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);

	mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id,
			      head, SET_ARM_BIT);
}

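/* Attach an EQ to the IRQ context selected by spec->eq.msix_index. EQs
 * sharing one MSI-X vector are kept on a per-vector list that the
 * interrupt handler walks under RCU; see mana_gd_intr().
 */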
static int mana_gd_register_irq(struct gdma_queue *queue,
				const struct gdma_queue_spec *spec)
{
	struct gdma_dev *gd = queue->gdma_dev;
	struct gdma_irq_context *gic;
	struct gdma_context *gc;
	unsigned int msi_index;
	unsigned long flags;
	struct device *dev;
	int err = 0;

	gc = gd->gdma_context;
	dev = gc->dev;
	msi_index = spec->eq.msix_index;

	if (msi_index >= gc->num_msix_usable) {
		err = -ENOSPC;
		dev_err(dev, "Register IRQ err:%d, msi:%u nMSI:%u\n",
			err, msi_index, gc->num_msix_usable);

		return err;
	}

	queue->eq.msix_index = msi_index;
	gic = xa_load(&gc->irq_contexts, msi_index);
	if (WARN_ON(!gic))
		return -EINVAL;

	spin_lock_irqsave(&gic->lock, flags);
	list_add_rcu(&queue->entry, &gic->eq_list);
	spin_unlock_irqrestore(&gic->lock, flags);

	return 0;
}

static void mana_gd_deregister_irq(struct gdma_queue *queue)
{
	struct gdma_dev *gd = queue->gdma_dev;
	struct gdma_irq_context *gic;
	struct gdma_context *gc;
	unsigned int msix_index;
	unsigned long flags;
	struct gdma_queue *eq;

	gc = gd->gdma_context;

	/* At most num_online_cpus() + 1 interrupts are used. */
	msix_index = queue->eq.msix_index;
	if (WARN_ON(msix_index >= gc->num_msix_usable))
		return;

	gic = xa_load(&gc->irq_contexts, msix_index);
	if (WARN_ON(!gic))
		return;

	spin_lock_irqsave(&gic->lock, flags);
	list_for_each_entry_rcu(eq, &gic->eq_list, entry) {
		if (queue == eq) {
			list_del_rcu(&eq->entry);
			break;
		}
	}
	spin_unlock_irqrestore(&gic->lock, flags);

	queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
	synchronize_rcu();
}

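/* Ask the device to generate a test EQE on @eq and wait (up to 30s) for
 * it to arrive. This serves both as a sanity check after EQ creation and
 * as a way to flush in-flight events before an EQ is torn down.
 */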
int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq)
{
	struct gdma_generate_test_event_req req = {};
	struct gdma_general_resp resp = {};
	struct device *dev = gc->dev;
	int err;

	mutex_lock(&gc->eq_test_event_mutex);

	init_completion(&gc->eq_test_event);
	gc->test_event_eq_id = INVALID_QUEUE_ID;

	mana_gd_init_req_hdr(&req.hdr, GDMA_GENERATE_TEST_EQE,
			     sizeof(req), sizeof(resp));

	req.hdr.dev_id = eq->gdma_dev->dev_id;
	req.queue_index = eq->id;

	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
	if (err) {
		if (mana_need_log(gc, err))
			dev_err(dev, "test_eq failed: %d\n", err);
		goto out;
	}

	err = -EPROTO;

	if (resp.hdr.status) {
		dev_err(dev, "test_eq failed: 0x%x\n", resp.hdr.status);
		goto out;
	}

	if (!wait_for_completion_timeout(&gc->eq_test_event, 30 * HZ)) {
		dev_err(dev, "test_eq timed out on queue %d\n", eq->id);
		goto out;
	}

	if (eq->id != gc->test_event_eq_id) {
		dev_err(dev, "test_eq got an event on wrong queue %d (%d)\n",
			gc->test_event_eq_id, eq->id);
		goto out;
	}

	err = 0;
out:
	mutex_unlock(&gc->eq_test_event_mutex);
	return err;
}

static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_events,
			       struct gdma_queue *queue)
{
	int err;

	if (flush_events) {
		err = mana_gd_test_eq(gc, queue);
		if (err && mana_need_log(gc, err))
			dev_warn(gc->dev, "Failed to flush EQ: %d\n", err);
	}

	mana_gd_deregister_irq(queue);

	if (queue->eq.disable_needed)
		mana_gd_disable_queue(queue);
}

static int mana_gd_create_eq(struct gdma_dev *gd,
			     const struct gdma_queue_spec *spec,
			     bool create_hwq, struct gdma_queue *queue)
{
	struct gdma_context *gc = gd->gdma_context;
	struct device *dev = gc->dev;
	u32 log2_num_entries;
	int err;

	queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
	queue->id = INVALID_QUEUE_ID;

	log2_num_entries = ilog2(queue->queue_size / GDMA_EQE_SIZE);

	if (spec->eq.log2_throttle_limit > log2_num_entries) {
		dev_err(dev, "EQ throttling limit (%lu) > maximum EQE (%u)\n",
			spec->eq.log2_throttle_limit, log2_num_entries);
		return -EINVAL;
	}

	err = mana_gd_register_irq(queue, spec);
	if (err) {
		dev_err(dev, "Failed to register irq: %d\n", err);
		return err;
	}

	queue->eq.callback = spec->eq.callback;
	queue->eq.context = spec->eq.context;
	queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries);
	queue->eq.log2_throttle_limit = spec->eq.log2_throttle_limit ?: 1;

	if (create_hwq) {
		err = mana_gd_create_hw_eq(gc, queue);
		if (err)
			goto out;

		err = mana_gd_test_eq(gc, queue);
		if (err)
			goto out;
	}

	return 0;
out:
	dev_err(dev, "Failed to create EQ: %d\n", err);
	mana_gd_destroy_eq(gc, false, queue);
	return err;
}

static void mana_gd_create_cq(const struct gdma_queue_spec *spec,
			      struct gdma_queue *queue)
{
	u32 log2_num_entries = ilog2(spec->queue_size / GDMA_CQE_SIZE);

	queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries);
	queue->cq.parent = spec->cq.parent_eq;
	queue->cq.context = spec->cq.context;
	queue->cq.callback = spec->cq.callback;
}

static void mana_gd_destroy_cq(struct gdma_context *gc,
			       struct gdma_queue *queue)
{
	u32 id = queue->id;

	if (id >= gc->max_num_cqs)
		return;

	if (!gc->cq_table[id])
		return;

	gc->cq_table[id] = NULL;
}

int mana_gd_create_hwc_queue(struct gdma_dev *gd,
			     const struct gdma_queue_spec *spec,
			     struct gdma_queue **queue_ptr)
{
	struct gdma_context *gc = gd->gdma_context;
	struct gdma_mem_info *gmi;
	struct gdma_queue *queue;
	int err;

	queue = kzalloc_obj(*queue);
	if (!queue)
		return -ENOMEM;

	gmi = &queue->mem_info;
	err = mana_gd_alloc_memory(gc, spec->queue_size, gmi);
	if (err) {
		dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n",
			spec->type, spec->queue_size, err);
		goto free_q;
	}

	queue->head = 0;
	queue->tail = 0;
	queue->queue_mem_ptr = gmi->virt_addr;
	queue->queue_size = spec->queue_size;
	queue->monitor_avl_buf = spec->monitor_avl_buf;
	queue->type = spec->type;
	queue->gdma_dev = gd;

	if (spec->type == GDMA_EQ)
		err = mana_gd_create_eq(gd, spec, false, queue);
	else if (spec->type == GDMA_CQ)
		mana_gd_create_cq(spec, queue);

	if (err)
		goto out;

	*queue_ptr = queue;
	return 0;
out:
	dev_err(gc->dev, "Failed to create queue type %d of size %u, err: %d\n",
		spec->type, spec->queue_size, err);
	mana_gd_free_memory(gmi);
free_q:
	kfree(queue);
	return err;
}

int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle)
{
	struct gdma_destroy_dma_region_req req = {};
	struct gdma_general_resp resp = {};
	int err;

	if (dma_region_handle == GDMA_INVALID_DMA_REGION)
		return 0;

	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_DMA_REGION, sizeof(req),
			     sizeof(resp));
	req.dma_region_handle = dma_region_handle;

	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
	if (err || resp.hdr.status) {
		if (mana_need_log(gc, err))
			dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
				err, resp.hdr.status);
		return -EPROTO;
	}

	return 0;
}
EXPORT_SYMBOL_NS(mana_gd_destroy_dma_region, "NET_MANA");

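/* Register the buffer described by @gmi with the device, obtaining a DMA
 * region handle that the HW can be told to use. The request carries one
 * page address per MANA_PAGE_SIZE chunk, so the message size (and thus
 * the region length) is bounded by the HWC's max_req_msg_size.
 */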
static int mana_gd_create_dma_region(struct gdma_dev *gd,
				     struct gdma_mem_info *gmi)
{
	unsigned int num_page = gmi->length / MANA_PAGE_SIZE;
	struct gdma_create_dma_region_req *req = NULL;
	struct gdma_create_dma_region_resp resp = {};
	struct gdma_context *gc = gd->gdma_context;
	struct hw_channel_context *hwc;
	u32 length = gmi->length;
	size_t req_msg_size;
	int err;
	int i;

	if (length < MANA_PAGE_SIZE || !is_power_of_2(length))
		return -EINVAL;

	if (!MANA_PAGE_ALIGNED(gmi->virt_addr))
		return -EINVAL;

	hwc = gc->hwc.driver_data;
	req_msg_size = struct_size(req, page_addr_list, num_page);
	if (req_msg_size > hwc->max_req_msg_size)
		return -EINVAL;

	req = kzalloc(req_msg_size, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	mana_gd_init_req_hdr(&req->hdr, GDMA_CREATE_DMA_REGION,
			     req_msg_size, sizeof(resp));
	req->length = length;
	req->offset_in_page = 0;
	req->gdma_page_type = GDMA_PAGE_TYPE_4K;
	req->page_count = num_page;
	req->page_addr_list_len = num_page;

	for (i = 0; i < num_page; i++)
		req->page_addr_list[i] = gmi->dma_handle + i * MANA_PAGE_SIZE;

	err = mana_gd_send_request(gc, req_msg_size, req, sizeof(resp), &resp);
	if (err)
		goto out;

	if (resp.hdr.status ||
	    resp.dma_region_handle == GDMA_INVALID_DMA_REGION) {
		dev_err(gc->dev, "Failed to create DMA region: 0x%x\n",
			resp.hdr.status);
		err = -EPROTO;
		goto out;
	}

	gmi->dma_region_handle = resp.dma_region_handle;
	dev_dbg(gc->dev, "Created DMA region handle 0x%llx\n",
		gmi->dma_region_handle);
out:
	if (err)
		dev_dbg(gc->dev,
			"Failed to create DMA region of length: %u, page_type: %d, status: 0x%x, err: %d\n",
			length, req->gdma_page_type, resp.hdr.status, err);
	kfree(req);
	return err;
}

int mana_gd_create_mana_eq(struct gdma_dev *gd,
			   const struct gdma_queue_spec *spec,
			   struct gdma_queue **queue_ptr)
{
	struct gdma_context *gc = gd->gdma_context;
	struct gdma_mem_info *gmi;
	struct gdma_queue *queue;
	int err;

	if (spec->type != GDMA_EQ)
		return -EINVAL;

	queue = kzalloc_obj(*queue);
	if (!queue)
		return -ENOMEM;

	gmi = &queue->mem_info;
	err = mana_gd_alloc_memory(gc, spec->queue_size, gmi);
	if (err) {
		dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n",
			spec->type, spec->queue_size, err);
		goto free_q;
	}

	err = mana_gd_create_dma_region(gd, gmi);
	if (err)
		goto out;

	queue->head = 0;
	queue->tail = 0;
	queue->queue_mem_ptr = gmi->virt_addr;
	queue->queue_size = spec->queue_size;
	queue->monitor_avl_buf = spec->monitor_avl_buf;
	queue->type = spec->type;
	queue->gdma_dev = gd;

	err = mana_gd_create_eq(gd, spec, true, queue);
	if (err)
		goto out;

	*queue_ptr = queue;
	return 0;
out:
	dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n",
		spec->type, spec->queue_size, err);
	mana_gd_free_memory(gmi);
free_q:
	kfree(queue);
	return err;
}
EXPORT_SYMBOL_NS(mana_gd_create_mana_eq, "NET_MANA");

int mana_gd_create_mana_wq_cq(struct gdma_dev *gd,
			      const struct gdma_queue_spec *spec,
			      struct gdma_queue **queue_ptr)
{
	struct gdma_context *gc = gd->gdma_context;
	struct gdma_mem_info *gmi;
	struct gdma_queue *queue;
	int err;

	if (spec->type != GDMA_CQ && spec->type != GDMA_SQ &&
	    spec->type != GDMA_RQ)
		return -EINVAL;

	queue = kzalloc_obj(*queue);
	if (!queue)
		return -ENOMEM;

	gmi = &queue->mem_info;
	err = mana_gd_alloc_memory(gc, spec->queue_size, gmi);
	if (err) {
		dev_err(gc->dev, "GDMA queue type: %d, size: %u, memory allocation err: %d\n",
			spec->type, spec->queue_size, err);
		goto free_q;
	}

	err = mana_gd_create_dma_region(gd, gmi);
	if (err)
		goto out;

	queue->head = 0;
	queue->tail = 0;
	queue->queue_mem_ptr = gmi->virt_addr;
	queue->queue_size = spec->queue_size;
	queue->monitor_avl_buf = spec->monitor_avl_buf;
	queue->type = spec->type;
	queue->gdma_dev = gd;

	if (spec->type == GDMA_CQ)
		mana_gd_create_cq(spec, queue);

	*queue_ptr = queue;
	return 0;
out:
	dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n",
		spec->type, spec->queue_size, err);
	mana_gd_free_memory(gmi);
free_q:
	kfree(queue);
	return err;
}
EXPORT_SYMBOL_NS(mana_gd_create_mana_wq_cq, "NET_MANA");

void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue)
{
	struct gdma_mem_info *gmi = &queue->mem_info;

	switch (queue->type) {
	case GDMA_EQ:
		mana_gd_destroy_eq(gc, queue->eq.disable_needed, queue);
		break;

	case GDMA_CQ:
		mana_gd_destroy_cq(gc, queue);
		break;

	case GDMA_RQ:
		break;

	case GDMA_SQ:
		break;

	default:
		dev_err(gc->dev, "Can't destroy unknown queue: type=%d\n",
			queue->type);
		return;
	}

	mana_gd_destroy_dma_region(gc, gmi->dma_region_handle);
	mana_gd_free_memory(gmi);
	kfree(queue);
}
EXPORT_SYMBOL_NS(mana_gd_destroy_queue, "NET_MANA");

int mana_gd_verify_vf_version(struct pci_dev *pdev)
{
	struct gdma_context *gc = pci_get_drvdata(pdev);
	struct gdma_verify_ver_resp resp = {};
	struct gdma_verify_ver_req req = {};
	struct hw_channel_context *hwc;
	int err;

	hwc = gc->hwc.driver_data;
	mana_gd_init_req_hdr(&req.hdr, GDMA_VERIFY_VF_DRIVER_VERSION,
			     sizeof(req), sizeof(resp));

	req.protocol_ver_min = GDMA_PROTOCOL_FIRST;
	req.protocol_ver_max = GDMA_PROTOCOL_LAST;

	req.gd_drv_cap_flags1 = GDMA_DRV_CAP_FLAGS1;
	req.gd_drv_cap_flags2 = GDMA_DRV_CAP_FLAGS2;
	req.gd_drv_cap_flags3 = GDMA_DRV_CAP_FLAGS3;
	req.gd_drv_cap_flags4 = GDMA_DRV_CAP_FLAGS4;

	req.drv_ver = 0; /* Unused */
	req.os_type = 0x10; /* Linux */
	req.os_ver_major = LINUX_VERSION_MAJOR;
	req.os_ver_minor = LINUX_VERSION_PATCHLEVEL;
	req.os_ver_build = LINUX_VERSION_SUBLEVEL;
	strscpy(req.os_ver_str1, utsname()->sysname, sizeof(req.os_ver_str1));
	strscpy(req.os_ver_str2, utsname()->release, sizeof(req.os_ver_str2));
	strscpy(req.os_ver_str3, utsname()->version, sizeof(req.os_ver_str3));

	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
	if (err || resp.hdr.status) {
		dev_err(gc->dev, "VfVerifyVersionOutput: %d, status=0x%x\n",
			err, resp.hdr.status);
		return err ? err : -EPROTO;
	}
	gc->pf_cap_flags1 = resp.pf_cap_flags1;
	if (resp.pf_cap_flags1 & GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG) {
		err = mana_gd_query_hwc_timeout(pdev, &hwc->hwc_timeout);
		if (err) {
			dev_err(gc->dev, "Failed to set the hwc timeout %d\n", err);
			return err;
		}
		dev_dbg(gc->dev, "set the hwc timeout to %u\n", hwc->hwc_timeout);
	}
	return 0;
}

int mana_gd_register_device(struct gdma_dev *gd)
{
	struct gdma_context *gc = gd->gdma_context;
	struct gdma_register_device_resp resp = {};
	struct gdma_general_req req = {};
	int err;

	gd->pdid = INVALID_PDID;
	gd->doorbell = INVALID_DOORBELL;
	gd->gpa_mkey = INVALID_MEM_KEY;

	mana_gd_init_req_hdr(&req.hdr, GDMA_REGISTER_DEVICE, sizeof(req),
			     sizeof(resp));

	req.hdr.dev_id = gd->dev_id;

	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
	if (err || resp.hdr.status) {
		dev_err(gc->dev, "gdma_register_device_resp failed: %d, 0x%x\n",
			err, resp.hdr.status);
		return err ? err : -EPROTO;
	}

	/* Validate that the doorbell page for db_id is within the BAR0
	 * region. In mana_gd_ring_doorbell(), the address is calculated as:
	 *   addr = db_page_base + db_page_size * db_id
	 *        = (bar0_va + db_page_off) + (db_page_size * db_id)
	 * So we need: db_page_off + db_page_size * (db_id + 1) <= bar0_size
	 */
	if (gc->db_page_off + gc->db_page_size * ((u64)resp.db_id + 1) >
	    gc->bar0_size) {
		dev_err(gc->dev, "Doorbell ID %u out of range\n", resp.db_id);
		return -EPROTO;
	}

	gd->pdid = resp.pdid;
	gd->gpa_mkey = resp.gpa_mkey;
	gd->doorbell = resp.db_id;

	return 0;
}

int mana_gd_deregister_device(struct gdma_dev *gd)
{
	struct gdma_context *gc = gd->gdma_context;
	struct gdma_general_resp resp = {};
	struct gdma_general_req req = {};
	int err;

	if (gd->pdid == INVALID_PDID)
		return -EINVAL;

	mana_gd_init_req_hdr(&req.hdr, GDMA_DEREGISTER_DEVICE, sizeof(req),
			     sizeof(resp));

	req.hdr.dev_id = gd->dev_id;

	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
	if (err || resp.hdr.status) {
		if (mana_need_log(gc, err))
			dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
				err, resp.hdr.status);
		if (!err)
			err = -EPROTO;
	}

	gd->pdid = INVALID_PDID;
	gd->doorbell = INVALID_DOORBELL;
	gd->gpa_mkey = INVALID_MEM_KEY;

	return err;
}

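/* wq->head and wq->tail are free-running counters in basic units
 * (GDMA_WQE_BU_SIZE bytes each); their difference, scaled to bytes,
 * is the amount of queue space currently in use.
 */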
u32 mana_gd_wq_avail_space(struct gdma_queue *wq)
{
	u32 used_space = (wq->head - wq->tail) * GDMA_WQE_BU_SIZE;
	u32 wq_size = wq->queue_size;

	WARN_ON_ONCE(used_space > wq_size);

	return wq_size - used_space;
}

u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset)
{
	u32 offset = (wqe_offset * GDMA_WQE_BU_SIZE) & (wq->queue_size - 1);

	WARN_ON_ONCE((offset + GDMA_WQE_BU_SIZE) > wq->queue_size);

	return wq->queue_mem_ptr + offset;
}

static u32 mana_gd_write_client_oob(const struct gdma_wqe_request *wqe_req,
				    enum gdma_queue_type q_type,
				    u32 client_oob_size, u32 sgl_data_size,
				    u8 *wqe_ptr)
{
	bool oob_in_sgl = !!(wqe_req->flags & GDMA_WR_OOB_IN_SGL);
	bool pad_data = !!(wqe_req->flags & GDMA_WR_PAD_BY_SGE0);
	struct gdma_wqe *header = (struct gdma_wqe *)wqe_ptr;
	u8 *ptr;

	memset(header, 0, sizeof(struct gdma_wqe));
	header->num_sge = wqe_req->num_sge;
	header->inline_oob_size_div4 = client_oob_size / sizeof(u32);

	if (oob_in_sgl) {
		WARN_ON_ONCE(wqe_req->num_sge < 2);

		header->client_oob_in_sgl = 1;

		if (pad_data)
			header->last_vbytes = wqe_req->sgl[0].size;
	}

	if (q_type == GDMA_SQ)
		header->client_data_unit = wqe_req->client_data_unit;

	/* The size of gdma_wqe + client_oob_size must be less than or equal
	 * to one Basic Unit (i.e. 32 bytes), so the pointer can't go beyond
	 * the queue memory buffer boundary.
	 */
	ptr = wqe_ptr + sizeof(header);

	if (wqe_req->inline_oob_data && wqe_req->inline_oob_size > 0) {
		memcpy(ptr, wqe_req->inline_oob_data, wqe_req->inline_oob_size);

		if (client_oob_size > wqe_req->inline_oob_size)
			memset(ptr + wqe_req->inline_oob_size, 0,
			       client_oob_size - wqe_req->inline_oob_size);
	}

	return sizeof(header) + client_oob_size;
}

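/* Copy the caller's SGL into the work queue buffer. The WQ is circular,
 * so an SGL that would run past the end of the buffer is split into two
 * memcpy()s, with the second one wrapping back to the start of the queue.
 */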
static void mana_gd_write_sgl(struct gdma_queue *wq, u8 *wqe_ptr,
			      const struct gdma_wqe_request *wqe_req)
{
	u32 sgl_size = sizeof(struct gdma_sge) * wqe_req->num_sge;
	const u8 *address = (u8 *)wqe_req->sgl;
	u8 *base_ptr, *end_ptr;
	u32 size_to_end;

	base_ptr = wq->queue_mem_ptr;
	end_ptr = base_ptr + wq->queue_size;
	size_to_end = (u32)(end_ptr - wqe_ptr);

	if (size_to_end < sgl_size) {
		memcpy(wqe_ptr, address, size_to_end);

		wqe_ptr = base_ptr;
		address += size_to_end;
		sgl_size -= size_to_end;
	}

	memcpy(wqe_ptr, address, sgl_size);
}

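/* Build a WQE in place: a gdma_wqe header, then client_oob_size bytes of
 * inline OOB data, then the SGL, with the total rounded up to a multiple
 * of GDMA_WQE_BU_SIZE. For example, assuming the 8-byte WQE header, the
 * small (8-byte) inline OOB and two 16-byte SGEs, the WQE occupies
 * ALIGN(8 + 8 + 32, 32) = 64 bytes, i.e. two basic units.
 */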
int mana_gd_post_work_request(struct gdma_queue *wq,
			      const struct gdma_wqe_request *wqe_req,
			      struct gdma_posted_wqe_info *wqe_info)
{
	u32 client_oob_size = wqe_req->inline_oob_size;
	u32 sgl_data_size;
	u32 max_wqe_size;
	u32 wqe_size;
	u8 *wqe_ptr;

	if (wqe_req->num_sge == 0)
		return -EINVAL;

	if (wq->type == GDMA_RQ) {
		if (client_oob_size != 0)
			return -EINVAL;

		client_oob_size = INLINE_OOB_SMALL_SIZE;

		max_wqe_size = GDMA_MAX_RQE_SIZE;
	} else {
		if (client_oob_size != INLINE_OOB_SMALL_SIZE &&
		    client_oob_size != INLINE_OOB_LARGE_SIZE)
			return -EINVAL;

		max_wqe_size = GDMA_MAX_SQE_SIZE;
	}

	sgl_data_size = sizeof(struct gdma_sge) * wqe_req->num_sge;
	wqe_size = ALIGN(sizeof(struct gdma_wqe) + client_oob_size +
			 sgl_data_size, GDMA_WQE_BU_SIZE);
	if (wqe_size > max_wqe_size)
		return -EINVAL;

	if (wq->monitor_avl_buf && wqe_size > mana_gd_wq_avail_space(wq))
		return -ENOSPC;

	if (wqe_info)
		wqe_info->wqe_size_in_bu = wqe_size / GDMA_WQE_BU_SIZE;

	wqe_ptr = mana_gd_get_wqe_ptr(wq, wq->head);
	wqe_ptr += mana_gd_write_client_oob(wqe_req, wq->type, client_oob_size,
					    sgl_data_size, wqe_ptr);
	if (wqe_ptr >= (u8 *)wq->queue_mem_ptr + wq->queue_size)
		wqe_ptr -= wq->queue_size;

	mana_gd_write_sgl(wq, wqe_ptr, wqe_req);

	wq->head += wqe_size / GDMA_WQE_BU_SIZE;

	return 0;
}
EXPORT_SYMBOL_NS(mana_gd_post_work_request, "NET_MANA");

int mana_gd_post_and_ring(struct gdma_queue *queue,
			  const struct gdma_wqe_request *wqe_req,
			  struct gdma_posted_wqe_info *wqe_info)
{
	struct gdma_context *gc = queue->gdma_dev->gdma_context;
	int err;

	err = mana_gd_post_work_request(queue, wqe_req, wqe_info);
	if (err) {
		dev_err(gc->dev, "Failed to post work req from queue type %d of size %u (err=%d)\n",
			queue->type, queue->queue_size, err);
		return err;
	}

	mana_gd_wq_ring_doorbell(gc, queue);

	return 0;
}

static int mana_gd_read_cqe(struct gdma_queue *cq, struct gdma_comp *comp)
{
	unsigned int num_cqe = cq->queue_size / sizeof(struct gdma_cqe);
	struct gdma_cqe *cq_cqe = cq->queue_mem_ptr;
	u32 owner_bits, new_bits, old_bits;
	struct gdma_cqe *cqe;

	cqe = &cq_cqe[cq->head % num_cqe];
	owner_bits = cqe->cqe_info.owner_bits;

	old_bits = (cq->head / num_cqe - 1) & GDMA_CQE_OWNER_MASK;
	/* Return 0 if no more entries. */
	if (owner_bits == old_bits)
		return 0;

	new_bits = (cq->head / num_cqe) & GDMA_CQE_OWNER_MASK;
	/* Return -1 if overflow detected. */
	if (WARN_ON_ONCE(owner_bits != new_bits))
		return -1;

	/* Per GDMA spec, rmb is necessary after checking owner_bits, before
	 * reading completion info
	 */
	rmb();

	comp->wq_num = cqe->cqe_info.wq_num;
	comp->is_sq = cqe->cqe_info.is_sq;
	memcpy(comp->cqe_data, cqe->cqe_data, GDMA_COMP_DATA_SIZE);

	return 1;
}

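/* Drain up to @num_cqe completions into @comp, returning how many were
 * filled in, or -1 on CQ overflow (in which case the head is rolled back
 * and nothing is consumed). A minimal caller sketch, with handle_comp()
 * standing in for a real completion handler:
 *
 *	struct gdma_comp comps[8];
 *	int i, n = mana_gd_poll_cq(cq, comps, ARRAY_SIZE(comps));
 *
 *	for (i = 0; i < n; i++)
 *		handle_comp(&comps[i]);
 */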
int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe)
{
	int cqe_idx;
	int ret;

	for (cqe_idx = 0; cqe_idx < num_cqe; cqe_idx++) {
		ret = mana_gd_read_cqe(cq, &comp[cqe_idx]);

		if (ret < 0) {
			cq->head -= cqe_idx;
			return ret;
		}

		if (ret == 0)
			break;

		cq->head++;
	}

	return cqe_idx;
}
EXPORT_SYMBOL_NS(mana_gd_poll_cq, "NET_MANA");

static irqreturn_t mana_gd_intr(int irq, void *arg)
{
	struct gdma_irq_context *gic = arg;
	struct list_head *eq_list = &gic->eq_list;
	struct gdma_queue *eq;

	rcu_read_lock();
	list_for_each_entry_rcu(eq, eq_list, entry)
		gic->handler(eq);
	rcu_read_unlock();

	return IRQ_HANDLED;
}

int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r)
{
	r->map = bitmap_zalloc(res_avail, GFP_KERNEL);
	if (!r->map)
		return -ENOMEM;

	r->size = res_avail;
	spin_lock_init(&r->lock);

	return 0;
}

void mana_gd_free_res_map(struct gdma_resource *r)
{
	bitmap_free(r->map);
	r->map = NULL;
	r->size = 0;
}

/*
 * Spread on CPUs with the following heuristics:
 *
 * 1. No more than one IRQ per CPU, if possible;
 * 2. NUMA locality is the second priority;
 * 3. Sibling dislocality is the last priority.
 *
 * Let's consider this topology:
 *
 * Node            0               1
 * Core        0       1       2       3
 * CPU        0   1   2   3   4   5   6   7
 *
 * The most performant IRQ distribution based on the above topology
 * and heuristics may look like this:
 *
 * IRQ     Nodes   Cores   CPUs
 * 0       0       0       0-1
 * 1       0       1       2-3
 * 2       0       0       0-1
 * 3       0       1       2-3
 * 4       1       2       4-5
 * 5       1       3       6-7
 * 6       1       2       4-5
 * 7       1       3       6-7
 *
 * The heuristic is implemented as follows.
 *
 * The outer for_each() loop resets the 'weight' to the actual number
 * of CPUs in the hop. Then the inner for_each() loop decrements it by
 * the number of sibling groups (cores) while assigning the first set of
 * IRQs to each group. IRQs 0 and 1 above are distributed this way.
 *
 * Now, because NUMA locality is more important, we should walk the
 * same set of siblings and assign the 2nd set of IRQs (2 and 3); this
 * is implemented by the middle while() loop. We keep doing this until
 * the number of IRQs assigned on this hop equals the number of CPUs in
 * the hop (weight == 0). Then we switch to the next hop and do the same
 * thing.
 */
static int irq_setup(unsigned int *irqs, unsigned int len, int node,
		     bool skip_first_cpu)
{
	const struct cpumask *next, *prev = cpu_none_mask;
	cpumask_var_t cpus __free(free_cpumask_var);
	int cpu, weight;

	if (!alloc_cpumask_var(&cpus, GFP_KERNEL))
		return -ENOMEM;

	rcu_read_lock();
	for_each_numa_hop_mask(next, node) {
		weight = cpumask_weight_andnot(next, prev);
		while (weight > 0) {
			cpumask_andnot(cpus, next, prev);
			for_each_cpu(cpu, cpus) {
				cpumask_andnot(cpus, cpus, topology_sibling_cpumask(cpu));
				--weight;

				if (unlikely(skip_first_cpu)) {
					skip_first_cpu = false;
					continue;
				}

				if (len-- == 0)
					goto done;

				irq_set_affinity_and_hint(*irqs++, topology_sibling_cpumask(cpu));
			}
		}
		prev = next;
	}
done:
	rcu_read_unlock();
	return 0;
}

static int mana_gd_setup_dyn_irqs(struct pci_dev *pdev, int nvec)
{
	struct gdma_context *gc = pci_get_drvdata(pdev);
	struct gdma_irq_context *gic;
	bool skip_first_cpu = false;
	int *irqs, irq, err, i;

	irqs = kmalloc_objs(int, nvec);
	if (!irqs)
		return -ENOMEM;

	/* While processing the pci irq vectors, we start at index 1, as the
	 * IRQ vector at index 0 has already been set up for the HWC.
	 * The irqs[] array, however, is populated from index 0, as it is
	 * later handed to irq_setup().
	 */
	for (i = 1; i <= nvec; i++) {
		gic = kzalloc_obj(*gic);
		if (!gic) {
			err = -ENOMEM;
			goto free_irq;
		}
		gic->handler = mana_gd_process_eq_events;
		INIT_LIST_HEAD(&gic->eq_list);
		spin_lock_init(&gic->lock);

		snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_q%d@pci:%s",
			 i - 1, pci_name(pdev));

		/* one pci vector is already allocated for HWC */
		irqs[i - 1] = pci_irq_vector(pdev, i);
		if (irqs[i - 1] < 0) {
			err = irqs[i - 1];
			goto free_current_gic;
		}

		err = request_irq(irqs[i - 1], mana_gd_intr, 0, gic->name, gic);
		if (err)
			goto free_current_gic;

		xa_store(&gc->irq_contexts, i, gic, GFP_KERNEL);
	}

	/* When calling irq_setup() for dynamically added IRQs: if the number
	 * of CPUs is greater than or equal to the number of allocated MSI-X
	 * vectors, skip the first CPU sibling group, since it is already
	 * affinitized to the HWC IRQ.
	 */
	cpus_read_lock();
	if (gc->num_msix_usable <= num_online_cpus())
		skip_first_cpu = true;

	err = irq_setup(irqs, nvec, gc->numa_node, skip_first_cpu);
	if (err) {
		cpus_read_unlock();
		goto free_irq;
	}

	cpus_read_unlock();
	kfree(irqs);
	return 0;

free_current_gic:
	kfree(gic);
free_irq:
	for (i -= 1; i > 0; i--) {
		irq = pci_irq_vector(pdev, i);
		gic = xa_load(&gc->irq_contexts, i);
		if (WARN_ON(!gic))
			continue;

		irq_update_affinity_hint(irq, NULL);
		free_irq(irq, gic);
		xa_erase(&gc->irq_contexts, i);
		kfree(gic);
	}
	kfree(irqs);
	return err;
}

static int mana_gd_setup_irqs(struct pci_dev *pdev, int nvec)
{
	struct gdma_context *gc = pci_get_drvdata(pdev);
	struct gdma_irq_context *gic;
	int *irqs, *start_irqs, irq;
	unsigned int cpu;
	int err, i;

	irqs = kmalloc_objs(int, nvec);
	if (!irqs)
		return -ENOMEM;

	start_irqs = irqs;

	for (i = 0; i < nvec; i++) {
		gic = kzalloc_obj(*gic);
		if (!gic) {
			err = -ENOMEM;
			goto free_irq;
		}

		gic->handler = mana_gd_process_eq_events;
		INIT_LIST_HEAD(&gic->eq_list);
		spin_lock_init(&gic->lock);

		if (!i)
			snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_hwc@pci:%s",
				 pci_name(pdev));
		else
			snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_q%d@pci:%s",
				 i - 1, pci_name(pdev));

		irqs[i] = pci_irq_vector(pdev, i);
		if (irqs[i] < 0) {
			err = irqs[i];
			goto free_current_gic;
		}

		err = request_irq(irqs[i], mana_gd_intr, 0, gic->name, gic);
		if (err)
			goto free_current_gic;

		xa_store(&gc->irq_contexts, i, gic, GFP_KERNEL);
	}

	/* If the number of IRQs is one more than the number of online CPUs,
	 * assign IRQ0 (the HWC IRQ) and IRQ1 to the same CPU; otherwise use
	 * different CPUs for IRQ0 and IRQ1. Also, use cpumask_local_spread()
	 * instead of cpumask_first() for the node, because the node can be
	 * memory-only.
	 */
	cpus_read_lock();
	if (nvec > num_online_cpus()) {
		cpu = cpumask_local_spread(0, gc->numa_node);
		irq_set_affinity_and_hint(irqs[0], cpumask_of(cpu));
		irqs++;
		nvec -= 1;
	}

	err = irq_setup(irqs, nvec, gc->numa_node, false);
	if (err) {
		cpus_read_unlock();
		goto free_irq;
	}

	cpus_read_unlock();
	kfree(start_irqs);
	return 0;

free_current_gic:
	kfree(gic);
free_irq:
	for (i -= 1; i >= 0; i--) {
		irq = pci_irq_vector(pdev, i);
		gic = xa_load(&gc->irq_contexts, i);
		if (WARN_ON(!gic))
			continue;

		irq_update_affinity_hint(irq, NULL);
		free_irq(irq, gic);
		xa_erase(&gc->irq_contexts, i);
		kfree(gic);
	}

	kfree(start_irqs);
	return err;
}

static int mana_gd_setup_hwc_irqs(struct pci_dev *pdev)
{
	struct gdma_context *gc = pci_get_drvdata(pdev);
	unsigned int max_irqs, min_irqs;
	int nvec, err;

	if (pci_msix_can_alloc_dyn(pdev)) {
		max_irqs = 1;
		min_irqs = 1;
	} else {
		/* The HWC needs 1 interrupt in addition to the queue IRQs */
		max_irqs = min(num_online_cpus(), MANA_MAX_NUM_QUEUES) + 1;
		min_irqs = 2;
	}

	nvec = pci_alloc_irq_vectors(pdev, min_irqs, max_irqs, PCI_IRQ_MSIX);
	if (nvec < 0)
		return nvec;

	err = mana_gd_setup_irqs(pdev, nvec);
	if (err) {
		pci_free_irq_vectors(pdev);
		return err;
	}

	gc->num_msix_usable = nvec;
	gc->max_num_msix = nvec;

	return 0;
}

static int mana_gd_setup_remaining_irqs(struct pci_dev *pdev)
{
	struct gdma_context *gc = pci_get_drvdata(pdev);
	struct msi_map irq_map;
	int max_irqs, i, err;

	if (!pci_msix_can_alloc_dyn(pdev))
		/* The remaining IRQs were already allocated with the HWC IRQ */
		return 0;

	/* Allocate only the remaining IRQs */
	max_irqs = gc->num_msix_usable - 1;

	for (i = 1; i <= max_irqs; i++) {
		irq_map = pci_msix_alloc_irq_at(pdev, i, NULL);
		if (!irq_map.virq) {
			err = irq_map.index;
			/* The caller will handle cleaning up all allocated
			 * IRQs, after the HWC is destroyed.
			 */
			return err;
		}
	}

	err = mana_gd_setup_dyn_irqs(pdev, max_irqs);
	if (err)
		return err;

	gc->max_num_msix = gc->max_num_msix + max_irqs;

	return 0;
}

static void mana_gd_remove_irqs(struct pci_dev *pdev)
{
	struct gdma_context *gc = pci_get_drvdata(pdev);
	struct gdma_irq_context *gic;
	int irq, i;

	if (gc->max_num_msix < 1)
		return;

	for (i = 0; i < gc->max_num_msix; i++) {
		irq = pci_irq_vector(pdev, i);
		if (irq < 0)
			continue;

		gic = xa_load(&gc->irq_contexts, i);
		if (WARN_ON(!gic))
			continue;

		/* Need to clear the hint before free_irq */
		irq_update_affinity_hint(irq, NULL);
		free_irq(irq, gic);
		xa_erase(&gc->irq_contexts, i);
		kfree(gic);
	}

	pci_free_irq_vectors(pdev);

	gc->max_num_msix = 0;
	gc->num_msix_usable = 0;
}

1941 static int mana_gd_setup(struct pci_dev *pdev)
1942 {
1943 struct gdma_context *gc = pci_get_drvdata(pdev);
1944 int err;
1945
1946 err = mana_gd_init_registers(pdev);
1947 if (err)
1948 return err;
1949
1950 mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base);
1951
1952 gc->service_wq = alloc_ordered_workqueue("gdma_service_wq", 0);
1953 if (!gc->service_wq)
1954 return -ENOMEM;
1955
1956 err = mana_gd_setup_hwc_irqs(pdev);
1957 if (err) {
1958 dev_err(gc->dev, "Failed to setup IRQs for HWC creation: %d\n",
1959 err);
1960 goto free_workqueue;
1961 }
1962
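/* The HWC channel carries the management commands used below for the
 * version handshake and the resource queries, so it must be created
 * first.
 */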
1963 err = mana_hwc_create_channel(gc);
1964 if (err)
1965 goto remove_irq;
1966
1967 err = mana_gd_verify_vf_version(pdev);
1968 if (err)
1969 goto destroy_hwc;
1970
1971 err = mana_gd_query_max_resources(pdev);
1972 if (err)
1973 goto destroy_hwc;
1974
1975 err = mana_gd_setup_remaining_irqs(pdev);
1976 if (err) {
1977 dev_err(gc->dev, "Failed to setup remaining IRQs: %d\n", err);
1978 goto destroy_hwc;
1979 }
1980
1981 err = mana_gd_detect_devices(pdev);
1982 if (err)
1983 goto destroy_hwc;
1984
1985 dev_dbg(&pdev->dev, "mana gdma setup successful\n");
1986 return 0;
1987
1988 destroy_hwc:
1989 mana_hwc_destroy_channel(gc);
1990 remove_irq:
1991 mana_gd_remove_irqs(pdev);
1992 free_workqueue:
1993 destroy_workqueue(gc->service_wq);
1994 gc->service_wq = NULL;
1995 dev_err(&pdev->dev, "%s failed (error %d)\n", __func__, err);
1996 return err;
1997 }
1998
1999 static void mana_gd_cleanup(struct pci_dev *pdev)
2000 {
2001 struct gdma_context *gc = pci_get_drvdata(pdev);
2002
2003 mana_hwc_destroy_channel(gc);
2004
2005 mana_gd_remove_irqs(pdev);
2006
2007 if (gc->service_wq) {
2008 destroy_workqueue(gc->service_wq);
2009 gc->service_wq = NULL;
2010 }
2011 dev_dbg(&pdev->dev, "mana gdma cleanup successful\n");
2012 }
2013
2014 static bool mana_is_pf(unsigned short dev_id)
2015 {
2016 return dev_id == MANA_PF_DEVICE_ID;
2017 }
2018
2019 static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2020 {
2021 struct gdma_context *gc;
2022 void __iomem *bar0_va;
2023 int bar = 0;
2024 int err;
2025
2026 /* Each port has 2 CQs, each CQ has at most 1 EQE at a time */
2027 BUILD_BUG_ON(2 * MAX_PORTS_IN_MANA_DEV * GDMA_EQE_SIZE > EQ_SIZE);
2028
2029 err = pci_enable_device(pdev);
2030 if (err) {
2031 dev_err(&pdev->dev, "Failed to enable pci device (err=%d)\n", err);
2032 return -ENXIO;
2033 }
2034
2035 pci_set_master(pdev);
2036
2037 err = pci_request_regions(pdev, "mana");
2038 if (err)
2039 goto disable_dev;
2040
2041 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2042 if (err) {
2043 dev_err(&pdev->dev, "DMA set mask failed: %d\n", err);
2044 goto release_region;
2045 }
2046 dma_set_max_seg_size(&pdev->dev, UINT_MAX);
2047
2048 err = -ENOMEM;
2049 gc = vzalloc(sizeof(*gc));
2050 if (!gc)
2051 goto release_region;
2052
2053 mutex_init(&gc->eq_test_event_mutex);
2054 pci_set_drvdata(pdev, gc);
2055 gc->bar0_pa = pci_resource_start(pdev, 0);
2056 gc->bar0_size = pci_resource_len(pdev, 0);
2057
2058 bar0_va = pci_iomap(pdev, bar, 0);
2059 if (!bar0_va)
2060 goto free_gc;
2061
2062 gc->numa_node = dev_to_node(&pdev->dev);
2063 gc->is_pf = mana_is_pf(pdev->device);
2064 gc->bar0_va = bar0_va;
2065 gc->dev = &pdev->dev;
2066 xa_init(&gc->irq_contexts);
2067
2068 gc->mana_pci_debugfs = debugfs_create_dir(pci_name(pdev),
2069 mana_debugfs_root);
2070
2071 err = mana_gd_setup(pdev);
2072 if (err)
2073 goto unmap_bar;
2074
2075 err = mana_probe(&gc->mana, false);
2076 if (err)
2077 goto cleanup_gd;
2078
2079 err = mana_rdma_probe(&gc->mana_ib);
2080 if (err)
2081 goto cleanup_mana;
2082
2083 /*
2084  * If a hardware reset event arrived over the HWC during probe,
2085  * roll back and perform the hardware reset procedure.
2086  */
2087 if (test_and_set_bit(GC_PROBE_SUCCEEDED, &gc->flags)) {
2088 err = -EPROTO;
2089 goto cleanup_mana_rdma;
2090 }
2091
2092 return 0;
2093
2094 cleanup_mana_rdma:
2095 mana_rdma_remove(&gc->mana_ib);
2096 cleanup_mana:
2097 mana_remove(&gc->mana, false);
2098 cleanup_gd:
2099 mana_gd_cleanup(pdev);
2100 unmap_bar:
2101 /*
2102  * At this point, the other debugfs child dirs/files are either
2103  * not yet created or already cleaned up. Removing the PCI debugfs
2104  * folder now only cleans up the adapter-MTU file and the
2105  * gc->mana_pci_debugfs folder itself.
2106  */
2107 debugfs_remove_recursive(gc->mana_pci_debugfs);
2108 gc->mana_pci_debugfs = NULL;
2109 xa_destroy(&gc->irq_contexts);
2110 pci_iounmap(pdev, bar0_va);
2111 free_gc:
2112 pci_set_drvdata(pdev, NULL);
2113 vfree(gc);
2114 release_region:
2115 pci_release_regions(pdev);
2116 disable_dev:
2117 pci_disable_device(pdev);
2118 dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err);
2119
2120 /*
2121  * The hardware could be in recovery mode, in which case the HWC
2122  * returns -ETIMEDOUT or -EPROTO from mana_gd_setup(), mana_probe()
2123  * or mana_rdma_probe(), or we received a hardware reset event over
2124  * the HWC interrupt. In these cases, perform the device recovery
2125  * procedure after MANA_SERVICE_PERIOD seconds.
2126  */
2127 if (err == -ETIMEDOUT || err == -EPROTO) {
2128 struct mana_dev_recovery *dev;
2129 unsigned long flags;
2130
2131 dev_info(&pdev->dev, "Start MANA recovery mode\n");
2132
2133 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
2134 if (!dev)
2135 return err;
2136
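/* Hold a reference on the device until the recovery worker (or
 * module exit) drops it with pci_dev_put().
 */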
2137 dev->pdev = pci_dev_get(pdev);
2138 dev->type = GDMA_EQE_HWC_RESET_REQUEST;
2139
2140 spin_lock_irqsave(&mana_dev_recovery_work.lock, flags);
2141 list_add_tail(&dev->list, &mana_dev_recovery_work.dev_list);
2142 spin_unlock_irqrestore(&mana_dev_recovery_work.lock, flags);
2143
2144 schedule_delayed_work(&mana_dev_recovery_work.work,
2145 secs_to_jiffies(MANA_SERVICE_PERIOD));
2146 }
2147
2148 return err;
2149 }
2150
2151 static void mana_gd_remove(struct pci_dev *pdev)
2152 {
2153 struct gdma_context *gc = pci_get_drvdata(pdev);
2154
2155 mana_rdma_remove(&gc->mana_ib);
2156 mana_remove(&gc->mana, false);
2157
2158 mana_gd_cleanup(pdev);
2159
2160 debugfs_remove_recursive(gc->mana_pci_debugfs);
2161
2162 gc->mana_pci_debugfs = NULL;
2163
2164 xa_destroy(&gc->irq_contexts);
2165
2166 pci_iounmap(pdev, gc->bar0_va);
2167
2168 vfree(gc);
2169
2170 pci_release_regions(pdev);
2171 pci_disable_device(pdev);
2172
2173 dev_dbg(&pdev->dev, "mana gdma remove successful\n");
2174 }
2175
2176 /* The 'state' parameter is not used. */
2177 int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
2178 {
2179 struct gdma_context *gc = pci_get_drvdata(pdev);
2180
2181 mana_rdma_remove(&gc->mana_ib);
2182 mana_remove(&gc->mana, true);
2183
2184 mana_gd_cleanup(pdev);
2185
2186 return 0;
2187 }
2188
2189 /* In case the NIC hardware stops working, the suspend and resume callbacks will
2190 * fail -- if this happens, it's safer to just report an error than try to undo
2191 * what has been done.
2192 */
2193 int mana_gd_resume(struct pci_dev *pdev)
2194 {
2195 struct gdma_context *gc = pci_get_drvdata(pdev);
2196 int err;
2197
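/* Rebuild what mana_gd_suspend() tore down; the 'true' argument
 * indicates a resume rather than a fresh probe.
 */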
2198 err = mana_gd_setup(pdev);
2199 if (err)
2200 return err;
2201
2202 err = mana_probe(&gc->mana, true);
2203 if (err)
2204 return err;
2205
2206 err = mana_rdma_probe(&gc->mana_ib);
2207 if (err)
2208 return err;
2209
2210 return 0;
2211 }
2212
2213 /* Quiesce the device for kexec. This is also called upon reboot/shutdown. */
2214 static void mana_gd_shutdown(struct pci_dev *pdev)
2215 {
2216 struct gdma_context *gc = pci_get_drvdata(pdev);
2217
2218 dev_info(&pdev->dev, "Shutdown was called\n");
2219
2220 mana_rdma_remove(&gc->mana_ib);
2221 mana_remove(&gc->mana, true);
2222
2223 mana_gd_cleanup(pdev);
2224
2225 debugfs_remove_recursive(gc->mana_pci_debugfs);
2226
2227 gc->mana_pci_debugfs = NULL;
2228
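/* The BAR mapping and PCI regions are kept: this path only quiesces
 * the device rather than fully tearing it down.
 */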
2229 pci_disable_device(pdev);
2230 }
2231
2232 static const struct pci_device_id mana_id_table[] = {
2233 { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, MANA_PF_DEVICE_ID) },
2234 { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, MANA_VF_DEVICE_ID) },
2235 { }
2236 };
2237
2238 static struct pci_driver mana_driver = {
2239 .name = "mana",
2240 .id_table = mana_id_table,
2241 .probe = mana_gd_probe,
2242 .remove = mana_gd_remove,
2243 .suspend = mana_gd_suspend,
2244 .resume = mana_gd_resume,
2245 .shutdown = mana_gd_shutdown,
2246 };
2247
2248 static int __init mana_driver_init(void)
2249 {
2250 int err;
2251
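/* Devices whose probe failed in hardware recovery mode are queued on
 * dev_list and retried by the delayed work below after
 * MANA_SERVICE_PERIOD seconds.
 */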
2252 INIT_LIST_HEAD(&mana_dev_recovery_work.dev_list);
2253 spin_lock_init(&mana_dev_recovery_work.lock);
2254 INIT_DELAYED_WORK(&mana_dev_recovery_work.work, mana_recovery_delayed_func);
2255
2256 mana_debugfs_root = debugfs_create_dir("mana", NULL);
2257
2258 err = pci_register_driver(&mana_driver);
2259 if (err) {
2260 debugfs_remove(mana_debugfs_root);
2261 mana_debugfs_root = NULL;
2262 }
2263
2264 return err;
2265 }
2266
2267 static void __exit mana_driver_exit(void)
2268 {
2269 struct mana_dev_recovery *dev;
2270 unsigned long flags;
2271
2272 disable_delayed_work_sync(&mana_dev_recovery_work.work);
2273
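/* With the recovery work disabled, drop the references of any devices
 * still queued for recovery.
 */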
2274 spin_lock_irqsave(&mana_dev_recovery_work.lock, flags);
2275 while (!list_empty(&mana_dev_recovery_work.dev_list)) {
2276 dev = list_first_entry(&mana_dev_recovery_work.dev_list,
2277 struct mana_dev_recovery, list);
2278 list_del(&dev->list);
2279 pci_dev_put(dev->pdev);
2280 kfree(dev);
2281 }
2282 spin_unlock_irqrestore(&mana_dev_recovery_work.lock, flags);
2283
2284 pci_unregister_driver(&mana_driver);
2285
2286 debugfs_remove(mana_debugfs_root);
2287
2288 mana_debugfs_root = NULL;
2289 }
2290
2291 module_init(mana_driver_init);
2292 module_exit(mana_driver_exit);
2293
2294 MODULE_DEVICE_TABLE(pci, mana_id_table);
2295
2296 MODULE_LICENSE("Dual BSD/GPL");
2297 MODULE_DESCRIPTION("Microsoft Azure Network Adapter driver");
2298