1 /*
2 * Copyright (c) 2006-2016 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32 #include <config.h>
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <unistd.h>
37 #include <errno.h>
38 #include <sys/mman.h>
39 #include <pthread.h>
40 #include <string.h>
41 #include <signal.h>
42 #include <stdbool.h>
43
44 #include "libcxgb4.h"
45 #include "cxgb4-abi.h"
46
#define PCI_VENDOR_ID_CHELSIO 0x1425

/*
 * Macros needed to support the PCI Device ID Table ...
 *
 * t4_pci_id_tbl.h is a device-ID table shared with the kernel driver.
 * Defining these macros before including it expands that table into
 * hca_table[], the list of (vendor, device) pairs this library claims
 * in cxgb4_driver_init().
 */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
	static struct { \
		unsigned vendor; \
		unsigned device; \
	} hca_table[] = {

/* PCI function used by t4_pci_id_tbl.h when forming table entries. */
#define CH_PCI_DEVICE_ID_FUNCTION \
		0x4

#define CH_PCI_ID_TABLE_ENTRY(__DeviceID) \
		{ \
			.vendor = PCI_VENDOR_ID_CHELSIO, \
			.device = (__DeviceID), \
		}

#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \
	}

#include "t4_chip_type.h"
#include "t4_pci_id_tbl.h"

/* Host page geometry; filled in by the cxgb4_register_driver() constructor. */
unsigned long c4iw_page_size;
unsigned long c4iw_page_shift;
unsigned long c4iw_page_mask;
/* Tunables, overridable via CXGB4_MA_WR / T5_ENABLE_WC environment vars. */
int ma_wr;
int t5_en_wc = 1;

/* All devices claimed by this library; walked by dump_state(). */
static TAILQ_HEAD(,c4iw_dev) devices = TAILQ_HEAD_INITIALIZER(devices);
80
/*
 * Default verbs entry points for a cxgb4 context.  The chip-specific
 * fast-path handlers (post_send, post_recv, poll_cq, async_event) are
 * patched into the per-context copy by c4iw_alloc_context().
 */
static struct ibv_context_ops c4iw_ctx_ops = {
	.query_device = c4iw_query_device,
	.query_port = c4iw_query_port,
	.alloc_pd = c4iw_alloc_pd,
	.dealloc_pd = c4iw_free_pd,
	.reg_mr = c4iw_reg_mr,
	.dereg_mr = c4iw_dereg_mr,
	.create_cq = c4iw_create_cq,
	.resize_cq = c4iw_resize_cq,
	.destroy_cq = c4iw_destroy_cq,
	.create_srq = c4iw_create_srq,
	.modify_srq = c4iw_modify_srq,
	.destroy_srq = c4iw_destroy_srq,
	.create_qp = c4iw_create_qp,
	.modify_qp = c4iw_modify_qp,
	.destroy_qp = c4iw_destroy_qp,
	.query_qp = c4iw_query_qp,
	.create_ah = c4iw_create_ah,
	.destroy_ah = c4iw_destroy_ah,
	.attach_mcast = c4iw_attach_mcast,
	.detach_mcast = c4iw_detach_mcast,
	.post_srq_recv = c4iw_post_srq_recv,
	.req_notify_cq = c4iw_arm_cq,
};
105
c4iw_alloc_context(struct ibv_device * ibdev,int cmd_fd)106 static struct ibv_context *c4iw_alloc_context(struct ibv_device *ibdev,
107 int cmd_fd)
108 {
109 struct c4iw_context *context;
110 struct ibv_get_context cmd;
111 struct c4iw_alloc_ucontext_resp resp;
112 struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
113 struct ibv_query_device qcmd;
114 uint64_t raw_fw_ver;
115 struct ibv_device_attr attr;
116
117 context = malloc(sizeof *context);
118 if (!context)
119 return NULL;
120
121 memset(context, 0, sizeof *context);
122 context->ibv_ctx.cmd_fd = cmd_fd;
123
124 resp.status_page_size = 0;
125 resp.reserved = 0;
126 if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
127 &resp.ibv_resp, sizeof resp))
128 goto err_free;
129
130 if (resp.reserved)
131 PDBG("%s c4iw_alloc_ucontext_resp reserved field modified by kernel\n",
132 __FUNCTION__);
133
134 context->status_page_size = resp.status_page_size;
135 if (resp.status_page_size) {
136 context->status_page = mmap(NULL, resp.status_page_size,
137 PROT_READ, MAP_SHARED, cmd_fd,
138 resp.status_page_key);
139 if (context->status_page == MAP_FAILED)
140 goto err_free;
141 }
142
143 context->ibv_ctx.device = ibdev;
144 context->ibv_ctx.ops = c4iw_ctx_ops;
145
146 switch (rhp->chip_version) {
147 case CHELSIO_T7:
148 PDBG("%s T7/T6/T5/T4 device\n", __FUNCTION__);
149 case CHELSIO_T6:
150 PDBG("%s T6/T5/T4 device\n", __FUNCTION__);
151 case CHELSIO_T5:
152 PDBG("%s T5/T4 device\n", __FUNCTION__);
153 case CHELSIO_T4:
154 PDBG("%s T4 device\n", __FUNCTION__);
155 context->ibv_ctx.ops.async_event = c4iw_async_event;
156 context->ibv_ctx.ops.post_send = c4iw_post_send;
157 context->ibv_ctx.ops.post_recv = c4iw_post_receive;
158 context->ibv_ctx.ops.poll_cq = c4iw_poll_cq;
159 context->ibv_ctx.ops.req_notify_cq = c4iw_arm_cq;
160 break;
161 default:
162 PDBG("%s unknown hca type %d\n", __FUNCTION__,
163 rhp->chip_version);
164 goto err_unmap;
165 break;
166 }
167
168 if (!rhp->mmid2ptr) {
169 int ret;
170
171 ret = ibv_cmd_query_device(&context->ibv_ctx, &attr, &raw_fw_ver, &qcmd,
172 sizeof qcmd);
173 if (ret)
174 goto err_unmap;
175 rhp->max_mr = attr.max_mr;
176 rhp->mmid2ptr = calloc(attr.max_mr, sizeof(void *));
177 if (!rhp->mmid2ptr) {
178 goto err_unmap;
179 }
180 if (rhp->abi_version < 3) {
181 fprintf(stderr, "Warning: iw_cxgb4 driver is of older version"
182 " than libcxgb4:: %d\n", rhp->abi_version);
183 rhp->max_qp = T4_QID_BASE + attr.max_qp;
184 } else {
185 rhp->max_qp = context->status_page->qp_start +
186 context->status_page->qp_size;
187 }
188 rhp->qpid2ptr = calloc(rhp->max_qp, sizeof(void *));
189 if (!rhp->qpid2ptr) {
190 goto err_unmap;
191 }
192 if (rhp->abi_version < 3)
193 rhp->max_cq = T4_QID_BASE + attr.max_cq;
194 else
195 rhp->max_cq = context->status_page->cq_start +
196 context->status_page->cq_size;
197 rhp->cqid2ptr = calloc(rhp->max_cq, sizeof(void *));
198 if (!rhp->cqid2ptr)
199 goto err_unmap;
200
201 /* Disable userspace WC if architecture/adapter does not
202 * support WC.
203 * Note: To forcefully disable WC in kernel driver use the
204 * loader tunable "hw.cxl.write_combine=0"
205 */
206 if (t5_en_wc && !context->status_page->wc_supported) {
207 t5_en_wc = 0;
208 }
209 }
210
211 return &context->ibv_ctx;
212
213 err_unmap:
214 munmap(context->status_page, context->status_page_size);
215 err_free:
216 if (rhp->cqid2ptr)
217 free(rhp->cqid2ptr);
218 if (rhp->qpid2ptr)
219 free(rhp->cqid2ptr);
220 if (rhp->mmid2ptr)
221 free(rhp->cqid2ptr);
222 free(context);
223 return NULL;
224 }
225
c4iw_free_context(struct ibv_context * ibctx)226 static void c4iw_free_context(struct ibv_context *ibctx)
227 {
228 struct c4iw_context *context = to_c4iw_context(ibctx);
229
230 if (context->status_page_size)
231 munmap(context->status_page, context->status_page_size);
232 free(context);
233 }
234
/* Device-level entry points handed to libibverbs for claimed devices. */
static struct verbs_device_ops c4iw_dev_ops = {
	.alloc_context = c4iw_alloc_context,
	.free_context = c4iw_free_context
};
239
#ifdef STALL_DETECTION

/* Stall timeout, read from the CXGB4_STALL_TIMEOUT environment variable
 * in cxgb4_driver_init(). */
int stall_to;

/*
 * Dump a CQ's software state and its raw hardware queue to stderr.
 * Each loop iteration prints one queue entry as four 16-byte rows
 * (the first row is flagged when it is the consumer index).  Note all
 * four rows of an entry are labeled with the same index 'i'.
 */
static void dump_cq(struct c4iw_cq *chp)
{
	int i;

	fprintf(stderr,
 		"CQ: %p id %u queue %p cidx 0x%08x sw_queue %p sw_cidx %d sw_pidx %d sw_in_use %d depth %u error %u gen %d "
 		"cidx_inc %d bits_type_ts %016" PRIx64 " notempty %d\n", chp,
		chp->cq.cqid, chp->cq.queue, chp->cq.cidx,
		chp->cq.sw_queue, chp->cq.sw_cidx, chp->cq.sw_pidx, chp->cq.sw_in_use,
		chp->cq.size, chp->cq.error, chp->cq.gen, chp->cq.cidx_inc, be64toh(chp->cq.bits_type_ts),
		t4_cq_notempty(&chp->cq));

	for (i=0; i < chp->cq.size; i++) {
		u64 *p = (u64 *)(chp->cq.queue + i);

		/* First 16 bytes; mark the consumer index entry. */
		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64, i, be64toh(p[0]), be64toh(p[1]));
		if (i == chp->cq.cidx)
			fprintf(stderr, " <-- cidx\n");
		else
			fprintf(stderr, "\n");
		p+= 2;
		/* Remaining 48 bytes of the entry. */
		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
		p+= 2;
		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
		p+= 2;
		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
		p+= 2;
	}
}
273
/*
 * Dump a QP's software bookkeeping and raw work queues to stderr:
 * summary header, then pending SQ software entries, the raw SQ work
 * queue, pending RQ software entries, and the raw RQ work queue.
 */
static void dump_qp(struct c4iw_qp *qhp)
{
	int i;
	int j;
	struct t4_swsqe *swsqe;
	struct t4_swrqe *swrqe;
	u16 cidx, pidx;
	u64 *p;

	/* Summary of both queues' indices, sizes, and error/flush state. */
	fprintf(stderr,
		"QP: %p id %u error %d flushed %d qid_mask 0x%x\n"
		"    SQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u wq_pidx %u depth %u flags 0x%x flush_cidx %d\n"
		"    RQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u depth %u\n",
		qhp,
		qhp->wq.sq.qid,
		qhp->wq.error,
		qhp->wq.flushed,
		qhp->wq.qid_mask,
		qhp->wq.sq.qid,
		qhp->wq.sq.queue,
		qhp->wq.sq.sw_sq,
		qhp->wq.sq.cidx,
		qhp->wq.sq.pidx,
		qhp->wq.sq.in_use,
		qhp->wq.sq.wq_pidx,
		qhp->wq.sq.size,
		qhp->wq.sq.flags,
		qhp->wq.sq.flush_cidx,
		qhp->wq.rq.qid,
		qhp->wq.rq.queue,
		qhp->wq.rq.sw_rq,
		qhp->wq.rq.cidx,
		qhp->wq.rq.pidx,
		qhp->wq.rq.in_use,
		qhp->wq.rq.size);
	/* Walk the in-flight SQ software entries from cidx up to pidx
	 * (circular, wrapping at size). */
	cidx = qhp->wq.sq.cidx;
	pidx = qhp->wq.sq.pidx;
	if (cidx != pidx)
		fprintf(stderr, "SQ: \n");
	while (cidx != pidx) {
		swsqe = &qhp->wq.sq.sw_sq[cidx];
		/* NOTE(review): the cqe words are printed via htobe64 —
		 * presumably the stashed CQE is big-endian; confirm
		 * (be64toh would read more naturally). */
		fprintf(stderr, "%04u: wr_id %016" PRIx64
			" sq_wptr %08x read_len %u opcode 0x%x "
			"complete %u signaled %u cqe %016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n",
			cidx,
			swsqe->wr_id,
			swsqe->idx,
			swsqe->read_len,
			swsqe->opcode,
			swsqe->complete,
			swsqe->signaled,
			htobe64(((uint64_t *)&swsqe->cqe)[0]),
			htobe64(((uint64_t *)&swsqe->cqe)[1]),
			htobe64(((uint64_t *)&swsqe->cqe)[2]),
			htobe64(((uint64_t *)&swsqe->cqe)[3]));
		if (++cidx == qhp->wq.sq.size)
			cidx = 0;
	}

	/* Raw SQ: each slot dumped 16 bytes per row, pidx slot flagged. */
	fprintf(stderr, "SQ WQ: \n");
	p = (u64 *)qhp->wq.sq.queue;
	for (i=0; i < qhp->wq.sq.size * T4_SQ_NUM_SLOTS; i++) {
		for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
			fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
				i, be64toh(p[0]), be64toh(p[1]));
			if (j == 0 && i == qhp->wq.sq.wq_pidx)
				fprintf(stderr, " <-- pidx");
			fprintf(stderr, "\n");
			p += 2;
		}
	}
	/* Walk the in-flight RQ software entries, same circular scheme. */
	cidx = qhp->wq.rq.cidx;
	pidx = qhp->wq.rq.pidx;
	if (cidx != pidx)
		fprintf(stderr, "RQ: \n");
	while (cidx != pidx) {
		swrqe = &qhp->wq.rq.sw_rq[cidx];
		fprintf(stderr, "%04u: wr_id %016" PRIx64 "\n",
			cidx,
			swrqe->wr_id );
		if (++cidx == qhp->wq.rq.size)
			cidx = 0;
	}

	/* Raw RQ: each slot dumped 16 bytes per row, pidx/cidx flagged. */
	fprintf(stderr, "RQ WQ: \n");
	p = (u64 *)qhp->wq.rq.queue;
	for (i=0; i < qhp->wq.rq.size * T4_RQ_NUM_SLOTS; i++) {
		for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
			fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
				i, be64toh(p[0]), be64toh(p[1]));
			if (j == 0 && i == qhp->wq.rq.pidx)
				fprintf(stderr, " <-- pidx");
			if (j == 0 && i == qhp->wq.rq.cidx)
				fprintf(stderr, " <-- cidx");
			fprintf(stderr, "\n");
			p+=2;
		}
	}
}
373
/*
 * Dump every CQ and QP on every device this library has claimed.
 * Invoked when a stall is detected.  The spin locks are deliberately
 * left commented out — presumably to avoid self-deadlock when the
 * dumper fires on a path already holding them; confirm before
 * re-enabling.
 */
void dump_state(void)
{
	struct c4iw_dev *dev;
	int i;

	fprintf(stderr, "STALL DETECTED:\n");
	TAILQ_FOREACH(dev, &devices, list) {
		//pthread_spin_lock(&dev->lock);
		fprintf(stderr, "Device %s\n", dev->ibv_dev.name);
		/* cqid2ptr/qpid2ptr are sparse id-indexed tables; skip
		 * unused slots. */
		for (i=0; i < dev->max_cq; i++) {
			if (dev->cqid2ptr[i]) {
				struct c4iw_cq *chp = dev->cqid2ptr[i];
				//pthread_spin_lock(&chp->lock);
				dump_cq(chp);
				//pthread_spin_unlock(&chp->lock);
			}
		}
		for (i=0; i < dev->max_qp; i++) {
			if (dev->qpid2ptr[i]) {
				struct c4iw_qp *qhp = dev->qpid2ptr[i];
				//pthread_spin_lock(&qhp->lock);
				dump_qp(qhp);
				//pthread_spin_unlock(&qhp->lock);
			}
		}
		//pthread_spin_unlock(&dev->lock);
	}
	fprintf(stderr, "DUMP COMPLETE:\n");
	fflush(stderr);
}
404 #endif /* end of STALL_DETECTION */
405
406 /*
407 * c4iw_abi_version is used to store ABI for iw_cxgb4 so the user mode library
408 * can know if the driver supports the kernel mode db ringing.
409 */
410 int c4iw_abi_version = 1;
411
cxgb4_driver_init(const char * uverbs_sys_path,int abi_version)412 static struct verbs_device *cxgb4_driver_init(const char *uverbs_sys_path,
413 int abi_version)
414 {
415 char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[128], *cp;
416 char dev_str[IBV_SYSFS_PATH_MAX];
417 struct c4iw_dev *dev;
418 unsigned vendor, device, fw_maj, fw_min;
419 int i;
420 char devnum;
421 char ib_param[16];
422
423 #ifndef __linux__
424 if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
425 ibdev, sizeof ibdev) < 0)
426 return NULL;
427
428 devnum = atoi(&ibdev[5]);
429
430 if (ibdev[0] == 't' && ibdev[1] >= '4' && ibdev[1] <= '6' &&
431 strstr(&ibdev[2], "nex") && devnum >= 0) {
432 snprintf(dev_str, sizeof(dev_str), "/dev/t%cnex/%d", ibdev[1],
433 devnum);
434 } else if (strstr(&ibdev[0], "chnex") && devnum >= 0) {
435 snprintf(dev_str, sizeof(dev_str), "/dev/chnex/%d", devnum);
436 } else
437 return NULL;
438
439 if (ibv_read_sysfs_file(dev_str, "\%pnpinfo", value, sizeof value) < 0)
440 return NULL;
441 else {
442 if (strstr(value, "vendor=")) {
443 strncpy(ib_param, strstr(value, "vendor=") +
444 strlen("vendor="), 6);
445 sscanf(ib_param, "%i", &vendor);
446 }
447
448 if (strstr(value, "device=")) {
449 strncpy(ib_param, strstr(value, "device=") +
450 strlen("device="), 6);
451 sscanf(ib_param, "%i", &device);
452 }
453 }
454 #else
455 if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
456 value, sizeof value) < 0)
457 return NULL;
458 sscanf(value, "%i", &vendor);
459
460 if (ibv_read_sysfs_file(uverbs_sys_path, "device/device",
461 value, sizeof value) < 0)
462 return NULL;
463 sscanf(value, "%i", &device);
464 #endif
465
466 for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
467 if (vendor == hca_table[i].vendor &&
468 device == hca_table[i].device)
469 goto found;
470
471 return NULL;
472
473 found:
474 c4iw_abi_version = abi_version;
475
476 #ifndef __linux__
477 if (ibv_read_sysfs_file(dev_str, "firmware_version",
478 value, sizeof value) < 0)
479 return NULL;
480 #else
481 /*
482 * Verify that the firmware major number matches. Major number
483 * mismatches are fatal. Minor number mismatches are tolerated.
484 */
485 if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
486 ibdev, sizeof ibdev) < 0)
487 return NULL;
488
489 memset(devstr, 0, sizeof devstr);
490 snprintf(devstr, sizeof devstr, "%s/class/infiniband/%s",
491 ibv_get_sysfs_path(), ibdev);
492 if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0)
493 return NULL;
494 #endif
495
496 cp = strtok(value+1, ".");
497 sscanf(cp, "%i", &fw_maj);
498 cp = strtok(NULL, ".");
499 sscanf(cp, "%i", &fw_min);
500
501 if ((signed int)fw_maj < FW_MAJ) {
502 fprintf(stderr, "libcxgb4: Fatal firmware version mismatch. "
503 "Firmware major number is %u and libcxgb4 needs %u.\n",
504 fw_maj, FW_MAJ);
505 fflush(stderr);
506 return NULL;
507 }
508
509 DBGLOG("libcxgb4");
510
511 if ((signed int)fw_min < FW_MIN) {
512 PDBG("libcxgb4: non-fatal firmware version mismatch. "
513 "Firmware minor number is %u and libcxgb4 needs %u.\n",
514 fw_min, FW_MIN);
515 fflush(stderr);
516 }
517
518 PDBG("%s found vendor %d device %d type %d\n",
519 __FUNCTION__, vendor, device, CHELSIO_CHIP_VERSION(hca_table[i].device >> 8));
520
521 dev = calloc(1, sizeof *dev);
522 if (!dev) {
523 return NULL;
524 }
525
526 if (pthread_spin_init(&dev->lock, PTHREAD_PROCESS_PRIVATE))
527 goto err;
528
529 dev->ibv_dev.ops = &c4iw_dev_ops;
530 if (hca_table[i].device == 0xd000)
531 dev->chip_version = CHELSIO_T7;
532 else
533 dev->chip_version = CHELSIO_CHIP_VERSION(hca_table[i].device >> 8);
534 dev->abi_version = abi_version;
535
536 PDBG("%s device claimed\n", __FUNCTION__);
537 TAILQ_INSERT_TAIL(&devices, dev, list);
538 #ifdef STALL_DETECTION
539 {
540 char *c = getenv("CXGB4_STALL_TIMEOUT");
541 if (c) {
542 stall_to = strtol(c, NULL, 0);
543 if (errno || stall_to < 0)
544 stall_to = 0;
545 }
546 }
547 #endif
548 {
549 char *c = getenv("CXGB4_MA_WR");
550 if (c) {
551 ma_wr = strtol(c, NULL, 0);
552 if (ma_wr != 1)
553 ma_wr = 0;
554 }
555 }
556 {
557 char *c = getenv("T5_ENABLE_WC");
558 if (c) {
559 t5_en_wc = strtol(c, NULL, 0);
560 if (t5_en_wc != 1)
561 t5_en_wc = 0;
562 }
563 }
564
565 return &dev->ibv_dev;
566
567 err:
568 free(dev);
569
570 return NULL;
571 }
572
/*
 * Library constructor: cache the host page geometry (size, shift, mask)
 * and register this provider with libibverbs.
 */
static __attribute__((constructor)) void cxgb4_register_driver(void)
{
	c4iw_page_size = sysconf(_SC_PAGESIZE);
	c4iw_page_shift = long_log2(c4iw_page_size);
	c4iw_page_mask = ~(c4iw_page_size - 1);
	verbs_register_driver("cxgb4", cxgb4_driver_init);
}
580
#ifdef STATS
/* Library destructor: log the accumulated verbs operation counters
 * (c4iw_stats) to syslog at process exit. */
void __attribute__ ((destructor)) cs_fini(void);
void __attribute__ ((destructor)) cs_fini(void)
{
	syslog(LOG_NOTICE, "cxgb4 stats - sends %lu recv %lu read %lu "
	       "write %lu arm %lu cqe %lu mr %lu qp %lu cq %lu\n",
	       c4iw_stats.send, c4iw_stats.recv, c4iw_stats.read,
	       c4iw_stats.write, c4iw_stats.arm, c4iw_stats.cqe,
	       c4iw_stats.mr, c4iw_stats.qp, c4iw_stats.cq);
}
#endif
592