1 /* 2 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/completion.h> 34 #include <linux/file.h> 35 #include <linux/mutex.h> 36 #include <linux/poll.h> 37 #include <linux/sched.h> 38 #include <linux/idr.h> 39 #include <linux/in.h> 40 #include <linux/in6.h> 41 #include <linux/miscdevice.h> 42 #include <linux/slab.h> 43 #include <linux/sysctl.h> 44 #include <linux/module.h> 45 #include <linux/nsproxy.h> 46 47 #include <linux/nospec.h> 48 49 #include <rdma/rdma_user_cm.h> 50 #include <rdma/ib_marshall.h> 51 #include <rdma/rdma_cm.h> 52 #include <rdma/rdma_cm_ib.h> 53 #include <rdma/ib_addr.h> 54 #include <rdma/ib.h> 55 #include <rdma/ib_cm.h> 56 #include <rdma/rdma_netlink.h> 57 #include "core_priv.h" 58 59 MODULE_AUTHOR("Sean Hefty"); 60 MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); 61 MODULE_LICENSE("Dual BSD/GPL"); 62 63 static unsigned int max_backlog = 1024; 64 65 static struct ctl_table_header *ucma_ctl_table_hdr; 66 static struct ctl_table ucma_ctl_table[] = { 67 { 68 .procname = "max_backlog", 69 .data = &max_backlog, 70 .maxlen = sizeof max_backlog, 71 .mode = 0644, 72 .proc_handler = proc_dointvec_minmax, 73 .extra1 = SYSCTL_ZERO, 74 .extra2 = SYSCTL_INT_MAX, 75 }, 76 }; 77 78 struct ucma_file { 79 struct mutex mut; 80 struct file *filp; 81 struct list_head ctx_list; 82 struct list_head event_list; 83 wait_queue_head_t poll_wait; 84 }; 85 86 struct ucma_context { 87 u32 id; 88 struct completion comp; 89 refcount_t ref; 90 int events_reported; 91 atomic_t backlog; 92 93 struct ucma_file *file; 94 struct rdma_cm_id *cm_id; 95 struct mutex mutex; 96 u64 uid; 97 98 struct list_head list; 99 struct list_head mc_list; 100 struct work_struct close_work; 101 }; 102 103 struct ucma_multicast { 104 struct ucma_context *ctx; 105 u32 id; 106 int events_reported; 107 108 u64 uid; 109 u8 join_state; 110 struct list_head list; 111 struct sockaddr_storage addr; 112 }; 113 114 struct ucma_event { 115 struct ucma_context *ctx; 116 struct ucma_context *conn_req_ctx; 117 struct ucma_multicast *mc; 118 struct list_head list; 119 struct rdma_ucm_event_resp resp; 120 }; 121 122 static DEFINE_XARRAY_ALLOC(ctx_table); 123 static DEFINE_XARRAY_ALLOC(multicast_table); 124 125 static const struct file_operations ucma_fops; 126 static int ucma_destroy_private_ctx(struct ucma_context *ctx); 127 128 static inline struct ucma_context *_ucma_find_context(int id, 129 struct ucma_file *file) 130 { 131 struct ucma_context *ctx; 132 133 ctx = xa_load(&ctx_table, id); 134 if (!ctx) 135 ctx = ERR_PTR(-ENOENT); 136 else if (ctx->file != file) 137 ctx = ERR_PTR(-EINVAL); 138 return ctx; 139 } 140 141 static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) 142 { 143 struct ucma_context *ctx; 144 145 xa_lock(&ctx_table); 146 ctx = _ucma_find_context(id, file); 147 if (!IS_ERR(ctx)) 148 if (!refcount_inc_not_zero(&ctx->ref)) 149 ctx = ERR_PTR(-ENXIO); 150 xa_unlock(&ctx_table); 151 return ctx; 152 } 153 154 static void ucma_put_ctx(struct ucma_context *ctx) 155 { 156 if (refcount_dec_and_test(&ctx->ref)) 157 complete(&ctx->comp); 158 } 159 160 /* 161 * Same as ucm_get_ctx but requires that ->cm_id->device is valid, eg that the 162 * CM_ID is bound. 163 */ 164 static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id) 165 { 166 struct ucma_context *ctx = ucma_get_ctx(file, id); 167 168 if (IS_ERR(ctx)) 169 return ctx; 170 if (!ctx->cm_id->device) { 171 ucma_put_ctx(ctx); 172 return ERR_PTR(-EINVAL); 173 } 174 return ctx; 175 } 176 177 static void ucma_close_id(struct work_struct *work) 178 { 179 struct ucma_context *ctx = container_of(work, struct ucma_context, close_work); 180 181 /* once all inflight tasks are finished, we close all underlying 182 * resources. The context is still alive till its explicit destryoing 183 * by its creator. This puts back the xarray's reference. 184 */ 185 ucma_put_ctx(ctx); 186 wait_for_completion(&ctx->comp); 187 /* No new events will be generated after destroying the id. */ 188 rdma_destroy_id(ctx->cm_id); 189 190 /* Reading the cm_id without holding a positive ref is not allowed */ 191 ctx->cm_id = NULL; 192 } 193 194 static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) 195 { 196 struct ucma_context *ctx; 197 198 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 199 if (!ctx) 200 return NULL; 201 202 INIT_WORK(&ctx->close_work, ucma_close_id); 203 init_completion(&ctx->comp); 204 INIT_LIST_HEAD(&ctx->mc_list); 205 /* So list_del() will work if we don't do ucma_finish_ctx() */ 206 INIT_LIST_HEAD(&ctx->list); 207 ctx->file = file; 208 mutex_init(&ctx->mutex); 209 210 if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) { 211 kfree(ctx); 212 return NULL; 213 } 214 return ctx; 215 } 216 217 static void ucma_set_ctx_cm_id(struct ucma_context *ctx, 218 struct rdma_cm_id *cm_id) 219 { 220 refcount_set(&ctx->ref, 1); 221 ctx->cm_id = cm_id; 222 } 223 224 static void ucma_finish_ctx(struct ucma_context *ctx) 225 { 226 lockdep_assert_held(&ctx->file->mut); 227 list_add_tail(&ctx->list, &ctx->file->ctx_list); 228 xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL); 229 } 230 231 static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, 232 struct rdma_conn_param *src) 233 { 234 if (src->private_data_len) 235 memcpy(dst->private_data, src->private_data, 236 src->private_data_len); 237 dst->private_data_len = src->private_data_len; 238 dst->responder_resources = src->responder_resources; 239 dst->initiator_depth = src->initiator_depth; 240 dst->flow_control = src->flow_control; 241 dst->retry_count = src->retry_count; 242 dst->rnr_retry_count = src->rnr_retry_count; 243 dst->srq = src->srq; 244 dst->qp_num = src->qp_num; 245 } 246 247 static void ucma_copy_ud_event(struct ib_device *device, 248 struct rdma_ucm_ud_param *dst, 249 struct rdma_ud_param *src) 250 { 251 if (src->private_data_len) 252 memcpy(dst->private_data, src->private_data, 253 src->private_data_len); 254 dst->private_data_len = src->private_data_len; 255 ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr); 256 dst->qp_num = src->qp_num; 257 dst->qkey = src->qkey; 258 } 259 260 static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx, 261 struct rdma_cm_event *event) 262 { 263 struct ucma_event *uevent; 264 265 uevent = kzalloc(sizeof(*uevent), GFP_KERNEL); 266 if (!uevent) 267 return NULL; 268 269 uevent->ctx = ctx; 270 switch (event->event) { 271 case RDMA_CM_EVENT_MULTICAST_JOIN: 272 case RDMA_CM_EVENT_MULTICAST_ERROR: 273 uevent->mc = (struct ucma_multicast *) 274 event->param.ud.private_data; 275 uevent->resp.uid = uevent->mc->uid; 276 uevent->resp.id = uevent->mc->id; 277 break; 278 default: 279 uevent->resp.uid = ctx->uid; 280 uevent->resp.id = ctx->id; 281 break; 282 } 283 uevent->resp.event = event->event; 284 uevent->resp.status = event->status; 285 if (ctx->cm_id->qp_type == IB_QPT_UD) 286 ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud, 287 &event->param.ud); 288 else 289 ucma_copy_conn_event(&uevent->resp.param.conn, 290 &event->param.conn); 291 292 uevent->resp.ece.vendor_id = event->ece.vendor_id; 293 uevent->resp.ece.attr_mod = event->ece.attr_mod; 294 return uevent; 295 } 296 297 static int ucma_connect_event_handler(struct rdma_cm_id *cm_id, 298 struct rdma_cm_event *event) 299 { 300 struct ucma_context *listen_ctx = cm_id->context; 301 struct ucma_context *ctx; 302 struct ucma_event *uevent; 303 304 if (!atomic_add_unless(&listen_ctx->backlog, -1, 0)) 305 return -ENOMEM; 306 ctx = ucma_alloc_ctx(listen_ctx->file); 307 if (!ctx) 308 goto err_backlog; 309 ucma_set_ctx_cm_id(ctx, cm_id); 310 311 uevent = ucma_create_uevent(listen_ctx, event); 312 if (!uevent) 313 goto err_alloc; 314 uevent->conn_req_ctx = ctx; 315 uevent->resp.id = ctx->id; 316 317 ctx->cm_id->context = ctx; 318 319 mutex_lock(&ctx->file->mut); 320 ucma_finish_ctx(ctx); 321 list_add_tail(&uevent->list, &ctx->file->event_list); 322 mutex_unlock(&ctx->file->mut); 323 wake_up_interruptible(&ctx->file->poll_wait); 324 return 0; 325 326 err_alloc: 327 ucma_destroy_private_ctx(ctx); 328 err_backlog: 329 atomic_inc(&listen_ctx->backlog); 330 /* Returning error causes the new ID to be destroyed */ 331 return -ENOMEM; 332 } 333 334 static int ucma_event_handler(struct rdma_cm_id *cm_id, 335 struct rdma_cm_event *event) 336 { 337 struct ucma_event *uevent; 338 struct ucma_context *ctx = cm_id->context; 339 340 if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) 341 return ucma_connect_event_handler(cm_id, event); 342 343 /* 344 * We ignore events for new connections until userspace has set their 345 * context. This can only happen if an error occurs on a new connection 346 * before the user accepts it. This is okay, since the accept will just 347 * fail later. However, we do need to release the underlying HW 348 * resources in case of a device removal event. 349 */ 350 if (ctx->uid) { 351 uevent = ucma_create_uevent(ctx, event); 352 if (!uevent) 353 return 0; 354 355 mutex_lock(&ctx->file->mut); 356 list_add_tail(&uevent->list, &ctx->file->event_list); 357 mutex_unlock(&ctx->file->mut); 358 wake_up_interruptible(&ctx->file->poll_wait); 359 } 360 361 if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { 362 xa_lock(&ctx_table); 363 if (xa_load(&ctx_table, ctx->id) == ctx) 364 queue_work(system_unbound_wq, &ctx->close_work); 365 xa_unlock(&ctx_table); 366 } 367 return 0; 368 } 369 370 static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, 371 int in_len, int out_len) 372 { 373 struct rdma_ucm_get_event cmd; 374 struct ucma_event *uevent; 375 376 /* 377 * Old 32 bit user space does not send the 4 byte padding in the 378 * reserved field. We don't care, allow it to keep working. 379 */ 380 if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) - 381 sizeof(uevent->resp.ece)) 382 return -ENOSPC; 383 384 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 385 return -EFAULT; 386 387 mutex_lock(&file->mut); 388 while (list_empty(&file->event_list)) { 389 mutex_unlock(&file->mut); 390 391 if (file->filp->f_flags & O_NONBLOCK) 392 return -EAGAIN; 393 394 if (wait_event_interruptible(file->poll_wait, 395 !list_empty(&file->event_list))) 396 return -ERESTARTSYS; 397 398 mutex_lock(&file->mut); 399 } 400 401 uevent = list_first_entry(&file->event_list, struct ucma_event, list); 402 403 if (copy_to_user(u64_to_user_ptr(cmd.response), 404 &uevent->resp, 405 min_t(size_t, out_len, sizeof(uevent->resp)))) { 406 mutex_unlock(&file->mut); 407 return -EFAULT; 408 } 409 410 list_del(&uevent->list); 411 uevent->ctx->events_reported++; 412 if (uevent->mc) 413 uevent->mc->events_reported++; 414 if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) 415 atomic_inc(&uevent->ctx->backlog); 416 mutex_unlock(&file->mut); 417 418 kfree(uevent); 419 return 0; 420 } 421 422 static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type) 423 { 424 switch (cmd->ps) { 425 case RDMA_PS_TCP: 426 *qp_type = IB_QPT_RC; 427 return 0; 428 case RDMA_PS_UDP: 429 case RDMA_PS_IPOIB: 430 *qp_type = IB_QPT_UD; 431 return 0; 432 case RDMA_PS_IB: 433 *qp_type = cmd->qp_type; 434 return 0; 435 default: 436 return -EINVAL; 437 } 438 } 439 440 static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, 441 int in_len, int out_len) 442 { 443 struct rdma_ucm_create_id cmd; 444 struct rdma_ucm_create_id_resp resp; 445 struct ucma_context *ctx; 446 struct rdma_cm_id *cm_id; 447 enum ib_qp_type qp_type; 448 int ret; 449 450 if (out_len < sizeof(resp)) 451 return -ENOSPC; 452 453 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 454 return -EFAULT; 455 456 ret = ucma_get_qp_type(&cmd, &qp_type); 457 if (ret) 458 return ret; 459 460 ctx = ucma_alloc_ctx(file); 461 if (!ctx) 462 return -ENOMEM; 463 464 ctx->uid = cmd.uid; 465 cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type); 466 if (IS_ERR(cm_id)) { 467 ret = PTR_ERR(cm_id); 468 goto err1; 469 } 470 ucma_set_ctx_cm_id(ctx, cm_id); 471 472 resp.id = ctx->id; 473 if (copy_to_user(u64_to_user_ptr(cmd.response), 474 &resp, sizeof(resp))) { 475 ret = -EFAULT; 476 goto err1; 477 } 478 479 mutex_lock(&file->mut); 480 ucma_finish_ctx(ctx); 481 mutex_unlock(&file->mut); 482 return 0; 483 484 err1: 485 ucma_destroy_private_ctx(ctx); 486 return ret; 487 } 488 489 static void ucma_cleanup_multicast(struct ucma_context *ctx) 490 { 491 struct ucma_multicast *mc, *tmp; 492 493 xa_lock(&multicast_table); 494 list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { 495 list_del(&mc->list); 496 /* 497 * At this point mc->ctx->ref is 0 so the mc cannot leave the 498 * lock on the reader and this is enough serialization 499 */ 500 __xa_erase(&multicast_table, mc->id); 501 kfree(mc); 502 } 503 xa_unlock(&multicast_table); 504 } 505 506 static void ucma_cleanup_mc_events(struct ucma_multicast *mc) 507 { 508 struct ucma_event *uevent, *tmp; 509 510 rdma_lock_handler(mc->ctx->cm_id); 511 mutex_lock(&mc->ctx->file->mut); 512 list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) { 513 if (uevent->mc != mc) 514 continue; 515 516 list_del(&uevent->list); 517 kfree(uevent); 518 } 519 mutex_unlock(&mc->ctx->file->mut); 520 rdma_unlock_handler(mc->ctx->cm_id); 521 } 522 523 static int ucma_cleanup_ctx_events(struct ucma_context *ctx) 524 { 525 int events_reported; 526 struct ucma_event *uevent, *tmp; 527 LIST_HEAD(list); 528 529 /* Cleanup events not yet reported to the user.*/ 530 mutex_lock(&ctx->file->mut); 531 list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { 532 if (uevent->ctx != ctx) 533 continue; 534 535 if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST && 536 xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id, 537 uevent->conn_req_ctx, XA_ZERO_ENTRY, 538 GFP_KERNEL) == uevent->conn_req_ctx) { 539 list_move_tail(&uevent->list, &list); 540 continue; 541 } 542 list_del(&uevent->list); 543 kfree(uevent); 544 } 545 list_del(&ctx->list); 546 events_reported = ctx->events_reported; 547 mutex_unlock(&ctx->file->mut); 548 549 /* 550 * If this was a listening ID then any connections spawned from it that 551 * have not been delivered to userspace are cleaned up too. Must be done 552 * outside any locks. 553 */ 554 list_for_each_entry_safe(uevent, tmp, &list, list) { 555 ucma_destroy_private_ctx(uevent->conn_req_ctx); 556 kfree(uevent); 557 } 558 return events_reported; 559 } 560 561 /* 562 * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie 563 * the ctx is not public to the user). This either because: 564 * - ucma_finish_ctx() hasn't been called 565 * - xa_cmpxchg() succeed to remove the entry (only one thread can succeed) 566 */ 567 static int ucma_destroy_private_ctx(struct ucma_context *ctx) 568 { 569 int events_reported; 570 571 /* 572 * Destroy the underlying cm_id. New work queuing is prevented now by 573 * the removal from the xarray. Once the work is cancled ref will either 574 * be 0 because the work ran to completion and consumed the ref from the 575 * xarray, or it will be positive because we still have the ref from the 576 * xarray. This can also be 0 in cases where cm_id was never set 577 */ 578 cancel_work_sync(&ctx->close_work); 579 if (refcount_read(&ctx->ref)) 580 ucma_close_id(&ctx->close_work); 581 582 events_reported = ucma_cleanup_ctx_events(ctx); 583 ucma_cleanup_multicast(ctx); 584 585 WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL, 586 GFP_KERNEL) != NULL); 587 mutex_destroy(&ctx->mutex); 588 kfree(ctx); 589 return events_reported; 590 } 591 592 static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, 593 int in_len, int out_len) 594 { 595 struct rdma_ucm_destroy_id cmd; 596 struct rdma_ucm_destroy_id_resp resp; 597 struct ucma_context *ctx; 598 int ret = 0; 599 600 if (out_len < sizeof(resp)) 601 return -ENOSPC; 602 603 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 604 return -EFAULT; 605 606 xa_lock(&ctx_table); 607 ctx = _ucma_find_context(cmd.id, file); 608 if (!IS_ERR(ctx)) { 609 if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, 610 GFP_KERNEL) != ctx) 611 ctx = ERR_PTR(-ENOENT); 612 } 613 xa_unlock(&ctx_table); 614 615 if (IS_ERR(ctx)) 616 return PTR_ERR(ctx); 617 618 resp.events_reported = ucma_destroy_private_ctx(ctx); 619 if (copy_to_user(u64_to_user_ptr(cmd.response), 620 &resp, sizeof(resp))) 621 ret = -EFAULT; 622 623 return ret; 624 } 625 626 static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf, 627 int in_len, int out_len) 628 { 629 struct rdma_ucm_bind_ip cmd; 630 struct ucma_context *ctx; 631 int ret; 632 633 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 634 return -EFAULT; 635 636 if (!rdma_addr_size_in6(&cmd.addr)) 637 return -EINVAL; 638 639 ctx = ucma_get_ctx(file, cmd.id); 640 if (IS_ERR(ctx)) 641 return PTR_ERR(ctx); 642 643 mutex_lock(&ctx->mutex); 644 ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); 645 mutex_unlock(&ctx->mutex); 646 647 ucma_put_ctx(ctx); 648 return ret; 649 } 650 651 static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, 652 int in_len, int out_len) 653 { 654 struct rdma_ucm_bind cmd; 655 struct ucma_context *ctx; 656 int ret; 657 658 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 659 return -EFAULT; 660 661 if (cmd.reserved || !cmd.addr_size || 662 cmd.addr_size != rdma_addr_size_kss(&cmd.addr)) 663 return -EINVAL; 664 665 ctx = ucma_get_ctx(file, cmd.id); 666 if (IS_ERR(ctx)) 667 return PTR_ERR(ctx); 668 669 mutex_lock(&ctx->mutex); 670 ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); 671 mutex_unlock(&ctx->mutex); 672 ucma_put_ctx(ctx); 673 return ret; 674 } 675 676 static ssize_t ucma_resolve_ip(struct ucma_file *file, 677 const char __user *inbuf, 678 int in_len, int out_len) 679 { 680 struct rdma_ucm_resolve_ip cmd; 681 struct ucma_context *ctx; 682 int ret; 683 684 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 685 return -EFAULT; 686 687 if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) || 688 !rdma_addr_size_in6(&cmd.dst_addr)) 689 return -EINVAL; 690 691 ctx = ucma_get_ctx(file, cmd.id); 692 if (IS_ERR(ctx)) 693 return PTR_ERR(ctx); 694 695 mutex_lock(&ctx->mutex); 696 ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, 697 (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); 698 mutex_unlock(&ctx->mutex); 699 ucma_put_ctx(ctx); 700 return ret; 701 } 702 703 static ssize_t ucma_resolve_addr(struct ucma_file *file, 704 const char __user *inbuf, 705 int in_len, int out_len) 706 { 707 struct rdma_ucm_resolve_addr cmd; 708 struct ucma_context *ctx; 709 int ret; 710 711 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 712 return -EFAULT; 713 714 if (cmd.reserved || 715 (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) || 716 !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr))) 717 return -EINVAL; 718 719 ctx = ucma_get_ctx(file, cmd.id); 720 if (IS_ERR(ctx)) 721 return PTR_ERR(ctx); 722 723 mutex_lock(&ctx->mutex); 724 ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, 725 (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); 726 mutex_unlock(&ctx->mutex); 727 ucma_put_ctx(ctx); 728 return ret; 729 } 730 731 static ssize_t ucma_resolve_route(struct ucma_file *file, 732 const char __user *inbuf, 733 int in_len, int out_len) 734 { 735 struct rdma_ucm_resolve_route cmd; 736 struct ucma_context *ctx; 737 int ret; 738 739 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 740 return -EFAULT; 741 742 ctx = ucma_get_ctx_dev(file, cmd.id); 743 if (IS_ERR(ctx)) 744 return PTR_ERR(ctx); 745 746 mutex_lock(&ctx->mutex); 747 ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); 748 mutex_unlock(&ctx->mutex); 749 ucma_put_ctx(ctx); 750 return ret; 751 } 752 753 static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, 754 struct rdma_route *route) 755 { 756 struct rdma_dev_addr *dev_addr; 757 758 resp->num_paths = route->num_pri_alt_paths; 759 switch (route->num_pri_alt_paths) { 760 case 0: 761 dev_addr = &route->addr.dev_addr; 762 rdma_addr_get_dgid(dev_addr, 763 (union ib_gid *) &resp->ib_route[0].dgid); 764 rdma_addr_get_sgid(dev_addr, 765 (union ib_gid *) &resp->ib_route[0].sgid); 766 resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 767 break; 768 case 2: 769 ib_copy_path_rec_to_user(&resp->ib_route[1], 770 &route->path_rec[1]); 771 fallthrough; 772 case 1: 773 ib_copy_path_rec_to_user(&resp->ib_route[0], 774 &route->path_rec[0]); 775 break; 776 default: 777 break; 778 } 779 } 780 781 static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, 782 struct rdma_route *route) 783 { 784 785 resp->num_paths = route->num_pri_alt_paths; 786 switch (route->num_pri_alt_paths) { 787 case 0: 788 rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, 789 (union ib_gid *)&resp->ib_route[0].dgid); 790 rdma_ip2gid((struct sockaddr *)&route->addr.src_addr, 791 (union ib_gid *)&resp->ib_route[0].sgid); 792 resp->ib_route[0].pkey = cpu_to_be16(0xffff); 793 break; 794 case 2: 795 ib_copy_path_rec_to_user(&resp->ib_route[1], 796 &route->path_rec[1]); 797 fallthrough; 798 case 1: 799 ib_copy_path_rec_to_user(&resp->ib_route[0], 800 &route->path_rec[0]); 801 break; 802 default: 803 break; 804 } 805 } 806 807 static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp, 808 struct rdma_route *route) 809 { 810 struct rdma_dev_addr *dev_addr; 811 812 dev_addr = &route->addr.dev_addr; 813 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid); 814 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); 815 } 816 817 static ssize_t ucma_query_route(struct ucma_file *file, 818 const char __user *inbuf, 819 int in_len, int out_len) 820 { 821 struct rdma_ucm_query cmd; 822 struct rdma_ucm_query_route_resp resp; 823 struct ucma_context *ctx; 824 struct sockaddr *addr; 825 int ret = 0; 826 827 if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index)) 828 return -ENOSPC; 829 830 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 831 return -EFAULT; 832 833 ctx = ucma_get_ctx(file, cmd.id); 834 if (IS_ERR(ctx)) 835 return PTR_ERR(ctx); 836 837 mutex_lock(&ctx->mutex); 838 memset(&resp, 0, sizeof resp); 839 addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; 840 memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? 841 sizeof(struct sockaddr_in) : 842 sizeof(struct sockaddr_in6)); 843 addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; 844 memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ? 845 sizeof(struct sockaddr_in) : 846 sizeof(struct sockaddr_in6)); 847 if (!ctx->cm_id->device) 848 goto out; 849 850 resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; 851 resp.ibdev_index = ctx->cm_id->device->index; 852 resp.port_num = ctx->cm_id->port_num; 853 854 if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num)) 855 ucma_copy_ib_route(&resp, &ctx->cm_id->route); 856 else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num)) 857 ucma_copy_iboe_route(&resp, &ctx->cm_id->route); 858 else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num)) 859 ucma_copy_iw_route(&resp, &ctx->cm_id->route); 860 861 out: 862 mutex_unlock(&ctx->mutex); 863 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, 864 min_t(size_t, out_len, sizeof(resp)))) 865 ret = -EFAULT; 866 867 ucma_put_ctx(ctx); 868 return ret; 869 } 870 871 static void ucma_query_device_addr(struct rdma_cm_id *cm_id, 872 struct rdma_ucm_query_addr_resp *resp) 873 { 874 if (!cm_id->device) 875 return; 876 877 resp->node_guid = (__force __u64) cm_id->device->node_guid; 878 resp->ibdev_index = cm_id->device->index; 879 resp->port_num = cm_id->port_num; 880 resp->pkey = (__force __u16) cpu_to_be16( 881 ib_addr_get_pkey(&cm_id->route.addr.dev_addr)); 882 } 883 884 static ssize_t ucma_query_addr(struct ucma_context *ctx, 885 void __user *response, int out_len) 886 { 887 struct rdma_ucm_query_addr_resp resp; 888 struct sockaddr *addr; 889 int ret = 0; 890 891 if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index)) 892 return -ENOSPC; 893 894 memset(&resp, 0, sizeof resp); 895 896 addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; 897 resp.src_size = rdma_addr_size(addr); 898 memcpy(&resp.src_addr, addr, resp.src_size); 899 900 addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; 901 resp.dst_size = rdma_addr_size(addr); 902 memcpy(&resp.dst_addr, addr, resp.dst_size); 903 904 ucma_query_device_addr(ctx->cm_id, &resp); 905 906 if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp)))) 907 ret = -EFAULT; 908 909 return ret; 910 } 911 912 static ssize_t ucma_query_path(struct ucma_context *ctx, 913 void __user *response, int out_len) 914 { 915 struct rdma_ucm_query_path_resp *resp; 916 int i, ret = 0; 917 918 if (out_len < sizeof(*resp)) 919 return -ENOSPC; 920 921 resp = kzalloc(out_len, GFP_KERNEL); 922 if (!resp) 923 return -ENOMEM; 924 925 resp->num_paths = ctx->cm_id->route.num_pri_alt_paths; 926 for (i = 0, out_len -= sizeof(*resp); 927 i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); 928 i++, out_len -= sizeof(struct ib_path_rec_data)) { 929 struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i]; 930 931 resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY | 932 IB_PATH_BIDIRECTIONAL; 933 if (rec->rec_type == SA_PATH_REC_TYPE_OPA) { 934 struct sa_path_rec ib; 935 936 sa_convert_path_opa_to_ib(&ib, rec); 937 ib_sa_pack_path(&ib, &resp->path_data[i].path_rec); 938 939 } else { 940 ib_sa_pack_path(rec, &resp->path_data[i].path_rec); 941 } 942 } 943 944 if (copy_to_user(response, resp, struct_size(resp, path_data, i))) 945 ret = -EFAULT; 946 947 kfree(resp); 948 return ret; 949 } 950 951 static ssize_t ucma_query_gid(struct ucma_context *ctx, 952 void __user *response, int out_len) 953 { 954 struct rdma_ucm_query_addr_resp resp; 955 struct sockaddr_ib *addr; 956 int ret = 0; 957 958 if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index)) 959 return -ENOSPC; 960 961 memset(&resp, 0, sizeof resp); 962 963 ucma_query_device_addr(ctx->cm_id, &resp); 964 965 addr = (struct sockaddr_ib *) &resp.src_addr; 966 resp.src_size = sizeof(*addr); 967 if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) { 968 memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size); 969 } else { 970 addr->sib_family = AF_IB; 971 addr->sib_pkey = (__force __be16) resp.pkey; 972 rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr, 973 NULL); 974 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) 975 &ctx->cm_id->route.addr.src_addr); 976 } 977 978 addr = (struct sockaddr_ib *) &resp.dst_addr; 979 resp.dst_size = sizeof(*addr); 980 if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) { 981 memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size); 982 } else { 983 addr->sib_family = AF_IB; 984 addr->sib_pkey = (__force __be16) resp.pkey; 985 rdma_read_gids(ctx->cm_id, NULL, 986 (union ib_gid *)&addr->sib_addr); 987 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) 988 &ctx->cm_id->route.addr.dst_addr); 989 } 990 991 if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp)))) 992 ret = -EFAULT; 993 994 return ret; 995 } 996 997 static ssize_t ucma_query(struct ucma_file *file, 998 const char __user *inbuf, 999 int in_len, int out_len) 1000 { 1001 struct rdma_ucm_query cmd; 1002 struct ucma_context *ctx; 1003 void __user *response; 1004 int ret; 1005 1006 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1007 return -EFAULT; 1008 1009 response = u64_to_user_ptr(cmd.response); 1010 ctx = ucma_get_ctx(file, cmd.id); 1011 if (IS_ERR(ctx)) 1012 return PTR_ERR(ctx); 1013 1014 mutex_lock(&ctx->mutex); 1015 switch (cmd.option) { 1016 case RDMA_USER_CM_QUERY_ADDR: 1017 ret = ucma_query_addr(ctx, response, out_len); 1018 break; 1019 case RDMA_USER_CM_QUERY_PATH: 1020 ret = ucma_query_path(ctx, response, out_len); 1021 break; 1022 case RDMA_USER_CM_QUERY_GID: 1023 ret = ucma_query_gid(ctx, response, out_len); 1024 break; 1025 default: 1026 ret = -ENOSYS; 1027 break; 1028 } 1029 mutex_unlock(&ctx->mutex); 1030 1031 ucma_put_ctx(ctx); 1032 return ret; 1033 } 1034 1035 static void ucma_copy_conn_param(struct rdma_cm_id *id, 1036 struct rdma_conn_param *dst, 1037 struct rdma_ucm_conn_param *src) 1038 { 1039 dst->private_data = src->private_data; 1040 dst->private_data_len = src->private_data_len; 1041 dst->responder_resources = src->responder_resources; 1042 dst->initiator_depth = src->initiator_depth; 1043 dst->flow_control = src->flow_control; 1044 dst->retry_count = src->retry_count; 1045 dst->rnr_retry_count = src->rnr_retry_count; 1046 dst->srq = src->srq; 1047 dst->qp_num = src->qp_num & 0xFFFFFF; 1048 dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0; 1049 } 1050 1051 static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, 1052 int in_len, int out_len) 1053 { 1054 struct rdma_conn_param conn_param; 1055 struct rdma_ucm_ece ece = {}; 1056 struct rdma_ucm_connect cmd; 1057 struct ucma_context *ctx; 1058 size_t in_size; 1059 int ret; 1060 1061 if (in_len < offsetofend(typeof(cmd), reserved)) 1062 return -EINVAL; 1063 in_size = min_t(size_t, in_len, sizeof(cmd)); 1064 if (copy_from_user(&cmd, inbuf, in_size)) 1065 return -EFAULT; 1066 1067 if (!cmd.conn_param.valid) 1068 return -EINVAL; 1069 1070 ctx = ucma_get_ctx_dev(file, cmd.id); 1071 if (IS_ERR(ctx)) 1072 return PTR_ERR(ctx); 1073 1074 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); 1075 if (offsetofend(typeof(cmd), ece) <= in_size) { 1076 ece.vendor_id = cmd.ece.vendor_id; 1077 ece.attr_mod = cmd.ece.attr_mod; 1078 } 1079 1080 mutex_lock(&ctx->mutex); 1081 ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece); 1082 mutex_unlock(&ctx->mutex); 1083 ucma_put_ctx(ctx); 1084 return ret; 1085 } 1086 1087 static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, 1088 int in_len, int out_len) 1089 { 1090 struct rdma_ucm_listen cmd; 1091 struct ucma_context *ctx; 1092 int ret; 1093 1094 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1095 return -EFAULT; 1096 1097 ctx = ucma_get_ctx(file, cmd.id); 1098 if (IS_ERR(ctx)) 1099 return PTR_ERR(ctx); 1100 1101 if (cmd.backlog <= 0 || cmd.backlog > max_backlog) 1102 cmd.backlog = max_backlog; 1103 atomic_set(&ctx->backlog, cmd.backlog); 1104 1105 mutex_lock(&ctx->mutex); 1106 ret = rdma_listen(ctx->cm_id, cmd.backlog); 1107 mutex_unlock(&ctx->mutex); 1108 ucma_put_ctx(ctx); 1109 return ret; 1110 } 1111 1112 static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, 1113 int in_len, int out_len) 1114 { 1115 struct rdma_ucm_accept cmd; 1116 struct rdma_conn_param conn_param; 1117 struct rdma_ucm_ece ece = {}; 1118 struct ucma_context *ctx; 1119 size_t in_size; 1120 int ret; 1121 1122 if (in_len < offsetofend(typeof(cmd), reserved)) 1123 return -EINVAL; 1124 in_size = min_t(size_t, in_len, sizeof(cmd)); 1125 if (copy_from_user(&cmd, inbuf, in_size)) 1126 return -EFAULT; 1127 1128 ctx = ucma_get_ctx_dev(file, cmd.id); 1129 if (IS_ERR(ctx)) 1130 return PTR_ERR(ctx); 1131 1132 if (offsetofend(typeof(cmd), ece) <= in_size) { 1133 ece.vendor_id = cmd.ece.vendor_id; 1134 ece.attr_mod = cmd.ece.attr_mod; 1135 } 1136 1137 if (cmd.conn_param.valid) { 1138 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); 1139 mutex_lock(&ctx->mutex); 1140 rdma_lock_handler(ctx->cm_id); 1141 ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece); 1142 if (!ret) { 1143 /* The uid must be set atomically with the handler */ 1144 ctx->uid = cmd.uid; 1145 } 1146 rdma_unlock_handler(ctx->cm_id); 1147 mutex_unlock(&ctx->mutex); 1148 } else { 1149 mutex_lock(&ctx->mutex); 1150 rdma_lock_handler(ctx->cm_id); 1151 ret = rdma_accept_ece(ctx->cm_id, NULL, &ece); 1152 rdma_unlock_handler(ctx->cm_id); 1153 mutex_unlock(&ctx->mutex); 1154 } 1155 ucma_put_ctx(ctx); 1156 return ret; 1157 } 1158 1159 static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, 1160 int in_len, int out_len) 1161 { 1162 struct rdma_ucm_reject cmd; 1163 struct ucma_context *ctx; 1164 int ret; 1165 1166 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1167 return -EFAULT; 1168 1169 if (!cmd.reason) 1170 cmd.reason = IB_CM_REJ_CONSUMER_DEFINED; 1171 1172 switch (cmd.reason) { 1173 case IB_CM_REJ_CONSUMER_DEFINED: 1174 case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED: 1175 break; 1176 default: 1177 return -EINVAL; 1178 } 1179 1180 ctx = ucma_get_ctx_dev(file, cmd.id); 1181 if (IS_ERR(ctx)) 1182 return PTR_ERR(ctx); 1183 1184 mutex_lock(&ctx->mutex); 1185 ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len, 1186 cmd.reason); 1187 mutex_unlock(&ctx->mutex); 1188 ucma_put_ctx(ctx); 1189 return ret; 1190 } 1191 1192 static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, 1193 int in_len, int out_len) 1194 { 1195 struct rdma_ucm_disconnect cmd; 1196 struct ucma_context *ctx; 1197 int ret; 1198 1199 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1200 return -EFAULT; 1201 1202 ctx = ucma_get_ctx_dev(file, cmd.id); 1203 if (IS_ERR(ctx)) 1204 return PTR_ERR(ctx); 1205 1206 mutex_lock(&ctx->mutex); 1207 ret = rdma_disconnect(ctx->cm_id); 1208 mutex_unlock(&ctx->mutex); 1209 ucma_put_ctx(ctx); 1210 return ret; 1211 } 1212 1213 static ssize_t ucma_init_qp_attr(struct ucma_file *file, 1214 const char __user *inbuf, 1215 int in_len, int out_len) 1216 { 1217 struct rdma_ucm_init_qp_attr cmd; 1218 struct ib_uverbs_qp_attr resp; 1219 struct ucma_context *ctx; 1220 struct ib_qp_attr qp_attr; 1221 int ret; 1222 1223 if (out_len < sizeof(resp)) 1224 return -ENOSPC; 1225 1226 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1227 return -EFAULT; 1228 1229 if (cmd.qp_state > IB_QPS_ERR) 1230 return -EINVAL; 1231 1232 ctx = ucma_get_ctx_dev(file, cmd.id); 1233 if (IS_ERR(ctx)) 1234 return PTR_ERR(ctx); 1235 1236 resp.qp_attr_mask = 0; 1237 memset(&qp_attr, 0, sizeof qp_attr); 1238 qp_attr.qp_state = cmd.qp_state; 1239 mutex_lock(&ctx->mutex); 1240 ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); 1241 mutex_unlock(&ctx->mutex); 1242 if (ret) 1243 goto out; 1244 1245 ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr); 1246 if (copy_to_user(u64_to_user_ptr(cmd.response), 1247 &resp, sizeof(resp))) 1248 ret = -EFAULT; 1249 1250 out: 1251 ucma_put_ctx(ctx); 1252 return ret; 1253 } 1254 1255 static int ucma_set_option_id(struct ucma_context *ctx, int optname, 1256 void *optval, size_t optlen) 1257 { 1258 int ret = 0; 1259 1260 switch (optname) { 1261 case RDMA_OPTION_ID_TOS: 1262 if (optlen != sizeof(u8)) { 1263 ret = -EINVAL; 1264 break; 1265 } 1266 rdma_set_service_type(ctx->cm_id, *((u8 *) optval)); 1267 break; 1268 case RDMA_OPTION_ID_REUSEADDR: 1269 if (optlen != sizeof(int)) { 1270 ret = -EINVAL; 1271 break; 1272 } 1273 ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); 1274 break; 1275 case RDMA_OPTION_ID_AFONLY: 1276 if (optlen != sizeof(int)) { 1277 ret = -EINVAL; 1278 break; 1279 } 1280 ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0); 1281 break; 1282 case RDMA_OPTION_ID_ACK_TIMEOUT: 1283 if (optlen != sizeof(u8)) { 1284 ret = -EINVAL; 1285 break; 1286 } 1287 ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval)); 1288 break; 1289 default: 1290 ret = -ENOSYS; 1291 } 1292 1293 return ret; 1294 } 1295 1296 static int ucma_set_ib_path(struct ucma_context *ctx, 1297 struct ib_path_rec_data *path_data, size_t optlen) 1298 { 1299 struct sa_path_rec sa_path; 1300 struct rdma_cm_event event; 1301 int ret; 1302 1303 if (optlen % sizeof(*path_data)) 1304 return -EINVAL; 1305 1306 for (; optlen; optlen -= sizeof(*path_data), path_data++) { 1307 if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY | 1308 IB_PATH_BIDIRECTIONAL)) 1309 break; 1310 } 1311 1312 if (!optlen) 1313 return -EINVAL; 1314 1315 if (!ctx->cm_id->device) 1316 return -EINVAL; 1317 1318 memset(&sa_path, 0, sizeof(sa_path)); 1319 1320 sa_path.rec_type = SA_PATH_REC_TYPE_IB; 1321 ib_sa_unpack_path(path_data->path_rec, &sa_path); 1322 1323 if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) { 1324 struct sa_path_rec opa; 1325 1326 sa_convert_path_ib_to_opa(&opa, &sa_path); 1327 mutex_lock(&ctx->mutex); 1328 ret = rdma_set_ib_path(ctx->cm_id, &opa); 1329 mutex_unlock(&ctx->mutex); 1330 } else { 1331 mutex_lock(&ctx->mutex); 1332 ret = rdma_set_ib_path(ctx->cm_id, &sa_path); 1333 mutex_unlock(&ctx->mutex); 1334 } 1335 if (ret) 1336 return ret; 1337 1338 memset(&event, 0, sizeof event); 1339 event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 1340 return ucma_event_handler(ctx->cm_id, &event); 1341 } 1342 1343 static int ucma_set_option_ib(struct ucma_context *ctx, int optname, 1344 void *optval, size_t optlen) 1345 { 1346 int ret; 1347 1348 switch (optname) { 1349 case RDMA_OPTION_IB_PATH: 1350 ret = ucma_set_ib_path(ctx, optval, optlen); 1351 break; 1352 default: 1353 ret = -ENOSYS; 1354 } 1355 1356 return ret; 1357 } 1358 1359 static int ucma_set_option_level(struct ucma_context *ctx, int level, 1360 int optname, void *optval, size_t optlen) 1361 { 1362 int ret; 1363 1364 switch (level) { 1365 case RDMA_OPTION_ID: 1366 mutex_lock(&ctx->mutex); 1367 ret = ucma_set_option_id(ctx, optname, optval, optlen); 1368 mutex_unlock(&ctx->mutex); 1369 break; 1370 case RDMA_OPTION_IB: 1371 ret = ucma_set_option_ib(ctx, optname, optval, optlen); 1372 break; 1373 default: 1374 ret = -ENOSYS; 1375 } 1376 1377 return ret; 1378 } 1379 1380 static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, 1381 int in_len, int out_len) 1382 { 1383 struct rdma_ucm_set_option cmd; 1384 struct ucma_context *ctx; 1385 void *optval; 1386 int ret; 1387 1388 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1389 return -EFAULT; 1390 1391 if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE)) 1392 return -EINVAL; 1393 1394 ctx = ucma_get_ctx(file, cmd.id); 1395 if (IS_ERR(ctx)) 1396 return PTR_ERR(ctx); 1397 1398 optval = memdup_user(u64_to_user_ptr(cmd.optval), 1399 cmd.optlen); 1400 if (IS_ERR(optval)) { 1401 ret = PTR_ERR(optval); 1402 goto out; 1403 } 1404 1405 ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval, 1406 cmd.optlen); 1407 kfree(optval); 1408 1409 out: 1410 ucma_put_ctx(ctx); 1411 return ret; 1412 } 1413 1414 static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, 1415 int in_len, int out_len) 1416 { 1417 struct rdma_ucm_notify cmd; 1418 struct ucma_context *ctx; 1419 int ret = -EINVAL; 1420 1421 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1422 return -EFAULT; 1423 1424 ctx = ucma_get_ctx(file, cmd.id); 1425 if (IS_ERR(ctx)) 1426 return PTR_ERR(ctx); 1427 1428 mutex_lock(&ctx->mutex); 1429 if (ctx->cm_id->device) 1430 ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event); 1431 mutex_unlock(&ctx->mutex); 1432 1433 ucma_put_ctx(ctx); 1434 return ret; 1435 } 1436 1437 static ssize_t ucma_process_join(struct ucma_file *file, 1438 struct rdma_ucm_join_mcast *cmd, int out_len) 1439 { 1440 struct rdma_ucm_create_id_resp resp; 1441 struct ucma_context *ctx; 1442 struct ucma_multicast *mc; 1443 struct sockaddr *addr; 1444 int ret; 1445 u8 join_state; 1446 1447 if (out_len < sizeof(resp)) 1448 return -ENOSPC; 1449 1450 addr = (struct sockaddr *) &cmd->addr; 1451 if (cmd->addr_size != rdma_addr_size(addr)) 1452 return -EINVAL; 1453 1454 if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER) 1455 join_state = BIT(FULLMEMBER_JOIN); 1456 else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER) 1457 join_state = BIT(SENDONLY_FULLMEMBER_JOIN); 1458 else 1459 return -EINVAL; 1460 1461 ctx = ucma_get_ctx_dev(file, cmd->id); 1462 if (IS_ERR(ctx)) 1463 return PTR_ERR(ctx); 1464 1465 mc = kzalloc(sizeof(*mc), GFP_KERNEL); 1466 if (!mc) { 1467 ret = -ENOMEM; 1468 goto err_put_ctx; 1469 } 1470 1471 mc->ctx = ctx; 1472 mc->join_state = join_state; 1473 mc->uid = cmd->uid; 1474 memcpy(&mc->addr, addr, cmd->addr_size); 1475 1476 xa_lock(&multicast_table); 1477 if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, 1478 GFP_KERNEL)) { 1479 ret = -ENOMEM; 1480 goto err_free_mc; 1481 } 1482 1483 list_add_tail(&mc->list, &ctx->mc_list); 1484 xa_unlock(&multicast_table); 1485 1486 mutex_lock(&ctx->mutex); 1487 ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, 1488 join_state, mc); 1489 mutex_unlock(&ctx->mutex); 1490 if (ret) 1491 goto err_xa_erase; 1492 1493 resp.id = mc->id; 1494 if (copy_to_user(u64_to_user_ptr(cmd->response), 1495 &resp, sizeof(resp))) { 1496 ret = -EFAULT; 1497 goto err_leave_multicast; 1498 } 1499 1500 xa_store(&multicast_table, mc->id, mc, 0); 1501 1502 ucma_put_ctx(ctx); 1503 return 0; 1504 1505 err_leave_multicast: 1506 mutex_lock(&ctx->mutex); 1507 rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr); 1508 mutex_unlock(&ctx->mutex); 1509 ucma_cleanup_mc_events(mc); 1510 err_xa_erase: 1511 xa_lock(&multicast_table); 1512 list_del(&mc->list); 1513 __xa_erase(&multicast_table, mc->id); 1514 err_free_mc: 1515 xa_unlock(&multicast_table); 1516 kfree(mc); 1517 err_put_ctx: 1518 ucma_put_ctx(ctx); 1519 return ret; 1520 } 1521 1522 static ssize_t ucma_join_ip_multicast(struct ucma_file *file, 1523 const char __user *inbuf, 1524 int in_len, int out_len) 1525 { 1526 struct rdma_ucm_join_ip_mcast cmd; 1527 struct rdma_ucm_join_mcast join_cmd; 1528 1529 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1530 return -EFAULT; 1531 1532 join_cmd.response = cmd.response; 1533 join_cmd.uid = cmd.uid; 1534 join_cmd.id = cmd.id; 1535 join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr); 1536 if (!join_cmd.addr_size) 1537 return -EINVAL; 1538 1539 join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER; 1540 memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); 1541 1542 return ucma_process_join(file, &join_cmd, out_len); 1543 } 1544 1545 static ssize_t ucma_join_multicast(struct ucma_file *file, 1546 const char __user *inbuf, 1547 int in_len, int out_len) 1548 { 1549 struct rdma_ucm_join_mcast cmd; 1550 1551 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1552 return -EFAULT; 1553 1554 if (!rdma_addr_size_kss(&cmd.addr)) 1555 return -EINVAL; 1556 1557 return ucma_process_join(file, &cmd, out_len); 1558 } 1559 1560 static ssize_t ucma_leave_multicast(struct ucma_file *file, 1561 const char __user *inbuf, 1562 int in_len, int out_len) 1563 { 1564 struct rdma_ucm_destroy_id cmd; 1565 struct rdma_ucm_destroy_id_resp resp; 1566 struct ucma_multicast *mc; 1567 int ret = 0; 1568 1569 if (out_len < sizeof(resp)) 1570 return -ENOSPC; 1571 1572 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1573 return -EFAULT; 1574 1575 xa_lock(&multicast_table); 1576 mc = xa_load(&multicast_table, cmd.id); 1577 if (!mc) 1578 mc = ERR_PTR(-ENOENT); 1579 else if (READ_ONCE(mc->ctx->file) != file) 1580 mc = ERR_PTR(-EINVAL); 1581 else if (!refcount_inc_not_zero(&mc->ctx->ref)) 1582 mc = ERR_PTR(-ENXIO); 1583 1584 if (IS_ERR(mc)) { 1585 xa_unlock(&multicast_table); 1586 ret = PTR_ERR(mc); 1587 goto out; 1588 } 1589 1590 list_del(&mc->list); 1591 __xa_erase(&multicast_table, mc->id); 1592 xa_unlock(&multicast_table); 1593 1594 mutex_lock(&mc->ctx->mutex); 1595 rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); 1596 mutex_unlock(&mc->ctx->mutex); 1597 1598 ucma_cleanup_mc_events(mc); 1599 1600 ucma_put_ctx(mc->ctx); 1601 resp.events_reported = mc->events_reported; 1602 kfree(mc); 1603 1604 if (copy_to_user(u64_to_user_ptr(cmd.response), 1605 &resp, sizeof(resp))) 1606 ret = -EFAULT; 1607 out: 1608 return ret; 1609 } 1610 1611 static ssize_t ucma_migrate_id(struct ucma_file *new_file, 1612 const char __user *inbuf, 1613 int in_len, int out_len) 1614 { 1615 struct rdma_ucm_migrate_id cmd; 1616 struct rdma_ucm_migrate_resp resp; 1617 struct ucma_event *uevent, *tmp; 1618 struct ucma_context *ctx; 1619 LIST_HEAD(event_list); 1620 struct ucma_file *cur_file; 1621 int ret = 0; 1622 1623 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1624 return -EFAULT; 1625 1626 /* Get current fd to protect against it being closed */ 1627 CLASS(fd, f)(cmd.fd); 1628 if (fd_empty(f)) 1629 return -ENOENT; 1630 if (fd_file(f)->f_op != &ucma_fops) 1631 return -EINVAL; 1632 cur_file = fd_file(f)->private_data; 1633 1634 /* Validate current fd and prevent destruction of id. */ 1635 ctx = ucma_get_ctx(cur_file, cmd.id); 1636 if (IS_ERR(ctx)) 1637 return PTR_ERR(ctx); 1638 1639 rdma_lock_handler(ctx->cm_id); 1640 /* 1641 * ctx->file can only be changed under the handler & xa_lock. xa_load() 1642 * must be checked again to ensure the ctx hasn't begun destruction 1643 * since the ucma_get_ctx(). 1644 */ 1645 xa_lock(&ctx_table); 1646 if (_ucma_find_context(cmd.id, cur_file) != ctx) { 1647 xa_unlock(&ctx_table); 1648 ret = -ENOENT; 1649 goto err_unlock; 1650 } 1651 ctx->file = new_file; 1652 xa_unlock(&ctx_table); 1653 1654 mutex_lock(&cur_file->mut); 1655 list_del(&ctx->list); 1656 /* 1657 * At this point lock_handler() prevents addition of new uevents for 1658 * this ctx. 1659 */ 1660 list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list) 1661 if (uevent->ctx == ctx) 1662 list_move_tail(&uevent->list, &event_list); 1663 resp.events_reported = ctx->events_reported; 1664 mutex_unlock(&cur_file->mut); 1665 1666 mutex_lock(&new_file->mut); 1667 list_add_tail(&ctx->list, &new_file->ctx_list); 1668 list_splice_tail(&event_list, &new_file->event_list); 1669 mutex_unlock(&new_file->mut); 1670 1671 if (copy_to_user(u64_to_user_ptr(cmd.response), 1672 &resp, sizeof(resp))) 1673 ret = -EFAULT; 1674 1675 err_unlock: 1676 rdma_unlock_handler(ctx->cm_id); 1677 ucma_put_ctx(ctx); 1678 return ret; 1679 } 1680 1681 static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, 1682 const char __user *inbuf, 1683 int in_len, int out_len) = { 1684 [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, 1685 [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, 1686 [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip, 1687 [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip, 1688 [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route, 1689 [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, 1690 [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, 1691 [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, 1692 [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, 1693 [RDMA_USER_CM_CMD_REJECT] = ucma_reject, 1694 [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, 1695 [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, 1696 [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, 1697 [RDMA_USER_CM_CMD_GET_OPTION] = NULL, 1698 [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, 1699 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, 1700 [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast, 1701 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, 1702 [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id, 1703 [RDMA_USER_CM_CMD_QUERY] = ucma_query, 1704 [RDMA_USER_CM_CMD_BIND] = ucma_bind, 1705 [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, 1706 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast 1707 }; 1708 1709 static ssize_t ucma_write(struct file *filp, const char __user *buf, 1710 size_t len, loff_t *pos) 1711 { 1712 struct ucma_file *file = filp->private_data; 1713 struct rdma_ucm_cmd_hdr hdr; 1714 ssize_t ret; 1715 1716 if (!ib_safe_file_access(filp)) { 1717 pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", 1718 __func__, task_tgid_vnr(current), current->comm); 1719 return -EACCES; 1720 } 1721 1722 if (len < sizeof(hdr)) 1723 return -EINVAL; 1724 1725 if (copy_from_user(&hdr, buf, sizeof(hdr))) 1726 return -EFAULT; 1727 1728 if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) 1729 return -EINVAL; 1730 hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table)); 1731 1732 if (hdr.in + sizeof(hdr) > len) 1733 return -EINVAL; 1734 1735 if (!ucma_cmd_table[hdr.cmd]) 1736 return -ENOSYS; 1737 1738 ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); 1739 if (!ret) 1740 ret = len; 1741 1742 return ret; 1743 } 1744 1745 static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait) 1746 { 1747 struct ucma_file *file = filp->private_data; 1748 __poll_t mask = 0; 1749 1750 poll_wait(filp, &file->poll_wait, wait); 1751 1752 if (!list_empty(&file->event_list)) 1753 mask = EPOLLIN | EPOLLRDNORM; 1754 1755 return mask; 1756 } 1757 1758 /* 1759 * ucma_open() does not need the BKL: 1760 * 1761 * - no global state is referred to; 1762 * - there is no ioctl method to race against; 1763 * - no further module initialization is required for open to work 1764 * after the device is registered. 1765 */ 1766 static int ucma_open(struct inode *inode, struct file *filp) 1767 { 1768 struct ucma_file *file; 1769 1770 file = kmalloc(sizeof *file, GFP_KERNEL); 1771 if (!file) 1772 return -ENOMEM; 1773 1774 INIT_LIST_HEAD(&file->event_list); 1775 INIT_LIST_HEAD(&file->ctx_list); 1776 init_waitqueue_head(&file->poll_wait); 1777 mutex_init(&file->mut); 1778 1779 filp->private_data = file; 1780 file->filp = filp; 1781 1782 return stream_open(inode, filp); 1783 } 1784 1785 static int ucma_close(struct inode *inode, struct file *filp) 1786 { 1787 struct ucma_file *file = filp->private_data; 1788 1789 /* 1790 * All paths that touch ctx_list or ctx_list starting from write() are 1791 * prevented by this being a FD release function. The list_add_tail() in 1792 * ucma_connect_event_handler() can run concurrently, however it only 1793 * adds to the list *after* a listening ID. By only reading the first of 1794 * the list, and relying on ucma_destroy_private_ctx() to block 1795 * ucma_connect_event_handler(), no additional locking is needed. 1796 */ 1797 while (!list_empty(&file->ctx_list)) { 1798 struct ucma_context *ctx = list_first_entry( 1799 &file->ctx_list, struct ucma_context, list); 1800 1801 WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, 1802 GFP_KERNEL) != ctx); 1803 ucma_destroy_private_ctx(ctx); 1804 } 1805 kfree(file); 1806 return 0; 1807 } 1808 1809 static const struct file_operations ucma_fops = { 1810 .owner = THIS_MODULE, 1811 .open = ucma_open, 1812 .release = ucma_close, 1813 .write = ucma_write, 1814 .poll = ucma_poll, 1815 }; 1816 1817 static struct miscdevice ucma_misc = { 1818 .minor = MISC_DYNAMIC_MINOR, 1819 .name = "rdma_cm", 1820 .nodename = "infiniband/rdma_cm", 1821 .mode = 0666, 1822 .fops = &ucma_fops, 1823 }; 1824 1825 static int ucma_get_global_nl_info(struct ib_client_nl_info *res) 1826 { 1827 res->abi = RDMA_USER_CM_ABI_VERSION; 1828 res->cdev = ucma_misc.this_device; 1829 return 0; 1830 } 1831 1832 static struct ib_client rdma_cma_client = { 1833 .name = "rdma_cm", 1834 .get_global_nl_info = ucma_get_global_nl_info, 1835 }; 1836 MODULE_ALIAS_RDMA_CLIENT("rdma_cm"); 1837 1838 static ssize_t abi_version_show(struct device *dev, 1839 struct device_attribute *attr, char *buf) 1840 { 1841 return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); 1842 } 1843 static DEVICE_ATTR_RO(abi_version); 1844 1845 static int __init ucma_init(void) 1846 { 1847 int ret; 1848 1849 ret = misc_register(&ucma_misc); 1850 if (ret) 1851 return ret; 1852 1853 ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); 1854 if (ret) { 1855 pr_err("rdma_ucm: couldn't create abi_version attr\n"); 1856 goto err1; 1857 } 1858 1859 ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table); 1860 if (!ucma_ctl_table_hdr) { 1861 pr_err("rdma_ucm: couldn't register sysctl paths\n"); 1862 ret = -ENOMEM; 1863 goto err2; 1864 } 1865 1866 ret = ib_register_client(&rdma_cma_client); 1867 if (ret) 1868 goto err3; 1869 1870 return 0; 1871 err3: 1872 unregister_net_sysctl_table(ucma_ctl_table_hdr); 1873 err2: 1874 device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); 1875 err1: 1876 misc_deregister(&ucma_misc); 1877 return ret; 1878 } 1879 1880 static void __exit ucma_cleanup(void) 1881 { 1882 ib_unregister_client(&rdma_cma_client); 1883 unregister_net_sysctl_table(ucma_ctl_table_hdr); 1884 device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); 1885 misc_deregister(&ucma_misc); 1886 } 1887 1888 module_init(ucma_init); 1889 module_exit(ucma_cleanup); 1890