// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *   Copyright (C) 2017, Microsoft Corporation.
 *   Copyright (C) 2018, LG Electronics.
 *
 *   Author(s): Long Li <longli@microsoft.com>,
 *		Hyunchul Lee <hyc.lee@gmail.com>
 */

#define SUBMOD_NAME	"smb_direct"

#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>
#include <linux/string_choices.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/rw.h>

#include "glob.h"
#include "connection.h"
#include "smb_common.h"
#include "../common/smb2status.h"
#include "transport_rdma.h"

#define SMB_DIRECT_PORT_IWARP		5445
#define SMB_DIRECT_PORT_INFINIBAND	445

#define SMB_DIRECT_VERSION_LE		cpu_to_le16(0x0100)

/* SMB_DIRECT negotiation timeout in seconds */
#define SMB_DIRECT_NEGOTIATE_TIMEOUT		120

#define SMB_DIRECT_MAX_SEND_SGES		6
#define SMB_DIRECT_MAX_RECV_SGES		1

/*
 * Default maximum number of outstanding RDMA read/write operations on this
 * connection. This value may be decreased during QP creation to respect
 * hardware limits.
 */
#define SMB_DIRECT_CM_INITIATOR_DEPTH		8

/* Maximum number of retries on data transfer operations */
#define SMB_DIRECT_CM_RETRY			6
/* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
#define SMB_DIRECT_CM_RNR_RETRY		0

/*
 * User configurable initial values per SMB_DIRECT transport connection
 * as defined in [MS-SMBD] 3.1.1.1.
 * These may change after SMB_DIRECT negotiation.
 */

/* Use port 445 as the SMB Direct port by default */
static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;

/* The maximum number of receive credits the local peer grants to the remote peer */
static int smb_direct_receive_credit_max = 255;

/* The number of send credits requested from the remote peer */
static int smb_direct_send_credit_target = 255;

/* The maximum size of a single message that can be sent to the remote peer */
static int smb_direct_max_send_size = 1364;

/* The maximum fragmented upper-layer payload receive size supported */
static int smb_direct_max_fragmented_recv_size = 1024 * 1024;

/* The maximum size of a single message that can be received */
static int smb_direct_max_receive_size = 1364;

static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;

static LIST_HEAD(smb_direct_device_list);
static DEFINE_RWLOCK(smb_direct_device_lock);

struct smb_direct_device {
	struct ib_device	*ib_dev;
	struct list_head	list;
};

static struct smb_direct_listener {
	struct rdma_cm_id	*cm_id;
} smb_direct_listener;

static struct workqueue_struct *smb_direct_wq;

enum smb_direct_status {
	SMB_DIRECT_CS_NEW = 0,
	SMB_DIRECT_CS_CONNECTED,
	SMB_DIRECT_CS_DISCONNECTING,
	SMB_DIRECT_CS_DISCONNECTED,
};

struct smb_direct_transport {
	struct ksmbd_transport	transport;

	enum smb_direct_status	status;
	bool			full_packet_received;
	wait_queue_head_t	wait_status;

	struct rdma_cm_id	*cm_id;
	struct ib_cq		*send_cq;
	struct ib_cq		*recv_cq;
	struct ib_pd		*pd;
	struct ib_qp		*qp;

	int			max_send_size;
	int			max_recv_size;
	int			max_fragmented_send_size;
	int			max_fragmented_recv_size;
	int			max_rdma_rw_size;

	spinlock_t		reassembly_queue_lock;
	struct list_head	reassembly_queue;
	int			reassembly_data_length;
	int			reassembly_queue_length;
	int			first_entry_offset;

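	/*
	 * Note: readers in smb_direct_read() sleep on this wait queue until
	 * reassembly_data_length covers the requested size; recv_done()
	 * wakes them as new data-transfer segments are queued.
	 */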
wait_queue_head_t wait_reassembly_queue; 122 123 spinlock_t receive_credit_lock; 124 int recv_credits; 125 int count_avail_recvmsg; 126 int recv_credit_max; 127 int recv_credit_target; 128 129 spinlock_t recvmsg_queue_lock; 130 struct list_head recvmsg_queue; 131 132 spinlock_t empty_recvmsg_queue_lock; 133 struct list_head empty_recvmsg_queue; 134 135 int send_credit_target; 136 atomic_t send_credits; 137 spinlock_t lock_new_recv_credits; 138 int new_recv_credits; 139 int max_rw_credits; 140 int pages_per_rw_credit; 141 atomic_t rw_credits; 142 143 wait_queue_head_t wait_send_credits; 144 wait_queue_head_t wait_rw_credits; 145 146 mempool_t *sendmsg_mempool; 147 struct kmem_cache *sendmsg_cache; 148 mempool_t *recvmsg_mempool; 149 struct kmem_cache *recvmsg_cache; 150 151 wait_queue_head_t wait_send_pending; 152 atomic_t send_pending; 153 154 struct delayed_work post_recv_credits_work; 155 struct work_struct send_immediate_work; 156 struct work_struct disconnect_work; 157 158 bool negotiation_requested; 159 }; 160 161 #define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport)) 162 163 enum { 164 SMB_DIRECT_MSG_NEGOTIATE_REQ = 0, 165 SMB_DIRECT_MSG_DATA_TRANSFER 166 }; 167 168 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops; 169 170 struct smb_direct_send_ctx { 171 struct list_head msg_list; 172 int wr_cnt; 173 bool need_invalidate_rkey; 174 unsigned int remote_key; 175 }; 176 177 struct smb_direct_sendmsg { 178 struct smb_direct_transport *transport; 179 struct ib_send_wr wr; 180 struct list_head list; 181 int num_sge; 182 struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES]; 183 struct ib_cqe cqe; 184 u8 packet[]; 185 }; 186 187 struct smb_direct_recvmsg { 188 struct smb_direct_transport *transport; 189 struct list_head list; 190 int type; 191 struct ib_sge sge; 192 struct ib_cqe cqe; 193 bool first_segment; 194 u8 packet[]; 195 }; 196 197 struct smb_direct_rdma_rw_msg { 198 struct smb_direct_transport *t; 199 struct ib_cqe cqe; 200 int status; 201 struct completion *completion; 202 struct list_head list; 203 struct rdma_rw_ctx rw_ctx; 204 struct sg_table sgt; 205 struct scatterlist sg_list[]; 206 }; 207 208 void init_smbd_max_io_size(unsigned int sz) 209 { 210 sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE); 211 smb_direct_max_read_write_size = sz; 212 } 213 214 unsigned int get_smbd_max_read_write_size(void) 215 { 216 return smb_direct_max_read_write_size; 217 } 218 219 static inline int get_buf_page_count(void *buf, int size) 220 { 221 return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - 222 (uintptr_t)buf / PAGE_SIZE; 223 } 224 225 static void smb_direct_destroy_pools(struct smb_direct_transport *transport); 226 static void smb_direct_post_recv_credits(struct work_struct *work); 227 static int smb_direct_post_send_data(struct smb_direct_transport *t, 228 struct smb_direct_send_ctx *send_ctx, 229 struct kvec *iov, int niov, 230 int remaining_data_length); 231 232 static inline struct smb_direct_transport * 233 smb_trans_direct_transfort(struct ksmbd_transport *t) 234 { 235 return container_of(t, struct smb_direct_transport, transport); 236 } 237 238 static inline void 239 *smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg) 240 { 241 return (void *)recvmsg->packet; 242 } 243 244 static inline bool is_receive_credit_post_required(int receive_credits, 245 int avail_recvmsg_count) 246 { 247 return receive_credits <= (smb_direct_receive_credit_max >> 3) && 248 avail_recvmsg_count >= (receive_credits >> 2); 249 } 250 251 static struct 252 
smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t) 253 { 254 struct smb_direct_recvmsg *recvmsg = NULL; 255 256 spin_lock(&t->recvmsg_queue_lock); 257 if (!list_empty(&t->recvmsg_queue)) { 258 recvmsg = list_first_entry(&t->recvmsg_queue, 259 struct smb_direct_recvmsg, 260 list); 261 list_del(&recvmsg->list); 262 } 263 spin_unlock(&t->recvmsg_queue_lock); 264 return recvmsg; 265 } 266 267 static void put_recvmsg(struct smb_direct_transport *t, 268 struct smb_direct_recvmsg *recvmsg) 269 { 270 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, 271 recvmsg->sge.length, DMA_FROM_DEVICE); 272 273 spin_lock(&t->recvmsg_queue_lock); 274 list_add(&recvmsg->list, &t->recvmsg_queue); 275 spin_unlock(&t->recvmsg_queue_lock); 276 } 277 278 static struct 279 smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t) 280 { 281 struct smb_direct_recvmsg *recvmsg = NULL; 282 283 spin_lock(&t->empty_recvmsg_queue_lock); 284 if (!list_empty(&t->empty_recvmsg_queue)) { 285 recvmsg = list_first_entry(&t->empty_recvmsg_queue, 286 struct smb_direct_recvmsg, list); 287 list_del(&recvmsg->list); 288 } 289 spin_unlock(&t->empty_recvmsg_queue_lock); 290 return recvmsg; 291 } 292 293 static void put_empty_recvmsg(struct smb_direct_transport *t, 294 struct smb_direct_recvmsg *recvmsg) 295 { 296 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, 297 recvmsg->sge.length, DMA_FROM_DEVICE); 298 299 spin_lock(&t->empty_recvmsg_queue_lock); 300 list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue); 301 spin_unlock(&t->empty_recvmsg_queue_lock); 302 } 303 304 static void enqueue_reassembly(struct smb_direct_transport *t, 305 struct smb_direct_recvmsg *recvmsg, 306 int data_length) 307 { 308 spin_lock(&t->reassembly_queue_lock); 309 list_add_tail(&recvmsg->list, &t->reassembly_queue); 310 t->reassembly_queue_length++; 311 /* 312 * Make sure reassembly_data_length is updated after list and 313 * reassembly_queue_length are updated. 
On the dequeue side 314 * reassembly_data_length is checked without a lock to determine 315 * if reassembly_queue_length and list is up to date 316 */ 317 virt_wmb(); 318 t->reassembly_data_length += data_length; 319 spin_unlock(&t->reassembly_queue_lock); 320 } 321 322 static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t) 323 { 324 if (!list_empty(&t->reassembly_queue)) 325 return list_first_entry(&t->reassembly_queue, 326 struct smb_direct_recvmsg, list); 327 else 328 return NULL; 329 } 330 331 static void smb_direct_disconnect_rdma_work(struct work_struct *work) 332 { 333 struct smb_direct_transport *t = 334 container_of(work, struct smb_direct_transport, 335 disconnect_work); 336 337 if (t->status == SMB_DIRECT_CS_CONNECTED) { 338 t->status = SMB_DIRECT_CS_DISCONNECTING; 339 rdma_disconnect(t->cm_id); 340 } 341 } 342 343 static void 344 smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t) 345 { 346 if (t->status == SMB_DIRECT_CS_CONNECTED) 347 queue_work(smb_direct_wq, &t->disconnect_work); 348 } 349 350 static void smb_direct_send_immediate_work(struct work_struct *work) 351 { 352 struct smb_direct_transport *t = container_of(work, 353 struct smb_direct_transport, send_immediate_work); 354 355 if (t->status != SMB_DIRECT_CS_CONNECTED) 356 return; 357 358 smb_direct_post_send_data(t, NULL, NULL, 0, 0); 359 } 360 361 static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) 362 { 363 struct smb_direct_transport *t; 364 struct ksmbd_conn *conn; 365 366 t = kzalloc(sizeof(*t), KSMBD_DEFAULT_GFP); 367 if (!t) 368 return NULL; 369 370 t->cm_id = cm_id; 371 cm_id->context = t; 372 373 t->status = SMB_DIRECT_CS_NEW; 374 init_waitqueue_head(&t->wait_status); 375 376 spin_lock_init(&t->reassembly_queue_lock); 377 INIT_LIST_HEAD(&t->reassembly_queue); 378 t->reassembly_data_length = 0; 379 t->reassembly_queue_length = 0; 380 init_waitqueue_head(&t->wait_reassembly_queue); 381 init_waitqueue_head(&t->wait_send_credits); 382 init_waitqueue_head(&t->wait_rw_credits); 383 384 spin_lock_init(&t->receive_credit_lock); 385 spin_lock_init(&t->recvmsg_queue_lock); 386 INIT_LIST_HEAD(&t->recvmsg_queue); 387 388 spin_lock_init(&t->empty_recvmsg_queue_lock); 389 INIT_LIST_HEAD(&t->empty_recvmsg_queue); 390 391 init_waitqueue_head(&t->wait_send_pending); 392 atomic_set(&t->send_pending, 0); 393 394 spin_lock_init(&t->lock_new_recv_credits); 395 396 INIT_DELAYED_WORK(&t->post_recv_credits_work, 397 smb_direct_post_recv_credits); 398 INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work); 399 INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work); 400 401 conn = ksmbd_conn_alloc(); 402 if (!conn) 403 goto err; 404 conn->transport = KSMBD_TRANS(t); 405 KSMBD_TRANS(t)->conn = conn; 406 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops; 407 return t; 408 err: 409 kfree(t); 410 return NULL; 411 } 412 413 static void free_transport(struct smb_direct_transport *t) 414 { 415 struct smb_direct_recvmsg *recvmsg; 416 417 wake_up_interruptible(&t->wait_send_credits); 418 419 ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n"); 420 wait_event(t->wait_send_pending, 421 atomic_read(&t->send_pending) == 0); 422 423 cancel_work_sync(&t->disconnect_work); 424 cancel_delayed_work_sync(&t->post_recv_credits_work); 425 cancel_work_sync(&t->send_immediate_work); 426 427 if (t->qp) { 428 ib_drain_qp(t->qp); 429 ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs); 430 ib_destroy_qp(t->qp); 431 } 432 433 ksmbd_debug(RDMA, "drain the 
reassembly queue\n"); 434 do { 435 spin_lock(&t->reassembly_queue_lock); 436 recvmsg = get_first_reassembly(t); 437 if (recvmsg) { 438 list_del(&recvmsg->list); 439 spin_unlock(&t->reassembly_queue_lock); 440 put_recvmsg(t, recvmsg); 441 } else { 442 spin_unlock(&t->reassembly_queue_lock); 443 } 444 } while (recvmsg); 445 t->reassembly_data_length = 0; 446 447 if (t->send_cq) 448 ib_free_cq(t->send_cq); 449 if (t->recv_cq) 450 ib_free_cq(t->recv_cq); 451 if (t->pd) 452 ib_dealloc_pd(t->pd); 453 if (t->cm_id) 454 rdma_destroy_id(t->cm_id); 455 456 smb_direct_destroy_pools(t); 457 ksmbd_conn_free(KSMBD_TRANS(t)->conn); 458 kfree(t); 459 } 460 461 static struct smb_direct_sendmsg 462 *smb_direct_alloc_sendmsg(struct smb_direct_transport *t) 463 { 464 struct smb_direct_sendmsg *msg; 465 466 msg = mempool_alloc(t->sendmsg_mempool, KSMBD_DEFAULT_GFP); 467 if (!msg) 468 return ERR_PTR(-ENOMEM); 469 msg->transport = t; 470 INIT_LIST_HEAD(&msg->list); 471 msg->num_sge = 0; 472 return msg; 473 } 474 475 static void smb_direct_free_sendmsg(struct smb_direct_transport *t, 476 struct smb_direct_sendmsg *msg) 477 { 478 int i; 479 480 if (msg->num_sge > 0) { 481 ib_dma_unmap_single(t->cm_id->device, 482 msg->sge[0].addr, msg->sge[0].length, 483 DMA_TO_DEVICE); 484 for (i = 1; i < msg->num_sge; i++) 485 ib_dma_unmap_page(t->cm_id->device, 486 msg->sge[i].addr, msg->sge[i].length, 487 DMA_TO_DEVICE); 488 } 489 mempool_free(msg, t->sendmsg_mempool); 490 } 491 492 static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg) 493 { 494 switch (recvmsg->type) { 495 case SMB_DIRECT_MSG_DATA_TRANSFER: { 496 struct smb_direct_data_transfer *req = 497 (struct smb_direct_data_transfer *)recvmsg->packet; 498 struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet 499 + le32_to_cpu(req->data_offset)); 500 ksmbd_debug(RDMA, 501 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n", 502 le16_to_cpu(req->credits_granted), 503 le16_to_cpu(req->credits_requested), 504 req->data_length, req->remaining_data_length, 505 hdr->ProtocolId, hdr->Command); 506 break; 507 } 508 case SMB_DIRECT_MSG_NEGOTIATE_REQ: { 509 struct smb_direct_negotiate_req *req = 510 (struct smb_direct_negotiate_req *)recvmsg->packet; 511 ksmbd_debug(RDMA, 512 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n", 513 le16_to_cpu(req->min_version), 514 le16_to_cpu(req->max_version), 515 le16_to_cpu(req->credits_requested), 516 le32_to_cpu(req->preferred_send_size), 517 le32_to_cpu(req->max_receive_size), 518 le32_to_cpu(req->max_fragmented_size)); 519 if (le16_to_cpu(req->min_version) > 0x0100 || 520 le16_to_cpu(req->max_version) < 0x0100) 521 return -EOPNOTSUPP; 522 if (le16_to_cpu(req->credits_requested) <= 0 || 523 le32_to_cpu(req->max_receive_size) <= 128 || 524 le32_to_cpu(req->max_fragmented_size) <= 525 128 * 1024) 526 return -ECONNABORTED; 527 528 break; 529 } 530 default: 531 return -EINVAL; 532 } 533 return 0; 534 } 535 536 static void recv_done(struct ib_cq *cq, struct ib_wc *wc) 537 { 538 struct smb_direct_recvmsg *recvmsg; 539 struct smb_direct_transport *t; 540 541 recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe); 542 t = recvmsg->transport; 543 544 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { 545 if (wc->status != IB_WC_WR_FLUSH_ERR) { 546 pr_err("Recv error. 
status='%s (%d)' opcode=%d\n", 547 ib_wc_status_msg(wc->status), wc->status, 548 wc->opcode); 549 smb_direct_disconnect_rdma_connection(t); 550 } 551 put_empty_recvmsg(t, recvmsg); 552 return; 553 } 554 555 ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n", 556 ib_wc_status_msg(wc->status), wc->status, 557 wc->opcode); 558 559 ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr, 560 recvmsg->sge.length, DMA_FROM_DEVICE); 561 562 switch (recvmsg->type) { 563 case SMB_DIRECT_MSG_NEGOTIATE_REQ: 564 if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) { 565 put_empty_recvmsg(t, recvmsg); 566 return; 567 } 568 t->negotiation_requested = true; 569 t->full_packet_received = true; 570 t->status = SMB_DIRECT_CS_CONNECTED; 571 enqueue_reassembly(t, recvmsg, 0); 572 wake_up_interruptible(&t->wait_status); 573 break; 574 case SMB_DIRECT_MSG_DATA_TRANSFER: { 575 struct smb_direct_data_transfer *data_transfer = 576 (struct smb_direct_data_transfer *)recvmsg->packet; 577 unsigned int data_length; 578 int avail_recvmsg_count, receive_credits; 579 580 if (wc->byte_len < 581 offsetof(struct smb_direct_data_transfer, padding)) { 582 put_empty_recvmsg(t, recvmsg); 583 return; 584 } 585 586 data_length = le32_to_cpu(data_transfer->data_length); 587 if (data_length) { 588 if (wc->byte_len < sizeof(struct smb_direct_data_transfer) + 589 (u64)data_length) { 590 put_empty_recvmsg(t, recvmsg); 591 return; 592 } 593 594 if (t->full_packet_received) 595 recvmsg->first_segment = true; 596 597 if (le32_to_cpu(data_transfer->remaining_data_length)) 598 t->full_packet_received = false; 599 else 600 t->full_packet_received = true; 601 602 enqueue_reassembly(t, recvmsg, (int)data_length); 603 wake_up_interruptible(&t->wait_reassembly_queue); 604 605 spin_lock(&t->receive_credit_lock); 606 receive_credits = --(t->recv_credits); 607 avail_recvmsg_count = t->count_avail_recvmsg; 608 spin_unlock(&t->receive_credit_lock); 609 } else { 610 put_empty_recvmsg(t, recvmsg); 611 612 spin_lock(&t->receive_credit_lock); 613 receive_credits = --(t->recv_credits); 614 avail_recvmsg_count = ++(t->count_avail_recvmsg); 615 spin_unlock(&t->receive_credit_lock); 616 } 617 618 t->recv_credit_target = 619 le16_to_cpu(data_transfer->credits_requested); 620 atomic_add(le16_to_cpu(data_transfer->credits_granted), 621 &t->send_credits); 622 623 if (le16_to_cpu(data_transfer->flags) & 624 SMB_DIRECT_RESPONSE_REQUESTED) 625 queue_work(smb_direct_wq, &t->send_immediate_work); 626 627 if (atomic_read(&t->send_credits) > 0) 628 wake_up_interruptible(&t->wait_send_credits); 629 630 if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count)) 631 mod_delayed_work(smb_direct_wq, 632 &t->post_recv_credits_work, 0); 633 break; 634 } 635 default: 636 break; 637 } 638 } 639 640 static int smb_direct_post_recv(struct smb_direct_transport *t, 641 struct smb_direct_recvmsg *recvmsg) 642 { 643 struct ib_recv_wr wr; 644 int ret; 645 646 recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device, 647 recvmsg->packet, t->max_recv_size, 648 DMA_FROM_DEVICE); 649 ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr); 650 if (ret) 651 return ret; 652 recvmsg->sge.length = t->max_recv_size; 653 recvmsg->sge.lkey = t->pd->local_dma_lkey; 654 recvmsg->cqe.done = recv_done; 655 656 wr.wr_cqe = &recvmsg->cqe; 657 wr.next = NULL; 658 wr.sg_list = &recvmsg->sge; 659 wr.num_sge = 1; 660 661 ret = ib_post_recv(t->qp, &wr, NULL); 662 if (ret) { 663 pr_err("Can't post recv: %d\n", ret); 664 ib_dma_unmap_single(t->cm_id->device, 
				    recvmsg->sge.addr, recvmsg->sge.length,
				    DMA_FROM_DEVICE);
		smb_direct_disconnect_rdma_connection(t);
		return ret;
	}
	return ret;
}

static int smb_direct_read(struct ksmbd_transport *t, char *buf,
			   unsigned int size, int unused)
{
	struct smb_direct_recvmsg *recvmsg;
	struct smb_direct_data_transfer *data_transfer;
	int to_copy, to_read, data_read, offset;
	u32 data_length, remaining_data_length, data_offset;
	int rc;
	struct smb_direct_transport *st = smb_trans_direct_transfort(t);

again:
	if (st->status != SMB_DIRECT_CS_CONNECTED) {
		pr_err("disconnected\n");
		return -ENOTCONN;
	}

	/*
	 * No need to hold the reassembly queue lock all the time as we are
	 * the only one reading from the front of the queue. The transport
	 * may add more entries to the back of the queue at the same time.
	 */
	if (st->reassembly_data_length >= size) {
		int queue_length;
		int queue_removed = 0;

		/*
		 * Need to make sure reassembly_data_length is read before
		 * reading reassembly_queue_length and calling
		 * get_first_reassembly. This call is lock free
		 * as we never read at the end of the queue, which is being
		 * updated in SOFTIRQ context as more data is received.
		 */
		virt_rmb();
		queue_length = st->reassembly_queue_length;
		data_read = 0;
		to_read = size;
		offset = st->first_entry_offset;
		while (data_read < size) {
			recvmsg = get_first_reassembly(st);
			data_transfer = smb_direct_recvmsg_payload(recvmsg);
			data_length = le32_to_cpu(data_transfer->data_length);
			remaining_data_length =
				le32_to_cpu(data_transfer->remaining_data_length);
			data_offset = le32_to_cpu(data_transfer->data_offset);

			/*
			 * The upper layer expects an RFC1002 length at the
			 * beginning of the payload. Return it to indicate
			 * the total length of the packet. This minimizes the
			 * changes to the upper-layer packet processing logic
			 * and will eventually be removed when an intermediate
			 * transport layer is added.
			 */
			if (recvmsg->first_segment && size == 4) {
				unsigned int rfc1002_len =
					data_length + remaining_data_length;
				*((__be32 *)buf) = cpu_to_be32(rfc1002_len);
				data_read = 4;
				recvmsg->first_segment = false;
				ksmbd_debug(RDMA,
					    "returning rfc1002 length %d\n",
					    rfc1002_len);
				goto read_rfc1002_done;
			}

			to_copy = min_t(int, data_length - offset, to_read);
			memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
			       to_copy);

			/* move on to the next buffer?
*/ 743 if (to_copy == data_length - offset) { 744 queue_length--; 745 /* 746 * No need to lock if we are not at the 747 * end of the queue 748 */ 749 if (queue_length) { 750 list_del(&recvmsg->list); 751 } else { 752 spin_lock_irq(&st->reassembly_queue_lock); 753 list_del(&recvmsg->list); 754 spin_unlock_irq(&st->reassembly_queue_lock); 755 } 756 queue_removed++; 757 put_recvmsg(st, recvmsg); 758 offset = 0; 759 } else { 760 offset += to_copy; 761 } 762 763 to_read -= to_copy; 764 data_read += to_copy; 765 } 766 767 spin_lock_irq(&st->reassembly_queue_lock); 768 st->reassembly_data_length -= data_read; 769 st->reassembly_queue_length -= queue_removed; 770 spin_unlock_irq(&st->reassembly_queue_lock); 771 772 spin_lock(&st->receive_credit_lock); 773 st->count_avail_recvmsg += queue_removed; 774 if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) { 775 spin_unlock(&st->receive_credit_lock); 776 mod_delayed_work(smb_direct_wq, 777 &st->post_recv_credits_work, 0); 778 } else { 779 spin_unlock(&st->receive_credit_lock); 780 } 781 782 st->first_entry_offset = offset; 783 ksmbd_debug(RDMA, 784 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", 785 data_read, st->reassembly_data_length, 786 st->first_entry_offset); 787 read_rfc1002_done: 788 return data_read; 789 } 790 791 ksmbd_debug(RDMA, "wait_event on more data\n"); 792 rc = wait_event_interruptible(st->wait_reassembly_queue, 793 st->reassembly_data_length >= size || 794 st->status != SMB_DIRECT_CS_CONNECTED); 795 if (rc) 796 return -EINTR; 797 798 goto again; 799 } 800 801 static void smb_direct_post_recv_credits(struct work_struct *work) 802 { 803 struct smb_direct_transport *t = container_of(work, 804 struct smb_direct_transport, post_recv_credits_work.work); 805 struct smb_direct_recvmsg *recvmsg; 806 int receive_credits, credits = 0; 807 int ret; 808 int use_free = 1; 809 810 spin_lock(&t->receive_credit_lock); 811 receive_credits = t->recv_credits; 812 spin_unlock(&t->receive_credit_lock); 813 814 if (receive_credits < t->recv_credit_target) { 815 while (true) { 816 if (use_free) 817 recvmsg = get_free_recvmsg(t); 818 else 819 recvmsg = get_empty_recvmsg(t); 820 if (!recvmsg) { 821 if (use_free) { 822 use_free = 0; 823 continue; 824 } else { 825 break; 826 } 827 } 828 829 recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER; 830 recvmsg->first_segment = false; 831 832 ret = smb_direct_post_recv(t, recvmsg); 833 if (ret) { 834 pr_err("Can't post recv: %d\n", ret); 835 put_recvmsg(t, recvmsg); 836 break; 837 } 838 credits++; 839 } 840 } 841 842 spin_lock(&t->receive_credit_lock); 843 t->recv_credits += credits; 844 t->count_avail_recvmsg -= credits; 845 spin_unlock(&t->receive_credit_lock); 846 847 spin_lock(&t->lock_new_recv_credits); 848 t->new_recv_credits += credits; 849 spin_unlock(&t->lock_new_recv_credits); 850 851 if (credits) 852 queue_work(smb_direct_wq, &t->send_immediate_work); 853 } 854 855 static void send_done(struct ib_cq *cq, struct ib_wc *wc) 856 { 857 struct smb_direct_sendmsg *sendmsg, *sibling; 858 struct smb_direct_transport *t; 859 struct list_head *pos, *prev, *end; 860 861 sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe); 862 t = sendmsg->transport; 863 864 ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n", 865 ib_wc_status_msg(wc->status), wc->status, 866 wc->opcode); 867 868 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { 869 pr_err("Send error. 
status='%s (%d)', opcode=%d\n", 870 ib_wc_status_msg(wc->status), wc->status, 871 wc->opcode); 872 smb_direct_disconnect_rdma_connection(t); 873 } 874 875 if (atomic_dec_and_test(&t->send_pending)) 876 wake_up(&t->wait_send_pending); 877 878 /* iterate and free the list of messages in reverse. the list's head 879 * is invalid. 880 */ 881 for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next; 882 prev != end; pos = prev, prev = prev->prev) { 883 sibling = container_of(pos, struct smb_direct_sendmsg, list); 884 smb_direct_free_sendmsg(t, sibling); 885 } 886 887 sibling = container_of(pos, struct smb_direct_sendmsg, list); 888 smb_direct_free_sendmsg(t, sibling); 889 } 890 891 static int manage_credits_prior_sending(struct smb_direct_transport *t) 892 { 893 int new_credits; 894 895 spin_lock(&t->lock_new_recv_credits); 896 new_credits = t->new_recv_credits; 897 t->new_recv_credits = 0; 898 spin_unlock(&t->lock_new_recv_credits); 899 900 return new_credits; 901 } 902 903 static int smb_direct_post_send(struct smb_direct_transport *t, 904 struct ib_send_wr *wr) 905 { 906 int ret; 907 908 atomic_inc(&t->send_pending); 909 ret = ib_post_send(t->qp, wr, NULL); 910 if (ret) { 911 pr_err("failed to post send: %d\n", ret); 912 if (atomic_dec_and_test(&t->send_pending)) 913 wake_up(&t->wait_send_pending); 914 smb_direct_disconnect_rdma_connection(t); 915 } 916 return ret; 917 } 918 919 static void smb_direct_send_ctx_init(struct smb_direct_transport *t, 920 struct smb_direct_send_ctx *send_ctx, 921 bool need_invalidate_rkey, 922 unsigned int remote_key) 923 { 924 INIT_LIST_HEAD(&send_ctx->msg_list); 925 send_ctx->wr_cnt = 0; 926 send_ctx->need_invalidate_rkey = need_invalidate_rkey; 927 send_ctx->remote_key = remote_key; 928 } 929 930 static int smb_direct_flush_send_list(struct smb_direct_transport *t, 931 struct smb_direct_send_ctx *send_ctx, 932 bool is_last) 933 { 934 struct smb_direct_sendmsg *first, *last; 935 int ret; 936 937 if (list_empty(&send_ctx->msg_list)) 938 return 0; 939 940 first = list_first_entry(&send_ctx->msg_list, 941 struct smb_direct_sendmsg, 942 list); 943 last = list_last_entry(&send_ctx->msg_list, 944 struct smb_direct_sendmsg, 945 list); 946 947 last->wr.send_flags = IB_SEND_SIGNALED; 948 last->wr.wr_cqe = &last->cqe; 949 if (is_last && send_ctx->need_invalidate_rkey) { 950 last->wr.opcode = IB_WR_SEND_WITH_INV; 951 last->wr.ex.invalidate_rkey = send_ctx->remote_key; 952 } 953 954 ret = smb_direct_post_send(t, &first->wr); 955 if (!ret) { 956 smb_direct_send_ctx_init(t, send_ctx, 957 send_ctx->need_invalidate_rkey, 958 send_ctx->remote_key); 959 } else { 960 atomic_add(send_ctx->wr_cnt, &t->send_credits); 961 wake_up(&t->wait_send_credits); 962 list_for_each_entry_safe(first, last, &send_ctx->msg_list, 963 list) { 964 smb_direct_free_sendmsg(t, first); 965 } 966 } 967 return ret; 968 } 969 970 static int wait_for_credits(struct smb_direct_transport *t, 971 wait_queue_head_t *waitq, atomic_t *total_credits, 972 int needed) 973 { 974 int ret; 975 976 do { 977 if (atomic_sub_return(needed, total_credits) >= 0) 978 return 0; 979 980 atomic_add(needed, total_credits); 981 ret = wait_event_interruptible(*waitq, 982 atomic_read(total_credits) >= needed || 983 t->status != SMB_DIRECT_CS_CONNECTED); 984 985 if (t->status != SMB_DIRECT_CS_CONNECTED) 986 return -ENOTCONN; 987 else if (ret < 0) 988 return ret; 989 } while (true); 990 } 991 992 static int wait_for_send_credits(struct smb_direct_transport *t, 993 struct smb_direct_send_ctx *send_ctx) 994 { 995 int ret; 
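
	/*
	 * If the caller has batched sends pending and the batch is getting
	 * large or send credits are nearly exhausted, flush the batch first
	 * so those messages are not held back while we sleep for a credit.
	 */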
996 997 if (send_ctx && 998 (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) { 999 ret = smb_direct_flush_send_list(t, send_ctx, false); 1000 if (ret) 1001 return ret; 1002 } 1003 1004 return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1); 1005 } 1006 1007 static int wait_for_rw_credits(struct smb_direct_transport *t, int credits) 1008 { 1009 return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits); 1010 } 1011 1012 static int calc_rw_credits(struct smb_direct_transport *t, 1013 char *buf, unsigned int len) 1014 { 1015 return DIV_ROUND_UP(get_buf_page_count(buf, len), 1016 t->pages_per_rw_credit); 1017 } 1018 1019 static int smb_direct_create_header(struct smb_direct_transport *t, 1020 int size, int remaining_data_length, 1021 struct smb_direct_sendmsg **sendmsg_out) 1022 { 1023 struct smb_direct_sendmsg *sendmsg; 1024 struct smb_direct_data_transfer *packet; 1025 int header_length; 1026 int ret; 1027 1028 sendmsg = smb_direct_alloc_sendmsg(t); 1029 if (IS_ERR(sendmsg)) 1030 return PTR_ERR(sendmsg); 1031 1032 /* Fill in the packet header */ 1033 packet = (struct smb_direct_data_transfer *)sendmsg->packet; 1034 packet->credits_requested = cpu_to_le16(t->send_credit_target); 1035 packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); 1036 1037 packet->flags = 0; 1038 packet->reserved = 0; 1039 if (!size) 1040 packet->data_offset = 0; 1041 else 1042 packet->data_offset = cpu_to_le32(24); 1043 packet->data_length = cpu_to_le32(size); 1044 packet->remaining_data_length = cpu_to_le32(remaining_data_length); 1045 packet->padding = 0; 1046 1047 ksmbd_debug(RDMA, 1048 "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", 1049 le16_to_cpu(packet->credits_requested), 1050 le16_to_cpu(packet->credits_granted), 1051 le32_to_cpu(packet->data_offset), 1052 le32_to_cpu(packet->data_length), 1053 le32_to_cpu(packet->remaining_data_length)); 1054 1055 /* Map the packet to DMA */ 1056 header_length = sizeof(struct smb_direct_data_transfer); 1057 /* If this is a packet without payload, don't send padding */ 1058 if (!size) 1059 header_length = 1060 offsetof(struct smb_direct_data_transfer, padding); 1061 1062 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device, 1063 (void *)packet, 1064 header_length, 1065 DMA_TO_DEVICE); 1066 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); 1067 if (ret) { 1068 smb_direct_free_sendmsg(t, sendmsg); 1069 return ret; 1070 } 1071 1072 sendmsg->num_sge = 1; 1073 sendmsg->sge[0].length = header_length; 1074 sendmsg->sge[0].lkey = t->pd->local_dma_lkey; 1075 1076 *sendmsg_out = sendmsg; 1077 return 0; 1078 } 1079 1080 static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries) 1081 { 1082 bool high = is_vmalloc_addr(buf); 1083 struct page *page; 1084 int offset, len; 1085 int i = 0; 1086 1087 if (size <= 0 || nentries < get_buf_page_count(buf, size)) 1088 return -EINVAL; 1089 1090 offset = offset_in_page(buf); 1091 buf -= offset; 1092 while (size > 0) { 1093 len = min_t(int, PAGE_SIZE - offset, size); 1094 if (high) 1095 page = vmalloc_to_page(buf); 1096 else 1097 page = kmap_to_page(buf); 1098 1099 if (!sg_list) 1100 return -EINVAL; 1101 sg_set_page(sg_list, page, len, offset); 1102 sg_list = sg_next(sg_list); 1103 1104 buf += PAGE_SIZE; 1105 size -= len; 1106 offset = 0; 1107 i++; 1108 } 1109 return i; 1110 } 1111 1112 static int get_mapped_sg_list(struct ib_device *device, void *buf, int size, 1113 struct scatterlist 
*sg_list, int nentries, 1114 enum dma_data_direction dir) 1115 { 1116 int npages; 1117 1118 npages = get_sg_list(buf, size, sg_list, nentries); 1119 if (npages < 0) 1120 return -EINVAL; 1121 return ib_dma_map_sg(device, sg_list, npages, dir); 1122 } 1123 1124 static int post_sendmsg(struct smb_direct_transport *t, 1125 struct smb_direct_send_ctx *send_ctx, 1126 struct smb_direct_sendmsg *msg) 1127 { 1128 int i; 1129 1130 for (i = 0; i < msg->num_sge; i++) 1131 ib_dma_sync_single_for_device(t->cm_id->device, 1132 msg->sge[i].addr, msg->sge[i].length, 1133 DMA_TO_DEVICE); 1134 1135 msg->cqe.done = send_done; 1136 msg->wr.opcode = IB_WR_SEND; 1137 msg->wr.sg_list = &msg->sge[0]; 1138 msg->wr.num_sge = msg->num_sge; 1139 msg->wr.next = NULL; 1140 1141 if (send_ctx) { 1142 msg->wr.wr_cqe = NULL; 1143 msg->wr.send_flags = 0; 1144 if (!list_empty(&send_ctx->msg_list)) { 1145 struct smb_direct_sendmsg *last; 1146 1147 last = list_last_entry(&send_ctx->msg_list, 1148 struct smb_direct_sendmsg, 1149 list); 1150 last->wr.next = &msg->wr; 1151 } 1152 list_add_tail(&msg->list, &send_ctx->msg_list); 1153 send_ctx->wr_cnt++; 1154 return 0; 1155 } 1156 1157 msg->wr.wr_cqe = &msg->cqe; 1158 msg->wr.send_flags = IB_SEND_SIGNALED; 1159 return smb_direct_post_send(t, &msg->wr); 1160 } 1161 1162 static int smb_direct_post_send_data(struct smb_direct_transport *t, 1163 struct smb_direct_send_ctx *send_ctx, 1164 struct kvec *iov, int niov, 1165 int remaining_data_length) 1166 { 1167 int i, j, ret; 1168 struct smb_direct_sendmsg *msg; 1169 int data_length; 1170 struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1]; 1171 1172 ret = wait_for_send_credits(t, send_ctx); 1173 if (ret) 1174 return ret; 1175 1176 data_length = 0; 1177 for (i = 0; i < niov; i++) 1178 data_length += iov[i].iov_len; 1179 1180 ret = smb_direct_create_header(t, data_length, remaining_data_length, 1181 &msg); 1182 if (ret) { 1183 atomic_inc(&t->send_credits); 1184 return ret; 1185 } 1186 1187 for (i = 0; i < niov; i++) { 1188 struct ib_sge *sge; 1189 int sg_cnt; 1190 1191 sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1); 1192 sg_cnt = get_mapped_sg_list(t->cm_id->device, 1193 iov[i].iov_base, iov[i].iov_len, 1194 sg, SMB_DIRECT_MAX_SEND_SGES - 1, 1195 DMA_TO_DEVICE); 1196 if (sg_cnt <= 0) { 1197 pr_err("failed to map buffer\n"); 1198 ret = -ENOMEM; 1199 goto err; 1200 } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) { 1201 pr_err("buffer not fitted into sges\n"); 1202 ret = -E2BIG; 1203 ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt, 1204 DMA_TO_DEVICE); 1205 goto err; 1206 } 1207 1208 for (j = 0; j < sg_cnt; j++) { 1209 sge = &msg->sge[msg->num_sge]; 1210 sge->addr = sg_dma_address(&sg[j]); 1211 sge->length = sg_dma_len(&sg[j]); 1212 sge->lkey = t->pd->local_dma_lkey; 1213 msg->num_sge++; 1214 } 1215 } 1216 1217 ret = post_sendmsg(t, send_ctx, msg); 1218 if (ret) 1219 goto err; 1220 return 0; 1221 err: 1222 smb_direct_free_sendmsg(t, msg); 1223 atomic_inc(&t->send_credits); 1224 return ret; 1225 } 1226 1227 static int smb_direct_writev(struct ksmbd_transport *t, 1228 struct kvec *iov, int niovs, int buflen, 1229 bool need_invalidate, unsigned int remote_key) 1230 { 1231 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1232 int remaining_data_length; 1233 int start, i, j; 1234 int max_iov_size = st->max_send_size - 1235 sizeof(struct smb_direct_data_transfer); 1236 int ret; 1237 struct kvec vec; 1238 struct smb_direct_send_ctx send_ctx; 1239 1240 if (st->status != SMB_DIRECT_CS_CONNECTED) 1241 return -ENOTCONN; 1242 
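	/*
	 * Fragmentation sketch: each SMB_DIRECT data-transfer message can
	 * carry at most max_iov_size bytes of payload (max_send_size minus
	 * the header built by smb_direct_create_header()), so the iovec
	 * array is walked and split into as many messages as needed, with
	 * remaining_data_length telling the peer how much of the upper-layer
	 * PDU is still to come. With the default 1364-byte max_send_size and
	 * the 24-byte data-transfer header, that is 1340 bytes per message.
	 */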
	//FIXME: skip RFC1002 header..
	buflen -= 4;

	remaining_data_length = buflen;
	ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);

	smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
	start = i = 1;
	buflen = 0;
	while (true) {
		buflen += iov[i].iov_len;
		if (buflen > max_iov_size) {
			if (i > start) {
				remaining_data_length -=
					(buflen - iov[i].iov_len);
				ret = smb_direct_post_send_data(st, &send_ctx,
								&iov[start], i - start,
								remaining_data_length);
				if (ret)
					goto done;
			} else {
				/* iov[start] is too big, break it */
				int nvec = (buflen + max_iov_size - 1) /
						max_iov_size;

				for (j = 0; j < nvec; j++) {
					vec.iov_base =
						(char *)iov[start].iov_base +
						j * max_iov_size;
					vec.iov_len =
						min_t(int, max_iov_size,
						      buflen - max_iov_size * j);
					remaining_data_length -= vec.iov_len;
					ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
									remaining_data_length);
					if (ret)
						goto done;
				}
				i++;
				if (i == niovs)
					break;
			}
			start = i;
			buflen = 0;
		} else {
			i++;
			if (i == niovs) {
				/* send out all remaining vecs */
				remaining_data_length -= buflen;
				ret = smb_direct_post_send_data(st, &send_ctx,
								&iov[start], i - start,
								remaining_data_length);
				if (ret)
					goto done;
				break;
			}
		}
	}

done:
	ret = smb_direct_flush_send_list(st, &send_ctx, true);

	/*
	 * As an optimization, we don't wait for each individual I/O to finish
	 * before posting the next one. Post them all, then wait for the
	 * pending send count to reach 0, which means all posted sends have
	 * completed and we are good to return.
	 */

	wait_event(st->wait_send_pending,
		   atomic_read(&st->send_pending) == 0);
	return ret;
}

static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t,
					struct smb_direct_rdma_rw_msg *msg,
					enum dma_data_direction dir)
{
	rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
			    msg->sgt.sgl, msg->sgt.nents, dir);
	sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
	kfree(msg);
}

static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
			    enum dma_data_direction dir)
{
	struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe,
							  struct smb_direct_rdma_rw_msg, cqe);
	struct smb_direct_transport *t = msg->t;

	if (wc->status != IB_WC_SUCCESS) {
		msg->status = -EIO;
		pr_err("read/write error.
opcode = %d, status = %s(%d)\n", 1337 wc->opcode, ib_wc_status_msg(wc->status), wc->status); 1338 if (wc->status != IB_WC_WR_FLUSH_ERR) 1339 smb_direct_disconnect_rdma_connection(t); 1340 } 1341 1342 complete(msg->completion); 1343 } 1344 1345 static void read_done(struct ib_cq *cq, struct ib_wc *wc) 1346 { 1347 read_write_done(cq, wc, DMA_FROM_DEVICE); 1348 } 1349 1350 static void write_done(struct ib_cq *cq, struct ib_wc *wc) 1351 { 1352 read_write_done(cq, wc, DMA_TO_DEVICE); 1353 } 1354 1355 static int smb_direct_rdma_xmit(struct smb_direct_transport *t, 1356 void *buf, int buf_len, 1357 struct smb2_buffer_desc_v1 *desc, 1358 unsigned int desc_len, 1359 bool is_read) 1360 { 1361 struct smb_direct_rdma_rw_msg *msg, *next_msg; 1362 int i, ret; 1363 DECLARE_COMPLETION_ONSTACK(completion); 1364 struct ib_send_wr *first_wr; 1365 LIST_HEAD(msg_list); 1366 char *desc_buf; 1367 int credits_needed; 1368 unsigned int desc_buf_len, desc_num = 0; 1369 1370 if (t->status != SMB_DIRECT_CS_CONNECTED) 1371 return -ENOTCONN; 1372 1373 if (buf_len > t->max_rdma_rw_size) 1374 return -EINVAL; 1375 1376 /* calculate needed credits */ 1377 credits_needed = 0; 1378 desc_buf = buf; 1379 for (i = 0; i < desc_len / sizeof(*desc); i++) { 1380 if (!buf_len) 1381 break; 1382 1383 desc_buf_len = le32_to_cpu(desc[i].length); 1384 if (!desc_buf_len) 1385 return -EINVAL; 1386 1387 if (desc_buf_len > buf_len) { 1388 desc_buf_len = buf_len; 1389 desc[i].length = cpu_to_le32(desc_buf_len); 1390 buf_len = 0; 1391 } 1392 1393 credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len); 1394 desc_buf += desc_buf_len; 1395 buf_len -= desc_buf_len; 1396 desc_num++; 1397 } 1398 1399 ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n", 1400 str_read_write(is_read), buf_len, credits_needed); 1401 1402 ret = wait_for_rw_credits(t, credits_needed); 1403 if (ret < 0) 1404 return ret; 1405 1406 /* build rdma_rw_ctx for each descriptor */ 1407 desc_buf = buf; 1408 for (i = 0; i < desc_num; i++) { 1409 msg = kzalloc(struct_size(msg, sg_list, SG_CHUNK_SIZE), 1410 KSMBD_DEFAULT_GFP); 1411 if (!msg) { 1412 ret = -ENOMEM; 1413 goto out; 1414 } 1415 1416 desc_buf_len = le32_to_cpu(desc[i].length); 1417 1418 msg->t = t; 1419 msg->cqe.done = is_read ? read_done : write_done; 1420 msg->completion = &completion; 1421 1422 msg->sgt.sgl = &msg->sg_list[0]; 1423 ret = sg_alloc_table_chained(&msg->sgt, 1424 get_buf_page_count(desc_buf, desc_buf_len), 1425 msg->sg_list, SG_CHUNK_SIZE); 1426 if (ret) { 1427 kfree(msg); 1428 ret = -ENOMEM; 1429 goto out; 1430 } 1431 1432 ret = get_sg_list(desc_buf, desc_buf_len, 1433 msg->sgt.sgl, msg->sgt.orig_nents); 1434 if (ret < 0) { 1435 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1436 kfree(msg); 1437 goto out; 1438 } 1439 1440 ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port, 1441 msg->sgt.sgl, 1442 get_buf_page_count(desc_buf, desc_buf_len), 1443 0, 1444 le64_to_cpu(desc[i].offset), 1445 le32_to_cpu(desc[i].token), 1446 is_read ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); 1447 if (ret < 0) { 1448 pr_err("failed to init rdma_rw_ctx: %d\n", ret); 1449 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1450 kfree(msg); 1451 goto out; 1452 } 1453 1454 list_add_tail(&msg->list, &msg_list); 1455 desc_buf += desc_buf_len; 1456 } 1457 1458 /* concatenate work requests of rdma_rw_ctxs */ 1459 first_wr = NULL; 1460 list_for_each_entry_reverse(msg, &msg_list, list) { 1461 first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port, 1462 &msg->cqe, first_wr); 1463 } 1464 1465 ret = ib_post_send(t->qp, first_wr, NULL); 1466 if (ret) { 1467 pr_err("failed to post send wr for RDMA R/W: %d\n", ret); 1468 goto out; 1469 } 1470 1471 msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list); 1472 wait_for_completion(&completion); 1473 ret = msg->status; 1474 out: 1475 list_for_each_entry_safe(msg, next_msg, &msg_list, list) { 1476 list_del(&msg->list); 1477 smb_direct_free_rdma_rw_msg(t, msg, 1478 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 1479 } 1480 atomic_add(credits_needed, &t->rw_credits); 1481 wake_up(&t->wait_rw_credits); 1482 return ret; 1483 } 1484 1485 static int smb_direct_rdma_write(struct ksmbd_transport *t, 1486 void *buf, unsigned int buflen, 1487 struct smb2_buffer_desc_v1 *desc, 1488 unsigned int desc_len) 1489 { 1490 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, 1491 desc, desc_len, false); 1492 } 1493 1494 static int smb_direct_rdma_read(struct ksmbd_transport *t, 1495 void *buf, unsigned int buflen, 1496 struct smb2_buffer_desc_v1 *desc, 1497 unsigned int desc_len) 1498 { 1499 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, 1500 desc, desc_len, true); 1501 } 1502 1503 static void smb_direct_disconnect(struct ksmbd_transport *t) 1504 { 1505 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1506 1507 ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id); 1508 1509 smb_direct_disconnect_rdma_work(&st->disconnect_work); 1510 wait_event_interruptible(st->wait_status, 1511 st->status == SMB_DIRECT_CS_DISCONNECTED); 1512 free_transport(st); 1513 } 1514 1515 static void smb_direct_shutdown(struct ksmbd_transport *t) 1516 { 1517 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1518 1519 ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id); 1520 1521 smb_direct_disconnect_rdma_work(&st->disconnect_work); 1522 } 1523 1524 static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, 1525 struct rdma_cm_event *event) 1526 { 1527 struct smb_direct_transport *t = cm_id->context; 1528 1529 ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n", 1530 cm_id, rdma_event_msg(event->event), event->event); 1531 1532 switch (event->event) { 1533 case RDMA_CM_EVENT_ESTABLISHED: { 1534 t->status = SMB_DIRECT_CS_CONNECTED; 1535 wake_up_interruptible(&t->wait_status); 1536 break; 1537 } 1538 case RDMA_CM_EVENT_DEVICE_REMOVAL: 1539 case RDMA_CM_EVENT_DISCONNECTED: { 1540 ib_drain_qp(t->qp); 1541 1542 t->status = SMB_DIRECT_CS_DISCONNECTED; 1543 wake_up_interruptible(&t->wait_status); 1544 wake_up_interruptible(&t->wait_reassembly_queue); 1545 wake_up(&t->wait_send_credits); 1546 break; 1547 } 1548 case RDMA_CM_EVENT_CONNECT_ERROR: { 1549 t->status = SMB_DIRECT_CS_DISCONNECTED; 1550 wake_up_interruptible(&t->wait_status); 1551 break; 1552 } 1553 default: 1554 pr_err("Unexpected RDMA CM event. 
cm_id=%p, event=%s (%d)\n", 1555 cm_id, rdma_event_msg(event->event), 1556 event->event); 1557 break; 1558 } 1559 return 0; 1560 } 1561 1562 static void smb_direct_qpair_handler(struct ib_event *event, void *context) 1563 { 1564 struct smb_direct_transport *t = context; 1565 1566 ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n", 1567 t->cm_id, ib_event_msg(event->event), event->event); 1568 1569 switch (event->event) { 1570 case IB_EVENT_CQ_ERR: 1571 case IB_EVENT_QP_FATAL: 1572 smb_direct_disconnect_rdma_connection(t); 1573 break; 1574 default: 1575 break; 1576 } 1577 } 1578 1579 static int smb_direct_send_negotiate_response(struct smb_direct_transport *t, 1580 int failed) 1581 { 1582 struct smb_direct_sendmsg *sendmsg; 1583 struct smb_direct_negotiate_resp *resp; 1584 int ret; 1585 1586 sendmsg = smb_direct_alloc_sendmsg(t); 1587 if (IS_ERR(sendmsg)) 1588 return -ENOMEM; 1589 1590 resp = (struct smb_direct_negotiate_resp *)sendmsg->packet; 1591 if (failed) { 1592 memset(resp, 0, sizeof(*resp)); 1593 resp->min_version = cpu_to_le16(0x0100); 1594 resp->max_version = cpu_to_le16(0x0100); 1595 resp->status = STATUS_NOT_SUPPORTED; 1596 } else { 1597 resp->status = STATUS_SUCCESS; 1598 resp->min_version = SMB_DIRECT_VERSION_LE; 1599 resp->max_version = SMB_DIRECT_VERSION_LE; 1600 resp->negotiated_version = SMB_DIRECT_VERSION_LE; 1601 resp->reserved = 0; 1602 resp->credits_requested = 1603 cpu_to_le16(t->send_credit_target); 1604 resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); 1605 resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size); 1606 resp->preferred_send_size = cpu_to_le32(t->max_send_size); 1607 resp->max_receive_size = cpu_to_le32(t->max_recv_size); 1608 resp->max_fragmented_size = 1609 cpu_to_le32(t->max_fragmented_recv_size); 1610 } 1611 1612 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device, 1613 (void *)resp, sizeof(*resp), 1614 DMA_TO_DEVICE); 1615 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); 1616 if (ret) { 1617 smb_direct_free_sendmsg(t, sendmsg); 1618 return ret; 1619 } 1620 1621 sendmsg->num_sge = 1; 1622 sendmsg->sge[0].length = sizeof(*resp); 1623 sendmsg->sge[0].lkey = t->pd->local_dma_lkey; 1624 1625 ret = post_sendmsg(t, NULL, sendmsg); 1626 if (ret) { 1627 smb_direct_free_sendmsg(t, sendmsg); 1628 return ret; 1629 } 1630 1631 wait_event(t->wait_send_pending, 1632 atomic_read(&t->send_pending) == 0); 1633 return 0; 1634 } 1635 1636 static int smb_direct_accept_client(struct smb_direct_transport *t) 1637 { 1638 struct rdma_conn_param conn_param; 1639 struct ib_port_immutable port_immutable; 1640 u32 ird_ord_hdr[2]; 1641 int ret; 1642 1643 memset(&conn_param, 0, sizeof(conn_param)); 1644 conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom, 1645 SMB_DIRECT_CM_INITIATOR_DEPTH); 1646 conn_param.responder_resources = 0; 1647 1648 t->cm_id->device->ops.get_port_immutable(t->cm_id->device, 1649 t->cm_id->port_num, 1650 &port_immutable); 1651 if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { 1652 ird_ord_hdr[0] = conn_param.responder_resources; 1653 ird_ord_hdr[1] = 1; 1654 conn_param.private_data = ird_ord_hdr; 1655 conn_param.private_data_len = sizeof(ird_ord_hdr); 1656 } else { 1657 conn_param.private_data = NULL; 1658 conn_param.private_data_len = 0; 1659 } 1660 conn_param.retry_count = SMB_DIRECT_CM_RETRY; 1661 conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY; 1662 conn_param.flow_control = 0; 1663 1664 ret = rdma_accept(t->cm_id, &conn_param); 1665 if (ret) { 
1666 pr_err("error at rdma_accept: %d\n", ret); 1667 return ret; 1668 } 1669 return 0; 1670 } 1671 1672 static int smb_direct_prepare_negotiation(struct smb_direct_transport *t) 1673 { 1674 int ret; 1675 struct smb_direct_recvmsg *recvmsg; 1676 1677 recvmsg = get_free_recvmsg(t); 1678 if (!recvmsg) 1679 return -ENOMEM; 1680 recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ; 1681 1682 ret = smb_direct_post_recv(t, recvmsg); 1683 if (ret) { 1684 pr_err("Can't post recv: %d\n", ret); 1685 goto out_err; 1686 } 1687 1688 t->negotiation_requested = false; 1689 ret = smb_direct_accept_client(t); 1690 if (ret) { 1691 pr_err("Can't accept client\n"); 1692 goto out_err; 1693 } 1694 1695 smb_direct_post_recv_credits(&t->post_recv_credits_work.work); 1696 return 0; 1697 out_err: 1698 put_recvmsg(t, recvmsg); 1699 return ret; 1700 } 1701 1702 static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t) 1703 { 1704 return min_t(unsigned int, 1705 t->cm_id->device->attrs.max_fast_reg_page_list_len, 1706 256); 1707 } 1708 1709 static int smb_direct_init_params(struct smb_direct_transport *t, 1710 struct ib_qp_cap *cap) 1711 { 1712 struct ib_device *device = t->cm_id->device; 1713 int max_send_sges, max_rw_wrs, max_send_wrs; 1714 unsigned int max_sge_per_wr, wrs_per_credit; 1715 1716 /* need 3 more sge. because a SMB_DIRECT header, SMB2 header, 1717 * SMB2 response could be mapped. 1718 */ 1719 t->max_send_size = smb_direct_max_send_size; 1720 max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3; 1721 if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) { 1722 pr_err("max_send_size %d is too large\n", t->max_send_size); 1723 return -EINVAL; 1724 } 1725 1726 /* Calculate the number of work requests for RDMA R/W. 1727 * The maximum number of pages which can be registered 1728 * with one Memory region can be transferred with one 1729 * R/W credit. And at least 4 work requests for each credit 1730 * are needed for MR registration, RDMA R/W, local & remote 1731 * MR invalidation. 
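	 *
	 * e.g. assuming 4KiB pages and a device allowing 256 pages per MR
	 * (the cap in smb_direct_get_max_fr_pages()), one R/W credit covers
	 * (256 - 1) * 4KiB = 1020KiB, so an 8MiB max_rdma_rw_size needs
	 * DIV_ROUND_UP(8MiB, 1020KiB) = 9 credits.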
	 */
	t->max_rdma_rw_size = smb_direct_max_read_write_size;
	t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t);
	t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size,
					 (t->pages_per_rw_credit - 1) *
					 PAGE_SIZE);

	max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge,
			       device->attrs.max_sge_rd);
	max_sge_per_wr = max_t(unsigned int, max_sge_per_wr,
			       max_send_sges);
	wrs_per_credit = max_t(unsigned int, 4,
			       DIV_ROUND_UP(t->pages_per_rw_credit,
					    max_sge_per_wr) + 1);
	max_rw_wrs = t->max_rw_credits * wrs_per_credit;

	max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
	if (max_send_wrs > device->attrs.max_cqe ||
	    max_send_wrs > device->attrs.max_qp_wr) {
		pr_err("consider lowering send_credit_target = %d\n",
		       smb_direct_send_credit_target);
		pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
		       device->attrs.max_cqe, device->attrs.max_qp_wr);
		return -EINVAL;
	}

	if (smb_direct_receive_credit_max > device->attrs.max_cqe ||
	    smb_direct_receive_credit_max > device->attrs.max_qp_wr) {
		pr_err("consider lowering receive_credit_max = %d\n",
		       smb_direct_receive_credit_max);
		pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
		       device->attrs.max_cqe, device->attrs.max_qp_wr);
		return -EINVAL;
	}

	if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
		pr_err("warning: device max_recv_sge = %d too small\n",
		       device->attrs.max_recv_sge);
		return -EINVAL;
	}

	t->recv_credits = 0;
	t->count_avail_recvmsg = 0;

	t->recv_credit_max = smb_direct_receive_credit_max;
	t->recv_credit_target = 10;
	t->new_recv_credits = 0;

	t->send_credit_target = smb_direct_send_credit_target;
	atomic_set(&t->send_credits, 0);
	atomic_set(&t->rw_credits, t->max_rw_credits);

	t->max_send_size = smb_direct_max_send_size;
	t->max_recv_size = smb_direct_max_receive_size;
	t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;

	cap->max_send_wr = max_send_wrs;
	cap->max_recv_wr = t->recv_credit_max;
	cap->max_send_sge = max_sge_per_wr;
	cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
	cap->max_inline_data = 0;
	cap->max_rdma_ctxs = t->max_rw_credits;
	return 0;
}

static void smb_direct_destroy_pools(struct smb_direct_transport *t)
{
	struct smb_direct_recvmsg *recvmsg;

	while ((recvmsg = get_free_recvmsg(t)))
		mempool_free(recvmsg, t->recvmsg_mempool);
	while ((recvmsg = get_empty_recvmsg(t)))
		mempool_free(recvmsg, t->recvmsg_mempool);

	mempool_destroy(t->recvmsg_mempool);
	t->recvmsg_mempool = NULL;

	kmem_cache_destroy(t->recvmsg_cache);
	t->recvmsg_cache = NULL;

	mempool_destroy(t->sendmsg_mempool);
	t->sendmsg_mempool = NULL;

	kmem_cache_destroy(t->sendmsg_cache);
	t->sendmsg_cache = NULL;
}

static int smb_direct_create_pools(struct smb_direct_transport *t)
{
	char name[80];
	int i;
	struct smb_direct_recvmsg *recvmsg;

	snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t);
	t->sendmsg_cache = kmem_cache_create(name,
					     sizeof(struct smb_direct_sendmsg) +
					     sizeof(struct smb_direct_negotiate_resp),
					     0, SLAB_HWCACHE_ALIGN, NULL);
	if (!t->sendmsg_cache)
		return -ENOMEM;

	t->sendmsg_mempool =
mempool_create(t->send_credit_target, 1834 mempool_alloc_slab, mempool_free_slab, 1835 t->sendmsg_cache); 1836 if (!t->sendmsg_mempool) 1837 goto err; 1838 1839 snprintf(name, sizeof(name), "smb_direct_resp_%p", t); 1840 t->recvmsg_cache = kmem_cache_create(name, 1841 sizeof(struct smb_direct_recvmsg) + 1842 t->max_recv_size, 1843 0, SLAB_HWCACHE_ALIGN, NULL); 1844 if (!t->recvmsg_cache) 1845 goto err; 1846 1847 t->recvmsg_mempool = 1848 mempool_create(t->recv_credit_max, mempool_alloc_slab, 1849 mempool_free_slab, t->recvmsg_cache); 1850 if (!t->recvmsg_mempool) 1851 goto err; 1852 1853 INIT_LIST_HEAD(&t->recvmsg_queue); 1854 1855 for (i = 0; i < t->recv_credit_max; i++) { 1856 recvmsg = mempool_alloc(t->recvmsg_mempool, KSMBD_DEFAULT_GFP); 1857 if (!recvmsg) 1858 goto err; 1859 recvmsg->transport = t; 1860 list_add(&recvmsg->list, &t->recvmsg_queue); 1861 } 1862 t->count_avail_recvmsg = t->recv_credit_max; 1863 1864 return 0; 1865 err: 1866 smb_direct_destroy_pools(t); 1867 return -ENOMEM; 1868 } 1869 1870 static int smb_direct_create_qpair(struct smb_direct_transport *t, 1871 struct ib_qp_cap *cap) 1872 { 1873 int ret; 1874 struct ib_qp_init_attr qp_attr; 1875 int pages_per_rw; 1876 1877 t->pd = ib_alloc_pd(t->cm_id->device, 0); 1878 if (IS_ERR(t->pd)) { 1879 pr_err("Can't create RDMA PD\n"); 1880 ret = PTR_ERR(t->pd); 1881 t->pd = NULL; 1882 return ret; 1883 } 1884 1885 t->send_cq = ib_alloc_cq(t->cm_id->device, t, 1886 smb_direct_send_credit_target + cap->max_rdma_ctxs, 1887 0, IB_POLL_WORKQUEUE); 1888 if (IS_ERR(t->send_cq)) { 1889 pr_err("Can't create RDMA send CQ\n"); 1890 ret = PTR_ERR(t->send_cq); 1891 t->send_cq = NULL; 1892 goto err; 1893 } 1894 1895 t->recv_cq = ib_alloc_cq(t->cm_id->device, t, 1896 t->recv_credit_max, 0, IB_POLL_WORKQUEUE); 1897 if (IS_ERR(t->recv_cq)) { 1898 pr_err("Can't create RDMA recv CQ\n"); 1899 ret = PTR_ERR(t->recv_cq); 1900 t->recv_cq = NULL; 1901 goto err; 1902 } 1903 1904 memset(&qp_attr, 0, sizeof(qp_attr)); 1905 qp_attr.event_handler = smb_direct_qpair_handler; 1906 qp_attr.qp_context = t; 1907 qp_attr.cap = *cap; 1908 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 1909 qp_attr.qp_type = IB_QPT_RC; 1910 qp_attr.send_cq = t->send_cq; 1911 qp_attr.recv_cq = t->recv_cq; 1912 qp_attr.port_num = ~0; 1913 1914 ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr); 1915 if (ret) { 1916 pr_err("Can't create RDMA QP: %d\n", ret); 1917 goto err; 1918 } 1919 1920 t->qp = t->cm_id->qp; 1921 t->cm_id->event_handler = smb_direct_cm_handler; 1922 1923 pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1; 1924 if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) { 1925 ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, 1926 t->max_rw_credits, IB_MR_TYPE_MEM_REG, 1927 t->pages_per_rw_credit, 0); 1928 if (ret) { 1929 pr_err("failed to init mr pool count %d pages %d\n", 1930 t->max_rw_credits, t->pages_per_rw_credit); 1931 goto err; 1932 } 1933 } 1934 1935 return 0; 1936 err: 1937 if (t->qp) { 1938 ib_destroy_qp(t->qp); 1939 t->qp = NULL; 1940 } 1941 if (t->recv_cq) { 1942 ib_destroy_cq(t->recv_cq); 1943 t->recv_cq = NULL; 1944 } 1945 if (t->send_cq) { 1946 ib_destroy_cq(t->send_cq); 1947 t->send_cq = NULL; 1948 } 1949 if (t->pd) { 1950 ib_dealloc_pd(t->pd); 1951 t->pd = NULL; 1952 } 1953 return ret; 1954 } 1955 1956 static int smb_direct_prepare(struct ksmbd_transport *t) 1957 { 1958 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1959 struct smb_direct_recvmsg *recvmsg; 1960 struct smb_direct_negotiate_req *req; 1961 int ret; 1962 1963 
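	/*
	 * [MS-SMBD] negotiation: wait for the client's Negotiate Request
	 * (queued on the reassembly queue by recv_done()), clamp the local
	 * send/receive sizes against the limits the client advertised, and
	 * then post the Negotiate Response.
	 */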
static int smb_direct_prepare(struct ksmbd_transport *t)
{
	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
	struct smb_direct_recvmsg *recvmsg;
	struct smb_direct_negotiate_req *req;
	int ret;

	ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
	ret = wait_event_interruptible_timeout(st->wait_status,
					       st->negotiation_requested ||
					       st->status == SMB_DIRECT_CS_DISCONNECTED,
					       SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
	if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
		return ret < 0 ? ret : -ETIMEDOUT;

	recvmsg = get_first_reassembly(st);
	if (!recvmsg)
		return -ECONNABORTED;

	ret = smb_direct_check_recvmsg(recvmsg);
	if (ret == -ECONNABORTED)
		goto out;

	req = (struct smb_direct_negotiate_req *)recvmsg->packet;
	st->max_recv_size = min_t(int, st->max_recv_size,
				  le32_to_cpu(req->preferred_send_size));
	st->max_send_size = min_t(int, st->max_send_size,
				  le32_to_cpu(req->max_receive_size));
	st->max_fragmented_send_size =
		le32_to_cpu(req->max_fragmented_size);
	st->max_fragmented_recv_size =
		(st->recv_credit_max * st->max_recv_size) / 2;

	ret = smb_direct_send_negotiate_response(st, ret);
out:
	spin_lock_irq(&st->reassembly_queue_lock);
	st->reassembly_queue_length--;
	list_del(&recvmsg->list);
	spin_unlock_irq(&st->reassembly_queue_lock);
	put_recvmsg(st, recvmsg);

	return ret;
}

static int smb_direct_connect(struct smb_direct_transport *st)
{
	int ret;
	struct ib_qp_cap qp_cap;

	ret = smb_direct_init_params(st, &qp_cap);
	if (ret) {
		pr_err("Can't configure RDMA parameters\n");
		return ret;
	}

	ret = smb_direct_create_pools(st);
	if (ret) {
		pr_err("Can't init RDMA pool: %d\n", ret);
		return ret;
	}

	ret = smb_direct_create_qpair(st, &qp_cap);
	if (ret) {
		pr_err("Can't accept RDMA client: %d\n", ret);
		return ret;
	}

	ret = smb_direct_prepare_negotiation(st);
	if (ret) {
		pr_err("Can't negotiate: %d\n", ret);
		return ret;
	}
	return 0;
}

static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
{
	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		return false;
	if (attrs->max_fast_reg_page_list_len == 0)
		return false;
	return true;
}
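/*
 * Handle an RDMA_CM connect request: require FRWR support, allocate a
 * transport, set up pools and the queue pair, start SMB_DIRECT negotiation,
 * and spawn a ksmbd connection handler thread.
 */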
static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
{
	struct smb_direct_transport *t;
	struct task_struct *handler;
	int ret;

	if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
		ksmbd_debug(RDMA,
			    "Fast Registration Work Requests are not supported. device capabilities=%llx\n",
			    new_cm_id->device->attrs.device_cap_flags);
		return -EPROTONOSUPPORT;
	}

	t = alloc_transport(new_cm_id);
	if (!t)
		return -ENOMEM;

	ret = smb_direct_connect(t);
	if (ret)
		goto out_err;

	handler = kthread_run(ksmbd_conn_handler_loop,
			      KSMBD_TRANS(t)->conn, "ksmbd:r%u",
			      smb_direct_port);
	if (IS_ERR(handler)) {
		ret = PTR_ERR(handler);
		pr_err("Can't start thread\n");
		goto out_err;
	}

	return 0;
out_err:
	free_transport(t);
	return ret;
}

static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
				     struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_CONNECT_REQUEST: {
		int ret = smb_direct_handle_connect_request(cm_id);

		if (ret) {
			pr_err("Can't create transport: %d\n", ret);
			return ret;
		}

		ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
			    cm_id);
		break;
	}
	default:
		pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
		       cm_id, rdma_event_msg(event->event), event->event);
		break;
	}
	return 0;
}

static int smb_direct_listen(int port)
{
	int ret;
	struct rdma_cm_id *cm_id;
	struct sockaddr_in sin = {
		.sin_family		= AF_INET,
		.sin_addr.s_addr	= htonl(INADDR_ANY),
		.sin_port		= htons(port),
	};

	cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
			       &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(cm_id)) {
		pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
		return PTR_ERR(cm_id);
	}

	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
	if (ret) {
		pr_err("Can't bind: %d\n", ret);
		goto err;
	}

	smb_direct_listener.cm_id = cm_id;

	ret = rdma_listen(cm_id, 10);
	if (ret) {
		pr_err("Can't listen: %d\n", ret);
		goto err;
	}
	return 0;
err:
	smb_direct_listener.cm_id = NULL;
	rdma_destroy_id(cm_id);
	return ret;
}

static int smb_direct_ib_client_add(struct ib_device *ib_dev)
{
	struct smb_direct_device *smb_dev;

	/* Use port 5445 if the device type is iWARP (not InfiniBand) */
	if (ib_dev->node_type != RDMA_NODE_IB_CA)
		smb_direct_port = SMB_DIRECT_PORT_IWARP;

	if (!rdma_frwr_is_supported(&ib_dev->attrs))
		return 0;

	smb_dev = kzalloc(sizeof(*smb_dev), KSMBD_DEFAULT_GFP);
	if (!smb_dev)
		return -ENOMEM;
	smb_dev->ib_dev = ib_dev;

	write_lock(&smb_direct_device_lock);
	list_add(&smb_dev->list, &smb_direct_device_list);
	write_unlock(&smb_direct_device_lock);

	ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
	return 0;
}

static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
					void *client_data)
{
	struct smb_direct_device *smb_dev, *tmp;

	write_lock(&smb_direct_device_lock);
	list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
		if (smb_dev->ib_dev == ib_dev) {
			list_del(&smb_dev->list);
			kfree(smb_dev);
			break;
		}
	}
	write_unlock(&smb_direct_device_lock);
}

static struct ib_client smb_direct_ib_client = {
	.name	= "ksmbd_smb_direct_ib",
	.add	= smb_direct_ib_client_add,
	.remove	= smb_direct_ib_client_remove,
};
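/*
 * Module init for the SMB Direct transport: register the IB client, create
 * the transport workqueue, and start the RDMA listener.
 */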
int ksmbd_rdma_init(void)
{
	int ret;

	smb_direct_listener.cm_id = NULL;

	ret = ib_register_client(&smb_direct_ib_client);
	if (ret) {
		pr_err("failed to ib_register_client\n");
		return ret;
	}

	/* When a client runs out of send credits, the server grants credits
	 * by sending a packet through this queue.  This avoids the situation
	 * where a client cannot send packets for lack of credits.
	 */
	smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
					WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
	if (!smb_direct_wq)
		return -ENOMEM;

	ret = smb_direct_listen(smb_direct_port);
	if (ret) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
		pr_err("Can't listen: %d\n", ret);
		return ret;
	}

	ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
		    smb_direct_listener.cm_id);
	return 0;
}

void ksmbd_rdma_destroy(void)
{
	if (!smb_direct_listener.cm_id)
		return;

	ib_unregister_client(&smb_direct_ib_client);
	rdma_destroy_id(smb_direct_listener.cm_id);

	smb_direct_listener.cm_id = NULL;

	if (smb_direct_wq) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
	}
}

bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
{
	struct smb_direct_device *smb_dev;
	int i;
	bool rdma_capable = false;

	read_lock(&smb_direct_device_lock);
	list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
		for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
			struct net_device *ndev;

			ndev = ib_device_get_netdev(smb_dev->ib_dev, i + 1);
			if (!ndev)
				continue;

			if (ndev == netdev) {
				dev_put(ndev);
				rdma_capable = true;
				goto out;
			}
			dev_put(ndev);
		}
	}
out:
	read_unlock(&smb_direct_device_lock);

	if (!rdma_capable) {
		struct ib_device *ibdev;

		ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
		if (ibdev) {
			rdma_capable = rdma_frwr_is_supported(&ibdev->attrs);
			ib_device_put(ibdev);
		}
	}

	ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n",
		    netdev->name, str_true_false(rdma_capable));

	return rdma_capable;
}

static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
	.prepare	= smb_direct_prepare,
	.disconnect	= smb_direct_disconnect,
	.shutdown	= smb_direct_shutdown,
	.writev		= smb_direct_writev,
	.read		= smb_direct_read,
	.rdma_read	= smb_direct_rdma_read,
	.rdma_write	= smb_direct_rdma_write,
};