1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2017, Microsoft Corporation.
4 * Copyright (C) 2018, LG Electronics.
5 *
6 * Author(s): Long Li <longli@microsoft.com>,
7 * Hyunchul Lee <hyc.lee@gmail.com>
8 */
9
10 #define SUBMOD_NAME "smb_direct"
11
12 #include <linux/kthread.h>
13 #include <linux/list.h>
14 #include <linux/mempool.h>
15 #include <linux/highmem.h>
16 #include <linux/scatterlist.h>
17 #include <linux/string_choices.h>
18 #include <rdma/ib_verbs.h>
19 #include <rdma/rdma_cm.h>
20 #include <rdma/rw.h>
21
22 #include "glob.h"
23 #include "connection.h"
24 #include "smb_common.h"
25 #include "../common/smb2status.h"
26 #include "transport_rdma.h"
27
28 #define SMB_DIRECT_PORT_IWARP 5445
29 #define SMB_DIRECT_PORT_INFINIBAND 445
30
31 #define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100)
32
33 /* SMB_DIRECT negotiation timeout in seconds */
34 #define SMB_DIRECT_NEGOTIATE_TIMEOUT 120
35
36 #define SMB_DIRECT_MAX_SEND_SGES 6
37 #define SMB_DIRECT_MAX_RECV_SGES 1
38
39 /*
40 * Default maximum number of outstanding RDMA read/write operations on this connection.
41 * This value may be lowered during QP creation if the hardware limit is smaller.
42 */
43 #define SMB_DIRECT_CM_INITIATOR_DEPTH 8
44
45 /* Maximum number of retries on data transfer operations */
46 #define SMB_DIRECT_CM_RETRY 6
47 /* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
48 #define SMB_DIRECT_CM_RNR_RETRY 0
49
50 /*
51 * User configurable initial values per SMB_DIRECT transport connection
52 * as defined in [MS-SMBD] 3.1.1.1
53 * These values may change after SMB_DIRECT negotiation
54 */
55
56 /* Use port 445 as the SMB Direct port by default */
57 static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;
58
59 /* The maximum number of receive credits the local peer grants to the remote peer */
60 static int smb_direct_receive_credit_max = 255;
61
62 /* The number of send credits the local peer requests from the remote peer */
63 static int smb_direct_send_credit_target = 255;
64
65 /* The maximum size of a single message that can be sent to the remote peer */
66 static int smb_direct_max_send_size = 1364;
67
68 /* The maximum fragmented upper-layer payload receive size supported */
69 static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
70
71 /* The maximum size of a single message that can be received */
72 static int smb_direct_max_receive_size = 1364;
73
74 static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;
75
76 static LIST_HEAD(smb_direct_device_list);
77 static DEFINE_RWLOCK(smb_direct_device_lock);
78
79 struct smb_direct_device {
80 struct ib_device *ib_dev;
81 struct list_head list;
82 };
83
84 static struct smb_direct_listener {
85 struct rdma_cm_id *cm_id;
86 } smb_direct_listener;
87
88 static struct workqueue_struct *smb_direct_wq;
89
90 enum smb_direct_status {
91 SMB_DIRECT_CS_NEW = 0,
92 SMB_DIRECT_CS_CONNECTED,
93 SMB_DIRECT_CS_DISCONNECTING,
94 SMB_DIRECT_CS_DISCONNECTED,
95 };
96
97 struct smb_direct_transport {
98 struct ksmbd_transport transport;
99
100 enum smb_direct_status status;
101 bool full_packet_received;
102 wait_queue_head_t wait_status;
103
104 struct rdma_cm_id *cm_id;
105 struct ib_cq *send_cq;
106 struct ib_cq *recv_cq;
107 struct ib_pd *pd;
108 struct ib_qp *qp;
109
110 int max_send_size;
111 int max_recv_size;
112 int max_fragmented_send_size;
113 int max_fragmented_recv_size;
114 int max_rdma_rw_size;
115
116 spinlock_t reassembly_queue_lock;
117 struct list_head reassembly_queue;
118 int reassembly_data_length;
119 int reassembly_queue_length;
120 int first_entry_offset;
121 wait_queue_head_t wait_reassembly_queue;
122
123 spinlock_t receive_credit_lock;
124 int recv_credits;
125 int count_avail_recvmsg;
126 int recv_credit_max;
127 int recv_credit_target;
128
129 spinlock_t recvmsg_queue_lock;
130 struct list_head recvmsg_queue;
131
132 spinlock_t empty_recvmsg_queue_lock;
133 struct list_head empty_recvmsg_queue;
134
135 int send_credit_target;
136 atomic_t send_credits;
137 spinlock_t lock_new_recv_credits;
138 int new_recv_credits;
139 int max_rw_credits;
140 int pages_per_rw_credit;
141 atomic_t rw_credits;
142
143 wait_queue_head_t wait_send_credits;
144 wait_queue_head_t wait_rw_credits;
145
146 mempool_t *sendmsg_mempool;
147 struct kmem_cache *sendmsg_cache;
148 mempool_t *recvmsg_mempool;
149 struct kmem_cache *recvmsg_cache;
150
151 wait_queue_head_t wait_send_pending;
152 atomic_t send_pending;
153
154 struct delayed_work post_recv_credits_work;
155 struct work_struct send_immediate_work;
156 struct work_struct disconnect_work;
157
158 bool negotiation_requested;
159 };
160
161 #define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport))
162
163 enum {
164 SMB_DIRECT_MSG_NEGOTIATE_REQ = 0,
165 SMB_DIRECT_MSG_DATA_TRANSFER
166 };
167
168 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
169
170 struct smb_direct_send_ctx {
171 struct list_head msg_list;
172 int wr_cnt;
173 bool need_invalidate_rkey;
174 unsigned int remote_key;
175 };
176
177 struct smb_direct_sendmsg {
178 struct smb_direct_transport *transport;
179 struct ib_send_wr wr;
180 struct list_head list;
181 int num_sge;
182 struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES];
183 struct ib_cqe cqe;
184 u8 packet[];
185 };
186
187 struct smb_direct_recvmsg {
188 struct smb_direct_transport *transport;
189 struct list_head list;
190 int type;
191 struct ib_sge sge;
192 struct ib_cqe cqe;
193 bool first_segment;
194 u8 packet[];
195 };
196
197 struct smb_direct_rdma_rw_msg {
198 struct smb_direct_transport *t;
199 struct ib_cqe cqe;
200 int status;
201 struct completion *completion;
202 struct list_head list;
203 struct rdma_rw_ctx rw_ctx;
204 struct sg_table sgt;
205 struct scatterlist sg_list[];
206 };
207
208 void init_smbd_max_io_size(unsigned int sz)
209 {
210 sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE);
211 smb_direct_max_read_write_size = sz;
212 }
213
214 unsigned int get_smbd_max_read_write_size(void)
215 {
216 return smb_direct_max_read_write_size;
217 }
218
219 static inline int get_buf_page_count(void *buf, int size)
220 {
221 return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
222 (uintptr_t)buf / PAGE_SIZE;
223 }
224
225 static void smb_direct_destroy_pools(struct smb_direct_transport *transport);
226 static void smb_direct_post_recv_credits(struct work_struct *work);
227 static int smb_direct_post_send_data(struct smb_direct_transport *t,
228 struct smb_direct_send_ctx *send_ctx,
229 struct kvec *iov, int niov,
230 int remaining_data_length);
231
232 static inline struct smb_direct_transport *
233 smb_trans_direct_transfort(struct ksmbd_transport *t)
234 {
235 return container_of(t, struct smb_direct_transport, transport);
236 }
237
238 static inline void
239 *smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg)
240 {
241 return (void *)recvmsg->packet;
242 }
243
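/*
 * Receive credits are replenished lazily: buffers are reposted only once the
 * credits currently granted to the peer have dropped to 1/8 of the configured
 * maximum and at least a quarter of that remainder can be backed by available
 * receive buffers.
 */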
244 static inline bool is_receive_credit_post_required(int receive_credits,
245 int avail_recvmsg_count)
246 {
247 return receive_credits <= (smb_direct_receive_credit_max >> 3) &&
248 avail_recvmsg_count >= (receive_credits >> 2);
249 }
250
251 static struct
252 smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t)
253 {
254 struct smb_direct_recvmsg *recvmsg = NULL;
255
256 spin_lock(&t->recvmsg_queue_lock);
257 if (!list_empty(&t->recvmsg_queue)) {
258 recvmsg = list_first_entry(&t->recvmsg_queue,
259 struct smb_direct_recvmsg,
260 list);
261 list_del(&recvmsg->list);
262 }
263 spin_unlock(&t->recvmsg_queue_lock);
264 return recvmsg;
265 }
266
267 static void put_recvmsg(struct smb_direct_transport *t,
268 struct smb_direct_recvmsg *recvmsg)
269 {
270 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
271 recvmsg->sge.length, DMA_FROM_DEVICE);
272
273 spin_lock(&t->recvmsg_queue_lock);
274 list_add(&recvmsg->list, &t->recvmsg_queue);
275 spin_unlock(&t->recvmsg_queue_lock);
276 }
277
278 static struct
279 smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t)
280 {
281 struct smb_direct_recvmsg *recvmsg = NULL;
282
283 spin_lock(&t->empty_recvmsg_queue_lock);
284 if (!list_empty(&t->empty_recvmsg_queue)) {
285 recvmsg = list_first_entry(&t->empty_recvmsg_queue,
286 struct smb_direct_recvmsg, list);
287 list_del(&recvmsg->list);
288 }
289 spin_unlock(&t->empty_recvmsg_queue_lock);
290 return recvmsg;
291 }
292
293 static void put_empty_recvmsg(struct smb_direct_transport *t,
294 struct smb_direct_recvmsg *recvmsg)
295 {
296 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
297 recvmsg->sge.length, DMA_FROM_DEVICE);
298
299 spin_lock(&t->empty_recvmsg_queue_lock);
300 list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue);
301 spin_unlock(&t->empty_recvmsg_queue_lock);
302 }
303
304 static void enqueue_reassembly(struct smb_direct_transport *t,
305 struct smb_direct_recvmsg *recvmsg,
306 int data_length)
307 {
308 spin_lock(&t->reassembly_queue_lock);
309 list_add_tail(&recvmsg->list, &t->reassembly_queue);
310 t->reassembly_queue_length++;
311 /*
312 * Make sure reassembly_data_length is updated after list and
313 * reassembly_queue_length are updated. On the dequeue side
314 * reassembly_data_length is checked without a lock to determine
315 * whether reassembly_queue_length and the list are up to date
316 */
317 virt_wmb();
318 t->reassembly_data_length += data_length;
319 spin_unlock(&t->reassembly_queue_lock);
320 }
321
322 static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t)
323 {
324 if (!list_empty(&t->reassembly_queue))
325 return list_first_entry(&t->reassembly_queue,
326 struct smb_direct_recvmsg, list);
327 else
328 return NULL;
329 }
330
331 static void smb_direct_disconnect_rdma_work(struct work_struct *work)
332 {
333 struct smb_direct_transport *t =
334 container_of(work, struct smb_direct_transport,
335 disconnect_work);
336
337 if (t->status == SMB_DIRECT_CS_CONNECTED) {
338 t->status = SMB_DIRECT_CS_DISCONNECTING;
339 rdma_disconnect(t->cm_id);
340 }
341 }
342
343 static void
344 smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t)
345 {
346 if (t->status == SMB_DIRECT_CS_CONNECTED)
347 queue_work(smb_direct_wq, &t->disconnect_work);
348 }
349
350 static void smb_direct_send_immediate_work(struct work_struct *work)
351 {
352 struct smb_direct_transport *t = container_of(work,
353 struct smb_direct_transport, send_immediate_work);
354
355 if (t->status != SMB_DIRECT_CS_CONNECTED)
356 return;
357
358 smb_direct_post_send_data(t, NULL, NULL, 0, 0);
359 }
360
361 static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
362 {
363 struct smb_direct_transport *t;
364 struct ksmbd_conn *conn;
365
366 t = kzalloc(sizeof(*t), KSMBD_DEFAULT_GFP);
367 if (!t)
368 return NULL;
369
370 t->cm_id = cm_id;
371 cm_id->context = t;
372
373 t->status = SMB_DIRECT_CS_NEW;
374 init_waitqueue_head(&t->wait_status);
375
376 spin_lock_init(&t->reassembly_queue_lock);
377 INIT_LIST_HEAD(&t->reassembly_queue);
378 t->reassembly_data_length = 0;
379 t->reassembly_queue_length = 0;
380 init_waitqueue_head(&t->wait_reassembly_queue);
381 init_waitqueue_head(&t->wait_send_credits);
382 init_waitqueue_head(&t->wait_rw_credits);
383
384 spin_lock_init(&t->receive_credit_lock);
385 spin_lock_init(&t->recvmsg_queue_lock);
386 INIT_LIST_HEAD(&t->recvmsg_queue);
387
388 spin_lock_init(&t->empty_recvmsg_queue_lock);
389 INIT_LIST_HEAD(&t->empty_recvmsg_queue);
390
391 init_waitqueue_head(&t->wait_send_pending);
392 atomic_set(&t->send_pending, 0);
393
394 spin_lock_init(&t->lock_new_recv_credits);
395
396 INIT_DELAYED_WORK(&t->post_recv_credits_work,
397 smb_direct_post_recv_credits);
398 INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work);
399 INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work);
400
401 conn = ksmbd_conn_alloc();
402 if (!conn)
403 goto err;
404 conn->transport = KSMBD_TRANS(t);
405 KSMBD_TRANS(t)->conn = conn;
406 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops;
407 return t;
408 err:
409 kfree(t);
410 return NULL;
411 }
412
413 static void free_transport(struct smb_direct_transport *t)
414 {
415 struct smb_direct_recvmsg *recvmsg;
416
417 wake_up_interruptible(&t->wait_send_credits);
418
419 ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n");
420 wait_event(t->wait_send_pending,
421 atomic_read(&t->send_pending) == 0);
422
423 cancel_work_sync(&t->disconnect_work);
424 cancel_delayed_work_sync(&t->post_recv_credits_work);
425 cancel_work_sync(&t->send_immediate_work);
426
427 if (t->qp) {
428 ib_drain_qp(t->qp);
429 ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
430 ib_destroy_qp(t->qp);
431 }
432
433 ksmbd_debug(RDMA, "drain the reassembly queue\n");
434 do {
435 spin_lock(&t->reassembly_queue_lock);
436 recvmsg = get_first_reassembly(t);
437 if (recvmsg) {
438 list_del(&recvmsg->list);
439 spin_unlock(&t->reassembly_queue_lock);
440 put_recvmsg(t, recvmsg);
441 } else {
442 spin_unlock(&t->reassembly_queue_lock);
443 }
444 } while (recvmsg);
445 t->reassembly_data_length = 0;
446
447 if (t->send_cq)
448 ib_free_cq(t->send_cq);
449 if (t->recv_cq)
450 ib_free_cq(t->recv_cq);
451 if (t->pd)
452 ib_dealloc_pd(t->pd);
453 if (t->cm_id)
454 rdma_destroy_id(t->cm_id);
455
456 smb_direct_destroy_pools(t);
457 ksmbd_conn_free(KSMBD_TRANS(t)->conn);
458 kfree(t);
459 }
460
461 static struct smb_direct_sendmsg
462 *smb_direct_alloc_sendmsg(struct smb_direct_transport *t)
463 {
464 struct smb_direct_sendmsg *msg;
465
466 msg = mempool_alloc(t->sendmsg_mempool, KSMBD_DEFAULT_GFP);
467 if (!msg)
468 return ERR_PTR(-ENOMEM);
469 msg->transport = t;
470 INIT_LIST_HEAD(&msg->list);
471 msg->num_sge = 0;
472 return msg;
473 }
474
475 static void smb_direct_free_sendmsg(struct smb_direct_transport *t,
476 struct smb_direct_sendmsg *msg)
477 {
478 int i;
479
480 if (msg->num_sge > 0) {
481 ib_dma_unmap_single(t->cm_id->device,
482 msg->sge[0].addr, msg->sge[0].length,
483 DMA_TO_DEVICE);
484 for (i = 1; i < msg->num_sge; i++)
485 ib_dma_unmap_page(t->cm_id->device,
486 msg->sge[i].addr, msg->sge[i].length,
487 DMA_TO_DEVICE);
488 }
489 mempool_free(msg, t->sendmsg_mempool);
490 }
491
492 static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg)
493 {
494 switch (recvmsg->type) {
495 case SMB_DIRECT_MSG_DATA_TRANSFER: {
496 struct smb_direct_data_transfer *req =
497 (struct smb_direct_data_transfer *)recvmsg->packet;
498 struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
499 + le32_to_cpu(req->data_offset));
500 ksmbd_debug(RDMA,
501 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
502 le16_to_cpu(req->credits_granted),
503 le16_to_cpu(req->credits_requested),
504 req->data_length, req->remaining_data_length,
505 hdr->ProtocolId, hdr->Command);
506 break;
507 }
508 case SMB_DIRECT_MSG_NEGOTIATE_REQ: {
509 struct smb_direct_negotiate_req *req =
510 (struct smb_direct_negotiate_req *)recvmsg->packet;
511 ksmbd_debug(RDMA,
512 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n",
513 le16_to_cpu(req->min_version),
514 le16_to_cpu(req->max_version),
515 le16_to_cpu(req->credits_requested),
516 le32_to_cpu(req->preferred_send_size),
517 le32_to_cpu(req->max_receive_size),
518 le32_to_cpu(req->max_fragmented_size));
519 if (le16_to_cpu(req->min_version) > 0x0100 ||
520 le16_to_cpu(req->max_version) < 0x0100)
521 return -EOPNOTSUPP;
522 if (le16_to_cpu(req->credits_requested) <= 0 ||
523 le32_to_cpu(req->max_receive_size) <= 128 ||
524 le32_to_cpu(req->max_fragmented_size) <=
525 128 * 1024)
526 return -ECONNABORTED;
527
528 break;
529 }
530 default:
531 return -EINVAL;
532 }
533 return 0;
534 }
535
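/*
 * Receive completion handler. A negotiate request wakes the negotiation
 * waiter, a data transfer is appended to the reassembly queue, and the credit
 * fields carried in data-transfer headers feed the local send and receive
 * credit accounting.
 */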
536 static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
537 {
538 struct smb_direct_recvmsg *recvmsg;
539 struct smb_direct_transport *t;
540
541 recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe);
542 t = recvmsg->transport;
543
544 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
545 if (wc->status != IB_WC_WR_FLUSH_ERR) {
546 pr_err("Recv error. status='%s (%d)' opcode=%d\n",
547 ib_wc_status_msg(wc->status), wc->status,
548 wc->opcode);
549 smb_direct_disconnect_rdma_connection(t);
550 }
551 put_empty_recvmsg(t, recvmsg);
552 return;
553 }
554
555 ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n",
556 ib_wc_status_msg(wc->status), wc->status,
557 wc->opcode);
558
559 ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr,
560 recvmsg->sge.length, DMA_FROM_DEVICE);
561
562 switch (recvmsg->type) {
563 case SMB_DIRECT_MSG_NEGOTIATE_REQ:
564 if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) {
565 put_empty_recvmsg(t, recvmsg);
566 return;
567 }
568 t->negotiation_requested = true;
569 t->full_packet_received = true;
570 t->status = SMB_DIRECT_CS_CONNECTED;
571 enqueue_reassembly(t, recvmsg, 0);
572 wake_up_interruptible(&t->wait_status);
573 break;
574 case SMB_DIRECT_MSG_DATA_TRANSFER: {
575 struct smb_direct_data_transfer *data_transfer =
576 (struct smb_direct_data_transfer *)recvmsg->packet;
577 unsigned int data_length;
578 int avail_recvmsg_count, receive_credits;
579
580 if (wc->byte_len <
581 offsetof(struct smb_direct_data_transfer, padding)) {
582 put_empty_recvmsg(t, recvmsg);
583 return;
584 }
585
586 data_length = le32_to_cpu(data_transfer->data_length);
587 if (data_length) {
588 if (wc->byte_len < sizeof(struct smb_direct_data_transfer) +
589 (u64)data_length) {
590 put_empty_recvmsg(t, recvmsg);
591 return;
592 }
593
594 if (t->full_packet_received)
595 recvmsg->first_segment = true;
596
597 if (le32_to_cpu(data_transfer->remaining_data_length))
598 t->full_packet_received = false;
599 else
600 t->full_packet_received = true;
601
602 enqueue_reassembly(t, recvmsg, (int)data_length);
603 wake_up_interruptible(&t->wait_reassembly_queue);
604
605 spin_lock(&t->receive_credit_lock);
606 receive_credits = --(t->recv_credits);
607 avail_recvmsg_count = t->count_avail_recvmsg;
608 spin_unlock(&t->receive_credit_lock);
609 } else {
610 put_empty_recvmsg(t, recvmsg);
611
612 spin_lock(&t->receive_credit_lock);
613 receive_credits = --(t->recv_credits);
614 avail_recvmsg_count = ++(t->count_avail_recvmsg);
615 spin_unlock(&t->receive_credit_lock);
616 }
617
618 t->recv_credit_target =
619 le16_to_cpu(data_transfer->credits_requested);
620 atomic_add(le16_to_cpu(data_transfer->credits_granted),
621 &t->send_credits);
622
623 if (le16_to_cpu(data_transfer->flags) &
624 SMB_DIRECT_RESPONSE_REQUESTED)
625 queue_work(smb_direct_wq, &t->send_immediate_work);
626
627 if (atomic_read(&t->send_credits) > 0)
628 wake_up_interruptible(&t->wait_send_credits);
629
630 if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count))
631 mod_delayed_work(smb_direct_wq,
632 &t->post_recv_credits_work, 0);
633 break;
634 }
635 default:
636 break;
637 }
638 }
639
640 static int smb_direct_post_recv(struct smb_direct_transport *t,
641 struct smb_direct_recvmsg *recvmsg)
642 {
643 struct ib_recv_wr wr;
644 int ret;
645
646 recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device,
647 recvmsg->packet, t->max_recv_size,
648 DMA_FROM_DEVICE);
649 ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr);
650 if (ret)
651 return ret;
652 recvmsg->sge.length = t->max_recv_size;
653 recvmsg->sge.lkey = t->pd->local_dma_lkey;
654 recvmsg->cqe.done = recv_done;
655
656 wr.wr_cqe = &recvmsg->cqe;
657 wr.next = NULL;
658 wr.sg_list = &recvmsg->sge;
659 wr.num_sge = 1;
660
661 ret = ib_post_recv(t->qp, &wr, NULL);
662 if (ret) {
663 pr_err("Can't post recv: %d\n", ret);
664 ib_dma_unmap_single(t->cm_id->device,
665 recvmsg->sge.addr, recvmsg->sge.length,
666 DMA_FROM_DEVICE);
667 smb_direct_disconnect_rdma_connection(t);
668 return ret;
669 }
670 return ret;
671 }
672
673 static int smb_direct_read(struct ksmbd_transport *t, char *buf,
674 unsigned int size, int unused)
675 {
676 struct smb_direct_recvmsg *recvmsg;
677 struct smb_direct_data_transfer *data_transfer;
678 int to_copy, to_read, data_read, offset;
679 u32 data_length, remaining_data_length, data_offset;
680 int rc;
681 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
682
683 again:
684 if (st->status != SMB_DIRECT_CS_CONNECTED) {
685 pr_err("disconnected\n");
686 return -ENOTCONN;
687 }
688
689 /*
690 * No need to hold the reassembly queue lock all the time as we are
691 * the only one reading from the front of the queue. The transport
692 * may add more entries to the back of the queue at the same time
693 */
694 if (st->reassembly_data_length >= size) {
695 int queue_length;
696 int queue_removed = 0;
697
698 /*
699 * Need to make sure reassembly_data_length is read before
700 * reading reassembly_queue_length and calling
701 * get_first_reassembly. This call is lock-free
702 * because we never read the end of the queue, which is being
703 * updated in softirq context as more data is received
704 */
705 virt_rmb();
706 queue_length = st->reassembly_queue_length;
707 data_read = 0;
708 to_read = size;
709 offset = st->first_entry_offset;
710 while (data_read < size) {
711 recvmsg = get_first_reassembly(st);
712 data_transfer = smb_direct_recvmsg_payload(recvmsg);
713 data_length = le32_to_cpu(data_transfer->data_length);
714 remaining_data_length =
715 le32_to_cpu(data_transfer->remaining_data_length);
716 data_offset = le32_to_cpu(data_transfer->data_offset);
717
718 /*
719 * The upper layer expects RFC1002 length at the
720 * beginning of the payload. Return it to indicate
721 * the total length of the packet. This minimizes the
722 * changes to the upper layer packet processing logic and
723 * will eventually be removed when an intermediate
724 * transport layer is added
725 */
726 if (recvmsg->first_segment && size == 4) {
727 unsigned int rfc1002_len =
728 data_length + remaining_data_length;
729 *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
730 data_read = 4;
731 recvmsg->first_segment = false;
732 ksmbd_debug(RDMA,
733 "returning rfc1002 length %d\n",
734 rfc1002_len);
735 goto read_rfc1002_done;
736 }
737
738 to_copy = min_t(int, data_length - offset, to_read);
739 memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
740 to_copy);
741
742 /* move on to the next buffer? */
743 if (to_copy == data_length - offset) {
744 queue_length--;
745 /*
746 * No need to lock if we are not at the
747 * end of the queue
748 */
749 if (queue_length) {
750 list_del(&recvmsg->list);
751 } else {
752 spin_lock_irq(&st->reassembly_queue_lock);
753 list_del(&recvmsg->list);
754 spin_unlock_irq(&st->reassembly_queue_lock);
755 }
756 queue_removed++;
757 put_recvmsg(st, recvmsg);
758 offset = 0;
759 } else {
760 offset += to_copy;
761 }
762
763 to_read -= to_copy;
764 data_read += to_copy;
765 }
766
767 spin_lock_irq(&st->reassembly_queue_lock);
768 st->reassembly_data_length -= data_read;
769 st->reassembly_queue_length -= queue_removed;
770 spin_unlock_irq(&st->reassembly_queue_lock);
771
772 spin_lock(&st->receive_credit_lock);
773 st->count_avail_recvmsg += queue_removed;
774 if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) {
775 spin_unlock(&st->receive_credit_lock);
776 mod_delayed_work(smb_direct_wq,
777 &st->post_recv_credits_work, 0);
778 } else {
779 spin_unlock(&st->receive_credit_lock);
780 }
781
782 st->first_entry_offset = offset;
783 ksmbd_debug(RDMA,
784 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
785 data_read, st->reassembly_data_length,
786 st->first_entry_offset);
787 read_rfc1002_done:
788 return data_read;
789 }
790
791 ksmbd_debug(RDMA, "wait_event on more data\n");
792 rc = wait_event_interruptible(st->wait_reassembly_queue,
793 st->reassembly_data_length >= size ||
794 st->status != SMB_DIRECT_CS_CONNECTED);
795 if (rc)
796 return -EINTR;
797
798 goto again;
799 }
800
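/*
 * Work item that tops up posted receive buffers toward recv_credit_target,
 * drawing first from the free recvmsg pool and then from the queue of
 * already-consumed (empty) messages. Buffers posted here accumulate in
 * new_recv_credits and are granted to the peer by the next send.
 */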
801 static void smb_direct_post_recv_credits(struct work_struct *work)
802 {
803 struct smb_direct_transport *t = container_of(work,
804 struct smb_direct_transport, post_recv_credits_work.work);
805 struct smb_direct_recvmsg *recvmsg;
806 int receive_credits, credits = 0;
807 int ret;
808 int use_free = 1;
809
810 spin_lock(&t->receive_credit_lock);
811 receive_credits = t->recv_credits;
812 spin_unlock(&t->receive_credit_lock);
813
814 if (receive_credits < t->recv_credit_target) {
815 while (true) {
816 if (use_free)
817 recvmsg = get_free_recvmsg(t);
818 else
819 recvmsg = get_empty_recvmsg(t);
820 if (!recvmsg) {
821 if (use_free) {
822 use_free = 0;
823 continue;
824 } else {
825 break;
826 }
827 }
828
829 recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER;
830 recvmsg->first_segment = false;
831
832 ret = smb_direct_post_recv(t, recvmsg);
833 if (ret) {
834 pr_err("Can't post recv: %d\n", ret);
835 put_recvmsg(t, recvmsg);
836 break;
837 }
838 credits++;
839 }
840 }
841
842 spin_lock(&t->receive_credit_lock);
843 t->recv_credits += credits;
844 t->count_avail_recvmsg -= credits;
845 spin_unlock(&t->receive_credit_lock);
846
847 spin_lock(&t->lock_new_recv_credits);
848 t->new_recv_credits += credits;
849 spin_unlock(&t->lock_new_recv_credits);
850
851 if (credits)
852 queue_work(smb_direct_wq, &t->send_immediate_work);
853 }
854
855 static void send_done(struct ib_cq *cq, struct ib_wc *wc)
856 {
857 struct smb_direct_sendmsg *sendmsg, *sibling;
858 struct smb_direct_transport *t;
859 struct list_head *pos, *prev, *end;
860
861 sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe);
862 t = sendmsg->transport;
863
864 ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n",
865 ib_wc_status_msg(wc->status), wc->status,
866 wc->opcode);
867
868 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
869 pr_err("Send error. status='%s (%d)', opcode=%d\n",
870 ib_wc_status_msg(wc->status), wc->status,
871 wc->opcode);
872 smb_direct_disconnect_rdma_connection(t);
873 }
874
875 if (atomic_dec_and_test(&t->send_pending))
876 wake_up(&t->wait_send_pending);
877
878 /* Iterate and free the list of messages in reverse; the list's head
879 * is no longer valid.
880 */
881 for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next;
882 prev != end; pos = prev, prev = prev->prev) {
883 sibling = container_of(pos, struct smb_direct_sendmsg, list);
884 smb_direct_free_sendmsg(t, sibling);
885 }
886
887 sibling = container_of(pos, struct smb_direct_sendmsg, list);
888 smb_direct_free_sendmsg(t, sibling);
889 }
890
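/*
 * Hand back the receive credits accumulated since the last send so they can
 * be advertised to the peer in the credits_granted field of the outgoing
 * packet.
 */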
891 static int manage_credits_prior_sending(struct smb_direct_transport *t)
892 {
893 int new_credits;
894
895 spin_lock(&t->lock_new_recv_credits);
896 new_credits = t->new_recv_credits;
897 t->new_recv_credits = 0;
898 spin_unlock(&t->lock_new_recv_credits);
899
900 return new_credits;
901 }
902
903 static int smb_direct_post_send(struct smb_direct_transport *t,
904 struct ib_send_wr *wr)
905 {
906 int ret;
907
908 atomic_inc(&t->send_pending);
909 ret = ib_post_send(t->qp, wr, NULL);
910 if (ret) {
911 pr_err("failed to post send: %d\n", ret);
912 if (atomic_dec_and_test(&t->send_pending))
913 wake_up(&t->wait_send_pending);
914 smb_direct_disconnect_rdma_connection(t);
915 }
916 return ret;
917 }
918
919 static void smb_direct_send_ctx_init(struct smb_direct_transport *t,
920 struct smb_direct_send_ctx *send_ctx,
921 bool need_invalidate_rkey,
922 unsigned int remote_key)
923 {
924 INIT_LIST_HEAD(&send_ctx->msg_list);
925 send_ctx->wr_cnt = 0;
926 send_ctx->need_invalidate_rkey = need_invalidate_rkey;
927 send_ctx->remote_key = remote_key;
928 }
929
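/*
 * Post every message queued in the send context as a single chain of work
 * requests: only the last WR is signaled, so one completion covers the whole
 * batch. On failure the consumed send credits are returned and the queued
 * messages are freed.
 */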
930 static int smb_direct_flush_send_list(struct smb_direct_transport *t,
931 struct smb_direct_send_ctx *send_ctx,
932 bool is_last)
933 {
934 struct smb_direct_sendmsg *first, *last;
935 int ret;
936
937 if (list_empty(&send_ctx->msg_list))
938 return 0;
939
940 first = list_first_entry(&send_ctx->msg_list,
941 struct smb_direct_sendmsg,
942 list);
943 last = list_last_entry(&send_ctx->msg_list,
944 struct smb_direct_sendmsg,
945 list);
946
947 last->wr.send_flags = IB_SEND_SIGNALED;
948 last->wr.wr_cqe = &last->cqe;
949 if (is_last && send_ctx->need_invalidate_rkey) {
950 last->wr.opcode = IB_WR_SEND_WITH_INV;
951 last->wr.ex.invalidate_rkey = send_ctx->remote_key;
952 }
953
954 ret = smb_direct_post_send(t, &first->wr);
955 if (!ret) {
956 smb_direct_send_ctx_init(t, send_ctx,
957 send_ctx->need_invalidate_rkey,
958 send_ctx->remote_key);
959 } else {
960 atomic_add(send_ctx->wr_cnt, &t->send_credits);
961 wake_up(&t->wait_send_credits);
962 list_for_each_entry_safe(first, last, &send_ctx->msg_list,
963 list) {
964 smb_direct_free_sendmsg(t, first);
965 }
966 }
967 return ret;
968 }
969
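/*
 * Atomically take 'needed' credits from the pool. If that would drive the
 * pool negative, give the credits back and sleep until enough are returned
 * or the connection leaves the connected state.
 */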
970 static int wait_for_credits(struct smb_direct_transport *t,
971 wait_queue_head_t *waitq, atomic_t *total_credits,
972 int needed)
973 {
974 int ret;
975
976 do {
977 if (atomic_sub_return(needed, total_credits) >= 0)
978 return 0;
979
980 atomic_add(needed, total_credits);
981 ret = wait_event_interruptible(*waitq,
982 atomic_read(total_credits) >= needed ||
983 t->status != SMB_DIRECT_CS_CONNECTED);
984
985 if (t->status != SMB_DIRECT_CS_CONNECTED)
986 return -ENOTCONN;
987 else if (ret < 0)
988 return ret;
989 } while (true);
990 }
991
992 static int wait_for_send_credits(struct smb_direct_transport *t,
993 struct smb_direct_send_ctx *send_ctx)
994 {
995 int ret;
996
997 if (send_ctx &&
998 (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) {
999 ret = smb_direct_flush_send_list(t, send_ctx, false);
1000 if (ret)
1001 return ret;
1002 }
1003
1004 return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1);
1005 }
1006
1007 static int wait_for_rw_credits(struct smb_direct_transport *t, int credits)
1008 {
1009 return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits);
1010 }
1011
1012 static int calc_rw_credits(struct smb_direct_transport *t,
1013 char *buf, unsigned int len)
1014 {
1015 return DIV_ROUND_UP(get_buf_page_count(buf, len),
1016 t->pages_per_rw_credit);
1017 }
1018
1019 static int smb_direct_create_header(struct smb_direct_transport *t,
1020 int size, int remaining_data_length,
1021 struct smb_direct_sendmsg **sendmsg_out)
1022 {
1023 struct smb_direct_sendmsg *sendmsg;
1024 struct smb_direct_data_transfer *packet;
1025 int header_length;
1026 int ret;
1027
1028 sendmsg = smb_direct_alloc_sendmsg(t);
1029 if (IS_ERR(sendmsg))
1030 return PTR_ERR(sendmsg);
1031
1032 /* Fill in the packet header */
1033 packet = (struct smb_direct_data_transfer *)sendmsg->packet;
1034 packet->credits_requested = cpu_to_le16(t->send_credit_target);
1035 packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
1036
1037 packet->flags = 0;
1038 packet->reserved = 0;
1039 if (!size)
1040 packet->data_offset = 0;
1041 else
1042 packet->data_offset = cpu_to_le32(24);
1043 packet->data_length = cpu_to_le32(size);
1044 packet->remaining_data_length = cpu_to_le32(remaining_data_length);
1045 packet->padding = 0;
1046
1047 ksmbd_debug(RDMA,
1048 "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
1049 le16_to_cpu(packet->credits_requested),
1050 le16_to_cpu(packet->credits_granted),
1051 le32_to_cpu(packet->data_offset),
1052 le32_to_cpu(packet->data_length),
1053 le32_to_cpu(packet->remaining_data_length));
1054
1055 /* Map the packet to DMA */
1056 header_length = sizeof(struct smb_direct_data_transfer);
1057 /* If this is a packet without payload, don't send padding */
1058 if (!size)
1059 header_length =
1060 offsetof(struct smb_direct_data_transfer, padding);
1061
1062 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
1063 (void *)packet,
1064 header_length,
1065 DMA_TO_DEVICE);
1066 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
1067 if (ret) {
1068 smb_direct_free_sendmsg(t, sendmsg);
1069 return ret;
1070 }
1071
1072 sendmsg->num_sge = 1;
1073 sendmsg->sge[0].length = header_length;
1074 sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
1075
1076 *sendmsg_out = sendmsg;
1077 return 0;
1078 }
1079
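/*
 * Build a scatterlist over the pages backing 'buf', which may live in the
 * vmalloc or the kmalloc area. Returns the number of entries filled in, or
 * -EINVAL if the buffer's pages do not fit into 'nentries' scatterlist
 * entries.
 */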
1080 static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries)
1081 {
1082 bool high = is_vmalloc_addr(buf);
1083 struct page *page;
1084 int offset, len;
1085 int i = 0;
1086
1087 if (size <= 0 || nentries < get_buf_page_count(buf, size))
1088 return -EINVAL;
1089
1090 offset = offset_in_page(buf);
1091 buf -= offset;
1092 while (size > 0) {
1093 len = min_t(int, PAGE_SIZE - offset, size);
1094 if (high)
1095 page = vmalloc_to_page(buf);
1096 else
1097 page = kmap_to_page(buf);
1098
1099 if (!sg_list)
1100 return -EINVAL;
1101 sg_set_page(sg_list, page, len, offset);
1102 sg_list = sg_next(sg_list);
1103
1104 buf += PAGE_SIZE;
1105 size -= len;
1106 offset = 0;
1107 i++;
1108 }
1109 return i;
1110 }
1111
1112 static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
1113 struct scatterlist *sg_list, int nentries,
1114 enum dma_data_direction dir)
1115 {
1116 int npages;
1117
1118 npages = get_sg_list(buf, size, sg_list, nentries);
1119 if (npages < 0)
1120 return -EINVAL;
1121 return ib_dma_map_sg(device, sg_list, npages, dir);
1122 }
1123
1124 static int post_sendmsg(struct smb_direct_transport *t,
1125 struct smb_direct_send_ctx *send_ctx,
1126 struct smb_direct_sendmsg *msg)
1127 {
1128 int i;
1129
1130 for (i = 0; i < msg->num_sge; i++)
1131 ib_dma_sync_single_for_device(t->cm_id->device,
1132 msg->sge[i].addr, msg->sge[i].length,
1133 DMA_TO_DEVICE);
1134
1135 msg->cqe.done = send_done;
1136 msg->wr.opcode = IB_WR_SEND;
1137 msg->wr.sg_list = &msg->sge[0];
1138 msg->wr.num_sge = msg->num_sge;
1139 msg->wr.next = NULL;
1140
1141 if (send_ctx) {
1142 msg->wr.wr_cqe = NULL;
1143 msg->wr.send_flags = 0;
1144 if (!list_empty(&send_ctx->msg_list)) {
1145 struct smb_direct_sendmsg *last;
1146
1147 last = list_last_entry(&send_ctx->msg_list,
1148 struct smb_direct_sendmsg,
1149 list);
1150 last->wr.next = &msg->wr;
1151 }
1152 list_add_tail(&msg->list, &send_ctx->msg_list);
1153 send_ctx->wr_cnt++;
1154 return 0;
1155 }
1156
1157 msg->wr.wr_cqe = &msg->cqe;
1158 msg->wr.send_flags = IB_SEND_SIGNALED;
1159 return smb_direct_post_send(t, &msg->wr);
1160 }
1161
1162 static int smb_direct_post_send_data(struct smb_direct_transport *t,
1163 struct smb_direct_send_ctx *send_ctx,
1164 struct kvec *iov, int niov,
1165 int remaining_data_length)
1166 {
1167 int i, j, ret;
1168 struct smb_direct_sendmsg *msg;
1169 int data_length;
1170 struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1];
1171
1172 ret = wait_for_send_credits(t, send_ctx);
1173 if (ret)
1174 return ret;
1175
1176 data_length = 0;
1177 for (i = 0; i < niov; i++)
1178 data_length += iov[i].iov_len;
1179
1180 ret = smb_direct_create_header(t, data_length, remaining_data_length,
1181 &msg);
1182 if (ret) {
1183 atomic_inc(&t->send_credits);
1184 return ret;
1185 }
1186
1187 for (i = 0; i < niov; i++) {
1188 struct ib_sge *sge;
1189 int sg_cnt;
1190
1191 sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1);
1192 sg_cnt = get_mapped_sg_list(t->cm_id->device,
1193 iov[i].iov_base, iov[i].iov_len,
1194 sg, SMB_DIRECT_MAX_SEND_SGES - 1,
1195 DMA_TO_DEVICE);
1196 if (sg_cnt <= 0) {
1197 pr_err("failed to map buffer\n");
1198 ret = -ENOMEM;
1199 goto err;
1200 } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) {
1201 pr_err("buffer not fitted into sges\n");
1202 ret = -E2BIG;
1203 ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
1204 DMA_TO_DEVICE);
1205 goto err;
1206 }
1207
1208 for (j = 0; j < sg_cnt; j++) {
1209 sge = &msg->sge[msg->num_sge];
1210 sge->addr = sg_dma_address(&sg[j]);
1211 sge->length = sg_dma_len(&sg[j]);
1212 sge->lkey = t->pd->local_dma_lkey;
1213 msg->num_sge++;
1214 }
1215 }
1216
1217 ret = post_sendmsg(t, send_ctx, msg);
1218 if (ret)
1219 goto err;
1220 return 0;
1221 err:
1222 smb_direct_free_sendmsg(t, msg);
1223 atomic_inc(&t->send_credits);
1224 return ret;
1225 }
1226
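/*
 * Send an SMB2 response over SMB Direct. The leading 4-byte RFC1002 transport
 * header in iov[0] is skipped; the remaining iovecs are coalesced into
 * SMB_DIRECT data-transfer messages no larger than max_send_size, and any
 * single iovec that exceeds that limit is split into chunks.
 */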
1227 static int smb_direct_writev(struct ksmbd_transport *t,
1228 struct kvec *iov, int niovs, int buflen,
1229 bool need_invalidate, unsigned int remote_key)
1230 {
1231 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1232 int remaining_data_length;
1233 int start, i, j;
1234 int max_iov_size = st->max_send_size -
1235 sizeof(struct smb_direct_data_transfer);
1236 int ret;
1237 struct kvec vec;
1238 struct smb_direct_send_ctx send_ctx;
1239
1240 if (st->status != SMB_DIRECT_CS_CONNECTED)
1241 return -ENOTCONN;
1242
1243 //FIXME: skip RFC1002 header..
1244 buflen -= 4;
1245
1246 remaining_data_length = buflen;
1247 ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
1248
1249 smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
1250 start = i = 1;
1251 buflen = 0;
1252 while (true) {
1253 buflen += iov[i].iov_len;
1254 if (buflen > max_iov_size) {
1255 if (i > start) {
1256 remaining_data_length -=
1257 (buflen - iov[i].iov_len);
1258 ret = smb_direct_post_send_data(st, &send_ctx,
1259 &iov[start], i - start,
1260 remaining_data_length);
1261 if (ret)
1262 goto done;
1263 } else {
1264 /* iov[start] is too big, break it */
1265 int nvec = (buflen + max_iov_size - 1) /
1266 max_iov_size;
1267
1268 for (j = 0; j < nvec; j++) {
1269 vec.iov_base =
1270 (char *)iov[start].iov_base +
1271 j * max_iov_size;
1272 vec.iov_len =
1273 min_t(int, max_iov_size,
1274 buflen - max_iov_size * j);
1275 remaining_data_length -= vec.iov_len;
1276 ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
1277 remaining_data_length);
1278 if (ret)
1279 goto done;
1280 }
1281 i++;
1282 if (i == niovs)
1283 break;
1284 }
1285 start = i;
1286 buflen = 0;
1287 } else {
1288 i++;
1289 if (i == niovs) {
1290 /* send out all remaining vecs */
1291 remaining_data_length -= buflen;
1292 ret = smb_direct_post_send_data(st, &send_ctx,
1293 &iov[start], i - start,
1294 remaining_data_length);
1295 if (ret)
1296 goto done;
1297 break;
1298 }
1299 }
1300 }
1301
1302 done:
1303 ret = smb_direct_flush_send_list(st, &send_ctx, true);
1304
1305 /*
1306 * As an optimization, we don't wait for individual I/O to finish
1307 * before sending the next one.
1308 * Send them all and wait for the pending send count to reach 0,
1309 * which means all the I/Os have gone out and we are good to return
1310 */
1311
1312 wait_event(st->wait_send_pending,
1313 atomic_read(&st->send_pending) == 0);
1314 return ret;
1315 }
1316
1317 static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t,
1318 struct smb_direct_rdma_rw_msg *msg,
1319 enum dma_data_direction dir)
1320 {
1321 rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
1322 msg->sgt.sgl, msg->sgt.nents, dir);
1323 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1324 kfree(msg);
1325 }
1326
1327 static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
1328 enum dma_data_direction dir)
1329 {
1330 struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe,
1331 struct smb_direct_rdma_rw_msg, cqe);
1332 struct smb_direct_transport *t = msg->t;
1333
1334 if (wc->status != IB_WC_SUCCESS) {
1335 msg->status = -EIO;
1336 pr_err("read/write error. opcode = %d, status = %s(%d)\n",
1337 wc->opcode, ib_wc_status_msg(wc->status), wc->status);
1338 if (wc->status != IB_WC_WR_FLUSH_ERR)
1339 smb_direct_disconnect_rdma_connection(t);
1340 }
1341
1342 complete(msg->completion);
1343 }
1344
1345 static void read_done(struct ib_cq *cq, struct ib_wc *wc)
1346 {
1347 read_write_done(cq, wc, DMA_FROM_DEVICE);
1348 }
1349
1350 static void write_done(struct ib_cq *cq, struct ib_wc *wc)
1351 {
1352 read_write_done(cq, wc, DMA_TO_DEVICE);
1353 }
1354
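/*
 * Issue the RDMA reads or writes described by an SMB2 buffer descriptor
 * array: reserve R/W credits for all descriptors up front, build one
 * rdma_rw_ctx per descriptor, chain their work requests, post the chain with
 * a single ib_post_send() and wait for the final completion.
 */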
1355 static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
1356 void *buf, int buf_len,
1357 struct smb2_buffer_desc_v1 *desc,
1358 unsigned int desc_len,
1359 bool is_read)
1360 {
1361 struct smb_direct_rdma_rw_msg *msg, *next_msg;
1362 int i, ret;
1363 DECLARE_COMPLETION_ONSTACK(completion);
1364 struct ib_send_wr *first_wr;
1365 LIST_HEAD(msg_list);
1366 char *desc_buf;
1367 int credits_needed;
1368 unsigned int desc_buf_len, desc_num = 0;
1369
1370 if (t->status != SMB_DIRECT_CS_CONNECTED)
1371 return -ENOTCONN;
1372
1373 if (buf_len > t->max_rdma_rw_size)
1374 return -EINVAL;
1375
1376 /* calculate needed credits */
1377 credits_needed = 0;
1378 desc_buf = buf;
1379 for (i = 0; i < desc_len / sizeof(*desc); i++) {
1380 if (!buf_len)
1381 break;
1382
1383 desc_buf_len = le32_to_cpu(desc[i].length);
1384 if (!desc_buf_len)
1385 return -EINVAL;
1386
1387 if (desc_buf_len > buf_len) {
1388 desc_buf_len = buf_len;
1389 desc[i].length = cpu_to_le32(desc_buf_len);
1390 buf_len = 0;
1391 }
1392
1393 credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len);
1394 desc_buf += desc_buf_len;
1395 buf_len -= desc_buf_len;
1396 desc_num++;
1397 }
1398
1399 ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n",
1400 str_read_write(is_read), buf_len, credits_needed);
1401
1402 ret = wait_for_rw_credits(t, credits_needed);
1403 if (ret < 0)
1404 return ret;
1405
1406 /* build rdma_rw_ctx for each descriptor */
1407 desc_buf = buf;
1408 for (i = 0; i < desc_num; i++) {
1409 msg = kzalloc(struct_size(msg, sg_list, SG_CHUNK_SIZE),
1410 KSMBD_DEFAULT_GFP);
1411 if (!msg) {
1412 ret = -ENOMEM;
1413 goto out;
1414 }
1415
1416 desc_buf_len = le32_to_cpu(desc[i].length);
1417
1418 msg->t = t;
1419 msg->cqe.done = is_read ? read_done : write_done;
1420 msg->completion = &completion;
1421
1422 msg->sgt.sgl = &msg->sg_list[0];
1423 ret = sg_alloc_table_chained(&msg->sgt,
1424 get_buf_page_count(desc_buf, desc_buf_len),
1425 msg->sg_list, SG_CHUNK_SIZE);
1426 if (ret) {
1427 kfree(msg);
1428 ret = -ENOMEM;
1429 goto out;
1430 }
1431
1432 ret = get_sg_list(desc_buf, desc_buf_len,
1433 msg->sgt.sgl, msg->sgt.orig_nents);
1434 if (ret < 0) {
1435 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1436 kfree(msg);
1437 goto out;
1438 }
1439
1440 ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
1441 msg->sgt.sgl,
1442 get_buf_page_count(desc_buf, desc_buf_len),
1443 0,
1444 le64_to_cpu(desc[i].offset),
1445 le32_to_cpu(desc[i].token),
1446 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1447 if (ret < 0) {
1448 pr_err("failed to init rdma_rw_ctx: %d\n", ret);
1449 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1450 kfree(msg);
1451 goto out;
1452 }
1453
1454 list_add_tail(&msg->list, &msg_list);
1455 desc_buf += desc_buf_len;
1456 }
1457
1458 /* concatenate work requests of rdma_rw_ctxs */
1459 first_wr = NULL;
1460 list_for_each_entry_reverse(msg, &msg_list, list) {
1461 first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
1462 &msg->cqe, first_wr);
1463 }
1464
1465 ret = ib_post_send(t->qp, first_wr, NULL);
1466 if (ret) {
1467 pr_err("failed to post send wr for RDMA R/W: %d\n", ret);
1468 goto out;
1469 }
1470
1471 msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list);
1472 wait_for_completion(&completion);
1473 ret = msg->status;
1474 out:
1475 list_for_each_entry_safe(msg, next_msg, &msg_list, list) {
1476 list_del(&msg->list);
1477 smb_direct_free_rdma_rw_msg(t, msg,
1478 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1479 }
1480 atomic_add(credits_needed, &t->rw_credits);
1481 wake_up(&t->wait_rw_credits);
1482 return ret;
1483 }
1484
1485 static int smb_direct_rdma_write(struct ksmbd_transport *t,
1486 void *buf, unsigned int buflen,
1487 struct smb2_buffer_desc_v1 *desc,
1488 unsigned int desc_len)
1489 {
1490 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
1491 desc, desc_len, false);
1492 }
1493
1494 static int smb_direct_rdma_read(struct ksmbd_transport *t,
1495 void *buf, unsigned int buflen,
1496 struct smb2_buffer_desc_v1 *desc,
1497 unsigned int desc_len)
1498 {
1499 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
1500 desc, desc_len, true);
1501 }
1502
1503 static void smb_direct_disconnect(struct ksmbd_transport *t)
1504 {
1505 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1506
1507 ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id);
1508
1509 smb_direct_disconnect_rdma_work(&st->disconnect_work);
1510 wait_event_interruptible(st->wait_status,
1511 st->status == SMB_DIRECT_CS_DISCONNECTED);
1512 free_transport(st);
1513 }
1514
1515 static void smb_direct_shutdown(struct ksmbd_transport *t)
1516 {
1517 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1518
1519 ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id);
1520
1521 smb_direct_disconnect_rdma_work(&st->disconnect_work);
1522 }
1523
1524 static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
1525 struct rdma_cm_event *event)
1526 {
1527 struct smb_direct_transport *t = cm_id->context;
1528
1529 ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
1530 cm_id, rdma_event_msg(event->event), event->event);
1531
1532 switch (event->event) {
1533 case RDMA_CM_EVENT_ESTABLISHED: {
1534 t->status = SMB_DIRECT_CS_CONNECTED;
1535 wake_up_interruptible(&t->wait_status);
1536 break;
1537 }
1538 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1539 case RDMA_CM_EVENT_DISCONNECTED: {
1540 ib_drain_qp(t->qp);
1541
1542 t->status = SMB_DIRECT_CS_DISCONNECTED;
1543 wake_up_interruptible(&t->wait_status);
1544 wake_up_interruptible(&t->wait_reassembly_queue);
1545 wake_up(&t->wait_send_credits);
1546 break;
1547 }
1548 case RDMA_CM_EVENT_CONNECT_ERROR: {
1549 t->status = SMB_DIRECT_CS_DISCONNECTED;
1550 wake_up_interruptible(&t->wait_status);
1551 break;
1552 }
1553 default:
1554 pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
1555 cm_id, rdma_event_msg(event->event),
1556 event->event);
1557 break;
1558 }
1559 return 0;
1560 }
1561
1562 static void smb_direct_qpair_handler(struct ib_event *event, void *context)
1563 {
1564 struct smb_direct_transport *t = context;
1565
1566 ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
1567 t->cm_id, ib_event_msg(event->event), event->event);
1568
1569 switch (event->event) {
1570 case IB_EVENT_CQ_ERR:
1571 case IB_EVENT_QP_FATAL:
1572 smb_direct_disconnect_rdma_connection(t);
1573 break;
1574 default:
1575 break;
1576 }
1577 }
1578
1579 static int smb_direct_send_negotiate_response(struct smb_direct_transport *t,
1580 int failed)
1581 {
1582 struct smb_direct_sendmsg *sendmsg;
1583 struct smb_direct_negotiate_resp *resp;
1584 int ret;
1585
1586 sendmsg = smb_direct_alloc_sendmsg(t);
1587 if (IS_ERR(sendmsg))
1588 return -ENOMEM;
1589
1590 resp = (struct smb_direct_negotiate_resp *)sendmsg->packet;
1591 if (failed) {
1592 memset(resp, 0, sizeof(*resp));
1593 resp->min_version = cpu_to_le16(0x0100);
1594 resp->max_version = cpu_to_le16(0x0100);
1595 resp->status = STATUS_NOT_SUPPORTED;
1596 } else {
1597 resp->status = STATUS_SUCCESS;
1598 resp->min_version = SMB_DIRECT_VERSION_LE;
1599 resp->max_version = SMB_DIRECT_VERSION_LE;
1600 resp->negotiated_version = SMB_DIRECT_VERSION_LE;
1601 resp->reserved = 0;
1602 resp->credits_requested =
1603 cpu_to_le16(t->send_credit_target);
1604 resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
1605 resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size);
1606 resp->preferred_send_size = cpu_to_le32(t->max_send_size);
1607 resp->max_receive_size = cpu_to_le32(t->max_recv_size);
1608 resp->max_fragmented_size =
1609 cpu_to_le32(t->max_fragmented_recv_size);
1610 }
1611
1612 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
1613 (void *)resp, sizeof(*resp),
1614 DMA_TO_DEVICE);
1615 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
1616 if (ret) {
1617 smb_direct_free_sendmsg(t, sendmsg);
1618 return ret;
1619 }
1620
1621 sendmsg->num_sge = 1;
1622 sendmsg->sge[0].length = sizeof(*resp);
1623 sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
1624
1625 ret = post_sendmsg(t, NULL, sendmsg);
1626 if (ret) {
1627 smb_direct_free_sendmsg(t, sendmsg);
1628 return ret;
1629 }
1630
1631 wait_event(t->wait_send_pending,
1632 atomic_read(&t->send_pending) == 0);
1633 return 0;
1634 }
1635
1636 static int smb_direct_accept_client(struct smb_direct_transport *t)
1637 {
1638 struct rdma_conn_param conn_param;
1639 struct ib_port_immutable port_immutable;
1640 u32 ird_ord_hdr[2];
1641 int ret;
1642
1643 memset(&conn_param, 0, sizeof(conn_param));
1644 conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom,
1645 SMB_DIRECT_CM_INITIATOR_DEPTH);
1646 conn_param.responder_resources = 0;
1647
1648 t->cm_id->device->ops.get_port_immutable(t->cm_id->device,
1649 t->cm_id->port_num,
1650 &port_immutable);
1651 if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
1652 ird_ord_hdr[0] = conn_param.responder_resources;
1653 ird_ord_hdr[1] = 1;
1654 conn_param.private_data = ird_ord_hdr;
1655 conn_param.private_data_len = sizeof(ird_ord_hdr);
1656 } else {
1657 conn_param.private_data = NULL;
1658 conn_param.private_data_len = 0;
1659 }
1660 conn_param.retry_count = SMB_DIRECT_CM_RETRY;
1661 conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
1662 conn_param.flow_control = 0;
1663
1664 ret = rdma_accept(t->cm_id, &conn_param);
1665 if (ret) {
1666 pr_err("error at rdma_accept: %d\n", ret);
1667 return ret;
1668 }
1669 return 0;
1670 }
1671
1672 static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
1673 {
1674 int ret;
1675 struct smb_direct_recvmsg *recvmsg;
1676
1677 recvmsg = get_free_recvmsg(t);
1678 if (!recvmsg)
1679 return -ENOMEM;
1680 recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ;
1681
1682 ret = smb_direct_post_recv(t, recvmsg);
1683 if (ret) {
1684 pr_err("Can't post recv: %d\n", ret);
1685 goto out_err;
1686 }
1687
1688 t->negotiation_requested = false;
1689 ret = smb_direct_accept_client(t);
1690 if (ret) {
1691 pr_err("Can't accept client\n");
1692 goto out_err;
1693 }
1694
1695 smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
1696 return 0;
1697 out_err:
1698 put_recvmsg(t, recvmsg);
1699 return ret;
1700 }
1701
1702 static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t)
1703 {
1704 return min_t(unsigned int,
1705 t->cm_id->device->attrs.max_fast_reg_page_list_len,
1706 256);
1707 }
1708
1709 static int smb_direct_init_params(struct smb_direct_transport *t,
1710 struct ib_qp_cap *cap)
1711 {
1712 struct ib_device *device = t->cm_id->device;
1713 int max_send_sges, max_rw_wrs, max_send_wrs;
1714 unsigned int max_sge_per_wr, wrs_per_credit;
1715
1716 /* We need 3 extra SGEs because the SMB_DIRECT header, the SMB2 header,
1717 * and the SMB2 response may each need to be mapped separately.
1718 */
1719 t->max_send_size = smb_direct_max_send_size;
1720 max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3;
1721 if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
1722 pr_err("max_send_size %d is too large\n", t->max_send_size);
1723 return -EINVAL;
1724 }
1725
1726 /* Calculate the number of work requests for RDMA R/W.
1727 * One R/W credit covers at most the number of pages that
1728 * can be registered with a single memory region, and each
1729 * credit requires at least 4 work requests: MR registration,
1730 * the RDMA R/W itself, and local & remote MR invalidation.
1732 */
1733 t->max_rdma_rw_size = smb_direct_max_read_write_size;
1734 t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t);
1735 t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size,
1736 (t->pages_per_rw_credit - 1) *
1737 PAGE_SIZE);
1738
1739 max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge,
1740 device->attrs.max_sge_rd);
1741 max_sge_per_wr = max_t(unsigned int, max_sge_per_wr,
1742 max_send_sges);
1743 wrs_per_credit = max_t(unsigned int, 4,
1744 DIV_ROUND_UP(t->pages_per_rw_credit,
1745 max_sge_per_wr) + 1);
1746 max_rw_wrs = t->max_rw_credits * wrs_per_credit;
1747
1748 max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
1749 if (max_send_wrs > device->attrs.max_cqe ||
1750 max_send_wrs > device->attrs.max_qp_wr) {
1751 pr_err("consider lowering send_credit_target = %d\n",
1752 smb_direct_send_credit_target);
1753 pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
1754 device->attrs.max_cqe, device->attrs.max_qp_wr);
1755 return -EINVAL;
1756 }
1757
1758 if (smb_direct_receive_credit_max > device->attrs.max_cqe ||
1759 smb_direct_receive_credit_max > device->attrs.max_qp_wr) {
1760 pr_err("consider lowering receive_credit_max = %d\n",
1761 smb_direct_receive_credit_max);
1762 pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n",
1763 device->attrs.max_cqe, device->attrs.max_qp_wr);
1764 return -EINVAL;
1765 }
1766
1767 if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
1768 pr_err("warning: device max_recv_sge = %d too small\n",
1769 device->attrs.max_recv_sge);
1770 return -EINVAL;
1771 }
1772
1773 t->recv_credits = 0;
1774 t->count_avail_recvmsg = 0;
1775
1776 t->recv_credit_max = smb_direct_receive_credit_max;
1777 t->recv_credit_target = 10;
1778 t->new_recv_credits = 0;
1779
1780 t->send_credit_target = smb_direct_send_credit_target;
1781 atomic_set(&t->send_credits, 0);
1782 atomic_set(&t->rw_credits, t->max_rw_credits);
1783
1784 t->max_send_size = smb_direct_max_send_size;
1785 t->max_recv_size = smb_direct_max_receive_size;
1786 t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;
1787
1788 cap->max_send_wr = max_send_wrs;
1789 cap->max_recv_wr = t->recv_credit_max;
1790 cap->max_send_sge = max_sge_per_wr;
1791 cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
1792 cap->max_inline_data = 0;
1793 cap->max_rdma_ctxs = t->max_rw_credits;
1794 return 0;
1795 }
1796
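/* Return all queued receive buffers to the mempool, then destroy the
 * send/receive mempools and slab caches.
 */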
1797 static void smb_direct_destroy_pools(struct smb_direct_transport *t)
1798 {
1799 struct smb_direct_recvmsg *recvmsg;
1800
1801 while ((recvmsg = get_free_recvmsg(t)))
1802 mempool_free(recvmsg, t->recvmsg_mempool);
1803 while ((recvmsg = get_empty_recvmsg(t)))
1804 mempool_free(recvmsg, t->recvmsg_mempool);
1805
1806 mempool_destroy(t->recvmsg_mempool);
1807 t->recvmsg_mempool = NULL;
1808
1809 kmem_cache_destroy(t->recvmsg_cache);
1810 t->recvmsg_cache = NULL;
1811
1812 mempool_destroy(t->sendmsg_mempool);
1813 t->sendmsg_mempool = NULL;
1814
1815 kmem_cache_destroy(t->sendmsg_cache);
1816 t->sendmsg_cache = NULL;
1817 }
1818
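/* Create the per-transport slab caches and mempools for send/receive
 * messages and pre-allocate recv_credit_max receive buffers.
 */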
1819 static int smb_direct_create_pools(struct smb_direct_transport *t)
1820 {
1821 char name[80];
1822 int i;
1823 struct smb_direct_recvmsg *recvmsg;
1824
1825 snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t);
1826 t->sendmsg_cache = kmem_cache_create(name,
1827 sizeof(struct smb_direct_sendmsg) +
1828 sizeof(struct smb_direct_negotiate_resp),
1829 0, SLAB_HWCACHE_ALIGN, NULL);
1830 if (!t->sendmsg_cache)
1831 return -ENOMEM;
1832
1833 t->sendmsg_mempool = mempool_create(t->send_credit_target,
1834 mempool_alloc_slab, mempool_free_slab,
1835 t->sendmsg_cache);
1836 if (!t->sendmsg_mempool)
1837 goto err;
1838
1839 snprintf(name, sizeof(name), "smb_direct_resp_%p", t);
1840 t->recvmsg_cache = kmem_cache_create(name,
1841 sizeof(struct smb_direct_recvmsg) +
1842 t->max_recv_size,
1843 0, SLAB_HWCACHE_ALIGN, NULL);
1844 if (!t->recvmsg_cache)
1845 goto err;
1846
1847 t->recvmsg_mempool =
1848 mempool_create(t->recv_credit_max, mempool_alloc_slab,
1849 mempool_free_slab, t->recvmsg_cache);
1850 if (!t->recvmsg_mempool)
1851 goto err;
1852
1853 INIT_LIST_HEAD(&t->recvmsg_queue);
1854
1855 for (i = 0; i < t->recv_credit_max; i++) {
1856 recvmsg = mempool_alloc(t->recvmsg_mempool, KSMBD_DEFAULT_GFP);
1857 if (!recvmsg)
1858 goto err;
1859 recvmsg->transport = t;
1860 list_add(&recvmsg->list, &t->recvmsg_queue);
1861 }
1862 t->count_avail_recvmsg = t->recv_credit_max;
1863
1864 return 0;
1865 err:
1866 smb_direct_destroy_pools(t);
1867 return -ENOMEM;
1868 }
1869
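/* Allocate the PD, the send/receive completion queues and the RC QP, and
 * set up an MR pool for RDMA R/W when a single transfer can span more
 * pages than the device's max_sgl_rd allows.
 */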
1870 static int smb_direct_create_qpair(struct smb_direct_transport *t,
1871 struct ib_qp_cap *cap)
1872 {
1873 int ret;
1874 struct ib_qp_init_attr qp_attr;
1875 int pages_per_rw;
1876
1877 t->pd = ib_alloc_pd(t->cm_id->device, 0);
1878 if (IS_ERR(t->pd)) {
1879 pr_err("Can't create RDMA PD\n");
1880 ret = PTR_ERR(t->pd);
1881 t->pd = NULL;
1882 return ret;
1883 }
1884
1885 t->send_cq = ib_alloc_cq(t->cm_id->device, t,
1886 smb_direct_send_credit_target + cap->max_rdma_ctxs,
1887 0, IB_POLL_WORKQUEUE);
1888 if (IS_ERR(t->send_cq)) {
1889 pr_err("Can't create RDMA send CQ\n");
1890 ret = PTR_ERR(t->send_cq);
1891 t->send_cq = NULL;
1892 goto err;
1893 }
1894
1895 t->recv_cq = ib_alloc_cq(t->cm_id->device, t,
1896 t->recv_credit_max, 0, IB_POLL_WORKQUEUE);
1897 if (IS_ERR(t->recv_cq)) {
1898 pr_err("Can't create RDMA recv CQ\n");
1899 ret = PTR_ERR(t->recv_cq);
1900 t->recv_cq = NULL;
1901 goto err;
1902 }
1903
1904 memset(&qp_attr, 0, sizeof(qp_attr));
1905 qp_attr.event_handler = smb_direct_qpair_handler;
1906 qp_attr.qp_context = t;
1907 qp_attr.cap = *cap;
1908 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
1909 qp_attr.qp_type = IB_QPT_RC;
1910 qp_attr.send_cq = t->send_cq;
1911 qp_attr.recv_cq = t->recv_cq;
1912 qp_attr.port_num = ~0;
1913
1914 ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr);
1915 if (ret) {
1916 pr_err("Can't create RDMA QP: %d\n", ret);
1917 goto err;
1918 }
1919
1920 t->qp = t->cm_id->qp;
1921 t->cm_id->event_handler = smb_direct_cm_handler;
1922
1923 pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
1924 if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) {
1925 ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs,
1926 t->max_rw_credits, IB_MR_TYPE_MEM_REG,
1927 t->pages_per_rw_credit, 0);
1928 if (ret) {
1929 pr_err("failed to init mr pool count %d pages %d\n",
1930 t->max_rw_credits, t->pages_per_rw_credit);
1931 goto err;
1932 }
1933 }
1934
1935 return 0;
1936 err:
1937 if (t->qp) {
1938 ib_destroy_qp(t->qp);
1939 t->qp = NULL;
1940 }
1941 if (t->recv_cq) {
1942 ib_destroy_cq(t->recv_cq);
1943 t->recv_cq = NULL;
1944 }
1945 if (t->send_cq) {
1946 ib_destroy_cq(t->send_cq);
1947 t->send_cq = NULL;
1948 }
1949 if (t->pd) {
1950 ib_dealloc_pd(t->pd);
1951 t->pd = NULL;
1952 }
1953 return ret;
1954 }
1955
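/* Wait for the client's SMB_DIRECT negotiate request, adopt the sizes it
 * advertises, and send the negotiate response.
 */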
1956 static int smb_direct_prepare(struct ksmbd_transport *t)
1957 {
1958 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1959 struct smb_direct_recvmsg *recvmsg;
1960 struct smb_direct_negotiate_req *req;
1961 int ret;
1962
1963 ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
1964 ret = wait_event_interruptible_timeout(st->wait_status,
1965 st->negotiation_requested ||
1966 st->status == SMB_DIRECT_CS_DISCONNECTED,
1967 SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
1968 if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
1969 return ret < 0 ? ret : -ETIMEDOUT;
1970
1971 recvmsg = get_first_reassembly(st);
1972 if (!recvmsg)
1973 return -ECONNABORTED;
1974
1975 ret = smb_direct_check_recvmsg(recvmsg);
1976 if (ret == -ECONNABORTED)
1977 goto out;
1978
1979 req = (struct smb_direct_negotiate_req *)recvmsg->packet;
1980 st->max_recv_size = min_t(int, st->max_recv_size,
1981 le32_to_cpu(req->preferred_send_size));
1982 st->max_send_size = min_t(int, st->max_send_size,
1983 le32_to_cpu(req->max_receive_size));
1984 st->max_fragmented_send_size =
1985 le32_to_cpu(req->max_fragmented_size);
1986 st->max_fragmented_recv_size =
1987 (st->recv_credit_max * st->max_recv_size) / 2;
1988
1989 ret = smb_direct_send_negotiate_response(st, ret);
1990 out:
1991 spin_lock_irq(&st->reassembly_queue_lock);
1992 st->reassembly_queue_length--;
1993 list_del(&recvmsg->list);
1994 spin_unlock_irq(&st->reassembly_queue_lock);
1995 put_recvmsg(st, recvmsg);
1996
1997 return ret;
1998 }
1999
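/* Bring up a new connection: initialize the transport parameters, create
 * the message pools and the QP, then kick off SMB_DIRECT negotiation.
 */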
2000 static int smb_direct_connect(struct smb_direct_transport *st)
2001 {
2002 int ret;
2003 struct ib_qp_cap qp_cap;
2004
2005 ret = smb_direct_init_params(st, &qp_cap);
2006 if (ret) {
2007 pr_err("Can't configure RDMA parameters\n");
2008 return ret;
2009 }
2010
2011 ret = smb_direct_create_pools(st);
2012 if (ret) {
2013 pr_err("Can't init RDMA pool: %d\n", ret);
2014 return ret;
2015 }
2016
2017 ret = smb_direct_create_qpair(st, &qp_cap);
2018 if (ret) {
2019 pr_err("Can't accept RDMA client: %d\n", ret);
2020 return ret;
2021 }
2022
2023 ret = smb_direct_prepare_negotiation(st);
2024 if (ret) {
2025 pr_err("Can't negotiate: %d\n", ret);
2026 return ret;
2027 }
2028 return 0;
2029 }
2030
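/* FRWR requires memory management extensions and a non-zero
 * fast-registration page list length.
 */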
2031 static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
2032 {
2033 if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
2034 return false;
2035 if (attrs->max_fast_reg_page_list_len == 0)
2036 return false;
2037 return true;
2038 }
2039
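/* Handle an incoming RDMA_CM connect request: allocate a transport,
 * establish the connection and start the per-connection handler thread.
 */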
2040 static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
2041 {
2042 struct smb_direct_transport *t;
2043 struct task_struct *handler;
2044 int ret;
2045
2046 if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
2047 ksmbd_debug(RDMA,
2048 "Fast Registration Work Requests is not supported. device capabilities=%llx\n",
2049 new_cm_id->device->attrs.device_cap_flags);
2050 return -EPROTONOSUPPORT;
2051 }
2052
2053 t = alloc_transport(new_cm_id);
2054 if (!t)
2055 return -ENOMEM;
2056
2057 ret = smb_direct_connect(t);
2058 if (ret)
2059 goto out_err;
2060
2061 handler = kthread_run(ksmbd_conn_handler_loop,
2062 KSMBD_TRANS(t)->conn, "ksmbd:r%u",
2063 smb_direct_port);
2064 if (IS_ERR(handler)) {
2065 ret = PTR_ERR(handler);
2066 pr_err("Can't start thread\n");
2067 goto out_err;
2068 }
2069
2070 return 0;
2071 out_err:
2072 free_transport(t);
2073 return ret;
2074 }
2075
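/* rdma_cm event handler for the listening cm_id; only connect requests
 * are expected here.
 */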
2076 static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
2077 struct rdma_cm_event *event)
2078 {
2079 switch (event->event) {
2080 case RDMA_CM_EVENT_CONNECT_REQUEST: {
2081 int ret = smb_direct_handle_connect_request(cm_id);
2082
2083 if (ret) {
2084 pr_err("Can't create transport: %d\n", ret);
2085 return ret;
2086 }
2087
2088 ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
2089 cm_id);
2090 break;
2091 }
2092 default:
2093 pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
2094 cm_id, rdma_event_msg(event->event), event->event);
2095 break;
2096 }
2097 return 0;
2098 }
2099
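/* Create a listening cm_id bound to INADDR_ANY on the given port and
 * start listening with a backlog of 10.
 */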
2100 static int smb_direct_listen(int port)
2101 {
2102 int ret;
2103 struct rdma_cm_id *cm_id;
2104 struct sockaddr_in sin = {
2105 .sin_family = AF_INET,
2106 .sin_addr.s_addr = htonl(INADDR_ANY),
2107 .sin_port = htons(port),
2108 };
2109
2110 cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
2111 &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
2112 if (IS_ERR(cm_id)) {
2113 pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
2114 return PTR_ERR(cm_id);
2115 }
2116
2117 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
2118 if (ret) {
2119 pr_err("Can't bind: %d\n", ret);
2120 goto err;
2121 }
2122
2123 smb_direct_listener.cm_id = cm_id;
2124
2125 ret = rdma_listen(cm_id, 10);
2126 if (ret) {
2127 pr_err("Can't listen: %d\n", ret);
2128 goto err;
2129 }
2130 return 0;
2131 err:
2132 smb_direct_listener.cm_id = NULL;
2133 rdma_destroy_id(cm_id);
2134 return ret;
2135 }
2136
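/* ib_client add callback: switch to the iWARP port for non-IB devices and
 * track FRWR-capable devices on smb_direct_device_list.
 */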
2137 static int smb_direct_ib_client_add(struct ib_device *ib_dev)
2138 {
2139 struct smb_direct_device *smb_dev;
2140
2141 /* Use the iWARP port (5445) if the device is not an IB CA */
2142 if (ib_dev->node_type != RDMA_NODE_IB_CA)
2143 smb_direct_port = SMB_DIRECT_PORT_IWARP;
2144
2145 if (!rdma_frwr_is_supported(&ib_dev->attrs))
2146 return 0;
2147
2148 smb_dev = kzalloc(sizeof(*smb_dev), KSMBD_DEFAULT_GFP);
2149 if (!smb_dev)
2150 return -ENOMEM;
2151 smb_dev->ib_dev = ib_dev;
2152
2153 write_lock(&smb_direct_device_lock);
2154 list_add(&smb_dev->list, &smb_direct_device_list);
2155 write_unlock(&smb_direct_device_lock);
2156
2157 ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
2158 return 0;
2159 }
2160
2161 static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
2162 void *client_data)
2163 {
2164 struct smb_direct_device *smb_dev, *tmp;
2165
2166 write_lock(&smb_direct_device_lock);
2167 list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
2168 if (smb_dev->ib_dev == ib_dev) {
2169 list_del(&smb_dev->list);
2170 kfree(smb_dev);
2171 break;
2172 }
2173 }
2174 write_unlock(&smb_direct_device_lock);
2175 }
2176
2177 static struct ib_client smb_direct_ib_client = {
2178 .name = "ksmbd_smb_direct_ib",
2179 .add = smb_direct_ib_client_add,
2180 .remove = smb_direct_ib_client_remove,
2181 };
2182
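/* Register the ib_client, create the high-priority workqueue used for
 * granting credits, and start listening for SMB_DIRECT connections.
 */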
2183 int ksmbd_rdma_init(void)
2184 {
2185 int ret;
2186
2187 smb_direct_listener.cm_id = NULL;
2188
2189 ret = ib_register_client(&smb_direct_ib_client);
2190 if (ret) {
2191 pr_err("failed to ib_register_client\n");
2192 return ret;
2193 }
2194
2195 /* When a client is running out of send credits, credits are
2196  * granted by the server sending a packet via this workqueue.
2197  * This avoids the situation where a client cannot send packets
2198  * for lack of credits.
2199  */
2200 smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
2201 WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
2202 if (!smb_direct_wq)
2203 return -ENOMEM;
2204
2205 ret = smb_direct_listen(smb_direct_port);
2206 if (ret) {
2207 destroy_workqueue(smb_direct_wq);
2208 smb_direct_wq = NULL;
2209 pr_err("Can't listen: %d\n", ret);
2210 return ret;
2211 }
2212
2213 ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
2214 smb_direct_listener.cm_id);
2215 return 0;
2216 }
2217
2218 void ksmbd_rdma_destroy(void)
2219 {
2220 if (!smb_direct_listener.cm_id)
2221 return;
2222
2223 ib_unregister_client(&smb_direct_ib_client);
2224 rdma_destroy_id(smb_direct_listener.cm_id);
2225
2226 smb_direct_listener.cm_id = NULL;
2227
2228 if (smb_direct_wq) {
2229 destroy_workqueue(smb_direct_wq);
2230 smb_direct_wq = NULL;
2231 }
2232 }
2233
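/* A netdev is considered RDMA capable if it belongs to one of the tracked
 * FRWR-capable IB devices, or if an IB device bound to it supports FRWR.
 */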
2234 bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
2235 {
2236 struct smb_direct_device *smb_dev;
2237 int i;
2238 bool rdma_capable = false;
2239
2240 read_lock(&smb_direct_device_lock);
2241 list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
2242 for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
2243 struct net_device *ndev;
2244
2245 ndev = ib_device_get_netdev(smb_dev->ib_dev, i + 1);
2246 if (!ndev)
2247 continue;
2248
2249 if (ndev == netdev) {
2250 dev_put(ndev);
2251 rdma_capable = true;
2252 goto out;
2253 }
2254 dev_put(ndev);
2255 }
2256 }
2257 out:
2258 read_unlock(&smb_direct_device_lock);
2259
2260 if (!rdma_capable) {
2261 struct ib_device *ibdev;
2262
2263 ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
2264 if (ibdev) {
2265 rdma_capable = rdma_frwr_is_supported(&ibdev->attrs);
2266 ib_device_put(ibdev);
2267 }
2268 }
2269
2270 ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n",
2271 netdev->name, str_true_false(rdma_capable));
2272
2273 return rdma_capable;
2274 }
2275
2276 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
2277 .prepare = smb_direct_prepare,
2278 .disconnect = smb_direct_disconnect,
2279 .shutdown = smb_direct_shutdown,
2280 .writev = smb_direct_writev,
2281 .read = smb_direct_read,
2282 .rdma_read = smb_direct_rdma_read,
2283 .rdma_write = smb_direct_rdma_write,
2284 };
2285