1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2017, Microsoft Corporation.
4 * Copyright (C) 2018, LG Electronics.
5 *
6 * Author(s): Long Li <longli@microsoft.com>,
7 * Hyunchul Lee <hyc.lee@gmail.com>
8 */
9
10 #define SUBMOD_NAME "smb_direct"
11
12 #include <linux/kthread.h>
13 #include <linux/list.h>
14 #include <linux/mempool.h>
15 #include <linux/highmem.h>
16 #include <linux/scatterlist.h>
17 #include <linux/string_choices.h>
18 #include <rdma/ib_verbs.h>
19 #include <rdma/rdma_cm.h>
20 #include <rdma/rw.h>
21
22 #include "glob.h"
23 #include "connection.h"
24 #include "smb_common.h"
25 #include "../common/smb2status.h"
26 #include "transport_rdma.h"
27
28 #define SMB_DIRECT_PORT_IWARP 5445
29 #define SMB_DIRECT_PORT_INFINIBAND 445
30
31 #define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100)
32
33 /* SMB_DIRECT negotiation timeout in seconds */
34 #define SMB_DIRECT_NEGOTIATE_TIMEOUT 120
35
36 #define SMB_DIRECT_MAX_SEND_SGES 6
37 #define SMB_DIRECT_MAX_RECV_SGES 1
38
39 /*
40 * Default maximum number of RDMA read/write outstanding on this connection
41 * This value may be decreased during QP creation if the hardware limit is lower
42 */
43 #define SMB_DIRECT_CM_INITIATOR_DEPTH 8
44
45 /* Maximum number of retries on data transfer operations */
46 #define SMB_DIRECT_CM_RETRY 6
47 /* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
48 #define SMB_DIRECT_CM_RNR_RETRY 0
49
50 /*
51 * User configurable initial values per SMB_DIRECT transport connection
52 * as defined in [MS-SMBD] 3.1.1.1
53 * These may change after SMB_DIRECT negotiation
54 */
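/*
 * These defaults roughly correspond to the per-connection parameters
 * described in [MS-SMBD] 3.1.1.1: the receive credit maximum, send credit
 * target, maximum send/receive sizes, maximum fragmented receive size and
 * maximum read/write size (mapping stated loosely; see the spec for the
 * exact parameter names).
 */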
55
56 /* Use port 445 as the SMB Direct port by default */
57 static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;
58
59 /* The maximum number of receive credits the local peer will grant to the remote peer */
60 static int smb_direct_receive_credit_max = 255;
61
62 /* The number of send credits the local peer requests from the remote peer */
63 static int smb_direct_send_credit_target = 255;
64
65 /* The maximum size of a single message that can be sent to the remote peer */
66 static int smb_direct_max_send_size = 1364;
67
68 /* The maximum fragmented upper-layer payload receive size supported */
69 static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
70
71 /* The maximum single-message size which can be received */
72 static int smb_direct_max_receive_size = 1364;
73
74 static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;
75
76 static LIST_HEAD(smb_direct_device_list);
77 static DEFINE_RWLOCK(smb_direct_device_lock);
78
79 struct smb_direct_device {
80 struct ib_device *ib_dev;
81 struct list_head list;
82 };
83
84 static struct smb_direct_listener {
85 struct rdma_cm_id *cm_id;
86 } smb_direct_listener;
87
88 static struct workqueue_struct *smb_direct_wq;
89
90 enum smb_direct_status {
91 SMB_DIRECT_CS_NEW = 0,
92 SMB_DIRECT_CS_CONNECTED,
93 SMB_DIRECT_CS_DISCONNECTING,
94 SMB_DIRECT_CS_DISCONNECTED,
95 };
96
97 struct smb_direct_transport {
98 struct ksmbd_transport transport;
99
100 enum smb_direct_status status;
101 bool full_packet_received;
102 wait_queue_head_t wait_status;
103
104 struct rdma_cm_id *cm_id;
105 struct ib_cq *send_cq;
106 struct ib_cq *recv_cq;
107 struct ib_pd *pd;
108 struct ib_qp *qp;
109
110 int max_send_size;
111 int max_recv_size;
112 int max_fragmented_send_size;
113 int max_fragmented_recv_size;
114 int max_rdma_rw_size;
115
116 spinlock_t reassembly_queue_lock;
117 struct list_head reassembly_queue;
118 int reassembly_data_length;
119 int reassembly_queue_length;
120 int first_entry_offset;
121 wait_queue_head_t wait_reassembly_queue;
122
123 spinlock_t receive_credit_lock;
124 int recv_credits;
125 int count_avail_recvmsg;
126 int recv_credit_max;
127 int recv_credit_target;
128
129 spinlock_t recvmsg_queue_lock;
130 struct list_head recvmsg_queue;
131
132 int send_credit_target;
133 atomic_t send_credits;
134 spinlock_t lock_new_recv_credits;
135 int new_recv_credits;
136 int max_rw_credits;
137 int pages_per_rw_credit;
138 atomic_t rw_credits;
139
140 wait_queue_head_t wait_send_credits;
141 wait_queue_head_t wait_rw_credits;
142
143 mempool_t *sendmsg_mempool;
144 struct kmem_cache *sendmsg_cache;
145 mempool_t *recvmsg_mempool;
146 struct kmem_cache *recvmsg_cache;
147
148 wait_queue_head_t wait_send_pending;
149 atomic_t send_pending;
150
151 struct delayed_work post_recv_credits_work;
152 struct work_struct send_immediate_work;
153 struct work_struct disconnect_work;
154
155 bool negotiation_requested;
156 };
157
158 #define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport))
159 #define SMBD_TRANS(t) ((struct smb_direct_transport *)container_of(t, \
160 struct smb_direct_transport, transport))
161 enum {
162 SMB_DIRECT_MSG_NEGOTIATE_REQ = 0,
163 SMB_DIRECT_MSG_DATA_TRANSFER
164 };
165
166 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
167
168 struct smb_direct_send_ctx {
169 struct list_head msg_list;
170 int wr_cnt;
171 bool need_invalidate_rkey;
172 unsigned int remote_key;
173 };
174
175 struct smb_direct_sendmsg {
176 struct smb_direct_transport *transport;
177 struct ib_send_wr wr;
178 struct list_head list;
179 int num_sge;
180 struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES];
181 struct ib_cqe cqe;
182 u8 packet[];
183 };
184
185 struct smb_direct_recvmsg {
186 struct smb_direct_transport *transport;
187 struct list_head list;
188 int type;
189 struct ib_sge sge;
190 struct ib_cqe cqe;
191 bool first_segment;
192 u8 packet[];
193 };
194
195 struct smb_direct_rdma_rw_msg {
196 struct smb_direct_transport *t;
197 struct ib_cqe cqe;
198 int status;
199 struct completion *completion;
200 struct list_head list;
201 struct rdma_rw_ctx rw_ctx;
202 struct sg_table sgt;
203 struct scatterlist sg_list[];
204 };
205
206 void init_smbd_max_io_size(unsigned int sz)
207 {
208 sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE);
209 smb_direct_max_read_write_size = sz;
210 }
211
212 unsigned int get_smbd_max_read_write_size(void)
213 {
214 return smb_direct_max_read_write_size;
215 }
216
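/*
 * Number of pages spanned by a buffer of @size bytes starting at @buf,
 * counting partial pages at both ends. For example, with 4KiB pages an
 * 8192-byte buffer starting 256 bytes into a page spans 3 pages.
 */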
217 static inline int get_buf_page_count(void *buf, int size)
218 {
219 return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
220 (uintptr_t)buf / PAGE_SIZE;
221 }
222
223 static void smb_direct_destroy_pools(struct smb_direct_transport *transport);
224 static void smb_direct_post_recv_credits(struct work_struct *work);
225 static int smb_direct_post_send_data(struct smb_direct_transport *t,
226 struct smb_direct_send_ctx *send_ctx,
227 struct kvec *iov, int niov,
228 int remaining_data_length);
229
230 static inline struct smb_direct_transport *
231 smb_trans_direct_transfort(struct ksmbd_transport *t)
232 {
233 return container_of(t, struct smb_direct_transport, transport);
234 }
235
236 static inline void
237 *smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg)
238 {
239 return (void *)recvmsg->packet;
240 }
241
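/*
 * Heuristic for replenishing receive buffers: repost only once the remaining
 * receive credits drop to 1/8 of the configured maximum (31 with the default
 * of 255) and the number of free recvmsgs is at least a quarter of the
 * remaining credits, so that credit grants are batched rather than trickled.
 */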
242 static inline bool is_receive_credit_post_required(int receive_credits,
243 int avail_recvmsg_count)
244 {
245 return receive_credits <= (smb_direct_receive_credit_max >> 3) &&
246 avail_recvmsg_count >= (receive_credits >> 2);
247 }
248
249 static struct
250 smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t)
251 {
252 struct smb_direct_recvmsg *recvmsg = NULL;
253
254 spin_lock(&t->recvmsg_queue_lock);
255 if (!list_empty(&t->recvmsg_queue)) {
256 recvmsg = list_first_entry(&t->recvmsg_queue,
257 struct smb_direct_recvmsg,
258 list);
259 list_del(&recvmsg->list);
260 }
261 spin_unlock(&t->recvmsg_queue_lock);
262 return recvmsg;
263 }
264
265 static void put_recvmsg(struct smb_direct_transport *t,
266 struct smb_direct_recvmsg *recvmsg)
267 {
268 if (likely(recvmsg->sge.length != 0)) {
269 ib_dma_unmap_single(t->cm_id->device,
270 recvmsg->sge.addr,
271 recvmsg->sge.length,
272 DMA_FROM_DEVICE);
273 recvmsg->sge.length = 0;
274 }
275
276 spin_lock(&t->recvmsg_queue_lock);
277 list_add(&recvmsg->list, &t->recvmsg_queue);
278 spin_unlock(&t->recvmsg_queue_lock);
279 }
280
281 static void enqueue_reassembly(struct smb_direct_transport *t,
282 struct smb_direct_recvmsg *recvmsg,
283 int data_length)
284 {
285 spin_lock(&t->reassembly_queue_lock);
286 list_add_tail(&recvmsg->list, &t->reassembly_queue);
287 t->reassembly_queue_length++;
288 /*
289 * Make sure reassembly_data_length is updated after the list and
290 * reassembly_queue_length are updated. On the dequeue side,
291 * reassembly_data_length is checked without a lock to determine whether
292 * reassembly_queue_length and the list are up to date (pairs with virt_rmb())
293 */
294 virt_wmb();
295 t->reassembly_data_length += data_length;
296 spin_unlock(&t->reassembly_queue_lock);
297 }
298
299 static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t)
300 {
301 if (!list_empty(&t->reassembly_queue))
302 return list_first_entry(&t->reassembly_queue,
303 struct smb_direct_recvmsg, list);
304 else
305 return NULL;
306 }
307
308 static void smb_direct_disconnect_rdma_work(struct work_struct *work)
309 {
310 struct smb_direct_transport *t =
311 container_of(work, struct smb_direct_transport,
312 disconnect_work);
313
314 if (t->status == SMB_DIRECT_CS_CONNECTED) {
315 t->status = SMB_DIRECT_CS_DISCONNECTING;
316 rdma_disconnect(t->cm_id);
317 }
318 }
319
320 static void
321 smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t)
322 {
323 if (t->status == SMB_DIRECT_CS_CONNECTED)
324 queue_work(smb_direct_wq, &t->disconnect_work);
325 }
326
327 static void smb_direct_send_immediate_work(struct work_struct *work)
328 {
329 struct smb_direct_transport *t = container_of(work,
330 struct smb_direct_transport, send_immediate_work);
331
332 if (t->status != SMB_DIRECT_CS_CONNECTED)
333 return;
334
335 smb_direct_post_send_data(t, NULL, NULL, 0, 0);
336 }
337
338 static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
339 {
340 struct smb_direct_transport *t;
341 struct ksmbd_conn *conn;
342
343 t = kzalloc(sizeof(*t), KSMBD_DEFAULT_GFP);
344 if (!t)
345 return NULL;
346
347 t->cm_id = cm_id;
348 cm_id->context = t;
349
350 t->status = SMB_DIRECT_CS_NEW;
351 init_waitqueue_head(&t->wait_status);
352
353 spin_lock_init(&t->reassembly_queue_lock);
354 INIT_LIST_HEAD(&t->reassembly_queue);
355 t->reassembly_data_length = 0;
356 t->reassembly_queue_length = 0;
357 init_waitqueue_head(&t->wait_reassembly_queue);
358 init_waitqueue_head(&t->wait_send_credits);
359 init_waitqueue_head(&t->wait_rw_credits);
360
361 spin_lock_init(&t->receive_credit_lock);
362 spin_lock_init(&t->recvmsg_queue_lock);
363 INIT_LIST_HEAD(&t->recvmsg_queue);
364
365 init_waitqueue_head(&t->wait_send_pending);
366 atomic_set(&t->send_pending, 0);
367
368 spin_lock_init(&t->lock_new_recv_credits);
369
370 INIT_DELAYED_WORK(&t->post_recv_credits_work,
371 smb_direct_post_recv_credits);
372 INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work);
373 INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work);
374
375 conn = ksmbd_conn_alloc();
376 if (!conn)
377 goto err;
378 conn->transport = KSMBD_TRANS(t);
379 KSMBD_TRANS(t)->conn = conn;
380 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops;
381 return t;
382 err:
383 kfree(t);
384 return NULL;
385 }
386
387 static void smb_direct_free_transport(struct ksmbd_transport *kt)
388 {
389 kfree(SMBD_TRANS(kt));
390 }
391
392 static void free_transport(struct smb_direct_transport *t)
393 {
394 struct smb_direct_recvmsg *recvmsg;
395
396 wake_up_interruptible(&t->wait_send_credits);
397
398 ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n");
399 wait_event(t->wait_send_pending,
400 atomic_read(&t->send_pending) == 0);
401
402 cancel_work_sync(&t->disconnect_work);
403 cancel_delayed_work_sync(&t->post_recv_credits_work);
404 cancel_work_sync(&t->send_immediate_work);
405
406 if (t->qp) {
407 ib_drain_qp(t->qp);
408 ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
409 t->qp = NULL;
410 rdma_destroy_qp(t->cm_id);
411 }
412
413 ksmbd_debug(RDMA, "drain the reassembly queue\n");
414 do {
415 spin_lock(&t->reassembly_queue_lock);
416 recvmsg = get_first_reassembly(t);
417 if (recvmsg) {
418 list_del(&recvmsg->list);
419 spin_unlock(&t->reassembly_queue_lock);
420 put_recvmsg(t, recvmsg);
421 } else {
422 spin_unlock(&t->reassembly_queue_lock);
423 }
424 } while (recvmsg);
425 t->reassembly_data_length = 0;
426
427 if (t->send_cq)
428 ib_free_cq(t->send_cq);
429 if (t->recv_cq)
430 ib_free_cq(t->recv_cq);
431 if (t->pd)
432 ib_dealloc_pd(t->pd);
433 if (t->cm_id)
434 rdma_destroy_id(t->cm_id);
435
436 smb_direct_destroy_pools(t);
437 ksmbd_conn_free(KSMBD_TRANS(t)->conn);
438 }
439
440 static struct smb_direct_sendmsg
441 *smb_direct_alloc_sendmsg(struct smb_direct_transport *t)
442 {
443 struct smb_direct_sendmsg *msg;
444
445 msg = mempool_alloc(t->sendmsg_mempool, KSMBD_DEFAULT_GFP);
446 if (!msg)
447 return ERR_PTR(-ENOMEM);
448 msg->transport = t;
449 INIT_LIST_HEAD(&msg->list);
450 msg->num_sge = 0;
451 return msg;
452 }
453
454 static void smb_direct_free_sendmsg(struct smb_direct_transport *t,
455 struct smb_direct_sendmsg *msg)
456 {
457 int i;
458
459 if (msg->num_sge > 0) {
460 ib_dma_unmap_single(t->cm_id->device,
461 msg->sge[0].addr, msg->sge[0].length,
462 DMA_TO_DEVICE);
463 for (i = 1; i < msg->num_sge; i++)
464 ib_dma_unmap_page(t->cm_id->device,
465 msg->sge[i].addr, msg->sge[i].length,
466 DMA_TO_DEVICE);
467 }
468 mempool_free(msg, t->sendmsg_mempool);
469 }
470
471 static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg)
472 {
473 switch (recvmsg->type) {
474 case SMB_DIRECT_MSG_DATA_TRANSFER: {
475 struct smb_direct_data_transfer *req =
476 (struct smb_direct_data_transfer *)recvmsg->packet;
477 struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
478 + le32_to_cpu(req->data_offset));
479 ksmbd_debug(RDMA,
480 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
481 le16_to_cpu(req->credits_granted),
482 le16_to_cpu(req->credits_requested),
483 req->data_length, req->remaining_data_length,
484 hdr->ProtocolId, hdr->Command);
485 break;
486 }
487 case SMB_DIRECT_MSG_NEGOTIATE_REQ: {
488 struct smb_direct_negotiate_req *req =
489 (struct smb_direct_negotiate_req *)recvmsg->packet;
490 ksmbd_debug(RDMA,
491 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n",
492 le16_to_cpu(req->min_version),
493 le16_to_cpu(req->max_version),
494 le16_to_cpu(req->credits_requested),
495 le32_to_cpu(req->preferred_send_size),
496 le32_to_cpu(req->max_receive_size),
497 le32_to_cpu(req->max_fragmented_size));
498 if (le16_to_cpu(req->min_version) > 0x0100 ||
499 le16_to_cpu(req->max_version) < 0x0100)
500 return -EOPNOTSUPP;
501 if (le16_to_cpu(req->credits_requested) <= 0 ||
502 le32_to_cpu(req->max_receive_size) <= 128 ||
503 le32_to_cpu(req->max_fragmented_size) <=
504 128 * 1024)
505 return -ECONNABORTED;
506
507 break;
508 }
509 default:
510 return -EINVAL;
511 }
512 return 0;
513 }
514
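/*
 * Receive completion handler. A NEGOTIATE_REQ moves the transport to the
 * connected state and wakes the negotiation waiter; a DATA_TRANSFER updates
 * send/receive credits from the peer's header fields, schedules credit
 * replenishment or an immediate send when requested, and queues any payload
 * on the reassembly queue for smb_direct_read(). On error or an unknown
 * message type the buffer is returned and the connection is torn down.
 */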
515 static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
516 {
517 struct smb_direct_recvmsg *recvmsg;
518 struct smb_direct_transport *t;
519
520 recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe);
521 t = recvmsg->transport;
522
523 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
524 put_recvmsg(t, recvmsg);
525 if (wc->status != IB_WC_WR_FLUSH_ERR) {
526 pr_err("Recv error. status='%s (%d)' opcode=%d\n",
527 ib_wc_status_msg(wc->status), wc->status,
528 wc->opcode);
529 smb_direct_disconnect_rdma_connection(t);
530 }
531 return;
532 }
533
534 ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n",
535 ib_wc_status_msg(wc->status), wc->status,
536 wc->opcode);
537
538 ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr,
539 recvmsg->sge.length, DMA_FROM_DEVICE);
540
541 switch (recvmsg->type) {
542 case SMB_DIRECT_MSG_NEGOTIATE_REQ:
543 if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) {
544 put_recvmsg(t, recvmsg);
545 smb_direct_disconnect_rdma_connection(t);
546 return;
547 }
548 t->negotiation_requested = true;
549 t->full_packet_received = true;
550 t->status = SMB_DIRECT_CS_CONNECTED;
551 enqueue_reassembly(t, recvmsg, 0);
552 wake_up_interruptible(&t->wait_status);
553 return;
554 case SMB_DIRECT_MSG_DATA_TRANSFER: {
555 struct smb_direct_data_transfer *data_transfer =
556 (struct smb_direct_data_transfer *)recvmsg->packet;
557 unsigned int data_length;
558 int avail_recvmsg_count, receive_credits;
559
560 if (wc->byte_len <
561 offsetof(struct smb_direct_data_transfer, padding)) {
562 put_recvmsg(t, recvmsg);
563 smb_direct_disconnect_rdma_connection(t);
564 return;
565 }
566
567 data_length = le32_to_cpu(data_transfer->data_length);
568 if (data_length) {
569 if (wc->byte_len < sizeof(struct smb_direct_data_transfer) +
570 (u64)data_length) {
571 put_recvmsg(t, recvmsg);
572 smb_direct_disconnect_rdma_connection(t);
573 return;
574 }
575
576 if (t->full_packet_received)
577 recvmsg->first_segment = true;
578
579 if (le32_to_cpu(data_transfer->remaining_data_length))
580 t->full_packet_received = false;
581 else
582 t->full_packet_received = true;
583
584 spin_lock(&t->receive_credit_lock);
585 receive_credits = --(t->recv_credits);
586 avail_recvmsg_count = t->count_avail_recvmsg;
587 spin_unlock(&t->receive_credit_lock);
588 } else {
589 spin_lock(&t->receive_credit_lock);
590 receive_credits = --(t->recv_credits);
591 avail_recvmsg_count = ++(t->count_avail_recvmsg);
592 spin_unlock(&t->receive_credit_lock);
593 }
594
595 t->recv_credit_target =
596 le16_to_cpu(data_transfer->credits_requested);
597 atomic_add(le16_to_cpu(data_transfer->credits_granted),
598 &t->send_credits);
599
600 if (le16_to_cpu(data_transfer->flags) &
601 SMB_DIRECT_RESPONSE_REQUESTED)
602 queue_work(smb_direct_wq, &t->send_immediate_work);
603
604 if (atomic_read(&t->send_credits) > 0)
605 wake_up_interruptible(&t->wait_send_credits);
606
607 if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count))
608 mod_delayed_work(smb_direct_wq,
609 &t->post_recv_credits_work, 0);
610
611 if (data_length) {
612 enqueue_reassembly(t, recvmsg, (int)data_length);
613 wake_up_interruptible(&t->wait_reassembly_queue);
614 } else
615 put_recvmsg(t, recvmsg);
616
617 return;
618 }
619 }
620
621 /*
622 * This is an internal error!
623 */
624 WARN_ON_ONCE(recvmsg->type != SMB_DIRECT_MSG_DATA_TRANSFER);
625 put_recvmsg(t, recvmsg);
626 smb_direct_disconnect_rdma_connection(t);
627 }
628
629 static int smb_direct_post_recv(struct smb_direct_transport *t,
630 struct smb_direct_recvmsg *recvmsg)
631 {
632 struct ib_recv_wr wr;
633 int ret;
634
635 recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device,
636 recvmsg->packet, t->max_recv_size,
637 DMA_FROM_DEVICE);
638 ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr);
639 if (ret)
640 return ret;
641 recvmsg->sge.length = t->max_recv_size;
642 recvmsg->sge.lkey = t->pd->local_dma_lkey;
643 recvmsg->cqe.done = recv_done;
644
645 wr.wr_cqe = &recvmsg->cqe;
646 wr.next = NULL;
647 wr.sg_list = &recvmsg->sge;
648 wr.num_sge = 1;
649
650 ret = ib_post_recv(t->qp, &wr, NULL);
651 if (ret) {
652 pr_err("Can't post recv: %d\n", ret);
653 ib_dma_unmap_single(t->cm_id->device,
654 recvmsg->sge.addr, recvmsg->sge.length,
655 DMA_FROM_DEVICE);
656 recvmsg->sge.length = 0;
657 smb_direct_disconnect_rdma_connection(t);
658 return ret;
659 }
660 return ret;
661 }
662
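/*
 * Copy up to @size bytes of reassembled payload into @buf. When the caller
 * asks for the 4-byte RFC1002 length of a new message, it is synthesized
 * from data_length + remaining_data_length of the first segment. Blocks
 * until enough data has arrived or the transport is disconnected.
 */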
663 static int smb_direct_read(struct ksmbd_transport *t, char *buf,
664 unsigned int size, int unused)
665 {
666 struct smb_direct_recvmsg *recvmsg;
667 struct smb_direct_data_transfer *data_transfer;
668 int to_copy, to_read, data_read, offset;
669 u32 data_length, remaining_data_length, data_offset;
670 int rc;
671 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
672
673 again:
674 if (st->status != SMB_DIRECT_CS_CONNECTED) {
675 pr_err("disconnected\n");
676 return -ENOTCONN;
677 }
678
679 /*
680 * No need to hold the reassembly queue lock all the time as we are
681 * the only one reading from the front of the queue. The transport
682 * may add more entries to the back of the queue at the same time
683 */
684 if (st->reassembly_data_length >= size) {
685 int queue_length;
686 int queue_removed = 0;
687
688 /*
689 * Need to make sure reassembly_data_length is read before
690 * reading reassembly_queue_length and calling
691 * get_first_reassembly. This call is lock-free
692 * because we never read the end of the queue, which is being
693 * updated in SOFTIRQ context as more data is received
694 */
695 virt_rmb();
696 queue_length = st->reassembly_queue_length;
697 data_read = 0;
698 to_read = size;
699 offset = st->first_entry_offset;
700 while (data_read < size) {
701 recvmsg = get_first_reassembly(st);
702 data_transfer = smb_direct_recvmsg_payload(recvmsg);
703 data_length = le32_to_cpu(data_transfer->data_length);
704 remaining_data_length =
705 le32_to_cpu(data_transfer->remaining_data_length);
706 data_offset = le32_to_cpu(data_transfer->data_offset);
707
708 /*
709 * The upper layer expects RFC1002 length at the
710 * beginning of the payload. Return it to indicate
711 * the total length of the packet. This minimizes the
712 * changes to the upper-layer packet processing logic and
713 * will eventually be removed when an intermediate
714 * transport layer is added
715 */
716 if (recvmsg->first_segment && size == 4) {
717 unsigned int rfc1002_len =
718 data_length + remaining_data_length;
719 *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
720 data_read = 4;
721 recvmsg->first_segment = false;
722 ksmbd_debug(RDMA,
723 "returning rfc1002 length %d\n",
724 rfc1002_len);
725 goto read_rfc1002_done;
726 }
727
728 to_copy = min_t(int, data_length - offset, to_read);
729 memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
730 to_copy);
731
732 /* move on to the next buffer? */
733 if (to_copy == data_length - offset) {
734 queue_length--;
735 /*
736 * No need to lock if we are not at the
737 * end of the queue
738 */
739 if (queue_length) {
740 list_del(&recvmsg->list);
741 } else {
742 spin_lock_irq(&st->reassembly_queue_lock);
743 list_del(&recvmsg->list);
744 spin_unlock_irq(&st->reassembly_queue_lock);
745 }
746 queue_removed++;
747 put_recvmsg(st, recvmsg);
748 offset = 0;
749 } else {
750 offset += to_copy;
751 }
752
753 to_read -= to_copy;
754 data_read += to_copy;
755 }
756
757 spin_lock_irq(&st->reassembly_queue_lock);
758 st->reassembly_data_length -= data_read;
759 st->reassembly_queue_length -= queue_removed;
760 spin_unlock_irq(&st->reassembly_queue_lock);
761
762 spin_lock(&st->receive_credit_lock);
763 st->count_avail_recvmsg += queue_removed;
764 if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) {
765 spin_unlock(&st->receive_credit_lock);
766 mod_delayed_work(smb_direct_wq,
767 &st->post_recv_credits_work, 0);
768 } else {
769 spin_unlock(&st->receive_credit_lock);
770 }
771
772 st->first_entry_offset = offset;
773 ksmbd_debug(RDMA,
774 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
775 data_read, st->reassembly_data_length,
776 st->first_entry_offset);
777 read_rfc1002_done:
778 return data_read;
779 }
780
781 ksmbd_debug(RDMA, "wait_event on more data\n");
782 rc = wait_event_interruptible(st->wait_reassembly_queue,
783 st->reassembly_data_length >= size ||
784 st->status != SMB_DIRECT_CS_CONNECTED);
785 if (rc)
786 return -EINTR;
787
788 goto again;
789 }
790
791 static void smb_direct_post_recv_credits(struct work_struct *work)
792 {
793 struct smb_direct_transport *t = container_of(work,
794 struct smb_direct_transport, post_recv_credits_work.work);
795 struct smb_direct_recvmsg *recvmsg;
796 int receive_credits, credits = 0;
797 int ret;
798
799 spin_lock(&t->receive_credit_lock);
800 receive_credits = t->recv_credits;
801 spin_unlock(&t->receive_credit_lock);
802
803 if (receive_credits < t->recv_credit_target) {
804 while (true) {
805 recvmsg = get_free_recvmsg(t);
806 if (!recvmsg)
807 break;
808
809 recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER;
810 recvmsg->first_segment = false;
811
812 ret = smb_direct_post_recv(t, recvmsg);
813 if (ret) {
814 pr_err("Can't post recv: %d\n", ret);
815 put_recvmsg(t, recvmsg);
816 break;
817 }
818 credits++;
819 }
820 }
821
822 spin_lock(&t->receive_credit_lock);
823 t->recv_credits += credits;
824 t->count_avail_recvmsg -= credits;
825 spin_unlock(&t->receive_credit_lock);
826
827 spin_lock(&t->lock_new_recv_credits);
828 t->new_recv_credits += credits;
829 spin_unlock(&t->lock_new_recv_credits);
830
831 if (credits)
832 queue_work(smb_direct_wq, &t->send_immediate_work);
833 }
834
835 static void send_done(struct ib_cq *cq, struct ib_wc *wc)
836 {
837 struct smb_direct_sendmsg *sendmsg, *sibling;
838 struct smb_direct_transport *t;
839 struct list_head *pos, *prev, *end;
840
841 sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe);
842 t = sendmsg->transport;
843
844 ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n",
845 ib_wc_status_msg(wc->status), wc->status,
846 wc->opcode);
847
848 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
849 pr_err("Send error. status='%s (%d)', opcode=%d\n",
850 ib_wc_status_msg(wc->status), wc->status,
851 wc->opcode);
852 smb_direct_disconnect_rdma_connection(t);
853 }
854
855 if (atomic_dec_and_test(&t->send_pending))
856 wake_up(&t->wait_send_pending);
857
858 /* iterate and free the list of messages in reverse. the list's head
859 * is invalid.
860 */
861 for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next;
862 prev != end; pos = prev, prev = prev->prev) {
863 sibling = container_of(pos, struct smb_direct_sendmsg, list);
864 smb_direct_free_sendmsg(t, sibling);
865 }
866
867 sibling = container_of(pos, struct smb_direct_sendmsg, list);
868 smb_direct_free_sendmsg(t, sibling);
869 }
870
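/*
 * Take the receive credits accumulated by post_recv_credits_work so they can
 * be advertised to the peer in the next packet's CreditsGranted field; the
 * counter is reset under lock_new_recv_credits.
 */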
871 static int manage_credits_prior_sending(struct smb_direct_transport *t)
872 {
873 int new_credits;
874
875 spin_lock(&t->lock_new_recv_credits);
876 new_credits = t->new_recv_credits;
877 t->new_recv_credits = 0;
878 spin_unlock(&t->lock_new_recv_credits);
879
880 return new_credits;
881 }
882
883 static int smb_direct_post_send(struct smb_direct_transport *t,
884 struct ib_send_wr *wr)
885 {
886 int ret;
887
888 atomic_inc(&t->send_pending);
889 ret = ib_post_send(t->qp, wr, NULL);
890 if (ret) {
891 pr_err("failed to post send: %d\n", ret);
892 if (atomic_dec_and_test(&t->send_pending))
893 wake_up(&t->wait_send_pending);
894 smb_direct_disconnect_rdma_connection(t);
895 }
896 return ret;
897 }
898
899 static void smb_direct_send_ctx_init(struct smb_direct_transport *t,
900 struct smb_direct_send_ctx *send_ctx,
901 bool need_invalidate_rkey,
902 unsigned int remote_key)
903 {
904 INIT_LIST_HEAD(&send_ctx->msg_list);
905 send_ctx->wr_cnt = 0;
906 send_ctx->need_invalidate_rkey = need_invalidate_rkey;
907 send_ctx->remote_key = remote_key;
908 }
909
910 static int smb_direct_flush_send_list(struct smb_direct_transport *t,
911 struct smb_direct_send_ctx *send_ctx,
912 bool is_last)
913 {
914 struct smb_direct_sendmsg *first, *last;
915 int ret;
916
917 if (list_empty(&send_ctx->msg_list))
918 return 0;
919
920 first = list_first_entry(&send_ctx->msg_list,
921 struct smb_direct_sendmsg,
922 list);
923 last = list_last_entry(&send_ctx->msg_list,
924 struct smb_direct_sendmsg,
925 list);
926
927 last->wr.send_flags = IB_SEND_SIGNALED;
928 last->wr.wr_cqe = &last->cqe;
929 if (is_last && send_ctx->need_invalidate_rkey) {
930 last->wr.opcode = IB_WR_SEND_WITH_INV;
931 last->wr.ex.invalidate_rkey = send_ctx->remote_key;
932 }
933
934 ret = smb_direct_post_send(t, &first->wr);
935 if (!ret) {
936 smb_direct_send_ctx_init(t, send_ctx,
937 send_ctx->need_invalidate_rkey,
938 send_ctx->remote_key);
939 } else {
940 atomic_add(send_ctx->wr_cnt, &t->send_credits);
941 wake_up(&t->wait_send_credits);
942 list_for_each_entry_safe(first, last, &send_ctx->msg_list,
943 list) {
944 smb_direct_free_sendmsg(t, first);
945 }
946 }
947 return ret;
948 }
949
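/*
 * Optimistically subtract @needed from @total_credits; if that would make
 * the count negative, give the credits back and sleep until enough are
 * returned or the connection drops (-ENOTCONN).
 */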
950 static int wait_for_credits(struct smb_direct_transport *t,
951 wait_queue_head_t *waitq, atomic_t *total_credits,
952 int needed)
953 {
954 int ret;
955
956 do {
957 if (atomic_sub_return(needed, total_credits) >= 0)
958 return 0;
959
960 atomic_add(needed, total_credits);
961 ret = wait_event_interruptible(*waitq,
962 atomic_read(total_credits) >= needed ||
963 t->status != SMB_DIRECT_CS_CONNECTED);
964
965 if (t->status != SMB_DIRECT_CS_CONNECTED)
966 return -ENOTCONN;
967 else if (ret < 0)
968 return ret;
969 } while (true);
970 }
971
972 static int wait_for_send_credits(struct smb_direct_transport *t,
973 struct smb_direct_send_ctx *send_ctx)
974 {
975 int ret;
976
977 if (send_ctx &&
978 (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) {
979 ret = smb_direct_flush_send_list(t, send_ctx, false);
980 if (ret)
981 return ret;
982 }
983
984 return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1);
985 }
986
987 static int wait_for_rw_credits(struct smb_direct_transport *t, int credits)
988 {
989 return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits);
990 }
991
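/*
 * One R/W credit covers up to pages_per_rw_credit pages. For example, a
 * page-aligned 1MiB buffer spans 256 4KiB pages and costs one credit when
 * pages_per_rw_credit is 256; the same buffer starting mid-page spans 257
 * pages and costs two.
 */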
992 static int calc_rw_credits(struct smb_direct_transport *t,
993 char *buf, unsigned int len)
994 {
995 return DIV_ROUND_UP(get_buf_page_count(buf, len),
996 t->pages_per_rw_credit);
997 }
998
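/*
 * Build the SMB_DIRECT data transfer header for a send carrying @size
 * payload bytes and map it for DMA. A non-zero payload uses data_offset 24:
 * the 20-byte header plus 4 bytes of padding so the SMB2 data starts
 * 8-byte aligned, matching the data transfer message layout in [MS-SMBD].
 */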
999 static int smb_direct_create_header(struct smb_direct_transport *t,
1000 int size, int remaining_data_length,
1001 struct smb_direct_sendmsg **sendmsg_out)
1002 {
1003 struct smb_direct_sendmsg *sendmsg;
1004 struct smb_direct_data_transfer *packet;
1005 int header_length;
1006 int ret;
1007
1008 sendmsg = smb_direct_alloc_sendmsg(t);
1009 if (IS_ERR(sendmsg))
1010 return PTR_ERR(sendmsg);
1011
1012 /* Fill in the packet header */
1013 packet = (struct smb_direct_data_transfer *)sendmsg->packet;
1014 packet->credits_requested = cpu_to_le16(t->send_credit_target);
1015 packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
1016
1017 packet->flags = 0;
1018 packet->reserved = 0;
1019 if (!size)
1020 packet->data_offset = 0;
1021 else
1022 packet->data_offset = cpu_to_le32(24);
1023 packet->data_length = cpu_to_le32(size);
1024 packet->remaining_data_length = cpu_to_le32(remaining_data_length);
1025 packet->padding = 0;
1026
1027 ksmbd_debug(RDMA,
1028 "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
1029 le16_to_cpu(packet->credits_requested),
1030 le16_to_cpu(packet->credits_granted),
1031 le32_to_cpu(packet->data_offset),
1032 le32_to_cpu(packet->data_length),
1033 le32_to_cpu(packet->remaining_data_length));
1034
1035 /* Map the packet to DMA */
1036 header_length = sizeof(struct smb_direct_data_transfer);
1037 /* If this is a packet without payload, don't send padding */
1038 if (!size)
1039 header_length =
1040 offsetof(struct smb_direct_data_transfer, padding);
1041
1042 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
1043 (void *)packet,
1044 header_length,
1045 DMA_TO_DEVICE);
1046 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
1047 if (ret) {
1048 smb_direct_free_sendmsg(t, sendmsg);
1049 return ret;
1050 }
1051
1052 sendmsg->num_sge = 1;
1053 sendmsg->sge[0].length = header_length;
1054 sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
1055
1056 *sendmsg_out = sendmsg;
1057 return 0;
1058 }
1059
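/*
 * Fill @sg_list with one entry per page spanned by @buf (which may be a
 * vmalloc or kmapped address) and return the entry count. For example, a
 * 9000-byte buffer starting 512 bytes into a 4KiB page yields three entries
 * of 3584, 4096 and 1320 bytes.
 */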
1060 static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries)
1061 {
1062 bool high = is_vmalloc_addr(buf);
1063 struct page *page;
1064 int offset, len;
1065 int i = 0;
1066
1067 if (size <= 0 || nentries < get_buf_page_count(buf, size))
1068 return -EINVAL;
1069
1070 offset = offset_in_page(buf);
1071 buf -= offset;
1072 while (size > 0) {
1073 len = min_t(int, PAGE_SIZE - offset, size);
1074 if (high)
1075 page = vmalloc_to_page(buf);
1076 else
1077 page = kmap_to_page(buf);
1078
1079 if (!sg_list)
1080 return -EINVAL;
1081 sg_set_page(sg_list, page, len, offset);
1082 sg_list = sg_next(sg_list);
1083
1084 buf += PAGE_SIZE;
1085 size -= len;
1086 offset = 0;
1087 i++;
1088 }
1089 return i;
1090 }
1091
1092 static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
1093 struct scatterlist *sg_list, int nentries,
1094 enum dma_data_direction dir)
1095 {
1096 int npages;
1097
1098 npages = get_sg_list(buf, size, sg_list, nentries);
1099 if (npages < 0)
1100 return -EINVAL;
1101 return ib_dma_map_sg(device, sg_list, npages, dir);
1102 }
1103
1104 static int post_sendmsg(struct smb_direct_transport *t,
1105 struct smb_direct_send_ctx *send_ctx,
1106 struct smb_direct_sendmsg *msg)
1107 {
1108 int i;
1109
1110 for (i = 0; i < msg->num_sge; i++)
1111 ib_dma_sync_single_for_device(t->cm_id->device,
1112 msg->sge[i].addr, msg->sge[i].length,
1113 DMA_TO_DEVICE);
1114
1115 msg->cqe.done = send_done;
1116 msg->wr.opcode = IB_WR_SEND;
1117 msg->wr.sg_list = &msg->sge[0];
1118 msg->wr.num_sge = msg->num_sge;
1119 msg->wr.next = NULL;
1120
1121 if (send_ctx) {
1122 msg->wr.wr_cqe = NULL;
1123 msg->wr.send_flags = 0;
1124 if (!list_empty(&send_ctx->msg_list)) {
1125 struct smb_direct_sendmsg *last;
1126
1127 last = list_last_entry(&send_ctx->msg_list,
1128 struct smb_direct_sendmsg,
1129 list);
1130 last->wr.next = &msg->wr;
1131 }
1132 list_add_tail(&msg->list, &send_ctx->msg_list);
1133 send_ctx->wr_cnt++;
1134 return 0;
1135 }
1136
1137 msg->wr.wr_cqe = &msg->cqe;
1138 msg->wr.send_flags = IB_SEND_SIGNALED;
1139 return smb_direct_post_send(t, &msg->wr);
1140 }
1141
1142 static int smb_direct_post_send_data(struct smb_direct_transport *t,
1143 struct smb_direct_send_ctx *send_ctx,
1144 struct kvec *iov, int niov,
1145 int remaining_data_length)
1146 {
1147 int i, j, ret;
1148 struct smb_direct_sendmsg *msg;
1149 int data_length;
1150 struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1];
1151
1152 ret = wait_for_send_credits(t, send_ctx);
1153 if (ret)
1154 return ret;
1155
1156 data_length = 0;
1157 for (i = 0; i < niov; i++)
1158 data_length += iov[i].iov_len;
1159
1160 ret = smb_direct_create_header(t, data_length, remaining_data_length,
1161 &msg);
1162 if (ret) {
1163 atomic_inc(&t->send_credits);
1164 return ret;
1165 }
1166
1167 for (i = 0; i < niov; i++) {
1168 struct ib_sge *sge;
1169 int sg_cnt;
1170
1171 sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1);
1172 sg_cnt = get_mapped_sg_list(t->cm_id->device,
1173 iov[i].iov_base, iov[i].iov_len,
1174 sg, SMB_DIRECT_MAX_SEND_SGES - 1,
1175 DMA_TO_DEVICE);
1176 if (sg_cnt <= 0) {
1177 pr_err("failed to map buffer\n");
1178 ret = -ENOMEM;
1179 goto err;
1180 } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) {
1181 pr_err("buffer not fitted into sges\n");
1182 ret = -E2BIG;
1183 ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
1184 DMA_TO_DEVICE);
1185 goto err;
1186 }
1187
1188 for (j = 0; j < sg_cnt; j++) {
1189 sge = &msg->sge[msg->num_sge];
1190 sge->addr = sg_dma_address(&sg[j]);
1191 sge->length = sg_dma_len(&sg[j]);
1192 sge->lkey = t->pd->local_dma_lkey;
1193 msg->num_sge++;
1194 }
1195 }
1196
1197 ret = post_sendmsg(t, send_ctx, msg);
1198 if (ret)
1199 goto err;
1200 return 0;
1201 err:
1202 smb_direct_free_sendmsg(t, msg);
1203 atomic_inc(&t->send_credits);
1204 return ret;
1205 }
1206
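/*
 * Send an upper-layer message given as an iovec array. The leading 4-byte
 * RFC1002 header is skipped, consecutive vectors are coalesced into sends no
 * larger than max_send_size minus the data transfer header, and a single
 * oversized vector is split across several sends. The batched work requests
 * are flushed at the end and the function waits for all sends to complete.
 */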
1207 static int smb_direct_writev(struct ksmbd_transport *t,
1208 struct kvec *iov, int niovs, int buflen,
1209 bool need_invalidate, unsigned int remote_key)
1210 {
1211 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1212 int remaining_data_length;
1213 int start, i, j;
1214 int max_iov_size = st->max_send_size -
1215 sizeof(struct smb_direct_data_transfer);
1216 int ret;
1217 struct kvec vec;
1218 struct smb_direct_send_ctx send_ctx;
1219
1220 if (st->status != SMB_DIRECT_CS_CONNECTED)
1221 return -ENOTCONN;
1222
1223 //FIXME: skip RFC1002 header..
1224 buflen -= 4;
1225
1226 remaining_data_length = buflen;
1227 ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
1228
1229 smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
1230 start = i = 1;
1231 buflen = 0;
1232 while (true) {
1233 buflen += iov[i].iov_len;
1234 if (buflen > max_iov_size) {
1235 if (i > start) {
1236 remaining_data_length -=
1237 (buflen - iov[i].iov_len);
1238 ret = smb_direct_post_send_data(st, &send_ctx,
1239 &iov[start], i - start,
1240 remaining_data_length);
1241 if (ret)
1242 goto done;
1243 } else {
1244 /* iov[start] is too big, break it */
1245 int nvec = (buflen + max_iov_size - 1) /
1246 max_iov_size;
1247
1248 for (j = 0; j < nvec; j++) {
1249 vec.iov_base =
1250 (char *)iov[start].iov_base +
1251 j * max_iov_size;
1252 vec.iov_len =
1253 min_t(int, max_iov_size,
1254 buflen - max_iov_size * j);
1255 remaining_data_length -= vec.iov_len;
1256 ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
1257 remaining_data_length);
1258 if (ret)
1259 goto done;
1260 }
1261 i++;
1262 if (i == niovs)
1263 break;
1264 }
1265 start = i;
1266 buflen = 0;
1267 } else {
1268 i++;
1269 if (i == niovs) {
1270 /* send out all remaining vecs */
1271 remaining_data_length -= buflen;
1272 ret = smb_direct_post_send_data(st, &send_ctx,
1273 &iov[start], i - start,
1274 remaining_data_length);
1275 if (ret)
1276 goto done;
1277 break;
1278 }
1279 }
1280 }
1281
1282 done:
1283 ret = smb_direct_flush_send_list(st, &send_ctx, true);
1284
1285 /*
1286 * As an optimization, we don't wait for individual I/O to finish
1287 * before sending the next one.
1288 * Send them all and wait for the pending send count to reach 0,
1289 * which means all the I/Os have been posted and we are good to return
1290 */
1291
1292 wait_event(st->wait_send_pending,
1293 atomic_read(&st->send_pending) == 0);
1294 return ret;
1295 }
1296
1297 static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t,
1298 struct smb_direct_rdma_rw_msg *msg,
1299 enum dma_data_direction dir)
1300 {
1301 rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
1302 msg->sgt.sgl, msg->sgt.nents, dir);
1303 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1304 kfree(msg);
1305 }
1306
1307 static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
1308 enum dma_data_direction dir)
1309 {
1310 struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe,
1311 struct smb_direct_rdma_rw_msg, cqe);
1312 struct smb_direct_transport *t = msg->t;
1313
1314 if (wc->status != IB_WC_SUCCESS) {
1315 msg->status = -EIO;
1316 pr_err("read/write error. opcode = %d, status = %s(%d)\n",
1317 wc->opcode, ib_wc_status_msg(wc->status), wc->status);
1318 if (wc->status != IB_WC_WR_FLUSH_ERR)
1319 smb_direct_disconnect_rdma_connection(t);
1320 }
1321
1322 complete(msg->completion);
1323 }
1324
1325 static void read_done(struct ib_cq *cq, struct ib_wc *wc)
1326 {
1327 read_write_done(cq, wc, DMA_FROM_DEVICE);
1328 }
1329
1330 static void write_done(struct ib_cq *cq, struct ib_wc *wc)
1331 {
1332 read_write_done(cq, wc, DMA_TO_DEVICE);
1333 }
1334
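/*
 * Perform an RDMA read or write described by the client's buffer
 * descriptors. The first pass clamps descriptor lengths to @buf_len and
 * counts the R/W credits needed; the second builds one rdma_rw_ctx per
 * descriptor. The per-descriptor work requests are chained, posted as a
 * single chain and waited on via the shared on-stack completion before the
 * contexts are destroyed and the credits returned.
 */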
1335 static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
1336 void *buf, int buf_len,
1337 struct smb2_buffer_desc_v1 *desc,
1338 unsigned int desc_len,
1339 bool is_read)
1340 {
1341 struct smb_direct_rdma_rw_msg *msg, *next_msg;
1342 int i, ret;
1343 DECLARE_COMPLETION_ONSTACK(completion);
1344 struct ib_send_wr *first_wr;
1345 LIST_HEAD(msg_list);
1346 char *desc_buf;
1347 int credits_needed;
1348 unsigned int desc_buf_len, desc_num = 0;
1349
1350 if (t->status != SMB_DIRECT_CS_CONNECTED)
1351 return -ENOTCONN;
1352
1353 if (buf_len > t->max_rdma_rw_size)
1354 return -EINVAL;
1355
1356 /* calculate needed credits */
1357 credits_needed = 0;
1358 desc_buf = buf;
1359 for (i = 0; i < desc_len / sizeof(*desc); i++) {
1360 if (!buf_len)
1361 break;
1362
1363 desc_buf_len = le32_to_cpu(desc[i].length);
1364 if (!desc_buf_len)
1365 return -EINVAL;
1366
1367 if (desc_buf_len > buf_len) {
1368 desc_buf_len = buf_len;
1369 desc[i].length = cpu_to_le32(desc_buf_len);
1370 buf_len = 0;
1371 }
1372
1373 credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len);
1374 desc_buf += desc_buf_len;
1375 buf_len -= desc_buf_len;
1376 desc_num++;
1377 }
1378
1379 ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n",
1380 str_read_write(is_read), buf_len, credits_needed);
1381
1382 ret = wait_for_rw_credits(t, credits_needed);
1383 if (ret < 0)
1384 return ret;
1385
1386 /* build rdma_rw_ctx for each descriptor */
1387 desc_buf = buf;
1388 for (i = 0; i < desc_num; i++) {
1389 msg = kzalloc(struct_size(msg, sg_list, SG_CHUNK_SIZE),
1390 KSMBD_DEFAULT_GFP);
1391 if (!msg) {
1392 ret = -ENOMEM;
1393 goto out;
1394 }
1395
1396 desc_buf_len = le32_to_cpu(desc[i].length);
1397
1398 msg->t = t;
1399 msg->cqe.done = is_read ? read_done : write_done;
1400 msg->completion = &completion;
1401
1402 msg->sgt.sgl = &msg->sg_list[0];
1403 ret = sg_alloc_table_chained(&msg->sgt,
1404 get_buf_page_count(desc_buf, desc_buf_len),
1405 msg->sg_list, SG_CHUNK_SIZE);
1406 if (ret) {
1407 kfree(msg);
1408 ret = -ENOMEM;
1409 goto out;
1410 }
1411
1412 ret = get_sg_list(desc_buf, desc_buf_len,
1413 msg->sgt.sgl, msg->sgt.orig_nents);
1414 if (ret < 0) {
1415 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1416 kfree(msg);
1417 goto out;
1418 }
1419
1420 ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
1421 msg->sgt.sgl,
1422 get_buf_page_count(desc_buf, desc_buf_len),
1423 0,
1424 le64_to_cpu(desc[i].offset),
1425 le32_to_cpu(desc[i].token),
1426 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1427 if (ret < 0) {
1428 pr_err("failed to init rdma_rw_ctx: %d\n", ret);
1429 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1430 kfree(msg);
1431 goto out;
1432 }
1433
1434 list_add_tail(&msg->list, &msg_list);
1435 desc_buf += desc_buf_len;
1436 }
1437
1438 /* concatenate work requests of rdma_rw_ctxs */
1439 first_wr = NULL;
1440 list_for_each_entry_reverse(msg, &msg_list, list) {
1441 first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
1442 &msg->cqe, first_wr);
1443 }
1444
1445 ret = ib_post_send(t->qp, first_wr, NULL);
1446 if (ret) {
1447 pr_err("failed to post send wr for RDMA R/W: %d\n", ret);
1448 goto out;
1449 }
1450
1451 msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list);
1452 wait_for_completion(&completion);
1453 ret = msg->status;
1454 out:
1455 list_for_each_entry_safe(msg, next_msg, &msg_list, list) {
1456 list_del(&msg->list);
1457 smb_direct_free_rdma_rw_msg(t, msg,
1458 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1459 }
1460 atomic_add(credits_needed, &t->rw_credits);
1461 wake_up(&t->wait_rw_credits);
1462 return ret;
1463 }
1464
1465 static int smb_direct_rdma_write(struct ksmbd_transport *t,
1466 void *buf, unsigned int buflen,
1467 struct smb2_buffer_desc_v1 *desc,
1468 unsigned int desc_len)
1469 {
1470 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
1471 desc, desc_len, false);
1472 }
1473
1474 static int smb_direct_rdma_read(struct ksmbd_transport *t,
1475 void *buf, unsigned int buflen,
1476 struct smb2_buffer_desc_v1 *desc,
1477 unsigned int desc_len)
1478 {
1479 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
1480 desc, desc_len, true);
1481 }
1482
1483 static void smb_direct_disconnect(struct ksmbd_transport *t)
1484 {
1485 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1486
1487 ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id);
1488
1489 smb_direct_disconnect_rdma_work(&st->disconnect_work);
1490 wait_event_interruptible(st->wait_status,
1491 st->status == SMB_DIRECT_CS_DISCONNECTED);
1492 free_transport(st);
1493 }
1494
1495 static void smb_direct_shutdown(struct ksmbd_transport *t)
1496 {
1497 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1498
1499 ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id);
1500
1501 smb_direct_disconnect_rdma_work(&st->disconnect_work);
1502 }
1503
1504 static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
1505 struct rdma_cm_event *event)
1506 {
1507 struct smb_direct_transport *t = cm_id->context;
1508
1509 ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
1510 cm_id, rdma_event_msg(event->event), event->event);
1511
1512 switch (event->event) {
1513 case RDMA_CM_EVENT_ESTABLISHED: {
1514 t->status = SMB_DIRECT_CS_CONNECTED;
1515 wake_up_interruptible(&t->wait_status);
1516 break;
1517 }
1518 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1519 case RDMA_CM_EVENT_DISCONNECTED: {
1520 ib_drain_qp(t->qp);
1521
1522 t->status = SMB_DIRECT_CS_DISCONNECTED;
1523 wake_up_interruptible(&t->wait_status);
1524 wake_up_interruptible(&t->wait_reassembly_queue);
1525 wake_up(&t->wait_send_credits);
1526 break;
1527 }
1528 case RDMA_CM_EVENT_CONNECT_ERROR: {
1529 t->status = SMB_DIRECT_CS_DISCONNECTED;
1530 wake_up_interruptible(&t->wait_status);
1531 break;
1532 }
1533 default:
1534 pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
1535 cm_id, rdma_event_msg(event->event),
1536 event->event);
1537 break;
1538 }
1539 return 0;
1540 }
1541
1542 static void smb_direct_qpair_handler(struct ib_event *event, void *context)
1543 {
1544 struct smb_direct_transport *t = context;
1545
1546 ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
1547 t->cm_id, ib_event_msg(event->event), event->event);
1548
1549 switch (event->event) {
1550 case IB_EVENT_CQ_ERR:
1551 case IB_EVENT_QP_FATAL:
1552 smb_direct_disconnect_rdma_connection(t);
1553 break;
1554 default:
1555 break;
1556 }
1557 }
1558
1559 static int smb_direct_send_negotiate_response(struct smb_direct_transport *t,
1560 int failed)
1561 {
1562 struct smb_direct_sendmsg *sendmsg;
1563 struct smb_direct_negotiate_resp *resp;
1564 int ret;
1565
1566 sendmsg = smb_direct_alloc_sendmsg(t);
1567 if (IS_ERR(sendmsg))
1568 return -ENOMEM;
1569
1570 resp = (struct smb_direct_negotiate_resp *)sendmsg->packet;
1571 if (failed) {
1572 memset(resp, 0, sizeof(*resp));
1573 resp->min_version = cpu_to_le16(0x0100);
1574 resp->max_version = cpu_to_le16(0x0100);
1575 resp->status = STATUS_NOT_SUPPORTED;
1576 } else {
1577 resp->status = STATUS_SUCCESS;
1578 resp->min_version = SMB_DIRECT_VERSION_LE;
1579 resp->max_version = SMB_DIRECT_VERSION_LE;
1580 resp->negotiated_version = SMB_DIRECT_VERSION_LE;
1581 resp->reserved = 0;
1582 resp->credits_requested =
1583 cpu_to_le16(t->send_credit_target);
1584 resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
1585 resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size);
1586 resp->preferred_send_size = cpu_to_le32(t->max_send_size);
1587 resp->max_receive_size = cpu_to_le32(t->max_recv_size);
1588 resp->max_fragmented_size =
1589 cpu_to_le32(t->max_fragmented_recv_size);
1590 }
1591
1592 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
1593 (void *)resp, sizeof(*resp),
1594 DMA_TO_DEVICE);
1595 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
1596 if (ret) {
1597 smb_direct_free_sendmsg(t, sendmsg);
1598 return ret;
1599 }
1600
1601 sendmsg->num_sge = 1;
1602 sendmsg->sge[0].length = sizeof(*resp);
1603 sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
1604
1605 ret = post_sendmsg(t, NULL, sendmsg);
1606 if (ret) {
1607 smb_direct_free_sendmsg(t, sendmsg);
1608 return ret;
1609 }
1610
1611 wait_event(t->wait_send_pending,
1612 atomic_read(&t->send_pending) == 0);
1613 return 0;
1614 }
1615
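/*
 * Accept the incoming connection. For iWARP providers the accept carries
 * eight bytes of private data (the responder_resources value and a constant
 * 1), which the iWARP peer appears to use as an IRD/ORD hint; InfiniBand and
 * RoCE connections are accepted without private data.
 */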
1616 static int smb_direct_accept_client(struct smb_direct_transport *t)
1617 {
1618 struct rdma_conn_param conn_param;
1619 struct ib_port_immutable port_immutable;
1620 u32 ird_ord_hdr[2];
1621 int ret;
1622
1623 memset(&conn_param, 0, sizeof(conn_param));
1624 conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom,
1625 SMB_DIRECT_CM_INITIATOR_DEPTH);
1626 conn_param.responder_resources = 0;
1627
1628 t->cm_id->device->ops.get_port_immutable(t->cm_id->device,
1629 t->cm_id->port_num,
1630 &port_immutable);
1631 if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
1632 ird_ord_hdr[0] = conn_param.responder_resources;
1633 ird_ord_hdr[1] = 1;
1634 conn_param.private_data = ird_ord_hdr;
1635 conn_param.private_data_len = sizeof(ird_ord_hdr);
1636 } else {
1637 conn_param.private_data = NULL;
1638 conn_param.private_data_len = 0;
1639 }
1640 conn_param.retry_count = SMB_DIRECT_CM_RETRY;
1641 conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
1642 conn_param.flow_control = 0;
1643
1644 ret = rdma_accept(t->cm_id, &conn_param);
1645 if (ret) {
1646 pr_err("error at rdma_accept: %d\n", ret);
1647 return ret;
1648 }
1649 return 0;
1650 }
1651
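/*
 * Prepare for SMB_DIRECT negotiation: post a receive buffer for the
 * negotiate request before calling rdma_accept() so the client's first send
 * has somewhere to land, then prime the remaining receive credits.
 */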
1652 static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
1653 {
1654 int ret;
1655 struct smb_direct_recvmsg *recvmsg;
1656
1657 recvmsg = get_free_recvmsg(t);
1658 if (!recvmsg)
1659 return -ENOMEM;
1660 recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ;
1661
1662 ret = smb_direct_post_recv(t, recvmsg);
1663 if (ret) {
1664 pr_err("Can't post recv: %d\n", ret);
1665 goto out_err;
1666 }
1667
1668 t->negotiation_requested = false;
1669 ret = smb_direct_accept_client(t);
1670 if (ret) {
1671 pr_err("Can't accept client\n");
1672 goto out_err;
1673 }
1674
1675 smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
1676 return 0;
1677 out_err:
1678 put_recvmsg(t, recvmsg);
1679 return ret;
1680 }
1681
1682 static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t)
1683 {
1684 return min_t(unsigned int,
1685 t->cm_id->device->attrs.max_fast_reg_page_list_len,
1686 256);
1687 }
1688
1689 static int smb_direct_init_params(struct smb_direct_transport *t,
1690 struct ib_qp_cap *cap)
1691 {
1692 struct ib_device *device = t->cm_id->device;
1693 int max_send_sges, max_rw_wrs, max_send_wrs;
1694 unsigned int max_sge_per_wr, wrs_per_credit;
1695
1696 /* We need 3 extra SGEs because the SMB_DIRECT header, the SMB2 header
1697 * and the SMB2 response may each be mapped as a separate SGE.
1698 */
1699 t->max_send_size = smb_direct_max_send_size;
1700 max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3;
1701 if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
1702 pr_err("max_send_size %d is too large\n", t->max_send_size);
1703 return -EINVAL;
1704 }
1705
1706 /* Calculate the number of work requests for RDMA R/W.
1707 * The maximum number of pages that can be registered with
1708 * one memory region can be transferred with one R/W credit,
1709 * and at least 4 work requests per credit are needed for
1710 * MR registration, the RDMA R/W itself, and local & remote
1711 * MR invalidation.
1712 */
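/*
 * Example: with an 8MiB max_rdma_rw_size, 4KiB pages and a
 * pages_per_rw_credit of 256, this works out to
 * DIV_ROUND_UP(8MiB, 255 * 4KiB) = 9 R/W credits.
 */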
1713 t->max_rdma_rw_size = smb_direct_max_read_write_size;
1714 t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t);
1715 t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size,
1716 (t->pages_per_rw_credit - 1) *
1717 PAGE_SIZE);
1718
1719 max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge,
1720 device->attrs.max_sge_rd);
1721 max_sge_per_wr = max_t(unsigned int, max_sge_per_wr,
1722 max_send_sges);
1723 wrs_per_credit = max_t(unsigned int, 4,
1724 DIV_ROUND_UP(t->pages_per_rw_credit,
1725 max_sge_per_wr) + 1);
1726 max_rw_wrs = t->max_rw_credits * wrs_per_credit;
1727
1728 max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
1729 if (max_send_wrs > device->attrs.max_cqe ||
1730 max_send_wrs > device->attrs.max_qp_wr) {
1731 pr_err("consider lowering send_credit_target = %d\n",
1732 smb_direct_send_credit_target);
1733 pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
1734 device->attrs.max_cqe, device->attrs.max_qp_wr);
1735 return -EINVAL;
1736 }
1737
1738 if (smb_direct_receive_credit_max > device->attrs.max_cqe ||
1739 smb_direct_receive_credit_max > device->attrs.max_qp_wr) {
1740 pr_err("consider lowering receive_credit_max = %d\n",
1741 smb_direct_receive_credit_max);
1742 pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
1743 device->attrs.max_cqe, device->attrs.max_qp_wr);
1744 return -EINVAL;
1745 }
1746
1747 if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
1748 pr_err("warning: device max_recv_sge = %d too small\n",
1749 device->attrs.max_recv_sge);
1750 return -EINVAL;
1751 }
1752
1753 t->recv_credits = 0;
1754 t->count_avail_recvmsg = 0;
1755
1756 t->recv_credit_max = smb_direct_receive_credit_max;
1757 t->recv_credit_target = 10;
1758 t->new_recv_credits = 0;
1759
	t->send_credit_target = smb_direct_send_credit_target;
	atomic_set(&t->send_credits, 0);
	atomic_set(&t->rw_credits, t->max_rw_credits);

	t->max_send_size = smb_direct_max_send_size;
	t->max_recv_size = smb_direct_max_receive_size;
	t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;

	cap->max_send_wr = max_send_wrs;
	cap->max_recv_wr = t->recv_credit_max;
	cap->max_send_sge = max_sge_per_wr;
	cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
	cap->max_inline_data = 0;
	cap->max_rdma_ctxs = t->max_rw_credits;
	return 0;
}

static void smb_direct_destroy_pools(struct smb_direct_transport *t)
{
	struct smb_direct_recvmsg *recvmsg;

	while ((recvmsg = get_free_recvmsg(t)))
		mempool_free(recvmsg, t->recvmsg_mempool);

	mempool_destroy(t->recvmsg_mempool);
	t->recvmsg_mempool = NULL;

	kmem_cache_destroy(t->recvmsg_cache);
	t->recvmsg_cache = NULL;

	mempool_destroy(t->sendmsg_mempool);
	t->sendmsg_mempool = NULL;

	kmem_cache_destroy(t->sendmsg_cache);
	t->sendmsg_cache = NULL;
}

static int smb_direct_create_pools(struct smb_direct_transport *t)
{
	char name[80];
	int i;
	struct smb_direct_recvmsg *recvmsg;

	snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t);
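	/* Each object holds the sendmsg descriptor plus the largest packet
	 * that is built in place, the negotiate response; bulk data is
	 * expected to be attached through additional SGEs rather than
	 * copied into this buffer.
	 */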
	t->sendmsg_cache = kmem_cache_create(name,
					     sizeof(struct smb_direct_sendmsg) +
					     sizeof(struct smb_direct_negotiate_resp),
					     0, SLAB_HWCACHE_ALIGN, NULL);
	if (!t->sendmsg_cache)
		return -ENOMEM;

	t->sendmsg_mempool = mempool_create(t->send_credit_target,
					    mempool_alloc_slab, mempool_free_slab,
					    t->sendmsg_cache);
	if (!t->sendmsg_mempool)
		goto err;

	snprintf(name, sizeof(name), "smb_direct_resp_%p", t);
	t->recvmsg_cache = kmem_cache_create(name,
					     sizeof(struct smb_direct_recvmsg) +
					     t->max_recv_size,
					     0, SLAB_HWCACHE_ALIGN, NULL);
	if (!t->recvmsg_cache)
		goto err;

	t->recvmsg_mempool =
		mempool_create(t->recv_credit_max, mempool_alloc_slab,
			       mempool_free_slab, t->recvmsg_cache);
	if (!t->recvmsg_mempool)
		goto err;

	INIT_LIST_HEAD(&t->recvmsg_queue);

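	/* Pre-allocate one receive message per receive credit so that
	 * receives can be reposted without further allocations in the
	 * completion path.
	 */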
	for (i = 0; i < t->recv_credit_max; i++) {
		recvmsg = mempool_alloc(t->recvmsg_mempool, KSMBD_DEFAULT_GFP);
		if (!recvmsg)
			goto err;
		recvmsg->transport = t;
		recvmsg->sge.length = 0;
		list_add(&recvmsg->list, &t->recvmsg_queue);
	}
	t->count_avail_recvmsg = t->recv_credit_max;

	return 0;
err:
	smb_direct_destroy_pools(t);
	return -ENOMEM;
}

static int smb_direct_create_qpair(struct smb_direct_transport *t,
				   struct ib_qp_cap *cap)
{
	int ret;
	struct ib_qp_init_attr qp_attr;
	int pages_per_rw;

	t->pd = ib_alloc_pd(t->cm_id->device, 0);
	if (IS_ERR(t->pd)) {
		pr_err("Can't create RDMA PD\n");
		ret = PTR_ERR(t->pd);
		t->pd = NULL;
		return ret;
	}

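	/* The send CQ must have room for a completion per outstanding send
	 * WR plus one per RDMA R/W context; the recv CQ is sized to the
	 * receive credit limit.
	 */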
	t->send_cq = ib_alloc_cq(t->cm_id->device, t,
				 smb_direct_send_credit_target + cap->max_rdma_ctxs,
				 0, IB_POLL_WORKQUEUE);
	if (IS_ERR(t->send_cq)) {
		pr_err("Can't create RDMA send CQ\n");
		ret = PTR_ERR(t->send_cq);
		t->send_cq = NULL;
		goto err;
	}

	t->recv_cq = ib_alloc_cq(t->cm_id->device, t,
				 t->recv_credit_max, 0, IB_POLL_WORKQUEUE);
	if (IS_ERR(t->recv_cq)) {
		pr_err("Can't create RDMA recv CQ\n");
		ret = PTR_ERR(t->recv_cq);
		t->recv_cq = NULL;
		goto err;
	}

	memset(&qp_attr, 0, sizeof(qp_attr));
	qp_attr.event_handler = smb_direct_qpair_handler;
	qp_attr.qp_context = t;
	qp_attr.cap = *cap;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = t->send_cq;
	qp_attr.recv_cq = t->recv_cq;
	qp_attr.port_num = ~0;

	ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr);
	if (ret) {
		pr_err("Can't create RDMA QP: %d\n", ret);
		goto err;
	}

	t->qp = t->cm_id->qp;
	t->cm_id->event_handler = smb_direct_cm_handler;

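	/* If a single RDMA R/W may span more pages than the device can read
	 * with plain SGEs (max_sgl_rd), pre-allocate a pool of
	 * fast-registration MRs, one per R/W credit, for the rw API to use.
	 */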
	pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
	if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) {
		ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs,
				      t->max_rw_credits, IB_MR_TYPE_MEM_REG,
				      t->pages_per_rw_credit, 0);
		if (ret) {
			pr_err("failed to init mr pool count %d pages %d\n",
			       t->max_rw_credits, t->pages_per_rw_credit);
			goto err;
		}
	}

	return 0;
err:
	if (t->qp) {
		t->qp = NULL;
		rdma_destroy_qp(t->cm_id);
	}
	if (t->recv_cq) {
		ib_destroy_cq(t->recv_cq);
		t->recv_cq = NULL;
	}
	if (t->send_cq) {
		ib_destroy_cq(t->send_cq);
		t->send_cq = NULL;
	}
	if (t->pd) {
		ib_dealloc_pd(t->pd);
		t->pd = NULL;
	}
	return ret;
}

static int smb_direct_prepare(struct ksmbd_transport *t)
{
	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
	struct smb_direct_recvmsg *recvmsg;
	struct smb_direct_negotiate_req *req;
	int ret;

	ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
	ret = wait_event_interruptible_timeout(st->wait_status,
					       st->negotiation_requested ||
					       st->status == SMB_DIRECT_CS_DISCONNECTED,
					       SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
	if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
		return ret < 0 ? ret : -ETIMEDOUT;

	recvmsg = get_first_reassembly(st);
	if (!recvmsg)
		return -ECONNABORTED;

	ret = smb_direct_check_recvmsg(recvmsg);
	if (ret == -ECONNABORTED)
		goto out;

	req = (struct smb_direct_negotiate_req *)recvmsg->packet;
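	/* Clamp the negotiated sizes: never send more than the peer says it
	 * can receive, and never accept more than we are prepared to
	 * receive or reassemble.
	 */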
	st->max_recv_size = min_t(int, st->max_recv_size,
				  le32_to_cpu(req->preferred_send_size));
	st->max_send_size = min_t(int, st->max_send_size,
				  le32_to_cpu(req->max_receive_size));
	st->max_fragmented_send_size =
		le32_to_cpu(req->max_fragmented_size);
	st->max_fragmented_recv_size =
		(st->recv_credit_max * st->max_recv_size) / 2;

	ret = smb_direct_send_negotiate_response(st, ret);
out:
	spin_lock_irq(&st->reassembly_queue_lock);
	st->reassembly_queue_length--;
	list_del(&recvmsg->list);
	spin_unlock_irq(&st->reassembly_queue_lock);
	put_recvmsg(st, recvmsg);

	return ret;
}

static int smb_direct_connect(struct smb_direct_transport *st)
{
	int ret;
	struct ib_qp_cap qp_cap;

	ret = smb_direct_init_params(st, &qp_cap);
	if (ret) {
		pr_err("Can't configure RDMA parameters\n");
		return ret;
	}

	ret = smb_direct_create_pools(st);
	if (ret) {
		pr_err("Can't init RDMA pool: %d\n", ret);
		return ret;
	}

	ret = smb_direct_create_qpair(st, &qp_cap);
	if (ret) {
		pr_err("Can't accept RDMA client: %d\n", ret);
		return ret;
	}

	ret = smb_direct_prepare_negotiation(st);
	if (ret) {
		pr_err("Can't negotiate: %d\n", ret);
		return ret;
	}
	return 0;
}

static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
{
	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		return false;
	if (attrs->max_fast_reg_page_list_len == 0)
		return false;
	return true;
}

static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
{
	struct smb_direct_transport *t;
	struct task_struct *handler;
	int ret;

	if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
		ksmbd_debug(RDMA,
			    "Fast Registration Work Requests are not supported. device capabilities=%llx\n",
			    new_cm_id->device->attrs.device_cap_flags);
		return -EPROTONOSUPPORT;
	}

	t = alloc_transport(new_cm_id);
	if (!t)
		return -ENOMEM;

	ret = smb_direct_connect(t);
	if (ret)
		goto out_err;

	handler = kthread_run(ksmbd_conn_handler_loop,
			      KSMBD_TRANS(t)->conn, "ksmbd:r%u",
			      smb_direct_port);
	if (IS_ERR(handler)) {
		ret = PTR_ERR(handler);
		pr_err("Can't start thread\n");
		goto out_err;
	}

	return 0;
out_err:
	free_transport(t);
	return ret;
}

static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
				     struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_CONNECT_REQUEST: {
		int ret = smb_direct_handle_connect_request(cm_id);

		if (ret) {
			pr_err("Can't create transport: %d\n", ret);
			return ret;
		}

		ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
			    cm_id);
		break;
	}
	default:
		pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
		       cm_id, rdma_event_msg(event->event), event->event);
		break;
	}
	return 0;
}

static int smb_direct_listen(int port)
{
	int ret;
	struct rdma_cm_id *cm_id;
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port = htons(port),
	};

	cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
			       &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(cm_id)) {
		pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
		return PTR_ERR(cm_id);
	}

	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
	if (ret) {
		pr_err("Can't bind: %d\n", ret);
		goto err;
	}

	smb_direct_listener.cm_id = cm_id;

	ret = rdma_listen(cm_id, 10);
	if (ret) {
		pr_err("Can't listen: %d\n", ret);
		goto err;
	}
	return 0;
err:
	smb_direct_listener.cm_id = NULL;
	rdma_destroy_id(cm_id);
	return ret;
}

static int smb_direct_ib_client_add(struct ib_device *ib_dev)
{
	struct smb_direct_device *smb_dev;

	/* Use port 5445 if the device type is iWARP (not InfiniBand) */
	if (ib_dev->node_type != RDMA_NODE_IB_CA)
		smb_direct_port = SMB_DIRECT_PORT_IWARP;

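	/* Only FRWR-capable devices are tracked here;
	 * ksmbd_rdma_capable_netdev() consults this list to decide whether
	 * a netdev is usable for RDMA.
	 */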
	if (!rdma_frwr_is_supported(&ib_dev->attrs))
		return 0;

	smb_dev = kzalloc(sizeof(*smb_dev), KSMBD_DEFAULT_GFP);
	if (!smb_dev)
		return -ENOMEM;
	smb_dev->ib_dev = ib_dev;

	write_lock(&smb_direct_device_lock);
	list_add(&smb_dev->list, &smb_direct_device_list);
	write_unlock(&smb_direct_device_lock);

	ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
	return 0;
}

static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
					void *client_data)
{
	struct smb_direct_device *smb_dev, *tmp;

	write_lock(&smb_direct_device_lock);
	list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
		if (smb_dev->ib_dev == ib_dev) {
			list_del(&smb_dev->list);
			kfree(smb_dev);
			break;
		}
	}
	write_unlock(&smb_direct_device_lock);
}

static struct ib_client smb_direct_ib_client = {
	.name = "ksmbd_smb_direct_ib",
	.add = smb_direct_ib_client_add,
	.remove = smb_direct_ib_client_remove,
};

int ksmbd_rdma_init(void)
{
	int ret;

	smb_direct_listener.cm_id = NULL;

	ret = ib_register_client(&smb_direct_ib_client);
	if (ret) {
		pr_err("failed to ib_register_client\n");
		return ret;
	}

	/* When a client is running out of send credits, the server grants
	 * more by sending a packet through this workqueue. This prevents a
	 * client from being unable to send for lack of credits.
	 */
	smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
					WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
	if (!smb_direct_wq)
		return -ENOMEM;

	ret = smb_direct_listen(smb_direct_port);
	if (ret) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
		pr_err("Can't listen: %d\n", ret);
		return ret;
	}

	ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
		    smb_direct_listener.cm_id);
	return 0;
}

void ksmbd_rdma_destroy(void)
{
	if (!smb_direct_listener.cm_id)
		return;

	ib_unregister_client(&smb_direct_ib_client);
	rdma_destroy_id(smb_direct_listener.cm_id);

	smb_direct_listener.cm_id = NULL;

	if (smb_direct_wq) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
	}
}

bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
{
	struct smb_direct_device *smb_dev;
	int i;
	bool rdma_capable = false;

	read_lock(&smb_direct_device_lock);
	list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
		for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
			struct net_device *ndev;

			ndev = ib_device_get_netdev(smb_dev->ib_dev, i + 1);
			if (!ndev)
				continue;

			if (ndev == netdev) {
				dev_put(ndev);
				rdma_capable = true;
				goto out;
			}
			dev_put(ndev);
		}
	}
out:
	read_unlock(&smb_direct_device_lock);

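	/* If the netdev did not match any port of a tracked device, fall
	 * back to asking the RDMA core for an associated device; this may
	 * cover associations not visible through ib_device_get_netdev().
	 */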
	if (!rdma_capable) {
		struct ib_device *ibdev;

		ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
		if (ibdev) {
			rdma_capable = rdma_frwr_is_supported(&ibdev->attrs);
			ib_device_put(ibdev);
		}
	}

	ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n",
		    netdev->name, str_true_false(rdma_capable));

	return rdma_capable;
}

static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
	.prepare = smb_direct_prepare,
	.disconnect = smb_direct_disconnect,
	.shutdown = smb_direct_shutdown,
	.writev = smb_direct_writev,
	.read = smb_direct_read,
	.rdma_read = smb_direct_rdma_read,
	.rdma_write = smb_direct_rdma_write,
	.free_transport = smb_direct_free_transport,
};