1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * iSCSI Initiator over TCP/IP Data-Path 4 * 5 * Copyright (C) 2004 Dmitry Yusupov 6 * Copyright (C) 2004 Alex Aizman 7 * Copyright (C) 2005 - 2006 Mike Christie 8 * Copyright (C) 2006 Red Hat, Inc. All rights reserved. 9 * maintained by open-iscsi@googlegroups.com 10 * 11 * See the file COPYING included with this distribution for more details. 12 * 13 * Credits: 14 * Christoph Hellwig 15 * FUJITA Tomonori 16 * Arne Redlich 17 * Zhenyu Wang 18 */ 19 20 #include <linux/types.h> 21 #include <linux/inet.h> 22 #include <linux/slab.h> 23 #include <linux/sched/mm.h> 24 #include <linux/file.h> 25 #include <linux/blkdev.h> 26 #include <linux/delay.h> 27 #include <linux/kfifo.h> 28 #include <linux/scatterlist.h> 29 #include <linux/module.h> 30 #include <linux/backing-dev.h> 31 #include <net/tcp.h> 32 #include <scsi/scsi_cmnd.h> 33 #include <scsi/scsi_device.h> 34 #include <scsi/scsi_host.h> 35 #include <scsi/scsi.h> 36 #include <scsi/scsi_transport_iscsi.h> 37 #include <trace/events/iscsi.h> 38 #include <trace/events/sock.h> 39 40 #include "iscsi_tcp.h" 41 42 MODULE_AUTHOR("Mike Christie <michaelc@cs.wisc.edu>, " 43 "Dmitry Yusupov <dmitry_yus@yahoo.com>, " 44 "Alex Aizman <itn780@yahoo.com>"); 45 MODULE_DESCRIPTION("iSCSI/TCP data-path"); 46 MODULE_LICENSE("GPL"); 47 48 static struct scsi_transport_template *iscsi_sw_tcp_scsi_transport; 49 static const struct scsi_host_template iscsi_sw_tcp_sht; 50 static struct iscsi_transport iscsi_sw_tcp_transport; 51 52 static unsigned int iscsi_max_lun = ~0; 53 module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); 54 55 static bool iscsi_recv_from_iscsi_q; 56 module_param_named(recv_from_iscsi_q, iscsi_recv_from_iscsi_q, bool, 0644); 57 MODULE_PARM_DESC(recv_from_iscsi_q, "Set to true to read iSCSI data/headers from the iscsi_q workqueue. The default is false which will perform reads from the network softirq context."); 58 59 static int iscsi_sw_tcp_dbg; 60 module_param_named(debug_iscsi_tcp, iscsi_sw_tcp_dbg, int, 61 S_IRUGO | S_IWUSR); 62 MODULE_PARM_DESC(debug_iscsi_tcp, "Turn on debugging for iscsi_tcp module " 63 "Set to 1 to turn on, and zero to turn off. Default is off."); 64 65 #define ISCSI_SW_TCP_DBG(_conn, dbg_fmt, arg...) \ 66 do { \ 67 if (iscsi_sw_tcp_dbg) \ 68 iscsi_conn_printk(KERN_INFO, _conn, \ 69 "%s " dbg_fmt, \ 70 __func__, ##arg); \ 71 iscsi_dbg_trace(trace_iscsi_dbg_sw_tcp, \ 72 &(_conn)->cls_conn->dev, \ 73 "%s " dbg_fmt, __func__, ##arg);\ 74 } while (0); 75 76 77 /** 78 * iscsi_sw_tcp_recv - TCP receive in sendfile fashion 79 * @rd_desc: read descriptor 80 * @skb: socket buffer 81 * @offset: offset in skb 82 * @len: skb->len - offset 83 */ 84 static int iscsi_sw_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, 85 unsigned int offset, size_t len) 86 { 87 struct iscsi_conn *conn = rd_desc->arg.data; 88 unsigned int consumed, total_consumed = 0; 89 int status; 90 91 ISCSI_SW_TCP_DBG(conn, "in %d bytes\n", skb->len - offset); 92 93 do { 94 status = 0; 95 consumed = iscsi_tcp_recv_skb(conn, skb, offset, 0, &status); 96 offset += consumed; 97 total_consumed += consumed; 98 } while (consumed != 0 && status != ISCSI_TCP_SKB_DONE); 99 100 ISCSI_SW_TCP_DBG(conn, "read %d bytes status %d\n", 101 skb->len - offset, status); 102 return total_consumed; 103 } 104 105 /** 106 * iscsi_sw_sk_state_check - check socket state 107 * @sk: socket 108 * 109 * If the socket is in CLOSE or CLOSE_WAIT we should 110 * not close the connection if there is still some 111 * data pending. 112 * 113 * Must be called with sk_callback_lock. 114 */ 115 static inline int iscsi_sw_sk_state_check(struct sock *sk) 116 { 117 struct iscsi_conn *conn = sk->sk_user_data; 118 119 if ((sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) && 120 (conn->session->state != ISCSI_STATE_LOGGING_OUT) && 121 !atomic_read(&sk->sk_rmem_alloc)) { 122 ISCSI_SW_TCP_DBG(conn, "TCP_CLOSE|TCP_CLOSE_WAIT\n"); 123 iscsi_conn_failure(conn, ISCSI_ERR_TCP_CONN_CLOSE); 124 return -ECONNRESET; 125 } 126 return 0; 127 } 128 129 static void iscsi_sw_tcp_recv_data(struct iscsi_conn *conn) 130 { 131 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 132 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 133 struct sock *sk = tcp_sw_conn->sock->sk; 134 read_descriptor_t rd_desc; 135 136 /* 137 * Use rd_desc to pass 'conn' to iscsi_tcp_recv. 138 * We set count to 1 because we want the network layer to 139 * hand us all the skbs that are available. iscsi_tcp_recv 140 * handled pdus that cross buffers or pdus that still need data. 141 */ 142 rd_desc.arg.data = conn; 143 rd_desc.count = 1; 144 145 tcp_read_sock(sk, &rd_desc, iscsi_sw_tcp_recv); 146 147 /* If we had to (atomically) map a highmem page, 148 * unmap it now. */ 149 iscsi_tcp_segment_unmap(&tcp_conn->in.segment); 150 151 iscsi_sw_sk_state_check(sk); 152 } 153 154 static void iscsi_sw_tcp_recv_data_work(struct work_struct *work) 155 { 156 struct iscsi_conn *conn = container_of(work, struct iscsi_conn, 157 recvwork); 158 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 159 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 160 struct sock *sk = tcp_sw_conn->sock->sk; 161 162 lock_sock(sk); 163 iscsi_sw_tcp_recv_data(conn); 164 release_sock(sk); 165 } 166 167 static void iscsi_sw_tcp_data_ready(struct sock *sk) 168 { 169 struct iscsi_sw_tcp_conn *tcp_sw_conn; 170 struct iscsi_tcp_conn *tcp_conn; 171 struct iscsi_conn *conn; 172 173 trace_sk_data_ready(sk); 174 175 read_lock_bh(&sk->sk_callback_lock); 176 conn = sk->sk_user_data; 177 if (!conn) { 178 read_unlock_bh(&sk->sk_callback_lock); 179 return; 180 } 181 tcp_conn = conn->dd_data; 182 tcp_sw_conn = tcp_conn->dd_data; 183 184 if (tcp_sw_conn->queue_recv) 185 iscsi_conn_queue_recv(conn); 186 else 187 iscsi_sw_tcp_recv_data(conn); 188 read_unlock_bh(&sk->sk_callback_lock); 189 } 190 191 static void iscsi_sw_tcp_state_change(struct sock *sk) 192 { 193 struct iscsi_tcp_conn *tcp_conn; 194 struct iscsi_sw_tcp_conn *tcp_sw_conn; 195 struct iscsi_conn *conn; 196 void (*old_state_change)(struct sock *); 197 198 read_lock_bh(&sk->sk_callback_lock); 199 conn = sk->sk_user_data; 200 if (!conn) { 201 read_unlock_bh(&sk->sk_callback_lock); 202 return; 203 } 204 205 iscsi_sw_sk_state_check(sk); 206 207 tcp_conn = conn->dd_data; 208 tcp_sw_conn = tcp_conn->dd_data; 209 old_state_change = tcp_sw_conn->old_state_change; 210 211 read_unlock_bh(&sk->sk_callback_lock); 212 213 old_state_change(sk); 214 } 215 216 /** 217 * iscsi_sw_tcp_write_space - Called when more output buffer space is available 218 * @sk: socket space is available for 219 **/ 220 static void iscsi_sw_tcp_write_space(struct sock *sk) 221 { 222 struct iscsi_conn *conn; 223 struct iscsi_tcp_conn *tcp_conn; 224 struct iscsi_sw_tcp_conn *tcp_sw_conn; 225 void (*old_write_space)(struct sock *); 226 227 read_lock_bh(&sk->sk_callback_lock); 228 conn = sk->sk_user_data; 229 if (!conn) { 230 read_unlock_bh(&sk->sk_callback_lock); 231 return; 232 } 233 234 tcp_conn = conn->dd_data; 235 tcp_sw_conn = tcp_conn->dd_data; 236 old_write_space = tcp_sw_conn->old_write_space; 237 read_unlock_bh(&sk->sk_callback_lock); 238 239 old_write_space(sk); 240 241 ISCSI_SW_TCP_DBG(conn, "iscsi_write_space\n"); 242 iscsi_conn_queue_xmit(conn); 243 } 244 245 static void iscsi_sw_tcp_conn_set_callbacks(struct iscsi_conn *conn) 246 { 247 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 248 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 249 struct sock *sk = tcp_sw_conn->sock->sk; 250 251 /* assign new callbacks */ 252 write_lock_bh(&sk->sk_callback_lock); 253 sk->sk_user_data = conn; 254 tcp_sw_conn->old_data_ready = sk->sk_data_ready; 255 tcp_sw_conn->old_state_change = sk->sk_state_change; 256 tcp_sw_conn->old_write_space = sk->sk_write_space; 257 sk->sk_data_ready = iscsi_sw_tcp_data_ready; 258 sk->sk_state_change = iscsi_sw_tcp_state_change; 259 sk->sk_write_space = iscsi_sw_tcp_write_space; 260 write_unlock_bh(&sk->sk_callback_lock); 261 } 262 263 static void 264 iscsi_sw_tcp_conn_restore_callbacks(struct iscsi_conn *conn) 265 { 266 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 267 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 268 struct sock *sk = tcp_sw_conn->sock->sk; 269 270 /* restore socket callbacks, see also: iscsi_conn_set_callbacks() */ 271 write_lock_bh(&sk->sk_callback_lock); 272 sk->sk_user_data = NULL; 273 sk->sk_data_ready = tcp_sw_conn->old_data_ready; 274 sk->sk_state_change = tcp_sw_conn->old_state_change; 275 sk->sk_write_space = tcp_sw_conn->old_write_space; 276 sk->sk_no_check_tx = 0; 277 write_unlock_bh(&sk->sk_callback_lock); 278 } 279 280 /** 281 * iscsi_sw_tcp_xmit_segment - transmit segment 282 * @tcp_conn: the iSCSI TCP connection 283 * @segment: the buffer to transmnit 284 * 285 * This function transmits as much of the buffer as 286 * the network layer will accept, and returns the number of 287 * bytes transmitted. 288 * 289 * If CRC hashing is enabled, the function will compute the 290 * hash as it goes. When the entire segment has been transmitted, 291 * it will retrieve the hash value and send it as well. 292 */ 293 static int iscsi_sw_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn, 294 struct iscsi_segment *segment) 295 { 296 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 297 struct socket *sk = tcp_sw_conn->sock; 298 unsigned int copied = 0; 299 int r = 0; 300 301 while (!iscsi_tcp_segment_done(tcp_conn, segment, 0, r)) { 302 struct scatterlist *sg; 303 struct msghdr msg = {}; 304 struct bio_vec bv; 305 unsigned int offset, copy; 306 307 r = 0; 308 offset = segment->copied; 309 copy = segment->size - offset; 310 311 if (segment->total_copied + segment->size < segment->total_size) 312 msg.msg_flags |= MSG_MORE; 313 314 if (tcp_sw_conn->queue_recv) 315 msg.msg_flags |= MSG_DONTWAIT; 316 317 if (!segment->data) { 318 if (!tcp_conn->iscsi_conn->datadgst_en) 319 msg.msg_flags |= MSG_SPLICE_PAGES; 320 sg = segment->sg; 321 offset += segment->sg_offset + sg->offset; 322 bvec_set_page(&bv, sg_page(sg), copy, offset); 323 } else { 324 bvec_set_virt(&bv, segment->data + offset, copy); 325 } 326 iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, copy); 327 328 r = sock_sendmsg(sk, &msg); 329 if (r < 0) { 330 iscsi_tcp_segment_unmap(segment); 331 return r; 332 } 333 copied += r; 334 } 335 return copied; 336 } 337 338 /** 339 * iscsi_sw_tcp_xmit - TCP transmit 340 * @conn: iscsi connection 341 **/ 342 static int iscsi_sw_tcp_xmit(struct iscsi_conn *conn) 343 { 344 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 345 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 346 struct iscsi_segment *segment = &tcp_sw_conn->out.segment; 347 unsigned int consumed = 0; 348 int rc = 0; 349 350 while (1) { 351 rc = iscsi_sw_tcp_xmit_segment(tcp_conn, segment); 352 /* 353 * We may not have been able to send data because the conn 354 * is getting stopped. libiscsi will know so propagate err 355 * for it to do the right thing. 356 */ 357 if (rc == -EAGAIN) 358 return rc; 359 else if (rc < 0) { 360 rc = ISCSI_ERR_XMIT_FAILED; 361 goto error; 362 } else if (rc == 0) 363 break; 364 365 consumed += rc; 366 367 if (segment->total_copied >= segment->total_size) { 368 if (segment->done != NULL) { 369 rc = segment->done(tcp_conn, segment); 370 if (rc != 0) 371 goto error; 372 } 373 } 374 } 375 376 ISCSI_SW_TCP_DBG(conn, "xmit %d bytes\n", consumed); 377 378 conn->txdata_octets += consumed; 379 return consumed; 380 381 error: 382 /* Transmit error. We could initiate error recovery 383 * here. */ 384 ISCSI_SW_TCP_DBG(conn, "Error sending PDU, errno=%d\n", rc); 385 iscsi_conn_failure(conn, rc); 386 return -EIO; 387 } 388 389 /** 390 * iscsi_sw_tcp_xmit_qlen - return the number of bytes queued for xmit 391 * @conn: iscsi connection 392 */ 393 static inline int iscsi_sw_tcp_xmit_qlen(struct iscsi_conn *conn) 394 { 395 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 396 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 397 struct iscsi_segment *segment = &tcp_sw_conn->out.segment; 398 399 return segment->total_copied - segment->total_size; 400 } 401 402 static int iscsi_sw_tcp_pdu_xmit(struct iscsi_task *task) 403 { 404 struct iscsi_conn *conn = task->conn; 405 unsigned int noreclaim_flag; 406 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 407 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 408 int rc = 0; 409 410 if (!tcp_sw_conn->sock) { 411 iscsi_conn_printk(KERN_ERR, conn, 412 "Transport not bound to socket!\n"); 413 return -EINVAL; 414 } 415 416 noreclaim_flag = memalloc_noreclaim_save(); 417 418 while (iscsi_sw_tcp_xmit_qlen(conn)) { 419 rc = iscsi_sw_tcp_xmit(conn); 420 if (rc == 0) { 421 rc = -EAGAIN; 422 break; 423 } 424 if (rc < 0) 425 break; 426 rc = 0; 427 } 428 429 memalloc_noreclaim_restore(noreclaim_flag); 430 return rc; 431 } 432 433 /* 434 * This is called when we're done sending the header. 435 * Simply copy the data_segment to the send segment, and return. 436 */ 437 static int iscsi_sw_tcp_send_hdr_done(struct iscsi_tcp_conn *tcp_conn, 438 struct iscsi_segment *segment) 439 { 440 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 441 442 tcp_sw_conn->out.segment = tcp_sw_conn->out.data_segment; 443 ISCSI_SW_TCP_DBG(tcp_conn->iscsi_conn, 444 "Header done. Next segment size %u total_size %u\n", 445 tcp_sw_conn->out.segment.size, 446 tcp_sw_conn->out.segment.total_size); 447 return 0; 448 } 449 450 static void iscsi_sw_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr, 451 size_t hdrlen) 452 { 453 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 454 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 455 456 ISCSI_SW_TCP_DBG(conn, "%s\n", conn->hdrdgst_en ? 457 "digest enabled" : "digest disabled"); 458 459 /* Clear the data segment - needs to be filled in by the 460 * caller using iscsi_tcp_send_data_prep() */ 461 memset(&tcp_sw_conn->out.data_segment, 0, 462 sizeof(struct iscsi_segment)); 463 464 /* If header digest is enabled, compute the CRC and 465 * place the digest into the same buffer. We make 466 * sure that both iscsi_tcp_task and mtask have 467 * sufficient room. 468 */ 469 if (conn->hdrdgst_en) { 470 iscsi_tcp_dgst_header(hdr, hdrlen, hdr + hdrlen); 471 hdrlen += ISCSI_DIGEST_SIZE; 472 } 473 474 /* Remember header pointer for later, when we need 475 * to decide whether there's a payload to go along 476 * with the header. */ 477 tcp_sw_conn->out.hdr = hdr; 478 479 iscsi_segment_init_linear(&tcp_sw_conn->out.segment, hdr, hdrlen, 480 iscsi_sw_tcp_send_hdr_done, NULL); 481 } 482 483 /* 484 * Prepare the send buffer for the payload data. 485 * Padding and checksumming will all be taken care 486 * of by the iscsi_segment routines. 487 */ 488 static int 489 iscsi_sw_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg, 490 unsigned int count, unsigned int offset, 491 unsigned int len) 492 { 493 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 494 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 495 u32 *tx_crcp = NULL; 496 unsigned int hdr_spec_len; 497 498 ISCSI_SW_TCP_DBG(conn, "offset=%d, datalen=%d %s\n", offset, len, 499 conn->datadgst_en ? 500 "digest enabled" : "digest disabled"); 501 502 /* Make sure the datalen matches what the caller 503 said he would send. */ 504 hdr_spec_len = ntoh24(tcp_sw_conn->out.hdr->dlength); 505 WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len)); 506 507 if (conn->datadgst_en) 508 tx_crcp = &tcp_sw_conn->tx_crc; 509 510 return iscsi_segment_seek_sg(&tcp_sw_conn->out.data_segment, 511 sg, count, offset, len, NULL, tx_crcp); 512 } 513 514 static void 515 iscsi_sw_tcp_send_linear_data_prep(struct iscsi_conn *conn, void *data, 516 size_t len) 517 { 518 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 519 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 520 u32 *tx_crcp = NULL; 521 unsigned int hdr_spec_len; 522 523 ISCSI_SW_TCP_DBG(conn, "datalen=%zd %s\n", len, conn->datadgst_en ? 524 "digest enabled" : "digest disabled"); 525 526 /* Make sure the datalen matches what the caller 527 said he would send. */ 528 hdr_spec_len = ntoh24(tcp_sw_conn->out.hdr->dlength); 529 WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len)); 530 531 if (conn->datadgst_en) 532 tx_crcp = &tcp_sw_conn->tx_crc; 533 534 iscsi_segment_init_linear(&tcp_sw_conn->out.data_segment, 535 data, len, NULL, tx_crcp); 536 } 537 538 static int iscsi_sw_tcp_pdu_init(struct iscsi_task *task, 539 unsigned int offset, unsigned int count) 540 { 541 struct iscsi_conn *conn = task->conn; 542 int err = 0; 543 544 iscsi_sw_tcp_send_hdr_prep(conn, task->hdr, task->hdr_len); 545 546 if (!count) 547 return 0; 548 549 if (!task->sc) 550 iscsi_sw_tcp_send_linear_data_prep(conn, task->data, count); 551 else { 552 struct scsi_data_buffer *sdb = &task->sc->sdb; 553 554 err = iscsi_sw_tcp_send_data_prep(conn, sdb->table.sgl, 555 sdb->table.nents, offset, 556 count); 557 } 558 559 if (err) { 560 /* got invalid offset/len */ 561 return -EIO; 562 } 563 return 0; 564 } 565 566 static int iscsi_sw_tcp_pdu_alloc(struct iscsi_task *task, uint8_t opcode) 567 { 568 struct iscsi_tcp_task *tcp_task = task->dd_data; 569 570 task->hdr = task->dd_data + sizeof(*tcp_task); 571 task->hdr_max = sizeof(struct iscsi_sw_tcp_hdrbuf) - ISCSI_DIGEST_SIZE; 572 return 0; 573 } 574 575 static struct iscsi_cls_conn * 576 iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session, 577 uint32_t conn_idx) 578 { 579 struct iscsi_conn *conn; 580 struct iscsi_cls_conn *cls_conn; 581 struct iscsi_tcp_conn *tcp_conn; 582 struct iscsi_sw_tcp_conn *tcp_sw_conn; 583 584 cls_conn = iscsi_tcp_conn_setup(cls_session, sizeof(*tcp_sw_conn), 585 conn_idx); 586 if (!cls_conn) 587 return NULL; 588 conn = cls_conn->dd_data; 589 tcp_conn = conn->dd_data; 590 tcp_sw_conn = tcp_conn->dd_data; 591 INIT_WORK(&conn->recvwork, iscsi_sw_tcp_recv_data_work); 592 tcp_sw_conn->queue_recv = iscsi_recv_from_iscsi_q; 593 594 mutex_init(&tcp_sw_conn->sock_lock); 595 tcp_conn->rx_crcp = &tcp_sw_conn->rx_crc; 596 597 return cls_conn; 598 } 599 600 static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn) 601 { 602 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 603 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 604 struct socket *sock = tcp_sw_conn->sock; 605 606 /* 607 * The iscsi transport class will make sure we are not called in 608 * parallel with start, stop, bind and destroys. However, this can be 609 * called twice if userspace does a stop then a destroy. 610 */ 611 if (!sock) 612 return; 613 614 /* 615 * Make sure we start socket shutdown now in case userspace is up 616 * but delayed in releasing the socket. 617 */ 618 kernel_sock_shutdown(sock, SHUT_RDWR); 619 620 sock_hold(sock->sk); 621 iscsi_sw_tcp_conn_restore_callbacks(conn); 622 sock_put(sock->sk); 623 624 iscsi_suspend_rx(conn); 625 626 mutex_lock(&tcp_sw_conn->sock_lock); 627 tcp_sw_conn->sock = NULL; 628 mutex_unlock(&tcp_sw_conn->sock_lock); 629 sockfd_put(sock); 630 } 631 632 static void iscsi_sw_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn) 633 { 634 struct iscsi_conn *conn = cls_conn->dd_data; 635 636 iscsi_sw_tcp_release_conn(conn); 637 iscsi_tcp_conn_teardown(cls_conn); 638 } 639 640 static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) 641 { 642 struct iscsi_conn *conn = cls_conn->dd_data; 643 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 644 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 645 struct socket *sock = tcp_sw_conn->sock; 646 647 /* userspace may have goofed up and not bound us */ 648 if (!sock) 649 return; 650 651 sock->sk->sk_err = EIO; 652 wake_up_interruptible(sk_sleep(sock->sk)); 653 654 /* stop xmit side */ 655 iscsi_suspend_tx(conn); 656 657 /* stop recv side and release socket */ 658 iscsi_sw_tcp_release_conn(conn); 659 660 iscsi_conn_stop(cls_conn, flag); 661 } 662 663 static int 664 iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session, 665 struct iscsi_cls_conn *cls_conn, uint64_t transport_eph, 666 int is_leading) 667 { 668 struct iscsi_conn *conn = cls_conn->dd_data; 669 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 670 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 671 struct sock *sk; 672 struct socket *sock; 673 int err; 674 675 /* lookup for existing socket */ 676 sock = sockfd_lookup((int)transport_eph, &err); 677 if (!sock) { 678 iscsi_conn_printk(KERN_ERR, conn, 679 "sockfd_lookup failed %d\n", err); 680 return -EEXIST; 681 } 682 683 err = -EINVAL; 684 if (!sk_is_tcp(sock->sk)) 685 goto free_socket; 686 687 err = iscsi_conn_bind(cls_session, cls_conn, is_leading); 688 if (err) 689 goto free_socket; 690 691 mutex_lock(&tcp_sw_conn->sock_lock); 692 /* bind iSCSI connection and socket */ 693 tcp_sw_conn->sock = sock; 694 mutex_unlock(&tcp_sw_conn->sock_lock); 695 696 /* setup Socket parameters */ 697 sk = sock->sk; 698 sk->sk_reuse = SK_CAN_REUSE; 699 sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */ 700 sk->sk_allocation = GFP_ATOMIC; 701 sk->sk_use_task_frag = false; 702 sk_set_memalloc(sk); 703 sock_no_linger(sk); 704 705 iscsi_sw_tcp_conn_set_callbacks(conn); 706 /* 707 * set receive state machine into initial state 708 */ 709 iscsi_tcp_hdr_recv_prep(tcp_conn); 710 return 0; 711 712 free_socket: 713 sockfd_put(sock); 714 return err; 715 } 716 717 static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn, 718 enum iscsi_param param, char *buf, 719 int buflen) 720 { 721 struct iscsi_conn *conn = cls_conn->dd_data; 722 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 723 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 724 725 switch(param) { 726 case ISCSI_PARAM_HDRDGST_EN: 727 iscsi_set_param(cls_conn, param, buf, buflen); 728 break; 729 case ISCSI_PARAM_DATADGST_EN: 730 mutex_lock(&tcp_sw_conn->sock_lock); 731 if (!tcp_sw_conn->sock) { 732 mutex_unlock(&tcp_sw_conn->sock_lock); 733 return -ENOTCONN; 734 } 735 iscsi_set_param(cls_conn, param, buf, buflen); 736 mutex_unlock(&tcp_sw_conn->sock_lock); 737 break; 738 case ISCSI_PARAM_MAX_R2T: 739 return iscsi_tcp_set_max_r2t(conn, buf); 740 default: 741 return iscsi_set_param(cls_conn, param, buf, buflen); 742 } 743 744 return 0; 745 } 746 747 static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn, 748 enum iscsi_param param, char *buf) 749 { 750 struct iscsi_conn *conn = cls_conn->dd_data; 751 struct iscsi_sw_tcp_conn *tcp_sw_conn; 752 struct iscsi_tcp_conn *tcp_conn; 753 struct sockaddr_in6 addr; 754 struct socket *sock; 755 int rc; 756 757 switch(param) { 758 case ISCSI_PARAM_CONN_PORT: 759 case ISCSI_PARAM_CONN_ADDRESS: 760 case ISCSI_PARAM_LOCAL_PORT: 761 spin_lock_bh(&conn->session->frwd_lock); 762 if (!conn->session->leadconn) { 763 spin_unlock_bh(&conn->session->frwd_lock); 764 return -ENOTCONN; 765 } 766 /* 767 * The conn has been setup and bound, so just grab a ref 768 * incase a destroy runs while we are in the net layer. 769 */ 770 iscsi_get_conn(conn->cls_conn); 771 spin_unlock_bh(&conn->session->frwd_lock); 772 773 tcp_conn = conn->dd_data; 774 tcp_sw_conn = tcp_conn->dd_data; 775 776 mutex_lock(&tcp_sw_conn->sock_lock); 777 sock = tcp_sw_conn->sock; 778 if (!sock) { 779 rc = -ENOTCONN; 780 goto sock_unlock; 781 } 782 783 if (param == ISCSI_PARAM_LOCAL_PORT) 784 rc = kernel_getsockname(sock, 785 (struct sockaddr *)&addr); 786 else 787 rc = kernel_getpeername(sock, 788 (struct sockaddr *)&addr); 789 sock_unlock: 790 mutex_unlock(&tcp_sw_conn->sock_lock); 791 iscsi_put_conn(conn->cls_conn); 792 if (rc < 0) 793 return rc; 794 795 return iscsi_conn_get_addr_param((struct sockaddr_storage *) 796 &addr, param, buf); 797 default: 798 return iscsi_conn_get_param(cls_conn, param, buf); 799 } 800 801 return 0; 802 } 803 804 static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, 805 enum iscsi_host_param param, char *buf) 806 { 807 struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(shost); 808 struct iscsi_session *session; 809 struct iscsi_conn *conn; 810 struct iscsi_tcp_conn *tcp_conn; 811 struct iscsi_sw_tcp_conn *tcp_sw_conn; 812 struct sockaddr_in6 addr; 813 struct socket *sock; 814 int rc; 815 816 switch (param) { 817 case ISCSI_HOST_PARAM_IPADDRESS: 818 session = tcp_sw_host->session; 819 if (!session) 820 return -ENOTCONN; 821 822 spin_lock_bh(&session->frwd_lock); 823 conn = session->leadconn; 824 if (!conn) { 825 spin_unlock_bh(&session->frwd_lock); 826 return -ENOTCONN; 827 } 828 tcp_conn = conn->dd_data; 829 tcp_sw_conn = tcp_conn->dd_data; 830 /* 831 * The conn has been setup and bound, so just grab a ref 832 * incase a destroy runs while we are in the net layer. 833 */ 834 iscsi_get_conn(conn->cls_conn); 835 spin_unlock_bh(&session->frwd_lock); 836 837 mutex_lock(&tcp_sw_conn->sock_lock); 838 sock = tcp_sw_conn->sock; 839 if (!sock) 840 rc = -ENOTCONN; 841 else 842 rc = kernel_getsockname(sock, (struct sockaddr *)&addr); 843 mutex_unlock(&tcp_sw_conn->sock_lock); 844 iscsi_put_conn(conn->cls_conn); 845 if (rc < 0) 846 return rc; 847 848 return iscsi_conn_get_addr_param((struct sockaddr_storage *) 849 &addr, 850 (enum iscsi_param)param, buf); 851 default: 852 return iscsi_host_get_param(shost, param, buf); 853 } 854 855 return 0; 856 } 857 858 static void 859 iscsi_sw_tcp_conn_get_stats(struct iscsi_cls_conn *cls_conn, 860 struct iscsi_stats *stats) 861 { 862 struct iscsi_conn *conn = cls_conn->dd_data; 863 struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 864 struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; 865 866 stats->custom_length = 3; 867 strcpy(stats->custom[0].desc, "tx_sendpage_failures"); 868 stats->custom[0].value = tcp_sw_conn->sendpage_failures_cnt; 869 strcpy(stats->custom[1].desc, "rx_discontiguous_hdr"); 870 stats->custom[1].value = tcp_sw_conn->discontiguous_hdr_cnt; 871 strcpy(stats->custom[2].desc, "eh_abort_cnt"); 872 stats->custom[2].value = conn->eh_abort_cnt; 873 874 iscsi_tcp_conn_get_stats(cls_conn, stats); 875 } 876 877 static struct iscsi_cls_session * 878 iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, 879 uint16_t qdepth, uint32_t initial_cmdsn) 880 { 881 struct iscsi_cls_session *cls_session; 882 struct iscsi_session *session; 883 struct iscsi_sw_tcp_host *tcp_sw_host; 884 struct Scsi_Host *shost; 885 int rc; 886 887 if (ep) { 888 printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep); 889 return NULL; 890 } 891 892 shost = iscsi_host_alloc(&iscsi_sw_tcp_sht, 893 sizeof(struct iscsi_sw_tcp_host), 1); 894 if (!shost) 895 return NULL; 896 shost->transportt = iscsi_sw_tcp_scsi_transport; 897 shost->cmd_per_lun = qdepth; 898 shost->max_lun = iscsi_max_lun; 899 shost->max_id = 0; 900 shost->max_channel = 0; 901 shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE; 902 shost->dma_alignment = 0; 903 904 rc = iscsi_host_get_max_scsi_cmds(shost, cmds_max); 905 if (rc < 0) 906 goto free_host; 907 shost->can_queue = rc; 908 909 if (iscsi_host_add(shost, NULL)) 910 goto free_host; 911 912 cls_session = iscsi_session_setup(&iscsi_sw_tcp_transport, shost, 913 cmds_max, 0, 914 sizeof(struct iscsi_tcp_task) + 915 sizeof(struct iscsi_sw_tcp_hdrbuf), 916 initial_cmdsn, 0); 917 if (!cls_session) 918 goto remove_host; 919 session = cls_session->dd_data; 920 921 if (iscsi_tcp_r2tpool_alloc(session)) 922 goto remove_session; 923 924 /* We are now fully setup so expose the session to sysfs. */ 925 tcp_sw_host = iscsi_host_priv(shost); 926 tcp_sw_host->session = session; 927 return cls_session; 928 929 remove_session: 930 iscsi_session_teardown(cls_session); 931 remove_host: 932 iscsi_host_remove(shost, false); 933 free_host: 934 iscsi_host_free(shost); 935 return NULL; 936 } 937 938 static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session) 939 { 940 struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); 941 struct iscsi_session *session = cls_session->dd_data; 942 943 if (WARN_ON_ONCE(session->leadconn)) 944 return; 945 946 iscsi_session_remove(cls_session); 947 /* 948 * Our get_host_param needs to access the session, so remove the 949 * host from sysfs before freeing the session to make sure userspace 950 * is no longer accessing the callout. 951 */ 952 iscsi_host_remove(shost, false); 953 954 iscsi_tcp_r2tpool_free(cls_session->dd_data); 955 956 iscsi_session_free(cls_session); 957 iscsi_host_free(shost); 958 } 959 960 static umode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param) 961 { 962 switch (param_type) { 963 case ISCSI_HOST_PARAM: 964 switch (param) { 965 case ISCSI_HOST_PARAM_NETDEV_NAME: 966 case ISCSI_HOST_PARAM_HWADDRESS: 967 case ISCSI_HOST_PARAM_IPADDRESS: 968 case ISCSI_HOST_PARAM_INITIATOR_NAME: 969 return S_IRUGO; 970 default: 971 return 0; 972 } 973 case ISCSI_PARAM: 974 switch (param) { 975 case ISCSI_PARAM_MAX_RECV_DLENGTH: 976 case ISCSI_PARAM_MAX_XMIT_DLENGTH: 977 case ISCSI_PARAM_HDRDGST_EN: 978 case ISCSI_PARAM_DATADGST_EN: 979 case ISCSI_PARAM_CONN_ADDRESS: 980 case ISCSI_PARAM_CONN_PORT: 981 case ISCSI_PARAM_LOCAL_PORT: 982 case ISCSI_PARAM_EXP_STATSN: 983 case ISCSI_PARAM_PERSISTENT_ADDRESS: 984 case ISCSI_PARAM_PERSISTENT_PORT: 985 case ISCSI_PARAM_PING_TMO: 986 case ISCSI_PARAM_RECV_TMO: 987 case ISCSI_PARAM_INITIAL_R2T_EN: 988 case ISCSI_PARAM_MAX_R2T: 989 case ISCSI_PARAM_IMM_DATA_EN: 990 case ISCSI_PARAM_FIRST_BURST: 991 case ISCSI_PARAM_MAX_BURST: 992 case ISCSI_PARAM_PDU_INORDER_EN: 993 case ISCSI_PARAM_DATASEQ_INORDER_EN: 994 case ISCSI_PARAM_ERL: 995 case ISCSI_PARAM_TARGET_NAME: 996 case ISCSI_PARAM_TPGT: 997 case ISCSI_PARAM_USERNAME: 998 case ISCSI_PARAM_PASSWORD: 999 case ISCSI_PARAM_USERNAME_IN: 1000 case ISCSI_PARAM_PASSWORD_IN: 1001 case ISCSI_PARAM_FAST_ABORT: 1002 case ISCSI_PARAM_ABORT_TMO: 1003 case ISCSI_PARAM_LU_RESET_TMO: 1004 case ISCSI_PARAM_TGT_RESET_TMO: 1005 case ISCSI_PARAM_IFACE_NAME: 1006 case ISCSI_PARAM_INITIATOR_NAME: 1007 return S_IRUGO; 1008 default: 1009 return 0; 1010 } 1011 } 1012 1013 return 0; 1014 } 1015 1016 static int iscsi_sw_tcp_sdev_configure(struct scsi_device *sdev, 1017 struct queue_limits *lim) 1018 { 1019 struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(sdev->host); 1020 struct iscsi_session *session = tcp_sw_host->session; 1021 struct iscsi_conn *conn = session->leadconn; 1022 1023 if (conn->datadgst_en) 1024 lim->features |= BLK_FEAT_STABLE_WRITES; 1025 return 0; 1026 } 1027 1028 static const struct scsi_host_template iscsi_sw_tcp_sht = { 1029 .module = THIS_MODULE, 1030 .name = "iSCSI Initiator over TCP/IP", 1031 .queuecommand = iscsi_queuecommand, 1032 .change_queue_depth = scsi_change_queue_depth, 1033 .can_queue = ISCSI_TOTAL_CMDS_MAX, 1034 .sg_tablesize = 4096, 1035 .max_sectors = 0xFFFF, 1036 .cmd_per_lun = ISCSI_DEF_CMD_PER_LUN, 1037 .eh_timed_out = iscsi_eh_cmd_timed_out, 1038 .eh_abort_handler = iscsi_eh_abort, 1039 .eh_device_reset_handler= iscsi_eh_device_reset, 1040 .eh_target_reset_handler = iscsi_eh_recover_target, 1041 .dma_boundary = PAGE_SIZE - 1, 1042 .sdev_configure = iscsi_sw_tcp_sdev_configure, 1043 .proc_name = "iscsi_tcp", 1044 .this_id = -1, 1045 .track_queue_depth = 1, 1046 .cmd_size = sizeof(struct iscsi_cmd), 1047 }; 1048 1049 static struct iscsi_transport iscsi_sw_tcp_transport = { 1050 .owner = THIS_MODULE, 1051 .name = "tcp", 1052 .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST 1053 | CAP_DATADGST, 1054 /* session management */ 1055 .create_session = iscsi_sw_tcp_session_create, 1056 .destroy_session = iscsi_sw_tcp_session_destroy, 1057 /* connection management */ 1058 .create_conn = iscsi_sw_tcp_conn_create, 1059 .bind_conn = iscsi_sw_tcp_conn_bind, 1060 .destroy_conn = iscsi_sw_tcp_conn_destroy, 1061 .attr_is_visible = iscsi_sw_tcp_attr_is_visible, 1062 .set_param = iscsi_sw_tcp_conn_set_param, 1063 .get_conn_param = iscsi_sw_tcp_conn_get_param, 1064 .get_session_param = iscsi_session_get_param, 1065 .start_conn = iscsi_conn_start, 1066 .stop_conn = iscsi_sw_tcp_conn_stop, 1067 /* iscsi host params */ 1068 .get_host_param = iscsi_sw_tcp_host_get_param, 1069 .set_host_param = iscsi_host_set_param, 1070 /* IO */ 1071 .send_pdu = iscsi_conn_send_pdu, 1072 .get_stats = iscsi_sw_tcp_conn_get_stats, 1073 /* iscsi task/cmd helpers */ 1074 .init_task = iscsi_tcp_task_init, 1075 .xmit_task = iscsi_tcp_task_xmit, 1076 .cleanup_task = iscsi_tcp_cleanup_task, 1077 /* low level pdu helpers */ 1078 .xmit_pdu = iscsi_sw_tcp_pdu_xmit, 1079 .init_pdu = iscsi_sw_tcp_pdu_init, 1080 .alloc_pdu = iscsi_sw_tcp_pdu_alloc, 1081 /* recovery */ 1082 .session_recovery_timedout = iscsi_session_recovery_timedout, 1083 }; 1084 1085 static int __init iscsi_sw_tcp_init(void) 1086 { 1087 if (iscsi_max_lun < 1) { 1088 printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n", 1089 iscsi_max_lun); 1090 return -EINVAL; 1091 } 1092 1093 iscsi_sw_tcp_scsi_transport = iscsi_register_transport( 1094 &iscsi_sw_tcp_transport); 1095 if (!iscsi_sw_tcp_scsi_transport) 1096 return -ENODEV; 1097 1098 return 0; 1099 } 1100 1101 static void __exit iscsi_sw_tcp_exit(void) 1102 { 1103 iscsi_unregister_transport(&iscsi_sw_tcp_transport); 1104 } 1105 1106 module_init(iscsi_sw_tcp_init); 1107 module_exit(iscsi_sw_tcp_exit); 1108