1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/blk-integrity.h> 7 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 8 #include <linux/nvme_ioctl.h> 9 #include <linux/io_uring/cmd.h> 10 #include "nvme.h" 11 12 enum { 13 NVME_IOCTL_VEC = (1 << 0), 14 NVME_IOCTL_PARTITION = (1 << 1), 15 }; 16 17 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, 18 unsigned int flags, bool open_for_write) 19 { 20 u32 effects; 21 22 /* 23 * Do not allow unprivileged passthrough on partitions, as that allows an 24 * escape from the containment of the partition. 25 */ 26 if (flags & NVME_IOCTL_PARTITION) 27 goto admin; 28 29 /* 30 * Do not allow unprivileged processes to send vendor specific or fabrics 31 * commands as we can't be sure about their effects. 32 */ 33 if (c->common.opcode >= nvme_cmd_vendor_start || 34 c->common.opcode == nvme_fabrics_command) 35 goto admin; 36 37 /* 38 * Do not allow unprivileged passthrough of admin commands except 39 * for a subset of identify commands that contain information required 40 * to form proper I/O commands in userspace and do not expose any 41 * potentially sensitive information. 42 */ 43 if (!ns) { 44 if (c->common.opcode == nvme_admin_identify) { 45 switch (c->identify.cns) { 46 case NVME_ID_CNS_NS: 47 case NVME_ID_CNS_CS_NS: 48 case NVME_ID_CNS_NS_CS_INDEP: 49 case NVME_ID_CNS_CS_CTRL: 50 case NVME_ID_CNS_CTRL: 51 return true; 52 } 53 } 54 goto admin; 55 } 56 57 /* 58 * Check if the controller provides a Commands Supported and Effects log 59 * and marks this command as supported. If not reject unprivileged 60 * passthrough. 61 */ 62 effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); 63 if (!(effects & NVME_CMD_EFFECTS_CSUPP)) 64 goto admin; 65 66 /* 67 * Don't allow passthrough for command that have intrusive (or unknown) 68 * effects. 69 */ 70 if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | 71 NVME_CMD_EFFECTS_UUID_SEL | 72 NVME_CMD_EFFECTS_SCOPE_MASK)) 73 goto admin; 74 75 /* 76 * Only allow I/O commands that transfer data to the controller or that 77 * change the logical block contents if the file descriptor is open for 78 * writing. 79 */ 80 if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) && 81 !open_for_write) 82 goto admin; 83 84 return true; 85 admin: 86 return capable(CAP_SYS_ADMIN); 87 } 88 89 /* 90 * Convert integer values from ioctl structures to user pointers, silently 91 * ignoring the upper bits in the compat case to match behaviour of 32-bit 92 * kernels. 93 */ 94 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 95 { 96 if (in_compat_syscall()) 97 ptrval = (compat_uptr_t)ptrval; 98 return (void __user *)ptrval; 99 } 100 101 static struct request *nvme_alloc_user_request(struct request_queue *q, 102 struct nvme_command *cmd, blk_opf_t rq_flags, 103 blk_mq_req_flags_t blk_flags) 104 { 105 struct request *req; 106 107 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 108 if (IS_ERR(req)) 109 return req; 110 nvme_init_request(req, cmd); 111 nvme_req(req)->flags |= NVME_REQ_USERCMD; 112 return req; 113 } 114 115 static int nvme_map_user_request(struct request *req, u64 ubuffer, 116 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 117 struct iov_iter *iter, unsigned int flags) 118 { 119 struct request_queue *q = req->q; 120 struct nvme_ns *ns = q->queuedata; 121 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 122 bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk); 123 struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; 124 bool has_metadata = meta_buffer && meta_len; 125 struct bio *bio = NULL; 126 int ret; 127 128 if (!nvme_ctrl_sgl_supported(ctrl)) 129 dev_warn_once(ctrl->device, "using unchecked data buffer\n"); 130 if (has_metadata) { 131 if (!supports_metadata) 132 return -EINVAL; 133 134 if (!nvme_ctrl_meta_sgl_supported(ctrl)) 135 dev_warn_once(ctrl->device, 136 "using unchecked metadata buffer\n"); 137 } 138 139 if (iter) 140 ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL); 141 else 142 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 143 bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, 144 0, rq_data_dir(req)); 145 146 if (ret) 147 return ret; 148 149 bio = req->bio; 150 if (bdev) 151 bio_set_dev(bio, bdev); 152 153 if (has_metadata) { 154 ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len); 155 if (ret) 156 goto out_unmap; 157 } 158 159 return ret; 160 161 out_unmap: 162 if (bio) 163 blk_rq_unmap_user(bio); 164 return ret; 165 } 166 167 static int nvme_submit_user_cmd(struct request_queue *q, 168 struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, 169 void __user *meta_buffer, unsigned meta_len, 170 u64 *result, unsigned timeout, unsigned int flags) 171 { 172 struct nvme_ns *ns = q->queuedata; 173 struct nvme_ctrl *ctrl; 174 struct request *req; 175 struct bio *bio; 176 u32 effects; 177 int ret; 178 179 req = nvme_alloc_user_request(q, cmd, 0, 0); 180 if (IS_ERR(req)) 181 return PTR_ERR(req); 182 183 req->timeout = timeout; 184 if (ubuffer && bufflen) { 185 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 186 meta_len, NULL, flags); 187 if (ret) 188 goto out_free_req; 189 } 190 191 bio = req->bio; 192 ctrl = nvme_req(req)->ctrl; 193 194 effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); 195 ret = nvme_execute_rq(req, false); 196 if (result) 197 *result = le64_to_cpu(nvme_req(req)->result.u64); 198 if (bio) 199 blk_rq_unmap_user(bio); 200 blk_mq_free_request(req); 201 202 if (effects) 203 nvme_passthru_end(ctrl, ns, effects, cmd, ret); 204 return ret; 205 206 out_free_req: 207 blk_mq_free_request(req); 208 return ret; 209 } 210 211 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 212 { 213 struct nvme_user_io io; 214 struct nvme_command c; 215 unsigned length, meta_len; 216 void __user *metadata; 217 218 if (copy_from_user(&io, uio, sizeof(io))) 219 return -EFAULT; 220 if (io.flags) 221 return -EINVAL; 222 223 switch (io.opcode) { 224 case nvme_cmd_write: 225 case nvme_cmd_read: 226 case nvme_cmd_compare: 227 break; 228 default: 229 return -EINVAL; 230 } 231 232 length = (io.nblocks + 1) << ns->head->lba_shift; 233 234 if ((io.control & NVME_RW_PRINFO_PRACT) && 235 (ns->head->ms == ns->head->pi_size)) { 236 /* 237 * Protection information is stripped/inserted by the 238 * controller. 239 */ 240 if (nvme_to_user_ptr(io.metadata)) 241 return -EINVAL; 242 meta_len = 0; 243 metadata = NULL; 244 } else { 245 meta_len = (io.nblocks + 1) * ns->head->ms; 246 metadata = nvme_to_user_ptr(io.metadata); 247 } 248 249 if (ns->head->features & NVME_NS_EXT_LBAS) { 250 length += meta_len; 251 meta_len = 0; 252 } else if (meta_len) { 253 if ((io.metadata & 3) || !io.metadata) 254 return -EINVAL; 255 } 256 257 memset(&c, 0, sizeof(c)); 258 c.rw.opcode = io.opcode; 259 c.rw.flags = io.flags; 260 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 261 c.rw.slba = cpu_to_le64(io.slba); 262 c.rw.length = cpu_to_le16(io.nblocks); 263 c.rw.control = cpu_to_le16(io.control); 264 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 265 c.rw.reftag = cpu_to_le32(io.reftag); 266 c.rw.lbat = cpu_to_le16(io.apptag); 267 c.rw.lbatm = cpu_to_le16(io.appmask); 268 269 return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, 270 meta_len, NULL, 0, 0); 271 } 272 273 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 274 struct nvme_ns *ns, __u32 nsid) 275 { 276 if (ns && nsid != ns->head->ns_id) { 277 dev_err(ctrl->device, 278 "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", 279 current->comm, nsid, ns->head->ns_id); 280 return false; 281 } 282 283 return true; 284 } 285 286 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 287 struct nvme_passthru_cmd __user *ucmd, unsigned int flags, 288 bool open_for_write) 289 { 290 struct nvme_passthru_cmd cmd; 291 struct nvme_command c; 292 unsigned timeout = 0; 293 u64 result; 294 int status; 295 296 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 297 return -EFAULT; 298 if (cmd.flags) 299 return -EINVAL; 300 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 301 return -EINVAL; 302 303 memset(&c, 0, sizeof(c)); 304 c.common.opcode = cmd.opcode; 305 c.common.flags = cmd.flags; 306 c.common.nsid = cpu_to_le32(cmd.nsid); 307 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 308 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 309 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 310 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 311 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 312 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 313 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 314 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 315 316 if (!nvme_cmd_allowed(ns, &c, 0, open_for_write)) 317 return -EACCES; 318 319 if (cmd.timeout_ms) 320 timeout = msecs_to_jiffies(cmd.timeout_ms); 321 322 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 323 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 324 cmd.metadata_len, &result, timeout, 0); 325 326 if (status >= 0) { 327 if (put_user(result, &ucmd->result)) 328 return -EFAULT; 329 } 330 331 return status; 332 } 333 334 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 335 struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, 336 bool open_for_write) 337 { 338 struct nvme_passthru_cmd64 cmd; 339 struct nvme_command c; 340 unsigned timeout = 0; 341 int status; 342 343 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 344 return -EFAULT; 345 if (cmd.flags) 346 return -EINVAL; 347 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 348 return -EINVAL; 349 350 memset(&c, 0, sizeof(c)); 351 c.common.opcode = cmd.opcode; 352 c.common.flags = cmd.flags; 353 c.common.nsid = cpu_to_le32(cmd.nsid); 354 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 355 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 356 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 357 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 358 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 359 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 360 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 361 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 362 363 if (!nvme_cmd_allowed(ns, &c, flags, open_for_write)) 364 return -EACCES; 365 366 if (cmd.timeout_ms) 367 timeout = msecs_to_jiffies(cmd.timeout_ms); 368 369 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 370 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 371 cmd.metadata_len, &cmd.result, timeout, flags); 372 373 if (status >= 0) { 374 if (put_user(cmd.result, &ucmd->result)) 375 return -EFAULT; 376 } 377 378 return status; 379 } 380 381 struct nvme_uring_data { 382 __u64 metadata; 383 __u64 addr; 384 __u32 data_len; 385 __u32 metadata_len; 386 __u32 timeout_ms; 387 }; 388 389 /* 390 * This overlays struct io_uring_cmd pdu. 391 * Expect build errors if this grows larger than that. 392 */ 393 struct nvme_uring_cmd_pdu { 394 struct request *req; 395 struct bio *bio; 396 u64 result; 397 int status; 398 }; 399 400 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 401 struct io_uring_cmd *ioucmd) 402 { 403 return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu); 404 } 405 406 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, 407 unsigned issue_flags) 408 { 409 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 410 411 if (pdu->bio) 412 blk_rq_unmap_user(pdu->bio); 413 io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags); 414 } 415 416 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 417 blk_status_t err) 418 { 419 struct io_uring_cmd *ioucmd = req->end_io_data; 420 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 421 422 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) { 423 pdu->status = -EINTR; 424 } else { 425 pdu->status = nvme_req(req)->status; 426 if (!pdu->status) 427 pdu->status = blk_status_to_errno(err); 428 } 429 pdu->result = le64_to_cpu(nvme_req(req)->result.u64); 430 431 /* 432 * IOPOLL could potentially complete this request directly, but 433 * if multiple rings are polling on the same queue, then it's possible 434 * for one ring to find completions for another ring. Punting the 435 * completion via task_work will always direct it to the right 436 * location, rather than potentially complete requests for ringA 437 * under iopoll invocations from ringB. 438 */ 439 io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); 440 return RQ_END_IO_FREE; 441 } 442 443 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 444 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 445 { 446 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 447 const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe); 448 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 449 struct nvme_uring_data d; 450 struct nvme_command c; 451 struct iov_iter iter; 452 struct iov_iter *map_iter = NULL; 453 struct request *req; 454 blk_opf_t rq_flags = REQ_ALLOC_CACHE; 455 blk_mq_req_flags_t blk_flags = 0; 456 int ret; 457 458 c.common.opcode = READ_ONCE(cmd->opcode); 459 c.common.flags = READ_ONCE(cmd->flags); 460 if (c.common.flags) 461 return -EINVAL; 462 463 c.common.command_id = 0; 464 c.common.nsid = cpu_to_le32(cmd->nsid); 465 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 466 return -EINVAL; 467 468 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 469 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 470 c.common.metadata = 0; 471 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 472 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 473 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 474 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 475 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 476 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 477 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 478 479 if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE)) 480 return -EACCES; 481 482 d.metadata = READ_ONCE(cmd->metadata); 483 d.addr = READ_ONCE(cmd->addr); 484 d.data_len = READ_ONCE(cmd->data_len); 485 d.metadata_len = READ_ONCE(cmd->metadata_len); 486 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 487 488 if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 489 int ddir = nvme_is_write(&c) ? WRITE : READ; 490 491 if (vec) 492 ret = io_uring_cmd_import_fixed_vec(ioucmd, 493 u64_to_user_ptr(d.addr), d.data_len, 494 ddir, &iter, issue_flags); 495 else 496 ret = io_uring_cmd_import_fixed(d.addr, d.data_len, 497 ddir, &iter, ioucmd, issue_flags); 498 if (ret < 0) 499 return ret; 500 501 map_iter = &iter; 502 } 503 504 if (issue_flags & IO_URING_F_NONBLOCK) { 505 rq_flags |= REQ_NOWAIT; 506 blk_flags = BLK_MQ_REQ_NOWAIT; 507 } 508 if (issue_flags & IO_URING_F_IOPOLL) 509 rq_flags |= REQ_POLLED; 510 511 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 512 if (IS_ERR(req)) 513 return PTR_ERR(req); 514 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 515 516 if (d.data_len) { 517 ret = nvme_map_user_request(req, d.addr, d.data_len, 518 nvme_to_user_ptr(d.metadata), d.metadata_len, 519 map_iter, vec ? NVME_IOCTL_VEC : 0); 520 if (ret) 521 goto out_free_req; 522 } 523 524 /* to free bio on completion, as req->bio will be null at that time */ 525 pdu->bio = req->bio; 526 pdu->req = req; 527 req->end_io_data = ioucmd; 528 req->end_io = nvme_uring_cmd_end_io; 529 blk_execute_rq_nowait(req, false); 530 return -EIOCBQUEUED; 531 532 out_free_req: 533 blk_mq_free_request(req); 534 return ret; 535 } 536 537 static bool is_ctrl_ioctl(unsigned int cmd) 538 { 539 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 540 return true; 541 if (is_sed_ioctl(cmd)) 542 return true; 543 return false; 544 } 545 546 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 547 void __user *argp, bool open_for_write) 548 { 549 switch (cmd) { 550 case NVME_IOCTL_ADMIN_CMD: 551 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 552 case NVME_IOCTL_ADMIN64_CMD: 553 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 554 default: 555 return sed_ioctl(ctrl->opal_dev, cmd, argp); 556 } 557 } 558 559 #ifdef COMPAT_FOR_U64_ALIGNMENT 560 struct nvme_user_io32 { 561 __u8 opcode; 562 __u8 flags; 563 __u16 control; 564 __u16 nblocks; 565 __u16 rsvd; 566 __u64 metadata; 567 __u64 addr; 568 __u64 slba; 569 __u32 dsmgmt; 570 __u32 reftag; 571 __u16 apptag; 572 __u16 appmask; 573 } __attribute__((__packed__)); 574 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 575 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 576 577 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 578 void __user *argp, unsigned int flags, bool open_for_write) 579 { 580 switch (cmd) { 581 case NVME_IOCTL_ID: 582 force_successful_syscall_return(); 583 return ns->head->ns_id; 584 case NVME_IOCTL_IO_CMD: 585 return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write); 586 /* 587 * struct nvme_user_io can have different padding on some 32-bit ABIs. 588 * Just accept the compat version as all fields that are used are the 589 * same size and at the same offset. 590 */ 591 #ifdef COMPAT_FOR_U64_ALIGNMENT 592 case NVME_IOCTL_SUBMIT_IO32: 593 #endif 594 case NVME_IOCTL_SUBMIT_IO: 595 return nvme_submit_io(ns, argp); 596 case NVME_IOCTL_IO64_CMD_VEC: 597 flags |= NVME_IOCTL_VEC; 598 fallthrough; 599 case NVME_IOCTL_IO64_CMD: 600 return nvme_user_cmd64(ns->ctrl, ns, argp, flags, 601 open_for_write); 602 default: 603 return -ENOTTY; 604 } 605 } 606 607 int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, 608 unsigned int cmd, unsigned long arg) 609 { 610 struct nvme_ns *ns = bdev->bd_disk->private_data; 611 bool open_for_write = mode & BLK_OPEN_WRITE; 612 void __user *argp = (void __user *)arg; 613 unsigned int flags = 0; 614 615 if (bdev_is_partition(bdev)) 616 flags |= NVME_IOCTL_PARTITION; 617 618 if (is_ctrl_ioctl(cmd)) 619 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 620 return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 621 } 622 623 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 624 { 625 struct nvme_ns *ns = 626 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 627 bool open_for_write = file->f_mode & FMODE_WRITE; 628 void __user *argp = (void __user *)arg; 629 630 if (is_ctrl_ioctl(cmd)) 631 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 632 return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 633 } 634 635 static int nvme_uring_cmd_checks(unsigned int issue_flags) 636 { 637 638 /* NVMe passthrough requires big SQE/CQE support */ 639 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 640 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 641 return -EOPNOTSUPP; 642 return 0; 643 } 644 645 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 646 unsigned int issue_flags) 647 { 648 struct nvme_ctrl *ctrl = ns->ctrl; 649 int ret; 650 651 ret = nvme_uring_cmd_checks(issue_flags); 652 if (ret) 653 return ret; 654 655 switch (ioucmd->cmd_op) { 656 case NVME_URING_CMD_IO: 657 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 658 break; 659 case NVME_URING_CMD_IO_VEC: 660 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 661 break; 662 default: 663 ret = -ENOTTY; 664 } 665 666 return ret; 667 } 668 669 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 670 { 671 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 672 struct nvme_ns, cdev); 673 674 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 675 } 676 677 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 678 struct io_comp_batch *iob, 679 unsigned int poll_flags) 680 { 681 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 682 struct request *req = pdu->req; 683 684 if (req && blk_rq_is_poll(req)) 685 return blk_rq_poll(req, iob, poll_flags); 686 return 0; 687 } 688 #ifdef CONFIG_NVME_MULTIPATH 689 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 690 void __user *argp, struct nvme_ns_head *head, int srcu_idx, 691 bool open_for_write) 692 __releases(&head->srcu) 693 { 694 struct nvme_ctrl *ctrl = ns->ctrl; 695 int ret; 696 697 nvme_get_ctrl(ns->ctrl); 698 srcu_read_unlock(&head->srcu, srcu_idx); 699 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 700 701 nvme_put_ctrl(ctrl); 702 return ret; 703 } 704 705 int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, 706 unsigned int cmd, unsigned long arg) 707 { 708 struct nvme_ns_head *head = bdev->bd_disk->private_data; 709 bool open_for_write = mode & BLK_OPEN_WRITE; 710 void __user *argp = (void __user *)arg; 711 struct nvme_ns *ns; 712 int srcu_idx, ret = -EWOULDBLOCK; 713 unsigned int flags = 0; 714 715 if (bdev_is_partition(bdev)) 716 flags |= NVME_IOCTL_PARTITION; 717 718 srcu_idx = srcu_read_lock(&head->srcu); 719 ns = nvme_find_path(head); 720 if (!ns) 721 goto out_unlock; 722 723 /* 724 * Handle ioctls that apply to the controller instead of the namespace 725 * separately and drop the ns SRCU reference early. This avoids a 726 * deadlock when deleting namespaces using the passthrough interface. 727 */ 728 if (is_ctrl_ioctl(cmd)) 729 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 730 open_for_write); 731 732 ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 733 out_unlock: 734 srcu_read_unlock(&head->srcu, srcu_idx); 735 return ret; 736 } 737 738 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 739 unsigned long arg) 740 { 741 bool open_for_write = file->f_mode & FMODE_WRITE; 742 struct cdev *cdev = file_inode(file)->i_cdev; 743 struct nvme_ns_head *head = 744 container_of(cdev, struct nvme_ns_head, cdev); 745 void __user *argp = (void __user *)arg; 746 struct nvme_ns *ns; 747 int srcu_idx, ret = -EWOULDBLOCK; 748 749 srcu_idx = srcu_read_lock(&head->srcu); 750 ns = nvme_find_path(head); 751 if (!ns) 752 goto out_unlock; 753 754 if (is_ctrl_ioctl(cmd)) 755 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 756 open_for_write); 757 758 ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 759 out_unlock: 760 srcu_read_unlock(&head->srcu, srcu_idx); 761 return ret; 762 } 763 764 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 765 unsigned int issue_flags) 766 { 767 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 768 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 769 int srcu_idx = srcu_read_lock(&head->srcu); 770 struct nvme_ns *ns = nvme_find_path(head); 771 int ret = -EINVAL; 772 773 if (ns) 774 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 775 srcu_read_unlock(&head->srcu, srcu_idx); 776 return ret; 777 } 778 #endif /* CONFIG_NVME_MULTIPATH */ 779 780 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 781 { 782 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 783 int ret; 784 785 /* IOPOLL not supported yet */ 786 if (issue_flags & IO_URING_F_IOPOLL) 787 return -EOPNOTSUPP; 788 789 ret = nvme_uring_cmd_checks(issue_flags); 790 if (ret) 791 return ret; 792 793 switch (ioucmd->cmd_op) { 794 case NVME_URING_CMD_ADMIN: 795 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 796 break; 797 case NVME_URING_CMD_ADMIN_VEC: 798 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 799 break; 800 default: 801 ret = -ENOTTY; 802 } 803 804 return ret; 805 } 806 807 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, 808 bool open_for_write) 809 { 810 struct nvme_ns *ns; 811 int ret, srcu_idx; 812 813 srcu_idx = srcu_read_lock(&ctrl->srcu); 814 if (list_empty(&ctrl->namespaces)) { 815 ret = -ENOTTY; 816 goto out_unlock; 817 } 818 819 ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list); 820 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 821 dev_warn(ctrl->device, 822 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 823 ret = -EINVAL; 824 goto out_unlock; 825 } 826 827 dev_warn(ctrl->device, 828 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 829 if (!nvme_get_ns(ns)) { 830 ret = -ENXIO; 831 goto out_unlock; 832 } 833 srcu_read_unlock(&ctrl->srcu, srcu_idx); 834 835 ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write); 836 nvme_put_ns(ns); 837 return ret; 838 839 out_unlock: 840 srcu_read_unlock(&ctrl->srcu, srcu_idx); 841 return ret; 842 } 843 844 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 845 unsigned long arg) 846 { 847 bool open_for_write = file->f_mode & FMODE_WRITE; 848 struct nvme_ctrl *ctrl = file->private_data; 849 void __user *argp = (void __user *)arg; 850 851 switch (cmd) { 852 case NVME_IOCTL_ADMIN_CMD: 853 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 854 case NVME_IOCTL_ADMIN64_CMD: 855 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 856 case NVME_IOCTL_IO_CMD: 857 return nvme_dev_user_cmd(ctrl, argp, open_for_write); 858 case NVME_IOCTL_RESET: 859 if (!capable(CAP_SYS_ADMIN)) 860 return -EACCES; 861 dev_warn(ctrl->device, "resetting controller\n"); 862 return nvme_reset_ctrl_sync(ctrl); 863 case NVME_IOCTL_SUBSYS_RESET: 864 if (!capable(CAP_SYS_ADMIN)) 865 return -EACCES; 866 return nvme_reset_subsystem(ctrl); 867 case NVME_IOCTL_RESCAN: 868 if (!capable(CAP_SYS_ADMIN)) 869 return -EACCES; 870 nvme_queue_scan(ctrl); 871 return 0; 872 default: 873 return -ENOTTY; 874 } 875 } 876