1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015 Netflix, Inc. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
 *
 * Derived from ata_da.c:
 * Copyright (c) 2009 Alexander Motin <mav@FreeBSD.org>
 */

#include <sys/param.h>

#ifdef _KERNEL
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/conf.h>
#include <sys/devicestat.h>
#include <sys/eventhandler.h>
#include <sys/malloc.h>
#include <sys/cons.h>
#include <sys/power.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/sbuf.h>
#include <geom/geom.h>
#include <geom/geom_disk.h>
#endif /* _KERNEL */

#ifndef _KERNEL
#include <stdio.h>
#include <string.h>
#endif /* _KERNEL */

#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/cam_periph.h>
#include <cam/cam_xpt_periph.h>
#include <cam/cam_sim.h>
#include <cam/cam_iosched.h>

#include <cam/nvme/nvme_all.h>

/*
 * Peripheral state.  NDA_STATE_NORMAL is the only state; ndaschedule()
 * and ndastart() only process I/O while the softc is in it.
 */
typedef enum {
	NDA_STATE_NORMAL
} nda_state;

/*
 * Per-device flag bits kept in nda_softc.flags:
 *   OPEN      - set by ndaopen(), cleared by ndaclose().
 *   DIRTY     - set by ndastart() when a BIO_WRITE is issued, cleared by
 *               ndaclose() after a successful flush.
 *   SCTX_INIT - set by ndasysctlinit() once the sysctl context has been
 *               initialised; tested by ndacleanup() before freeing it.
 *   RESCAN    - set by ndaopen() while it waits for its XPT_SCAN_LUN,
 *               cleared by ndaopen_rescan_done().
 */
typedef enum {
	NDA_FLAG_OPEN		= 0x0001,
	NDA_FLAG_DIRTY		= 0x0002,
	NDA_FLAG_SCTX_INIT	= 0x0004,
	NDA_FLAG_RESCAN		= 0x0008,
} nda_flags;
/* Bit-name table for the "%b" conversion used by ndaflagssysctl(). */
#define NDA_FLAG_STRING		\
	"\020"			\
	"\001OPEN"		\
	"\002DIRTY"		\
	"\003SCTX_INIT"		\
	"\004RESCAN"

/* Quirk bits; ndaregister() starts from NDA_Q_NONE and lets a tunable override. */
typedef enum {
	NDA_Q_4K   = 0x01,
	NDA_Q_NONE = 0x00,
} nda_quirks;

/* Bit-name table handed to xpt_announce_quirks() by ndaregister(). */
#define NDA_Q_BIT_STRING	\
	"\020"			\
	"\001Bit 0"

/*
 * Kind of request a CCB carries, stored in the CCB's ccb_state private
 * field.  In this file ndaioctl() tags its CCB NDA_CCB_PASS and ndadump()
 * tags its CCB NDA_CCB_DUMP.
 */
typedef enum {
	NDA_CCB_BUFFER_IO	= 0x01,
	NDA_CCB_DUMP            = 0x02,
	NDA_CCB_TRIM            = 0x03,
	NDA_CCB_PASS            = 0x04,
	NDA_CCB_TYPE_MASK	= 0x0F,
} nda_ccb_state;

/* Offsets into our private area for storing information */
#define ccb_state	ccb_h.ppriv_field0
#define ccb_bp		ccb_h.ppriv_ptr1	/* For NDA_CCB_BUFFER_IO */
#define ccb_trim	ccb_h.ppriv_ptr1	/* For NDA_CCB_TRIM */

/*
 * Per-namespace driver state, allocated zeroed by ndaregister() and freed
 * by ndacleanup().
 */
struct nda_softc {
	struct   cam_iosched_softc *cam_iosched;
	int			outstanding_cmds;	/* Number of active commands */
	int			refcount;		/* Active xpt_action() calls */
	nda_state		state;
	nda_flags		flags;
	nda_quirks		quirks;
	int			unmappedio;	/* 1 when the SIM reports PIM_UNMAPPED */
	quad_t			deletes;	/* BIO_DELETEs seen by ndastrategy() */
	uint32_t		nsid;			/* Namespace ID for this nda device */
	struct disk		*disk;
	struct task		sysctl_task;	/* runs ndasysctlinit() */
	struct sysctl_ctx_list	sysctl_ctx;
	struct sysctl_oid	*sysctl_tree;
	/* Counters exported read-only through the per-unit sysctl tree. */
	uint64_t		trim_count;
	uint64_t		trim_ranges;
	uint64_t		trim_lbas;
#ifdef CAM_TEST_FAILURE
	/* Error-injection knobs consumed by ndastart(). */
	int			force_read_error;
	int			force_write_error;
	int			periodic_read_error;
	int			periodic_read_count;
#endif
#ifdef CAM_IO_STATS
	struct sysctl_ctx_list	sysctl_stats_ctx;
	struct sysctl_oid	*sysctl_stats_tree;
	u_int			timeouts;
	u_int			errors;
	u_int			invalidations;	/* bumped by ndaoninvalidate() */
#endif
};

/*
 * Payload for one DSM TRIM: an array of ranges filling exactly
 * NVME_MAX_DSM_TRIM bytes, plus a queue of bios (presumably the
 * BIO_DELETEs folded into this request -- the consumer is outside
 * this part of the file).
 */
struct nda_trim_request {
	struct nvme_dsm_range	dsm[NVME_MAX_DSM_TRIM / sizeof(struct nvme_dsm_range)];
	TAILQ_HEAD(, bio) bps;
};
_Static_assert(NVME_MAX_DSM_TRIM % sizeof(struct nvme_dsm_range) == 0,
    "NVME_MAX_DSM_TRIM must be an integral number of ranges");

/* Need quirk table */

static	disk_ioctl_t	ndaioctl;
static	disk_strategy_t	ndastrategy;
static	dumper_t	ndadump;
static	periph_init_t	ndainit;
static	void		ndaasync(void *callback_arg, uint32_t code,
				struct cam_path *path, void *arg);
static	void		ndasysctlinit(void *context, int pending);
static	int		ndaflagssysctl(SYSCTL_HANDLER_ARGS);
static	periph_ctor_t	ndaregister;
static	periph_dtor_t	ndacleanup;
static	periph_start_t	ndastart;
static	periph_oninv_t	ndaoninvalidate;
static	void		ndadone(struct cam_periph *periph,
			       union ccb *done_ccb);
static  int		ndaerror(union ccb *ccb, uint32_t cam_flags,
				uint32_t sense_flags);
static void		ndashutdown(void *arg, int howto);
static void		ndasuspend(void *arg, enum power_stype stype);

/* Compile-time defaults; each may be overridden by the build. */
#ifndef	NDA_DEFAULT_SEND_ORDERED
#define	NDA_DEFAULT_SEND_ORDERED	1
#endif
#ifndef NDA_DEFAULT_TIMEOUT
#define NDA_DEFAULT_TIMEOUT 30	/* Timeout in seconds */
#endif
#ifndef	NDA_DEFAULT_RETRY
#define	NDA_DEFAULT_RETRY	4
#endif
#ifndef NDA_MAX_TRIM_ENTRIES
#define NDA_MAX_TRIM_ENTRIES  (NVME_MAX_DSM_TRIM / sizeof(struct nvme_dsm_range))/* Number of DSM trims to use, max 256 */
#endif

static SYSCTL_NODE(_kern_cam, OID_AUTO, nda, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "CAM Direct Access Disk driver");

//static int nda_retry_count = NDA_DEFAULT_RETRY;
static int nda_send_ordered = NDA_DEFAULT_SEND_ORDERED;
/* Seconds; the CCB fill helpers multiply this by 1000. */
static int nda_default_timeout = NDA_DEFAULT_TIMEOUT;
static int nda_max_trim_entries = NDA_MAX_TRIM_ENTRIES;
static int nda_enable_biospeedup = 1;
static int nda_nvd_compat = 1;
SYSCTL_INT(_kern_cam_nda, OID_AUTO, max_trim, CTLFLAG_RDTUN,
    &nda_max_trim_entries, NDA_MAX_TRIM_ENTRIES,
    "Maximum number of BIO_DELETE to send down as a DSM TRIM.");
SYSCTL_INT(_kern_cam_nda, OID_AUTO, enable_biospeedup, CTLFLAG_RDTUN,
    &nda_enable_biospeedup, 0, "Enable BIO_SPEEDUP processing.");
SYSCTL_INT(_kern_cam_nda, OID_AUTO, nvd_compat, CTLFLAG_RDTUN,
    &nda_nvd_compat, 1, "Enable creation of nvd aliases.");

/*
 * All NVMe media is non-rotational, so all nvme device instances
 * share this to implement the sysctl.
 */
static int nda_rotating_media = 0;

/* Peripheral driver descriptor: init routine, driver name, unit list. */
static struct periph_driver ndadriver =
{
	ndainit, "nda",
	TAILQ_HEAD_INITIALIZER(ndadriver.units), /* generation */ 0
};

PERIPHDRIVER_DECLARE(nda, ndadriver);

static MALLOC_DEFINE(M_NVMEDA, "nvme_da", "nvme_da buffers");

/*
 * nice wrappers. Maybe these belong in nvme_all.c instead of
 * here, but this is the only place that uses these. Should
 * we ever grow another NVME periph, we should move them
 * all there wholesale.
218 */ 219 220 static void 221 nda_nvme_flush(struct nda_softc *softc, struct ccb_nvmeio *nvmeio) 222 { 223 cam_fill_nvmeio(nvmeio, 224 0, /* retries */ 225 ndadone, /* cbfcnp */ 226 CAM_DIR_NONE, /* flags */ 227 NULL, /* data_ptr */ 228 0, /* dxfer_len */ 229 nda_default_timeout * 1000); /* timeout 30s */ 230 nvme_ns_flush_cmd(&nvmeio->cmd, softc->nsid); 231 } 232 233 static void 234 nda_nvme_trim(struct nda_softc *softc, struct ccb_nvmeio *nvmeio, 235 void *payload, uint32_t num_ranges) 236 { 237 cam_fill_nvmeio(nvmeio, 238 0, /* retries */ 239 ndadone, /* cbfcnp */ 240 CAM_DIR_OUT, /* flags */ 241 payload, /* data_ptr */ 242 num_ranges * sizeof(struct nvme_dsm_range), /* dxfer_len */ 243 nda_default_timeout * 1000); /* timeout 30s */ 244 nvme_ns_trim_cmd(&nvmeio->cmd, softc->nsid, num_ranges); 245 } 246 247 static void 248 nda_nvme_write(struct nda_softc *softc, struct ccb_nvmeio *nvmeio, 249 void *payload, uint64_t lba, uint32_t len, uint32_t count) 250 { 251 cam_fill_nvmeio(nvmeio, 252 0, /* retries */ 253 ndadone, /* cbfcnp */ 254 CAM_DIR_OUT, /* flags */ 255 payload, /* data_ptr */ 256 len, /* dxfer_len */ 257 nda_default_timeout * 1000); /* timeout 30s */ 258 nvme_ns_write_cmd(&nvmeio->cmd, softc->nsid, lba, count); 259 } 260 261 static void 262 nda_nvme_rw_bio(struct nda_softc *softc, struct ccb_nvmeio *nvmeio, 263 struct bio *bp, uint32_t rwcmd) 264 { 265 int flags = rwcmd == NVME_OPC_READ ? 
CAM_DIR_IN : CAM_DIR_OUT; 266 void *payload; 267 uint64_t lba; 268 uint32_t count; 269 270 if (bp->bio_flags & BIO_UNMAPPED) { 271 flags |= CAM_DATA_BIO; 272 payload = bp; 273 } else { 274 payload = bp->bio_data; 275 } 276 277 lba = bp->bio_pblkno; 278 count = bp->bio_bcount / softc->disk->d_sectorsize; 279 280 cam_fill_nvmeio(nvmeio, 281 0, /* retries */ 282 ndadone, /* cbfcnp */ 283 flags, /* flags */ 284 payload, /* data_ptr */ 285 bp->bio_bcount, /* dxfer_len */ 286 nda_default_timeout * 1000); /* timeout 30s */ 287 nvme_ns_rw_cmd(&nvmeio->cmd, rwcmd, softc->nsid, lba, count); 288 } 289 290 static void 291 ndasetgeom(struct nda_softc *softc, struct cam_periph *periph) 292 { 293 struct disk *disk = softc->disk; 294 const struct nvme_namespace_data *nsd; 295 const struct nvme_controller_data *cd; 296 uint8_t flbas_fmt, lbads, vwc_present; 297 u_int flags; 298 299 nsd = nvme_get_identify_ns(periph); 300 cd = nvme_get_identify_cntrl(periph); 301 302 /* 303 * Preserve flags we can't infer that were set before. UNMAPPED comes 304 * from the PIM, so won't change after we set it the first 305 * time. Subsequent times, we have to preserve it. 
306 */ 307 flags = disk->d_flags & DISKFLAG_UNMAPPED_BIO; /* Need to preserve */ 308 309 flbas_fmt = NVMEV(NVME_NS_DATA_FLBAS_FORMAT, nsd->flbas); 310 lbads = NVMEV(NVME_NS_DATA_LBAF_LBADS, nsd->lbaf[flbas_fmt]); 311 disk->d_sectorsize = 1 << lbads; 312 disk->d_mediasize = (off_t)(disk->d_sectorsize * nsd->nsze); 313 disk->d_delmaxsize = disk->d_mediasize; 314 disk->d_flags = DISKFLAG_DIRECT_COMPLETION; 315 if (nvme_ctrlr_has_dataset_mgmt(cd)) 316 disk->d_flags |= DISKFLAG_CANDELETE; 317 vwc_present = NVMEV(NVME_CTRLR_DATA_VWC_PRESENT, cd->vwc); 318 if (vwc_present) 319 disk->d_flags |= DISKFLAG_CANFLUSHCACHE; 320 disk->d_flags |= flags; 321 } 322 323 static void 324 ndaopen_rescan_done(struct cam_periph *periph, union ccb *ccb) 325 { 326 struct nda_softc *softc; 327 328 softc = (struct nda_softc *)periph->softc; 329 330 cam_periph_assert(periph, MA_OWNED); 331 332 softc->flags &= ~NDA_FLAG_RESCAN; 333 xpt_release_ccb(ccb); 334 wakeup(&softc->disk->d_mediasize); 335 } 336 337 338 static int 339 ndaopen(struct disk *dp) 340 { 341 struct cam_periph *periph; 342 struct nda_softc *softc; 343 union ccb *ccb; 344 int error; 345 346 periph = (struct cam_periph *)dp->d_drv1; 347 if (cam_periph_acquire(periph) != 0) { 348 return(ENXIO); 349 } 350 351 cam_periph_lock(periph); 352 if ((error = cam_periph_hold(periph, PRIBIO|PCATCH)) != 0) { 353 cam_periph_unlock(periph); 354 cam_periph_release(periph); 355 return (error); 356 } 357 358 softc = (struct nda_softc *)periph->softc; 359 CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH, 360 ("ndaopen\n")); 361 362 /* 363 * Rescan the lun in case the mediasize or sectorsize has changed since 364 * we probed the device. Format and secure erase operations can do this, 365 * but the nvme standard doesn't require a async notification of that 366 * happening. 
da/ada do this by restarting their probe, but since 367 * nvme_xpt gets the identify information we need, we just rescan here 368 * since it's the easiest way to notice size changes. 369 * 370 * Not acquiring / releasing for the geom probe -- it's inline 371 */ 372 ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); 373 ccb->ccb_h.func_code = XPT_SCAN_LUN; 374 ccb->ccb_h.cbfcnp = ndaopen_rescan_done; 375 ccb->ccb_h.ppriv_ptr0 = periph; 376 ccb->crcn.flags = 0; 377 xpt_action(ccb); 378 379 softc->flags |= NDA_FLAG_RESCAN; 380 error = 0; 381 while ((softc->flags & NDA_FLAG_RESCAN) != 0 && error == 0) 382 error = cam_periph_sleep(periph, &softc->disk->d_mediasize, PRIBIO, 383 "ndareprobe", 0); 384 if (error != 0) 385 xpt_print(periph->path, "Unable to retrieve capacity data\n"); 386 else 387 ndasetgeom(softc, periph); 388 389 softc->flags |= NDA_FLAG_OPEN; 390 391 cam_periph_unhold(periph); 392 cam_periph_unlock(periph); 393 return (0); 394 } 395 396 static int 397 ndaclose(struct disk *dp) 398 { 399 struct cam_periph *periph; 400 struct nda_softc *softc; 401 union ccb *ccb; 402 int error; 403 404 periph = (struct cam_periph *)dp->d_drv1; 405 softc = (struct nda_softc *)periph->softc; 406 cam_periph_lock(periph); 407 408 CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH, 409 ("ndaclose\n")); 410 411 if ((softc->flags & NDA_FLAG_DIRTY) != 0 && 412 (periph->flags & CAM_PERIPH_INVALID) == 0 && 413 cam_periph_hold(periph, PRIBIO) == 0) { 414 ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); 415 nda_nvme_flush(softc, &ccb->nvmeio); 416 error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0, 417 /*sense_flags*/0, softc->disk->d_devstat); 418 419 if (error != 0) 420 xpt_print(periph->path, "Synchronize cache failed\n"); 421 else 422 softc->flags &= ~NDA_FLAG_DIRTY; 423 xpt_release_ccb(ccb); 424 cam_periph_unhold(periph); 425 } 426 427 softc->flags &= ~NDA_FLAG_OPEN; 428 429 while (softc->refcount != 0) 430 cam_periph_sleep(periph, &softc->refcount, PRIBIO, 
"ndaclose", 1); 431 KASSERT(softc->outstanding_cmds == 0, 432 ("nda %d outstanding commands", softc->outstanding_cmds)); 433 cam_periph_unlock(periph); 434 cam_periph_release(periph); 435 return (0); 436 } 437 438 static void 439 ndaschedule(struct cam_periph *periph) 440 { 441 struct nda_softc *softc = (struct nda_softc *)periph->softc; 442 443 if (softc->state != NDA_STATE_NORMAL) 444 return; 445 446 cam_iosched_schedule(softc->cam_iosched, periph); 447 } 448 449 static int 450 ndaioctl(struct disk *dp, u_long cmd, void *data, int fflag, 451 struct thread *td) 452 { 453 struct cam_periph *periph; 454 455 periph = (struct cam_periph *)dp->d_drv1; 456 457 switch (cmd) { 458 case NVME_IO_TEST: 459 case NVME_BIO_TEST: 460 /* 461 * These don't map well to the underlying CCBs, so 462 * they are usupported via CAM. 463 */ 464 return (ENOTTY); 465 case NVME_GET_NSID: 466 { 467 struct nvme_get_nsid *gnsid = (struct nvme_get_nsid *)data; 468 struct ccb_pathinq cpi; 469 470 xpt_path_inq(&cpi, periph->path); 471 strncpy(gnsid->cdev, cpi.xport_specific.nvme.dev_name, 472 sizeof(gnsid->cdev)); 473 gnsid->nsid = cpi.xport_specific.nvme.nsid; 474 return (0); 475 } 476 case NVME_PASSTHROUGH_CMD: 477 { 478 struct nvme_pt_command *pt; 479 union ccb *ccb; 480 struct cam_periph_map_info mapinfo; 481 u_int maxmap = dp->d_maxsize; 482 int error; 483 484 /* 485 * Create a NVME_IO CCB to do the passthrough command. 486 */ 487 pt = (struct nvme_pt_command *)data; 488 ccb = xpt_alloc_ccb(); 489 xpt_setup_ccb(&ccb->ccb_h, periph->path, CAM_PRIORITY_NORMAL); 490 ccb->ccb_state = NDA_CCB_PASS; 491 cam_fill_nvmeio(&ccb->nvmeio, 492 0, /* Retries */ 493 ndadone, 494 (pt->is_read ? 
CAM_DIR_IN : CAM_DIR_OUT) | CAM_DATA_VADDR, 495 pt->buf, 496 pt->len, 497 nda_default_timeout * 1000); 498 memcpy(&ccb->nvmeio.cmd, &pt->cmd, sizeof(pt->cmd)); 499 500 /* 501 * Wire the user memory in this request for the I/O 502 */ 503 memset(&mapinfo, 0, sizeof(mapinfo)); 504 error = cam_periph_mapmem(ccb, &mapinfo, maxmap); 505 if (error) 506 goto out; 507 508 /* 509 * Lock the periph and run the command. 510 */ 511 cam_periph_lock(periph); 512 cam_periph_runccb(ccb, NULL, CAM_RETRY_SELTO, 513 SF_RETRY_UA | SF_NO_PRINT, NULL); 514 515 /* 516 * Tear down mapping and return status. 517 */ 518 cam_periph_unlock(periph); 519 error = cam_periph_unmapmem(ccb, &mapinfo); 520 if (!cam_ccb_success(ccb)) 521 error = EIO; 522 out: 523 cam_periph_lock(periph); 524 xpt_release_ccb(ccb); 525 cam_periph_unlock(periph); 526 return (error); 527 } 528 default: 529 break; 530 } 531 return (ENOTTY); 532 } 533 534 /* 535 * Actually translate the requested transfer into one the physical driver 536 * can understand. The transfer is described by a buf and will include 537 * only one physical transfer. 538 */ 539 static void 540 ndastrategy(struct bio *bp) 541 { 542 struct cam_periph *periph; 543 struct nda_softc *softc; 544 545 periph = (struct cam_periph *)bp->bio_disk->d_drv1; 546 softc = (struct nda_softc *)periph->softc; 547 548 cam_periph_lock(periph); 549 550 CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastrategy(%p)\n", bp)); 551 552 /* 553 * If the device has been made invalid, error out 554 */ 555 if ((periph->flags & CAM_PERIPH_INVALID) != 0) { 556 cam_periph_unlock(periph); 557 biofinish(bp, NULL, ENXIO); 558 return; 559 } 560 561 if (bp->bio_cmd == BIO_DELETE) 562 softc->deletes++; 563 564 /* 565 * Place it in the queue of disk activities for this disk 566 */ 567 cam_iosched_queue_work(softc->cam_iosched, bp); 568 569 /* 570 * Schedule ourselves for performing the work. 
571 */ 572 ndaschedule(periph); 573 cam_periph_unlock(periph); 574 575 return; 576 } 577 578 static int 579 ndadump(void *arg, void *virtual, off_t offset, size_t length) 580 { 581 struct cam_periph *periph; 582 struct nda_softc *softc; 583 u_int secsize; 584 struct ccb_nvmeio nvmeio; 585 struct disk *dp; 586 uint64_t lba; 587 uint32_t count; 588 int error = 0; 589 590 dp = arg; 591 periph = dp->d_drv1; 592 softc = (struct nda_softc *)periph->softc; 593 secsize = softc->disk->d_sectorsize; 594 lba = offset / secsize; 595 count = length / secsize; 596 597 if ((periph->flags & CAM_PERIPH_INVALID) != 0) 598 return (ENXIO); 599 600 /* xpt_get_ccb returns a zero'd allocation for the ccb, mimic that here */ 601 memset(&nvmeio, 0, sizeof(nvmeio)); 602 if (length > 0) { 603 xpt_setup_ccb(&nvmeio.ccb_h, periph->path, CAM_PRIORITY_NORMAL); 604 nvmeio.ccb_state = NDA_CCB_DUMP; 605 nda_nvme_write(softc, &nvmeio, virtual, lba, length, count); 606 error = cam_periph_runccb((union ccb *)&nvmeio, cam_periph_error, 607 0, SF_NO_RECOVERY | SF_NO_RETRY, NULL); 608 if (error != 0) 609 printf("Aborting dump due to I/O error %d.\n", error); 610 611 return (error); 612 } 613 614 /* Flush */ 615 xpt_setup_ccb(&nvmeio.ccb_h, periph->path, CAM_PRIORITY_NORMAL); 616 617 nvmeio.ccb_state = NDA_CCB_DUMP; 618 nda_nvme_flush(softc, &nvmeio); 619 error = cam_periph_runccb((union ccb *)&nvmeio, cam_periph_error, 620 0, SF_NO_RECOVERY | SF_NO_RETRY, NULL); 621 if (error != 0) 622 xpt_print(periph->path, "flush cmd failed\n"); 623 return (error); 624 } 625 626 static void 627 ndainit(void) 628 { 629 cam_status status; 630 631 /* 632 * Install a global async callback. This callback will 633 * receive async callbacks like "new device found". 
634 */ 635 status = xpt_register_async(AC_FOUND_DEVICE, ndaasync, NULL, NULL); 636 637 if (status != CAM_REQ_CMP) { 638 printf("nda: Failed to attach master async callback " 639 "due to status 0x%x!\n", status); 640 } else if (nda_send_ordered) { 641 /* Register our event handlers */ 642 if ((EVENTHANDLER_REGISTER(power_suspend, ndasuspend, 643 NULL, EVENTHANDLER_PRI_LAST)) == NULL) 644 printf("ndainit: power event registration failed!\n"); 645 if ((EVENTHANDLER_REGISTER(shutdown_post_sync, ndashutdown, 646 NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) 647 printf("ndainit: shutdown event registration failed!\n"); 648 } 649 } 650 651 /* 652 * Callback from GEOM, called when it has finished cleaning up its 653 * resources. 654 */ 655 static void 656 ndadiskgonecb(struct disk *dp) 657 { 658 struct cam_periph *periph; 659 660 periph = (struct cam_periph *)dp->d_drv1; 661 662 cam_periph_release(periph); 663 } 664 665 static void 666 ndaoninvalidate(struct cam_periph *periph) 667 { 668 struct nda_softc *softc; 669 670 softc = (struct nda_softc *)periph->softc; 671 672 /* 673 * De-register any async callbacks. 674 */ 675 xpt_register_async(0, ndaasync, periph, periph->path); 676 #ifdef CAM_IO_STATS 677 softc->invalidations++; 678 #endif 679 680 /* 681 * Return all queued I/O with ENXIO. Transactions may be queued up here 682 * for retry (since we are called while there's other transactions 683 * pending). Any requests in the hardware will drain before ndacleanup 684 * is called. 685 */ 686 cam_iosched_flush(softc->cam_iosched, NULL, ENXIO); 687 688 /* 689 * Tell GEOM that we've gone away, we'll get a callback when it is 690 * done cleaning up its resources. 
691 */ 692 disk_gone(softc->disk); 693 } 694 695 static void 696 ndacleanup(struct cam_periph *periph) 697 { 698 struct nda_softc *softc; 699 700 softc = (struct nda_softc *)periph->softc; 701 702 cam_periph_unlock(periph); 703 704 cam_iosched_fini(softc->cam_iosched); 705 706 /* 707 * If we can't free the sysctl tree, oh well... 708 */ 709 if ((softc->flags & NDA_FLAG_SCTX_INIT) != 0) { 710 #ifdef CAM_IO_STATS 711 if (sysctl_ctx_free(&softc->sysctl_stats_ctx) != 0) 712 xpt_print(periph->path, 713 "can't remove sysctl stats context\n"); 714 #endif 715 if (sysctl_ctx_free(&softc->sysctl_ctx) != 0) 716 xpt_print(periph->path, 717 "can't remove sysctl context\n"); 718 } 719 720 disk_destroy(softc->disk); 721 free(softc, M_DEVBUF); 722 cam_periph_lock(periph); 723 } 724 725 static void 726 ndaasync(void *callback_arg, uint32_t code, struct cam_path *path, void *arg) 727 { 728 struct cam_periph *periph = callback_arg; 729 struct nda_softc *softc; 730 731 switch (code) { 732 case AC_FOUND_DEVICE: 733 { 734 struct ccb_getdev *cgd; 735 cam_status status; 736 737 cgd = (struct ccb_getdev *)arg; 738 if (cgd == NULL) 739 break; 740 741 if (cgd->protocol != PROTO_NVME) 742 break; 743 744 /* 745 * Allocate a peripheral instance for 746 * this device and start the probe 747 * process. 748 */ 749 status = cam_periph_alloc(ndaregister, ndaoninvalidate, 750 ndacleanup, ndastart, 751 "nda", CAM_PERIPH_BIO, 752 path, ndaasync, 753 AC_FOUND_DEVICE, cgd); 754 755 if (status != CAM_REQ_CMP 756 && status != CAM_REQ_INPROG) 757 printf("ndaasync: Unable to attach to new device " 758 "due to status 0x%x\n", status); 759 break; 760 } 761 case AC_GETDEV_CHANGED: 762 { 763 off_t mediasize; 764 u_int sectorsize; 765 766 softc = periph->softc; 767 mediasize = softc->disk->d_mediasize; 768 sectorsize = softc->disk->d_sectorsize; 769 ndasetgeom(softc, periph); 770 /* 771 * If the sectorsize changed, then it's new media. Otherwise if 772 * the media size changed, resize the existing disk. 
Otherwise 773 * do nothing. 774 */ 775 if (sectorsize != softc->disk->d_sectorsize) 776 disk_media_changed(softc->disk, M_WAITOK); 777 else if (mediasize != softc->disk->d_mediasize) 778 disk_resize(softc->disk, M_WAITOK); 779 break; 780 } 781 case AC_ADVINFO_CHANGED: 782 { 783 uintptr_t buftype; 784 785 /* 786 * Note: In theory, we could send CDAI_TYPE_NVME_* events here, 787 * but instead the rescan code only sends more specific 788 * AC_GETDEV_CHANGED. There's no way to generically get 789 * notifications of changes to these structures from the drive 790 * (though we could notice with memcmp). The automation in NVME 791 * is at a much more granular level, so we leverage that. 792 */ 793 softc = periph->softc; 794 buftype = (uintptr_t)arg; 795 if (buftype == CDAI_TYPE_PHYS_PATH) { 796 disk_attr_changed(softc->disk, "GEOM::physpath", 797 M_WAITOK); 798 } 799 break; 800 } 801 case AC_LOST_DEVICE: 802 default: 803 break; 804 } 805 cam_periph_async(periph, code, path, arg); 806 } 807 808 static void 809 ndasysctlinit(void *context, int pending) 810 { 811 struct cam_periph *periph; 812 struct nda_softc *softc; 813 char tmpstr[32], tmpstr2[16]; 814 815 periph = (struct cam_periph *)context; 816 817 /* periph was held for us when this task was enqueued */ 818 if ((periph->flags & CAM_PERIPH_INVALID) != 0) { 819 cam_periph_release(periph); 820 return; 821 } 822 823 softc = (struct nda_softc *)periph->softc; 824 snprintf(tmpstr, sizeof(tmpstr), "CAM NDA unit %d", periph->unit_number); 825 snprintf(tmpstr2, sizeof(tmpstr2), "%d", periph->unit_number); 826 827 sysctl_ctx_init(&softc->sysctl_ctx); 828 softc->flags |= NDA_FLAG_SCTX_INIT; 829 softc->sysctl_tree = SYSCTL_ADD_NODE_WITH_LABEL(&softc->sysctl_ctx, 830 SYSCTL_STATIC_CHILDREN(_kern_cam_nda), OID_AUTO, tmpstr2, 831 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, tmpstr, "device_index"); 832 if (softc->sysctl_tree == NULL) { 833 printf("ndasysctlinit: unable to allocate sysctl tree\n"); 834 cam_periph_release(periph); 835 return; 836 
} 837 838 SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), 839 OID_AUTO, "unmapped_io", CTLFLAG_RD, 840 &softc->unmappedio, 0, "Unmapped I/O leaf"); 841 842 SYSCTL_ADD_QUAD(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), 843 OID_AUTO, "deletes", CTLFLAG_RD, 844 &softc->deletes, "Number of BIO_DELETE requests"); 845 846 SYSCTL_ADD_UQUAD(&softc->sysctl_ctx, 847 SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, 848 "trim_count", CTLFLAG_RD, &softc->trim_count, 849 "Total number of unmap/dsm commands sent"); 850 SYSCTL_ADD_UQUAD(&softc->sysctl_ctx, 851 SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, 852 "trim_ranges", CTLFLAG_RD, &softc->trim_ranges, 853 "Total number of ranges in unmap/dsm commands"); 854 SYSCTL_ADD_UQUAD(&softc->sysctl_ctx, 855 SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, 856 "trim_lbas", CTLFLAG_RD, &softc->trim_lbas, 857 "Total lbas in the unmap/dsm commands sent"); 858 859 SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), 860 OID_AUTO, "rotating", CTLFLAG_RD, &nda_rotating_media, 1, 861 "Rotating media"); 862 863 SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), 864 OID_AUTO, "flags", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 865 softc, 0, ndaflagssysctl, "A", 866 "Flags for drive"); 867 868 #ifdef CAM_IO_STATS 869 softc->sysctl_stats_tree = SYSCTL_ADD_NODE(&softc->sysctl_stats_ctx, 870 SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "stats", 871 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Statistics"); 872 if (softc->sysctl_stats_tree == NULL) { 873 printf("ndasysctlinit: unable to allocate sysctl tree for stats\n"); 874 cam_periph_release(periph); 875 return; 876 } 877 SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, 878 SYSCTL_CHILDREN(softc->sysctl_stats_tree), 879 OID_AUTO, "timeouts", CTLFLAG_RD, 880 &softc->timeouts, 0, 881 "Device timeouts reported by the SIM"); 882 SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, 883 SYSCTL_CHILDREN(softc->sysctl_stats_tree), 884 OID_AUTO, "errors", 
CTLFLAG_RD, 885 &softc->errors, 0, 886 "Transport errors reported by the SIM."); 887 SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, 888 SYSCTL_CHILDREN(softc->sysctl_stats_tree), 889 OID_AUTO, "pack_invalidations", CTLFLAG_RD, 890 &softc->invalidations, 0, 891 "Device pack invalidations."); 892 #endif 893 894 #ifdef CAM_TEST_FAILURE 895 SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), 896 OID_AUTO, "invalidate", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, 897 periph, 0, cam_periph_invalidate_sysctl, "I", 898 "Write 1 to invalidate the drive immediately"); 899 #endif 900 901 cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx, 902 softc->sysctl_tree); 903 904 cam_periph_release(periph); 905 } 906 907 static int 908 ndaflagssysctl(SYSCTL_HANDLER_ARGS) 909 { 910 struct sbuf sbuf; 911 struct nda_softc *softc = arg1; 912 int error; 913 914 sbuf_new_for_sysctl(&sbuf, NULL, 0, req); 915 if (softc->flags != 0) 916 sbuf_printf(&sbuf, "0x%b", (unsigned)softc->flags, NDA_FLAG_STRING); 917 else 918 sbuf_putc(&sbuf, '0'); 919 error = sbuf_finish(&sbuf); 920 sbuf_delete(&sbuf); 921 922 return (error); 923 } 924 925 static int 926 ndagetattr(struct bio *bp) 927 { 928 int ret; 929 struct cam_periph *periph; 930 931 if (g_handleattr_int(bp, "GEOM::canspeedup", nda_enable_biospeedup)) 932 return (EJUSTRETURN); 933 934 periph = (struct cam_periph *)bp->bio_disk->d_drv1; 935 cam_periph_lock(periph); 936 ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute, 937 periph->path); 938 cam_periph_unlock(periph); 939 if (ret == 0) 940 bp->bio_completed = bp->bio_length; 941 return ret; 942 } 943 944 static cam_status 945 ndaregister(struct cam_periph *periph, void *arg) 946 { 947 struct nda_softc *softc; 948 struct disk *disk; 949 struct ccb_pathinq cpi; 950 const struct nvme_namespace_data *nsd; 951 const struct nvme_controller_data *cd; 952 char announce_buf[80]; 953 u_int maxio; 954 int quirks; 955 956 nsd = nvme_get_identify_ns(periph); 957 cd = 
nvme_get_identify_cntrl(periph); 958 959 softc = (struct nda_softc *)malloc(sizeof(*softc), M_DEVBUF, 960 M_NOWAIT | M_ZERO); 961 962 if (softc == NULL) { 963 printf("ndaregister: Unable to probe new device. " 964 "Unable to allocate softc\n"); 965 return(CAM_REQ_CMP_ERR); 966 } 967 968 /* ident_data parsing */ 969 970 periph->softc = softc; 971 softc->quirks = NDA_Q_NONE; 972 xpt_path_inq(&cpi, periph->path); 973 TASK_INIT(&softc->sysctl_task, 0, ndasysctlinit, periph); 974 975 /* 976 * The name space ID is the lun, save it for later I/O 977 */ 978 softc->nsid = (uint32_t)xpt_path_lun_id(periph->path); 979 980 /* 981 * Register this media as a disk 982 */ 983 (void)cam_periph_acquire(periph); 984 cam_periph_unlock(periph); 985 snprintf(announce_buf, sizeof(announce_buf), 986 "kern.cam.nda.%d.quirks", periph->unit_number); 987 quirks = softc->quirks; 988 TUNABLE_INT_FETCH(announce_buf, &quirks); 989 softc->quirks = quirks; 990 softc->disk = disk = disk_alloc(); 991 disk->d_rotation_rate = DISK_RR_NON_ROTATING; 992 disk->d_open = ndaopen; 993 disk->d_close = ndaclose; 994 disk->d_strategy = ndastrategy; 995 disk->d_ioctl = ndaioctl; 996 disk->d_getattr = ndagetattr; 997 if (cam_sim_pollable(periph->sim)) 998 disk->d_dump = ndadump; 999 disk->d_gone = ndadiskgonecb; 1000 disk->d_name = "nda"; 1001 disk->d_drv1 = periph; 1002 disk->d_unit = periph->unit_number; 1003 maxio = cpi.maxio; /* Honor max I/O size of SIM */ 1004 if (maxio == 0) 1005 maxio = DFLTPHYS; /* traditional default */ 1006 else if (maxio > maxphys) 1007 maxio = maxphys; /* for safety */ 1008 disk->d_maxsize = maxio; 1009 ndasetgeom(softc, periph); 1010 if ((cpi.hba_misc & PIM_UNMAPPED) != 0) { 1011 disk->d_flags |= DISKFLAG_UNMAPPED_BIO; 1012 softc->unmappedio = 1; 1013 } 1014 1015 /* 1016 * d_ident and d_descr are both far bigger than the length of either 1017 * the serial or model number strings. 
1018 */ 1019 cam_strvis_flag(disk->d_descr, cd->mn, NVME_MODEL_NUMBER_LENGTH, 1020 sizeof(disk->d_descr), CAM_STRVIS_FLAG_NONASCII_SPC); 1021 1022 cam_strvis_flag(disk->d_ident, cd->sn, NVME_SERIAL_NUMBER_LENGTH, 1023 sizeof(disk->d_ident), CAM_STRVIS_FLAG_NONASCII_SPC); 1024 1025 disk->d_hba_vendor = cpi.hba_vendor; 1026 disk->d_hba_device = cpi.hba_device; 1027 disk->d_hba_subvendor = cpi.hba_subvendor; 1028 disk->d_hba_subdevice = cpi.hba_subdevice; 1029 snprintf(disk->d_attachment, sizeof(disk->d_attachment), 1030 "%s%d", cpi.dev_name, cpi.unit_number); 1031 if (NVMEV(NVME_NS_DATA_NSFEAT_NPVALID, nsd->nsfeat) != 0 && 1032 nsd->npwg != 0) 1033 disk->d_stripesize = ((nsd->npwg + 1) * disk->d_sectorsize); 1034 else 1035 disk->d_stripesize = nsd->noiob * disk->d_sectorsize; 1036 disk->d_stripeoffset = 0; 1037 disk->d_devstat = devstat_new_entry(periph->periph_name, 1038 periph->unit_number, disk->d_sectorsize, 1039 DEVSTAT_ALL_SUPPORTED, 1040 DEVSTAT_TYPE_DIRECT | XPORT_DEVSTAT_TYPE(cpi.transport), 1041 DEVSTAT_PRIORITY_DISK); 1042 1043 if (cam_iosched_init(&softc->cam_iosched, periph, disk, 1044 ndaschedule) != 0) { 1045 printf("ndaregister: Unable to probe new device. " 1046 "Unable to allocate iosched memory\n"); 1047 free(softc, M_DEVBUF); 1048 return(CAM_REQ_CMP_ERR); 1049 } 1050 cam_iosched_set_sort_queue(softc->cam_iosched, 0); 1051 1052 /* 1053 * Add alias for older nvd drives to ease transition. 1054 */ 1055 if (nda_nvd_compat) 1056 disk_add_alias(disk, "nvd"); 1057 1058 cam_periph_lock(periph); 1059 1060 snprintf(announce_buf, sizeof(announce_buf), 1061 "%juMB (%ju %u byte sectors)", 1062 (uintmax_t)((uintmax_t)disk->d_mediasize / (1024*1024)), 1063 (uintmax_t)disk->d_mediasize / disk->d_sectorsize, 1064 disk->d_sectorsize); 1065 xpt_announce_periph(periph, announce_buf); 1066 xpt_announce_quirks(periph, softc->quirks, NDA_Q_BIT_STRING); 1067 1068 /* 1069 * Create our sysctl variables, now that we know 1070 * we have successfully attached. 
1071 */ 1072 if (cam_periph_acquire(periph) == 0) 1073 taskqueue_enqueue(taskqueue_thread, &softc->sysctl_task); 1074 1075 /* 1076 * Register for device going away and info about the drive 1077 * changing (though with NVMe, it can't) 1078 */ 1079 xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED | AC_GETDEV_CHANGED, 1080 ndaasync, periph, periph->path); 1081 1082 softc->state = NDA_STATE_NORMAL; 1083 1084 /* 1085 * We'll release this reference once GEOM calls us back via 1086 * ndadiskgonecb(), telling us that our provider has been freed. 1087 */ 1088 if (cam_periph_acquire(periph) == 0) 1089 disk_create(softc->disk, DISK_VERSION); 1090 1091 cam_periph_release_locked(periph); 1092 return(CAM_REQ_CMP); 1093 } 1094 1095 static void 1096 ndastart(struct cam_periph *periph, union ccb *start_ccb) 1097 { 1098 struct nda_softc *softc = (struct nda_softc *)periph->softc; 1099 struct ccb_nvmeio *nvmeio = &start_ccb->nvmeio; 1100 1101 CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart\n")); 1102 1103 switch (softc->state) { 1104 case NDA_STATE_NORMAL: 1105 { 1106 struct bio *bp; 1107 1108 bp = cam_iosched_next_bio(softc->cam_iosched); 1109 CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart: bio %p\n", bp)); 1110 if (bp == NULL) { 1111 xpt_release_ccb(start_ccb); 1112 break; 1113 } 1114 1115 switch (bp->bio_cmd) { 1116 case BIO_WRITE: 1117 softc->flags |= NDA_FLAG_DIRTY; 1118 /* FALLTHROUGH */ 1119 case BIO_READ: 1120 { 1121 #ifdef CAM_TEST_FAILURE 1122 int fail = 0; 1123 1124 /* 1125 * Support the failure ioctls. If the command is a 1126 * read, and there are pending forced read errors, or 1127 * if a write and pending write errors, then fail this 1128 * operation with EIO. This is useful for testing 1129 * purposes. Also, support having every Nth read fail. 1130 * 1131 * This is a rather blunt tool. 
1132 */ 1133 if (bp->bio_cmd == BIO_READ) { 1134 if (softc->force_read_error) { 1135 softc->force_read_error--; 1136 fail = 1; 1137 } 1138 if (softc->periodic_read_error > 0) { 1139 if (++softc->periodic_read_count >= 1140 softc->periodic_read_error) { 1141 softc->periodic_read_count = 0; 1142 fail = 1; 1143 } 1144 } 1145 } else { 1146 if (softc->force_write_error) { 1147 softc->force_write_error--; 1148 fail = 1; 1149 } 1150 } 1151 if (fail) { 1152 biofinish(bp, NULL, EIO); 1153 xpt_release_ccb(start_ccb); 1154 ndaschedule(periph); 1155 return; 1156 } 1157 #endif 1158 KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || 1159 round_page(bp->bio_bcount + bp->bio_ma_offset) / 1160 PAGE_SIZE == bp->bio_ma_n, 1161 ("Short bio %p", bp)); 1162 nda_nvme_rw_bio(softc, &start_ccb->nvmeio, bp, bp->bio_cmd == BIO_READ ? 1163 NVME_OPC_READ : NVME_OPC_WRITE); 1164 break; 1165 } 1166 case BIO_DELETE: 1167 { 1168 struct nvme_dsm_range *dsm_range, *dsm_end; 1169 struct nda_trim_request *trim; 1170 struct bio *bp1; 1171 int ents; 1172 uint32_t totalcount = 0, ranges = 0; 1173 1174 trim = malloc(sizeof(*trim), M_NVMEDA, M_ZERO | M_NOWAIT); 1175 if (trim == NULL) { 1176 /* 1177 * We have to drop the periph lock when 1178 * returning ENOMEM. g_io_deliver treats these 1179 * request differently and will recursively call 1180 * the start routine which causes us to get into 1181 * ndastrategy with the periph lock held, 1182 * leading to a panic when its acquired again. 
1183 */ 1184 cam_periph_unlock(periph); 1185 biofinish(bp, NULL, ENOMEM); 1186 cam_periph_lock(periph); 1187 xpt_release_ccb(start_ccb); 1188 ndaschedule(periph); 1189 return; 1190 } 1191 TAILQ_INIT(&trim->bps); 1192 bp1 = bp; 1193 ents = min(nitems(trim->dsm), nda_max_trim_entries); 1194 ents = max(ents, 1); 1195 dsm_range = trim->dsm; 1196 dsm_end = dsm_range + ents; 1197 do { 1198 TAILQ_INSERT_TAIL(&trim->bps, bp1, bio_queue); 1199 dsm_range->length = 1200 htole32(bp1->bio_bcount / softc->disk->d_sectorsize); 1201 dsm_range->starting_lba = 1202 htole64(bp1->bio_offset / softc->disk->d_sectorsize); 1203 ranges++; 1204 totalcount += dsm_range->length; 1205 dsm_range++; 1206 if (dsm_range >= dsm_end) 1207 break; 1208 bp1 = cam_iosched_next_trim(softc->cam_iosched); 1209 /* XXX -- Could collapse adjacent ranges, but we don't for now */ 1210 /* XXX -- Could limit based on total payload size */ 1211 } while (bp1 != NULL); 1212 start_ccb->ccb_trim = trim; 1213 nda_nvme_trim(softc, &start_ccb->nvmeio, trim->dsm, 1214 dsm_range - trim->dsm); 1215 start_ccb->ccb_state = NDA_CCB_TRIM; 1216 softc->trim_count++; 1217 softc->trim_ranges += ranges; 1218 softc->trim_lbas += totalcount; 1219 /* 1220 * Note: We can have multiple TRIMs in flight, so we don't call 1221 * cam_iosched_submit_trim(softc->cam_iosched); 1222 * since that forces the I/O scheduler to only schedule one at a time. 1223 * On NVMe drives, this is a performance disaster. 
1224 */ 1225 goto out; 1226 } 1227 case BIO_FLUSH: 1228 nda_nvme_flush(softc, nvmeio); 1229 break; 1230 default: 1231 biofinish(bp, NULL, EOPNOTSUPP); 1232 xpt_release_ccb(start_ccb); 1233 ndaschedule(periph); 1234 return; 1235 } 1236 start_ccb->ccb_state = NDA_CCB_BUFFER_IO; 1237 start_ccb->ccb_bp = bp; 1238 out: 1239 start_ccb->ccb_h.flags |= CAM_UNLOCKED; 1240 softc->outstanding_cmds++; 1241 softc->refcount++; /* For submission only */ 1242 cam_periph_unlock(periph); 1243 xpt_action(start_ccb); 1244 cam_periph_lock(periph); 1245 softc->refcount--; /* Submission done */ 1246 1247 /* May have more work to do, so ensure we stay scheduled */ 1248 ndaschedule(periph); 1249 break; 1250 } 1251 } 1252 } 1253 1254 static void 1255 ndadone(struct cam_periph *periph, union ccb *done_ccb) 1256 { 1257 struct nda_softc *softc; 1258 struct ccb_nvmeio *nvmeio = &done_ccb->nvmeio; 1259 struct cam_path *path; 1260 int state; 1261 1262 softc = (struct nda_softc *)periph->softc; 1263 path = done_ccb->ccb_h.path; 1264 1265 CAM_DEBUG(path, CAM_DEBUG_TRACE, ("ndadone\n")); 1266 1267 state = nvmeio->ccb_state & NDA_CCB_TYPE_MASK; 1268 switch (state) { 1269 case NDA_CCB_BUFFER_IO: 1270 case NDA_CCB_TRIM: 1271 { 1272 int error; 1273 1274 cam_periph_lock(periph); 1275 if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { 1276 error = ndaerror(done_ccb, 0, 0); 1277 if (error == ERESTART) { 1278 /* A retry was scheduled, so just return. 
*/ 1279 cam_periph_unlock(periph); 1280 return; 1281 } 1282 if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) 1283 cam_release_devq(path, 1284 /*relsim_flags*/0, 1285 /*reduction*/0, 1286 /*timeout*/0, 1287 /*getcount_only*/0); 1288 } else { 1289 if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) 1290 panic("REQ_CMP with QFRZN"); 1291 error = 0; 1292 } 1293 if (state == NDA_CCB_BUFFER_IO) { 1294 struct bio *bp; 1295 1296 bp = (struct bio *)done_ccb->ccb_bp; 1297 bp->bio_error = error; 1298 if (error != 0) { 1299 bp->bio_resid = bp->bio_bcount; 1300 bp->bio_flags |= BIO_ERROR; 1301 } else { 1302 bp->bio_resid = 0; 1303 } 1304 softc->outstanding_cmds--; 1305 1306 /* 1307 * We need to call cam_iosched before we call biodone so that we 1308 * don't measure any activity that happens in the completion 1309 * routine, which in the case of sendfile can be quite 1310 * extensive. 1311 */ 1312 cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb); 1313 xpt_release_ccb(done_ccb); 1314 ndaschedule(periph); 1315 cam_periph_unlock(periph); 1316 biodone(bp); 1317 } else { /* state == NDA_CCB_TRIM */ 1318 struct nda_trim_request *trim; 1319 struct bio *bp1, *bp2; 1320 TAILQ_HEAD(, bio) queue; 1321 1322 trim = nvmeio->ccb_trim; 1323 TAILQ_INIT(&queue); 1324 TAILQ_CONCAT(&queue, &trim->bps, bio_queue); 1325 free(trim, M_NVMEDA); 1326 1327 /* 1328 * Since we can have multiple trims in flight, we don't 1329 * need to call this here. 1330 * cam_iosched_trim_done(softc->cam_iosched); 1331 */ 1332 /* 1333 * The the I/O scheduler that we're finishing the I/O 1334 * so we can keep book. The first one we pass in the CCB 1335 * which has the timing information. The rest we pass in NULL 1336 * so we can keep proper counts. 
1337 */ 1338 bp1 = TAILQ_FIRST(&queue); 1339 cam_iosched_bio_complete(softc->cam_iosched, bp1, done_ccb); 1340 xpt_release_ccb(done_ccb); 1341 softc->outstanding_cmds--; 1342 ndaschedule(periph); 1343 cam_periph_unlock(periph); 1344 while ((bp2 = TAILQ_FIRST(&queue)) != NULL) { 1345 TAILQ_REMOVE(&queue, bp2, bio_queue); 1346 bp2->bio_error = error; 1347 if (error != 0) { 1348 bp2->bio_flags |= BIO_ERROR; 1349 bp2->bio_resid = bp1->bio_bcount; 1350 } else 1351 bp2->bio_resid = 0; 1352 if (bp1 != bp2) 1353 cam_iosched_bio_complete(softc->cam_iosched, bp2, NULL); 1354 biodone(bp2); 1355 } 1356 } 1357 return; 1358 } 1359 case NDA_CCB_DUMP: 1360 /* No-op. We're polling */ 1361 return; 1362 case NDA_CCB_PASS: 1363 /* NVME_PASSTHROUGH_CMD runs this CCB and releases it */ 1364 return; 1365 default: 1366 break; 1367 } 1368 xpt_release_ccb(done_ccb); 1369 } 1370 1371 static int 1372 ndaerror(union ccb *ccb, uint32_t cam_flags, uint32_t sense_flags) 1373 { 1374 #ifdef CAM_IO_STATS 1375 struct nda_softc *softc; 1376 struct cam_periph *periph; 1377 1378 periph = xpt_path_periph(ccb->ccb_h.path); 1379 softc = (struct nda_softc *)periph->softc; 1380 #endif 1381 1382 switch (ccb->ccb_h.status & CAM_STATUS_MASK) { 1383 case CAM_CMD_TIMEOUT: 1384 #ifdef CAM_IO_STATS 1385 softc->timeouts++; 1386 #endif 1387 break; 1388 case CAM_REQ_CMP_ERR: 1389 case CAM_NVME_STATUS_ERROR: 1390 #ifdef CAM_IO_STATS 1391 softc->errors++; 1392 #endif 1393 break; 1394 default: 1395 break; 1396 } 1397 1398 return(cam_periph_error(ccb, cam_flags, sense_flags)); 1399 } 1400 1401 /* 1402 * Step through all NDA peripheral drivers, and if the device is still open, 1403 * sync the disk cache to physical media. 
1404 */ 1405 static void 1406 ndaflush(void) 1407 { 1408 struct cam_periph *periph; 1409 struct nda_softc *softc; 1410 union ccb *ccb; 1411 int error; 1412 1413 CAM_PERIPH_FOREACH(periph, &ndadriver) { 1414 softc = (struct nda_softc *)periph->softc; 1415 1416 if (SCHEDULER_STOPPED()) { 1417 /* 1418 * If we panicked with the lock held or the periph is not 1419 * open, do not recurse. Otherwise, call ndadump since 1420 * that avoids the sleeping cam_periph_getccb does if no 1421 * CCBs are available. 1422 */ 1423 if (!cam_periph_owned(periph) && 1424 (softc->flags & NDA_FLAG_OPEN)) { 1425 ndadump(softc->disk, NULL, 0, 0); 1426 } 1427 continue; 1428 } 1429 1430 /* 1431 * We only sync the cache if the drive is still open 1432 */ 1433 cam_periph_lock(periph); 1434 if ((softc->flags & NDA_FLAG_OPEN) == 0) { 1435 cam_periph_unlock(periph); 1436 continue; 1437 } 1438 1439 ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); 1440 nda_nvme_flush(softc, &ccb->nvmeio); 1441 error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0, 1442 /*sense_flags*/ SF_NO_RECOVERY | SF_NO_RETRY, 1443 softc->disk->d_devstat); 1444 if (error != 0) 1445 xpt_print(periph->path, "Synchronize cache failed\n"); 1446 xpt_release_ccb(ccb); 1447 cam_periph_unlock(periph); 1448 } 1449 } 1450 1451 static void 1452 ndashutdown(void *arg, int howto) 1453 { 1454 1455 if ((howto & RB_NOSYNC) != 0) 1456 return; 1457 1458 ndaflush(); 1459 } 1460 1461 static void 1462 ndasuspend(void *arg, enum power_stype stype) 1463 { 1464 1465 ndaflush(); 1466 } 1467