/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2011 IBM Corp.
 * Copyright (c) 2012 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qjson.h"
#include "qemu/coroutine.h"
#include "qemu/id.h"
#include "qmp-commands.h"
#include "qemu/timer.h"
#include "qapi-event.h"

static void block_job_event_cancelled(BlockJob *job);
static void block_job_event_completed(BlockJob *job, const char *msg);

/* Transactional group of block jobs */
struct BlockJobTxn {

    /* Is this txn being cancelled? */
    bool aborting;

    /* List of jobs */
    QLIST_HEAD(, BlockJob) jobs;

    /* Reference count */
    int refcnt;
};

/* Global list of all live block jobs; traversed by block_job_next()/
 * block_job_get() and drained by block_job_cancel_sync_all(). */
static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);

/* BdrvChildRole callback: human-readable description of the job owning a
 * child node, e.g. "stream job 'job0'", used in error messages.
 * Caller frees the returned string. */
static char *child_job_get_parent_desc(BdrvChild *c)
{
    BlockJob *job = c->opaque;
    return g_strdup_printf("%s job '%s'",
                           BlockJobType_lookup[job->driver->job_type],
                           job->id);
}

/* Child role for the BDS nodes a job is attached to via
 * block_job_add_bdrv(); stay_at_node keeps the child pointing at the exact
 * node rather than following graph changes. */
static const BdrvChildRole child_job = {
    .get_parent_desc    = child_job_get_parent_desc,
    .stay_at_node       = true,
};

/* Iterate the global job list: NULL returns the first job, otherwise the
 * job following @job.  Returns NULL at the end of the list. */
BlockJob *block_job_next(BlockJob *job)
{
    if (!job) {
        return QLIST_FIRST(&block_jobs);
    }
    return QLIST_NEXT(job, job_list);
}

/* Look up a job by its user-visible ID.  Internal jobs (id == NULL) are
 * skipped.  Returns NULL if no job matches. */
BlockJob *block_job_get(const char *id)
{
    BlockJob *job;

    QLIST_FOREACH(job, &block_jobs, job_list) {
        if (job->id && !strcmp(id, job->id)) {
            return job;
        }
    }

    return NULL;
}

/* AioContext attach notifier: inform the driver of the new context and
 * undo the pause taken in block_job_detach_aio_context(). */
static void block_job_attached_aio_context(AioContext *new_context,
                                           void *opaque)
{
    BlockJob *job = opaque;

    if (job->driver->attached_aio_context) {
        job->driver->attached_aio_context(job, new_context);
    }

    block_job_resume(job);
}

/* Kick the job coroutine and drain its BlockBackend (plus any driver-
 * private I/O) so the job makes progress toward its next pause point. */
static void block_job_drain(BlockJob *job)
{
    /* If job is !job->busy this kicks it into the next pause point. */
    block_job_enter(job);

    blk_drain(job->blk);
    if (job->driver->drain) {
        job->driver->drain(job);
    }
}

/* AioContext detach notifier: pause the job and drain until it has
 * actually reached the paused state (or completed), so it issues no I/O
 * in the old context.  Balanced by block_job_attached_aio_context(). */
static void block_job_detach_aio_context(void *opaque)
{
    BlockJob *job = opaque;

    /* In case the job terminates during aio_poll()... */
    block_job_ref(job);

    block_job_pause(job);

    while (!job->paused && !job->completed) {
        block_job_drain(job);
    }

    block_job_unref(job);
}

/* Detach the job from every node added with block_job_add_bdrv():
 * lift the op blockers and drop the child references. */
void block_job_remove_all_bdrv(BlockJob *job)
{
    GSList *l;
    for (l = job->nodes; l; l = l->next) {
        BdrvChild *c = l->data;
        bdrv_op_unblock_all(c->bs, job->blocker);
        bdrv_root_unref_child(c);
    }
    g_slist_free(job->nodes);
    job->nodes = NULL;
}

/* Attach @bs to @job as a root child named @name with the given
 * permissions, take a BDS reference and install the job's op blocker.
 * Returns 0 on success, -EPERM if the permissions cannot be granted
 * (@errp set). */
int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
                       uint64_t perm, uint64_t shared_perm, Error **errp)
{
    BdrvChild *c;

    c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
                               job, errp);
    if (c == NULL) {
        return -EPERM;
    }

    job->nodes = g_slist_prepend(job->nodes, c);
    bdrv_ref(bs);
    bdrv_op_block_all(bs, job->blocker);

    return 0;
}

/* Allocate and initialise a new job of @driver's type on @bs.
 *
 * @job_id may be NULL; for non-internal jobs the device name of @bs is
 * then used.  Internal jobs (BLOCK_JOB_INTERNAL in @flags) must not have
 * an ID.  The job starts with pause_count == 1 and paused == true; it
 * does not run until block_job_start() is called.
 *
 * Returns the driver-sized job struct (instance_size bytes, so callers
 * may cast to their subtype), or NULL with @errp set. */
void *block_job_create(const char *job_id, const BlockJobDriver *driver,
                       BlockDriverState *bs, uint64_t perm,
                       uint64_t shared_perm, int64_t speed, int flags,
                       BlockCompletionFunc *cb, void *opaque, Error **errp)
{
    BlockBackend *blk;
    BlockJob *job;
    int ret;

    /* Only one job per BDS at a time */
    if (bs->job) {
        error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
        return NULL;
    }

    if (job_id == NULL && !(flags & BLOCK_JOB_INTERNAL)) {
        job_id = bdrv_get_device_name(bs);
        if (!*job_id) {
            error_setg(errp, "An explicit job ID is required for this node");
            return NULL;
        }
    }

    if (job_id) {
        if (flags & BLOCK_JOB_INTERNAL) {
            error_setg(errp, "Cannot specify job ID for internal block job");
            return NULL;
        }

        if (!id_wellformed(job_id)) {
            error_setg(errp, "Invalid job ID '%s'", job_id);
            return NULL;
        }

        if (block_job_get(job_id)) {
            error_setg(errp, "Job ID '%s' already in use", job_id);
            return NULL;
        }
    }

    blk = blk_new(perm, shared_perm);
    ret = blk_insert_bs(blk, bs, errp);
    if (ret < 0) {
        blk_unref(blk);
        return NULL;
    }

    job = g_malloc0(driver->instance_size);
    error_setg(&job->blocker, "block device is in use by block job: %s",
               BlockJobType_lookup[driver->job_type]);
    /* Attaching the main node cannot fail with perm == 0, hence
     * &error_abort */
    block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);

    job->driver        = driver;
    job->id            = g_strdup(job_id);
    job->blk           = blk;
    job->cb            = cb;
    job->opaque        = opaque;
    job->busy          = false;
    job->paused        = true;
    job->pause_count   = 1;   /* released by block_job_start() */
    job->refcnt        = 1;
    bs->job = job;

    QLIST_INSERT_HEAD(&block_jobs, job, job_list);

    blk_add_aio_context_notifier(blk, block_job_attached_aio_context,
                                 block_job_detach_aio_context, job);

    /* Only set speed when necessary to avoid NotSupported error */
    if (speed != 0) {
        Error *local_err = NULL;

        block_job_set_speed(job, speed, &local_err);
        if (local_err) {
            block_job_unref(job);
            error_propagate(errp, local_err);
            return NULL;
        }
    }
    return job;
}

/* Internal jobs are not user-visible: no ID, no QMP events, no query. */
bool block_job_is_internal(BlockJob *job)
{
    return (job->id == NULL);
}

/* A job has been started once its coroutine exists. */
static bool block_job_started(BlockJob *job)
{
    return job->co;
}

/* Create the job coroutine and, unless something else has paused the job
 * in the meantime, drop the creation-time pause and enter it. */
void block_job_start(BlockJob *job)
{
    assert(job && !block_job_started(job) && job->paused &&
           !job->busy && job->driver->start);
    job->co = qemu_coroutine_create(job->driver->start, job);
    if (--job->pause_count == 0) {
        job->paused = false;
        job->busy = true;
        qemu_coroutine_enter(job->co);
    }
}

/* Take an extra reference on @job; pair with block_job_unref(). */
void block_job_ref(BlockJob *job)
{
    ++job->refcnt;
}

/* Drop a reference; on the last one, fully tear the job down: clear
 * bs->job, detach all nodes, remove the AioContext notifiers, release
 * the BlockBackend and free the struct. */
void block_job_unref(BlockJob *job)
{
    if (--job->refcnt == 0) {
        BlockDriverState *bs = blk_bs(job->blk);
        bs->job = NULL;
        block_job_remove_all_bdrv(job);
        blk_remove_aio_context_notifier(job->blk,
                                        block_job_attached_aio_context,
                                        block_job_detach_aio_context, job);
        blk_unref(job->blk);
        error_free(job->blocker);
        g_free(job->id);
        QLIST_REMOVE(job, job_list);
        g_free(job);
    }
}

/* Finish one job: run the driver's commit/abort and clean hooks, invoke
 * the completion callback, emit the QMP event (only if the job actually
 * started), detach from its transaction and drop the creation reference. */
static void block_job_completed_single(BlockJob *job)
{
    if (!job->ret) {
        if (job->driver->commit) {
            job->driver->commit(job);
        }
    } else {
        if (job->driver->abort) {
            job->driver->abort(job);
        }
    }
    if (job->driver->clean) {
        job->driver->clean(job);
    }

    if (job->cb) {
        job->cb(job->opaque, job->ret);
    }

    /* Emit events only if we actually started */
    if (block_job_started(job)) {
        if (block_job_is_cancelled(job)) {
            block_job_event_cancelled(job);
        } else {
            const char *msg = NULL;
            if (job->ret < 0) {
                msg = strerror(-job->ret);
            }
            block_job_event_completed(job, msg);
        }
    }

    if (job->txn) {
        QLIST_REMOVE(job, txn_list);
        block_job_txn_unref(job->txn);
    }
    block_job_unref(job);
}

/* One job of a transaction failed (or was cancelled): cancel every other
 * job in the txn and complete them all.
 *
 * Locking: the first loop acquires the AioContext of every job in the
 * txn; the final loop releases each context after completing its job.
 * NOTE(review): this assumes per-job contexts are distinct or that
 * aio_context_acquire() nests — confirm against AioContext semantics. */
static void block_job_completed_txn_abort(BlockJob *job)
{
    AioContext *ctx;
    BlockJobTxn *txn = job->txn;
    BlockJob *other_job, *next;

    if (txn->aborting) {
        /*
         * We are cancelled by another job, which will handle everything.
         */
        return;
    }
    txn->aborting = true;
    /* We are the first failed job. Cancel other jobs. */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        ctx = blk_get_aio_context(other_job->blk);
        aio_context_acquire(ctx);
    }
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (other_job == job || other_job->completed) {
            /* Other jobs are "effectively" cancelled by us, set the status for
             * them; this job, however, may or may not be cancelled, depending
             * on the caller, so leave it. */
            if (other_job != job) {
                other_job->cancelled = true;
            }
            continue;
        }
        block_job_cancel_sync(other_job);
        assert(other_job->completed);
    }
    QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
        ctx = blk_get_aio_context(other_job->blk);
        block_job_completed_single(other_job);
        aio_context_release(ctx);
    }
}

/* One job of a transaction finished successfully: if it was the last
 * running job, commit the whole txn by completing every member. */
static void block_job_completed_txn_success(BlockJob *job)
{
    AioContext *ctx;
    BlockJobTxn *txn = job->txn;
    BlockJob *other_job, *next;
    /*
     * Successful completion, see if there are other running jobs in this
     * txn.
     */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (!other_job->completed) {
            return;
        }
    }
    /* We are the last completed job, commit the transaction. */
    QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
        ctx = blk_get_aio_context(other_job->blk);
        aio_context_acquire(ctx);
        assert(other_job->ret == 0);
        block_job_completed_single(other_job);
        aio_context_release(ctx);
    }
}

/* Driver entry point: mark @job completed with result @ret and route to
 * single/txn-abort/txn-success completion as appropriate. */
void block_job_completed(BlockJob *job, int ret)
{
    assert(blk_bs(job->blk)->job == job);
    assert(!job->completed);
    job->completed = true;
    job->ret = ret;
    if (!job->txn) {
        block_job_completed_single(job);
    } else if (ret < 0 || block_job_is_cancelled(job)) {
        block_job_completed_txn_abort(job);
    } else {
        block_job_completed_txn_success(job);
    }
}

/* Set the job's rate limit via the driver hook; errors out with
 * QERR_UNSUPPORTED if the driver has no set_speed.  job->speed is only
 * updated when the driver accepted the value. */
void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    Error *local_err = NULL;

    if (!job->driver->set_speed) {
        error_setg(errp, QERR_UNSUPPORTED);
        return;
    }
    job->driver->set_speed(job, speed, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    job->speed = speed;
}

/* User-requested completion (e.g. for a ready mirror job).  Refused when
 * the job is paused, cancelled, not yet started, or has no complete hook. */
void block_job_complete(BlockJob *job, Error **errp)
{
    /* Should not be reachable via external interface for internal jobs */
    assert(job->id);
    if (job->pause_count || job->cancelled ||
        !block_job_started(job) || !job->driver->complete) {
        error_setg(errp, "The active block job '%s' cannot be completed",
                   job->id);
        return;
    }

    job->driver->complete(job, errp);
}

/* Request a pause; the coroutine honours it at its next
 * block_job_pause_point().  Pauses nest via pause_count. */
void block_job_pause(BlockJob *job)
{
    job->pause_count++;
}

/* Pause on behalf of the user (QMP); tracked separately so only a user
 * resume can undo it. */
void block_job_user_pause(BlockJob *job)
{
    job->user_paused = true;
    block_job_pause(job);
}

/* True while at least one pause request is outstanding. */
static bool block_job_should_pause(BlockJob *job)
{
    return job->pause_count > 0;
}

/* Whether the job is paused at the user's request; NULL-safe. */
bool block_job_user_paused(BlockJob *job)
{
    return job ? job->user_paused : 0;
}

/* Called from the job coroutine at safe points: if a pause is requested
 * (and the job is not cancelled), run the driver's pause hook, yield
 * until block_job_resume() re-enters us, then run the resume hook. */
void coroutine_fn block_job_pause_point(BlockJob *job)
{
    assert(job && block_job_started(job));

    if (!block_job_should_pause(job)) {
        return;
    }
    if (block_job_is_cancelled(job)) {
        return;
    }

    if (job->driver->pause) {
        job->driver->pause(job);
    }

    /* Re-check: the pause/cancel state may have changed inside the
     * driver's pause hook */
    if (block_job_should_pause(job) && !block_job_is_cancelled(job)) {
        job->paused = true;
        job->busy = false;
        qemu_coroutine_yield(); /* wait for block_job_resume() */
        job->busy = true;
        job->paused = false;
    }

    if (job->driver->resume) {
        job->driver->resume(job);
    }
}

/* Drop one pause request; when the count reaches zero, kick the
 * coroutine so it leaves its pause point. */
void block_job_resume(BlockJob *job)
{
    assert(job->pause_count > 0);
    job->pause_count--;
    if (job->pause_count) {
        return;
    }
    block_job_enter(job);
}

/* Undo a user pause (QMP resume); NULL-safe and a no-op unless the job
 * really is user-paused. */
void block_job_user_resume(BlockJob *job)
{
    if (job && job->user_paused && job->pause_count > 0) {
        job->user_paused = false;
        block_job_resume(job);
    }
}

/* Re-enter the job coroutine if it exists and is currently yielded
 * (!busy); entering a busy coroutine would be invalid. */
void block_job_enter(BlockJob *job)
{
    if (job->co && !job->busy) {
        qemu_coroutine_enter(job->co);
    }
}

/* Request cancellation.  A started job is flagged and kicked so it can
 * notice; a never-started job is completed immediately with -ECANCELED. */
void block_job_cancel(BlockJob *job)
{
    if (block_job_started(job)) {
        job->cancelled = true;
        block_job_iostatus_reset(job);
        block_job_enter(job);
    } else {
        block_job_completed(job, -ECANCELED);
    }
}

/* Whether cancellation has been requested for @job. */
bool block_job_is_cancelled(BlockJob *job)
{
    return job->cancelled;
}

/* Clear the job's I/O status back to OK and let the driver reset any
 * related per-device state. */
void block_job_iostatus_reset(BlockJob *job)
{
    job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    if (job->driver->iostatus_reset) {
        job->driver->iostatus_reset(job);
    }
}

/* Invoke @finish on @job and synchronously wait for the job to complete,
 * draining/polling as needed.  Returns the job's result, -ECANCELED if it
 * was cancelled, or -EBUSY if @finish itself failed (@errp set).
 * Holds an extra job reference across the wait so the struct stays valid. */
static int block_job_finish_sync(BlockJob *job,
                                 void (*finish)(BlockJob *, Error **errp),
                                 Error **errp)
{
    Error *local_err = NULL;
    int ret;

    assert(blk_bs(job->blk)->job == job);

    block_job_ref(job);

    finish(job, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        block_job_unref(job);
        return -EBUSY;
    }
    /* block_job_drain calls block_job_enter, and it should be enough to
     * induce progress until the job completes or moves to the main thread.
     */
    while (!job->deferred_to_main_loop && !job->completed) {
        block_job_drain(job);
    }
    while (!job->completed) {
        aio_poll(qemu_get_aio_context(), true);
    }
    ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
    block_job_unref(job);
    return ret;
}

/* A wrapper around block_job_cancel() taking an Error ** parameter so it may be
 * used with block_job_finish_sync() without the need for (rather nasty)
 * function pointer casts there. */
static void block_job_cancel_err(BlockJob *job, Error **errp)
{
    block_job_cancel(job);
}

/* Cancel @job and wait for it to finish; returns its final result code. */
int block_job_cancel_sync(BlockJob *job)
{
    return block_job_finish_sync(job, &block_job_cancel_err, NULL);
}

/* Synchronously cancel every job on the global list, taking each job's
 * AioContext around the wait (e.g. at shutdown). */
void block_job_cancel_sync_all(void)
{
    BlockJob *job;
    AioContext *aio_context;

    while ((job = QLIST_FIRST(&block_jobs))) {
        aio_context = blk_get_aio_context(job->blk);
        aio_context_acquire(aio_context);
        block_job_cancel_sync(job);
        aio_context_release(aio_context);
    }
}

/* Complete @job (as block_job_complete()) and wait for it to finish. */
int block_job_complete_sync(BlockJob *job, Error **errp)
{
    return block_job_finish_sync(job, &block_job_complete, errp);
}

/* Coroutine helper: sleep for @ns on @type's clock unless a pause is
 * pending (then skip straight to the pause point).  Returns immediately
 * if the job is cancelled. */
void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
{
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too! */
    if (block_job_is_cancelled(job)) {
        return;
    }

    job->busy = false;
    if (!block_job_should_pause(job)) {
        co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns);
    }
    job->busy = true;

    block_job_pause_point(job);
}

/* Coroutine helper: yield until re-entered via block_job_enter(), unless
 * a pause is pending (then pause instead).  Returns immediately if the
 * job is cancelled. */
void block_job_yield(BlockJob *job)
{
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too! */
    if (block_job_is_cancelled(job)) {
        return;
    }

    job->busy = false;
    if (!block_job_should_pause(job)) {
        qemu_coroutine_yield();
    }
    job->busy = true;

    block_job_pause_point(job);
}

/* Build the QMP BlockJobInfo for @job.  Internal jobs cannot be queried
 * and produce an error.  Caller owns the returned struct. */
BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
{
    BlockJobInfo *info;

    if (block_job_is_internal(job)) {
        error_setg(errp, "Cannot query QEMU internal jobs");
        return NULL;
    }
    info = g_new0(BlockJobInfo, 1);
    info->type      = g_strdup(BlockJobType_lookup[job->driver->job_type]);
    info->device    = g_strdup(job->id);
    info->len       = job->len;
    info->busy      = job->busy;
    info->paused    = job->pause_count > 0;
    info->offset    = job->offset;
    info->speed     = job->speed;
    info->io_status = job->iostatus;
    info->ready     = job->ready;
    return info;
}

/* Record an I/O error in the job's iostatus (NOSPACE for ENOSPC, FAILED
 * otherwise); only the first error is kept until a reset. */
static void block_job_iostatus_set_err(BlockJob *job, int error)
{
    if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                          BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}

/* Emit the BLOCK_JOB_CANCELLED QMP event (suppressed for internal jobs). */
static void block_job_event_cancelled(BlockJob *job)
{
    if (block_job_is_internal(job)) {
        return;
    }

    qapi_event_send_block_job_cancelled(job->driver->job_type,
                                        job->id,
                                        job->len,
                                        job->offset,
                                        job->speed,
                                        &error_abort);
}

/* Emit the BLOCK_JOB_COMPLETED QMP event with an optional error message
 * (suppressed for internal jobs). */
static void block_job_event_completed(BlockJob *job, const char *msg)
{
    if (block_job_is_internal(job)) {
        return;
    }

    qapi_event_send_block_job_completed(job->driver->job_type,
                                        job->id,
                                        job->len,
                                        job->offset,
                                        job->speed,
                                        !!msg,
                                        msg,
                                        &error_abort);
}

/* Mark the job ready (e.g. mirror reached sync) and emit the
 * BLOCK_JOB_READY QMP event (suppressed for internal jobs). */
void block_job_event_ready(BlockJob *job)
{
    job->ready = true;

    if (block_job_is_internal(job)) {
        return;
    }

    qapi_event_send_block_job_ready(job->driver->job_type,
                                    job->id,
                                    job->len,
                                    job->offset,
                                    job->speed, &error_abort);
}

/* Map an I/O error and the configured on-error policy to an action
 * (report/ignore/stop), emit BLOCK_JOB_ERROR for user-visible jobs, and
 * on STOP put the job into a user-visible pause with the error recorded
 * in its iostatus. */
BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
                                        int is_read, int error)
{
    BlockErrorAction action;

    switch (on_err) {
    case BLOCKDEV_ON_ERROR_ENOSPC:
    case BLOCKDEV_ON_ERROR_AUTO:
        action = (error == ENOSPC) ?
                 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
        break;
    case BLOCKDEV_ON_ERROR_STOP:
        action = BLOCK_ERROR_ACTION_STOP;
        break;
    case BLOCKDEV_ON_ERROR_REPORT:
        action = BLOCK_ERROR_ACTION_REPORT;
        break;
    case BLOCKDEV_ON_ERROR_IGNORE:
        action = BLOCK_ERROR_ACTION_IGNORE;
        break;
    default:
        abort();
    }
    if (!block_job_is_internal(job)) {
        qapi_event_send_block_job_error(job->id,
                                        is_read ? IO_OPERATION_TYPE_READ :
                                        IO_OPERATION_TYPE_WRITE,
                                        action, &error_abort);
    }
    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* make the pause user visible, which will be resumed from QMP. */
        block_job_user_pause(job);
        block_job_iostatus_set_err(job, error);
    }
    return action;
}

/* State passed from block_job_defer_to_main_loop() to its bottom half. */
typedef struct {
    BlockJob *job;
    AioContext *aio_context;  /* job's context at scheduling time */
    BlockJobDeferToMainLoopFn *fn;
    void *opaque;
} BlockJobDeferToMainLoopData;

/* Bottom half run in the main loop: re-acquire both the context captured
 * at scheduling time and the job's (possibly changed) current context,
 * then run the deferred function. */
static void block_job_defer_to_main_loop_bh(void *opaque)
{
    BlockJobDeferToMainLoopData *data = opaque;
    AioContext *aio_context;

    /* Prevent race with block_job_defer_to_main_loop() */
    aio_context_acquire(data->aio_context);

    /* Fetch BDS AioContext again, in case it has changed */
    aio_context = blk_get_aio_context(data->job->blk);
    aio_context_acquire(aio_context);

    data->job->deferred_to_main_loop = false;
    data->fn(data->job, data->opaque);

    aio_context_release(aio_context);

    aio_context_release(data->aio_context);

    g_free(data);
}

/* Schedule @fn(job, opaque) to run in the main loop's AioContext via a
 * one-shot bottom half; deferred_to_main_loop tells waiters (see
 * block_job_finish_sync()) to poll the main context instead of draining. */
void block_job_defer_to_main_loop(BlockJob *job,
                                  BlockJobDeferToMainLoopFn *fn,
                                  void *opaque)
{
    BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
    data->job = job;
    data->aio_context = blk_get_aio_context(job->blk);
    data->fn = fn;
    data->opaque = opaque;
    job->deferred_to_main_loop = true;

    aio_bh_schedule_oneshot(qemu_get_aio_context(),
                            block_job_defer_to_main_loop_bh, data);
}

/* Allocate an empty transaction with refcount 1. */
BlockJobTxn *block_job_txn_new(void)
{
    BlockJobTxn *txn = g_new0(BlockJobTxn, 1);
    QLIST_INIT(&txn->jobs);
    txn->refcnt = 1;
    return txn;
}

/* Take an extra reference on @txn. */
static void block_job_txn_ref(BlockJobTxn *txn)
{
    txn->refcnt++;
}

/* Drop a reference; frees @txn on the last one.  NULL-safe. */
void block_job_txn_unref(BlockJobTxn *txn)
{
    if (txn && --txn->refcnt == 0) {
        g_free(txn);
    }
}

/* Add @job to @txn (at most one txn per job); the txn holds a reference
 * per member, dropped in block_job_completed_single().  NULL txn is a
 * no-op. */
void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job)
{
    if (!txn) {
        return;
    }

    assert(!job->txn);
    job->txn = txn;

    QLIST_INSERT_HEAD(&txn->jobs, job, txn_list);
    block_job_txn_ref(txn);
}