1893f7ebaSPaolo Bonzini /* 2893f7ebaSPaolo Bonzini * Image mirroring 3893f7ebaSPaolo Bonzini * 4893f7ebaSPaolo Bonzini * Copyright Red Hat, Inc. 2012 5893f7ebaSPaolo Bonzini * 6893f7ebaSPaolo Bonzini * Authors: 7893f7ebaSPaolo Bonzini * Paolo Bonzini <pbonzini@redhat.com> 8893f7ebaSPaolo Bonzini * 9893f7ebaSPaolo Bonzini * This work is licensed under the terms of the GNU LGPL, version 2 or later. 10893f7ebaSPaolo Bonzini * See the COPYING.LIB file in the top-level directory. 11893f7ebaSPaolo Bonzini * 12893f7ebaSPaolo Bonzini */ 13893f7ebaSPaolo Bonzini 14893f7ebaSPaolo Bonzini #include "trace.h" 15893f7ebaSPaolo Bonzini #include "blockjob.h" 16893f7ebaSPaolo Bonzini #include "block_int.h" 17893f7ebaSPaolo Bonzini #include "qemu/ratelimit.h" 18893f7ebaSPaolo Bonzini 19893f7ebaSPaolo Bonzini enum { 20893f7ebaSPaolo Bonzini /* 21893f7ebaSPaolo Bonzini * Size of data buffer for populating the image file. This should be large 22893f7ebaSPaolo Bonzini * enough to process multiple clusters in a single call, so that populating 23893f7ebaSPaolo Bonzini * contiguous regions of the image is efficient. 24893f7ebaSPaolo Bonzini */ 25893f7ebaSPaolo Bonzini BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */ 26893f7ebaSPaolo Bonzini }; 27893f7ebaSPaolo Bonzini 28893f7ebaSPaolo Bonzini #define SLICE_TIME 100000000ULL /* ns */ 29893f7ebaSPaolo Bonzini 30893f7ebaSPaolo Bonzini typedef struct MirrorBlockJob { 31893f7ebaSPaolo Bonzini BlockJob common; 32893f7ebaSPaolo Bonzini RateLimit limit; 33893f7ebaSPaolo Bonzini BlockDriverState *target; 34893f7ebaSPaolo Bonzini MirrorSyncMode mode; 35*d63ffd87SPaolo Bonzini bool synced; 36*d63ffd87SPaolo Bonzini bool should_complete; 37893f7ebaSPaolo Bonzini int64_t sector_num; 38893f7ebaSPaolo Bonzini uint8_t *buf; 39893f7ebaSPaolo Bonzini } MirrorBlockJob; 40893f7ebaSPaolo Bonzini 41893f7ebaSPaolo Bonzini static int coroutine_fn mirror_iteration(MirrorBlockJob *s) 42893f7ebaSPaolo Bonzini { 43893f7ebaSPaolo Bonzini BlockDriverState *source = s->common.bs; 44893f7ebaSPaolo Bonzini BlockDriverState *target = s->target; 45893f7ebaSPaolo Bonzini QEMUIOVector qiov; 46893f7ebaSPaolo Bonzini int ret, nb_sectors; 47893f7ebaSPaolo Bonzini int64_t end; 48893f7ebaSPaolo Bonzini struct iovec iov; 49893f7ebaSPaolo Bonzini 50893f7ebaSPaolo Bonzini end = s->common.len >> BDRV_SECTOR_BITS; 51893f7ebaSPaolo Bonzini s->sector_num = bdrv_get_next_dirty(source, s->sector_num); 52893f7ebaSPaolo Bonzini nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num); 53893f7ebaSPaolo Bonzini bdrv_reset_dirty(source, s->sector_num, nb_sectors); 54893f7ebaSPaolo Bonzini 55893f7ebaSPaolo Bonzini /* Copy the dirty cluster. */ 56893f7ebaSPaolo Bonzini iov.iov_base = s->buf; 57893f7ebaSPaolo Bonzini iov.iov_len = nb_sectors * 512; 58893f7ebaSPaolo Bonzini qemu_iovec_init_external(&qiov, &iov, 1); 59893f7ebaSPaolo Bonzini 60893f7ebaSPaolo Bonzini trace_mirror_one_iteration(s, s->sector_num, nb_sectors); 61893f7ebaSPaolo Bonzini ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov); 62893f7ebaSPaolo Bonzini if (ret < 0) { 63893f7ebaSPaolo Bonzini return ret; 64893f7ebaSPaolo Bonzini } 65893f7ebaSPaolo Bonzini return bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); 66893f7ebaSPaolo Bonzini } 67893f7ebaSPaolo Bonzini 68893f7ebaSPaolo Bonzini static void coroutine_fn mirror_run(void *opaque) 69893f7ebaSPaolo Bonzini { 70893f7ebaSPaolo Bonzini MirrorBlockJob *s = opaque; 71893f7ebaSPaolo Bonzini BlockDriverState *bs = s->common.bs; 72893f7ebaSPaolo Bonzini int64_t sector_num, end; 73893f7ebaSPaolo Bonzini int ret = 0; 74893f7ebaSPaolo Bonzini int n; 75893f7ebaSPaolo Bonzini 76893f7ebaSPaolo Bonzini if (block_job_is_cancelled(&s->common)) { 77893f7ebaSPaolo Bonzini goto immediate_exit; 78893f7ebaSPaolo Bonzini } 79893f7ebaSPaolo Bonzini 80893f7ebaSPaolo Bonzini s->common.len = bdrv_getlength(bs); 81893f7ebaSPaolo Bonzini if (s->common.len < 0) { 82893f7ebaSPaolo Bonzini block_job_completed(&s->common, s->common.len); 83893f7ebaSPaolo Bonzini return; 84893f7ebaSPaolo Bonzini } 85893f7ebaSPaolo Bonzini 86893f7ebaSPaolo Bonzini end = s->common.len >> BDRV_SECTOR_BITS; 87893f7ebaSPaolo Bonzini s->buf = qemu_blockalign(bs, BLOCK_SIZE); 88893f7ebaSPaolo Bonzini 89893f7ebaSPaolo Bonzini if (s->mode != MIRROR_SYNC_MODE_NONE) { 90893f7ebaSPaolo Bonzini /* First part, loop on the sectors and initialize the dirty bitmap. */ 91893f7ebaSPaolo Bonzini BlockDriverState *base; 92893f7ebaSPaolo Bonzini base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd; 93893f7ebaSPaolo Bonzini for (sector_num = 0; sector_num < end; ) { 94893f7ebaSPaolo Bonzini int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1; 95893f7ebaSPaolo Bonzini ret = bdrv_co_is_allocated_above(bs, base, 96893f7ebaSPaolo Bonzini sector_num, next - sector_num, &n); 97893f7ebaSPaolo Bonzini 98893f7ebaSPaolo Bonzini if (ret < 0) { 99893f7ebaSPaolo Bonzini goto immediate_exit; 100893f7ebaSPaolo Bonzini } 101893f7ebaSPaolo Bonzini 102893f7ebaSPaolo Bonzini assert(n > 0); 103893f7ebaSPaolo Bonzini if (ret == 1) { 104893f7ebaSPaolo Bonzini bdrv_set_dirty(bs, sector_num, n); 105893f7ebaSPaolo Bonzini sector_num = next; 106893f7ebaSPaolo Bonzini } else { 107893f7ebaSPaolo Bonzini sector_num += n; 108893f7ebaSPaolo Bonzini } 109893f7ebaSPaolo Bonzini } 110893f7ebaSPaolo Bonzini } 111893f7ebaSPaolo Bonzini 112893f7ebaSPaolo Bonzini s->sector_num = -1; 113893f7ebaSPaolo Bonzini for (;;) { 114893f7ebaSPaolo Bonzini uint64_t delay_ns; 115893f7ebaSPaolo Bonzini int64_t cnt; 116893f7ebaSPaolo Bonzini bool should_complete; 117893f7ebaSPaolo Bonzini 118893f7ebaSPaolo Bonzini cnt = bdrv_get_dirty_count(bs); 119893f7ebaSPaolo Bonzini if (cnt != 0) { 120893f7ebaSPaolo Bonzini ret = mirror_iteration(s); 121893f7ebaSPaolo Bonzini if (ret < 0) { 122893f7ebaSPaolo Bonzini goto immediate_exit; 123893f7ebaSPaolo Bonzini } 124893f7ebaSPaolo Bonzini cnt = bdrv_get_dirty_count(bs); 125893f7ebaSPaolo Bonzini } 126893f7ebaSPaolo Bonzini 127893f7ebaSPaolo Bonzini should_complete = false; 128893f7ebaSPaolo Bonzini if (cnt == 0) { 129893f7ebaSPaolo Bonzini trace_mirror_before_flush(s); 130893f7ebaSPaolo Bonzini ret = bdrv_flush(s->target); 131893f7ebaSPaolo Bonzini if (ret < 0) { 132893f7ebaSPaolo Bonzini goto immediate_exit; 133893f7ebaSPaolo Bonzini } 134893f7ebaSPaolo Bonzini 135893f7ebaSPaolo Bonzini /* We're out of the streaming phase. From now on, if the job 136893f7ebaSPaolo Bonzini * is cancelled we will actually complete all pending I/O and 137893f7ebaSPaolo Bonzini * report completion. This way, block-job-cancel will leave 138893f7ebaSPaolo Bonzini * the target in a consistent state. 139893f7ebaSPaolo Bonzini */ 140893f7ebaSPaolo Bonzini s->common.offset = end * BDRV_SECTOR_SIZE; 141*d63ffd87SPaolo Bonzini if (!s->synced) { 142*d63ffd87SPaolo Bonzini block_job_ready(&s->common); 143*d63ffd87SPaolo Bonzini s->synced = true; 144*d63ffd87SPaolo Bonzini } 145*d63ffd87SPaolo Bonzini 146*d63ffd87SPaolo Bonzini should_complete = s->should_complete || 147*d63ffd87SPaolo Bonzini block_job_is_cancelled(&s->common); 148893f7ebaSPaolo Bonzini cnt = bdrv_get_dirty_count(bs); 149893f7ebaSPaolo Bonzini } 150893f7ebaSPaolo Bonzini 151893f7ebaSPaolo Bonzini if (cnt == 0 && should_complete) { 152893f7ebaSPaolo Bonzini /* The dirty bitmap is not updated while operations are pending. 153893f7ebaSPaolo Bonzini * If we're about to exit, wait for pending operations before 154893f7ebaSPaolo Bonzini * calling bdrv_get_dirty_count(bs), or we may exit while the 155893f7ebaSPaolo Bonzini * source has dirty data to copy! 156893f7ebaSPaolo Bonzini * 157893f7ebaSPaolo Bonzini * Note that I/O can be submitted by the guest while 158893f7ebaSPaolo Bonzini * mirror_populate runs. 159893f7ebaSPaolo Bonzini */ 160893f7ebaSPaolo Bonzini trace_mirror_before_drain(s, cnt); 161893f7ebaSPaolo Bonzini bdrv_drain_all(); 162893f7ebaSPaolo Bonzini cnt = bdrv_get_dirty_count(bs); 163893f7ebaSPaolo Bonzini } 164893f7ebaSPaolo Bonzini 165893f7ebaSPaolo Bonzini ret = 0; 166*d63ffd87SPaolo Bonzini trace_mirror_before_sleep(s, cnt, s->synced); 167*d63ffd87SPaolo Bonzini if (!s->synced) { 168893f7ebaSPaolo Bonzini /* Publish progress */ 169893f7ebaSPaolo Bonzini s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE; 170893f7ebaSPaolo Bonzini 171893f7ebaSPaolo Bonzini if (s->common.speed) { 172893f7ebaSPaolo Bonzini delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK); 173893f7ebaSPaolo Bonzini } else { 174893f7ebaSPaolo Bonzini delay_ns = 0; 175893f7ebaSPaolo Bonzini } 176893f7ebaSPaolo Bonzini 177893f7ebaSPaolo Bonzini /* Note that even when no rate limit is applied we need to yield 178893f7ebaSPaolo Bonzini * with no pending I/O here so that qemu_aio_flush() returns. 179893f7ebaSPaolo Bonzini */ 180893f7ebaSPaolo Bonzini block_job_sleep_ns(&s->common, rt_clock, delay_ns); 181893f7ebaSPaolo Bonzini if (block_job_is_cancelled(&s->common)) { 182893f7ebaSPaolo Bonzini break; 183893f7ebaSPaolo Bonzini } 184893f7ebaSPaolo Bonzini } else if (!should_complete) { 185893f7ebaSPaolo Bonzini delay_ns = (cnt == 0 ? SLICE_TIME : 0); 186893f7ebaSPaolo Bonzini block_job_sleep_ns(&s->common, rt_clock, delay_ns); 187893f7ebaSPaolo Bonzini } else if (cnt == 0) { 188893f7ebaSPaolo Bonzini /* The two disks are in sync. Exit and report successful 189893f7ebaSPaolo Bonzini * completion. 190893f7ebaSPaolo Bonzini */ 191893f7ebaSPaolo Bonzini assert(QLIST_EMPTY(&bs->tracked_requests)); 192893f7ebaSPaolo Bonzini s->common.cancelled = false; 193893f7ebaSPaolo Bonzini break; 194893f7ebaSPaolo Bonzini } 195893f7ebaSPaolo Bonzini } 196893f7ebaSPaolo Bonzini 197893f7ebaSPaolo Bonzini immediate_exit: 198893f7ebaSPaolo Bonzini g_free(s->buf); 199893f7ebaSPaolo Bonzini bdrv_set_dirty_tracking(bs, false); 200*d63ffd87SPaolo Bonzini if (s->should_complete && ret == 0) { 201*d63ffd87SPaolo Bonzini if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { 202*d63ffd87SPaolo Bonzini bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); 203*d63ffd87SPaolo Bonzini } 204*d63ffd87SPaolo Bonzini bdrv_swap(s->target, s->common.bs); 205*d63ffd87SPaolo Bonzini } 206893f7ebaSPaolo Bonzini bdrv_close(s->target); 207893f7ebaSPaolo Bonzini bdrv_delete(s->target); 208893f7ebaSPaolo Bonzini block_job_completed(&s->common, ret); 209893f7ebaSPaolo Bonzini } 210893f7ebaSPaolo Bonzini 211893f7ebaSPaolo Bonzini static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) 212893f7ebaSPaolo Bonzini { 213893f7ebaSPaolo Bonzini MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); 214893f7ebaSPaolo Bonzini 215893f7ebaSPaolo Bonzini if (speed < 0) { 216893f7ebaSPaolo Bonzini error_set(errp, QERR_INVALID_PARAMETER, "speed"); 217893f7ebaSPaolo Bonzini return; 218893f7ebaSPaolo Bonzini } 219893f7ebaSPaolo Bonzini ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); 220893f7ebaSPaolo Bonzini } 221893f7ebaSPaolo Bonzini 222*d63ffd87SPaolo Bonzini static void mirror_complete(BlockJob *job, Error **errp) 223*d63ffd87SPaolo Bonzini { 224*d63ffd87SPaolo Bonzini MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); 225*d63ffd87SPaolo Bonzini int ret; 226*d63ffd87SPaolo Bonzini 227*d63ffd87SPaolo Bonzini ret = bdrv_open_backing_file(s->target); 228*d63ffd87SPaolo Bonzini if (ret < 0) { 229*d63ffd87SPaolo Bonzini char backing_filename[PATH_MAX]; 230*d63ffd87SPaolo Bonzini bdrv_get_full_backing_filename(s->target, backing_filename, 231*d63ffd87SPaolo Bonzini sizeof(backing_filename)); 232*d63ffd87SPaolo Bonzini error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename); 233*d63ffd87SPaolo Bonzini return; 234*d63ffd87SPaolo Bonzini } 235*d63ffd87SPaolo Bonzini if (!s->synced) { 236*d63ffd87SPaolo Bonzini error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name); 237*d63ffd87SPaolo Bonzini return; 238*d63ffd87SPaolo Bonzini } 239*d63ffd87SPaolo Bonzini 240*d63ffd87SPaolo Bonzini s->should_complete = true; 241*d63ffd87SPaolo Bonzini block_job_resume(job); 242*d63ffd87SPaolo Bonzini } 243*d63ffd87SPaolo Bonzini 244893f7ebaSPaolo Bonzini static BlockJobType mirror_job_type = { 245893f7ebaSPaolo Bonzini .instance_size = sizeof(MirrorBlockJob), 246893f7ebaSPaolo Bonzini .job_type = "mirror", 247893f7ebaSPaolo Bonzini .set_speed = mirror_set_speed, 248*d63ffd87SPaolo Bonzini .complete = mirror_complete, 249893f7ebaSPaolo Bonzini }; 250893f7ebaSPaolo Bonzini 251893f7ebaSPaolo Bonzini void mirror_start(BlockDriverState *bs, BlockDriverState *target, 252893f7ebaSPaolo Bonzini int64_t speed, MirrorSyncMode mode, 253893f7ebaSPaolo Bonzini BlockDriverCompletionFunc *cb, 254893f7ebaSPaolo Bonzini void *opaque, Error **errp) 255893f7ebaSPaolo Bonzini { 256893f7ebaSPaolo Bonzini MirrorBlockJob *s; 257893f7ebaSPaolo Bonzini 258893f7ebaSPaolo Bonzini s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp); 259893f7ebaSPaolo Bonzini if (!s) { 260893f7ebaSPaolo Bonzini return; 261893f7ebaSPaolo Bonzini } 262893f7ebaSPaolo Bonzini 263893f7ebaSPaolo Bonzini s->target = target; 264893f7ebaSPaolo Bonzini s->mode = mode; 265893f7ebaSPaolo Bonzini bdrv_set_dirty_tracking(bs, true); 266893f7ebaSPaolo Bonzini bdrv_set_enable_write_cache(s->target, true); 267893f7ebaSPaolo Bonzini s->common.co = qemu_coroutine_create(mirror_run); 268893f7ebaSPaolo Bonzini trace_mirror_start(bs, s, s->common.co, opaque); 269893f7ebaSPaolo Bonzini qemu_coroutine_enter(s->common.co, s); 270893f7ebaSPaolo Bonzini } 271