xref: /qemu/block/mirror.c (revision d63ffd87acad618a4a64b8812b64ad88577ae9b1)
1893f7ebaSPaolo Bonzini /*
2893f7ebaSPaolo Bonzini  * Image mirroring
3893f7ebaSPaolo Bonzini  *
4893f7ebaSPaolo Bonzini  * Copyright Red Hat, Inc. 2012
5893f7ebaSPaolo Bonzini  *
6893f7ebaSPaolo Bonzini  * Authors:
7893f7ebaSPaolo Bonzini  *  Paolo Bonzini  <pbonzini@redhat.com>
8893f7ebaSPaolo Bonzini  *
9893f7ebaSPaolo Bonzini  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
10893f7ebaSPaolo Bonzini  * See the COPYING.LIB file in the top-level directory.
11893f7ebaSPaolo Bonzini  *
12893f7ebaSPaolo Bonzini  */
13893f7ebaSPaolo Bonzini 
14893f7ebaSPaolo Bonzini #include "trace.h"
15893f7ebaSPaolo Bonzini #include "blockjob.h"
16893f7ebaSPaolo Bonzini #include "block_int.h"
17893f7ebaSPaolo Bonzini #include "qemu/ratelimit.h"
18893f7ebaSPaolo Bonzini 
19893f7ebaSPaolo Bonzini enum {
20893f7ebaSPaolo Bonzini     /*
21893f7ebaSPaolo Bonzini      * Size of data buffer for populating the image file.  This should be large
22893f7ebaSPaolo Bonzini      * enough to process multiple clusters in a single call, so that populating
23893f7ebaSPaolo Bonzini      * contiguous regions of the image is efficient.
24893f7ebaSPaolo Bonzini      */
25893f7ebaSPaolo Bonzini     BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
26893f7ebaSPaolo Bonzini };
27893f7ebaSPaolo Bonzini 
28893f7ebaSPaolo Bonzini #define SLICE_TIME 100000000ULL /* ns */
29893f7ebaSPaolo Bonzini 
/* State of one running mirror block job.  */
typedef struct MirrorBlockJob {
    /* Generic job state; kept first so the BlockJob returned by
     * block_job_create() can be used as a MirrorBlockJob (see mirror_start).
     */
    BlockJob common;
    RateLimit limit;            /* throttles the copy loop (see mirror_set_speed) */
    BlockDriverState *target;   /* destination of the mirroring */
    MirrorSyncMode mode;        /* how much of the source to copy up front */
    bool synced;                /* target caught up with the source at least
                                 * once; job has been reported as ready */
    bool should_complete;       /* set by mirror_complete() to request that the
                                 * job pivot to the target and finish */
    int64_t sector_num;         /* cursor into the dirty bitmap for the copy loop */
    uint8_t *buf;               /* bounce buffer, BLOCK_SIZE bytes */
} MirrorBlockJob;
40893f7ebaSPaolo Bonzini 
41893f7ebaSPaolo Bonzini static int coroutine_fn mirror_iteration(MirrorBlockJob *s)
42893f7ebaSPaolo Bonzini {
43893f7ebaSPaolo Bonzini     BlockDriverState *source = s->common.bs;
44893f7ebaSPaolo Bonzini     BlockDriverState *target = s->target;
45893f7ebaSPaolo Bonzini     QEMUIOVector qiov;
46893f7ebaSPaolo Bonzini     int ret, nb_sectors;
47893f7ebaSPaolo Bonzini     int64_t end;
48893f7ebaSPaolo Bonzini     struct iovec iov;
49893f7ebaSPaolo Bonzini 
50893f7ebaSPaolo Bonzini     end = s->common.len >> BDRV_SECTOR_BITS;
51893f7ebaSPaolo Bonzini     s->sector_num = bdrv_get_next_dirty(source, s->sector_num);
52893f7ebaSPaolo Bonzini     nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
53893f7ebaSPaolo Bonzini     bdrv_reset_dirty(source, s->sector_num, nb_sectors);
54893f7ebaSPaolo Bonzini 
55893f7ebaSPaolo Bonzini     /* Copy the dirty cluster.  */
56893f7ebaSPaolo Bonzini     iov.iov_base = s->buf;
57893f7ebaSPaolo Bonzini     iov.iov_len  = nb_sectors * 512;
58893f7ebaSPaolo Bonzini     qemu_iovec_init_external(&qiov, &iov, 1);
59893f7ebaSPaolo Bonzini 
60893f7ebaSPaolo Bonzini     trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
61893f7ebaSPaolo Bonzini     ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
62893f7ebaSPaolo Bonzini     if (ret < 0) {
63893f7ebaSPaolo Bonzini         return ret;
64893f7ebaSPaolo Bonzini     }
65893f7ebaSPaolo Bonzini     return bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
66893f7ebaSPaolo Bonzini }
67893f7ebaSPaolo Bonzini 
/* Coroutine body of the mirror job.
 *
 * Phase 1 (skipped for MIRROR_SYNC_MODE_NONE): walk the source and mark
 * dirty everything that must be copied -- the whole image for "full",
 * only sectors allocated above the backing file for "top".
 *
 * Phase 2: loop copying dirty chunks to the target.  When the dirty
 * count reaches zero the target is flushed and the job becomes synced;
 * from then on, cancellation or mirror_complete() leads to a clean
 * completion that leaves the target consistent (and, if completion was
 * requested, pivots the device to the target via bdrv_swap).
 */
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end;
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->common.len = bdrv_getlength(bs);
    if (s->common.len < 0) {
        /* bdrv_getlength() failed: report its error code as the job result.  */
        block_job_completed(&s->common, s->common.len);
        return;
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, BLOCK_SIZE);

    if (s->mode != MIRROR_SYNC_MODE_NONE) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base;
        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
        for (sector_num = 0; sector_num < end; ) {
            /* Next dirty-chunk boundary after sector_num.  */
            int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
            ret = bdrv_co_is_allocated_above(bs, base,
                                             sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                /* Allocated above @base: the whole chunk must be copied.  */
                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    s->sector_num = -1;
    for (;;) {
        uint64_t delay_ns;
        int64_t cnt;
        bool should_complete;

        cnt = bdrv_get_dirty_count(bs);
        if (cnt != 0) {
            ret = mirror_iteration(s);
            if (ret < 0) {
                goto immediate_exit;
            }
            /* The iteration may have cleared bits; re-read the count.  */
            cnt = bdrv_get_dirty_count(bs);
        }

        should_complete = false;
        if (cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                goto immediate_exit;
            }

            /* We're out of the streaming phase.  From now on, if the job
             * is cancelled we will actually complete all pending I/O and
             * report completion.  This way, block-job-cancel will leave
             * the target in a consistent state.
             */
            s->common.offset = end * BDRV_SECTOR_SIZE;
            if (!s->synced) {
                /* Report readiness exactly once.  */
                block_job_ready(&s->common);
                s->synced = true;
            }

            should_complete = s->should_complete ||
                block_job_is_cancelled(&s->common);
            cnt = bdrv_get_dirty_count(bs);
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain_all();
            cnt = bdrv_get_dirty_count(bs);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;

            if (s->common.speed) {
                delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
            } else {
                delay_ns = 0;
            }

            /* Note that even when no rate limit is applied we need to yield
             * with no pending I/O here so that qemu_aio_flush() returns.
             */
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            /* Synced but not asked to finish: idle, sleeping a full slice
             * when there is no new dirty data to pick up.
             */
            delay_ns = (cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
    }

immediate_exit:
    g_free(s->buf);
    bdrv_set_dirty_tracking(bs, false);
    if (s->should_complete && ret == 0) {
        /* Completion was requested and the copy succeeded: pivot the
         * device to the target.  Match the target's open flags to the
         * source's first so the guest sees an equivalent device.
         */
        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
    }
    bdrv_close(s->target);
    bdrv_delete(s->target);
    block_job_completed(&s->common, ret);
}
210893f7ebaSPaolo Bonzini 
211893f7ebaSPaolo Bonzini static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
212893f7ebaSPaolo Bonzini {
213893f7ebaSPaolo Bonzini     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
214893f7ebaSPaolo Bonzini 
215893f7ebaSPaolo Bonzini     if (speed < 0) {
216893f7ebaSPaolo Bonzini         error_set(errp, QERR_INVALID_PARAMETER, "speed");
217893f7ebaSPaolo Bonzini         return;
218893f7ebaSPaolo Bonzini     }
219893f7ebaSPaolo Bonzini     ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
220893f7ebaSPaolo Bonzini }
221893f7ebaSPaolo Bonzini 
222*d63ffd87SPaolo Bonzini static void mirror_complete(BlockJob *job, Error **errp)
223*d63ffd87SPaolo Bonzini {
224*d63ffd87SPaolo Bonzini     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
225*d63ffd87SPaolo Bonzini     int ret;
226*d63ffd87SPaolo Bonzini 
227*d63ffd87SPaolo Bonzini     ret = bdrv_open_backing_file(s->target);
228*d63ffd87SPaolo Bonzini     if (ret < 0) {
229*d63ffd87SPaolo Bonzini         char backing_filename[PATH_MAX];
230*d63ffd87SPaolo Bonzini         bdrv_get_full_backing_filename(s->target, backing_filename,
231*d63ffd87SPaolo Bonzini                                        sizeof(backing_filename));
232*d63ffd87SPaolo Bonzini         error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename);
233*d63ffd87SPaolo Bonzini         return;
234*d63ffd87SPaolo Bonzini     }
235*d63ffd87SPaolo Bonzini     if (!s->synced) {
236*d63ffd87SPaolo Bonzini         error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
237*d63ffd87SPaolo Bonzini         return;
238*d63ffd87SPaolo Bonzini     }
239*d63ffd87SPaolo Bonzini 
240*d63ffd87SPaolo Bonzini     s->should_complete = true;
241*d63ffd87SPaolo Bonzini     block_job_resume(job);
242*d63ffd87SPaolo Bonzini }
243*d63ffd87SPaolo Bonzini 
/* Job driver table for the "mirror" block job type.  */
static BlockJobType mirror_job_type = {
    .instance_size = sizeof(MirrorBlockJob),
    .job_type      = "mirror",
    .set_speed     = mirror_set_speed,
    .complete      = mirror_complete,
};
250893f7ebaSPaolo Bonzini 
/* Create and start a mirror job copying @bs to @target.
 *
 * @speed is the initial rate limit in bytes per second (0 disables
 * throttling -- see mirror_run); @mode selects how much of the source
 * is copied up front.  @cb/@opaque are the job-completion callback.
 * On failure *@errp is set by block_job_create() and nothing starts.
 */
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                  int64_t speed, MirrorSyncMode mode,
                  BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp)
{
    MirrorBlockJob *s;

    s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
    if (!s) {
        return;
    }

    s->target = target;
    s->mode = mode;
    /* Record guest writes to the source from now on, so the copy loop
     * picks up data dirtied while the job runs.
     */
    bdrv_set_dirty_tracking(bs, true);
    /* Target writes are flushed explicitly (bdrv_flush in mirror_run)
     * before the job reports readiness, so write caching is safe.
     */
    bdrv_set_enable_write_cache(s->target, true);
    s->common.co = qemu_coroutine_create(mirror_run);
    trace_mirror_start(bs, s, s->common.co, opaque);
    qemu_coroutine_enter(s->common.co, s);
}
271