xref: /qemu/block/mirror.c (revision c57b6656c3168bccca7f78b3f740e9149893b3da)
/*
 * Image mirroring
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Paolo Bonzini  <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
13893f7ebaSPaolo Bonzini 
14893f7ebaSPaolo Bonzini #include "trace.h"
15893f7ebaSPaolo Bonzini #include "blockjob.h"
16893f7ebaSPaolo Bonzini #include "block_int.h"
17893f7ebaSPaolo Bonzini #include "qemu/ratelimit.h"
18893f7ebaSPaolo Bonzini 
19893f7ebaSPaolo Bonzini enum {
20893f7ebaSPaolo Bonzini     /*
21893f7ebaSPaolo Bonzini      * Size of data buffer for populating the image file.  This should be large
22893f7ebaSPaolo Bonzini      * enough to process multiple clusters in a single call, so that populating
23893f7ebaSPaolo Bonzini      * contiguous regions of the image is efficient.
24893f7ebaSPaolo Bonzini      */
25893f7ebaSPaolo Bonzini     BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
26893f7ebaSPaolo Bonzini };
27893f7ebaSPaolo Bonzini 
/* Rate-limit slice and idle polling interval: 100 ms, in nanoseconds. */
#define SLICE_TIME (100ULL * 1000 * 1000)
29893f7ebaSPaolo Bonzini 
30893f7ebaSPaolo Bonzini typedef struct MirrorBlockJob {
31893f7ebaSPaolo Bonzini     BlockJob common;
32893f7ebaSPaolo Bonzini     RateLimit limit;
33893f7ebaSPaolo Bonzini     BlockDriverState *target;
34893f7ebaSPaolo Bonzini     MirrorSyncMode mode;
35b952b558SPaolo Bonzini     BlockdevOnError on_source_error, on_target_error;
36d63ffd87SPaolo Bonzini     bool synced;
37d63ffd87SPaolo Bonzini     bool should_complete;
38893f7ebaSPaolo Bonzini     int64_t sector_num;
39893f7ebaSPaolo Bonzini     uint8_t *buf;
40893f7ebaSPaolo Bonzini } MirrorBlockJob;
41893f7ebaSPaolo Bonzini 
42b952b558SPaolo Bonzini static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
43b952b558SPaolo Bonzini                                             int error)
44b952b558SPaolo Bonzini {
45b952b558SPaolo Bonzini     s->synced = false;
46b952b558SPaolo Bonzini     if (read) {
47b952b558SPaolo Bonzini         return block_job_error_action(&s->common, s->common.bs,
48b952b558SPaolo Bonzini                                       s->on_source_error, true, error);
49b952b558SPaolo Bonzini     } else {
50b952b558SPaolo Bonzini         return block_job_error_action(&s->common, s->target,
51b952b558SPaolo Bonzini                                       s->on_target_error, false, error);
52b952b558SPaolo Bonzini     }
53b952b558SPaolo Bonzini }
54b952b558SPaolo Bonzini 
55b952b558SPaolo Bonzini static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
56b952b558SPaolo Bonzini                                          BlockErrorAction *p_action)
57893f7ebaSPaolo Bonzini {
58893f7ebaSPaolo Bonzini     BlockDriverState *source = s->common.bs;
59893f7ebaSPaolo Bonzini     BlockDriverState *target = s->target;
60893f7ebaSPaolo Bonzini     QEMUIOVector qiov;
61893f7ebaSPaolo Bonzini     int ret, nb_sectors;
62893f7ebaSPaolo Bonzini     int64_t end;
63893f7ebaSPaolo Bonzini     struct iovec iov;
64893f7ebaSPaolo Bonzini 
65893f7ebaSPaolo Bonzini     end = s->common.len >> BDRV_SECTOR_BITS;
66893f7ebaSPaolo Bonzini     s->sector_num = bdrv_get_next_dirty(source, s->sector_num);
67893f7ebaSPaolo Bonzini     nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
68893f7ebaSPaolo Bonzini     bdrv_reset_dirty(source, s->sector_num, nb_sectors);
69893f7ebaSPaolo Bonzini 
70893f7ebaSPaolo Bonzini     /* Copy the dirty cluster.  */
71893f7ebaSPaolo Bonzini     iov.iov_base = s->buf;
72893f7ebaSPaolo Bonzini     iov.iov_len  = nb_sectors * 512;
73893f7ebaSPaolo Bonzini     qemu_iovec_init_external(&qiov, &iov, 1);
74893f7ebaSPaolo Bonzini 
75893f7ebaSPaolo Bonzini     trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
76893f7ebaSPaolo Bonzini     ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
77893f7ebaSPaolo Bonzini     if (ret < 0) {
78b952b558SPaolo Bonzini         *p_action = mirror_error_action(s, true, -ret);
79b952b558SPaolo Bonzini         goto fail;
80893f7ebaSPaolo Bonzini     }
81b952b558SPaolo Bonzini     ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
82b952b558SPaolo Bonzini     if (ret < 0) {
83b952b558SPaolo Bonzini         *p_action = mirror_error_action(s, false, -ret);
84b952b558SPaolo Bonzini         s->synced = false;
85b952b558SPaolo Bonzini         goto fail;
86b952b558SPaolo Bonzini     }
87b952b558SPaolo Bonzini     return 0;
88b952b558SPaolo Bonzini 
89b952b558SPaolo Bonzini fail:
90b952b558SPaolo Bonzini     /* Try again later.  */
91b952b558SPaolo Bonzini     bdrv_set_dirty(source, s->sector_num, nb_sectors);
92b952b558SPaolo Bonzini     return ret;
93893f7ebaSPaolo Bonzini }
94893f7ebaSPaolo Bonzini 
95893f7ebaSPaolo Bonzini static void coroutine_fn mirror_run(void *opaque)
96893f7ebaSPaolo Bonzini {
97893f7ebaSPaolo Bonzini     MirrorBlockJob *s = opaque;
98893f7ebaSPaolo Bonzini     BlockDriverState *bs = s->common.bs;
99893f7ebaSPaolo Bonzini     int64_t sector_num, end;
100893f7ebaSPaolo Bonzini     int ret = 0;
101893f7ebaSPaolo Bonzini     int n;
102893f7ebaSPaolo Bonzini 
103893f7ebaSPaolo Bonzini     if (block_job_is_cancelled(&s->common)) {
104893f7ebaSPaolo Bonzini         goto immediate_exit;
105893f7ebaSPaolo Bonzini     }
106893f7ebaSPaolo Bonzini 
107893f7ebaSPaolo Bonzini     s->common.len = bdrv_getlength(bs);
108893f7ebaSPaolo Bonzini     if (s->common.len < 0) {
109893f7ebaSPaolo Bonzini         block_job_completed(&s->common, s->common.len);
110893f7ebaSPaolo Bonzini         return;
111893f7ebaSPaolo Bonzini     }
112893f7ebaSPaolo Bonzini 
113893f7ebaSPaolo Bonzini     end = s->common.len >> BDRV_SECTOR_BITS;
114893f7ebaSPaolo Bonzini     s->buf = qemu_blockalign(bs, BLOCK_SIZE);
115893f7ebaSPaolo Bonzini 
116893f7ebaSPaolo Bonzini     if (s->mode != MIRROR_SYNC_MODE_NONE) {
117893f7ebaSPaolo Bonzini         /* First part, loop on the sectors and initialize the dirty bitmap.  */
118893f7ebaSPaolo Bonzini         BlockDriverState *base;
119893f7ebaSPaolo Bonzini         base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
120893f7ebaSPaolo Bonzini         for (sector_num = 0; sector_num < end; ) {
121893f7ebaSPaolo Bonzini             int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
122893f7ebaSPaolo Bonzini             ret = bdrv_co_is_allocated_above(bs, base,
123893f7ebaSPaolo Bonzini                                              sector_num, next - sector_num, &n);
124893f7ebaSPaolo Bonzini 
125893f7ebaSPaolo Bonzini             if (ret < 0) {
126893f7ebaSPaolo Bonzini                 goto immediate_exit;
127893f7ebaSPaolo Bonzini             }
128893f7ebaSPaolo Bonzini 
129893f7ebaSPaolo Bonzini             assert(n > 0);
130893f7ebaSPaolo Bonzini             if (ret == 1) {
131893f7ebaSPaolo Bonzini                 bdrv_set_dirty(bs, sector_num, n);
132893f7ebaSPaolo Bonzini                 sector_num = next;
133893f7ebaSPaolo Bonzini             } else {
134893f7ebaSPaolo Bonzini                 sector_num += n;
135893f7ebaSPaolo Bonzini             }
136893f7ebaSPaolo Bonzini         }
137893f7ebaSPaolo Bonzini     }
138893f7ebaSPaolo Bonzini 
139893f7ebaSPaolo Bonzini     s->sector_num = -1;
140893f7ebaSPaolo Bonzini     for (;;) {
141893f7ebaSPaolo Bonzini         uint64_t delay_ns;
142893f7ebaSPaolo Bonzini         int64_t cnt;
143893f7ebaSPaolo Bonzini         bool should_complete;
144893f7ebaSPaolo Bonzini 
145893f7ebaSPaolo Bonzini         cnt = bdrv_get_dirty_count(bs);
146893f7ebaSPaolo Bonzini         if (cnt != 0) {
147b952b558SPaolo Bonzini             BlockErrorAction action = BDRV_ACTION_REPORT;
148b952b558SPaolo Bonzini             ret = mirror_iteration(s, &action);
149b952b558SPaolo Bonzini             if (ret < 0 && action == BDRV_ACTION_REPORT) {
150893f7ebaSPaolo Bonzini                 goto immediate_exit;
151893f7ebaSPaolo Bonzini             }
152893f7ebaSPaolo Bonzini             cnt = bdrv_get_dirty_count(bs);
153893f7ebaSPaolo Bonzini         }
154893f7ebaSPaolo Bonzini 
155893f7ebaSPaolo Bonzini         should_complete = false;
156893f7ebaSPaolo Bonzini         if (cnt == 0) {
157893f7ebaSPaolo Bonzini             trace_mirror_before_flush(s);
158893f7ebaSPaolo Bonzini             ret = bdrv_flush(s->target);
159893f7ebaSPaolo Bonzini             if (ret < 0) {
160b952b558SPaolo Bonzini                 if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
161893f7ebaSPaolo Bonzini                     goto immediate_exit;
162893f7ebaSPaolo Bonzini                 }
163b952b558SPaolo Bonzini             } else {
164893f7ebaSPaolo Bonzini                 /* We're out of the streaming phase.  From now on, if the job
165893f7ebaSPaolo Bonzini                  * is cancelled we will actually complete all pending I/O and
166893f7ebaSPaolo Bonzini                  * report completion.  This way, block-job-cancel will leave
167893f7ebaSPaolo Bonzini                  * the target in a consistent state.
168893f7ebaSPaolo Bonzini                  */
169893f7ebaSPaolo Bonzini                 s->common.offset = end * BDRV_SECTOR_SIZE;
170d63ffd87SPaolo Bonzini                 if (!s->synced) {
171d63ffd87SPaolo Bonzini                     block_job_ready(&s->common);
172d63ffd87SPaolo Bonzini                     s->synced = true;
173d63ffd87SPaolo Bonzini                 }
174d63ffd87SPaolo Bonzini 
175d63ffd87SPaolo Bonzini                 should_complete = s->should_complete ||
176d63ffd87SPaolo Bonzini                     block_job_is_cancelled(&s->common);
177893f7ebaSPaolo Bonzini                 cnt = bdrv_get_dirty_count(bs);
178893f7ebaSPaolo Bonzini             }
179b952b558SPaolo Bonzini         }
180893f7ebaSPaolo Bonzini 
181893f7ebaSPaolo Bonzini         if (cnt == 0 && should_complete) {
182893f7ebaSPaolo Bonzini             /* The dirty bitmap is not updated while operations are pending.
183893f7ebaSPaolo Bonzini              * If we're about to exit, wait for pending operations before
184893f7ebaSPaolo Bonzini              * calling bdrv_get_dirty_count(bs), or we may exit while the
185893f7ebaSPaolo Bonzini              * source has dirty data to copy!
186893f7ebaSPaolo Bonzini              *
187893f7ebaSPaolo Bonzini              * Note that I/O can be submitted by the guest while
188893f7ebaSPaolo Bonzini              * mirror_populate runs.
189893f7ebaSPaolo Bonzini              */
190893f7ebaSPaolo Bonzini             trace_mirror_before_drain(s, cnt);
191893f7ebaSPaolo Bonzini             bdrv_drain_all();
192893f7ebaSPaolo Bonzini             cnt = bdrv_get_dirty_count(bs);
193893f7ebaSPaolo Bonzini         }
194893f7ebaSPaolo Bonzini 
195893f7ebaSPaolo Bonzini         ret = 0;
196d63ffd87SPaolo Bonzini         trace_mirror_before_sleep(s, cnt, s->synced);
197d63ffd87SPaolo Bonzini         if (!s->synced) {
198893f7ebaSPaolo Bonzini             /* Publish progress */
199893f7ebaSPaolo Bonzini             s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;
200893f7ebaSPaolo Bonzini 
201893f7ebaSPaolo Bonzini             if (s->common.speed) {
202893f7ebaSPaolo Bonzini                 delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
203893f7ebaSPaolo Bonzini             } else {
204893f7ebaSPaolo Bonzini                 delay_ns = 0;
205893f7ebaSPaolo Bonzini             }
206893f7ebaSPaolo Bonzini 
207893f7ebaSPaolo Bonzini             /* Note that even when no rate limit is applied we need to yield
208*c57b6656SKevin Wolf              * with no pending I/O here so that bdrv_drain_all() returns.
209893f7ebaSPaolo Bonzini              */
210893f7ebaSPaolo Bonzini             block_job_sleep_ns(&s->common, rt_clock, delay_ns);
211893f7ebaSPaolo Bonzini             if (block_job_is_cancelled(&s->common)) {
212893f7ebaSPaolo Bonzini                 break;
213893f7ebaSPaolo Bonzini             }
214893f7ebaSPaolo Bonzini         } else if (!should_complete) {
215893f7ebaSPaolo Bonzini             delay_ns = (cnt == 0 ? SLICE_TIME : 0);
216893f7ebaSPaolo Bonzini             block_job_sleep_ns(&s->common, rt_clock, delay_ns);
217893f7ebaSPaolo Bonzini         } else if (cnt == 0) {
218893f7ebaSPaolo Bonzini             /* The two disks are in sync.  Exit and report successful
219893f7ebaSPaolo Bonzini              * completion.
220893f7ebaSPaolo Bonzini              */
221893f7ebaSPaolo Bonzini             assert(QLIST_EMPTY(&bs->tracked_requests));
222893f7ebaSPaolo Bonzini             s->common.cancelled = false;
223893f7ebaSPaolo Bonzini             break;
224893f7ebaSPaolo Bonzini         }
225893f7ebaSPaolo Bonzini     }
226893f7ebaSPaolo Bonzini 
227893f7ebaSPaolo Bonzini immediate_exit:
228893f7ebaSPaolo Bonzini     g_free(s->buf);
229893f7ebaSPaolo Bonzini     bdrv_set_dirty_tracking(bs, false);
230b952b558SPaolo Bonzini     bdrv_iostatus_disable(s->target);
231d63ffd87SPaolo Bonzini     if (s->should_complete && ret == 0) {
232d63ffd87SPaolo Bonzini         if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
233d63ffd87SPaolo Bonzini             bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
234d63ffd87SPaolo Bonzini         }
235d63ffd87SPaolo Bonzini         bdrv_swap(s->target, s->common.bs);
236d63ffd87SPaolo Bonzini     }
237893f7ebaSPaolo Bonzini     bdrv_close(s->target);
238893f7ebaSPaolo Bonzini     bdrv_delete(s->target);
239893f7ebaSPaolo Bonzini     block_job_completed(&s->common, ret);
240893f7ebaSPaolo Bonzini }
241893f7ebaSPaolo Bonzini 
242893f7ebaSPaolo Bonzini static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
243893f7ebaSPaolo Bonzini {
244893f7ebaSPaolo Bonzini     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
245893f7ebaSPaolo Bonzini 
246893f7ebaSPaolo Bonzini     if (speed < 0) {
247893f7ebaSPaolo Bonzini         error_set(errp, QERR_INVALID_PARAMETER, "speed");
248893f7ebaSPaolo Bonzini         return;
249893f7ebaSPaolo Bonzini     }
250893f7ebaSPaolo Bonzini     ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
251893f7ebaSPaolo Bonzini }
252893f7ebaSPaolo Bonzini 
253b952b558SPaolo Bonzini static void mirror_iostatus_reset(BlockJob *job)
254b952b558SPaolo Bonzini {
255b952b558SPaolo Bonzini     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
256b952b558SPaolo Bonzini 
257b952b558SPaolo Bonzini     bdrv_iostatus_reset(s->target);
258b952b558SPaolo Bonzini }
259b952b558SPaolo Bonzini 
260d63ffd87SPaolo Bonzini static void mirror_complete(BlockJob *job, Error **errp)
261d63ffd87SPaolo Bonzini {
262d63ffd87SPaolo Bonzini     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
263d63ffd87SPaolo Bonzini     int ret;
264d63ffd87SPaolo Bonzini 
265d63ffd87SPaolo Bonzini     ret = bdrv_open_backing_file(s->target);
266d63ffd87SPaolo Bonzini     if (ret < 0) {
267d63ffd87SPaolo Bonzini         char backing_filename[PATH_MAX];
268d63ffd87SPaolo Bonzini         bdrv_get_full_backing_filename(s->target, backing_filename,
269d63ffd87SPaolo Bonzini                                        sizeof(backing_filename));
270d63ffd87SPaolo Bonzini         error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename);
271d63ffd87SPaolo Bonzini         return;
272d63ffd87SPaolo Bonzini     }
273d63ffd87SPaolo Bonzini     if (!s->synced) {
274d63ffd87SPaolo Bonzini         error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
275d63ffd87SPaolo Bonzini         return;
276d63ffd87SPaolo Bonzini     }
277d63ffd87SPaolo Bonzini 
278d63ffd87SPaolo Bonzini     s->should_complete = true;
279d63ffd87SPaolo Bonzini     block_job_resume(job);
280d63ffd87SPaolo Bonzini }
281d63ffd87SPaolo Bonzini 
282893f7ebaSPaolo Bonzini static BlockJobType mirror_job_type = {
283893f7ebaSPaolo Bonzini     .instance_size = sizeof(MirrorBlockJob),
284893f7ebaSPaolo Bonzini     .job_type      = "mirror",
285893f7ebaSPaolo Bonzini     .set_speed     = mirror_set_speed,
286b952b558SPaolo Bonzini     .iostatus_reset= mirror_iostatus_reset,
287d63ffd87SPaolo Bonzini     .complete      = mirror_complete,
288893f7ebaSPaolo Bonzini };
289893f7ebaSPaolo Bonzini 
290893f7ebaSPaolo Bonzini void mirror_start(BlockDriverState *bs, BlockDriverState *target,
291893f7ebaSPaolo Bonzini                   int64_t speed, MirrorSyncMode mode,
292b952b558SPaolo Bonzini                   BlockdevOnError on_source_error,
293b952b558SPaolo Bonzini                   BlockdevOnError on_target_error,
294893f7ebaSPaolo Bonzini                   BlockDriverCompletionFunc *cb,
295893f7ebaSPaolo Bonzini                   void *opaque, Error **errp)
296893f7ebaSPaolo Bonzini {
297893f7ebaSPaolo Bonzini     MirrorBlockJob *s;
298893f7ebaSPaolo Bonzini 
299b952b558SPaolo Bonzini     if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
300b952b558SPaolo Bonzini          on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
301b952b558SPaolo Bonzini         !bdrv_iostatus_is_enabled(bs)) {
302b952b558SPaolo Bonzini         error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
303b952b558SPaolo Bonzini         return;
304b952b558SPaolo Bonzini     }
305b952b558SPaolo Bonzini 
306893f7ebaSPaolo Bonzini     s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
307893f7ebaSPaolo Bonzini     if (!s) {
308893f7ebaSPaolo Bonzini         return;
309893f7ebaSPaolo Bonzini     }
310893f7ebaSPaolo Bonzini 
311b952b558SPaolo Bonzini     s->on_source_error = on_source_error;
312b952b558SPaolo Bonzini     s->on_target_error = on_target_error;
313893f7ebaSPaolo Bonzini     s->target = target;
314893f7ebaSPaolo Bonzini     s->mode = mode;
315893f7ebaSPaolo Bonzini     bdrv_set_dirty_tracking(bs, true);
316893f7ebaSPaolo Bonzini     bdrv_set_enable_write_cache(s->target, true);
317b952b558SPaolo Bonzini     bdrv_set_on_error(s->target, on_target_error, on_target_error);
318b952b558SPaolo Bonzini     bdrv_iostatus_enable(s->target);
319893f7ebaSPaolo Bonzini     s->common.co = qemu_coroutine_create(mirror_run);
320893f7ebaSPaolo Bonzini     trace_mirror_start(bs, s, s->common.co, opaque);
321893f7ebaSPaolo Bonzini     qemu_coroutine_enter(s->common.co, s);
322893f7ebaSPaolo Bonzini }
323