1893f7ebaSPaolo Bonzini /*
2893f7ebaSPaolo Bonzini * Image mirroring
3893f7ebaSPaolo Bonzini *
4893f7ebaSPaolo Bonzini * Copyright Red Hat, Inc. 2012
5893f7ebaSPaolo Bonzini *
6893f7ebaSPaolo Bonzini * Authors:
7893f7ebaSPaolo Bonzini * Paolo Bonzini <pbonzini@redhat.com>
8893f7ebaSPaolo Bonzini *
9893f7ebaSPaolo Bonzini * This work is licensed under the terms of the GNU LGPL, version 2 or later.
10893f7ebaSPaolo Bonzini * See the COPYING.LIB file in the top-level directory.
11893f7ebaSPaolo Bonzini *
12893f7ebaSPaolo Bonzini */
13893f7ebaSPaolo Bonzini
1480c71a24SPeter Maydell #include "qemu/osdep.h"
15fd4a6493SKevin Wolf #include "qemu/cutils.h"
1612aa4082SMax Reitz #include "qemu/coroutine.h"
171181e19aSMax Reitz #include "qemu/range.h"
18893f7ebaSPaolo Bonzini #include "trace.h"
19c87621eaSJohn Snow #include "block/blockjob_int.h"
20737e150eSPaolo Bonzini #include "block/block_int.h"
21e2c1c34fSMarkus Armbruster #include "block/dirty-bitmap.h"
2232cad1ffSPhilippe Mathieu-Daudé #include "system/block-backend.h"
23da34e65cSMarkus Armbruster #include "qapi/error.h"
24893f7ebaSPaolo Bonzini #include "qemu/ratelimit.h"
25b812f671SPaolo Bonzini #include "qemu/bitmap.h"
265df022cfSPeter Maydell #include "qemu/memalign.h"
27893f7ebaSPaolo Bonzini
28402a4741SPaolo Bonzini #define MAX_IN_FLIGHT 16
29b436982fSEric Blake #define MAX_IO_BYTES (1 << 20) /* 1 MiB */
30b436982fSEric Blake #define DEFAULT_MIRROR_BUF_SIZE (MAX_IN_FLIGHT * MAX_IO_BYTES)
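/* With the defaults above this works out to a 16 MiB mirroring buffer (16 ops x 1 MiB). */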
31402a4741SPaolo Bonzini
32402a4741SPaolo Bonzini /* The mirroring buffer is a list of granularity-sized chunks.
33402a4741SPaolo Bonzini  * Free chunks are kept on the buf_free free list.
34402a4741SPaolo Bonzini */
35402a4741SPaolo Bonzini typedef struct MirrorBuffer {
36402a4741SPaolo Bonzini QSIMPLEQ_ENTRY(MirrorBuffer) next;
37402a4741SPaolo Bonzini } MirrorBuffer;
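/* Note: a free chunk doubles as its own list node; mirror_free_init() overlays a
 * MirrorBuffer on the first bytes of each granularity-sized piece of s->buf. */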
38893f7ebaSPaolo Bonzini
3912aa4082SMax Reitz typedef struct MirrorOp MirrorOp;
4012aa4082SMax Reitz
41893f7ebaSPaolo Bonzini typedef struct MirrorBlockJob {
42893f7ebaSPaolo Bonzini BlockJob common;
43e253f4b8SKevin Wolf BlockBackend *target;
444ef85a9cSKevin Wolf BlockDriverState *mirror_top_bs;
455bc361b8SFam Zheng BlockDriverState *base;
463f072a7fSMax Reitz BlockDriverState *base_overlay;
474ef85a9cSKevin Wolf
4809158f00SBenoît Canet /* The name of the graph node to replace */
4909158f00SBenoît Canet char *replaces;
5009158f00SBenoît Canet /* The BDS to replace */
5109158f00SBenoît Canet BlockDriverState *to_replace;
5209158f00SBenoît Canet /* Used to block operations on the drive-mirror-replace target */
5309158f00SBenoît Canet Error *replace_blocker;
549474d97bSEric Blake MirrorSyncMode sync_mode;
55274fcceeSMax Reitz BlockMirrorBackingMode backing_mode;
56d17a34bfSEric Blake     /* Whether the target should be assumed to be already zero-initialized */
57d17a34bfSEric Blake bool target_is_zero;
582d400d15SFiona Ebner /*
592d400d15SFiona Ebner      * To be accessed with atomics. Written only under the BQL (required by the
602d400d15SFiona Ebner * current implementation of mirror_change()).
612d400d15SFiona Ebner */
62d06107adSMax Reitz MirrorCopyMode copy_mode;
63b952b558SPaolo Bonzini BlockdevOnError on_source_error, on_target_error;
6476cb2f24SFiona Ebner /*
6576cb2f24SFiona Ebner * To be accessed with atomics.
6676cb2f24SFiona Ebner *
6776cb2f24SFiona Ebner * Set when the target is synced (dirty bitmap is clean, nothing in flight)
6876cb2f24SFiona Ebner * and the job is running in active mode.
6976cb2f24SFiona Ebner */
70d06107adSMax Reitz bool actively_synced;
71d63ffd87SPaolo Bonzini bool should_complete;
72eee13dfeSPaolo Bonzini int64_t granularity;
73b812f671SPaolo Bonzini size_t buf_size;
74b21c7652SMax Reitz int64_t bdev_length;
75b812f671SPaolo Bonzini unsigned long *cow_bitmap;
767e277545SEric Blake unsigned long *zero_bitmap;
77e4654d2dSFam Zheng BdrvDirtyBitmap *dirty_bitmap;
78dc162c8eSFam Zheng BdrvDirtyBitmapIter *dbi;
79893f7ebaSPaolo Bonzini uint8_t *buf;
80402a4741SPaolo Bonzini QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
81402a4741SPaolo Bonzini int buf_free_count;
82bd48bde8SPaolo Bonzini
8349efb1f5SDenis V. Lunev uint64_t last_pause_ns;
84402a4741SPaolo Bonzini unsigned long *in_flight_bitmap;
851b8f7776SDenis V. Lunev unsigned in_flight;
86b436982fSEric Blake int64_t bytes_in_flight;
87b58deb34SPaolo Bonzini QTAILQ_HEAD(, MirrorOp) ops_in_flight;
88bd48bde8SPaolo Bonzini int ret;
890fc9f8eaSFam Zheng bool unmap;
90b436982fSEric Blake int target_cluster_size;
91e5b43573SFam Zheng int max_iov;
9290ab48ebSAnton Nefedov bool initial_zeroing_ongoing;
93d06107adSMax Reitz int in_active_write_counter;
94d69a879bSHanna Reitz int64_t active_write_bytes_in_flight;
95737efc1eSJohn Snow bool prepared;
965e771752SSergio Lopez bool in_drain;
977d99ae59SAlexander Ivanov bool base_ro;
98893f7ebaSPaolo Bonzini } MirrorBlockJob;
99893f7ebaSPaolo Bonzini
100429076e8SMax Reitz typedef struct MirrorBDSOpaque {
101429076e8SMax Reitz MirrorBlockJob *job;
102f94dc3b4SMax Reitz bool stop;
10353431b90SMax Reitz bool is_commit;
104429076e8SMax Reitz } MirrorBDSOpaque;
105429076e8SMax Reitz
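/* One in-flight copy/zero/discard request, or a pseudo request that
 * mirror_iteration() uses to reserve an area; other requests block on
 * waiting_requests until it completes. */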
10612aa4082SMax Reitz struct MirrorOp {
107bd48bde8SPaolo Bonzini MirrorBlockJob *s;
108bd48bde8SPaolo Bonzini QEMUIOVector qiov;
109b436982fSEric Blake int64_t offset;
110b436982fSEric Blake uint64_t bytes;
1112e1990b2SMax Reitz
1127e277545SEric Blake /*
1137e277545SEric Blake * These pointers are set by mirror_co_read(), mirror_co_zero(), and
1147e277545SEric Blake * mirror_co_discard() before yielding for the first time
1157e277545SEric Blake */
1162e1990b2SMax Reitz int64_t *bytes_handled;
1177e277545SEric Blake bool *io_skipped;
11812aa4082SMax Reitz
1191181e19aSMax Reitz bool is_pseudo_op;
120d06107adSMax Reitz bool is_active_write;
121ce8cabbdSKevin Wolf bool is_in_flight;
12212aa4082SMax Reitz CoQueue waiting_requests;
123eed325b9SKevin Wolf Coroutine *co;
124d44dae1aSVladimir Sementsov-Ogievskiy MirrorOp *waiting_for_op;
12512aa4082SMax Reitz
12612aa4082SMax Reitz QTAILQ_ENTRY(MirrorOp) next;
12712aa4082SMax Reitz };
128bd48bde8SPaolo Bonzini
1294295c5fcSMax Reitz typedef enum MirrorMethod {
1304295c5fcSMax Reitz MIRROR_METHOD_COPY,
1314295c5fcSMax Reitz MIRROR_METHOD_ZERO,
1324295c5fcSMax Reitz MIRROR_METHOD_DISCARD,
1334295c5fcSMax Reitz } MirrorMethod;
1344295c5fcSMax Reitz
135b952b558SPaolo Bonzini static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
136b952b558SPaolo Bonzini int error)
137b952b558SPaolo Bonzini {
13876cb2f24SFiona Ebner qatomic_set(&s->actively_synced, false);
139b952b558SPaolo Bonzini if (read) {
14081e254dcSKevin Wolf return block_job_error_action(&s->common, s->on_source_error,
14181e254dcSKevin Wolf true, error);
142b952b558SPaolo Bonzini } else {
14381e254dcSKevin Wolf return block_job_error_action(&s->common, s->on_target_error,
14481e254dcSKevin Wolf false, error);
145b952b558SPaolo Bonzini }
146b952b558SPaolo Bonzini }
147b952b558SPaolo Bonzini
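/* Wait until no in-flight operation overlaps [offset, offset + bytes).  @self may be
 * NULL (e.g. for the pseudo-op path in mirror_iteration()); in that case the
 * waiting_for_op deadlock bookkeeping below is skipped. */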
1481181e19aSMax Reitz static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self,
1491181e19aSMax Reitz MirrorBlockJob *s,
1501181e19aSMax Reitz uint64_t offset,
1511181e19aSMax Reitz uint64_t bytes)
1521181e19aSMax Reitz {
1531181e19aSMax Reitz uint64_t self_start_chunk = offset / s->granularity;
1541181e19aSMax Reitz uint64_t self_end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity);
1551181e19aSMax Reitz uint64_t self_nb_chunks = self_end_chunk - self_start_chunk;
1561181e19aSMax Reitz
1571181e19aSMax Reitz while (find_next_bit(s->in_flight_bitmap, self_end_chunk,
1581181e19aSMax Reitz self_start_chunk) < self_end_chunk &&
1591181e19aSMax Reitz s->ret >= 0)
1601181e19aSMax Reitz {
1611181e19aSMax Reitz MirrorOp *op;
1621181e19aSMax Reitz
1631181e19aSMax Reitz QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
1641181e19aSMax Reitz uint64_t op_start_chunk = op->offset / s->granularity;
1651181e19aSMax Reitz uint64_t op_nb_chunks = DIV_ROUND_UP(op->offset + op->bytes,
1661181e19aSMax Reitz s->granularity) -
1671181e19aSMax Reitz op_start_chunk;
1681181e19aSMax Reitz
1691181e19aSMax Reitz if (op == self) {
1701181e19aSMax Reitz continue;
1711181e19aSMax Reitz }
1721181e19aSMax Reitz
1731181e19aSMax Reitz if (ranges_overlap(self_start_chunk, self_nb_chunks,
1741181e19aSMax Reitz op_start_chunk, op_nb_chunks))
1751181e19aSMax Reitz {
17666fed30cSStefano Garzarella if (self) {
177d44dae1aSVladimir Sementsov-Ogievskiy /*
17866fed30cSStefano Garzarella * If the operation is already (indirectly) waiting for us,
17966fed30cSStefano Garzarella * or will wait for us as soon as it wakes up, then just go
18066fed30cSStefano Garzarella * on (instead of producing a deadlock in the former case).
181d44dae1aSVladimir Sementsov-Ogievskiy */
182d44dae1aSVladimir Sementsov-Ogievskiy if (op->waiting_for_op) {
183d44dae1aSVladimir Sementsov-Ogievskiy continue;
184d44dae1aSVladimir Sementsov-Ogievskiy }
185d44dae1aSVladimir Sementsov-Ogievskiy
186d44dae1aSVladimir Sementsov-Ogievskiy self->waiting_for_op = op;
18766fed30cSStefano Garzarella }
18866fed30cSStefano Garzarella
1891181e19aSMax Reitz qemu_co_queue_wait(&op->waiting_requests, NULL);
19066fed30cSStefano Garzarella
19166fed30cSStefano Garzarella if (self) {
192d44dae1aSVladimir Sementsov-Ogievskiy self->waiting_for_op = NULL;
19366fed30cSStefano Garzarella }
19466fed30cSStefano Garzarella
1951181e19aSMax Reitz break;
1961181e19aSMax Reitz }
1971181e19aSMax Reitz }
1981181e19aSMax Reitz }
1991181e19aSMax Reitz }
2001181e19aSMax Reitz
2012e1990b2SMax Reitz static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret)
202bd48bde8SPaolo Bonzini {
203bd48bde8SPaolo Bonzini MirrorBlockJob *s = op->s;
204402a4741SPaolo Bonzini struct iovec *iov;
205bd48bde8SPaolo Bonzini int64_t chunk_num;
206b436982fSEric Blake int i, nb_chunks;
207bd48bde8SPaolo Bonzini
208b436982fSEric Blake trace_mirror_iteration_done(s, op->offset, op->bytes, ret);
209bd48bde8SPaolo Bonzini
210bd48bde8SPaolo Bonzini s->in_flight--;
211b436982fSEric Blake s->bytes_in_flight -= op->bytes;
212402a4741SPaolo Bonzini iov = op->qiov.iov;
213402a4741SPaolo Bonzini for (i = 0; i < op->qiov.niov; i++) {
214402a4741SPaolo Bonzini MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
215402a4741SPaolo Bonzini QSIMPLEQ_INSERT_TAIL(&s->buf_free, buf, next);
216402a4741SPaolo Bonzini s->buf_free_count++;
217402a4741SPaolo Bonzini }
218402a4741SPaolo Bonzini
219b436982fSEric Blake chunk_num = op->offset / s->granularity;
220b436982fSEric Blake nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);
22112aa4082SMax Reitz
222402a4741SPaolo Bonzini bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
22312aa4082SMax Reitz QTAILQ_REMOVE(&s->ops_in_flight, op, next);
224b21c7652SMax Reitz if (ret >= 0) {
225b21c7652SMax Reitz if (s->cow_bitmap) {
226bd48bde8SPaolo Bonzini bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
227bd48bde8SPaolo Bonzini }
22890ab48ebSAnton Nefedov if (!s->initial_zeroing_ongoing) {
22930a5c887SKevin Wolf job_progress_update(&s->common.job, op->bytes);
230b21c7652SMax Reitz }
23190ab48ebSAnton Nefedov }
2326df3bf8eSZhang Min qemu_iovec_destroy(&op->qiov);
2337b770c72SStefan Hajnoczi
23412aa4082SMax Reitz qemu_co_queue_restart_all(&op->waiting_requests);
23512aa4082SMax Reitz g_free(op);
2367b770c72SStefan Hajnoczi }
237bd48bde8SPaolo Bonzini
2382e1990b2SMax Reitz static void coroutine_fn mirror_write_complete(MirrorOp *op, int ret)
239bd48bde8SPaolo Bonzini {
240bd48bde8SPaolo Bonzini MirrorBlockJob *s = op->s;
241b9e413ddSPaolo Bonzini
242bd48bde8SPaolo Bonzini if (ret < 0) {
243bd48bde8SPaolo Bonzini BlockErrorAction action;
244bd48bde8SPaolo Bonzini
245e0d7f73eSEric Blake bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes);
246bd48bde8SPaolo Bonzini action = mirror_error_action(s, false, -ret);
247a589569fSWenchao Xia if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
248bd48bde8SPaolo Bonzini s->ret = ret;
249bd48bde8SPaolo Bonzini }
250bd48bde8SPaolo Bonzini }
251d12ade57SVladimir Sementsov-Ogievskiy
252bd48bde8SPaolo Bonzini mirror_iteration_done(op, ret);
253bd48bde8SPaolo Bonzini }
254bd48bde8SPaolo Bonzini
2552e1990b2SMax Reitz static void coroutine_fn mirror_read_complete(MirrorOp *op, int ret)
256bd48bde8SPaolo Bonzini {
257bd48bde8SPaolo Bonzini MirrorBlockJob *s = op->s;
258b9e413ddSPaolo Bonzini
259bd48bde8SPaolo Bonzini if (ret < 0) {
260bd48bde8SPaolo Bonzini BlockErrorAction action;
261bd48bde8SPaolo Bonzini
262e0d7f73eSEric Blake bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes);
263bd48bde8SPaolo Bonzini action = mirror_error_action(s, true, -ret);
264a589569fSWenchao Xia if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
265bd48bde8SPaolo Bonzini s->ret = ret;
266bd48bde8SPaolo Bonzini }
267bd48bde8SPaolo Bonzini
268bd48bde8SPaolo Bonzini mirror_iteration_done(op, ret);
269d12ade57SVladimir Sementsov-Ogievskiy return;
270bd48bde8SPaolo Bonzini }
271d12ade57SVladimir Sementsov-Ogievskiy
272d12ade57SVladimir Sementsov-Ogievskiy ret = blk_co_pwritev(s->target, op->offset, op->qiov.size, &op->qiov, 0);
273d12ade57SVladimir Sementsov-Ogievskiy mirror_write_complete(op, ret);
274b9e413ddSPaolo Bonzini }
275bd48bde8SPaolo Bonzini
276782d97efSEric Blake /* Clip bytes relative to offset to not exceed end-of-file */
277782d97efSEric Blake static inline int64_t mirror_clip_bytes(MirrorBlockJob *s,
278782d97efSEric Blake int64_t offset,
279782d97efSEric Blake int64_t bytes)
280782d97efSEric Blake {
281782d97efSEric Blake return MIN(bytes, s->bdev_length - offset);
282782d97efSEric Blake }
283782d97efSEric Blake
284782d97efSEric Blake /* Round offset and/or bytes to target cluster if COW is needed, and
285782d97efSEric Blake  * return the offset of the adjusted tail relative to the original end. */
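/* Illustrative example (hypothetical sizes): with a 64 KiB granularity and a larger
 * target cluster, a request whose first or last chunk still needs COW is widened out
 * to the target's (sub)cluster boundaries; the return value reports how far the tail
 * moved past the caller's original end. */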
28617ac39c3SPaolo Bonzini static int coroutine_fn mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
287ae4cc877SEric Blake uint64_t *bytes)
288893f7ebaSPaolo Bonzini {
289e5b43573SFam Zheng bool need_cow;
290e5b43573SFam Zheng int ret = 0;
291782d97efSEric Blake int64_t align_offset = *offset;
2927cfd5275SEric Blake int64_t align_bytes = *bytes;
293782d97efSEric Blake int max_bytes = s->granularity * s->max_iov;
294893f7ebaSPaolo Bonzini
295782d97efSEric Blake need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap);
296782d97efSEric Blake need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
297e5b43573SFam Zheng s->cow_bitmap);
298e5b43573SFam Zheng if (need_cow) {
299fc6b211fSAndrey Drobyshev bdrv_round_to_subclusters(blk_bs(s->target), *offset, *bytes,
300782d97efSEric Blake &align_offset, &align_bytes);
3018f0720ecSPaolo Bonzini }
3028f0720ecSPaolo Bonzini
303782d97efSEric Blake if (align_bytes > max_bytes) {
304782d97efSEric Blake align_bytes = max_bytes;
305e5b43573SFam Zheng if (need_cow) {
306782d97efSEric Blake align_bytes = QEMU_ALIGN_DOWN(align_bytes, s->target_cluster_size);
307e5b43573SFam Zheng }
308e5b43573SFam Zheng }
309782d97efSEric Blake     /* Clipping may result in align_bytes being unaligned to the chunk boundary, but
3104150ae60SFam Zheng      * that doesn't matter because it is already the end of the source image. */
311782d97efSEric Blake align_bytes = mirror_clip_bytes(s, align_offset, align_bytes);
312402a4741SPaolo Bonzini
313782d97efSEric Blake ret = align_offset + align_bytes - (*offset + *bytes);
314782d97efSEric Blake *offset = align_offset;
315782d97efSEric Blake *bytes = align_bytes;
316e5b43573SFam Zheng assert(ret >= 0);
317e5b43573SFam Zheng return ret;
318e5b43573SFam Zheng }
319e5b43573SFam Zheng
320537c3d4fSStefan Hajnoczi static inline void coroutine_fn
321eb994912SHanna Reitz mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
32221cd917fSFam Zheng {
32312aa4082SMax Reitz MirrorOp *op;
32412aa4082SMax Reitz
3251181e19aSMax Reitz QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
326eb994912SHanna Reitz /*
327eb994912SHanna Reitz      * Do not wait on pseudo ops, because they may in turn wait on
3281181e19aSMax Reitz * some other operation to start, which may in fact be the
3291181e19aSMax Reitz * caller of this function. Since there is only one pseudo op
3301181e19aSMax Reitz * at any given time, we will always find some real operation
331eb994912SHanna Reitz * to wait on.
332eb994912SHanna Reitz * Also, do not wait on active operations, because they do not
333eb994912SHanna Reitz * use up in-flight slots.
334eb994912SHanna Reitz */
335eb994912SHanna Reitz if (!op->is_pseudo_op && op->is_in_flight && !op->is_active_write) {
33612aa4082SMax Reitz qemu_co_queue_wait(&op->waiting_requests, NULL);
3371181e19aSMax Reitz return;
3381181e19aSMax Reitz }
3391181e19aSMax Reitz }
3401181e19aSMax Reitz abort();
34121cd917fSFam Zheng }
34221cd917fSFam Zheng
3432e1990b2SMax Reitz /* Perform a mirror copy operation.
3442e1990b2SMax Reitz *
3452e1990b2SMax Reitz * *op->bytes_handled is set to the number of bytes copied after and
3462e1990b2SMax Reitz * including offset, excluding any bytes copied prior to offset due
3472e1990b2SMax Reitz * to alignment. This will be op->bytes if no alignment is necessary,
3482e1990b2SMax Reitz * or (new_end - op->offset) if the tail is rounded up or down due to
349e5b43573SFam Zheng * alignment or buffer limit.
350402a4741SPaolo Bonzini */
3512e1990b2SMax Reitz static void coroutine_fn mirror_co_read(void *opaque)
352e5b43573SFam Zheng {
3532e1990b2SMax Reitz MirrorOp *op = opaque;
3542e1990b2SMax Reitz MirrorBlockJob *s = op->s;
355ae4cc877SEric Blake int nb_chunks;
3565791ba52SMarc-André Lureau int ret = -1;
357ae4cc877SEric Blake uint64_t max_bytes;
358402a4741SPaolo Bonzini
359ae4cc877SEric Blake max_bytes = s->granularity * s->max_iov;
360e5b43573SFam Zheng
361e5b43573SFam Zheng /* We can only handle as much as buf_size at a time. */
3622e1990b2SMax Reitz op->bytes = MIN(s->buf_size, MIN(max_bytes, op->bytes));
3632e1990b2SMax Reitz assert(op->bytes);
3642e1990b2SMax Reitz assert(op->bytes < BDRV_REQUEST_MAX_BYTES);
3652e1990b2SMax Reitz *op->bytes_handled = op->bytes;
366e5b43573SFam Zheng
367e5b43573SFam Zheng if (s->cow_bitmap) {
3682e1990b2SMax Reitz *op->bytes_handled += mirror_cow_align(s, &op->offset, &op->bytes);
369e5b43573SFam Zheng }
3702e1990b2SMax Reitz /* Cannot exceed BDRV_REQUEST_MAX_BYTES + INT_MAX */
3712e1990b2SMax Reitz assert(*op->bytes_handled <= UINT_MAX);
3722e1990b2SMax Reitz assert(op->bytes <= s->buf_size);
373ae4cc877SEric Blake /* The offset is granularity-aligned because:
374e5b43573SFam Zheng * 1) Caller passes in aligned values;
375e5b43573SFam Zheng      * 2) mirror_cow_align is used only when the target cluster size is larger. */
3762e1990b2SMax Reitz assert(QEMU_IS_ALIGNED(op->offset, s->granularity));
377ae4cc877SEric Blake /* The range is sector-aligned, since bdrv_getlength() rounds up. */
3782e1990b2SMax Reitz assert(QEMU_IS_ALIGNED(op->bytes, BDRV_SECTOR_SIZE));
3792e1990b2SMax Reitz nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);
380e5b43573SFam Zheng
381e5b43573SFam Zheng while (s->buf_free_count < nb_chunks) {
3822e1990b2SMax Reitz trace_mirror_yield_in_flight(s, op->offset, s->in_flight);
3839178f4feSKevin Wolf mirror_wait_for_free_in_flight_slot(s);
384b812f671SPaolo Bonzini }
385b812f671SPaolo Bonzini
386402a4741SPaolo Bonzini /* Now make a QEMUIOVector taking enough granularity-sized chunks
387402a4741SPaolo Bonzini * from s->buf_free.
388402a4741SPaolo Bonzini */
389402a4741SPaolo Bonzini qemu_iovec_init(&op->qiov, nb_chunks);
390402a4741SPaolo Bonzini while (nb_chunks-- > 0) {
391402a4741SPaolo Bonzini MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
3922e1990b2SMax Reitz size_t remaining = op->bytes - op->qiov.size;
3935a0f6fd5SKevin Wolf
394402a4741SPaolo Bonzini QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
395402a4741SPaolo Bonzini s->buf_free_count--;
3965a0f6fd5SKevin Wolf qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
397402a4741SPaolo Bonzini }
398402a4741SPaolo Bonzini
399893f7ebaSPaolo Bonzini /* Copy the dirty cluster. */
400bd48bde8SPaolo Bonzini s->in_flight++;
4012e1990b2SMax Reitz s->bytes_in_flight += op->bytes;
402ce8cabbdSKevin Wolf op->is_in_flight = true;
4032e1990b2SMax Reitz trace_mirror_one_iteration(s, op->offset, op->bytes);
404dcfb3bebSFam Zheng
405b9b10c35SKevin Wolf WITH_GRAPH_RDLOCK_GUARD() {
406138f9fffSMax Reitz ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes,
407138f9fffSMax Reitz &op->qiov, 0);
408b9b10c35SKevin Wolf }
4092e1990b2SMax Reitz mirror_read_complete(op, ret);
410e5b43573SFam Zheng }
411e5b43573SFam Zheng
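/* The zero bitmap tracks granularity-sized chunks already known to read as zero on
 * the target: mirror_co_zero() skips the write when every chunk in the range is set,
 * and marks the range on success. */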
4122e1990b2SMax Reitz static void coroutine_fn mirror_co_zero(void *opaque)
413e5b43573SFam Zheng {
4142e1990b2SMax Reitz MirrorOp *op = opaque;
4157e277545SEric Blake bool write_needed = true;
4167e277545SEric Blake int ret = 0;
417e5b43573SFam Zheng
4182e1990b2SMax Reitz op->s->in_flight++;
4192e1990b2SMax Reitz op->s->bytes_in_flight += op->bytes;
4202e1990b2SMax Reitz *op->bytes_handled = op->bytes;
421ce8cabbdSKevin Wolf op->is_in_flight = true;
422e5b43573SFam Zheng
4237e277545SEric Blake if (op->s->zero_bitmap) {
4247e277545SEric Blake unsigned long end = DIV_ROUND_UP(op->offset + op->bytes,
4257e277545SEric Blake op->s->granularity);
4267e277545SEric Blake assert(QEMU_IS_ALIGNED(op->offset, op->s->granularity));
4277e277545SEric Blake assert(QEMU_IS_ALIGNED(op->bytes, op->s->granularity) ||
4287e277545SEric Blake op->offset + op->bytes == op->s->bdev_length);
4297e277545SEric Blake if (find_next_zero_bit(op->s->zero_bitmap, end,
4307e277545SEric Blake op->offset / op->s->granularity) == end) {
4317e277545SEric Blake write_needed = false;
4327e277545SEric Blake *op->io_skipped = true;
4337e277545SEric Blake }
4347e277545SEric Blake }
4357e277545SEric Blake if (write_needed) {
4362e1990b2SMax Reitz ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes,
4372e1990b2SMax Reitz op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0);
4387e277545SEric Blake }
4397e277545SEric Blake if (ret >= 0 && op->s->zero_bitmap) {
4407e277545SEric Blake bitmap_set(op->s->zero_bitmap, op->offset / op->s->granularity,
4417e277545SEric Blake DIV_ROUND_UP(op->bytes, op->s->granularity));
4427e277545SEric Blake }
4432e1990b2SMax Reitz mirror_write_complete(op, ret);
444e5b43573SFam Zheng }
4452e1990b2SMax Reitz
4462e1990b2SMax Reitz static void coroutine_fn mirror_co_discard(void *opaque)
4472e1990b2SMax Reitz {
4482e1990b2SMax Reitz MirrorOp *op = opaque;
4492e1990b2SMax Reitz int ret;
4502e1990b2SMax Reitz
4512e1990b2SMax Reitz op->s->in_flight++;
4522e1990b2SMax Reitz op->s->bytes_in_flight += op->bytes;
4532e1990b2SMax Reitz *op->bytes_handled = op->bytes;
454ce8cabbdSKevin Wolf op->is_in_flight = true;
4552e1990b2SMax Reitz
4562e1990b2SMax Reitz ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes);
4572e1990b2SMax Reitz mirror_write_complete(op, ret);
458e5b43573SFam Zheng }
459e5b43573SFam Zheng
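/* Dispatch a single copy/zero/discard operation as a coroutine and return the number
 * of bytes it will handle starting at @offset; this can differ from @bytes when the
 * request is clamped or COW-aligned (see mirror_co_read()). */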
4604295c5fcSMax Reitz static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
4617e277545SEric Blake unsigned bytes, MirrorMethod mirror_method,
4627e277545SEric Blake bool *io_skipped)
4634295c5fcSMax Reitz {
4642e1990b2SMax Reitz MirrorOp *op;
4652e1990b2SMax Reitz Coroutine *co;
4662e1990b2SMax Reitz int64_t bytes_handled = -1;
4672e1990b2SMax Reitz
4687e277545SEric Blake assert(QEMU_IS_ALIGNED(offset, s->granularity));
4697e277545SEric Blake assert(QEMU_IS_ALIGNED(bytes, s->granularity) ||
4707e277545SEric Blake offset + bytes == s->bdev_length);
4712e1990b2SMax Reitz op = g_new(MirrorOp, 1);
4722e1990b2SMax Reitz *op = (MirrorOp){
4732e1990b2SMax Reitz .s = s,
4742e1990b2SMax Reitz .offset = offset,
4752e1990b2SMax Reitz .bytes = bytes,
4762e1990b2SMax Reitz .bytes_handled = &bytes_handled,
4777e277545SEric Blake .io_skipped = io_skipped,
4782e1990b2SMax Reitz };
47912aa4082SMax Reitz qemu_co_queue_init(&op->waiting_requests);
4802e1990b2SMax Reitz
4814295c5fcSMax Reitz switch (mirror_method) {
4824295c5fcSMax Reitz case MIRROR_METHOD_COPY:
4837e277545SEric Blake if (s->zero_bitmap) {
4847e277545SEric Blake bitmap_clear(s->zero_bitmap, offset / s->granularity,
4857e277545SEric Blake DIV_ROUND_UP(bytes, s->granularity));
4867e277545SEric Blake }
4872e1990b2SMax Reitz co = qemu_coroutine_create(mirror_co_read, op);
4882e1990b2SMax Reitz break;
4894295c5fcSMax Reitz case MIRROR_METHOD_ZERO:
4907e277545SEric Blake /* s->zero_bitmap handled in mirror_co_zero */
4912e1990b2SMax Reitz co = qemu_coroutine_create(mirror_co_zero, op);
4922e1990b2SMax Reitz break;
4934295c5fcSMax Reitz case MIRROR_METHOD_DISCARD:
4947e277545SEric Blake if (s->zero_bitmap) {
4957e277545SEric Blake bitmap_clear(s->zero_bitmap, offset / s->granularity,
4967e277545SEric Blake DIV_ROUND_UP(bytes, s->granularity));
4977e277545SEric Blake }
4982e1990b2SMax Reitz co = qemu_coroutine_create(mirror_co_discard, op);
4992e1990b2SMax Reitz break;
5004295c5fcSMax Reitz default:
5014295c5fcSMax Reitz abort();
5024295c5fcSMax Reitz }
503eed325b9SKevin Wolf op->co = co;
5042e1990b2SMax Reitz
50512aa4082SMax Reitz QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
5062e1990b2SMax Reitz qemu_coroutine_enter(co);
5072e1990b2SMax Reitz /* At this point, ownership of op has been moved to the coroutine
5082e1990b2SMax Reitz * and the object may already be freed */
5092e1990b2SMax Reitz
5102e1990b2SMax Reitz /* Assert that this value has been set */
5112e1990b2SMax Reitz assert(bytes_handled >= 0);
5122e1990b2SMax Reitz
5132e1990b2SMax Reitz /* Same assertion as in mirror_co_read() (and for mirror_co_read()
5142e1990b2SMax Reitz * and mirror_co_discard(), bytes_handled == op->bytes, which
5152e1990b2SMax Reitz * is the @bytes parameter given to this function) */
5162e1990b2SMax Reitz assert(bytes_handled <= UINT_MAX);
5172e1990b2SMax Reitz return bytes_handled;
5184295c5fcSMax Reitz }
5194295c5fcSMax Reitz
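/* One pass of the background copy loop: pick the next dirty area from the bitmap
 * iterator, wait for conflicting in-flight requests, then carve it into copy, zero
 * or discard operations based on the block status of the source. */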
520ae5a40e8SKevin Wolf static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s)
521e5b43573SFam Zheng {
522ae5a40e8SKevin Wolf BlockDriverState *source;
5231181e19aSMax Reitz MirrorOp *pseudo_op;
5241181e19aSMax Reitz int64_t offset;
525e5b43573SFam Zheng /* At least the first dirty chunk is mirrored in one iteration. */
526e5b43573SFam Zheng int nb_chunks = 1;
5274b5004d9SDenis V. Lunev bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
528b436982fSEric Blake int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES);
529e5b43573SFam Zheng
530ae5a40e8SKevin Wolf bdrv_graph_co_rdlock();
531ae5a40e8SKevin Wolf source = s->mirror_top_bs->backing->bs;
532ae5a40e8SKevin Wolf bdrv_graph_co_rdunlock();
533ae5a40e8SKevin Wolf
534b64bd51eSPaolo Bonzini bdrv_dirty_bitmap_lock(s->dirty_bitmap);
535f798184cSEric Blake offset = bdrv_dirty_iter_next(s->dbi);
536fb2ef791SEric Blake if (offset < 0) {
537dc162c8eSFam Zheng bdrv_set_dirty_iter(s->dbi, 0);
538f798184cSEric Blake offset = bdrv_dirty_iter_next(s->dbi);
5399a46dba7SEric Blake trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
540fb2ef791SEric Blake assert(offset >= 0);
541e5b43573SFam Zheng }
542b64bd51eSPaolo Bonzini bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
543e5b43573SFam Zheng
544d69a879bSHanna Reitz /*
545d69a879bSHanna Reitz * Wait for concurrent requests to @offset. The next loop will limit the
546d69a879bSHanna Reitz * copied area based on in_flight_bitmap so we only copy an area that does
547d69a879bSHanna Reitz * not overlap with concurrent in-flight requests. Still, we would like to
548d69a879bSHanna Reitz * copy something, so wait until there are at least no more requests to the
549d69a879bSHanna Reitz * very beginning of the area.
550d69a879bSHanna Reitz */
5511181e19aSMax Reitz mirror_wait_on_conflicts(NULL, s, offset, 1);
5529c83625bSMax Reitz
553da01ff7fSKevin Wolf job_pause_point(&s->common.job);
554565ac01fSStefan Hajnoczi
5553202d8e4SMichael Tokarev /* Find the number of consecutive dirty chunks following the first dirty
556e5b43573SFam Zheng * one, and wait for in flight requests in them. */
557b64bd51eSPaolo Bonzini bdrv_dirty_bitmap_lock(s->dirty_bitmap);
558fb2ef791SEric Blake while (nb_chunks * s->granularity < s->buf_size) {
559dc162c8eSFam Zheng int64_t next_dirty;
560fb2ef791SEric Blake int64_t next_offset = offset + nb_chunks * s->granularity;
561fb2ef791SEric Blake int64_t next_chunk = next_offset / s->granularity;
562fb2ef791SEric Blake if (next_offset >= s->bdev_length ||
56328636b82SJohn Snow !bdrv_dirty_bitmap_get_locked(s->dirty_bitmap, next_offset)) {
564e5b43573SFam Zheng break;
565e5b43573SFam Zheng }
566e5b43573SFam Zheng if (test_bit(next_chunk, s->in_flight_bitmap)) {
567e5b43573SFam Zheng break;
568e5b43573SFam Zheng }
5699c83625bSMax Reitz
570f798184cSEric Blake next_dirty = bdrv_dirty_iter_next(s->dbi);
571fb2ef791SEric Blake if (next_dirty > next_offset || next_dirty < 0) {
572f27a2742SMax Reitz /* The bitmap iterator's cache is stale, refresh it */
573715a74d8SEric Blake bdrv_set_dirty_iter(s->dbi, next_offset);
574f798184cSEric Blake next_dirty = bdrv_dirty_iter_next(s->dbi);
575f27a2742SMax Reitz }
576fb2ef791SEric Blake assert(next_dirty == next_offset);
577e5b43573SFam Zheng nb_chunks++;
578e5b43573SFam Zheng }
579e5b43573SFam Zheng
580e5b43573SFam Zheng /* Clear dirty bits before querying the block status, because
58131826642SEric Blake * calling bdrv_block_status_above could yield - if some blocks are
582e5b43573SFam Zheng * marked dirty in this window, we need to know.
583e5b43573SFam Zheng */
584e0d7f73eSEric Blake bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset,
585e0d7f73eSEric Blake nb_chunks * s->granularity);
586b64bd51eSPaolo Bonzini bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
587b64bd51eSPaolo Bonzini
5881181e19aSMax Reitz /* Before claiming an area in the in-flight bitmap, we have to
5891181e19aSMax Reitz * create a MirrorOp for it so that conflicting requests can wait
5901181e19aSMax Reitz * for it. mirror_perform() will create the real MirrorOps later,
5911181e19aSMax Reitz * for now we just create a pseudo operation that will wake up all
5921181e19aSMax Reitz * conflicting requests once all real operations have been
5931181e19aSMax Reitz * launched. */
5941181e19aSMax Reitz pseudo_op = g_new(MirrorOp, 1);
5951181e19aSMax Reitz *pseudo_op = (MirrorOp){
5961181e19aSMax Reitz .offset = offset,
5971181e19aSMax Reitz .bytes = nb_chunks * s->granularity,
5981181e19aSMax Reitz .is_pseudo_op = true,
5991181e19aSMax Reitz };
6001181e19aSMax Reitz qemu_co_queue_init(&pseudo_op->waiting_requests);
6011181e19aSMax Reitz QTAILQ_INSERT_TAIL(&s->ops_in_flight, pseudo_op, next);
6021181e19aSMax Reitz
603fb2ef791SEric Blake bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks);
604fb2ef791SEric Blake while (nb_chunks > 0 && offset < s->bdev_length) {
6055791ba52SMarc-André Lureau int ret = -1;
6067cfd5275SEric Blake int64_t io_bytes;
607f3e4ce4aSEric Blake int64_t io_bytes_acct;
6087e277545SEric Blake bool io_skipped = false;
6094295c5fcSMax Reitz MirrorMethod mirror_method = MIRROR_METHOD_COPY;
610e5b43573SFam Zheng
611fb2ef791SEric Blake assert(!(offset % s->granularity));
6127ff9579eSKevin Wolf WITH_GRAPH_RDLOCK_GUARD() {
613cc323997SPaolo Bonzini ret = bdrv_co_block_status_above(source, NULL, offset,
61431826642SEric Blake nb_chunks * s->granularity,
61531826642SEric Blake &io_bytes, NULL, NULL);
6167ff9579eSKevin Wolf }
617e5b43573SFam Zheng if (ret < 0) {
618fb2ef791SEric Blake io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes);
6190965a41eSVladimir Sementsov-Ogievskiy } else if (ret & BDRV_BLOCK_DATA) {
620fb2ef791SEric Blake io_bytes = MIN(io_bytes, max_io_bytes);
621e5b43573SFam Zheng }
622e5b43573SFam Zheng
623fb2ef791SEric Blake io_bytes -= io_bytes % s->granularity;
624fb2ef791SEric Blake if (io_bytes < s->granularity) {
625fb2ef791SEric Blake io_bytes = s->granularity;
626e5b43573SFam Zheng } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
627fb2ef791SEric Blake int64_t target_offset;
6287cfd5275SEric Blake int64_t target_bytes;
629a00e70c0SEmanuele Giuseppe Esposito WITH_GRAPH_RDLOCK_GUARD() {
630fc6b211fSAndrey Drobyshev bdrv_round_to_subclusters(blk_bs(s->target), offset, io_bytes,
631fb2ef791SEric Blake &target_offset, &target_bytes);
632a00e70c0SEmanuele Giuseppe Esposito }
633fb2ef791SEric Blake if (target_offset == offset &&
634fb2ef791SEric Blake target_bytes == io_bytes) {
635e5b43573SFam Zheng mirror_method = ret & BDRV_BLOCK_ZERO ?
636e5b43573SFam Zheng MIRROR_METHOD_ZERO :
637e5b43573SFam Zheng MIRROR_METHOD_DISCARD;
638e5b43573SFam Zheng }
639e5b43573SFam Zheng }
640e5b43573SFam Zheng
641cf56a3c6SDenis V. Lunev while (s->in_flight >= MAX_IN_FLIGHT) {
642fb2ef791SEric Blake trace_mirror_yield_in_flight(s, offset, s->in_flight);
6439178f4feSKevin Wolf mirror_wait_for_free_in_flight_slot(s);
644cf56a3c6SDenis V. Lunev }
645cf56a3c6SDenis V. Lunev
646dbaa7b57SVladimir Sementsov-Ogievskiy if (s->ret < 0) {
6471181e19aSMax Reitz ret = 0;
6481181e19aSMax Reitz goto fail;
649dbaa7b57SVladimir Sementsov-Ogievskiy }
650dbaa7b57SVladimir Sementsov-Ogievskiy
651fb2ef791SEric Blake io_bytes = mirror_clip_bytes(s, offset, io_bytes);
6527e277545SEric Blake io_bytes = mirror_perform(s, offset, io_bytes, mirror_method,
6537e277545SEric Blake &io_skipped);
6547e277545SEric Blake if (io_skipped ||
6557e277545SEric Blake (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok)) {
656f3e4ce4aSEric Blake io_bytes_acct = 0;
6574b5004d9SDenis V. Lunev } else {
658fb2ef791SEric Blake io_bytes_acct = io_bytes;
6594b5004d9SDenis V. Lunev }
660fb2ef791SEric Blake assert(io_bytes);
661fb2ef791SEric Blake offset += io_bytes;
662fb2ef791SEric Blake nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
663018e5987SKevin Wolf block_job_ratelimit_processed_bytes(&s->common, io_bytes_acct);
664dcfb3bebSFam Zheng }
6651181e19aSMax Reitz
6661181e19aSMax Reitz fail:
6671181e19aSMax Reitz QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next);
6681181e19aSMax Reitz qemu_co_queue_restart_all(&pseudo_op->waiting_requests);
6691181e19aSMax Reitz g_free(pseudo_op);
670893f7ebaSPaolo Bonzini }
671b952b558SPaolo Bonzini
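/* Carve s->buf into granularity-sized chunks and put them all on the free list. */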
672402a4741SPaolo Bonzini static void mirror_free_init(MirrorBlockJob *s)
673402a4741SPaolo Bonzini {
674402a4741SPaolo Bonzini int granularity = s->granularity;
675402a4741SPaolo Bonzini size_t buf_size = s->buf_size;
676402a4741SPaolo Bonzini uint8_t *buf = s->buf;
677402a4741SPaolo Bonzini
678402a4741SPaolo Bonzini assert(s->buf_free_count == 0);
679402a4741SPaolo Bonzini QSIMPLEQ_INIT(&s->buf_free);
680402a4741SPaolo Bonzini while (buf_size != 0) {
681402a4741SPaolo Bonzini MirrorBuffer *cur = (MirrorBuffer *)buf;
682402a4741SPaolo Bonzini QSIMPLEQ_INSERT_TAIL(&s->buf_free, cur, next);
683402a4741SPaolo Bonzini s->buf_free_count++;
684402a4741SPaolo Bonzini buf_size -= granularity;
685402a4741SPaolo Bonzini buf += granularity;
686402a4741SPaolo Bonzini }
687402a4741SPaolo Bonzini }
688402a4741SPaolo Bonzini
689bae8196dSPaolo Bonzini /* This is also used for the .pause callback. There is no matching
690bae8196dSPaolo Bonzini * mirror_resume() because mirror_run() will begin iterating again
691bae8196dSPaolo Bonzini * when the job is resumed.
692bae8196dSPaolo Bonzini */
693537c3d4fSStefan Hajnoczi static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s)
694bd48bde8SPaolo Bonzini {
695bd48bde8SPaolo Bonzini while (s->in_flight > 0) {
6969178f4feSKevin Wolf mirror_wait_for_free_in_flight_slot(s);
697bd48bde8SPaolo Bonzini }
698893f7ebaSPaolo Bonzini }
699893f7ebaSPaolo Bonzini
700737efc1eSJohn Snow /**
701737efc1eSJohn Snow * mirror_exit_common: handle both abort() and prepare() cases.
702737efc1eSJohn Snow * for .prepare, returns 0 on success and -errno on failure.
703737efc1eSJohn Snow * for .abort cases, denoted by abort = true, MUST return 0.
704737efc1eSJohn Snow */
705737efc1eSJohn Snow static int mirror_exit_common(Job *job)
7065a7e7a0bSStefan Hajnoczi {
7071908a559SKevin Wolf MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
7081908a559SKevin Wolf BlockJob *bjob = &s->common;
709f93c3addSMax Reitz MirrorBDSOpaque *bs_opaque;
710f93c3addSMax Reitz BlockDriverState *src;
711f93c3addSMax Reitz BlockDriverState *target_bs;
712f93c3addSMax Reitz BlockDriverState *mirror_top_bs;
71312fa4af6SKevin Wolf Error *local_err = NULL;
714737efc1eSJohn Snow bool abort = job->ret < 0;
715737efc1eSJohn Snow int ret = 0;
716737efc1eSJohn Snow
7172626d27fSKevin Wolf GLOBAL_STATE_CODE();
7182626d27fSKevin Wolf
719737efc1eSJohn Snow if (s->prepared) {
720737efc1eSJohn Snow return 0;
721737efc1eSJohn Snow }
722737efc1eSJohn Snow s->prepared = true;
7233f09bfbcSKevin Wolf
7249275fc72SKevin Wolf bdrv_graph_rdlock_main_loop();
7252626d27fSKevin Wolf
726f93c3addSMax Reitz mirror_top_bs = s->mirror_top_bs;
727f93c3addSMax Reitz bs_opaque = mirror_top_bs->opaque;
728f93c3addSMax Reitz src = mirror_top_bs->backing->bs;
729f93c3addSMax Reitz target_bs = blk_bs(s->target);
730f93c3addSMax Reitz
731ef53dc09SAlberto Garcia if (bdrv_chain_contains(src, target_bs)) {
732ef53dc09SAlberto Garcia bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
733ef53dc09SAlberto Garcia }
734ef53dc09SAlberto Garcia
7355deb6cbdSVladimir Sementsov-Ogievskiy bdrv_release_dirty_bitmap(s->dirty_bitmap);
7362119882cSPaolo Bonzini
7377b508f6bSJohn Snow /* Make sure that the source BDS doesn't go away during bdrv_replace_node,
7387b508f6bSJohn Snow * before we can call bdrv_drained_end */
7393f09bfbcSKevin Wolf bdrv_ref(src);
7404ef85a9cSKevin Wolf bdrv_ref(mirror_top_bs);
7417d9fcb39SKevin Wolf bdrv_ref(target_bs);
7427d9fcb39SKevin Wolf
7439275fc72SKevin Wolf bdrv_graph_rdunlock_main_loop();
7449275fc72SKevin Wolf
745bb0c9409SVladimir Sementsov-Ogievskiy /*
746bb0c9409SVladimir Sementsov-Ogievskiy * Remove target parent that still uses BLK_PERM_WRITE/RESIZE before
7477d9fcb39SKevin Wolf * inserting target_bs at s->to_replace, where we might not be able to get
74863c8ef28SKevin Wolf * these permissions.
749bb0c9409SVladimir Sementsov-Ogievskiy */
7507d9fcb39SKevin Wolf blk_unref(s->target);
7517d9fcb39SKevin Wolf s->target = NULL;
7524ef85a9cSKevin Wolf
7534ef85a9cSKevin Wolf /* We don't access the source any more. Dropping any WRITE/RESIZE is
754d2da5e28SKevin Wolf * required before it could become a backing file of target_bs. Not having
755d2da5e28SKevin Wolf * these permissions any more means that we can't allow any new requests on
756d2da5e28SKevin Wolf * mirror_top_bs from now on, so keep it drained. */
757d2da5e28SKevin Wolf bdrv_drained_begin(mirror_top_bs);
758ccd6a379SKevin Wolf bdrv_drained_begin(target_bs);
759f94dc3b4SMax Reitz bs_opaque->stop = true;
7603804e3cfSKevin Wolf
7613804e3cfSKevin Wolf bdrv_graph_rdlock_main_loop();
762f94dc3b4SMax Reitz bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
7634ef85a9cSKevin Wolf &error_abort);
7643804e3cfSKevin Wolf
765737efc1eSJohn Snow if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
7669474d97bSEric Blake BlockDriverState *backing;
7673f072a7fSMax Reitz BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
7683f072a7fSMax Reitz
7699474d97bSEric Blake backing = s->sync_mode == MIRROR_SYNC_MODE_NONE ? src : s->base;
7703f072a7fSMax Reitz if (bdrv_cow_bs(unfiltered_target) != backing) {
7713f072a7fSMax Reitz bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
77212fa4af6SKevin Wolf if (local_err) {
77312fa4af6SKevin Wolf error_report_err(local_err);
77466c8672dSVladimir Sementsov-Ogievskiy local_err = NULL;
7757b508f6bSJohn Snow ret = -EPERM;
77612fa4af6SKevin Wolf }
7774ef85a9cSKevin Wolf }
778c41f5b96SMax Reitz } else if (!abort && s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
779c41f5b96SMax Reitz assert(!bdrv_backing_chain_next(target_bs));
780c41f5b96SMax Reitz ret = bdrv_open_backing_file(bdrv_skip_filters(target_bs), NULL,
781c41f5b96SMax Reitz "backing", &local_err);
782c41f5b96SMax Reitz if (ret < 0) {
783c41f5b96SMax Reitz error_report_err(local_err);
784c41f5b96SMax Reitz local_err = NULL;
785c41f5b96SMax Reitz }
7864ef85a9cSKevin Wolf }
787ad74751fSKevin Wolf bdrv_graph_rdunlock_main_loop();
7885a7e7a0bSStefan Hajnoczi
789737efc1eSJohn Snow if (s->should_complete && !abort) {
790737efc1eSJohn Snow BlockDriverState *to_replace = s->to_replace ?: src;
7911ba79388SAlberto Garcia bool ro = bdrv_is_read_only(to_replace);
79240365552SKevin Wolf
7931ba79388SAlberto Garcia if (ro != bdrv_is_read_only(target_bs)) {
7941ba79388SAlberto Garcia bdrv_reopen_set_read_only(target_bs, ro, NULL);
7955a7e7a0bSStefan Hajnoczi }
796b8804815SKevin Wolf
797b8804815SKevin Wolf /* The mirror job has no requests in flight any more, but we need to
798b8804815SKevin Wolf      * drain other potential users of the BDS before changing the graph. */
7995e771752SSergio Lopez assert(s->in_drain);
800ccd6a379SKevin Wolf bdrv_drained_begin(to_replace);
8016e9cc051SMax Reitz /*
8026e9cc051SMax Reitz * Cannot use check_to_replace_node() here, because that would
8036e9cc051SMax Reitz * check for an op blocker on @to_replace, and we have our own
8046e9cc051SMax Reitz * there.
8056e9cc051SMax Reitz */
8066bc30f19SStefan Hajnoczi bdrv_graph_wrlock();
8076e9cc051SMax Reitz if (bdrv_recurse_can_replace(src, to_replace)) {
8085fe31c25SKevin Wolf bdrv_replace_node(to_replace, target_bs, &local_err);
8096e9cc051SMax Reitz } else {
8106e9cc051SMax Reitz error_setg(&local_err, "Can no longer replace '%s' by '%s', "
8116e9cc051SMax Reitz "because it can no longer be guaranteed that doing so "
8126e9cc051SMax Reitz "would not lead to an abrupt change of visible data",
8136e9cc051SMax Reitz to_replace->node_name, target_bs->node_name);
8146e9cc051SMax Reitz }
8156bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
816ccd6a379SKevin Wolf bdrv_drained_end(to_replace);
8175fe31c25SKevin Wolf if (local_err) {
8185fe31c25SKevin Wolf error_report_err(local_err);
8197b508f6bSJohn Snow ret = -EPERM;
8205fe31c25SKevin Wolf }
8215a7e7a0bSStefan Hajnoczi }
8225a7e7a0bSStefan Hajnoczi if (s->to_replace) {
8235a7e7a0bSStefan Hajnoczi bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
8245a7e7a0bSStefan Hajnoczi error_free(s->replace_blocker);
8255a7e7a0bSStefan Hajnoczi bdrv_unref(s->to_replace);
8265a7e7a0bSStefan Hajnoczi }
8275a7e7a0bSStefan Hajnoczi g_free(s->replaces);
8284ef85a9cSKevin Wolf
829f94dc3b4SMax Reitz /*
830f94dc3b4SMax Reitz * Remove the mirror filter driver from the graph. Before this, get rid of
8314ef85a9cSKevin Wolf * the blockers on the intermediate nodes so that the resulting state is
832f94dc3b4SMax Reitz * valid.
833f94dc3b4SMax Reitz */
8341908a559SKevin Wolf block_job_remove_all_bdrv(bjob);
8356bc30f19SStefan Hajnoczi bdrv_graph_wrlock();
8363f072a7fSMax Reitz bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
8376bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
838ccd6a379SKevin Wolf
8397d99ae59SAlexander Ivanov if (abort && s->base_ro && !bdrv_is_read_only(target_bs)) {
8407d99ae59SAlexander Ivanov bdrv_reopen_set_read_only(target_bs, true, NULL);
8417d99ae59SAlexander Ivanov }
8427d99ae59SAlexander Ivanov
843ccd6a379SKevin Wolf bdrv_drained_end(target_bs);
844ccd6a379SKevin Wolf bdrv_unref(target_bs);
8454ef85a9cSKevin Wolf
846429076e8SMax Reitz bs_opaque->job = NULL;
8474ef85a9cSKevin Wolf
848176c3699SFam Zheng bdrv_drained_end(src);
849d2da5e28SKevin Wolf bdrv_drained_end(mirror_top_bs);
8505e771752SSergio Lopez s->in_drain = false;
8514ef85a9cSKevin Wolf bdrv_unref(mirror_top_bs);
8523f09bfbcSKevin Wolf bdrv_unref(src);
8537b508f6bSJohn Snow
854737efc1eSJohn Snow return ret;
855737efc1eSJohn Snow }
856737efc1eSJohn Snow
857737efc1eSJohn Snow static int mirror_prepare(Job *job)
858737efc1eSJohn Snow {
859737efc1eSJohn Snow return mirror_exit_common(job);
860737efc1eSJohn Snow }
861737efc1eSJohn Snow
862737efc1eSJohn Snow static void mirror_abort(Job *job)
863737efc1eSJohn Snow {
864737efc1eSJohn Snow int ret = mirror_exit_common(job);
865737efc1eSJohn Snow assert(ret == 0);
8665a7e7a0bSStefan Hajnoczi }
8675a7e7a0bSStefan Hajnoczi
868537c3d4fSStefan Hajnoczi static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
86949efb1f5SDenis V. Lunev {
87049efb1f5SDenis V. Lunev int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
87149efb1f5SDenis V. Lunev
87218bb6928SKevin Wolf if (now - s->last_pause_ns > BLOCK_JOB_SLICE_TIME) {
87349efb1f5SDenis V. Lunev s->last_pause_ns = now;
8745d43e86eSKevin Wolf job_sleep_ns(&s->common.job, 0);
87549efb1f5SDenis V. Lunev } else {
876da01ff7fSKevin Wolf job_pause_point(&s->common.job);
87749efb1f5SDenis V. Lunev }
87849efb1f5SDenis V. Lunev }
87949efb1f5SDenis V. Lunev
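/* Set up the dirty bitmap (and the zero bitmap) before the copy loop starts: decide
 * whether the target already reads as zero, optionally pre-zero it, then mark the
 * allocated parts of the source as dirty. */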
880004915a9SKevin Wolf static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s)
881c0b363adSDenis V. Lunev {
88223ca459aSEric Blake int64_t offset;
883004915a9SKevin Wolf BlockDriverState *bs;
884c0b363adSDenis V. Lunev BlockDriverState *target_bs = blk_bs(s->target);
885870f8963SEric Blake int ret = -EIO;
88651b0a488SEric Blake int64_t count;
887d17a34bfSEric Blake bool punch_holes =
888d17a34bfSEric Blake target_bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
889d17a34bfSEric Blake bdrv_can_write_zeroes_with_unmap(target_bs);
8907e277545SEric Blake int64_t bitmap_length = DIV_ROUND_UP(s->bdev_length, s->granularity);
891c0b363adSDenis V. Lunev
892181a6366SEric Blake     /* Determine if the target already reads as zero, regardless of sync mode. */
8937e277545SEric Blake s->zero_bitmap = bitmap_new(bitmap_length);
894004915a9SKevin Wolf bdrv_graph_co_rdlock();
895004915a9SKevin Wolf bs = s->mirror_top_bs->backing->bs;
896181a6366SEric Blake if (s->target_is_zero) {
897181a6366SEric Blake ret = 1;
898181a6366SEric Blake } else {
899181a6366SEric Blake ret = bdrv_co_is_all_zeroes(target_bs);
900181a6366SEric Blake }
901004915a9SKevin Wolf bdrv_graph_co_rdunlock();
902004915a9SKevin Wolf
903181a6366SEric Blake /* Determine if a pre-zeroing pass is necessary. */
904181a6366SEric Blake if (ret < 0) {
905181a6366SEric Blake return ret;
906181a6366SEric Blake } else if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
9077e277545SEric Blake /*
9087e277545SEric Blake * In TOP mode, there is no benefit to a pre-zeroing pass, but
9097e277545SEric Blake * the zero bitmap can be set if the destination already reads
9107e277545SEric Blake * as zero and we are not punching holes.
9117e277545SEric Blake */
9127e277545SEric Blake if (ret > 0 && !punch_holes) {
9137e277545SEric Blake bitmap_set(s->zero_bitmap, 0, bitmap_length);
9147e277545SEric Blake }
915181a6366SEric Blake } else if (ret == 0 || punch_holes) {
916d17a34bfSEric Blake /*
917d17a34bfSEric Blake * Here, we are in FULL mode; our goal is to avoid writing
918d17a34bfSEric Blake * zeroes if the destination already reads as zero, except
919d17a34bfSEric Blake * when we are trying to punch holes. This is possible if
920181a6366SEric Blake * zeroing happened externally (ret > 0) or if we have a fast
921181a6366SEric Blake * way to pre-zero the image (the dirty bitmap will be
922181a6366SEric Blake * populated later by the non-zero portions, the same as for
923aff46b4bSEric Blake * TOP mode). If pre-zeroing is not fast, or we need to visit
924aff46b4bSEric Blake * the entire image in order to punch holes even in the
925aff46b4bSEric Blake * non-allocated regions of the source, then just mark the
926aff46b4bSEric Blake * entire image dirty and leave the zero bitmap clear at this
927aff46b4bSEric Blake * point in time. Otherwise, it can be faster to pre-zero the
928aff46b4bSEric Blake * image now, even if we re-write the allocated portions of
929aff46b4bSEric Blake * the disk later, and the pre-zero pass will populate the
930aff46b4bSEric Blake * zero bitmap.
931d17a34bfSEric Blake */
932aff46b4bSEric Blake if (!bdrv_can_write_zeroes_with_unmap(target_bs) || punch_holes) {
933e0d7f73eSEric Blake bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length);
934b7d5062cSDenis V. Lunev return 0;
935b7d5062cSDenis V. Lunev }
936b7d5062cSDenis V. Lunev
93790ab48ebSAnton Nefedov s->initial_zeroing_ongoing = true;
93823ca459aSEric Blake for (offset = 0; offset < s->bdev_length; ) {
93923ca459aSEric Blake int bytes = MIN(s->bdev_length - offset,
94023ca459aSEric Blake QEMU_ALIGN_DOWN(INT_MAX, s->granularity));
9417e277545SEric Blake bool ignored;
942c7c2769cSDenis V. Lunev
943c7c2769cSDenis V. Lunev mirror_throttle(s);
944c7c2769cSDenis V. Lunev
945daa7f2f9SKevin Wolf if (job_is_cancelled(&s->common.job)) {
94690ab48ebSAnton Nefedov s->initial_zeroing_ongoing = false;
947c7c2769cSDenis V. Lunev return 0;
948c7c2769cSDenis V. Lunev }
949c7c2769cSDenis V. Lunev
950c7c2769cSDenis V. Lunev if (s->in_flight >= MAX_IN_FLIGHT) {
95167adf4b3SEric Blake trace_mirror_yield(s, UINT64_MAX, s->buf_free_count,
95267adf4b3SEric Blake s->in_flight);
9539178f4feSKevin Wolf mirror_wait_for_free_in_flight_slot(s);
954c7c2769cSDenis V. Lunev continue;
955c7c2769cSDenis V. Lunev }
956c7c2769cSDenis V. Lunev
9577e277545SEric Blake mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO, &ignored);
95823ca459aSEric Blake offset += bytes;
959c7c2769cSDenis V. Lunev }
960c7c2769cSDenis V. Lunev
961bae8196dSPaolo Bonzini mirror_wait_for_all_io(s);
96290ab48ebSAnton Nefedov s->initial_zeroing_ongoing = false;
9637e277545SEric Blake } else {
9647e277545SEric Blake         /* In FULL mode, and the image already reads as zero. */
9657e277545SEric Blake bitmap_set(s->zero_bitmap, 0, bitmap_length);
966c7c2769cSDenis V. Lunev }
967c7c2769cSDenis V. Lunev
968c0b363adSDenis V. Lunev /* First part, loop on the sectors and initialize the dirty bitmap. */
96923ca459aSEric Blake for (offset = 0; offset < s->bdev_length; ) {
970c0b363adSDenis V. Lunev /* Just to make sure we are not exceeding int limit. */
97123ca459aSEric Blake int bytes = MIN(s->bdev_length - offset,
97223ca459aSEric Blake QEMU_ALIGN_DOWN(INT_MAX, s->granularity));
973c0b363adSDenis V. Lunev
974c0b363adSDenis V. Lunev mirror_throttle(s);
975c0b363adSDenis V. Lunev
976daa7f2f9SKevin Wolf if (job_is_cancelled(&s->common.job)) {
977c0b363adSDenis V. Lunev return 0;
978c0b363adSDenis V. Lunev }
979c0b363adSDenis V. Lunev
9807ff9579eSKevin Wolf WITH_GRAPH_RDLOCK_GUARD() {
981cc323997SPaolo Bonzini ret = bdrv_co_is_allocated_above(bs, s->base_overlay, true, offset,
9827ff9579eSKevin Wolf bytes, &count);
9837ff9579eSKevin Wolf }
984c0b363adSDenis V. Lunev if (ret < 0) {
985c0b363adSDenis V. Lunev return ret;
986c0b363adSDenis V. Lunev }
987c0b363adSDenis V. Lunev
98823ca459aSEric Blake assert(count);
989a92b1b06SEric Blake if (ret > 0) {
99023ca459aSEric Blake bdrv_set_dirty_bitmap(s->dirty_bitmap, offset, count);
991c0b363adSDenis V. Lunev }
99223ca459aSEric Blake offset += count;
993c0b363adSDenis V. Lunev }
994c0b363adSDenis V. Lunev return 0;
995c0b363adSDenis V. Lunev }
996c0b363adSDenis V. Lunev
997bdffb31dSPaolo Bonzini /* Called when going out of the streaming phase to flush the bulk of the
998bdffb31dSPaolo Bonzini * data to the medium, or just before completing.
999bdffb31dSPaolo Bonzini */
100026bef102SPaolo Bonzini static int coroutine_fn mirror_flush(MirrorBlockJob *s)
1001bdffb31dSPaolo Bonzini {
100226bef102SPaolo Bonzini int ret = blk_co_flush(s->target);
1003bdffb31dSPaolo Bonzini if (ret < 0) {
1004bdffb31dSPaolo Bonzini if (mirror_error_action(s, false, -ret) == BLOCK_ERROR_ACTION_REPORT) {
1005bdffb31dSPaolo Bonzini s->ret = ret;
1006bdffb31dSPaolo Bonzini }
1007bdffb31dSPaolo Bonzini }
1008bdffb31dSPaolo Bonzini return ret;
1009bdffb31dSPaolo Bonzini }
1010bdffb31dSPaolo Bonzini
1011f67432a2SJohn Snow static int coroutine_fn mirror_run(Job *job, Error **errp)
1012893f7ebaSPaolo Bonzini {
1013f67432a2SJohn Snow MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
1014004915a9SKevin Wolf BlockDriverState *bs;
101532125b14SKevin Wolf MirrorBDSOpaque *mirror_top_opaque = s->mirror_top_bs->opaque;
1016e253f4b8SKevin Wolf BlockDriverState *target_bs = blk_bs(s->target);
10179a0cec66SPaolo Bonzini bool need_drain = true;
1018ba11c88dSMarc-André Lureau BlockDeviceIoStatus iostatus = BLOCK_DEVICE_IO_STATUS__MAX;
1019c0b363adSDenis V. Lunev int64_t length;
1020e83dd680SKevin Wolf int64_t target_length;
1021b812f671SPaolo Bonzini BlockDriverInfo bdi;
10221d33936eSJeff Cody char backing_filename[2]; /* we only need 2 characters because we are only
10231d33936eSJeff Cody                                    checking for an empty string */
1024893f7ebaSPaolo Bonzini int ret = 0;
1025893f7ebaSPaolo Bonzini
1026004915a9SKevin Wolf bdrv_graph_co_rdlock();
1027004915a9SKevin Wolf bs = bdrv_filter_bs(s->mirror_top_bs);
1028004915a9SKevin Wolf bdrv_graph_co_rdunlock();
1029004915a9SKevin Wolf
1030daa7f2f9SKevin Wolf if (job_is_cancelled(&s->common.job)) {
1031893f7ebaSPaolo Bonzini goto immediate_exit;
1032893f7ebaSPaolo Bonzini }
1033893f7ebaSPaolo Bonzini
10348ab8140aSKevin Wolf bdrv_graph_co_rdlock();
1035c86422c5SEmanuele Giuseppe Esposito s->bdev_length = bdrv_co_getlength(bs);
10368ab8140aSKevin Wolf bdrv_graph_co_rdunlock();
10378ab8140aSKevin Wolf
1038b21c7652SMax Reitz if (s->bdev_length < 0) {
1039b21c7652SMax Reitz ret = s->bdev_length;
1040373df5b1SFam Zheng goto immediate_exit;
1041becc347eSKevin Wolf }
1042becc347eSKevin Wolf
1043c86422c5SEmanuele Giuseppe Esposito target_length = blk_co_getlength(s->target);
1044e83dd680SKevin Wolf if (target_length < 0) {
1045e83dd680SKevin Wolf ret = target_length;
1046becc347eSKevin Wolf goto immediate_exit;
1047becc347eSKevin Wolf }
1048becc347eSKevin Wolf
1049e83dd680SKevin Wolf /* Active commit must resize the base image if its size differs from the
1050e83dd680SKevin Wolf * active layer. */
1051e83dd680SKevin Wolf if (s->base == blk_bs(s->target)) {
1052e83dd680SKevin Wolf if (s->bdev_length > target_length) {
105388276216SAlberto Faria ret = blk_co_truncate(s->target, s->bdev_length, false,
10548c6242b6SKevin Wolf PREALLOC_MODE_OFF, 0, NULL);
1055becc347eSKevin Wolf if (ret < 0) {
1056becc347eSKevin Wolf goto immediate_exit;
1057becc347eSKevin Wolf }
1058becc347eSKevin Wolf }
1059e83dd680SKevin Wolf } else if (s->bdev_length != target_length) {
1060e83dd680SKevin Wolf error_setg(errp, "Source and target image have different sizes");
1061e83dd680SKevin Wolf ret = -EINVAL;
1062e83dd680SKevin Wolf goto immediate_exit;
1063becc347eSKevin Wolf }
1064becc347eSKevin Wolf
1065becc347eSKevin Wolf if (s->bdev_length == 0) {
10662e1795b5SKevin Wolf         /* Transition to the READY state and wait for completion to be requested. */
10672e1795b5SKevin Wolf job_transition_to_ready(&s->common.job);
106876cb2f24SFiona Ebner qatomic_set(&s->actively_synced, true);
106908b83bffSHanna Reitz while (!job_cancel_requested(&s->common.job) && !s->should_complete) {
1070198c49ccSKevin Wolf job_yield(&s->common.job);
10719e48b025SFam Zheng }
10729e48b025SFam Zheng goto immediate_exit;
1073893f7ebaSPaolo Bonzini }
1074893f7ebaSPaolo Bonzini
1075b21c7652SMax Reitz length = DIV_ROUND_UP(s->bdev_length, s->granularity);
1076402a4741SPaolo Bonzini s->in_flight_bitmap = bitmap_new(length);
1077402a4741SPaolo Bonzini
1078b812f671SPaolo Bonzini /* If we have no backing file yet in the destination, we cannot let
1079b812f671SPaolo Bonzini * the destination do COW. Instead, we copy sectors around the
1080b812f671SPaolo Bonzini * dirty data if needed. We need a bitmap to do that.
1081b812f671SPaolo Bonzini */
1082e253f4b8SKevin Wolf bdrv_get_backing_filename(target_bs, backing_filename,
1083b812f671SPaolo Bonzini sizeof(backing_filename));
1084a00e70c0SEmanuele Giuseppe Esposito bdrv_graph_co_rdlock();
10853d47eb0aSEmanuele Giuseppe Esposito if (!bdrv_co_get_info(target_bs, &bdi) && bdi.cluster_size) {
1086b436982fSEric Blake s->target_cluster_size = bdi.cluster_size;
1087b436982fSEric Blake } else {
1088b436982fSEric Blake s->target_cluster_size = BDRV_SECTOR_SIZE;
1089c3cc95bdSFam Zheng }
10903f072a7fSMax Reitz if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
1091b436982fSEric Blake s->granularity < s->target_cluster_size) {
1092b436982fSEric Blake s->buf_size = MAX(s->buf_size, s->target_cluster_size);
1093b812f671SPaolo Bonzini s->cow_bitmap = bitmap_new(length);
1094b812f671SPaolo Bonzini }
1095e253f4b8SKevin Wolf s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);
1096ad74751fSKevin Wolf bdrv_graph_co_rdunlock();
1097b812f671SPaolo Bonzini
10987504edf4SKevin Wolf s->buf = qemu_try_blockalign(bs, s->buf_size);
10997504edf4SKevin Wolf if (s->buf == NULL) {
11007504edf4SKevin Wolf ret = -ENOMEM;
11017504edf4SKevin Wolf goto immediate_exit;
11027504edf4SKevin Wolf }
11037504edf4SKevin Wolf
1104402a4741SPaolo Bonzini mirror_free_init(s);
1105893f7ebaSPaolo Bonzini
110649efb1f5SDenis V. Lunev s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
11079474d97bSEric Blake if (s->sync_mode != MIRROR_SYNC_MODE_NONE) {
1108c0b363adSDenis V. Lunev ret = mirror_dirty_init(s);
1109daa7f2f9SKevin Wolf if (ret < 0 || job_is_cancelled(&s->common.job)) {
11104c0cbd6fSFam Zheng goto immediate_exit;
11114c0cbd6fSFam Zheng }
1112893f7ebaSPaolo Bonzini }
1113893f7ebaSPaolo Bonzini
111432125b14SKevin Wolf /*
111532125b14SKevin Wolf * Only now the job is fully initialised and mirror_top_bs should start
111632125b14SKevin Wolf * accessing it.
111732125b14SKevin Wolf */
111832125b14SKevin Wolf mirror_top_opaque->job = s;
111932125b14SKevin Wolf
1120dc162c8eSFam Zheng assert(!s->dbi);
1121715a74d8SEric Blake s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
1122893f7ebaSPaolo Bonzini for (;;) {
112349efb1f5SDenis V. Lunev int64_t cnt, delta;
1124893f7ebaSPaolo Bonzini bool should_complete;
1125893f7ebaSPaolo Bonzini
1126bd48bde8SPaolo Bonzini if (s->ret < 0) {
1127bd48bde8SPaolo Bonzini ret = s->ret;
1128893f7ebaSPaolo Bonzini goto immediate_exit;
1129893f7ebaSPaolo Bonzini }
1130bd48bde8SPaolo Bonzini
1131da01ff7fSKevin Wolf job_pause_point(&s->common.job);
1132565ac01fSStefan Hajnoczi
11334feeec7eSHanna Reitz if (job_is_cancelled(&s->common.job)) {
11344feeec7eSHanna Reitz ret = 0;
11354feeec7eSHanna Reitz goto immediate_exit;
11364feeec7eSHanna Reitz }
11374feeec7eSHanna Reitz
113820dca810SJohn Snow cnt = bdrv_get_dirty_count(s->dirty_bitmap);
113905df8a6aSKevin Wolf /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is
114005df8a6aSKevin Wolf * the number of bytes currently being processed; together those are
114105df8a6aSKevin Wolf * the current remaining operation length */
1142d69a879bSHanna Reitz job_progress_set_remaining(&s->common.job,
1143d69a879bSHanna Reitz s->bytes_in_flight + cnt +
1144d69a879bSHanna Reitz s->active_write_bytes_in_flight);
1145bd48bde8SPaolo Bonzini
1146bd48bde8SPaolo Bonzini /* Note that even when no rate limit is applied we need to yield
1147a7282330SFam Zheng * periodically with no pending I/O so that bdrv_drain_all() returns.
114818bb6928SKevin Wolf * We do so every BLOCK_JOB_SLICE_TIME nanoseconds, or when there is
114918bb6928SKevin Wolf * an error, or when the source is clean, whichever comes first. */
115049efb1f5SDenis V. Lunev delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
1151d59cb66dSEmanuele Giuseppe Esposito WITH_JOB_LOCK_GUARD() {
1152d59cb66dSEmanuele Giuseppe Esposito iostatus = s->common.iostatus;
1153d59cb66dSEmanuele Giuseppe Esposito }
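/*
 * Put differently: a new background iteration is only started while we are
 * inside the current time slice with a clean iostatus; once MAX_IN_FLIGHT
 * (16) requests are pending, the buffer chunks are exhausted, or only
 * in-flight data remains, we wait for a free slot instead.
 */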
115418bb6928SKevin Wolf if (delta < BLOCK_JOB_SLICE_TIME &&
1155d59cb66dSEmanuele Giuseppe Esposito iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
1156cf56a3c6SDenis V. Lunev if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
1157402a4741SPaolo Bonzini (cnt == 0 && s->in_flight > 0)) {
11589a46dba7SEric Blake trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
11599178f4feSKevin Wolf mirror_wait_for_free_in_flight_slot(s);
1160bd48bde8SPaolo Bonzini continue;
1161bd48bde8SPaolo Bonzini } else if (cnt != 0) {
1162018e5987SKevin Wolf mirror_iteration(s);
1163893f7ebaSPaolo Bonzini }
1164cc8c9d6cSPaolo Bonzini }
1165893f7ebaSPaolo Bonzini
1166893f7ebaSPaolo Bonzini should_complete = false;
1167bd48bde8SPaolo Bonzini if (s->in_flight == 0 && cnt == 0) {
1168893f7ebaSPaolo Bonzini trace_mirror_before_flush(s);
116944716224SHanna Reitz if (!job_is_ready(&s->common.job)) {
1170bdffb31dSPaolo Bonzini if (mirror_flush(s) < 0) {
1171bdffb31dSPaolo Bonzini /* Go check s->ret. */
1172bdffb31dSPaolo Bonzini continue;
1173893f7ebaSPaolo Bonzini }
1174893f7ebaSPaolo Bonzini /* We're out of the streaming phase. From now on, if the job
1175893f7ebaSPaolo Bonzini * is cancelled we will actually complete all pending I/O and
1176893f7ebaSPaolo Bonzini * report completion. This way, block-job-cancel will leave
1177893f7ebaSPaolo Bonzini * the target in a consistent state.
1178893f7ebaSPaolo Bonzini */
11792e1795b5SKevin Wolf job_transition_to_ready(&s->common.job);
1180c45d0e1aSFiona Ebner }
11812d400d15SFiona Ebner if (qatomic_read(&s->copy_mode) != MIRROR_COPY_MODE_BACKGROUND) {
118276cb2f24SFiona Ebner qatomic_set(&s->actively_synced, true);
1183d06107adSMax Reitz }
1184d63ffd87SPaolo Bonzini
1185d63ffd87SPaolo Bonzini should_complete = s->should_complete ||
118608b83bffSHanna Reitz job_cancel_requested(&s->common.job);
118720dca810SJohn Snow cnt = bdrv_get_dirty_count(s->dirty_bitmap);
1188893f7ebaSPaolo Bonzini }
1189893f7ebaSPaolo Bonzini
1190893f7ebaSPaolo Bonzini if (cnt == 0 && should_complete) {
1191893f7ebaSPaolo Bonzini /* The dirty bitmap is not updated while operations are pending.
1192893f7ebaSPaolo Bonzini * If we're about to exit, wait for pending operations before
1193893f7ebaSPaolo Bonzini * calling bdrv_get_dirty_count(bs), or we may exit while the
1194893f7ebaSPaolo Bonzini * source has dirty data to copy!
1195893f7ebaSPaolo Bonzini *
1196893f7ebaSPaolo Bonzini * Note that I/O can be submitted by the guest while the
11979a0cec66SPaolo Bonzini * mirror job copies data, so pause it now. Before deciding
11989a0cec66SPaolo Bonzini * whether to switch to the target, check one last time whether
11999a0cec66SPaolo Bonzini * I/O has arrived in the meantime, and if not flush the data to disk.
1200893f7ebaSPaolo Bonzini */
12019a46dba7SEric Blake trace_mirror_before_drain(s, cnt);
12029a0cec66SPaolo Bonzini
12035e771752SSergio Lopez s->in_drain = true;
12049a0cec66SPaolo Bonzini bdrv_drained_begin(bs);
1205d69a879bSHanna Reitz
1206d69a879bSHanna Reitz /* Must be zero because we are drained */
1207d69a879bSHanna Reitz assert(s->in_active_write_counter == 0);
1208d69a879bSHanna Reitz
120920dca810SJohn Snow cnt = bdrv_get_dirty_count(s->dirty_bitmap);
1210bdffb31dSPaolo Bonzini if (cnt > 0 || mirror_flush(s) < 0) {
12119a0cec66SPaolo Bonzini bdrv_drained_end(bs);
12125e771752SSergio Lopez s->in_drain = false;
12139a0cec66SPaolo Bonzini continue;
12149a0cec66SPaolo Bonzini }
12159a0cec66SPaolo Bonzini
12169a0cec66SPaolo Bonzini /* The two disks are in sync. Exit and report successful
12179a0cec66SPaolo Bonzini * completion.
12189a0cec66SPaolo Bonzini */
12199a0cec66SPaolo Bonzini assert(QLIST_EMPTY(&bs->tracked_requests));
12209a0cec66SPaolo Bonzini need_drain = false;
12219a0cec66SPaolo Bonzini break;
1222893f7ebaSPaolo Bonzini }
1223893f7ebaSPaolo Bonzini
122444716224SHanna Reitz if (job_is_ready(&s->common.job) && !should_complete) {
1225018e5987SKevin Wolf if (s->in_flight == 0 && cnt == 0) {
122644716224SHanna Reitz trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job),
1227018e5987SKevin Wolf BLOCK_JOB_SLICE_TIME);
1228018e5987SKevin Wolf job_sleep_ns(&s->common.job, BLOCK_JOB_SLICE_TIME);
1229018e5987SKevin Wolf }
1230018e5987SKevin Wolf } else {
1231018e5987SKevin Wolf block_job_ratelimit_sleep(&s->common);
1232018e5987SKevin Wolf }
123349efb1f5SDenis V. Lunev s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1234893f7ebaSPaolo Bonzini }
1235893f7ebaSPaolo Bonzini
1236893f7ebaSPaolo Bonzini immediate_exit:
1237bd48bde8SPaolo Bonzini if (s->in_flight > 0) {
1238bd48bde8SPaolo Bonzini /* We get here only if something went wrong. Either the job failed,
1239bd48bde8SPaolo Bonzini * or it was cancelled prematurely so that we do not guarantee that
1240bd48bde8SPaolo Bonzini * the target is a copy of the source.
1241bd48bde8SPaolo Bonzini */
124208b83bffSHanna Reitz assert(ret < 0 || job_is_cancelled(&s->common.job));
12439a0cec66SPaolo Bonzini assert(need_drain);
1244bae8196dSPaolo Bonzini mirror_wait_for_all_io(s);
1245bd48bde8SPaolo Bonzini }
1246bd48bde8SPaolo Bonzini
1247bd48bde8SPaolo Bonzini assert(s->in_flight == 0);
12487191bf31SMarkus Armbruster qemu_vfree(s->buf);
1249b812f671SPaolo Bonzini g_free(s->cow_bitmap);
12507e277545SEric Blake g_free(s->zero_bitmap);
1251402a4741SPaolo Bonzini g_free(s->in_flight_bitmap);
1252dc162c8eSFam Zheng bdrv_dirty_iter_free(s->dbi);
12535a7e7a0bSStefan Hajnoczi
12549a0cec66SPaolo Bonzini if (need_drain) {
12555e771752SSergio Lopez s->in_drain = true;
1256e253f4b8SKevin Wolf bdrv_drained_begin(bs);
12579a0cec66SPaolo Bonzini }
1258f67432a2SJohn Snow
1259f67432a2SJohn Snow return ret;
1260893f7ebaSPaolo Bonzini }
1261893f7ebaSPaolo Bonzini
12623453d972SKevin Wolf static void mirror_complete(Job *job, Error **errp)
1263d63ffd87SPaolo Bonzini {
12643453d972SKevin Wolf MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
1265274fcceeSMax Reitz
126644716224SHanna Reitz if (!job_is_ready(job)) {
12679df229c3SAlberto Garcia error_setg(errp, "The active block job '%s' cannot be completed",
12683453d972SKevin Wolf job->id);
1269d63ffd87SPaolo Bonzini return;
1270d63ffd87SPaolo Bonzini }
1271d63ffd87SPaolo Bonzini
127215d67298SChanglong Xie /* block all operations on to_replace bs */
127309158f00SBenoît Canet if (s->replaces) {
1274e12f3784SWen Congyang s->to_replace = bdrv_find_node(s->replaces);
127509158f00SBenoît Canet if (!s->to_replace) {
1276e12f3784SWen Congyang error_setg(errp, "Node name '%s' not found", s->replaces);
127709158f00SBenoît Canet return;
127809158f00SBenoît Canet }
127909158f00SBenoît Canet
128064631f36SVladimir Sementsov-Ogievskiy /* TODO Translate this into child freeze system. */
128109158f00SBenoît Canet error_setg(&s->replace_blocker,
128209158f00SBenoît Canet "block device is in use by block-job-complete");
128309158f00SBenoît Canet bdrv_op_block_all(s->to_replace, s->replace_blocker);
128409158f00SBenoît Canet bdrv_ref(s->to_replace);
128509158f00SBenoît Canet }
128609158f00SBenoît Canet
1287d63ffd87SPaolo Bonzini s->should_complete = true;
128800769414SMax Reitz
128900769414SMax Reitz /* If the job is paused, it will be re-entered when it is resumed */
1290279ac06eSEmanuele Giuseppe Esposito WITH_JOB_LOCK_GUARD() {
129100769414SMax Reitz if (!job->paused) {
1292279ac06eSEmanuele Giuseppe Esposito job_enter_cond_locked(job, NULL);
1293279ac06eSEmanuele Giuseppe Esposito }
1294d63ffd87SPaolo Bonzini }
129500769414SMax Reitz }
1296d63ffd87SPaolo Bonzini
1297537c3d4fSStefan Hajnoczi static void coroutine_fn mirror_pause(Job *job)
1298565ac01fSStefan Hajnoczi {
1299da01ff7fSKevin Wolf MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
1300565ac01fSStefan Hajnoczi
1301bae8196dSPaolo Bonzini mirror_wait_for_all_io(s);
1302565ac01fSStefan Hajnoczi }
1303565ac01fSStefan Hajnoczi
130489bd0305SKevin Wolf static bool mirror_drained_poll(BlockJob *job)
130589bd0305SKevin Wolf {
130689bd0305SKevin Wolf MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
13075e771752SSergio Lopez
13085e771752SSergio Lopez /* If the job is neither paused nor cancelled, we can't be sure that it won't
13095e771752SSergio Lopez * issue more requests. We make an exception if we've reached this point
13105e771752SSergio Lopez * from one of our own drain sections, to avoid a deadlock waiting for
13115e771752SSergio Lopez * ourselves.
13125e771752SSergio Lopez */
1313279ac06eSEmanuele Giuseppe Esposito WITH_JOB_LOCK_GUARD() {
1314279ac06eSEmanuele Giuseppe Esposito if (!s->common.job.paused && !job_is_cancelled_locked(&job->job)
1315279ac06eSEmanuele Giuseppe Esposito && !s->in_drain) {
13165e771752SSergio Lopez return true;
13175e771752SSergio Lopez }
1318279ac06eSEmanuele Giuseppe Esposito }
13195e771752SSergio Lopez
132089bd0305SKevin Wolf return !!s->in_flight;
132189bd0305SKevin Wolf }
132289bd0305SKevin Wolf
132373895f38SHanna Reitz static bool mirror_cancel(Job *job, bool force)
1324521ff8b7SVladimir Sementsov-Ogievskiy {
1325521ff8b7SVladimir Sementsov-Ogievskiy MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
1326521ff8b7SVladimir Sementsov-Ogievskiy BlockDriverState *target = blk_bs(s->target);
1327521ff8b7SVladimir Sementsov-Ogievskiy
132873895f38SHanna Reitz /*
132973895f38SHanna Reitz * Before the job is READY, we treat any cancellation like a
133073895f38SHanna Reitz * force-cancellation.
133173895f38SHanna Reitz */
133273895f38SHanna Reitz force = force || !job_is_ready(job);
133373895f38SHanna Reitz
133473895f38SHanna Reitz if (force) {
1335521ff8b7SVladimir Sementsov-Ogievskiy bdrv_cancel_in_flight(target);
1336521ff8b7SVladimir Sementsov-Ogievskiy }
133773895f38SHanna Reitz return force;
133873895f38SHanna Reitz }
133973895f38SHanna Reitz
134073895f38SHanna Reitz static bool commit_active_cancel(Job *job, bool force)
134173895f38SHanna Reitz {
134273895f38SHanna Reitz /* Same as above in mirror_cancel() */
134373895f38SHanna Reitz return force || !job_is_ready(job);
13449c785cd7SVladimir Sementsov-Ogievskiy }
1345521ff8b7SVladimir Sementsov-Ogievskiy
13462d400d15SFiona Ebner static void mirror_change(BlockJob *job, BlockJobChangeOptions *opts,
13472d400d15SFiona Ebner Error **errp)
13482d400d15SFiona Ebner {
13492d400d15SFiona Ebner MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
13502d400d15SFiona Ebner BlockJobChangeOptionsMirror *change_opts = &opts->u.mirror;
13512d400d15SFiona Ebner MirrorCopyMode current;
13522d400d15SFiona Ebner
13532d400d15SFiona Ebner /*
13542d400d15SFiona Ebner * The implementation relies on the fact that copy_mode is only written
13552d400d15SFiona Ebner * under the BQL. Otherwise, further synchronization would be required.
13562d400d15SFiona Ebner */
13572d400d15SFiona Ebner
13582d400d15SFiona Ebner GLOBAL_STATE_CODE();
13592d400d15SFiona Ebner
13602d400d15SFiona Ebner if (qatomic_read(&s->copy_mode) == change_opts->copy_mode) {
13612d400d15SFiona Ebner return;
13622d400d15SFiona Ebner }
13632d400d15SFiona Ebner
13642d400d15SFiona Ebner if (change_opts->copy_mode != MIRROR_COPY_MODE_WRITE_BLOCKING) {
13652d400d15SFiona Ebner error_setg(errp, "Change to copy mode '%s' is not implemented",
13662d400d15SFiona Ebner MirrorCopyMode_str(change_opts->copy_mode));
13672d400d15SFiona Ebner return;
13682d400d15SFiona Ebner }
13692d400d15SFiona Ebner
13702d400d15SFiona Ebner current = qatomic_cmpxchg(&s->copy_mode, MIRROR_COPY_MODE_BACKGROUND,
13712d400d15SFiona Ebner change_opts->copy_mode);
13722d400d15SFiona Ebner if (current != MIRROR_COPY_MODE_BACKGROUND) {
13732d400d15SFiona Ebner error_setg(errp, "Expected current copy mode '%s', got '%s'",
13742d400d15SFiona Ebner MirrorCopyMode_str(MIRROR_COPY_MODE_BACKGROUND),
13752d400d15SFiona Ebner MirrorCopyMode_str(current));
13762d400d15SFiona Ebner }
13772d400d15SFiona Ebner }
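/*
 * A sketch of how this is driven from QMP (the job name "job0" is
 * illustrative); only the switch to write-blocking mode is supported:
 *
 *   { "execute": "block-job-change",
 *     "arguments": { "id": "job0", "type": "mirror",
 *                    "copy-mode": "write-blocking" } }
 */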
13782d400d15SFiona Ebner
137976cb2f24SFiona Ebner static void mirror_query(BlockJob *job, BlockJobInfo *info)
138076cb2f24SFiona Ebner {
138176cb2f24SFiona Ebner MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
138276cb2f24SFiona Ebner
138376cb2f24SFiona Ebner info->u.mirror = (BlockJobInfoMirror) {
138476cb2f24SFiona Ebner .actively_synced = qatomic_read(&s->actively_synced),
138576cb2f24SFiona Ebner };
138676cb2f24SFiona Ebner }
138776cb2f24SFiona Ebner
13883fc4b10aSFam Zheng static const BlockJobDriver mirror_job_driver = {
138933e9e9bdSKevin Wolf .job_driver = {
1390893f7ebaSPaolo Bonzini .instance_size = sizeof(MirrorBlockJob),
13918e4c8700SKevin Wolf .job_type = JOB_TYPE_MIRROR,
139280fa2c75SKevin Wolf .free = block_job_free,
1393b15de828SKevin Wolf .user_resume = block_job_user_resume,
1394f67432a2SJohn Snow .run = mirror_run,
1395737efc1eSJohn Snow .prepare = mirror_prepare,
1396737efc1eSJohn Snow .abort = mirror_abort,
1397565ac01fSStefan Hajnoczi .pause = mirror_pause,
1398da01ff7fSKevin Wolf .complete = mirror_complete,
1399521ff8b7SVladimir Sementsov-Ogievskiy .cancel = mirror_cancel,
14003453d972SKevin Wolf },
140189bd0305SKevin Wolf .drained_poll = mirror_drained_poll,
14022d400d15SFiona Ebner .change = mirror_change,
140376cb2f24SFiona Ebner .query = mirror_query,
1404893f7ebaSPaolo Bonzini };
1405893f7ebaSPaolo Bonzini
140603544a6eSFam Zheng static const BlockJobDriver commit_active_job_driver = {
140733e9e9bdSKevin Wolf .job_driver = {
140803544a6eSFam Zheng .instance_size = sizeof(MirrorBlockJob),
14098e4c8700SKevin Wolf .job_type = JOB_TYPE_COMMIT,
141080fa2c75SKevin Wolf .free = block_job_free,
1411b15de828SKevin Wolf .user_resume = block_job_user_resume,
1412f67432a2SJohn Snow .run = mirror_run,
1413737efc1eSJohn Snow .prepare = mirror_prepare,
1414737efc1eSJohn Snow .abort = mirror_abort,
1415565ac01fSStefan Hajnoczi .pause = mirror_pause,
1416da01ff7fSKevin Wolf .complete = mirror_complete,
141773895f38SHanna Reitz .cancel = commit_active_cancel,
14183453d972SKevin Wolf },
141989bd0305SKevin Wolf .drained_poll = mirror_drained_poll,
142003544a6eSFam Zheng };
142103544a6eSFam Zheng
1422537c3d4fSStefan Hajnoczi static void coroutine_fn
1423537c3d4fSStefan Hajnoczi do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
1424d06107adSMax Reitz uint64_t offset, uint64_t bytes,
1425d06107adSMax Reitz QEMUIOVector *qiov, int flags)
1426d06107adSMax Reitz {
1427d06107adSMax Reitz int ret;
1428dbdf699cSVladimir Sementsov-Ogievskiy size_t qiov_offset = 0;
1429870f8963SEric Blake int64_t dirty_bitmap_offset, dirty_bitmap_end;
14307e277545SEric Blake int64_t zero_bitmap_offset, zero_bitmap_end;
1431d06107adSMax Reitz
1432dbdf699cSVladimir Sementsov-Ogievskiy if (!QEMU_IS_ALIGNED(offset, job->granularity) &&
1433dbdf699cSVladimir Sementsov-Ogievskiy bdrv_dirty_bitmap_get(job->dirty_bitmap, offset))
1434dbdf699cSVladimir Sementsov-Ogievskiy {
1435dbdf699cSVladimir Sementsov-Ogievskiy /*
1436dbdf699cSVladimir Sementsov-Ogievskiy * Dirty unaligned padding: ignore it.
1437dbdf699cSVladimir Sementsov-Ogievskiy *
1438dbdf699cSVladimir Sementsov-Ogievskiy * Reasoning:
1439dbdf699cSVladimir Sementsov-Ogievskiy * 1. If we copy it, we can't reset corresponding bit in
1440dbdf699cSVladimir Sementsov-Ogievskiy * dirty_bitmap as there may be some "dirty" bytes still not
1441dbdf699cSVladimir Sementsov-Ogievskiy * copied.
1442dbdf699cSVladimir Sementsov-Ogievskiy * 2. It's already dirty, so skipping it does not lose any mirror
1443dbdf699cSVladimir Sementsov-Ogievskiy * progress.
1444dbdf699cSVladimir Sementsov-Ogievskiy *
1445dbdf699cSVladimir Sementsov-Ogievskiy * Note that, because of this, a guest write may contribute nothing
1446dbdf699cSVladimir Sementsov-Ogievskiy * to mirror convergence, but that is acceptable, as the background
1447dbdf699cSVladimir Sementsov-Ogievskiy * mirroring process is still running. If, under bad circumstances
1448dbdf699cSVladimir Sementsov-Ogievskiy * (high guest I/O load), the background process is starved, we will
1449dbdf699cSVladimir Sementsov-Ogievskiy * not converge anyway, even if every write contributed, as the guest
1450dbdf699cSVladimir Sementsov-Ogievskiy * is not guaranteed to rewrite the whole disk.
1451dbdf699cSVladimir Sementsov-Ogievskiy */
1452dbdf699cSVladimir Sementsov-Ogievskiy qiov_offset = QEMU_ALIGN_UP(offset, job->granularity) - offset;
1453dbdf699cSVladimir Sementsov-Ogievskiy if (bytes <= qiov_offset) {
1454dbdf699cSVladimir Sementsov-Ogievskiy /* nothing to do after shrink */
1455dbdf699cSVladimir Sementsov-Ogievskiy return;
1456dbdf699cSVladimir Sementsov-Ogievskiy }
1457dbdf699cSVladimir Sementsov-Ogievskiy offset += qiov_offset;
1458dbdf699cSVladimir Sementsov-Ogievskiy bytes -= qiov_offset;
1459dbdf699cSVladimir Sementsov-Ogievskiy }
1460dbdf699cSVladimir Sementsov-Ogievskiy
1461dbdf699cSVladimir Sementsov-Ogievskiy if (!QEMU_IS_ALIGNED(offset + bytes, job->granularity) &&
1462dbdf699cSVladimir Sementsov-Ogievskiy bdrv_dirty_bitmap_get(job->dirty_bitmap, offset + bytes - 1))
1463dbdf699cSVladimir Sementsov-Ogievskiy {
1464dbdf699cSVladimir Sementsov-Ogievskiy uint64_t tail = (offset + bytes) % job->granularity;
1465dbdf699cSVladimir Sementsov-Ogievskiy
1466dbdf699cSVladimir Sementsov-Ogievskiy if (bytes <= tail) {
1467dbdf699cSVladimir Sementsov-Ogievskiy /* nothing to do after shrink */
1468dbdf699cSVladimir Sementsov-Ogievskiy return;
1469dbdf699cSVladimir Sementsov-Ogievskiy }
1470dbdf699cSVladimir Sementsov-Ogievskiy bytes -= tail;
1471dbdf699cSVladimir Sementsov-Ogievskiy }
1472dbdf699cSVladimir Sementsov-Ogievskiy
1473dbdf699cSVladimir Sementsov-Ogievskiy /*
14747e277545SEric Blake * Tails are either clean or shrunk, so for dirty bitmap resetting
14757e277545SEric Blake * we safely align the range narrower. But for zero bitmap, round
14767e277545SEric Blake * range wider for checking or clearing, and narrower for setting.
1477dbdf699cSVladimir Sementsov-Ogievskiy */
1478870f8963SEric Blake dirty_bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity);
1479870f8963SEric Blake dirty_bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity);
1480870f8963SEric Blake if (dirty_bitmap_offset < dirty_bitmap_end) {
1481870f8963SEric Blake bdrv_reset_dirty_bitmap(job->dirty_bitmap, dirty_bitmap_offset,
1482870f8963SEric Blake dirty_bitmap_end - dirty_bitmap_offset);
1483dbdf699cSVladimir Sementsov-Ogievskiy }
14847e277545SEric Blake zero_bitmap_offset = offset / job->granularity;
14857e277545SEric Blake zero_bitmap_end = DIV_ROUND_UP(offset + bytes, job->granularity);
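/*
 * Worked example (hypothetical numbers, 64 KiB granularity): a guest write
 * at offset 100 KiB of 200 KiB ends at 300 KiB.  If the surrounding chunks
 * are clean, nothing is shrunk above: dirty_bitmap_offset/end then cover
 * only the fully written chunks [128 KiB, 256 KiB), while
 * zero_bitmap_offset/end round outwards to chunks 1 through 4, i.e.
 * [64 KiB, 320 KiB), since a partial write can invalidate a known-zero
 * chunk, but only a fully covered chunk may be marked zero below.
 */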
1486d06107adSMax Reitz
14875c511ac3SVladimir Sementsov-Ogievskiy job_progress_increase_remaining(&job->common.job, bytes);
1488d69a879bSHanna Reitz job->active_write_bytes_in_flight += bytes;
1489d06107adSMax Reitz
1490d06107adSMax Reitz switch (method) {
1491d06107adSMax Reitz case MIRROR_METHOD_COPY:
14927e277545SEric Blake if (job->zero_bitmap) {
14937e277545SEric Blake bitmap_clear(job->zero_bitmap, zero_bitmap_offset,
14947e277545SEric Blake zero_bitmap_end - zero_bitmap_offset);
14957e277545SEric Blake }
1496dbdf699cSVladimir Sementsov-Ogievskiy ret = blk_co_pwritev_part(job->target, offset, bytes,
1497dbdf699cSVladimir Sementsov-Ogievskiy qiov, qiov_offset, flags);
1498d06107adSMax Reitz break;
1499d06107adSMax Reitz
1500d06107adSMax Reitz case MIRROR_METHOD_ZERO:
15017e277545SEric Blake if (job->zero_bitmap) {
15027e277545SEric Blake if (find_next_zero_bit(job->zero_bitmap, zero_bitmap_end,
15037e277545SEric Blake zero_bitmap_offset) == zero_bitmap_end) {
15047e277545SEric Blake ret = 0;
15057e277545SEric Blake break;
15067e277545SEric Blake }
15077e277545SEric Blake }
1508d06107adSMax Reitz assert(!qiov);
15095c511ac3SVladimir Sementsov-Ogievskiy ret = blk_co_pwrite_zeroes(job->target, offset, bytes, flags);
15107e277545SEric Blake if (job->zero_bitmap && ret >= 0) {
15117e277545SEric Blake bitmap_set(job->zero_bitmap, dirty_bitmap_offset / job->granularity,
15127e277545SEric Blake (dirty_bitmap_end - dirty_bitmap_offset) /
15137e277545SEric Blake job->granularity);
15147e277545SEric Blake }
1515d06107adSMax Reitz break;
1516d06107adSMax Reitz
1517d06107adSMax Reitz case MIRROR_METHOD_DISCARD:
15187e277545SEric Blake if (job->zero_bitmap) {
15197e277545SEric Blake bitmap_clear(job->zero_bitmap, zero_bitmap_offset,
15207e277545SEric Blake zero_bitmap_end - zero_bitmap_offset);
15217e277545SEric Blake }
1522d06107adSMax Reitz assert(!qiov);
15235c511ac3SVladimir Sementsov-Ogievskiy ret = blk_co_pdiscard(job->target, offset, bytes);
1524d06107adSMax Reitz break;
1525d06107adSMax Reitz
1526d06107adSMax Reitz default:
1527d06107adSMax Reitz abort();
1528d06107adSMax Reitz }
1529d06107adSMax Reitz
1530d69a879bSHanna Reitz job->active_write_bytes_in_flight -= bytes;
1531d06107adSMax Reitz if (ret >= 0) {
15325c511ac3SVladimir Sementsov-Ogievskiy job_progress_update(&job->common.job, bytes);
1533d06107adSMax Reitz } else {
1534d06107adSMax Reitz BlockErrorAction action;
1535d06107adSMax Reitz
1536dbdf699cSVladimir Sementsov-Ogievskiy /*
1537dbdf699cSVladimir Sementsov-Ogievskiy * We failed, so we should mark the whole area dirty, aligned up.
1538dbdf699cSVladimir Sementsov-Ogievskiy * Note that we don't care about any shrunk tails: they were dirty
1539dbdf699cSVladimir Sementsov-Ogievskiy * at function start, and they must still be dirty, as we've locked
1540dbdf699cSVladimir Sementsov-Ogievskiy * the region for the in-flight op.
1541dbdf699cSVladimir Sementsov-Ogievskiy */
1542870f8963SEric Blake dirty_bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity);
1543870f8963SEric Blake dirty_bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity);
1544870f8963SEric Blake bdrv_set_dirty_bitmap(job->dirty_bitmap, dirty_bitmap_offset,
1545870f8963SEric Blake dirty_bitmap_end - dirty_bitmap_offset);
154676cb2f24SFiona Ebner qatomic_set(&job->actively_synced, false);
1547d06107adSMax Reitz
1548d06107adSMax Reitz action = mirror_error_action(job, false, -ret);
1549d06107adSMax Reitz if (action == BLOCK_ERROR_ACTION_REPORT) {
1550d06107adSMax Reitz if (!job->ret) {
1551d06107adSMax Reitz job->ret = ret;
1552d06107adSMax Reitz }
1553d06107adSMax Reitz }
1554d06107adSMax Reitz }
1555d06107adSMax Reitz }
1556d06107adSMax Reitz
1557d06107adSMax Reitz static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s,
1558d06107adSMax Reitz uint64_t offset,
1559d06107adSMax Reitz uint64_t bytes)
1560d06107adSMax Reitz {
1561d06107adSMax Reitz MirrorOp *op;
1562d06107adSMax Reitz uint64_t start_chunk = offset / s->granularity;
1563d06107adSMax Reitz uint64_t end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity);
1564d06107adSMax Reitz
1565d06107adSMax Reitz op = g_new(MirrorOp, 1);
1566d06107adSMax Reitz *op = (MirrorOp){
1567d06107adSMax Reitz .s = s,
1568d06107adSMax Reitz .offset = offset,
1569d06107adSMax Reitz .bytes = bytes,
1570d06107adSMax Reitz .is_active_write = true,
1571ce8cabbdSKevin Wolf .is_in_flight = true,
1572ead3f1bfSVladimir Sementsov-Ogievskiy .co = qemu_coroutine_self(),
1573d06107adSMax Reitz };
1574d06107adSMax Reitz qemu_co_queue_init(&op->waiting_requests);
1575d06107adSMax Reitz QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
1576d06107adSMax Reitz
1577d06107adSMax Reitz s->in_active_write_counter++;
1578d06107adSMax Reitz
1579d69a879bSHanna Reitz /*
1580d69a879bSHanna Reitz * Wait for concurrent requests affecting the area. If there are already
1581d69a879bSHanna Reitz * running requests that are copying off now-to-be stale data in the area,
1582d69a879bSHanna Reitz * we must wait for them to finish before we begin writing fresh data to the
1583d69a879bSHanna Reitz * target so that the write operations appear in the correct order.
1584d69a879bSHanna Reitz * Note that background requests (see mirror_iteration()) in contrast only
1585d69a879bSHanna Reitz * wait for conflicting requests at the start of the dirty area, and then
1586d69a879bSHanna Reitz * (based on the in_flight_bitmap) truncate the area to copy so it will not
1587d69a879bSHanna Reitz * conflict with any requests beyond that. For active writes, however, we
1588d69a879bSHanna Reitz * cannot truncate that area. The request from our parent must be blocked
1589d69a879bSHanna Reitz * until the area is copied in full. Therefore, we must wait for the whole
1590d69a879bSHanna Reitz * area to become free of concurrent requests.
1591d69a879bSHanna Reitz */
1592d06107adSMax Reitz mirror_wait_on_conflicts(op, s, offset, bytes);
1593d06107adSMax Reitz
1594d06107adSMax Reitz bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);
1595d06107adSMax Reitz
1596d06107adSMax Reitz return op;
1597d06107adSMax Reitz }
1598d06107adSMax Reitz
15999c93652dSKevin Wolf static void coroutine_fn GRAPH_RDLOCK active_write_settle(MirrorOp *op)
1600d06107adSMax Reitz {
1601d06107adSMax Reitz uint64_t start_chunk = op->offset / op->s->granularity;
1602d06107adSMax Reitz uint64_t end_chunk = DIV_ROUND_UP(op->offset + op->bytes,
1603d06107adSMax Reitz op->s->granularity);
1604d06107adSMax Reitz
160576cb2f24SFiona Ebner if (!--op->s->in_active_write_counter &&
160676cb2f24SFiona Ebner qatomic_read(&op->s->actively_synced)) {
1607d06107adSMax Reitz BdrvChild *source = op->s->mirror_top_bs->backing;
1608d06107adSMax Reitz
1609d06107adSMax Reitz if (QLIST_FIRST(&source->bs->parents) == source &&
1610d06107adSMax Reitz QLIST_NEXT(source, next_parent) == NULL)
1611d06107adSMax Reitz {
1612d06107adSMax Reitz /* Assert that we are back in sync once all active write
1613d06107adSMax Reitz * operations are settled.
1614d06107adSMax Reitz * Note that we can only assert this if the mirror node
1615d06107adSMax Reitz * is the source node's only parent. */
1616d06107adSMax Reitz assert(!bdrv_get_dirty_count(op->s->dirty_bitmap));
1617d06107adSMax Reitz }
1618d06107adSMax Reitz }
1619d06107adSMax Reitz bitmap_clear(op->s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);
1620d06107adSMax Reitz QTAILQ_REMOVE(&op->s->ops_in_flight, op, next);
1621d06107adSMax Reitz qemu_co_queue_restart_all(&op->waiting_requests);
1622d06107adSMax Reitz g_free(op);
1623d06107adSMax Reitz }
1624d06107adSMax Reitz
1625b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
1626b9b10c35SKevin Wolf bdrv_mirror_top_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
1627b9b10c35SKevin Wolf QEMUIOVector *qiov, BdrvRequestFlags flags)
16284ef85a9cSKevin Wolf {
16294ef85a9cSKevin Wolf return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
16304ef85a9cSKevin Wolf }
16314ef85a9cSKevin Wolf
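/*
 * Guest requests are mirrored to the target synchronously only while a
 * healthy, non-cancelled job exists and it runs in write-blocking (active)
 * copy mode; otherwise the written range is merely marked dirty for the
 * background loop.
 */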
16327b32ad22SFiona Ebner static bool should_copy_to_target(MirrorBDSOpaque *s)
16337b32ad22SFiona Ebner {
16347b32ad22SFiona Ebner return s->job && s->job->ret >= 0 &&
16357b32ad22SFiona Ebner !job_is_cancelled(&s->job->common.job) &&
16362d400d15SFiona Ebner qatomic_read(&s->job->copy_mode) == MIRROR_COPY_MODE_WRITE_BLOCKING;
16377b32ad22SFiona Ebner }
16387b32ad22SFiona Ebner
16399a5a1c62SEmanuele Giuseppe Esposito static int coroutine_fn GRAPH_RDLOCK
16409a5a1c62SEmanuele Giuseppe Esposito bdrv_mirror_top_do_write(BlockDriverState *bs, MirrorMethod method,
16417b32ad22SFiona Ebner bool copy_to_target, uint64_t offset, uint64_t bytes,
16427b32ad22SFiona Ebner QEMUIOVector *qiov, int flags)
1643d06107adSMax Reitz {
1644d06107adSMax Reitz MirrorOp *op = NULL;
1645d06107adSMax Reitz MirrorBDSOpaque *s = bs->opaque;
1646d06107adSMax Reitz int ret = 0;
1647d06107adSMax Reitz
1648d06107adSMax Reitz if (copy_to_target) {
1649d06107adSMax Reitz op = active_write_prepare(s->job, offset, bytes);
1650d06107adSMax Reitz }
1651d06107adSMax Reitz
1652d06107adSMax Reitz switch (method) {
1653d06107adSMax Reitz case MIRROR_METHOD_COPY:
1654d06107adSMax Reitz ret = bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
1655d06107adSMax Reitz break;
1656d06107adSMax Reitz
1657d06107adSMax Reitz case MIRROR_METHOD_ZERO:
1658d06107adSMax Reitz ret = bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
1659d06107adSMax Reitz break;
1660d06107adSMax Reitz
1661d06107adSMax Reitz case MIRROR_METHOD_DISCARD:
16620b9fd3f4SFam Zheng ret = bdrv_co_pdiscard(bs->backing, offset, bytes);
1663d06107adSMax Reitz break;
1664d06107adSMax Reitz
1665d06107adSMax Reitz default:
1666d06107adSMax Reitz abort();
1667d06107adSMax Reitz }
1668d06107adSMax Reitz
1669058cfca5SFiona Ebner if (!copy_to_target && s->job && s->job->dirty_bitmap) {
167076cb2f24SFiona Ebner qatomic_set(&s->job->actively_synced, false);
1671058cfca5SFiona Ebner bdrv_set_dirty_bitmap(s->job->dirty_bitmap, offset, bytes);
1672058cfca5SFiona Ebner }
1673058cfca5SFiona Ebner
1674d06107adSMax Reitz if (ret < 0) {
1675d06107adSMax Reitz goto out;
1676d06107adSMax Reitz }
1677d06107adSMax Reitz
1678d06107adSMax Reitz if (copy_to_target) {
1679d06107adSMax Reitz do_sync_target_write(s->job, method, offset, bytes, qiov, flags);
1680d06107adSMax Reitz }
1681d06107adSMax Reitz
1682d06107adSMax Reitz out:
1683d06107adSMax Reitz if (copy_to_target) {
1684d06107adSMax Reitz active_write_settle(op);
1685d06107adSMax Reitz }
1686d06107adSMax Reitz return ret;
1687d06107adSMax Reitz }
1688d06107adSMax Reitz
1689b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
1690b9b10c35SKevin Wolf bdrv_mirror_top_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
1691b9b10c35SKevin Wolf QEMUIOVector *qiov, BdrvRequestFlags flags)
16924ef85a9cSKevin Wolf {
1693d06107adSMax Reitz QEMUIOVector bounce_qiov;
1694d06107adSMax Reitz void *bounce_buf;
1695d06107adSMax Reitz int ret = 0;
16967b32ad22SFiona Ebner bool copy_to_target = should_copy_to_target(bs->opaque);
1697d06107adSMax Reitz
1698d06107adSMax Reitz if (copy_to_target) {
1699d06107adSMax Reitz /* The guest might concurrently modify the data to write; but
1700d06107adSMax Reitz * the data on source and destination must match, so we have
1701d06107adSMax Reitz * to use a bounce buffer if we are going to write to the
1702d06107adSMax Reitz * target now. */
1703d06107adSMax Reitz bounce_buf = qemu_blockalign(bs, bytes);
1704d06107adSMax Reitz iov_to_buf_full(qiov->iov, qiov->niov, 0, bounce_buf, bytes);
1705d06107adSMax Reitz
1706d06107adSMax Reitz qemu_iovec_init(&bounce_qiov, 1);
1707d06107adSMax Reitz qemu_iovec_add(&bounce_qiov, bounce_buf, bytes);
1708d06107adSMax Reitz qiov = &bounce_qiov;
1709e8b65355SStefan Hajnoczi
1710e8b65355SStefan Hajnoczi flags &= ~BDRV_REQ_REGISTERED_BUF;
1711d06107adSMax Reitz }
1712d06107adSMax Reitz
17137b32ad22SFiona Ebner ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, copy_to_target,
17147b32ad22SFiona Ebner offset, bytes, qiov, flags);
1715d06107adSMax Reitz
1716d06107adSMax Reitz if (copy_to_target) {
1717d06107adSMax Reitz qemu_iovec_destroy(&bounce_qiov);
1718d06107adSMax Reitz qemu_vfree(bounce_buf);
1719d06107adSMax Reitz }
1720d06107adSMax Reitz
1721d06107adSMax Reitz return ret;
17224ef85a9cSKevin Wolf }
17234ef85a9cSKevin Wolf
172488095349SEmanuele Giuseppe Esposito static int coroutine_fn GRAPH_RDLOCK bdrv_mirror_top_flush(BlockDriverState *bs)
17254ef85a9cSKevin Wolf {
1726ce960aa9SVladimir Sementsov-Ogievskiy if (bs->backing == NULL) {
1727ce960aa9SVladimir Sementsov-Ogievskiy /* we can be here after failed bdrv_append in mirror_start_job */
1728ce960aa9SVladimir Sementsov-Ogievskiy return 0;
1729ce960aa9SVladimir Sementsov-Ogievskiy }
17304ef85a9cSKevin Wolf return bdrv_co_flush(bs->backing->bs);
17314ef85a9cSKevin Wolf }
17324ef85a9cSKevin Wolf
1733abaf8b75SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
1734abaf8b75SKevin Wolf bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
1735abaf8b75SKevin Wolf int64_t bytes, BdrvRequestFlags flags)
17364ef85a9cSKevin Wolf {
17377b32ad22SFiona Ebner bool copy_to_target = should_copy_to_target(bs->opaque);
17387b32ad22SFiona Ebner return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, copy_to_target,
17397b32ad22SFiona Ebner offset, bytes, NULL, flags);
17404ef85a9cSKevin Wolf }
17414ef85a9cSKevin Wolf
17429a5a1c62SEmanuele Giuseppe Esposito static int coroutine_fn GRAPH_RDLOCK
17439a5a1c62SEmanuele Giuseppe Esposito bdrv_mirror_top_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
17444ef85a9cSKevin Wolf {
17457b32ad22SFiona Ebner bool copy_to_target = should_copy_to_target(bs->opaque);
17467b32ad22SFiona Ebner return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, copy_to_target,
17477b32ad22SFiona Ebner offset, bytes, NULL, 0);
17484ef85a9cSKevin Wolf }
17494ef85a9cSKevin Wolf
1750004915a9SKevin Wolf static void GRAPH_RDLOCK bdrv_mirror_top_refresh_filename(BlockDriverState *bs)
1751fd4a6493SKevin Wolf {
175218775ff3SVladimir Sementsov-Ogievskiy if (bs->backing == NULL) {
175318775ff3SVladimir Sementsov-Ogievskiy /* we can be here after failed bdrv_attach_child in
175418775ff3SVladimir Sementsov-Ogievskiy * bdrv_set_backing_hd */
175518775ff3SVladimir Sementsov-Ogievskiy return;
175618775ff3SVladimir Sementsov-Ogievskiy }
1757fd4a6493SKevin Wolf pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
1758fd4a6493SKevin Wolf bs->backing->bs->filename);
1759fd4a6493SKevin Wolf }
1760fd4a6493SKevin Wolf
17614ef85a9cSKevin Wolf static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c,
1762bf8e925eSMax Reitz BdrvChildRole role,
1763e0995dc3SKevin Wolf BlockReopenQueue *reopen_queue,
17644ef85a9cSKevin Wolf uint64_t perm, uint64_t shared,
17654ef85a9cSKevin Wolf uint64_t *nperm, uint64_t *nshared)
17664ef85a9cSKevin Wolf {
1767f94dc3b4SMax Reitz MirrorBDSOpaque *s = bs->opaque;
1768f94dc3b4SMax Reitz
1769f94dc3b4SMax Reitz if (s->stop) {
1770f94dc3b4SMax Reitz /*
1771f94dc3b4SMax Reitz * If the job is to be stopped, we do not need to forward
1772f94dc3b4SMax Reitz * anything to the real image.
1773f94dc3b4SMax Reitz */
1774f94dc3b4SMax Reitz *nperm = 0;
1775f94dc3b4SMax Reitz *nshared = BLK_PERM_ALL;
1776f94dc3b4SMax Reitz return;
1777f94dc3b4SMax Reitz }
1778f94dc3b4SMax Reitz
177953431b90SMax Reitz bdrv_default_perms(bs, c, role, reopen_queue,
178053431b90SMax Reitz perm, shared, nperm, nshared);
17814ef85a9cSKevin Wolf
178253431b90SMax Reitz if (s->is_commit) {
178353431b90SMax Reitz /*
178453431b90SMax Reitz * For commit jobs, we cannot take CONSISTENT_READ, because
178553431b90SMax Reitz * that permission is unshared for everything above the base
178653431b90SMax Reitz * node (except for filters on the base node).
178753431b90SMax Reitz * We also have to force-share the WRITE permission, or
178853431b90SMax Reitz * otherwise we would block ourselves at the base node (if
178953431b90SMax Reitz * writes are blocked for a node, they are also blocked for
179053431b90SMax Reitz * its backing file).
179153431b90SMax Reitz * (We could also share RESIZE, because it may be needed for
179253431b90SMax Reitz * the target if its size is less than the top node's; but
179353431b90SMax Reitz * bdrv_default_perms_for_cow() automatically shares RESIZE
179453431b90SMax Reitz * for backing nodes if WRITE is shared, so there is no need
179553431b90SMax Reitz * to do it here.)
179653431b90SMax Reitz */
179753431b90SMax Reitz *nperm &= ~BLK_PERM_CONSISTENT_READ;
179853431b90SMax Reitz *nshared |= BLK_PERM_WRITE;
179953431b90SMax Reitz }
18004ef85a9cSKevin Wolf }
18014ef85a9cSKevin Wolf
18024ef85a9cSKevin Wolf /* Dummy node that provides consistent read to its users without requiring it
18034ef85a9cSKevin Wolf * from its backing file and that allows writes on the backing file chain. */
18044ef85a9cSKevin Wolf static BlockDriver bdrv_mirror_top = {
18054ef85a9cSKevin Wolf .format_name = "mirror_top",
18064ef85a9cSKevin Wolf .bdrv_co_preadv = bdrv_mirror_top_preadv,
18074ef85a9cSKevin Wolf .bdrv_co_pwritev = bdrv_mirror_top_pwritev,
18084ef85a9cSKevin Wolf .bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes,
18094ef85a9cSKevin Wolf .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard,
18104ef85a9cSKevin Wolf .bdrv_co_flush = bdrv_mirror_top_flush,
1811fd4a6493SKevin Wolf .bdrv_refresh_filename = bdrv_mirror_top_refresh_filename,
18124ef85a9cSKevin Wolf .bdrv_child_perm = bdrv_mirror_top_child_perm,
18136540fd15SMax Reitz
18146540fd15SMax Reitz .is_filter = true,
1815046fd84fSVladimir Sementsov-Ogievskiy .filtered_child_is_backing = true,
18164ef85a9cSKevin Wolf };
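/*
 * In rough terms, a guest write thus flows through bdrv_mirror_top_pwritev()
 * and bdrv_mirror_top_do_write() to the source node and, whenever
 * should_copy_to_target() says so, additionally through
 * do_sync_target_write() to the target; that is what keeps the target in
 * sync with guest writes in write-blocking (active) mode without waiting
 * for the background loop.
 */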
18174ef85a9cSKevin Wolf
1818cc19f177SVladimir Sementsov-Ogievskiy static BlockJob *mirror_start_job(
1819cc19f177SVladimir Sementsov-Ogievskiy const char *job_id, BlockDriverState *bs,
182047970dfbSJohn Snow int creation_flags, BlockDriverState *target,
182147970dfbSJohn Snow const char *replaces, int64_t speed,
182247970dfbSJohn Snow uint32_t granularity, int64_t buf_size,
18239474d97bSEric Blake MirrorSyncMode sync_mode,
1824274fcceeSMax Reitz BlockMirrorBackingMode backing_mode,
1825253b43a2SEric Blake bool target_is_zero,
182603544a6eSFam Zheng BlockdevOnError on_source_error,
1827b952b558SPaolo Bonzini BlockdevOnError on_target_error,
18280fc9f8eaSFam Zheng bool unmap,
1829097310b5SMarkus Armbruster BlockCompletionFunc *cb,
183051ccfa2dSFam Zheng void *opaque,
183103544a6eSFam Zheng const BlockJobDriver *driver,
18329474d97bSEric Blake BlockDriverState *base,
183351ccfa2dSFam Zheng bool auto_complete, const char *filter_node_name,
1834481debaaSMax Reitz bool is_mirror, MirrorCopyMode copy_mode,
18357d99ae59SAlexander Ivanov bool base_ro,
183651ccfa2dSFam Zheng Error **errp)
1837893f7ebaSPaolo Bonzini {
1838893f7ebaSPaolo Bonzini MirrorBlockJob *s;
1839429076e8SMax Reitz MirrorBDSOpaque *bs_opaque;
18404ef85a9cSKevin Wolf BlockDriverState *mirror_top_bs;
18414ef85a9cSKevin Wolf bool target_is_backing;
18423f072a7fSMax Reitz uint64_t target_perms, target_shared_perms;
1843d7086422SKevin Wolf int ret;
1844893f7ebaSPaolo Bonzini
18453804e3cfSKevin Wolf GLOBAL_STATE_CODE();
18463804e3cfSKevin Wolf
1847eee13dfeSPaolo Bonzini if (granularity == 0) {
1848341ebc2fSJohn Snow granularity = bdrv_get_default_bitmap_granularity(target);
1849eee13dfeSPaolo Bonzini }
1850eee13dfeSPaolo Bonzini
185131826642SEric Blake assert(is_power_of_2(granularity));
1852eee13dfeSPaolo Bonzini
185348ac0a4dSWen Congyang if (buf_size < 0) {
185448ac0a4dSWen Congyang error_setg(errp, "Invalid parameter 'buf-size'");
1855cc19f177SVladimir Sementsov-Ogievskiy return NULL;
185648ac0a4dSWen Congyang }
185748ac0a4dSWen Congyang
185848ac0a4dSWen Congyang if (buf_size == 0) {
185948ac0a4dSWen Congyang buf_size = DEFAULT_MIRROR_BUF_SIZE;
186048ac0a4dSWen Congyang }
18615bc361b8SFam Zheng
1862ad74751fSKevin Wolf bdrv_graph_rdlock_main_loop();
18633f072a7fSMax Reitz if (bdrv_skip_filters(bs) == bdrv_skip_filters(target)) {
186486fae10cSKevin Wolf error_setg(errp, "Can't mirror node into itself");
1865ad74751fSKevin Wolf bdrv_graph_rdunlock_main_loop();
1866cc19f177SVladimir Sementsov-Ogievskiy return NULL;
186786fae10cSKevin Wolf }
186886fae10cSKevin Wolf
186953431b90SMax Reitz target_is_backing = bdrv_chain_contains(bs, target);
1870ad74751fSKevin Wolf bdrv_graph_rdunlock_main_loop();
187153431b90SMax Reitz
18724ef85a9cSKevin Wolf /* In the case of active commit, add dummy driver to provide consistent
18734ef85a9cSKevin Wolf * reads on the top, while disabling it in the intermediate nodes, and make
18744ef85a9cSKevin Wolf * the backing chain writable. */
18756cdbceb1SKevin Wolf mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name,
18766cdbceb1SKevin Wolf BDRV_O_RDWR, errp);
18774ef85a9cSKevin Wolf if (mirror_top_bs == NULL) {
1878cc19f177SVladimir Sementsov-Ogievskiy return NULL;
1879893f7ebaSPaolo Bonzini }
1880d3c8c674SKevin Wolf if (!filter_node_name) {
1881d3c8c674SKevin Wolf mirror_top_bs->implicit = true;
1882d3c8c674SKevin Wolf }
1883e5182c1cSMax Reitz
1884e5182c1cSMax Reitz /* So that we can always drop this node */
1885e5182c1cSMax Reitz mirror_top_bs->never_freeze = true;
1886e5182c1cSMax Reitz
18874ef85a9cSKevin Wolf mirror_top_bs->total_sectors = bs->total_sectors;
1888228345bfSMax Reitz mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
188980f5c33fSKevin Wolf mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
189080f5c33fSKevin Wolf BDRV_REQ_NO_FALLBACK;
1891429076e8SMax Reitz bs_opaque = g_new0(MirrorBDSOpaque, 1);
1892429076e8SMax Reitz mirror_top_bs->opaque = bs_opaque;
1893893f7ebaSPaolo Bonzini
189453431b90SMax Reitz bs_opaque->is_commit = target_is_backing;
189553431b90SMax Reitz
18964ef85a9cSKevin Wolf bdrv_drained_begin(bs);
1897934aee14SVladimir Sementsov-Ogievskiy ret = bdrv_append(mirror_top_bs, bs, errp);
18984ef85a9cSKevin Wolf bdrv_drained_end(bs);
18994ef85a9cSKevin Wolf
1900934aee14SVladimir Sementsov-Ogievskiy if (ret < 0) {
1901b2c2832cSKevin Wolf bdrv_unref(mirror_top_bs);
1902cc19f177SVladimir Sementsov-Ogievskiy return NULL;
1903b2c2832cSKevin Wolf }
1904b2c2832cSKevin Wolf
19054ef85a9cSKevin Wolf /* Make sure that the source is not resized while the job is running */
190675859b94SJohn Snow s = block_job_create(job_id, driver, NULL, mirror_top_bs,
19074ef85a9cSKevin Wolf BLK_PERM_CONSISTENT_READ,
19084ef85a9cSKevin Wolf BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
190964631f36SVladimir Sementsov-Ogievskiy BLK_PERM_WRITE, speed,
19104ef85a9cSKevin Wolf creation_flags, cb, opaque, errp);
19114ef85a9cSKevin Wolf if (!s) {
19124ef85a9cSKevin Wolf goto fail;
19134ef85a9cSKevin Wolf }
1914429076e8SMax Reitz
19157a25fcd0SMax Reitz /* The block job now has a reference to this node */
19167a25fcd0SMax Reitz bdrv_unref(mirror_top_bs);
19177a25fcd0SMax Reitz
19184ef85a9cSKevin Wolf s->mirror_top_bs = mirror_top_bs;
19197d99ae59SAlexander Ivanov s->base_ro = base_ro;
19204ef85a9cSKevin Wolf
19214ef85a9cSKevin Wolf /* No resize for the target either; while the mirror is still running, a
19224ef85a9cSKevin Wolf * consistent read isn't necessarily possible. We could possibly allow
19234ef85a9cSKevin Wolf * writes and graph modifications, though it would likely defeat the
19244ef85a9cSKevin Wolf * purpose of a mirror, so leave them blocked for now.
19254ef85a9cSKevin Wolf *
19264ef85a9cSKevin Wolf * In the case of active commit, things look a bit different, though,
19274ef85a9cSKevin Wolf * because the target is an already populated backing file in active use.
19284ef85a9cSKevin Wolf * We can allow anything except resize there. */
19293f072a7fSMax Reitz
19303f072a7fSMax Reitz target_perms = BLK_PERM_WRITE;
19313f072a7fSMax Reitz target_shared_perms = BLK_PERM_WRITE_UNCHANGED;
19323f072a7fSMax Reitz
19333f072a7fSMax Reitz if (target_is_backing) {
19343f072a7fSMax Reitz int64_t bs_size, target_size;
19353f072a7fSMax Reitz bs_size = bdrv_getlength(bs);
19363f072a7fSMax Reitz if (bs_size < 0) {
19373f072a7fSMax Reitz error_setg_errno(errp, -bs_size,
19383f072a7fSMax Reitz "Could not inquire top image size");
19393f072a7fSMax Reitz goto fail;
19403f072a7fSMax Reitz }
19413f072a7fSMax Reitz
19423f072a7fSMax Reitz target_size = bdrv_getlength(target);
19433f072a7fSMax Reitz if (target_size < 0) {
19443f072a7fSMax Reitz error_setg_errno(errp, -target_size,
19453f072a7fSMax Reitz "Could not inquire base image size");
19463f072a7fSMax Reitz goto fail;
19473f072a7fSMax Reitz }
19483f072a7fSMax Reitz
19493f072a7fSMax Reitz if (target_size < bs_size) {
19503f072a7fSMax Reitz target_perms |= BLK_PERM_RESIZE;
19513f072a7fSMax Reitz }
19523f072a7fSMax Reitz
195364631f36SVladimir Sementsov-Ogievskiy target_shared_perms |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
1954ad74751fSKevin Wolf } else {
1955ad74751fSKevin Wolf bdrv_graph_rdlock_main_loop();
1956ad74751fSKevin Wolf if (bdrv_chain_contains(bs, bdrv_skip_filters(target))) {
19573f072a7fSMax Reitz /*
19583f072a7fSMax Reitz * We may want to allow this in the future, but it would
19593f072a7fSMax Reitz * require taking some extra care.
19603f072a7fSMax Reitz */
1961ad74751fSKevin Wolf error_setg(errp, "Cannot mirror to a filter on top of a node in "
1962ad74751fSKevin Wolf "the source's backing chain");
1963ad74751fSKevin Wolf bdrv_graph_rdunlock_main_loop();
19643f072a7fSMax Reitz goto fail;
19653f072a7fSMax Reitz }
1966ad74751fSKevin Wolf bdrv_graph_rdunlock_main_loop();
1967ad74751fSKevin Wolf }
19683f072a7fSMax Reitz
1969d861ab3aSKevin Wolf s->target = blk_new(s->common.job.aio_context,
19703f072a7fSMax Reitz target_perms, target_shared_perms);
1971d7086422SKevin Wolf ret = blk_insert_bs(s->target, target, errp);
1972d7086422SKevin Wolf if (ret < 0) {
19734ef85a9cSKevin Wolf goto fail;
1974d7086422SKevin Wolf }
1975045a2f82SFam Zheng if (is_mirror) {
1976045a2f82SFam Zheng /* XXX: Mirror target could be an NBD server of the target QEMU in the case
1977045a2f82SFam Zheng * of non-shared block migration. To allow migration completion, we
1978045a2f82SFam Zheng * have to allow "inactivate" of the target BB. When that happens, we
1979045a2f82SFam Zheng * know the job is drained, and the vcpus are stopped, so no write
1980045a2f82SFam Zheng * operation will be performed. Block layer already has assertions to
1981045a2f82SFam Zheng * ensure that. */
1982045a2f82SFam Zheng blk_set_force_allow_inactivate(s->target);
1983045a2f82SFam Zheng }
19849ff7f0dfSKevin Wolf blk_set_allow_aio_context_change(s->target, true);
1985cf312932SKevin Wolf blk_set_disable_request_queuing(s->target, true);
1986e253f4b8SKevin Wolf
1987ad74751fSKevin Wolf bdrv_graph_rdlock_main_loop();
198809158f00SBenoît Canet s->replaces = g_strdup(replaces);
1989b952b558SPaolo Bonzini s->on_source_error = on_source_error;
1990b952b558SPaolo Bonzini s->on_target_error = on_target_error;
19919474d97bSEric Blake s->sync_mode = sync_mode;
1992274fcceeSMax Reitz s->backing_mode = backing_mode;
1993d17a34bfSEric Blake s->target_is_zero = target_is_zero;
19942d400d15SFiona Ebner qatomic_set(&s->copy_mode, copy_mode);
19955bc361b8SFam Zheng s->base = base;
19963f072a7fSMax Reitz s->base_overlay = bdrv_find_overlay(bs, base);
1997eee13dfeSPaolo Bonzini s->granularity = granularity;
199848ac0a4dSWen Congyang s->buf_size = ROUND_UP(buf_size, granularity);
19990fc9f8eaSFam Zheng s->unmap = unmap;
2000b49f7eadSWen Congyang if (auto_complete) {
2001b49f7eadSWen Congyang s->should_complete = true;
2002b49f7eadSWen Congyang }
2003ad74751fSKevin Wolf bdrv_graph_rdunlock_main_loop();
2004b812f671SPaolo Bonzini
2005058cfca5SFiona Ebner s->dirty_bitmap = bdrv_create_dirty_bitmap(s->mirror_top_bs, granularity,
2006058cfca5SFiona Ebner NULL, errp);
2007b8afb520SFam Zheng if (!s->dirty_bitmap) {
200888f9d1b3SKevin Wolf goto fail;
2009b8afb520SFam Zheng }
2010058cfca5SFiona Ebner
2011058cfca5SFiona Ebner /*
2012058cfca5SFiona Ebner * The dirty bitmap is set by bdrv_mirror_top_do_write() when not in active
2013058cfca5SFiona Ebner * mode.
2014058cfca5SFiona Ebner */
2015dbdf699cSVladimir Sementsov-Ogievskiy bdrv_disable_dirty_bitmap(s->dirty_bitmap);
201610f3cd15SAlberto Garcia
2017*ffdcd081SFiona Ebner bdrv_drain_all_begin();
20186bc30f19SStefan Hajnoczi bdrv_graph_wrlock();
201967b24427SAlberto Garcia ret = block_job_add_bdrv(&s->common, "source", bs, 0,
202067b24427SAlberto Garcia BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
202167b24427SAlberto Garcia BLK_PERM_CONSISTENT_READ,
202267b24427SAlberto Garcia errp);
202367b24427SAlberto Garcia if (ret < 0) {
20246bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
2025*ffdcd081SFiona Ebner bdrv_drain_all_end();
202667b24427SAlberto Garcia goto fail;
202767b24427SAlberto Garcia }
202867b24427SAlberto Garcia
20294ef85a9cSKevin Wolf /* Required permissions are already taken with blk_new() */
203076d554e2SKevin Wolf block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL,
203176d554e2SKevin Wolf &error_abort);
203276d554e2SKevin Wolf
2033f3ede4b0SAlberto Garcia /* In commit_active_start() all intermediate nodes disappear, so
2034f3ede4b0SAlberto Garcia * any jobs in them must be blocked */
20354ef85a9cSKevin Wolf if (target_is_backing) {
20363f072a7fSMax Reitz BlockDriverState *iter, *filtered_target;
20373f072a7fSMax Reitz uint64_t iter_shared_perms;
20383f072a7fSMax Reitz
20393f072a7fSMax Reitz /*
20403f072a7fSMax Reitz * The topmost node with
20413f072a7fSMax Reitz * bdrv_skip_filters(filtered_target) == bdrv_skip_filters(target)
20423f072a7fSMax Reitz */
20433f072a7fSMax Reitz filtered_target = bdrv_cow_bs(bdrv_find_overlay(bs, target));
20443f072a7fSMax Reitz
20453f072a7fSMax Reitz assert(bdrv_skip_filters(filtered_target) ==
20463f072a7fSMax Reitz bdrv_skip_filters(target));
20473f072a7fSMax Reitz
20483f072a7fSMax Reitz /*
20493f072a7fSMax Reitz * XXX BLK_PERM_WRITE needs to be allowed so we don't block
20504ef85a9cSKevin Wolf * ourselves at s->base (if writes are blocked for a node, they are
20514ef85a9cSKevin Wolf * also blocked for its backing file). The other option would be a
20523f072a7fSMax Reitz * second filter driver above s->base (== target).
20533f072a7fSMax Reitz */
20543f072a7fSMax Reitz iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
20553f072a7fSMax Reitz
20563f072a7fSMax Reitz for (iter = bdrv_filter_or_cow_bs(bs); iter != target;
20573f072a7fSMax Reitz iter = bdrv_filter_or_cow_bs(iter))
20583f072a7fSMax Reitz {
20593f072a7fSMax Reitz if (iter == filtered_target) {
20603f072a7fSMax Reitz /*
20613f072a7fSMax Reitz * From here on, all nodes are filters on the base.
20623f072a7fSMax Reitz * This allows us to share BLK_PERM_CONSISTENT_READ.
20633f072a7fSMax Reitz */
20643f072a7fSMax Reitz iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
20653f072a7fSMax Reitz }
20663f072a7fSMax Reitz
20674ef85a9cSKevin Wolf ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
20683f072a7fSMax Reitz iter_shared_perms, errp);
20694ef85a9cSKevin Wolf if (ret < 0) {
20706bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
2071*ffdcd081SFiona Ebner bdrv_drain_all_end();
20724ef85a9cSKevin Wolf goto fail;
20734ef85a9cSKevin Wolf }
2074f3ede4b0SAlberto Garcia }
2075ef53dc09SAlberto Garcia
2076ef53dc09SAlberto Garcia if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) {
20776bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
2078*ffdcd081SFiona Ebner bdrv_drain_all_end();
2079ef53dc09SAlberto Garcia goto fail;
2080ef53dc09SAlberto Garcia }
2081f3ede4b0SAlberto Garcia }
20826bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
2083*ffdcd081SFiona Ebner bdrv_drain_all_end();
208410f3cd15SAlberto Garcia
208512aa4082SMax Reitz QTAILQ_INIT(&s->ops_in_flight);
208612aa4082SMax Reitz
20875ccac6f1SJohn Snow trace_mirror_start(bs, s, opaque);
2088da01ff7fSKevin Wolf job_start(&s->common.job);
2089cc19f177SVladimir Sementsov-Ogievskiy
2090cc19f177SVladimir Sementsov-Ogievskiy return &s->common;
20914ef85a9cSKevin Wolf
20924ef85a9cSKevin Wolf fail:
20934ef85a9cSKevin Wolf if (s) {
20947a25fcd0SMax Reitz /* Make sure this BDS does not go away until we have completed the graph
20957a25fcd0SMax Reitz * changes below */
20967a25fcd0SMax Reitz bdrv_ref(mirror_top_bs);
20977a25fcd0SMax Reitz
20984ef85a9cSKevin Wolf g_free(s->replaces);
20994ef85a9cSKevin Wolf blk_unref(s->target);
2100429076e8SMax Reitz bs_opaque->job = NULL;
2101e917e2cbSAlberto Garcia if (s->dirty_bitmap) {
21025deb6cbdSVladimir Sementsov-Ogievskiy bdrv_release_dirty_bitmap(s->dirty_bitmap);
2103e917e2cbSAlberto Garcia }
21044ad35181SKevin Wolf job_early_fail(&s->common.job);
21054ef85a9cSKevin Wolf }
21064ef85a9cSKevin Wolf
2107f94dc3b4SMax Reitz bs_opaque->stop = true;
2108ccd6a379SKevin Wolf bdrv_drained_begin(bs);
21096bc30f19SStefan Hajnoczi bdrv_graph_wrlock();
2110ccd6a379SKevin Wolf assert(mirror_top_bs->backing->bs == bs);
2111f94dc3b4SMax Reitz bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
2112c1cef672SFam Zheng &error_abort);
2113ccd6a379SKevin Wolf bdrv_replace_node(mirror_top_bs, bs, &error_abort);
21146bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
2115ccd6a379SKevin Wolf bdrv_drained_end(bs);
21167a25fcd0SMax Reitz
21177a25fcd0SMax Reitz bdrv_unref(mirror_top_bs);
2118cc19f177SVladimir Sementsov-Ogievskiy
2119cc19f177SVladimir Sementsov-Ogievskiy return NULL;
2120893f7ebaSPaolo Bonzini }
212103544a6eSFam Zheng
212271aa9867SAlberto Garcia void mirror_start(const char *job_id, BlockDriverState *bs,
212371aa9867SAlberto Garcia BlockDriverState *target, const char *replaces,
2124a1999b33SJohn Snow int creation_flags, int64_t speed,
2125a1999b33SJohn Snow uint32_t granularity, int64_t buf_size,
2126274fcceeSMax Reitz MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
2127253b43a2SEric Blake bool target_is_zero,
2128274fcceeSMax Reitz BlockdevOnError on_source_error,
212903544a6eSFam Zheng BlockdevOnError on_target_error,
2130481debaaSMax Reitz bool unmap, const char *filter_node_name,
2131481debaaSMax Reitz MirrorCopyMode copy_mode, Error **errp)
213203544a6eSFam Zheng {
213303544a6eSFam Zheng BlockDriverState *base;
213403544a6eSFam Zheng
2135b4ad82aaSEmanuele Giuseppe Esposito GLOBAL_STATE_CODE();
2136b4ad82aaSEmanuele Giuseppe Esposito
2137c8b56501SJohn Snow if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) ||
2138c8b56501SJohn Snow (mode == MIRROR_SYNC_MODE_BITMAP)) {
2139c8b56501SJohn Snow error_setg(errp, "Sync mode '%s' not supported",
2140c8b56501SJohn Snow MirrorSyncMode_str(mode));
2141d58d8453SJohn Snow return;
2142d58d8453SJohn Snow }
2143ad74751fSKevin Wolf
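    /*
     * In sync=top mode, only data allocated above the first backing file is
     * mirrored, so that backing file is the base; the other sync modes have
     * no base node.
     */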
2144ad74751fSKevin Wolf bdrv_graph_rdlock_main_loop();
21453f072a7fSMax Reitz base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
2146ad74751fSKevin Wolf bdrv_graph_rdunlock_main_loop();
2147ad74751fSKevin Wolf
2148a1999b33SJohn Snow mirror_start_job(job_id, bs, creation_flags, target, replaces,
21499474d97bSEric Blake speed, granularity, buf_size, mode, backing_mode,
2150d17a34bfSEric Blake target_is_zero, on_source_error, on_target_error, unmap,
21519474d97bSEric Blake NULL, NULL, &mirror_job_driver, base, false,
21527d99ae59SAlexander Ivanov filter_node_name, true, copy_mode, false, errp);
215303544a6eSFam Zheng }
215403544a6eSFam Zheng
2155cc19f177SVladimir Sementsov-Ogievskiy BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
215647970dfbSJohn Snow BlockDriverState *base, int creation_flags,
215747970dfbSJohn Snow int64_t speed, BlockdevOnError on_error,
21580db832f4SKevin Wolf const char *filter_node_name,
215978bbd910SFam Zheng BlockCompletionFunc *cb, void *opaque,
216078bbd910SFam Zheng bool auto_complete, Error **errp)
216103544a6eSFam Zheng {
21621ba79388SAlberto Garcia bool base_read_only;
2163eb5becc1SVladimir Sementsov-Ogievskiy BlockJob *job;
21644da83585SJeff Cody
2165b4ad82aaSEmanuele Giuseppe Esposito GLOBAL_STATE_CODE();
2166b4ad82aaSEmanuele Giuseppe Esposito
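    /*
     * The base must be writable for the active commit to copy data into it;
     * remember whether it was read-only so it can be restored on failure.
     */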
21671ba79388SAlberto Garcia base_read_only = bdrv_is_read_only(base);
21684da83585SJeff Cody
21691ba79388SAlberto Garcia if (base_read_only) {
21701ba79388SAlberto Garcia if (bdrv_reopen_set_read_only(base, false, errp) < 0) {
2171cc19f177SVladimir Sementsov-Ogievskiy return NULL;
217220a63d2cSFam Zheng }
21731ba79388SAlberto Garcia }
21744da83585SJeff Cody
2175eb5becc1SVladimir Sementsov-Ogievskiy job = mirror_start_job(
2176cc19f177SVladimir Sementsov-Ogievskiy job_id, bs, creation_flags, base, NULL, speed, 0, 0,
21779474d97bSEric Blake MIRROR_SYNC_MODE_TOP, MIRROR_LEAVE_BACKING_CHAIN, false,
217851ccfa2dSFam Zheng on_error, on_error, true, cb, opaque,
21799474d97bSEric Blake &commit_active_job_driver, base, auto_complete,
2180481debaaSMax Reitz filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND,
21817d99ae59SAlexander Ivanov base_read_only, errp);
2182eb5becc1SVladimir Sementsov-Ogievskiy if (!job) {
21834da83585SJeff Cody goto error_restore_flags;
21844da83585SJeff Cody }
21854da83585SJeff Cody
2186eb5becc1SVladimir Sementsov-Ogievskiy return job;
21874da83585SJeff Cody
21884da83585SJeff Cody error_restore_flags:
21894da83585SJeff Cody /* ignore error and errp for bdrv_reopen, because we want to propagate
21904da83585SJeff Cody * the original error */
21911ba79388SAlberto Garcia if (base_read_only) {
21921ba79388SAlberto Garcia bdrv_reopen_set_read_only(base, true, NULL);
21931ba79388SAlberto Garcia }
2194cc19f177SVladimir Sementsov-Ogievskiy return NULL;
219503544a6eSFam Zheng }
2196