xref: /qemu/block/block-backend.c (revision b4682a63f86ed81abcaa543ea6135e17f9e99d01)
1  /*
2   * QEMU Block backends
3   *
4   * Copyright (C) 2014-2016 Red Hat, Inc.
5   *
6   * Authors:
7   *  Markus Armbruster <armbru@redhat.com>,
8   *
9   * This work is licensed under the terms of the GNU LGPL, version 2.1
10   * or later.  See the COPYING.LIB file in the top-level directory.
11   */
12  
13  #include "qemu/osdep.h"
14  #include "sysemu/block-backend.h"
15  #include "block/block_int.h"
16  #include "block/blockjob.h"
17  #include "block/throttle-groups.h"
18  #include "sysemu/blockdev.h"
19  #include "sysemu/sysemu.h"
20  #include "qapi/error.h"
21  #include "qapi/qapi-events-block.h"
22  #include "qemu/id.h"
23  #include "qemu/option.h"
24  #include "trace.h"
25  #include "migration/misc.h"
26  
27  /* Number of coroutines to reserve per attached device model */
28  #define COROUTINE_POOL_RESERVATION 64
29  
30  #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
31  
32  static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
33  
34  typedef struct BlockBackendAioNotifier {
35      void (*attached_aio_context)(AioContext *new_context, void *opaque);
36      void (*detach_aio_context)(void *opaque);
37      void *opaque;
38      QLIST_ENTRY(BlockBackendAioNotifier) list;
39  } BlockBackendAioNotifier;
40  
41  struct BlockBackend {
42      char *name;
43      int refcnt;
44      BdrvChild *root;
45      DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
46      QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
47      QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
48      BlockBackendPublic public;
49  
50      DeviceState *dev;           /* attached device model, if any */
51      const BlockDevOps *dev_ops;
52      void *dev_opaque;
53  
54      /* the block size for which the guest device expects atomicity */
55      int guest_block_size;
56  
57      /* If the BDS tree is removed, some of its options are stored here (which
58       * can be used to restore those options in the new BDS on insert) */
59      BlockBackendRootState root_state;
60  
61      bool enable_write_cache;
62  
63      /* I/O stats (display with "info blockstats"). */
64      BlockAcctStats stats;
65  
66      BlockdevOnError on_read_error, on_write_error;
67      bool iostatus_enabled;
68      BlockDeviceIoStatus iostatus;
69  
70      uint64_t perm;
71      uint64_t shared_perm;
72      bool disable_perm;
73  
74      bool allow_write_beyond_eof;
75  
76      NotifierList remove_bs_notifiers, insert_bs_notifiers;
77      QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
78  
79      int quiesce_counter;
80      VMChangeStateEntry *vmsh;
81      bool force_allow_inactivate;
82  
83      /* Number of in-flight aio requests.  BlockDriverState also counts
84       * in-flight requests but aio requests can exist even when blk->root is
85       * NULL, so we cannot rely on its counter for that case.
86       * Accessed with atomic ops.
87       */
88      unsigned int in_flight;
89  };
90  
91  typedef struct BlockBackendAIOCB {
92      BlockAIOCB common;
93      BlockBackend *blk;
94      int ret;
95  } BlockBackendAIOCB;
96  
97  static const AIOCBInfo block_backend_aiocb_info = {
98      .get_aio_context = blk_aiocb_get_aio_context,
99      .aiocb_size = sizeof(BlockBackendAIOCB),
100  };
101  
102  static void drive_info_del(DriveInfo *dinfo);
103  static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
104  
105  /* All BlockBackends */
106  static QTAILQ_HEAD(, BlockBackend) block_backends =
107      QTAILQ_HEAD_INITIALIZER(block_backends);
108  
109  /* All BlockBackends referenced by the monitor; these are the ones iterated
110   * over by blk_next() */
111  static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
112      QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
113  
114  static void blk_root_inherit_options(int *child_flags, QDict *child_options,
115                                       int parent_flags, QDict *parent_options)
116  {
117      /* We're not supposed to call this function for root nodes */
118      abort();
119  }
120  static void blk_root_drained_begin(BdrvChild *child);
121  static bool blk_root_drained_poll(BdrvChild *child);
122  static void blk_root_drained_end(BdrvChild *child);
123  
124  static void blk_root_change_media(BdrvChild *child, bool load);
125  static void blk_root_resize(BdrvChild *child);
126  
127  static char *blk_root_get_parent_desc(BdrvChild *child)
128  {
129      BlockBackend *blk = child->opaque;
130      char *dev_id;
131  
132      if (blk->name) {
133          return g_strdup(blk->name);
134      }
135  
136      dev_id = blk_get_attached_dev_id(blk);
137      if (*dev_id) {
138          return dev_id;
139      } else {
140          /* TODO Callback into the BB owner for something more detailed */
141          g_free(dev_id);
142          return g_strdup("a block device");
143      }
144  }
145  
146  static const char *blk_root_get_name(BdrvChild *child)
147  {
148      return blk_name(child->opaque);
149  }
150  
151  static void blk_vm_state_changed(void *opaque, int running, RunState state)
152  {
153      Error *local_err = NULL;
154      BlockBackend *blk = opaque;
155  
156      if (state == RUN_STATE_INMIGRATE) {
157          return;
158      }
159  
160      qemu_del_vm_change_state_handler(blk->vmsh);
161      blk->vmsh = NULL;
162      blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
163      if (local_err) {
164          error_report_err(local_err);
165      }
166  }
167  
168  /*
169   * Notifies the user of the BlockBackend that migration has completed. qdev
170   * devices can tighten their permissions in response (specifically revoke
171   * shared write permissions that we needed for storage migration).
172   *
173   * If an error is returned, the VM cannot be allowed to be resumed.
174   */
175  static void blk_root_activate(BdrvChild *child, Error **errp)
176  {
177      BlockBackend *blk = child->opaque;
178      Error *local_err = NULL;
179  
180      if (!blk->disable_perm) {
181          return;
182      }
183  
184      blk->disable_perm = false;
185  
186      blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
187      if (local_err) {
188          error_propagate(errp, local_err);
189          blk->disable_perm = true;
190          return;
191      }
192  
193      if (runstate_check(RUN_STATE_INMIGRATE)) {
194          /* Activation can happen when the migration process is still active,
195           * for example when nbd_server_add is called during non-shared storage
196           * migration. Defer the shared_perm update to migration completion. */
197          if (!blk->vmsh) {
198              blk->vmsh = qemu_add_vm_change_state_handler(blk_vm_state_changed,
199                                                           blk);
200          }
201          return;
202      }
203  
204      blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
205      if (local_err) {
206          error_propagate(errp, local_err);
207          blk->disable_perm = true;
208          return;
209      }
210  }
211  
212  void blk_set_force_allow_inactivate(BlockBackend *blk)
213  {
214      blk->force_allow_inactivate = true;
215  }
216  
217  static bool blk_can_inactivate(BlockBackend *blk)
218  {
219      /* If it is a guest device, inactivate is ok. */
220      if (blk->dev || blk_name(blk)[0]) {
221          return true;
222      }
223  
224      /* Inactivating means no more writes to the image can be done,
225       * even if those writes would be changes invisible to the
226       * guest.  For block job BBs that satisfy this, we can just allow
227       * it.  This is the case for the source BB of a mirror job, which
228       * libvirt relies on for non-shared block migration. */
229      if (!(blk->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED))) {
230          return true;
231      }
232  
233      return blk->force_allow_inactivate;
234  }
235  
236  static int blk_root_inactivate(BdrvChild *child)
237  {
238      BlockBackend *blk = child->opaque;
239  
240      if (blk->disable_perm) {
241          return 0;
242      }
243  
244      if (!blk_can_inactivate(blk)) {
245          return -EPERM;
246      }
247  
248      blk->disable_perm = true;
249      if (blk->root) {
250          bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort);
251      }
252  
253      return 0;
254  }
255  
256  static void blk_root_attach(BdrvChild *child)
257  {
258      BlockBackend *blk = child->opaque;
259      BlockBackendAioNotifier *notifier;
260  
261      trace_blk_root_attach(child, blk, child->bs);
262  
263      QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
264          bdrv_add_aio_context_notifier(child->bs,
265                  notifier->attached_aio_context,
266                  notifier->detach_aio_context,
267                  notifier->opaque);
268      }
269  }
270  
271  static void blk_root_detach(BdrvChild *child)
272  {
273      BlockBackend *blk = child->opaque;
274      BlockBackendAioNotifier *notifier;
275  
276      trace_blk_root_detach(child, blk, child->bs);
277  
278      QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
279          bdrv_remove_aio_context_notifier(child->bs,
280                  notifier->attached_aio_context,
281                  notifier->detach_aio_context,
282                  notifier->opaque);
283      }
284  }
285  
286  static const BdrvChildRole child_root = {
287      .inherit_options    = blk_root_inherit_options,
288  
289      .change_media       = blk_root_change_media,
290      .resize             = blk_root_resize,
291      .get_name           = blk_root_get_name,
292      .get_parent_desc    = blk_root_get_parent_desc,
293  
294      .drained_begin      = blk_root_drained_begin,
295      .drained_poll       = blk_root_drained_poll,
296      .drained_end        = blk_root_drained_end,
297  
298      .activate           = blk_root_activate,
299      .inactivate         = blk_root_inactivate,
300  
301      .attach             = blk_root_attach,
302      .detach             = blk_root_detach,
303  };
304  
305  /*
306   * Create a new BlockBackend with a reference count of one.
307   *
308   * @perm is a bitmask of BLK_PERM_* constants which describes the permissions
309   * to request for a block driver node that is attached to this BlockBackend.
310   * @shared_perm is a bitmask which describes which permissions may be granted
311   * to other users of the attached node.
312   * Both sets of permissions can be changed later using blk_set_perm().
313   *
314   * Return the new BlockBackend on success, null on failure.
315   */
316  BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
317  {
318      BlockBackend *blk;
319  
320      blk = g_new0(BlockBackend, 1);
321      blk->refcnt = 1;
322      blk->perm = perm;
323      blk->shared_perm = shared_perm;
324      blk_set_enable_write_cache(blk, true);
325  
326      blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT;
327      blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
328  
329      block_acct_init(&blk->stats);
330  
331      notifier_list_init(&blk->remove_bs_notifiers);
332      notifier_list_init(&blk->insert_bs_notifiers);
333      QLIST_INIT(&blk->aio_notifiers);
334  
335      QTAILQ_INSERT_TAIL(&block_backends, blk, link);
336      return blk;
337  }
338  
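/*
 * Illustrative usage sketch (not part of this file): a caller that only needs
 * consistent reads and tolerates any other user of the node could do the
 * following; the node is attached separately, e.g. with blk_insert_bs().
 *
 *     BlockBackend *blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
 *     ...
 *     blk_unref(blk);
 */
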
339  /*
340   * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
341   *
342   * Just as with bdrv_open(), after having called this function the reference to
343   * @options belongs to the block layer (even on failure).
344   *
345   * TODO: Remove @filename and @flags; it should be possible to specify a whole
346   * BDS tree just by specifying the @options QDict (or @reference,
347   * alternatively). At the time of adding this function, this is not possible,
348   * though, so callers of this function have to be able to specify @filename and
349   * @flags.
350   */
351  BlockBackend *blk_new_open(const char *filename, const char *reference,
352                             QDict *options, int flags, Error **errp)
353  {
354      BlockBackend *blk;
355      BlockDriverState *bs;
356      uint64_t perm = 0;
357  
358      /* blk_new_open() is mainly used in .bdrv_create implementations and the
359       * tools where sharing isn't a concern because the BDS stays private, so we
360       * just request permission according to the flags.
361       *
362       * The exceptions are xen_disk and blockdev_init(); in these cases, the
363       * caller of blk_new_open() doesn't make use of the permissions, but they
364       * shouldn't hurt either. We can still share everything here because the
365       * guest devices will add their own blockers if they can't share. */
366      if ((flags & BDRV_O_NO_IO) == 0) {
367          perm |= BLK_PERM_CONSISTENT_READ;
368          if (flags & BDRV_O_RDWR) {
369              perm |= BLK_PERM_WRITE;
370          }
371      }
372      if (flags & BDRV_O_RESIZE) {
373          perm |= BLK_PERM_RESIZE;
374      }
375  
376      blk = blk_new(perm, BLK_PERM_ALL);
377      bs = bdrv_open(filename, reference, options, flags, errp);
378      if (!bs) {
379          blk_unref(blk);
380          return NULL;
381      }
382  
383      blk->root = bdrv_root_attach_child(bs, "root", &child_root,
384                                         perm, BLK_PERM_ALL, blk, errp);
385      if (!blk->root) {
386          bdrv_unref(bs);
387          blk_unref(blk);
388          return NULL;
389      }
390  
391      return blk;
392  }
393  
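/*
 * Illustrative sketch (not part of this file): this is roughly how a tool or
 * a .bdrv_create implementation opens an image read-write; the filename is a
 * placeholder.
 *
 *     Error *local_err = NULL;
 *     BlockBackend *blk = blk_new_open("test.qcow2", NULL, NULL,
 *                                      BDRV_O_RDWR, &local_err);
 *
 *     if (!blk) {
 *         error_report_err(local_err);
 *         return -EIO;
 *     }
 *     ...
 *     blk_unref(blk);
 */
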
394  static void blk_delete(BlockBackend *blk)
395  {
396      assert(!blk->refcnt);
397      assert(!blk->name);
398      assert(!blk->dev);
399      if (blk->public.throttle_group_member.throttle_state) {
400          blk_io_limits_disable(blk);
401      }
402      if (blk->root) {
403          blk_remove_bs(blk);
404      }
405      if (blk->vmsh) {
406          qemu_del_vm_change_state_handler(blk->vmsh);
407          blk->vmsh = NULL;
408      }
409      assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
410      assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
411      assert(QLIST_EMPTY(&blk->aio_notifiers));
412      QTAILQ_REMOVE(&block_backends, blk, link);
413      drive_info_del(blk->legacy_dinfo);
414      block_acct_cleanup(&blk->stats);
415      g_free(blk);
416  }
417  
418  static void drive_info_del(DriveInfo *dinfo)
419  {
420      if (!dinfo) {
421          return;
422      }
423      qemu_opts_del(dinfo->opts);
424      g_free(dinfo);
425  }
426  
427  int blk_get_refcnt(BlockBackend *blk)
428  {
429      return blk ? blk->refcnt : 0;
430  }
431  
432  /*
433   * Increment @blk's reference count.
434   * @blk must not be null.
435   */
436  void blk_ref(BlockBackend *blk)
437  {
438      assert(blk->refcnt > 0);
439      blk->refcnt++;
440  }
441  
442  /*
443   * Decrement @blk's reference count.
444   * If this drops it to zero, destroy @blk.
445   * For convenience, do nothing if @blk is null.
446   */
447  void blk_unref(BlockBackend *blk)
448  {
449      if (blk) {
450          assert(blk->refcnt > 0);
451          if (blk->refcnt > 1) {
452              blk->refcnt--;
453          } else {
454              blk_drain(blk);
455              /* blk_drain() cannot resurrect blk, nobody held a reference */
456              assert(blk->refcnt == 1);
457              blk->refcnt = 0;
458              blk_delete(blk);
459          }
460      }
461  }
462  
463  /*
464   * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
465   * ones which are hidden (i.e. are not referenced by the monitor).
466   */
467  BlockBackend *blk_all_next(BlockBackend *blk)
468  {
469      return blk ? QTAILQ_NEXT(blk, link)
470                 : QTAILQ_FIRST(&block_backends);
471  }
472  
473  void blk_remove_all_bs(void)
474  {
475      BlockBackend *blk = NULL;
476  
477      while ((blk = blk_all_next(blk)) != NULL) {
478          AioContext *ctx = blk_get_aio_context(blk);
479  
480          aio_context_acquire(ctx);
481          if (blk->root) {
482              blk_remove_bs(blk);
483          }
484          aio_context_release(ctx);
485      }
486  }
487  
488  /*
489   * Return the monitor-owned BlockBackend after @blk.
490   * If @blk is null, return the first one.
491   * Else, return @blk's next sibling, which may be null.
492   *
493   * To iterate over all BlockBackends, do
494   * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
495   *     ...
496   * }
497   */
498  BlockBackend *blk_next(BlockBackend *blk)
499  {
500      return blk ? QTAILQ_NEXT(blk, monitor_link)
501                 : QTAILQ_FIRST(&monitor_block_backends);
502  }
503  
504  /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
505   * the monitor or attached to a BlockBackend */
506  BlockDriverState *bdrv_next(BdrvNextIterator *it)
507  {
508      BlockDriverState *bs, *old_bs;
509  
510      /* Must be called from the main loop */
511      assert(qemu_get_current_aio_context() == qemu_get_aio_context());
512  
513      /* First, return all root nodes of BlockBackends. In order to avoid
514       * returning a BDS twice when multiple BBs refer to it, we only return it
515       * if the BB is the first one in the parent list of the BDS. */
516      if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
517          BlockBackend *old_blk = it->blk;
518  
519          old_bs = old_blk ? blk_bs(old_blk) : NULL;
520  
521          do {
522              it->blk = blk_all_next(it->blk);
523              bs = it->blk ? blk_bs(it->blk) : NULL;
524          } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
525  
526          if (it->blk) {
527              blk_ref(it->blk);
528          }
529          blk_unref(old_blk);
530  
531          if (bs) {
532              bdrv_ref(bs);
533              bdrv_unref(old_bs);
534              return bs;
535          }
536          it->phase = BDRV_NEXT_MONITOR_OWNED;
537      } else {
538          old_bs = it->bs;
539      }
540  
541      /* Then return the monitor-owned BDSes without a BB attached. Ignore all
542       * BDSes that are attached to a BlockBackend here; they have been handled
543       * by the above block already */
544      do {
545          it->bs = bdrv_next_monitor_owned(it->bs);
546          bs = it->bs;
547      } while (bs && bdrv_has_blk(bs));
548  
549      if (bs) {
550          bdrv_ref(bs);
551      }
552      bdrv_unref(old_bs);
553  
554      return bs;
555  }
556  
557  static void bdrv_next_reset(BdrvNextIterator *it)
558  {
559      *it = (BdrvNextIterator) {
560          .phase = BDRV_NEXT_BACKEND_ROOTS,
561      };
562  }
563  
564  BlockDriverState *bdrv_first(BdrvNextIterator *it)
565  {
566      bdrv_next_reset(it);
567      return bdrv_next(it);
568  }
569  
570  /* Must be called when aborting a bdrv_next() iteration before
571   * bdrv_next() returns NULL */
572  void bdrv_next_cleanup(BdrvNextIterator *it)
573  {
574      /* Must be called from the main loop */
575      assert(qemu_get_current_aio_context() == qemu_get_aio_context());
576  
577      if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
578          if (it->blk) {
579              bdrv_unref(blk_bs(it->blk));
580              blk_unref(it->blk);
581          }
582      } else {
583          bdrv_unref(it->bs);
584      }
585  
586      bdrv_next_reset(it);
587  }
588  
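/*
 * Illustrative sketch (not part of this file): iterating over all top-level
 * nodes with bdrv_first()/bdrv_next(); bdrv_next_cleanup() releases the
 * iterator's references if the loop is left early.  should_stop() is a
 * hypothetical predicate.
 *
 *     BdrvNextIterator it;
 *     BlockDriverState *bs;
 *
 *     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
 *         if (should_stop(bs)) {
 *             bdrv_next_cleanup(&it);
 *             break;
 *         }
 *     }
 */
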
589  /*
590   * Add a BlockBackend into the list of backends referenced by the monitor, with
591   * the given @name acting as the handle for the monitor.
592   * Strictly for use by blockdev.c.
593   *
594   * @name must not be null or empty.
595   *
596   * Returns true on success and false on failure. In the latter case, an Error
597   * object is returned through @errp.
598   */
599  bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
600  {
601      assert(!blk->name);
602      assert(name && name[0]);
603  
604      if (!id_wellformed(name)) {
605          error_setg(errp, "Invalid device name");
606          return false;
607      }
608      if (blk_by_name(name)) {
609          error_setg(errp, "Device with id '%s' already exists", name);
610          return false;
611      }
612      if (bdrv_find_node(name)) {
613          error_setg(errp,
614                     "Device name '%s' conflicts with an existing node name",
615                     name);
616          return false;
617      }
618  
619      blk->name = g_strdup(name);
620      QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
621      return true;
622  }
623  
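/*
 * Illustrative sketch (not part of this file): blockdev.c would register a
 * newly created backend under its user-supplied ID roughly like this; @id and
 * @errp are placeholders.
 *
 *     if (!monitor_add_blk(blk, id, errp)) {
 *         blk_unref(blk);
 *         return NULL;
 *     }
 */
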
624  /*
625   * Remove a BlockBackend from the list of backends referenced by the monitor.
626   * Strictly for use by blockdev.c.
627   */
628  void monitor_remove_blk(BlockBackend *blk)
629  {
630      if (!blk->name) {
631          return;
632      }
633  
634      QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
635      g_free(blk->name);
636      blk->name = NULL;
637  }
638  
639  /*
640   * Return @blk's name, a non-null string.
641   * Returns an empty string iff @blk is not referenced by the monitor.
642   */
643  const char *blk_name(const BlockBackend *blk)
644  {
645      return blk->name ?: "";
646  }
647  
648  /*
649   * Return the BlockBackend with name @name if it exists, else null.
650   * @name must not be null.
651   */
652  BlockBackend *blk_by_name(const char *name)
653  {
654      BlockBackend *blk = NULL;
655  
656      assert(name);
657      while ((blk = blk_next(blk)) != NULL) {
658          if (!strcmp(name, blk->name)) {
659              return blk;
660          }
661      }
662      return NULL;
663  }
664  
665  /*
666   * Return the BlockDriverState attached to @blk if any, else null.
667   */
668  BlockDriverState *blk_bs(BlockBackend *blk)
669  {
670      return blk->root ? blk->root->bs : NULL;
671  }
672  
673  static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
674  {
675      BdrvChild *child;
676      QLIST_FOREACH(child, &bs->parents, next_parent) {
677          if (child->role == &child_root) {
678              return child->opaque;
679          }
680      }
681  
682      return NULL;
683  }
684  
685  /*
686   * Returns true if @bs has an associated BlockBackend.
687   */
688  bool bdrv_has_blk(BlockDriverState *bs)
689  {
690      return bdrv_first_blk(bs) != NULL;
691  }
692  
693  /*
694   * Returns true if @bs has only BlockBackends as parents.
695   */
696  bool bdrv_is_root_node(BlockDriverState *bs)
697  {
698      BdrvChild *c;
699  
700      QLIST_FOREACH(c, &bs->parents, next_parent) {
701          if (c->role != &child_root) {
702              return false;
703          }
704      }
705  
706      return true;
707  }
708  
709  /*
710   * Return @blk's DriveInfo if any, else null.
711   */
712  DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
713  {
714      return blk->legacy_dinfo;
715  }
716  
717  /*
718   * Set @blk's DriveInfo to @dinfo, and return it.
719   * @blk must not have a DriveInfo set already.
720   * No other BlockBackend may have the same DriveInfo set.
721   */
722  DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
723  {
724      assert(!blk->legacy_dinfo);
725      return blk->legacy_dinfo = dinfo;
726  }
727  
728  /*
729   * Return the BlockBackend with DriveInfo @dinfo.
730   * It must exist.
731   */
732  BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
733  {
734      BlockBackend *blk = NULL;
735  
736      while ((blk = blk_next(blk)) != NULL) {
737          if (blk->legacy_dinfo == dinfo) {
738              return blk;
739          }
740      }
741      abort();
742  }
743  
744  /*
745   * Returns a pointer to the publicly accessible fields of @blk.
746   */
747  BlockBackendPublic *blk_get_public(BlockBackend *blk)
748  {
749      return &blk->public;
750  }
751  
752  /*
753   * Returns a BlockBackend given the associated @public fields.
754   */
755  BlockBackend *blk_by_public(BlockBackendPublic *public)
756  {
757      return container_of(public, BlockBackend, public);
758  }
759  
760  /*
761   * Disassociates the currently associated BlockDriverState from @blk.
762   */
763  void blk_remove_bs(BlockBackend *blk)
764  {
765      ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
766      BlockDriverState *bs;
767  
768      notifier_list_notify(&blk->remove_bs_notifiers, blk);
769      if (tgm->throttle_state) {
770          bs = blk_bs(blk);
771          bdrv_drained_begin(bs);
772          throttle_group_detach_aio_context(tgm);
773          throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
774          bdrv_drained_end(bs);
775      }
776  
777      blk_update_root_state(blk);
778  
779      /* bdrv_root_unref_child() will cause blk->root to become stale and may
780       * switch to a completion coroutine later on. Let's drain all I/O here
781       * to avoid that and a potential QEMU crash.
782       */
783      blk_drain(blk);
784      bdrv_root_unref_child(blk->root);
785      blk->root = NULL;
786  }
787  
788  /*
789   * Associates a new BlockDriverState with @blk.
790   */
791  int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
792  {
793      ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
794      blk->root = bdrv_root_attach_child(bs, "root", &child_root,
795                                         blk->perm, blk->shared_perm, blk, errp);
796      if (blk->root == NULL) {
797          return -EPERM;
798      }
799      bdrv_ref(bs);
800  
801      notifier_list_notify(&blk->insert_bs_notifiers, blk);
802      if (tgm->throttle_state) {
803          throttle_group_detach_aio_context(tgm);
804          throttle_group_attach_aio_context(tgm, bdrv_get_aio_context(bs));
805      }
806  
807      return 0;
808  }
809  
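/*
 * Illustrative sketch (not part of this file): swapping the medium of a
 * BlockBackend detaches the old node before attaching the new one.  @new_bs
 * and @errp are placeholders supplied by the caller.
 *
 *     if (blk_bs(blk)) {
 *         blk_remove_bs(blk);
 *     }
 *     if (blk_insert_bs(blk, new_bs, errp) < 0) {
 *         return;
 *     }
 */
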
810  /*
811   * Sets the permission bitmasks that the user of the BlockBackend needs.
812   */
813  int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
814                   Error **errp)
815  {
816      int ret;
817  
818      if (blk->root && !blk->disable_perm) {
819          ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
820          if (ret < 0) {
821              return ret;
822          }
823      }
824  
825      blk->perm = perm;
826      blk->shared_perm = shared_perm;
827  
828      return 0;
829  }
830  
831  void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
832  {
833      *perm = blk->perm;
834      *shared_perm = blk->shared_perm;
835  }
836  
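/*
 * Illustrative sketch (not part of this file): a device model that no longer
 * needs to tolerate other writers (e.g. after storage migration completes)
 * could tighten its shared permissions like this; on failure the previous
 * masks stay in effect.  @errp is a placeholder.
 *
 *     uint64_t perm, shared;
 *
 *     blk_get_perm(blk, &perm, &shared);
 *     blk_set_perm(blk, perm, shared & ~BLK_PERM_WRITE, errp);
 */
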
837  /*
838   * Attach device model @dev to @blk.
839   * Return 0 on success, -EBUSY when a device model is attached already.
840   */
841  int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
842  {
843      if (blk->dev) {
844          return -EBUSY;
845      }
846  
847      /* While migration is still incoming, we don't need to apply the
848       * permissions of guest device BlockBackends. We might still have a block
849       * job or NBD server writing to the image for storage migration. */
850      if (runstate_check(RUN_STATE_INMIGRATE)) {
851          blk->disable_perm = true;
852      }
853  
854      blk_ref(blk);
855      blk->dev = dev;
856      blk_iostatus_reset(blk);
857  
858      return 0;
859  }
860  
861  /*
862   * Detach device model @dev from @blk.
863   * @dev must be currently attached to @blk.
864   */
865  void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
866  {
867      assert(blk->dev == dev);
868      blk->dev = NULL;
869      blk->dev_ops = NULL;
870      blk->dev_opaque = NULL;
871      blk->guest_block_size = 512;
872      blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
873      blk_unref(blk);
874  }
875  
876  /*
877   * Return the device model attached to @blk if any, else null.
878   */
879  DeviceState *blk_get_attached_dev(BlockBackend *blk)
880  {
881      return blk->dev;
882  }
883  
884  /* Return the qdev ID, or, if no ID is assigned, the QOM path of the block
885   * device attached to the BlockBackend. */

886  char *blk_get_attached_dev_id(BlockBackend *blk)
887  {
888      DeviceState *dev = blk->dev;
889  
890      if (!dev) {
891          return g_strdup("");
892      } else if (dev->id) {
893          return g_strdup(dev->id);
894      }
895  
896      return object_get_canonical_path(OBJECT(dev)) ?: g_strdup("");
897  }
898  
899  /*
900   * Return the BlockBackend which has the device model @dev attached if it
901   * exists, else null.
902   *
903   * @dev must not be null.
904   */
905  BlockBackend *blk_by_dev(void *dev)
906  {
907      BlockBackend *blk = NULL;
908  
909      assert(dev != NULL);
910      while ((blk = blk_all_next(blk)) != NULL) {
911          if (blk->dev == dev) {
912              return blk;
913          }
914      }
915      return NULL;
916  }
917  
918  /*
919   * Set @blk's device model callbacks to @ops.
920   * @opaque is the opaque argument to pass to the callbacks.
921   * This is for use by device models.
922   */
923  void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
924                       void *opaque)
925  {
926      blk->dev_ops = ops;
927      blk->dev_opaque = opaque;
928  
929      /* Are we currently quiesced? Should we enforce this right now? */
930      if (blk->quiesce_counter && ops->drained_begin) {
931          ops->drained_begin(opaque);
932      }
933  }
934  
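/*
 * Illustrative sketch (not part of this file): a device model realizes
 * against a backend by attaching itself and registering its callbacks.
 * my_dev_ops is a hypothetical BlockDevOps instance.
 *
 *     if (blk_attach_dev(blk, dev) < 0) {
 *         error_setg(errp, "Drive '%s' is already in use", blk_name(blk));
 *         return;
 *     }
 *     blk_set_dev_ops(blk, &my_dev_ops, dev);
 */
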
935  /*
936   * Notify @blk's attached device model of media change.
937   *
938   * If @load is true, notify of media load. This action can fail, meaning that
939   * the medium cannot be loaded. @errp is set then.
940   *
941   * If @load is false, notify of media eject. This can never fail.
942   *
943   * Also send DEVICE_TRAY_MOVED events as appropriate.
944   */
945  void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
946  {
947      if (blk->dev_ops && blk->dev_ops->change_media_cb) {
948          bool tray_was_open, tray_is_open;
949          Error *local_err = NULL;
950  
951          tray_was_open = blk_dev_is_tray_open(blk);
952          blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
953          if (local_err) {
954              assert(load == true);
955              error_propagate(errp, local_err);
956              return;
957          }
958          tray_is_open = blk_dev_is_tray_open(blk);
959  
960          if (tray_was_open != tray_is_open) {
961              char *id = blk_get_attached_dev_id(blk);
962              qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open);
963              g_free(id);
964          }
965      }
966  }
967  
968  static void blk_root_change_media(BdrvChild *child, bool load)
969  {
970      blk_dev_change_media_cb(child->opaque, load, NULL);
971  }
972  
973  /*
974   * Does @blk's attached device model have removable media?
975   * %true if no device model is attached.
976   */
977  bool blk_dev_has_removable_media(BlockBackend *blk)
978  {
979      return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
980  }
981  
982  /*
983   * Does @blk's attached device model have a tray?
984   */
985  bool blk_dev_has_tray(BlockBackend *blk)
986  {
987      return blk->dev_ops && blk->dev_ops->is_tray_open;
988  }
989  
990  /*
991   * Notify @blk's attached device model of a media eject request.
992   * If @force is true, the medium is about to be yanked out forcefully.
993   */
994  void blk_dev_eject_request(BlockBackend *blk, bool force)
995  {
996      if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
997          blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
998      }
999  }
1000  
1001  /*
1002   * Does @blk's attached device model have a tray, and is it open?
1003   */
1004  bool blk_dev_is_tray_open(BlockBackend *blk)
1005  {
1006      if (blk_dev_has_tray(blk)) {
1007          return blk->dev_ops->is_tray_open(blk->dev_opaque);
1008      }
1009      return false;
1010  }
1011  
1012  /*
1013   * Does @blk's attached device model have the medium locked?
1014   * %false if the device model has no such lock.
1015   */
1016  bool blk_dev_is_medium_locked(BlockBackend *blk)
1017  {
1018      if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
1019          return blk->dev_ops->is_medium_locked(blk->dev_opaque);
1020      }
1021      return false;
1022  }
1023  
1024  /*
1025   * Notify @blk's attached device model of a backend size change.
1026   */
1027  static void blk_root_resize(BdrvChild *child)
1028  {
1029      BlockBackend *blk = child->opaque;
1030  
1031      if (blk->dev_ops && blk->dev_ops->resize_cb) {
1032          blk->dev_ops->resize_cb(blk->dev_opaque);
1033      }
1034  }
1035  
1036  void blk_iostatus_enable(BlockBackend *blk)
1037  {
1038      blk->iostatus_enabled = true;
1039      blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
1040  }
1041  
1042  /* The I/O status is only enabled if the drive explicitly
1043   * enables it _and_ the VM is configured to stop on errors */
1044  bool blk_iostatus_is_enabled(const BlockBackend *blk)
1045  {
1046      return (blk->iostatus_enabled &&
1047             (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
1048              blk->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
1049              blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
1050  }
1051  
1052  BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
1053  {
1054      return blk->iostatus;
1055  }
1056  
1057  void blk_iostatus_disable(BlockBackend *blk)
1058  {
1059      blk->iostatus_enabled = false;
1060  }
1061  
1062  void blk_iostatus_reset(BlockBackend *blk)
1063  {
1064      if (blk_iostatus_is_enabled(blk)) {
1065          BlockDriverState *bs = blk_bs(blk);
1066          blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
1067          if (bs && bs->job) {
1068              block_job_iostatus_reset(bs->job);
1069          }
1070      }
1071  }
1072  
1073  void blk_iostatus_set_err(BlockBackend *blk, int error)
1074  {
1075      assert(blk_iostatus_is_enabled(blk));
1076      if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
1077          blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
1078                                            BLOCK_DEVICE_IO_STATUS_FAILED;
1079      }
1080  }
1081  
1082  void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
1083  {
1084      blk->allow_write_beyond_eof = allow;
1085  }
1086  
1087  static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
1088                                    size_t size)
1089  {
1090      int64_t len;
1091  
1092      if (size > INT_MAX) {
1093          return -EIO;
1094      }
1095  
1096      if (!blk_is_available(blk)) {
1097          return -ENOMEDIUM;
1098      }
1099  
1100      if (offset < 0) {
1101          return -EIO;
1102      }
1103  
1104      if (!blk->allow_write_beyond_eof) {
1105          len = blk_getlength(blk);
1106          if (len < 0) {
1107              return len;
1108          }
1109  
1110          if (offset > len || len - offset < size) {
1111              return -EIO;
1112          }
1113      }
1114  
1115      return 0;
1116  }
1117  
1118  int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
1119                                 unsigned int bytes, QEMUIOVector *qiov,
1120                                 BdrvRequestFlags flags)
1121  {
1122      int ret;
1123      BlockDriverState *bs = blk_bs(blk);
1124  
1125      trace_blk_co_preadv(blk, bs, offset, bytes, flags);
1126  
1127      ret = blk_check_byte_request(blk, offset, bytes);
1128      if (ret < 0) {
1129          return ret;
1130      }
1131  
1132      bdrv_inc_in_flight(bs);
1133  
1134      /* throttling disk I/O */
1135      if (blk->public.throttle_group_member.throttle_state) {
1136          throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
1137                  bytes, false);
1138      }
1139  
1140      ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
1141      bdrv_dec_in_flight(bs);
1142      return ret;
1143  }
1144  
1145  int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
1146                                  unsigned int bytes, QEMUIOVector *qiov,
1147                                  BdrvRequestFlags flags)
1148  {
1149      int ret;
1150      BlockDriverState *bs = blk_bs(blk);
1151  
1152      trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
1153  
1154      ret = blk_check_byte_request(blk, offset, bytes);
1155      if (ret < 0) {
1156          return ret;
1157      }
1158  
1159      bdrv_inc_in_flight(bs);
1160      /* throttling disk I/O */
1161      if (blk->public.throttle_group_member.throttle_state) {
1162          throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
1163                  bytes, true);
1164      }
1165  
1166      if (!blk->enable_write_cache) {
1167          flags |= BDRV_REQ_FUA;
1168      }
1169  
1170      ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
1171      bdrv_dec_in_flight(bs);
1172      return ret;
1173  }
1174  
1175  typedef struct BlkRwCo {
1176      BlockBackend *blk;
1177      int64_t offset;
1178      void *iobuf;
1179      int ret;
1180      BdrvRequestFlags flags;
1181  } BlkRwCo;
1182  
1183  static void blk_read_entry(void *opaque)
1184  {
1185      BlkRwCo *rwco = opaque;
1186      QEMUIOVector *qiov = rwco->iobuf;
1187  
1188      rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
1189                                qiov, rwco->flags);
1190      aio_wait_kick();
1191  }
1192  
1193  static void blk_write_entry(void *opaque)
1194  {
1195      BlkRwCo *rwco = opaque;
1196      QEMUIOVector *qiov = rwco->iobuf;
1197  
1198      rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
1199                                 qiov, rwco->flags);
1200      aio_wait_kick();
1201  }
1202  
1203  static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
1204                     int64_t bytes, CoroutineEntry co_entry,
1205                     BdrvRequestFlags flags)
1206  {
1207      QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1208      BlkRwCo rwco = {
1209          .blk    = blk,
1210          .offset = offset,
1211          .iobuf  = &qiov,
1212          .flags  = flags,
1213          .ret    = NOT_DONE,
1214      };
1215  
1216      if (qemu_in_coroutine()) {
1217          /* Fast-path if already in coroutine context */
1218          co_entry(&rwco);
1219      } else {
1220          Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
1221          bdrv_coroutine_enter(blk_bs(blk), co);
1222          BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
1223      }
1224  
1225      return rwco.ret;
1226  }
1227  
1228  int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
1229                            int count)
1230  {
1231      int ret;
1232  
1233      ret = blk_check_byte_request(blk, offset, count);
1234      if (ret < 0) {
1235          return ret;
1236      }
1237  
1238      blk_root_drained_begin(blk->root);
1239      ret = blk_pread(blk, offset, buf, count);
1240      blk_root_drained_end(blk->root);
1241      return ret;
1242  }
1243  
1244  int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1245                        int bytes, BdrvRequestFlags flags)
1246  {
1247      return blk_prw(blk, offset, NULL, bytes, blk_write_entry,
1248                     flags | BDRV_REQ_ZERO_WRITE);
1249  }
1250  
1251  int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
1252  {
1253      return bdrv_make_zero(blk->root, flags);
1254  }
1255  
1256  void blk_inc_in_flight(BlockBackend *blk)
1257  {
1258      atomic_inc(&blk->in_flight);
1259  }
1260  
1261  void blk_dec_in_flight(BlockBackend *blk)
1262  {
1263      atomic_dec(&blk->in_flight);
1264      aio_wait_kick();
1265  }
1266  
1267  static void error_callback_bh(void *opaque)
1268  {
1269      struct BlockBackendAIOCB *acb = opaque;
1270  
1271      blk_dec_in_flight(acb->blk);
1272      acb->common.cb(acb->common.opaque, acb->ret);
1273      qemu_aio_unref(acb);
1274  }
1275  
1276  BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
1277                                    BlockCompletionFunc *cb,
1278                                    void *opaque, int ret)
1279  {
1280      struct BlockBackendAIOCB *acb;
1281  
1282      blk_inc_in_flight(blk);
1283      acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
1284      acb->blk = blk;
1285      acb->ret = ret;
1286  
1287      aio_bh_schedule_oneshot(blk_get_aio_context(blk), error_callback_bh, acb);
1288      return &acb->common;
1289  }
1290  
1291  typedef struct BlkAioEmAIOCB {
1292      BlockAIOCB common;
1293      BlkRwCo rwco;
1294      int bytes;
1295      bool has_returned;
1296  } BlkAioEmAIOCB;
1297  
1298  static const AIOCBInfo blk_aio_em_aiocb_info = {
1299      .aiocb_size         = sizeof(BlkAioEmAIOCB),
1300  };
1301  
1302  static void blk_aio_complete(BlkAioEmAIOCB *acb)
1303  {
1304      if (acb->has_returned) {
1305          acb->common.cb(acb->common.opaque, acb->rwco.ret);
1306          blk_dec_in_flight(acb->rwco.blk);
1307          qemu_aio_unref(acb);
1308      }
1309  }
1310  
1311  static void blk_aio_complete_bh(void *opaque)
1312  {
1313      BlkAioEmAIOCB *acb = opaque;
1314      assert(acb->has_returned);
1315      blk_aio_complete(acb);
1316  }
1317  
1318  static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
1319                                  void *iobuf, CoroutineEntry co_entry,
1320                                  BdrvRequestFlags flags,
1321                                  BlockCompletionFunc *cb, void *opaque)
1322  {
1323      BlkAioEmAIOCB *acb;
1324      Coroutine *co;
1325  
1326      blk_inc_in_flight(blk);
1327      acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
1328      acb->rwco = (BlkRwCo) {
1329          .blk    = blk,
1330          .offset = offset,
1331          .iobuf  = iobuf,
1332          .flags  = flags,
1333          .ret    = NOT_DONE,
1334      };
1335      acb->bytes = bytes;
1336      acb->has_returned = false;
1337  
1338      co = qemu_coroutine_create(co_entry, acb);
1339      bdrv_coroutine_enter(blk_bs(blk), co);
1340  
1341      acb->has_returned = true;
1342      if (acb->rwco.ret != NOT_DONE) {
1343          aio_bh_schedule_oneshot(blk_get_aio_context(blk),
1344                                  blk_aio_complete_bh, acb);
1345      }
1346  
1347      return &acb->common;
1348  }
1349  
1350  static void blk_aio_read_entry(void *opaque)
1351  {
1352      BlkAioEmAIOCB *acb = opaque;
1353      BlkRwCo *rwco = &acb->rwco;
1354      QEMUIOVector *qiov = rwco->iobuf;
1355  
1356      assert(qiov->size == acb->bytes);
1357      rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
1358                                qiov, rwco->flags);
1359      blk_aio_complete(acb);
1360  }
1361  
1362  static void blk_aio_write_entry(void *opaque)
1363  {
1364      BlkAioEmAIOCB *acb = opaque;
1365      BlkRwCo *rwco = &acb->rwco;
1366      QEMUIOVector *qiov = rwco->iobuf;
1367  
1368      assert(!qiov || qiov->size == acb->bytes);
1369      rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
1370                                 qiov, rwco->flags);
1371      blk_aio_complete(acb);
1372  }
1373  
1374  BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1375                                    int count, BdrvRequestFlags flags,
1376                                    BlockCompletionFunc *cb, void *opaque)
1377  {
1378      return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
1379                          flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
1380  }
1381  
1382  int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
1383  {
1384      int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
1385      if (ret < 0) {
1386          return ret;
1387      }
1388      return count;
1389  }
1390  
1391  int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
1392                 BdrvRequestFlags flags)
1393  {
1394      int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
1395                        flags);
1396      if (ret < 0) {
1397          return ret;
1398      }
1399      return count;
1400  }
1401  
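/*
 * Illustrative sketch (not part of this file): synchronous, byte-based access
 * through the backend.  The offset and buffer are placeholders.
 *
 *     uint8_t buf[512];
 *
 *     if (blk_pread(blk, 0, buf, sizeof(buf)) < 0) {
 *         return -EIO;
 *     }
 *     if (blk_pwrite(blk, 0, buf, sizeof(buf), 0) < 0) {
 *         return -EIO;
 *     }
 */
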
1402  int64_t blk_getlength(BlockBackend *blk)
1403  {
1404      if (!blk_is_available(blk)) {
1405          return -ENOMEDIUM;
1406      }
1407  
1408      return bdrv_getlength(blk_bs(blk));
1409  }
1410  
1411  void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
1412  {
1413      if (!blk_bs(blk)) {
1414          *nb_sectors_ptr = 0;
1415      } else {
1416          bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
1417      }
1418  }
1419  
1420  int64_t blk_nb_sectors(BlockBackend *blk)
1421  {
1422      if (!blk_is_available(blk)) {
1423          return -ENOMEDIUM;
1424      }
1425  
1426      return bdrv_nb_sectors(blk_bs(blk));
1427  }
1428  
1429  BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
1430                             QEMUIOVector *qiov, BdrvRequestFlags flags,
1431                             BlockCompletionFunc *cb, void *opaque)
1432  {
1433      return blk_aio_prwv(blk, offset, qiov->size, qiov,
1434                          blk_aio_read_entry, flags, cb, opaque);
1435  }
1436  
1437  BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
1438                              QEMUIOVector *qiov, BdrvRequestFlags flags,
1439                              BlockCompletionFunc *cb, void *opaque)
1440  {
1441      return blk_aio_prwv(blk, offset, qiov->size, qiov,
1442                          blk_aio_write_entry, flags, cb, opaque);
1443  }
1444  
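/*
 * Illustrative sketch (not part of this file): an asynchronous read with a
 * completion callback.  my_read_complete() and the req structure holding the
 * QEMUIOVector are hypothetical; the iovec must stay valid until the callback
 * runs.
 *
 *     qemu_iovec_init(&req->qiov, 1);
 *     qemu_iovec_add(&req->qiov, req->buf, req->len);
 *     blk_aio_preadv(blk, req->offset, &req->qiov, 0, my_read_complete, req);
 */
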
1445  static void blk_aio_flush_entry(void *opaque)
1446  {
1447      BlkAioEmAIOCB *acb = opaque;
1448      BlkRwCo *rwco = &acb->rwco;
1449  
1450      rwco->ret = blk_co_flush(rwco->blk);
1451      blk_aio_complete(acb);
1452  }
1453  
1454  BlockAIOCB *blk_aio_flush(BlockBackend *blk,
1455                            BlockCompletionFunc *cb, void *opaque)
1456  {
1457      return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
1458  }
1459  
1460  static void blk_aio_pdiscard_entry(void *opaque)
1461  {
1462      BlkAioEmAIOCB *acb = opaque;
1463      BlkRwCo *rwco = &acb->rwco;
1464  
1465      rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
1466      blk_aio_complete(acb);
1467  }
1468  
1469  BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
1470                               int64_t offset, int bytes,
1471                               BlockCompletionFunc *cb, void *opaque)
1472  {
1473      return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
1474                          cb, opaque);
1475  }
1476  
1477  void blk_aio_cancel(BlockAIOCB *acb)
1478  {
1479      bdrv_aio_cancel(acb);
1480  }
1481  
1482  void blk_aio_cancel_async(BlockAIOCB *acb)
1483  {
1484      bdrv_aio_cancel_async(acb);
1485  }
1486  
1487  int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
1488  {
1489      if (!blk_is_available(blk)) {
1490          return -ENOMEDIUM;
1491      }
1492  
1493      return bdrv_co_ioctl(blk_bs(blk), req, buf);
1494  }
1495  
1496  static void blk_ioctl_entry(void *opaque)
1497  {
1498      BlkRwCo *rwco = opaque;
1499      QEMUIOVector *qiov = rwco->iobuf;
1500  
1501      rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
1502                               qiov->iov[0].iov_base);
1503      aio_wait_kick();
1504  }
1505  
1506  int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
1507  {
1508      return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
1509  }
1510  
1511  static void blk_aio_ioctl_entry(void *opaque)
1512  {
1513      BlkAioEmAIOCB *acb = opaque;
1514      BlkRwCo *rwco = &acb->rwco;
1515  
1516      rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
1517  
1518      blk_aio_complete(acb);
1519  }
1520  
1521  BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
1522                            BlockCompletionFunc *cb, void *opaque)
1523  {
1524      return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
1525  }
1526  
1527  int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
1528  {
1529      int ret = blk_check_byte_request(blk, offset, bytes);
1530      if (ret < 0) {
1531          return ret;
1532      }
1533  
1534      return bdrv_co_pdiscard(blk->root, offset, bytes);
1535  }
1536  
1537  int blk_co_flush(BlockBackend *blk)
1538  {
1539      if (!blk_is_available(blk)) {
1540          return -ENOMEDIUM;
1541      }
1542  
1543      return bdrv_co_flush(blk_bs(blk));
1544  }
1545  
1546  static void blk_flush_entry(void *opaque)
1547  {
1548      BlkRwCo *rwco = opaque;
1549      rwco->ret = blk_co_flush(rwco->blk);
1550      aio_wait_kick();
1551  }
1552  
1553  int blk_flush(BlockBackend *blk)
1554  {
1555      return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0);
1556  }
1557  
1558  void blk_drain(BlockBackend *blk)
1559  {
1560      BlockDriverState *bs = blk_bs(blk);
1561  
1562      if (bs) {
1563          bdrv_drained_begin(bs);
1564      }
1565  
1566      /* We may have -ENOMEDIUM completions in flight */
1567      AIO_WAIT_WHILE(blk_get_aio_context(blk),
1568                     atomic_mb_read(&blk->in_flight) > 0);
1569  
1570      if (bs) {
1571          bdrv_drained_end(bs);
1572      }
1573  }
1574  
1575  void blk_drain_all(void)
1576  {
1577      BlockBackend *blk = NULL;
1578  
1579      bdrv_drain_all_begin();
1580  
1581      while ((blk = blk_all_next(blk)) != NULL) {
1582          AioContext *ctx = blk_get_aio_context(blk);
1583  
1584          aio_context_acquire(ctx);
1585  
1586          /* We may have -ENOMEDIUM completions in flight */
1587          AIO_WAIT_WHILE(ctx, atomic_mb_read(&blk->in_flight) > 0);
1588  
1589          aio_context_release(ctx);
1590      }
1591  
1592      bdrv_drain_all_end();
1593  }
1594  
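/*
 * Illustrative sketch (not part of this file): draining before touching state
 * that in-flight requests might still use; reconfigure_backend() is a
 * hypothetical helper that runs with no requests pending.
 *
 *     blk_drain(blk);
 *     reconfigure_backend(blk);
 */
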
1595  void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
1596                        BlockdevOnError on_write_error)
1597  {
1598      blk->on_read_error = on_read_error;
1599      blk->on_write_error = on_write_error;
1600  }
1601  
1602  BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
1603  {
1604      return is_read ? blk->on_read_error : blk->on_write_error;
1605  }
1606  
1607  BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
1608                                        int error)
1609  {
1610      BlockdevOnError on_err = blk_get_on_error(blk, is_read);
1611  
1612      switch (on_err) {
1613      case BLOCKDEV_ON_ERROR_ENOSPC:
1614          return (error == ENOSPC) ?
1615                 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
1616      case BLOCKDEV_ON_ERROR_STOP:
1617          return BLOCK_ERROR_ACTION_STOP;
1618      case BLOCKDEV_ON_ERROR_REPORT:
1619          return BLOCK_ERROR_ACTION_REPORT;
1620      case BLOCKDEV_ON_ERROR_IGNORE:
1621          return BLOCK_ERROR_ACTION_IGNORE;
1622      case BLOCKDEV_ON_ERROR_AUTO:
1623      default:
1624          abort();
1625      }
1626  }
1627  
1628  static void send_qmp_error_event(BlockBackend *blk,
1629                                   BlockErrorAction action,
1630                                   bool is_read, int error)
1631  {
1632      IoOperationType optype;
1633      BlockDriverState *bs = blk_bs(blk);
1634  
1635      optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
1636      qapi_event_send_block_io_error(blk_name(blk), !!bs,
1637                                     bs ? bdrv_get_node_name(bs) : NULL, optype,
1638                                     action, blk_iostatus_is_enabled(blk),
1639                                     error == ENOSPC, strerror(error));
1640  }
1641  
1642  /* This is done by device models because, while the block layer knows
1643   * about the error, it does not know whether an operation comes from
1644   * the device or the block layer (from a job, for example).
1645   */
1646  void blk_error_action(BlockBackend *blk, BlockErrorAction action,
1647                        bool is_read, int error)
1648  {
1649      assert(error >= 0);
1650  
1651      if (action == BLOCK_ERROR_ACTION_STOP) {
1652          /* First set the iostatus, so that "info block" returns an iostatus
1653           * that matches the events raised so far (an additional error iostatus
1654           * is fine, but not a lost one).
1655           */
1656          blk_iostatus_set_err(blk, error);
1657  
1658          /* Then raise the request to stop the VM and the event.
1659           * qemu_system_vmstop_request_prepare has two effects.  First,
1660           * it ensures that the STOP event always comes after the
1661           * BLOCK_IO_ERROR event.  Second, it ensures that even if management
1662           * can observe the STOP event and do a "cont" before the STOP
1663           * event is issued, the VM will not stop.  In this case, vm_start()
1664           * also ensures that the STOP/RESUME pair of events is emitted.
1665           */
1666          qemu_system_vmstop_request_prepare();
1667          send_qmp_error_event(blk, action, is_read, error);
1668          qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
1669      } else {
1670          send_qmp_error_event(blk, action, is_read, error);
1671      }
1672  }
1673  
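/*
 * Illustrative sketch (not part of this file): how a device model typically
 * combines blk_get_error_action() and blk_error_action() when a request
 * fails.  @is_read and the negative return value @ret are placeholders, and
 * the STOP branch is expected to queue the request for retry on "cont".
 *
 *     BlockErrorAction action = blk_get_error_action(blk, is_read, -ret);
 *
 *     if (action == BLOCK_ERROR_ACTION_STOP) {
 *         ... remember the request so it can be retried on resume ...
 *     }
 *     blk_error_action(blk, action, is_read, -ret);
 *     return action != BLOCK_ERROR_ACTION_IGNORE;
 */
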
1674  bool blk_is_read_only(BlockBackend *blk)
1675  {
1676      BlockDriverState *bs = blk_bs(blk);
1677  
1678      if (bs) {
1679          return bdrv_is_read_only(bs);
1680      } else {
1681          return blk->root_state.read_only;
1682      }
1683  }
1684  
1685  bool blk_is_sg(BlockBackend *blk)
1686  {
1687      BlockDriverState *bs = blk_bs(blk);
1688  
1689      if (!bs) {
1690          return false;
1691      }
1692  
1693      return bdrv_is_sg(bs);
1694  }
1695  
1696  bool blk_enable_write_cache(BlockBackend *blk)
1697  {
1698      return blk->enable_write_cache;
1699  }
1700  
1701  void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
1702  {
1703      blk->enable_write_cache = wce;
1704  }
1705  
1706  void blk_invalidate_cache(BlockBackend *blk, Error **errp)
1707  {
1708      BlockDriverState *bs = blk_bs(blk);
1709  
1710      if (!bs) {
1711          error_setg(errp, "Device '%s' has no medium", blk->name);
1712          return;
1713      }
1714  
1715      bdrv_invalidate_cache(bs, errp);
1716  }
1717  
1718  bool blk_is_inserted(BlockBackend *blk)
1719  {
1720      BlockDriverState *bs = blk_bs(blk);
1721  
1722      return bs && bdrv_is_inserted(bs);
1723  }
1724  
1725  bool blk_is_available(BlockBackend *blk)
1726  {
1727      return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
1728  }
1729  
1730  void blk_lock_medium(BlockBackend *blk, bool locked)
1731  {
1732      BlockDriverState *bs = blk_bs(blk);
1733  
1734      if (bs) {
1735          bdrv_lock_medium(bs, locked);
1736      }
1737  }
1738  
1739  void blk_eject(BlockBackend *blk, bool eject_flag)
1740  {
1741      BlockDriverState *bs = blk_bs(blk);
1742      char *id;
1743  
1744      if (bs) {
1745          bdrv_eject(bs, eject_flag);
1746      }
1747  
1748      /* Whether or not we ejected on the backend,
1749       * the frontend experienced a tray event. */
1750      id = blk_get_attached_dev_id(blk);
1751      qapi_event_send_device_tray_moved(blk_name(blk), id,
1752                                        eject_flag);
1753      g_free(id);
1754  }
1755  
1756  int blk_get_flags(BlockBackend *blk)
1757  {
1758      BlockDriverState *bs = blk_bs(blk);
1759  
1760      if (bs) {
1761          return bdrv_get_flags(bs);
1762      } else {
1763          return blk->root_state.open_flags;
1764      }
1765  }
1766  
1767  /* Returns the minimum request alignment, in bytes; guaranteed nonzero */
1768  uint32_t blk_get_request_alignment(BlockBackend *blk)
1769  {
1770      BlockDriverState *bs = blk_bs(blk);
1771      return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
1772  }
1773  
1774  /* Returns the maximum transfer length, in bytes; guaranteed nonzero */
1775  uint32_t blk_get_max_transfer(BlockBackend *blk)
1776  {
1777      BlockDriverState *bs = blk_bs(blk);
1778      uint32_t max = 0;
1779  
1780      if (bs) {
1781          max = bs->bl.max_transfer;
1782      }
1783      return MIN_NON_ZERO(max, INT_MAX);
1784  }
1785  
1786  int blk_get_max_iov(BlockBackend *blk)
1787  {
1788      return blk->root->bs->bl.max_iov;
1789  }
1790  
1791  void blk_set_guest_block_size(BlockBackend *blk, int align)
1792  {
1793      blk->guest_block_size = align;
1794  }
1795  
1796  void *blk_try_blockalign(BlockBackend *blk, size_t size)
1797  {
1798      return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
1799  }
1800  
1801  void *blk_blockalign(BlockBackend *blk, size_t size)
1802  {
1803      return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
1804  }
1805  
1806  bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
1807  {
1808      BlockDriverState *bs = blk_bs(blk);
1809  
1810      if (!bs) {
1811          return false;
1812      }
1813  
1814      return bdrv_op_is_blocked(bs, op, errp);
1815  }
1816  
1817  void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
1818  {
1819      BlockDriverState *bs = blk_bs(blk);
1820  
1821      if (bs) {
1822          bdrv_op_unblock(bs, op, reason);
1823      }
1824  }
1825  
1826  void blk_op_block_all(BlockBackend *blk, Error *reason)
1827  {
1828      BlockDriverState *bs = blk_bs(blk);
1829  
1830      if (bs) {
1831          bdrv_op_block_all(bs, reason);
1832      }
1833  }
1834  
1835  void blk_op_unblock_all(BlockBackend *blk, Error *reason)
1836  {
1837      BlockDriverState *bs = blk_bs(blk);
1838  
1839      if (bs) {
1840          bdrv_op_unblock_all(bs, reason);
1841      }
1842  }
1843  
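      /* Returns the AioContext of the attached BlockDriverState.  With no BDS
       * attached this relies on bdrv_get_aio_context() accepting a NULL
       * argument, in which case it falls back to the main loop context. */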
1844  AioContext *blk_get_aio_context(BlockBackend *blk)
1845  {
1846      return bdrv_get_aio_context(blk_bs(blk));
1847  }
1848  
1849  static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
1850  {
1851      BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
1852      return blk_get_aio_context(blk_acb->blk);
1853  }
1854  
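      /* Move the backend to a new AioContext.  When throttling is enabled, the
       * throttle group member has to be detached from the old context and
       * attached to the new one; this happens while the BDS is drained so that
       * no throttled request is in flight during the switch. */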
1855  void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
1856  {
1857      BlockDriverState *bs = blk_bs(blk);
1858      ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
1859  
1860      if (bs) {
1861          if (tgm->throttle_state) {
1862              bdrv_drained_begin(bs);
1863              throttle_group_detach_aio_context(tgm);
1864              throttle_group_attach_aio_context(tgm, new_context);
1865              bdrv_drained_end(bs);
1866          }
1867          bdrv_set_aio_context(bs, new_context);
1868      }
1869  }
1870  
1871  void blk_add_aio_context_notifier(BlockBackend *blk,
1872          void (*attached_aio_context)(AioContext *new_context, void *opaque),
1873          void (*detach_aio_context)(void *opaque), void *opaque)
1874  {
1875      BlockBackendAioNotifier *notifier;
1876      BlockDriverState *bs = blk_bs(blk);
1877  
1878      notifier = g_new(BlockBackendAioNotifier, 1);
1879      notifier->attached_aio_context = attached_aio_context;
1880      notifier->detach_aio_context = detach_aio_context;
1881      notifier->opaque = opaque;
1882      QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list);
1883  
1884      if (bs) {
1885          bdrv_add_aio_context_notifier(bs, attached_aio_context,
1886                                        detach_aio_context, opaque);
1887      }
1888  }
1889  
1890  void blk_remove_aio_context_notifier(BlockBackend *blk,
1891                                       void (*attached_aio_context)(AioContext *,
1892                                                                    void *),
1893                                       void (*detach_aio_context)(void *),
1894                                       void *opaque)
1895  {
1896      BlockBackendAioNotifier *notifier;
1897      BlockDriverState *bs = blk_bs(blk);
1898  
1899      if (bs) {
1900          bdrv_remove_aio_context_notifier(bs, attached_aio_context,
1901                                           detach_aio_context, opaque);
1902      }
1903  
1904      QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
1905          if (notifier->attached_aio_context == attached_aio_context &&
1906              notifier->detach_aio_context == detach_aio_context &&
1907              notifier->opaque == opaque) {
1908              QLIST_REMOVE(notifier, list);
1909              g_free(notifier);
1910              return;
1911          }
1912      }
1913  
1914      abort();
1915  }
1916  
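      /*
       * Usage sketch for the notifier pair above (illustrative only;
       * my_attached, my_detach and dev are hypothetical names):
       *
       *     static void my_attached(AioContext *ctx, void *opaque) { ... }
       *     static void my_detach(void *opaque) { ... }
       *
       *     blk_add_aio_context_notifier(blk, my_attached, my_detach, dev);
       *     ...
       *     blk_remove_aio_context_notifier(blk, my_attached, my_detach, dev);
       *
       * The remove call abort()s if no notifier with exactly matching callbacks
       * and opaque pointer is registered, so both calls must use the same
       * arguments.
       */
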
1917  void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
1918  {
1919      notifier_list_add(&blk->remove_bs_notifiers, notify);
1920  }
1921  
1922  void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
1923  {
1924      notifier_list_add(&blk->insert_bs_notifiers, notify);
1925  }
1926  
1927  void blk_io_plug(BlockBackend *blk)
1928  {
1929      BlockDriverState *bs = blk_bs(blk);
1930  
1931      if (bs) {
1932          bdrv_io_plug(bs);
1933      }
1934  }
1935  
1936  void blk_io_unplug(BlockBackend *blk)
1937  {
1938      BlockDriverState *bs = blk_bs(blk);
1939  
1940      if (bs) {
1941          bdrv_io_unplug(bs);
1942      }
1943  }
1944  
1945  BlockAcctStats *blk_get_stats(BlockBackend *blk)
1946  {
1947      return &blk->stats;
1948  }
1949  
1950  void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
1951                    BlockCompletionFunc *cb, void *opaque)
1952  {
1953      return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
1954  }
1955  
1956  int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1957                                        int bytes, BdrvRequestFlags flags)
1958  {
1959      return blk_co_pwritev(blk, offset, bytes, NULL,
1960                            flags | BDRV_REQ_ZERO_WRITE);
1961  }
1962  
1963  int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
1964                            int count)
1965  {
1966      return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
1967                     BDRV_REQ_WRITE_COMPRESSED);
1968  }
1969  
1970  int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
1971                   Error **errp)
1972  {
1973      if (!blk_is_available(blk)) {
1974          error_setg(errp, "No medium inserted");
1975          return -ENOMEDIUM;
1976      }
1977  
1978      return bdrv_truncate(blk->root, offset, prealloc, errp);
1979  }
1980  
1981  static void blk_pdiscard_entry(void *opaque)
1982  {
1983      BlkRwCo *rwco = opaque;
1984      QEMUIOVector *qiov = rwco->iobuf;
1985  
1986      rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
1987      aio_wait_kick();
1988  }
1989  
1990  int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
1991  {
1992      return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
1993  }
1994  
1995  int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
1996                       int64_t pos, int size)
1997  {
1998      int ret;
1999  
2000      if (!blk_is_available(blk)) {
2001          return -ENOMEDIUM;
2002      }
2003  
2004      ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
2005      if (ret < 0) {
2006          return ret;
2007      }
2008  
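          /* With the write cache disabled the caller expects writethrough
           * semantics, so make sure a completely written vmstate is stable on
           * disk before returning success. */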
2009      if (ret == size && !blk->enable_write_cache) {
2010          ret = bdrv_flush(blk_bs(blk));
2011      }
2012  
2013      return ret < 0 ? ret : size;
2014  }
2015  
2016  int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
2017  {
2018      if (!blk_is_available(blk)) {
2019          return -ENOMEDIUM;
2020      }
2021  
2022      return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
2023  }
2024  
2025  int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
2026  {
2027      if (!blk_is_available(blk)) {
2028          return -ENOMEDIUM;
2029      }
2030  
2031      return bdrv_probe_blocksizes(blk_bs(blk), bsz);
2032  }
2033  
2034  int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
2035  {
2036      if (!blk_is_available(blk)) {
2037          return -ENOMEDIUM;
2038      }
2039  
2040      return bdrv_probe_geometry(blk_bs(blk), geo);
2041  }
2042  
2043  /*
2044   * Updates the BlockBackendRootState object with data from the currently
2045   * attached BlockDriverState.
2046   */
2047  void blk_update_root_state(BlockBackend *blk)
2048  {
2049      assert(blk->root);
2050  
2051      blk->root_state.open_flags    = blk->root->bs->open_flags;
2052      blk->root_state.read_only     = blk->root->bs->read_only;
2053      blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
2054  }
2055  
2056  /*
2057   * Returns the detect-zeroes setting to be used for bdrv_open() of a
2058   * BlockDriverState which is supposed to inherit the root state.
2059   */
2060  bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
2061  {
2062      return blk->root_state.detect_zeroes;
2063  }
2064  
2065  /*
2066   * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
2067   * supposed to inherit the root state.
2068   */
2069  int blk_get_open_flags_from_root_state(BlockBackend *blk)
2070  {
2071      int bs_flags;
2072  
2073      bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
2074      bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;
2075  
2076      return bs_flags;
2077  }
2078  
2079  BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
2080  {
2081      return &blk->root_state;
2082  }
2083  
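      /* Commits every inserted backend that has a backing file into its backing
       * file, taking the respective AioContext lock around each commit.  Stops
       * at the first failure and returns its error value. */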
2084  int blk_commit_all(void)
2085  {
2086      BlockBackend *blk = NULL;
2087  
2088      while ((blk = blk_all_next(blk)) != NULL) {
2089          AioContext *aio_context = blk_get_aio_context(blk);
2090  
2091          aio_context_acquire(aio_context);
2092          if (blk_is_inserted(blk) && blk->root->bs->backing) {
2093              int ret = bdrv_commit(blk->root->bs);
2094              if (ret < 0) {
2095                  aio_context_release(aio_context);
2096                  return ret;
2097              }
2098          }
2099          aio_context_release(aio_context);
2100      }
2101      return 0;
2102  }
2103  
2104  
2105  /* throttling disk I/O limits */
2106  void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
2107  {
2108      throttle_group_config(&blk->public.throttle_group_member, cfg);
2109  }
2110  
2111  void blk_io_limits_disable(BlockBackend *blk)
2112  {
2113      BlockDriverState *bs = blk_bs(blk);
2114      ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
2115      assert(tgm->throttle_state);
2116      if (bs) {
2117          bdrv_drained_begin(bs);
2118      }
2119      throttle_group_unregister_tgm(tgm);
2120      if (bs) {
2121          bdrv_drained_end(bs);
2122      }
2123  }
2124  
2125  /* Should be called before blk_set_io_limits() if a limit is to be set */
2126  void blk_io_limits_enable(BlockBackend *blk, const char *group)
2127  {
2128      assert(!blk->public.throttle_group_member.throttle_state);
2129      throttle_group_register_tgm(&blk->public.throttle_group_member,
2130                                  group, blk_get_aio_context(blk));
2131  }
2132  
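      /*
       * Putting blk_io_limits_enable() and blk_set_io_limits() together
       * (sketch only; "group0" and the chosen limit are arbitrary examples):
       *
       *     ThrottleConfig cfg;
       *
       *     throttle_config_init(&cfg);
       *     cfg.buckets[THROTTLE_OPS_TOTAL].avg = 1000;
       *     blk_io_limits_enable(blk, "group0");
       *     blk_set_io_limits(blk, &cfg);
       */
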
2133  void blk_io_limits_update_group(BlockBackend *blk, const char *group)
2134  {
2135      /* this BB is not part of any group */
2136      if (!blk->public.throttle_group_member.throttle_state) {
2137          return;
2138      }
2139  
2140      /* this BB is part of the same group as the one we want */
2141      if (!g_strcmp0(throttle_group_get_name(&blk->public.throttle_group_member),
2142                  group)) {
2143          return;
2144      }
2145  
2146      /* need to change the group this BlockBackend belongs to */
2147      blk_io_limits_disable(blk);
2148      blk_io_limits_enable(blk, group);
2149  }
2150  
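      /* The drained_begin/drained_end callbacks below may nest;
       * blk->quiesce_counter tracks the nesting depth.  The attached device
       * model is only notified on the outermost begin/end pair, while I/O
       * throttling stays suspended for the whole drained section. */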
2151  static void blk_root_drained_begin(BdrvChild *child)
2152  {
2153      BlockBackend *blk = child->opaque;
2154  
2155      if (++blk->quiesce_counter == 1) {
2156          if (blk->dev_ops && blk->dev_ops->drained_begin) {
2157              blk->dev_ops->drained_begin(blk->dev_opaque);
2158          }
2159      }
2160  
2161      /* Note that blk->root may not be accessible here yet if we are just
2162       * attaching to a BlockDriverState that is drained. Use child instead. */
2163  
2164      if (atomic_fetch_inc(&blk->public.throttle_group_member.io_limits_disabled) == 0) {
2165          throttle_group_restart_tgm(&blk->public.throttle_group_member);
2166      }
2167  }
2168  
2169  static bool blk_root_drained_poll(BdrvChild *child)
2170  {
2171      BlockBackend *blk = child->opaque;
2172      assert(blk->quiesce_counter);
2173      return !!blk->in_flight;
2174  }
2175  
2176  static void blk_root_drained_end(BdrvChild *child)
2177  {
2178      BlockBackend *blk = child->opaque;
2179      assert(blk->quiesce_counter);
2180  
2181      assert(blk->public.throttle_group_member.io_limits_disabled);
2182      atomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
2183  
2184      if (--blk->quiesce_counter == 0) {
2185          if (blk->dev_ops && blk->dev_ops->drained_end) {
2186              blk->dev_ops->drained_end(blk->dev_opaque);
2187          }
2188      }
2189  }
2190  
2191  void blk_register_buf(BlockBackend *blk, void *host, size_t size)
2192  {
2193      bdrv_register_buf(blk_bs(blk), host, size);
2194  }
2195  
2196  void blk_unregister_buf(BlockBackend *blk, void *host)
2197  {
2198      bdrv_unregister_buf(blk_bs(blk), host);
2199  }
2200  
2201  int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
2202                                     BlockBackend *blk_out, int64_t off_out,
2203                                     int bytes, BdrvRequestFlags read_flags,
2204                                     BdrvRequestFlags write_flags)
2205  {
2206      int r;
2207      r = blk_check_byte_request(blk_in, off_in, bytes);
2208      if (r) {
2209          return r;
2210      }
2211      r = blk_check_byte_request(blk_out, off_out, bytes);
2212      if (r) {
2213          return r;
2214      }
2215      return bdrv_co_copy_range(blk_in->root, off_in,
2216                                blk_out->root, off_out,
2217                                bytes, read_flags, write_flags);
2218  }
2219  
2220  const BdrvChild *blk_root(BlockBackend *blk)
2221  {
2222      return blk->root;
2223  }
2224