xref: /qemu/block/export/vduse-blk.c (revision 19a989096e5d439c78a887bb51d4d9a5310557c9)
12a2359b8SXie Yongji /*
22a2359b8SXie Yongji  * Export QEMU block device via VDUSE
32a2359b8SXie Yongji  *
42a2359b8SXie Yongji  * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
52a2359b8SXie Yongji  *
62a2359b8SXie Yongji  * Author:
72a2359b8SXie Yongji  *   Xie Yongji <xieyongji@bytedance.com>
82a2359b8SXie Yongji  *
92a2359b8SXie Yongji  * This work is licensed under the terms of the GNU GPL, version 2 or
102a2359b8SXie Yongji  * later.  See the COPYING file in the top-level directory.
112a2359b8SXie Yongji  */
122a2359b8SXie Yongji 
132ca10faeSMarkus Armbruster #include "qemu/osdep.h"
142a2359b8SXie Yongji #include <sys/eventfd.h>
152a2359b8SXie Yongji 
162a2359b8SXie Yongji #include "qapi/error.h"
172a2359b8SXie Yongji #include "block/export.h"
182a2359b8SXie Yongji #include "qemu/error-report.h"
192a2359b8SXie Yongji #include "util/block-helpers.h"
202a2359b8SXie Yongji #include "subprojects/libvduse/libvduse.h"
212a2359b8SXie Yongji #include "virtio-blk-handler.h"
222a2359b8SXie Yongji 
232a2359b8SXie Yongji #include "standard-headers/linux/virtio_blk.h"
242a2359b8SXie Yongji 
252a2359b8SXie Yongji #define VDUSE_DEFAULT_NUM_QUEUE 1
262a2359b8SXie Yongji #define VDUSE_DEFAULT_QUEUE_SIZE 256
272a2359b8SXie Yongji 
282a2359b8SXie Yongji typedef struct VduseBlkExport {
292a2359b8SXie Yongji     BlockExport export;
302a2359b8SXie Yongji     VirtioBlkHandler handler;
312a2359b8SXie Yongji     VduseDev *dev;
322a2359b8SXie Yongji     uint16_t num_queues;
33d043e2dbSXie Yongji     char *recon_file;
34195332c1SStefan Hajnoczi     unsigned int inflight; /* atomic */
35195332c1SStefan Hajnoczi     bool vqs_started;
362a2359b8SXie Yongji } VduseBlkExport;
372a2359b8SXie Yongji 
382a2359b8SXie Yongji typedef struct VduseBlkReq {
392a2359b8SXie Yongji     VduseVirtqElement elem;
402a2359b8SXie Yongji     VduseVirtq *vq;
412a2359b8SXie Yongji } VduseBlkReq;
422a2359b8SXie Yongji 
/*
 * Account for one more request being processed.
 *
 * On the 0 -> 1 transition of the in-flight counter a reference on the
 * export is taken so that it cannot be deleted while requests are pending.
 */
static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
{
    unsigned int previous = qatomic_fetch_inc(&vblk_exp->inflight);

    if (previous != 0) {
        /* Another in-flight request already holds the reference */
        return;
    }

    /* Prevent export from being deleted */
    blk_exp_ref(&vblk_exp->export);
}
502a2359b8SXie Yongji 
/*
 * Account for one request having finished.
 *
 * On the 1 -> 0 transition of the in-flight counter, waiters are woken up
 * and the export reference taken by vduse_blk_inflight_inc() is dropped.
 */
static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
{
    unsigned int previous = qatomic_fetch_dec(&vblk_exp->inflight);

    if (previous != 1) {
        /* Other requests are still in flight */
        return;
    }

    /* Wake AIO_WAIT_WHILE() */
    aio_wait_kick();

    /* Now the export can be deleted */
    blk_exp_unref(&vblk_exp->export);
}
612a2359b8SXie Yongji 
/*
 * Hand a finished request back to its virtqueue and release it.
 *
 * @req:    request to complete; freed by this function
 * @in_len: length passed on to vduse_queue_push() for this element
 */
static void vduse_blk_req_complete(VduseBlkReq *req, size_t in_len)
{
    VduseVirtq *queue = req->vq;

    vduse_queue_push(queue, &req->elem, in_len);
    vduse_queue_notify(queue);

    /* The request is not referenced any more once it has been pushed */
    free(req);
}
692a2359b8SXie Yongji 
vduse_blk_virtio_process_req(void * opaque)702a2359b8SXie Yongji static void coroutine_fn vduse_blk_virtio_process_req(void *opaque)
712a2359b8SXie Yongji {
722a2359b8SXie Yongji     VduseBlkReq *req = opaque;
732a2359b8SXie Yongji     VduseVirtq *vq = req->vq;
742a2359b8SXie Yongji     VduseDev *dev = vduse_queue_get_dev(vq);
752a2359b8SXie Yongji     VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
762a2359b8SXie Yongji     VirtioBlkHandler *handler = &vblk_exp->handler;
772a2359b8SXie Yongji     VduseVirtqElement *elem = &req->elem;
782a2359b8SXie Yongji     struct iovec *in_iov = elem->in_sg;
792a2359b8SXie Yongji     struct iovec *out_iov = elem->out_sg;
802a2359b8SXie Yongji     unsigned in_num = elem->in_num;
812a2359b8SXie Yongji     unsigned out_num = elem->out_num;
822a2359b8SXie Yongji     int in_len;
832a2359b8SXie Yongji 
842a2359b8SXie Yongji     in_len = virtio_blk_process_req(handler, in_iov,
852a2359b8SXie Yongji                                     out_iov, in_num, out_num);
862a2359b8SXie Yongji     if (in_len < 0) {
872a2359b8SXie Yongji         free(req);
882a2359b8SXie Yongji         return;
892a2359b8SXie Yongji     }
902a2359b8SXie Yongji 
912a2359b8SXie Yongji     vduse_blk_req_complete(req, in_len);
922a2359b8SXie Yongji     vduse_blk_inflight_dec(vblk_exp);
932a2359b8SXie Yongji }
942a2359b8SXie Yongji 
/*
 * Drain a virtqueue: pop every available element and start a coroutine
 * running vduse_blk_virtio_process_req() for each of them.
 *
 * @dev: device owning the queue; its private pointer is the VduseBlkExport
 * @vq:  virtqueue to pop requests from
 */
static void vduse_blk_vq_handler(VduseDev *dev, VduseVirtq *vq)
{
    VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
    VduseBlkReq *req;

    while ((req = vduse_queue_pop(vq, sizeof(*req))) != NULL) {
        Coroutine *co;

        req->vq = vq;
        co = qemu_coroutine_create(vduse_blk_virtio_process_req, req);

        /* Count the request before the coroutine gets a chance to run */
        vduse_blk_inflight_inc(vblk_exp);
        qemu_coroutine_enter(co);
    }
}
1152a2359b8SXie Yongji 
on_vduse_vq_kick(void * opaque)1162a2359b8SXie Yongji static void on_vduse_vq_kick(void *opaque)
1172a2359b8SXie Yongji {
1182a2359b8SXie Yongji     VduseVirtq *vq = opaque;
1192a2359b8SXie Yongji     VduseDev *dev = vduse_queue_get_dev(vq);
1202a2359b8SXie Yongji     int fd = vduse_queue_get_fd(vq);
1212a2359b8SXie Yongji     eventfd_t kick_data;
1222a2359b8SXie Yongji 
1232a2359b8SXie Yongji     if (eventfd_read(fd, &kick_data) == -1) {
1242a2359b8SXie Yongji         error_report("failed to read data from eventfd");
1252a2359b8SXie Yongji         return;
1262a2359b8SXie Yongji     }
1272a2359b8SXie Yongji 
1282a2359b8SXie Yongji     vduse_blk_vq_handler(dev, vq);
1292a2359b8SXie Yongji }
1302a2359b8SXie Yongji 
/*
 * VduseOps.enable_queue callback: start listening for kicks on a virtqueue.
 *
 * Nothing is done while vqs_started is false; in that case
 * vduse_blk_drained_end() will start the virtqueues later.
 */
static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
{
    VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);

    if (vblk_exp->vqs_started) {
        int kick_fd = vduse_queue_get_fd(vq);

        aio_set_fd_handler(vblk_exp->export.ctx, kick_fd,
                           on_vduse_vq_kick, NULL, NULL, NULL, vq);

        /* Make sure we don't miss any kick after reconnecting */
        eventfd_write(kick_fd, 1);
    }
}
1442a2359b8SXie Yongji 
/*
 * VduseOps.disable_queue callback: stop listening for kicks on a virtqueue.
 *
 * A queue whose fd is negative has no handler to remove and is skipped.
 */
static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
{
    VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
    int kick_fd = vduse_queue_get_fd(vq);

    if (kick_fd >= 0) {
        /* Passing only NULL callbacks removes the handler for this fd */
        aio_set_fd_handler(vblk_exp->export.ctx, kick_fd,
                           NULL, NULL, NULL, NULL, NULL);
    }
}
1572a2359b8SXie Yongji 
1582a2359b8SXie Yongji static const VduseOps vduse_blk_ops = {
1592a2359b8SXie Yongji     .enable_queue = vduse_blk_enable_queue,
1602a2359b8SXie Yongji     .disable_queue = vduse_blk_disable_queue,
1612a2359b8SXie Yongji };
1622a2359b8SXie Yongji 
/*
 * fd handler installed on the VDUSE device fd.
 *
 * @opaque: the VduseDev whose fd became readable
 */
static void on_vduse_dev_kick(void *opaque)
{
    /* Let libvduse process whatever is pending on the device fd */
    vduse_dev_handler(opaque);
}
1692a2359b8SXie Yongji 
/*
 * Install the device fd handler in the export's AioContext.
 *
 * The handler is registered in vblk_exp->export.ctx; the @ctx argument
 * itself is not used here.
 */
static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
{
    VduseDev *dev = vblk_exp->dev;
    int dev_fd = vduse_dev_get_fd(dev);

    aio_set_fd_handler(vblk_exp->export.ctx, dev_fd,
                       on_vduse_dev_kick, NULL, NULL, NULL, dev);

    /* Virtqueues are handled by vduse_blk_drained_end() */
}
1782a2359b8SXie Yongji 
/*
 * Remove the device fd handler from the export's AioContext.
 */
static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
{
    int dev_fd = vduse_dev_get_fd(vblk_exp->dev);

    aio_set_fd_handler(vblk_exp->export.ctx, dev_fd,
                       NULL, NULL, NULL, NULL, NULL);

    /* Virtqueues are handled by vduse_blk_drained_begin() */
}
1862a2359b8SXie Yongji 
1872a2359b8SXie Yongji 
/*
 * AioContext notifier registered with blk_add_aio_context_notifier().
 *
 * Records the new context in the export first, because
 * vduse_blk_attach_ctx() installs the handler in vblk_exp->export.ctx.
 */
static void blk_aio_attached(AioContext *ctx, void *opaque)
{
    VduseBlkExport *state = opaque;

    state->export.ctx = ctx;
    vduse_blk_attach_ctx(state, ctx);
}
1952a2359b8SXie Yongji 
blk_aio_detach(void * opaque)1962a2359b8SXie Yongji static void blk_aio_detach(void *opaque)
1972a2359b8SXie Yongji {
1982a2359b8SXie Yongji     VduseBlkExport *vblk_exp = opaque;
1992a2359b8SXie Yongji 
2002a2359b8SXie Yongji     vduse_blk_detach_ctx(vblk_exp);
2012a2359b8SXie Yongji     vblk_exp->export.ctx = NULL;
2022a2359b8SXie Yongji }
2032a2359b8SXie Yongji 
vduse_blk_resize(void * opaque)2049e4dea67SXie Yongji static void vduse_blk_resize(void *opaque)
2059e4dea67SXie Yongji {
2069e4dea67SXie Yongji     BlockExport *exp = opaque;
2079e4dea67SXie Yongji     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
2089e4dea67SXie Yongji     struct virtio_blk_config config;
2099e4dea67SXie Yongji 
2109e4dea67SXie Yongji     config.capacity =
2119e4dea67SXie Yongji             cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
2129e4dea67SXie Yongji     vduse_dev_update_config(vblk_exp->dev, sizeof(config.capacity),
2139e4dea67SXie Yongji                             offsetof(struct virtio_blk_config, capacity),
2149e4dea67SXie Yongji                             (char *)&config.capacity);
2159e4dea67SXie Yongji }
2169e4dea67SXie Yongji 
/*
 * Remove the kick handlers of all virtqueues and remember that they are
 * stopped, so vduse_blk_enable_queue() leaves them alone until restarted.
 */
static void vduse_blk_stop_virtqueues(VduseBlkExport *vblk_exp)
{
    VduseDev *dev = vblk_exp->dev;
    const uint16_t nr_queues = vblk_exp->num_queues;
    uint16_t idx = 0;

    while (idx < nr_queues) {
        vduse_blk_disable_queue(dev, vduse_dev_get_queue(dev, idx));
        idx++;
    }

    vblk_exp->vqs_started = false;
}
226195332c1SStefan Hajnoczi 
/*
 * Install the kick handlers of all virtqueues.
 *
 * vqs_started is set first because vduse_blk_enable_queue() does nothing
 * while it is false.
 */
static void vduse_blk_start_virtqueues(VduseBlkExport *vblk_exp)
{
    VduseDev *dev = vblk_exp->dev;
    const uint16_t nr_queues = vblk_exp->num_queues;
    uint16_t idx = 0;

    vblk_exp->vqs_started = true;

    while (idx < nr_queues) {
        vduse_blk_enable_queue(dev, vduse_dev_get_queue(dev, idx));
        idx++;
    }
}
236195332c1SStefan Hajnoczi 
vduse_blk_drained_begin(void * opaque)237195332c1SStefan Hajnoczi static void vduse_blk_drained_begin(void *opaque)
238195332c1SStefan Hajnoczi {
239195332c1SStefan Hajnoczi     BlockExport *exp = opaque;
240195332c1SStefan Hajnoczi     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
241195332c1SStefan Hajnoczi 
242195332c1SStefan Hajnoczi     vduse_blk_stop_virtqueues(vblk_exp);
243195332c1SStefan Hajnoczi }
244195332c1SStefan Hajnoczi 
vduse_blk_drained_end(void * opaque)245195332c1SStefan Hajnoczi static void vduse_blk_drained_end(void *opaque)
246195332c1SStefan Hajnoczi {
247195332c1SStefan Hajnoczi     BlockExport *exp = opaque;
248195332c1SStefan Hajnoczi     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
249195332c1SStefan Hajnoczi 
250195332c1SStefan Hajnoczi     vduse_blk_start_virtqueues(vblk_exp);
251195332c1SStefan Hajnoczi }
252195332c1SStefan Hajnoczi 
vduse_blk_drained_poll(void * opaque)253195332c1SStefan Hajnoczi static bool vduse_blk_drained_poll(void *opaque)
254195332c1SStefan Hajnoczi {
255195332c1SStefan Hajnoczi     BlockExport *exp = opaque;
256195332c1SStefan Hajnoczi     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
257195332c1SStefan Hajnoczi 
258195332c1SStefan Hajnoczi     return qatomic_read(&vblk_exp->inflight) > 0;
259195332c1SStefan Hajnoczi }
260195332c1SStefan Hajnoczi 
2619e4dea67SXie Yongji static const BlockDevOps vduse_block_ops = {
2629e4dea67SXie Yongji     .resize_cb     = vduse_blk_resize,
263195332c1SStefan Hajnoczi     .drained_begin = vduse_blk_drained_begin,
264195332c1SStefan Hajnoczi     .drained_end   = vduse_blk_drained_end,
265195332c1SStefan Hajnoczi     .drained_poll  = vduse_blk_drained_poll,
2669e4dea67SXie Yongji };
2679e4dea67SXie Yongji 
vduse_blk_exp_create(BlockExport * exp,BlockExportOptions * opts,Error ** errp)2682a2359b8SXie Yongji static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
2692a2359b8SXie Yongji                                 Error **errp)
2702a2359b8SXie Yongji {
2712a2359b8SXie Yongji     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
2722a2359b8SXie Yongji     BlockExportOptionsVduseBlk *vblk_opts = &opts->u.vduse_blk;
2732a2359b8SXie Yongji     uint64_t logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
2742a2359b8SXie Yongji     uint16_t num_queues = VDUSE_DEFAULT_NUM_QUEUE;
2752a2359b8SXie Yongji     uint16_t queue_size = VDUSE_DEFAULT_QUEUE_SIZE;
2762a2359b8SXie Yongji     struct virtio_blk_config config = { 0 };
2772a2359b8SXie Yongji     uint64_t features;
2780862a087SXie Yongji     int i, ret;
2792a2359b8SXie Yongji 
2802a2359b8SXie Yongji     if (vblk_opts->has_num_queues) {
2812a2359b8SXie Yongji         num_queues = vblk_opts->num_queues;
2822a2359b8SXie Yongji         if (num_queues == 0) {
2832a2359b8SXie Yongji             error_setg(errp, "num-queues must be greater than 0");
2842a2359b8SXie Yongji             return -EINVAL;
2852a2359b8SXie Yongji         }
2862a2359b8SXie Yongji     }
2872a2359b8SXie Yongji 
2882a2359b8SXie Yongji     if (vblk_opts->has_queue_size) {
2892a2359b8SXie Yongji         queue_size = vblk_opts->queue_size;
2902a2359b8SXie Yongji         if (queue_size <= 2 || !is_power_of_2(queue_size) ||
2912a2359b8SXie Yongji             queue_size > VIRTQUEUE_MAX_SIZE) {
2922a2359b8SXie Yongji             error_setg(errp, "queue-size is invalid");
2932a2359b8SXie Yongji             return -EINVAL;
2942a2359b8SXie Yongji         }
2952a2359b8SXie Yongji     }
2962a2359b8SXie Yongji 
2972a2359b8SXie Yongji     if (vblk_opts->has_logical_block_size) {
2982a2359b8SXie Yongji         logical_block_size = vblk_opts->logical_block_size;
299*5551449bSMarkus Armbruster         if (!check_block_size("logical-block-size", logical_block_size,
300*5551449bSMarkus Armbruster                               errp)) {
3012a2359b8SXie Yongji             return -EINVAL;
3022a2359b8SXie Yongji         }
3032a2359b8SXie Yongji     }
3042a2359b8SXie Yongji     vblk_exp->num_queues = num_queues;
3052a2359b8SXie Yongji     vblk_exp->handler.blk = exp->blk;
30654fde4ffSMarkus Armbruster     vblk_exp->handler.serial = g_strdup(vblk_opts->serial ?: "");
3072a2359b8SXie Yongji     vblk_exp->handler.logical_block_size = logical_block_size;
3082a2359b8SXie Yongji     vblk_exp->handler.writable = opts->writable;
309195332c1SStefan Hajnoczi     vblk_exp->vqs_started = true;
3102a2359b8SXie Yongji 
3112a2359b8SXie Yongji     config.capacity =
3122a2359b8SXie Yongji             cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
3132a2359b8SXie Yongji     config.seg_max = cpu_to_le32(queue_size - 2);
3142a2359b8SXie Yongji     config.min_io_size = cpu_to_le16(1);
3152a2359b8SXie Yongji     config.opt_io_size = cpu_to_le32(1);
3162a2359b8SXie Yongji     config.num_queues = cpu_to_le16(num_queues);
3172a2359b8SXie Yongji     config.blk_size = cpu_to_le32(logical_block_size);
3182a2359b8SXie Yongji     config.max_discard_sectors = cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
3192a2359b8SXie Yongji     config.max_discard_seg = cpu_to_le32(1);
3202a2359b8SXie Yongji     config.discard_sector_alignment =
3212a2359b8SXie Yongji         cpu_to_le32(logical_block_size >> VIRTIO_BLK_SECTOR_BITS);
3222a2359b8SXie Yongji     config.max_write_zeroes_sectors =
3232a2359b8SXie Yongji         cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
3242a2359b8SXie Yongji     config.max_write_zeroes_seg = cpu_to_le32(1);
3252a2359b8SXie Yongji 
3262a2359b8SXie Yongji     features = vduse_get_virtio_features() |
3272a2359b8SXie Yongji                (1ULL << VIRTIO_BLK_F_SEG_MAX) |
3282a2359b8SXie Yongji                (1ULL << VIRTIO_BLK_F_TOPOLOGY) |
3292a2359b8SXie Yongji                (1ULL << VIRTIO_BLK_F_BLK_SIZE) |
3302a2359b8SXie Yongji                (1ULL << VIRTIO_BLK_F_FLUSH) |
3312a2359b8SXie Yongji                (1ULL << VIRTIO_BLK_F_DISCARD) |
3322a2359b8SXie Yongji                (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
3332a2359b8SXie Yongji 
3342a2359b8SXie Yongji     if (num_queues > 1) {
3352a2359b8SXie Yongji         features |= 1ULL << VIRTIO_BLK_F_MQ;
3362a2359b8SXie Yongji     }
3372a2359b8SXie Yongji     if (!opts->writable) {
3382a2359b8SXie Yongji         features |= 1ULL << VIRTIO_BLK_F_RO;
3392a2359b8SXie Yongji     }
3402a2359b8SXie Yongji 
341779d82e1SXie Yongji     vblk_exp->dev = vduse_dev_create(vblk_opts->name, VIRTIO_ID_BLOCK, 0,
3422a2359b8SXie Yongji                                      features, num_queues,
3432a2359b8SXie Yongji                                      sizeof(struct virtio_blk_config),
3442a2359b8SXie Yongji                                      (char *)&config, &vduse_blk_ops,
3452a2359b8SXie Yongji                                      vblk_exp);
3462a2359b8SXie Yongji     if (!vblk_exp->dev) {
3472a2359b8SXie Yongji         error_setg(errp, "failed to create vduse device");
3480862a087SXie Yongji         ret = -ENOMEM;
3490862a087SXie Yongji         goto err_dev;
3502a2359b8SXie Yongji     }
3512a2359b8SXie Yongji 
352d043e2dbSXie Yongji     vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s",
353779d82e1SXie Yongji                                            g_get_tmp_dir(), vblk_opts->name);
354d043e2dbSXie Yongji     if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) {
355d043e2dbSXie Yongji         error_setg(errp, "failed to set reconnect log file");
3560862a087SXie Yongji         ret = -EINVAL;
3570862a087SXie Yongji         goto err;
358d043e2dbSXie Yongji     }
359d043e2dbSXie Yongji 
3602a2359b8SXie Yongji     for (i = 0; i < num_queues; i++) {
3612a2359b8SXie Yongji         vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
3622a2359b8SXie Yongji     }
3632a2359b8SXie Yongji 
36460f782b6SStefan Hajnoczi     aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev),
3652a2359b8SXie Yongji                        on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
3662a2359b8SXie Yongji 
3672a2359b8SXie Yongji     blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
3682a2359b8SXie Yongji                                  vblk_exp);
3699e4dea67SXie Yongji     blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
3709e4dea67SXie Yongji 
371195332c1SStefan Hajnoczi     /*
372195332c1SStefan Hajnoczi      * We handle draining ourselves using an in-flight counter and by disabling
373195332c1SStefan Hajnoczi      * virtqueue fd handlers. Do not queue BlockBackend requests, they need to
374195332c1SStefan Hajnoczi      * complete so the in-flight counter reaches zero.
375195332c1SStefan Hajnoczi      */
376195332c1SStefan Hajnoczi     blk_set_disable_request_queuing(exp->blk, true);
377195332c1SStefan Hajnoczi 
3782a2359b8SXie Yongji     return 0;
3790862a087SXie Yongji err:
3800862a087SXie Yongji     vduse_dev_destroy(vblk_exp->dev);
3810862a087SXie Yongji     g_free(vblk_exp->recon_file);
3820862a087SXie Yongji err_dev:
3830862a087SXie Yongji     g_free(vblk_exp->handler.serial);
3840862a087SXie Yongji     return ret;
3852a2359b8SXie Yongji }
3862a2359b8SXie Yongji 
/*
 * BlockExportDriver.delete: tear down a VDUSE block export.
 *
 * No request may be in flight at this point. The reconnect log file is
 * removed unless vduse_dev_destroy() reports -EBUSY.
 */
static void vduse_blk_exp_delete(BlockExport *exp)
{
    VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);

    assert(qatomic_read(&vblk_exp->inflight) == 0);

    vduse_blk_detach_ctx(vblk_exp);
    blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                    vblk_exp);

    if (vduse_dev_destroy(vblk_exp->dev) != -EBUSY) {
        unlink(vblk_exp->recon_file);
    }

    g_free(vblk_exp->recon_file);
    g_free(vblk_exp->handler.serial);
}
4042a2359b8SXie Yongji 
405195332c1SStefan Hajnoczi /* Called with exp->ctx acquired */
vduse_blk_exp_request_shutdown(BlockExport * exp)4062a2359b8SXie Yongji static void vduse_blk_exp_request_shutdown(BlockExport *exp)
4072a2359b8SXie Yongji {
4082a2359b8SXie Yongji     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
4092a2359b8SXie Yongji 
410195332c1SStefan Hajnoczi     vduse_blk_stop_virtqueues(vblk_exp);
4112a2359b8SXie Yongji }
4122a2359b8SXie Yongji 
4132a2359b8SXie Yongji const BlockExportDriver blk_exp_vduse_blk = {
4142a2359b8SXie Yongji     .type               = BLOCK_EXPORT_TYPE_VDUSE_BLK,
4152a2359b8SXie Yongji     .instance_size      = sizeof(VduseBlkExport),
4162a2359b8SXie Yongji     .create             = vduse_blk_exp_create,
4172a2359b8SXie Yongji     .delete             = vduse_blk_exp_delete,
4182a2359b8SXie Yongji     .request_shutdown   = vduse_blk_exp_request_shutdown,
4192a2359b8SXie Yongji };
420