#include "kvm/virtio-blk.h"

#include "kvm/virtio-pci-dev.h"
#include "kvm/disk-image.h"
#include "kvm/iovec.h"
#include "kvm/mutex.h"
#include "kvm/util.h"
#include "kvm/kvm.h"
#include "kvm/pci.h"
#include "kvm/threadpool.h"
#include "kvm/ioeventfd.h"
#include "kvm/guest_compat.h"
#include "kvm/virtio-pci.h"
#include "kvm/virtio.h"

#include <linux/virtio_ring.h>
#include <linux/virtio_blk.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/types.h>
#include <pthread.h>

#define VIRTIO_BLK_MAX_DEV	4

/*
 * the header and status consume two entries
 */
#define DISK_SEG_MAX		(VIRTIO_BLK_QUEUE_SIZE - 2)
#define VIRTIO_BLK_QUEUE_SIZE	256
#define NUM_VIRT_QUEUES		1

struct blk_dev_req {
	struct virt_queue *vq;
	struct blk_dev *bdev;
	struct iovec iov[VIRTIO_BLK_QUEUE_SIZE];
	u16 out, in, head;
	u8 *status;
	struct kvm *kvm;
};

struct blk_dev {
	struct mutex mutex;

	struct list_head list;

	struct virtio_device vdev;
	struct virtio_blk_config blk_config;
	u64 capacity;
	struct disk_image *disk;

	struct virt_queue vqs[NUM_VIRT_QUEUES];
	struct blk_dev_req reqs[VIRTIO_BLK_QUEUE_SIZE];

	pthread_t io_thread;
	int io_efd;

	struct kvm *kvm;
};

static LIST_HEAD(bdevs);
static int compat_id = -1;

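/*
 * Completion callback, invoked by the disk image layer once a request
 * has finished: record the status byte, push the descriptor chain onto
 * the used ring and signal the guest if it asked to be notified.
 */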
void virtio_blk_complete(void *param, long len)
{
	struct blk_dev_req *req = param;
	struct blk_dev *bdev = req->bdev;
	int queueid = req->vq - bdev->vqs;
	u8 *status;

	/* status */
	status = req->status;
	*status = (len < 0) ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;

	mutex_lock(&bdev->mutex);
	virt_queue__set_used_elem(req->vq, req->head, len);
	mutex_unlock(&bdev->mutex);

	if (virtio_queue__should_signal(&bdev->vqs[queueid]))
		bdev->vdev.ops->signal_vq(req->kvm, &bdev->vdev, queueid);
}

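/*
 * Parse one request: copy the virtio_blk_outhdr out of the out iovecs,
 * steal the final byte of the descriptor chain for the status byte,
 * then hand the data iovecs to the disk image layer according to the
 * request type.
 */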
static void virtio_blk_do_io_request(struct kvm *kvm,
				     struct virt_queue *vq,
				     struct blk_dev_req *req)
{
	struct virtio_blk_outhdr req_hdr;
	size_t iovcount, last_iov;
	struct blk_dev *bdev;
	struct iovec *iov;
	ssize_t len;
	u32 type;
	u64 sector;

	bdev = req->bdev;
	iov = req->iov;

	iovcount = req->out;
	len = memcpy_fromiovec_safe(&req_hdr, &iov, sizeof(req_hdr), &iovcount);
	if (len) {
		pr_warning("Failed to get header");
		return;
	}

	type = virtio_guest_to_host_u32(vq->endian, req_hdr.type);
	sector = virtio_guest_to_host_u64(vq->endian, req_hdr.sector);

	iovcount += req->in;
	if (!iov_size(iov, iovcount)) {
		pr_warning("Invalid IOV");
		return;
	}

	/* Extract status byte from iovec */
	last_iov = iovcount - 1;
	while (!iov[last_iov].iov_len)
		last_iov--;
	iov[last_iov].iov_len--;
	req->status = iov[last_iov].iov_base + iov[last_iov].iov_len;
	if (!iov[last_iov].iov_len)
		iovcount--;

	switch (type) {
	case VIRTIO_BLK_T_IN:
		disk_image__read(bdev->disk, sector, iov, iovcount, req);
		break;
	case VIRTIO_BLK_T_OUT:
		disk_image__write(bdev->disk, sector, iov, iovcount, req);
		break;
	case VIRTIO_BLK_T_FLUSH:
		len = disk_image__flush(bdev->disk);
		virtio_blk_complete(req, len);
		break;
	case VIRTIO_BLK_T_GET_ID:
		len = disk_image__get_serial(bdev->disk, iov, iovcount,
					     VIRTIO_BLK_ID_BYTES);
		virtio_blk_complete(req, len);
		break;
	default:
		pr_warning("request type %d", type);
		break;
	}
}

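/*
 * Drain the available ring: pop each descriptor chain head, map it into
 * the per-request iovec array and submit the request.
 */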
static void virtio_blk_do_io(struct kvm *kvm, struct virt_queue *vq, struct blk_dev *bdev)
{
	struct blk_dev_req *req;
	u16 head;

	while (virt_queue__available(vq)) {
		head = virt_queue__pop(vq);
		req = &bdev->reqs[head];
		req->head = virt_queue__get_head_iov(vq, req->iov, &req->out,
						     &req->in, head, kvm);
		req->vq = vq;

		virtio_blk_do_io_request(kvm, vq, req);
	}
}

static u8 *get_config(struct kvm *kvm, void *dev)
{
	struct blk_dev *bdev = dev;

	return ((u8 *)(&bdev->blk_config));
}

static size_t get_config_size(struct kvm *kvm, void *dev)
{
	struct blk_dev *bdev = dev;

	return sizeof(bdev->blk_config);
}

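/*
 * Feature bits offered to the guest: segment limit, explicit flush,
 * event index, indirect descriptors and any-layout, plus read-only when
 * the backing image was opened read-only.
 */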
static u64 get_host_features(struct kvm *kvm, void *dev)
{
	struct blk_dev *bdev = dev;

	return	1UL << VIRTIO_BLK_F_SEG_MAX
		| 1UL << VIRTIO_BLK_F_FLUSH
		| 1UL << VIRTIO_RING_F_EVENT_IDX
		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
		| 1UL << VIRTIO_F_ANY_LAYOUT
		| (bdev->disk->readonly ? 1UL << VIRTIO_BLK_F_RO : 0);
}

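/*
 * Populate the config space fields (capacity, seg_max) when the
 * transport signals a config update, converting to guest endianness.
 */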
static void notify_status(struct kvm *kvm, void *dev, u32 status)
{
	struct blk_dev *bdev = dev;
	struct virtio_blk_config *conf = &bdev->blk_config;

	if (!(status & VIRTIO__STATUS_CONFIG))
		return;

	conf->capacity = virtio_host_to_guest_u64(bdev->vdev.endian, bdev->capacity);
	conf->seg_max = virtio_host_to_guest_u32(bdev->vdev.endian, DISK_SEG_MAX);
}

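/*
 * Per-device I/O thread: block on the eventfd until the guest kicks the
 * queue, then process whatever requests are pending on it.
 */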
static void *virtio_blk_thread(void *dev)
{
	struct blk_dev *bdev = dev;
	u64 data;
	int r;

	kvm__set_thread_name("virtio-blk-io");

	while (1) {
		r = read(bdev->io_efd, &data, sizeof(u64));
		if (r < 0)
			continue;
		virtio_blk_do_io(bdev->kvm, &bdev->vqs[0], bdev);
	}

	pthread_exit(NULL);
	return NULL;
}

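/*
 * Set up the (single) request queue: initialise the vring, reset the
 * per-descriptor request slots and start the I/O thread with its
 * eventfd.
 */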
static int init_vq(struct kvm *kvm, void *dev, u32 vq)
{
	unsigned int i;
	int r;
	struct blk_dev *bdev = dev;

	compat__remove_message(compat_id);

	virtio_init_device_vq(kvm, &bdev->vdev, &bdev->vqs[vq],
			      VIRTIO_BLK_QUEUE_SIZE);

	if (vq != 0)
		return 0;

	for (i = 0; i < ARRAY_SIZE(bdev->reqs); i++) {
		bdev->reqs[i] = (struct blk_dev_req) {
			.bdev = bdev,
			.kvm = kvm,
		};
	}

	mutex_init(&bdev->mutex);
	bdev->io_efd = eventfd(0, 0);
	if (bdev->io_efd < 0)
		return -errno;

	/* pthread_create() returns the error number directly; it does not set errno */
	r = pthread_create(&bdev->io_thread, NULL, virtio_blk_thread, bdev);
	if (r)
		return -r;

	return 0;
}

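/*
 * Tear down the request queue: stop the I/O thread and wait for any
 * in-flight disk requests to drain.
 */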
static void exit_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct blk_dev *bdev = dev;

	if (vq != 0)
		return;

	close(bdev->io_efd);
	pthread_cancel(bdev->io_thread);
	pthread_join(bdev->io_thread, NULL);

	disk_image__wait(bdev->disk);
}

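/*
 * Queue kick from the guest: wake the I/O thread by writing to its
 * eventfd instead of processing the ring in the vcpu thread.
 */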
static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct blk_dev *bdev = dev;
	u64 data = 1;
	int r;

	r = write(bdev->io_efd, &data, sizeof(data));
	if (r < 0)
		return r;

	return 0;
}

static struct virt_queue *get_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct blk_dev *bdev = dev;

	return &bdev->vqs[vq];
}

static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
{
	/* FIXME: dynamic */
	return VIRTIO_BLK_QUEUE_SIZE;
}

static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
{
	/* FIXME: dynamic */
	return size;
}

static unsigned int get_vq_count(struct kvm *kvm, void *dev)
{
	return NUM_VIRT_QUEUES;
}

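/* Callbacks wired into the generic virtio transport layer. */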
static struct virtio_ops blk_dev_virtio_ops = {
	.get_config		= get_config,
	.get_config_size	= get_config_size,
	.get_host_features	= get_host_features,
	.get_vq_count		= get_vq_count,
	.init_vq		= init_vq,
	.exit_vq		= exit_vq,
	.notify_status		= notify_status,
	.notify_vq		= notify_vq,
	.get_vq			= get_vq,
	.get_size_vq		= get_size_vq,
	.set_size_vq		= set_size_vq,
};

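/*
 * Create one virtio-blk device for a backing disk image: allocate the
 * device, register it on the configured virtio transport and hook the
 * completion callback into the disk image layer.
 */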
static int virtio_blk__init_one(struct kvm *kvm, struct disk_image *disk)
{
	struct blk_dev *bdev;
	int r;

	if (!disk)
		return -EINVAL;

	bdev = calloc(1, sizeof(struct blk_dev));
	if (bdev == NULL)
		return -ENOMEM;

	*bdev = (struct blk_dev) {
		.disk		= disk,
		.capacity	= disk->size / SECTOR_SIZE,
		.kvm		= kvm,
	};

	list_add_tail(&bdev->list, &bdevs);

	r = virtio_init(kvm, bdev, &bdev->vdev, &blk_dev_virtio_ops,
			kvm->cfg.virtio_transport, PCI_DEVICE_ID_VIRTIO_BLK,
			VIRTIO_ID_BLOCK, PCI_CLASS_BLK);
	if (r < 0)
		return r;

	disk_image__set_callback(bdev->disk, virtio_blk_complete);

	if (compat_id == -1)
		compat_id = virtio_compat_add_message("virtio-blk", "CONFIG_VIRTIO_BLK");

	return 0;
}

static int virtio_blk__exit_one(struct kvm *kvm, struct blk_dev *bdev)
{
	list_del(&bdev->list);
	virtio_exit(kvm, &bdev->vdev);
	free(bdev);

	return 0;
}

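/*
 * Instantiate a virtio-blk device for every configured disk image that
 * does not carry a WWPN; on failure, tear down any devices created so
 * far.
 */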
int virtio_blk__init(struct kvm *kvm)
{
	int i, r = 0;

	for (i = 0; i < kvm->nr_disks; i++) {
		if (kvm->disks[i]->wwpn)
			continue;
		r = virtio_blk__init_one(kvm, kvm->disks[i]);
		if (r < 0)
			goto cleanup;
	}

	return 0;
cleanup:
	virtio_blk__exit(kvm);
	return r;
}
virtio_dev_init(virtio_blk__init);

int virtio_blk__exit(struct kvm *kvm)
{
	while (!list_empty(&bdevs)) {
		struct blk_dev *bdev;

		bdev = list_first_entry(&bdevs, struct blk_dev, list);
		virtio_blk__exit_one(kvm, bdev);
	}

	return 0;
}
virtio_dev_exit(virtio_blk__exit);