xref: /kvmtool/disk/aio.c (revision 2790307c3d6d2aad838e1eef76ec0aa21b110698)
1 #include <libaio.h>
2 #include <pthread.h>
3 #include <sys/eventfd.h>
4 
5 #include "kvm/brlock.h"
6 #include "kvm/disk-image.h"
7 #include "kvm/kvm.h"
8 #include "linux/list.h"
9 
10 #define AIO_MAX 256
11 
aio_submit(struct disk_image * disk,int nr,struct iocb ** ios)12 static int aio_submit(struct disk_image *disk, int nr, struct iocb **ios)
13 {
14 	int ret;
15 
16 	__sync_fetch_and_add(&disk->aio_inflight, nr);
17 	/*
18 	 * A wmb() is needed here, to ensure disk_aio_thread() sees this
19 	 * increase after receiving the events. It is included in the
20 	 * __sync_fetch_and_add (as a full barrier).
21 	 */
22 restart:
23 	ret = io_submit(disk->ctx, nr, ios);
24 	if (ret == -EAGAIN)
25 		goto restart;
26 	else if (ret <= 0)
27 		/* disk_aio_thread() is never going to see those */
28 		__sync_fetch_and_sub(&disk->aio_inflight, nr);
29 
30 	return ret;
31 }
32 
/*
 * Queue an asynchronous vectored read of @iovcount buffers from @iov, starting
 * at @sector of the disk image. Completion is signalled through disk->evt and
 * @param is stored in the iocb's data field.
 *
 * Returns whatever aio_submit() returns for the single request.
 */
ssize_t raw_image__read_async(struct disk_image *disk, u64 sector,
			      const struct iovec *iov, int iovcount,
			      void *param)
{
	struct iocb cb;
	struct iocb *batch[] = { &cb };

	/* io_prep_preadv() must run first: the fields below are set on top of it. */
	io_prep_preadv(&cb, disk->fd, iov, iovcount, sector << SECTOR_SHIFT);
	cb.data = param;
	io_set_eventfd(&cb, disk->evt);

	return aio_submit(disk, 1, batch);
}
47 
/*
 * Queue an asynchronous vectored write of @iovcount buffers from @iov, starting
 * at @sector of the disk image. Completion is signalled through disk->evt and
 * @param is stored in the iocb's data field.
 *
 * Returns whatever aio_submit() returns for the single request.
 */
ssize_t raw_image__write_async(struct disk_image *disk, u64 sector,
			       const struct iovec *iov, int iovcount,
			       void *param)
{
	struct iocb cb;
	struct iocb *batch[] = { &cb };

	/* io_prep_pwritev() must run first: the fields below are set on top of it. */
	io_prep_pwritev(&cb, disk->fd, iov, iovcount, sector << SECTOR_SHIFT);
	cb.data = param;
	io_set_eventfd(&cb, disk->evt);

	return aio_submit(disk, 1, batch);
}
62 
63 /*
64  * When this function returns there are no in-flight I/O. Caller ensures that
65  * io_submit() isn't called concurrently.
66  *
67  * Returns an inaccurate number of I/O that was in-flight when the function was
68  * called.
69  */
raw_image__wait(struct disk_image * disk)70 int raw_image__wait(struct disk_image *disk)
71 {
72 	u64 inflight = disk->aio_inflight;
73 
74 	while (disk->aio_inflight) {
75 		usleep(100);
76 		barrier();
77 	}
78 
79 	return inflight;
80 }
81 
disk_aio_get_events(struct disk_image * disk)82 static int disk_aio_get_events(struct disk_image *disk)
83 {
84 	struct io_event event[AIO_MAX];
85 	struct timespec notime = {0};
86 	int nr, i;
87 
88 	do {
89 		nr = io_getevents(disk->ctx, 1, ARRAY_SIZE(event), event, &notime);
90 		for (i = 0; i < nr; i++)
91 			disk->disk_req_cb(event[i].data, event[i].res);
92 
93 		/* Pairs with wmb() in aio_submit() */
94 		rmb();
95 		__sync_fetch_and_sub(&disk->aio_inflight, nr);
96 
97 	} while (nr > 0);
98 
99 	return 0;
100 }
101 
disk_aio_thread(void * param)102 static void *disk_aio_thread(void *param)
103 {
104 	struct disk_image *disk = param;
105 	u64 dummy;
106 
107 	kvm__set_thread_name("disk-image-io");
108 
109 	while (read(disk->evt, &dummy, sizeof(dummy)) > 0) {
110 		if (disk_aio_get_events(disk))
111 			break;
112 	}
113 
114 	return NULL;
115 }
116 
disk_aio_setup(struct disk_image * disk)117 int disk_aio_setup(struct disk_image *disk)
118 {
119 	int r;
120 
121 	/* No need to setup AIO if the disk ops won't make use of it */
122 	if (!disk->ops->async)
123 		return 0;
124 
125 	disk->evt = eventfd(0, 0);
126 	if (disk->evt < 0)
127 		return -errno;
128 
129 	io_setup(AIO_MAX, &disk->ctx);
130 	r = pthread_create(&disk->thread, NULL, disk_aio_thread, disk);
131 	if (r) {
132 		r = -errno;
133 		close(disk->evt);
134 		return r;
135 	}
136 
137 	disk->async = true;
138 	return 0;
139 }
140 
disk_aio_destroy(struct disk_image * disk)141 void disk_aio_destroy(struct disk_image *disk)
142 {
143 	if (!disk->async)
144 		return;
145 
146 	pthread_cancel(disk->thread);
147 	pthread_join(disk->thread, NULL);
148 	close(disk->evt);
149 	io_destroy(disk->ctx);
150 }
151