// SPDX-License-Identifier: GPL-2.0-only
/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/seqlock.h>
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>

#include <kvm/iodev.h>

#ifdef CONFIG_HAVE_KVM_IRQFD

static struct workqueue_struct *irqfd_cleanup_wq;

bool __attribute__((weak))
kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
{
	return true;
}

static void
irqfd_inject(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, inject);
	struct kvm *kvm = irqfd->kvm;

	if (!irqfd->resampler) {
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
				false);
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
				false);
	} else
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    irqfd->gsi, 1, false);
}

/*
 * Since resampler irqfds share an IRQ source ID, we de-assert once
 * then notify all of the resampler irqfds using this GSI.  We can't
 * do multiple de-asserts or we risk racing with incoming re-asserts.
 */
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_kernel_irqfd_resampler *resampler;
	struct kvm *kvm;
	struct kvm_kernel_irqfd *irqfd;
	int idx;

	resampler = container_of(kian,
			struct kvm_kernel_irqfd_resampler, notifier);
	kvm = resampler->kvm;

	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
		    resampler->notifier.gsi, 0, false);

	idx = srcu_read_lock(&kvm->irq_srcu);

	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
		eventfd_signal(irqfd->resamplefd, 1);

	srcu_read_unlock(&kvm->irq_srcu, idx);
}
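
/*
 * Usage sketch (illustrative userspace code, not part of this file):
 * for a level-triggered interrupt, a VMM pairs the irqfd with a
 * resamplefd.  Signaling the irqfd asserts the GSI; on guest EOI, KVM
 * de-asserts the line (above) and signals the resamplefd so the VMM
 * can re-assert if the device still holds the level high.  A minimal
 * sketch, assuming vm_fd is a KVM VM fd, gsi is a valid GSI, and
 * <linux/kvm.h> plus <sys/eventfd.h> are included:
 *
 *	int irq_fd = eventfd(0, EFD_CLOEXEC);
 *	int resample_fd = eventfd(0, EFD_CLOEXEC);
 *	struct kvm_irqfd args = {
 *		.fd = irq_fd,
 *		.gsi = gsi,
 *		.flags = KVM_IRQFD_FLAG_RESAMPLE,
 *		.resamplefd = resample_fd,
 *	};
 *
 *	ioctl(vm_fd, KVM_IRQFD, &args);
 */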

static void
irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
	struct kvm *kvm = resampler->kvm;

	mutex_lock(&kvm->irqfds.resampler_lock);

	list_del_rcu(&irqfd->resampler_link);
	synchronize_srcu(&kvm->irq_srcu);

	if (list_empty(&resampler->list)) {
		list_del(&resampler->link);
		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    resampler->notifier.gsi, 0, false);
		kfree(resampler);
	}

	mutex_unlock(&kvm->irqfds.resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, shutdown);
	struct kvm *kvm = irqfd->kvm;
	u64 cnt;

	/* Make sure irqfd has been initialized in assign path. */
	synchronize_srcu(&kvm->irq_srcu);

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed.
	 */
	flush_work(&irqfd->inject);

	if (irqfd->resampler) {
		irqfd_resampler_shutdown(irqfd);
		eventfd_ctx_put(irqfd->resamplefd);
	}

	/*
	 * It is now safe to release the object's resources.
	 */
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	irq_bypass_unregister_consumer(&irqfd->consumer);
#endif
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}

/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
	return !list_empty(&irqfd->list);
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

int __attribute__((weak)) kvm_arch_set_irq_inatomic(
				struct kvm_kernel_irq_routing_entry *irq,
				struct kvm *kvm, int irq_source_id,
				int level,
				bool line_status)
{
	return -EWOULDBLOCK;
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(wait, struct kvm_kernel_irqfd, wait);
	__poll_t flags = key_to_poll(key);
	struct kvm_kernel_irq_routing_entry irq;
	struct kvm *kvm = irqfd->kvm;
	unsigned seq;
	int idx;

	if (flags & EPOLLIN) {
		idx = srcu_read_lock(&kvm->irq_srcu);
		do {
			seq = read_seqcount_begin(&irqfd->irq_entry_sc);
			irq = irqfd->irq_entry;
		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
		/* An event has been signaled, inject an interrupt */
		if (kvm_arch_set_irq_inatomic(&irq, kvm,
					      KVM_USERSPACE_IRQ_SOURCE_ID, 1,
					      false) == -EWOULDBLOCK)
			schedule_work(&irqfd->inject);
		srcu_read_unlock(&kvm->irq_srcu, idx);
	}

	if (flags & EPOLLHUP) {
		/* The eventfd is closing, detach from KVM */
		unsigned long iflags;

		spin_lock_irqsave(&kvm->irqfds.lock, iflags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will clean up for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold.
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
	}

	return 0;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(pt, struct kvm_kernel_irqfd, pt);
	add_wait_queue(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	int n_entries;

	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

	write_seqcount_begin(&irqfd->irq_entry_sc);

	e = entries;
	if (n_entries == 1)
		irqfd->irq_entry = *e;
	else
		irqfd->irq_entry.type = 0;

	write_seqcount_end(&irqfd->irq_entry_sc);
}

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
				struct irq_bypass_consumer *cons)
{
}

void __attribute__((weak)) kvm_arch_irq_bypass_start(
				struct irq_bypass_consumer *cons)
{
}

int __attribute__((weak)) kvm_arch_update_irqfd_routing(
				struct kvm *kvm, unsigned int host_irq,
				uint32_t guest_irq, bool set)
{
	return 0;
}
#endif

static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct fd f;
	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
	int ret;
	__poll_t events;
	int idx;

	if (!kvm_arch_intc_initialized(kvm))
		return -EAGAIN;

	if (!kvm_arch_irqfd_allowed(kvm, args))
		return -EINVAL;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = args->gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
	seqcount_spinlock_init(&irqfd->irq_entry_sc, &kvm->irqfds.lock);

	f = fdget(args->fd);
	if (!f.file) {
		ret = -EBADF;
		goto out;
	}

	eventfd = eventfd_ctx_fileget(f.file);
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
		struct kvm_kernel_irqfd_resampler *resampler;

		resamplefd = eventfd_ctx_fdget(args->resamplefd);
		if (IS_ERR(resamplefd)) {
			ret = PTR_ERR(resamplefd);
			goto fail;
		}

		irqfd->resamplefd = resamplefd;
		INIT_LIST_HEAD(&irqfd->resampler_link);

		mutex_lock(&kvm->irqfds.resampler_lock);

		list_for_each_entry(resampler,
				    &kvm->irqfds.resampler_list, link) {
			if (resampler->notifier.gsi == irqfd->gsi) {
				irqfd->resampler = resampler;
				break;
			}
		}

		if (!irqfd->resampler) {
			resampler = kzalloc(sizeof(*resampler),
					    GFP_KERNEL_ACCOUNT);
			if (!resampler) {
				ret = -ENOMEM;
				mutex_unlock(&kvm->irqfds.resampler_lock);
				goto fail;
			}

			resampler->kvm = kvm;
			INIT_LIST_HEAD(&resampler->list);
			resampler->notifier.gsi = irqfd->gsi;
			resampler->notifier.irq_acked = irqfd_resampler_ack;
			INIT_LIST_HEAD(&resampler->link);

			list_add(&resampler->link, &kvm->irqfds.resampler_list);
			kvm_register_irq_ack_notifier(kvm,
						      &resampler->notifier);
			irqfd->resampler = resampler;
		}

		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
		synchronize_srcu(&kvm->irq_srcu);

		mutex_unlock(&kvm->irqfds.resampler_lock);
	}

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	spin_lock_irq(&kvm->irqfds.lock);

	ret = 0;
	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd != tmp->eventfd)
			continue;
		/* This fd is used for another irq already. */
		ret = -EBUSY;
		spin_unlock_irq(&kvm->irqfds.lock);
		goto fail;
	}

	idx = srcu_read_lock(&kvm->irq_srcu);
	irqfd_update(kvm, irqfd);

	list_add_tail(&irqfd->list, &kvm->irqfds.items);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	events = vfs_poll(f.file, &irqfd->pt);

	if (events & EPOLLIN)
		schedule_work(&irqfd->inject);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	if (kvm_arch_has_irq_bypass()) {
		irqfd->consumer.token = (void *)irqfd->eventfd;
		irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
		irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
		irqfd->consumer.start = kvm_arch_irq_bypass_start;
		ret = irq_bypass_register_consumer(&irqfd->consumer);
		if (ret)
			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
				irqfd->consumer.token, ret);
	}
#endif

	srcu_read_unlock(&kvm->irq_srcu, idx);

	/*
	 * Do not drop the file until the irqfd is fully initialized;
	 * otherwise we might race against the EPOLLHUP.
	 */
	fdput(f);
	return 0;

fail:
	if (irqfd->resampler)
		irqfd_resampler_shutdown(irqfd);

	if (resamplefd && !IS_ERR(resamplefd))
		eventfd_ctx_put(resamplefd);

	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	fdput(f);

out:
	kfree(irqfd);
	return ret;
}

bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	struct kvm_irq_ack_notifier *kian;
	int gsi, idx;

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
					 link)
			if (kian->gsi == gsi) {
				srcu_read_unlock(&kvm->irq_srcu, idx);
				return true;
			}

	srcu_read_unlock(&kvm->irq_srcu, idx);

	return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
{
	struct kvm_irq_ack_notifier *kian;

	hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
				 link)
		if (kian->gsi == gsi)
			kian->irq_acked(kian);
}

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	int gsi, idx;

	trace_kvm_ack_irq(irqchip, pin);

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
	mutex_unlock(&kvm->irq_lock);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				    struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_del_init_rcu(&kian->link);
	mutex_unlock(&kvm->irq_lock);
	synchronize_srcu(&kvm->irq_srcu);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}
#endif

void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQFD
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
	mutex_init(&kvm->irqfds.resampler_lock);
#endif
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * Shut down any irqfds that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
			/*
			 * This clearing of irq_entry.type is needed for when
			 * another thread calls kvm_irq_routing_update before
			 * we flush the workqueue below (we synchronize with
			 * kvm_irq_routing_update using irqfds.lock).
			 */
			write_seqcount_begin(&irqfd->irq_entry_sc);
			irqfd->irq_entry.type = 0;
			write_seqcount_end(&irqfd->irq_entry_sc);
			irqfd_deactivate(irqfd);
		}
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
		return -EINVAL;

	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, args);

	return kvm_irqfd_assign(kvm, args);
}
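
/*
 * Usage sketch (illustrative userspace code, not part of this file):
 * a VMM wires an eventfd to a GSI, injects an interrupt by writing to
 * the eventfd, and later tears the binding down.  A minimal sketch,
 * assuming vm_fd is a KVM VM fd, efd is an eventfd, and gsi is valid:
 *
 *	struct kvm_irqfd args = { .fd = efd, .gsi = gsi };
 *	uint64_t one = 1;
 *
 *	ioctl(vm_fd, KVM_IRQFD, &args);		// assign
 *	write(efd, &one, sizeof(one));		// fires irqfd_wakeup() above
 *	args.flags = KVM_IRQFD_FLAG_DEASSIGN;
 *	ioctl(vm_fd, KVM_IRQFD, &args);		// deassign
 */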

/*
 * This function is called as the kvm VM fd is being released. Shut down
 * all irqfds that still remain open.
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);
}

/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
		irqfd_update(kvm, irqfd);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
		if (irqfd->producer) {
			int ret = kvm_arch_update_irqfd_routing(
					irqfd->kvm, irqfd->producer->irq,
					irqfd->gsi, 1);
			WARN_ON(ret);
		}
#endif
	}

	spin_unlock_irq(&kvm->irqfds.lock);
}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances. We need our own isolated
 * queue to ease flushing work items when a VM exits.
 */
int kvm_irqfd_init(void)
{
	irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void kvm_irqfd_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */
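
/*
 * Usage sketch (illustrative userspace code, not part of this file):
 * register a 4-byte MMIO doorbell so guest writes of value 1 to a given
 * address signal an eventfd instead of exiting to userspace.  The
 * address and datamatch values below are made-up examples; vm_fd is a
 * KVM VM fd and efd is an eventfd:
 *
 *	struct kvm_ioeventfd args = {
 *		.datamatch = 1,
 *		.addr      = 0xfe000000,
 *		.len       = 4,
 *		.fd        = efd,
 *		.flags     = KVM_IOEVENTFD_FLAG_DATAMATCH,
 *	};
 *
 *	ioctl(vm_fd, KVM_IOEVENTFD, &args);
 */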

struct _ioeventfd {
	struct list_head     list;
	u64                  addr;
	int                  length;
	struct eventfd_ctx  *eventfd;
	u64                  datamatch;
	struct kvm_io_device dev;
	u8                   bus_idx;
	bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (addr != p->addr)
		/* address must be precise for a hit */
		return false;

	if (!p->length)
		/* length = 0 means only look at the address, so always a hit */
		return true;

	if (len != p->length)
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch;
}
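
/*
 * Worked example (hypothetical values): with p->length == 2 and
 * p->datamatch == 0x1234, a 2-byte guest write of 0x1234 to p->addr is
 * a hit, while a 4-byte write of 0x1234 to the same address is not,
 * because the length must match exactly unless p->length is 0.
 */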

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
		int len, const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd, 1);
	return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->bus_idx == p->bus_idx &&
		    _p->addr == p->addr &&
		    (!_p->length || !p->length ||
		     (_p->length == p->length &&
		      (_p->wildcard || p->wildcard ||
		       _p->datamatch == p->datamatch))))
			return true;

	return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
	if (flags & KVM_IOEVENTFD_FLAG_PIO)
		return KVM_PIO_BUS;
	if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
		return KVM_VIRTIO_CCW_NOTIFY_BUS;
	return KVM_MMIO_BUS;
}

static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
				enum kvm_bus bus_idx,
				struct kvm_ioeventfd *args)
{
	struct eventfd_ctx *eventfd;
	struct _ioeventfd *p;
	int ret;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr    = args->addr;
	p->bus_idx = bus_idx;
	p->length  = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional; otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	mutex_lock(&kvm->slots_lock);

	/* Verify that there isn't a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
				      &p->dev);
	if (ret < 0)
		goto unlock_fail;

	kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
	list_add_tail(&p->list, &kvm->ioeventfds);

	mutex_unlock(&kvm->slots_lock);

	return 0;

unlock_fail:
	mutex_unlock(&kvm->slots_lock);

fail:
	kfree(p);
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
			   struct kvm_ioeventfd *args)
{
	struct _ioeventfd        *p, *tmp;
	struct eventfd_ctx       *eventfd;
	struct kvm_io_bus	 *bus;
	int                       ret = -ENOENT;
	bool                      wildcard;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

	mutex_lock(&kvm->slots_lock);

	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
		if (p->bus_idx != bus_idx ||
		    p->eventfd != eventfd  ||
		    p->addr != args->addr  ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
		bus = kvm_get_bus(kvm, bus_idx);
		if (bus)
			bus->ioeventfd_count--;
		ioeventfd_release(p);
		ret = 0;
		break;
	}

	mutex_unlock(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}

static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
	int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);

	if (!args->len && bus_idx == KVM_MMIO_BUS)
		kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);

	return ret;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus              bus_idx;
	int ret;

	bus_idx = ioeventfd_bus_from_flags(args->flags);
	/* must be natural-word sized, or 0 to ignore length */
	switch (args->len) {
	case 0:
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	/* ioeventfd with no length can't be combined with DATAMATCH */
	if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
		return -EINVAL;

	ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
	if (ret)
		goto fail;

	/* When length is ignored, MMIO is also put on a separate bus, for
	 * faster lookups.
	 */
	if (!args->len && bus_idx == KVM_MMIO_BUS) {
		ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
		if (ret < 0)
			goto fast_fail;
	}

	return 0;

fast_fail:
	kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
fail:
	return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}