xref: /qemu/hw/i386/kvm/xen_evtchn.c (revision d2e9b78162e31b1eaf20f3a4f563da82da56908d)
1 /*
2  * QEMU Xen emulation: Event channel support
3  *
4  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5  *
6  * Authors: David Woodhouse <dwmw2@infradead.org>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/host-utils.h"
14 #include "qemu/module.h"
15 #include "qemu/lockable.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/log.h"
18 #include "qemu/error-report.h"
19 #include "monitor/monitor.h"
20 #include "monitor/hmp.h"
21 #include "qapi/error.h"
22 #include "qapi/qapi-commands-misc-i386.h"
23 #include "qobject/qdict.h"
24 #include "qom/object.h"
25 #include "exec/target_page.h"
26 #include "system/address-spaces.h"
27 #include "migration/vmstate.h"
28 #include "trace.h"
29 
30 #include "hw/sysbus.h"
31 #include "hw/xen/xen.h"
32 #include "hw/i386/x86.h"
33 #include "hw/i386/pc.h"
34 #include "hw/pci/pci.h"
35 #include "hw/pci/msi.h"
36 #include "hw/pci/msix.h"
37 #include "hw/irq.h"
38 #include "hw/xen/xen_backend_ops.h"
39 
40 #include "xen_evtchn.h"
41 #include "xen_overlay.h"
42 #include "xen_xenstore.h"
43 
44 #include "system/kvm.h"
45 #include "system/kvm_xen.h"
46 #include <linux/kvm.h>
47 #include <sys/eventfd.h>
48 
49 #include "hw/xen/interface/memory.h"
50 #include "hw/xen/interface/hvm/params.h"
51 
52 /* XX: For kvm_update_msi_routes_all() */
53 #include "target/i386/kvm/kvm_i386.h"
54 
55 #define TYPE_XEN_EVTCHN "xen-evtchn"
56 OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
57 
58 typedef struct XenEvtchnPort {
59     uint32_t vcpu;      /* Xen/ACPI vcpu_id */
60     uint16_t type;      /* EVTCHNSTAT_xxxx */
61     union {
62         uint16_t val;  /* raw value for serialization etc. */
63         uint16_t pirq;
64         uint16_t virq;
65         struct {
66             uint16_t port:15;
67             uint16_t to_qemu:1; /* Only two targets; qemu or loopback */
68         } interdomain;
69     } u;
70 } XenEvtchnPort;
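/*
 * All union members overlay the same 16 bits, which is why the vmstate
 * below serializes the raw u.val form. With the usual little-endian
 * bitfield layout, to_qemu lands in bit 15: for example, a loopback
 * interdomain port bound to remote port 5 would have u.val == 0x0005,
 * while the same binding to QEMU itself would have u.val == 0x8005.
 */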
71 
72 /* 32-bit compatibility definitions, also used natively in 32-bit build */
73 struct compat_arch_vcpu_info {
74     unsigned int cr2;
75     unsigned int pad[5];
76 };
77 
78 struct compat_vcpu_info {
79     uint8_t evtchn_upcall_pending;
80     uint8_t evtchn_upcall_mask;
81     uint16_t pad;
82     uint32_t evtchn_pending_sel;
83     struct compat_arch_vcpu_info arch;
84     struct vcpu_time_info time;
85 }; /* 64 bytes (x86) */
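/*
 * The size is easy to verify: the event channel fields take 8 bytes
 * (1 + 1 + 2 + 4), compat_arch_vcpu_info above takes 24 (4 + 20) and
 * vcpu_time_info takes 32, for 8 + 24 + 32 = 64 bytes in total.
 */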
86 
87 struct compat_arch_shared_info {
88     unsigned int max_pfn;
89     unsigned int pfn_to_mfn_frame_list_list;
90     unsigned int nmi_reason;
91     unsigned int p2m_cr3;
92     unsigned int p2m_vaddr;
93     unsigned int p2m_generation;
94     uint32_t wc_sec_hi;
95 };
96 
97 struct compat_shared_info {
98     struct compat_vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
99     uint32_t evtchn_pending[32];
100     uint32_t evtchn_mask[32];
101     uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
102     uint32_t wc_sec;
103     uint32_t wc_nsec;
104     struct compat_arch_shared_info arch;
105 };
106 
107 #define COMPAT_EVTCHN_2L_NR_CHANNELS            1024
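/*
 * The 2-level limit is the square of the word size: evtchn_pending_sel
 * selects one of 32 words of 32 bits in compat_shared_info above, for
 * 32 * 32 = 1024 channels, while the 64-bit layout allows 64 words of
 * 64 bits, i.e. EVTCHN_2L_NR_CHANNELS == 64 * 64 == 4096.
 */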
108 
109 /* Local private implementation of struct xenevtchn_handle */
110 struct xenevtchn_handle {
111     evtchn_port_t be_port;
112     evtchn_port_t guest_port; /* Or zero for unbound */
113     int fd;
114 };
115 
116 /*
117  * These 'emuirq' values are used by Xen in the LM stream... and yes, I am
118  * insane enough to think about guest-transparent live migration from actual
119  * Xen to QEMU, and ensuring that we can convert/consume the stream.
120  */
121 #define IRQ_UNBOUND -1
122 #define IRQ_PT -2
123 #define IRQ_MSI_EMU -3
124 
125 
126 struct pirq_info {
127     int gsi;
128     uint16_t port;
129     PCIDevice *dev;
130     int vector;
131     bool is_msix;
132     bool is_masked;
133     bool is_translated;
134 };
135 
136 struct XenEvtchnState {
137     /*< private >*/
138     SysBusDevice busdev;
139     /*< public >*/
140 
141     uint64_t callback_param;
142     bool evtchn_in_kernel;
143     bool setting_callback_gsi;
144     int extern_gsi_level;
145     uint32_t callback_gsi;
146 
147     QEMUBH *gsi_bh;
148 
149     QemuMutex port_lock;
150     uint32_t nr_ports;
151     XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];
152 
153     /* Connected to the system GSIs for raising callback as GSI / INTx */
154     unsigned int nr_callback_gsis;
155     qemu_irq *callback_gsis;
156 
157     struct xenevtchn_handle *be_handles[EVTCHN_2L_NR_CHANNELS];
158 
159     uint32_t nr_pirqs;
160 
161     /* Bitmap of allocated PIRQs (serialized) */
162     uint16_t nr_pirq_inuse_words;
163     uint64_t *pirq_inuse_bitmap;
164 
165     /* GSI → PIRQ mapping (serialized) */
166     uint16_t gsi_pirq[IOAPIC_NUM_PINS];
167 
168     /* Per-GSI assertion state (serialized) */
169     uint32_t pirq_gsi_set;
170 
171     /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
172     struct pirq_info *pirq;
173 };
174 
175 #define pirq_inuse_word(s, pirq) (s->pirq_inuse_bitmap[((pirq) / 64)])
176 #define pirq_inuse_bit(pirq) (1ULL << ((pirq) & 63))
177 
178 #define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))
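/*
 * For example, pirq 130 lives in pirq_inuse_bitmap[2] (130 / 64 == 2)
 * under the mask 1ULL << 2 (130 & 63 == 2).
 */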
179 
180 struct XenEvtchnState *xen_evtchn_singleton;
181 
182 /* Top bits of callback_param are the type (HVM_PARAM_CALLBACK_TYPE_xxx) */
183 #define CALLBACK_VIA_TYPE_SHIFT 56
184 
185 static void unbind_backend_ports(XenEvtchnState *s);
186 
187 static int xen_evtchn_pre_load(void *opaque)
188 {
189     XenEvtchnState *s = opaque;
190 
191     /* Unbind all the backend-side ports; they need to rebind */
192     unbind_backend_ports(s);
193 
194     /* It'll be leaked otherwise, as the vmstate load allocates a fresh one. */
195     g_free(s->pirq_inuse_bitmap);
196     s->pirq_inuse_bitmap = NULL;
197 
198     return 0;
199 }
200 
201 static int xen_evtchn_post_load(void *opaque, int version_id)
202 {
203     XenEvtchnState *s = opaque;
204     uint32_t i;
205 
206     if (s->callback_param) {
207         xen_evtchn_set_callback_param(s->callback_param);
208     }
209 
210     /* Rebuild s->pirq[].port mapping */
211     for (i = 0; i < s->nr_ports; i++) {
212         XenEvtchnPort *p = &s->port_table[i];
213 
214         if (p->type == EVTCHNSTAT_pirq) {
215             assert(p->u.pirq);
216             assert(p->u.pirq < s->nr_pirqs);
217 
218             /*
219              * Set the gsi to IRQ_UNBOUND; it may be changed to an actual
220              * GSI# below, or to IRQ_MSI_EMU when the MSI table snooping
221              * catches up with it.
222              */
223             s->pirq[p->u.pirq].gsi = IRQ_UNBOUND;
224             s->pirq[p->u.pirq].port = i;
225         }
226     }
227     /* Rebuild s->pirq[].gsi mapping */
228     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
229         if (s->gsi_pirq[i]) {
230             s->pirq[s->gsi_pirq[i]].gsi = i;
231         }
232     }
233     return 0;
234 }
235 
236 static bool xen_evtchn_is_needed(void *opaque)
237 {
238     return xen_mode == XEN_EMULATE;
239 }
240 
241 static const VMStateDescription xen_evtchn_port_vmstate = {
242     .name = "xen_evtchn_port",
243     .version_id = 1,
244     .minimum_version_id = 1,
245     .fields = (const VMStateField[]) {
246         VMSTATE_UINT32(vcpu, XenEvtchnPort),
247         VMSTATE_UINT16(type, XenEvtchnPort),
248         VMSTATE_UINT16(u.val, XenEvtchnPort),
249         VMSTATE_END_OF_LIST()
250     }
251 };
252 
253 static const VMStateDescription xen_evtchn_vmstate = {
254     .name = "xen_evtchn",
255     .version_id = 1,
256     .minimum_version_id = 1,
257     .needed = xen_evtchn_is_needed,
258     .pre_load = xen_evtchn_pre_load,
259     .post_load = xen_evtchn_post_load,
260     .fields = (const VMStateField[]) {
261         VMSTATE_UINT64(callback_param, XenEvtchnState),
262         VMSTATE_UINT32(nr_ports, XenEvtchnState),
263         VMSTATE_STRUCT_VARRAY_UINT32(port_table, XenEvtchnState, nr_ports, 1,
264                                      xen_evtchn_port_vmstate, XenEvtchnPort),
265         VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, IOAPIC_NUM_PINS),
266         VMSTATE_VARRAY_UINT16_ALLOC(pirq_inuse_bitmap, XenEvtchnState,
267                                     nr_pirq_inuse_words, 0,
268                                     vmstate_info_uint64, uint64_t),
269         VMSTATE_UINT32(pirq_gsi_set, XenEvtchnState),
270         VMSTATE_END_OF_LIST()
271     }
272 };
273 
274 static void xen_evtchn_class_init(ObjectClass *klass, const void *data)
275 {
276     DeviceClass *dc = DEVICE_CLASS(klass);
277 
278     dc->vmsd = &xen_evtchn_vmstate;
279 }
280 
281 static const TypeInfo xen_evtchn_info = {
282     .name          = TYPE_XEN_EVTCHN,
283     .parent        = TYPE_SYS_BUS_DEVICE,
284     .instance_size = sizeof(XenEvtchnState),
285     .class_init    = xen_evtchn_class_init,
286 };
287 
288 static struct evtchn_backend_ops emu_evtchn_backend_ops = {
289     .open = xen_be_evtchn_open,
290     .bind_interdomain = xen_be_evtchn_bind_interdomain,
291     .unbind = xen_be_evtchn_unbind,
292     .close = xen_be_evtchn_close,
293     .get_fd = xen_be_evtchn_fd,
294     .notify = xen_be_evtchn_notify,
295     .unmask = xen_be_evtchn_unmask,
296     .pending = xen_be_evtchn_pending,
297 };
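/*
 * PV backend drivers don't call the xen_be_evtchn_*() functions directly;
 * they reach this table through the xen_evtchn_ops pointer, which
 * xen_evtchn_create() sets below.
 */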
298 
299 static void gsi_assert_bh(void *opaque)
300 {
301     struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
302     if (vi) {
303         xen_evtchn_set_callback_level(!!vi->evtchn_upcall_pending);
304     }
305 }
306 
307 void xen_evtchn_create(unsigned int nr_gsis, qemu_irq *system_gsis)
308 {
309     XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
310                                                         -1, NULL));
311     int i;
312 
313     xen_evtchn_singleton = s;
314 
315     qemu_mutex_init(&s->port_lock);
316     s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);
317 
318     /*
319      * These are the *output* GSIs from event channel support, for
320      * signalling CPU0's events via GSI or PCI INTx instead of the
321      * per-CPU vector. We create a *set* of irqs and connect one to
322      * each of the system GSIs which were passed in from the platform
323      * code, and then just trigger the right one as appropriate from
324      * xen_evtchn_set_callback_level().
325      */
326     s->nr_callback_gsis = nr_gsis;
327     s->callback_gsis = g_new0(qemu_irq, nr_gsis);
328     for (i = 0; i < nr_gsis; i++) {
329         sysbus_init_irq(SYS_BUS_DEVICE(s), &s->callback_gsis[i]);
330         sysbus_connect_irq(SYS_BUS_DEVICE(s), i, system_gsis[i]);
331     }
332 
333     /*
334      * The Xen scheme for encoding PIRQ# into an MSI message is not
335      * compatible with 32-bit MSI, as it puts the high bits of the
336      * PIRQ# into the high bits of the MSI message address, instead of
337      * using the Extended Destination ID in address bits 4-11 which
338      * perhaps would have been a better choice.
339      *
340      * To keep life simple, kvm_accel_instance_init() initialises the
341      * default to 256, which conveniently doesn't need to set anything
342      * outside the low 32 bits of the address. It can be increased by
343      * setting the xen-evtchn-max-pirq property.
344      */
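    /*
     * For example (hypothetical values, assuming the usual -accel property
     * syntax), a guest needing a larger PIRQ space might be started with:
     *
     *   -accel kvm,xen-version=0x4000a,kernel-irqchip=split,xen-evtchn-max-pirq=1024
     */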
345     s->nr_pirqs = kvm_xen_get_evtchn_max_pirq();
346 
347     s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
348     s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
349     s->pirq = g_new0(struct pirq_info, s->nr_pirqs);
350 
351     /* Set event channel functions for backend drivers to use */
352     xen_evtchn_ops = &emu_evtchn_backend_ops;
353 }
354 
355 static void xen_evtchn_register_types(void)
356 {
357     type_register_static(&xen_evtchn_info);
358 }
359 
360 type_init(xen_evtchn_register_types)
361 
362 static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
363 {
364     PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
365     uint8_t pin = param & 3;
366     uint8_t devfn = (param >> 8) & 0xff;
367     uint16_t bus = (param >> 16) & 0xffff;
368     uint16_t domain = (param >> 32) & 0xffff;
369     PCIDevice *pdev;
370     PCIINTxRoute r;
371 
372     if (domain || !pcms) {
373         return 0;
374     }
375 
376     pdev = pci_find_device(pcms->pcibus, bus, devfn);
377     if (!pdev) {
378         return 0;
379     }
380 
381     r = pci_device_route_intx_to_irq(pdev, pin);
382     if (r.mode != PCI_INTX_ENABLED) {
383         return 0;
384     }
385 
386     /*
387      * Hm, can we be notified of INTX routing changes? Not without
388      * *owning* the device and being allowed to overwrite its own
389      * ->intx_routing_notifier, AFAICT. So let's not.
390      */
391     return r.irq;
392 }
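/*
 * The decode above matches Xen's PCI INTx encoding of the callback param:
 * INTx pin in bits 0-1, devfn in bits 8-15, bus in bits 16-31 and domain
 * (segment) in bits 32-47. For a hypothetical device at 0000:00:02.0 using
 * INTA, the low bits of the param would thus be (0x10 << 8) == 0x1000.
 */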
393 
394 void xen_evtchn_set_callback_level(int level)
395 {
396     XenEvtchnState *s = xen_evtchn_singleton;
397     if (!s) {
398         return;
399     }
400 
401     /*
402      * We get to this function in a number of ways:
403      *
404      *  • From I/O context, via PV backend drivers sending a notification to
405      *    the guest.
406      *
407      *  • From guest vCPU context, via loopback interdomain event channels
408      *    (or theoretically even IPIs but guests don't use those with GSI
409      *    delivery because that's pointless. We don't want a malicious guest
410      *    to be able to trigger a deadlock though, so we can't rule it out.)
411      *
412      *  • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
413      *    configured.
414      *
415      *  • From guest vCPU context in the KVM exit handler, if the upcall
416      *    pending flag has been cleared and the GSI needs to be deasserted.
417      *
418      *  • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
419      *    been acked in the irqchip.
420      *
421      * Whichever context we come from, if we aren't already holding the BQL
422      * then we can't take it now, as we may already hold s->port_lock. So
423      * trigger the BH to set the IRQ for us instead of doing it immediately.
424      *
425      * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
426      * will deliberately take the BQL because they want the change to take
427      * effect immediately. That just leaves interdomain loopback as the case
428      * which uses the BH.
429      */
430     if (!bql_locked()) {
431         qemu_bh_schedule(s->gsi_bh);
432         return;
433     }
434 
435     if (s->callback_gsi && s->callback_gsi < s->nr_callback_gsis) {
436         /*
437          * Ugly, but since we hold the BQL we can set this flag so that
438          * xen_evtchn_set_gsi() can tell the difference between this code
439          * setting the GSI, and an external device (PCI INTx) doing so.
440          */
441         s->setting_callback_gsi = true;
442         /* Do not deassert the line if an external device is asserting it. */
443         qemu_set_irq(s->callback_gsis[s->callback_gsi],
444                      level || s->extern_gsi_level);
445         s->setting_callback_gsi = false;
446 
447         /*
448          * If the callback GSI is the only one asserted, ensure the status
449          * is polled for deassertion in kvm_arch_post_run().
450          */
451         if (level && !s->extern_gsi_level) {
452             kvm_xen_set_callback_asserted();
453         }
454     }
455 }
456 
457 int xen_evtchn_set_callback_param(uint64_t param)
458 {
459     XenEvtchnState *s = xen_evtchn_singleton;
460     struct kvm_xen_hvm_attr xa = {
461         .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
462         .u.vector = 0,
463     };
464     bool in_kernel = false;
465     uint32_t gsi = 0;
466     int type = param >> CALLBACK_VIA_TYPE_SHIFT;
467     int ret;
468 
469     if (!s) {
470         return -ENOTSUP;
471     }
472 
473     /*
474      * We need the BQL because set_callback_pci_intx() may call into PCI code,
475      * and because we may need to manipulate the old and new GSI levels.
476      */
477     assert(bql_locked());
478     qemu_mutex_lock(&s->port_lock);
479 
480     switch (type) {
481     case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
482         xa.u.vector = (uint8_t)param;
483 
484         ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
485         if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
486             in_kernel = true;
487         }
488         gsi = 0;
489         break;
490     }
491 
492     case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
493         gsi = set_callback_pci_intx(s, param);
494         ret = gsi ? 0 : -EINVAL;
495         break;
496 
497     case HVM_PARAM_CALLBACK_TYPE_GSI:
498         gsi = (uint32_t)param;
499         ret = 0;
500         break;
501 
502     default:
503         /* Xen doesn't return an error even if you set something bogus */
504         ret = 0;
505         break;
506     }
507 
508     /* If the guest has set a per-vCPU callback vector, prefer that. */
509     if (gsi && kvm_xen_has_vcpu_callback_vector()) {
510         in_kernel = kvm_xen_has_cap(EVTCHN_SEND);
511         gsi = 0;
512     }
513 
514     if (!ret) {
515         /* If vector delivery was turned *off* then tell the kernel */
516         if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
517             HVM_PARAM_CALLBACK_TYPE_VECTOR && !xa.u.vector) {
518             kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
519         }
520         s->callback_param = param;
521         s->evtchn_in_kernel = in_kernel;
522 
523         if (gsi != s->callback_gsi) {
524             struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
525 
526             xen_evtchn_set_callback_level(0);
527             s->callback_gsi = gsi;
528 
529             if (gsi && vi && vi->evtchn_upcall_pending) {
530                 kvm_xen_inject_vcpu_callback_vector(0, type);
531             }
532         }
533     }
534 
535     qemu_mutex_unlock(&s->port_lock);
536 
537     return ret;
538 }
539 
540 static void inject_callback(XenEvtchnState *s, uint32_t vcpu)
541 {
542     int type = s->callback_param >> CALLBACK_VIA_TYPE_SHIFT;
543 
544     kvm_xen_inject_vcpu_callback_vector(vcpu, type);
545 }
546 
547 static void deassign_kernel_port(evtchn_port_t port)
548 {
549     struct kvm_xen_hvm_attr ha;
550     int ret;
551 
552     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
553     ha.u.evtchn.send_port = port;
554     ha.u.evtchn.flags = KVM_XEN_EVTCHN_DEASSIGN;
555 
556     ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
557     if (ret) {
558         qemu_log_mask(LOG_GUEST_ERROR, "Failed to unbind kernel port %d: %s\n",
559                       port, strerror(ret));
560     }
561 }
562 
563 static int assign_kernel_port(uint16_t type, evtchn_port_t port,
564                               uint32_t vcpu_id)
565 {
566     CPUState *cpu = qemu_get_cpu(vcpu_id);
567     struct kvm_xen_hvm_attr ha;
568 
569     if (!cpu) {
570         return -ENOENT;
571     }
572 
573     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
574     ha.u.evtchn.send_port = port;
575     ha.u.evtchn.type = type;
576     ha.u.evtchn.flags = 0;
577     ha.u.evtchn.deliver.port.port = port;
578     ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);
579     ha.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
580 
581     return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
582 }
583 
584 static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
585 {
586     struct kvm_xen_hvm_attr ha;
587 
588     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
589     ha.u.evtchn.send_port = port;
590     ha.u.evtchn.type = type;
591     ha.u.evtchn.flags = 0;
592     ha.u.evtchn.deliver.eventfd.port = 0;
593     ha.u.evtchn.deliver.eventfd.fd = fd;
594 
595     return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
596 }
597 
598 static bool valid_port(evtchn_port_t port)
599 {
600     if (!port) {
601         return false;
602     }
603 
604     if (xen_is_long_mode()) {
605         return port < EVTCHN_2L_NR_CHANNELS;
606     } else {
607         return port < COMPAT_EVTCHN_2L_NR_CHANNELS;
608     }
609 }
610 
611 static bool valid_vcpu(uint32_t vcpu)
612 {
613     return !!qemu_get_cpu(vcpu);
614 }
615 
616 static void unbind_backend_ports(XenEvtchnState *s)
617 {
618     XenEvtchnPort *p;
619     int i;
620 
621     for (i = 1; i < s->nr_ports; i++) {
622         p = &s->port_table[i];
623         if (p->type == EVTCHNSTAT_interdomain && p->u.interdomain.to_qemu) {
624             evtchn_port_t be_port = p->u.interdomain.port;
625 
626             if (s->be_handles[be_port]) {
627                 /* This part will be overwritten on the load anyway. */
628                 p->type = EVTCHNSTAT_unbound;
629                 p->u.interdomain.port = 0;
630 
631                 /* Leave the backend port open and unbound too. */
632                 if (kvm_xen_has_cap(EVTCHN_SEND)) {
633                     deassign_kernel_port(i);
634                 }
635                 s->be_handles[be_port]->guest_port = 0;
636             }
637         }
638     }
639 }
640 
641 int xen_evtchn_status_op(struct evtchn_status *status)
642 {
643     XenEvtchnState *s = xen_evtchn_singleton;
644     XenEvtchnPort *p;
645 
646     if (!s) {
647         return -ENOTSUP;
648     }
649 
650     if (status->dom != DOMID_SELF && status->dom != xen_domid) {
651         return -ESRCH;
652     }
653 
654     if (!valid_port(status->port)) {
655         return -EINVAL;
656     }
657 
658     qemu_mutex_lock(&s->port_lock);
659 
660     p = &s->port_table[status->port];
661 
662     status->status = p->type;
663     status->vcpu = p->vcpu;
664 
665     switch (p->type) {
666     case EVTCHNSTAT_unbound:
667         status->u.unbound.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
668                                                          : xen_domid;
669         break;
670 
671     case EVTCHNSTAT_interdomain:
672         status->u.interdomain.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
673                                                              : xen_domid;
674         status->u.interdomain.port = p->u.interdomain.port;
675         break;
676 
677     case EVTCHNSTAT_pirq:
678         status->u.pirq = p->u.pirq;
679         break;
680 
681     case EVTCHNSTAT_virq:
682         status->u.virq = p->u.virq;
683         break;
684     }
685 
686     qemu_mutex_unlock(&s->port_lock);
687     return 0;
688 }
689 
690 /*
691  * Never thought I'd hear myself say this, but C++ templates would be
692  * kind of nice here.
693  *
694  * template<class T> static int do_unmask_port(T *shinfo, ...);
695  */
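/*
 * The _lm and _compat variants below differ only in the word size that
 * typeof() picks up from the shinfo types (64-bit vs 32-bit). Both follow
 * the same 2-level delivery chain: set or test the per-port pending bit,
 * stop if the port is masked, then propagate upwards by setting the word's
 * bit in the vCPU's evtchn_pending_sel, setting evtchn_upcall_pending, and
 * finally injecting the callback via inject_callback().
 */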
696 static int do_unmask_port_lm(XenEvtchnState *s, evtchn_port_t port,
697                              bool do_unmask, struct shared_info *shinfo,
698                              struct vcpu_info *vcpu_info)
699 {
700     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
701     typeof(shinfo->evtchn_pending[0]) mask;
702     int idx = port / bits_per_word;
703     int offset = port % bits_per_word;
704 
705     mask = 1UL << offset;
706 
707     if (idx >= bits_per_word) {
708         return -EINVAL;
709     }
710 
711     if (do_unmask) {
712         /*
713          * If this is a true unmask operation, clear the mask bit. If
714          * it was already unmasked, we have nothing further to do.
715          */
716         if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
717             return 0;
718         }
719     } else {
720         /*
721          * This is a pseudo-unmask for affinity changes. We don't
722          * change the mask bit, and if it's *masked* we have nothing
723          * else to do.
724          */
725         if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
726             return 0;
727         }
728     }
729 
730     /* If the event was not pending, we're done. */
731     if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
732         return 0;
733     }
734 
735     /* Now on to the vcpu_info evtchn_pending_sel index... */
736     mask = 1UL << idx;
737 
738     /* If a port in this word was already pending for this vCPU, all done. */
739     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
740         return 0;
741     }
742 
743     /* Set evtchn_upcall_pending for this vCPU */
744     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
745         return 0;
746     }
747 
748     inject_callback(s, s->port_table[port].vcpu);
749 
750     return 0;
751 }
752 
753 static int do_unmask_port_compat(XenEvtchnState *s, evtchn_port_t port,
754                                  bool do_unmask,
755                                  struct compat_shared_info *shinfo,
756                                  struct compat_vcpu_info *vcpu_info)
757 {
758     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
759     typeof(shinfo->evtchn_pending[0]) mask;
760     int idx = port / bits_per_word;
761     int offset = port % bits_per_word;
762 
763     mask = 1UL << offset;
764 
765     if (idx >= bits_per_word) {
766         return -EINVAL;
767     }
768 
769     if (do_unmask) {
770         /*
771          * If this is a true unmask operation, clear the mask bit. If
772          * it was already unmasked, we have nothing further to do.
773          */
774         if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
775             return 0;
776         }
777     } else {
778         /*
779          * This is a pseudo-unmask for affinity changes. We don't
780          * change the mask bit, and if it's *masked* we have nothing
781          * else to do.
782          */
783         if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
784             return 0;
785         }
786     }
787 
788     /* If the event was not pending, we're done. */
789     if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
790         return 0;
791     }
792 
793     /* Now on to the vcpu_info evtchn_pending_sel index... */
794     mask = 1UL << idx;
795 
796     /* If a port in this word was already pending for this vCPU, all done. */
797     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
798         return 0;
799     }
800 
801     /* Set evtchn_upcall_pending for this vCPU */
802     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
803         return 0;
804     }
805 
806     inject_callback(s, s->port_table[port].vcpu);
807 
808     return 0;
809 }
810 
811 static int unmask_port(XenEvtchnState *s, evtchn_port_t port, bool do_unmask)
812 {
813     void *vcpu_info, *shinfo;
814 
815     if (s->port_table[port].type == EVTCHNSTAT_closed) {
816         return -EINVAL;
817     }
818 
819     shinfo = xen_overlay_get_shinfo_ptr();
820     if (!shinfo) {
821         return -ENOTSUP;
822     }
823 
824     vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
825     if (!vcpu_info) {
826         return -EINVAL;
827     }
828 
829     if (xen_is_long_mode()) {
830         return do_unmask_port_lm(s, port, do_unmask, shinfo, vcpu_info);
831     } else {
832         return do_unmask_port_compat(s, port, do_unmask, shinfo, vcpu_info);
833     }
834 }
835 
836 static int do_set_port_lm(XenEvtchnState *s, evtchn_port_t port,
837                           struct shared_info *shinfo,
838                           struct vcpu_info *vcpu_info)
839 {
840     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
841     typeof(shinfo->evtchn_pending[0]) mask;
842     int idx = port / bits_per_word;
843     int offset = port % bits_per_word;
844 
845     mask = 1UL << offset;
846 
847     if (idx >= bits_per_word) {
848         return -EINVAL;
849     }
850 
851     /* Update the pending bit itself. If it was already set, we're done. */
852     if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
853         return 0;
854     }
855 
856     /* Check if it's masked. */
857     if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
858         return 0;
859     }
860 
861     /* Now on to the vcpu_info evtchn_pending_sel index... */
862     mask = 1UL << idx;
863 
864     /* If a port in this word was already pending for this vCPU, all done. */
865     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
866         return 0;
867     }
868 
869     /* Set evtchn_upcall_pending for this vCPU */
870     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
871         return 0;
872     }
873 
874     inject_callback(s, s->port_table[port].vcpu);
875 
876     return 0;
877 }
878 
879 static int do_set_port_compat(XenEvtchnState *s, evtchn_port_t port,
880                               struct compat_shared_info *shinfo,
881                               struct compat_vcpu_info *vcpu_info)
882 {
883     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
884     typeof(shinfo->evtchn_pending[0]) mask;
885     int idx = port / bits_per_word;
886     int offset = port % bits_per_word;
887 
888     mask = 1UL << offset;
889 
890     if (idx >= bits_per_word) {
891         return -EINVAL;
892     }
893 
894     /* Update the pending bit itself. If it was already set, we're done. */
895     if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
896         return 0;
897     }
898 
899     /* Check if it's masked. */
900     if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
901         return 0;
902     }
903 
904     /* Now on to the vcpu_info evtchn_pending_sel index... */
905     mask = 1UL << idx;
906 
907     /* If a port in this word was already pending for this vCPU, all done. */
908     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
909         return 0;
910     }
911 
912     /* Set evtchn_upcall_pending for this vCPU */
913     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
914         return 0;
915     }
916 
917     inject_callback(s, s->port_table[port].vcpu);
918 
919     return 0;
920 }
921 
922 static int set_port_pending(XenEvtchnState *s, evtchn_port_t port)
923 {
924     void *vcpu_info, *shinfo;
925 
926     if (s->port_table[port].type == EVTCHNSTAT_closed) {
927         return -EINVAL;
928     }
929 
930     if (s->evtchn_in_kernel) {
931         XenEvtchnPort *p = &s->port_table[port];
932         CPUState *cpu = qemu_get_cpu(p->vcpu);
933         struct kvm_irq_routing_xen_evtchn evt;
934 
935         if (!cpu) {
936             return 0;
937         }
938 
939         evt.port = port;
940         evt.vcpu = kvm_arch_vcpu_id(cpu);
941         evt.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
942 
943         return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_EVTCHN_SEND, &evt);
944     }
945 
946     shinfo = xen_overlay_get_shinfo_ptr();
947     if (!shinfo) {
948         return -ENOTSUP;
949     }
950 
951     vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
952     if (!vcpu_info) {
953         return -EINVAL;
954     }
955 
956     if (xen_is_long_mode()) {
957         return do_set_port_lm(s, port, shinfo, vcpu_info);
958     } else {
959         return do_set_port_compat(s, port, shinfo, vcpu_info);
960     }
961 }
962 
963 static int clear_port_pending(XenEvtchnState *s, evtchn_port_t port)
964 {
965     void *p = xen_overlay_get_shinfo_ptr();
966 
967     if (!p) {
968         return -ENOTSUP;
969     }
970 
971     if (xen_is_long_mode()) {
972         struct shared_info *shinfo = p;
973         const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
974         typeof(shinfo->evtchn_pending[0]) mask;
975         int idx = port / bits_per_word;
976         int offset = port % bits_per_word;
977 
978         mask = 1UL << offset;
979 
980         qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
981     } else {
982         struct compat_shared_info *shinfo = p;
983         const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
984         typeof(shinfo->evtchn_pending[0]) mask;
985         int idx = port / bits_per_word;
986         int offset = port % bits_per_word;
987 
988         mask = 1UL << offset;
989 
990         qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
991     }
992     return 0;
993 }
994 
995 static void free_port(XenEvtchnState *s, evtchn_port_t port)
996 {
997     s->port_table[port].type = EVTCHNSTAT_closed;
998     s->port_table[port].u.val = 0;
999     s->port_table[port].vcpu = 0;
1000 
1001     if (s->nr_ports == port + 1) {
1002         do {
1003             s->nr_ports--;
1004         } while (s->nr_ports &&
1005                  s->port_table[s->nr_ports - 1].type == EVTCHNSTAT_closed);
1006     }
1007 
1008     /* Clear pending event to avoid unexpected behavior on re-bind. */
1009     clear_port_pending(s, port);
1010 }
1011 
1012 static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
1013                          uint16_t val, evtchn_port_t *port)
1014 {
1015     evtchn_port_t p = 1;
1016 
1017     for (p = 1; valid_port(p); p++) {
1018         if (s->port_table[p].type == EVTCHNSTAT_closed) {
1019             s->port_table[p].vcpu = vcpu;
1020             s->port_table[p].type = type;
1021             s->port_table[p].u.val = val;
1022 
1023             *port = p;
1024 
1025             if (s->nr_ports < p + 1) {
1026                 s->nr_ports = p + 1;
1027             }
1028 
1029             return 0;
1030         }
1031     }
1032     return -ENOSPC;
1033 }
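/*
 * Note the scan starts at 1: port 0 is never handed out, and valid_port()
 * rejects it, so a zero port can safely mean "unbound" elsewhere.
 */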
1034 
1035 static bool virq_is_global(uint32_t virq)
1036 {
1037     switch (virq) {
1038     case VIRQ_TIMER:
1039     case VIRQ_DEBUG:
1040     case VIRQ_XENOPROF:
1041     case VIRQ_XENPMU:
1042         return false;
1043 
1044     default:
1045         return true;
1046     }
1047 }
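/*
 * These four are the VIRQs which Xen defines as per-vCPU; the rest (e.g.
 * VIRQ_CONSOLE, VIRQ_DOM_EXC) are global and, as in Xen, must be bound on
 * vCPU0 first (see xen_evtchn_bind_virq_op() below).
 */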
1048 
1049 static int close_port(XenEvtchnState *s, evtchn_port_t port,
1050                       bool *flush_kvm_routes)
1051 {
1052     XenEvtchnPort *p = &s->port_table[port];
1053 
1054     /* Because it *might* be a PIRQ port */
1055     assert(bql_locked());
1056 
1057     switch (p->type) {
1058     case EVTCHNSTAT_closed:
1059         return -ENOENT;
1060 
1061     case EVTCHNSTAT_pirq:
1062         s->pirq[p->u.pirq].port = 0;
1063         if (s->pirq[p->u.pirq].is_translated) {
1064             *flush_kvm_routes = true;
1065         }
1066         break;
1067 
1068     case EVTCHNSTAT_virq:
1069         kvm_xen_set_vcpu_virq(virq_is_global(p->u.virq) ? 0 : p->vcpu,
1070                               p->u.virq, 0);
1071         break;
1072 
1073     case EVTCHNSTAT_ipi:
1074         if (s->evtchn_in_kernel) {
1075             deassign_kernel_port(port);
1076         }
1077         break;
1078 
1079     case EVTCHNSTAT_interdomain:
1080         if (p->u.interdomain.to_qemu) {
1081             uint16_t be_port = p->u.interdomain.port;
1082             struct xenevtchn_handle *xc = s->be_handles[be_port];
1083             if (xc) {
1084                 if (kvm_xen_has_cap(EVTCHN_SEND)) {
1085                     deassign_kernel_port(port);
1086                 }
1087                 xc->guest_port = 0;
1088             }
1089         } else {
1090             /* Loopback interdomain */
1091             XenEvtchnPort *rp = &s->port_table[p->u.interdomain.port];
1092             if (!valid_port(p->u.interdomain.port) ||
1093                 rp->u.interdomain.port != port ||
1094                 rp->type != EVTCHNSTAT_interdomain) {
1095                 error_report("Inconsistent state for interdomain unbind");
1096             } else {
1097                 /* Set the other end back to unbound */
1098                 rp->type = EVTCHNSTAT_unbound;
1099                 rp->u.interdomain.port = 0;
1100             }
1101         }
1102         break;
1103 
1104     default:
1105         break;
1106     }
1107 
1108     free_port(s, port);
1109     return 0;
1110 }
1111 
1112 int xen_evtchn_soft_reset(void)
1113 {
1114     XenEvtchnState *s = xen_evtchn_singleton;
1115     bool flush_kvm_routes = false;
1116     int i;
1117 
1118     if (!s) {
1119         return -ENOTSUP;
1120     }
1121 
1122     assert(bql_locked());
1123 
1124     qemu_mutex_lock(&s->port_lock);
1125 
1126     for (i = 0; i < s->nr_ports; i++) {
1127         close_port(s, i, &flush_kvm_routes);
1128     }
1129 
1130     qemu_mutex_unlock(&s->port_lock);
1131 
1132     if (flush_kvm_routes) {
1133         kvm_update_msi_routes_all(NULL, true, 0, 0);
1134     }
1135 
1136     return 0;
1137 }
1138 
1139 int xen_evtchn_reset_op(struct evtchn_reset *reset)
1140 {
1141     if (reset->dom != DOMID_SELF && reset->dom != xen_domid) {
1142         return -ESRCH;
1143     }
1144 
1145     BQL_LOCK_GUARD();
1146     return xen_evtchn_soft_reset();
1147 }
1148 
1149 int xen_evtchn_close_op(struct evtchn_close *close)
1150 {
1151     XenEvtchnState *s = xen_evtchn_singleton;
1152     bool flush_kvm_routes = false;
1153     int ret;
1154 
1155     if (!s) {
1156         return -ENOTSUP;
1157     }
1158 
1159     if (!valid_port(close->port)) {
1160         return -EINVAL;
1161     }
1162 
1163     BQL_LOCK_GUARD();
1164     qemu_mutex_lock(&s->port_lock);
1165 
1166     ret = close_port(s, close->port, &flush_kvm_routes);
1167 
1168     qemu_mutex_unlock(&s->port_lock);
1169 
1170     if (flush_kvm_routes) {
1171         kvm_update_msi_routes_all(NULL, true, 0, 0);
1172     }
1173 
1174     return ret;
1175 }
1176 
1177 int xen_evtchn_unmask_op(struct evtchn_unmask *unmask)
1178 {
1179     XenEvtchnState *s = xen_evtchn_singleton;
1180     int ret;
1181 
1182     if (!s) {
1183         return -ENOTSUP;
1184     }
1185 
1186     if (!valid_port(unmask->port)) {
1187         return -EINVAL;
1188     }
1189 
1190     qemu_mutex_lock(&s->port_lock);
1191 
1192     ret = unmask_port(s, unmask->port, true);
1193 
1194     qemu_mutex_unlock(&s->port_lock);
1195 
1196     return ret;
1197 }
1198 
1199 int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu)
1200 {
1201     XenEvtchnState *s = xen_evtchn_singleton;
1202     XenEvtchnPort *p;
1203     int ret = -EINVAL;
1204 
1205     if (!s) {
1206         return -ENOTSUP;
1207     }
1208 
1209     if (!valid_port(vcpu->port)) {
1210         return -EINVAL;
1211     }
1212 
1213     if (!valid_vcpu(vcpu->vcpu)) {
1214         return -ENOENT;
1215     }
1216 
1217     qemu_mutex_lock(&s->port_lock);
1218 
1219     p = &s->port_table[vcpu->port];
1220 
1221     if (p->type == EVTCHNSTAT_interdomain ||
1222         p->type == EVTCHNSTAT_unbound ||
1223         p->type == EVTCHNSTAT_pirq ||
1224         (p->type == EVTCHNSTAT_virq && virq_is_global(p->u.virq))) {
1225         /*
1226          * unmask_port() with do_unmask==false will just raise the event
1227          * on the new vCPU if the port was already pending.
1228          */
1229         p->vcpu = vcpu->vcpu;
1230         unmask_port(s, vcpu->port, false);
1231         ret = 0;
1232     }
1233 
1234     qemu_mutex_unlock(&s->port_lock);
1235 
1236     return ret;
1237 }
1238 
1239 int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
1240 {
1241     XenEvtchnState *s = xen_evtchn_singleton;
1242     int ret;
1243 
1244     if (!s) {
1245         return -ENOTSUP;
1246     }
1247 
1248     if (virq->virq >= NR_VIRQS) {
1249         return -EINVAL;
1250     }
1251 
1252     /* Global VIRQ must be allocated on vCPU0 first */
1253     if (virq_is_global(virq->virq) && virq->vcpu != 0) {
1254         return -EINVAL;
1255     }
1256 
1257     if (!valid_vcpu(virq->vcpu)) {
1258         return -ENOENT;
1259     }
1260 
1261     qemu_mutex_lock(&s->port_lock);
1262 
1263     ret = allocate_port(s, virq->vcpu, EVTCHNSTAT_virq, virq->virq,
1264                         &virq->port);
1265     if (!ret) {
1266         ret = kvm_xen_set_vcpu_virq(virq->vcpu, virq->virq, virq->port);
1267         if (ret) {
1268             free_port(s, virq->port);
1269         }
1270     }
1271 
1272     qemu_mutex_unlock(&s->port_lock);
1273 
1274     return ret;
1275 }
1276 
1277 int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
1278 {
1279     XenEvtchnState *s = xen_evtchn_singleton;
1280     int ret;
1281 
1282     if (!s) {
1283         return -ENOTSUP;
1284     }
1285 
1286     if (pirq->pirq >= s->nr_pirqs) {
1287         return -EINVAL;
1288     }
1289 
1290     BQL_LOCK_GUARD();
1291 
1292     if (s->pirq[pirq->pirq].port) {
1293         return -EBUSY;
1294     }
1295 
1296     qemu_mutex_lock(&s->port_lock);
1297 
1298     ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
1299                         &pirq->port);
1300     if (ret) {
1301         qemu_mutex_unlock(&s->port_lock);
1302         return ret;
1303     }
1304 
1305     s->pirq[pirq->pirq].port = pirq->port;
1306     trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);
1307 
1308     qemu_mutex_unlock(&s->port_lock);
1309 
1310     /*
1311      * Need to do the unmask outside port_lock because it may call
1312      * back into the MSI translate function.
1313      */
1314     if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) {
1315         if (s->pirq[pirq->pirq].is_masked) {
1316             PCIDevice *dev = s->pirq[pirq->pirq].dev;
1317             int vector = s->pirq[pirq->pirq].vector;
1318             char *dev_path = qdev_get_dev_path(DEVICE(dev));
1319 
1320             trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
1321             g_free(dev_path);
1322 
1323             if (s->pirq[pirq->pirq].is_msix) {
1324                 msix_set_mask(dev, vector, false);
1325             } else {
1326                 msi_set_mask(dev, vector, false, NULL);
1327             }
1328         } else if (s->pirq[pirq->pirq].is_translated) {
1329             /*
1330              * If KVM had attempted to translate this one before, make it try
1331              * again. If we unmasked, then the notifier on the MSI(-X) vector
1332              * will already have had the same effect.
1333              */
1334             kvm_update_msi_routes_all(NULL, true, 0, 0);
1335         }
1336     }
1337 
1338     return ret;
1339 }
1340 
1341 int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi)
1342 {
1343     XenEvtchnState *s = xen_evtchn_singleton;
1344     int ret;
1345 
1346     if (!s) {
1347         return -ENOTSUP;
1348     }
1349 
1350     if (!valid_vcpu(ipi->vcpu)) {
1351         return -ENOENT;
1352     }
1353 
1354     qemu_mutex_lock(&s->port_lock);
1355 
1356     ret = allocate_port(s, ipi->vcpu, EVTCHNSTAT_ipi, 0, &ipi->port);
1357     if (!ret && s->evtchn_in_kernel) {
1358         assign_kernel_port(EVTCHNSTAT_ipi, ipi->port, ipi->vcpu);
1359     }
1360 
1361     qemu_mutex_unlock(&s->port_lock);
1362 
1363     return ret;
1364 }
1365 
1366 int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
1367 {
1368     XenEvtchnState *s = xen_evtchn_singleton;
1369     int ret;
1370 
1371     if (!s) {
1372         return -ENOTSUP;
1373     }
1374 
1375     if (interdomain->remote_dom != DOMID_QEMU &&
1376         interdomain->remote_dom != DOMID_SELF &&
1377         interdomain->remote_dom != xen_domid) {
1378         return -ESRCH;
1379     }
1380 
1381     if (!valid_port(interdomain->remote_port)) {
1382         return -EINVAL;
1383     }
1384 
1385     qemu_mutex_lock(&s->port_lock);
1386 
1387     /* The newly allocated port starts out as unbound */
1388     ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &interdomain->local_port);
1389 
1390     if (ret) {
1391         goto out;
1392     }
1393 
1394     if (interdomain->remote_dom == DOMID_QEMU) {
1395         struct xenevtchn_handle *xc = s->be_handles[interdomain->remote_port];
1396         XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1397 
1398         if (!xc) {
1399             ret = -ENOENT;
1400             goto out_free_port;
1401         }
1402 
1403         if (xc->guest_port) {
1404             ret = -EBUSY;
1405             goto out_free_port;
1406         }
1407 
1408         assert(xc->be_port == interdomain->remote_port);
1409         xc->guest_port = interdomain->local_port;
1410         if (kvm_xen_has_cap(EVTCHN_SEND)) {
1411             assign_kernel_eventfd(lp->type, xc->guest_port, xc->fd);
1412         }
1413         lp->type = EVTCHNSTAT_interdomain;
1414         lp->u.interdomain.to_qemu = 1;
1415         lp->u.interdomain.port = interdomain->remote_port;
1416         ret = 0;
1417     } else {
1418         /* Loopback */
1419         XenEvtchnPort *rp = &s->port_table[interdomain->remote_port];
1420         XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1421 
1422         /*
1423          * The 'remote' port for loopback must be an unbound port allocated
1424          * for communication with the local domain, and must *not* be the
1425          * port that was just allocated for the local end.
1426          */
1427         if (interdomain->local_port != interdomain->remote_port &&
1428             rp->type == EVTCHNSTAT_unbound && !rp->u.interdomain.to_qemu) {
1429 
1430             rp->type = EVTCHNSTAT_interdomain;
1431             rp->u.interdomain.port = interdomain->local_port;
1432 
1433             lp->type = EVTCHNSTAT_interdomain;
1434             lp->u.interdomain.port = interdomain->remote_port;
1435         } else {
1436             ret = -EINVAL;
1437         }
1438     }
1439 
1440  out_free_port:
1441     if (ret) {
1442         free_port(s, interdomain->local_port);
1443     }
1444  out:
1445     qemu_mutex_unlock(&s->port_lock);
1446 
1447     return ret;
1448 
1449 }
1450 int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
1451 {
1452     XenEvtchnState *s = xen_evtchn_singleton;
1453     int ret;
1454 
1455     if (!s) {
1456         return -ENOTSUP;
1457     }
1458 
1459     if (alloc->dom != DOMID_SELF && alloc->dom != xen_domid) {
1460         return -ESRCH;
1461     }
1462 
1463     if (alloc->remote_dom != DOMID_QEMU &&
1464         alloc->remote_dom != DOMID_SELF &&
1465         alloc->remote_dom != xen_domid) {
1466         return -EPERM;
1467     }
1468 
1469     qemu_mutex_lock(&s->port_lock);
1470 
1471     ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &alloc->port);
1472 
1473     if (!ret && alloc->remote_dom == DOMID_QEMU) {
1474         XenEvtchnPort *p = &s->port_table[alloc->port];
1475         p->u.interdomain.to_qemu = 1;
1476     }
1477 
1478     qemu_mutex_unlock(&s->port_lock);
1479 
1480     return ret;
1481 }
1482 
1483 int xen_evtchn_send_op(struct evtchn_send *send)
1484 {
1485     XenEvtchnState *s = xen_evtchn_singleton;
1486     XenEvtchnPort *p;
1487     int ret = 0;
1488 
1489     if (!s) {
1490         return -ENOTSUP;
1491     }
1492 
1493     if (!valid_port(send->port)) {
1494         return -EINVAL;
1495     }
1496 
1497     qemu_mutex_lock(&s->port_lock);
1498 
1499     p = &s->port_table[send->port];
1500 
1501     switch (p->type) {
1502     case EVTCHNSTAT_interdomain:
1503         if (p->u.interdomain.to_qemu) {
1504             /*
1505              * This is an event from the guest to qemu itself, which is
1506              * serving as the driver domain.
1507              */
1508             uint16_t be_port = p->u.interdomain.port;
1509             struct xenevtchn_handle *xc = s->be_handles[be_port];
1510             if (xc) {
1511                 eventfd_write(xc->fd, 1);
1512                 ret = 0;
1513             } else {
1514                 ret = -ENOENT;
1515             }
1516         } else {
1517             /* Loopback interdomain ports; just a complex IPI */
1518             set_port_pending(s, p->u.interdomain.port);
1519         }
1520         break;
1521 
1522     case EVTCHNSTAT_ipi:
1523         set_port_pending(s, send->port);
1524         break;
1525 
1526     case EVTCHNSTAT_unbound:
1527         /* Xen will silently drop these */
1528         break;
1529 
1530     default:
1531         ret = -EINVAL;
1532         break;
1533     }
1534 
1535     qemu_mutex_unlock(&s->port_lock);
1536 
1537     return ret;
1538 }
1539 
1540 int xen_evtchn_set_port(uint16_t port)
1541 {
1542     XenEvtchnState *s = xen_evtchn_singleton;
1543     XenEvtchnPort *p;
1544     int ret = -EINVAL;
1545 
1546     if (!s) {
1547         return -ENOTSUP;
1548     }
1549 
1550     if (!valid_port(port)) {
1551         return -EINVAL;
1552     }
1553 
1554     qemu_mutex_lock(&s->port_lock);
1555 
1556     p = &s->port_table[port];
1557 
1558     /* QEMU has no business sending to anything but these */
1559     if (p->type == EVTCHNSTAT_virq ||
1560         (p->type == EVTCHNSTAT_interdomain && p->u.interdomain.to_qemu)) {
1561         set_port_pending(s, port);
1562         ret = 0;
1563     }
1564 
1565     qemu_mutex_unlock(&s->port_lock);
1566 
1567     return ret;
1568 }
1569 
1570 static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
1571 {
1572     uint16_t pirq;
1573 
1574     /*
1575      * Preserve the allocation strategy that Xen has. It looks like
1576      * we *never* give out PIRQ 0-15, we give out 16-nr_irqs_gsi only
1577      * to GSIs (counting up from 16), and then we count backwards from
1578      * the top for MSIs or when the GSI space is exhausted.
1579      */
1580     if (type == MAP_PIRQ_TYPE_GSI) {
1581         for (pirq = 16 ; pirq < IOAPIC_NUM_PINS; pirq++) {
1582             if (pirq_inuse(s, pirq)) {
1583                 continue;
1584             }
1585 
1586             /* Found it */
1587             goto found;
1588         }
1589     }
1590     for (pirq = s->nr_pirqs - 1; pirq >= IOAPIC_NUM_PINS; pirq--) {
1591         /* Skip whole words at a time when they're full */
1592         if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
1593             pirq &= ~63ULL;
1594             continue;
1595         }
1596         if (pirq_inuse(s, pirq)) {
1597             continue;
1598         }
1599 
1600         goto found;
1601     }
1602     return -ENOSPC;
1603 
1604  found:
1605     pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1606     if (gsi >= 0) {
1607         assert(gsi < IOAPIC_NUM_PINS);
1608         s->gsi_pirq[gsi] = pirq;
1609     }
1610     s->pirq[pirq].gsi = gsi;
1611     return pirq;
1612 }
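/*
 * With the default nr_pirqs of 256, for example, the first GSI mapping
 * would receive PIRQ 16 while the first MSI would receive PIRQ 255,
 * counting downwards from there.
 */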
1613 
1614 bool xen_evtchn_set_gsi(int gsi, int *level)
1615 {
1616     XenEvtchnState *s = xen_evtchn_singleton;
1617     int pirq;
1618 
1619     assert(bql_locked());
1620 
1621     if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1622         return false;
1623     }
1624 
1625     /*
1626      * For the callback_gsi we need to implement a logical OR of the event
1627      * channel GSI and the external input (e.g. from PCI INTx), because
1628      * QEMU itself doesn't support shared level interrupts via demux or
1629      * resamplers.
1630      */
1631     if (gsi && gsi == s->callback_gsi) {
1632         /* Remember the external state of the GSI pin (e.g. from PCI INTx) */
1633         if (!s->setting_callback_gsi) {
1634             s->extern_gsi_level = *level;
1635 
1636             /*
1637              * Don't allow the external device to deassert the line if the
1638              * event channel GSI should still be asserted.
1639              */
1640             if (!s->extern_gsi_level) {
1641                 struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
1642                 if (vi && vi->evtchn_upcall_pending) {
1643                     /* Need to poll for deassertion */
1644                     kvm_xen_set_callback_asserted();
1645                     *level = 1;
1646                 }
1647             }
1648         }
1649 
1650         /*
1651          * The event channel GSI cannot be routed to PIRQ, as that would make
1652          * no sense. It could also deadlock on s->port_lock, if we proceed.
1653          * So bail out now.
1654          */
1655         return false;
1656     }
1657 
1658     QEMU_LOCK_GUARD(&s->port_lock);
1659 
1660     pirq = s->gsi_pirq[gsi];
1661     if (!pirq) {
1662         return false;
1663     }
1664 
1665     if (*level) {
1666         int port = s->pirq[pirq].port;
1667 
1668         s->pirq_gsi_set |= (1U << gsi);
1669         if (port) {
1670             set_port_pending(s, port);
1671         }
1672     } else {
1673         s->pirq_gsi_set &= ~(1U << gsi);
1674     }
1675     return true;
1676 }
1677 
1678 static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
1679 {
1680     /* The vector (in low 8 bits of data) must be zero */
1681     if (data & 0xff) {
1682         return 0;
1683     }
1684 
1685     uint32_t pirq = (addr & 0xff000) >> 12;
1686     pirq |= (addr >> 32) & 0xffffff00;
1687 
1688     return pirq;
1689 }
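/*
 * That is, PIRQ# bits 7:0 travel in address bits 19:12 and bits 31:8 in
 * address bits 63:40, which is why PIRQs above 255 need a 64-bit MSI
 * address (see the comment in xen_evtchn_create()). For example, a
 * (hypothetical) address of 0x100fee23000 with a zero vector byte in the
 * data decodes to PIRQ 0x123.
 */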
1690 
1691 static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
1692                                  int except_pirq)
1693 {
1694     uint32_t pirq;
1695 
1696     for (pirq = 0; pirq < s->nr_pirqs; pirq++) {
1697         /*
1698          * We could be cleverer here, but it isn't really a fast path, and
1699          * this trivial optimisation is enough to let us skip the big gap
1700          * in the middle a bit quicker (in terms of both loop iterations,
1701          * and cache lines).
1702          */
1703         if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
1704             pirq += 63; /* the loop increment takes us to the next word */
1705             continue;
1706         }
1707         if (except_pirq && pirq == except_pirq) {
1708             continue;
1709         }
1710         if (s->pirq[pirq].dev != dev) {
1711             continue;
1712         }
1713         if (vector != -1 && s->pirq[pirq].vector != vector) {
1714             continue;
1715         }
1716 
1717         /* It could theoretically be bound to a port already, but that is OK. */
1718         s->pirq[pirq].dev = NULL;
1719         s->pirq[pirq].gsi = IRQ_UNBOUND;
1720         s->pirq[pirq].is_msix = false;
1721         s->pirq[pirq].vector = 0;
1722         s->pirq[pirq].is_masked = false;
1723         s->pirq[pirq].is_translated = false;
1724     }
1725 }
1726 
1727 void xen_evtchn_remove_pci_device(PCIDevice *dev)
1728 {
1729     XenEvtchnState *s = xen_evtchn_singleton;
1730 
1731     if (!s) {
1732         return;
1733     }
1734 
1735     QEMU_LOCK_GUARD(&s->port_lock);
1736     do_remove_pci_vector(s, dev, -1, 0);
1737 }
1738 
1739 void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
1740                           uint64_t addr, uint32_t data, bool is_masked)
1741 {
1742     XenEvtchnState *s = xen_evtchn_singleton;
1743     uint32_t pirq;
1744 
1745     if (!s) {
1746         return;
1747     }
1748 
1749     assert(bql_locked());
1750 
1751     pirq = msi_pirq_target(addr, data);
1752 
1753     /*
1754      * The PIRQ# must be sane, and there must be an allocated PIRQ in
1755      * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
1756      */
1757     if (!pirq || pirq >= s->nr_pirqs || !pirq_inuse(s, pirq) ||
1758         (s->pirq[pirq].gsi != IRQ_UNBOUND &&
1759          s->pirq[pirq].gsi != IRQ_MSI_EMU)) {
1760         pirq = 0;
1761     }
1762 
1763     if (pirq) {
1764         s->pirq[pirq].dev = dev;
1765         s->pirq[pirq].gsi = IRQ_MSI_EMU;
1766         s->pirq[pirq].is_msix = is_msix;
1767         s->pirq[pirq].vector = vector;
1768         s->pirq[pirq].is_masked = is_masked;
1769     }
1770 
1771     /* Remove any (other) entries for this {device, vector} */
1772     do_remove_pci_vector(s, dev, vector, pirq);
1773 }
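/*
 * Editorial sketch of the guest-visible flow that lands in
 * xen_evtchn_snoop_msi(): the guest allocates a free PIRQ, then
 * programs the device's MSI capability with the magic encoding from
 * msi_from_pirq() (sketched earlier).  The function below is
 * hypothetical glue for illustration only; in QEMU proper the snoop
 * call is made from the MSI/MSI-X emulation when a vector is
 * (re)configured.
 */
static void example_bind_msi_to_pirq(PCIDevice *dev)
{
    struct physdev_get_free_pirq get = { .type = MAP_PIRQ_TYPE_MSI };
    uint64_t addr;
    uint32_t data;

    if (xen_physdev_get_free_pirq(&get)) {
        return;                           /* no PIRQ available */
    }
    msi_from_pirq(get.pirq, &addr, &data);

    /* Writing addr/data into the MSI capability ends up here: */
    xen_evtchn_snoop_msi(dev, false, 0, addr, data, false);
}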
1774 
1775 int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
1776                                   uint64_t address, uint32_t data)
1777 {
1778     XenEvtchnState *s = xen_evtchn_singleton;
1779     uint32_t pirq, port;
1780     CPUState *cpu;
1781 
1782     if (!s) {
1783         return 1; /* Not a PIRQ */
1784     }
1785 
1786     assert(bql_locked());
1787 
1788     pirq = msi_pirq_target(address, data);
1789     if (!pirq || pirq >= s->nr_pirqs) {
1790         return 1; /* Not a PIRQ */
1791     }
1792 
1793     if (!kvm_xen_has_cap(EVTCHN_2LEVEL)) {
1794         return -ENOTSUP;
1795     }
1796 
1797     if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
1798         return -EINVAL;
1799     }
1800 
1801     /* Remember that KVM tried to translate this. It might need to try again. */
1802     s->pirq[pirq].is_translated = true;
1803 
1804     QEMU_LOCK_GUARD(&s->port_lock);
1805 
1806     port = s->pirq[pirq].port;
1807     if (!valid_port(port)) {
1808         return -EINVAL;
1809     }
1810 
1811     cpu = qemu_get_cpu(s->port_table[port].vcpu);
1812     if (!cpu) {
1813         return -EINVAL;
1814     }
1815 
1816     route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
1817     route->u.xen_evtchn.port = port;
1818     route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
1819     route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
1820 
1821     return 0; /* Handled */
1822 }
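/*
 * Editorial sketch of the caller contract for the function above: a
 * positive return means "not a PIRQ, build a normal MSI route", zero
 * means the route was rewritten as a Xen event channel, and a negative
 * value is an error.  The wrapper name is hypothetical; the real
 * caller lives in QEMU's KVM MSI route-building code.
 */
static int example_build_msi_route(struct kvm_irq_routing_entry *route,
                                   uint64_t addr, uint32_t data)
{
    int ret = xen_evtchn_translate_pirq_msi(route, addr, data);

    if (ret <= 0) {
        return ret;                        /* translated, or an error */
    }

    route->type = KVM_IRQ_ROUTING_MSI;     /* ordinary MSI fallback */
    route->u.msi.address_lo = (uint32_t)addr;
    route->u.msi.address_hi = (uint32_t)(addr >> 32);
    route->u.msi.data = data;
    return 0;
}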
1823 
1824 bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
1825 {
1826     XenEvtchnState *s = xen_evtchn_singleton;
1827     uint32_t pirq, port;
1828 
1829     if (!s) {
1830         return false;
1831     }
1832 
1833     assert(bql_locked());
1834 
1835     pirq = msi_pirq_target(address, data);
1836     if (!pirq || pirq >= s->nr_pirqs) {
1837         return false;
1838     }
1839 
1840     QEMU_LOCK_GUARD(&s->port_lock);
1841 
1842     port = s->pirq[pirq].port;
1843     if (!valid_port(port)) {
1844         return false;
1845     }
1846 
1847     set_port_pending(s, port);
1848     return true;
1849 }
1850 
1851 int xen_physdev_map_pirq(struct physdev_map_pirq *map)
1852 {
1853     XenEvtchnState *s = xen_evtchn_singleton;
1854     int pirq = map->pirq;
1855     int gsi = map->index;
1856 
1857     if (!s) {
1858         return -ENOTSUP;
1859     }
1860 
1861     BQL_LOCK_GUARD();
1862     QEMU_LOCK_GUARD(&s->port_lock);
1863 
1864     if (map->domid != DOMID_SELF && map->domid != xen_domid) {
1865         return -EPERM;
1866     }
1867     if (map->type != MAP_PIRQ_TYPE_GSI) {
1868         return -EINVAL;
1869     }
1870     if (gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1871         return -EINVAL;
1872     }
1873 
1874     if (pirq < 0) {
1875         pirq = allocate_pirq(s, map->type, gsi);
1876         if (pirq < 0) {
1877             return pirq;
1878         }
1879         map->pirq = pirq;
1880     } else if (pirq >= s->nr_pirqs) { /* valid PIRQs are 0..nr_pirqs-1 */
1881         return -EINVAL;
1882     } else {
1883         /*
1884          * User specified a valid-looking PIRQ#. Allow it if it is
1885          * allocated and not yet bound, or if it is unallocated
1886          */
1887         if (pirq_inuse(s, pirq)) {
1888             if (s->pirq[pirq].gsi != IRQ_UNBOUND) {
1889                 return -EBUSY;
1890             }
1891         } else {
1892             /* If it was unused, mark it used now. */
1893             pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1894         }
1895         /* Set the mapping in both directions. */
1896         s->pirq[pirq].gsi = gsi;
1897         s->gsi_pirq[gsi] = pirq;
1898     }
1899 
1900     trace_kvm_xen_map_pirq(pirq, gsi);
1901     return 0;
1902 }
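/*
 * Editorial sketch: what a guest-side PHYSDEVOP_map_pirq request
 * handled by the function above looks like.  Passing pirq = -1 asks
 * the emulation to allocate one; on success, map.pirq holds the
 * chosen PIRQ#.  The function name and GSI value are illustrative.
 */
static int example_map_gsi(int gsi)
{
    struct physdev_map_pirq map = {
        .domid = DOMID_SELF,
        .type  = MAP_PIRQ_TYPE_GSI,
        .index = gsi,
        .pirq  = -1,    /* let the emulation pick a free PIRQ */
    };
    int ret = xen_physdev_map_pirq(&map);

    return ret ? ret : map.pirq;
}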
1903 
1904 int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
1905 {
1906     XenEvtchnState *s = xen_evtchn_singleton;
1907     int pirq = unmap->pirq;
1908     int gsi;
1909 
1910     if (!s) {
1911         return -ENOTSUP;
1912     }
1913 
1914     if (unmap->domid != DOMID_SELF && unmap->domid != xen_domid) {
1915         return -EPERM;
1916     }
1917     if (pirq < 0 || pirq >= s->nr_pirqs) {
1918         return -EINVAL;
1919     }
1920 
1921     BQL_LOCK_GUARD();
1922     qemu_mutex_lock(&s->port_lock);
1923 
1924     if (!pirq_inuse(s, pirq)) {
1925         qemu_mutex_unlock(&s->port_lock);
1926         return -ENOENT;
1927     }
1928 
1929     gsi = s->pirq[pirq].gsi;
1930 
1931     /* We can only unmap GSI PIRQs */
1932     if (gsi < 0) {
1933         qemu_mutex_unlock(&s->port_lock);
1934         return -EINVAL;
1935     }
1936 
1937     s->gsi_pirq[gsi] = 0;
1938     s->pirq[pirq].gsi = IRQ_UNBOUND; /* Doesn't actually matter because: */
1939     pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);
1940 
1941     trace_kvm_xen_unmap_pirq(pirq, gsi);
1942     qemu_mutex_unlock(&s->port_lock);
1943 
1944     if (gsi == IRQ_MSI_EMU) {
1945         kvm_update_msi_routes_all(NULL, true, 0, 0);
1946     }
1947 
1948     return 0;
1949 }
1950 
1951 int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
1952 {
1953     XenEvtchnState *s = xen_evtchn_singleton;
1954     int pirq = eoi->irq;
1955     int gsi;
1956 
1957     if (!s) {
1958         return -ENOTSUP;
1959     }
1960 
1961     BQL_LOCK_GUARD();
1962     QEMU_LOCK_GUARD(&s->port_lock);
1963 
         if (pirq < 0 || pirq >= s->nr_pirqs) {
             return -EINVAL; /* bounds-check the guest-supplied PIRQ# first */
         }
1964     if (!pirq_inuse(s, pirq)) {
1965         return -ENOENT;
1966     }
1967 
1968     gsi = s->pirq[pirq].gsi;
1969     if (gsi < 0) {
1970         return -EINVAL;
1971     }
1972 
1973     /* Reassert a level IRQ if needed */
1974     if (s->pirq_gsi_set & (1U << gsi)) {
1975         int port = s->pirq[pirq].port;
1976         if (port) {
1977             set_port_pending(s, port);
1978         }
1979     }
1980 
1981     return 0;
1982 }
1983 
1984 int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
1985 {
1986     XenEvtchnState *s = xen_evtchn_singleton;
1987     int pirq = query->irq;
1988 
1989     if (!s) {
1990         return -ENOTSUP;
1991     }
1992 
1993     BQL_LOCK_GUARD();
1994     QEMU_LOCK_GUARD(&s->port_lock);
1995 
         if (pirq < 0 || pirq >= s->nr_pirqs) {
             return -EINVAL; /* bounds-check the guest-supplied PIRQ# first */
         }
1996     if (!pirq_inuse(s, pirq)) {
1997         return -ENOENT;
1998     }
1999 
2000     if (s->pirq[pirq].gsi >= 0) {
2001         query->flags = XENIRQSTAT_needs_eoi;
2002     } else {
2003         query->flags = 0;
2004     }
2005 
2006     return 0;
2007 }
2008 
2009 int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get)
2010 {
2011     XenEvtchnState *s = xen_evtchn_singleton;
2012     int pirq;
2013 
2014     if (!s) {
2015         return -ENOTSUP;
2016     }
2017 
2018     QEMU_LOCK_GUARD(&s->port_lock);
2019 
2020     pirq = allocate_pirq(s, get->type, IRQ_UNBOUND);
2021     if (pirq < 0) {
2022         return pirq;
2023     }
2024 
2025     get->pirq = pirq;
2026     trace_kvm_xen_get_free_pirq(pirq, get->type);
2027     return 0;
2028 }
2029 
2030 struct xenevtchn_handle *xen_be_evtchn_open(void)
2031 {
2032     struct xenevtchn_handle *xc = g_new0(struct xenevtchn_handle, 1);
2033 
2034     xc->fd = eventfd(0, EFD_CLOEXEC);
2035     if (xc->fd < 0) {
2036         g_free(xc); /* pair g_new0() with g_free() */
2037         return NULL;
2038     }
2039 
2040     return xc;
2041 }
2042 
2043 static int find_be_port(XenEvtchnState *s, struct xenevtchn_handle *xc)
2044 {
2045     int i;
2046 
2047     for (i = 1; i < EVTCHN_2L_NR_CHANNELS; i++) {
2048         if (!s->be_handles[i]) {
2049             s->be_handles[i] = xc;
2050             xc->be_port = i;
2051             return i;
2052         }
2053     }
2054     return 0;
2055 }
2056 
2057 int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
2058                                    evtchn_port_t guest_port)
2059 {
2060     XenEvtchnState *s = xen_evtchn_singleton;
2061     XenEvtchnPort *gp;
2062     uint16_t be_port = 0;
2063     int ret;
2064 
2065     if (!s) {
2066         return -ENOTSUP;
2067     }
2068 
2069     if (!xc) {
2070         return -EFAULT;
2071     }
2072 
2073     if (domid != xen_domid) {
2074         return -ESRCH;
2075     }
2076 
2077     if (!valid_port(guest_port)) {
2078         return -EINVAL;
2079     }
2080 
2081     qemu_mutex_lock(&s->port_lock);
2082 
2083     /* The guest has to have an unbound port waiting for us to bind */
2084     gp = &s->port_table[guest_port];
2085 
2086     switch (gp->type) {
2087     case EVTCHNSTAT_interdomain:
2088         /* Allow rebinding after migration, preserve port # if possible */
2089         be_port = gp->u.interdomain.port;
2090         assert(be_port != 0);
2091         if (!s->be_handles[be_port]) {
2092             s->be_handles[be_port] = xc;
2093             xc->guest_port = guest_port;
2094             ret = xc->be_port = be_port;
2095             if (kvm_xen_has_cap(EVTCHN_SEND)) {
2096                 assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2097             }
2098             break;
2099         }
2100         /* fall through */
2101 
2102     case EVTCHNSTAT_unbound:
2103         be_port = find_be_port(s, xc);
2104         if (!be_port) {
2105             ret = -ENOSPC;
2106             goto out;
2107         }
2108 
2109         gp->type = EVTCHNSTAT_interdomain;
2110         gp->u.interdomain.to_qemu = 1;
2111         gp->u.interdomain.port = be_port;
2112         xc->guest_port = guest_port;
2113         if (kvm_xen_has_cap(EVTCHN_SEND)) {
2114             assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2115         }
2116         ret = be_port;
2117         break;
2118 
2119     default:
2120         ret = -EINVAL;
2121         break;
2122     }
2123 
2124  out:
2125     qemu_mutex_unlock(&s->port_lock);
2126 
2127     return ret;
2128 }
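/*
 * Editorial sketch: how a built-in backend (e.g. the xenstore
 * emulation) is expected to use the function above.  The guest first
 * creates an unbound port with EVTCHNOP_alloc_unbound; QEMU then binds
 * its end and can notify.  Error handling is trimmed and the function
 * name is hypothetical.
 */
static struct xenevtchn_handle *example_backend_bind(evtchn_port_t guest_port)
{
    struct xenevtchn_handle *xc = xen_be_evtchn_open();
    int be_port;

    if (!xc) {
        return NULL;
    }
    be_port = xen_be_evtchn_bind_interdomain(xc, xen_domid, guest_port);
    if (be_port < 0) {
        xen_be_evtchn_close(xc);
        return NULL;
    }
    xen_be_evtchn_notify(xc, be_port);  /* kick the guest end once */
    return xc;
}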
2129 
2130 int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port)
2131 {
2132     XenEvtchnState *s = xen_evtchn_singleton;
2133     int ret;
2134 
2135     if (!s) {
2136         return -ENOTSUP;
2137     }
2138 
2139     if (!xc) {
2140         return -EFAULT;
2141     }
2142 
2143     qemu_mutex_lock(&s->port_lock);
2144 
2145     if (port && port != xc->be_port) {
2146         ret = -EINVAL;
2147         goto out;
2148     }
2149 
2150     if (xc->guest_port) {
2151         XenEvtchnPort *gp = &s->port_table[xc->guest_port];
2152 
2153         /* This should never *not* be true */
2154         if (gp->type == EVTCHNSTAT_interdomain) {
2155             gp->type = EVTCHNSTAT_unbound;
2156             gp->u.interdomain.port = 0;
2157         }
2158 
2159         if (kvm_xen_has_cap(EVTCHN_SEND)) {
2160             deassign_kernel_port(xc->guest_port);
2161         }
2162         xc->guest_port = 0;
2163     }
2164 
2165     s->be_handles[xc->be_port] = NULL;
2166     xc->be_port = 0;
2167     ret = 0;
2168  out:
2169     qemu_mutex_unlock(&s->port_lock);
2170     return ret;
2171 }
2172 
2173 int xen_be_evtchn_close(struct xenevtchn_handle *xc)
2174 {
2175     if (!xc) {
2176         return -EFAULT;
2177     }
2178 
2179     xen_be_evtchn_unbind(xc, 0);
2180 
2181     close(xc->fd);
2182     g_free(xc); /* allocated with g_new0() */
2183     return 0;
2184 }
2185 
2186 int xen_be_evtchn_fd(struct xenevtchn_handle *xc)
2187 {
2188     if (!xc) {
2189         return -1;
2190     }
2191     return xc->fd;
2192 }
2193 
2194 int xen_be_evtchn_notify(struct xenevtchn_handle *xc, evtchn_port_t port)
2195 {
2196     XenEvtchnState *s = xen_evtchn_singleton;
2197     int ret;
2198 
2199     if (!s) {
2200         return -ENOTSUP;
2201     }
2202 
2203     if (!xc) {
2204         return -EFAULT;
2205     }
2206 
2207     qemu_mutex_lock(&s->port_lock);
2208 
2209     if (xc->guest_port) {
2210         set_port_pending(s, xc->guest_port);
2211         ret = 0;
2212     } else {
2213         ret = -ENOTCONN;
2214     }
2215 
2216     qemu_mutex_unlock(&s->port_lock);
2217 
2218     return ret;
2219 }
2220 
2221 int xen_be_evtchn_pending(struct xenevtchn_handle *xc)
2222 {
2223     uint64_t val;
2224 
2225     if (!xc) {
2226         return -EFAULT;
2227     }
2228 
2229     if (!xc->be_port) {
2230         return 0;
2231     }
2232 
2233     if (eventfd_read(xc->fd, &val)) {
2234         return -errno;
2235     }
2236 
2237     return val ? xc->be_port : 0;
2238 }
2239 
2240 int xen_be_evtchn_unmask(struct xenevtchn_handle *xc, evtchn_port_t port)
2241 {
2242     if (!xc) {
2243         return -EFAULT;
2244     }
2245 
2246     if (xc->be_port != port) {
2247         return -EINVAL;
2248     }
2249 
2250     /*
2251      * We don't actually do anything to unmask it; the event was already
2252      * consumed in xen_be_evtchn_pending().
2253      */
2254     return 0;
2255 }
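/*
 * Editorial sketch: a backend typically plugs the handle's eventfd
 * into the main loop and drains events with xen_be_evtchn_pending()
 * followed by xen_be_evtchn_unmask(), mirroring the libxenevtchn
 * calling convention.  The handler name is hypothetical.
 */
static void example_evtchn_read_ready(void *opaque)
{
    struct xenevtchn_handle *xc = opaque;
    int port = xen_be_evtchn_pending(xc);  /* consumes the eventfd */

    if (port > 0) {
        /* ... process the event for 'port' ... */
        xen_be_evtchn_unmask(xc, port);    /* a no-op here, see above */
    }
}

/* Registered with something like:
 *   qemu_set_fd_handler(xen_be_evtchn_fd(xc),
 *                       example_evtchn_read_ready, NULL, xc);
 */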
2256 
2257 int xen_be_evtchn_get_guest_port(struct xenevtchn_handle *xc)
2258 {
2259     return xc->guest_port;
2260 }
2261 
2262 EvtchnInfoList *qmp_xen_event_list(Error **errp)
2263 {
2264     XenEvtchnState *s = xen_evtchn_singleton;
2265     EvtchnInfoList *head = NULL, **tail = &head;
2266     void *shinfo, *pending, *mask;
2267     int i;
2268 
2269     if (!s) {
2270         error_setg(errp, "Xen event channel emulation not enabled");
2271         return NULL;
2272     }
2273 
2274     shinfo = xen_overlay_get_shinfo_ptr();
2275     if (!shinfo) {
2276         error_setg(errp, "Xen shared info page not allocated");
2277         return NULL;
2278     }
2279 
2280     if (xen_is_long_mode()) {
2281         pending = shinfo + offsetof(struct shared_info, evtchn_pending);
2282         mask = shinfo + offsetof(struct shared_info, evtchn_mask);
2283     } else {
2284         pending = shinfo + offsetof(struct compat_shared_info, evtchn_pending);
2285         mask = shinfo + offsetof(struct compat_shared_info, evtchn_mask);
2286     }
2287 
2288     QEMU_LOCK_GUARD(&s->port_lock);
2289 
2290     for (i = 0; i < s->nr_ports; i++) {
2291         XenEvtchnPort *p = &s->port_table[i];
2292         EvtchnInfo *info;
2293 
2294         if (p->type == EVTCHNSTAT_closed) {
2295             continue;
2296         }
2297 
2298         info = g_new0(EvtchnInfo, 1);
2299 
2300         info->port = i;
2301         qemu_build_assert(EVTCHN_PORT_TYPE_CLOSED == EVTCHNSTAT_closed);
2302         qemu_build_assert(EVTCHN_PORT_TYPE_UNBOUND == EVTCHNSTAT_unbound);
2303         qemu_build_assert(EVTCHN_PORT_TYPE_INTERDOMAIN == EVTCHNSTAT_interdomain);
2304         qemu_build_assert(EVTCHN_PORT_TYPE_PIRQ == EVTCHNSTAT_pirq);
2305         qemu_build_assert(EVTCHN_PORT_TYPE_VIRQ == EVTCHNSTAT_virq);
2306         qemu_build_assert(EVTCHN_PORT_TYPE_IPI == EVTCHNSTAT_ipi);
2307 
2308         info->type = p->type;
2309         if (p->type == EVTCHNSTAT_interdomain) {
2310             info->remote_domain = g_strdup(p->u.interdomain.to_qemu ?
2311                                            "qemu" : "loopback");
2312             info->target = p->u.interdomain.port;
2313         } else {
2314             info->target = p->u.val; /* pirq# or virq# */
2315         }
2316         info->vcpu = p->vcpu;
2317         info->pending = test_bit(i, pending);
2318         info->masked = test_bit(i, mask);
2319 
2320         QAPI_LIST_APPEND(tail, info);
2321     }
2322 
2323     return head;
2324 }
2325 
2326 void qmp_xen_event_inject(uint32_t port, Error **errp)
2327 {
2328     XenEvtchnState *s = xen_evtchn_singleton;
2329 
2330     if (!s) {
2331         error_setg(errp, "Xen event channel emulation not enabled");
2332         return;
2333     }
2334 
2335     if (!valid_port(port)) {
2336         error_setg(errp, "Invalid port %u", port);
             return; /* don't fall through and set errp a second time */
2337     }
2338 
2339     QEMU_LOCK_GUARD(&s->port_lock);
2340 
2341     if (set_port_pending(s, port)) {
2342         error_setg(errp, "Failed to set port %u", port);
2343         return;
2344     }
2345 }
2346 
2347 void hmp_xen_event_list(Monitor *mon, const QDict *qdict)
2348 {
2349     EvtchnInfoList *iter, *info_list;
2350     Error *err = NULL;
2351 
2352     info_list = qmp_xen_event_list(&err);
2353     if (err) {
2354         hmp_handle_error(mon, err);
2355         return;
2356     }
2357 
2358     for (iter = info_list; iter; iter = iter->next) {
2359         EvtchnInfo *info = iter->value;
2360 
2361         monitor_printf(mon, "port %4u: vcpu: %d %s", info->port, info->vcpu,
2362                        EvtchnPortType_str(info->type));
2363         if (info->type != EVTCHN_PORT_TYPE_IPI) {
2364             monitor_printf(mon, "(");
2365             if (info->remote_domain) {
2366                 monitor_printf(mon, "%s:", info->remote_domain);
2367             }
2368             monitor_printf(mon, "%d)", info->target);
2369         }
2370         if (info->pending) {
2371             monitor_printf(mon, " PENDING");
2372         }
2373         if (info->masked) {
2374             monitor_printf(mon, " MASKED");
2375         }
2376         monitor_printf(mon, "\n");
2377     }
2378 
2379     qapi_free_EvtchnInfoList(info_list);
2380 }
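/*
 * Editorial note: illustrative output of the HMP command above (the
 * port numbers and bindings are invented for the example):
 *
 *   port    1: vcpu: 0 virq(1)
 *   port    2: vcpu: 0 ipi
 *   port    3: vcpu: 0 interdomain(qemu:1) PENDING
 */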
2381 
2382 void hmp_xen_event_inject(Monitor *mon, const QDict *qdict)
2383 {
2384     int port = qdict_get_int(qdict, "port");
2385     Error *err = NULL;
2386 
2387     qmp_xen_event_inject(port, &err);
2388     if (err) {
2389         hmp_handle_error(mon, err);
2390     } else {
2391         monitor_printf(mon, "Delivered port %d\n", port);
2392     }
2393 }
2394 
2395