1 /*
2 * QEMU Xen emulation: Event channel support
3 *
4 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5 *
6 * Authors: David Woodhouse <dwmw2@infradead.org>
7 *
8 * This work is licensed under the terms of the GNU GPL, version 2 or later.
9 * See the COPYING file in the top-level directory.
10 */
11
12 #include "qemu/osdep.h"
13 #include "qemu/host-utils.h"
14 #include "qemu/module.h"
15 #include "qemu/lockable.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/log.h"
18 #include "qemu/error-report.h"
19 #include "monitor/monitor.h"
20 #include "monitor/hmp.h"
21 #include "qapi/error.h"
22 #include "qapi/qapi-commands-misc-i386.h"
23 #include "qobject/qdict.h"
24 #include "qom/object.h"
25 #include "exec/target_page.h"
26 #include "system/address-spaces.h"
27 #include "migration/vmstate.h"
28 #include "trace.h"
29
30 #include "hw/sysbus.h"
31 #include "hw/xen/xen.h"
32 #include "hw/i386/x86.h"
33 #include "hw/i386/pc.h"
34 #include "hw/pci/pci.h"
35 #include "hw/pci/msi.h"
36 #include "hw/pci/msix.h"
37 #include "hw/irq.h"
38 #include "hw/xen/xen_backend_ops.h"
39
40 #include "xen_evtchn.h"
41 #include "xen_overlay.h"
42 #include "xen_xenstore.h"
43
44 #include "system/kvm.h"
45 #include "system/kvm_xen.h"
46 #include <linux/kvm.h>
47 #include <sys/eventfd.h>
48
49 #include "hw/xen/interface/memory.h"
50 #include "hw/xen/interface/hvm/params.h"
51
52 /* XX: For kvm_update_msi_routes_all() */
53 #include "target/i386/kvm/kvm_i386.h"
54
55 #define TYPE_XEN_EVTCHN "xen-evtchn"
56 OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
57
58 typedef struct XenEvtchnPort {
59 uint32_t vcpu; /* Xen/ACPI vcpu_id */
60 uint16_t type; /* EVTCHNSTAT_xxxx */
61 union {
62 uint16_t val; /* raw value for serialization etc. */
63 uint16_t pirq;
64 uint16_t virq;
65 struct {
66 uint16_t port:15;
67 uint16_t to_qemu:1; /* Only two targets; qemu or loopback */
68 } interdomain;
69 } u;
70 } XenEvtchnPort;
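/*
 * Illustrative note (assuming the usual x86/GCC bitfield layout, low bits
 * first): for an interdomain port the union above serializes through u.val
 * with the peer port in bits 0-14 and the to_qemu flag in bit 15, e.g. a
 * guest port bound to QEMU backend port 5 round-trips as (1 << 15) | 5.
 */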
71
72 /* 32-bit compatibility definitions, also used natively in 32-bit build */
73 struct compat_arch_vcpu_info {
74 unsigned int cr2;
75 unsigned int pad[5];
76 };
77
78 struct compat_vcpu_info {
79 uint8_t evtchn_upcall_pending;
80 uint8_t evtchn_upcall_mask;
81 uint16_t pad;
82 uint32_t evtchn_pending_sel;
83 struct compat_arch_vcpu_info arch;
84 struct vcpu_time_info time;
85 }; /* 64 bytes (x86) */
86
87 struct compat_arch_shared_info {
88 unsigned int max_pfn;
89 unsigned int pfn_to_mfn_frame_list_list;
90 unsigned int nmi_reason;
91 unsigned int p2m_cr3;
92 unsigned int p2m_vaddr;
93 unsigned int p2m_generation;
94 uint32_t wc_sec_hi;
95 };
96
97 struct compat_shared_info {
98 struct compat_vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
99 uint32_t evtchn_pending[32];
100 uint32_t evtchn_mask[32];
101 uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */
102 uint32_t wc_sec;
103 uint32_t wc_nsec;
104 struct compat_arch_shared_info arch;
105 };
106
107 #define COMPAT_EVTCHN_2L_NR_CHANNELS 1024
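/*
 * Worked arithmetic, for orientation only: the 2-level ABI limit is simply
 * the number of bits in shared_info's evtchn_pending[] array. The compat
 * layout above gives 32 words x 32 bits = 1024 channels, while the native
 * 64-bit layout gives 64 words x 64 bits = 4096, which is what
 * EVTCHN_2L_NR_CHANNELS used below should evaluate to; valid_port() picks
 * the applicable bound via xen_is_long_mode().
 */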
108
109 /* Local private implementation of struct xenevtchn_handle */
110 struct xenevtchn_handle {
111 evtchn_port_t be_port;
112 evtchn_port_t guest_port; /* Or zero for unbound */
113 int fd;
114 };
115
116 /*
117 * These 'emuirq' values are used by Xen in the LM stream... and yes, I am
118 * insane enough to think about guest-transparent live migration from actual
119 * Xen to QEMU, and ensuring that we can convert/consume the stream.
120 */
121 #define IRQ_UNBOUND -1
122 #define IRQ_PT -2
123 #define IRQ_MSI_EMU -3
124
125
126 struct pirq_info {
127 int gsi;
128 uint16_t port;
129 PCIDevice *dev;
130 int vector;
131 bool is_msix;
132 bool is_masked;
133 bool is_translated;
134 };
135
136 struct XenEvtchnState {
137 /*< private >*/
138 SysBusDevice busdev;
139 /*< public >*/
140
141 uint64_t callback_param;
142 bool evtchn_in_kernel;
143 bool setting_callback_gsi;
144 int extern_gsi_level;
145 uint32_t callback_gsi;
146
147 QEMUBH *gsi_bh;
148
149 QemuMutex port_lock;
150 uint32_t nr_ports;
151 XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];
152
153 /* Connected to the system GSIs for raising callback as GSI / INTx */
154 unsigned int nr_callback_gsis;
155 qemu_irq *callback_gsis;
156
157 struct xenevtchn_handle *be_handles[EVTCHN_2L_NR_CHANNELS];
158
159 uint32_t nr_pirqs;
160
161 /* Bitmap of allocated PIRQs (serialized) */
162 uint16_t nr_pirq_inuse_words;
163 uint64_t *pirq_inuse_bitmap;
164
165 /* GSI → PIRQ mapping (serialized) */
166 uint16_t gsi_pirq[IOAPIC_NUM_PINS];
167
168 /* Per-GSI assertion state (serialized) */
169 uint32_t pirq_gsi_set;
170
171 /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
172 struct pirq_info *pirq;
173 };
174
175 #define pirq_inuse_word(s, pirq) (s->pirq_inuse_bitmap[((pirq) / 64)])
176 #define pirq_inuse_bit(pirq) (1ULL << ((pirq) & 63))
177
178 #define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))
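/*
 * Worked example (illustrative only): for pirq == 100 the helpers above
 * reduce to word 100 / 64 == 1 and bit 100 & 63 == 36, i.e.
 *
 *   pirq_inuse(s, 100) == (s->pirq_inuse_bitmap[1] & (1ULL << 36))
 */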
179
180 struct XenEvtchnState *xen_evtchn_singleton;
181
182 /* Top bits of callback_param are the type (HVM_PARAM_CALLBACK_TYPE_xxx) */
183 #define CALLBACK_VIA_TYPE_SHIFT 56
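/*
 * For example (illustrative; see xen_evtchn_set_callback_param() for the
 * full decode), a guest asking for GSI delivery would pass
 *
 *   param = ((uint64_t)HVM_PARAM_CALLBACK_TYPE_GSI << CALLBACK_VIA_TYPE_SHIFT) | gsi;
 */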
184
185 static void unbind_backend_ports(XenEvtchnState *s);
186
187 static int xen_evtchn_pre_load(void *opaque)
188 {
189 XenEvtchnState *s = opaque;
190
191 /* Unbind all the backend-side ports; they need to rebind */
192 unbind_backend_ports(s);
193
194 /* It'll be leaked otherwise. */
195 g_free(s->pirq_inuse_bitmap);
196 s->pirq_inuse_bitmap = NULL;
197
198 return 0;
199 }
200
201 static int xen_evtchn_post_load(void *opaque, int version_id)
202 {
203 XenEvtchnState *s = opaque;
204 uint32_t i;
205
206 if (s->callback_param) {
207 xen_evtchn_set_callback_param(s->callback_param);
208 }
209
210 /* Rebuild s->pirq[].port mapping */
211 for (i = 0; i < s->nr_ports; i++) {
212 XenEvtchnPort *p = &s->port_table[i];
213
214 if (p->type == EVTCHNSTAT_pirq) {
215 assert(p->u.pirq);
216 assert(p->u.pirq < s->nr_pirqs);
217
218 /*
219 * Set the gsi to IRQ_UNBOUND; it may be changed to an actual
220 * GSI# below, or to IRQ_MSI_EMU when the MSI table snooping
221 * catches up with it.
222 */
223 s->pirq[p->u.pirq].gsi = IRQ_UNBOUND;
224 s->pirq[p->u.pirq].port = i;
225 }
226 }
227 /* Rebuild s->pirq[].gsi mapping */
228 for (i = 0; i < IOAPIC_NUM_PINS; i++) {
229 if (s->gsi_pirq[i]) {
230 s->pirq[s->gsi_pirq[i]].gsi = i;
231 }
232 }
233 return 0;
234 }
235
236 static bool xen_evtchn_is_needed(void *opaque)
237 {
238 return xen_mode == XEN_EMULATE;
239 }
240
241 static const VMStateDescription xen_evtchn_port_vmstate = {
242 .name = "xen_evtchn_port",
243 .version_id = 1,
244 .minimum_version_id = 1,
245 .fields = (const VMStateField[]) {
246 VMSTATE_UINT32(vcpu, XenEvtchnPort),
247 VMSTATE_UINT16(type, XenEvtchnPort),
248 VMSTATE_UINT16(u.val, XenEvtchnPort),
249 VMSTATE_END_OF_LIST()
250 }
251 };
252
253 static const VMStateDescription xen_evtchn_vmstate = {
254 .name = "xen_evtchn",
255 .version_id = 1,
256 .minimum_version_id = 1,
257 .needed = xen_evtchn_is_needed,
258 .pre_load = xen_evtchn_pre_load,
259 .post_load = xen_evtchn_post_load,
260 .fields = (const VMStateField[]) {
261 VMSTATE_UINT64(callback_param, XenEvtchnState),
262 VMSTATE_UINT32(nr_ports, XenEvtchnState),
263 VMSTATE_STRUCT_VARRAY_UINT32(port_table, XenEvtchnState, nr_ports, 1,
264 xen_evtchn_port_vmstate, XenEvtchnPort),
265 VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, IOAPIC_NUM_PINS),
266 VMSTATE_VARRAY_UINT16_ALLOC(pirq_inuse_bitmap, XenEvtchnState,
267 nr_pirq_inuse_words, 0,
268 vmstate_info_uint64, uint64_t),
269 VMSTATE_UINT32(pirq_gsi_set, XenEvtchnState),
270 VMSTATE_END_OF_LIST()
271 }
272 };
273
274 static void xen_evtchn_class_init(ObjectClass *klass, const void *data)
275 {
276 DeviceClass *dc = DEVICE_CLASS(klass);
277
278 dc->vmsd = &xen_evtchn_vmstate;
279 }
280
281 static const TypeInfo xen_evtchn_info = {
282 .name = TYPE_XEN_EVTCHN,
283 .parent = TYPE_SYS_BUS_DEVICE,
284 .instance_size = sizeof(XenEvtchnState),
285 .class_init = xen_evtchn_class_init,
286 };
287
288 static struct evtchn_backend_ops emu_evtchn_backend_ops = {
289 .open = xen_be_evtchn_open,
290 .bind_interdomain = xen_be_evtchn_bind_interdomain,
291 .unbind = xen_be_evtchn_unbind,
292 .close = xen_be_evtchn_close,
293 .get_fd = xen_be_evtchn_fd,
294 .notify = xen_be_evtchn_notify,
295 .unmask = xen_be_evtchn_unmask,
296 .pending = xen_be_evtchn_pending,
297 };
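/*
 * Rough sketch (illustrative, error handling omitted; on_event and
 * guest_port are placeholders) of how a backend drives these handlers.
 * Real backend drivers reach them indirectly through the xen_evtchn_ops
 * table registered in xen_evtchn_create() below:
 *
 *   struct xenevtchn_handle *xc = xen_be_evtchn_open();
 *   int be_port = xen_be_evtchn_bind_interdomain(xc, xen_domid, guest_port);
 *   qemu_set_fd_handler(xen_be_evtchn_fd(xc), on_event, NULL, xc);
 *   ...
 *   xen_be_evtchn_notify(xc, be_port);        - kick the guest
 *   xen_be_evtchn_pending(xc);                - consume our own eventfd
 *   xen_be_evtchn_unbind(xc, be_port);
 *   xen_be_evtchn_close(xc);
 */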
298
299 static void gsi_assert_bh(void *opaque)
300 {
301 struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
302 if (vi) {
303 xen_evtchn_set_callback_level(!!vi->evtchn_upcall_pending);
304 }
305 }
306
307 void xen_evtchn_create(unsigned int nr_gsis, qemu_irq *system_gsis)
308 {
309 XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
310 -1, NULL));
311 int i;
312
313 xen_evtchn_singleton = s;
314
315 qemu_mutex_init(&s->port_lock);
316 s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);
317
318 /*
319 * These are the *output* GSI from event channel support, for
320 * signalling CPU0's events via GSI or PCI INTx instead of the
321 * per-CPU vector. We create a *set* of irqs and connect one to
322 * each of the system GSIs which were passed in from the platform
323 * code, and then just trigger the right one as appropriate from
324 * xen_evtchn_set_callback_level().
325 */
326 s->nr_callback_gsis = nr_gsis;
327 s->callback_gsis = g_new0(qemu_irq, nr_gsis);
328 for (i = 0; i < nr_gsis; i++) {
329 sysbus_init_irq(SYS_BUS_DEVICE(s), &s->callback_gsis[i]);
330 sysbus_connect_irq(SYS_BUS_DEVICE(s), i, system_gsis[i]);
331 }
332
333 /*
334 * The Xen scheme for encoding PIRQ# into an MSI message is not
335 * compatible with 32-bit MSI, as it puts the high bits of the
336 * PIRQ# into the high bits of the MSI message address, instead of
337 * using the Extended Destination ID in address bits 4-11 which
338 * perhaps would have been a better choice.
339 *
340 * To keep life simple, kvm_accel_instance_init() initialises the
341 * default to 256, which conveniently doesn't need to set anything
342 * outside the low 32 bits of the address. It can be increased by
343 * setting the xen-evtchn-max-pirq property.
344 */
345 s->nr_pirqs = kvm_xen_get_evtchn_max_pirq();
346
347 s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
348 s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
349 s->pirq = g_new0(struct pirq_info, s->nr_pirqs);
350
351 /* Set event channel functions for backend drivers to use */
352 xen_evtchn_ops = &emu_evtchn_backend_ops;
353 }
354
355 static void xen_evtchn_register_types(void)
356 {
357 type_register_static(&xen_evtchn_info);
358 }
359
360 type_init(xen_evtchn_register_types)
361
362 static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
363 {
364 PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
365 uint8_t pin = param & 3;
366 uint8_t devfn = (param >> 8) & 0xff;
367 uint16_t bus = (param >> 16) & 0xffff;
368 uint16_t domain = (param >> 32) & 0xffff;
369 PCIDevice *pdev;
370 PCIINTxRoute r;
371
372 if (domain || !pcms) {
373 return 0;
374 }
375
376 pdev = pci_find_device(pcms->pcibus, bus, devfn);
377 if (!pdev) {
378 return 0;
379 }
380
381 r = pci_device_route_intx_to_irq(pdev, pin);
382 if (r.mode != PCI_INTX_ENABLED) {
383 return 0;
384 }
385
386 /*
387 * Hm, can we be notified of INTX routing changes? Not without
388 * *owning* the device and being allowed to overwrite its own
389 * ->intx_routing_notifier, AFAICT. So let's not.
390 */
391 return r.irq;
392 }
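/*
 * Sketch of the PCI INTx callback parameter layout, as implied by the
 * decode in set_callback_pci_intx() above (the delivery type sits in the
 * top bits, see CALLBACK_VIA_TYPE_SHIFT):
 *
 *   bits  0-1   INTx pin (0 = INTA ... 3 = INTD)
 *   bits  8-15  PCI devfn
 *   bits 16-31  PCI bus number
 *   bits 32-47  PCI domain/segment (only domain 0 is handled here)
 */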
393
394 void xen_evtchn_set_callback_level(int level)
395 {
396 XenEvtchnState *s = xen_evtchn_singleton;
397 if (!s) {
398 return;
399 }
400
401 /*
402 * We get to this function in a number of ways:
403 *
404 * • From I/O context, via PV backend drivers sending a notification to
405 * the guest.
406 *
407 * • From guest vCPU context, via loopback interdomain event channels
408 * (or theoretically even IPIs but guests don't use those with GSI
409 * delivery because that's pointless. We don't want a malicious guest
410 * to be able to trigger a deadlock though, so we can't rule it out.)
411 *
412 * • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
413 * configured.
414 *
415 * • From guest vCPU context in the KVM exit handler, if the upcall
416 * pending flag has been cleared and the GSI needs to be deasserted.
417 *
418 * • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
419 * been acked in the irqchip.
420 *
421 * Whichever context we come from, if we aren't already holding the BQL
422 * then we can't take it now, as we may already hold s->port_lock. So
423 * trigger the BH to set the IRQ for us instead of doing it immediately.
424 *
425 * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
426 * will deliberately take the BQL because they want the change to take
427 * effect immediately. That just leaves interdomain loopback as the case
428 * which uses the BH.
429 */
430 if (!bql_locked()) {
431 qemu_bh_schedule(s->gsi_bh);
432 return;
433 }
434
435 if (s->callback_gsi && s->callback_gsi < s->nr_callback_gsis) {
436 /*
437 * Ugly, but since we hold the BQL we can set this flag so that
438 * xen_evtchn_set_gsi() can tell the difference between this code
439 * setting the GSI, and an external device (PCI INTx) doing so.
440 */
441 s->setting_callback_gsi = true;
442 /* Do not deassert the line if an external device is asserting it. */
443 qemu_set_irq(s->callback_gsis[s->callback_gsi],
444 level || s->extern_gsi_level);
445 s->setting_callback_gsi = false;
446
447 /*
448 * If the callback GSI is the only one asserted, ensure the status
449 * is polled for deassertion in kvm_arch_post_run().
450 */
451 if (level && !s->extern_gsi_level) {
452 kvm_xen_set_callback_asserted();
453 }
454 }
455 }
456
457 int xen_evtchn_set_callback_param(uint64_t param)
458 {
459 XenEvtchnState *s = xen_evtchn_singleton;
460 struct kvm_xen_hvm_attr xa = {
461 .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
462 .u.vector = 0,
463 };
464 bool in_kernel = false;
465 uint32_t gsi = 0;
466 int type = param >> CALLBACK_VIA_TYPE_SHIFT;
467 int ret;
468
469 if (!s) {
470 return -ENOTSUP;
471 }
472
473 /*
474 * We need the BQL because set_callback_pci_intx() may call into PCI code,
475 * and because we may need to manipulate the old and new GSI levels.
476 */
477 assert(bql_locked());
478 qemu_mutex_lock(&s->port_lock);
479
480 switch (type) {
481 case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
482 xa.u.vector = (uint8_t)param;
483
484 ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
485 if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
486 in_kernel = true;
487 }
488 gsi = 0;
489 break;
490 }
491
492 case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
493 gsi = set_callback_pci_intx(s, param);
494 ret = gsi ? 0 : -EINVAL;
495 break;
496
497 case HVM_PARAM_CALLBACK_TYPE_GSI:
498 gsi = (uint32_t)param;
499 ret = 0;
500 break;
501
502 default:
503 /* Xen doesn't return error even if you set something bogus */
504 ret = 0;
505 break;
506 }
507
508 /* If the guest has set a per-vCPU callback vector, prefer that. */
509 if (gsi && kvm_xen_has_vcpu_callback_vector()) {
510 in_kernel = kvm_xen_has_cap(EVTCHN_SEND);
511 gsi = 0;
512 }
513
514 if (!ret) {
515 /* If vector delivery was turned *off* then tell the kernel */
516 if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
517 HVM_PARAM_CALLBACK_TYPE_VECTOR && !xa.u.vector) {
518 kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
519 }
520 s->callback_param = param;
521 s->evtchn_in_kernel = in_kernel;
522
523 if (gsi != s->callback_gsi) {
524 struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
525
526 xen_evtchn_set_callback_level(0);
527 s->callback_gsi = gsi;
528
529 if (gsi && vi && vi->evtchn_upcall_pending) {
530 kvm_xen_inject_vcpu_callback_vector(0, type);
531 }
532 }
533 }
534
535 qemu_mutex_unlock(&s->port_lock);
536
537 return ret;
538 }
539
540 static void inject_callback(XenEvtchnState *s, uint32_t vcpu)
541 {
542 int type = s->callback_param >> CALLBACK_VIA_TYPE_SHIFT;
543
544 kvm_xen_inject_vcpu_callback_vector(vcpu, type);
545 }
546
547 static void deassign_kernel_port(evtchn_port_t port)
548 {
549 struct kvm_xen_hvm_attr ha;
550 int ret;
551
552 ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
553 ha.u.evtchn.send_port = port;
554 ha.u.evtchn.flags = KVM_XEN_EVTCHN_DEASSIGN;
555
556 ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
557 if (ret) {
558 qemu_log_mask(LOG_GUEST_ERROR, "Failed to unbind kernel port %d: %s\n",
559 port, strerror(-ret));
560 }
561 }
562
563 static int assign_kernel_port(uint16_t type, evtchn_port_t port,
564 uint32_t vcpu_id)
565 {
566 CPUState *cpu = qemu_get_cpu(vcpu_id);
567 struct kvm_xen_hvm_attr ha;
568
569 if (!cpu) {
570 return -ENOENT;
571 }
572
573 ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
574 ha.u.evtchn.send_port = port;
575 ha.u.evtchn.type = type;
576 ha.u.evtchn.flags = 0;
577 ha.u.evtchn.deliver.port.port = port;
578 ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);
579 ha.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
580
581 return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
582 }
583
584 static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
585 {
586 struct kvm_xen_hvm_attr ha;
587
588 ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
589 ha.u.evtchn.send_port = port;
590 ha.u.evtchn.type = type;
591 ha.u.evtchn.flags = 0;
592 ha.u.evtchn.deliver.eventfd.port = 0;
593 ha.u.evtchn.deliver.eventfd.fd = fd;
594
595 return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
596 }
597
598 static bool valid_port(evtchn_port_t port)
599 {
600 if (!port) {
601 return false;
602 }
603
604 if (xen_is_long_mode()) {
605 return port < EVTCHN_2L_NR_CHANNELS;
606 } else {
607 return port < COMPAT_EVTCHN_2L_NR_CHANNELS;
608 }
609 }
610
611 static bool valid_vcpu(uint32_t vcpu)
612 {
613 return !!qemu_get_cpu(vcpu);
614 }
615
616 static void unbind_backend_ports(XenEvtchnState *s)
617 {
618 XenEvtchnPort *p;
619 int i;
620
621 for (i = 1; i < s->nr_ports; i++) {
622 p = &s->port_table[i];
623 if (p->type == EVTCHNSTAT_interdomain && p->u.interdomain.to_qemu) {
624 evtchn_port_t be_port = p->u.interdomain.port;
625
626 if (s->be_handles[be_port]) {
627 /* This part will be overwritten on the load anyway. */
628 p->type = EVTCHNSTAT_unbound;
629 p->u.interdomain.port = 0;
630
631 /* Leave the backend port open and unbound too. */
632 if (kvm_xen_has_cap(EVTCHN_SEND)) {
633 deassign_kernel_port(i);
634 }
635 s->be_handles[be_port]->guest_port = 0;
636 }
637 }
638 }
639 }
640
641 int xen_evtchn_status_op(struct evtchn_status *status)
642 {
643 XenEvtchnState *s = xen_evtchn_singleton;
644 XenEvtchnPort *p;
645
646 if (!s) {
647 return -ENOTSUP;
648 }
649
650 if (status->dom != DOMID_SELF && status->dom != xen_domid) {
651 return -ESRCH;
652 }
653
654 if (!valid_port(status->port)) {
655 return -EINVAL;
656 }
657
658 qemu_mutex_lock(&s->port_lock);
659
660 p = &s->port_table[status->port];
661
662 status->status = p->type;
663 status->vcpu = p->vcpu;
664
665 switch (p->type) {
666 case EVTCHNSTAT_unbound:
667 status->u.unbound.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
668 : xen_domid;
669 break;
670
671 case EVTCHNSTAT_interdomain:
672 status->u.interdomain.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
673 : xen_domid;
674 status->u.interdomain.port = p->u.interdomain.port;
675 break;
676
677 case EVTCHNSTAT_pirq:
678 status->u.pirq = p->u.pirq;
679 break;
680
681 case EVTCHNSTAT_virq:
682 status->u.virq = p->u.virq;
683 break;
684 }
685
686 qemu_mutex_unlock(&s->port_lock);
687 return 0;
688 }
689
690 /*
691 * Never thought I'd hear myself say this, but C++ templates would be
692 * kind of nice here.
693 *
694 * template<class T> static int do_unmask_port(T *shinfo, ...);
695 */
696 static int do_unmask_port_lm(XenEvtchnState *s, evtchn_port_t port,
697 bool do_unmask, struct shared_info *shinfo,
698 struct vcpu_info *vcpu_info)
699 {
700 const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
701 typeof(shinfo->evtchn_pending[0]) mask;
702 int idx = port / bits_per_word;
703 int offset = port % bits_per_word;
704
705 mask = 1UL << offset;
706
707 if (idx >= bits_per_word) {
708 return -EINVAL;
709 }
710
711 if (do_unmask) {
712 /*
713 * If this is a true unmask operation, clear the mask bit. If
714 * it was already unmasked, we have nothing further to do.
715 */
716 if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
717 return 0;
718 }
719 } else {
720 /*
721 * This is a pseudo-unmask for affinity changes. We don't
722 * change the mask bit, and if it's *masked* we have nothing
723 * else to do.
724 */
725 if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
726 return 0;
727 }
728 }
729
730 /* If the event was not pending, we're done. */
731 if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
732 return 0;
733 }
734
735 /* Now on to the vcpu_info evtchn_pending_sel index... */
736 mask = 1UL << idx;
737
738 /* If a port in this word was already pending for this vCPU, all done. */
739 if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
740 return 0;
741 }
742
743 /* Set evtchn_upcall_pending for this vCPU */
744 if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
745 return 0;
746 }
747
748 inject_callback(s, s->port_table[port].vcpu);
749
750 return 0;
751 }
752
753 static int do_unmask_port_compat(XenEvtchnState *s, evtchn_port_t port,
754 bool do_unmask,
755 struct compat_shared_info *shinfo,
756 struct compat_vcpu_info *vcpu_info)
757 {
758 const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
759 typeof(shinfo->evtchn_pending[0]) mask;
760 int idx = port / bits_per_word;
761 int offset = port % bits_per_word;
762
763 mask = 1UL << offset;
764
765 if (idx >= bits_per_word) {
766 return -EINVAL;
767 }
768
769 if (do_unmask) {
770 /*
771 * If this is a true unmask operation, clear the mask bit. If
772 * it was already unmasked, we have nothing further to do.
773 */
774 if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
775 return 0;
776 }
777 } else {
778 /*
779 * This is a pseudo-unmask for affinity changes. We don't
780 * change the mask bit, and if it's *masked* we have nothing
781 * else to do.
782 */
783 if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
784 return 0;
785 }
786 }
787
788 /* If the event was not pending, we're done. */
789 if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
790 return 0;
791 }
792
793 /* Now on to the vcpu_info evtchn_pending_sel index... */
794 mask = 1UL << idx;
795
796 /* If a port in this word was already pending for this vCPU, all done. */
797 if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
798 return 0;
799 }
800
801 /* Set evtchn_upcall_pending for this vCPU */
802 if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
803 return 0;
804 }
805
806 inject_callback(s, s->port_table[port].vcpu);
807
808 return 0;
809 }
810
811 static int unmask_port(XenEvtchnState *s, evtchn_port_t port, bool do_unmask)
812 {
813 void *vcpu_info, *shinfo;
814
815 if (s->port_table[port].type == EVTCHNSTAT_closed) {
816 return -EINVAL;
817 }
818
819 shinfo = xen_overlay_get_shinfo_ptr();
820 if (!shinfo) {
821 return -ENOTSUP;
822 }
823
824 vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
825 if (!vcpu_info) {
826 return -EINVAL;
827 }
828
829 if (xen_is_long_mode()) {
830 return do_unmask_port_lm(s, port, do_unmask, shinfo, vcpu_info);
831 } else {
832 return do_unmask_port_compat(s, port, do_unmask, shinfo, vcpu_info);
833 }
834 }
835
836 static int do_set_port_lm(XenEvtchnState *s, evtchn_port_t port,
837 struct shared_info *shinfo,
838 struct vcpu_info *vcpu_info)
839 {
840 const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
841 typeof(shinfo->evtchn_pending[0]) mask;
842 int idx = port / bits_per_word;
843 int offset = port % bits_per_word;
844
845 mask = 1UL << offset;
846
847 if (idx >= bits_per_word) {
848 return -EINVAL;
849 }
850
851 /* Update the pending bit itself. If it was already set, we're done. */
852 if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
853 return 0;
854 }
855
856 /* Check if it's masked. */
857 if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
858 return 0;
859 }
860
861 /* Now on to the vcpu_info evtchn_pending_sel index... */
862 mask = 1UL << idx;
863
864 /* If a port in this word was already pending for this vCPU, all done. */
865 if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
866 return 0;
867 }
868
869 /* Set evtchn_upcall_pending for this vCPU */
870 if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
871 return 0;
872 }
873
874 inject_callback(s, s->port_table[port].vcpu);
875
876 return 0;
877 }
878
879 static int do_set_port_compat(XenEvtchnState *s, evtchn_port_t port,
880 struct compat_shared_info *shinfo,
881 struct compat_vcpu_info *vcpu_info)
882 {
883 const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
884 typeof(shinfo->evtchn_pending[0]) mask;
885 int idx = port / bits_per_word;
886 int offset = port % bits_per_word;
887
888 mask = 1UL << offset;
889
890 if (idx >= bits_per_word) {
891 return -EINVAL;
892 }
893
894 /* Update the pending bit itself. If it was already set, we're done. */
895 if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
896 return 0;
897 }
898
899 /* Check if it's masked. */
900 if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
901 return 0;
902 }
903
904 /* Now on to the vcpu_info evtchn_pending_sel index... */
905 mask = 1UL << idx;
906
907 /* If a port in this word was already pending for this vCPU, all done. */
908 if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
909 return 0;
910 }
911
912 /* Set evtchn_upcall_pending for this vCPU */
913 if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
914 return 0;
915 }
916
917 inject_callback(s, s->port_table[port].vcpu);
918
919 return 0;
920 }
921
922 static int set_port_pending(XenEvtchnState *s, evtchn_port_t port)
923 {
924 void *vcpu_info, *shinfo;
925
926 if (s->port_table[port].type == EVTCHNSTAT_closed) {
927 return -EINVAL;
928 }
929
930 if (s->evtchn_in_kernel) {
931 XenEvtchnPort *p = &s->port_table[port];
932 CPUState *cpu = qemu_get_cpu(p->vcpu);
933 struct kvm_irq_routing_xen_evtchn evt;
934
935 if (!cpu) {
936 return 0;
937 }
938
939 evt.port = port;
940 evt.vcpu = kvm_arch_vcpu_id(cpu);
941 evt.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
942
943 return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_EVTCHN_SEND, &evt);
944 }
945
946 shinfo = xen_overlay_get_shinfo_ptr();
947 if (!shinfo) {
948 return -ENOTSUP;
949 }
950
951 vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
952 if (!vcpu_info) {
953 return -EINVAL;
954 }
955
956 if (xen_is_long_mode()) {
957 return do_set_port_lm(s, port, shinfo, vcpu_info);
958 } else {
959 return do_set_port_compat(s, port, shinfo, vcpu_info);
960 }
961 }
962
963 static int clear_port_pending(XenEvtchnState *s, evtchn_port_t port)
964 {
965 void *p = xen_overlay_get_shinfo_ptr();
966
967 if (!p) {
968 return -ENOTSUP;
969 }
970
971 if (xen_is_long_mode()) {
972 struct shared_info *shinfo = p;
973 const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
974 typeof(shinfo->evtchn_pending[0]) mask;
975 int idx = port / bits_per_word;
976 int offset = port % bits_per_word;
977
978 mask = 1UL << offset;
979
980 qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
981 } else {
982 struct compat_shared_info *shinfo = p;
983 const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
984 typeof(shinfo->evtchn_pending[0]) mask;
985 int idx = port / bits_per_word;
986 int offset = port % bits_per_word;
987
988 mask = 1UL << offset;
989
990 qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
991 }
992 return 0;
993 }
994
995 static void free_port(XenEvtchnState *s, evtchn_port_t port)
996 {
997 s->port_table[port].type = EVTCHNSTAT_closed;
998 s->port_table[port].u.val = 0;
999 s->port_table[port].vcpu = 0;
1000
1001 if (s->nr_ports == port + 1) {
1002 do {
1003 s->nr_ports--;
1004 } while (s->nr_ports &&
1005 s->port_table[s->nr_ports - 1].type == EVTCHNSTAT_closed);
1006 }
1007
1008 /* Clear pending event to avoid unexpected behavior on re-bind. */
1009 clear_port_pending(s, port);
1010 }
1011
1012 static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
1013 uint16_t val, evtchn_port_t *port)
1014 {
1015 evtchn_port_t p = 1;
1016
1017 for (p = 1; valid_port(p); p++) {
1018 if (s->port_table[p].type == EVTCHNSTAT_closed) {
1019 s->port_table[p].vcpu = vcpu;
1020 s->port_table[p].type = type;
1021 s->port_table[p].u.val = val;
1022
1023 *port = p;
1024
1025 if (s->nr_ports < p + 1) {
1026 s->nr_ports = p + 1;
1027 }
1028
1029 return 0;
1030 }
1031 }
1032 return -ENOSPC;
1033 }
1034
1035 static bool virq_is_global(uint32_t virq)
1036 {
1037 switch (virq) {
1038 case VIRQ_TIMER:
1039 case VIRQ_DEBUG:
1040 case VIRQ_XENOPROF:
1041 case VIRQ_XENPMU:
1042 return false;
1043
1044 default:
1045 return true;
1046 }
1047 }
1048
1049 static int close_port(XenEvtchnState *s, evtchn_port_t port,
1050 bool *flush_kvm_routes)
1051 {
1052 XenEvtchnPort *p = &s->port_table[port];
1053
1054 /* Because it *might* be a PIRQ port */
1055 assert(bql_locked());
1056
1057 switch (p->type) {
1058 case EVTCHNSTAT_closed:
1059 return -ENOENT;
1060
1061 case EVTCHNSTAT_pirq:
1062 s->pirq[p->u.pirq].port = 0;
1063 if (s->pirq[p->u.pirq].is_translated) {
1064 *flush_kvm_routes = true;
1065 }
1066 break;
1067
1068 case EVTCHNSTAT_virq:
1069 kvm_xen_set_vcpu_virq(virq_is_global(p->u.virq) ? 0 : p->vcpu,
1070 p->u.virq, 0);
1071 break;
1072
1073 case EVTCHNSTAT_ipi:
1074 if (s->evtchn_in_kernel) {
1075 deassign_kernel_port(port);
1076 }
1077 break;
1078
1079 case EVTCHNSTAT_interdomain:
1080 if (p->u.interdomain.to_qemu) {
1081 uint16_t be_port = p->u.interdomain.port;
1082 struct xenevtchn_handle *xc = s->be_handles[be_port];
1083 if (xc) {
1084 if (kvm_xen_has_cap(EVTCHN_SEND)) {
1085 deassign_kernel_port(port);
1086 }
1087 xc->guest_port = 0;
1088 }
1089 } else {
1090 /* Loopback interdomain */
1091 XenEvtchnPort *rp = &s->port_table[p->u.interdomain.port];
1092 if (!valid_port(p->u.interdomain.port) ||
1093 rp->u.interdomain.port != port ||
1094 rp->type != EVTCHNSTAT_interdomain) {
1095 error_report("Inconsistent state for interdomain unbind");
1096 } else {
1097 /* Set the other end back to unbound */
1098 rp->type = EVTCHNSTAT_unbound;
1099 rp->u.interdomain.port = 0;
1100 }
1101 }
1102 break;
1103
1104 default:
1105 break;
1106 }
1107
1108 free_port(s, port);
1109 return 0;
1110 }
1111
1112 int xen_evtchn_soft_reset(void)
1113 {
1114 XenEvtchnState *s = xen_evtchn_singleton;
1115 bool flush_kvm_routes = false;
1116 int i;
1117
1118 if (!s) {
1119 return -ENOTSUP;
1120 }
1121
1122 assert(bql_locked());
1123
1124 qemu_mutex_lock(&s->port_lock);
1125
1126 for (i = 0; i < s->nr_ports; i++) {
1127 close_port(s, i, &flush_kvm_routes);
1128 }
1129
1130 qemu_mutex_unlock(&s->port_lock);
1131
1132 if (flush_kvm_routes) {
1133 kvm_update_msi_routes_all(NULL, true, 0, 0);
1134 }
1135
1136 return 0;
1137 }
1138
1139 int xen_evtchn_reset_op(struct evtchn_reset *reset)
1140 {
1141 if (reset->dom != DOMID_SELF && reset->dom != xen_domid) {
1142 return -ESRCH;
1143 }
1144
1145 BQL_LOCK_GUARD();
1146 return xen_evtchn_soft_reset();
1147 }
1148
1149 int xen_evtchn_close_op(struct evtchn_close *close)
1150 {
1151 XenEvtchnState *s = xen_evtchn_singleton;
1152 bool flush_kvm_routes = false;
1153 int ret;
1154
1155 if (!s) {
1156 return -ENOTSUP;
1157 }
1158
1159 if (!valid_port(close->port)) {
1160 return -EINVAL;
1161 }
1162
1163 BQL_LOCK_GUARD();
1164 qemu_mutex_lock(&s->port_lock);
1165
1166 ret = close_port(s, close->port, &flush_kvm_routes);
1167
1168 qemu_mutex_unlock(&s->port_lock);
1169
1170 if (flush_kvm_routes) {
1171 kvm_update_msi_routes_all(NULL, true, 0, 0);
1172 }
1173
1174 return ret;
1175 }
1176
1177 int xen_evtchn_unmask_op(struct evtchn_unmask *unmask)
1178 {
1179 XenEvtchnState *s = xen_evtchn_singleton;
1180 int ret;
1181
1182 if (!s) {
1183 return -ENOTSUP;
1184 }
1185
1186 if (!valid_port(unmask->port)) {
1187 return -EINVAL;
1188 }
1189
1190 qemu_mutex_lock(&s->port_lock);
1191
1192 ret = unmask_port(s, unmask->port, true);
1193
1194 qemu_mutex_unlock(&s->port_lock);
1195
1196 return ret;
1197 }
1198
1199 int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu)
1200 {
1201 XenEvtchnState *s = xen_evtchn_singleton;
1202 XenEvtchnPort *p;
1203 int ret = -EINVAL;
1204
1205 if (!s) {
1206 return -ENOTSUP;
1207 }
1208
1209 if (!valid_port(vcpu->port)) {
1210 return -EINVAL;
1211 }
1212
1213 if (!valid_vcpu(vcpu->vcpu)) {
1214 return -ENOENT;
1215 }
1216
1217 qemu_mutex_lock(&s->port_lock);
1218
1219 p = &s->port_table[vcpu->port];
1220
1221 if (p->type == EVTCHNSTAT_interdomain ||
1222 p->type == EVTCHNSTAT_unbound ||
1223 p->type == EVTCHNSTAT_pirq ||
1224 (p->type == EVTCHNSTAT_virq && virq_is_global(p->u.virq))) {
1225 /*
1226 * unmask_port() with do_unmask==false will just raise the event
1227 * on the new vCPU if the port was already pending.
1228 */
1229 p->vcpu = vcpu->vcpu;
1230 unmask_port(s, vcpu->port, false);
1231 ret = 0;
1232 }
1233
1234 qemu_mutex_unlock(&s->port_lock);
1235
1236 return ret;
1237 }
1238
1239 int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
1240 {
1241 XenEvtchnState *s = xen_evtchn_singleton;
1242 int ret;
1243
1244 if (!s) {
1245 return -ENOTSUP;
1246 }
1247
1248 if (virq->virq >= NR_VIRQS) {
1249 return -EINVAL;
1250 }
1251
1252 /* Global VIRQ must be allocated on vCPU0 first */
1253 if (virq_is_global(virq->virq) && virq->vcpu != 0) {
1254 return -EINVAL;
1255 }
1256
1257 if (!valid_vcpu(virq->vcpu)) {
1258 return -ENOENT;
1259 }
1260
1261 qemu_mutex_lock(&s->port_lock);
1262
1263 ret = allocate_port(s, virq->vcpu, EVTCHNSTAT_virq, virq->virq,
1264 &virq->port);
1265 if (!ret) {
1266 ret = kvm_xen_set_vcpu_virq(virq->vcpu, virq->virq, virq->port);
1267 if (ret) {
1268 free_port(s, virq->port);
1269 }
1270 }
1271
1272 qemu_mutex_unlock(&s->port_lock);
1273
1274 return ret;
1275 }
1276
1277 int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
1278 {
1279 XenEvtchnState *s = xen_evtchn_singleton;
1280 int ret;
1281
1282 if (!s) {
1283 return -ENOTSUP;
1284 }
1285
1286 if (pirq->pirq >= s->nr_pirqs) {
1287 return -EINVAL;
1288 }
1289
1290 BQL_LOCK_GUARD();
1291
1292 if (s->pirq[pirq->pirq].port) {
1293 return -EBUSY;
1294 }
1295
1296 qemu_mutex_lock(&s->port_lock);
1297
1298 ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
1299 &pirq->port);
1300 if (ret) {
1301 qemu_mutex_unlock(&s->port_lock);
1302 return ret;
1303 }
1304
1305 s->pirq[pirq->pirq].port = pirq->port;
1306 trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);
1307
1308 qemu_mutex_unlock(&s->port_lock);
1309
1310 /*
1311 * Need to do the unmask outside port_lock because it may call
1312 * back into the MSI translate function.
1313 */
1314 if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) {
1315 if (s->pirq[pirq->pirq].is_masked) {
1316 PCIDevice *dev = s->pirq[pirq->pirq].dev;
1317 int vector = s->pirq[pirq->pirq].vector;
1318 char *dev_path = qdev_get_dev_path(DEVICE(dev));
1319
1320 trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
1321 g_free(dev_path);
1322
1323 if (s->pirq[pirq->pirq].is_msix) {
1324 msix_set_mask(dev, vector, false);
1325 } else {
1326 msi_set_mask(dev, vector, false, NULL);
1327 }
1328 } else if (s->pirq[pirq->pirq].is_translated) {
1329 /*
1330 * If KVM had attempted to translate this one before, make it try
1331 * again. If we unmasked, then the notifier on the MSI(-X) vector
1332 * will already have had the same effect.
1333 */
1334 kvm_update_msi_routes_all(NULL, true, 0, 0);
1335 }
1336 }
1337
1338 return ret;
1339 }
1340
1341 int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi)
1342 {
1343 XenEvtchnState *s = xen_evtchn_singleton;
1344 int ret;
1345
1346 if (!s) {
1347 return -ENOTSUP;
1348 }
1349
1350 if (!valid_vcpu(ipi->vcpu)) {
1351 return -ENOENT;
1352 }
1353
1354 qemu_mutex_lock(&s->port_lock);
1355
1356 ret = allocate_port(s, ipi->vcpu, EVTCHNSTAT_ipi, 0, &ipi->port);
1357 if (!ret && s->evtchn_in_kernel) {
1358 assign_kernel_port(EVTCHNSTAT_ipi, ipi->port, ipi->vcpu);
1359 }
1360
1361 qemu_mutex_unlock(&s->port_lock);
1362
1363 return ret;
1364 }
1365
1366 int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
1367 {
1368 XenEvtchnState *s = xen_evtchn_singleton;
1369 int ret;
1370
1371 if (!s) {
1372 return -ENOTSUP;
1373 }
1374
1375 if (interdomain->remote_dom != DOMID_QEMU &&
1376 interdomain->remote_dom != DOMID_SELF &&
1377 interdomain->remote_dom != xen_domid) {
1378 return -ESRCH;
1379 }
1380
1381 if (!valid_port(interdomain->remote_port)) {
1382 return -EINVAL;
1383 }
1384
1385 qemu_mutex_lock(&s->port_lock);
1386
1387 /* The newly allocated port starts out as unbound */
1388 ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &interdomain->local_port);
1389
1390 if (ret) {
1391 goto out;
1392 }
1393
1394 if (interdomain->remote_dom == DOMID_QEMU) {
1395 struct xenevtchn_handle *xc = s->be_handles[interdomain->remote_port];
1396 XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1397
1398 if (!xc) {
1399 ret = -ENOENT;
1400 goto out_free_port;
1401 }
1402
1403 if (xc->guest_port) {
1404 ret = -EBUSY;
1405 goto out_free_port;
1406 }
1407
1408 assert(xc->be_port == interdomain->remote_port);
1409 xc->guest_port = interdomain->local_port;
1410 if (kvm_xen_has_cap(EVTCHN_SEND)) {
1411 assign_kernel_eventfd(lp->type, xc->guest_port, xc->fd);
1412 }
1413 lp->type = EVTCHNSTAT_interdomain;
1414 lp->u.interdomain.to_qemu = 1;
1415 lp->u.interdomain.port = interdomain->remote_port;
1416 ret = 0;
1417 } else {
1418 /* Loopback */
1419 XenEvtchnPort *rp = &s->port_table[interdomain->remote_port];
1420 XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1421
1422 /*
1423 * The 'remote' port for loopback must be an unbound port allocated
1424 * for communication with the local domain, and must *not* be the
1425 * port that was just allocated for the local end.
1426 */
1427 if (interdomain->local_port != interdomain->remote_port &&
1428 rp->type == EVTCHNSTAT_unbound && !rp->u.interdomain.to_qemu) {
1429
1430 rp->type = EVTCHNSTAT_interdomain;
1431 rp->u.interdomain.port = interdomain->local_port;
1432
1433 lp->type = EVTCHNSTAT_interdomain;
1434 lp->u.interdomain.port = interdomain->remote_port;
1435 } else {
1436 ret = -EINVAL;
1437 }
1438 }
1439
1440 out_free_port:
1441 if (ret) {
1442 free_port(s, interdomain->local_port);
1443 }
1444 out:
1445 qemu_mutex_unlock(&s->port_lock);
1446
1447 return ret;
1448
1449 }
1450 int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
1451 {
1452 XenEvtchnState *s = xen_evtchn_singleton;
1453 int ret;
1454
1455 if (!s) {
1456 return -ENOTSUP;
1457 }
1458
1459 if (alloc->dom != DOMID_SELF && alloc->dom != xen_domid) {
1460 return -ESRCH;
1461 }
1462
1463 if (alloc->remote_dom != DOMID_QEMU &&
1464 alloc->remote_dom != DOMID_SELF &&
1465 alloc->remote_dom != xen_domid) {
1466 return -EPERM;
1467 }
1468
1469 qemu_mutex_lock(&s->port_lock);
1470
1471 ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &alloc->port);
1472
1473 if (!ret && alloc->remote_dom == DOMID_QEMU) {
1474 XenEvtchnPort *p = &s->port_table[alloc->port];
1475 p->u.interdomain.to_qemu = 1;
1476 }
1477
1478 qemu_mutex_unlock(&s->port_lock);
1479
1480 return ret;
1481 }
1482
1483 int xen_evtchn_send_op(struct evtchn_send *send)
1484 {
1485 XenEvtchnState *s = xen_evtchn_singleton;
1486 XenEvtchnPort *p;
1487 int ret = 0;
1488
1489 if (!s) {
1490 return -ENOTSUP;
1491 }
1492
1493 if (!valid_port(send->port)) {
1494 return -EINVAL;
1495 }
1496
1497 qemu_mutex_lock(&s->port_lock);
1498
1499 p = &s->port_table[send->port];
1500
1501 switch (p->type) {
1502 case EVTCHNSTAT_interdomain:
1503 if (p->u.interdomain.to_qemu) {
1504 /*
1505 * This is an event from the guest to qemu itself, which is
1506 * serving as the driver domain.
1507 */
1508 uint16_t be_port = p->u.interdomain.port;
1509 struct xenevtchn_handle *xc = s->be_handles[be_port];
1510 if (xc) {
1511 eventfd_write(xc->fd, 1);
1512 ret = 0;
1513 } else {
1514 ret = -ENOENT;
1515 }
1516 } else {
1517 /* Loopback interdomain ports; just a complex IPI */
1518 set_port_pending(s, p->u.interdomain.port);
1519 }
1520 break;
1521
1522 case EVTCHNSTAT_ipi:
1523 set_port_pending(s, send->port);
1524 break;
1525
1526 case EVTCHNSTAT_unbound:
1527 /* Xen will silently drop these */
1528 break;
1529
1530 default:
1531 ret = -EINVAL;
1532 break;
1533 }
1534
1535 qemu_mutex_unlock(&s->port_lock);
1536
1537 return ret;
1538 }
1539
1540 int xen_evtchn_set_port(uint16_t port)
1541 {
1542 XenEvtchnState *s = xen_evtchn_singleton;
1543 XenEvtchnPort *p;
1544 int ret = -EINVAL;
1545
1546 if (!s) {
1547 return -ENOTSUP;
1548 }
1549
1550 if (!valid_port(port)) {
1551 return -EINVAL;
1552 }
1553
1554 qemu_mutex_lock(&s->port_lock);
1555
1556 p = &s->port_table[port];
1557
1558 /* QEMU has no business sending to anything but these */
1559 if (p->type == EVTCHNSTAT_virq ||
1560 (p->type == EVTCHNSTAT_interdomain && p->u.interdomain.to_qemu)) {
1561 set_port_pending(s, port);
1562 ret = 0;
1563 }
1564
1565 qemu_mutex_unlock(&s->port_lock);
1566
1567 return ret;
1568 }
1569
1570 static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
1571 {
1572 uint16_t pirq;
1573
1574 /*
1575 * Preserve the allocation strategy that Xen has. It looks like
1576 * we *never* give out PIRQ 0-15, we give out 16-nr_irqs_gsi only
1577 * to GSIs (counting up from 16), and then we count backwards from
1578 * the top for MSIs or when the GSI space is exhausted.
1579 */
1580 if (type == MAP_PIRQ_TYPE_GSI) {
1581 for (pirq = 16 ; pirq < IOAPIC_NUM_PINS; pirq++) {
1582 if (pirq_inuse(s, pirq)) {
1583 continue;
1584 }
1585
1586 /* Found it */
1587 goto found;
1588 }
1589 }
1590 for (pirq = s->nr_pirqs - 1; pirq >= IOAPIC_NUM_PINS; pirq--) {
1591 /* Skip whole words at a time when they're full */
1592 if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
1593 pirq &= ~63ULL;
1594 continue;
1595 }
1596 if (pirq_inuse(s, pirq)) {
1597 continue;
1598 }
1599
1600 goto found;
1601 }
1602 return -ENOSPC;
1603
1604 found:
1605 pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1606 if (gsi >= 0) {
1607 assert(gsi < IOAPIC_NUM_PINS);
1608 s->gsi_pirq[gsi] = pirq;
1609 }
1610 s->pirq[pirq].gsi = gsi;
1611 return pirq;
1612 }
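/*
 * Illustrative only: with the default of 256 PIRQs, a MAP_PIRQ_TYPE_GSI
 * request therefore receives the lowest free PIRQ in 16..IOAPIC_NUM_PINS-1,
 * while MSIs (or an exhausted GSI range) are handed out from 255 downwards,
 * mirroring the Xen allocation strategy described above.
 */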
1613
1614 bool xen_evtchn_set_gsi(int gsi, int *level)
1615 {
1616 XenEvtchnState *s = xen_evtchn_singleton;
1617 int pirq;
1618
1619 assert(bql_locked());
1620
1621 if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1622 return false;
1623 }
1624
1625 /*
1626 * For the callback_gsi we need to implement a logical OR of the event
1627 * channel GSI and the external input (e.g. from PCI INTx), because
1628 * QEMU itself doesn't support shared level interrupts via demux or
1629 * resamplers.
1630 */
1631 if (gsi && gsi == s->callback_gsi) {
1632 /* Remember the external state of the GSI pin (e.g. from PCI INTx) */
1633 if (!s->setting_callback_gsi) {
1634 s->extern_gsi_level = *level;
1635
1636 /*
1637 * Don't allow the external device to deassert the line if the
1638 * eveht channel GSI should still be asserted.
1639 */
1640 if (!s->extern_gsi_level) {
1641 struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
1642 if (vi && vi->evtchn_upcall_pending) {
1643 /* Need to poll for deassertion */
1644 kvm_xen_set_callback_asserted();
1645 *level = 1;
1646 }
1647 }
1648 }
1649
1650 /*
1651 * The event channel GSI cannot be routed to PIRQ, as that would make
1652 * no sense. It could also deadlock on s->port_lock, if we proceed.
1653 * So bail out now.
1654 */
1655 return false;
1656 }
1657
1658 QEMU_LOCK_GUARD(&s->port_lock);
1659
1660 pirq = s->gsi_pirq[gsi];
1661 if (!pirq) {
1662 return false;
1663 }
1664
1665 if (*level) {
1666 int port = s->pirq[pirq].port;
1667
1668 s->pirq_gsi_set |= (1U << gsi);
1669 if (port) {
1670 set_port_pending(s, port);
1671 }
1672 } else {
1673 s->pirq_gsi_set &= ~(1U << gsi);
1674 }
1675 return true;
1676 }
1677
1678 static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
1679 {
1680 /* The vector (in low 8 bits of data) must be zero */
1681 if (data & 0xff) {
1682 return 0;
1683 }
1684
1685 uint32_t pirq = (addr & 0xff000) >> 12;
1686 pirq |= (addr >> 32) & 0xffffff00;
1687
1688 return pirq;
1689 }
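/*
 * Worked example, derived only from the decode above: a PIRQ-targeted MSI
 * carries a zero vector in the low byte of the data, PIRQ bits 0-7 in
 * address bits 12-19, and the remaining PIRQ bits in the high dword of the
 * address. So for PIRQ 0x123 (values illustrative):
 *
 *   uint64_t addr = 0xfee00000 | (0x23ULL << 12) | (0x100ULL << 32);
 *   uint32_t data = 0;
 *   assert(msi_pirq_target(addr, data) == 0x123);
 */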
1690
1691 static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
1692 int except_pirq)
1693 {
1694 uint32_t pirq;
1695
1696 for (pirq = 0; pirq < s->nr_pirqs; pirq++) {
1697 /*
1698 * We could be cleverer here, but it isn't really a fast path, and
1699 * this trivial optimisation is enough to let us skip the big gap
1700 * in the middle a bit quicker (in terms of both loop iterations,
1701 * and cache lines).
1702 */
1703 if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
1704 pirq += 64;
1705 continue;
1706 }
1707 if (except_pirq && pirq == except_pirq) {
1708 continue;
1709 }
1710 if (s->pirq[pirq].dev != dev) {
1711 continue;
1712 }
1713 if (vector != -1 && s->pirq[pirq].vector != vector) {
1714 continue;
1715 }
1716
1717 /* It could theoretically be bound to a port already, but that is OK. */
1718 s->pirq[pirq].dev = dev;
1719 s->pirq[pirq].gsi = IRQ_UNBOUND;
1720 s->pirq[pirq].is_msix = false;
1721 s->pirq[pirq].vector = 0;
1722 s->pirq[pirq].is_masked = false;
1723 s->pirq[pirq].is_translated = false;
1724 }
1725 }
1726
1727 void xen_evtchn_remove_pci_device(PCIDevice *dev)
1728 {
1729 XenEvtchnState *s = xen_evtchn_singleton;
1730
1731 if (!s) {
1732 return;
1733 }
1734
1735 QEMU_LOCK_GUARD(&s->port_lock);
1736 do_remove_pci_vector(s, dev, -1, 0);
1737 }
1738
1739 void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
1740 uint64_t addr, uint32_t data, bool is_masked)
1741 {
1742 XenEvtchnState *s = xen_evtchn_singleton;
1743 uint32_t pirq;
1744
1745 if (!s) {
1746 return;
1747 }
1748
1749 assert(bql_locked());
1750
1751 pirq = msi_pirq_target(addr, data);
1752
1753 /*
1754 * The PIRQ# must be sane, and there must be an allocated PIRQ in
1755 * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
1756 */
1757 if (!pirq || pirq >= s->nr_pirqs || !pirq_inuse(s, pirq) ||
1758 (s->pirq[pirq].gsi != IRQ_UNBOUND &&
1759 s->pirq[pirq].gsi != IRQ_MSI_EMU)) {
1760 pirq = 0;
1761 }
1762
1763 if (pirq) {
1764 s->pirq[pirq].dev = dev;
1765 s->pirq[pirq].gsi = IRQ_MSI_EMU;
1766 s->pirq[pirq].is_msix = is_msix;
1767 s->pirq[pirq].vector = vector;
1768 s->pirq[pirq].is_masked = is_masked;
1769 }
1770
1771 /* Remove any (other) entries for this {device, vector} */
1772 do_remove_pci_vector(s, dev, vector, pirq);
1773 }
1774
1775 int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
1776 uint64_t address, uint32_t data)
1777 {
1778 XenEvtchnState *s = xen_evtchn_singleton;
1779 uint32_t pirq, port;
1780 CPUState *cpu;
1781
1782 if (!s) {
1783 return 1; /* Not a PIRQ */
1784 }
1785
1786 assert(bql_locked());
1787
1788 pirq = msi_pirq_target(address, data);
1789 if (!pirq || pirq >= s->nr_pirqs) {
1790 return 1; /* Not a PIRQ */
1791 }
1792
1793 if (!kvm_xen_has_cap(EVTCHN_2LEVEL)) {
1794 return -ENOTSUP;
1795 }
1796
1797 if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
1798 return -EINVAL;
1799 }
1800
1801 /* Remember that KVM tried to translate this. It might need to try again. */
1802 s->pirq[pirq].is_translated = true;
1803
1804 QEMU_LOCK_GUARD(&s->port_lock);
1805
1806 port = s->pirq[pirq].port;
1807 if (!valid_port(port)) {
1808 return -EINVAL;
1809 }
1810
1811 cpu = qemu_get_cpu(s->port_table[port].vcpu);
1812 if (!cpu) {
1813 return -EINVAL;
1814 }
1815
1816 route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
1817 route->u.xen_evtchn.port = port;
1818 route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
1819 route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
1820
1821 return 0; /* Handled */
1822 }
1823
1824 bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
1825 {
1826 XenEvtchnState *s = xen_evtchn_singleton;
1827 uint32_t pirq, port;
1828
1829 if (!s) {
1830 return false;
1831 }
1832
1833 assert(bql_locked());
1834
1835 pirq = msi_pirq_target(address, data);
1836 if (!pirq || pirq >= s->nr_pirqs) {
1837 return false;
1838 }
1839
1840 QEMU_LOCK_GUARD(&s->port_lock);
1841
1842 port = s->pirq[pirq].port;
1843 if (!valid_port(port)) {
1844 return false;
1845 }
1846
1847 set_port_pending(s, port);
1848 return true;
1849 }
1850
1851 int xen_physdev_map_pirq(struct physdev_map_pirq *map)
1852 {
1853 XenEvtchnState *s = xen_evtchn_singleton;
1854 int pirq = map->pirq;
1855 int gsi = map->index;
1856
1857 if (!s) {
1858 return -ENOTSUP;
1859 }
1860
1861 BQL_LOCK_GUARD();
1862 QEMU_LOCK_GUARD(&s->port_lock);
1863
1864 if (map->domid != DOMID_SELF && map->domid != xen_domid) {
1865 return -EPERM;
1866 }
1867 if (map->type != MAP_PIRQ_TYPE_GSI) {
1868 return -EINVAL;
1869 }
1870 if (gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1871 return -EINVAL;
1872 }
1873
1874 if (pirq < 0) {
1875 pirq = allocate_pirq(s, map->type, gsi);
1876 if (pirq < 0) {
1877 return pirq;
1878 }
1879 map->pirq = pirq;
1880 } else if (pirq >= s->nr_pirqs) {
1881 return -EINVAL;
1882 } else {
1883 /*
1884 * User specified a valid-looking PIRQ#. Allow it if it is
1885 * allocated and not yet bound, or if it is unallocated
1886 */
1887 if (pirq_inuse(s, pirq)) {
1888 if (s->pirq[pirq].gsi != IRQ_UNBOUND) {
1889 return -EBUSY;
1890 }
1891 } else {
1892 /* If it was unused, mark it used now. */
1893 pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1894 }
1895 /* Set the mapping in both directions. */
1896 s->pirq[pirq].gsi = gsi;
1897 s->gsi_pirq[gsi] = pirq;
1898 }
1899
1900 trace_kvm_xen_map_pirq(pirq, gsi);
1901 return 0;
1902 }
1903
1904 int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
1905 {
1906 XenEvtchnState *s = xen_evtchn_singleton;
1907 int pirq = unmap->pirq;
1908 int gsi;
1909
1910 if (!s) {
1911 return -ENOTSUP;
1912 }
1913
1914 if (unmap->domid != DOMID_SELF && unmap->domid != xen_domid) {
1915 return -EPERM;
1916 }
1917 if (pirq < 0 || pirq >= s->nr_pirqs) {
1918 return -EINVAL;
1919 }
1920
1921 BQL_LOCK_GUARD();
1922 qemu_mutex_lock(&s->port_lock);
1923
1924 if (!pirq_inuse(s, pirq)) {
1925 qemu_mutex_unlock(&s->port_lock);
1926 return -ENOENT;
1927 }
1928
1929 gsi = s->pirq[pirq].gsi;
1930
1931 /* We can only unmap GSI PIRQs */
1932 if (gsi < 0) {
1933 qemu_mutex_unlock(&s->port_lock);
1934 return -EINVAL;
1935 }
1936
1937 s->gsi_pirq[gsi] = 0;
1938 s->pirq[pirq].gsi = IRQ_UNBOUND; /* Doesn't actually matter because: */
1939 pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);
1940
1941 trace_kvm_xen_unmap_pirq(pirq, gsi);
1942 qemu_mutex_unlock(&s->port_lock);
1943
1944 if (gsi == IRQ_MSI_EMU) {
1945 kvm_update_msi_routes_all(NULL, true, 0, 0);
1946 }
1947
1948 return 0;
1949 }
1950
1951 int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
1952 {
1953 XenEvtchnState *s = xen_evtchn_singleton;
1954 int pirq = eoi->irq;
1955 int gsi;
1956
1957 if (!s) {
1958 return -ENOTSUP;
1959 }
1960
1961 BQL_LOCK_GUARD();
1962 QEMU_LOCK_GUARD(&s->port_lock);
1963
1964 if (!pirq_inuse(s, pirq)) {
1965 return -ENOENT;
1966 }
1967
1968 gsi = s->pirq[pirq].gsi;
1969 if (gsi < 0) {
1970 return -EINVAL;
1971 }
1972
1973 /* Reassert a level IRQ if needed */
1974 if (s->pirq_gsi_set & (1U << gsi)) {
1975 int port = s->pirq[pirq].port;
1976 if (port) {
1977 set_port_pending(s, port);
1978 }
1979 }
1980
1981 return 0;
1982 }
1983
1984 int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
1985 {
1986 XenEvtchnState *s = xen_evtchn_singleton;
1987 int pirq = query->irq;
1988
1989 if (!s) {
1990 return -ENOTSUP;
1991 }
1992
1993 BQL_LOCK_GUARD();
1994 QEMU_LOCK_GUARD(&s->port_lock);
1995
1996 if (!pirq_inuse(s, pirq)) {
1997 return -ENOENT;
1998 }
1999
2000 if (s->pirq[pirq].gsi >= 0) {
2001 query->flags = XENIRQSTAT_needs_eoi;
2002 } else {
2003 query->flags = 0;
2004 }
2005
2006 return 0;
2007 }
2008
2009 int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get)
2010 {
2011 XenEvtchnState *s = xen_evtchn_singleton;
2012 int pirq;
2013
2014 if (!s) {
2015 return -ENOTSUP;
2016 }
2017
2018 QEMU_LOCK_GUARD(&s->port_lock);
2019
2020 pirq = allocate_pirq(s, get->type, IRQ_UNBOUND);
2021 if (pirq < 0) {
2022 return pirq;
2023 }
2024
2025 get->pirq = pirq;
2026 trace_kvm_xen_get_free_pirq(pirq, get->type);
2027 return 0;
2028 }
2029
2030 struct xenevtchn_handle *xen_be_evtchn_open(void)
2031 {
2032 struct xenevtchn_handle *xc = g_new0(struct xenevtchn_handle, 1);
2033
2034 xc->fd = eventfd(0, EFD_CLOEXEC);
2035 if (xc->fd < 0) {
2036 g_free(xc);
2037 return NULL;
2038 }
2039
2040 return xc;
2041 }
2042
2043 static int find_be_port(XenEvtchnState *s, struct xenevtchn_handle *xc)
2044 {
2045 int i;
2046
2047 for (i = 1; i < EVTCHN_2L_NR_CHANNELS; i++) {
2048 if (!s->be_handles[i]) {
2049 s->be_handles[i] = xc;
2050 xc->be_port = i;
2051 return i;
2052 }
2053 }
2054 return 0;
2055 }
2056
2057 int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
2058 evtchn_port_t guest_port)
2059 {
2060 XenEvtchnState *s = xen_evtchn_singleton;
2061 XenEvtchnPort *gp;
2062 uint16_t be_port = 0;
2063 int ret;
2064
2065 if (!s) {
2066 return -ENOTSUP;
2067 }
2068
2069 if (!xc) {
2070 return -EFAULT;
2071 }
2072
2073 if (domid != xen_domid) {
2074 return -ESRCH;
2075 }
2076
2077 if (!valid_port(guest_port)) {
2078 return -EINVAL;
2079 }
2080
2081 qemu_mutex_lock(&s->port_lock);
2082
2083 /* The guest has to have an unbound port waiting for us to bind */
2084 gp = &s->port_table[guest_port];
2085
2086 switch (gp->type) {
2087 case EVTCHNSTAT_interdomain:
2088 /* Allow rebinding after migration, preserve port # if possible */
2089 be_port = gp->u.interdomain.port;
2090 assert(be_port != 0);
2091 if (!s->be_handles[be_port]) {
2092 s->be_handles[be_port] = xc;
2093 xc->guest_port = guest_port;
2094 ret = xc->be_port = be_port;
2095 if (kvm_xen_has_cap(EVTCHN_SEND)) {
2096 assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2097 }
2098 break;
2099 }
2100 /* fall through */
2101
2102 case EVTCHNSTAT_unbound:
2103 be_port = find_be_port(s, xc);
2104 if (!be_port) {
2105 ret = -ENOSPC;
2106 goto out;
2107 }
2108
2109 gp->type = EVTCHNSTAT_interdomain;
2110 gp->u.interdomain.to_qemu = 1;
2111 gp->u.interdomain.port = be_port;
2112 xc->guest_port = guest_port;
2113 if (kvm_xen_has_cap(EVTCHN_SEND)) {
2114 assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2115 }
2116 ret = be_port;
2117 break;
2118
2119 default:
2120 ret = -EINVAL;
2121 break;
2122 }
2123
2124 out:
2125 qemu_mutex_unlock(&s->port_lock);
2126
2127 return ret;
2128 }
2129
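/*
 * Drop the backend binding. The guest's port reverts to EVTCHNSTAT_unbound
 * so it can be bound again later; port == 0 means "whatever this handle is
 * currently bound to".
 */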
int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (!xc) {
        return -EFAULT;
    }

    qemu_mutex_lock(&s->port_lock);

    if (port && port != xc->be_port) {
        ret = -EINVAL;
        goto out;
    }

    if (xc->guest_port) {
        XenEvtchnPort *gp = &s->port_table[xc->guest_port];

        /* This should never *not* be true */
        if (gp->type == EVTCHNSTAT_interdomain) {
            gp->type = EVTCHNSTAT_unbound;
            gp->u.interdomain.port = 0;
        }

        if (kvm_xen_has_cap(EVTCHN_SEND)) {
            deassign_kernel_port(xc->guest_port);
        }
        xc->guest_port = 0;
    }

    s->be_handles[xc->be_port] = NULL;
    xc->be_port = 0;
    ret = 0;
 out:
    qemu_mutex_unlock(&s->port_lock);
    return ret;
}

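/* Unbind (if bound), then release the eventfd and the handle itself. */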
int xen_be_evtchn_close(struct xenevtchn_handle *xc)
{
    if (!xc) {
        return -EFAULT;
    }

    xen_be_evtchn_unbind(xc, 0);

    close(xc->fd);
    g_free(xc);
    return 0;
}

int xen_be_evtchn_fd(struct xenevtchn_handle *xc)
{
    if (!xc) {
        return -1;
    }
    return xc->fd;
}

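/*
 * Inject an event from the QEMU backend side: raise the guest port bound to
 * this handle, exactly as if the remote domain had sent a notification.
 */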
int xen_be_evtchn_notify(struct xenevtchn_handle *xc, evtchn_port_t port)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (!xc) {
        return -EFAULT;
    }

    qemu_mutex_lock(&s->port_lock);

    if (xc->guest_port) {
        set_port_pending(s, xc->guest_port);
        ret = 0;
    } else {
        ret = -ENOTCONN;
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}

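/*
 * Return the backend port if an event is pending on this handle, or zero
 * otherwise. Reading the eventfd consumes the notification.
 */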
int xen_be_evtchn_pending(struct xenevtchn_handle *xc)
{
    uint64_t val;

    if (!xc) {
        return -EFAULT;
    }

    if (!xc->be_port) {
        return 0;
    }

    if (eventfd_read(xc->fd, &val)) {
        return -errno;
    }

    return val ? xc->be_port : 0;
}

int xen_be_evtchn_unmask(struct xenevtchn_handle *xc, evtchn_port_t port)
{
    if (!xc) {
        return -EFAULT;
    }

    if (xc->be_port != port) {
        return -EINVAL;
    }

    /*
     * We don't actually do anything to unmask it; the event was already
     * consumed in xen_be_evtchn_pending().
     */
    return 0;
}

int xen_be_evtchn_get_guest_port(struct xenevtchn_handle *xc)
{
    return xc->guest_port;
}

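/*
 * QMP 'xen-event-list': walk the port table and report each open port's
 * type, target and vCPU, plus its pending/masked bits read directly from
 * the shared info page (using the 32-bit layout for non-long-mode guests).
 */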
EvtchnInfoList *qmp_xen_event_list(Error **errp)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    EvtchnInfoList *head = NULL, **tail = &head;
    void *shinfo, *pending, *mask;
    int i;

    if (!s) {
        error_setg(errp, "Xen event channel emulation not enabled");
        return NULL;
    }

    shinfo = xen_overlay_get_shinfo_ptr();
    if (!shinfo) {
        error_setg(errp, "Xen shared info page not allocated");
        return NULL;
    }

    if (xen_is_long_mode()) {
        pending = shinfo + offsetof(struct shared_info, evtchn_pending);
        mask = shinfo + offsetof(struct shared_info, evtchn_mask);
    } else {
        pending = shinfo + offsetof(struct compat_shared_info, evtchn_pending);
        mask = shinfo + offsetof(struct compat_shared_info, evtchn_mask);
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    for (i = 0; i < s->nr_ports; i++) {
        XenEvtchnPort *p = &s->port_table[i];
        EvtchnInfo *info;

        if (p->type == EVTCHNSTAT_closed) {
            continue;
        }

        info = g_new0(EvtchnInfo, 1);

        info->port = i;
        qemu_build_assert(EVTCHN_PORT_TYPE_CLOSED == EVTCHNSTAT_closed);
        qemu_build_assert(EVTCHN_PORT_TYPE_UNBOUND == EVTCHNSTAT_unbound);
        qemu_build_assert(EVTCHN_PORT_TYPE_INTERDOMAIN == EVTCHNSTAT_interdomain);
        qemu_build_assert(EVTCHN_PORT_TYPE_PIRQ == EVTCHNSTAT_pirq);
        qemu_build_assert(EVTCHN_PORT_TYPE_VIRQ == EVTCHNSTAT_virq);
        qemu_build_assert(EVTCHN_PORT_TYPE_IPI == EVTCHNSTAT_ipi);

        info->type = p->type;
        if (p->type == EVTCHNSTAT_interdomain) {
            info->remote_domain = g_strdup(p->u.interdomain.to_qemu ?
                                           "qemu" : "loopback");
            info->target = p->u.interdomain.port;
        } else {
            info->target = p->u.val; /* pirq# or virq# */
        }
        info->vcpu = p->vcpu;
        info->pending = test_bit(i, pending);
        info->masked = test_bit(i, mask);

        QAPI_LIST_APPEND(tail, info);
    }

    return head;
}

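/*
 * QMP 'xen-event-inject': mark the given port pending, as if the remote end
 * had notified it.
 */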
void qmp_xen_event_inject(uint32_t port, Error **errp)
{
    XenEvtchnState *s = xen_evtchn_singleton;

    if (!s) {
        error_setg(errp, "Xen event channel emulation not enabled");
        return;
    }

    if (!valid_port(port)) {
        error_setg(errp, "Invalid port %u", port);
        return;
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    if (set_port_pending(s, port)) {
        error_setg(errp, "Failed to set port %u", port);
        return;
    }
}

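/* HMP 'xen-event-list': human-readable wrapper around qmp_xen_event_list(). */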
void hmp_xen_event_list(Monitor *mon, const QDict *qdict)
{
    EvtchnInfoList *iter, *info_list;
    Error *err = NULL;

    info_list = qmp_xen_event_list(&err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    for (iter = info_list; iter; iter = iter->next) {
        EvtchnInfo *info = iter->value;

        monitor_printf(mon, "port %4u: vcpu: %d %s", info->port, info->vcpu,
                       EvtchnPortType_str(info->type));
        if (info->type != EVTCHN_PORT_TYPE_IPI) {
            monitor_printf(mon, "(");
            if (info->remote_domain) {
                monitor_printf(mon, "%s:", info->remote_domain);
            }
            monitor_printf(mon, "%d)", info->target);
        }
        if (info->pending) {
            monitor_printf(mon, " PENDING");
        }
        if (info->masked) {
            monitor_printf(mon, " MASKED");
        }
        monitor_printf(mon, "\n");
    }

    qapi_free_EvtchnInfoList(info_list);
}

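/* HMP 'xen-event-inject': wrapper around qmp_xen_event_inject(). */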
void hmp_xen_event_inject(Monitor *mon, const QDict *qdict)
{
    int port = qdict_get_int(qdict, "port");
    Error *err = NULL;

    qmp_xen_event_inject(port, &err);
    if (err) {
        hmp_handle_error(mon, err);
    } else {
        monitor_printf(mon, "Delivered port %d\n", port);
    }
}
