xref: /qemu/hw/xen/xen-hvm-common.c (revision a28b0f857e55b4a4b8de590d32c0183253c6aa40)
1 #include "qemu/osdep.h"
2 #include "qemu/units.h"
3 #include "qemu/error-report.h"
4 #include "qapi/error.h"
5 #include "exec/target_long.h"
6 #include "exec/target_page.h"
7 #include "trace.h"
8 
9 #include "hw/hw.h"
10 #include "hw/pci/pci_host.h"
11 #include "hw/xen/xen-hvm-common.h"
12 #include "hw/xen/xen-bus.h"
13 #include "hw/boards.h"
14 #include "hw/xen/arch_hvm.h"
15 #include "system/runstate.h"
16 #include "system/system.h"
17 #include "system/xen.h"
18 #include "system/xen-mapcache.h"
19 
20 MemoryRegion xen_memory, xen_grants;
21 
22 /* Check for any kind of xen memory, foreign mappings or grants.  */
23 bool xen_mr_is_memory(MemoryRegion *mr)
24 {
25     return mr == &xen_memory || mr == &xen_grants;
26 }
27 
28 /* Check specifically for grants.  */
29 bool xen_mr_is_grants(MemoryRegion *mr)
30 {
31     return mr == &xen_grants;
32 }
33 
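/*
 * Ask Xen to populate the guest pfn range backing this RAM block.
 * Skipped on incoming migration (the pages already exist in the target
 * domain) and for the special Xen regions, which are not backed by
 * QEMU-allocated RAM.
 */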
34 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
35                    Error **errp)
36 {
37     unsigned target_page_bits = qemu_target_page_bits();
38     unsigned long nr_pfn;
39     xen_pfn_t *pfn_list;
40     int i;
41 
42     if (runstate_check(RUN_STATE_INMIGRATE)) {
43         /* RAM already populated in Xen */
44         warn_report("%s: do not alloc "RAM_ADDR_FMT
45                 " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE",
46                 __func__, size, ram_addr);
47         return;
48     }
49 
50     if (xen_mr_is_memory(mr)) {
51         return;
52     }
53 
54     trace_xen_ram_alloc(ram_addr, size);
55 
56     nr_pfn = size >> target_page_bits;
57     pfn_list = g_new(xen_pfn_t, nr_pfn);
58 
59     for (i = 0; i < nr_pfn; i++) {
60         pfn_list[i] = (ram_addr >> target_page_bits) + i;
61     }
62 
63     if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
64         error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT,
65                    ram_addr);
66     }
67 
68     g_free(pfn_list);
69 }
70 
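/*
 * Propagate a memory section change to the ioreq server.  The special
 * Xen regions (foreign mappings and grants) are never registered;
 * everything else is mapped or unmapped as sections come and go, then
 * handed to the architecture-specific hook.
 */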
71 static void xen_set_memory(struct MemoryListener *listener,
72                            MemoryRegionSection *section,
73                            bool add)
74 {
75     XenIOState *state = container_of(listener, XenIOState, memory_listener);
76 
77     if (xen_mr_is_memory(section->mr)) {
78         return;
79     } else {
80         if (add) {
81             xen_map_memory_section(xen_domid, state->ioservid,
82                                    section);
83         } else {
84             xen_unmap_memory_section(xen_domid, state->ioservid,
85                                      section);
86         }
87     }
88 
89     arch_xen_set_memory(state, section, add);
90 }
91 
92 void xen_region_add(MemoryListener *listener,
93                            MemoryRegionSection *section)
94 {
95     memory_region_ref(section->mr);
96     xen_set_memory(listener, section, true);
97 }
98 
99 void xen_region_del(MemoryListener *listener,
100                            MemoryRegionSection *section)
101 {
102     xen_set_memory(listener, section, false);
103     memory_region_unref(section->mr);
104 }
105 
106 void xen_io_add(MemoryListener *listener,
107                        MemoryRegionSection *section)
108 {
109     XenIOState *state = container_of(listener, XenIOState, io_listener);
110     MemoryRegion *mr = section->mr;
111 
112     if (mr->ops == &unassigned_io_ops) {
113         return;
114     }
115 
116     memory_region_ref(mr);
117 
118     xen_map_io_section(xen_domid, state->ioservid, section);
119 }
120 
121 void xen_io_del(MemoryListener *listener,
122                        MemoryRegionSection *section)
123 {
124     XenIOState *state = container_of(listener, XenIOState, io_listener);
125     MemoryRegion *mr = section->mr;
126 
127     if (mr->ops == &unassigned_io_ops) {
128         return;
129     }
130 
131     xen_unmap_io_section(xen_domid, state->ioservid, section);
132 
133     memory_region_unref(mr);
134 }
135 
136 void xen_device_realize(DeviceListener *listener,
137                                DeviceState *dev)
138 {
139     XenIOState *state = container_of(listener, XenIOState, device_listener);
140 
141     if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
142         PCIDevice *pci_dev = PCI_DEVICE(dev);
143         XenPciDevice *xendev = g_new(XenPciDevice, 1);
144 
145         xendev->pci_dev = pci_dev;
146         xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev),
147                                      pci_dev->devfn);
148         QLIST_INSERT_HEAD(&state->dev_list, xendev, entry);
149 
150         xen_map_pcidev(xen_domid, state->ioservid, pci_dev);
151     }
152 }
153 
154 void xen_device_unrealize(DeviceListener *listener,
155                                  DeviceState *dev)
156 {
157     XenIOState *state = container_of(listener, XenIOState, device_listener);
158 
159     if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
160         PCIDevice *pci_dev = PCI_DEVICE(dev);
161         XenPciDevice *xendev, *next;
162 
163         xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev);
164 
165         QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) {
166             if (xendev->pci_dev == pci_dev) {
167                 QLIST_REMOVE(xendev, entry);
168                 g_free(xendev);
169                 break;
170             }
171         }
172     }
173 }
174 
175 MemoryListener xen_io_listener = {
176     .name = "xen-io",
177     .region_add = xen_io_add,
178     .region_del = xen_io_del,
179     .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
180 };
181 
182 DeviceListener xen_device_listener = {
183     .realize = xen_device_realize,
184     .unrealize = xen_device_unrealize,
185 };
186 
187 /* Get the ioreq packet for the given vcpu from the shared ioreq page */
188 static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
189 {
190     ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);
191 
192     if (req->state != STATE_IOREQ_READY) {
193         trace_cpu_get_ioreq_from_shared_memory_req_not_ready(req->state,
194                                                              req->data_is_ptr,
195                                                              req->addr,
196                                                              req->data,
197                                                              req->count,
198                                                              req->size);
199         return NULL;
200     }
201 
202     xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */
203 
204     req->state = STATE_IOREQ_INPROCESS;
205     return req;
206 }
207 
208 /* Poll the event channel for a pending port notification and return */
209 /* the corresponding ioreq packet from shared memory for that vCPU. */
210 /* Returns NULL if no synchronous request is ready. */
211 static ioreq_t *cpu_get_ioreq(XenIOState *state)
212 {
213     MachineState *ms = MACHINE(qdev_get_machine());
214     unsigned int max_cpus = ms->smp.max_cpus;
215     int i;
216     evtchn_port_t port;
217 
218     port = qemu_xen_evtchn_pending(state->xce_handle);
219     if (port == state->bufioreq_local_port) {
220         timer_mod(state->buffered_io_timer,
221                 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
222         return NULL;
223     }
224 
225     if (port != -1) {
226         for (i = 0; i < max_cpus; i++) {
227             if (state->ioreq_local_port[i] == port) {
228                 break;
229             }
230         }
231 
232         if (i == max_cpus) {
233             hw_error("Fatal error while trying to get io event!\n");
234         }
235 
236         /* unmask the wanted port again */
237         qemu_xen_evtchn_unmask(state->xce_handle, port);
238 
239         /* get the io packet from shared memory */
240         state->send_vcpu = i;
241         return cpu_get_ioreq_from_shared_memory(state, i);
242     }
243 
244     /* read error or read nothing */
245     return NULL;
246 }
247 
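/* Port I/O accessors used by cpu_ioreq_pio(), dispatching on access size. */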
248 static uint32_t do_inp(uint32_t addr, unsigned long size)
249 {
250     switch (size) {
251         case 1:
252             return cpu_inb(addr);
253         case 2:
254             return cpu_inw(addr);
255         case 4:
256             return cpu_inl(addr);
257         default:
258             hw_error("inp: bad size: %04x %lx", addr, size);
259     }
260 }
261 
262 static void do_outp(uint32_t addr,
263         unsigned long size, uint32_t val)
264 {
265     switch (size) {
266         case 1:
267             return cpu_outb(addr, val);
268         case 2:
269             return cpu_outw(addr, val);
270         case 4:
271             return cpu_outl(addr, val);
272         default:
273             hw_error("outp: bad size: %04x %lx", addr, size);
274     }
275 }
276 
277 /*
278  * Helper functions which read/write an object from/to physical guest
279  * memory, as part of the implementation of an ioreq.
280  *
281  * Equivalent to
282  *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
283  *                          val, req->size, 0/1)
284  * except without the integer overflow problems.
285  */
286 static void rw_phys_req_item(hwaddr addr,
287                              ioreq_t *req, uint32_t i, void *val, int rw)
288 {
289     /* Do everything unsigned so overflow just results in a truncated result
290      * and accesses to undesired parts of guest memory, which is up
291      * to the guest */
292     hwaddr offset = (hwaddr)req->size * i;
293     if (req->df) {
294         addr -= offset;
295     } else {
296         addr += offset;
297     }
298     cpu_physical_memory_rw(addr, val, req->size, rw);
299 }
300 
301 static inline void read_phys_req_item(hwaddr addr,
302                                       ioreq_t *req, uint32_t i, void *val)
303 {
304     rw_phys_req_item(addr, req, i, val, 0);
305 }
306 static inline void write_phys_req_item(hwaddr addr,
307                                        ioreq_t *req, uint32_t i, void *val)
308 {
309     rw_phys_req_item(addr, req, i, val, 1);
310 }
311 
312 
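/*
 * Handle a port I/O request.  Without data_is_ptr, req->data holds the
 * immediate value; with data_is_ptr, req->count items are transferred
 * between the port and guest memory at req->data.
 */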
313 void cpu_ioreq_pio(ioreq_t *req)
314 {
315     uint32_t i;
316 
317     trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
318                          req->data, req->count, req->size);
319 
320     if (req->size > sizeof(uint32_t)) {
321         hw_error("PIO: bad size (%u)", req->size);
322     }
323 
324     if (req->dir == IOREQ_READ) {
325         if (!req->data_is_ptr) {
326             req->data = do_inp(req->addr, req->size);
327             trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr,
328                                          req->size);
329         } else {
330             uint32_t tmp;
331 
332             for (i = 0; i < req->count; i++) {
333                 tmp = do_inp(req->addr, req->size);
334                 write_phys_req_item(req->data, req, i, &tmp);
335             }
336         }
337     } else if (req->dir == IOREQ_WRITE) {
338         if (!req->data_is_ptr) {
339             trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr,
340                                           req->size);
341             do_outp(req->addr, req->size, req->data);
342         } else {
343             for (i = 0; i < req->count; i++) {
344                 uint32_t tmp = 0;
345 
346                 read_phys_req_item(req->data, req, i, &tmp);
347                 do_outp(req->addr, req->size, tmp);
348             }
349         }
350     }
351 }
352 
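/*
 * Handle an MMIO (IOREQ_TYPE_COPY) request.  Without data_is_ptr the
 * value moves directly between guest memory at req->addr and req->data;
 * with data_is_ptr each item is copied between the two guest addresses
 * via a bounce variable.
 */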
353 static void cpu_ioreq_move(ioreq_t *req)
354 {
355     uint32_t i;
356 
357     trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
358                          req->data, req->count, req->size);
359 
360     if (req->size > sizeof(req->data)) {
361         hw_error("MMIO: bad size (%u)", req->size);
362     }
363 
364     if (!req->data_is_ptr) {
365         if (req->dir == IOREQ_READ) {
366             for (i = 0; i < req->count; i++) {
367                 read_phys_req_item(req->addr, req, i, &req->data);
368             }
369         } else if (req->dir == IOREQ_WRITE) {
370             for (i = 0; i < req->count; i++) {
371                 write_phys_req_item(req->addr, req, i, &req->data);
372             }
373         }
374     } else {
375         uint64_t tmp;
376 
377         if (req->dir == IOREQ_READ) {
378             for (i = 0; i < req->count; i++) {
379                 read_phys_req_item(req->addr, req, i, &tmp);
380                 write_phys_req_item(req->data, req, i, &tmp);
381             }
382         } else if (req->dir == IOREQ_WRITE) {
383             for (i = 0; i < req->count; i++) {
384                 read_phys_req_item(req->data, req, i, &tmp);
385                 write_phys_req_item(req->addr, req, i, &tmp);
386             }
387         }
388     }
389 }
390 
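/*
 * Handle a PCI config space request.  The target device is identified
 * by the SBDF in the upper 32 bits of req->addr, the register offset by
 * the lower 32 bits; only devices previously recorded in dev_list are
 * considered.
 */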
391 static void cpu_ioreq_config(XenIOState *state, ioreq_t *req)
392 {
393     uint32_t sbdf = req->addr >> 32;
394     uint32_t reg = req->addr;
395     XenPciDevice *xendev;
396 
397     if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) &&
398         req->size != sizeof(uint32_t)) {
399         hw_error("PCI config access: bad size (%u)", req->size);
400     }
401 
402     if (req->count != 1) {
403         hw_error("PCI config access: bad count (%u)", req->count);
404     }
405 
406     QLIST_FOREACH(xendev, &state->dev_list, entry) {
407         if (xendev->sbdf != sbdf) {
408             continue;
409         }
410 
411         if (!req->data_is_ptr) {
412             if (req->dir == IOREQ_READ) {
413                 req->data = pci_host_config_read_common(
414                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
415                     req->size);
416                 trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
417                                             req->size, req->data);
418             } else if (req->dir == IOREQ_WRITE) {
419                 trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
420                                              req->size, req->data);
421                 pci_host_config_write_common(
422                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
423                     req->data, req->size);
424             }
425         } else {
426             uint32_t tmp;
427 
428             if (req->dir == IOREQ_READ) {
429                 tmp = pci_host_config_read_common(
430                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
431                     req->size);
432                 trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
433                                             req->size, tmp);
434                 write_phys_req_item(req->data, req, 0, &tmp);
435             } else if (req->dir == IOREQ_WRITE) {
436                 read_phys_req_item(req->data, req, 0, &tmp);
437                 trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
438                                              req->size, tmp);
439                 pci_host_config_write_common(
440                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
441                     tmp, req->size);
442             }
443         }
444     }
445 }
446 
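/*
 * Central ioreq dispatcher: truncate immediate write data to the access
 * size, then route the request by type (PIO, MMIO copy, map-cache
 * invalidation, PCI config, or the architecture-specific handler).
 */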
447 static void handle_ioreq(XenIOState *state, ioreq_t *req)
448 {
449     trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr,
450                        req->addr, req->data, req->count, req->size);
451 
452     if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
453             (req->size < sizeof (target_ulong))) {
454         req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
455     }
456 
457     if (req->dir == IOREQ_WRITE)
458         trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr,
459                                  req->addr, req->data, req->count, req->size);
460 
461     switch (req->type) {
462         case IOREQ_TYPE_PIO:
463             cpu_ioreq_pio(req);
464             break;
465         case IOREQ_TYPE_COPY:
466             cpu_ioreq_move(req);
467             break;
468         case IOREQ_TYPE_TIMEOFFSET:
469             break;
470         case IOREQ_TYPE_INVALIDATE:
471             xen_invalidate_map_cache();
472             break;
473         case IOREQ_TYPE_PCI_CONFIG:
474             cpu_ioreq_config(state, req);
475             break;
476         default:
477             arch_handle_ioreq(state, req);
478     }
479     if (req->dir == IOREQ_READ) {
480         trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr,
481                                 req->addr, req->data, req->count, req->size);
482     }
483 }
484 
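/*
 * Drain the buffered ioreq ring.  Buffered requests are always writes
 * with a single item; a 64-bit access occupies two consecutive slots.
 * Requests are consumed by advancing the shared read pointer, and the
 * number of slots handled is returned.
 */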
485 static unsigned int handle_buffered_iopage(XenIOState *state)
486 {
487     buffered_iopage_t *buf_page = state->buffered_io_page;
488     buf_ioreq_t *buf_req = NULL;
489     unsigned int handled = 0;
490     ioreq_t req;
491     int qw;
492 
493     if (!buf_page) {
494         return 0;
495     }
496 
497     memset(&req, 0x00, sizeof(req));
498     req.state = STATE_IOREQ_READY;
499     req.count = 1;
500     req.dir = IOREQ_WRITE;
501 
502     do {
503         uint32_t rdptr = buf_page->read_pointer, wrptr;
504 
505         xen_rmb();
506         wrptr = buf_page->write_pointer;
507         xen_rmb();
508         if (rdptr != buf_page->read_pointer) {
509             continue;
510         }
511         if (rdptr == wrptr) {
512             break;
513         }
514         buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
515         req.size = 1U << buf_req->size;
516         req.addr = buf_req->addr;
517         req.data = buf_req->data;
518         req.type = buf_req->type;
519         xen_rmb();
520         qw = (req.size == 8);
521         if (qw) {
522             if (rdptr + 1 == wrptr) {
523                 hw_error("Incomplete quad word buffered ioreq");
524             }
525             buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
526                                            IOREQ_BUFFER_SLOT_NUM];
527             req.data |= ((uint64_t)buf_req->data) << 32;
528             xen_rmb();
529         }
530 
531         handle_ioreq(state, &req);
532 
533         /* Only req.data may get updated by handle_ioreq(), albeit even that
534          * should not happen as such data would never make it to the guest (we
535          * can only usefully see writes here after all).
536          */
537         assert(req.state == STATE_IOREQ_READY);
538         assert(req.count == 1);
539         assert(req.dir == IOREQ_WRITE);
540         assert(!req.data_is_ptr);
541 
542         qatomic_add(&buf_page->read_pointer, qw + 1);
543         handled += qw + 1;
544     } while (handled < IOREQ_BUFFER_SLOT_NUM);
545 
546     return handled;
547 }
548 
549 static void handle_buffered_io(void *opaque)
550 {
551     unsigned int handled;
552     XenIOState *state = opaque;
553 
554     handled = handle_buffered_iopage(state);
555     if (handled >= IOREQ_BUFFER_SLOT_NUM) {
556         /* We handled a full page of ioreqs. Schedule a timer to continue
557          * processing while giving other stuff a chance to run.
558          */
559         timer_mod(state->buffered_io_timer,
560                 qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
561     } else if (handled == 0) {
562         timer_del(state->buffered_io_timer);
563         qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port);
564     } else {
565         timer_mod(state->buffered_io_timer,
566                 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
567     }
568 }
569 
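/*
 * Event-channel fd handler: drain the buffered ring, then fetch and
 * dispatch the synchronous ioreq (if any) for the signalling vCPU,
 * honour pending shutdown/reset requests and notify Xen once the
 * response is ready.
 */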
570 static void cpu_handle_ioreq(void *opaque)
571 {
572     XenIOState *state = opaque;
573     ioreq_t *req = cpu_get_ioreq(state);
574 
575     handle_buffered_iopage(state);
576     if (req) {
577         ioreq_t copy = *req;
578 
579         xen_rmb();
580         handle_ioreq(state, &copy);
581         req->data = copy.data;
582 
583         if (req->state != STATE_IOREQ_INPROCESS) {
584             warn_report("Badness in I/O request ... not in service?!: "
585                     "%x, ptr: %x, port: %"PRIx64", "
586                     "data: %"PRIx64", count: %u, size: %u, type: %u",
587                     req->state, req->data_is_ptr, req->addr,
588                     req->data, req->count, req->size, req->type);
589             destroy_hvm_domain(false);
590             return;
591         }
592 
593         xen_wmb(); /* Update ioreq contents /then/ update state. */
594 
595         /*
596          * We do this before we send the response so that the tools
597          * have the opportunity to pick up on the reset before the
598          * guest resumes and does a hlt with interrupts disabled which
599          * causes Xen to powerdown the domain.
600          */
601         if (runstate_is_running()) {
602             ShutdownCause request;
603 
604             if (qemu_shutdown_requested_get()) {
605                 destroy_hvm_domain(false);
606             }
607             request = qemu_reset_requested_get();
608             if (request) {
609                 qemu_system_reset(request);
610                 destroy_hvm_domain(true);
611             }
612         }
613 
614         req->state = STATE_IORESP_READY;
615         qemu_xen_evtchn_notify(state->xce_handle,
616                                state->ioreq_local_port[state->send_vcpu]);
617     }
618 }
619 
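/*
 * Run when the VM transitions to running: set up the buffered-io timer,
 * record the CPUState for each vCPU index and install the event-channel
 * fd handler that drives ioreq processing.
 */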
620 static void xen_main_loop_prepare(XenIOState *state)
621 {
622     int evtchn_fd = -1;
623 
624     if (state->xce_handle != NULL) {
625         evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle);
626     }
627 
628     state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
629                                                  state);
630 
631     if (evtchn_fd != -1) {
632         CPUState *cpu_state;
633 
634         CPU_FOREACH(cpu_state) {
635             trace_xen_main_loop_prepare_init_cpu(cpu_state->cpu_index,
636                                                  cpu_state);
637             state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state;
638         }
639         qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
640     }
641 }
642 
643 
644 void xen_hvm_change_state_handler(void *opaque, bool running,
645                                          RunState rstate)
646 {
647     XenIOState *state = opaque;
648 
649     if (running) {
650         xen_main_loop_prepare(state);
651     }
652 
653     xen_set_ioreq_server_state(xen_domid,
654                                state->ioservid,
655                                running);
656 }
657 
658 void xen_exit_notifier(Notifier *n, void *data)
659 {
660     XenIOState *state = container_of(n, XenIOState, exit);
661 
662     xen_destroy_ioreq_server(xen_domid, state->ioservid);
663     if (state->fres != NULL) {
664         xenforeignmemory_unmap_resource(xen_fmem, state->fres);
665     }
666 
667     qemu_xen_evtchn_close(state->xce_handle);
668     xs_daemon_close(state->xenstore);
669 }
670 
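/*
 * Map the ioreq server pages.  With buffered ioreqs enabled the mapped
 * resource consists of the buffered page followed by the synchronous
 * page; otherwise only the synchronous page is mapped.  Falls back to
 * xen_get_ioreq_server_info() and foreign mapping on older Xen.
 */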
671 static int xen_map_ioreq_server(XenIOState *state)
672 {
673     void *addr = NULL;
674     xen_pfn_t ioreq_pfn;
675     xen_pfn_t bufioreq_pfn;
676     evtchn_port_t bufioreq_evtchn;
677     unsigned long num_frames = 1;
678     unsigned long frame = 1;
679     int rc;
680 
681     /*
682      * Attempt to map using the resource API and fall back to normal
683      * foreign mapping if this is not supported.
684      */
685     QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0);
686     QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1);
687 
688     if (state->has_bufioreq) {
689         frame = 0;
690         num_frames = 2;
691     }
692     state->fres = xenforeignmemory_map_resource(xen_fmem, xen_domid,
693                                          XENMEM_resource_ioreq_server,
694                                          state->ioservid,
695                                          frame, num_frames,
696                                          &addr,
697                                          PROT_READ | PROT_WRITE, 0);
698     if (state->fres != NULL) {
699         trace_xen_map_resource_ioreq(state->ioservid, addr);
700         state->shared_page = addr;
701         if (state->has_bufioreq) {
702             state->buffered_io_page = addr;
703             state->shared_page = addr + XC_PAGE_SIZE;
704         }
705     } else if (errno != EOPNOTSUPP) {
706         error_report("failed to map ioreq server resources: error %d handle=%p",
707                      errno, xen_xc);
708         return -1;
709     }
710 
711     /*
712      * If we fail to map the shared page with xenforeignmemory_map_resource()
713      * or if we're using buffered ioreqs, we need xen_get_ioreq_server_info()
714      * to provide the addresses to map the shared page and/or to get the
715      * event-channel port for buffered ioreqs.
716      */
717     if (state->shared_page == NULL || state->has_bufioreq) {
718         rc = xen_get_ioreq_server_info(xen_domid, state->ioservid,
719                                        (state->shared_page == NULL) ?
720                                        &ioreq_pfn : NULL,
721                                        (state->has_bufioreq &&
722                                         state->buffered_io_page == NULL) ?
723                                        &bufioreq_pfn : NULL,
724                                        &bufioreq_evtchn);
725         if (rc < 0) {
726             error_report("failed to get ioreq server info: error %d handle=%p",
727                          errno, xen_xc);
728             return rc;
729         }
730 
731         if (state->shared_page == NULL) {
732             trace_xen_map_ioreq_server_shared_page(ioreq_pfn);
733 
734             state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
735                                                       PROT_READ | PROT_WRITE,
736                                                       1, &ioreq_pfn, NULL);
737         }
738         if (state->shared_page == NULL) {
739             error_report("map shared IO page returned error %d handle=%p",
740                          errno, xen_xc);
741         }
742 
743         if (state->has_bufioreq && state->buffered_io_page == NULL) {
744             trace_xen_map_ioreq_server_buffered_io_page(bufioreq_pfn);
745 
746             state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
747                                                         PROT_READ | PROT_WRITE,
748                                                         1, &bufioreq_pfn,
749                                                         NULL);
750             if (state->buffered_io_page == NULL) {
751                 error_report("map buffered IO page returned error %d", errno);
752                 return -1;
753             }
754         }
755     }
756 
757     if (state->shared_page == NULL ||
758         (state->has_bufioreq && state->buffered_io_page == NULL)) {
759         return -1;
760     }
761 
762     if (state->has_bufioreq) {
763         trace_xen_map_ioreq_server_buffered_io_evtchn(bufioreq_evtchn);
764         state->bufioreq_remote_port = bufioreq_evtchn;
765     }
766 
767     return 0;
768 }
769 
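/*
 * Ask Xen to power off or reboot the guest, preferring the device model
 * shutdown op and falling back to xc_domain_shutdown() on older
 * hypervisors that do not implement it.
 */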
770 void destroy_hvm_domain(bool reboot)
771 {
772     xc_interface *xc_handle;
773     int sts;
774     int rc;
775 
776     unsigned int reason = reboot ? SHUTDOWN_reboot : SHUTDOWN_poweroff;
777 
778     if (xen_dmod) {
779         rc = xendevicemodel_shutdown(xen_dmod, xen_domid, reason);
780         if (!rc) {
781             return;
782         }
783         if (errno != ENOTTY /* old Xen */) {
784             error_report("xendevicemodel_shutdown failed with error %d", errno);
785         }
786         /* well, try the old thing then */
787     }
788 
789     xc_handle = xc_interface_open(0, 0, 0);
790     if (xc_handle == NULL) {
791         trace_destroy_hvm_domain_cannot_acquire_handle();
792     } else {
793         sts = xc_domain_shutdown(xc_handle, xen_domid, reason);
794         if (sts != 0) {
795             trace_destroy_hvm_domain_failed_action(
796                 reboot ? "reboot" : "poweroff", sts, strerror(errno)
797             );
798         } else {
799             trace_destroy_hvm_domain_action(
800                 xen_domid, reboot ? "reboot" : "poweroff"
801             );
802         }
803         xc_interface_close(xc_handle);
804     }
805 }
806 
807 void xen_shutdown_fatal_error(const char *fmt, ...)
808 {
809     va_list ap;
810 
811     va_start(ap, fmt);
812     error_vreport(fmt, ap);
813     va_end(ap);
814     error_report("Will destroy the domain.");
815     /* destroy the domain */
816     qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
817 }
818 
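/*
 * Finish bringing up a freshly created ioreq server: map its pages,
 * enable it, bind one event channel per vCPU (plus the buffered port if
 * enabled), initialise the map cache and register the memory, I/O and
 * device listeners.
 */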
819 static void xen_do_ioreq_register(XenIOState *state,
820                                   unsigned int max_cpus,
821                                   const MemoryListener *xen_memory_listener)
822 {
823     int i, rc;
824 
825     state->exit.notify = xen_exit_notifier;
826     qemu_add_exit_notifier(&state->exit);
827 
828     /*
829      * Register wake-up support in QMP query-current-machine API
830      */
831     qemu_register_wakeup_support();
832 
833     rc = xen_map_ioreq_server(state);
834     if (rc < 0) {
835         goto err;
836     }
837 
838     /* Note: cpus is empty at this point in init */
839     state->cpu_by_vcpu_id = g_new0(CPUState *, max_cpus);
840 
841     rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true);
842     if (rc < 0) {
843         error_report("failed to enable ioreq server info: error %d handle=%p",
844                      errno, xen_xc);
845         goto err;
846     }
847 
848     state->ioreq_local_port = g_new0(evtchn_port_t, max_cpus);
849 
850     /* FIXME: how about if we overflow the page here? */
851     for (i = 0; i < max_cpus; i++) {
852         rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
853                                               xen_vcpu_eport(state->shared_page,
854                                                              i));
855         if (rc == -1) {
856             error_report("shared evtchn %d bind error %d", i, errno);
857             goto err;
858         }
859         state->ioreq_local_port[i] = rc;
860     }
861 
862     if (state->has_bufioreq) {
863         rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
864                                               state->bufioreq_remote_port);
865         if (rc == -1) {
866             error_report("buffered evtchn bind error %d", errno);
867             goto err;
868         }
869         state->bufioreq_local_port = rc;
870     }
871     /* Init RAM management */
872 #ifdef XEN_COMPAT_PHYSMAP
873     xen_map_cache_init(xen_phys_offset_to_gaddr, state);
874 #else
875     xen_map_cache_init(NULL, state);
876 #endif
877 
878     qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);
879 
880     state->memory_listener = *xen_memory_listener;
881     memory_listener_register(&state->memory_listener, &address_space_memory);
882 
883     state->io_listener = xen_io_listener;
884     memory_listener_register(&state->io_listener, &address_space_io);
885 
886     state->device_listener = xen_device_listener;
887     QLIST_INIT(&state->dev_list);
888     device_listener_register(&state->device_listener);
889 
890     return;
891 
892 err:
893     error_report("xen hardware virtual machine initialisation failed");
894     exit(1);
895 }
896 
897 void xen_register_ioreq(XenIOState *state, unsigned int max_cpus,
898                         uint8_t handle_bufioreq,
899                         const MemoryListener *xen_memory_listener)
900 {
901     int rc;
902 
903     setup_xen_backend_ops();
904 
905     state->xce_handle = qemu_xen_evtchn_open();
906     if (state->xce_handle == NULL) {
907         error_report("xen: event channel open failed with error %d", errno);
908         goto err;
909     }
910 
911     state->xenstore = xs_daemon_open();
912     if (state->xenstore == NULL) {
913         error_report("xen: xenstore open failed with error %d", errno);
914         goto err;
915     }
916 
917     state->has_bufioreq = handle_bufioreq != HVM_IOREQSRV_BUFIOREQ_OFF;
918     rc = xen_create_ioreq_server(xen_domid, handle_bufioreq, &state->ioservid);
919     if (!rc) {
920         xen_do_ioreq_register(state, max_cpus, xen_memory_listener);
921     } else {
922         warn_report("xen: failed to create ioreq server");
923     }
924 
925     xen_bus_init();
926 
927     return;
928 
929 err:
930     error_report("xen hardware virtual machine backend registration failed");
931     exit(1);
932 }
933