1d40ddd52SDavid Woodhouse /* 2d40ddd52SDavid Woodhouse * QEMU Xen emulation: Shared/overlay pages support 3d40ddd52SDavid Woodhouse * 4d40ddd52SDavid Woodhouse * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 5d40ddd52SDavid Woodhouse * 6d40ddd52SDavid Woodhouse * Authors: David Woodhouse <dwmw2@infradead.org> 7d40ddd52SDavid Woodhouse * 8d40ddd52SDavid Woodhouse * This work is licensed under the terms of the GNU GPL, version 2 or later. 9d40ddd52SDavid Woodhouse * See the COPYING file in the top-level directory. 10d40ddd52SDavid Woodhouse */ 11d40ddd52SDavid Woodhouse 12d40ddd52SDavid Woodhouse #include "qemu/osdep.h" 13d40ddd52SDavid Woodhouse #include "qemu/host-utils.h" 14d40ddd52SDavid Woodhouse #include "qemu/module.h" 15d40ddd52SDavid Woodhouse #include "qemu/main-loop.h" 16d40ddd52SDavid Woodhouse #include "qapi/error.h" 17d40ddd52SDavid Woodhouse #include "qom/object.h" 18d40ddd52SDavid Woodhouse #include "exec/target_page.h" 19d40ddd52SDavid Woodhouse #include "exec/address-spaces.h" 20d40ddd52SDavid Woodhouse #include "migration/vmstate.h" 21d40ddd52SDavid Woodhouse 22d40ddd52SDavid Woodhouse #include "hw/sysbus.h" 23d40ddd52SDavid Woodhouse #include "hw/xen/xen.h" 24d40ddd52SDavid Woodhouse #include "xen_overlay.h" 25d40ddd52SDavid Woodhouse 26d40ddd52SDavid Woodhouse #include "sysemu/kvm.h" 27d40ddd52SDavid Woodhouse #include "sysemu/kvm_xen.h" 28d40ddd52SDavid Woodhouse #include <linux/kvm.h> 29d40ddd52SDavid Woodhouse 30d40ddd52SDavid Woodhouse #include "hw/xen/interface/memory.h" 31d40ddd52SDavid Woodhouse 32d40ddd52SDavid Woodhouse 33d40ddd52SDavid Woodhouse #define TYPE_XEN_OVERLAY "xen-overlay" 34d40ddd52SDavid Woodhouse OBJECT_DECLARE_SIMPLE_TYPE(XenOverlayState, XEN_OVERLAY) 35d40ddd52SDavid Woodhouse 36d40ddd52SDavid Woodhouse #define XEN_PAGE_SHIFT 12 37d40ddd52SDavid Woodhouse #define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT) 38d40ddd52SDavid Woodhouse 39d40ddd52SDavid Woodhouse struct XenOverlayState { 40d40ddd52SDavid Woodhouse /*< private >*/ 41d40ddd52SDavid Woodhouse SysBusDevice busdev; 42d40ddd52SDavid Woodhouse /*< public >*/ 43d40ddd52SDavid Woodhouse 44d40ddd52SDavid Woodhouse MemoryRegion shinfo_mem; 45d40ddd52SDavid Woodhouse void *shinfo_ptr; 46d40ddd52SDavid Woodhouse uint64_t shinfo_gpa; 47110a0ea5SDavid Woodhouse bool long_mode; 48d40ddd52SDavid Woodhouse }; 49d40ddd52SDavid Woodhouse 50d40ddd52SDavid Woodhouse struct XenOverlayState *xen_overlay_singleton; 51d40ddd52SDavid Woodhouse 52*e33cb789SDavid Woodhouse void xen_overlay_do_map_page(MemoryRegion *page, uint64_t gpa) 53d40ddd52SDavid Woodhouse { 54d40ddd52SDavid Woodhouse /* 55d40ddd52SDavid Woodhouse * Xen allows guests to map the same page as many times as it likes 56d40ddd52SDavid Woodhouse * into guest physical frames. We don't, because it would be hard 57d40ddd52SDavid Woodhouse * to track and restore them all. One mapping of each page is 58d40ddd52SDavid Woodhouse * perfectly sufficient for all known guests... and we've tested 59d40ddd52SDavid Woodhouse * that theory on a few now in other implementations. dwmw2. 60d40ddd52SDavid Woodhouse */ 61d40ddd52SDavid Woodhouse if (memory_region_is_mapped(page)) { 62d40ddd52SDavid Woodhouse if (gpa == INVALID_GPA) { 63d40ddd52SDavid Woodhouse memory_region_del_subregion(get_system_memory(), page); 64d40ddd52SDavid Woodhouse } else { 65d40ddd52SDavid Woodhouse /* Just move it */ 66d40ddd52SDavid Woodhouse memory_region_set_address(page, gpa); 67d40ddd52SDavid Woodhouse } 68d40ddd52SDavid Woodhouse } else if (gpa != INVALID_GPA) { 69d40ddd52SDavid Woodhouse memory_region_add_subregion_overlap(get_system_memory(), gpa, page, 0); 70d40ddd52SDavid Woodhouse } 71d40ddd52SDavid Woodhouse } 72d40ddd52SDavid Woodhouse 73d40ddd52SDavid Woodhouse /* KVM is the only existing back end for now. Let's not overengineer it yet. */ 74d40ddd52SDavid Woodhouse static int xen_overlay_set_be_shinfo(uint64_t gfn) 75d40ddd52SDavid Woodhouse { 76d40ddd52SDavid Woodhouse struct kvm_xen_hvm_attr xa = { 77d40ddd52SDavid Woodhouse .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 78d40ddd52SDavid Woodhouse .u.shared_info.gfn = gfn, 79d40ddd52SDavid Woodhouse }; 80d40ddd52SDavid Woodhouse 81d40ddd52SDavid Woodhouse return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa); 82d40ddd52SDavid Woodhouse } 83d40ddd52SDavid Woodhouse 84d40ddd52SDavid Woodhouse 85d40ddd52SDavid Woodhouse static void xen_overlay_realize(DeviceState *dev, Error **errp) 86d40ddd52SDavid Woodhouse { 87d40ddd52SDavid Woodhouse XenOverlayState *s = XEN_OVERLAY(dev); 88d40ddd52SDavid Woodhouse 89d40ddd52SDavid Woodhouse if (xen_mode != XEN_EMULATE) { 90d40ddd52SDavid Woodhouse error_setg(errp, "Xen overlay page support is for Xen emulation"); 91d40ddd52SDavid Woodhouse return; 92d40ddd52SDavid Woodhouse } 93d40ddd52SDavid Woodhouse 94d40ddd52SDavid Woodhouse memory_region_init_ram(&s->shinfo_mem, OBJECT(dev), "xen:shared_info", 95d40ddd52SDavid Woodhouse XEN_PAGE_SIZE, &error_abort); 96d40ddd52SDavid Woodhouse memory_region_set_enabled(&s->shinfo_mem, true); 97d40ddd52SDavid Woodhouse 98d40ddd52SDavid Woodhouse s->shinfo_ptr = memory_region_get_ram_ptr(&s->shinfo_mem); 99d40ddd52SDavid Woodhouse s->shinfo_gpa = INVALID_GPA; 100110a0ea5SDavid Woodhouse s->long_mode = false; 101d40ddd52SDavid Woodhouse memset(s->shinfo_ptr, 0, XEN_PAGE_SIZE); 102d40ddd52SDavid Woodhouse } 103d40ddd52SDavid Woodhouse 104110a0ea5SDavid Woodhouse static int xen_overlay_pre_save(void *opaque) 105110a0ea5SDavid Woodhouse { 106110a0ea5SDavid Woodhouse /* 107110a0ea5SDavid Woodhouse * Fetch the kernel's idea of long_mode to avoid the race condition 108110a0ea5SDavid Woodhouse * where the guest has set the hypercall page up in 64-bit mode but 109110a0ea5SDavid Woodhouse * not yet made a hypercall by the time migration happens, so qemu 110110a0ea5SDavid Woodhouse * hasn't yet noticed. 111110a0ea5SDavid Woodhouse */ 112110a0ea5SDavid Woodhouse return xen_sync_long_mode(); 113110a0ea5SDavid Woodhouse } 114110a0ea5SDavid Woodhouse 115d40ddd52SDavid Woodhouse static int xen_overlay_post_load(void *opaque, int version_id) 116d40ddd52SDavid Woodhouse { 117d40ddd52SDavid Woodhouse XenOverlayState *s = opaque; 118d40ddd52SDavid Woodhouse 119d40ddd52SDavid Woodhouse if (s->shinfo_gpa != INVALID_GPA) { 120d40ddd52SDavid Woodhouse xen_overlay_do_map_page(&s->shinfo_mem, s->shinfo_gpa); 121d40ddd52SDavid Woodhouse xen_overlay_set_be_shinfo(s->shinfo_gpa >> XEN_PAGE_SHIFT); 122d40ddd52SDavid Woodhouse } 123110a0ea5SDavid Woodhouse if (s->long_mode) { 124110a0ea5SDavid Woodhouse xen_set_long_mode(true); 125110a0ea5SDavid Woodhouse } 126d40ddd52SDavid Woodhouse 127d40ddd52SDavid Woodhouse return 0; 128d40ddd52SDavid Woodhouse } 129d40ddd52SDavid Woodhouse 130d40ddd52SDavid Woodhouse static bool xen_overlay_is_needed(void *opaque) 131d40ddd52SDavid Woodhouse { 132d40ddd52SDavid Woodhouse return xen_mode == XEN_EMULATE; 133d40ddd52SDavid Woodhouse } 134d40ddd52SDavid Woodhouse 135d40ddd52SDavid Woodhouse static const VMStateDescription xen_overlay_vmstate = { 136d40ddd52SDavid Woodhouse .name = "xen_overlay", 137d40ddd52SDavid Woodhouse .version_id = 1, 138d40ddd52SDavid Woodhouse .minimum_version_id = 1, 139d40ddd52SDavid Woodhouse .needed = xen_overlay_is_needed, 140110a0ea5SDavid Woodhouse .pre_save = xen_overlay_pre_save, 141d40ddd52SDavid Woodhouse .post_load = xen_overlay_post_load, 142d40ddd52SDavid Woodhouse .fields = (VMStateField[]) { 143d40ddd52SDavid Woodhouse VMSTATE_UINT64(shinfo_gpa, XenOverlayState), 144110a0ea5SDavid Woodhouse VMSTATE_BOOL(long_mode, XenOverlayState), 145d40ddd52SDavid Woodhouse VMSTATE_END_OF_LIST() 146d40ddd52SDavid Woodhouse } 147d40ddd52SDavid Woodhouse }; 148d40ddd52SDavid Woodhouse 149d40ddd52SDavid Woodhouse static void xen_overlay_reset(DeviceState *dev) 150d40ddd52SDavid Woodhouse { 151d40ddd52SDavid Woodhouse kvm_xen_soft_reset(); 152d40ddd52SDavid Woodhouse } 153d40ddd52SDavid Woodhouse 154d40ddd52SDavid Woodhouse static void xen_overlay_class_init(ObjectClass *klass, void *data) 155d40ddd52SDavid Woodhouse { 156d40ddd52SDavid Woodhouse DeviceClass *dc = DEVICE_CLASS(klass); 157d40ddd52SDavid Woodhouse 158d40ddd52SDavid Woodhouse dc->reset = xen_overlay_reset; 159d40ddd52SDavid Woodhouse dc->realize = xen_overlay_realize; 160d40ddd52SDavid Woodhouse dc->vmsd = &xen_overlay_vmstate; 161d40ddd52SDavid Woodhouse } 162d40ddd52SDavid Woodhouse 163d40ddd52SDavid Woodhouse static const TypeInfo xen_overlay_info = { 164d40ddd52SDavid Woodhouse .name = TYPE_XEN_OVERLAY, 165d40ddd52SDavid Woodhouse .parent = TYPE_SYS_BUS_DEVICE, 166d40ddd52SDavid Woodhouse .instance_size = sizeof(XenOverlayState), 167d40ddd52SDavid Woodhouse .class_init = xen_overlay_class_init, 168d40ddd52SDavid Woodhouse }; 169d40ddd52SDavid Woodhouse 170d40ddd52SDavid Woodhouse void xen_overlay_create(void) 171d40ddd52SDavid Woodhouse { 172d40ddd52SDavid Woodhouse xen_overlay_singleton = XEN_OVERLAY(sysbus_create_simple(TYPE_XEN_OVERLAY, 173d40ddd52SDavid Woodhouse -1, NULL)); 174d40ddd52SDavid Woodhouse 175d40ddd52SDavid Woodhouse /* If xen_domid wasn't explicitly set, at least make sure it isn't zero. */ 176d40ddd52SDavid Woodhouse if (xen_domid == DOMID_QEMU) { 177d40ddd52SDavid Woodhouse xen_domid = 1; 178d40ddd52SDavid Woodhouse }; 179d40ddd52SDavid Woodhouse } 180d40ddd52SDavid Woodhouse 181d40ddd52SDavid Woodhouse static void xen_overlay_register_types(void) 182d40ddd52SDavid Woodhouse { 183d40ddd52SDavid Woodhouse type_register_static(&xen_overlay_info); 184d40ddd52SDavid Woodhouse } 185d40ddd52SDavid Woodhouse 186d40ddd52SDavid Woodhouse type_init(xen_overlay_register_types) 187d40ddd52SDavid Woodhouse 188d40ddd52SDavid Woodhouse int xen_overlay_map_shinfo_page(uint64_t gpa) 189d40ddd52SDavid Woodhouse { 190d40ddd52SDavid Woodhouse XenOverlayState *s = xen_overlay_singleton; 191d40ddd52SDavid Woodhouse int ret; 192d40ddd52SDavid Woodhouse 193d40ddd52SDavid Woodhouse if (!s) { 194d40ddd52SDavid Woodhouse return -ENOENT; 195d40ddd52SDavid Woodhouse } 196d40ddd52SDavid Woodhouse 197d40ddd52SDavid Woodhouse assert(qemu_mutex_iothread_locked()); 198d40ddd52SDavid Woodhouse 199d40ddd52SDavid Woodhouse if (s->shinfo_gpa) { 200d40ddd52SDavid Woodhouse /* If removing shinfo page, turn the kernel magic off first */ 201d40ddd52SDavid Woodhouse ret = xen_overlay_set_be_shinfo(INVALID_GFN); 202d40ddd52SDavid Woodhouse if (ret) { 203d40ddd52SDavid Woodhouse return ret; 204d40ddd52SDavid Woodhouse } 205d40ddd52SDavid Woodhouse } 206d40ddd52SDavid Woodhouse 207d40ddd52SDavid Woodhouse xen_overlay_do_map_page(&s->shinfo_mem, gpa); 208d40ddd52SDavid Woodhouse if (gpa != INVALID_GPA) { 209d40ddd52SDavid Woodhouse ret = xen_overlay_set_be_shinfo(gpa >> XEN_PAGE_SHIFT); 210d40ddd52SDavid Woodhouse if (ret) { 211d40ddd52SDavid Woodhouse return ret; 212d40ddd52SDavid Woodhouse } 213d40ddd52SDavid Woodhouse } 214d40ddd52SDavid Woodhouse s->shinfo_gpa = gpa; 215d40ddd52SDavid Woodhouse 216d40ddd52SDavid Woodhouse return 0; 217d40ddd52SDavid Woodhouse } 218d40ddd52SDavid Woodhouse 219d40ddd52SDavid Woodhouse void *xen_overlay_get_shinfo_ptr(void) 220d40ddd52SDavid Woodhouse { 221d40ddd52SDavid Woodhouse XenOverlayState *s = xen_overlay_singleton; 222d40ddd52SDavid Woodhouse 223d40ddd52SDavid Woodhouse if (!s) { 224d40ddd52SDavid Woodhouse return NULL; 225d40ddd52SDavid Woodhouse } 226d40ddd52SDavid Woodhouse 227d40ddd52SDavid Woodhouse return s->shinfo_ptr; 228d40ddd52SDavid Woodhouse } 229110a0ea5SDavid Woodhouse 230110a0ea5SDavid Woodhouse int xen_sync_long_mode(void) 231110a0ea5SDavid Woodhouse { 232110a0ea5SDavid Woodhouse int ret; 233110a0ea5SDavid Woodhouse struct kvm_xen_hvm_attr xa = { 234110a0ea5SDavid Woodhouse .type = KVM_XEN_ATTR_TYPE_LONG_MODE, 235110a0ea5SDavid Woodhouse }; 236110a0ea5SDavid Woodhouse 237110a0ea5SDavid Woodhouse if (!xen_overlay_singleton) { 238110a0ea5SDavid Woodhouse return -ENOENT; 239110a0ea5SDavid Woodhouse } 240110a0ea5SDavid Woodhouse 241110a0ea5SDavid Woodhouse ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_GET_ATTR, &xa); 242110a0ea5SDavid Woodhouse if (!ret) { 243110a0ea5SDavid Woodhouse xen_overlay_singleton->long_mode = xa.u.long_mode; 244110a0ea5SDavid Woodhouse } 245110a0ea5SDavid Woodhouse 246110a0ea5SDavid Woodhouse return ret; 247110a0ea5SDavid Woodhouse } 248110a0ea5SDavid Woodhouse 249110a0ea5SDavid Woodhouse int xen_set_long_mode(bool long_mode) 250110a0ea5SDavid Woodhouse { 251110a0ea5SDavid Woodhouse int ret; 252110a0ea5SDavid Woodhouse struct kvm_xen_hvm_attr xa = { 253110a0ea5SDavid Woodhouse .type = KVM_XEN_ATTR_TYPE_LONG_MODE, 254110a0ea5SDavid Woodhouse .u.long_mode = long_mode, 255110a0ea5SDavid Woodhouse }; 256110a0ea5SDavid Woodhouse 257110a0ea5SDavid Woodhouse if (!xen_overlay_singleton) { 258110a0ea5SDavid Woodhouse return -ENOENT; 259110a0ea5SDavid Woodhouse } 260110a0ea5SDavid Woodhouse 261110a0ea5SDavid Woodhouse ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa); 262110a0ea5SDavid Woodhouse if (!ret) { 263110a0ea5SDavid Woodhouse xen_overlay_singleton->long_mode = xa.u.long_mode; 264110a0ea5SDavid Woodhouse } 265110a0ea5SDavid Woodhouse 266110a0ea5SDavid Woodhouse return ret; 267110a0ea5SDavid Woodhouse } 268110a0ea5SDavid Woodhouse 269110a0ea5SDavid Woodhouse bool xen_is_long_mode(void) 270110a0ea5SDavid Woodhouse { 271110a0ea5SDavid Woodhouse return xen_overlay_singleton && xen_overlay_singleton->long_mode; 272110a0ea5SDavid Woodhouse } 273