/*
 * QEMU Xen emulation: Grant table support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "exec/target_page.h"
#include "exec/address-spaces.h"
#include "migration/vmstate.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_gnttab.h"

#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"

#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/grant_table.h"

#define TYPE_XEN_GNTTAB "xen-gnttab"
OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)

#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))

static struct gnttab_backend_ops emu_gnttab_backend_ops;

struct XenGnttabState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    QemuMutex gnt_lock;

    uint32_t nr_frames;
    uint32_t max_frames;

    union {
        grant_entry_v1_t *v1;
        /* Theoretically, v2 support could be added here. */
    } entries;

    MemoryRegion gnt_frames;
    MemoryRegion *gnt_aliases;
    uint64_t *gnt_frame_gpas;

    uint8_t *map_track;
};

struct XenGnttabState *xen_gnttab_singleton;

static void xen_gnttab_realize(DeviceState *dev, Error **errp)
{
    XenGnttabState *s = XEN_GNTTAB(dev);
    int i;

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen grant table support is for Xen emulation");
        return;
    }
    s->nr_frames = 0;
    s->max_frames = kvm_xen_get_gnttab_max_frames();
    memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table",
                           XEN_PAGE_SIZE * s->max_frames, &error_abort);
    memory_region_set_enabled(&s->gnt_frames, true);
    s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames);
    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);

    /* Create individual page-sized aliases for overlays */
    s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames);
    s->gnt_frame_gpas = (void *)g_new(uint64_t, s->max_frames);
    for (i = 0; i < s->max_frames; i++) {
        memory_region_init_alias(&s->gnt_aliases[i], OBJECT(dev),
                                 NULL, &s->gnt_frames,
                                 i * XEN_PAGE_SIZE, XEN_PAGE_SIZE);
        s->gnt_frame_gpas[i] = INVALID_GPA;
    }

    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
    qemu_mutex_init(&s->gnt_lock);

    xen_gnttab_singleton = s;

    s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);

    xen_gnttab_ops = &emu_gnttab_backend_ops;
}

static int xen_gnttab_post_load(void *opaque, int version_id)
{
    XenGnttabState *s = XEN_GNTTAB(opaque);
    uint32_t i;

    for (i = 0; i < s->nr_frames; i++) {
        if (s->gnt_frame_gpas[i] != INVALID_GPA) {
            xen_overlay_do_map_page(&s->gnt_aliases[i], s->gnt_frame_gpas[i]);
        }
    }
    return 0;
}

static bool xen_gnttab_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}

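/*
 * Descriptive note (not in the original): the migration stream records only
 * how many grant frames are in use and the guest physical address at which
 * each frame is currently mapped; xen_gnttab_post_load() re-establishes the
 * overlay mappings from that information.
 */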
static const VMStateDescription xen_gnttab_vmstate = {
    .name = "xen_gnttab",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_gnttab_is_needed,
    .post_load = xen_gnttab_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(nr_frames, XenGnttabState),
        VMSTATE_VARRAY_UINT32(gnt_frame_gpas, XenGnttabState, nr_frames, 0,
                              vmstate_info_uint64, uint64_t),
        VMSTATE_END_OF_LIST()
    }
};

static void xen_gnttab_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = xen_gnttab_realize;
    dc->vmsd = &xen_gnttab_vmstate;
}

static const TypeInfo xen_gnttab_info = {
    .name = TYPE_XEN_GNTTAB,
    .parent = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenGnttabState),
    .class_init = xen_gnttab_class_init,
};

void xen_gnttab_create(void)
{
    xen_gnttab_singleton = XEN_GNTTAB(sysbus_create_simple(TYPE_XEN_GNTTAB,
                                                           -1, NULL));
}

static void xen_gnttab_register_types(void)
{
    type_register_static(&xen_gnttab_info);
}

type_init(xen_gnttab_register_types)

int xen_gnttab_map_page(uint64_t idx, uint64_t gfn)
{
    XenGnttabState *s = xen_gnttab_singleton;
    uint64_t gpa = gfn << XEN_PAGE_SHIFT;

    if (!s) {
        return -ENOTSUP;
    }

    if (idx >= s->max_frames) {
        return -EINVAL;
    }

    QEMU_IOTHREAD_LOCK_GUARD();
    QEMU_LOCK_GUARD(&s->gnt_lock);

    xen_overlay_do_map_page(&s->gnt_aliases[idx], gpa);

    s->gnt_frame_gpas[idx] = gpa;

    if (s->nr_frames <= idx) {
        s->nr_frames = idx + 1;
    }

    return 0;
}

int xen_gnttab_set_version_op(struct gnttab_set_version *set)
{
    int ret;

    switch (set->version) {
    case 1:
        ret = 0;
        break;

    case 2:
        /* Behave as before set_version was introduced. */
        ret = -ENOSYS;
        break;

    default:
        ret = -EINVAL;
    }

    set->version = 1;
    return ret;
}

int xen_gnttab_get_version_op(struct gnttab_get_version *get)
{
    if (get->dom != DOMID_SELF && get->dom != xen_domid) {
        return -ESRCH;
    }

    get->version = 1;
    return 0;
}

int xen_gnttab_query_size_op(struct gnttab_query_size *size)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    if (size->dom != DOMID_SELF && size->dom != xen_domid) {
        size->status = GNTST_bad_domain;
        return 0;
    }

    size->status = GNTST_okay;
    size->nr_frames = s->nr_frames;
    size->max_nr_frames = s->max_frames;
    return 0;
}

/* Track per-open refs, to allow close() to clean up. */
struct active_ref {
    MemoryRegionSection mrs;
    void *virtaddr;
    uint32_t refcnt;
    int prot;
};

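/*
 * Descriptive note (not in the original): drop one mapping reference on
 * @ref; dirty the mapped page if it was mapped writable, release the
 * MemoryRegion reference, and clear GTF_reading/GTF_writing once the last
 * mapping goes away.
 */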
static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
                      MemoryRegionSection *mrs, int prot)
{
    if (mrs && mrs->mr) {
        if (prot & PROT_WRITE) {
            memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
                                    XEN_PAGE_SIZE);
        }
        memory_region_unref(mrs->mr);
        mrs->mr = NULL;
    }
    assert(s->map_track[ref] != 0);

    if (--s->map_track[ref] == 0) {
        grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
        qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing));
    }
}

static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
{
    uint16_t mask = GTF_type_mask | GTF_sub_page;
    grant_entry_v1_t gnt, *gnt_p;
    int retries = 0;

    if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
        s->map_track[ref] == UINT8_MAX) {
        return INVALID_GPA;
    }

    if (prot & PROT_WRITE) {
        mask |= GTF_readonly;
    }

    gnt_p = &s->entries.v1[ref];

    /*
     * The guest can legitimately be changing the GTF_readonly flag. Allow
     * that, but don't let a malicious guest cause a livelock.
     */
    for (retries = 0; retries < 5; retries++) {
        uint16_t new_flags;

        /* Read the entry before an atomic operation on its flags */
        gnt = *(volatile grant_entry_v1_t *)gnt_p;

        if ((gnt.flags & mask) != GTF_permit_access ||
            gnt.domid != DOMID_QEMU) {
            return INVALID_GPA;
        }

        new_flags = gnt.flags | GTF_reading;
        if (prot & PROT_WRITE) {
            new_flags |= GTF_writing;
        }

        if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) {
            return (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
        }
    }

    return INVALID_GPA;
}

struct xengntdev_handle {
    GHashTable *active_maps;
};

static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
                                        uint32_t nr_grants)
{
    return 0;
}

static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt,
                                    uint32_t count, uint32_t domid,
                                    uint32_t *refs, int prot)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        errno = ENOTSUP;
        return NULL;
    }

    if (domid != xen_domid) {
        errno = EINVAL;
        return NULL;
    }

    if (!count || count > 4096) {
        errno = EINVAL;
        return NULL;
    }

    /*
     * Making a contiguous mapping from potentially discontiguous grant
     * references would be... distinctly non-trivial. We don't support it.
     * Even changing the API to return an array of pointers, one per page,
     * wouldn't be simple to use in PV backends because some structures
     * actually cross page boundaries (e.g. 32-bit blkif_response ring
     * entries are 12 bytes).
     */
    if (count != 1) {
        errno = EINVAL;
        return NULL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (act) {
        if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) {
            if (gnt_ref(s, refs[0], prot) == INVALID_GPA) {
                return NULL;
            }
            act->prot |= PROT_WRITE;
        }
        act->refcnt++;
    } else {
        uint64_t gpa = gnt_ref(s, refs[0], prot);
        if (gpa == INVALID_GPA) {
            errno = EINVAL;
            return NULL;
        }

        act = g_new0(struct active_ref, 1);
        act->prot = prot;
        act->refcnt = 1;
        act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE);

        if (act->mrs.mr &&
            !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) &&
            memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) {
            act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block,
                                             act->mrs.offset_within_region);
        }
        if (!act->virtaddr) {
            gnt_unref(s, refs[0], &act->mrs, 0);
            g_free(act);
            errno = EINVAL;
            return NULL;
        }

        s->map_track[refs[0]]++;
        g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act);
    }

    return act->virtaddr;
}

static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data)
{
    XenGnttabState *s = user_data;
    grant_ref_t gref = GPOINTER_TO_INT(key);
    struct active_ref *act = value;

    gnt_unref(s, gref, &act->mrs, act->prot);
    g_free(act);
    return true;
}

static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt,
                               void *start_address, uint32_t *refs,
                               uint32_t count)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        return -ENOTSUP;
    }

    if (count != 1) {
        return -EINVAL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (!act) {
        return -ENOENT;
    }

    if (act->virtaddr != start_address) {
        return -EINVAL;
    }

    if (!--act->refcnt) {
        do_unmap(GINT_TO_POINTER(refs[0]), act, s);
        g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    }

    return 0;
}

/*
 * This looks a bit like the one for true Xen in xen-operations.c but
 * in emulation we don't support multi-page mappings. And under Xen we
 * *want* the multi-page mappings so we have fewer bounces through the
 * kernel and the hypervisor. So the code paths end up being similar,
 * but different.
 */
static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain,
                              uint32_t domid, XenGrantCopySegment *segs,
                              uint32_t nr_segs, Error **errp)
{
    int prot = to_domain ? PROT_WRITE : PROT_READ;
    unsigned int i;

    for (i = 0; i < nr_segs; i++) {
        XenGrantCopySegment *seg = &segs[i];
        void *page;
        uint32_t ref = to_domain ? seg->dest.foreign.ref :
            seg->source.foreign.ref;

        page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot);
        if (!page) {
            if (errp) {
                error_setg_errno(errp, errno,
                                 "xen_be_gnttab_map_refs failed");
            }
            return -errno;
        }

        if (to_domain) {
            memcpy(page + seg->dest.foreign.offset, seg->source.virt,
                   seg->len);
        } else {
            memcpy(seg->dest.virt, page + seg->source.foreign.offset,
                   seg->len);
        }

        if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) {
            if (errp) {
                error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed");
            }
            return -errno;
        }
    }

    return 0;
}

static struct xengntdev_handle *xen_be_gnttab_open(void)
{
    struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1);

    xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal);
    return xgt;
}

static int xen_be_gnttab_close(struct xengntdev_handle *xgt)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s);
    g_hash_table_destroy(xgt->active_maps);
    g_free(xgt);
    return 0;
}

static struct gnttab_backend_ops emu_gnttab_backend_ops = {
    .open = xen_be_gnttab_open,
    .close = xen_be_gnttab_close,
    .grant_copy = xen_be_gnttab_copy,
    .set_max_grants = xen_be_gnttab_set_max_grants,
    .map_refs = xen_be_gnttab_map_refs,
    .unmap = xen_be_gnttab_unmap,
};