/*
 * QEMU Xen emulation: Grant table support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "exec/target_page.h"
#include "exec/address-spaces.h"
#include "migration/vmstate.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_gnttab.h"

#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"

#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/grant_table.h"

#define TYPE_XEN_GNTTAB "xen-gnttab"
OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)

#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))

static struct gnttab_backend_ops emu_gnttab_backend_ops;

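/*
 * Grant table device state.  The whole table is backed by one RAM region
 * (gnt_frames) sized for max_frames; gnt_aliases[] are page-sized aliases
 * into that region so individual frames can be overlaid into guest
 * physical memory at the addresses recorded in gnt_frame_gpas[].
 * map_track[] counts live local mappings of each grant reference so that
 * GTF_reading/GTF_writing can be cleared when the last mapping is dropped.
 */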
struct XenGnttabState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    QemuMutex gnt_lock;

    uint32_t nr_frames;
    uint32_t max_frames;

    union {
        grant_entry_v1_t *v1;
        /* Theoretically, v2 support could be added here. */
    } entries;

    MemoryRegion gnt_frames;
    MemoryRegion *gnt_aliases;
    uint64_t *gnt_frame_gpas;

    uint8_t *map_track;
};

struct XenGnttabState *xen_gnttab_singleton;

static void xen_gnttab_realize(DeviceState *dev, Error **errp)
{
    XenGnttabState *s = XEN_GNTTAB(dev);
    int i;

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen grant table support is for Xen emulation");
        return;
    }
    s->nr_frames = 0;
    s->max_frames = kvm_xen_get_gnttab_max_frames();
    memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table",
                           XEN_PAGE_SIZE * s->max_frames, &error_abort);
    memory_region_set_enabled(&s->gnt_frames, true);
    s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames);
    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);

    /* Create individual page-sized aliases for overlays */
    s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames);
    s->gnt_frame_gpas = (void *)g_new(uint64_t, s->max_frames);
    for (i = 0; i < s->max_frames; i++) {
        memory_region_init_alias(&s->gnt_aliases[i], OBJECT(dev),
                                 NULL, &s->gnt_frames,
                                 i * XEN_PAGE_SIZE, XEN_PAGE_SIZE);
        s->gnt_frame_gpas[i] = INVALID_GPA;
    }

    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
    qemu_mutex_init(&s->gnt_lock);

    xen_gnttab_singleton = s;

    s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);

    xen_gnttab_ops = &emu_gnttab_backend_ops;
}

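/*
 * Only nr_frames and the saved frame GPAs are migrated; the overlay
 * mappings themselves are not.  So after loading state, re-map every
 * grant frame that the guest had placed in its physmap.
 */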
static int xen_gnttab_post_load(void *opaque, int version_id)
{
    XenGnttabState *s = XEN_GNTTAB(opaque);
    uint32_t i;

    for (i = 0; i < s->nr_frames; i++) {
        if (s->gnt_frame_gpas[i] != INVALID_GPA) {
            xen_overlay_do_map_page(&s->gnt_aliases[i], s->gnt_frame_gpas[i]);
        }
    }
    return 0;
}

static bool xen_gnttab_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}

static const VMStateDescription xen_gnttab_vmstate = {
    .name = "xen_gnttab",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_gnttab_is_needed,
    .post_load = xen_gnttab_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(nr_frames, XenGnttabState),
        VMSTATE_VARRAY_UINT32(gnt_frame_gpas, XenGnttabState, nr_frames, 0,
                              vmstate_info_uint64, uint64_t),
        VMSTATE_END_OF_LIST()
    }
};

static void xen_gnttab_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = xen_gnttab_realize;
    dc->vmsd = &xen_gnttab_vmstate;
}

static const TypeInfo xen_gnttab_info = {
    .name          = TYPE_XEN_GNTTAB,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenGnttabState),
    .class_init    = xen_gnttab_class_init,
};

void xen_gnttab_create(void)
{
    xen_gnttab_singleton = XEN_GNTTAB(sysbus_create_simple(TYPE_XEN_GNTTAB,
                                                           -1, NULL));
}

static void xen_gnttab_register_types(void)
{
    type_register_static(&xen_gnttab_info);
}

type_init(xen_gnttab_register_types)

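/*
 * Map grant table frame @idx at guest frame @gfn by overlaying the
 * corresponding page-sized alias into guest physical memory.  This is
 * reached when the guest asks for a grant table frame in its physmap
 * (i.e., in this emulation, the XENMEM_add_to_physmap path with
 * XENMAPSPACE_grant_table).
 */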
int xen_gnttab_map_page(uint64_t idx, uint64_t gfn)
{
    XenGnttabState *s = xen_gnttab_singleton;
    uint64_t gpa = gfn << XEN_PAGE_SHIFT;

    if (!s) {
        return -ENOTSUP;
    }

    if (idx >= s->max_frames) {
        return -EINVAL;
    }

    QEMU_IOTHREAD_LOCK_GUARD();
    QEMU_LOCK_GUARD(&s->gnt_lock);

    xen_overlay_do_map_page(&s->gnt_aliases[idx], gpa);

    s->gnt_frame_gpas[idx] = gpa;

    if (s->nr_frames <= idx) {
        s->nr_frames = idx + 1;
    }

    return 0;
}

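/*
 * Only grant table v1 is implemented.  A request for v1 succeeds, a
 * request for v2 fails with -ENOSYS (as if set_version had never been
 * introduced), and the reported version is always 1.
 */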
int xen_gnttab_set_version_op(struct gnttab_set_version *set)
{
    int ret;

    switch (set->version) {
    case 1:
        ret = 0;
        break;

    case 2:
        /* Behave as before set_version was introduced. */
        ret = -ENOSYS;
        break;

    default:
        ret = -EINVAL;
    }

    set->version = 1;
    return ret;
}

int xen_gnttab_get_version_op(struct gnttab_get_version *get)
{
    if (get->dom != DOMID_SELF && get->dom != xen_domid) {
        return -ESRCH;
    }

    get->version = 1;
    return 0;
}

int xen_gnttab_query_size_op(struct gnttab_query_size *size)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    if (size->dom != DOMID_SELF && size->dom != xen_domid) {
        size->status = GNTST_bad_domain;
        return 0;
    }

    size->status = GNTST_okay;
    size->nr_frames = s->nr_frames;
    size->max_nr_frames = s->max_frames;
    return 0;
}

/* Track per-open refs, to allow close() to clean up. */
struct active_ref {
    MemoryRegionSection mrs;
    void *virtaddr;
    uint32_t refcnt;
    int prot;
};

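/*
 * Drop one local mapping of @ref.  If the mapping was writable, mark the
 * backing page dirty before releasing the MemoryRegion reference; once
 * the last tracked mapping goes away, clear GTF_reading/GTF_writing in
 * the grant entry so the guest can see the grant is no longer in use.
 */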
static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
                      MemoryRegionSection *mrs, int prot)
{
    if (mrs && mrs->mr) {
        if (prot & PROT_WRITE) {
            memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
                                    XEN_PAGE_SIZE);
        }
        memory_region_unref(mrs->mr);
        mrs->mr = NULL;
    }
    assert(s->map_track[ref] != 0);

    if (--s->map_track[ref] == 0) {
        grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
        qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing));
    }
}

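/*
 * Validate grant @ref for a local mapping with the given protection and
 * atomically set GTF_reading (plus GTF_writing for writable mappings)
 * with a cmpxchg on the entry's flags.  Returns the guest physical
 * address of the granted frame, or INVALID_GPA if the reference is out
 * of range, already mapped the maximum number of times, not granted to
 * DOMID_QEMU, or read-only when a writable mapping was requested.
 */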
static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
{
    uint16_t mask = GTF_type_mask | GTF_sub_page;
    grant_entry_v1_t gnt, *gnt_p;
    int retries = 0;

    if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
        s->map_track[ref] == UINT8_MAX) {
        return INVALID_GPA;
    }

    if (prot & PROT_WRITE) {
        mask |= GTF_readonly;
    }

    gnt_p = &s->entries.v1[ref];

    /*
     * The guest can legitimately be changing the GTF_readonly flag. Allow
     * that, but don't let a malicious guest cause a livelock.
     */
    for (retries = 0; retries < 5; retries++) {
        uint16_t new_flags;

        /* Read the entry before an atomic operation on its flags */
        gnt = *(volatile grant_entry_v1_t *)gnt_p;

        if ((gnt.flags & mask) != GTF_permit_access ||
            gnt.domid != DOMID_QEMU) {
            return INVALID_GPA;
        }

        new_flags = gnt.flags | GTF_reading;
        if (prot & PROT_WRITE) {
            new_flags |= GTF_writing;
        }

        if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) {
            return (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
        }
    }

    return INVALID_GPA;
}

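/*
 * Per-open handle for the emulated gntdev backend: each open() gets its
 * own hash table of active mappings, keyed by grant reference, so that
 * close() can clean up anything still mapped.
 */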
struct xengntdev_handle {
    GHashTable *active_maps;
};

static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
                                        uint32_t nr_grants)
{
    return 0;
}

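/*
 * Map grant references from @domid for a local backend.  Only single-page
 * (count == 1) mappings are supported here; the returned pointer is the
 * host address of the guest page backing the granted frame.  Repeated
 * maps of the same ref through the same handle just bump a refcount,
 * upgrading the mapping to writable if needed.
 */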
static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt,
                                    uint32_t count, uint32_t domid,
                                    uint32_t *refs, int prot)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        errno = ENOTSUP;
        return NULL;
    }

    if (domid != xen_domid) {
        errno = EINVAL;
        return NULL;
    }

    if (!count || count > 4096) {
        errno = EINVAL;
        return NULL;
    }

    /*
     * Making a contiguous mapping from potentially discontiguous grant
     * references would be... distinctly non-trivial. We don't support it.
     * Even changing the API to return an array of pointers, one per page,
     * wouldn't be simple to use in PV backends because some structures
     * actually cross page boundaries (e.g. 32-bit blkif_response ring
     * entries are 12 bytes).
     */
    if (count != 1) {
        errno = EINVAL;
        return NULL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (act) {
        if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) {
            if (gnt_ref(s, refs[0], prot) == INVALID_GPA) {
                return NULL;
            }
            act->prot |= PROT_WRITE;
        }
        act->refcnt++;
    } else {
        uint64_t gpa = gnt_ref(s, refs[0], prot);
        if (gpa == INVALID_GPA) {
            errno = EINVAL;
            return NULL;
        }

        act = g_new0(struct active_ref, 1);
        act->prot = prot;
        act->refcnt = 1;
        act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE);

        if (act->mrs.mr &&
            !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) &&
            memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) {
            act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block,
                                             act->mrs.offset_within_region);
        }
        if (!act->virtaddr) {
            gnt_unref(s, refs[0], &act->mrs, 0);
            g_free(act);
            errno = EINVAL;
            return NULL;
        }

        s->map_track[refs[0]]++;
        g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act);
    }

    return act->virtaddr;
}

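/*
 * GHRFunc helper that tears down one active_ref.  Used both from the
 * explicit unmap path below and from g_hash_table_foreach_remove() at
 * close() time to flush any mappings the user never unmapped.
 */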
static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data)
{
    XenGnttabState *s = user_data;
    grant_ref_t gref = GPOINTER_TO_INT(key);
    struct active_ref *act = value;

    gnt_unref(s, gref, &act->mrs, act->prot);
    g_free(act);
    return true;
}

static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt,
                               void *start_address, uint32_t *refs,
                               uint32_t count)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        return -ENOTSUP;
    }

    if (count != 1) {
        return -EINVAL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (!act) {
        return -ENOENT;
    }

    if (act->virtaddr != start_address) {
        return -EINVAL;
    }

    if (!--act->refcnt) {
        do_unmap(GINT_TO_POINTER(refs[0]), act, s);
        g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    }

    return 0;
}

/*
 * This looks a bit like the one for true Xen in xen-operations.c but
 * in emulation we don't support multi-page mappings. And under Xen we
 * *want* the multi-page mappings so we have fewer bounces through the
 * kernel and the hypervisor. So the code paths end up being similar,
 * but different.
 */
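/*
 * Each segment is therefore copied one page at a time: map the foreign
 * ref, memcpy in the requested direction, then unmap it again.
 */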
static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain,
                              uint32_t domid, XenGrantCopySegment *segs,
                              uint32_t nr_segs, Error **errp)
{
    int prot = to_domain ? PROT_WRITE : PROT_READ;
    unsigned int i;

    for (i = 0; i < nr_segs; i++) {
        XenGrantCopySegment *seg = &segs[i];
        void *page;
        uint32_t ref = to_domain ? seg->dest.foreign.ref :
            seg->source.foreign.ref;

        page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot);
        if (!page) {
            if (errp) {
                error_setg_errno(errp, errno,
                                 "xen_be_gnttab_map_refs failed");
            }
            return -errno;
        }

        if (to_domain) {
            memcpy(page + seg->dest.foreign.offset, seg->source.virt,
                   seg->len);
        } else {
            memcpy(seg->dest.virt, page + seg->source.foreign.offset,
                   seg->len);
        }

        if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) {
            if (errp) {
                error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed");
            }
            return -errno;
        }
    }

    return 0;
}

static struct xengntdev_handle *xen_be_gnttab_open(void)
{
    struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1);

    xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal);
    return xgt;
}

static int xen_be_gnttab_close(struct xengntdev_handle *xgt)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s);
    g_hash_table_destroy(xgt->active_maps);
    g_free(xgt);
    return 0;
}

static struct gnttab_backend_ops emu_gnttab_backend_ops = {
    .open = xen_be_gnttab_open,
    .close = xen_be_gnttab_close,
    .grant_copy = xen_be_gnttab_copy,
    .set_max_grants = xen_be_gnttab_set_max_grants,
    .map_refs = xen_be_gnttab_map_refs,
    .unmap = xen_be_gnttab_unmap,
};