xref: /qemu/hw/xen/xen-mapcache.c (revision 88fb705600a3b612c571efc9f1a6aed923a18dcc)
/*
 * Copyright (C) 2011       Citrix Ltd.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/error-report.h"

#include <sys/resource.h>

#include "hw/xen/xen-hvm-common.h"
#include "hw/xen/xen_native.h"
#include "qemu/bitmap.h"

#include "system/runstate.h"
#include "system/xen-mapcache.h"
#include "trace.h"

#include <xenevtchn.h>
#include <xengnttab.h>

#if HOST_LONG_BITS == 32
#  define MCACHE_MAX_SIZE     (1UL<<31) /* 2GB Cap */
#else
#  define MCACHE_MAX_SIZE     (1UL<<35) /* 32GB Cap */
#endif

/*
 * This is the size of the virtual address space reserved for QEMU that will
 * not be used by the MapCache.
 * Empirical tests show that QEMU uses about 75MB more than max_mcache_size.
 */
#define NON_MCACHE_MEMORY_SIZE (80 * MiB)

typedef struct MapCacheEntry {
    hwaddr paddr_index;
    uint8_t *vaddr_base;
    unsigned long *valid_mapping;
    uint32_t lock;
#define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
#define XEN_MAPCACHE_ENTRY_GRANT (1 << 1)
    uint8_t flags;
    hwaddr size;
    struct MapCacheEntry *next;
} MapCacheEntry;

typedef struct MapCacheRev {
    uint8_t *vaddr_req;
    hwaddr paddr_index;
    hwaddr size;
    QTAILQ_ENTRY(MapCacheRev) next;
    bool dma;
} MapCacheRev;

typedef struct MapCache {
    MapCacheEntry *entry;
    unsigned long nr_buckets;
    QTAILQ_HEAD(, MapCacheRev) locked_entries;

    /* For most cases (>99.9%), the page address is the same. */
    MapCacheEntry *last_entry;
    unsigned long max_mcache_size;
    unsigned int bucket_shift;
    unsigned long bucket_size;

    phys_offset_to_gaddr_t phys_offset_to_gaddr;
    QemuMutex lock;
    void *opaque;
} MapCache;

static MapCache *mapcache;
static MapCache *mapcache_grants_ro;
static MapCache *mapcache_grants_rw;
static xengnttab_handle *xen_region_gnttabdev;

static inline void mapcache_lock(MapCache *mc)
{
    qemu_mutex_lock(&mc->lock);
}

static inline void mapcache_unlock(MapCache *mc)
{
    qemu_mutex_unlock(&mc->lock);
}

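/*
 * Returns 1 if every bit in [nr, nr + size) of the bitmap is set, i.e. if
 * all XC_PAGE_SIZE pages covered by the request were successfully mapped,
 * and 0 otherwise.
 */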
static inline int test_bits(int nr, int size, const unsigned long *addr)
{
    unsigned long res = find_next_zero_bit(addr, size + nr, nr);

    return res >= nr + size;
}

static MapCache *xen_map_cache_init_single(phys_offset_to_gaddr_t f,
                                           void *opaque,
                                           unsigned int bucket_shift,
                                           unsigned long max_size)
{
    unsigned long size;
    MapCache *mc;

    assert(bucket_shift >= XC_PAGE_SHIFT);

    mc = g_new0(MapCache, 1);

    mc->phys_offset_to_gaddr = f;
    mc->opaque = opaque;
    qemu_mutex_init(&mc->lock);

    QTAILQ_INIT(&mc->locked_entries);

    mc->bucket_shift = bucket_shift;
    mc->bucket_size = 1UL << bucket_shift;
    mc->max_mcache_size = max_size;

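    /*
     * Round the number of XC_PAGE_SIZE pages covered by max_mcache_size up
     * to whole buckets.  Illustrative example: with bucket_shift == 20
     * (1MiB buckets) and max_size == 32GiB this is (2^23 + 255) >> 8,
     * i.e. nr_buckets == 32768.
     */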
    mc->nr_buckets =
        (((mc->max_mcache_size >> XC_PAGE_SHIFT) +
          (1UL << (bucket_shift - XC_PAGE_SHIFT)) - 1) >>
         (bucket_shift - XC_PAGE_SHIFT));

    size = mc->nr_buckets * sizeof(MapCacheEntry);
    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
    trace_xen_map_cache_init(mc->nr_buckets, size);
    mc->entry = g_malloc0(size);
    return mc;
}

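/*
 * Create the three caches: one for foreign mappings of guest RAM and two
 * (read-only and read-write) for grant mappings.  Buckets are 64KiB on
 * 32-bit hosts and 1MiB on 64-bit hosts.  When QEMU runs unprivileged, the
 * cache size is additionally clamped so that it fits within RLIMIT_AS,
 * leaving NON_MCACHE_MEMORY_SIZE for the rest of QEMU.
 */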
void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
{
    struct rlimit rlimit_as;
    unsigned long max_mcache_size;
    unsigned int bucket_shift;

    xen_region_gnttabdev = xengnttab_open(NULL, 0);
    if (xen_region_gnttabdev == NULL) {
        error_report("mapcache: Failed to open gnttab device");
        exit(EXIT_FAILURE);
    }

    if (HOST_LONG_BITS == 32) {
        bucket_shift = 16;
    } else {
        bucket_shift = 20;
    }

    if (geteuid() == 0) {
        rlimit_as.rlim_cur = RLIM_INFINITY;
        rlimit_as.rlim_max = RLIM_INFINITY;
        max_mcache_size = MCACHE_MAX_SIZE;
    } else {
        getrlimit(RLIMIT_AS, &rlimit_as);
        rlimit_as.rlim_cur = rlimit_as.rlim_max;

        if (rlimit_as.rlim_max != RLIM_INFINITY) {
            warn_report("QEMU's maximum size of virtual"
                        " memory is not infinity");
        }
        if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) {
            max_mcache_size = rlimit_as.rlim_max - NON_MCACHE_MEMORY_SIZE;
        } else {
            max_mcache_size = MCACHE_MAX_SIZE;
        }
    }

    mapcache = xen_map_cache_init_single(f, opaque,
                                         bucket_shift,
                                         max_mcache_size);

    /*
     * Grant mappings must use XC_PAGE_SIZE granularity since we can't
     * map anything beyond the number of pages granted to us.
     */
    mapcache_grants_ro = xen_map_cache_init_single(f, opaque,
                                                   XC_PAGE_SHIFT,
                                                   max_mcache_size);
    mapcache_grants_rw = xen_map_cache_init_single(f, opaque,
                                                   XC_PAGE_SHIFT,
                                                   max_mcache_size);

    setrlimit(RLIMIT_AS, &rlimit_as);
}

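/*
 * (Re)populate a single cache entry: map 'size' bytes of guest memory
 * (foreign pages, grant references, or an anonymous dummy mapping) at
 * 'vaddr' if one is given, or wherever the kernel chooses otherwise, and
 * record which XC_PAGE_SIZE pages were mapped successfully in
 * entry->valid_mapping.
 */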
static void xen_remap_bucket(MapCache *mc,
                             MapCacheEntry *entry,
                             void *vaddr,
                             hwaddr size,
                             hwaddr address_index,
                             bool dummy,
                             bool grant,
                             bool is_write,
                             ram_addr_t ram_offset)
{
    uint8_t *vaddr_base;
    g_autofree uint32_t *refs = NULL;
    g_autofree xen_pfn_t *pfns = NULL;
    g_autofree int *err;
    unsigned int i;
    hwaddr nb_pfn = size >> XC_PAGE_SHIFT;

    trace_xen_remap_bucket(address_index);

    if (grant) {
        refs = g_new0(uint32_t, nb_pfn);
    } else {
        pfns = g_new0(xen_pfn_t, nb_pfn);
    }
    err = g_new0(int, nb_pfn);

    if (entry->vaddr_base != NULL) {
        if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
            ram_block_notify_remove(entry->vaddr_base, entry->size,
                                    entry->size);
        }

        /*
         * If an entry is being replaced by another mapping and we're using
         * the MAP_FIXED flag for it, there is a possibility of a race for
         * the vaddr address with another thread doing an mmap call itself
         * (see man 2 mmap). To avoid that we skip explicit unmapping here
         * and allow the kernel to destroy the previous mappings by replacing
         * them in the mmap call later.
         *
         * Non-identical replacements are therefore not allowed.
         */
        assert(!vaddr || (entry->vaddr_base == vaddr && entry->size == size));

        if (!vaddr && munmap(entry->vaddr_base, entry->size) != 0) {
            perror("unmap fails");
            exit(-1);
        }
    }
    g_free(entry->valid_mapping);
    entry->valid_mapping = NULL;

    if (grant) {
        hwaddr grant_base = address_index - (ram_offset >> XC_PAGE_SHIFT);

        for (i = 0; i < nb_pfn; i++) {
            refs[i] = grant_base + i;
        }
    } else {
        for (i = 0; i < nb_pfn; i++) {
            pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i;
        }
    }

    entry->flags &= ~XEN_MAPCACHE_ENTRY_GRANT;

    if (!dummy) {
        if (grant) {
            int prot = PROT_READ;

            if (is_write) {
                prot |= PROT_WRITE;
            }

            entry->flags |= XEN_MAPCACHE_ENTRY_GRANT;
            assert(vaddr == NULL);
            vaddr_base = xengnttab_map_domain_grant_refs(xen_region_gnttabdev,
                                                         nb_pfn,
                                                         xen_domid, refs,
                                                         prot);
        } else {
            /*
             * If the caller has requested the mapping at a specific address,
             * use MAP_FIXED to make sure it's honored.
             *
             * We don't yet support upgrading mappings from RO to RW; to
             * handle models using ordinary address_space_rw(), foreign
             * mappings ignore is_write and are always mapped RW.
             */
            vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
                                               PROT_READ | PROT_WRITE,
                                               vaddr ? MAP_FIXED : 0,
                                               nb_pfn, pfns, err);
        }
        if (vaddr_base == NULL) {
            perror(grant ? "xengnttab_map_domain_grant_refs"
                           : "xenforeignmemory_map2");
            exit(-1);
        }
    } else {
        /*
         * We create dummy mappings where we are unable to create a foreign
         * mapping immediately, due to certain circumstances (e.g. on resume).
         */
        vaddr_base = mmap(vaddr, size, PROT_READ | PROT_WRITE,
                          MAP_ANON | MAP_SHARED | (vaddr ? MAP_FIXED : 0),
                          -1, 0);
        if (vaddr_base == MAP_FAILED) {
            perror("mmap");
            exit(-1);
        }
    }

    if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
        ram_block_notify_add(vaddr_base, size, size);
    }

    entry->vaddr_base = vaddr_base;
    entry->paddr_index = address_index;
    entry->size = size;
    entry->valid_mapping = g_new0(unsigned long,
                                  BITS_TO_LONGS(size >> XC_PAGE_SHIFT));

    if (dummy) {
        entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY;
    } else {
        entry->flags &= ~(XEN_MAPCACHE_ENTRY_DUMMY);
    }

    bitmap_zero(entry->valid_mapping, nb_pfn);
    for (i = 0; i < nb_pfn; i++) {
        if (!err[i]) {
            bitmap_set(entry->valid_mapping, i, 1);
        }
    }
}

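/*
 * Look up (and, if necessary, create) a mapping covering
 * [phys_addr, phys_addr + size).  Entries hash into buckets by
 * phys_addr >> bucket_shift and collisions are chained through
 * entry->next.  An unlocked entry that no longer matches is remapped in
 * place; when 'lock' is set, a MapCacheRev is recorded so the mapping can
 * later be looked up and released by its virtual address.
 */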
static uint8_t *xen_map_cache_unlocked(MapCache *mc,
                                       hwaddr phys_addr, hwaddr size,
                                       ram_addr_t ram_offset,
                                       uint8_t lock, bool dma,
                                       bool grant, bool is_write)
{
    MapCacheEntry *entry, *pentry = NULL,
                  *free_entry = NULL, *free_pentry = NULL;
    hwaddr address_index;
    hwaddr address_offset;
    hwaddr cache_size = size;
    hwaddr test_bit_size;
    bool translated G_GNUC_UNUSED = false;
    bool dummy = false;

tryagain:
    address_index  = phys_addr >> mc->bucket_shift;
    address_offset = phys_addr & (mc->bucket_size - 1);

    trace_xen_map_cache(phys_addr);

    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    if (size) {
        test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1));

        if (test_bit_size % XC_PAGE_SIZE) {
            test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
        }
    } else {
        test_bit_size = XC_PAGE_SIZE;
    }

    if (mc->last_entry != NULL &&
        mc->last_entry->paddr_index == address_index &&
        !lock && !size &&
        test_bits(address_offset >> XC_PAGE_SHIFT,
                  test_bit_size >> XC_PAGE_SHIFT,
                  mc->last_entry->valid_mapping)) {
        trace_xen_map_cache_return(
            mc->last_entry->vaddr_base + address_offset
        );
        return mc->last_entry->vaddr_base + address_offset;
    }

    /* cache_size is always a multiple of mc->bucket_size */
    if (size) {
        cache_size = size + address_offset;
        if (cache_size % mc->bucket_size) {
            cache_size += mc->bucket_size - (cache_size % mc->bucket_size);
        }
    } else {
        cache_size = mc->bucket_size;
    }

    entry = &mc->entry[address_index % mc->nr_buckets];

    while (entry && (!entry->vaddr_base ||
            entry->paddr_index != address_index || entry->size != cache_size ||
             !test_bits(address_offset >> XC_PAGE_SHIFT,
                 test_bit_size >> XC_PAGE_SHIFT,
                 entry->valid_mapping))) {
        if (!free_entry && (!entry->lock || !entry->vaddr_base)) {
            free_entry = entry;
            free_pentry = pentry;
        }
        pentry = entry;
        entry = entry->next;
    }
    if (!entry && free_entry) {
        entry = free_entry;
        pentry = free_pentry;
    }
    if (!entry) {
        entry = g_new0(MapCacheEntry, 1);
        pentry->next = entry;
        xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
                         grant, is_write, ram_offset);
    } else if (!entry->lock) {
        if (!entry->vaddr_base || entry->paddr_index != address_index ||
                entry->size != cache_size ||
                !test_bits(address_offset >> XC_PAGE_SHIFT,
                    test_bit_size >> XC_PAGE_SHIFT,
                    entry->valid_mapping)) {
            xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
                             grant, is_write, ram_offset);
        }
    }

    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                   test_bit_size >> XC_PAGE_SHIFT,
                   entry->valid_mapping)) {
        mc->last_entry = NULL;
#ifdef XEN_COMPAT_PHYSMAP
        if (!translated && mc->phys_offset_to_gaddr) {
            phys_addr = mc->phys_offset_to_gaddr(phys_addr, size);
            translated = true;
            goto tryagain;
        }
#endif
        if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) {
            dummy = true;
            goto tryagain;
        }
        trace_xen_map_cache_return(NULL);
        return NULL;
    }

    mc->last_entry = entry;
    if (lock) {
        MapCacheRev *reventry = g_new0(MapCacheRev, 1);
        entry->lock++;
        if (entry->lock == 0) {
            error_report("mapcache entry lock overflow: "HWADDR_FMT_plx" -> %p",
                         entry->paddr_index, entry->vaddr_base);
            abort();
        }
        reventry->dma = dma;
        reventry->vaddr_req = mc->last_entry->vaddr_base + address_offset;
        reventry->paddr_index = mc->last_entry->paddr_index;
        reventry->size = entry->size;
        QTAILQ_INSERT_HEAD(&mc->locked_entries, reventry, next);
    }

    trace_xen_map_cache_return(
        mc->last_entry->vaddr_base + address_offset
    );
    return mc->last_entry->vaddr_base + address_offset;
}

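/*
 * Hypothetical usage sketch for a locked DMA mapping (the real callers are
 * the physmem/address_space_map() paths), for illustration only:
 *
 *     uint8_t *p = xen_map_cache(mr, addr, len, ram_offset, 1, true, is_write);
 *     if (p) {
 *         ... access p[0 .. len - 1] ...
 *         xen_invalidate_map_cache_entry(p);
 *     }
 *
 * With lock == 0 the returned pointer is transient and may be invalidated
 * by a later lookup; grant-backed regions must always be mapped with
 * lock != 0.
 */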
uint8_t *xen_map_cache(MemoryRegion *mr,
                       hwaddr phys_addr, hwaddr size,
                       ram_addr_t ram_addr_offset,
                       uint8_t lock, bool dma,
                       bool is_write)
{
    bool grant = xen_mr_is_grants(mr);
    MapCache *mc = mapcache;
    uint8_t *p;

    if (grant) {
        mc = is_write ? mapcache_grants_rw : mapcache_grants_ro;
    }

    if (grant && !lock) {
        /*
         * Grants are only supported via address_space_map(). Anything
         * else is considered a user/guest error.
         *
         * QEMU generally doesn't expect these mappings to ever fail, so
         * if this happens we report an error message and abort().
         */
        error_report("Tried to access a grant reference without mapping it.");
        abort();
    }

    mapcache_lock(mc);
    p = xen_map_cache_unlocked(mc, phys_addr, size, ram_addr_offset,
                               lock, dma, grant, is_write);
    mapcache_unlock(mc);
    return p;
}

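/*
 * Reverse lookup: translate a pointer previously returned by a locked
 * xen_map_cache() call back into a guest RAM address, using the
 * locked_entries list to find the bucket it belongs to.
 */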
static ram_addr_t xen_ram_addr_from_mapcache_single(MapCache *mc, void *ptr)
{
    MapCacheEntry *entry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    ram_addr_t raddr;
    int found = 0;

    mapcache_lock(mc);
    QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
        if (reventry->vaddr_req == ptr) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        trace_xen_ram_addr_from_mapcache_not_found(ptr);
        mapcache_unlock(mc);
        return RAM_ADDR_INVALID;
    }

    entry = &mc->entry[paddr_index % mc->nr_buckets];
    while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
        entry = entry->next;
    }
    if (!entry) {
        trace_xen_ram_addr_from_mapcache_not_in_cache(ptr);
        raddr = RAM_ADDR_INVALID;
    } else {
        raddr = (reventry->paddr_index << mc->bucket_shift) +
             ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
    }
    mapcache_unlock(mc);
    return raddr;
}

ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
{
    ram_addr_t addr;

    addr = xen_ram_addr_from_mapcache_single(mapcache, ptr);
    if (addr == RAM_ADDR_INVALID) {
        addr = xen_ram_addr_from_mapcache_single(mapcache_grants_ro, ptr);
    }
    if (addr == RAM_ADDR_INVALID) {
        addr = xen_ram_addr_from_mapcache_single(mapcache_grants_rw, ptr);
    }

    return addr;
}

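/*
 * Drop one lock reference on the entry backing 'buffer'.  Once the last
 * reference is gone the mapping is torn down: grant mappings through
 * xengnttab_unmap(), foreign and dummy mappings through munmap().
 */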
static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
                                                    uint8_t *buffer)
{
    MapCacheEntry *entry = NULL, *pentry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    int found = 0;
    int rc;

    QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
        if (reventry->vaddr_req == buffer) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        trace_xen_invalidate_map_cache_entry_unlocked_not_found(buffer);
        QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
            trace_xen_invalidate_map_cache_entry_unlocked_found(
                reventry->paddr_index,
                reventry->vaddr_req
            );
        }
        return;
    }
    QTAILQ_REMOVE(&mc->locked_entries, reventry, next);
    g_free(reventry);

    if (mc->last_entry != NULL &&
        mc->last_entry->paddr_index == paddr_index) {
        mc->last_entry = NULL;
    }

    entry = &mc->entry[paddr_index % mc->nr_buckets];
    while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
        pentry = entry;
        entry = entry->next;
    }
    if (!entry) {
        trace_xen_invalidate_map_cache_entry_unlocked_miss(buffer);
        return;
    }
    entry->lock--;
    if (entry->lock > 0) {
        return;
    }

    ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size);
    if (entry->flags & XEN_MAPCACHE_ENTRY_GRANT) {
        rc = xengnttab_unmap(xen_region_gnttabdev, entry->vaddr_base,
                             entry->size >> mc->bucket_shift);
    } else {
        rc = munmap(entry->vaddr_base, entry->size);
    }

    if (rc) {
        perror("unmap fails");
        exit(-1);
    }

    g_free(entry->valid_mapping);
    if (pentry) {
        pentry->next = entry->next;
        g_free(entry);
    } else {
        /*
         * Invalidate mapping but keep entry->next pointing to the rest
         * of the list.
         *
         * Note that lock is already zero here, otherwise we don't unmap.
         */
        entry->paddr_index = 0;
        entry->vaddr_base = NULL;
        entry->valid_mapping = NULL;
        entry->flags = 0;
        entry->size = 0;
    }
}

typedef struct XenMapCacheData {
    Coroutine *co;
    uint8_t *buffer;
} XenMapCacheData;

static void xen_invalidate_map_cache_entry_single(MapCache *mc, uint8_t *buffer)
{
    mapcache_lock(mc);
    xen_invalidate_map_cache_entry_unlocked(mc, buffer);
    mapcache_unlock(mc);
}

static void xen_invalidate_map_cache_entry_all(uint8_t *buffer)
{
    xen_invalidate_map_cache_entry_single(mapcache, buffer);
    xen_invalidate_map_cache_entry_single(mapcache_grants_ro, buffer);
    xen_invalidate_map_cache_entry_single(mapcache_grants_rw, buffer);
}

static void xen_invalidate_map_cache_entry_bh(void *opaque)
{
    XenMapCacheData *data = opaque;

    xen_invalidate_map_cache_entry_all(data->buffer);
    aio_co_wake(data->co);
}

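/*
 * When called from coroutine context, defer the invalidation to a bottom
 * half on the current AioContext and yield until it has run; otherwise
 * invalidate synchronously.
 */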
void coroutine_mixed_fn xen_invalidate_map_cache_entry(uint8_t *buffer)
{
    if (qemu_in_coroutine()) {
        XenMapCacheData data = {
            .co = qemu_coroutine_self(),
            .buffer = buffer,
        };
        aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
                                xen_invalidate_map_cache_entry_bh, &data);
        qemu_coroutine_yield();
    } else {
        xen_invalidate_map_cache_entry_all(buffer);
    }
}

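/*
 * Unmap and reset every entry that is not currently locked.  Locked
 * entries (for example in-flight DMA mappings recorded in locked_entries)
 * are left untouched and only traced.
 */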
static void xen_invalidate_map_cache_single(MapCache *mc)
{
    unsigned long i;
    MapCacheRev *reventry;

    mapcache_lock(mc);

    QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
        if (!reventry->dma) {
            continue;
        }
        trace_xen_invalidate_map_cache(reventry->paddr_index,
                                       reventry->vaddr_req);
    }

    for (i = 0; i < mc->nr_buckets; i++) {
        MapCacheEntry *entry = &mc->entry[i];

        if (entry->vaddr_base == NULL) {
            continue;
        }
        if (entry->lock > 0) {
            continue;
        }

        if (munmap(entry->vaddr_base, entry->size) != 0) {
            perror("unmap fails");
            exit(-1);
        }

        entry->paddr_index = 0;
        entry->vaddr_base = NULL;
        entry->size = 0;
        g_free(entry->valid_mapping);
        entry->valid_mapping = NULL;
    }

    mc->last_entry = NULL;

    mapcache_unlock(mc);
}

void xen_invalidate_map_cache(void)
{
    /* Flush pending AIO before destroying the mapcache */
    bdrv_drain_all();

    xen_invalidate_map_cache_single(mapcache);
}

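/*
 * Remap an existing, non-grant entry so that the mapping formerly backing
 * old_phys_addr is re-created for new_phys_addr at the same virtual
 * address (presumably to fix up dummy mappings once the real physmap is
 * known).  Returns NULL if no matching entry exists or if the remapped
 * range is not fully valid.
 */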
static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc,
                                                 hwaddr old_phys_addr,
                                                 hwaddr new_phys_addr,
                                                 hwaddr size)
{
    MapCacheEntry *entry;
    hwaddr address_index, address_offset;
    hwaddr test_bit_size, cache_size = size;

    address_index  = old_phys_addr >> mc->bucket_shift;
    address_offset = old_phys_addr & (mc->bucket_size - 1);

    assert(size);
    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    test_bit_size = size + (old_phys_addr & (XC_PAGE_SIZE - 1));
    if (test_bit_size % XC_PAGE_SIZE) {
        test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
    }
    cache_size = size + address_offset;
    if (cache_size % mc->bucket_size) {
        cache_size += mc->bucket_size - (cache_size % mc->bucket_size);
    }

    entry = &mc->entry[address_index % mc->nr_buckets];
    while (entry && !(entry->paddr_index == address_index &&
                      entry->size == cache_size)) {
        entry = entry->next;
    }
    if (!entry) {
        trace_xen_replace_cache_entry_unlocked(old_phys_addr);
        return NULL;
    }

    assert((entry->flags & XEN_MAPCACHE_ENTRY_GRANT) == 0);

    address_index  = new_phys_addr >> mc->bucket_shift;
    address_offset = new_phys_addr & (mc->bucket_size - 1);

    trace_xen_replace_cache_entry_dummy(old_phys_addr, new_phys_addr);

    xen_remap_bucket(mc, entry, entry->vaddr_base,
                     cache_size, address_index, false,
                     false, false, old_phys_addr);
    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                   test_bit_size >> XC_PAGE_SHIFT,
                   entry->valid_mapping)) {
        trace_xen_replace_cache_entry_unlocked_could_not_update_entry(
            old_phys_addr
        );
        return NULL;
    }

    return entry->vaddr_base + address_offset;
}

uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr,
                                 hwaddr new_phys_addr,
                                 hwaddr size)
{
    uint8_t *p;

    mapcache_lock(mapcache);
    p = xen_replace_cache_entry_unlocked(mapcache, old_phys_addr,
                                         new_phys_addr, size);
    mapcache_unlock(mapcache);
    return p;
}
784