/* qemu/system/physmem.c (xref at revision f051a9c4dc70cd1b6eafa61aec8f3b9344e02e85) */
/*
 * RAM allocation and memory access
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "exec/page-vary.h"
#include "qapi/error.h"

#include "qemu/cutils.h"
#include "qemu/cacheflush.h"
#include "qemu/hbitmap.h"
#include "qemu/madvise.h"
#include "qemu/lockable.h"

#ifdef CONFIG_TCG
#include "hw/core/tcg-cpu-ops.h"
#endif /* CONFIG_TCG */

#include "exec/exec-all.h"
#include "exec/page-protection.h"
#include "exec/target_page.h"
#include "exec/translation-block.h"
#include "hw/qdev-core.h"
#include "hw/qdev-properties.h"
#include "hw/boards.h"
#include "system/xen.h"
#include "system/kvm.h"
#include "system/tcg.h"
#include "system/qtest.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/qemu-print.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "qemu/memfd.h"
#include "exec/memory.h"
#include "exec/ioport.h"
#include "system/dma.h"
#include "system/hostmem.h"
#include "system/hw_accel.h"
#include "system/xen-mapcache.h"
#include "trace.h"

#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
#include <linux/falloc.h>
#endif

#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "system/replay.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"

#include "qemu/pmem.h"

#include "migration/cpr.h"
#include "migration/vmstate.h"

#include "qemu/range.h"
#ifndef _WIN32
#include "qemu/mmap-alloc.h"
#endif

#include "monitor/monitor.h"

#ifdef CONFIG_LIBDAXCTL
#include <daxctl/libdaxctl.h>
#endif

//#define DEBUG_SUBPAGE

/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

static MemoryRegion io_mem_unassigned;

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

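/*
 * Worked example (illustrative): with a 4 KiB target page
 * (TARGET_PAGE_BITS == 12), P_L2_LEVELS is ((64 - 12 - 1) / 9) + 1 == 6,
 * i.e. six radix-tree levels of P_L2_SIZE == 512 entries each resolve
 * the 52 page-index bits of a physical address.  PHYS_MAP_NODE_NIL is
 * simply the largest value representable in the 26-bit 'ptr' field.
 */
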
typedef PhysPageEntry Node[P_L2_SIZE];

typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    MemoryRegionSection *mru_section;
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
};

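/*
 * A subpage covers a page whose contents are backed by more than one
 * MemoryRegionSection (regions smaller than TARGET_PAGE_SIZE, or not
 * page-aligned): the whole page dispatches to the subpage_t's iomem
 * region, and sub_section[] maps each byte offset within the page back
 * to the section that really owns it.
 */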
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    FlatView *fv;
    hwaddr base;
    uint16_t sub_section[];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_log_global_after_sync(MemoryListener *listener);
static void tcg_commit(MemoryListener *listener);

/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: the CPU whose AddressSpace this is
 * @as: the AddressSpace itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 */
typedef struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    MemoryListener tcg_as_listener;
} CPUAddressSpace;

struct DirtyBitmapSnapshot {
    ram_addr_t start;
    ram_addr_t end;
    unsigned long dirty[];
};

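/*
 * Map nodes live in one flat array that grows on demand; the static
 * alloc_hint below remembers the size that was eventually needed, across
 * dispatch rebuilds, so that later maps avoid repeated reallocation.
 */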
static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    static unsigned alloc_hint = 16;
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(alloc_hint, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
        alloc_hint = map->nodes_nb_alloc;
    }
}

static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    unsigned i;
    uint32_t ret;
    PhysPageEntry e;
    PhysPageEntry *p;

    ret = map->nodes_nb++;
    p = map->nodes[ret];
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
    return ret;
}

static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, uint64_t *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, uint64_t nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

/* Compact a non-leaf page entry: detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (P_L2_LEVELS >= (1 << 6) &&
        lp->skip + p[valid_ptr].skip >= (1 << 6)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

void address_space_dispatch_compact(AddressSpaceDispatch *d)
{
    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes);
    }
}

static inline bool section_covers_addr(const MemoryRegionSection *section,
                                       hwaddr addr)
{
    /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
     * the section must cover the entire address space.
     */
    return int128_gethi(section->size) ||
           range_covers_byte(section->offset_within_address_space,
                             int128_getlo(section->size), addr);
}

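/*
 * Walk the radix tree from the root: each step consumes 'skip' levels'
 * worth of index bits at once (compacted chains have skip > 1), until
 * a leaf entry yields an index into d->map.sections.  Any miss falls
 * back to PHYS_SECTION_UNASSIGNED.
 */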
static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr addr)
{
    PhysPageEntry lp = d->phys_map, *p;
    Node *nodes = d->map.nodes;
    MemoryRegionSection *sections = d->map.sections;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (section_covers_addr(&sections[lp.ptr], addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}

/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section = qatomic_read(&d->mru_section);
    subpage_t *subpage;

    if (!section || section == &d->map.sections[PHYS_SECTION_UNASSIGNED] ||
        !section_covers_addr(section, addr)) {
        section = phys_page_find(d, addr);
        qatomic_set(&d->mru_section, section);
    }
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}

/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    MemoryRegion *mr;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    mr = section->mr;

    /* MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions.  When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly.  For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine.  If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
    return section;
}

/**
 * address_space_translate_iommu - translate an address through an IOMMU
 * memory region and then through the target address space.
 *
 * @iommu_mr: the IOMMU memory region that we start the translation from
 * @xlat: in: the address to be translated through the IOMMU;
 *        out: the translated address offset within the destination memory
 *        region.  It cannot be %NULL.
 * @plen_out: valid read/write length of the translated address. It
 *            cannot be %NULL.
 * @page_mask_out: page mask for the translated address. This is only
 *            meaningful for IOMMU-translated addresses, since the IOMMU
 *            may map pages larger than TARGET_PAGE_SIZE. It can be %NULL
 *            if we don't care about it.
 * @is_write: whether the translation operation is for write
 * @is_mmio: whether this can be MMIO, set true if it can
 * @target_as: the address space targeted by the IOMMU
 * @attrs: transaction attributes
 *
 * This function is called from an RCU critical section.  It is the common
 * part of flatview_do_translate and address_space_translate_cached.
 */
static MemoryRegionSection address_space_translate_iommu(IOMMUMemoryRegion *iommu_mr,
                                                         hwaddr *xlat,
                                                         hwaddr *plen_out,
                                                         hwaddr *page_mask_out,
                                                         bool is_write,
                                                         bool is_mmio,
                                                         AddressSpace **target_as,
                                                         MemTxAttrs attrs)
{
    MemoryRegionSection *section;
    hwaddr page_mask = (hwaddr)-1;

    do {
        hwaddr addr = *xlat;
        IOMMUMemoryRegionClass *imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
        int iommu_idx = 0;
        IOMMUTLBEntry iotlb;

        if (imrc->attrs_to_index) {
            iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
        }

        iotlb = imrc->translate(iommu_mr, addr, is_write ?
                                IOMMU_WO : IOMMU_RO, iommu_idx);

        if (!(iotlb.perm & (1 << is_write))) {
            goto unassigned;
        }

        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        page_mask &= iotlb.addr_mask;
        *plen_out = MIN(*plen_out, (addr | iotlb.addr_mask) - addr + 1);
        *target_as = iotlb.target_as;

        section = address_space_translate_internal(
                address_space_to_dispatch(iotlb.target_as), addr, xlat,
                plen_out, is_mmio);

        iommu_mr = memory_region_get_iommu(section->mr);
    } while (unlikely(iommu_mr));

    if (page_mask_out) {
        *page_mask_out = page_mask;
    }
    return *section;

unassigned:
    return (MemoryRegionSection) { .mr = &io_mem_unassigned };
}

/**
 * flatview_do_translate - translate an address in FlatView
 *
 * @fv: the flat view that we want to translate on
 * @addr: the address to be translated in the above address space
 * @xlat: the translated address offset within the memory region. It
 *        cannot be %NULL.
 * @plen_out: valid read/write length of the translated address. It
 *            can be %NULL when we don't care about it.
 * @page_mask_out: page mask for the translated address. This is only
 *            meaningful for IOMMU-translated addresses, since the IOMMU
 *            may map pages larger than TARGET_PAGE_SIZE. It can be %NULL
 *            if we don't care about it.
 * @is_write: whether the translation operation is for write
 * @is_mmio: whether this can be MMIO, set true if it can
 * @target_as: the address space targeted by the IOMMU
 * @attrs: memory transaction attributes
 *
 * This function is called from an RCU critical section.
 */
static MemoryRegionSection flatview_do_translate(FlatView *fv,
                                                 hwaddr addr,
                                                 hwaddr *xlat,
                                                 hwaddr *plen_out,
                                                 hwaddr *page_mask_out,
                                                 bool is_write,
                                                 bool is_mmio,
                                                 AddressSpace **target_as,
                                                 MemTxAttrs attrs)
{
    MemoryRegionSection *section;
    IOMMUMemoryRegion *iommu_mr;
    hwaddr plen = (hwaddr)(-1);

    if (!plen_out) {
        plen_out = &plen;
    }

    section = address_space_translate_internal(
            flatview_to_dispatch(fv), addr, xlat,
            plen_out, is_mmio);

    iommu_mr = memory_region_get_iommu(section->mr);
    if (unlikely(iommu_mr)) {
        return address_space_translate_iommu(iommu_mr, xlat,
                                             plen_out, page_mask_out,
                                             is_write, is_mmio,
                                             target_as, attrs);
    }
    if (page_mask_out) {
        /* Not behind an IOMMU, use default page size. */
        *page_mask_out = ~TARGET_PAGE_MASK;
    }

    return *section;
}

/* Called from RCU critical section */
IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
                                            bool is_write, MemTxAttrs attrs)
{
    MemoryRegionSection section;
    hwaddr xlat, page_mask;

    /*
     * This can never be MMIO, and we don't really care about plen,
     * only about the page mask.
     */
    section = flatview_do_translate(address_space_to_flatview(as), addr, &xlat,
                                    NULL, &page_mask, is_write, false, &as,
                                    attrs);

    /* Illegal translation */
    if (section.mr == &io_mem_unassigned) {
        goto iotlb_fail;
    }

    /* Convert memory region offset into address space offset */
    xlat += section.offset_within_address_space -
        section.offset_within_region;

    return (IOMMUTLBEntry) {
        .target_as = as,
        .iova = addr & ~page_mask,
        .translated_addr = xlat & ~page_mask,
        .addr_mask = page_mask,
        /* IOTLBs are for DMA, and DMA is only allowed to RAM. */
        .perm = IOMMU_RW,
    };

iotlb_fail:
    return (IOMMUTLBEntry) {0};
}

/* Called from RCU critical section */
MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool is_write,
                                 MemTxAttrs attrs)
{
    MemoryRegion *mr;
    MemoryRegionSection section;
    AddressSpace *as = NULL;

    /* This can be MMIO, so setup MMIO bit. */
    section = flatview_do_translate(fv, addr, xlat, plen, NULL,
                                    is_write, true, &as, attrs);
    mr = section.mr;

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    return mr;
}

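/*
 * TCG keeps one TCGIOMMUNotifier per (IOMMU memory region, iommu_idx)
 * pair that a CPU has translated through: when the IOMMU unmaps an
 * entry, the notifier flushes that CPU's whole TLB so that stale
 * cached translations cannot be used.
 */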
typedef struct TCGIOMMUNotifier {
    IOMMUNotifier n;
    MemoryRegion *mr;
    CPUState *cpu;
    int iommu_idx;
    bool active;
} TCGIOMMUNotifier;

static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    TCGIOMMUNotifier *notifier = container_of(n, TCGIOMMUNotifier, n);

    if (!notifier->active) {
        return;
    }
    tlb_flush(notifier->cpu);
    notifier->active = false;
    /* We leave the notifier struct on the list to avoid reallocating it later.
     * Generally the number of IOMMUs a CPU deals with will be small.
     * In any case we can't unregister the iommu notifier from a notify
     * callback.
     */
}

static void tcg_register_iommu_notifier(CPUState *cpu,
                                        IOMMUMemoryRegion *iommu_mr,
                                        int iommu_idx)
{
    /* Make sure this CPU has an IOMMU notifier registered for this
     * IOMMU/IOMMU index combination, so that we can flush its TLB
     * when the IOMMU tells us the mappings we've cached have changed.
     */
    MemoryRegion *mr = MEMORY_REGION(iommu_mr);
    TCGIOMMUNotifier *notifier = NULL;
    int i;

    for (i = 0; i < cpu->iommu_notifiers->len; i++) {
        notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i);
        if (notifier->mr == mr && notifier->iommu_idx == iommu_idx) {
            break;
        }
    }
    if (i == cpu->iommu_notifiers->len) {
        /* Not found, add a new entry at the end of the array */
        cpu->iommu_notifiers = g_array_set_size(cpu->iommu_notifiers, i + 1);
        notifier = g_new0(TCGIOMMUNotifier, 1);
        g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i) = notifier;

        notifier->mr = mr;
        notifier->iommu_idx = iommu_idx;
        notifier->cpu = cpu;
        /* Rather than trying to register interest in the specific part
         * of the iommu's address space that we've accessed and then
         * expand it later as subsequent accesses touch more of it, we
         * just register interest in the whole thing, on the assumption
         * that iommu reconfiguration will be rare.
         */
        iommu_notifier_init(&notifier->n,
                            tcg_iommu_unmap_notify,
                            IOMMU_NOTIFIER_UNMAP,
                            0,
                            HWADDR_MAX,
                            iommu_idx);
        memory_region_register_iommu_notifier(notifier->mr, &notifier->n,
                                              &error_fatal);
    }

    if (!notifier->active) {
        notifier->active = true;
    }
}

void tcg_iommu_free_notifier_list(CPUState *cpu)
{
    /* Destroy the CPU's notifier list */
    int i;
    TCGIOMMUNotifier *notifier;

    for (i = 0; i < cpu->iommu_notifiers->len; i++) {
        notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i);
        memory_region_unregister_iommu_notifier(notifier->mr, &notifier->n);
        g_free(notifier);
    }
    g_array_free(cpu->iommu_notifiers, true);
}

void tcg_iommu_init_notifier_list(CPUState *cpu)
{
    cpu->iommu_notifiers = g_array_new(false, true, sizeof(TCGIOMMUNotifier *));
}

/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
                                  hwaddr *xlat, hwaddr *plen,
                                  MemTxAttrs attrs, int *prot)
{
    MemoryRegionSection *section;
    IOMMUMemoryRegion *iommu_mr;
    IOMMUMemoryRegionClass *imrc;
    IOMMUTLBEntry iotlb;
    int iommu_idx;
    hwaddr addr = orig_addr;
    AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;

    for (;;) {
        section = address_space_translate_internal(d, addr, &addr, plen, false);

        iommu_mr = memory_region_get_iommu(section->mr);
        if (!iommu_mr) {
            break;
        }

        imrc = memory_region_get_iommu_class_nocheck(iommu_mr);

        iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
        tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx);
        /* We need all the permissions, so pass IOMMU_NONE so the IOMMU
         * doesn't short-cut its translation table walk.
         */
        iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        /* Update the caller's prot bits to remove permissions the IOMMU
         * is giving us a failure response for. If we get down to no
         * permissions left at all we can give up now.
         */
        if (!(iotlb.perm & IOMMU_RO)) {
            *prot &= ~(PAGE_READ | PAGE_EXEC);
        }
        if (!(iotlb.perm & IOMMU_WO)) {
            *prot &= ~PAGE_WRITE;
        }

        if (!*prot) {
            goto translate_fail;
        }

        d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as));
    }

    assert(!memory_region_is_iommu(section->mr));
    *xlat = addr;
    return section;

translate_fail:
    /*
     * We should be given a page-aligned address -- certainly
     * tlb_set_page_with_attrs() does so.  The page offset of xlat
     * is used to index sections[], and PHYS_SECTION_UNASSIGNED = 0.
     * The page portion of xlat will be logged by memory_region_access_valid()
     * when this memory access is rejected, so use the original untranslated
     * physical address.
     */
    assert((orig_addr & ~TARGET_PAGE_MASK) == 0);
    *xlat = orig_addr;
    return &d->map.sections[PHYS_SECTION_UNASSIGNED];
}

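/*
 * Typical usage (illustrative): a target that models two address
 * spaces sets cpu->num_ases before calling this, e.g.
 *
 *     cpu->num_ases = 2;
 *     cpu_address_space_init(cpu, 0, "cpu-memory", get_system_memory());
 *     cpu_address_space_init(cpu, 1, "cpu-secure-memory", secure_mr);
 *
 * Address space 0 also becomes the cpu->as convenience alias.
 */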
void cpu_address_space_init(CPUState *cpu, int asidx,
                            const char *prefix, MemoryRegion *mr)
{
    CPUAddressSpace *newas;
    AddressSpace *as = g_new0(AddressSpace, 1);
    char *as_name;

    assert(mr);
    as_name = g_strdup_printf("%s-%d", prefix, cpu->cpu_index);
    address_space_init(as, mr, as_name);
    g_free(as_name);

    /* Target code should have set num_ases before calling us */
    assert(asidx < cpu->num_ases);

    if (asidx == 0) {
        /* address space 0 gets the convenience alias */
        cpu->as = as;
    }

    /* KVM cannot currently support multiple address spaces. */
    assert(asidx == 0 || !kvm_enabled());

    if (!cpu->cpu_ases) {
        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
        cpu->cpu_ases_count = cpu->num_ases;
    }

    newas = &cpu->cpu_ases[asidx];
    newas->cpu = cpu;
    newas->as = as;
    if (tcg_enabled()) {
        newas->tcg_as_listener.log_global_after_sync = tcg_log_global_after_sync;
        newas->tcg_as_listener.commit = tcg_commit;
        newas->tcg_as_listener.name = "tcg";
        memory_listener_register(&newas->tcg_as_listener, as);
    }
}

void cpu_address_space_destroy(CPUState *cpu, int asidx)
{
    CPUAddressSpace *cpuas;

    assert(cpu->cpu_ases);
    assert(asidx >= 0 && asidx < cpu->num_ases);
    /* KVM cannot currently support multiple address spaces. */
    assert(asidx == 0 || !kvm_enabled());

    cpuas = &cpu->cpu_ases[asidx];
    if (tcg_enabled()) {
        memory_listener_unregister(&cpuas->tcg_as_listener);
    }

    address_space_destroy(cpuas->as);
    g_free_rcu(cpuas->as, rcu);

    if (asidx == 0) {
        /* reset the convenience alias for address space 0 */
        cpu->as = NULL;
    }

    if (--cpu->cpu_ases_count == 0) {
        g_free(cpu->cpu_ases);
        cpu->cpu_ases = NULL;
    }
}

AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
{
    /* Return the AddressSpace corresponding to the specified index */
    return cpu->cpu_ases[asidx].as;
}

/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = qatomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        return block;
    }
    RAMBLOCK_FOREACH(block) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the BQL.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *                  rcu_read_lock()
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * qatomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}

void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    CPUState *cpu;
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    assert(tcg_enabled());
    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    RCU_READ_LOCK_GUARD();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    CPU_FOREACH(cpu) {
        tlb_reset_dirty(cpu, start1, length);
    }
}

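/*
 * Dirty memory tracking is kept in per-client bitmaps (VGA, CODE,
 * MIGRATION), each split into DirtyMemoryBlocks chunks of
 * DIRTY_MEMORY_BLOCK_SIZE bits so that the array can be resized and
 * read under RCU without copying the whole bitmap.
 */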
/* Note: start and end must be within the same ram block.  */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page, start_page;
    bool dirty = false;
    RAMBlock *ramblock;
    uint64_t mr_offset, mr_size;

    if (length == 0) {
        return false;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    start_page = start >> TARGET_PAGE_BITS;
    page = start_page;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);
        ramblock = qemu_get_ram_block(start);
        /* Range sanity check on the ramblock */
        assert(start >= ramblock->offset &&
               start + length <= ramblock->offset + ramblock->used_length);

        while (page < end) {
            unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
            unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
            unsigned long num = MIN(end - page,
                                    DIRTY_MEMORY_BLOCK_SIZE - offset);

            dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
                                                  offset, num);
            page += num;
        }

        mr_offset = (ram_addr_t)(start_page << TARGET_PAGE_BITS) - ramblock->offset;
        mr_size = (end - start_page) << TARGET_PAGE_BITS;
        memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
    }

    if (dirty) {
        cpu_physical_memory_dirty_bits_cleared(start, length);
    }

    return dirty;
}

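/*
 * Usage sketch (illustrative): display code can take a snapshot of the
 * dirty bitmap once per frame and then query it piecewise via the
 * MemoryRegion wrappers, e.g.
 *
 *     DirtyBitmapSnapshot *snap =
 *         memory_region_snapshot_and_clear_dirty(mr, 0, size,
 *                                                DIRTY_MEMORY_VGA);
 *     if (memory_region_snapshot_get_dirty(mr, snap, ofs, len)) {
 *         ... redraw that part of the display ...
 *     }
 *     g_free(snap);
 *
 * Those wrappers route here after converting the MemoryRegion offset
 * to a ram_addr_t.
 */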
9248deaf12cSGerd Hoffmann DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
9255dea4079SPeter Xu     (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client)
9268deaf12cSGerd Hoffmann {
9278deaf12cSGerd Hoffmann     DirtyMemoryBlocks *blocks;
92873188068SPeter Maydell     ram_addr_t start, first, last;
9298deaf12cSGerd Hoffmann     unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL);
9308deaf12cSGerd Hoffmann     DirtyBitmapSnapshot *snap;
9318deaf12cSGerd Hoffmann     unsigned long page, end, dest;
9328deaf12cSGerd Hoffmann 
93373188068SPeter Maydell     start = memory_region_get_ram_addr(mr);
93473188068SPeter Maydell     /* We know we're only called for RAM MemoryRegions */
93573188068SPeter Maydell     assert(start != RAM_ADDR_INVALID);
93673188068SPeter Maydell     start += offset;
93773188068SPeter Maydell 
93873188068SPeter Maydell     first = QEMU_ALIGN_DOWN(start, align);
93973188068SPeter Maydell     last  = QEMU_ALIGN_UP(start + length, align);
94073188068SPeter Maydell 
9418deaf12cSGerd Hoffmann     snap = g_malloc0(sizeof(*snap) +
9428deaf12cSGerd Hoffmann                      ((last - first) >> (TARGET_PAGE_BITS + 3)));
9438deaf12cSGerd Hoffmann     snap->start = first;
9448deaf12cSGerd Hoffmann     snap->end   = last;
9458deaf12cSGerd Hoffmann 
9468deaf12cSGerd Hoffmann     page = first >> TARGET_PAGE_BITS;
9478deaf12cSGerd Hoffmann     end  = last  >> TARGET_PAGE_BITS;
9488deaf12cSGerd Hoffmann     dest = 0;
9498deaf12cSGerd Hoffmann 
950694ea274SDr. David Alan Gilbert     WITH_RCU_READ_LOCK_GUARD() {
951d73415a3SStefan Hajnoczi         blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);
9528deaf12cSGerd Hoffmann 
9538deaf12cSGerd Hoffmann         while (page < end) {
9548deaf12cSGerd Hoffmann             unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
9556ba9b60aSPhilippe Mathieu-Daudé             unsigned long ofs = page % DIRTY_MEMORY_BLOCK_SIZE;
956694ea274SDr. David Alan Gilbert             unsigned long num = MIN(end - page,
9576ba9b60aSPhilippe Mathieu-Daudé                                     DIRTY_MEMORY_BLOCK_SIZE - ofs);
9588deaf12cSGerd Hoffmann 
9596ba9b60aSPhilippe Mathieu-Daudé             assert(QEMU_IS_ALIGNED(ofs, (1 << BITS_PER_LEVEL)));
9608deaf12cSGerd Hoffmann             assert(QEMU_IS_ALIGNED(num,    (1 << BITS_PER_LEVEL)));
9616ba9b60aSPhilippe Mathieu-Daudé             ofs >>= BITS_PER_LEVEL;
9628deaf12cSGerd Hoffmann 
9638deaf12cSGerd Hoffmann             bitmap_copy_and_clear_atomic(snap->dirty + dest,
9646ba9b60aSPhilippe Mathieu-Daudé                                          blocks->blocks[idx] + ofs,
9658deaf12cSGerd Hoffmann                                          num);
9668deaf12cSGerd Hoffmann             page += num;
9678deaf12cSGerd Hoffmann             dest += num >> BITS_PER_LEVEL;
9688deaf12cSGerd Hoffmann         }
969694ea274SDr. David Alan Gilbert     }
9708deaf12cSGerd Hoffmann 
97186a9ae80SNicholas Piggin     cpu_physical_memory_dirty_bits_cleared(start, length);
9728deaf12cSGerd Hoffmann 
973077874e0SPeter Xu     memory_region_clear_dirty_bitmap(mr, offset, length);
974077874e0SPeter Xu 
9758deaf12cSGerd Hoffmann     return snap;
9768deaf12cSGerd Hoffmann }
9778deaf12cSGerd Hoffmann 
9788deaf12cSGerd Hoffmann bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
9798deaf12cSGerd Hoffmann                                             ram_addr_t start,
9808deaf12cSGerd Hoffmann                                             ram_addr_t length)
9818deaf12cSGerd Hoffmann {
9828deaf12cSGerd Hoffmann     unsigned long page, end;
9838deaf12cSGerd Hoffmann 
9848deaf12cSGerd Hoffmann     assert(start >= snap->start);
9858deaf12cSGerd Hoffmann     assert(start + length <= snap->end);
9868deaf12cSGerd Hoffmann 
9878deaf12cSGerd Hoffmann     end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS;
9888deaf12cSGerd Hoffmann     page = (start - snap->start) >> TARGET_PAGE_BITS;
9898deaf12cSGerd Hoffmann 
9908deaf12cSGerd Hoffmann     while (page < end) {
9918deaf12cSGerd Hoffmann         if (test_bit(page, snap->dirty)) {
9928deaf12cSGerd Hoffmann             return true;
9938deaf12cSGerd Hoffmann         }
9948deaf12cSGerd Hoffmann         page++;
9958deaf12cSGerd Hoffmann     }
9968deaf12cSGerd Hoffmann     return false;
9978deaf12cSGerd Hoffmann }
9988deaf12cSGerd Hoffmann 
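/*
 * A minimal consumer sketch (illustrative only; redraw_scanline() is a
 * hypothetical helper): display code typically takes one snapshot per
 * frame via the MemoryRegion-level wrappers in memory.c, which forward
 * to the two functions above, and then queries it per scanline:
 *
 *     DirtyBitmapSnapshot *snap;
 *     snap = memory_region_snapshot_and_clear_dirty(mr, 0, fb_size,
 *                                                   DIRTY_MEMORY_VGA);
 *     for (hwaddr ofs = 0; ofs < fb_size; ofs += stride) {
 *         if (memory_region_snapshot_get_dirty(mr, snap, ofs, stride)) {
 *             redraw_scanline(ofs / stride);
 *         }
 *     }
 *     g_free(snap);
 */
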
99979e2b9aeSPaolo Bonzini /* Called from RCU critical section */
1000bb0e627aSAndreas Färber hwaddr memory_region_section_get_iotlb(CPUState *cpu,
10018f5db641SRichard Henderson                                        MemoryRegionSection *section)
1002e5548617SBlue Swirl {
10038f5db641SRichard Henderson     AddressSpaceDispatch *d = flatview_to_dispatch(section->fv);
10048f5db641SRichard Henderson     return section - d->map.sections;
1005e5548617SBlue Swirl }
10068da3ff18Spbrook 
1007c227f099SAnthony Liguori static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
10085312bd8bSAvi Kivity                             uint16_t section);
100916620684SAlexey Kardashevskiy static subpage_t *subpage_init(FlatView *fv, hwaddr base);
101054688b1eSAvi Kivity 
101153cb28cbSMarcel Apfelbaum static uint16_t phys_section_add(PhysPageMap *map,
101253cb28cbSMarcel Apfelbaum                                  MemoryRegionSection *section)
10135312bd8bSAvi Kivity {
101468f3f65bSPaolo Bonzini     /* The physical section number is ORed with a page-aligned
101568f3f65bSPaolo Bonzini      * pointer to produce the iotlb entries.  Thus it should
101668f3f65bSPaolo Bonzini      * never overflow into the page-aligned value.
101768f3f65bSPaolo Bonzini      */
101853cb28cbSMarcel Apfelbaum     assert(map->sections_nb < TARGET_PAGE_SIZE);
101968f3f65bSPaolo Bonzini 
102053cb28cbSMarcel Apfelbaum     if (map->sections_nb == map->sections_nb_alloc) {
102153cb28cbSMarcel Apfelbaum         map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
102253cb28cbSMarcel Apfelbaum         map->sections = g_renew(MemoryRegionSection, map->sections,
102353cb28cbSMarcel Apfelbaum                                 map->sections_nb_alloc);
10245312bd8bSAvi Kivity     }
102553cb28cbSMarcel Apfelbaum     map->sections[map->sections_nb] = *section;
1026dfde4e6eSPaolo Bonzini     memory_region_ref(section->mr);
102753cb28cbSMarcel Apfelbaum     return map->sections_nb++;
10285312bd8bSAvi Kivity }
10295312bd8bSAvi Kivity 
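/*
 * A worked example of the encoding the assertion above protects,
 * assuming 4 KiB target pages (TARGET_PAGE_BITS == 12): the iotlb
 * value is a page-aligned address ORed with the section number,
 *
 *     0x12345000 | 0x2a  ->  0x1234502a
 *
 * and decoding simply masks the low bits off again, so sections_nb
 * must stay below TARGET_PAGE_SIZE (0x1000 here) or the index would
 * spill into the page-aligned part.
 */
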
1030058bc4b5SPaolo Bonzini static void phys_section_destroy(MemoryRegion *mr)
1031058bc4b5SPaolo Bonzini {
103255b4e80bSDon Slutz     bool have_sub_page = mr->subpage;
103355b4e80bSDon Slutz 
1034dfde4e6eSPaolo Bonzini     memory_region_unref(mr);
1035dfde4e6eSPaolo Bonzini 
103655b4e80bSDon Slutz     if (have_sub_page) {
1037058bc4b5SPaolo Bonzini         subpage_t *subpage = container_of(mr, subpage_t, iomem);
1038b4fefef9SPeter Crosthwaite         object_unref(OBJECT(&subpage->iomem));
1039058bc4b5SPaolo Bonzini         g_free(subpage);
1040058bc4b5SPaolo Bonzini     }
1041058bc4b5SPaolo Bonzini }
1042058bc4b5SPaolo Bonzini 
10436092666eSPaolo Bonzini static void phys_sections_free(PhysPageMap *map)
10445312bd8bSAvi Kivity {
10459affd6fcSPaolo Bonzini     while (map->sections_nb > 0) {
10469affd6fcSPaolo Bonzini         MemoryRegionSection *section = &map->sections[--map->sections_nb];
1047058bc4b5SPaolo Bonzini         phys_section_destroy(section->mr);
1048058bc4b5SPaolo Bonzini     }
10499affd6fcSPaolo Bonzini     g_free(map->sections);
10509affd6fcSPaolo Bonzini     g_free(map->nodes);
10515312bd8bSAvi Kivity }
10525312bd8bSAvi Kivity 
10539950322aSAlexey Kardashevskiy static void register_subpage(FlatView *fv, MemoryRegionSection *section)
10540f0cb164SAvi Kivity {
10559950322aSAlexey Kardashevskiy     AddressSpaceDispatch *d = flatview_to_dispatch(fv);
10560f0cb164SAvi Kivity     subpage_t *subpage;
1057a8170e5eSAvi Kivity     hwaddr base = section->offset_within_address_space
10580f0cb164SAvi Kivity         & TARGET_PAGE_MASK;
1059003a0cf2SPeter Xu     MemoryRegionSection *existing = phys_page_find(d, base);
10600f0cb164SAvi Kivity     MemoryRegionSection subsection = {
10610f0cb164SAvi Kivity         .offset_within_address_space = base,
1062052e87b0SPaolo Bonzini         .size = int128_make64(TARGET_PAGE_SIZE),
10630f0cb164SAvi Kivity     };
1064a8170e5eSAvi Kivity     hwaddr start, end;
10650f0cb164SAvi Kivity 
1066f3705d53SAvi Kivity     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
10670f0cb164SAvi Kivity 
1068f3705d53SAvi Kivity     if (!(existing->mr->subpage)) {
106916620684SAlexey Kardashevskiy         subpage = subpage_init(fv, base);
107016620684SAlexey Kardashevskiy         subsection.fv = fv;
10710f0cb164SAvi Kivity         subsection.mr = &subpage->iomem;
1072ac1970fbSAvi Kivity         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
107353cb28cbSMarcel Apfelbaum                       phys_section_add(&d->map, &subsection));
10740f0cb164SAvi Kivity     } else {
1075f3705d53SAvi Kivity         subpage = container_of(existing->mr, subpage_t, iomem);
10760f0cb164SAvi Kivity     }
10770f0cb164SAvi Kivity     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1078052e87b0SPaolo Bonzini     end = start + int128_get64(section->size) - 1;
107953cb28cbSMarcel Apfelbaum     subpage_register(subpage, start, end,
108053cb28cbSMarcel Apfelbaum                      phys_section_add(&d->map, section));
10810f0cb164SAvi Kivity }
10820f0cb164SAvi Kivity 
10830f0cb164SAvi Kivity 
10849950322aSAlexey Kardashevskiy static void register_multipage(FlatView *fv,
1085052e87b0SPaolo Bonzini                                MemoryRegionSection *section)
108633417e70Sbellard {
10879950322aSAlexey Kardashevskiy     AddressSpaceDispatch *d = flatview_to_dispatch(fv);
1088a8170e5eSAvi Kivity     hwaddr start_addr = section->offset_within_address_space;
108953cb28cbSMarcel Apfelbaum     uint16_t section_index = phys_section_add(&d->map, section);
1090052e87b0SPaolo Bonzini     uint64_t num_pages = int128_get64(int128_rshift(section->size,
1091052e87b0SPaolo Bonzini                                                     TARGET_PAGE_BITS));
1092dd81124bSAvi Kivity 
1093733d5ef5SPaolo Bonzini     assert(num_pages);
1094733d5ef5SPaolo Bonzini     phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
109533417e70Sbellard }
109633417e70Sbellard 
1097494d1997SWei Yang /*
1098494d1997SWei Yang  * The range in *section* may look like this:
1099494d1997SWei Yang  *
1100494d1997SWei Yang  *      |s|PPPPPPP|s|
1101494d1997SWei Yang  *
1102494d1997SWei Yang  * where s stands for subpage and P for page.
1103494d1997SWei Yang  */
11048629d3fcSAlexey Kardashevskiy void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section)
11050f0cb164SAvi Kivity {
1106494d1997SWei Yang     MemoryRegionSection remain = *section;
1107052e87b0SPaolo Bonzini     Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
11080f0cb164SAvi Kivity 
1109494d1997SWei Yang     /* register first subpage */
1110494d1997SWei Yang     if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1111494d1997SWei Yang         uint64_t left = TARGET_PAGE_ALIGN(remain.offset_within_address_space)
1112494d1997SWei Yang                         - remain.offset_within_address_space;
1113733d5ef5SPaolo Bonzini 
1114494d1997SWei Yang         MemoryRegionSection now = remain;
1115052e87b0SPaolo Bonzini         now.size = int128_min(int128_make64(left), now.size);
11169950322aSAlexey Kardashevskiy         register_subpage(fv, &now);
1117494d1997SWei Yang         if (int128_eq(remain.size, now.size)) {
1118494d1997SWei Yang             return;
1119733d5ef5SPaolo Bonzini         }
1120052e87b0SPaolo Bonzini         remain.size = int128_sub(remain.size, now.size);
1121052e87b0SPaolo Bonzini         remain.offset_within_address_space += int128_get64(now.size);
1122052e87b0SPaolo Bonzini         remain.offset_within_region += int128_get64(now.size);
1123494d1997SWei Yang     }
1124494d1997SWei Yang 
1125494d1997SWei Yang     /* register whole pages */
1126494d1997SWei Yang     if (int128_ge(remain.size, page_size)) {
1127494d1997SWei Yang         MemoryRegionSection now = remain;
1128052e87b0SPaolo Bonzini         now.size = int128_and(now.size, int128_neg(page_size));
11299950322aSAlexey Kardashevskiy         register_multipage(fv, &now);
1130494d1997SWei Yang         if (int128_eq(remain.size, now.size)) {
1131494d1997SWei Yang             return;
113269b67646STyler Hall         }
1133494d1997SWei Yang         remain.size = int128_sub(remain.size, now.size);
1134494d1997SWei Yang         remain.offset_within_address_space += int128_get64(now.size);
1135494d1997SWei Yang         remain.offset_within_region += int128_get64(now.size);
11360f0cb164SAvi Kivity     }
1137494d1997SWei Yang 
1138494d1997SWei Yang     /* register last subpage */
1139494d1997SWei Yang     register_subpage(fv, &remain);
11400f0cb164SAvi Kivity }
11410f0cb164SAvi Kivity 
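/*
 * For illustration, with 4 KiB target pages a section covering
 * [0x1800, 0x5400) is split by flatview_add_to_dispatch() as:
 *
 *     [0x1800, 0x2000)  head subpage  -> register_subpage()
 *     [0x2000, 0x5000)  whole pages   -> register_multipage()
 *     [0x5000, 0x5400)  tail subpage  -> register_subpage()
 *
 * which is exactly the |s|PPPPPPP|s| picture above; a small unaligned
 * section can be consumed entirely by the first branch, which then
 * returns early.
 */
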
114262a2744cSSheng Yang void qemu_flush_coalesced_mmio_buffer(void)
114362a2744cSSheng Yang {
114462a2744cSSheng Yang     if (kvm_enabled()) {
114562a2744cSSheng Yang         kvm_flush_coalesced_mmio_buffer();
114562a2744cSSheng Yang     }
114662a2744cSSheng Yang }
114762a2744cSSheng Yang 
1148b2a8658eSUmesh Deshpande void qemu_mutex_lock_ramlist(void)
1149b2a8658eSUmesh Deshpande {
1150b2a8658eSUmesh Deshpande     qemu_mutex_lock(&ram_list.mutex);
1151b2a8658eSUmesh Deshpande }
1152b2a8658eSUmesh Deshpande 
1153b2a8658eSUmesh Deshpande void qemu_mutex_unlock_ramlist(void)
1154b2a8658eSUmesh Deshpande {
1155b2a8658eSUmesh Deshpande     qemu_mutex_unlock(&ram_list.mutex);
1156b2a8658eSUmesh Deshpande }
1157b2a8658eSUmesh Deshpande 
1158ca411b7cSDaniel P. Berrangé GString *ram_block_format(void)
1159be9b23c4SPeter Xu {
1160be9b23c4SPeter Xu     RAMBlock *block;
1161be9b23c4SPeter Xu     char *psize;
1162ca411b7cSDaniel P. Berrangé     GString *buf = g_string_new("");
1163be9b23c4SPeter Xu 
1164694ea274SDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
1165dbc6ae9cSTed Chen     g_string_append_printf(buf, "%24s %8s  %18s %18s %18s %18s %3s\n",
1166dbc6ae9cSTed Chen                            "Block Name", "PSize", "Offset", "Used", "Total",
1167dbc6ae9cSTed Chen                            "HVA", "RO");
1168dbc6ae9cSTed Chen 
1169be9b23c4SPeter Xu     RAMBLOCK_FOREACH(block) {
1170be9b23c4SPeter Xu         psize = size_to_str(block->page_size);
1171ca411b7cSDaniel P. Berrangé         g_string_append_printf(buf, "%24s %8s  0x%016" PRIx64 " 0x%016" PRIx64
1172dbc6ae9cSTed Chen                                " 0x%016" PRIx64 " 0x%016" PRIx64 " %3s\n",
1173dbc6ae9cSTed Chen                                block->idstr, psize,
1174be9b23c4SPeter Xu                                (uint64_t)block->offset,
1175be9b23c4SPeter Xu                                (uint64_t)block->used_length,
1176dbc6ae9cSTed Chen                                (uint64_t)block->max_length,
1177dbc6ae9cSTed Chen                                (uint64_t)(uintptr_t)block->host,
1178dbc6ae9cSTed Chen                                block->mr->readonly ? "ro" : "rw");
1179dbc6ae9cSTed Chen 
1180be9b23c4SPeter Xu         g_free(psize);
1181be9b23c4SPeter Xu     }
1182ca411b7cSDaniel P. Berrangé 
1183ca411b7cSDaniel P. Berrangé     return buf;
1184be9b23c4SPeter Xu }
1185be9b23c4SPeter Xu 
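/*
 * The table built above backs the HMP "info ramblock" monitor command;
 * a typical row (values purely illustrative) renders as:
 *
 *     Block Name   PSize    Offset              Used               ...  RO
 *     pc.ram       4 KiB    0x0000000000000000  0x0000000008000000 ...  rw
 */
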
1186905b7ee4SDavid Hildenbrand static int find_min_backend_pagesize(Object *obj, void *opaque)
11879c607668SAlexey Kardashevskiy {
11889c607668SAlexey Kardashevskiy     long *hpsize_min = opaque;
11899c607668SAlexey Kardashevskiy 
11909c607668SAlexey Kardashevskiy     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
11917d5489e6SDavid Gibson         HostMemoryBackend *backend = MEMORY_BACKEND(obj);
11927d5489e6SDavid Gibson         long hpsize = host_memory_backend_pagesize(backend);
11932b108085SDavid Gibson 
11947d5489e6SDavid Gibson         if (host_memory_backend_is_mapped(backend) && (hpsize < *hpsize_min)) {
11959c607668SAlexey Kardashevskiy             *hpsize_min = hpsize;
11969c607668SAlexey Kardashevskiy         }
11979c607668SAlexey Kardashevskiy     }
11989c607668SAlexey Kardashevskiy 
11999c607668SAlexey Kardashevskiy     return 0;
12009c607668SAlexey Kardashevskiy }
12019c607668SAlexey Kardashevskiy 
1202905b7ee4SDavid Hildenbrand static int find_max_backend_pagesize(Object *obj, void *opaque)
1203905b7ee4SDavid Hildenbrand {
1204905b7ee4SDavid Hildenbrand     long *hpsize_max = opaque;
1205905b7ee4SDavid Hildenbrand 
1206905b7ee4SDavid Hildenbrand     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
1207905b7ee4SDavid Hildenbrand         HostMemoryBackend *backend = MEMORY_BACKEND(obj);
1208905b7ee4SDavid Hildenbrand         long hpsize = host_memory_backend_pagesize(backend);
1209905b7ee4SDavid Hildenbrand 
1210905b7ee4SDavid Hildenbrand         if (host_memory_backend_is_mapped(backend) && (hpsize > *hpsize_max)) {
1211905b7ee4SDavid Hildenbrand             *hpsize_max = hpsize;
1212905b7ee4SDavid Hildenbrand         }
1213905b7ee4SDavid Hildenbrand     }
1214905b7ee4SDavid Hildenbrand 
1215905b7ee4SDavid Hildenbrand     return 0;
1216905b7ee4SDavid Hildenbrand }
1217905b7ee4SDavid Hildenbrand 
1218905b7ee4SDavid Hildenbrand /*
1219905b7ee4SDavid Hildenbrand  * TODO: We assume right now that all mapped host memory backends are
1220905b7ee4SDavid Hildenbrand  * used as RAM; however, some might be used for different purposes.
1221905b7ee4SDavid Hildenbrand  */
1222905b7ee4SDavid Hildenbrand long qemu_minrampagesize(void)
12239c607668SAlexey Kardashevskiy {
12249c607668SAlexey Kardashevskiy     long hpsize = LONG_MAX;
1225ad1172d8SIgor Mammedov     Object *memdev_root = object_resolve_path("/objects", NULL);
12269c607668SAlexey Kardashevskiy 
1227905b7ee4SDavid Hildenbrand     object_child_foreach(memdev_root, find_min_backend_pagesize, &hpsize);
12289c607668SAlexey Kardashevskiy     return hpsize;
12299c607668SAlexey Kardashevskiy }
1230905b7ee4SDavid Hildenbrand 
1231905b7ee4SDavid Hildenbrand long qemu_maxrampagesize(void)
1232905b7ee4SDavid Hildenbrand {
1233ad1172d8SIgor Mammedov     long pagesize = 0;
1234905b7ee4SDavid Hildenbrand     Object *memdev_root = object_resolve_path("/objects", NULL);
1235905b7ee4SDavid Hildenbrand 
1236ad1172d8SIgor Mammedov     object_child_foreach(memdev_root, find_max_backend_pagesize, &pagesize);
1237905b7ee4SDavid Hildenbrand     return pagesize;
1238905b7ee4SDavid Hildenbrand }
12399c607668SAlexey Kardashevskiy 
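/*
 * For example, a guest with one 2 MiB hugetlbfs backend and one
 * anonymous-memory backend on 4 KiB host pages reports
 * qemu_minrampagesize() == 4 KiB and qemu_maxrampagesize() == 2 MiB.
 * With no mapped backend at all the condition in the callbacks never
 * fires, so the minimum stays at its LONG_MAX seed and the maximum
 * at 0.
 */
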
1240d5dbde46SHikaru Nishida #ifdef CONFIG_POSIX
1241d6af99c9SHaozhong Zhang static int64_t get_file_size(int fd)
1242d6af99c9SHaozhong Zhang {
124372d41eb4SStefan Hajnoczi     int64_t size;
124472d41eb4SStefan Hajnoczi #if defined(__linux__)
124572d41eb4SStefan Hajnoczi     struct stat st;
124672d41eb4SStefan Hajnoczi 
124772d41eb4SStefan Hajnoczi     if (fstat(fd, &st) < 0) {
124872d41eb4SStefan Hajnoczi         return -errno;
124972d41eb4SStefan Hajnoczi     }
125072d41eb4SStefan Hajnoczi 
125172d41eb4SStefan Hajnoczi     /* Special handling for devdax character devices */
125272d41eb4SStefan Hajnoczi     if (S_ISCHR(st.st_mode)) {
125372d41eb4SStefan Hajnoczi         g_autofree char *subsystem_path = NULL;
125472d41eb4SStefan Hajnoczi         g_autofree char *subsystem = NULL;
125572d41eb4SStefan Hajnoczi 
125672d41eb4SStefan Hajnoczi         subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem",
125772d41eb4SStefan Hajnoczi                                          major(st.st_rdev), minor(st.st_rdev));
125872d41eb4SStefan Hajnoczi         subsystem = g_file_read_link(subsystem_path, NULL);
125972d41eb4SStefan Hajnoczi 
126072d41eb4SStefan Hajnoczi         if (subsystem && g_str_has_suffix(subsystem, "/dax")) {
126172d41eb4SStefan Hajnoczi             g_autofree char *size_path = NULL;
126272d41eb4SStefan Hajnoczi             g_autofree char *size_str = NULL;
126372d41eb4SStefan Hajnoczi 
126472d41eb4SStefan Hajnoczi             size_path = g_strdup_printf("/sys/dev/char/%d:%d/size",
126572d41eb4SStefan Hajnoczi                                     major(st.st_rdev), minor(st.st_rdev));
126672d41eb4SStefan Hajnoczi 
126772d41eb4SStefan Hajnoczi             if (g_file_get_contents(size_path, &size_str, NULL, NULL)) {
126872d41eb4SStefan Hajnoczi                 return g_ascii_strtoll(size_str, NULL, 0);
126972d41eb4SStefan Hajnoczi             }
127072d41eb4SStefan Hajnoczi         }
127172d41eb4SStefan Hajnoczi     }
127272d41eb4SStefan Hajnoczi #endif /* defined(__linux__) */
127372d41eb4SStefan Hajnoczi 
127472d41eb4SStefan Hajnoczi     /* st.st_size may be zero for special files yet lseek(2) works */
127572d41eb4SStefan Hajnoczi     size = lseek(fd, 0, SEEK_END);
1276d6af99c9SHaozhong Zhang     if (size < 0) {
1277d6af99c9SHaozhong Zhang         return -errno;
1278d6af99c9SHaozhong Zhang     }
1279d6af99c9SHaozhong Zhang     return size;
1280d6af99c9SHaozhong Zhang }
1281d6af99c9SHaozhong Zhang 
1282ce317be9SJingqi Liu static int64_t get_file_align(int fd)
1283ce317be9SJingqi Liu {
1284ce317be9SJingqi Liu     int64_t align = -1;
1285ce317be9SJingqi Liu #if defined(__linux__) && defined(CONFIG_LIBDAXCTL)
1286ce317be9SJingqi Liu     struct stat st;
1287ce317be9SJingqi Liu 
1288ce317be9SJingqi Liu     if (fstat(fd, &st) < 0) {
1289ce317be9SJingqi Liu         return -errno;
1290ce317be9SJingqi Liu     }
1291ce317be9SJingqi Liu 
1292ce317be9SJingqi Liu     /* Special handling for devdax character devices */
1293ce317be9SJingqi Liu     if (S_ISCHR(st.st_mode)) {
1294ce317be9SJingqi Liu         g_autofree char *path = NULL;
1295ce317be9SJingqi Liu         g_autofree char *rpath = NULL;
1296ce317be9SJingqi Liu         struct daxctl_ctx *ctx;
1297ce317be9SJingqi Liu         struct daxctl_region *region;
1298ce317be9SJingqi Liu         int rc = 0;
1299ce317be9SJingqi Liu 
1300ce317be9SJingqi Liu         path = g_strdup_printf("/sys/dev/char/%d:%d",
1301ce317be9SJingqi Liu                     major(st.st_rdev), minor(st.st_rdev));
1302ce317be9SJingqi Liu         rpath = realpath(path, NULL);
13038efdb7baSPeter Maydell         if (!rpath) {
13048efdb7baSPeter Maydell             return -errno;
13058efdb7baSPeter Maydell         }
1306ce317be9SJingqi Liu 
1307ce317be9SJingqi Liu         rc = daxctl_new(&ctx);
1308ce317be9SJingqi Liu         if (rc) {
1309ce317be9SJingqi Liu             return -1;
1310ce317be9SJingqi Liu         }
1311ce317be9SJingqi Liu 
1312ce317be9SJingqi Liu         daxctl_region_foreach(ctx, region) {
1313ce317be9SJingqi Liu             if (strstr(rpath, daxctl_region_get_path(region))) {
1314ce317be9SJingqi Liu                 align = daxctl_region_get_align(region);
1315ce317be9SJingqi Liu                 break;
1316ce317be9SJingqi Liu             }
1317ce317be9SJingqi Liu         }
1318ce317be9SJingqi Liu         daxctl_unref(ctx);
1319ce317be9SJingqi Liu     }
1320ce317be9SJingqi Liu #endif /* defined(__linux__) && defined(CONFIG_LIBDAXCTL) */
1321ce317be9SJingqi Liu 
1322ce317be9SJingqi Liu     return align;
1323ce317be9SJingqi Liu }
1324ce317be9SJingqi Liu 
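/*
 * In practice get_file_align() only yields a real value for devdax
 * character devices: e.g. a /dev/dax0.0 whose region is configured
 * with a 2 MiB alignment returns 2097152, while regular files (or a
 * build without libdaxctl) keep the -1 initializer and leave the
 * caller to choose a default alignment.
 */
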
13258d37b030SMarc-André Lureau static int file_ram_open(const char *path,
13268d37b030SMarc-André Lureau                          const char *region_name,
1327369d6dc4SStefan Hajnoczi                          bool readonly,
13284d6b23f7SDavid Hildenbrand                          bool *created)
1329c902760fSMarcelo Tosatti {
1330c902760fSMarcelo Tosatti     char *filename;
13318ca761f6SPeter Feiner     char *sanitized_name;
13328ca761f6SPeter Feiner     char *c;
13335c3ece79SPaolo Bonzini     int fd = -1;
1334c902760fSMarcelo Tosatti 
13358d37b030SMarc-André Lureau     *created = false;
1336fd97fd44SMarkus Armbruster     for (;;) {
1337369d6dc4SStefan Hajnoczi         fd = open(path, readonly ? O_RDONLY : O_RDWR);
1338fd97fd44SMarkus Armbruster         if (fd >= 0) {
1339ca01f1b8SDavid Hildenbrand             /*
1340ca01f1b8SDavid Hildenbrand              * open(O_RDONLY) won't fail with EISDIR. Check manually if we
1341ca01f1b8SDavid Hildenbrand              * opened a directory and fail similarly to how we fail ENOENT
1342ca01f1b8SDavid Hildenbrand              * in readonly mode. Note that mkstemp() would imply O_RDWR.
1343ca01f1b8SDavid Hildenbrand              */
1344ca01f1b8SDavid Hildenbrand             if (readonly) {
1345ca01f1b8SDavid Hildenbrand                 struct stat file_stat;
1346ca01f1b8SDavid Hildenbrand 
1347ca01f1b8SDavid Hildenbrand                 if (fstat(fd, &file_stat)) {
1348ca01f1b8SDavid Hildenbrand                     close(fd);
1349ca01f1b8SDavid Hildenbrand                     if (errno == EINTR) {
1350ca01f1b8SDavid Hildenbrand                         continue;
1351ca01f1b8SDavid Hildenbrand                     }
1352ca01f1b8SDavid Hildenbrand                     return -errno;
1353ca01f1b8SDavid Hildenbrand                 } else if (S_ISDIR(file_stat.st_mode)) {
1354ca01f1b8SDavid Hildenbrand                     close(fd);
1355ca01f1b8SDavid Hildenbrand                     return -EISDIR;
1356ca01f1b8SDavid Hildenbrand                 }
1357ca01f1b8SDavid Hildenbrand             }
1358fd97fd44SMarkus Armbruster             /* @path names an existing file, use it */
1359fd97fd44SMarkus Armbruster             break;
1360fd97fd44SMarkus Armbruster         }
1361fd97fd44SMarkus Armbruster         if (errno == ENOENT) {
13624d6b23f7SDavid Hildenbrand             if (readonly) {
13634d6b23f7SDavid Hildenbrand                 /* Refuse to create new, readonly files. */
13644d6b23f7SDavid Hildenbrand                 return -ENOENT;
13654d6b23f7SDavid Hildenbrand             }
1366fd97fd44SMarkus Armbruster             /* @path names a file that doesn't exist, create it */
1367fd97fd44SMarkus Armbruster             fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1368fd97fd44SMarkus Armbruster             if (fd >= 0) {
13698d37b030SMarc-André Lureau                 *created = true;
1370fd97fd44SMarkus Armbruster                 break;
1371fd97fd44SMarkus Armbruster             }
1372fd97fd44SMarkus Armbruster         } else if (errno == EISDIR) {
1373fd97fd44SMarkus Armbruster             /* @path names a directory, create a file there */
13748ca761f6SPeter Feiner             /* Make name safe to use with mkstemp by replacing '/' with '_'. */
13758d37b030SMarc-André Lureau             sanitized_name = g_strdup(region_name);
13768ca761f6SPeter Feiner             for (c = sanitized_name; *c != '\0'; c++) {
13778d31d6b6SPavel Fedin                 if (*c == '/') {
13788ca761f6SPeter Feiner                     *c = '_';
13798ca761f6SPeter Feiner                 }
13808d31d6b6SPavel Fedin             }
13818ca761f6SPeter Feiner 
13828ca761f6SPeter Feiner             filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
13838ca761f6SPeter Feiner                                        sanitized_name);
13848ca761f6SPeter Feiner             g_free(sanitized_name);
1385c902760fSMarcelo Tosatti 
1386c902760fSMarcelo Tosatti             fd = mkstemp(filename);
13878d31d6b6SPavel Fedin             if (fd >= 0) {
13888d31d6b6SPavel Fedin                 unlink(filename);
1389fd97fd44SMarkus Armbruster                 g_free(filename);
1390fd97fd44SMarkus Armbruster                 break;
13918d31d6b6SPavel Fedin             }
13928d31d6b6SPavel Fedin             g_free(filename);
1393fd97fd44SMarkus Armbruster         }
1394fd97fd44SMarkus Armbruster         if (errno != EEXIST && errno != EINTR) {
13954d6b23f7SDavid Hildenbrand             return -errno;
1396fd97fd44SMarkus Armbruster         }
1397fd97fd44SMarkus Armbruster         /*
1398fd97fd44SMarkus Armbruster          * Try again on EINTR and EEXIST.  The latter happens when
1399fd97fd44SMarkus Armbruster          * something else creates the file between our two open().
1400fd97fd44SMarkus Armbruster          */
14018d31d6b6SPavel Fedin     }
14028d31d6b6SPavel Fedin 
14038d37b030SMarc-André Lureau     return fd;
14048d37b030SMarc-André Lureau }
14058d37b030SMarc-André Lureau 
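/*
 * A minimal caller sketch (error handling elided); *created tells the
 * caller whether a fresh file was made and hence must still be sized
 * with ftruncate() before use:
 *
 *     bool created;
 *     int fd = file_ram_open("/dev/hugepages", memory_region_name(mr),
 *                            false, &created);
 *     if (fd < 0) {
 *         error_setg_errno(errp, -fd, "can't open backing store");
 *     }
 */
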
14068d37b030SMarc-André Lureau static void *file_ram_alloc(RAMBlock *block,
14078d37b030SMarc-André Lureau                             ram_addr_t memory,
14088d37b030SMarc-André Lureau                             int fd,
14098d37b030SMarc-André Lureau                             bool truncate,
141044a4ff31SJagannathan Raman                             off_t offset,
14118d37b030SMarc-André Lureau                             Error **errp)
14128d37b030SMarc-André Lureau {
1413b444f5c0SDavid Hildenbrand     uint32_t qemu_map_flags;
14148d37b030SMarc-André Lureau     void *area;
14158d37b030SMarc-André Lureau 
1416863e9621SDr. David Alan Gilbert     block->page_size = qemu_fd_getpagesize(fd);
141798376843SHaozhong Zhang     if (block->mr->align % block->page_size) {
141898376843SHaozhong Zhang         error_setg(errp, "alignment 0x%" PRIx64
141998376843SHaozhong Zhang                    " must be a multiple of page size 0x%zx",
142098376843SHaozhong Zhang                    block->mr->align, block->page_size);
142198376843SHaozhong Zhang         return NULL;
142261362b71SDavid Hildenbrand     } else if (block->mr->align && !is_power_of_2(block->mr->align)) {
142361362b71SDavid Hildenbrand         error_setg(errp, "alignment 0x%" PRIx64
142461362b71SDavid Hildenbrand                    " must be a power of two", block->mr->align);
142561362b71SDavid Hildenbrand         return NULL;
14264b870dc4SAlexander Graf     } else if (offset % block->page_size) {
14274b870dc4SAlexander Graf         error_setg(errp, "offset 0x%" PRIx64
14284b870dc4SAlexander Graf                    " must be a multiple of page size 0x%zx",
14294b870dc4SAlexander Graf                    offset, block->page_size);
14304b870dc4SAlexander Graf         return NULL;
143198376843SHaozhong Zhang     }
143298376843SHaozhong Zhang     block->mr->align = MAX(block->page_size, block->mr->align);
14338360668eSHaozhong Zhang #if defined(__s390x__)
14348360668eSHaozhong Zhang     if (kvm_enabled()) {
14358360668eSHaozhong Zhang         block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
14368360668eSHaozhong Zhang     }
14378360668eSHaozhong Zhang #endif
1438fd97fd44SMarkus Armbruster 
1439863e9621SDr. David Alan Gilbert     if (memory < block->page_size) {
1440fd97fd44SMarkus Armbruster         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1441863e9621SDr. David Alan Gilbert                    "or larger than page size 0x%zx",
1442863e9621SDr. David Alan Gilbert                    memory, block->page_size);
14438d37b030SMarc-André Lureau         return NULL;
14441775f111SHaozhong Zhang     }
14451775f111SHaozhong Zhang 
1446863e9621SDr. David Alan Gilbert     memory = ROUND_UP(memory, block->page_size);
1447c902760fSMarcelo Tosatti 
1448c902760fSMarcelo Tosatti     /*
1449c902760fSMarcelo Tosatti      * ftruncate is not supported by hugetlbfs in older
1450c902760fSMarcelo Tosatti      * hosts, so don't bother bailing out on errors.
1451c902760fSMarcelo Tosatti      * If anything goes wrong with it under other filesystems,
1452c902760fSMarcelo Tosatti      * mmap will fail.
1453d6af99c9SHaozhong Zhang      *
1454d6af99c9SHaozhong Zhang      * Do not truncate the non-empty backend file to avoid corrupting
1455d6af99c9SHaozhong Zhang      * the existing data in the file. Disabling shrinking is not
1456d6af99c9SHaozhong Zhang      * enough. For example, the current vNVDIMM implementation stores
1457d6af99c9SHaozhong Zhang      * the guest NVDIMM labels at the end of the backend file. If the
1458d6af99c9SHaozhong Zhang      * backend file is later extended, QEMU will not be able to find
1459d6af99c9SHaozhong Zhang      * those labels. Therefore, extending the non-empty backend file
1460d6af99c9SHaozhong Zhang      * is disabled as well.
1461c902760fSMarcelo Tosatti      */
14624b870dc4SAlexander Graf     if (truncate && ftruncate(fd, offset + memory)) {
1463c902760fSMarcelo Tosatti         perror("ftruncate");
14647f56e740SPaolo Bonzini     }
1465c902760fSMarcelo Tosatti 
14665c52a219SDavid Hildenbrand     qemu_map_flags = (block->flags & RAM_READONLY) ? QEMU_MAP_READONLY : 0;
1467b444f5c0SDavid Hildenbrand     qemu_map_flags |= (block->flags & RAM_SHARED) ? QEMU_MAP_SHARED : 0;
1468b444f5c0SDavid Hildenbrand     qemu_map_flags |= (block->flags & RAM_PMEM) ? QEMU_MAP_SYNC : 0;
14698dbe22c6SDavid Hildenbrand     qemu_map_flags |= (block->flags & RAM_NORESERVE) ? QEMU_MAP_NORESERVE : 0;
1470b444f5c0SDavid Hildenbrand     area = qemu_ram_mmap(fd, memory, block->mr->align, qemu_map_flags, offset);
1471c902760fSMarcelo Tosatti     if (area == MAP_FAILED) {
14727f56e740SPaolo Bonzini         error_setg_errno(errp, errno,
1473fd97fd44SMarkus Armbruster                          "unable to map backing store for guest RAM");
14748d37b030SMarc-André Lureau         return NULL;
1475c902760fSMarcelo Tosatti     }
1476ef36fa14SMarcelo Tosatti 
147704b16653SAlex Williamson     block->fd = fd;
14784b870dc4SAlexander Graf     block->fd_offset = offset;
1479c902760fSMarcelo Tosatti     return area;
1480c902760fSMarcelo Tosatti }
1481c902760fSMarcelo Tosatti #endif
1482c902760fSMarcelo Tosatti 
1483154cc9eaSDr. David Alan Gilbert /* Allocate space within the ram_addr_t space that governs the
1484154cc9eaSDr. David Alan Gilbert  * dirty bitmaps.
1485154cc9eaSDr. David Alan Gilbert  * Called with the ramlist lock held.
1486154cc9eaSDr. David Alan Gilbert  */
1487d17b5288SAlex Williamson static ram_addr_t find_ram_offset(ram_addr_t size)
1488d17b5288SAlex Williamson {
148904b16653SAlex Williamson     RAMBlock *block, *next_block;
14903e837b2cSAlex Williamson     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
149104b16653SAlex Williamson 
149249cd9ac6SStefan Hajnoczi     assert(size != 0); /* it would hand out same offset multiple times */
149349cd9ac6SStefan Hajnoczi 
14940dc3f44aSMike Day     if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
149504b16653SAlex Williamson         return 0;
14960d53d9feSMike Day     }
149704b16653SAlex Williamson 
149899e15582SPeter Xu     RAMBLOCK_FOREACH(block) {
1499154cc9eaSDr. David Alan Gilbert         ram_addr_t candidate, next = RAM_ADDR_MAX;
150004b16653SAlex Williamson 
1501801110abSDr. David Alan Gilbert         /* Align blocks to start on a 'long' in the bitmap
1502801110abSDr. David Alan Gilbert          * which makes the bitmap sync'ing take the fast path.
1503801110abSDr. David Alan Gilbert          */
1504154cc9eaSDr. David Alan Gilbert         candidate = block->offset + block->max_length;
1505801110abSDr. David Alan Gilbert         candidate = ROUND_UP(candidate, BITS_PER_LONG << TARGET_PAGE_BITS);
150604b16653SAlex Williamson 
1507154cc9eaSDr. David Alan Gilbert         /* Search for the closest following block
1508154cc9eaSDr. David Alan Gilbert          * and find the gap.
1509154cc9eaSDr. David Alan Gilbert          */
151099e15582SPeter Xu         RAMBLOCK_FOREACH(next_block) {
1511154cc9eaSDr. David Alan Gilbert             if (next_block->offset >= candidate) {
151204b16653SAlex Williamson                 next = MIN(next, next_block->offset);
151304b16653SAlex Williamson             }
151404b16653SAlex Williamson         }
1515154cc9eaSDr. David Alan Gilbert 
1516154cc9eaSDr. David Alan Gilbert         /* If it fits, remember our place and the size of the gap,
1517154cc9eaSDr. David Alan Gilbert          * but keep going in case there is a smaller gap to fill,
1518154cc9eaSDr. David Alan Gilbert          * thereby avoiding fragmentation.
1519154cc9eaSDr. David Alan Gilbert          */
1520154cc9eaSDr. David Alan Gilbert         if (next - candidate >= size && next - candidate < mingap) {
1521154cc9eaSDr. David Alan Gilbert             offset = candidate;
1522154cc9eaSDr. David Alan Gilbert             mingap = next - candidate;
152304b16653SAlex Williamson         }
1524154cc9eaSDr. David Alan Gilbert 
1525154cc9eaSDr. David Alan Gilbert         trace_find_ram_offset_loop(size, candidate, offset, next, mingap);
152604b16653SAlex Williamson     }
15273e837b2cSAlex Williamson 
15283e837b2cSAlex Williamson     if (offset == RAM_ADDR_MAX) {
15293e837b2cSAlex Williamson         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
15303e837b2cSAlex Williamson                 (uint64_t)size);
15313e837b2cSAlex Williamson         abort();
15323e837b2cSAlex Williamson     }
15333e837b2cSAlex Williamson 
1534154cc9eaSDr. David Alan Gilbert     trace_find_ram_offset(size, offset);
1535154cc9eaSDr. David Alan Gilbert 
153604b16653SAlex Williamson     return offset;
153704b16653SAlex Williamson }
153804b16653SAlex Williamson 
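/*
 * A worked example of the best-fit search above: with blocks already
 * at [0x0, 0x10000000) and [0x40000000, 0x50000000), a request for
 * 0x20000000 bytes sees a 0x30000000-wide gap after the first block
 * and an effectively unbounded one after the second.  Both fit, but
 * the first is smaller, so the new block lands at offset 0x10000000
 * (candidates are first rounded up so every block starts on a
 * BITS_PER_LONG-pages boundary in the dirty bitmap).
 */
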
1539ddb97f1dSJason Baron static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1540ddb97f1dSJason Baron {
1541ddb97f1dSJason Baron     int ret;
1542ddb97f1dSJason Baron 
1543ddb97f1dSJason Baron     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core */
154447c8ca53SMarcel Apfelbaum     if (!machine_dump_guest_core(current_machine)) {
1545ddb97f1dSJason Baron         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1546ddb97f1dSJason Baron         if (ret) {
1547ddb97f1dSJason Baron             perror("qemu_madvise");
1548ddb97f1dSJason Baron             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
15490ff3243aSAkihiko Odaki                             "but dump-guest-core=off specified\n");
1550ddb97f1dSJason Baron         }
1551ddb97f1dSJason Baron     }
1552ddb97f1dSJason Baron }
1553ddb97f1dSJason Baron 
1554422148d3SDr. David Alan Gilbert const char *qemu_ram_get_idstr(RAMBlock *rb)
1555422148d3SDr. David Alan Gilbert {
1556422148d3SDr. David Alan Gilbert     return rb->idstr;
1557422148d3SDr. David Alan Gilbert }
1558422148d3SDr. David Alan Gilbert 
1559754cb9c0SYury Kotov void *qemu_ram_get_host_addr(RAMBlock *rb)
1560754cb9c0SYury Kotov {
1561754cb9c0SYury Kotov     return rb->host;
1562754cb9c0SYury Kotov }
1563754cb9c0SYury Kotov 
1564754cb9c0SYury Kotov ram_addr_t qemu_ram_get_offset(RAMBlock *rb)
1565754cb9c0SYury Kotov {
1566754cb9c0SYury Kotov     return rb->offset;
1567754cb9c0SYury Kotov }
1568754cb9c0SYury Kotov 
1569754cb9c0SYury Kotov ram_addr_t qemu_ram_get_used_length(RAMBlock *rb)
1570754cb9c0SYury Kotov {
1571754cb9c0SYury Kotov     return rb->used_length;
1572754cb9c0SYury Kotov }
1573754cb9c0SYury Kotov 
1574082851a3SDavid Hildenbrand ram_addr_t qemu_ram_get_max_length(RAMBlock *rb)
1575082851a3SDavid Hildenbrand {
1576082851a3SDavid Hildenbrand     return rb->max_length;
1577082851a3SDavid Hildenbrand }
1578082851a3SDavid Hildenbrand 
1579463a4ac2SDr. David Alan Gilbert bool qemu_ram_is_shared(RAMBlock *rb)
1580463a4ac2SDr. David Alan Gilbert {
1581463a4ac2SDr. David Alan Gilbert     return rb->flags & RAM_SHARED;
1582463a4ac2SDr. David Alan Gilbert }
1583463a4ac2SDr. David Alan Gilbert 
15848dbe22c6SDavid Hildenbrand bool qemu_ram_is_noreserve(RAMBlock *rb)
15858dbe22c6SDavid Hildenbrand {
15868dbe22c6SDavid Hildenbrand     return rb->flags & RAM_NORESERVE;
15878dbe22c6SDavid Hildenbrand }
15888dbe22c6SDavid Hildenbrand 
15892ce16640SDr. David Alan Gilbert /* Note: Only set at the start of postcopy */
15902ce16640SDr. David Alan Gilbert bool qemu_ram_is_uf_zeroable(RAMBlock *rb)
15912ce16640SDr. David Alan Gilbert {
15922ce16640SDr. David Alan Gilbert     return rb->flags & RAM_UF_ZEROPAGE;
15932ce16640SDr. David Alan Gilbert }
15942ce16640SDr. David Alan Gilbert 
15952ce16640SDr. David Alan Gilbert void qemu_ram_set_uf_zeroable(RAMBlock *rb)
15962ce16640SDr. David Alan Gilbert {
15972ce16640SDr. David Alan Gilbert     rb->flags |= RAM_UF_ZEROPAGE;
15982ce16640SDr. David Alan Gilbert }
15992ce16640SDr. David Alan Gilbert 
1600b895de50SCédric Le Goater bool qemu_ram_is_migratable(RAMBlock *rb)
1601b895de50SCédric Le Goater {
1602b895de50SCédric Le Goater     return rb->flags & RAM_MIGRATABLE;
1603b895de50SCédric Le Goater }
1604b895de50SCédric Le Goater 
1605b895de50SCédric Le Goater void qemu_ram_set_migratable(RAMBlock *rb)
1606b895de50SCédric Le Goater {
1607b895de50SCédric Le Goater     rb->flags |= RAM_MIGRATABLE;
1608b895de50SCédric Le Goater }
1609b895de50SCédric Le Goater 
1610b895de50SCédric Le Goater void qemu_ram_unset_migratable(RAMBlock *rb)
1611b895de50SCédric Le Goater {
1612b895de50SCédric Le Goater     rb->flags &= ~RAM_MIGRATABLE;
1613b895de50SCédric Le Goater }
1614b895de50SCédric Le Goater 
1615b0182e53SSteve Sistare bool qemu_ram_is_named_file(RAMBlock *rb)
1616b0182e53SSteve Sistare {
1617b0182e53SSteve Sistare     return rb->flags & RAM_NAMED_FILE;
1618b0182e53SSteve Sistare }
1619b0182e53SSteve Sistare 
16206d998f3cSStefan Hajnoczi int qemu_ram_get_fd(RAMBlock *rb)
16216d998f3cSStefan Hajnoczi {
16226d998f3cSStefan Hajnoczi     return rb->fd;
16236d998f3cSStefan Hajnoczi }
16246d998f3cSStefan Hajnoczi 
1625a4a411fbSStefan Hajnoczi /* Called with the BQL held.  */
1626fa53a0e5SGonglei void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
162720cfe881SHu Tao {
1628fa53a0e5SGonglei     RAMBlock *block;
162920cfe881SHu Tao 
1630c5705a77SAvi Kivity     assert(new_block);
1631c5705a77SAvi Kivity     assert(!new_block->idstr[0]);
163284b89d78SCam Macdonell 
163309e5ab63SAnthony Liguori     if (dev) {
163409e5ab63SAnthony Liguori         char *id = qdev_get_dev_path(dev);
163584b89d78SCam Macdonell         if (id) {
163684b89d78SCam Macdonell             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
16377267c094SAnthony Liguori             g_free(id);
163884b89d78SCam Macdonell         }
163984b89d78SCam Macdonell     }
164084b89d78SCam Macdonell     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
164184b89d78SCam Macdonell 
1642694ea274SDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
164399e15582SPeter Xu     RAMBLOCK_FOREACH(block) {
1644fa53a0e5SGonglei         if (block != new_block &&
1645fa53a0e5SGonglei             !strcmp(block->idstr, new_block->idstr)) {
164684b89d78SCam Macdonell             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
164784b89d78SCam Macdonell                     new_block->idstr);
164884b89d78SCam Macdonell             abort();
164984b89d78SCam Macdonell         }
165084b89d78SCam Macdonell     }
1651c5705a77SAvi Kivity }
1652c5705a77SAvi Kivity 
1653a4a411fbSStefan Hajnoczi /* Called with the BQL held.  */
1654fa53a0e5SGonglei void qemu_ram_unset_idstr(RAMBlock *block)
165520cfe881SHu Tao {
1656ae3a7047SMike Day     /* FIXME: arch_init.c assumes that this is not called throughout
1657ae3a7047SMike Day      * migration.  Ignore the problem since hot-unplug during migration
1658ae3a7047SMike Day      * does not work anyway.
1659ae3a7047SMike Day      */
166020cfe881SHu Tao     if (block) {
166120cfe881SHu Tao         memset(block->idstr, 0, sizeof(block->idstr));
166220cfe881SHu Tao     }
166320cfe881SHu Tao }
166420cfe881SHu Tao 
16652b7e9739SSteve Sistare static char *cpr_name(MemoryRegion *mr)
16662b7e9739SSteve Sistare {
16672b7e9739SSteve Sistare     const char *mr_name = memory_region_name(mr);
16682b7e9739SSteve Sistare     g_autofree char *id = mr->dev ? qdev_get_dev_path(mr->dev) : NULL;
16692b7e9739SSteve Sistare 
16702b7e9739SSteve Sistare     if (id) {
16712b7e9739SSteve Sistare         return g_strdup_printf("%s/%s", id, mr_name);
16722b7e9739SSteve Sistare     } else {
16732b7e9739SSteve Sistare         return g_strdup(mr_name);
16742b7e9739SSteve Sistare     }
16752b7e9739SSteve Sistare }
16762b7e9739SSteve Sistare 
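/*
 * For example, given a region named "ram0" on a device whose qdev path
 * resolves to "dimm0" (path shown purely for illustration), cpr_name()
 * returns "dimm0/ram0"; for a region with no owning device it falls
 * back to the plain region name "ram0".
 */
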
1677863e9621SDr. David Alan Gilbert size_t qemu_ram_pagesize(RAMBlock *rb)
1678863e9621SDr. David Alan Gilbert {
1679863e9621SDr. David Alan Gilbert     return rb->page_size;
1680863e9621SDr. David Alan Gilbert }
1681863e9621SDr. David Alan Gilbert 
168267f11b5cSDr. David Alan Gilbert /* Returns the largest page size in use */
168367f11b5cSDr. David Alan Gilbert size_t qemu_ram_pagesize_largest(void)
168467f11b5cSDr. David Alan Gilbert {
168567f11b5cSDr. David Alan Gilbert     RAMBlock *block;
168667f11b5cSDr. David Alan Gilbert     size_t largest = 0;
168767f11b5cSDr. David Alan Gilbert 
168899e15582SPeter Xu     RAMBLOCK_FOREACH(block) {
168967f11b5cSDr. David Alan Gilbert         largest = MAX(largest, qemu_ram_pagesize(block));
169067f11b5cSDr. David Alan Gilbert     }
169167f11b5cSDr. David Alan Gilbert 
169267f11b5cSDr. David Alan Gilbert     return largest;
169367f11b5cSDr. David Alan Gilbert }
169467f11b5cSDr. David Alan Gilbert 
16958490fc78SLuiz Capitulino static int memory_try_enable_merging(void *addr, size_t len)
16968490fc78SLuiz Capitulino {
169775cc7f01SMarcel Apfelbaum     if (!machine_mem_merge(current_machine)) {
16988490fc78SLuiz Capitulino         /* disabled by the user */
16998490fc78SLuiz Capitulino         return 0;
17008490fc78SLuiz Capitulino     }
17018490fc78SLuiz Capitulino 
17028490fc78SLuiz Capitulino     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
17038490fc78SLuiz Capitulino }
17048490fc78SLuiz Capitulino 
1705c7c0e724SDavid Hildenbrand /*
1706c7c0e724SDavid Hildenbrand  * Resizing RAM while migrating can result in the migration being canceled.
1707c7c0e724SDavid Hildenbrand  * Care has to be taken if the guest might have already detected the memory.
170862be4e3aSMichael S. Tsirkin  *
170962be4e3aSMichael S. Tsirkin  * As the memory core doesn't know how the memory is accessed, it is up
171062be4e3aSMichael S. Tsirkin  * to the resize callback to update device state and/or add assertions
171162be4e3aSMichael S. Tsirkin  * to detect misuse, if necessary.
171262be4e3aSMichael S. Tsirkin  */
1713fa53a0e5SGonglei int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
171462be4e3aSMichael S. Tsirkin {
17158f44304cSDavid Hildenbrand     const ram_addr_t oldsize = block->used_length;
1716ce4adc0bSDavid Hildenbrand     const ram_addr_t unaligned_size = newsize;
1717ce4adc0bSDavid Hildenbrand 
171862be4e3aSMichael S. Tsirkin     assert(block);
171962be4e3aSMichael S. Tsirkin 
17209260bd40SRichard Henderson     newsize = TARGET_PAGE_ALIGN(newsize);
17219260bd40SRichard Henderson     newsize = REAL_HOST_PAGE_ALIGN(newsize);
1722129ddaf3SMichael S. Tsirkin 
172362be4e3aSMichael S. Tsirkin     if (block->used_length == newsize) {
1724ce4adc0bSDavid Hildenbrand         /*
1725ce4adc0bSDavid Hildenbrand          * We don't have to resize the ram block (which only knows aligned
1726ce4adc0bSDavid Hildenbrand          * sizes); however, we have to notify if the unaligned size changed.
1727ce4adc0bSDavid Hildenbrand          */
1728ce4adc0bSDavid Hildenbrand         if (unaligned_size != memory_region_size(block->mr)) {
1729ce4adc0bSDavid Hildenbrand             memory_region_set_size(block->mr, unaligned_size);
1730ce4adc0bSDavid Hildenbrand             if (block->resized) {
1731ce4adc0bSDavid Hildenbrand                 block->resized(block->idstr, unaligned_size, block->host);
1732ce4adc0bSDavid Hildenbrand             }
1733ce4adc0bSDavid Hildenbrand         }
173462be4e3aSMichael S. Tsirkin         return 0;
173562be4e3aSMichael S. Tsirkin     }
173662be4e3aSMichael S. Tsirkin 
173762be4e3aSMichael S. Tsirkin     if (!(block->flags & RAM_RESIZEABLE)) {
173862be4e3aSMichael S. Tsirkin         error_setg_errno(errp, EINVAL,
1739a3a92908SPankaj Gupta                          "Size mismatch: %s: 0x" RAM_ADDR_FMT
1740a3a92908SPankaj Gupta                          " != 0x" RAM_ADDR_FMT, block->idstr,
174162be4e3aSMichael S. Tsirkin                          newsize, block->used_length);
174262be4e3aSMichael S. Tsirkin         return -EINVAL;
174362be4e3aSMichael S. Tsirkin     }
174462be4e3aSMichael S. Tsirkin 
174562be4e3aSMichael S. Tsirkin     if (block->max_length < newsize) {
174662be4e3aSMichael S. Tsirkin         error_setg_errno(errp, EINVAL,
1747a3a92908SPankaj Gupta                          "Size too large: %s: 0x" RAM_ADDR_FMT
174862be4e3aSMichael S. Tsirkin                          " > 0x" RAM_ADDR_FMT, block->idstr,
174962be4e3aSMichael S. Tsirkin                          newsize, block->max_length);
175062be4e3aSMichael S. Tsirkin         return -EINVAL;
175162be4e3aSMichael S. Tsirkin     }
175262be4e3aSMichael S. Tsirkin 
17538f44304cSDavid Hildenbrand     /* Notify before modifying the ram block and touching the bitmaps. */
17548f44304cSDavid Hildenbrand     if (block->host) {
17558f44304cSDavid Hildenbrand         ram_block_notify_resize(block->host, oldsize, newsize);
17568f44304cSDavid Hildenbrand     }
17578f44304cSDavid Hildenbrand 
175862be4e3aSMichael S. Tsirkin     cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
175962be4e3aSMichael S. Tsirkin     block->used_length = newsize;
176058d2707eSPaolo Bonzini     cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
176158d2707eSPaolo Bonzini                                         DIRTY_CLIENTS_ALL);
1762ce4adc0bSDavid Hildenbrand     memory_region_set_size(block->mr, unaligned_size);
176362be4e3aSMichael S. Tsirkin     if (block->resized) {
1764ce4adc0bSDavid Hildenbrand         block->resized(block->idstr, unaligned_size, block->host);
176562be4e3aSMichael S. Tsirkin     }
176662be4e3aSMichael S. Tsirkin     return 0;
176762be4e3aSMichael S. Tsirkin }
176862be4e3aSMichael S. Tsirkin 
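/*
 * A usage sketch, assuming "block" was created with RAM_RESIZEABLE and
 * a max_length of at least 256 MiB:
 *
 *     Error *err = NULL;
 *     if (qemu_ram_resize(block, 256 * MiB, &err) < 0) {
 *         error_report_err(err);
 *     }
 *
 * Shrinking takes the same path; only a fixed-size block or a newsize
 * above max_length is rejected with -EINVAL.
 */
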
176961c490e2SBeata Michalska /*
177061c490e2SBeata Michalska  * Trigger a sync of the given RAM block's range [start, start + length)
177161c490e2SBeata Michalska  * with the backing store, if one is available.
177261c490e2SBeata Michalska  * Otherwise this is a no-op.
177361c490e2SBeata Michalska  * @Note: this is supposed to be a synchronous op.
177461c490e2SBeata Michalska  */
1775ab7e41e6SPhilippe Mathieu-Daudé void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
177661c490e2SBeata Michalska {
177761c490e2SBeata Michalska     /* The requested range should fit within the block range */
177861c490e2SBeata Michalska     g_assert((start + length) <= block->used_length);
177961c490e2SBeata Michalska 
178061c490e2SBeata Michalska #ifdef CONFIG_LIBPMEM
178161c490e2SBeata Michalska     /* The lack of support for pmem should not block the sync */
178261c490e2SBeata Michalska     if (ramblock_is_pmem(block)) {
17835d4c9549SAnthony PERARD         void *addr = ramblock_ptr(block, start);
178461c490e2SBeata Michalska         pmem_persist(addr, length);
178561c490e2SBeata Michalska         return;
178661c490e2SBeata Michalska     }
178761c490e2SBeata Michalska #endif
178861c490e2SBeata Michalska     if (block->fd >= 0) {
178961c490e2SBeata Michalska         /**
179061c490e2SBeata Michalska          * In case there is no support for PMEM, or the memory has not
179161c490e2SBeata Michalska          * been specified as persistent (or simply is not persistent),
179261c490e2SBeata Michalska          * fall back to msync.  Less optimal, but it still achieves
179261c490e2SBeata Michalska          * the same goal.
179361c490e2SBeata Michalska          */
17945d4c9549SAnthony PERARD         void *addr = ramblock_ptr(block, start);
179561c490e2SBeata Michalska         if (qemu_msync(addr, length, block->fd)) {
179661c490e2SBeata Michalska             warn_report("%s: failed to sync memory range: start: "
179761c490e2SBeata Michalska                     RAM_ADDR_FMT " length: " RAM_ADDR_FMT,
179861c490e2SBeata Michalska                     __func__, start, length);
179961c490e2SBeata Michalska         }
180061c490e2SBeata Michalska     }
180161c490e2SBeata Michalska }
180261c490e2SBeata Michalska 
18035b82b703SStefan Hajnoczi /* Called with ram_list.mutex held */
1804b84f06c2SDavid Hildenbrand static void dirty_memory_extend(ram_addr_t new_ram_size)
18055b82b703SStefan Hajnoczi {
1806b84f06c2SDavid Hildenbrand     unsigned int old_num_blocks = ram_list.num_dirty_blocks;
1807b84f06c2SDavid Hildenbrand     unsigned int new_num_blocks = DIV_ROUND_UP(new_ram_size,
18085b82b703SStefan Hajnoczi                                                DIRTY_MEMORY_BLOCK_SIZE);
18095b82b703SStefan Hajnoczi     int i;
18105b82b703SStefan Hajnoczi 
18115b82b703SStefan Hajnoczi     /* Only need to extend if block count increased */
18125b82b703SStefan Hajnoczi     if (new_num_blocks <= old_num_blocks) {
18135b82b703SStefan Hajnoczi         return;
18145b82b703SStefan Hajnoczi     }
18155b82b703SStefan Hajnoczi 
18165b82b703SStefan Hajnoczi     for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
18175b82b703SStefan Hajnoczi         DirtyMemoryBlocks *old_blocks;
18185b82b703SStefan Hajnoczi         DirtyMemoryBlocks *new_blocks;
18195b82b703SStefan Hajnoczi         int j;
18205b82b703SStefan Hajnoczi 
1821d73415a3SStefan Hajnoczi         old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
18225b82b703SStefan Hajnoczi         new_blocks = g_malloc(sizeof(*new_blocks) +
18235b82b703SStefan Hajnoczi                               sizeof(new_blocks->blocks[0]) * new_num_blocks);
18245b82b703SStefan Hajnoczi 
18255b82b703SStefan Hajnoczi         if (old_num_blocks) {
18265b82b703SStefan Hajnoczi             memcpy(new_blocks->blocks, old_blocks->blocks,
18275b82b703SStefan Hajnoczi                    old_num_blocks * sizeof(old_blocks->blocks[0]));
18285b82b703SStefan Hajnoczi         }
18295b82b703SStefan Hajnoczi 
18305b82b703SStefan Hajnoczi         for (j = old_num_blocks; j < new_num_blocks; j++) {
18315b82b703SStefan Hajnoczi             new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
18325b82b703SStefan Hajnoczi         }
18335b82b703SStefan Hajnoczi 
1834d73415a3SStefan Hajnoczi         qatomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
18355b82b703SStefan Hajnoczi 
18365b82b703SStefan Hajnoczi         if (old_blocks) {
18375b82b703SStefan Hajnoczi             g_free_rcu(old_blocks, rcu);
18385b82b703SStefan Hajnoczi         }
18395b82b703SStefan Hajnoczi     }
1840b84f06c2SDavid Hildenbrand 
1841b84f06c2SDavid Hildenbrand     ram_list.num_dirty_blocks = new_num_blocks;
18425b82b703SStefan Hajnoczi }
18435b82b703SStefan Hajnoczi 
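/*
 * Each block covers DIRTY_MEMORY_BLOCK_SIZE pages, so for example
 * growing a guest whose pages fit in one block to a size needing four
 * makes DIV_ROUND_UP() return 4: the single old bitmap pointer is
 * copied and three fresh bitmaps are appended, once per dirty-memory
 * client, while concurrent readers keep using the old array until the
 * RCU grace period has passed.
 */
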
18447ce18ca0SDavid Hildenbrand static void ram_block_add(RAMBlock *new_block, Error **errp)
1845c5705a77SAvi Kivity {
18468dbe22c6SDavid Hildenbrand     const bool noreserve = qemu_ram_is_noreserve(new_block);
18477ce18ca0SDavid Hildenbrand     const bool shared = qemu_ram_is_shared(new_block);
1848e1c57ab8SPaolo Bonzini     RAMBlock *block;
18490d53d9feSMike Day     RAMBlock *last_block = NULL;
185015f7a80cSXiaoyao Li     bool free_on_error = false;
1851b84f06c2SDavid Hildenbrand     ram_addr_t ram_size;
185237aa7a0eSMarkus Armbruster     Error *err = NULL;
18532152f5caSJuan Quintela 
1854b2a8658eSUmesh Deshpande     qemu_mutex_lock_ramlist();
18559b8424d5SMichael S. Tsirkin     new_block->offset = find_ram_offset(new_block->max_length);
1856e1c57ab8SPaolo Bonzini 
18570628c182SMarkus Armbruster     if (!new_block->host) {
1858e1c57ab8SPaolo Bonzini         if (xen_enabled()) {
18599b8424d5SMichael S. Tsirkin             xen_ram_alloc(new_block->offset, new_block->max_length,
186037aa7a0eSMarkus Armbruster                           new_block->mr, &err);
186137aa7a0eSMarkus Armbruster             if (err) {
186237aa7a0eSMarkus Armbruster                 error_propagate(errp, err);
186337aa7a0eSMarkus Armbruster                 qemu_mutex_unlock_ramlist();
186439c350eeSPaolo Bonzini                 return;
186537aa7a0eSMarkus Armbruster             }
1866e1c57ab8SPaolo Bonzini         } else {
186725459eb7SDavid Hildenbrand             new_block->host = qemu_anon_ram_alloc(new_block->max_length,
186825459eb7SDavid Hildenbrand                                                   &new_block->mr->align,
18698dbe22c6SDavid Hildenbrand                                                   shared, noreserve);
187039228250SMarkus Armbruster             if (!new_block->host) {
1871ef701d7bSHu Tao                 error_setg_errno(errp, errno,
1872ef701d7bSHu Tao                                  "cannot set up guest memory '%s'",
1873ef701d7bSHu Tao                                  memory_region_name(new_block->mr));
1874ef701d7bSHu Tao                 qemu_mutex_unlock_ramlist();
187539c350eeSPaolo Bonzini                 return;
187639228250SMarkus Armbruster             }
18779b8424d5SMichael S. Tsirkin             memory_try_enable_merging(new_block->host, new_block->max_length);
187815f7a80cSXiaoyao Li             free_on_error = true;
187915f7a80cSXiaoyao Li         }
188015f7a80cSXiaoyao Li     }
188115f7a80cSXiaoyao Li 
188215f7a80cSXiaoyao Li     if (new_block->flags & RAM_GUEST_MEMFD) {
1883644a5277SZhenzhong Duan         int ret;
1884644a5277SZhenzhong Duan 
188515f7a80cSXiaoyao Li         assert(kvm_enabled());
188615f7a80cSXiaoyao Li         assert(new_block->guest_memfd < 0);
188715f7a80cSXiaoyao Li 
1888644a5277SZhenzhong Duan         ret = ram_block_discard_require(true);
1889644a5277SZhenzhong Duan         if (ret < 0) {
1890644a5277SZhenzhong Duan             error_setg_errno(errp, -ret,
1891852f0048SPaolo Bonzini                              "cannot set up private guest memory: discard currently blocked");
1892852f0048SPaolo Bonzini             error_append_hint(errp, "Are you using assigned devices?\n");
1893852f0048SPaolo Bonzini             goto out_free;
1894852f0048SPaolo Bonzini         }
1895852f0048SPaolo Bonzini 
189615f7a80cSXiaoyao Li         new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
189715f7a80cSXiaoyao Li                                                         0, errp);
189815f7a80cSXiaoyao Li         if (new_block->guest_memfd < 0) {
189915f7a80cSXiaoyao Li             qemu_mutex_unlock_ramlist();
190015f7a80cSXiaoyao Li             goto out_free;
1901c902760fSMarcelo Tosatti         }
19026977dfe6SYoshiaki Tamura     }
190394a6b54fSpbrook 
1904b84f06c2SDavid Hildenbrand     ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS;
1905b84f06c2SDavid Hildenbrand     dirty_memory_extend(ram_size);
19060d53d9feSMike Day     /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
19070d53d9feSMike Day      * QLIST (which has an RCU-friendly variant) does not have insertion at
19080d53d9feSMike Day      * tail, so save the last element in last_block.
19090d53d9feSMike Day      */
191099e15582SPeter Xu     RAMBLOCK_FOREACH(block) {
19110d53d9feSMike Day         last_block = block;
19129b8424d5SMichael S. Tsirkin         if (block->max_length < new_block->max_length) {
1913abb26d63SPaolo Bonzini             break;
1914abb26d63SPaolo Bonzini         }
1915abb26d63SPaolo Bonzini     }
1916abb26d63SPaolo Bonzini     if (block) {
19170dc3f44aSMike Day         QLIST_INSERT_BEFORE_RCU(block, new_block, next);
19180d53d9feSMike Day     } else if (last_block) {
19190dc3f44aSMike Day         QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
19200d53d9feSMike Day     } else { /* list is empty */
19210dc3f44aSMike Day         QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1922abb26d63SPaolo Bonzini     }
19230d6d3c87SPaolo Bonzini     ram_list.mru_block = NULL;
192494a6b54fSpbrook 
19250dc3f44aSMike Day     /* Write list before version */
19260dc3f44aSMike Day     smp_wmb();
1927f798b07fSUmesh Deshpande     ram_list.version++;
1928b2a8658eSUmesh Deshpande     qemu_mutex_unlock_ramlist();
1929f798b07fSUmesh Deshpande 
19309b8424d5SMichael S. Tsirkin     cpu_physical_memory_set_dirty_range(new_block->offset,
193158d2707eSPaolo Bonzini                                         new_block->used_length,
193258d2707eSPaolo Bonzini                                         DIRTY_CLIENTS_ALL);
193394a6b54fSpbrook 
1934a904c911SPaolo Bonzini     if (new_block->host) {
19359b8424d5SMichael S. Tsirkin         qemu_ram_setup_dump(new_block->host, new_block->max_length);
19369b8424d5SMichael S. Tsirkin         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1937a028edeaSAlexander Bulekov         /*
1938a028edeaSAlexander Bulekov          * MADV_DONTFORK is also needed by KVM in the absence of a synchronous
1939a028edeaSAlexander Bulekov          * MMU.  Configure it unless the machine is a qtest server, in which
1940a028edeaSAlexander Bulekov          * case KVM is not used and the process may be forked (e.g. for
1940a028edeaSAlexander Bulekov          * fuzzing purposes).
1941a028edeaSAlexander Bulekov          */
1942a028edeaSAlexander Bulekov         if (!qtest_enabled()) {
1943a028edeaSAlexander Bulekov             qemu_madvise(new_block->host, new_block->max_length,
1944a028edeaSAlexander Bulekov                          QEMU_MADV_DONTFORK);
1945a028edeaSAlexander Bulekov         }
19468f44304cSDavid Hildenbrand         ram_block_notify_add(new_block->host, new_block->used_length,
19478f44304cSDavid Hildenbrand                              new_block->max_length);
1948a904c911SPaolo Bonzini     }
194915f7a80cSXiaoyao Li     return;
195015f7a80cSXiaoyao Li 
195115f7a80cSXiaoyao Li out_free:
195215f7a80cSXiaoyao Li     if (free_on_error) {
195315f7a80cSXiaoyao Li         qemu_anon_ram_free(new_block->host, new_block->max_length);
195415f7a80cSXiaoyao Li         new_block->host = NULL;
195515f7a80cSXiaoyao Li     }
195694a6b54fSpbrook }
1957e9a1ab19Sbellard 
1958d5dbde46SHikaru Nishida #ifdef CONFIG_POSIX
19593ec02148SSteve Sistare RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size,
19603ec02148SSteve Sistare                                  qemu_ram_resize_cb resized, MemoryRegion *mr,
196144a4ff31SJagannathan Raman                                  uint32_t ram_flags, int fd, off_t offset,
19623ec02148SSteve Sistare                                  bool grow,
19635c52a219SDavid Hildenbrand                                  Error **errp)
1964e1c57ab8SPaolo Bonzini {
19659fb40bb9SSteve Sistare     ERRP_GUARD();
1966e1c57ab8SPaolo Bonzini     RAMBlock *new_block;
1967ef701d7bSHu Tao     Error *local_err = NULL;
19686169f119SSteve Sistare     int64_t file_size, file_align, share_flags;
19696169f119SSteve Sistare 
19706169f119SSteve Sistare     share_flags = ram_flags & (RAM_PRIVATE | RAM_SHARED);
19716169f119SSteve Sistare     assert(share_flags != (RAM_SHARED | RAM_PRIVATE));
19726169f119SSteve Sistare     ram_flags &= ~RAM_PRIVATE;
1973e1c57ab8SPaolo Bonzini 
1974a4de8552SJunyan He     /* Only the following ram flags are supported for now. */
197556918a12SSean Christopherson     assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
19765c52a219SDavid Hildenbrand                           RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
19773ec02148SSteve Sistare                           RAM_READONLY_FD | RAM_GUEST_MEMFD |
19783ec02148SSteve Sistare                           RAM_RESIZEABLE)) == 0);
19793ec02148SSteve Sistare     assert(max_size >= size);
1980a4de8552SJunyan He 
1981e1c57ab8SPaolo Bonzini     if (xen_enabled()) {
19827f56e740SPaolo Bonzini         error_setg(errp, "-mem-path not supported with Xen");
1983528f46afSFam Zheng         return NULL;
1984e1c57ab8SPaolo Bonzini     }
1985e1c57ab8SPaolo Bonzini 
1986e45e7ae2SMarc-André Lureau     if (kvm_enabled() && !kvm_has_sync_mmu()) {
1987e45e7ae2SMarc-André Lureau         error_setg(errp,
1988e45e7ae2SMarc-André Lureau                    "host lacks kvm mmu notifiers, -mem-path unsupported");
1989e45e7ae2SMarc-André Lureau         return NULL;
1990e45e7ae2SMarc-André Lureau     }
1991e45e7ae2SMarc-André Lureau 
19929260bd40SRichard Henderson     size = TARGET_PAGE_ALIGN(size);
19939260bd40SRichard Henderson     size = REAL_HOST_PAGE_ALIGN(size);
19943ec02148SSteve Sistare     max_size = TARGET_PAGE_ALIGN(max_size);
19953ec02148SSteve Sistare     max_size = REAL_HOST_PAGE_ALIGN(max_size);
19969260bd40SRichard Henderson 
19978d37b030SMarc-André Lureau     file_size = get_file_size(fd);
19983ec02148SSteve Sistare     if (file_size && file_size < offset + max_size && !grow) {
1999719168fbSSteve Sistare         error_setg(errp, "%s backing store size 0x%" PRIx64
2000719168fbSSteve Sistare                    " is too small for 'size' option 0x" RAM_ADDR_FMT
2001719168fbSSteve Sistare                    " plus 'offset' option 0x%" PRIx64,
20023ec02148SSteve Sistare                    memory_region_name(mr), file_size, max_size,
2003719168fbSSteve Sistare                    (uint64_t)offset);
20048d37b030SMarc-André Lureau         return NULL;
20058d37b030SMarc-André Lureau     }
20068d37b030SMarc-André Lureau 
2007ce317be9SJingqi Liu     file_align = get_file_align(fd);
20088f1bdb0eSPeter Maydell     if (file_align > 0 && file_align > mr->align) {
2009ce317be9SJingqi Liu         error_setg(errp, "backing store align 0x%" PRIx64
20105f509751SJingqi Liu                    " is larger than 'align' option 0x%" PRIx64,
2011ce317be9SJingqi Liu                    file_align, mr->align);
2012ce317be9SJingqi Liu         return NULL;
2013ce317be9SJingqi Liu     }
2014ce317be9SJingqi Liu 
2015e1c57ab8SPaolo Bonzini     new_block = g_malloc0(sizeof(*new_block));
2016e1c57ab8SPaolo Bonzini     new_block->mr = mr;
20179b8424d5SMichael S. Tsirkin     new_block->used_length = size;
20183ec02148SSteve Sistare     new_block->max_length = max_size;
20193ec02148SSteve Sistare     new_block->resized = resized;
2020cbfc0171SJunyan He     new_block->flags = ram_flags;
202115f7a80cSXiaoyao Li     new_block->guest_memfd = -1;
20223ec02148SSteve Sistare     new_block->host = file_ram_alloc(new_block, max_size, fd,
20233ec02148SSteve Sistare                                      file_size < offset + max_size,
20243ec02148SSteve Sistare                                      offset, errp);
20257f56e740SPaolo Bonzini     if (!new_block->host) {
20267f56e740SPaolo Bonzini         g_free(new_block);
2027528f46afSFam Zheng         return NULL;
20287f56e740SPaolo Bonzini     }
20297f56e740SPaolo Bonzini 
20307ce18ca0SDavid Hildenbrand     ram_block_add(new_block, &local_err);
2031ef701d7bSHu Tao     if (local_err) {
2032ef701d7bSHu Tao         g_free(new_block);
2033ef701d7bSHu Tao         error_propagate(errp, local_err);
2034528f46afSFam Zheng         return NULL;
2035ef701d7bSHu Tao     }
2036528f46afSFam Zheng     return new_block;
203738b3362dSMarc-André Lureau 
203838b3362dSMarc-André Lureau }
203938b3362dSMarc-André Lureau 
204038b3362dSMarc-André Lureau 
204138b3362dSMarc-André Lureau RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
2042cbfc0171SJunyan He                                    uint32_t ram_flags, const char *mem_path,
20435c52a219SDavid Hildenbrand                                    off_t offset, Error **errp)
204438b3362dSMarc-André Lureau {
204538b3362dSMarc-André Lureau     int fd;
204638b3362dSMarc-André Lureau     bool created;
204738b3362dSMarc-André Lureau     RAMBlock *block;
204838b3362dSMarc-André Lureau 
20495c52a219SDavid Hildenbrand     fd = file_ram_open(mem_path, memory_region_name(mr),
20504d6b23f7SDavid Hildenbrand                        !!(ram_flags & RAM_READONLY_FD), &created);
205138b3362dSMarc-André Lureau     if (fd < 0) {
20524d6b23f7SDavid Hildenbrand         error_setg_errno(errp, -fd, "can't open backing store %s for guest RAM",
20534d6b23f7SDavid Hildenbrand                          mem_path);
20546da4b1c2SDavid Hildenbrand         if (!(ram_flags & RAM_READONLY_FD) && !(ram_flags & RAM_SHARED) &&
20556da4b1c2SDavid Hildenbrand             fd == -EACCES) {
20566da4b1c2SDavid Hildenbrand             /*
20576da4b1c2SDavid Hildenbrand              * If we can open the file R/O (note: will never create a new file)
20586da4b1c2SDavid Hildenbrand              * and we are dealing with a private mapping, there are still ways
20596da4b1c2SDavid Hildenbrand              * to consume such files and get RAM instead of ROM.
20606da4b1c2SDavid Hildenbrand              */
20616da4b1c2SDavid Hildenbrand             fd = file_ram_open(mem_path, memory_region_name(mr), true,
20626da4b1c2SDavid Hildenbrand                                &created);
20636da4b1c2SDavid Hildenbrand             if (fd < 0) {
20646da4b1c2SDavid Hildenbrand                 return NULL;
20656da4b1c2SDavid Hildenbrand             }
20666da4b1c2SDavid Hildenbrand             assert(!created);
20676da4b1c2SDavid Hildenbrand             close(fd);
20686da4b1c2SDavid Hildenbrand             error_append_hint(errp, "Consider opening the backing store"
20696da4b1c2SDavid Hildenbrand                 " read-only but still creating writable RAM using"
20706da4b1c2SDavid Hildenbrand                 " '-object memory-backend-file,readonly=on,rom=off...'"
20716da4b1c2SDavid Hildenbrand                 " (see \"VM templating\" documentation)\n");
20726da4b1c2SDavid Hildenbrand         }
207338b3362dSMarc-André Lureau         return NULL;
207438b3362dSMarc-André Lureau     }
207538b3362dSMarc-André Lureau 
20763ec02148SSteve Sistare     block = qemu_ram_alloc_from_fd(size, size, NULL, mr, ram_flags, fd, offset,
20773ec02148SSteve Sistare                                    false, errp);
207838b3362dSMarc-André Lureau     if (!block) {
207938b3362dSMarc-André Lureau         if (created) {
208038b3362dSMarc-André Lureau             unlink(mem_path);
208138b3362dSMarc-André Lureau         }
208238b3362dSMarc-André Lureau         close(fd);
208338b3362dSMarc-André Lureau         return NULL;
208438b3362dSMarc-André Lureau     }
208538b3362dSMarc-André Lureau 
208638b3362dSMarc-André Lureau     return block;
2087e1c57ab8SPaolo Bonzini }
20880b183fc8SPaolo Bonzini #endif
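/*
 * For illustration, the "VM templating" setup suggested by the hint above:
 * back guest RAM with a read-only template file while keeping the RAM
 * itself writable.  A hypothetical invocation (ids, size, and path are
 * placeholders):
 *
 *   qemu-system-x86_64 \
 *     -object memory-backend-file,id=pc.ram,size=4G,mem-path=/path/to/template,readonly=on,rom=off \
 *     -machine memory-backend=pc.ram ...
 */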
2089e1c57ab8SPaolo Bonzini 
20909fb40bb9SSteve Sistare #ifdef CONFIG_POSIX
20919fb40bb9SSteve Sistare /*
20929fb40bb9SSteve Sistare  * Create MAP_SHARED RAMBlocks by mmap'ing a file descriptor, so they can be
20939fb40bb9SSteve Sistare  * shared with another process if CPR is being used.  Use memfd if available
20949fb40bb9SSteve Sistare  * because it has no size limits, else use POSIX shm.
20959fb40bb9SSteve Sistare  */
20962b7e9739SSteve Sistare static int qemu_ram_get_shared_fd(const char *name, bool *reused, Error **errp)
20979fb40bb9SSteve Sistare {
20982b7e9739SSteve Sistare     int fd = cpr_find_fd(name, 0);
20992b7e9739SSteve Sistare 
21002b7e9739SSteve Sistare     if (fd >= 0) {
21012b7e9739SSteve Sistare         *reused = true;
21022b7e9739SSteve Sistare         return fd;
21032b7e9739SSteve Sistare     }
21049fb40bb9SSteve Sistare 
21059fb40bb9SSteve Sistare     if (qemu_memfd_check(0)) {
21069fb40bb9SSteve Sistare         fd = qemu_memfd_create(name, 0, 0, 0, 0, errp);
21079fb40bb9SSteve Sistare     } else {
21089fb40bb9SSteve Sistare         fd = qemu_shm_alloc(0, errp);
21099fb40bb9SSteve Sistare     }
21102b7e9739SSteve Sistare 
21112b7e9739SSteve Sistare     if (fd >= 0) {
21122b7e9739SSteve Sistare         cpr_save_fd(name, 0, fd);
21132b7e9739SSteve Sistare     }
21142b7e9739SSteve Sistare     *reused = false;
21159fb40bb9SSteve Sistare     return fd;
21169fb40bb9SSteve Sistare }
21179fb40bb9SSteve Sistare #endif
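/*
 * For illustration, the memfd-with-shm-fallback idea used above, sketched
 * against the raw Linux/POSIX APIs instead of the qemu_memfd_create() and
 * qemu_shm_alloc() wrappers (error handling trimmed; shm_open() expects a
 * '/'-prefixed name):
 */
#if 0 /* illustrative sketch only, not compiled */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

int get_shared_fd_sketch(const char *name, size_t size)
{
    int fd = memfd_create(name, MFD_CLOEXEC);   /* no shm-mount size limit */

    if (fd < 0) {
        /* Fall back to POSIX shm, bounded by the shm mount size. */
        fd = shm_open(name, O_RDWR | O_CREAT | O_EXCL, 0600);
        if (fd < 0) {
            return -1;
        }
        shm_unlink(name);   /* keep only the fd, drop the name */
    }
    if (ftruncate(fd, size) < 0) {
        close(fd);
        return -1;
    }
    return fd;
}
#endif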
21189fb40bb9SSteve Sistare 
211962be4e3aSMichael S. Tsirkin static
2120528f46afSFam Zheng RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
21213ec02148SSteve Sistare                                   qemu_ram_resize_cb resized,
2122ebef62d0SDavid Hildenbrand                                   void *host, uint32_t ram_flags,
2123ef701d7bSHu Tao                                   MemoryRegion *mr, Error **errp)
2124e1c57ab8SPaolo Bonzini {
2125e1c57ab8SPaolo Bonzini     RAMBlock *new_block;
2126ef701d7bSHu Tao     Error *local_err = NULL;
21276169f119SSteve Sistare     int align, share_flags;
21286169f119SSteve Sistare 
21296169f119SSteve Sistare     share_flags = ram_flags & (RAM_PRIVATE | RAM_SHARED);
21306169f119SSteve Sistare     assert(share_flags != (RAM_SHARED | RAM_PRIVATE));
21316169f119SSteve Sistare     ram_flags &= ~RAM_PRIVATE;
2132e1c57ab8SPaolo Bonzini 
21338dbe22c6SDavid Hildenbrand     assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
213415f7a80cSXiaoyao Li                           RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
2135ebef62d0SDavid Hildenbrand     assert(!host ^ (ram_flags & RAM_PREALLOC));
21369fb40bb9SSteve Sistare     assert(max_size >= size);
21379fb40bb9SSteve Sistare 
21389fb40bb9SSteve Sistare #ifdef CONFIG_POSIX         /* ignore RAM_SHARED for Windows */
21399fb40bb9SSteve Sistare     if (!host) {
214091792807SSteve Sistare         if (!share_flags && current_machine->aux_ram_share) {
214191792807SSteve Sistare             ram_flags |= RAM_SHARED;
214291792807SSteve Sistare         }
21439fb40bb9SSteve Sistare         if (ram_flags & RAM_SHARED) {
21442b7e9739SSteve Sistare             bool reused;
21452b7e9739SSteve Sistare             g_autofree char *name = cpr_name(mr);
21462b7e9739SSteve Sistare             int fd = qemu_ram_get_shared_fd(name, &reused, errp);
21479fb40bb9SSteve Sistare 
21489fb40bb9SSteve Sistare             if (fd < 0) {
21499fb40bb9SSteve Sistare                 return NULL;
21509fb40bb9SSteve Sistare             }
21519fb40bb9SSteve Sistare 
21529fb40bb9SSteve Sistare             /* Use the same alignment as qemu_anon_ram_alloc */
21539fb40bb9SSteve Sistare             mr->align = QEMU_VMALLOC_ALIGN;
21549fb40bb9SSteve Sistare 
21559fb40bb9SSteve Sistare             /*
21569fb40bb9SSteve Sistare              * This can fail if the shm mount size is too small, or alloc from
21579fb40bb9SSteve Sistare              * This can fail if the shm mount size is too small or if
21589fb40bb9SSteve Sistare              * allocating from an fd is not supported, but previous QEMU
21599fb40bb9SSteve Sistare              * versions that called qemu_anon_ram_alloc for anonymous shared
21609fb40bb9SSteve Sistare              * memory could have succeeded.  Quietly fail and fall back.
21612b7e9739SSteve Sistare              * After cpr-transfer, new QEMU could create a memory region
21622b7e9739SSteve Sistare              * with a larger max size than old, so pass reused to grow the
21632b7e9739SSteve Sistare              * region if necessary.  The extra space will be usable after a
21642b7e9739SSteve Sistare              * guest reset.
21659fb40bb9SSteve Sistare              */
21669fb40bb9SSteve Sistare             new_block = qemu_ram_alloc_from_fd(size, max_size, resized, mr,
21672b7e9739SSteve Sistare                                                ram_flags, fd, 0, reused, NULL);
21689fb40bb9SSteve Sistare             if (new_block) {
21699fb40bb9SSteve Sistare                 trace_qemu_ram_alloc_shared(name, new_block->used_length,
21709fb40bb9SSteve Sistare                                             new_block->max_length, fd,
21719fb40bb9SSteve Sistare                                             new_block->host);
21729fb40bb9SSteve Sistare                 return new_block;
21739fb40bb9SSteve Sistare             }
21749fb40bb9SSteve Sistare 
21752b7e9739SSteve Sistare             cpr_delete_fd(name, 0);
21769fb40bb9SSteve Sistare             close(fd);
21779fb40bb9SSteve Sistare             /* fall back to anon allocation */
21789fb40bb9SSteve Sistare         }
21799fb40bb9SSteve Sistare     }
21809fb40bb9SSteve Sistare #endif
2181ebef62d0SDavid Hildenbrand 
21829260bd40SRichard Henderson     align = qemu_real_host_page_size();
21839260bd40SRichard Henderson     align = MAX(align, TARGET_PAGE_SIZE);
21849260bd40SRichard Henderson     size = ROUND_UP(size, align);
21859260bd40SRichard Henderson     max_size = ROUND_UP(max_size, align);
21869260bd40SRichard Henderson 
2187e1c57ab8SPaolo Bonzini     new_block = g_malloc0(sizeof(*new_block));
2188e1c57ab8SPaolo Bonzini     new_block->mr = mr;
218962be4e3aSMichael S. Tsirkin     new_block->resized = resized;
21909b8424d5SMichael S. Tsirkin     new_block->used_length = size;
21919b8424d5SMichael S. Tsirkin     new_block->max_length = max_size;
2192e1c57ab8SPaolo Bonzini     new_block->fd = -1;
219315f7a80cSXiaoyao Li     new_block->guest_memfd = -1;
21948e3b0cbbSMarc-André Lureau     new_block->page_size = qemu_real_host_page_size();
2195e1c57ab8SPaolo Bonzini     new_block->host = host;
2196ebef62d0SDavid Hildenbrand     new_block->flags = ram_flags;
21977ce18ca0SDavid Hildenbrand     ram_block_add(new_block, &local_err);
2198ef701d7bSHu Tao     if (local_err) {
2199ef701d7bSHu Tao         g_free(new_block);
2200ef701d7bSHu Tao         error_propagate(errp, local_err);
2201528f46afSFam Zheng         return NULL;
2202ef701d7bSHu Tao     }
2203528f46afSFam Zheng     return new_block;
2204e1c57ab8SPaolo Bonzini }
2205e1c57ab8SPaolo Bonzini 
2206528f46afSFam Zheng RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
220762be4e3aSMichael S. Tsirkin                                    MemoryRegion *mr, Error **errp)
220862be4e3aSMichael S. Tsirkin {
2209ebef62d0SDavid Hildenbrand     return qemu_ram_alloc_internal(size, size, NULL, host, RAM_PREALLOC, mr,
2210ebef62d0SDavid Hildenbrand                                    errp);
221162be4e3aSMichael S. Tsirkin }
221262be4e3aSMichael S. Tsirkin 
2213ebef62d0SDavid Hildenbrand RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
221406329cceSMarcel Apfelbaum                          MemoryRegion *mr, Error **errp)
22156977dfe6SYoshiaki Tamura {
22166169f119SSteve Sistare     assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD |
22176169f119SSteve Sistare                           RAM_PRIVATE)) == 0);
2218ebef62d0SDavid Hildenbrand     return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
221962be4e3aSMichael S. Tsirkin }
222062be4e3aSMichael S. Tsirkin 
2221528f46afSFam Zheng RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
22223ec02148SSteve Sistare                                     qemu_ram_resize_cb resized,
222362be4e3aSMichael S. Tsirkin                                     MemoryRegion *mr, Error **errp)
222462be4e3aSMichael S. Tsirkin {
2225ebef62d0SDavid Hildenbrand     return qemu_ram_alloc_internal(size, maxsz, resized, NULL,
2226ebef62d0SDavid Hildenbrand                                    RAM_RESIZEABLE, mr, errp);
22276977dfe6SYoshiaki Tamura }
22286977dfe6SYoshiaki Tamura 
222943771539SPaolo Bonzini static void reclaim_ramblock(RAMBlock *block)
2230e9a1ab19Sbellard {
22317bd4f430SPaolo Bonzini     if (block->flags & RAM_PREALLOC) {
2232cd19cfa2SHuang Ying         ;
2233dfeaf2abSMarkus Armbruster     } else if (xen_enabled()) {
2234dfeaf2abSMarkus Armbruster         xen_invalidate_map_cache_entry(block->host);
2235089f3f76SStefan Weil #ifndef _WIN32
22363435f395SMarkus Armbruster     } else if (block->fd >= 0) {
223753adb9d4SMurilo Opsfelder Araujo         qemu_ram_munmap(block->fd, block->host, block->max_length);
223804b16653SAlex Williamson         close(block->fd);
2239089f3f76SStefan Weil #endif
224004b16653SAlex Williamson     } else {
22419b8424d5SMichael S. Tsirkin         qemu_anon_ram_free(block->host, block->max_length);
224204b16653SAlex Williamson     }
224315f7a80cSXiaoyao Li 
224415f7a80cSXiaoyao Li     if (block->guest_memfd >= 0) {
224515f7a80cSXiaoyao Li         close(block->guest_memfd);
2246852f0048SPaolo Bonzini         ram_block_discard_require(false);
224715f7a80cSXiaoyao Li     }
224815f7a80cSXiaoyao Li 
22497267c094SAnthony Liguori     g_free(block);
225043771539SPaolo Bonzini }
225143771539SPaolo Bonzini 
2252f1060c55SFam Zheng void qemu_ram_free(RAMBlock *block)
225343771539SPaolo Bonzini {
22542b7e9739SSteve Sistare     g_autofree char *name = NULL;
22552b7e9739SSteve Sistare 
225685bc2a15SMarc-André Lureau     if (!block) {
225785bc2a15SMarc-André Lureau         return;
225885bc2a15SMarc-André Lureau     }
225985bc2a15SMarc-André Lureau 
22600987d735SPaolo Bonzini     if (block->host) {
22618f44304cSDavid Hildenbrand         ram_block_notify_remove(block->host, block->used_length,
22628f44304cSDavid Hildenbrand                                 block->max_length);
22630987d735SPaolo Bonzini     }
22640987d735SPaolo Bonzini 
226543771539SPaolo Bonzini     qemu_mutex_lock_ramlist();
22662b7e9739SSteve Sistare     name = cpr_name(block->mr);
22672b7e9739SSteve Sistare     cpr_delete_fd(name, 0);
22680dc3f44aSMike Day     QLIST_REMOVE_RCU(block, next);
226943771539SPaolo Bonzini     ram_list.mru_block = NULL;
22700dc3f44aSMike Day     /* Write list before version */
22710dc3f44aSMike Day     smp_wmb();
227243771539SPaolo Bonzini     ram_list.version++;
227343771539SPaolo Bonzini     call_rcu(block, reclaim_ramblock, rcu);
2274b2a8658eSUmesh Deshpande     qemu_mutex_unlock_ramlist();
2275e9a1ab19Sbellard }
2276e9a1ab19Sbellard 
2277cd19cfa2SHuang Ying #ifndef _WIN32
2278cd19cfa2SHuang Ying void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2279cd19cfa2SHuang Ying {
2280cd19cfa2SHuang Ying     RAMBlock *block;
2281cd19cfa2SHuang Ying     ram_addr_t offset;
2282cd19cfa2SHuang Ying     int flags;
2283cd19cfa2SHuang Ying     void *area, *vaddr;
22849e6b9f37SDavid Hildenbrand     int prot;
2285cd19cfa2SHuang Ying 
228699e15582SPeter Xu     RAMBLOCK_FOREACH(block) {
2287cd19cfa2SHuang Ying         offset = addr - block->offset;
22889b8424d5SMichael S. Tsirkin         if (offset < block->max_length) {
22891240be24SMichael S. Tsirkin             vaddr = ramblock_ptr(block, offset);
22907bd4f430SPaolo Bonzini             if (block->flags & RAM_PREALLOC) {
2291cd19cfa2SHuang Ying                 ;
2292dfeaf2abSMarkus Armbruster             } else if (xen_enabled()) {
2293dfeaf2abSMarkus Armbruster                 abort();
2294cd19cfa2SHuang Ying             } else {
2295cd19cfa2SHuang Ying                 flags = MAP_FIXED;
2296dbb92eeaSDavid Hildenbrand                 flags |= block->flags & RAM_SHARED ?
2297dbb92eeaSDavid Hildenbrand                          MAP_SHARED : MAP_PRIVATE;
2298d94e0bc9SDavid Hildenbrand                 flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
22999e6b9f37SDavid Hildenbrand                 prot = PROT_READ;
23009e6b9f37SDavid Hildenbrand                 prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
23013435f395SMarkus Armbruster                 if (block->fd >= 0) {
23029e6b9f37SDavid Hildenbrand                     area = mmap(vaddr, length, prot, flags, block->fd,
23039e6b9f37SDavid Hildenbrand                                 offset + block->fd_offset);
2304cd19cfa2SHuang Ying                 } else {
2305dbb92eeaSDavid Hildenbrand                     flags |= MAP_ANONYMOUS;
23069e6b9f37SDavid Hildenbrand                     area = mmap(vaddr, length, prot, flags, -1, 0);
2307cd19cfa2SHuang Ying                 }
2308cd19cfa2SHuang Ying                 if (area != vaddr) {
2309493d89bfSAlistair Francis                     error_report("Could not remap addr: "
2310493d89bfSAlistair Francis                                  RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
2311cd19cfa2SHuang Ying                                  length, addr);
2312cd19cfa2SHuang Ying                     exit(1);
2313cd19cfa2SHuang Ying                 }
23148490fc78SLuiz Capitulino                 memory_try_enable_merging(vaddr, length);
2315ddb97f1dSJason Baron                 qemu_ram_setup_dump(vaddr, length);
2316cd19cfa2SHuang Ying             }
2317cd19cfa2SHuang Ying         }
2318cd19cfa2SHuang Ying     }
2319cd19cfa2SHuang Ying }
2320cd19cfa2SHuang Ying #endif /* !_WIN32 */
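/*
 * For illustration, the core remap trick in qemu_ram_remap() above: an
 * anonymous MAP_FIXED mmap() atomically replaces the existing mapping with
 * fresh zero pages at the same virtual address.  A minimal sketch covering
 * only the anonymous, writable case:
 */
#if 0 /* illustrative sketch only, not compiled */
#include <sys/mman.h>

int remap_zero_sketch(void *vaddr, size_t length)
{
    void *area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                      MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    /* With MAP_FIXED, success means the mapping landed exactly at vaddr. */
    return area == vaddr ? 0 : -1;
}
#endif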
2321cd19cfa2SHuang Ying 
2322a99dd337SJuergen Gross /*
2323a99dd337SJuergen Gross  * Return a host pointer to the guest's RAM.
23245a5585f4SEdgar E. Iglesias  * For Xen, foreign mappings get created if they don't already exist.
23250dc3f44aSMike Day  *
23265a5585f4SEdgar E. Iglesias  * @block: block for the RAM to look up (optional and may be NULL).
23275a5585f4SEdgar E. Iglesias  * @addr: address within the memory region.
23285a5585f4SEdgar E. Iglesias  * @size: pointer to requested size (optional and may be NULL).
23295a5585f4SEdgar E. Iglesias  *        size may get modified and on return hold a value smaller
23305a5585f4SEdgar E. Iglesias  *        than what was requested.
23315a5585f4SEdgar E. Iglesias  * @lock: whether to lock the mapping in xen-mapcache until invalidated.
23325a5585f4SEdgar E. Iglesias  * @is_write: hint whether to map RW or RO in the xen-mapcache.
23335a5585f4SEdgar E. Iglesias  *            (optional and may always be set to true).
23340dc3f44aSMike Day  *
2335e81bcda5SPaolo Bonzini  * Called within RCU critical section.
2336ae3a7047SMike Day  */
2337aab4631aSManos Pitsidianakis static void *qemu_ram_ptr_length(RAMBlock *block, ram_addr_t addr,
23385a5585f4SEdgar E. Iglesias                                  hwaddr *size, bool lock,
23395a5585f4SEdgar E. Iglesias                                  bool is_write)
234038bee5dcSStefano Stabellini {
2341a99dd337SJuergen Gross     hwaddr len = 0;
2342a99dd337SJuergen Gross 
2343a99dd337SJuergen Gross     if (size && *size == 0) {
23448ab934f9SStefano Stabellini         return NULL;
23458ab934f9SStefano Stabellini     }
2346e81bcda5SPaolo Bonzini 
23473655cb9cSGonglei     if (block == NULL) {
2348e81bcda5SPaolo Bonzini         block = qemu_get_ram_block(addr);
23490878d0e1SPaolo Bonzini         addr -= block->offset;
23503655cb9cSGonglei     }
2351a99dd337SJuergen Gross     if (size) {
23520878d0e1SPaolo Bonzini         *size = MIN(*size, block->max_length - addr);
2353a99dd337SJuergen Gross         len = *size;
2354a99dd337SJuergen Gross     }
2355e81bcda5SPaolo Bonzini 
2356e81bcda5SPaolo Bonzini     if (xen_enabled() && block->host == NULL) {
2357e81bcda5SPaolo Bonzini         /* We need to check whether the requested address is in RAM
2358e81bcda5SPaolo Bonzini          * because we don't want to map the entire guest memory in QEMU.
2359e81bcda5SPaolo Bonzini          * If it is, just map the requested area.
2360e81bcda5SPaolo Bonzini          */
2361a5bdc451SEdgar E. Iglesias         if (xen_mr_is_memory(block->mr)) {
23625d1c2602SEdgar E. Iglesias             return xen_map_cache(block->mr, block->offset + addr,
236349a72029SEdgar E. Iglesias                                  len, block->offset,
236449a72029SEdgar E. Iglesias                                  lock, lock, is_write);
236538bee5dcSStefano Stabellini         }
236638bee5dcSStefano Stabellini 
23675a5585f4SEdgar E. Iglesias         block->host = xen_map_cache(block->mr, block->offset,
236849a72029SEdgar E. Iglesias                                     block->max_length,
236949a72029SEdgar E. Iglesias                                     block->offset,
237049a72029SEdgar E. Iglesias                                     1, lock, is_write);
237138bee5dcSStefano Stabellini     }
2372e81bcda5SPaolo Bonzini 
23730878d0e1SPaolo Bonzini     return ramblock_ptr(block, addr);
237438bee5dcSStefano Stabellini }
237538bee5dcSStefano Stabellini 
2376a99dd337SJuergen Gross /*
2377a99dd337SJuergen Gross  * Return a host pointer to ram allocated with qemu_ram_alloc.
2378a99dd337SJuergen Gross  * This should not be used for general purpose DMA.  Use address_space_map
2379a99dd337SJuergen Gross  * or address_space_rw instead. For local memory (e.g. video ram) that the
2380a99dd337SJuergen Gross  * device owns, use memory_region_get_ram_ptr.
2381a99dd337SJuergen Gross  *
2382a99dd337SJuergen Gross  * Called within RCU critical section.
2383a99dd337SJuergen Gross  */
2384a99dd337SJuergen Gross void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
2385a99dd337SJuergen Gross {
23865a5585f4SEdgar E. Iglesias     return qemu_ram_ptr_length(ram_block, addr, NULL, false, true);
2387a99dd337SJuergen Gross }
2388a99dd337SJuergen Gross 
2389f90bb71bSDr. David Alan Gilbert /* Return the offset of a hostpointer within a ramblock */
2390f90bb71bSDr. David Alan Gilbert ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host)
2391f90bb71bSDr. David Alan Gilbert {
2392f90bb71bSDr. David Alan Gilbert     ram_addr_t res = (uint8_t *)host - (uint8_t *)rb->host;
2393f90bb71bSDr. David Alan Gilbert     assert((uintptr_t)host >= (uintptr_t)rb->host);
2394f90bb71bSDr. David Alan Gilbert     assert(res < rb->max_length);
2395f90bb71bSDr. David Alan Gilbert 
2396f90bb71bSDr. David Alan Gilbert     return res;
2397f90bb71bSDr. David Alan Gilbert }
2398f90bb71bSDr. David Alan Gilbert 
2399422148d3SDr. David Alan Gilbert RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
2400422148d3SDr. David Alan Gilbert                                    ram_addr_t *offset)
24015579c7f3Spbrook {
240294a6b54fSpbrook     RAMBlock *block;
240394a6b54fSpbrook     uint8_t *host = ptr;
240494a6b54fSpbrook 
2405868bb33fSJan Kiszka     if (xen_enabled()) {
2406f615f396SPaolo Bonzini         ram_addr_t ram_addr;
2407694ea274SDr. David Alan Gilbert         RCU_READ_LOCK_GUARD();
2408f615f396SPaolo Bonzini         ram_addr = xen_ram_addr_from_mapcache(ptr);
2409596ccccdSEdgar E. Iglesias         if (ram_addr == RAM_ADDR_INVALID) {
2410596ccccdSEdgar E. Iglesias             return NULL;
2411596ccccdSEdgar E. Iglesias         }
2412596ccccdSEdgar E. Iglesias 
2413f615f396SPaolo Bonzini         block = qemu_get_ram_block(ram_addr);
2414422148d3SDr. David Alan Gilbert         if (block) {
2415d6b6aec4SAnthony PERARD             *offset = ram_addr - block->offset;
2416422148d3SDr. David Alan Gilbert         }
2417422148d3SDr. David Alan Gilbert         return block;
2418712c2b41SStefano Stabellini     }
2419712c2b41SStefano Stabellini 
2420694ea274SDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
2421d73415a3SStefan Hajnoczi     block = qatomic_rcu_read(&ram_list.mru_block);
24229b8424d5SMichael S. Tsirkin     if (block && block->host && host - block->host < block->max_length) {
242323887b79SPaolo Bonzini         goto found;
242423887b79SPaolo Bonzini     }
242523887b79SPaolo Bonzini 
242699e15582SPeter Xu     RAMBLOCK_FOREACH(block) {
2427432d268cSJun Nakajima         /* This case happens when the block is not mapped. */
2428432d268cSJun Nakajima         if (block->host == NULL) {
2429432d268cSJun Nakajima             continue;
2430432d268cSJun Nakajima         }
24319b8424d5SMichael S. Tsirkin         if (host - block->host < block->max_length) {
243223887b79SPaolo Bonzini             goto found;
243394a6b54fSpbrook         }
2434f471a17eSAlex Williamson     }
2435432d268cSJun Nakajima 
24361b5ec234SPaolo Bonzini     return NULL;
243723887b79SPaolo Bonzini 
243823887b79SPaolo Bonzini found:
2439422148d3SDr. David Alan Gilbert     *offset = (host - block->host);
2440422148d3SDr. David Alan Gilbert     if (round_offset) {
2441422148d3SDr. David Alan Gilbert         *offset &= TARGET_PAGE_MASK;
2442422148d3SDr. David Alan Gilbert     }
2443422148d3SDr. David Alan Gilbert     return block;
2444422148d3SDr. David Alan Gilbert }
2445422148d3SDr. David Alan Gilbert 
2446e3dd7493SDr. David Alan Gilbert /*
2447e3dd7493SDr. David Alan Gilbert  * Finds the named RAMBlock
2448e3dd7493SDr. David Alan Gilbert  *
2449e3dd7493SDr. David Alan Gilbert  * name: The name of RAMBlock to find
2450e3dd7493SDr. David Alan Gilbert  *
2451e3dd7493SDr. David Alan Gilbert  * Returns: RAMBlock (or NULL if not found)
2452e3dd7493SDr. David Alan Gilbert  */
2453e3dd7493SDr. David Alan Gilbert RAMBlock *qemu_ram_block_by_name(const char *name)
2454e3dd7493SDr. David Alan Gilbert {
2455e3dd7493SDr. David Alan Gilbert     RAMBlock *block;
2456e3dd7493SDr. David Alan Gilbert 
245799e15582SPeter Xu     RAMBLOCK_FOREACH(block) {
2458e3dd7493SDr. David Alan Gilbert         if (!strcmp(name, block->idstr)) {
2459e3dd7493SDr. David Alan Gilbert             return block;
2460e3dd7493SDr. David Alan Gilbert         }
2461e3dd7493SDr. David Alan Gilbert     }
2462e3dd7493SDr. David Alan Gilbert 
2463e3dd7493SDr. David Alan Gilbert     return NULL;
2464e3dd7493SDr. David Alan Gilbert }
2465e3dd7493SDr. David Alan Gilbert 
24668d7f2e76SPhilippe Mathieu-Daudé /*
24678d7f2e76SPhilippe Mathieu-Daudé  * Some of the system routines need to translate from a host pointer
24688d7f2e76SPhilippe Mathieu-Daudé  * (typically a TLB entry) back to a ram offset.
24698d7f2e76SPhilippe Mathieu-Daudé  */
247007bdaa41SPaolo Bonzini ram_addr_t qemu_ram_addr_from_host(void *ptr)
2471422148d3SDr. David Alan Gilbert {
2472422148d3SDr. David Alan Gilbert     RAMBlock *block;
2473f615f396SPaolo Bonzini     ram_addr_t offset;
2474422148d3SDr. David Alan Gilbert 
2475f615f396SPaolo Bonzini     block = qemu_ram_block_from_host(ptr, false, &offset);
2476422148d3SDr. David Alan Gilbert     if (!block) {
247707bdaa41SPaolo Bonzini         return RAM_ADDR_INVALID;
2478422148d3SDr. David Alan Gilbert     }
2479422148d3SDr. David Alan Gilbert 
248007bdaa41SPaolo Bonzini     return block->offset + offset;
2481e890261fSMarcelo Tosatti }
2482f471a17eSAlex Williamson 
248397e03465SRichard Henderson ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
248497e03465SRichard Henderson {
248597e03465SRichard Henderson     ram_addr_t ram_addr;
248697e03465SRichard Henderson 
248797e03465SRichard Henderson     ram_addr = qemu_ram_addr_from_host(ptr);
248897e03465SRichard Henderson     if (ram_addr == RAM_ADDR_INVALID) {
248997e03465SRichard Henderson         error_report("Bad ram pointer %p", ptr);
249097e03465SRichard Henderson         abort();
249197e03465SRichard Henderson     }
249297e03465SRichard Henderson     return ram_addr;
249397e03465SRichard Henderson }
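/*
 * For illustration, how the lookups above compose when translating a host
 * pointer back to its RAMBlock and ram_addr_t.  A usage sketch; host_ptr is
 * a hypothetical pointer into some guest RAM mapping, and the caller is
 * assumed to hold the RCU read lock for as long as the block is used:
 */
#if 0 /* illustrative sketch only, not compiled */
void host_ptr_roundtrip_sketch(void *host_ptr)
{
    ram_addr_t offset;
    RAMBlock *rb = qemu_ram_block_from_host(host_ptr, false, &offset);

    if (rb) {
        /* Same value qemu_ram_addr_from_host(host_ptr) would return. */
        ram_addr_t ram_addr = rb->offset + offset;

        g_assert(ram_addr == qemu_ram_addr_from_host(host_ptr));
    }
}
#endif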
249497e03465SRichard Henderson 
2495b2a44fcaSPaolo Bonzini static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
2496a152be43SPhilippe Mathieu-Daudé                                  MemTxAttrs attrs, void *buf, hwaddr len);
249716620684SAlexey Kardashevskiy static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
2498a152be43SPhilippe Mathieu-Daudé                                   const void *buf, hwaddr len);
24990c249ff7SLi Zhijian static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len,
2500eace72b7SPeter Maydell                                   bool is_write, MemTxAttrs attrs);
250116620684SAlexey Kardashevskiy 
2502f25a49e0SPeter Maydell static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2503f25a49e0SPeter Maydell                                 unsigned len, MemTxAttrs attrs)
2504db7b5426Sblueswir1 {
2505acc9d80bSJan Kiszka     subpage_t *subpage = opaque;
2506ff6cff75SPaolo Bonzini     uint8_t buf[8];
25075c9eb028SPeter Maydell     MemTxResult res;
2508791af8c8SPaolo Bonzini 
2509db7b5426Sblueswir1 #if defined(DEBUG_SUBPAGE)
2510883f2c59SPhilippe Mathieu-Daudé     printf("%s: subpage %p len %u addr " HWADDR_FMT_plx "\n", __func__,
2511acc9d80bSJan Kiszka            subpage, len, addr);
2512db7b5426Sblueswir1 #endif
251316620684SAlexey Kardashevskiy     res = flatview_read(subpage->fv, addr + subpage->base, attrs, buf, len);
25145c9eb028SPeter Maydell     if (res) {
25155c9eb028SPeter Maydell         return res;
2516f25a49e0SPeter Maydell     }
25176d3ede54SPeter Maydell     *data = ldn_p(buf, len);
2518f25a49e0SPeter Maydell     return MEMTX_OK;
2519db7b5426Sblueswir1 }
2520db7b5426Sblueswir1 
2521f25a49e0SPeter Maydell static MemTxResult subpage_write(void *opaque, hwaddr addr,
2522f25a49e0SPeter Maydell                                  uint64_t value, unsigned len, MemTxAttrs attrs)
2523db7b5426Sblueswir1 {
2524acc9d80bSJan Kiszka     subpage_t *subpage = opaque;
2525ff6cff75SPaolo Bonzini     uint8_t buf[8];
2526acc9d80bSJan Kiszka 
2527db7b5426Sblueswir1 #if defined(DEBUG_SUBPAGE)
2528883f2c59SPhilippe Mathieu-Daudé     printf("%s: subpage %p len %u addr " HWADDR_FMT_plx
2529acc9d80bSJan Kiszka            " value %"PRIx64"\n",
2530acc9d80bSJan Kiszka            __func__, subpage, len, addr, value);
2531db7b5426Sblueswir1 #endif
25326d3ede54SPeter Maydell     stn_p(buf, len, value);
253316620684SAlexey Kardashevskiy     return flatview_write(subpage->fv, addr + subpage->base, attrs, buf, len);
2534db7b5426Sblueswir1 }
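/*
 * For illustration, the byte-buffer bounce used by subpage_read() and
 * subpage_write() above: stn_p()/ldn_p() store and load a len-byte value
 * through a scratch buffer (sketch, assuming the usual target-endian
 * semantics of stn_p()/ldn_p()):
 */
#if 0 /* illustrative sketch only, not compiled */
void subpage_buf_roundtrip_sketch(void)
{
    uint8_t buf[8];
    unsigned len = 4;
    uint64_t value = 0x1122334455667788ULL;

    stn_p(buf, len, value);                       /* store the low 4 bytes */
    g_assert(ldn_p(buf, len) == (uint32_t)value); /* round-trips exactly */
}
#endif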
2535db7b5426Sblueswir1 
2536c353e4ccSPaolo Bonzini static bool subpage_accepts(void *opaque, hwaddr addr,
25378372d383SPeter Maydell                             unsigned len, bool is_write,
25388372d383SPeter Maydell                             MemTxAttrs attrs)
2539c353e4ccSPaolo Bonzini {
2540acc9d80bSJan Kiszka     subpage_t *subpage = opaque;
2541c353e4ccSPaolo Bonzini #if defined(DEBUG_SUBPAGE)
2542883f2c59SPhilippe Mathieu-Daudé     printf("%s: subpage %p %c len %u addr " HWADDR_FMT_plx "\n",
2543acc9d80bSJan Kiszka            __func__, subpage, is_write ? 'w' : 'r', len, addr);
2544c353e4ccSPaolo Bonzini #endif
2545c353e4ccSPaolo Bonzini 
254616620684SAlexey Kardashevskiy     return flatview_access_valid(subpage->fv, addr + subpage->base,
2547eace72b7SPeter Maydell                                  len, is_write, attrs);
2548c353e4ccSPaolo Bonzini }
2549c353e4ccSPaolo Bonzini 
255070c68e44SAvi Kivity static const MemoryRegionOps subpage_ops = {
2551f25a49e0SPeter Maydell     .read_with_attrs = subpage_read,
2552f25a49e0SPeter Maydell     .write_with_attrs = subpage_write,
2553ff6cff75SPaolo Bonzini     .impl.min_access_size = 1,
2554ff6cff75SPaolo Bonzini     .impl.max_access_size = 8,
2555ff6cff75SPaolo Bonzini     .valid.min_access_size = 1,
2556ff6cff75SPaolo Bonzini     .valid.max_access_size = 8,
2557c353e4ccSPaolo Bonzini     .valid.accepts = subpage_accepts,
255870c68e44SAvi Kivity     .endianness = DEVICE_NATIVE_ENDIAN,
2559db7b5426Sblueswir1 };
2560db7b5426Sblueswir1 
2561c227f099SAnthony Liguori static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
25625312bd8bSAvi Kivity                             uint16_t section)
2563db7b5426Sblueswir1 {
2564db7b5426Sblueswir1     int idx, eidx;
2565db7b5426Sblueswir1 
2566db7b5426Sblueswir1     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2567db7b5426Sblueswir1         return -1;
2568db7b5426Sblueswir1     idx = SUBPAGE_IDX(start);
2569db7b5426Sblueswir1     eidx = SUBPAGE_IDX(end);
2570db7b5426Sblueswir1 #if defined(DEBUG_SUBPAGE)
2571016e9d62SAmos Kong     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2572016e9d62SAmos Kong            __func__, mmio, start, end, idx, eidx, section);
2573db7b5426Sblueswir1 #endif
2574db7b5426Sblueswir1     for (; idx <= eidx; idx++) {
25755312bd8bSAvi Kivity         mmio->sub_section[idx] = section;
2576db7b5426Sblueswir1     }
2577db7b5426Sblueswir1 
2578db7b5426Sblueswir1     return 0;
2579db7b5426Sblueswir1 }
2580db7b5426Sblueswir1 
258116620684SAlexey Kardashevskiy static subpage_t *subpage_init(FlatView *fv, hwaddr base)
2582db7b5426Sblueswir1 {
2583c227f099SAnthony Liguori     subpage_t *mmio;
2584db7b5426Sblueswir1 
2585b797ab1aSWei Yang     /* mmio->sub_section is set to PHYS_SECTION_UNASSIGNED with g_malloc0 */
25862615fabdSVijaya Kumar K     mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
258716620684SAlexey Kardashevskiy     mmio->fv = fv;
2588db7b5426Sblueswir1     mmio->base = base;
25892c9b15caSPaolo Bonzini     memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2590b4fefef9SPeter Crosthwaite                           NULL, TARGET_PAGE_SIZE);
2591b3b00c78SAvi Kivity     mmio->iomem.subpage = true;
2592db7b5426Sblueswir1 #if defined(DEBUG_SUBPAGE)
2593883f2c59SPhilippe Mathieu-Daudé     printf("%s: %p base " HWADDR_FMT_plx " len %08x\n", __func__,
2594016e9d62SAmos Kong            mmio, base, TARGET_PAGE_SIZE);
2595db7b5426Sblueswir1 #endif
2596db7b5426Sblueswir1 
2597db7b5426Sblueswir1     return mmio;
2598db7b5426Sblueswir1 }
2599db7b5426Sblueswir1 
260016620684SAlexey Kardashevskiy static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr)
26015312bd8bSAvi Kivity {
260216620684SAlexey Kardashevskiy     assert(fv);
26035312bd8bSAvi Kivity     MemoryRegionSection section = {
260416620684SAlexey Kardashevskiy         .fv = fv,
26055312bd8bSAvi Kivity         .mr = mr,
26065312bd8bSAvi Kivity         .offset_within_address_space = 0,
26075312bd8bSAvi Kivity         .offset_within_region = 0,
2608052e87b0SPaolo Bonzini         .size = int128_2_64(),
26095312bd8bSAvi Kivity     };
26105312bd8bSAvi Kivity 
261153cb28cbSMarcel Apfelbaum     return phys_section_add(map, &section);
26125312bd8bSAvi Kivity }
26135312bd8bSAvi Kivity 
26142d54f194SPeter Maydell MemoryRegionSection *iotlb_to_section(CPUState *cpu,
26152d54f194SPeter Maydell                                       hwaddr index, MemTxAttrs attrs)
2616aa102231SAvi Kivity {
2617a54c87b6SPeter Maydell     int asidx = cpu_asidx_from_attrs(cpu, attrs);
2618a54c87b6SPeter Maydell     CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
26190d58c660SRichard Henderson     AddressSpaceDispatch *d = cpuas->memory_dispatch;
262086e4f93dSRichard Henderson     int section_index = index & ~TARGET_PAGE_MASK;
262186e4f93dSRichard Henderson     MemoryRegionSection *ret;
26229d82b5a7SPaolo Bonzini 
262386e4f93dSRichard Henderson     assert(section_index < d->map.sections_nb);
262486e4f93dSRichard Henderson     ret = d->map.sections + section_index;
262586e4f93dSRichard Henderson     assert(ret->mr);
262686e4f93dSRichard Henderson     assert(ret->mr->ops);
262786e4f93dSRichard Henderson 
262886e4f93dSRichard Henderson     return ret;
2629aa102231SAvi Kivity }
2630aa102231SAvi Kivity 
2631e9179ce1SAvi Kivity static void io_mem_init(void)
2632e9179ce1SAvi Kivity {
26332c9b15caSPaolo Bonzini     memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
26341f6245e5SPaolo Bonzini                           NULL, UINT64_MAX);
2635e9179ce1SAvi Kivity }
2636e9179ce1SAvi Kivity 
26378629d3fcSAlexey Kardashevskiy AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv)
2638ac1970fbSAvi Kivity {
263953cb28cbSMarcel Apfelbaum     AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
264053cb28cbSMarcel Apfelbaum     uint16_t n;
264153cb28cbSMarcel Apfelbaum 
264216620684SAlexey Kardashevskiy     n = dummy_section(&d->map, fv, &io_mem_unassigned);
264353cb28cbSMarcel Apfelbaum     assert(n == PHYS_SECTION_UNASSIGNED);
264400752703SPaolo Bonzini 
26459736e55bSMichael S. Tsirkin     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
264666a6df1dSAlexey Kardashevskiy 
264766a6df1dSAlexey Kardashevskiy     return d;
264800752703SPaolo Bonzini }
264900752703SPaolo Bonzini 
265066a6df1dSAlexey Kardashevskiy void address_space_dispatch_free(AddressSpaceDispatch *d)
265179e2b9aeSPaolo Bonzini {
265279e2b9aeSPaolo Bonzini     phys_sections_free(&d->map);
265379e2b9aeSPaolo Bonzini     g_free(d);
265479e2b9aeSPaolo Bonzini }
265579e2b9aeSPaolo Bonzini 
26569458a9a1SPaolo Bonzini static void do_nothing(CPUState *cpu, run_on_cpu_data d)
26579458a9a1SPaolo Bonzini {
26589458a9a1SPaolo Bonzini }
26599458a9a1SPaolo Bonzini 
26609458a9a1SPaolo Bonzini static void tcg_log_global_after_sync(MemoryListener *listener)
26619458a9a1SPaolo Bonzini {
26629458a9a1SPaolo Bonzini     CPUAddressSpace *cpuas;
26639458a9a1SPaolo Bonzini 
26649458a9a1SPaolo Bonzini     /* Wait for the CPU to end the current TB.  This avoids the following
26659458a9a1SPaolo Bonzini      * incorrect race:
26669458a9a1SPaolo Bonzini      *
26679458a9a1SPaolo Bonzini      *      vCPU                         migration
26689458a9a1SPaolo Bonzini      *      ----------------------       -------------------------
26699458a9a1SPaolo Bonzini      *      TLB check -> slow path
26709458a9a1SPaolo Bonzini      *        notdirty_mem_write
26719458a9a1SPaolo Bonzini      *          write to RAM
26729458a9a1SPaolo Bonzini      *          mark dirty
26739458a9a1SPaolo Bonzini      *                                   clear dirty flag
26749458a9a1SPaolo Bonzini      *      TLB check -> fast path
26759458a9a1SPaolo Bonzini      *                                   read memory
26769458a9a1SPaolo Bonzini      *        write to RAM
26779458a9a1SPaolo Bonzini      *
26789458a9a1SPaolo Bonzini      * by pushing the migration thread's memory read after the vCPU thread has
26799458a9a1SPaolo Bonzini      * written the memory.
26809458a9a1SPaolo Bonzini      */
268186cf9e15SPavel Dovgalyuk     if (replay_mode == REPLAY_MODE_NONE) {
268286cf9e15SPavel Dovgalyuk         /*
268386cf9e15SPavel Dovgalyuk          * VGA can make calls to this function while updating the screen.
268486cf9e15SPavel Dovgalyuk          * In record/replay mode this causes a deadlock, because
268586cf9e15SPavel Dovgalyuk          * run_on_cpu waits for rr mutex. Therefore no races are possible
268686cf9e15SPavel Dovgalyuk          * in this case and no need for making run_on_cpu when
2687f18d403fSGreg Kurz          * record/replay is enabled.
268886cf9e15SPavel Dovgalyuk          */
26899458a9a1SPaolo Bonzini         cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
26909458a9a1SPaolo Bonzini         run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL);
26919458a9a1SPaolo Bonzini     }
269286cf9e15SPavel Dovgalyuk }
26939458a9a1SPaolo Bonzini 
26940d58c660SRichard Henderson static void tcg_commit_cpu(CPUState *cpu, run_on_cpu_data data)
26950d58c660SRichard Henderson {
26960d58c660SRichard Henderson     CPUAddressSpace *cpuas = data.host_ptr;
26970d58c660SRichard Henderson 
26980d58c660SRichard Henderson     cpuas->memory_dispatch = address_space_to_dispatch(cpuas->as);
26990d58c660SRichard Henderson     tlb_flush(cpu);
27000d58c660SRichard Henderson }
27010d58c660SRichard Henderson 
27021d71148eSAvi Kivity static void tcg_commit(MemoryListener *listener)
270350c1e149SAvi Kivity {
270432857f4dSPeter Maydell     CPUAddressSpace *cpuas;
27050d58c660SRichard Henderson     CPUState *cpu;
2706117712c3SAvi Kivity 
2707f28d0dfdSEmilio G. Cota     assert(tcg_enabled());
2708117712c3SAvi Kivity     /* since each CPU stores ram addresses in its TLB cache, we must
2709117712c3SAvi Kivity        reset the modified entries */
271032857f4dSPeter Maydell     cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
27110d58c660SRichard Henderson     cpu = cpuas->cpu;
27120d58c660SRichard Henderson 
27130d58c660SRichard Henderson     /*
27140d58c660SRichard Henderson      * Defer changes to as->memory_dispatch until the cpu is quiescent.
27150d58c660SRichard Henderson      * Otherwise we race between (1) other cpu threads and (2) ongoing
27160d58c660SRichard Henderson      * i/o for the current cpu thread, with data cached by mmu_lookup().
27170d58c660SRichard Henderson      *
27180d58c660SRichard Henderson      * In addition, queueing the work function will kick the cpu back to
27190d58c660SRichard Henderson      * the main loop, which will end the RCU critical section and reclaim
27200d58c660SRichard Henderson      * the memory data structures.
27210d58c660SRichard Henderson      *
27220d58c660SRichard Henderson      * That said, the listener is also called during realize, before
27230d58c660SRichard Henderson      * all of the tcg machinery for run-on is initialized: thus halt_cond.
272432857f4dSPeter Maydell      */
27250d58c660SRichard Henderson     if (cpu->halt_cond) {
27260d58c660SRichard Henderson         async_run_on_cpu(cpu, tcg_commit_cpu, RUN_ON_CPU_HOST_PTR(cpuas));
27270d58c660SRichard Henderson     } else {
27280d58c660SRichard Henderson         tcg_commit_cpu(cpu, RUN_ON_CPU_HOST_PTR(cpuas));
27290d58c660SRichard Henderson     }
273050c1e149SAvi Kivity }
273150c1e149SAvi Kivity 
273262152b8aSAvi Kivity static void memory_map_init(void)
273362152b8aSAvi Kivity {
27347267c094SAnthony Liguori     system_memory = g_malloc(sizeof(*system_memory));
273503f49957SPaolo Bonzini 
273657271d63SPaolo Bonzini     memory_region_init(system_memory, NULL, "system", UINT64_MAX);
27377dca8043SAlexey Kardashevskiy     address_space_init(&address_space_memory, system_memory, "memory");
2738309cb471SAvi Kivity 
27397267c094SAnthony Liguori     system_io = g_malloc(sizeof(*system_io));
27403bb28b72SJan Kiszka     memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
27413bb28b72SJan Kiszka                           65536);
27427dca8043SAlexey Kardashevskiy     address_space_init(&address_space_io, system_io, "I/O");
27432641689aSliguang }
274462152b8aSAvi Kivity 
274562152b8aSAvi Kivity MemoryRegion *get_system_memory(void)
274662152b8aSAvi Kivity {
274762152b8aSAvi Kivity     return system_memory;
274862152b8aSAvi Kivity }
274962152b8aSAvi Kivity 
2750309cb471SAvi Kivity MemoryRegion *get_system_io(void)
2751309cb471SAvi Kivity {
2752309cb471SAvi Kivity     return system_io;
2753309cb471SAvi Kivity }
2754309cb471SAvi Kivity 
2755845b6214SPaolo Bonzini static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2756a8170e5eSAvi Kivity                                      hwaddr length)
275751d7a9ebSAnthony PERARD {
2758845b6214SPaolo Bonzini     uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
275973188068SPeter Maydell     ram_addr_t ramaddr = memory_region_get_ram_addr(mr);
276073188068SPeter Maydell 
276173188068SPeter Maydell     /* We know we're only called for RAM MemoryRegions */
276273188068SPeter Maydell     assert(ramaddr != RAM_ADDR_INVALID);
276373188068SPeter Maydell     addr += ramaddr;
27640878d0e1SPaolo Bonzini 
2765e87f7778SPaolo Bonzini     /* No early return if dirty_log_mask is or becomes 0, because
2766e87f7778SPaolo Bonzini      * cpu_physical_memory_set_dirty_range will still call
2767e87f7778SPaolo Bonzini      * xen_modified_memory.
2768e87f7778SPaolo Bonzini      */
2769e87f7778SPaolo Bonzini     if (dirty_log_mask) {
2770e87f7778SPaolo Bonzini         dirty_log_mask =
2771e87f7778SPaolo Bonzini             cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2772e87f7778SPaolo Bonzini     }
2773845b6214SPaolo Bonzini     if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
27745aa1ef71SPaolo Bonzini         assert(tcg_enabled());
2775e506ad6aSRichard Henderson         tb_invalidate_phys_range(addr, addr + length - 1);
2776845b6214SPaolo Bonzini         dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2777845b6214SPaolo Bonzini     }
277858d2707eSPaolo Bonzini     cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
277949dfcec4SPaolo Bonzini }
278051d7a9ebSAnthony PERARD 
2781047be4edSStefan Hajnoczi void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size)
2782047be4edSStefan Hajnoczi {
2783047be4edSStefan Hajnoczi     /*
2784047be4edSStefan Hajnoczi      * In principle this function would work on other memory region types too,
2785047be4edSStefan Hajnoczi      * but the ROM device use case is the only one where this operation is
2786047be4edSStefan Hajnoczi      * necessary.  Other memory regions should use the
2787047be4edSStefan Hajnoczi      * address_space_read/write() APIs.
2788047be4edSStefan Hajnoczi      */
2789047be4edSStefan Hajnoczi     assert(memory_region_is_romd(mr));
2790047be4edSStefan Hajnoczi 
2791047be4edSStefan Hajnoczi     invalidate_and_set_dirty(mr, addr, size);
2792047be4edSStefan Hajnoczi }
2793047be4edSStefan Hajnoczi 
27943123f93dSJagannathan Raman int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
279582f2563fSPaolo Bonzini {
2796e1622f4bSPaolo Bonzini     unsigned access_size_max = mr->ops->valid.max_access_size;
279723326164SRichard Henderson 
279823326164SRichard Henderson     /* Regions are assumed to support 1-4 byte accesses unless
279923326164SRichard Henderson        otherwise specified.  */
280023326164SRichard Henderson     if (access_size_max == 0) {
280123326164SRichard Henderson         access_size_max = 4;
280282f2563fSPaolo Bonzini     }
280323326164SRichard Henderson 
280423326164SRichard Henderson     /* Bound the maximum access by the alignment of the address.  */
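    /*
     * addr & -addr isolates the lowest set bit of addr, i.e. its natural
     * alignment: e.g. for addr 0x1006 it yields 0x2, so an 8-byte request
     * at 0x1006 is clamped to a 2-byte access below.
     */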
280523326164SRichard Henderson     if (!mr->ops->impl.unaligned) {
280623326164SRichard Henderson         unsigned align_size_max = addr & -addr;
280723326164SRichard Henderson         if (align_size_max != 0 && align_size_max < access_size_max) {
280823326164SRichard Henderson             access_size_max = align_size_max;
280923326164SRichard Henderson         }
281023326164SRichard Henderson     }
281123326164SRichard Henderson 
281223326164SRichard Henderson     /* Don't attempt accesses larger than the maximum.  */
281323326164SRichard Henderson     if (l > access_size_max) {
281423326164SRichard Henderson         l = access_size_max;
281523326164SRichard Henderson     }
28166554f5c0SPeter Maydell     l = pow2floor(l);
281723326164SRichard Henderson 
281823326164SRichard Henderson     return l;
281982f2563fSPaolo Bonzini }
282082f2563fSPaolo Bonzini 
28213123f93dSJagannathan Raman bool prepare_mmio_access(MemoryRegion *mr)
2822125b3806SPaolo Bonzini {
28234840f10eSJan Kiszka     bool release_lock = false;
28244840f10eSJan Kiszka 
2825195801d7SStefan Hajnoczi     if (!bql_locked()) {
2826195801d7SStefan Hajnoczi         bql_lock();
28274840f10eSJan Kiszka         release_lock = true;
2828125b3806SPaolo Bonzini     }
28294840f10eSJan Kiszka     if (mr->flush_coalesced_mmio) {
28304840f10eSJan Kiszka         qemu_flush_coalesced_mmio_buffer();
28314840f10eSJan Kiszka     }
28324840f10eSJan Kiszka 
28334840f10eSJan Kiszka     return release_lock;
2834125b3806SPaolo Bonzini }
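/*
 * For illustration, the calling idiom for prepare_mmio_access(); this is
 * exactly how flatview_write_continue_step() below uses it.  A minimal
 * sketch (hypothetical helper, fixed 4-byte read):
 */
#if 0 /* illustrative sketch only, not compiled */
MemTxResult mmio_read4_sketch(MemoryRegion *mr, hwaddr addr,
                              uint64_t *val, MemTxAttrs attrs)
{
    bool release_lock = prepare_mmio_access(mr);
    MemTxResult r = memory_region_dispatch_read(mr, addr, val,
                                                size_memop(4), attrs);

    if (release_lock) {
        bql_unlock();   /* release the BQL only if we took it */
    }
    return r;
}
#endif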
2835125b3806SPaolo Bonzini 
28363ab6fdc9SPhilippe Mathieu-Daudé /**
28373ab6fdc9SPhilippe Mathieu-Daudé  * flatview_access_allowed
28383ab6fdc9SPhilippe Mathieu-Daudé  * @mr: #MemoryRegion to be accessed
28393ab6fdc9SPhilippe Mathieu-Daudé  * @attrs: memory transaction attributes
28403ab6fdc9SPhilippe Mathieu-Daudé  * @addr: address within that memory region
28413ab6fdc9SPhilippe Mathieu-Daudé  * @len: the number of bytes to access
28423ab6fdc9SPhilippe Mathieu-Daudé  *
28433ab6fdc9SPhilippe Mathieu-Daudé  * Check if a memory transaction is allowed.
28443ab6fdc9SPhilippe Mathieu-Daudé  *
28453ab6fdc9SPhilippe Mathieu-Daudé  * Returns: true if transaction is allowed, false if denied.
28463ab6fdc9SPhilippe Mathieu-Daudé  */
28473ab6fdc9SPhilippe Mathieu-Daudé static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs,
28483ab6fdc9SPhilippe Mathieu-Daudé                                     hwaddr addr, hwaddr len)
28493ab6fdc9SPhilippe Mathieu-Daudé {
28503ab6fdc9SPhilippe Mathieu-Daudé     if (likely(!attrs.memory)) {
28513ab6fdc9SPhilippe Mathieu-Daudé         return true;
28523ab6fdc9SPhilippe Mathieu-Daudé     }
28533ab6fdc9SPhilippe Mathieu-Daudé     if (memory_region_is_ram(mr)) {
28543ab6fdc9SPhilippe Mathieu-Daudé         return true;
28553ab6fdc9SPhilippe Mathieu-Daudé     }
2856678bf8f2SBALATON Zoltan     qemu_log_mask(LOG_INVALID_MEM,
28573ab6fdc9SPhilippe Mathieu-Daudé                   "Invalid access to non-RAM device at "
28583ab6fdc9SPhilippe Mathieu-Daudé                   "addr 0x%" HWADDR_PRIX ", size %" HWADDR_PRIu ", "
28593ab6fdc9SPhilippe Mathieu-Daudé                   "region '%s'\n", addr, len, memory_region_name(mr));
28603ab6fdc9SPhilippe Mathieu-Daudé     return false;
28613ab6fdc9SPhilippe Mathieu-Daudé }
28623ab6fdc9SPhilippe Mathieu-Daudé 
2863e7927d33SJonathan Cameron static MemTxResult flatview_write_continue_step(MemTxAttrs attrs,
2864e7927d33SJonathan Cameron                                                 const uint8_t *buf,
2865e7927d33SJonathan Cameron                                                 hwaddr len, hwaddr mr_addr,
2866e7927d33SJonathan Cameron                                                 hwaddr *l, MemoryRegion *mr)
2867e7927d33SJonathan Cameron {
2868e7927d33SJonathan Cameron     if (!flatview_access_allowed(mr, attrs, mr_addr, *l)) {
2869e7927d33SJonathan Cameron         return MEMTX_ACCESS_ERROR;
2870e7927d33SJonathan Cameron     }
2871e7927d33SJonathan Cameron 
2872e7927d33SJonathan Cameron     if (!memory_access_is_direct(mr, true)) {
2873e7927d33SJonathan Cameron         uint64_t val;
2874e7927d33SJonathan Cameron         MemTxResult result;
2875e7927d33SJonathan Cameron         bool release_lock = prepare_mmio_access(mr);
2876e7927d33SJonathan Cameron 
2877e7927d33SJonathan Cameron         *l = memory_access_size(mr, *l, mr_addr);
2878e7927d33SJonathan Cameron         /*
2879e7927d33SJonathan Cameron          * XXX: could force current_cpu to NULL to avoid
2880e7927d33SJonathan Cameron          * potential bugs
2881e7927d33SJonathan Cameron          */
2882e7927d33SJonathan Cameron 
2883e7927d33SJonathan Cameron         /*
2884e7927d33SJonathan Cameron          * Assure Coverity (and ourselves) that we are not going to OVERRUN
2885e7927d33SJonathan Cameron          * the buffer by following ldn_he_p().
2886e7927d33SJonathan Cameron          * the buffer in the ldn_he_p() call that follows.
2887e7927d33SJonathan Cameron #ifdef QEMU_STATIC_ANALYSIS
2888e7927d33SJonathan Cameron         assert((*l == 1 && len >= 1) ||
2889e7927d33SJonathan Cameron                (*l == 2 && len >= 2) ||
2890e7927d33SJonathan Cameron                (*l == 4 && len >= 4) ||
2891e7927d33SJonathan Cameron                (*l == 8 && len >= 8));
2892e7927d33SJonathan Cameron #endif
2893e7927d33SJonathan Cameron         val = ldn_he_p(buf, *l);
2894e7927d33SJonathan Cameron         result = memory_region_dispatch_write(mr, mr_addr, val,
2895e7927d33SJonathan Cameron                                               size_memop(*l), attrs);
2896e7927d33SJonathan Cameron         if (release_lock) {
2897e7927d33SJonathan Cameron             bql_unlock();
2898e7927d33SJonathan Cameron         }
2899e7927d33SJonathan Cameron 
2900e7927d33SJonathan Cameron         return result;
2901e7927d33SJonathan Cameron     } else {
2902e7927d33SJonathan Cameron         /* RAM case */
2903e7927d33SJonathan Cameron         uint8_t *ram_ptr = qemu_ram_ptr_length(mr->ram_block, mr_addr, l,
29045a5585f4SEdgar E. Iglesias                                                false, true);
2905e7927d33SJonathan Cameron 
2906e7927d33SJonathan Cameron         memmove(ram_ptr, buf, *l);
2907e7927d33SJonathan Cameron         invalidate_and_set_dirty(mr, mr_addr, *l);
2908e7927d33SJonathan Cameron 
2909e7927d33SJonathan Cameron         return MEMTX_OK;
2910e7927d33SJonathan Cameron     }
2911e7927d33SJonathan Cameron }
2912e7927d33SJonathan Cameron 
2913a203ac70SPaolo Bonzini /* Called within RCU critical section.  */
291416620684SAlexey Kardashevskiy static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr,
2915a203ac70SPaolo Bonzini                                            MemTxAttrs attrs,
2916a152be43SPhilippe Mathieu-Daudé                                            const void *ptr,
29174c7c8563SJonathan Cameron                                            hwaddr len, hwaddr mr_addr,
2918a203ac70SPaolo Bonzini                                            hwaddr l, MemoryRegion *mr)
291913eb76e0Sbellard {
29203b643495SPeter Maydell     MemTxResult result = MEMTX_OK;
2921a152be43SPhilippe Mathieu-Daudé     const uint8_t *buf = ptr;
292213eb76e0Sbellard 
2923a203ac70SPaolo Bonzini     for (;;) {
2924e7927d33SJonathan Cameron         result |= flatview_write_continue_step(attrs, buf, len, mr_addr, &l,
2925e7927d33SJonathan Cameron                                                mr);
2926eb7eeb88SPaolo Bonzini 
2927eb7eeb88SPaolo Bonzini         len -= l;
2928eb7eeb88SPaolo Bonzini         buf += l;
2929eb7eeb88SPaolo Bonzini         addr += l;
2930a203ac70SPaolo Bonzini 
2931a203ac70SPaolo Bonzini         if (!len) {
2932a203ac70SPaolo Bonzini             break;
2933eb7eeb88SPaolo Bonzini         }
2934a203ac70SPaolo Bonzini 
2935a203ac70SPaolo Bonzini         l = len;
29364c7c8563SJonathan Cameron         mr = flatview_translate(fv, addr, &mr_addr, &l, true, attrs);
2937a203ac70SPaolo Bonzini     }
2938eb7eeb88SPaolo Bonzini 
2939eb7eeb88SPaolo Bonzini     return result;
2940eb7eeb88SPaolo Bonzini }
2941eb7eeb88SPaolo Bonzini 
29424c6ebbb3SPaolo Bonzini /* Called from RCU critical section.  */
294316620684SAlexey Kardashevskiy static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
2944a152be43SPhilippe Mathieu-Daudé                                   const void *buf, hwaddr len)
2945eb7eeb88SPaolo Bonzini {
2946eb7eeb88SPaolo Bonzini     hwaddr l;
29474c7c8563SJonathan Cameron     hwaddr mr_addr;
2948eb7eeb88SPaolo Bonzini     MemoryRegion *mr;
2949a203ac70SPaolo Bonzini 
2950a203ac70SPaolo Bonzini     l = len;
29514c7c8563SJonathan Cameron     mr = flatview_translate(fv, addr, &mr_addr, &l, true, attrs);
29523ab6fdc9SPhilippe Mathieu-Daudé     if (!flatview_access_allowed(mr, attrs, addr, len)) {
29533ab6fdc9SPhilippe Mathieu-Daudé         return MEMTX_ACCESS_ERROR;
29543ab6fdc9SPhilippe Mathieu-Daudé     }
295558e74682SPhilippe Mathieu-Daudé     return flatview_write_continue(fv, addr, attrs, buf, len,
29564c7c8563SJonathan Cameron                                    mr_addr, l, mr);
2957a203ac70SPaolo Bonzini }
2958a203ac70SPaolo Bonzini 
2959e7927d33SJonathan Cameron static MemTxResult flatview_read_continue_step(MemTxAttrs attrs, uint8_t *buf,
2960e7927d33SJonathan Cameron                                                hwaddr len, hwaddr mr_addr,
2961e7927d33SJonathan Cameron                                                hwaddr *l,
2962e7927d33SJonathan Cameron                                                MemoryRegion *mr)
2963e7927d33SJonathan Cameron {
2964e7927d33SJonathan Cameron     if (!flatview_access_allowed(mr, attrs, mr_addr, *l)) {
2965e7927d33SJonathan Cameron         return MEMTX_ACCESS_ERROR;
2966e7927d33SJonathan Cameron     }
2967e7927d33SJonathan Cameron 
2968e7927d33SJonathan Cameron     if (!memory_access_is_direct(mr, false)) {
2969e7927d33SJonathan Cameron         /* I/O case */
2970e7927d33SJonathan Cameron         uint64_t val;
2971e7927d33SJonathan Cameron         MemTxResult result;
2972e7927d33SJonathan Cameron         bool release_lock = prepare_mmio_access(mr);
2973e7927d33SJonathan Cameron 
2974e7927d33SJonathan Cameron         *l = memory_access_size(mr, *l, mr_addr);
2975e7927d33SJonathan Cameron         result = memory_region_dispatch_read(mr, mr_addr, &val, size_memop(*l),
2976e7927d33SJonathan Cameron                                              attrs);
2977e7927d33SJonathan Cameron 
2978e7927d33SJonathan Cameron         /*
2979e7927d33SJonathan Cameron          * Assure Coverity (and ourselves) that we are not going to OVERRUN
2980e7927d33SJonathan Cameron          * the buffer in the stn_he_p() call that follows.
2981e7927d33SJonathan Cameron          */
2982e7927d33SJonathan Cameron #ifdef QEMU_STATIC_ANALYSIS
2983e7927d33SJonathan Cameron         assert((*l == 1 && len >= 1) ||
2984e7927d33SJonathan Cameron                (*l == 2 && len >= 2) ||
2985e7927d33SJonathan Cameron                (*l == 4 && len >= 4) ||
2986e7927d33SJonathan Cameron                (*l == 8 && len >= 8));
2987e7927d33SJonathan Cameron #endif
2988e7927d33SJonathan Cameron         stn_he_p(buf, *l, val);
2989e7927d33SJonathan Cameron 
2990e7927d33SJonathan Cameron         if (release_lock) {
2991e7927d33SJonathan Cameron             bql_unlock();
2992e7927d33SJonathan Cameron         }
2993e7927d33SJonathan Cameron         return result;
2994e7927d33SJonathan Cameron     } else {
2995e7927d33SJonathan Cameron         /* RAM case */
2996e7927d33SJonathan Cameron         uint8_t *ram_ptr = qemu_ram_ptr_length(mr->ram_block, mr_addr, l,
29975a5585f4SEdgar E. Iglesias                                                false, false);
2998e7927d33SJonathan Cameron 
2999e7927d33SJonathan Cameron         memcpy(buf, ram_ptr, *l);
3000e7927d33SJonathan Cameron 
3001e7927d33SJonathan Cameron         return MEMTX_OK;
3002e7927d33SJonathan Cameron     }
3003e7927d33SJonathan Cameron }
3004e7927d33SJonathan Cameron 
3005a203ac70SPaolo Bonzini /* Called within RCU critical section.  */
300616620684SAlexey Kardashevskiy MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
3007a152be43SPhilippe Mathieu-Daudé                                    MemTxAttrs attrs, void *ptr,
30084c7c8563SJonathan Cameron                                    hwaddr len, hwaddr mr_addr, hwaddr l,
3009a203ac70SPaolo Bonzini                                    MemoryRegion *mr)
3010a203ac70SPaolo Bonzini {
3011a203ac70SPaolo Bonzini     MemTxResult result = MEMTX_OK;
3012a152be43SPhilippe Mathieu-Daudé     uint8_t *buf = ptr;
3013eb7eeb88SPaolo Bonzini 
30147cac7feaSAlexander Bulekov     fuzz_dma_read_cb(addr, len, mr);
3015a203ac70SPaolo Bonzini     for (;;) {
3016e7927d33SJonathan Cameron         result |= flatview_read_continue_step(attrs, buf, len, mr_addr, &l, mr);
30174840f10eSJan Kiszka 
301813eb76e0Sbellard         len -= l;
301913eb76e0Sbellard         buf += l;
302013eb76e0Sbellard         addr += l;
3021a203ac70SPaolo Bonzini 
3022a203ac70SPaolo Bonzini         if (!len) {
3023a203ac70SPaolo Bonzini             break;
302413eb76e0Sbellard         }
3025a203ac70SPaolo Bonzini 
3026a203ac70SPaolo Bonzini         l = len;
30274c7c8563SJonathan Cameron         mr = flatview_translate(fv, addr, &mr_addr, &l, false, attrs);
3028a203ac70SPaolo Bonzini     }
3029a203ac70SPaolo Bonzini 
3030a203ac70SPaolo Bonzini     return result;
3031a203ac70SPaolo Bonzini }
3032a203ac70SPaolo Bonzini 
3033b2a44fcaSPaolo Bonzini /* Called from RCU critical section.  */
3034b2a44fcaSPaolo Bonzini static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
3035a152be43SPhilippe Mathieu-Daudé                                  MemTxAttrs attrs, void *buf, hwaddr len)
3036a203ac70SPaolo Bonzini {
3037a203ac70SPaolo Bonzini     hwaddr l;
30384c7c8563SJonathan Cameron     hwaddr mr_addr;
3039a203ac70SPaolo Bonzini     MemoryRegion *mr;
3040a203ac70SPaolo Bonzini 
3041a203ac70SPaolo Bonzini     l = len;
30424c7c8563SJonathan Cameron     mr = flatview_translate(fv, addr, &mr_addr, &l, false, attrs);
30433ab6fdc9SPhilippe Mathieu-Daudé     if (!flatview_access_allowed(mr, attrs, addr, len)) {
30443ab6fdc9SPhilippe Mathieu-Daudé         return MEMTX_ACCESS_ERROR;
30453ab6fdc9SPhilippe Mathieu-Daudé     }
3046b2a44fcaSPaolo Bonzini     return flatview_read_continue(fv, addr, attrs, buf, len,
30474c7c8563SJonathan Cameron                                   mr_addr, l, mr);
304813eb76e0Sbellard }
30498df1cd07Sbellard 
3050b2a44fcaSPaolo Bonzini MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
3051daa3dda4SPhilippe Mathieu-Daudé                                     MemTxAttrs attrs, void *buf, hwaddr len)
3052b2a44fcaSPaolo Bonzini {
3053b2a44fcaSPaolo Bonzini     MemTxResult result = MEMTX_OK;
3054b2a44fcaSPaolo Bonzini     FlatView *fv;
3055b2a44fcaSPaolo Bonzini 
3056b2a44fcaSPaolo Bonzini     if (len > 0) {
3057694ea274SDr. David Alan Gilbert         RCU_READ_LOCK_GUARD();
3058b2a44fcaSPaolo Bonzini         fv = address_space_to_flatview(as);
3059b2a44fcaSPaolo Bonzini         result = flatview_read(fv, addr, attrs, buf, len);
3060b2a44fcaSPaolo Bonzini     }
3061b2a44fcaSPaolo Bonzini 
3062b2a44fcaSPaolo Bonzini     return result;
3063b2a44fcaSPaolo Bonzini }
3064b2a44fcaSPaolo Bonzini 
30654c6ebbb3SPaolo Bonzini MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
30664c6ebbb3SPaolo Bonzini                                 MemTxAttrs attrs,
3067daa3dda4SPhilippe Mathieu-Daudé                                 const void *buf, hwaddr len)
30684c6ebbb3SPaolo Bonzini {
30694c6ebbb3SPaolo Bonzini     MemTxResult result = MEMTX_OK;
30704c6ebbb3SPaolo Bonzini     FlatView *fv;
30714c6ebbb3SPaolo Bonzini 
30724c6ebbb3SPaolo Bonzini     if (len > 0) {
3073694ea274SDr. David Alan Gilbert         RCU_READ_LOCK_GUARD();
30744c6ebbb3SPaolo Bonzini         fv = address_space_to_flatview(as);
30754c6ebbb3SPaolo Bonzini         result = flatview_write(fv, addr, attrs, buf, len);
30764c6ebbb3SPaolo Bonzini     }
30774c6ebbb3SPaolo Bonzini 
30784c6ebbb3SPaolo Bonzini     return result;
30794c6ebbb3SPaolo Bonzini }
30804c6ebbb3SPaolo Bonzini 
3081db84fd97SPaolo Bonzini MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
3082daa3dda4SPhilippe Mathieu-Daudé                              void *buf, hwaddr len, bool is_write)
3083db84fd97SPaolo Bonzini {
3084db84fd97SPaolo Bonzini     if (is_write) {
3085db84fd97SPaolo Bonzini         return address_space_write(as, addr, attrs, buf, len);
3086db84fd97SPaolo Bonzini     } else {
3087db84fd97SPaolo Bonzini         return address_space_read_full(as, addr, attrs, buf, len);
3088db84fd97SPaolo Bonzini     }
3089db84fd97SPaolo Bonzini }
3090db84fd97SPaolo Bonzini 
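/*
 * Example (editor's sketch, not part of the original file): reading a
 * 64-bit value from guest-physical memory through the generic entry point
 * above; is_write selects between the read and write paths. MEMTX_OK
 * signals a fully successful transaction.
 */
static inline MemTxResult example_read_guest_u64(AddressSpace *as, hwaddr addr,
                                                 uint64_t *val)
{
    /* is_write == false: copy sizeof(*val) bytes from addr into *val */
    return address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED,
                            val, sizeof(*val), false);
}
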
309175f01c68SPhilippe Mathieu-Daudé MemTxResult address_space_set(AddressSpace *as, hwaddr addr,
309275f01c68SPhilippe Mathieu-Daudé                               uint8_t c, hwaddr len, MemTxAttrs attrs)
309375f01c68SPhilippe Mathieu-Daudé {
309475f01c68SPhilippe Mathieu-Daudé #define FILLBUF_SIZE 512
309575f01c68SPhilippe Mathieu-Daudé     uint8_t fillbuf[FILLBUF_SIZE];
309675f01c68SPhilippe Mathieu-Daudé     int l;
309775f01c68SPhilippe Mathieu-Daudé     MemTxResult error = MEMTX_OK;
309875f01c68SPhilippe Mathieu-Daudé 
309975f01c68SPhilippe Mathieu-Daudé     memset(fillbuf, c, FILLBUF_SIZE);
310075f01c68SPhilippe Mathieu-Daudé     while (len > 0) {
310175f01c68SPhilippe Mathieu-Daudé         l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
310275f01c68SPhilippe Mathieu-Daudé         error |= address_space_write(as, addr, attrs, fillbuf, l);
310375f01c68SPhilippe Mathieu-Daudé         len -= l;
310475f01c68SPhilippe Mathieu-Daudé         addr += l;
310575f01c68SPhilippe Mathieu-Daudé     }
310675f01c68SPhilippe Mathieu-Daudé 
310775f01c68SPhilippe Mathieu-Daudé     return error;
310875f01c68SPhilippe Mathieu-Daudé }
310975f01c68SPhilippe Mathieu-Daudé 
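/*
 * Example (editor's sketch, not part of the original file): scrubbing a
 * hypothetical guest DMA buffer with address_space_set(). Arbitrary lengths
 * are fine; internally the fill is issued in chunks of at most
 * FILLBUF_SIZE (512) bytes.
 */
static inline MemTxResult example_zero_guest_range(AddressSpace *as,
                                                   hwaddr addr, hwaddr len)
{
    return address_space_set(as, addr, 0x00, len, MEMTXATTRS_UNSPECIFIED);
}
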
3110d7ef71efSPhilippe Mathieu-Daudé void cpu_physical_memory_rw(hwaddr addr, void *buf,
311128c80bfeSPhilippe Mathieu-Daudé                             hwaddr len, bool is_write)
3112ac1970fbSAvi Kivity {
31135c9eb028SPeter Maydell     address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
31145c9eb028SPeter Maydell                      buf, len, is_write);
3115ac1970fbSAvi Kivity }
3116ac1970fbSAvi Kivity 
3117582b55a9SAlexander Graf enum write_rom_type {
3118582b55a9SAlexander Graf     WRITE_DATA,
3119582b55a9SAlexander Graf     FLUSH_CACHE,
3120582b55a9SAlexander Graf };
3121582b55a9SAlexander Graf 
312275693e14SPeter Maydell static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
312375693e14SPeter Maydell                                                            hwaddr addr,
312475693e14SPeter Maydell                                                            MemTxAttrs attrs,
3125daa3dda4SPhilippe Mathieu-Daudé                                                            const void *ptr,
31260c249ff7SLi Zhijian                                                            hwaddr len,
312775693e14SPeter Maydell                                                            enum write_rom_type type)
3128d0ecd2aaSbellard {
3129149f54b5SPaolo Bonzini     hwaddr l;
313020804676SPhilippe Mathieu-Daudé     uint8_t *ram_ptr;
3131149f54b5SPaolo Bonzini     hwaddr addr1;
31325c8a00ceSPaolo Bonzini     MemoryRegion *mr;
3133daa3dda4SPhilippe Mathieu-Daudé     const uint8_t *buf = ptr;
3134d0ecd2aaSbellard 
3135694ea274SDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
3136d0ecd2aaSbellard     while (len > 0) {
3137d0ecd2aaSbellard         l = len;
313875693e14SPeter Maydell         mr = address_space_translate(as, addr, &addr1, &l, true, attrs);
3139d0ecd2aaSbellard 
31405c8a00ceSPaolo Bonzini         if (!(memory_region_is_ram(mr) ||
31415c8a00ceSPaolo Bonzini               memory_region_is_romd(mr))) {
3142b242e0e0SPaolo Bonzini             l = memory_access_size(mr, l, addr1);
3143d0ecd2aaSbellard         } else {
3144d0ecd2aaSbellard             /* ROM/RAM case */
314520804676SPhilippe Mathieu-Daudé             ram_ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3146582b55a9SAlexander Graf             switch (type) {
3147582b55a9SAlexander Graf             case WRITE_DATA:
314820804676SPhilippe Mathieu-Daudé                 memcpy(ram_ptr, buf, l);
3149845b6214SPaolo Bonzini                 invalidate_and_set_dirty(mr, addr1, l);
3150582b55a9SAlexander Graf                 break;
3151582b55a9SAlexander Graf             case FLUSH_CACHE:
31521da8de39SRichard Henderson                 flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
3153582b55a9SAlexander Graf                 break;
3154582b55a9SAlexander Graf             }
3155d0ecd2aaSbellard         }
3156d0ecd2aaSbellard         len -= l;
3157d0ecd2aaSbellard         buf += l;
3158d0ecd2aaSbellard         addr += l;
3159d0ecd2aaSbellard     }
316075693e14SPeter Maydell     return MEMTX_OK;
3161d0ecd2aaSbellard }
3162d0ecd2aaSbellard 
3163582b55a9SAlexander Graf /* Used for ROM loading: can write to RAM and ROM. */
31643c8133f9SPeter Maydell MemTxResult address_space_write_rom(AddressSpace *as, hwaddr addr,
31653c8133f9SPeter Maydell                                     MemTxAttrs attrs,
3166daa3dda4SPhilippe Mathieu-Daudé                                     const void *buf, hwaddr len)
3167582b55a9SAlexander Graf {
31683c8133f9SPeter Maydell     return address_space_write_rom_internal(as, addr, attrs,
316975693e14SPeter Maydell                                             buf, len, WRITE_DATA);
3170582b55a9SAlexander Graf }
3171582b55a9SAlexander Graf 
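/*
 * Example (editor's sketch, not part of the original file): installing a
 * firmware blob at a hypothetical load address. Unlike address_space_write(),
 * the ROM variant above also succeeds when the target is ROM or a ROM device.
 */
static inline bool example_install_firmware(hwaddr load_addr,
                                            const void *blob, hwaddr size)
{
    return address_space_write_rom(&address_space_memory, load_addr,
                                   MEMTXATTRS_UNSPECIFIED,
                                   blob, size) == MEMTX_OK;
}
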
31720c249ff7SLi Zhijian void cpu_flush_icache_range(hwaddr start, hwaddr len)
3173582b55a9SAlexander Graf {
3174582b55a9SAlexander Graf     /*
3175582b55a9SAlexander Graf      * This function should do the same thing as an icache flush that was
3176582b55a9SAlexander Graf      * triggered from within the guest. For TCG we are always cache coherent,
3177582b55a9SAlexander Graf      * so there is no need to flush anything. For KVM / Xen we need to flush
3178582b55a9SAlexander Graf      * the host's instruction cache at least.
3179582b55a9SAlexander Graf      */
3180582b55a9SAlexander Graf     if (tcg_enabled()) {
3181582b55a9SAlexander Graf         return;
3182582b55a9SAlexander Graf     }
3183582b55a9SAlexander Graf 
318475693e14SPeter Maydell     address_space_write_rom_internal(&address_space_memory,
318575693e14SPeter Maydell                                      start, MEMTXATTRS_UNSPECIFIED,
318675693e14SPeter Maydell                                      NULL, len, FLUSH_CACHE);
3187582b55a9SAlexander Graf }
3188582b55a9SAlexander Graf 
3189637b0aa1SMattias Nissler /*
3190637b0aa1SMattias Nissler  * A magic value stored in the first 8 bytes of the bounce buffer struct. Used
3191637b0aa1SMattias Nissler  * to detect illegal pointers passed to address_space_unmap.
3192637b0aa1SMattias Nissler  */
3193637b0aa1SMattias Nissler #define BOUNCE_BUFFER_MAGIC 0xb4017ceb4ffe12ed
3194637b0aa1SMattias Nissler 
3195637b0aa1SMattias Nissler typedef struct {
3196637b0aa1SMattias Nissler     uint64_t magic;
3197637b0aa1SMattias Nissler     MemoryRegion *mr;
3198637b0aa1SMattias Nissler     hwaddr addr;
3199637b0aa1SMattias Nissler     size_t len;
3200637b0aa1SMattias Nissler     uint8_t buffer[];
3201637b0aa1SMattias Nissler } BounceBuffer;
3202637b0aa1SMattias Nissler 
320369e78f1bSMattias Nissler static void
320469e78f1bSMattias Nissler address_space_unregister_map_client_do(AddressSpaceMapClient *client)
3205ba223c29Saliguori {
320672cf2d4fSBlue Swirl     QLIST_REMOVE(client, link);
32077267c094SAnthony Liguori     g_free(client);
3208ba223c29Saliguori }
3209ba223c29Saliguori 
32105c627197SMattias Nissler static void address_space_notify_map_clients_locked(AddressSpace *as)
3211ba223c29Saliguori {
321269e78f1bSMattias Nissler     AddressSpaceMapClient *client;
3213ba223c29Saliguori 
321469e78f1bSMattias Nissler     while (!QLIST_EMPTY(&as->map_client_list)) {
321569e78f1bSMattias Nissler         client = QLIST_FIRST(&as->map_client_list);
3216e95205e1SFam Zheng         qemu_bh_schedule(client->bh);
32175c627197SMattias Nissler         address_space_unregister_map_client_do(client);
3218ba223c29Saliguori     }
3219ba223c29Saliguori }
3220ba223c29Saliguori 
32215c627197SMattias Nissler void address_space_register_map_client(AddressSpace *as, QEMUBH *bh)
3222d0ecd2aaSbellard {
322369e78f1bSMattias Nissler     AddressSpaceMapClient *client = g_malloc(sizeof(*client));
3224d0ecd2aaSbellard 
322569e78f1bSMattias Nissler     QEMU_LOCK_GUARD(&as->map_client_list_lock);
3226e95205e1SFam Zheng     client->bh = bh;
322769e78f1bSMattias Nissler     QLIST_INSERT_HEAD(&as->map_client_list, client, link);
3228637b0aa1SMattias Nissler     /* Write map_client_list before reading bounce_buffer_size. */
322933828ca1SPaolo Bonzini     smp_mb();
3230637b0aa1SMattias Nissler     if (qatomic_read(&as->bounce_buffer_size) < as->max_bounce_buffer_size) {
32315c627197SMattias Nissler         address_space_notify_map_clients_locked(as);
323233b6c2edSFam Zheng     }
3233d0ecd2aaSbellard }
3234d0ecd2aaSbellard 
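/*
 * Example (editor's sketch, not part of the original file): a device model
 * whose address_space_map() call returned NULL can park a bottom half here
 * and retry once bounce-buffer space is released. 'retry_bh' is a
 * hypothetical QEMUBH created by the caller with qemu_bh_new().
 */
static inline void example_retry_map_later(AddressSpace *as, QEMUBH *retry_bh)
{
    /* The BH is scheduled once address_space_unmap() frees buffer space. */
    address_space_register_map_client(as, retry_bh);
}
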
323538e047b5SFam Zheng void cpu_exec_init_all(void)
323638e047b5SFam Zheng {
323738e047b5SFam Zheng     qemu_mutex_init(&ram_list.mutex);
323820bccb82SPeter Maydell     /* The data structures we set up here depend on knowing the page size,
323920bccb82SPeter Maydell      * so no more changes can be made after this point.
324020bccb82SPeter Maydell      * In an ideal world, nothing we did before we had finished the
324120bccb82SPeter Maydell      * machine setup would care about the target page size, and we could
324220bccb82SPeter Maydell      * do this much later, rather than requiring board models to state
324320bccb82SPeter Maydell      * up front what their requirements are.
324420bccb82SPeter Maydell      */
324520bccb82SPeter Maydell     finalize_target_page_bits();
324638e047b5SFam Zheng     io_mem_init();
3247680a4783SPaolo Bonzini     memory_map_init();
324838e047b5SFam Zheng }
324938e047b5SFam Zheng 
32505c627197SMattias Nissler void address_space_unregister_map_client(AddressSpace *as, QEMUBH *bh)
3251d0ecd2aaSbellard {
325269e78f1bSMattias Nissler     AddressSpaceMapClient *client;
3253d0ecd2aaSbellard 
325469e78f1bSMattias Nissler     QEMU_LOCK_GUARD(&as->map_client_list_lock);
325569e78f1bSMattias Nissler     QLIST_FOREACH(client, &as->map_client_list, link) {
3256e95205e1SFam Zheng         if (client->bh == bh) {
32575c627197SMattias Nissler             address_space_unregister_map_client_do(client);
3258e95205e1SFam Zheng             break;
3259e95205e1SFam Zheng         }
3260e95205e1SFam Zheng     }
3261d0ecd2aaSbellard }
3262d0ecd2aaSbellard 
32635c627197SMattias Nissler static void address_space_notify_map_clients(AddressSpace *as)
3264d0ecd2aaSbellard {
326569e78f1bSMattias Nissler     QEMU_LOCK_GUARD(&as->map_client_list_lock);
32665c627197SMattias Nissler     address_space_notify_map_clients_locked(as);
32676d16c2f8Saliguori }
32686d16c2f8Saliguori 
32690c249ff7SLi Zhijian static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len,
3270eace72b7SPeter Maydell                                   bool is_write, MemTxAttrs attrs)
327151644ab7SPaolo Bonzini {
32725c8a00ceSPaolo Bonzini     MemoryRegion *mr;
327351644ab7SPaolo Bonzini     hwaddr l, xlat;
327451644ab7SPaolo Bonzini 
327551644ab7SPaolo Bonzini     while (len > 0) {
327651644ab7SPaolo Bonzini         l = len;
3277efa99a2fSPeter Maydell         mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
32785c8a00ceSPaolo Bonzini         if (!memory_access_is_direct(mr, is_write)) {
32795c8a00ceSPaolo Bonzini             l = memory_access_size(mr, l, addr);
3280eace72b7SPeter Maydell             if (!memory_region_access_valid(mr, xlat, l, is_write, attrs)) {
328151644ab7SPaolo Bonzini                 return false;
328251644ab7SPaolo Bonzini             }
328351644ab7SPaolo Bonzini         }
328451644ab7SPaolo Bonzini 
328551644ab7SPaolo Bonzini         len -= l;
328651644ab7SPaolo Bonzini         addr += l;
328751644ab7SPaolo Bonzini     }
328851644ab7SPaolo Bonzini     return true;
328951644ab7SPaolo Bonzini }
329051644ab7SPaolo Bonzini 
329116620684SAlexey Kardashevskiy bool address_space_access_valid(AddressSpace *as, hwaddr addr,
32920c249ff7SLi Zhijian                                 hwaddr len, bool is_write,
3293fddffa42SPeter Maydell                                 MemTxAttrs attrs)
329416620684SAlexey Kardashevskiy {
329511e732a5SPaolo Bonzini     FlatView *fv;
329611e732a5SPaolo Bonzini 
3297694ea274SDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
329811e732a5SPaolo Bonzini     fv = address_space_to_flatview(as);
329958e74682SPhilippe Mathieu-Daudé     return flatview_access_valid(fv, addr, len, is_write, attrs);
330016620684SAlexey Kardashevskiy }
330116620684SAlexey Kardashevskiy 
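/*
 * Example (editor's sketch, not part of the original file): an advisory
 * check that a whole guest-physical window would accept writes before a
 * multi-step transfer is started. The memory layout can still change
 * afterwards, so callers must still handle MEMTX_ACCESS_ERROR.
 */
static inline bool example_window_writable(AddressSpace *as,
                                           hwaddr addr, hwaddr len)
{
    return address_space_access_valid(as, addr, len, true,
                                      MEMTXATTRS_UNSPECIFIED);
}
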
3302715c31ecSPaolo Bonzini static hwaddr
330316620684SAlexey Kardashevskiy flatview_extend_translation(FlatView *fv, hwaddr addr,
330416620684SAlexey Kardashevskiy                             hwaddr target_len,
3305715c31ecSPaolo Bonzini                             MemoryRegion *mr, hwaddr base, hwaddr len,
330653d0790dSPeter Maydell                             bool is_write, MemTxAttrs attrs)
3307715c31ecSPaolo Bonzini {
3308715c31ecSPaolo Bonzini     hwaddr done = 0;
3309715c31ecSPaolo Bonzini     hwaddr xlat;
3310715c31ecSPaolo Bonzini     MemoryRegion *this_mr;
3311715c31ecSPaolo Bonzini 
3312715c31ecSPaolo Bonzini     for (;;) {
3313715c31ecSPaolo Bonzini         target_len -= len;
3314715c31ecSPaolo Bonzini         addr += len;
3315715c31ecSPaolo Bonzini         done += len;
3316715c31ecSPaolo Bonzini         if (target_len == 0) {
3317715c31ecSPaolo Bonzini             return done;
3318715c31ecSPaolo Bonzini         }
3319715c31ecSPaolo Bonzini 
3320715c31ecSPaolo Bonzini         len = target_len;
332116620684SAlexey Kardashevskiy         this_mr = flatview_translate(fv, addr, &xlat,
3322efa99a2fSPeter Maydell                                      &len, is_write, attrs);
3323715c31ecSPaolo Bonzini         if (this_mr != mr || xlat != base + done) {
3324715c31ecSPaolo Bonzini             return done;
3325715c31ecSPaolo Bonzini         }
3326715c31ecSPaolo Bonzini     }
3327715c31ecSPaolo Bonzini }
3328715c31ecSPaolo Bonzini 
33296d16c2f8Saliguori /* Map a physical memory region into a host virtual address.
33306d16c2f8Saliguori  * May map a subset of the requested range, given by and returned in *plen.
33316d16c2f8Saliguori  * May return NULL if resources needed to perform the mapping are exhausted.
33326d16c2f8Saliguori  * Use only for reads OR writes - not for read-modify-write operations.
33335c627197SMattias Nissler  * Use address_space_register_map_client() to know when retrying the map
33345c627197SMattias Nissler  * operation is likely to succeed.
33356d16c2f8Saliguori  */
3336ac1970fbSAvi Kivity void *address_space_map(AddressSpace *as,
3337a8170e5eSAvi Kivity                         hwaddr addr,
3338a8170e5eSAvi Kivity                         hwaddr *plen,
3339f26404fbSPeter Maydell                         bool is_write,
3340f26404fbSPeter Maydell                         MemTxAttrs attrs)
33416d16c2f8Saliguori {
3342a8170e5eSAvi Kivity     hwaddr len = *plen;
3343715c31ecSPaolo Bonzini     hwaddr l, xlat;
3344715c31ecSPaolo Bonzini     MemoryRegion *mr;
3345ad0c60faSPaolo Bonzini     FlatView *fv;
33466d16c2f8Saliguori 
3347d44fe13bSAlex Bennée     trace_address_space_map(as, addr, len, is_write, *(uint32_t *) &attrs);
3348d44fe13bSAlex Bennée 
3349e3127ae0SPaolo Bonzini     if (len == 0) {
3350e3127ae0SPaolo Bonzini         return NULL;
3351e3127ae0SPaolo Bonzini     }
3352e3127ae0SPaolo Bonzini 
33536d16c2f8Saliguori     l = len;
3354694ea274SDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
3355ad0c60faSPaolo Bonzini     fv = address_space_to_flatview(as);
3356efa99a2fSPeter Maydell     mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
335741063e1eSPaolo Bonzini 
33585c8a00ceSPaolo Bonzini     if (!memory_access_is_direct(mr, is_write)) {
3359637b0aa1SMattias Nissler         size_t used = qatomic_read(&as->bounce_buffer_size);
3360637b0aa1SMattias Nissler         for (;;) {
3361637b0aa1SMattias Nissler             hwaddr alloc = MIN(as->max_bounce_buffer_size - used, l);
3362637b0aa1SMattias Nissler             size_t new_size = used + alloc;
3363637b0aa1SMattias Nissler             size_t actual =
3364637b0aa1SMattias Nissler                 qatomic_cmpxchg(&as->bounce_buffer_size, used, new_size);
3365637b0aa1SMattias Nissler             if (actual == used) {
3366637b0aa1SMattias Nissler                 l = alloc;
3367637b0aa1SMattias Nissler                 break;
3368637b0aa1SMattias Nissler             }
3369637b0aa1SMattias Nissler             used = actual;
3370637b0aa1SMattias Nissler         }
3371637b0aa1SMattias Nissler 
3372637b0aa1SMattias Nissler         if (l == 0) {
337377f55eacSPrasad J Pandit             *plen = 0;
3374e3127ae0SPaolo Bonzini             return NULL;
33756d16c2f8Saliguori         }
3376d3e71559SPaolo Bonzini 
3377637b0aa1SMattias Nissler         BounceBuffer *bounce = g_malloc0(l + sizeof(BounceBuffer));
3378637b0aa1SMattias Nissler         bounce->magic = BOUNCE_BUFFER_MAGIC;
3379d3e71559SPaolo Bonzini         memory_region_ref(mr);
3380637b0aa1SMattias Nissler         bounce->mr = mr;
3381637b0aa1SMattias Nissler         bounce->addr = addr;
3382637b0aa1SMattias Nissler         bounce->len = l;
3383637b0aa1SMattias Nissler 
33846d16c2f8Saliguori         if (!is_write) {
3385d8d5ca40SFea.Wang             flatview_read(fv, addr, attrs,
3386637b0aa1SMattias Nissler                           bounce->buffer, l);
33876d16c2f8Saliguori         }
338838bee5dcSStefano Stabellini 
338938bee5dcSStefano Stabellini         *plen = l;
3390637b0aa1SMattias Nissler         return bounce->buffer;
33916d16c2f8Saliguori     }
3392e3127ae0SPaolo Bonzini 
3393d3e71559SPaolo Bonzini     memory_region_ref(mr);
339416620684SAlexey Kardashevskiy     *plen = flatview_extend_translation(fv, addr, len, mr, xlat,
339553d0790dSPeter Maydell                                         l, is_write, attrs);
3396fc1c8344SAlexander Bulekov     fuzz_dma_read_cb(addr, *plen, mr);
33975a5585f4SEdgar E. Iglesias     return qemu_ram_ptr_length(mr->ram_block, xlat, plen, true, is_write);
33986d16c2f8Saliguori }
33996d16c2f8Saliguori 
3400ac1970fbSAvi Kivity /* Unmaps a memory region previously mapped by address_space_map().
3401ae5883abSPhilippe Mathieu-Daudé  * Will also mark the memory as dirty if is_write is true.  access_len gives
34026d16c2f8Saliguori  * the amount of memory that was actually read or written by the caller.
34036d16c2f8Saliguori  */
3404a8170e5eSAvi Kivity void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3405ae5883abSPhilippe Mathieu-Daudé                          bool is_write, hwaddr access_len)
34066d16c2f8Saliguori {
3407d3e71559SPaolo Bonzini     MemoryRegion *mr;
34087443b437SPaolo Bonzini     ram_addr_t addr1;
3409d3e71559SPaolo Bonzini 
341007bdaa41SPaolo Bonzini     mr = memory_region_from_host(buffer, &addr1);
3411637b0aa1SMattias Nissler     if (mr != NULL) {
3412d3e71559SPaolo Bonzini         if (is_write) {
3413845b6214SPaolo Bonzini             invalidate_and_set_dirty(mr, addr1, access_len);
34146d16c2f8Saliguori         }
3415868bb33fSJan Kiszka         if (xen_enabled()) {
3416e41d7c69SJan Kiszka             xen_invalidate_map_cache_entry(buffer);
3417050a0ddfSAnthony PERARD         }
3418d3e71559SPaolo Bonzini         memory_region_unref(mr);
34196d16c2f8Saliguori         return;
34206d16c2f8Saliguori     }
3421637b0aa1SMattias Nissler 
3422637b0aa1SMattias Nissler 
3423637b0aa1SMattias Nissler     BounceBuffer *bounce = container_of(buffer, BounceBuffer, buffer);
3424637b0aa1SMattias Nissler     assert(bounce->magic == BOUNCE_BUFFER_MAGIC);
3425637b0aa1SMattias Nissler 
34266d16c2f8Saliguori     if (is_write) {
3427637b0aa1SMattias Nissler         address_space_write(as, bounce->addr, MEMTXATTRS_UNSPECIFIED,
3428637b0aa1SMattias Nissler                             bounce->buffer, access_len);
34296d16c2f8Saliguori     }
3430637b0aa1SMattias Nissler 
3431637b0aa1SMattias Nissler     qatomic_sub(&as->bounce_buffer_size, bounce->len);
3432637b0aa1SMattias Nissler     bounce->magic = ~BOUNCE_BUFFER_MAGIC;
3433637b0aa1SMattias Nissler     memory_region_unref(bounce->mr);
3434637b0aa1SMattias Nissler     g_free(bounce);
3435637b0aa1SMattias Nissler     /* Write bounce_buffer_size before reading map_client_list. */
3436637b0aa1SMattias Nissler     smp_mb();
34375c627197SMattias Nissler     address_space_notify_map_clients(as);
34386d16c2f8Saliguori }
3439d0ecd2aaSbellard 
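/*
 * Example (editor's sketch, not part of the original file): the canonical
 * map/access/unmap cycle. *plen may come back shorter than requested, and
 * NULL means mapping resources (e.g. bounce-buffer space) are currently
 * exhausted.
 */
static bool example_dma_fill(AddressSpace *as, hwaddr addr, hwaddr len,
                             uint8_t pattern)
{
    hwaddr plen = len;
    void *host = address_space_map(as, addr, &plen, true,
                                   MEMTXATTRS_UNSPECIFIED);

    if (!host) {
        return false; /* retry later via address_space_register_map_client() */
    }
    memset(host, pattern, plen);
    /* is_write == true: the plen bytes we touched are marked dirty. */
    address_space_unmap(as, host, plen, true, plen);
    return plen == len; /* the caller must loop if only a subset was mapped */
}
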
3440a8170e5eSAvi Kivity void *cpu_physical_memory_map(hwaddr addr,
3441a8170e5eSAvi Kivity                               hwaddr *plen,
344228c80bfeSPhilippe Mathieu-Daudé                               bool is_write)
3443ac1970fbSAvi Kivity {
3444f26404fbSPeter Maydell     return address_space_map(&address_space_memory, addr, plen, is_write,
3445f26404fbSPeter Maydell                              MEMTXATTRS_UNSPECIFIED);
3446ac1970fbSAvi Kivity }
3447ac1970fbSAvi Kivity 
3448a8170e5eSAvi Kivity void cpu_physical_memory_unmap(void *buffer, hwaddr len,
344928c80bfeSPhilippe Mathieu-Daudé                                bool is_write, hwaddr access_len)
3450ac1970fbSAvi Kivity {
3451ac1970fbSAvi Kivity     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3452ac1970fbSAvi Kivity }
3453ac1970fbSAvi Kivity 
34540ce265ffSPaolo Bonzini #define ARG1_DECL                AddressSpace *as
34550ce265ffSPaolo Bonzini #define ARG1                     as
34560ce265ffSPaolo Bonzini #define SUFFIX
34570ce265ffSPaolo Bonzini #define TRANSLATE(...)           address_space_translate(as, __VA_ARGS__)
34580ce265ffSPaolo Bonzini #define RCU_READ_LOCK(...)       rcu_read_lock()
34590ce265ffSPaolo Bonzini #define RCU_READ_UNLOCK(...)     rcu_read_unlock()
3460139c1837SPaolo Bonzini #include "memory_ldst.c.inc"
34611e78bcc1SAlexander Graf 
34621f4e496eSPaolo Bonzini int64_t address_space_cache_init(MemoryRegionCache *cache,
34631f4e496eSPaolo Bonzini                                  AddressSpace *as,
34641f4e496eSPaolo Bonzini                                  hwaddr addr,
34651f4e496eSPaolo Bonzini                                  hwaddr len,
34661f4e496eSPaolo Bonzini                                  bool is_write)
34671f4e496eSPaolo Bonzini {
346848564041SPaolo Bonzini     AddressSpaceDispatch *d;
346948564041SPaolo Bonzini     hwaddr l;
347048564041SPaolo Bonzini     MemoryRegion *mr;
34714bfb024bSPaolo Bonzini     Int128 diff;
347248564041SPaolo Bonzini 
347348564041SPaolo Bonzini     assert(len > 0);
347448564041SPaolo Bonzini 
347548564041SPaolo Bonzini     l = len;
347648564041SPaolo Bonzini     cache->fv = address_space_get_flatview(as);
347748564041SPaolo Bonzini     d = flatview_to_dispatch(cache->fv);
347848564041SPaolo Bonzini     cache->mrs = *address_space_translate_internal(d, addr, &cache->xlat, &l, true);
347948564041SPaolo Bonzini 
34804bfb024bSPaolo Bonzini     /*
34814bfb024bSPaolo Bonzini      * cache->xlat is now relative to cache->mrs.mr, not to the section itself.
34824bfb024bSPaolo Bonzini      * Take that into account to compute how many bytes are there between
34834bfb024bSPaolo Bonzini      * cache->xlat and the end of the section.
34844bfb024bSPaolo Bonzini      */
34854bfb024bSPaolo Bonzini     diff = int128_sub(cache->mrs.size,
34864bfb024bSPaolo Bonzini                       int128_make64(cache->xlat - cache->mrs.offset_within_region));
34874bfb024bSPaolo Bonzini     l = int128_get64(int128_min(diff, int128_make64(l)));
34884bfb024bSPaolo Bonzini 
348948564041SPaolo Bonzini     mr = cache->mrs.mr;
349048564041SPaolo Bonzini     memory_region_ref(mr);
349148564041SPaolo Bonzini     if (memory_access_is_direct(mr, is_write)) {
349253d0790dSPeter Maydell         /* We don't care about the memory attributes here as we're only
349353d0790dSPeter Maydell          * doing this if we found actual RAM, which behaves the same
349453d0790dSPeter Maydell          * regardless of attributes; so UNSPECIFIED is fine.
349553d0790dSPeter Maydell          */
349648564041SPaolo Bonzini         l = flatview_extend_translation(cache->fv, addr, len, mr,
349753d0790dSPeter Maydell                                         cache->xlat, l, is_write,
349853d0790dSPeter Maydell                                         MEMTXATTRS_UNSPECIFIED);
34995a5585f4SEdgar E. Iglesias         cache->ptr = qemu_ram_ptr_length(mr->ram_block, cache->xlat, &l, true,
35005a5585f4SEdgar E. Iglesias                                          is_write);
350148564041SPaolo Bonzini     } else {
350248564041SPaolo Bonzini         cache->ptr = NULL;
350348564041SPaolo Bonzini     }
350448564041SPaolo Bonzini 
350548564041SPaolo Bonzini     cache->len = l;
350648564041SPaolo Bonzini     cache->is_write = is_write;
350748564041SPaolo Bonzini     return l;
35081f4e496eSPaolo Bonzini }
35091f4e496eSPaolo Bonzini 
35101f4e496eSPaolo Bonzini void address_space_cache_invalidate(MemoryRegionCache *cache,
35111f4e496eSPaolo Bonzini                                     hwaddr addr,
35121f4e496eSPaolo Bonzini                                     hwaddr access_len)
35131f4e496eSPaolo Bonzini {
351448564041SPaolo Bonzini     assert(cache->is_write);
351548564041SPaolo Bonzini     if (likely(cache->ptr)) {
351648564041SPaolo Bonzini         invalidate_and_set_dirty(cache->mrs.mr, addr + cache->xlat, access_len);
351748564041SPaolo Bonzini     }
35181f4e496eSPaolo Bonzini }
35191f4e496eSPaolo Bonzini 
35201f4e496eSPaolo Bonzini void address_space_cache_destroy(MemoryRegionCache *cache)
35211f4e496eSPaolo Bonzini {
352248564041SPaolo Bonzini     if (!cache->mrs.mr) {
352348564041SPaolo Bonzini         return;
352448564041SPaolo Bonzini     }
352548564041SPaolo Bonzini 
352648564041SPaolo Bonzini     if (xen_enabled()) {
352748564041SPaolo Bonzini         xen_invalidate_map_cache_entry(cache->ptr);
352848564041SPaolo Bonzini     }
352948564041SPaolo Bonzini     memory_region_unref(cache->mrs.mr);
353048564041SPaolo Bonzini     flatview_unref(cache->fv);
353148564041SPaolo Bonzini     cache->mrs.mr = NULL;
353248564041SPaolo Bonzini     cache->fv = NULL;
353348564041SPaolo Bonzini }
353448564041SPaolo Bonzini 
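/*
 * Example (editor's sketch, not part of the original file): the typical
 * MemoryRegionCache lifecycle, as used by virtio-style code that accesses
 * the same descriptor area repeatedly. The hypothetical 'ring_addr' points
 * at a 16-bit index in guest memory.
 */
static bool example_read_ring_index(AddressSpace *as, hwaddr ring_addr,
                                    uint16_t *idx)
{
    MemoryRegionCache cache;
    MemTxResult res = MEMTX_ERROR;

    if (address_space_cache_init(&cache, as, ring_addr, sizeof(*idx),
                                 false) >= (int64_t)sizeof(*idx)) {
        /* Direct pointer for RAM; falls back to _cached_slow for MMIO. */
        res = address_space_read_cached(&cache, 0, idx, sizeof(*idx));
        address_space_cache_destroy(&cache);
    }
    return res == MEMTX_OK;
}
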
353548564041SPaolo Bonzini /* Called from RCU critical section.  This function has the same
353648564041SPaolo Bonzini  * semantics as address_space_translate, but it only works on a
353748564041SPaolo Bonzini  * predefined range of a MemoryRegion that was mapped with
353848564041SPaolo Bonzini  * address_space_cache_init.
353948564041SPaolo Bonzini  */
354048564041SPaolo Bonzini static inline MemoryRegion *address_space_translate_cached(
354148564041SPaolo Bonzini     MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat,
3542bc6b1cecSPeter Maydell     hwaddr *plen, bool is_write, MemTxAttrs attrs)
354348564041SPaolo Bonzini {
354448564041SPaolo Bonzini     MemoryRegionSection section;
354548564041SPaolo Bonzini     MemoryRegion *mr;
354648564041SPaolo Bonzini     IOMMUMemoryRegion *iommu_mr;
354748564041SPaolo Bonzini     AddressSpace *target_as;
354848564041SPaolo Bonzini 
354948564041SPaolo Bonzini     assert(!cache->ptr);
355048564041SPaolo Bonzini     *xlat = addr + cache->xlat;
355148564041SPaolo Bonzini 
355248564041SPaolo Bonzini     mr = cache->mrs.mr;
355348564041SPaolo Bonzini     iommu_mr = memory_region_get_iommu(mr);
355448564041SPaolo Bonzini     if (!iommu_mr) {
355548564041SPaolo Bonzini         /* MMIO region.  */
355648564041SPaolo Bonzini         return mr;
355748564041SPaolo Bonzini     }
355848564041SPaolo Bonzini 
355948564041SPaolo Bonzini     section = address_space_translate_iommu(iommu_mr, xlat, plen,
356048564041SPaolo Bonzini                                             NULL, is_write, true,
35612f7b009cSPeter Maydell                                             &target_as, attrs);
356248564041SPaolo Bonzini     return section.mr;
356348564041SPaolo Bonzini }
356448564041SPaolo Bonzini 
356547293c92SJonathan Cameron /* Called within RCU critical section.  */
356647293c92SJonathan Cameron static MemTxResult address_space_write_continue_cached(MemTxAttrs attrs,
356747293c92SJonathan Cameron                                                        const void *ptr,
356847293c92SJonathan Cameron                                                        hwaddr len,
356947293c92SJonathan Cameron                                                        hwaddr mr_addr,
357047293c92SJonathan Cameron                                                        hwaddr l,
357147293c92SJonathan Cameron                                                        MemoryRegion *mr)
357247293c92SJonathan Cameron {
357347293c92SJonathan Cameron     MemTxResult result = MEMTX_OK;
357447293c92SJonathan Cameron     const uint8_t *buf = ptr;
357547293c92SJonathan Cameron 
357647293c92SJonathan Cameron     for (;;) {
357747293c92SJonathan Cameron         result |= flatview_write_continue_step(attrs, buf, len, mr_addr, &l,
357847293c92SJonathan Cameron                                                mr);
357947293c92SJonathan Cameron 
358047293c92SJonathan Cameron         len -= l;
358147293c92SJonathan Cameron         buf += l;
358247293c92SJonathan Cameron         mr_addr += l;
358347293c92SJonathan Cameron 
358447293c92SJonathan Cameron         if (!len) {
358547293c92SJonathan Cameron             break;
358647293c92SJonathan Cameron         }
358747293c92SJonathan Cameron 
358847293c92SJonathan Cameron         l = len;
358947293c92SJonathan Cameron     }
359047293c92SJonathan Cameron 
359147293c92SJonathan Cameron     return result;
359247293c92SJonathan Cameron }
359347293c92SJonathan Cameron 
359447293c92SJonathan Cameron /* Called within RCU critical section.  */
359547293c92SJonathan Cameron static MemTxResult address_space_read_continue_cached(MemTxAttrs attrs,
359647293c92SJonathan Cameron                                                       void *ptr, hwaddr len,
359747293c92SJonathan Cameron                                                       hwaddr mr_addr, hwaddr l,
359847293c92SJonathan Cameron                                                       MemoryRegion *mr)
359947293c92SJonathan Cameron {
360047293c92SJonathan Cameron     MemTxResult result = MEMTX_OK;
360147293c92SJonathan Cameron     uint8_t *buf = ptr;
360247293c92SJonathan Cameron 
360347293c92SJonathan Cameron     for (;;) {
360447293c92SJonathan Cameron         result |= flatview_read_continue_step(attrs, buf, len, mr_addr, &l, mr);
360547293c92SJonathan Cameron         len -= l;
360647293c92SJonathan Cameron         buf += l;
360747293c92SJonathan Cameron         mr_addr += l;
360847293c92SJonathan Cameron 
360947293c92SJonathan Cameron         if (!len) {
361047293c92SJonathan Cameron             break;
361147293c92SJonathan Cameron         }
361247293c92SJonathan Cameron         l = len;
361347293c92SJonathan Cameron     }
361447293c92SJonathan Cameron 
361547293c92SJonathan Cameron     return result;
361647293c92SJonathan Cameron }
361747293c92SJonathan Cameron 
361848564041SPaolo Bonzini /* Called from RCU critical section. address_space_read_cached uses this
361948564041SPaolo Bonzini  * out-of-line function when the target is an MMIO or IOMMU region.
362048564041SPaolo Bonzini  */
362138df19faSPhilippe Mathieu-Daudé MemTxResult
362248564041SPaolo Bonzini address_space_read_cached_slow(MemoryRegionCache *cache, hwaddr addr,
36230c249ff7SLi Zhijian                                    void *buf, hwaddr len)
362448564041SPaolo Bonzini {
36254c7c8563SJonathan Cameron     hwaddr mr_addr, l;
362648564041SPaolo Bonzini     MemoryRegion *mr;
362748564041SPaolo Bonzini 
362848564041SPaolo Bonzini     l = len;
36294c7c8563SJonathan Cameron     mr = address_space_translate_cached(cache, addr, &mr_addr, &l, false,
3630bc6b1cecSPeter Maydell                                         MEMTXATTRS_UNSPECIFIED);
363147293c92SJonathan Cameron     return address_space_read_continue_cached(MEMTXATTRS_UNSPECIFIED,
363247293c92SJonathan Cameron                                               buf, len, mr_addr, l, mr);
363348564041SPaolo Bonzini }
363448564041SPaolo Bonzini 
363548564041SPaolo Bonzini /* Called from RCU critical section. address_space_write_cached uses this
363648564041SPaolo Bonzini  * out-of-line function when the target is an MMIO or IOMMU region.
363748564041SPaolo Bonzini  */
363838df19faSPhilippe Mathieu-Daudé MemTxResult
363948564041SPaolo Bonzini address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr,
36400c249ff7SLi Zhijian                                     const void *buf, hwaddr len)
364148564041SPaolo Bonzini {
36424c7c8563SJonathan Cameron     hwaddr mr_addr, l;
364348564041SPaolo Bonzini     MemoryRegion *mr;
364448564041SPaolo Bonzini 
364548564041SPaolo Bonzini     l = len;
36464c7c8563SJonathan Cameron     mr = address_space_translate_cached(cache, addr, &mr_addr, &l, true,
3647bc6b1cecSPeter Maydell                                         MEMTXATTRS_UNSPECIFIED);
364847293c92SJonathan Cameron     return address_space_write_continue_cached(MEMTXATTRS_UNSPECIFIED,
364947293c92SJonathan Cameron                                                buf, len, mr_addr, l, mr);
36501f4e496eSPaolo Bonzini }
36511f4e496eSPaolo Bonzini 
36521f4e496eSPaolo Bonzini #define ARG1_DECL                MemoryRegionCache *cache
36531f4e496eSPaolo Bonzini #define ARG1                     cache
365448564041SPaolo Bonzini #define SUFFIX                   _cached_slow
365548564041SPaolo Bonzini #define TRANSLATE(...)           address_space_translate_cached(cache, __VA_ARGS__)
365648564041SPaolo Bonzini #define RCU_READ_LOCK()          ((void)0)
365748564041SPaolo Bonzini #define RCU_READ_UNLOCK()        ((void)0)
3658139c1837SPaolo Bonzini #include "memory_ldst.c.inc"
36591f4e496eSPaolo Bonzini 
36605e2972fdSaliguori /* virtual memory access for debug (includes writing to ROM) */
366173842ef0SPhilippe Mathieu-Daudé int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
366273842ef0SPhilippe Mathieu-Daudé                         void *ptr, size_t len, bool is_write)
366313eb76e0Sbellard {
3664a8170e5eSAvi Kivity     hwaddr phys_addr;
366573842ef0SPhilippe Mathieu-Daudé     vaddr l, page;
3666d7ef71efSPhilippe Mathieu-Daudé     uint8_t *buf = ptr;
366713eb76e0Sbellard 
366879ca7a1bSChristian Borntraeger     cpu_synchronize_state(cpu);
366913eb76e0Sbellard     while (len > 0) {
36705232e4c7SPeter Maydell         int asidx;
36715232e4c7SPeter Maydell         MemTxAttrs attrs;
3672ddfc8b96SPhilippe Mathieu-Daudé         MemTxResult res;
36735232e4c7SPeter Maydell 
367413eb76e0Sbellard         page = addr & TARGET_PAGE_MASK;
36755232e4c7SPeter Maydell         phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
36765232e4c7SPeter Maydell         asidx = cpu_asidx_from_attrs(cpu, attrs);
367713eb76e0Sbellard         /* if no physical page mapped, return an error */
367813eb76e0Sbellard         if (phys_addr == -1) {
367913eb76e0Sbellard             return -1;
367913eb76e0Sbellard         }
368013eb76e0Sbellard         l = (page + TARGET_PAGE_SIZE) - addr;
368113eb76e0Sbellard         if (l > len) {
368213eb76e0Sbellard             l = len;
368213eb76e0Sbellard         }
36835e2972fdSaliguori         phys_addr += (addr & ~TARGET_PAGE_MASK);
36842e38847bSEdgar E. Iglesias         if (is_write) {
3685ddfc8b96SPhilippe Mathieu-Daudé             res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr,
3686ea7a5330SPeter Maydell                                           attrs, buf, l);
36872e38847bSEdgar E. Iglesias         } else {
3688ddfc8b96SPhilippe Mathieu-Daudé             res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr,
3689ddfc8b96SPhilippe Mathieu-Daudé                                      attrs, buf, l);
3690ddfc8b96SPhilippe Mathieu-Daudé         }
3691ddfc8b96SPhilippe Mathieu-Daudé         if (res != MEMTX_OK) {
3692ddfc8b96SPhilippe Mathieu-Daudé             return -1;
36932e38847bSEdgar E. Iglesias         }
369413eb76e0Sbellard         len -= l;
369513eb76e0Sbellard         buf += l;
369613eb76e0Sbellard         addr += l;
369713eb76e0Sbellard     }
369813eb76e0Sbellard     return 0;
369913eb76e0Sbellard }
3700038629a6SDr. David Alan Gilbert 
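/*
 * Example (editor's sketch, not part of the original file): a gdbstub-style
 * peek at guest *virtual* memory. A return of -1 covers both an unmapped
 * page and a failed transaction.
 */
static inline bool example_debug_peek_u32(CPUState *cpu, vaddr va,
                                          uint32_t *out)
{
    return cpu_memory_rw_debug(cpu, va, out, sizeof(*out), false) == 0;
}
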
3701a8170e5eSAvi Kivity bool cpu_physical_memory_is_io(hwaddr phys_addr)
370276f35538SWen Congyang {
37035c8a00ceSPaolo Bonzini     MemoryRegion *mr;
3704149f54b5SPaolo Bonzini     hwaddr l = 1;
370576f35538SWen Congyang 
3706694ea274SDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
37075c8a00ceSPaolo Bonzini     mr = address_space_translate(&address_space_memory,
3708bc6b1cecSPeter Maydell                                  phys_addr, &phys_addr, &l, false,
3709bc6b1cecSPeter Maydell                                  MEMTXATTRS_UNSPECIFIED);
371076f35538SWen Congyang 
371166997c42SMarkus Armbruster     return !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
371276f35538SWen Congyang }
3713bd2fa51fSMichael R. Hines 
3714e3807054SDr. David Alan Gilbert int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3715bd2fa51fSMichael R. Hines {
3716bd2fa51fSMichael R. Hines     RAMBlock *block;
3717e3807054SDr. David Alan Gilbert     int ret = 0;
3718bd2fa51fSMichael R. Hines 
3719694ea274SDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
372099e15582SPeter Xu     RAMBLOCK_FOREACH(block) {
3721754cb9c0SYury Kotov         ret = func(block, opaque);
3722e3807054SDr. David Alan Gilbert         if (ret) {
3723e3807054SDr. David Alan Gilbert             break;
3724e3807054SDr. David Alan Gilbert         }
3725bd2fa51fSMichael R. Hines     }
3726e3807054SDr. David Alan Gilbert     return ret;
3727bd2fa51fSMichael R. Hines }
3728d3a5038cSDr. David Alan Gilbert 
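/*
 * Example (editor's sketch, not part of the original file): a callback for
 * qemu_ram_foreach_block() that totals the used length of all RAM blocks.
 * Returning non-zero from the callback stops the walk early.
 *
 * Usage: uint64_t total = 0;
 *        qemu_ram_foreach_block(example_sum_ram_cb, &total);
 */
static int example_sum_ram_cb(RAMBlock *rb, void *opaque)
{
    *(uint64_t *)opaque += qemu_ram_get_used_length(rb);
    return 0; /* keep iterating */
}
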
3729d3a5038cSDr. David Alan Gilbert /*
3730d3a5038cSDr. David Alan Gilbert  * Unmap pages of memory from start to start+length such that
3731d3a5038cSDr. David Alan Gilbert  * they a) read as 0, b) trigger whatever fault mechanism
3732d3a5038cSDr. David Alan Gilbert  * the OS provides for postcopy.
3733d3a5038cSDr. David Alan Gilbert  * The pages must be unmapped by the end of the function.
3734d3a5038cSDr. David Alan Gilbert  * Returns: 0 on success, non-0 on failure
3735d3a5038cSDr. David Alan Gilbert  *
3736d3a5038cSDr. David Alan Gilbert  */
3737d3a5038cSDr. David Alan Gilbert int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
3738d3a5038cSDr. David Alan Gilbert {
3739d3a5038cSDr. David Alan Gilbert     int ret = -1;
3740d3a5038cSDr. David Alan Gilbert 
3741d3a5038cSDr. David Alan Gilbert     uint8_t *host_startaddr = rb->host + start;
3742d3a5038cSDr. David Alan Gilbert 
3743619bd31dSMarc-André Lureau     if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) {
3744ea18be78SXiaoyao Li         error_report("%s: Unaligned start address: %p",
3745ea18be78SXiaoyao Li                      __func__, host_startaddr);
3746d3a5038cSDr. David Alan Gilbert         goto err;
3747d3a5038cSDr. David Alan Gilbert     }
3748d3a5038cSDr. David Alan Gilbert 
3749dcdc4607SDavid Hildenbrand     if ((start + length) <= rb->max_length) {
3750db144f70SDr. David Alan Gilbert         bool need_madvise, need_fallocate;
3751619bd31dSMarc-André Lureau         if (!QEMU_IS_ALIGNED(length, rb->page_size)) {
3752ea18be78SXiaoyao Li             error_report("%s: Unaligned length: %zx", __func__, length);
3753d3a5038cSDr. David Alan Gilbert             goto err;
3754d3a5038cSDr. David Alan Gilbert         }
3755d3a5038cSDr. David Alan Gilbert 
3756d3a5038cSDr. David Alan Gilbert         errno = ENOTSUP; /* If we are missing MADVISE etc */
3757d3a5038cSDr. David Alan Gilbert 
3758db144f70SDr. David Alan Gilbert         /* The logic here is messy;
3759db144f70SDr. David Alan Gilbert          *    madvise DONTNEED fails for hugepages
3760db144f70SDr. David Alan Gilbert          *    fallocate works on hugepages and shmem
3761cdfa56c5SDavid Hildenbrand          *    shared anonymous memory requires madvise REMOVE
3762d3a5038cSDr. David Alan Gilbert          */
376380c3aeefSRichard Henderson         need_madvise = (rb->page_size == qemu_real_host_page_size());
3764db144f70SDr. David Alan Gilbert         need_fallocate = rb->fd != -1;
3765db144f70SDr. David Alan Gilbert         if (need_fallocate) {
3766db144f70SDr. David Alan Gilbert             /* For a file, this causes the area of the file to be zero'd
3767db144f70SDr. David Alan Gilbert              * if read, and for hugetlbfs also causes it to be unmapped
3768db144f70SDr. David Alan Gilbert              * so a userfault will trigger.
3769e2fa71f5SDr. David Alan Gilbert              */
3770e2fa71f5SDr. David Alan Gilbert #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
37711d44ff58SDavid Hildenbrand             /*
3772b2cccb52SDavid Hildenbrand              * fallocate() will fail with readonly files. Let's print a
3773b2cccb52SDavid Hildenbrand              * proper error message.
3774b2cccb52SDavid Hildenbrand              */
3775b2cccb52SDavid Hildenbrand             if (rb->flags & RAM_READONLY_FD) {
3776ea18be78SXiaoyao Li                 error_report("%s: Discarding RAM with readonly files is not"
3777ea18be78SXiaoyao Li                              " supported", __func__);
3778b2cccb52SDavid Hildenbrand                 goto err;
3779b2cccb52SDavid Hildenbrand 
3780b2cccb52SDavid Hildenbrand             }
3781b2cccb52SDavid Hildenbrand             /*
37821d44ff58SDavid Hildenbrand              * We'll discard data from the actual file, even though we only
37831d44ff58SDavid Hildenbrand              * have a MAP_PRIVATE mapping, possibly messing with other
37841d44ff58SDavid Hildenbrand              * MAP_PRIVATE/MAP_SHARED mappings. There is no easy way to
37851d44ff58SDavid Hildenbrand              * change that behavior without violating the promised
37861d44ff58SDavid Hildenbrand              * semantics of ram_block_discard_range().
37871d44ff58SDavid Hildenbrand              *
37881d44ff58SDavid Hildenbrand              * Only warn, because it works as long as nobody else uses that
37891d44ff58SDavid Hildenbrand              * file.
37901d44ff58SDavid Hildenbrand              */
37911d44ff58SDavid Hildenbrand             if (!qemu_ram_is_shared(rb)) {
3792ea18be78SXiaoyao Li                 warn_report_once("%s: Discarding RAM"
37931d44ff58SDavid Hildenbrand                                  " in private file mappings is possibly"
37941d44ff58SDavid Hildenbrand                                  " dangerous, because it will modify the"
37951d44ff58SDavid Hildenbrand                                  " underlying file and will affect other"
3796ea18be78SXiaoyao Li                                  " users of the file", __func__);
37971d44ff58SDavid Hildenbrand             }
37981d44ff58SDavid Hildenbrand 
3799e2fa71f5SDr. David Alan Gilbert             ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
3800*f051a9c4SWilliam Roche                             start + rb->fd_offset, length);
3801db144f70SDr. David Alan Gilbert             if (ret) {
3802db144f70SDr. David Alan Gilbert                 ret = -errno;
3803*f051a9c4SWilliam Roche                 error_report("%s: Failed to fallocate %s:%" PRIx64 "+%" PRIx64
3804*f051a9c4SWilliam Roche                              " +%zx (%d)", __func__, rb->idstr, start,
3805*f051a9c4SWilliam Roche                              rb->fd_offset, length, ret);
3806db144f70SDr. David Alan Gilbert                 goto err;
3807db144f70SDr. David Alan Gilbert             }
3808db144f70SDr. David Alan Gilbert #else
3809db144f70SDr. David Alan Gilbert             ret = -ENOSYS;
3810ea18be78SXiaoyao Li             error_report("%s: fallocate not available/file "
3811*f051a9c4SWilliam Roche                          "%s:%" PRIx64 "+%" PRIx64 " +%zx (%d)", __func__,
3812*f051a9c4SWilliam Roche                          rb->idstr, start, rb->fd_offset, length, ret);
3813db144f70SDr. David Alan Gilbert             goto err;
3814e2fa71f5SDr. David Alan Gilbert #endif
3815e2fa71f5SDr. David Alan Gilbert         }
3816db144f70SDr. David Alan Gilbert         if (need_madvise) {
3817db144f70SDr. David Alan Gilbert             /* For normal RAM this causes it to be unmapped,
3818db144f70SDr. David Alan Gilbert              * for shared memory it causes the local mapping to disappear
3819db144f70SDr. David Alan Gilbert              * and to fall back on the file contents (which we just
3820db144f70SDr. David Alan Gilbert              * fallocate'd away).
3821db144f70SDr. David Alan Gilbert              */
3822db144f70SDr. David Alan Gilbert #if defined(CONFIG_MADVISE)
3823cdfa56c5SDavid Hildenbrand             if (qemu_ram_is_shared(rb) && rb->fd < 0) {
3824cdfa56c5SDavid Hildenbrand                 ret = madvise(host_startaddr, length, QEMU_MADV_REMOVE);
3825cdfa56c5SDavid Hildenbrand             } else {
3826cdfa56c5SDavid Hildenbrand                 ret = madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
3827cdfa56c5SDavid Hildenbrand             }
3828d3a5038cSDr. David Alan Gilbert             if (ret) {
3829d3a5038cSDr. David Alan Gilbert                 ret = -errno;
3830ea18be78SXiaoyao Li                 error_report("%s: Failed to discard range "
3831d3a5038cSDr. David Alan Gilbert                              "%s:%" PRIx64 " +%zx (%d)",
3832ea18be78SXiaoyao Li                              __func__, rb->idstr, start, length, ret);
3833db144f70SDr. David Alan Gilbert                 goto err;
3834d3a5038cSDr. David Alan Gilbert             }
3835db144f70SDr. David Alan Gilbert #else
3836db144f70SDr. David Alan Gilbert             ret = -ENOSYS;
3837ea18be78SXiaoyao Li             error_report("%s: MADVISE not available %s:%" PRIx64 " +%zx (%d)",
3838ea18be78SXiaoyao Li                          __func__, rb->idstr, start, length, ret);
3839db144f70SDr. David Alan Gilbert             goto err;
3840db144f70SDr. David Alan Gilbert #endif
3841db144f70SDr. David Alan Gilbert         }
3842db144f70SDr. David Alan Gilbert         trace_ram_block_discard_range(rb->idstr, host_startaddr, length,
3843db144f70SDr. David Alan Gilbert                                       need_madvise, need_fallocate, ret);
3844d3a5038cSDr. David Alan Gilbert     } else {
3845ea18be78SXiaoyao Li         error_report("%s: Overrun block '%s' (%" PRIu64 "/%zx/" RAM_ADDR_FMT")",
3846ea18be78SXiaoyao Li                      __func__, rb->idstr, start, length, rb->max_length);
3847d3a5038cSDr. David Alan Gilbert     }
3848d3a5038cSDr. David Alan Gilbert 
3849d3a5038cSDr. David Alan Gilbert err:
3850d3a5038cSDr. David Alan Gilbert     return ret;
3851d3a5038cSDr. David Alan Gilbert }
3852d3a5038cSDr. David Alan Gilbert 
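/*
 * Example (editor's sketch, not part of the original file): a hypothetical
 * caller making the alignment preconditions explicit; both start and length
 * must be multiples of the block's page size or the discard fails above.
 */
static int example_discard_pages(RAMBlock *rb, uint64_t start, size_t length)
{
    assert(QEMU_IS_ALIGNED(start, rb->page_size));
    assert(QEMU_IS_ALIGNED(length, rb->page_size));
    return ram_block_discard_range(rb, start, length);
}
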
3853b2e9426cSXiaoyao Li int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
3854b2e9426cSXiaoyao Li                                         size_t length)
3855b2e9426cSXiaoyao Li {
3856b2e9426cSXiaoyao Li     int ret = -1;
3857b2e9426cSXiaoyao Li 
3858b2e9426cSXiaoyao Li #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
3859*f051a9c4SWilliam Roche     /* ignore fd_offset with guest_memfd */
3860b2e9426cSXiaoyao Li     ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
3861b2e9426cSXiaoyao Li                     start, length);
3862b2e9426cSXiaoyao Li 
3863b2e9426cSXiaoyao Li     if (ret) {
3864b2e9426cSXiaoyao Li         ret = -errno;
3865b2e9426cSXiaoyao Li         error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
3866b2e9426cSXiaoyao Li                      __func__, rb->idstr, start, length, ret);
3867b2e9426cSXiaoyao Li     }
3868b2e9426cSXiaoyao Li #else
3869b2e9426cSXiaoyao Li     ret = -ENOSYS;
3870b2e9426cSXiaoyao Li     error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
3871b2e9426cSXiaoyao Li                  __func__, rb->idstr, start, length, ret);
3872b2e9426cSXiaoyao Li #endif
3873b2e9426cSXiaoyao Li 
3874b2e9426cSXiaoyao Li     return ret;
3875b2e9426cSXiaoyao Li }
3876b2e9426cSXiaoyao Li 
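/*
 * Editor's sketch (hypothetical): a confidential-guest path converting a
 * range from private to shared could release the stale private copy with
 * the helper above.  Offsets are block-relative; fd_offset is ignored for
 * guest_memfd, as noted in the function.
 */
static void fictional_private_to_shared_discard(RAMBlock *rb,
                                                uint64_t offset, size_t len)
{
    /* Failure only means the private pages stay allocated for longer. */
    if (ram_block_discard_guest_memfd_range(rb, offset, len) < 0) {
        warn_report("%s: could not free private pages of '%s'",
                    __func__, rb->idstr);
    }
}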
3877a4de8552SJunyan He bool ramblock_is_pmem(RAMBlock *rb)
3878a4de8552SJunyan He {
3879a4de8552SJunyan He     return rb->flags & RAM_PMEM;
3880a4de8552SJunyan He }
3881a4de8552SJunyan He 
3882b6b71cb5SMarkus Armbruster static void mtree_print_phys_entries(int start, int end, int skip, int ptr)
38835e8fd947SAlexey Kardashevskiy {
38845e8fd947SAlexey Kardashevskiy     if (start == end - 1) {
3885b6b71cb5SMarkus Armbruster         qemu_printf("\t%3d      ", start);
38865e8fd947SAlexey Kardashevskiy     } else {
3887b6b71cb5SMarkus Armbruster         qemu_printf("\t%3d..%-3d ", start, end - 1);
38885e8fd947SAlexey Kardashevskiy     }
3889b6b71cb5SMarkus Armbruster     qemu_printf(" skip=%d ", skip);
38905e8fd947SAlexey Kardashevskiy     if (ptr == PHYS_MAP_NODE_NIL) {
3891b6b71cb5SMarkus Armbruster         qemu_printf(" ptr=NIL");
38925e8fd947SAlexey Kardashevskiy     } else if (!skip) {
3893b6b71cb5SMarkus Armbruster         qemu_printf(" ptr=#%d", ptr);
38945e8fd947SAlexey Kardashevskiy     } else {
3895b6b71cb5SMarkus Armbruster         qemu_printf(" ptr=[%d]", ptr);
38965e8fd947SAlexey Kardashevskiy     }
3897b6b71cb5SMarkus Armbruster     qemu_printf("\n");
38985e8fd947SAlexey Kardashevskiy }
38995e8fd947SAlexey Kardashevskiy 
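/*
 * Editor's note: MR_SIZE() converts an Int128 region size into the
 * inclusive end offset used for printing: size - 1 for a non-zero size,
 * 0 for an empty region.
 */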
39005e8fd947SAlexey Kardashevskiy #define MR_SIZE(size) (int128_nz(size) ? (hwaddr)int128_get64( \
39015e8fd947SAlexey Kardashevskiy                            int128_sub((size), int128_one())) : 0)
39025e8fd947SAlexey Kardashevskiy 
3903b6b71cb5SMarkus Armbruster void mtree_print_dispatch(AddressSpaceDispatch *d, MemoryRegion *root)
39045e8fd947SAlexey Kardashevskiy {
39055e8fd947SAlexey Kardashevskiy     int i;
39065e8fd947SAlexey Kardashevskiy 
3907b6b71cb5SMarkus Armbruster     qemu_printf("  Dispatch\n");
3908b6b71cb5SMarkus Armbruster     qemu_printf("    Physical sections\n");
39095e8fd947SAlexey Kardashevskiy 
39105e8fd947SAlexey Kardashevskiy     for (i = 0; i < d->map.sections_nb; ++i) {
39115e8fd947SAlexey Kardashevskiy         MemoryRegionSection *s = d->map.sections + i;
39125e8fd947SAlexey Kardashevskiy         const char *names[] = { " [unassigned]", " [not dirty]",
39135e8fd947SAlexey Kardashevskiy                                 " [ROM]", " [watch]" };
39145e8fd947SAlexey Kardashevskiy 
3915883f2c59SPhilippe Mathieu-Daudé         qemu_printf("      #%d @" HWADDR_FMT_plx ".." HWADDR_FMT_plx
3916b6b71cb5SMarkus Armbruster                     " %s%s%s%s%s",
39175e8fd947SAlexey Kardashevskiy             i,
39185e8fd947SAlexey Kardashevskiy             s->offset_within_address_space,
3919f9c307c3SZhenzhong Duan             s->offset_within_address_space + MR_SIZE(s->size),
39205e8fd947SAlexey Kardashevskiy             s->mr->name ? s->mr->name : "(noname)",
39215e8fd947SAlexey Kardashevskiy             i < ARRAY_SIZE(names) ? names[i] : "",
39225e8fd947SAlexey Kardashevskiy             s->mr == root ? " [ROOT]" : "",
39235e8fd947SAlexey Kardashevskiy             s == d->mru_section ? " [MRU]" : "",
39245e8fd947SAlexey Kardashevskiy             s->mr->is_iommu ? " [iommu]" : "");
39255e8fd947SAlexey Kardashevskiy 
39265e8fd947SAlexey Kardashevskiy         if (s->mr->alias) {
3927b6b71cb5SMarkus Armbruster             qemu_printf(" alias=%s", s->mr->alias->name ?
39285e8fd947SAlexey Kardashevskiy                     s->mr->alias->name : "noname");
39295e8fd947SAlexey Kardashevskiy         }
3930b6b71cb5SMarkus Armbruster         qemu_printf("\n");
39315e8fd947SAlexey Kardashevskiy     }
39325e8fd947SAlexey Kardashevskiy 
3933b6b71cb5SMarkus Armbruster     qemu_printf("    Nodes (%d bits per level, %d levels) ptr=[%d] skip=%d\n",
39345e8fd947SAlexey Kardashevskiy                P_L2_BITS, P_L2_LEVELS, d->phys_map.ptr, d->phys_map.skip);
39355e8fd947SAlexey Kardashevskiy     for (i = 0; i < d->map.nodes_nb; ++i) {
39365e8fd947SAlexey Kardashevskiy         int j, jprev;
39375e8fd947SAlexey Kardashevskiy         PhysPageEntry prev;
39385e8fd947SAlexey Kardashevskiy         Node *n = d->map.nodes + i;
39395e8fd947SAlexey Kardashevskiy 
3940b6b71cb5SMarkus Armbruster         qemu_printf("      [%d]\n", i);
39415e8fd947SAlexey Kardashevskiy 
39425e8fd947SAlexey Kardashevskiy         for (j = 0, jprev = 0, prev = *n[0]; j < ARRAY_SIZE(*n); ++j) {
39435e8fd947SAlexey Kardashevskiy             PhysPageEntry *pe = *n + j;
39445e8fd947SAlexey Kardashevskiy 
39455e8fd947SAlexey Kardashevskiy             if (pe->ptr == prev.ptr && pe->skip == prev.skip) {
39465e8fd947SAlexey Kardashevskiy                 continue;
39475e8fd947SAlexey Kardashevskiy             }
39485e8fd947SAlexey Kardashevskiy 
3949b6b71cb5SMarkus Armbruster             mtree_print_phys_entries(jprev, j, prev.skip, prev.ptr);
39505e8fd947SAlexey Kardashevskiy 
39515e8fd947SAlexey Kardashevskiy             jprev = j;
39525e8fd947SAlexey Kardashevskiy             prev = *pe;
39535e8fd947SAlexey Kardashevskiy         }
39545e8fd947SAlexey Kardashevskiy 
39555e8fd947SAlexey Kardashevskiy         if (jprev != ARRAY_SIZE(*n)) {
3956b6b71cb5SMarkus Armbruster             mtree_print_phys_entries(jprev, j, prev.skip, prev.ptr);
39575e8fd947SAlexey Kardashevskiy         }
39585e8fd947SAlexey Kardashevskiy     }
39595e8fd947SAlexey Kardashevskiy }
39605e8fd947SAlexey Kardashevskiy 
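/*
 * Editor's note: a hypothetical fragment of the node dump above, showing
 * how runs of identical consecutive entries are collapsed:
 *
 *       [0]
 *            0..255   skip=1  ptr=[1]
 *          256        skip=0  ptr=#4
 *          257..511   skip=1  ptr=NIL
 */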
39617e6d32ebSDavid Hildenbrand /* Require any discards to work. */
396298da491dSDavid Hildenbrand static unsigned int ram_block_discard_required_cnt;
39637e6d32ebSDavid Hildenbrand /* Require only coordinated discards to work. */
39647e6d32ebSDavid Hildenbrand static unsigned int ram_block_coordinated_discard_required_cnt;
39657e6d32ebSDavid Hildenbrand /* Disable any discards. */
396698da491dSDavid Hildenbrand static unsigned int ram_block_discard_disabled_cnt;
39677e6d32ebSDavid Hildenbrand /* Disable only uncoordinated discards. */
39687e6d32ebSDavid Hildenbrand static unsigned int ram_block_uncoordinated_discard_disabled_cnt;
396998da491dSDavid Hildenbrand static QemuMutex ram_block_discard_disable_mutex;
397098da491dSDavid Hildenbrand 
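/*
 * The mutex is initialized lazily via g_once_init_enter() on first use,
 * so callers need no explicit module-init hook.
 */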
397198da491dSDavid Hildenbrand static void ram_block_discard_disable_mutex_lock(void)
397298da491dSDavid Hildenbrand {
397398da491dSDavid Hildenbrand     static gsize initialized;
397498da491dSDavid Hildenbrand 
397598da491dSDavid Hildenbrand     if (g_once_init_enter(&initialized)) {
397698da491dSDavid Hildenbrand         qemu_mutex_init(&ram_block_discard_disable_mutex);
397798da491dSDavid Hildenbrand         g_once_init_leave(&initialized, 1);
397898da491dSDavid Hildenbrand     }
397998da491dSDavid Hildenbrand     qemu_mutex_lock(&ram_block_discard_disable_mutex);
398098da491dSDavid Hildenbrand }
398198da491dSDavid Hildenbrand 
398298da491dSDavid Hildenbrand static void ram_block_discard_disable_mutex_unlock(void)
398398da491dSDavid Hildenbrand {
398498da491dSDavid Hildenbrand     qemu_mutex_unlock(&ram_block_discard_disable_mutex);
398598da491dSDavid Hildenbrand }
3986d24f31dbSDavid Hildenbrand 
3987d24f31dbSDavid Hildenbrand int ram_block_discard_disable(bool state)
3988d24f31dbSDavid Hildenbrand {
398998da491dSDavid Hildenbrand     int ret = 0;
3990d24f31dbSDavid Hildenbrand 
399198da491dSDavid Hildenbrand     ram_block_discard_disable_mutex_lock();
3992d24f31dbSDavid Hildenbrand     if (!state) {
399398da491dSDavid Hildenbrand         ram_block_discard_disabled_cnt--;
39947e6d32ebSDavid Hildenbrand     } else if (ram_block_discard_required_cnt ||
39957e6d32ebSDavid Hildenbrand                ram_block_coordinated_discard_required_cnt) {
399698da491dSDavid Hildenbrand         ret = -EBUSY;
39977e6d32ebSDavid Hildenbrand     } else {
39987e6d32ebSDavid Hildenbrand         ram_block_discard_disabled_cnt++;
39997e6d32ebSDavid Hildenbrand     }
40007e6d32ebSDavid Hildenbrand     ram_block_discard_disable_mutex_unlock();
40017e6d32ebSDavid Hildenbrand     return ret;
40027e6d32ebSDavid Hildenbrand }
40037e6d32ebSDavid Hildenbrand 
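/*
 * Editor's sketch (hypothetical device, not in the original source): a
 * device that pins all guest pages cannot tolerate discards, so it blocks
 * them for its whole lifetime, mirroring the -EBUSY protocol above.
 */
static int fictional_pinning_device_realize(void)
{
    if (ram_block_discard_disable(true)) {
        /* Someone (e.g. a ballooning device) already requires discards. */
        return -EBUSY;
    }
    /* ... pin guest memory and set up the device ... */
    return 0;
}

static void fictional_pinning_device_unrealize(void)
{
    /* ... unpin guest memory ... */
    ram_block_discard_disable(false);
}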
40047e6d32ebSDavid Hildenbrand int ram_block_uncoordinated_discard_disable(bool state)
40057e6d32ebSDavid Hildenbrand {
40067e6d32ebSDavid Hildenbrand     int ret = 0;
40077e6d32ebSDavid Hildenbrand 
40087e6d32ebSDavid Hildenbrand     ram_block_discard_disable_mutex_lock();
40097e6d32ebSDavid Hildenbrand     if (!state) {
40107e6d32ebSDavid Hildenbrand         ram_block_uncoordinated_discard_disabled_cnt--;
40117e6d32ebSDavid Hildenbrand     } else if (ram_block_discard_required_cnt) {
40127e6d32ebSDavid Hildenbrand         ret = -EBUSY;
40137e6d32ebSDavid Hildenbrand     } else {
40147e6d32ebSDavid Hildenbrand         ram_block_uncoordinated_discard_disabled_cnt++;
4015d24f31dbSDavid Hildenbrand     }
401698da491dSDavid Hildenbrand     ram_block_discard_disable_mutex_unlock();
401798da491dSDavid Hildenbrand     return ret;
4018d24f31dbSDavid Hildenbrand }
4019d24f31dbSDavid Hildenbrand 
4020d24f31dbSDavid Hildenbrand int ram_block_discard_require(bool state)
4021d24f31dbSDavid Hildenbrand {
402298da491dSDavid Hildenbrand     int ret = 0;
4023d24f31dbSDavid Hildenbrand 
402498da491dSDavid Hildenbrand     ram_block_discard_disable_mutex_lock();
4025d24f31dbSDavid Hildenbrand     if (!state) {
402698da491dSDavid Hildenbrand         ram_block_discard_required_cnt--;
40277e6d32ebSDavid Hildenbrand     } else if (ram_block_discard_disabled_cnt ||
40287e6d32ebSDavid Hildenbrand                ram_block_uncoordinated_discard_disabled_cnt) {
402998da491dSDavid Hildenbrand         ret = -EBUSY;
40307e6d32ebSDavid Hildenbrand     } else {
40317e6d32ebSDavid Hildenbrand         ram_block_discard_required_cnt++;
40327e6d32ebSDavid Hildenbrand     }
40337e6d32ebSDavid Hildenbrand     ram_block_discard_disable_mutex_unlock();
40347e6d32ebSDavid Hildenbrand     return ret;
40357e6d32ebSDavid Hildenbrand }
40367e6d32ebSDavid Hildenbrand 
40377e6d32ebSDavid Hildenbrand int ram_block_coordinated_discard_require(bool state)
40387e6d32ebSDavid Hildenbrand {
40397e6d32ebSDavid Hildenbrand     int ret = 0;
40407e6d32ebSDavid Hildenbrand 
40417e6d32ebSDavid Hildenbrand     ram_block_discard_disable_mutex_lock();
40427e6d32ebSDavid Hildenbrand     if (!state) {
40437e6d32ebSDavid Hildenbrand         ram_block_coordinated_discard_required_cnt--;
40447e6d32ebSDavid Hildenbrand     } else if (ram_block_discard_disabled_cnt) {
40457e6d32ebSDavid Hildenbrand         ret = -EBUSY;
40467e6d32ebSDavid Hildenbrand     } else {
40477e6d32ebSDavid Hildenbrand         ram_block_coordinated_discard_required_cnt++;
4048d24f31dbSDavid Hildenbrand     }
404998da491dSDavid Hildenbrand     ram_block_discard_disable_mutex_unlock();
405098da491dSDavid Hildenbrand     return ret;
4051d24f31dbSDavid Hildenbrand }
4052d24f31dbSDavid Hildenbrand 
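/*
 * Editor's sketch (hypothetical counterpart): a coordinated-discard user,
 * in the spirit of virtio-mem, registers its requirement up front; it
 * still succeeds when only uncoordinated discards are disabled.
 */
static int fictional_coordinated_user_init(void)
{
    if (ram_block_coordinated_discard_require(true)) {
        /* All discards are disabled, e.g. by a pinning device. */
        return -EBUSY;
    }
    /* ... register a RamDiscardManager for the affected memory ... */
    return 0;
}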
4053d24f31dbSDavid Hildenbrand bool ram_block_discard_is_disabled(void)
4054d24f31dbSDavid Hildenbrand {
40557e6d32ebSDavid Hildenbrand     return qatomic_read(&ram_block_discard_disabled_cnt) ||
40567e6d32ebSDavid Hildenbrand            qatomic_read(&ram_block_uncoordinated_discard_disabled_cnt);
4057d24f31dbSDavid Hildenbrand }
4058d24f31dbSDavid Hildenbrand 
4059d24f31dbSDavid Hildenbrand bool ram_block_discard_is_required(void)
4060d24f31dbSDavid Hildenbrand {
40617e6d32ebSDavid Hildenbrand     return qatomic_read(&ram_block_discard_required_cnt) ||
40627e6d32ebSDavid Hildenbrand            qatomic_read(&ram_block_coordinated_discard_required_cnt);
4063d24f31dbSDavid Hildenbrand }
4064