/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef SYSTEM_RAM_ADDR_H
#define SYSTEM_RAM_ADDR_H

#include "system/xen.h"
#include "system/tcg.h"
#include "exec/cputlb.h"
#include "exec/ramlist.h"
#include "system/ramblock.h"
#include "system/memory.h"
#include "exec/target_page.h"
#include "qemu/rcu.h"

#include "exec/hwaddr.h"
#include "exec/cpu-common.h"

extern uint64_t total_dirty_pages;

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}
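
/*
 * Worked example (illustrative numbers only, not the values QEMU actually
 * uses): with shift == 3, each clear-bitmap bit covers 2^3 == 8 guest
 * pages, so a block of 100 guest pages needs
 *
 *     clear_bmap_size(100, 3) == DIV_ROUND_UP(100, 8) == 13 bits.
 */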

/**
 * clear_bmap_set: set clear bitmap for the page range.  Must be called
 * with bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set.
 * Must be called with bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1);
}
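
/*
 * Hypothetical usage sketch of the pair above (the call site and the chunk
 * alignment are assumptions; see migration/ram.c for the real flow): the
 * bitmap sync marks a range as "clear pending" with clear_bmap_set(), and
 * the sender later clears the backing dirty log lazily, one chunk at a
 * time, just before transmitting a page:
 *
 *     if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
 *         uint8_t shift = rb->clear_bmap_shift;
 *         memory_region_clear_dirty_bitmap(rb->mr,
 *                                          page << TARGET_PAGE_BITS,
 *                                          TARGET_PAGE_SIZE << shift);
 *     }
 */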

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return b && b->host && offset < b->used_length;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}
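
/*
 * Illustrative invariant (shown for clarity, not part of the API): for any
 * offset within @rb, ramblock_ptr() and ramblock_recv_bitmap_offset() are
 * inverses up to the page index:
 *
 *     void *host = ramblock_ptr(rb, offset);
 *     assert(ramblock_recv_bitmap_offset(host, rb)
 *            == offset >> TARGET_PAGE_BITS);
 */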

bool ramblock_is_pmem(RAMBlock *rb);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @max_size: the maximum size of the block after resizing
 *  @mr: the memory region where the ram block is
 *  @resized: callback after calls to qemu_ram_resize
 *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
 *              RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
 *              RAM_READONLY_FD, RAM_GUEST_MEMFD
 *  @mem_path or @fd: specify the backing file or device
 *  @offset: offset into the backing file or device
 *  @grow: extend the file if necessary (but an empty file is always extended)
 *  @errp: pointer to Error*, to store an error if one occurs
 *
 * Returns:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
typedef void (*qemu_ram_resize_cb)(const char *, uint64_t length, void *host);

RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   off_t offset, Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size,
                                 qemu_ram_resize_cb resized, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd, off_t offset,
                                 bool grow,
                                 Error **errp);
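
/*
 * Hypothetical usage sketch (the file name and size are invented, error
 * handling is abbreviated): backing a MemoryRegion's RAM with a shared
 * file.  In practice these allocators are reached through the
 * memory_region_init_ram_from_file*() helpers rather than called directly.
 *
 *     Error *err = NULL;
 *     RAMBlock *rb = qemu_ram_alloc_from_file(64 * MiB, mr, RAM_SHARED,
 *                                             "/dev/shm/guest-ram", 0, &err);
 *     if (!rb) {
 *         error_report_err(err);
 *     }
 */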

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    qemu_ram_resize_cb resized,
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length);

/* Write back the whole RAM block, i.e. msync its entire used length */
static inline void qemu_ram_block_writeback(RAMBlock *block)
{
    qemu_ram_msync(block, 0, block->used_length);
}

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))
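
/*
 * With the three dirty-memory clients currently defined in "exec/ramlist.h"
 * (VGA, CODE and MIGRATION, i.e. DIRTY_MEMORY_NUM == 3), these masks expand
 * to:
 *
 *     DIRTY_CLIENTS_ALL    == 0b111   (all clients)
 *     DIRTY_CLIENTS_NOCODE == 0b101   (everything except DIRTY_MEMORY_CODE)
 *
 * assuming DIRTY_MEMORY_CODE keeps its current bit position (1).
 */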

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
            unsigned long num = next - base;
            unsigned long found = find_next_bit(blocks->blocks[idx],
                                                num, offset);
            if (found < num) {
                dirty = true;
                break;
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    return dirty;
}
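
/*
 * How the lookup above decomposes a page index (values are illustrative
 * only; DIRTY_MEMORY_BLOCK_SIZE is defined in "exec/ramlist.h"): if the
 * block size were 8 pages, then page 19 would live in block idx == 19 / 8
 * == 2 at bit offset == 19 % 8 == 3, and a range that crosses a block
 * boundary is walked in MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE) steps.
 */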

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = qatomic_rcu_read(&ram_list.dirty_memory[i]);
        }

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                                  offset, next - page);
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    if (xen_enabled()) {
        xen_hvm_modified_memory(start, length);
    }
}
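
/*
 * Sketch of how the two helpers above are typically paired on the memory
 * write path (simplified; the real code lives in the physmem write-dirty
 * handling and may differ in detail):
 *
 *     uint8_t mask = cpu_physical_memory_range_includes_clean(addr, len,
 *                                                             DIRTY_CLIENTS_ALL);
 *     ... perform the guest memory write ...
 *     cpu_physical_memory_set_dirty_range(addr, len, mask);
 *
 * Computing the mask first avoids atomically re-setting bits that are
 * already dirty for every client.
 */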

#if !defined(_WIN32)

/*
 * Contrary to cpu_physical_memory_sync_dirty_bitmap() this function returns
 * the number of dirty pages in @bitmap passed as argument. On the other hand,
 * cpu_physical_memory_sync_dirty_bitmap() returns newly dirtied pages that
 * weren't set in the global migration bitmap.
 */
static inline
uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                ram_addr_t start,
                                                ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c, nbits;
    hwaddr addr;
    ram_addr_t ram_addr;
    uint64_t num_dirty = 0;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = qemu_real_host_page_size() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* Is the start address aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        WITH_RCU_READ_LOCK_GUARD() {
            for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
                blocks[i] =
                    qatomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
            }

            for (k = 0; k < nr; k++) {
                if (bitmap[k]) {
                    unsigned long temp = leul_to_cpu(bitmap[k]);

                    nbits = ctpopl(temp);
                    qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                    if (global_dirty_tracking) {
                        qatomic_or(
                                &blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                                temp);
                        if (unlikely(
                            global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                            total_dirty_pages += nbits;
                        }
                    }

                    num_dirty += nbits;

                    if (tcg_enabled()) {
                        qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
                                   temp);
                    }
                }

                if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                    offset = 0;
                    idx++;
                }
            }
        }

        if (xen_enabled()) {
            xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
        }
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_tracking) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * Bitmap-traveling is faster than memory-traveling (for addr...),
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                nbits = ctpopl(c);
                if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                    total_dirty_pages += nbits;
                }
                num_dirty += nbits;
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }

    return num_dirty;
}
#endif /* not _WIN32 */
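
/*
 * Hypothetical caller sketch for cpu_physical_memory_set_dirty_lebitmap()
 * (variable names are invented; the real consumer is the KVM dirty-log sync
 * code in accel/kvm): after the kernel fills a little-endian dirty bitmap
 * for a memory slot, one bit per host page, every set bit is folded into
 * the per-client bitmaps above:
 *
 *     unsigned long *le_bitmap = ...;   // e.g. filled by KVM_GET_DIRTY_LOG
 *     cpu_physical_memory_set_dirty_lebitmap(le_bitmap,
 *                                            slot_start_ram_addr,
 *                                            slot_size / host_page_size);
 */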

static inline void cpu_physical_memory_dirty_bits_cleared(ram_addr_t start,
                                                          ram_addr_t length)
{
    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }
}

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}


/* Called within an RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* Are the start address and length aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = qatomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = qatomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }
        if (num_dirty) {
            cpu_physical_memory_dirty_bits_cleared(start, length);
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point just before we
             * really send the pages; that also lets the clear-dirty
             * procedure be split into smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path - still do the clear in one huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
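
/*
 * Hedged usage sketch (the authoritative caller is in migration/ram.c;
 * variable names here are illustrative): during a bitmap sync the migration
 * code accumulates the newly dirtied pages per RAM block, inside an RCU
 * critical section as the comment above requires:
 *
 *     WITH_RCU_READ_LOCK_GUARD() {
 *         migration_dirty_pages +=
 *             cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);
 *     }
 */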

#endif