xref: /qemu/include/system/ram_addr.h (revision 6c1ae457a17a9462fb89ef1f30ad7da5266bfea6)
/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it
 * anywhere else.  The functions declared here will be removed soon.
 */

#ifndef SYSTEM_RAM_ADDR_H
#define SYSTEM_RAM_ADDR_H

#include "system/xen.h"
#include "system/tcg.h"
#include "exec/cputlb.h"
#include "exec/ramlist.h"
#include "system/ramblock.h"
#include "exec/exec-all.h"
#include "system/memory.h"
#include "exec/target_page.h"
#include "qemu/rcu.h"

#include "exec/hwaddr.h"
#include "exec/cpu-common.h"

extern uint64_t total_dirty_pages;

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}
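/*
 * A worked example of the size calculation (the shift and page size are
 * chosen purely for illustration): with shift == 18 and 4 KiB target
 * pages, each bit of the clear bitmap covers 2^18 pages, i.e. 1 GiB of
 * guest memory, so a 16 GiB RAM block (4194304 pages) needs
 * clear_bmap_size(4194304, 18) == DIV_ROUND_UP(4194304, 262144) == 16 bits.
 */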

/**
 * clear_bmap_set: set clear bitmap for the page range.  Must be called
 * with bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test the clear bitmap for the page and clear
 * the bit if it was set.  Must be called with bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1);
}
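/*
 * A minimal usage sketch (illustrative only; bitmap_mutex locking and error
 * handling are elided): the sync path below records pending clears with
 * clear_bmap_set(), and a sender can later flush them one chunk at a time
 * before transmitting a page, e.g.:
 *
 *     if (clear_bmap_test_and_clear(rb, page)) {
 *         uint8_t shift = rb->clear_bmap_shift;
 *         hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
 *         hwaddr start = QEMU_ALIGN_DOWN((ram_addr_t)page << TARGET_PAGE_BITS,
 *                                        size);
 *
 *         memory_region_clear_dirty_bitmap(rb->mr, start, size);
 *     }
 */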

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return b && b->host && offset < b->used_length;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @max_size: the maximum size of the block after resizing
 *             (qemu_ram_alloc_from_fd only)
 *  @mr: the memory region where the ram block is
 *  @resized: callback invoked after calls to qemu_ram_resize
 *            (qemu_ram_alloc_from_fd only)
 *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
 *              RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
 *              RAM_READONLY_FD, RAM_GUEST_MEMFD
 *  @mem_path or @fd: specify the backing file or device
 *  @offset: offset into the backing file or device
 *  @grow: extend the file if necessary (but an empty file is always extended)
 *         (qemu_ram_alloc_from_fd only)
 *  @errp: pointer to Error*, to store an error if it happens
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
typedef void (*qemu_ram_resize_cb)(const char *, uint64_t length, void *host);

RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   off_t offset, Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size,
                                 qemu_ram_resize_cb resized, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd, off_t offset,
                                 bool grow,
                                 Error **errp);
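/*
 * A hypothetical caller sketch (the fd, size and flags are placeholders
 * chosen for illustration) backing an already-initialized MemoryRegion *mr
 * with a shared file mapping:
 *
 *     Error *local_err = NULL;
 *     RAMBlock *rb = qemu_ram_alloc_from_fd(size, size, NULL, mr,
 *                                           RAM_SHARED, fd, 0, false,
 *                                           &local_err);
 *     if (!rb) {
 *         error_propagate(errp, local_err);
 *     }
 */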

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    qemu_ram_resize_cb resized,
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length);

/* Write back (msync) the whole RAM block to its backing storage */
static inline void qemu_ram_block_writeback(RAMBlock *block)
{
    qemu_ram_msync(block, 0, block->used_length);
}

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))
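/*
 * A worked expansion of the masks above: with the three dirty memory
 * clients used in this header (DIRTY_MEMORY_VGA, DIRTY_MEMORY_CODE,
 * DIRTY_MEMORY_MIGRATION), DIRTY_CLIENTS_ALL is (1 << 3) - 1 == 0x7,
 * and DIRTY_CLIENTS_NOCODE is the same mask with the DIRTY_MEMORY_CODE
 * bit cleared, for callers whose writes cannot affect translated code.
 */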

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
            unsigned long num = next - base;
            unsigned long found = find_next_bit(blocks->blocks[idx],
                                                num, offset);
            if (found < num) {
                dirty = true;
                break;
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    return dirty;
}
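/*
 * A worked example of the block walk above, with an artificially small
 * DIRTY_MEMORY_BLOCK_SIZE of 8 chosen purely for illustration: for a range
 * covering pages 6..10, the first iteration scans bits 6..7 of
 * blocks->blocks[0]; @offset is then reset to 0 and the second iteration
 * scans bits 0..2 of blocks->blocks[1].
 */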

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx],
                                                 num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}
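/*
 * A worked example: if every page in [start, start + length) is dirty for
 * DIRTY_MEMORY_VGA but not for the other two clients, then a call with
 * mask == DIRTY_CLIENTS_ALL returns a value with only the DIRTY_MEMORY_CODE
 * and DIRTY_MEMORY_MIGRATION bits set, i.e. those clients still see clean
 * pages in the range.
 */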

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = qatomic_rcu_read(&ram_list.dirty_memory[i]);
        }

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                                  offset, next - page);
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    if (xen_enabled()) {
        xen_hvm_modified_memory(start, length);
    }
}
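/*
 * An illustrative call site: code that has just written @size bytes of
 * guest RAM at @ram_addr would typically mark the range dirty for every
 * client with
 *
 *     cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_ALL);
 *
 * whereas a caller that knows the write cannot contain translated code may
 * pass DIRTY_CLIENTS_NOCODE instead.
 */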

#if !defined(_WIN32)

/*
 * Unlike cpu_physical_memory_sync_dirty_bitmap(), this function returns
 * the total number of dirty pages in the @bitmap passed as argument,
 * whereas cpu_physical_memory_sync_dirty_bitmap() returns only the newly
 * dirtied pages, i.e. those not already set in the global migration bitmap.
 */
static inline
uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                ram_addr_t start,
                                                ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c, nbits;
    hwaddr addr;
    ram_addr_t ram_addr;
    uint64_t num_dirty = 0;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = qemu_real_host_page_size() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* Is the start address aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        WITH_RCU_READ_LOCK_GUARD() {
            for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
                blocks[i] =
                    qatomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
            }

            for (k = 0; k < nr; k++) {
                if (bitmap[k]) {
                    unsigned long temp = leul_to_cpu(bitmap[k]);

                    nbits = ctpopl(temp);
                    qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                    if (global_dirty_tracking) {
                        qatomic_or(
                                &blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                                temp);
                        if (unlikely(
                            global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                            total_dirty_pages += nbits;
                        }
                    }

                    num_dirty += nbits;

                    if (tcg_enabled()) {
                        qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
                                   temp);
                    }
                }

                if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                    offset = 0;
                    idx++;
                }
            }
        }

        if (xen_enabled()) {
            xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
        }
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_tracking) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * Walking the bitmap is faster than walking the memory itself,
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                nbits = ctpopl(c);
                if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                    total_dirty_pages += nbits;
                }
                num_dirty += nbits;
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }

    return num_dirty;
}
#endif /* not _WIN32 */
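/*
 * A worked example of the unaligned/hugepage path of
 * cpu_physical_memory_set_dirty_lebitmap() above (sizes chosen for
 * illustration only): with a 16 KiB host page size and 4 KiB target pages,
 * hpratio is 4, so every set bit in @bitmap represents one host page and is
 * expanded into a cpu_physical_memory_set_dirty_range() call covering
 * hpratio * TARGET_PAGE_SIZE == 16 KiB of guest memory.
 */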

static inline void cpu_physical_memory_dirty_bits_cleared(ram_addr_t start,
                                                          ram_addr_t length)
{
    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }
}

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}

/* Must be called within an RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* Are the start address and length aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = qatomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = qatomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }
        if (num_dirty) {
            cpu_physical_memory_dirty_bits_cleared(start, length);
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point right before
             * we really send the pages; we also split the clearing into
             * smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path: clear the whole range in one huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
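/*
 * A minimal caller sketch (illustrative only; real migration code keeps
 * additional bookkeeping), syncing a whole RAM block into its rb->bmap
 * from within an RCU critical section:
 *
 *     uint64_t newly_dirty;
 *
 *     WITH_RCU_READ_LOCK_GUARD() {
 *         newly_dirty = cpu_physical_memory_sync_dirty_bitmap(rb, 0,
 *                                                             rb->used_length);
 *     }
 */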

#endif