/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef SYSTEM_RAM_ADDR_H
#define SYSTEM_RAM_ADDR_H

#include "system/xen.h"
#include "system/tcg.h"
#include "exec/cputlb.h"
#include "exec/ramlist.h"
#include "system/ramblock.h"
#include "exec/exec-all.h"
#include "system/memory.h"
#include "exec/target_page.h"
#include "qemu/rcu.h"

#include "exec/hwaddr.h"
#include "exec/cpu-common.h"

extern uint64_t total_dirty_pages;

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}

/**
 * clear_bmap_set: set clear bitmap for the page range.  Must be called with
 * bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test the clear bitmap for the page; clear the
 * bit if it was set.  Must be called with bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1);
}

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return b && b->host && offset < b->used_length;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
        (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

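/*
 * Illustrative sketch (the helper below is hypothetical and not part of this
 * header): how the two accessors above are meant to be combined.
 * offset_in_ramblock() is the safe precondition check; ramblock_ptr()
 * asserts the same condition before doing the host-pointer arithmetic.
 */
static inline void *example_ramblock_host_ptr(RAMBlock *rb, ram_addr_t offset)
{
    /* Reject blocks without a host mapping and offsets past used_length. */
    if (!offset_in_ramblock(rb, offset)) {
        return NULL;
    }
    return ramblock_ptr(rb, offset);
}
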
/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @max_size: the maximum size of the block after resizing
 *  @mr: the memory region where the ram block is
 *  @resized: callback after calls to qemu_ram_resize
 *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
 *              RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
 *              RAM_READONLY_FD, RAM_GUEST_MEMFD
 *  @mem_path or @fd: specify the backing file or device
 *  @offset: Offset into target file
 *  @grow: extend file if necessary (but an empty file is always extended).
 *  @errp: pointer to Error*, to store an error if it happens
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
typedef void (*qemu_ram_resize_cb)(const char *, uint64_t length, void *host);

RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   off_t offset, Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size,
                                 qemu_ram_resize_cb resized, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd, off_t offset,
                                 bool grow,
                                 Error **errp);

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    qemu_ram_resize_cb resized,
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length);

/* Write back the whole block of memory to its backing store */
static inline void qemu_ram_block_writeback(RAMBlock *block)
{
    qemu_ram_msync(block, 0, block->used_length);
}

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
            unsigned long num = next - base;
            unsigned long found = find_next_bit(blocks->blocks[idx],
                                                num, offset);
            if (found < num) {
                dirty = true;
                break;
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    return dirty;
}

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    return dirty;
}

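/*
 * Illustrative sketch (hypothetical helper, not part of this header): the
 * page-number split used by the dirty-tracking functions above and below.
 * Each per-client DirtyMemoryBlocks entry covers DIRTY_MEMORY_BLOCK_SIZE
 * pages, so a guest page number decomposes into a block index and a bit
 * offset within that block's bitmap.
 */
static inline void example_dirty_bitmap_coords(ram_addr_t addr,
                                               unsigned long *idx,
                                               unsigned long *offset)
{
    unsigned long page = addr >> TARGET_PAGE_BITS;

    *idx = page / DIRTY_MEMORY_BLOCK_SIZE;    /* which blocks[] bitmap */
    *offset = page % DIRTY_MEMORY_BLOCK_SIZE; /* which bit inside it   */
}
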
static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = qatomic_rcu_read(&ram_list.dirty_memory[i]);
        }

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                                  offset, next - page);
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    if (xen_enabled()) {
        xen_hvm_modified_memory(start, length);
    }
}

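/*
 * Illustrative sketch (hypothetical helper, not part of this header): a
 * write-then-mark-dirty sequence over a RAMBlock's host mapping.  The dirty
 * bitmaps are indexed by ram_addr_t, i.e. the block's offset in the global
 * RAM address space plus the offset within the block.  A real caller under
 * TCG first invalidates translated code for the range and then marks only
 * the non-code clients dirty, which is what DIRTY_CLIENTS_NOCODE expresses;
 * the invalidation step is omitted from this sketch.
 */
static inline void example_write_and_dirty(RAMBlock *rb, ram_addr_t offset,
                                           const void *buf, size_t len)
{
    /* The caller guarantees [offset, offset + len) lies within used_length. */
    memcpy(ramblock_ptr(rb, offset), buf, len);
    /* Record the modification for the VGA and migration clients. */
    cpu_physical_memory_set_dirty_range(rb->offset + offset, len,
                                        DIRTY_CLIENTS_NOCODE);
}
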
#if !defined(_WIN32)

/*
 * Contrary to cpu_physical_memory_sync_dirty_bitmap() this function returns
 * the number of dirty pages in @bitmap passed as argument. On the other hand,
 * cpu_physical_memory_sync_dirty_bitmap() returns newly dirtied pages that
 * weren't set in the global migration bitmap.
 */
static inline
uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                ram_addr_t start,
                                                ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c, nbits;
    hwaddr addr;
    ram_addr_t ram_addr;
    uint64_t num_dirty = 0;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = qemu_real_host_page_size() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        WITH_RCU_READ_LOCK_GUARD() {
            for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
                blocks[i] =
                    qatomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
            }

            for (k = 0; k < nr; k++) {
                if (bitmap[k]) {
                    unsigned long temp = leul_to_cpu(bitmap[k]);

                    nbits = ctpopl(temp);
                    qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                    if (global_dirty_tracking) {
                        qatomic_or(
                                &blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                                temp);
                        if (unlikely(
                            global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                            total_dirty_pages += nbits;
                        }
                    }

                    num_dirty += nbits;

                    if (tcg_enabled()) {
                        qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
                                   temp);
                    }
                }

                if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                    offset = 0;
                    idx++;
                }
            }
        }

        if (xen_enabled()) {
            xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
        }
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_tracking) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * bitmap-traveling is faster than memory-traveling (for addr...)
         * especially when most of the memory is not dirty.
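         *
         * Each non-zero word of @bitmap is decoded bit by bit with ctzl():
         * every set bit is one dirty host page, which covers @hpratio
         * target pages, so the range handed to
         * cpu_physical_memory_set_dirty_range() below is
         * TARGET_PAGE_SIZE * hpratio bytes long.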
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                nbits = ctpopl(c);
                if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                    total_dirty_pages += nbits;
                }
                num_dirty += nbits;
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }

    return num_dirty;
}
#endif /* not _WIN32 */

static inline void cpu_physical_memory_dirty_bits_cleared(ram_addr_t start,
                                                          ram_addr_t length)
{
    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }
}

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}

/* Called with RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* start address and length are aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = qatomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = qatomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }
        if (num_dirty) {
            cpu_physical_memory_dirty_bits_cleared(start, length);
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point before we
             * really send the pages, also we will split the clear
             * dirty procedure into smaller chunks.
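             *
             * Here the range is only recorded in rb->clear_bmap (at
             * clear_bmap_shift granularity); the more expensive
             * memory_region_clear_dirty_bitmap() call is expected to be
             * issued later, chunk by chunk, once clear_bmap_test_and_clear()
             * reports a chunk as pending, instead of for the whole range as
             * the else branch below does.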
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path - still do that in a huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}

#endif