1eb59db53SDr. David Alan Gilbert /* 2eb59db53SDr. David Alan Gilbert * Postcopy migration for RAM 3eb59db53SDr. David Alan Gilbert * 4eb59db53SDr. David Alan Gilbert * Copyright 2013-2015 Red Hat, Inc. and/or its affiliates 5eb59db53SDr. David Alan Gilbert * 6eb59db53SDr. David Alan Gilbert * Authors: 7eb59db53SDr. David Alan Gilbert * Dave Gilbert <dgilbert@redhat.com> 8eb59db53SDr. David Alan Gilbert * 9eb59db53SDr. David Alan Gilbert * This work is licensed under the terms of the GNU GPL, version 2 or later. 10eb59db53SDr. David Alan Gilbert * See the COPYING file in the top-level directory. 11eb59db53SDr. David Alan Gilbert * 12eb59db53SDr. David Alan Gilbert */ 13eb59db53SDr. David Alan Gilbert 14eb59db53SDr. David Alan Gilbert /* 15eb59db53SDr. David Alan Gilbert * Postcopy is a migration technique where the execution flips from the 16eb59db53SDr. David Alan Gilbert * source to the destination before all the data has been copied. 17eb59db53SDr. David Alan Gilbert */ 18eb59db53SDr. David Alan Gilbert 19eb59db53SDr. David Alan Gilbert #include <glib.h> 20eb59db53SDr. David Alan Gilbert #include <stdio.h> 21eb59db53SDr. David Alan Gilbert #include <unistd.h> 22eb59db53SDr. David Alan Gilbert 23eb59db53SDr. David Alan Gilbert #include "qemu-common.h" 24eb59db53SDr. David Alan Gilbert #include "migration/migration.h" 25eb59db53SDr. David Alan Gilbert #include "migration/postcopy-ram.h" 26eb59db53SDr. David Alan Gilbert #include "sysemu/sysemu.h" 27eb59db53SDr. David Alan Gilbert #include "qemu/error-report.h" 28eb59db53SDr. David Alan Gilbert #include "trace.h" 29eb59db53SDr. David Alan Gilbert 30*e0b266f0SDr. David Alan Gilbert /* Arbitrary limit on size of each discard command, 31*e0b266f0SDr. David Alan Gilbert * keeps them around ~200 bytes 32*e0b266f0SDr. David Alan Gilbert */ 33*e0b266f0SDr. David Alan Gilbert #define MAX_DISCARDS_PER_COMMAND 12 34*e0b266f0SDr. David Alan Gilbert 35*e0b266f0SDr. David Alan Gilbert struct PostcopyDiscardState { 36*e0b266f0SDr. David Alan Gilbert const char *ramblock_name; 37*e0b266f0SDr. David Alan Gilbert uint64_t offset; /* Bitmap entry for the 1st bit of this RAMBlock */ 38*e0b266f0SDr. David Alan Gilbert uint16_t cur_entry; 39*e0b266f0SDr. David Alan Gilbert /* 40*e0b266f0SDr. David Alan Gilbert * Start and length of a discard range (bytes) 41*e0b266f0SDr. David Alan Gilbert */ 42*e0b266f0SDr. David Alan Gilbert uint64_t start_list[MAX_DISCARDS_PER_COMMAND]; 43*e0b266f0SDr. David Alan Gilbert uint64_t length_list[MAX_DISCARDS_PER_COMMAND]; 44*e0b266f0SDr. David Alan Gilbert unsigned int nsentwords; 45*e0b266f0SDr. David Alan Gilbert unsigned int nsentcmds; 46*e0b266f0SDr. David Alan Gilbert }; 47*e0b266f0SDr. David Alan Gilbert 48eb59db53SDr. David Alan Gilbert /* Postcopy needs to detect accesses to pages that haven't yet been copied 49eb59db53SDr. David Alan Gilbert * across, and efficiently map new pages in, the techniques for doing this 50eb59db53SDr. David Alan Gilbert * are target OS specific. 51eb59db53SDr. David Alan Gilbert */ 52eb59db53SDr. David Alan Gilbert #if defined(__linux__) 53eb59db53SDr. David Alan Gilbert 54eb59db53SDr. David Alan Gilbert #include <sys/mman.h> 55eb59db53SDr. David Alan Gilbert #include <sys/ioctl.h> 56eb59db53SDr. David Alan Gilbert #include <sys/syscall.h> 57eb59db53SDr. David Alan Gilbert #include <sys/types.h> 58eb59db53SDr. David Alan Gilbert #include <asm/types.h> /* for __u64 */ 59eb59db53SDr. David Alan Gilbert #endif 60eb59db53SDr. David Alan Gilbert 61eb59db53SDr. David Alan Gilbert #if defined(__linux__) && defined(__NR_userfaultfd) 62eb59db53SDr. David Alan Gilbert #include <linux/userfaultfd.h> 63eb59db53SDr. David Alan Gilbert 64eb59db53SDr. David Alan Gilbert static bool ufd_version_check(int ufd) 65eb59db53SDr. David Alan Gilbert { 66eb59db53SDr. David Alan Gilbert struct uffdio_api api_struct; 67eb59db53SDr. David Alan Gilbert uint64_t ioctl_mask; 68eb59db53SDr. David Alan Gilbert 69eb59db53SDr. David Alan Gilbert api_struct.api = UFFD_API; 70eb59db53SDr. David Alan Gilbert api_struct.features = 0; 71eb59db53SDr. David Alan Gilbert if (ioctl(ufd, UFFDIO_API, &api_struct)) { 72eb59db53SDr. David Alan Gilbert error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s", 73eb59db53SDr. David Alan Gilbert strerror(errno)); 74eb59db53SDr. David Alan Gilbert return false; 75eb59db53SDr. David Alan Gilbert } 76eb59db53SDr. David Alan Gilbert 77eb59db53SDr. David Alan Gilbert ioctl_mask = (__u64)1 << _UFFDIO_REGISTER | 78eb59db53SDr. David Alan Gilbert (__u64)1 << _UFFDIO_UNREGISTER; 79eb59db53SDr. David Alan Gilbert if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) { 80eb59db53SDr. David Alan Gilbert error_report("Missing userfault features: %" PRIx64, 81eb59db53SDr. David Alan Gilbert (uint64_t)(~api_struct.ioctls & ioctl_mask)); 82eb59db53SDr. David Alan Gilbert return false; 83eb59db53SDr. David Alan Gilbert } 84eb59db53SDr. David Alan Gilbert 85eb59db53SDr. David Alan Gilbert return true; 86eb59db53SDr. David Alan Gilbert } 87eb59db53SDr. David Alan Gilbert 88eb59db53SDr. David Alan Gilbert bool postcopy_ram_supported_by_host(void) 89eb59db53SDr. David Alan Gilbert { 90eb59db53SDr. David Alan Gilbert long pagesize = getpagesize(); 91eb59db53SDr. David Alan Gilbert int ufd = -1; 92eb59db53SDr. David Alan Gilbert bool ret = false; /* Error unless we change it */ 93eb59db53SDr. David Alan Gilbert void *testarea = NULL; 94eb59db53SDr. David Alan Gilbert struct uffdio_register reg_struct; 95eb59db53SDr. David Alan Gilbert struct uffdio_range range_struct; 96eb59db53SDr. David Alan Gilbert uint64_t feature_mask; 97eb59db53SDr. David Alan Gilbert 98eb59db53SDr. David Alan Gilbert if ((1ul << qemu_target_page_bits()) > pagesize) { 99eb59db53SDr. David Alan Gilbert error_report("Target page size bigger than host page size"); 100eb59db53SDr. David Alan Gilbert goto out; 101eb59db53SDr. David Alan Gilbert } 102eb59db53SDr. David Alan Gilbert 103eb59db53SDr. David Alan Gilbert ufd = syscall(__NR_userfaultfd, O_CLOEXEC); 104eb59db53SDr. David Alan Gilbert if (ufd == -1) { 105eb59db53SDr. David Alan Gilbert error_report("%s: userfaultfd not available: %s", __func__, 106eb59db53SDr. David Alan Gilbert strerror(errno)); 107eb59db53SDr. David Alan Gilbert goto out; 108eb59db53SDr. David Alan Gilbert } 109eb59db53SDr. David Alan Gilbert 110eb59db53SDr. David Alan Gilbert /* Version and features check */ 111eb59db53SDr. David Alan Gilbert if (!ufd_version_check(ufd)) { 112eb59db53SDr. David Alan Gilbert goto out; 113eb59db53SDr. David Alan Gilbert } 114eb59db53SDr. David Alan Gilbert 115eb59db53SDr. David Alan Gilbert /* 116eb59db53SDr. David Alan Gilbert * We need to check that the ops we need are supported on anon memory 117eb59db53SDr. David Alan Gilbert * To do that we need to register a chunk and see the flags that 118eb59db53SDr. David Alan Gilbert * are returned. 119eb59db53SDr. David Alan Gilbert */ 120eb59db53SDr. David Alan Gilbert testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | 121eb59db53SDr. David Alan Gilbert MAP_ANONYMOUS, -1, 0); 122eb59db53SDr. David Alan Gilbert if (testarea == MAP_FAILED) { 123eb59db53SDr. David Alan Gilbert error_report("%s: Failed to map test area: %s", __func__, 124eb59db53SDr. David Alan Gilbert strerror(errno)); 125eb59db53SDr. David Alan Gilbert goto out; 126eb59db53SDr. David Alan Gilbert } 127eb59db53SDr. David Alan Gilbert g_assert(((size_t)testarea & (pagesize-1)) == 0); 128eb59db53SDr. David Alan Gilbert 129eb59db53SDr. David Alan Gilbert reg_struct.range.start = (uintptr_t)testarea; 130eb59db53SDr. David Alan Gilbert reg_struct.range.len = pagesize; 131eb59db53SDr. David Alan Gilbert reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; 132eb59db53SDr. David Alan Gilbert 133eb59db53SDr. David Alan Gilbert if (ioctl(ufd, UFFDIO_REGISTER, ®_struct)) { 134eb59db53SDr. David Alan Gilbert error_report("%s userfault register: %s", __func__, strerror(errno)); 135eb59db53SDr. David Alan Gilbert goto out; 136eb59db53SDr. David Alan Gilbert } 137eb59db53SDr. David Alan Gilbert 138eb59db53SDr. David Alan Gilbert range_struct.start = (uintptr_t)testarea; 139eb59db53SDr. David Alan Gilbert range_struct.len = pagesize; 140eb59db53SDr. David Alan Gilbert if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) { 141eb59db53SDr. David Alan Gilbert error_report("%s userfault unregister: %s", __func__, strerror(errno)); 142eb59db53SDr. David Alan Gilbert goto out; 143eb59db53SDr. David Alan Gilbert } 144eb59db53SDr. David Alan Gilbert 145eb59db53SDr. David Alan Gilbert feature_mask = (__u64)1 << _UFFDIO_WAKE | 146eb59db53SDr. David Alan Gilbert (__u64)1 << _UFFDIO_COPY | 147eb59db53SDr. David Alan Gilbert (__u64)1 << _UFFDIO_ZEROPAGE; 148eb59db53SDr. David Alan Gilbert if ((reg_struct.ioctls & feature_mask) != feature_mask) { 149eb59db53SDr. David Alan Gilbert error_report("Missing userfault map features: %" PRIx64, 150eb59db53SDr. David Alan Gilbert (uint64_t)(~reg_struct.ioctls & feature_mask)); 151eb59db53SDr. David Alan Gilbert goto out; 152eb59db53SDr. David Alan Gilbert } 153eb59db53SDr. David Alan Gilbert 154eb59db53SDr. David Alan Gilbert /* Success! */ 155eb59db53SDr. David Alan Gilbert ret = true; 156eb59db53SDr. David Alan Gilbert out: 157eb59db53SDr. David Alan Gilbert if (testarea) { 158eb59db53SDr. David Alan Gilbert munmap(testarea, pagesize); 159eb59db53SDr. David Alan Gilbert } 160eb59db53SDr. David Alan Gilbert if (ufd != -1) { 161eb59db53SDr. David Alan Gilbert close(ufd); 162eb59db53SDr. David Alan Gilbert } 163eb59db53SDr. David Alan Gilbert return ret; 164eb59db53SDr. David Alan Gilbert } 165eb59db53SDr. David Alan Gilbert 166*e0b266f0SDr. David Alan Gilbert /** 167*e0b266f0SDr. David Alan Gilbert * postcopy_ram_discard_range: Discard a range of memory. 168*e0b266f0SDr. David Alan Gilbert * We can assume that if we've been called postcopy_ram_hosttest returned true. 169*e0b266f0SDr. David Alan Gilbert * 170*e0b266f0SDr. David Alan Gilbert * @mis: Current incoming migration state. 171*e0b266f0SDr. David Alan Gilbert * @start, @length: range of memory to discard. 172*e0b266f0SDr. David Alan Gilbert * 173*e0b266f0SDr. David Alan Gilbert * returns: 0 on success. 174*e0b266f0SDr. David Alan Gilbert */ 175*e0b266f0SDr. David Alan Gilbert int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, 176*e0b266f0SDr. David Alan Gilbert size_t length) 177*e0b266f0SDr. David Alan Gilbert { 178*e0b266f0SDr. David Alan Gilbert trace_postcopy_ram_discard_range(start, length); 179*e0b266f0SDr. David Alan Gilbert if (madvise(start, length, MADV_DONTNEED)) { 180*e0b266f0SDr. David Alan Gilbert error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno)); 181*e0b266f0SDr. David Alan Gilbert return -1; 182*e0b266f0SDr. David Alan Gilbert } 183*e0b266f0SDr. David Alan Gilbert 184*e0b266f0SDr. David Alan Gilbert return 0; 185*e0b266f0SDr. David Alan Gilbert } 186*e0b266f0SDr. David Alan Gilbert 187eb59db53SDr. David Alan Gilbert #else 188eb59db53SDr. David Alan Gilbert /* No target OS support, stubs just fail */ 189eb59db53SDr. David Alan Gilbert bool postcopy_ram_supported_by_host(void) 190eb59db53SDr. David Alan Gilbert { 191eb59db53SDr. David Alan Gilbert error_report("%s: No OS support", __func__); 192eb59db53SDr. David Alan Gilbert return false; 193eb59db53SDr. David Alan Gilbert } 194eb59db53SDr. David Alan Gilbert 195*e0b266f0SDr. David Alan Gilbert int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, 196*e0b266f0SDr. David Alan Gilbert size_t length) 197*e0b266f0SDr. David Alan Gilbert { 198*e0b266f0SDr. David Alan Gilbert assert(0); 199*e0b266f0SDr. David Alan Gilbert } 200eb59db53SDr. David Alan Gilbert #endif 201eb59db53SDr. David Alan Gilbert 202*e0b266f0SDr. David Alan Gilbert /* ------------------------------------------------------------------------- */ 203*e0b266f0SDr. David Alan Gilbert 204*e0b266f0SDr. David Alan Gilbert /** 205*e0b266f0SDr. David Alan Gilbert * postcopy_discard_send_init: Called at the start of each RAMBlock before 206*e0b266f0SDr. David Alan Gilbert * asking to discard individual ranges. 207*e0b266f0SDr. David Alan Gilbert * 208*e0b266f0SDr. David Alan Gilbert * @ms: The current migration state. 209*e0b266f0SDr. David Alan Gilbert * @offset: the bitmap offset of the named RAMBlock in the migration 210*e0b266f0SDr. David Alan Gilbert * bitmap. 211*e0b266f0SDr. David Alan Gilbert * @name: RAMBlock that discards will operate on. 212*e0b266f0SDr. David Alan Gilbert * 213*e0b266f0SDr. David Alan Gilbert * returns: a new PDS. 214*e0b266f0SDr. David Alan Gilbert */ 215*e0b266f0SDr. David Alan Gilbert PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms, 216*e0b266f0SDr. David Alan Gilbert unsigned long offset, 217*e0b266f0SDr. David Alan Gilbert const char *name) 218*e0b266f0SDr. David Alan Gilbert { 219*e0b266f0SDr. David Alan Gilbert PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState)); 220*e0b266f0SDr. David Alan Gilbert 221*e0b266f0SDr. David Alan Gilbert if (res) { 222*e0b266f0SDr. David Alan Gilbert res->ramblock_name = name; 223*e0b266f0SDr. David Alan Gilbert res->offset = offset; 224*e0b266f0SDr. David Alan Gilbert } 225*e0b266f0SDr. David Alan Gilbert 226*e0b266f0SDr. David Alan Gilbert return res; 227*e0b266f0SDr. David Alan Gilbert } 228*e0b266f0SDr. David Alan Gilbert 229*e0b266f0SDr. David Alan Gilbert /** 230*e0b266f0SDr. David Alan Gilbert * postcopy_discard_send_range: Called by the bitmap code for each chunk to 231*e0b266f0SDr. David Alan Gilbert * discard. May send a discard message, may just leave it queued to 232*e0b266f0SDr. David Alan Gilbert * be sent later. 233*e0b266f0SDr. David Alan Gilbert * 234*e0b266f0SDr. David Alan Gilbert * @ms: Current migration state. 235*e0b266f0SDr. David Alan Gilbert * @pds: Structure initialised by postcopy_discard_send_init(). 236*e0b266f0SDr. David Alan Gilbert * @start,@length: a range of pages in the migration bitmap in the 237*e0b266f0SDr. David Alan Gilbert * RAM block passed to postcopy_discard_send_init() (length=1 is one page) 238*e0b266f0SDr. David Alan Gilbert */ 239*e0b266f0SDr. David Alan Gilbert void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds, 240*e0b266f0SDr. David Alan Gilbert unsigned long start, unsigned long length) 241*e0b266f0SDr. David Alan Gilbert { 242*e0b266f0SDr. David Alan Gilbert size_t tp_bits = qemu_target_page_bits(); 243*e0b266f0SDr. David Alan Gilbert /* Convert to byte offsets within the RAM block */ 244*e0b266f0SDr. David Alan Gilbert pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits; 245*e0b266f0SDr. David Alan Gilbert pds->length_list[pds->cur_entry] = length << tp_bits; 246*e0b266f0SDr. David Alan Gilbert trace_postcopy_discard_send_range(pds->ramblock_name, start, length); 247*e0b266f0SDr. David Alan Gilbert pds->cur_entry++; 248*e0b266f0SDr. David Alan Gilbert pds->nsentwords++; 249*e0b266f0SDr. David Alan Gilbert 250*e0b266f0SDr. David Alan Gilbert if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) { 251*e0b266f0SDr. David Alan Gilbert /* Full set, ship it! */ 252*e0b266f0SDr. David Alan Gilbert qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name, 253*e0b266f0SDr. David Alan Gilbert pds->cur_entry, 254*e0b266f0SDr. David Alan Gilbert pds->start_list, 255*e0b266f0SDr. David Alan Gilbert pds->length_list); 256*e0b266f0SDr. David Alan Gilbert pds->nsentcmds++; 257*e0b266f0SDr. David Alan Gilbert pds->cur_entry = 0; 258*e0b266f0SDr. David Alan Gilbert } 259*e0b266f0SDr. David Alan Gilbert } 260*e0b266f0SDr. David Alan Gilbert 261*e0b266f0SDr. David Alan Gilbert /** 262*e0b266f0SDr. David Alan Gilbert * postcopy_discard_send_finish: Called at the end of each RAMBlock by the 263*e0b266f0SDr. David Alan Gilbert * bitmap code. Sends any outstanding discard messages, frees the PDS 264*e0b266f0SDr. David Alan Gilbert * 265*e0b266f0SDr. David Alan Gilbert * @ms: Current migration state. 266*e0b266f0SDr. David Alan Gilbert * @pds: Structure initialised by postcopy_discard_send_init(). 267*e0b266f0SDr. David Alan Gilbert */ 268*e0b266f0SDr. David Alan Gilbert void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds) 269*e0b266f0SDr. David Alan Gilbert { 270*e0b266f0SDr. David Alan Gilbert /* Anything unsent? */ 271*e0b266f0SDr. David Alan Gilbert if (pds->cur_entry) { 272*e0b266f0SDr. David Alan Gilbert qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name, 273*e0b266f0SDr. David Alan Gilbert pds->cur_entry, 274*e0b266f0SDr. David Alan Gilbert pds->start_list, 275*e0b266f0SDr. David Alan Gilbert pds->length_list); 276*e0b266f0SDr. David Alan Gilbert pds->nsentcmds++; 277*e0b266f0SDr. David Alan Gilbert } 278*e0b266f0SDr. David Alan Gilbert 279*e0b266f0SDr. David Alan Gilbert trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords, 280*e0b266f0SDr. David Alan Gilbert pds->nsentcmds); 281*e0b266f0SDr. David Alan Gilbert 282*e0b266f0SDr. David Alan Gilbert g_free(pds); 283*e0b266f0SDr. David Alan Gilbert } 284