11f070489SIgor Mammedov /* 21f070489SIgor Mammedov * QEMU Host Memory Backend 31f070489SIgor Mammedov * 41f070489SIgor Mammedov * Copyright (C) 2013-2014 Red Hat Inc 51f070489SIgor Mammedov * 61f070489SIgor Mammedov * Authors: 71f070489SIgor Mammedov * Igor Mammedov <imammedo@redhat.com> 81f070489SIgor Mammedov * 91f070489SIgor Mammedov * This work is licensed under the terms of the GNU GPL, version 2 or later. 101f070489SIgor Mammedov * See the COPYING file in the top-level directory. 111f070489SIgor Mammedov */ 129af23989SMarkus Armbruster 139c058332SPeter Maydell #include "qemu/osdep.h" 141f070489SIgor Mammedov #include "sysemu/hostmem.h" 156b269967SEduardo Habkost #include "hw/boards.h" 16da34e65cSMarkus Armbruster #include "qapi/error.h" 17eb815e24SMarkus Armbruster #include "qapi/qapi-builtin-visit.h" 181f070489SIgor Mammedov #include "qapi/visitor.h" 191f070489SIgor Mammedov #include "qemu/config-file.h" 201f070489SIgor Mammedov #include "qom/object_interfaces.h" 212b108085SDavid Gibson #include "qemu/mmap-alloc.h" 22b85ea5faSPeter Maydell #include "qemu/madvise.h" 235d9a9a61SMichal Privoznik #include "qemu/cutils.h" 2404accf43SMark Kanda #include "hw/qdev-core.h" 251f070489SIgor Mammedov 264cf1b76bSHu Tao #ifdef CONFIG_NUMA 274cf1b76bSHu Tao #include <numaif.h> 286bb613f0SMichal Privoznik #include <numa.h> 294cf1b76bSHu Tao QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT); 306bb613f0SMichal Privoznik /* 316bb613f0SMichal Privoznik * HOST_MEM_POLICY_PREFERRED may either translate to MPOL_PREFERRED or 326bb613f0SMichal Privoznik * MPOL_PREFERRED_MANY, see comments further below. 336bb613f0SMichal Privoznik */ 344cf1b76bSHu Tao QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED); 354cf1b76bSHu Tao QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND); 364cf1b76bSHu Tao QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE); 374cf1b76bSHu Tao #endif 384cf1b76bSHu Tao 39fa0cb34dSMarc-André Lureau char * 40fa0cb34dSMarc-André Lureau host_memory_backend_get_name(HostMemoryBackend *backend) 41fa0cb34dSMarc-André Lureau { 42fa0cb34dSMarc-André Lureau if (!backend->use_canonical_path) { 437a309cc9SMarkus Armbruster return g_strdup(object_get_canonical_path_component(OBJECT(backend))); 44fa0cb34dSMarc-André Lureau } 45fa0cb34dSMarc-André Lureau 46fa0cb34dSMarc-André Lureau return object_get_canonical_path(OBJECT(backend)); 47fa0cb34dSMarc-André Lureau } 48fa0cb34dSMarc-André Lureau 491f070489SIgor Mammedov static void 50d7bce999SEric Blake host_memory_backend_get_size(Object *obj, Visitor *v, const char *name, 51d7bce999SEric Blake void *opaque, Error **errp) 521f070489SIgor Mammedov { 531f070489SIgor Mammedov HostMemoryBackend *backend = MEMORY_BACKEND(obj); 541f070489SIgor Mammedov uint64_t value = backend->size; 551f070489SIgor Mammedov 5651e72bc1SEric Blake visit_type_size(v, name, &value, errp); 571f070489SIgor Mammedov } 581f070489SIgor Mammedov 591f070489SIgor Mammedov static void 60d7bce999SEric Blake host_memory_backend_set_size(Object *obj, Visitor *v, const char *name, 61d7bce999SEric Blake void *opaque, Error **errp) 621f070489SIgor Mammedov { 631f070489SIgor Mammedov HostMemoryBackend *backend = MEMORY_BACKEND(obj); 641f070489SIgor Mammedov uint64_t value; 651f070489SIgor Mammedov 666f4c60e4SPeter Xu if (host_memory_backend_mr_inited(backend)) { 67dcfe4805SMarkus Armbruster error_setg(errp, "cannot change property %s of %s ", name, 68dcfe4805SMarkus Armbruster object_get_typename(obj)); 69dcfe4805SMarkus Armbruster return; 701f070489SIgor Mammedov } 711f070489SIgor Mammedov 72668f62ecSMarkus Armbruster if (!visit_type_size(v, name, &value, errp)) { 73dcfe4805SMarkus Armbruster return; 741f070489SIgor Mammedov } 751f070489SIgor Mammedov if (!value) { 76dcfe4805SMarkus Armbruster error_setg(errp, 7721d16836SZhang Yi "property '%s' of %s doesn't take value '%" PRIu64 "'", 7821d16836SZhang Yi name, object_get_typename(obj), value); 79dcfe4805SMarkus Armbruster return; 801f070489SIgor Mammedov } 811f070489SIgor Mammedov backend->size = value; 821f070489SIgor Mammedov } 831f070489SIgor Mammedov 844cf1b76bSHu Tao static void 85d7bce999SEric Blake host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, 86d7bce999SEric Blake void *opaque, Error **errp) 874cf1b76bSHu Tao { 884cf1b76bSHu Tao HostMemoryBackend *backend = MEMORY_BACKEND(obj); 894cf1b76bSHu Tao uint16List *host_nodes = NULL; 90c3033fd3SEric Blake uint16List **tail = &host_nodes; 914cf1b76bSHu Tao unsigned long value; 924cf1b76bSHu Tao 934cf1b76bSHu Tao value = find_first_bit(backend->host_nodes, MAX_NODES); 941454d33fSXiao Guangrong if (value == MAX_NODES) { 9515160ab7SIgor Mammedov goto ret; 961454d33fSXiao Guangrong } 974cf1b76bSHu Tao 98c3033fd3SEric Blake QAPI_LIST_APPEND(tail, value); 99658ae5a7SMarkus Armbruster 1004cf1b76bSHu Tao do { 1014cf1b76bSHu Tao value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); 1024cf1b76bSHu Tao if (value == MAX_NODES) { 1034cf1b76bSHu Tao break; 1044cf1b76bSHu Tao } 1054cf1b76bSHu Tao 106c3033fd3SEric Blake QAPI_LIST_APPEND(tail, value); 1074cf1b76bSHu Tao } while (true); 1084cf1b76bSHu Tao 10915160ab7SIgor Mammedov ret: 11051e72bc1SEric Blake visit_type_uint16List(v, name, &host_nodes, errp); 111bdd5ce05SKeqian Zhu qapi_free_uint16List(host_nodes); 1124cf1b76bSHu Tao } 1134cf1b76bSHu Tao 1144cf1b76bSHu Tao static void 115d7bce999SEric Blake host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name, 116d7bce999SEric Blake void *opaque, Error **errp) 1174cf1b76bSHu Tao { 1184cf1b76bSHu Tao #ifdef CONFIG_NUMA 1194cf1b76bSHu Tao HostMemoryBackend *backend = MEMORY_BACKEND(obj); 120ffa144b3SEduardo Habkost uint16List *l, *host_nodes = NULL; 1214cf1b76bSHu Tao 122ffa144b3SEduardo Habkost visit_type_uint16List(v, name, &host_nodes, errp); 1234cf1b76bSHu Tao 124ffa144b3SEduardo Habkost for (l = host_nodes; l; l = l->next) { 125ffa144b3SEduardo Habkost if (l->value >= MAX_NODES) { 126ffa144b3SEduardo Habkost error_setg(errp, "Invalid host-nodes value: %d", l->value); 127ffa144b3SEduardo Habkost goto out; 1284cf1b76bSHu Tao } 129ffa144b3SEduardo Habkost } 130ffa144b3SEduardo Habkost 131ffa144b3SEduardo Habkost for (l = host_nodes; l; l = l->next) { 132ffa144b3SEduardo Habkost bitmap_set(backend->host_nodes, l->value, 1); 133ffa144b3SEduardo Habkost } 134ffa144b3SEduardo Habkost 135ffa144b3SEduardo Habkost out: 136ffa144b3SEduardo Habkost qapi_free_uint16List(host_nodes); 1374cf1b76bSHu Tao #else 1384cf1b76bSHu Tao error_setg(errp, "NUMA node binding are not supported by this QEMU"); 1394cf1b76bSHu Tao #endif 1404cf1b76bSHu Tao } 1414cf1b76bSHu Tao 142a3590dacSDaniel P. Berrange static int 143a3590dacSDaniel P. Berrange host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED) 1444cf1b76bSHu Tao { 1454cf1b76bSHu Tao HostMemoryBackend *backend = MEMORY_BACKEND(obj); 146a3590dacSDaniel P. Berrange return backend->policy; 1474cf1b76bSHu Tao } 1484cf1b76bSHu Tao 1494cf1b76bSHu Tao static void 150a3590dacSDaniel P. Berrange host_memory_backend_set_policy(Object *obj, int policy, Error **errp) 1514cf1b76bSHu Tao { 1524cf1b76bSHu Tao HostMemoryBackend *backend = MEMORY_BACKEND(obj); 1534cf1b76bSHu Tao backend->policy = policy; 1544cf1b76bSHu Tao 1554cf1b76bSHu Tao #ifndef CONFIG_NUMA 1564cf1b76bSHu Tao if (policy != HOST_MEM_POLICY_DEFAULT) { 1574cf1b76bSHu Tao error_setg(errp, "NUMA policies are not supported by this QEMU"); 1584cf1b76bSHu Tao } 1594cf1b76bSHu Tao #endif 1604cf1b76bSHu Tao } 1614cf1b76bSHu Tao 162605d0a94SPaolo Bonzini static bool host_memory_backend_get_merge(Object *obj, Error **errp) 163605d0a94SPaolo Bonzini { 164605d0a94SPaolo Bonzini HostMemoryBackend *backend = MEMORY_BACKEND(obj); 165605d0a94SPaolo Bonzini 166605d0a94SPaolo Bonzini return backend->merge; 167605d0a94SPaolo Bonzini } 168605d0a94SPaolo Bonzini 169605d0a94SPaolo Bonzini static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) 170605d0a94SPaolo Bonzini { 171605d0a94SPaolo Bonzini HostMemoryBackend *backend = MEMORY_BACKEND(obj); 172605d0a94SPaolo Bonzini 173*a2b6a965SPaolo Bonzini if (QEMU_MADV_MERGEABLE == QEMU_MADV_INVALID) { 174*a2b6a965SPaolo Bonzini if (value) { 175*a2b6a965SPaolo Bonzini error_setg(errp, "Memory merging is not supported on this host"); 176*a2b6a965SPaolo Bonzini } 177*a2b6a965SPaolo Bonzini assert(!backend->merge); 178*a2b6a965SPaolo Bonzini return; 179*a2b6a965SPaolo Bonzini } 180*a2b6a965SPaolo Bonzini 1816f4c60e4SPeter Xu if (!host_memory_backend_mr_inited(backend)) { 182605d0a94SPaolo Bonzini backend->merge = value; 183605d0a94SPaolo Bonzini return; 184605d0a94SPaolo Bonzini } 185605d0a94SPaolo Bonzini 186605d0a94SPaolo Bonzini if (value != backend->merge) { 187605d0a94SPaolo Bonzini void *ptr = memory_region_get_ram_ptr(&backend->mr); 188605d0a94SPaolo Bonzini uint64_t sz = memory_region_size(&backend->mr); 189605d0a94SPaolo Bonzini 190605d0a94SPaolo Bonzini qemu_madvise(ptr, sz, 191605d0a94SPaolo Bonzini value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); 192605d0a94SPaolo Bonzini backend->merge = value; 193605d0a94SPaolo Bonzini } 194605d0a94SPaolo Bonzini } 195605d0a94SPaolo Bonzini 196605d0a94SPaolo Bonzini static bool host_memory_backend_get_dump(Object *obj, Error **errp) 197605d0a94SPaolo Bonzini { 198605d0a94SPaolo Bonzini HostMemoryBackend *backend = MEMORY_BACKEND(obj); 199605d0a94SPaolo Bonzini 200605d0a94SPaolo Bonzini return backend->dump; 201605d0a94SPaolo Bonzini } 202605d0a94SPaolo Bonzini 203605d0a94SPaolo Bonzini static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) 204605d0a94SPaolo Bonzini { 205605d0a94SPaolo Bonzini HostMemoryBackend *backend = MEMORY_BACKEND(obj); 206605d0a94SPaolo Bonzini 207*a2b6a965SPaolo Bonzini if (QEMU_MADV_DONTDUMP == QEMU_MADV_INVALID) { 208*a2b6a965SPaolo Bonzini if (!value) { 209*a2b6a965SPaolo Bonzini error_setg(errp, "Dumping guest memory cannot be disabled on this host"); 210*a2b6a965SPaolo Bonzini } 211*a2b6a965SPaolo Bonzini assert(backend->dump); 212*a2b6a965SPaolo Bonzini return; 213*a2b6a965SPaolo Bonzini } 214*a2b6a965SPaolo Bonzini 2156f4c60e4SPeter Xu if (!host_memory_backend_mr_inited(backend)) { 216605d0a94SPaolo Bonzini backend->dump = value; 217605d0a94SPaolo Bonzini return; 218605d0a94SPaolo Bonzini } 219605d0a94SPaolo Bonzini 220605d0a94SPaolo Bonzini if (value != backend->dump) { 221605d0a94SPaolo Bonzini void *ptr = memory_region_get_ram_ptr(&backend->mr); 222605d0a94SPaolo Bonzini uint64_t sz = memory_region_size(&backend->mr); 223605d0a94SPaolo Bonzini 224605d0a94SPaolo Bonzini qemu_madvise(ptr, sz, 225605d0a94SPaolo Bonzini value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); 226605d0a94SPaolo Bonzini backend->dump = value; 227605d0a94SPaolo Bonzini } 228605d0a94SPaolo Bonzini } 229605d0a94SPaolo Bonzini 230a35ba7beSPaolo Bonzini static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) 231a35ba7beSPaolo Bonzini { 232a35ba7beSPaolo Bonzini HostMemoryBackend *backend = MEMORY_BACKEND(obj); 233a35ba7beSPaolo Bonzini 2344ebc74dbSIgor Mammedov return backend->prealloc; 235a35ba7beSPaolo Bonzini } 236a35ba7beSPaolo Bonzini 237a35ba7beSPaolo Bonzini static void host_memory_backend_set_prealloc(Object *obj, bool value, 238a35ba7beSPaolo Bonzini Error **errp) 239a35ba7beSPaolo Bonzini { 240a35ba7beSPaolo Bonzini HostMemoryBackend *backend = MEMORY_BACKEND(obj); 241a35ba7beSPaolo Bonzini 2429181fb70SDavid Hildenbrand if (!backend->reserve && value) { 2439181fb70SDavid Hildenbrand error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible"); 2449181fb70SDavid Hildenbrand return; 2459181fb70SDavid Hildenbrand } 2469181fb70SDavid Hildenbrand 2476f4c60e4SPeter Xu if (!host_memory_backend_mr_inited(backend)) { 248a35ba7beSPaolo Bonzini backend->prealloc = value; 249a35ba7beSPaolo Bonzini return; 250a35ba7beSPaolo Bonzini } 251a35ba7beSPaolo Bonzini 252a35ba7beSPaolo Bonzini if (value && !backend->prealloc) { 253a35ba7beSPaolo Bonzini int fd = memory_region_get_fd(&backend->mr); 254a35ba7beSPaolo Bonzini void *ptr = memory_region_get_ram_ptr(&backend->mr); 255a35ba7beSPaolo Bonzini uint64_t sz = memory_region_size(&backend->mr); 256a35ba7beSPaolo Bonzini 2579c878ad6SPhilippe Mathieu-Daudé if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads, 25804accf43SMark Kanda backend->prealloc_context, false, errp)) { 259056b68afSIgor Mammedov return; 260056b68afSIgor Mammedov } 261a35ba7beSPaolo Bonzini backend->prealloc = true; 262a35ba7beSPaolo Bonzini } 263a35ba7beSPaolo Bonzini } 264a35ba7beSPaolo Bonzini 265ffac16faSIgor Mammedov static void host_memory_backend_get_prealloc_threads(Object *obj, Visitor *v, 266ffac16faSIgor Mammedov const char *name, void *opaque, Error **errp) 267ffac16faSIgor Mammedov { 268ffac16faSIgor Mammedov HostMemoryBackend *backend = MEMORY_BACKEND(obj); 269ffac16faSIgor Mammedov visit_type_uint32(v, name, &backend->prealloc_threads, errp); 270ffac16faSIgor Mammedov } 271ffac16faSIgor Mammedov 272ffac16faSIgor Mammedov static void host_memory_backend_set_prealloc_threads(Object *obj, Visitor *v, 273ffac16faSIgor Mammedov const char *name, void *opaque, Error **errp) 274ffac16faSIgor Mammedov { 275ffac16faSIgor Mammedov HostMemoryBackend *backend = MEMORY_BACKEND(obj); 276ffac16faSIgor Mammedov uint32_t value; 277ffac16faSIgor Mammedov 278668f62ecSMarkus Armbruster if (!visit_type_uint32(v, name, &value, errp)) { 279dcfe4805SMarkus Armbruster return; 280ffac16faSIgor Mammedov } 281ffac16faSIgor Mammedov if (value <= 0) { 282dcfe4805SMarkus Armbruster error_setg(errp, "property '%s' of %s doesn't take value '%d'", name, 283dcfe4805SMarkus Armbruster object_get_typename(obj), value); 284dcfe4805SMarkus Armbruster return; 285ffac16faSIgor Mammedov } 286ffac16faSIgor Mammedov backend->prealloc_threads = value; 287ffac16faSIgor Mammedov } 288ffac16faSIgor Mammedov 28958f4662cSHu Tao static void host_memory_backend_init(Object *obj) 2901f070489SIgor Mammedov { 291605d0a94SPaolo Bonzini HostMemoryBackend *backend = MEMORY_BACKEND(obj); 2926b269967SEduardo Habkost MachineState *machine = MACHINE(qdev_get_machine()); 293605d0a94SPaolo Bonzini 294ffac16faSIgor Mammedov /* TODO: convert access to globals to compat properties */ 2956b269967SEduardo Habkost backend->merge = machine_mem_merge(machine); 2966b269967SEduardo Habkost backend->dump = machine_dump_guest_core(machine); 29737662d85SXiaoyao Li backend->guest_memfd = machine_require_guest_memfd(machine); 2989181fb70SDavid Hildenbrand backend->reserve = true; 299f8d426a6SJaroslav Jindrak backend->prealloc_threads = machine->smp.cpus; 3001f070489SIgor Mammedov } 3011f070489SIgor Mammedov 302fa0cb34dSMarc-André Lureau static void host_memory_backend_post_init(Object *obj) 303fa0cb34dSMarc-André Lureau { 304fa0cb34dSMarc-André Lureau object_apply_compat_props(obj); 305fa0cb34dSMarc-André Lureau } 306fa0cb34dSMarc-André Lureau 3074728b574SPeter Xu bool host_memory_backend_mr_inited(HostMemoryBackend *backend) 3084728b574SPeter Xu { 3094728b574SPeter Xu /* 3104728b574SPeter Xu * NOTE: We forbid zero-length memory backend, so here zero means 3114728b574SPeter Xu * "we haven't inited the backend memory region yet". 3124728b574SPeter Xu */ 3134728b574SPeter Xu return memory_region_size(&backend->mr) != 0; 3144728b574SPeter Xu } 3154728b574SPeter Xu 3167943e97bSDavid Hildenbrand MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend) 3171f070489SIgor Mammedov { 3186f4c60e4SPeter Xu return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL; 3191f070489SIgor Mammedov } 3201f070489SIgor Mammedov 3212aece63cSXiao Guangrong void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped) 3222aece63cSXiao Guangrong { 3232aece63cSXiao Guangrong backend->is_mapped = mapped; 3242aece63cSXiao Guangrong } 3252aece63cSXiao Guangrong 3262aece63cSXiao Guangrong bool host_memory_backend_is_mapped(HostMemoryBackend *backend) 3272aece63cSXiao Guangrong { 3282aece63cSXiao Guangrong return backend->is_mapped; 3292aece63cSXiao Guangrong } 3302aece63cSXiao Guangrong 3312b108085SDavid Gibson size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) 3322b108085SDavid Gibson { 3338be934b7SThomas Huth size_t pagesize = qemu_ram_pagesize(memdev->mr.ram_block); 3348be934b7SThomas Huth g_assert(pagesize >= qemu_real_host_page_size()); 3352b108085SDavid Gibson return pagesize; 3362b108085SDavid Gibson } 3372b108085SDavid Gibson 338bd9262d9SHu Tao static void 339bd9262d9SHu Tao host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) 340bd9262d9SHu Tao { 341bd9262d9SHu Tao HostMemoryBackend *backend = MEMORY_BACKEND(uc); 342bd9262d9SHu Tao HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); 343605d0a94SPaolo Bonzini void *ptr; 344605d0a94SPaolo Bonzini uint64_t sz; 3455d9a9a61SMichal Privoznik size_t pagesize; 34604accf43SMark Kanda bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED); 347bd9262d9SHu Tao 348e199f7adSPhilippe Mathieu-Daudé if (!bc->alloc) { 349e199f7adSPhilippe Mathieu-Daudé return; 350e199f7adSPhilippe Mathieu-Daudé } 351fdb63cf3SPhilippe Mathieu-Daudé if (!bc->alloc(backend, errp)) { 352fdb63cf3SPhilippe Mathieu-Daudé return; 353605d0a94SPaolo Bonzini } 354605d0a94SPaolo Bonzini 355605d0a94SPaolo Bonzini ptr = memory_region_get_ram_ptr(&backend->mr); 356605d0a94SPaolo Bonzini sz = memory_region_size(&backend->mr); 3575d9a9a61SMichal Privoznik pagesize = qemu_ram_pagesize(backend->mr.ram_block); 3585d9a9a61SMichal Privoznik 3595d9a9a61SMichal Privoznik if (backend->aligned && !QEMU_IS_ALIGNED(sz, pagesize)) { 3605d9a9a61SMichal Privoznik g_autofree char *pagesize_str = size_to_str(pagesize); 3615d9a9a61SMichal Privoznik error_setg(errp, "backend '%s' memory size must be multiple of %s", 3625d9a9a61SMichal Privoznik object_get_typename(OBJECT(uc)), pagesize_str); 3635d9a9a61SMichal Privoznik return; 3645d9a9a61SMichal Privoznik } 365605d0a94SPaolo Bonzini 366605d0a94SPaolo Bonzini if (backend->merge) { 367605d0a94SPaolo Bonzini qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); 368605d0a94SPaolo Bonzini } 369605d0a94SPaolo Bonzini if (!backend->dump) { 370605d0a94SPaolo Bonzini qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); 371605d0a94SPaolo Bonzini } 3724cf1b76bSHu Tao #ifdef CONFIG_NUMA 3734cf1b76bSHu Tao unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); 3744cf1b76bSHu Tao /* lastbit == MAX_NODES means maxnode = 0 */ 3754cf1b76bSHu Tao unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); 37693e08863SPhilippe Mathieu-Daudé /* 37793e08863SPhilippe Mathieu-Daudé * Ensure policy won't be ignored in case memory is preallocated 3784cf1b76bSHu Tao * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so 37993e08863SPhilippe Mathieu-Daudé * this doesn't catch hugepage case. 38093e08863SPhilippe Mathieu-Daudé */ 381288d3322SMichael S. Tsirkin unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; 3826bb613f0SMichal Privoznik int mode = backend->policy; 3834cf1b76bSHu Tao 3844cf1b76bSHu Tao /* check for invalid host-nodes and policies and give more verbose 3854cf1b76bSHu Tao * error messages than mbind(). */ 3864cf1b76bSHu Tao if (maxnode && backend->policy == MPOL_DEFAULT) { 3874cf1b76bSHu Tao error_setg(errp, "host-nodes must be empty for policy default," 3884cf1b76bSHu Tao " or you should explicitly specify a policy other" 3894cf1b76bSHu Tao " than default"); 3904cf1b76bSHu Tao return; 3914cf1b76bSHu Tao } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { 3924cf1b76bSHu Tao error_setg(errp, "host-nodes must be set for policy %s", 393977c736fSMarkus Armbruster HostMemPolicy_str(backend->policy)); 3944cf1b76bSHu Tao return; 3954cf1b76bSHu Tao } 3964cf1b76bSHu Tao 39793e08863SPhilippe Mathieu-Daudé /* 39893e08863SPhilippe Mathieu-Daudé * We can have up to MAX_NODES nodes, but we need to pass maxnode+1 3994cf1b76bSHu Tao * as argument to mbind() due to an old Linux bug (feature?) which 4004cf1b76bSHu Tao * cuts off the last specified node. This means backend->host_nodes 4014cf1b76bSHu Tao * must have MAX_NODES+1 bits available. 4024cf1b76bSHu Tao */ 4034cf1b76bSHu Tao assert(sizeof(backend->host_nodes) >= 4044cf1b76bSHu Tao BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); 4054cf1b76bSHu Tao assert(maxnode <= MAX_NODES); 40670b6d525SIgor Mammedov 4076bb613f0SMichal Privoznik #ifdef HAVE_NUMA_HAS_PREFERRED_MANY 4086bb613f0SMichal Privoznik if (mode == MPOL_PREFERRED && numa_has_preferred_many() > 0) { 4096bb613f0SMichal Privoznik /* 4106bb613f0SMichal Privoznik * Replace with MPOL_PREFERRED_MANY otherwise the mbind() below 4116bb613f0SMichal Privoznik * silently picks the first node. 4126bb613f0SMichal Privoznik */ 4136bb613f0SMichal Privoznik mode = MPOL_PREFERRED_MANY; 4146bb613f0SMichal Privoznik } 4156bb613f0SMichal Privoznik #endif 4166bb613f0SMichal Privoznik 41770b6d525SIgor Mammedov if (maxnode && 4186bb613f0SMichal Privoznik mbind(ptr, sz, mode, backend->host_nodes, maxnode + 1, flags)) { 419a3567ba1SPavel Fedin if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { 4204cf1b76bSHu Tao error_setg_errno(errp, errno, 4214cf1b76bSHu Tao "cannot bind memory to host NUMA nodes"); 4224cf1b76bSHu Tao return; 4234cf1b76bSHu Tao } 424a3567ba1SPavel Fedin } 4254cf1b76bSHu Tao #endif 42693e08863SPhilippe Mathieu-Daudé /* 42793e08863SPhilippe Mathieu-Daudé * Preallocate memory after the NUMA policy has been instantiated. 4284cf1b76bSHu Tao * This is necessary to guarantee memory is allocated with 4294cf1b76bSHu Tao * specified NUMA policy in place. 4304cf1b76bSHu Tao */ 4319c878ad6SPhilippe Mathieu-Daudé if (backend->prealloc && !qemu_prealloc_mem(memory_region_get_fd(&backend->mr), 4329c878ad6SPhilippe Mathieu-Daudé ptr, sz, 433e6816458SDavid Hildenbrand backend->prealloc_threads, 43404accf43SMark Kanda backend->prealloc_context, 43504accf43SMark Kanda async, errp)) { 4363961613aSPhilippe Mathieu-Daudé return; 4373961613aSPhilippe Mathieu-Daudé } 4383961613aSPhilippe Mathieu-Daudé } 439bd9262d9SHu Tao 44036bce5caSLin Ma static bool 4413beacfb9SEduardo Habkost host_memory_backend_can_be_deleted(UserCreatable *uc) 44236bce5caSLin Ma { 4432aece63cSXiao Guangrong if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) { 44436bce5caSLin Ma return false; 44536bce5caSLin Ma } else { 44636bce5caSLin Ma return true; 44736bce5caSLin Ma } 44836bce5caSLin Ma } 44936bce5caSLin Ma 45006329cceSMarcel Apfelbaum static bool host_memory_backend_get_share(Object *o, Error **errp) 45106329cceSMarcel Apfelbaum { 45206329cceSMarcel Apfelbaum HostMemoryBackend *backend = MEMORY_BACKEND(o); 45306329cceSMarcel Apfelbaum 45406329cceSMarcel Apfelbaum return backend->share; 45506329cceSMarcel Apfelbaum } 45606329cceSMarcel Apfelbaum 45706329cceSMarcel Apfelbaum static void host_memory_backend_set_share(Object *o, bool value, Error **errp) 45806329cceSMarcel Apfelbaum { 45906329cceSMarcel Apfelbaum HostMemoryBackend *backend = MEMORY_BACKEND(o); 46006329cceSMarcel Apfelbaum 46106329cceSMarcel Apfelbaum if (host_memory_backend_mr_inited(backend)) { 46206329cceSMarcel Apfelbaum error_setg(errp, "cannot change property value"); 46306329cceSMarcel Apfelbaum return; 46406329cceSMarcel Apfelbaum } 46506329cceSMarcel Apfelbaum backend->share = value; 46606329cceSMarcel Apfelbaum } 46706329cceSMarcel Apfelbaum 4689181fb70SDavid Hildenbrand #ifdef CONFIG_LINUX 4699181fb70SDavid Hildenbrand static bool host_memory_backend_get_reserve(Object *o, Error **errp) 4709181fb70SDavid Hildenbrand { 4719181fb70SDavid Hildenbrand HostMemoryBackend *backend = MEMORY_BACKEND(o); 4729181fb70SDavid Hildenbrand 4739181fb70SDavid Hildenbrand return backend->reserve; 4749181fb70SDavid Hildenbrand } 4759181fb70SDavid Hildenbrand 4769181fb70SDavid Hildenbrand static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp) 4779181fb70SDavid Hildenbrand { 4789181fb70SDavid Hildenbrand HostMemoryBackend *backend = MEMORY_BACKEND(o); 4799181fb70SDavid Hildenbrand 4809181fb70SDavid Hildenbrand if (host_memory_backend_mr_inited(backend)) { 4819181fb70SDavid Hildenbrand error_setg(errp, "cannot change property value"); 4829181fb70SDavid Hildenbrand return; 4839181fb70SDavid Hildenbrand } 4849181fb70SDavid Hildenbrand if (backend->prealloc && !value) { 4859181fb70SDavid Hildenbrand error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible"); 4869181fb70SDavid Hildenbrand return; 4879181fb70SDavid Hildenbrand } 4889181fb70SDavid Hildenbrand backend->reserve = value; 4899181fb70SDavid Hildenbrand } 4909181fb70SDavid Hildenbrand #endif /* CONFIG_LINUX */ 4919181fb70SDavid Hildenbrand 492fa0cb34dSMarc-André Lureau static bool 493fa0cb34dSMarc-André Lureau host_memory_backend_get_use_canonical_path(Object *obj, Error **errp) 494fa0cb34dSMarc-André Lureau { 495fa0cb34dSMarc-André Lureau HostMemoryBackend *backend = MEMORY_BACKEND(obj); 496fa0cb34dSMarc-André Lureau 497fa0cb34dSMarc-André Lureau return backend->use_canonical_path; 498fa0cb34dSMarc-André Lureau } 499fa0cb34dSMarc-André Lureau 500fa0cb34dSMarc-André Lureau static void 501fa0cb34dSMarc-André Lureau host_memory_backend_set_use_canonical_path(Object *obj, bool value, 502fa0cb34dSMarc-André Lureau Error **errp) 503fa0cb34dSMarc-André Lureau { 504fa0cb34dSMarc-André Lureau HostMemoryBackend *backend = MEMORY_BACKEND(obj); 505fa0cb34dSMarc-André Lureau 506fa0cb34dSMarc-André Lureau backend->use_canonical_path = value; 507fa0cb34dSMarc-André Lureau } 508fa0cb34dSMarc-André Lureau 509bd9262d9SHu Tao static void 510bd9262d9SHu Tao host_memory_backend_class_init(ObjectClass *oc, void *data) 511bd9262d9SHu Tao { 512bd9262d9SHu Tao UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); 513bd9262d9SHu Tao 514bd9262d9SHu Tao ucc->complete = host_memory_backend_memory_complete; 51536bce5caSLin Ma ucc->can_be_deleted = host_memory_backend_can_be_deleted; 516e62834caSEduardo Habkost 517e62834caSEduardo Habkost object_class_property_add_bool(oc, "merge", 518e62834caSEduardo Habkost host_memory_backend_get_merge, 519d2623129SMarkus Armbruster host_memory_backend_set_merge); 520033bfc5eSMarc-André Lureau object_class_property_set_description(oc, "merge", 5217eecec7dSMarkus Armbruster "Mark memory as mergeable"); 522e62834caSEduardo Habkost object_class_property_add_bool(oc, "dump", 523e62834caSEduardo Habkost host_memory_backend_get_dump, 524d2623129SMarkus Armbruster host_memory_backend_set_dump); 525033bfc5eSMarc-André Lureau object_class_property_set_description(oc, "dump", 5267eecec7dSMarkus Armbruster "Set to 'off' to exclude from core dump"); 527e62834caSEduardo Habkost object_class_property_add_bool(oc, "prealloc", 528e62834caSEduardo Habkost host_memory_backend_get_prealloc, 529d2623129SMarkus Armbruster host_memory_backend_set_prealloc); 530033bfc5eSMarc-André Lureau object_class_property_set_description(oc, "prealloc", 5317eecec7dSMarkus Armbruster "Preallocate memory"); 532ffac16faSIgor Mammedov object_class_property_add(oc, "prealloc-threads", "int", 533ffac16faSIgor Mammedov host_memory_backend_get_prealloc_threads, 534ffac16faSIgor Mammedov host_memory_backend_set_prealloc_threads, 535d2623129SMarkus Armbruster NULL, NULL); 536ffac16faSIgor Mammedov object_class_property_set_description(oc, "prealloc-threads", 5377eecec7dSMarkus Armbruster "Number of CPU threads to use for prealloc"); 538e6816458SDavid Hildenbrand object_class_property_add_link(oc, "prealloc-context", 539e6816458SDavid Hildenbrand TYPE_THREAD_CONTEXT, offsetof(HostMemoryBackend, prealloc_context), 540e6816458SDavid Hildenbrand object_property_allow_set_link, OBJ_PROP_LINK_STRONG); 541e6816458SDavid Hildenbrand object_class_property_set_description(oc, "prealloc-context", 542e6816458SDavid Hildenbrand "Context to use for creating CPU threads for preallocation"); 543e62834caSEduardo Habkost object_class_property_add(oc, "size", "int", 544e62834caSEduardo Habkost host_memory_backend_get_size, 545e62834caSEduardo Habkost host_memory_backend_set_size, 546d2623129SMarkus Armbruster NULL, NULL); 547033bfc5eSMarc-André Lureau object_class_property_set_description(oc, "size", 5487eecec7dSMarkus Armbruster "Size of the memory region (ex: 500M)"); 549e62834caSEduardo Habkost object_class_property_add(oc, "host-nodes", "int", 550e62834caSEduardo Habkost host_memory_backend_get_host_nodes, 551e62834caSEduardo Habkost host_memory_backend_set_host_nodes, 552d2623129SMarkus Armbruster NULL, NULL); 553033bfc5eSMarc-André Lureau object_class_property_set_description(oc, "host-nodes", 5547eecec7dSMarkus Armbruster "Binds memory to the list of NUMA host nodes"); 555e62834caSEduardo Habkost object_class_property_add_enum(oc, "policy", "HostMemPolicy", 556f7abe0ecSMarc-André Lureau &HostMemPolicy_lookup, 557e62834caSEduardo Habkost host_memory_backend_get_policy, 558d2623129SMarkus Armbruster host_memory_backend_set_policy); 559033bfc5eSMarc-André Lureau object_class_property_set_description(oc, "policy", 5607eecec7dSMarkus Armbruster "Set the NUMA policy"); 56106329cceSMarcel Apfelbaum object_class_property_add_bool(oc, "share", 562d2623129SMarkus Armbruster host_memory_backend_get_share, host_memory_backend_set_share); 563033bfc5eSMarc-André Lureau object_class_property_set_description(oc, "share", 5647eecec7dSMarkus Armbruster "Mark the memory as private to QEMU or shared"); 5659181fb70SDavid Hildenbrand #ifdef CONFIG_LINUX 5669181fb70SDavid Hildenbrand object_class_property_add_bool(oc, "reserve", 5679181fb70SDavid Hildenbrand host_memory_backend_get_reserve, host_memory_backend_set_reserve); 5689181fb70SDavid Hildenbrand object_class_property_set_description(oc, "reserve", 5699181fb70SDavid Hildenbrand "Reserve swap space (or huge pages) if applicable"); 5709181fb70SDavid Hildenbrand #endif /* CONFIG_LINUX */ 5718db0b204SIgor Mammedov /* 5728db0b204SIgor Mammedov * Do not delete/rename option. This option must be considered stable 5738db0b204SIgor Mammedov * (as if it didn't have the 'x-' prefix including deprecation period) as 5748db0b204SIgor Mammedov * long as 4.0 and older machine types exists. 5758db0b204SIgor Mammedov * Option will be used by upper layers to override (disable) canonical path 5768db0b204SIgor Mammedov * for ramblock-id set by compat properties on old machine types ( <= 4.0), 5778db0b204SIgor Mammedov * to keep migration working when backend is used for main RAM with 5788db0b204SIgor Mammedov * -machine memory-backend= option (main RAM historically used prefix-less 5798db0b204SIgor Mammedov * ramblock-id). 5808db0b204SIgor Mammedov */ 581fa0cb34dSMarc-André Lureau object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id", 582fa0cb34dSMarc-André Lureau host_memory_backend_get_use_canonical_path, 583d2623129SMarkus Armbruster host_memory_backend_set_use_canonical_path); 584e1ff3c67SIgor Mammedov } 585e1ff3c67SIgor Mammedov 58658f4662cSHu Tao static const TypeInfo host_memory_backend_info = { 5871f070489SIgor Mammedov .name = TYPE_MEMORY_BACKEND, 5881f070489SIgor Mammedov .parent = TYPE_OBJECT, 5891f070489SIgor Mammedov .abstract = true, 5901f070489SIgor Mammedov .class_size = sizeof(HostMemoryBackendClass), 591bd9262d9SHu Tao .class_init = host_memory_backend_class_init, 5921f070489SIgor Mammedov .instance_size = sizeof(HostMemoryBackend), 59358f4662cSHu Tao .instance_init = host_memory_backend_init, 594fa0cb34dSMarc-André Lureau .instance_post_init = host_memory_backend_post_init, 5951f070489SIgor Mammedov .interfaces = (InterfaceInfo[]) { 5961f070489SIgor Mammedov { TYPE_USER_CREATABLE }, 5971f070489SIgor Mammedov { } 5981f070489SIgor Mammedov } 5991f070489SIgor Mammedov }; 6001f070489SIgor Mammedov 6011f070489SIgor Mammedov static void register_types(void) 6021f070489SIgor Mammedov { 60358f4662cSHu Tao type_register_static(&host_memory_backend_info); 6041f070489SIgor Mammedov } 6051f070489SIgor Mammedov 6061f070489SIgor Mammedov type_init(register_types); 607