/*
 * xref: /qemu/backends/hostmem.c
 * (revision a2b6a96505097c54f5db4c77f66e9c47af4dad22)
 */
/*
 * QEMU Host Memory Backend
 *
 * Copyright (C) 2013-2014 Red Hat Inc
 *
 * Authors:
 *   Igor Mammedov <imammedo@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "sysemu/hostmem.h"
#include "hw/boards.h"
#include "qapi/error.h"
#include "qapi/qapi-builtin-visit.h"
#include "qapi/visitor.h"
#include "qemu/config-file.h"
#include "qom/object_interfaces.h"
#include "qemu/mmap-alloc.h"
#include "qemu/madvise.h"
#include "qemu/cutils.h"
#include "hw/qdev-core.h"

#ifdef CONFIG_NUMA
#include <numaif.h>
#include <numa.h>

/*
 * backend->policy is handed to mbind() as the mode argument, so the
 * HostMemPolicy enum values must match the kernel's MPOL_* constants.
 */
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
/*
 * HOST_MEM_POLICY_PREFERRED may translate to either MPOL_PREFERRED or
 * MPOL_PREFERRED_MANY; see the comment where mbind() is called.
 */
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
#endif

39fa0cb34dSMarc-André Lureau char *
40fa0cb34dSMarc-André Lureau host_memory_backend_get_name(HostMemoryBackend *backend)
41fa0cb34dSMarc-André Lureau {
42fa0cb34dSMarc-André Lureau     if (!backend->use_canonical_path) {
437a309cc9SMarkus Armbruster         return g_strdup(object_get_canonical_path_component(OBJECT(backend)));
44fa0cb34dSMarc-André Lureau     }
45fa0cb34dSMarc-André Lureau 
46fa0cb34dSMarc-André Lureau     return object_get_canonical_path(OBJECT(backend));
47fa0cb34dSMarc-André Lureau }
48fa0cb34dSMarc-André Lureau 
491f070489SIgor Mammedov static void
50d7bce999SEric Blake host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
51d7bce999SEric Blake                              void *opaque, Error **errp)
521f070489SIgor Mammedov {
531f070489SIgor Mammedov     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
541f070489SIgor Mammedov     uint64_t value = backend->size;
551f070489SIgor Mammedov 
5651e72bc1SEric Blake     visit_type_size(v, name, &value, errp);
571f070489SIgor Mammedov }
581f070489SIgor Mammedov 
591f070489SIgor Mammedov static void
60d7bce999SEric Blake host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
61d7bce999SEric Blake                              void *opaque, Error **errp)
621f070489SIgor Mammedov {
631f070489SIgor Mammedov     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
641f070489SIgor Mammedov     uint64_t value;
651f070489SIgor Mammedov 
666f4c60e4SPeter Xu     if (host_memory_backend_mr_inited(backend)) {
67dcfe4805SMarkus Armbruster         error_setg(errp, "cannot change property %s of %s ", name,
68dcfe4805SMarkus Armbruster                    object_get_typename(obj));
69dcfe4805SMarkus Armbruster         return;
701f070489SIgor Mammedov     }
711f070489SIgor Mammedov 
72668f62ecSMarkus Armbruster     if (!visit_type_size(v, name, &value, errp)) {
73dcfe4805SMarkus Armbruster         return;
741f070489SIgor Mammedov     }
751f070489SIgor Mammedov     if (!value) {
76dcfe4805SMarkus Armbruster         error_setg(errp,
7721d16836SZhang Yi                    "property '%s' of %s doesn't take value '%" PRIu64 "'",
7821d16836SZhang Yi                    name, object_get_typename(obj), value);
79dcfe4805SMarkus Armbruster         return;
801f070489SIgor Mammedov     }
811f070489SIgor Mammedov     backend->size = value;
821f070489SIgor Mammedov }
831f070489SIgor Mammedov 
844cf1b76bSHu Tao static void
85d7bce999SEric Blake host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
86d7bce999SEric Blake                                    void *opaque, Error **errp)
874cf1b76bSHu Tao {
884cf1b76bSHu Tao     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
894cf1b76bSHu Tao     uint16List *host_nodes = NULL;
90c3033fd3SEric Blake     uint16List **tail = &host_nodes;
914cf1b76bSHu Tao     unsigned long value;
924cf1b76bSHu Tao 
934cf1b76bSHu Tao     value = find_first_bit(backend->host_nodes, MAX_NODES);
941454d33fSXiao Guangrong     if (value == MAX_NODES) {
9515160ab7SIgor Mammedov         goto ret;
961454d33fSXiao Guangrong     }
974cf1b76bSHu Tao 
98c3033fd3SEric Blake     QAPI_LIST_APPEND(tail, value);
99658ae5a7SMarkus Armbruster 
1004cf1b76bSHu Tao     do {
1014cf1b76bSHu Tao         value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
1024cf1b76bSHu Tao         if (value == MAX_NODES) {
1034cf1b76bSHu Tao             break;
1044cf1b76bSHu Tao         }
1054cf1b76bSHu Tao 
106c3033fd3SEric Blake         QAPI_LIST_APPEND(tail, value);
1074cf1b76bSHu Tao     } while (true);
1084cf1b76bSHu Tao 
10915160ab7SIgor Mammedov ret:
11051e72bc1SEric Blake     visit_type_uint16List(v, name, &host_nodes, errp);
111bdd5ce05SKeqian Zhu     qapi_free_uint16List(host_nodes);
1124cf1b76bSHu Tao }
1134cf1b76bSHu Tao 
1144cf1b76bSHu Tao static void
115d7bce999SEric Blake host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
116d7bce999SEric Blake                                    void *opaque, Error **errp)
1174cf1b76bSHu Tao {
1184cf1b76bSHu Tao #ifdef CONFIG_NUMA
1194cf1b76bSHu Tao     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
120ffa144b3SEduardo Habkost     uint16List *l, *host_nodes = NULL;
1214cf1b76bSHu Tao 
122ffa144b3SEduardo Habkost     visit_type_uint16List(v, name, &host_nodes, errp);
1234cf1b76bSHu Tao 
124ffa144b3SEduardo Habkost     for (l = host_nodes; l; l = l->next) {
125ffa144b3SEduardo Habkost         if (l->value >= MAX_NODES) {
126ffa144b3SEduardo Habkost             error_setg(errp, "Invalid host-nodes value: %d", l->value);
127ffa144b3SEduardo Habkost             goto out;
1284cf1b76bSHu Tao         }
129ffa144b3SEduardo Habkost     }
130ffa144b3SEduardo Habkost 
131ffa144b3SEduardo Habkost     for (l = host_nodes; l; l = l->next) {
132ffa144b3SEduardo Habkost         bitmap_set(backend->host_nodes, l->value, 1);
133ffa144b3SEduardo Habkost     }
134ffa144b3SEduardo Habkost 
135ffa144b3SEduardo Habkost out:
136ffa144b3SEduardo Habkost     qapi_free_uint16List(host_nodes);
1374cf1b76bSHu Tao #else
1384cf1b76bSHu Tao     error_setg(errp, "NUMA node binding are not supported by this QEMU");
1394cf1b76bSHu Tao #endif
1404cf1b76bSHu Tao }
1414cf1b76bSHu Tao 
142a3590dacSDaniel P. Berrange static int
143a3590dacSDaniel P. Berrange host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
1444cf1b76bSHu Tao {
1454cf1b76bSHu Tao     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
146a3590dacSDaniel P. Berrange     return backend->policy;
1474cf1b76bSHu Tao }
1484cf1b76bSHu Tao 
1494cf1b76bSHu Tao static void
150a3590dacSDaniel P. Berrange host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
1514cf1b76bSHu Tao {
1524cf1b76bSHu Tao     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
1534cf1b76bSHu Tao     backend->policy = policy;
1544cf1b76bSHu Tao 
1554cf1b76bSHu Tao #ifndef CONFIG_NUMA
1564cf1b76bSHu Tao     if (policy != HOST_MEM_POLICY_DEFAULT) {
1574cf1b76bSHu Tao         error_setg(errp, "NUMA policies are not supported by this QEMU");
1584cf1b76bSHu Tao     }
1594cf1b76bSHu Tao #endif
1604cf1b76bSHu Tao }
1614cf1b76bSHu Tao 
162605d0a94SPaolo Bonzini static bool host_memory_backend_get_merge(Object *obj, Error **errp)
163605d0a94SPaolo Bonzini {
164605d0a94SPaolo Bonzini     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
165605d0a94SPaolo Bonzini 
166605d0a94SPaolo Bonzini     return backend->merge;
167605d0a94SPaolo Bonzini }
168605d0a94SPaolo Bonzini 
169605d0a94SPaolo Bonzini static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
170605d0a94SPaolo Bonzini {
171605d0a94SPaolo Bonzini     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
172605d0a94SPaolo Bonzini 
173*a2b6a965SPaolo Bonzini     if (QEMU_MADV_MERGEABLE == QEMU_MADV_INVALID) {
174*a2b6a965SPaolo Bonzini         if (value) {
175*a2b6a965SPaolo Bonzini             error_setg(errp, "Memory merging is not supported on this host");
176*a2b6a965SPaolo Bonzini         }
177*a2b6a965SPaolo Bonzini         assert(!backend->merge);
178*a2b6a965SPaolo Bonzini         return;
179*a2b6a965SPaolo Bonzini     }
180*a2b6a965SPaolo Bonzini 
1816f4c60e4SPeter Xu     if (!host_memory_backend_mr_inited(backend)) {
182605d0a94SPaolo Bonzini         backend->merge = value;
183605d0a94SPaolo Bonzini         return;
184605d0a94SPaolo Bonzini     }
185605d0a94SPaolo Bonzini 
186605d0a94SPaolo Bonzini     if (value != backend->merge) {
187605d0a94SPaolo Bonzini         void *ptr = memory_region_get_ram_ptr(&backend->mr);
188605d0a94SPaolo Bonzini         uint64_t sz = memory_region_size(&backend->mr);
189605d0a94SPaolo Bonzini 
190605d0a94SPaolo Bonzini         qemu_madvise(ptr, sz,
191605d0a94SPaolo Bonzini                      value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
192605d0a94SPaolo Bonzini         backend->merge = value;
193605d0a94SPaolo Bonzini     }
194605d0a94SPaolo Bonzini }
195605d0a94SPaolo Bonzini 
196605d0a94SPaolo Bonzini static bool host_memory_backend_get_dump(Object *obj, Error **errp)
197605d0a94SPaolo Bonzini {
198605d0a94SPaolo Bonzini     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
199605d0a94SPaolo Bonzini 
200605d0a94SPaolo Bonzini     return backend->dump;
201605d0a94SPaolo Bonzini }
202605d0a94SPaolo Bonzini 
203605d0a94SPaolo Bonzini static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
204605d0a94SPaolo Bonzini {
205605d0a94SPaolo Bonzini     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
206605d0a94SPaolo Bonzini 
207*a2b6a965SPaolo Bonzini     if (QEMU_MADV_DONTDUMP == QEMU_MADV_INVALID) {
208*a2b6a965SPaolo Bonzini         if (!value) {
209*a2b6a965SPaolo Bonzini             error_setg(errp, "Dumping guest memory cannot be disabled on this host");
210*a2b6a965SPaolo Bonzini         }
211*a2b6a965SPaolo Bonzini         assert(backend->dump);
212*a2b6a965SPaolo Bonzini         return;
213*a2b6a965SPaolo Bonzini     }
214*a2b6a965SPaolo Bonzini 
2156f4c60e4SPeter Xu     if (!host_memory_backend_mr_inited(backend)) {
216605d0a94SPaolo Bonzini         backend->dump = value;
217605d0a94SPaolo Bonzini         return;
218605d0a94SPaolo Bonzini     }
219605d0a94SPaolo Bonzini 
220605d0a94SPaolo Bonzini     if (value != backend->dump) {
221605d0a94SPaolo Bonzini         void *ptr = memory_region_get_ram_ptr(&backend->mr);
222605d0a94SPaolo Bonzini         uint64_t sz = memory_region_size(&backend->mr);
223605d0a94SPaolo Bonzini 
224605d0a94SPaolo Bonzini         qemu_madvise(ptr, sz,
225605d0a94SPaolo Bonzini                      value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
226605d0a94SPaolo Bonzini         backend->dump = value;
227605d0a94SPaolo Bonzini     }
228605d0a94SPaolo Bonzini }
229605d0a94SPaolo Bonzini 
230a35ba7beSPaolo Bonzini static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
231a35ba7beSPaolo Bonzini {
232a35ba7beSPaolo Bonzini     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
233a35ba7beSPaolo Bonzini 
2344ebc74dbSIgor Mammedov     return backend->prealloc;
235a35ba7beSPaolo Bonzini }
236a35ba7beSPaolo Bonzini 
237a35ba7beSPaolo Bonzini static void host_memory_backend_set_prealloc(Object *obj, bool value,
238a35ba7beSPaolo Bonzini                                              Error **errp)
239a35ba7beSPaolo Bonzini {
240a35ba7beSPaolo Bonzini     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
241a35ba7beSPaolo Bonzini 
2429181fb70SDavid Hildenbrand     if (!backend->reserve && value) {
2439181fb70SDavid Hildenbrand         error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
2449181fb70SDavid Hildenbrand         return;
2459181fb70SDavid Hildenbrand     }
2469181fb70SDavid Hildenbrand 
2476f4c60e4SPeter Xu     if (!host_memory_backend_mr_inited(backend)) {
248a35ba7beSPaolo Bonzini         backend->prealloc = value;
249a35ba7beSPaolo Bonzini         return;
250a35ba7beSPaolo Bonzini     }
251a35ba7beSPaolo Bonzini 
252a35ba7beSPaolo Bonzini     if (value && !backend->prealloc) {
253a35ba7beSPaolo Bonzini         int fd = memory_region_get_fd(&backend->mr);
254a35ba7beSPaolo Bonzini         void *ptr = memory_region_get_ram_ptr(&backend->mr);
255a35ba7beSPaolo Bonzini         uint64_t sz = memory_region_size(&backend->mr);
256a35ba7beSPaolo Bonzini 
2579c878ad6SPhilippe Mathieu-Daudé         if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
25804accf43SMark Kanda                                backend->prealloc_context, false, errp)) {
259056b68afSIgor Mammedov             return;
260056b68afSIgor Mammedov         }
261a35ba7beSPaolo Bonzini         backend->prealloc = true;
262a35ba7beSPaolo Bonzini     }
263a35ba7beSPaolo Bonzini }
264a35ba7beSPaolo Bonzini 
265ffac16faSIgor Mammedov static void host_memory_backend_get_prealloc_threads(Object *obj, Visitor *v,
266ffac16faSIgor Mammedov     const char *name, void *opaque, Error **errp)
267ffac16faSIgor Mammedov {
268ffac16faSIgor Mammedov     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
269ffac16faSIgor Mammedov     visit_type_uint32(v, name, &backend->prealloc_threads, errp);
270ffac16faSIgor Mammedov }
271ffac16faSIgor Mammedov 
272ffac16faSIgor Mammedov static void host_memory_backend_set_prealloc_threads(Object *obj, Visitor *v,
273ffac16faSIgor Mammedov     const char *name, void *opaque, Error **errp)
274ffac16faSIgor Mammedov {
275ffac16faSIgor Mammedov     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
276ffac16faSIgor Mammedov     uint32_t value;
277ffac16faSIgor Mammedov 
278668f62ecSMarkus Armbruster     if (!visit_type_uint32(v, name, &value, errp)) {
279dcfe4805SMarkus Armbruster         return;
280ffac16faSIgor Mammedov     }
281ffac16faSIgor Mammedov     if (value <= 0) {
282dcfe4805SMarkus Armbruster         error_setg(errp, "property '%s' of %s doesn't take value '%d'", name,
283dcfe4805SMarkus Armbruster                    object_get_typename(obj), value);
284dcfe4805SMarkus Armbruster         return;
285ffac16faSIgor Mammedov     }
286ffac16faSIgor Mammedov     backend->prealloc_threads = value;
287ffac16faSIgor Mammedov }
288ffac16faSIgor Mammedov 
28958f4662cSHu Tao static void host_memory_backend_init(Object *obj)
2901f070489SIgor Mammedov {
291605d0a94SPaolo Bonzini     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
2926b269967SEduardo Habkost     MachineState *machine = MACHINE(qdev_get_machine());
293605d0a94SPaolo Bonzini 
294ffac16faSIgor Mammedov     /* TODO: convert access to globals to compat properties */
2956b269967SEduardo Habkost     backend->merge = machine_mem_merge(machine);
2966b269967SEduardo Habkost     backend->dump = machine_dump_guest_core(machine);
29737662d85SXiaoyao Li     backend->guest_memfd = machine_require_guest_memfd(machine);
2989181fb70SDavid Hildenbrand     backend->reserve = true;
299f8d426a6SJaroslav Jindrak     backend->prealloc_threads = machine->smp.cpus;
3001f070489SIgor Mammedov }
3011f070489SIgor Mammedov 
302fa0cb34dSMarc-André Lureau static void host_memory_backend_post_init(Object *obj)
303fa0cb34dSMarc-André Lureau {
304fa0cb34dSMarc-André Lureau     object_apply_compat_props(obj);
305fa0cb34dSMarc-André Lureau }
306fa0cb34dSMarc-André Lureau 
3074728b574SPeter Xu bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
3084728b574SPeter Xu {
3094728b574SPeter Xu     /*
3104728b574SPeter Xu      * NOTE: We forbid zero-length memory backend, so here zero means
3114728b574SPeter Xu      * "we haven't inited the backend memory region yet".
3124728b574SPeter Xu      */
3134728b574SPeter Xu     return memory_region_size(&backend->mr) != 0;
3144728b574SPeter Xu }
3154728b574SPeter Xu 
3167943e97bSDavid Hildenbrand MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
3171f070489SIgor Mammedov {
3186f4c60e4SPeter Xu     return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
3191f070489SIgor Mammedov }
3201f070489SIgor Mammedov 
3212aece63cSXiao Guangrong void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
3222aece63cSXiao Guangrong {
3232aece63cSXiao Guangrong     backend->is_mapped = mapped;
3242aece63cSXiao Guangrong }
3252aece63cSXiao Guangrong 
3262aece63cSXiao Guangrong bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
3272aece63cSXiao Guangrong {
3282aece63cSXiao Guangrong     return backend->is_mapped;
3292aece63cSXiao Guangrong }
3302aece63cSXiao Guangrong 
3312b108085SDavid Gibson size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
3322b108085SDavid Gibson {
3338be934b7SThomas Huth     size_t pagesize = qemu_ram_pagesize(memdev->mr.ram_block);
3348be934b7SThomas Huth     g_assert(pagesize >= qemu_real_host_page_size());
3352b108085SDavid Gibson     return pagesize;
3362b108085SDavid Gibson }
3372b108085SDavid Gibson 
338bd9262d9SHu Tao static void
339bd9262d9SHu Tao host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
340bd9262d9SHu Tao {
341bd9262d9SHu Tao     HostMemoryBackend *backend = MEMORY_BACKEND(uc);
342bd9262d9SHu Tao     HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
343605d0a94SPaolo Bonzini     void *ptr;
344605d0a94SPaolo Bonzini     uint64_t sz;
3455d9a9a61SMichal Privoznik     size_t pagesize;
34604accf43SMark Kanda     bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED);
347bd9262d9SHu Tao 
348e199f7adSPhilippe Mathieu-Daudé     if (!bc->alloc) {
349e199f7adSPhilippe Mathieu-Daudé         return;
350e199f7adSPhilippe Mathieu-Daudé     }
351fdb63cf3SPhilippe Mathieu-Daudé     if (!bc->alloc(backend, errp)) {
352fdb63cf3SPhilippe Mathieu-Daudé         return;
353605d0a94SPaolo Bonzini     }
354605d0a94SPaolo Bonzini 
355605d0a94SPaolo Bonzini     ptr = memory_region_get_ram_ptr(&backend->mr);
356605d0a94SPaolo Bonzini     sz = memory_region_size(&backend->mr);
3575d9a9a61SMichal Privoznik     pagesize = qemu_ram_pagesize(backend->mr.ram_block);
3585d9a9a61SMichal Privoznik 
3595d9a9a61SMichal Privoznik     if (backend->aligned && !QEMU_IS_ALIGNED(sz, pagesize)) {
3605d9a9a61SMichal Privoznik         g_autofree char *pagesize_str = size_to_str(pagesize);
3615d9a9a61SMichal Privoznik         error_setg(errp, "backend '%s' memory size must be multiple of %s",
3625d9a9a61SMichal Privoznik                    object_get_typename(OBJECT(uc)), pagesize_str);
3635d9a9a61SMichal Privoznik         return;
3645d9a9a61SMichal Privoznik     }
365605d0a94SPaolo Bonzini 
366605d0a94SPaolo Bonzini     if (backend->merge) {
367605d0a94SPaolo Bonzini         qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
368605d0a94SPaolo Bonzini     }
369605d0a94SPaolo Bonzini     if (!backend->dump) {
370605d0a94SPaolo Bonzini         qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
371605d0a94SPaolo Bonzini     }
3724cf1b76bSHu Tao #ifdef CONFIG_NUMA
3734cf1b76bSHu Tao     unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
3744cf1b76bSHu Tao     /* lastbit == MAX_NODES means maxnode = 0 */
3754cf1b76bSHu Tao     unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
37693e08863SPhilippe Mathieu-Daudé     /*
37793e08863SPhilippe Mathieu-Daudé      * Ensure policy won't be ignored in case memory is preallocated
3784cf1b76bSHu Tao      * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
37993e08863SPhilippe Mathieu-Daudé      * this doesn't catch hugepage case.
38093e08863SPhilippe Mathieu-Daudé      */
381288d3322SMichael S. Tsirkin     unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
3826bb613f0SMichal Privoznik     int mode = backend->policy;
3834cf1b76bSHu Tao 
3844cf1b76bSHu Tao     /* check for invalid host-nodes and policies and give more verbose
3854cf1b76bSHu Tao      * error messages than mbind(). */
3864cf1b76bSHu Tao     if (maxnode && backend->policy == MPOL_DEFAULT) {
3874cf1b76bSHu Tao         error_setg(errp, "host-nodes must be empty for policy default,"
3884cf1b76bSHu Tao                    " or you should explicitly specify a policy other"
3894cf1b76bSHu Tao                    " than default");
3904cf1b76bSHu Tao         return;
3914cf1b76bSHu Tao     } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
3924cf1b76bSHu Tao         error_setg(errp, "host-nodes must be set for policy %s",
393977c736fSMarkus Armbruster                    HostMemPolicy_str(backend->policy));
3944cf1b76bSHu Tao         return;
3954cf1b76bSHu Tao     }
3964cf1b76bSHu Tao 
39793e08863SPhilippe Mathieu-Daudé     /*
39893e08863SPhilippe Mathieu-Daudé      * We can have up to MAX_NODES nodes, but we need to pass maxnode+1
3994cf1b76bSHu Tao      * as argument to mbind() due to an old Linux bug (feature?) which
4004cf1b76bSHu Tao      * cuts off the last specified node. This means backend->host_nodes
4014cf1b76bSHu Tao      * must have MAX_NODES+1 bits available.
4024cf1b76bSHu Tao      */
4034cf1b76bSHu Tao     assert(sizeof(backend->host_nodes) >=
4044cf1b76bSHu Tao            BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
4054cf1b76bSHu Tao     assert(maxnode <= MAX_NODES);
40670b6d525SIgor Mammedov 
4076bb613f0SMichal Privoznik #ifdef HAVE_NUMA_HAS_PREFERRED_MANY
4086bb613f0SMichal Privoznik     if (mode == MPOL_PREFERRED && numa_has_preferred_many() > 0) {
4096bb613f0SMichal Privoznik         /*
4106bb613f0SMichal Privoznik          * Replace with MPOL_PREFERRED_MANY otherwise the mbind() below
4116bb613f0SMichal Privoznik          * silently picks the first node.
4126bb613f0SMichal Privoznik          */
4136bb613f0SMichal Privoznik         mode = MPOL_PREFERRED_MANY;
4146bb613f0SMichal Privoznik     }
4156bb613f0SMichal Privoznik #endif
4166bb613f0SMichal Privoznik 
41770b6d525SIgor Mammedov     if (maxnode &&
4186bb613f0SMichal Privoznik         mbind(ptr, sz, mode, backend->host_nodes, maxnode + 1, flags)) {
419a3567ba1SPavel Fedin         if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
4204cf1b76bSHu Tao             error_setg_errno(errp, errno,
4214cf1b76bSHu Tao                              "cannot bind memory to host NUMA nodes");
4224cf1b76bSHu Tao             return;
4234cf1b76bSHu Tao         }
424a3567ba1SPavel Fedin     }
4254cf1b76bSHu Tao #endif
42693e08863SPhilippe Mathieu-Daudé     /*
42793e08863SPhilippe Mathieu-Daudé      * Preallocate memory after the NUMA policy has been instantiated.
4284cf1b76bSHu Tao      * This is necessary to guarantee memory is allocated with
4294cf1b76bSHu Tao      * specified NUMA policy in place.
4304cf1b76bSHu Tao      */
4319c878ad6SPhilippe Mathieu-Daudé     if (backend->prealloc && !qemu_prealloc_mem(memory_region_get_fd(&backend->mr),
4329c878ad6SPhilippe Mathieu-Daudé                                                 ptr, sz,
433e6816458SDavid Hildenbrand                                                 backend->prealloc_threads,
43404accf43SMark Kanda                                                 backend->prealloc_context,
43504accf43SMark Kanda                                                 async, errp)) {
4363961613aSPhilippe Mathieu-Daudé         return;
4373961613aSPhilippe Mathieu-Daudé     }
4383961613aSPhilippe Mathieu-Daudé }
439bd9262d9SHu Tao 
44036bce5caSLin Ma static bool
4413beacfb9SEduardo Habkost host_memory_backend_can_be_deleted(UserCreatable *uc)
44236bce5caSLin Ma {
4432aece63cSXiao Guangrong     if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
44436bce5caSLin Ma         return false;
44536bce5caSLin Ma     } else {
44636bce5caSLin Ma         return true;
44736bce5caSLin Ma     }
44836bce5caSLin Ma }
44936bce5caSLin Ma 
45006329cceSMarcel Apfelbaum static bool host_memory_backend_get_share(Object *o, Error **errp)
45106329cceSMarcel Apfelbaum {
45206329cceSMarcel Apfelbaum     HostMemoryBackend *backend = MEMORY_BACKEND(o);
45306329cceSMarcel Apfelbaum 
45406329cceSMarcel Apfelbaum     return backend->share;
45506329cceSMarcel Apfelbaum }
45606329cceSMarcel Apfelbaum 
45706329cceSMarcel Apfelbaum static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
45806329cceSMarcel Apfelbaum {
45906329cceSMarcel Apfelbaum     HostMemoryBackend *backend = MEMORY_BACKEND(o);
46006329cceSMarcel Apfelbaum 
46106329cceSMarcel Apfelbaum     if (host_memory_backend_mr_inited(backend)) {
46206329cceSMarcel Apfelbaum         error_setg(errp, "cannot change property value");
46306329cceSMarcel Apfelbaum         return;
46406329cceSMarcel Apfelbaum     }
46506329cceSMarcel Apfelbaum     backend->share = value;
46606329cceSMarcel Apfelbaum }
46706329cceSMarcel Apfelbaum 
#ifdef CONFIG_LINUX
/* Property getter for "reserve": returns backend->reserve; @errp is unused. */
static bool host_memory_backend_get_reserve(Object *o, Error **errp)
{
    return MEMORY_BACKEND(o)->reserve;
}

/*
 * Property setter for "reserve".
 *
 * Rejected (with @errp set and the flag unchanged) once the memory region
 * is initialised, or when turning reserve off while prealloc is on.
 */
static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(o);

    if (host_memory_backend_mr_inited(backend)) {
        error_setg(errp, "cannot change property value");
        return;
    }

    if (!value && backend->prealloc) {
        error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
        return;
    }

    backend->reserve = value;
}
#endif /* CONFIG_LINUX */

492fa0cb34dSMarc-André Lureau static bool
493fa0cb34dSMarc-André Lureau host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
494fa0cb34dSMarc-André Lureau {
495fa0cb34dSMarc-André Lureau     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
496fa0cb34dSMarc-André Lureau 
497fa0cb34dSMarc-André Lureau     return backend->use_canonical_path;
498fa0cb34dSMarc-André Lureau }
499fa0cb34dSMarc-André Lureau 
500fa0cb34dSMarc-André Lureau static void
501fa0cb34dSMarc-André Lureau host_memory_backend_set_use_canonical_path(Object *obj, bool value,
502fa0cb34dSMarc-André Lureau                                            Error **errp)
503fa0cb34dSMarc-André Lureau {
504fa0cb34dSMarc-André Lureau     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
505fa0cb34dSMarc-André Lureau 
506fa0cb34dSMarc-André Lureau     backend->use_canonical_path = value;
507fa0cb34dSMarc-André Lureau }
508fa0cb34dSMarc-André Lureau 
509bd9262d9SHu Tao static void
510bd9262d9SHu Tao host_memory_backend_class_init(ObjectClass *oc, void *data)
511bd9262d9SHu Tao {
512bd9262d9SHu Tao     UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
513bd9262d9SHu Tao 
514bd9262d9SHu Tao     ucc->complete = host_memory_backend_memory_complete;
51536bce5caSLin Ma     ucc->can_be_deleted = host_memory_backend_can_be_deleted;
516e62834caSEduardo Habkost 
517e62834caSEduardo Habkost     object_class_property_add_bool(oc, "merge",
518e62834caSEduardo Habkost         host_memory_backend_get_merge,
519d2623129SMarkus Armbruster         host_memory_backend_set_merge);
520033bfc5eSMarc-André Lureau     object_class_property_set_description(oc, "merge",
5217eecec7dSMarkus Armbruster         "Mark memory as mergeable");
522e62834caSEduardo Habkost     object_class_property_add_bool(oc, "dump",
523e62834caSEduardo Habkost         host_memory_backend_get_dump,
524d2623129SMarkus Armbruster         host_memory_backend_set_dump);
525033bfc5eSMarc-André Lureau     object_class_property_set_description(oc, "dump",
5267eecec7dSMarkus Armbruster         "Set to 'off' to exclude from core dump");
527e62834caSEduardo Habkost     object_class_property_add_bool(oc, "prealloc",
528e62834caSEduardo Habkost         host_memory_backend_get_prealloc,
529d2623129SMarkus Armbruster         host_memory_backend_set_prealloc);
530033bfc5eSMarc-André Lureau     object_class_property_set_description(oc, "prealloc",
5317eecec7dSMarkus Armbruster         "Preallocate memory");
532ffac16faSIgor Mammedov     object_class_property_add(oc, "prealloc-threads", "int",
533ffac16faSIgor Mammedov         host_memory_backend_get_prealloc_threads,
534ffac16faSIgor Mammedov         host_memory_backend_set_prealloc_threads,
535d2623129SMarkus Armbruster         NULL, NULL);
536ffac16faSIgor Mammedov     object_class_property_set_description(oc, "prealloc-threads",
5377eecec7dSMarkus Armbruster         "Number of CPU threads to use for prealloc");
538e6816458SDavid Hildenbrand     object_class_property_add_link(oc, "prealloc-context",
539e6816458SDavid Hildenbrand         TYPE_THREAD_CONTEXT, offsetof(HostMemoryBackend, prealloc_context),
540e6816458SDavid Hildenbrand         object_property_allow_set_link, OBJ_PROP_LINK_STRONG);
541e6816458SDavid Hildenbrand     object_class_property_set_description(oc, "prealloc-context",
542e6816458SDavid Hildenbrand         "Context to use for creating CPU threads for preallocation");
543e62834caSEduardo Habkost     object_class_property_add(oc, "size", "int",
544e62834caSEduardo Habkost         host_memory_backend_get_size,
545e62834caSEduardo Habkost         host_memory_backend_set_size,
546d2623129SMarkus Armbruster         NULL, NULL);
547033bfc5eSMarc-André Lureau     object_class_property_set_description(oc, "size",
5487eecec7dSMarkus Armbruster         "Size of the memory region (ex: 500M)");
549e62834caSEduardo Habkost     object_class_property_add(oc, "host-nodes", "int",
550e62834caSEduardo Habkost         host_memory_backend_get_host_nodes,
551e62834caSEduardo Habkost         host_memory_backend_set_host_nodes,
552d2623129SMarkus Armbruster         NULL, NULL);
553033bfc5eSMarc-André Lureau     object_class_property_set_description(oc, "host-nodes",
5547eecec7dSMarkus Armbruster         "Binds memory to the list of NUMA host nodes");
555e62834caSEduardo Habkost     object_class_property_add_enum(oc, "policy", "HostMemPolicy",
556f7abe0ecSMarc-André Lureau         &HostMemPolicy_lookup,
557e62834caSEduardo Habkost         host_memory_backend_get_policy,
558d2623129SMarkus Armbruster         host_memory_backend_set_policy);
559033bfc5eSMarc-André Lureau     object_class_property_set_description(oc, "policy",
5607eecec7dSMarkus Armbruster         "Set the NUMA policy");
56106329cceSMarcel Apfelbaum     object_class_property_add_bool(oc, "share",
562d2623129SMarkus Armbruster         host_memory_backend_get_share, host_memory_backend_set_share);
563033bfc5eSMarc-André Lureau     object_class_property_set_description(oc, "share",
5647eecec7dSMarkus Armbruster         "Mark the memory as private to QEMU or shared");
5659181fb70SDavid Hildenbrand #ifdef CONFIG_LINUX
5669181fb70SDavid Hildenbrand     object_class_property_add_bool(oc, "reserve",
5679181fb70SDavid Hildenbrand         host_memory_backend_get_reserve, host_memory_backend_set_reserve);
5689181fb70SDavid Hildenbrand     object_class_property_set_description(oc, "reserve",
5699181fb70SDavid Hildenbrand         "Reserve swap space (or huge pages) if applicable");
5709181fb70SDavid Hildenbrand #endif /* CONFIG_LINUX */
5718db0b204SIgor Mammedov     /*
5728db0b204SIgor Mammedov      * Do not delete/rename option. This option must be considered stable
5738db0b204SIgor Mammedov      * (as if it didn't have the 'x-' prefix including deprecation period) as
5748db0b204SIgor Mammedov      * long as 4.0 and older machine types exists.
5758db0b204SIgor Mammedov      * Option will be used by upper layers to override (disable) canonical path
5768db0b204SIgor Mammedov      * for ramblock-id set by compat properties on old machine types ( <= 4.0),
5778db0b204SIgor Mammedov      * to keep migration working when backend is used for main RAM with
5788db0b204SIgor Mammedov      * -machine memory-backend= option (main RAM historically used prefix-less
5798db0b204SIgor Mammedov      * ramblock-id).
5808db0b204SIgor Mammedov      */
581fa0cb34dSMarc-André Lureau     object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
582fa0cb34dSMarc-André Lureau         host_memory_backend_get_use_canonical_path,
583d2623129SMarkus Armbruster         host_memory_backend_set_use_canonical_path);
584e1ff3c67SIgor Mammedov }
585e1ff3c67SIgor Mammedov 
58658f4662cSHu Tao static const TypeInfo host_memory_backend_info = {
5871f070489SIgor Mammedov     .name = TYPE_MEMORY_BACKEND,
5881f070489SIgor Mammedov     .parent = TYPE_OBJECT,
5891f070489SIgor Mammedov     .abstract = true,
5901f070489SIgor Mammedov     .class_size = sizeof(HostMemoryBackendClass),
591bd9262d9SHu Tao     .class_init = host_memory_backend_class_init,
5921f070489SIgor Mammedov     .instance_size = sizeof(HostMemoryBackend),
59358f4662cSHu Tao     .instance_init = host_memory_backend_init,
594fa0cb34dSMarc-André Lureau     .instance_post_init = host_memory_backend_post_init,
5951f070489SIgor Mammedov     .interfaces = (InterfaceInfo[]) {
5961f070489SIgor Mammedov         { TYPE_USER_CREATABLE },
5971f070489SIgor Mammedov         { }
5981f070489SIgor Mammedov     }
5991f070489SIgor Mammedov };
6001f070489SIgor Mammedov 
6011f070489SIgor Mammedov static void register_types(void)
6021f070489SIgor Mammedov {
60358f4662cSHu Tao     type_register_static(&host_memory_backend_info);
6041f070489SIgor Mammedov }
6051f070489SIgor Mammedov 
6061f070489SIgor Mammedov type_init(register_types);
607