xref: /qemu/system/ram-block-attributes.c (revision 24c00b754121f3569ea9e68f5f188747cf5b8439)
1 /*
2  * QEMU ram block attributes
3  *
4  * Copyright Intel
5  *
6  * Author:
7  *      Chenyi Qiang <chenyi.qiang@intel.com>
8  *
9  * SPDX-License-Identifier: GPL-2.0-or-later
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/error-report.h"
14 #include "system/ramblock.h"
15 #include "trace.h"
16 
/*
 * Define the RamBlockAttributes object type (instance struct
 * RamBlockAttributes, function prefix ram_block_attributes_, parent OBJECT)
 * and list TYPE_RAM_DISCARD_MANAGER as an interface it implements.  The
 * interface callbacks are installed in ram_block_attributes_class_init().
 */
17 OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RamBlockAttributes,
18                                           ram_block_attributes,
19                                           RAM_BLOCK_ATTRIBUTES,
20                                           OBJECT,
21                                           { TYPE_RAM_DISCARD_MANAGER },
22                                           { })
23 
24 static size_t
ram_block_attributes_get_block_size(const RamBlockAttributes * attr)25 ram_block_attributes_get_block_size(const RamBlockAttributes *attr)
26 {
27     /*
28      * Because page conversion could be manipulated in the size of at least 4K
29      * or 4K aligned, Use the host page size as the granularity to track the
30      * memory attribute.
31      */
32     g_assert(attr && attr->ram_block);
33     g_assert(attr->ram_block->page_size == qemu_real_host_page_size());
34     return attr->ram_block->page_size;
35 }
36 
37 
38 static bool
ram_block_attributes_rdm_is_populated(const RamDiscardManager * rdm,const MemoryRegionSection * section)39 ram_block_attributes_rdm_is_populated(const RamDiscardManager *rdm,
40                                       const MemoryRegionSection *section)
41 {
42     const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
43     const size_t block_size = ram_block_attributes_get_block_size(attr);
44     const uint64_t first_bit = section->offset_within_region / block_size;
45     const uint64_t last_bit =
46         first_bit + int128_get64(section->size) / block_size - 1;
47     unsigned long first_discarded_bit;
48 
49     first_discarded_bit = find_next_zero_bit(attr->bitmap, last_bit + 1,
50                                            first_bit);
51     return first_discarded_bit > last_bit;
52 }
53 
/*
 * Callback type used by the for_each_{populated,discarded}_section helpers.
 * It is called once per contiguous range with the (clipped) section and the
 * caller-supplied @arg; a non-zero return value stops the iteration and is
 * returned to the caller of the helper.
 */
54 typedef int (*ram_block_attributes_section_cb)(MemoryRegionSection *s,
55                                                void *arg);
56 
57 static int
ram_block_attributes_notify_populate_cb(MemoryRegionSection * section,void * arg)58 ram_block_attributes_notify_populate_cb(MemoryRegionSection *section,
59                                         void *arg)
60 {
61     RamDiscardListener *rdl = arg;
62 
63     return rdl->notify_populate(rdl, section);
64 }
65 
66 static int
ram_block_attributes_notify_discard_cb(MemoryRegionSection * section,void * arg)67 ram_block_attributes_notify_discard_cb(MemoryRegionSection *section,
68                                        void *arg)
69 {
70     RamDiscardListener *rdl = arg;
71 
72     rdl->notify_discard(rdl, section);
73     return 0;
74 }
75 
76 static int
ram_block_attributes_for_each_populated_section(const RamBlockAttributes * attr,MemoryRegionSection * section,void * arg,ram_block_attributes_section_cb cb)77 ram_block_attributes_for_each_populated_section(const RamBlockAttributes *attr,
78                                                 MemoryRegionSection *section,
79                                                 void *arg,
80                                                 ram_block_attributes_section_cb cb)
81 {
82     unsigned long first_bit, last_bit;
83     uint64_t offset, size;
84     const size_t block_size = ram_block_attributes_get_block_size(attr);
85     int ret = 0;
86 
87     first_bit = section->offset_within_region / block_size;
88     first_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
89                               first_bit);
90 
91     while (first_bit < attr->bitmap_size) {
92         MemoryRegionSection tmp = *section;
93 
94         offset = first_bit * block_size;
95         last_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
96                                       first_bit + 1) - 1;
97         size = (last_bit - first_bit + 1) * block_size;
98 
99         if (!memory_region_section_intersect_range(&tmp, offset, size)) {
100             break;
101         }
102 
103         ret = cb(&tmp, arg);
104         if (ret) {
105             error_report("%s: Failed to notify RAM discard listener: %s",
106                          __func__, strerror(-ret));
107             break;
108         }
109 
110         first_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
111                                   last_bit + 2);
112     }
113 
114     return ret;
115 }
116 
117 static int
ram_block_attributes_for_each_discarded_section(const RamBlockAttributes * attr,MemoryRegionSection * section,void * arg,ram_block_attributes_section_cb cb)118 ram_block_attributes_for_each_discarded_section(const RamBlockAttributes *attr,
119                                                 MemoryRegionSection *section,
120                                                 void *arg,
121                                                 ram_block_attributes_section_cb cb)
122 {
123     unsigned long first_bit, last_bit;
124     uint64_t offset, size;
125     const size_t block_size = ram_block_attributes_get_block_size(attr);
126     int ret = 0;
127 
128     first_bit = section->offset_within_region / block_size;
129     first_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
130                                    first_bit);
131 
132     while (first_bit < attr->bitmap_size) {
133         MemoryRegionSection tmp = *section;
134 
135         offset = first_bit * block_size;
136         last_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
137                                  first_bit + 1) - 1;
138         size = (last_bit - first_bit + 1) * block_size;
139 
140         if (!memory_region_section_intersect_range(&tmp, offset, size)) {
141             break;
142         }
143 
144         ret = cb(&tmp, arg);
145         if (ret) {
146             error_report("%s: Failed to notify RAM discard listener: %s",
147                          __func__, strerror(-ret));
148             break;
149         }
150 
151         first_bit = find_next_zero_bit(attr->bitmap,
152                                        attr->bitmap_size,
153                                        last_bit + 2);
154     }
155 
156     return ret;
157 }
158 
159 static uint64_t
ram_block_attributes_rdm_get_min_granularity(const RamDiscardManager * rdm,const MemoryRegion * mr)160 ram_block_attributes_rdm_get_min_granularity(const RamDiscardManager *rdm,
161                                              const MemoryRegion *mr)
162 {
163     const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
164 
165     g_assert(mr == attr->ram_block->mr);
166     return ram_block_attributes_get_block_size(attr);
167 }
168 
169 static void
ram_block_attributes_rdm_register_listener(RamDiscardManager * rdm,RamDiscardListener * rdl,MemoryRegionSection * section)170 ram_block_attributes_rdm_register_listener(RamDiscardManager *rdm,
171                                            RamDiscardListener *rdl,
172                                            MemoryRegionSection *section)
173 {
174     RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
175     int ret;
176 
177     g_assert(section->mr == attr->ram_block->mr);
178     rdl->section = memory_region_section_new_copy(section);
179 
180     QLIST_INSERT_HEAD(&attr->rdl_list, rdl, next);
181 
182     ret = ram_block_attributes_for_each_populated_section(attr, section, rdl,
183                                     ram_block_attributes_notify_populate_cb);
184     if (ret) {
185         error_report("%s: Failed to register RAM discard listener: %s",
186                      __func__, strerror(-ret));
187         exit(1);
188     }
189 }
190 
191 static void
ram_block_attributes_rdm_unregister_listener(RamDiscardManager * rdm,RamDiscardListener * rdl)192 ram_block_attributes_rdm_unregister_listener(RamDiscardManager *rdm,
193                                              RamDiscardListener *rdl)
194 {
195     RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
196     int ret;
197 
198     g_assert(rdl->section);
199     g_assert(rdl->section->mr == attr->ram_block->mr);
200 
201     if (rdl->double_discard_supported) {
202         rdl->notify_discard(rdl, rdl->section);
203     } else {
204         ret = ram_block_attributes_for_each_populated_section(attr,
205                 rdl->section, rdl, ram_block_attributes_notify_discard_cb);
206         if (ret) {
207             error_report("%s: Failed to unregister RAM discard listener: %s",
208                          __func__, strerror(-ret));
209             exit(1);
210         }
211     }
212 
213     memory_region_section_free_copy(rdl->section);
214     rdl->section = NULL;
215     QLIST_REMOVE(rdl, next);
216 }
217 
/*
 * Bundles a ReplayRamDiscardState function with its opaque argument so that
 * both can be passed through the single void *arg of a
 * ram_block_attributes_section_cb (see ram_block_attributes_rdm_replay_cb()).
 */
218 typedef struct RamBlockAttributesReplayData {
219     ReplayRamDiscardState fn;
220     void *opaque;
221 } RamBlockAttributesReplayData;
222 
/*
 * Section callback used by the replay helpers: unpacks the
 * RamBlockAttributesReplayData in @arg and calls its function with
 * @section and the stored opaque pointer, returning that call's result.
 */
static int ram_block_attributes_rdm_replay_cb(MemoryRegionSection *section,
                                              void *arg)
{
    const RamBlockAttributesReplayData *replay = arg;
    int rc = replay->fn(section, replay->opaque);

    return rc;
}
230 
231 static int
ram_block_attributes_rdm_replay_populated(const RamDiscardManager * rdm,MemoryRegionSection * section,ReplayRamDiscardState replay_fn,void * opaque)232 ram_block_attributes_rdm_replay_populated(const RamDiscardManager *rdm,
233                                           MemoryRegionSection *section,
234                                           ReplayRamDiscardState replay_fn,
235                                           void *opaque)
236 {
237     RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
238     RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque };
239 
240     g_assert(section->mr == attr->ram_block->mr);
241     return ram_block_attributes_for_each_populated_section(attr, section, &data,
242                                             ram_block_attributes_rdm_replay_cb);
243 }
244 
245 static int
ram_block_attributes_rdm_replay_discarded(const RamDiscardManager * rdm,MemoryRegionSection * section,ReplayRamDiscardState replay_fn,void * opaque)246 ram_block_attributes_rdm_replay_discarded(const RamDiscardManager *rdm,
247                                           MemoryRegionSection *section,
248                                           ReplayRamDiscardState replay_fn,
249                                           void *opaque)
250 {
251     RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
252     RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque };
253 
254     g_assert(section->mr == attr->ram_block->mr);
255     return ram_block_attributes_for_each_discarded_section(attr, section, &data,
256                                             ram_block_attributes_rdm_replay_cb);
257 }
258 
259 static bool
ram_block_attributes_is_valid_range(RamBlockAttributes * attr,uint64_t offset,uint64_t size)260 ram_block_attributes_is_valid_range(RamBlockAttributes *attr, uint64_t offset,
261                                     uint64_t size)
262 {
263     MemoryRegion *mr = attr->ram_block->mr;
264 
265     g_assert(mr);
266 
267     uint64_t region_size = memory_region_size(mr);
268     const size_t block_size = ram_block_attributes_get_block_size(attr);
269 
270     if (!QEMU_IS_ALIGNED(offset, block_size) ||
271         !QEMU_IS_ALIGNED(size, block_size)) {
272         return false;
273     }
274     if (offset + size <= offset) {
275         return false;
276     }
277     if (offset + size > region_size) {
278         return false;
279     }
280     return true;
281 }
282 
/*
 * Call notify_discard() on every registered listener whose section
 * intersects [@offset, @offset + @size); each listener only sees the part
 * of the range that overlaps its own section.
 */
static void ram_block_attributes_notify_discard(RamBlockAttributes *attr,
                                                uint64_t offset,
                                                uint64_t size)
{
    RamDiscardListener *listener;

    QLIST_FOREACH(listener, &attr->rdl_list, next) {
        /* Work on a copy so the listener's stored section stays intact. */
        MemoryRegionSection overlap = *listener->section;

        if (memory_region_section_intersect_range(&overlap, offset, size)) {
            listener->notify_discard(listener, &overlap);
        }
    }
}
298 
299 static int
ram_block_attributes_notify_populate(RamBlockAttributes * attr,uint64_t offset,uint64_t size)300 ram_block_attributes_notify_populate(RamBlockAttributes *attr,
301                                      uint64_t offset, uint64_t size)
302 {
303     RamDiscardListener *rdl;
304     int ret = 0;
305 
306     QLIST_FOREACH(rdl, &attr->rdl_list, next) {
307         MemoryRegionSection tmp = *rdl->section;
308 
309         if (!memory_region_section_intersect_range(&tmp, offset, size)) {
310             continue;
311         }
312         ret = rdl->notify_populate(rdl, &tmp);
313         if (ret) {
314             break;
315         }
316     }
317 
318     return ret;
319 }
320 
/*
 * Switch the range [@offset, @offset + @size) of @attr's RAM block to the
 * discarded (@to_discard == true) or populated (@to_discard == false) state
 * and notify the registered RamDiscardListeners about every block whose
 * state actually changes.
 *
 * The range must pass ram_block_attributes_is_valid_range(); otherwise an
 * error is reported and -EINVAL is returned without touching the bitmap.
 *
 * Returns 0 on success, or the first non-zero value returned by a
 * listener's notify_populate().  No rollback is performed in that case:
 * bits that were already set in the bitmap stay set.
 */
int ram_block_attributes_state_change(RamBlockAttributes *attr,
                                      uint64_t offset, uint64_t size,
                                      bool to_discard)
{
    const size_t block_size = ram_block_attributes_get_block_size(attr);
    unsigned long first_bit, nbits, last_bit, bit;
    bool is_discarded, is_populated;
    int ret = 0;

    /*
     * Validate first: bit indices and bitmap scans must only ever be derived
     * from a range that is aligned, non-empty and inside the region.
     */
    if (!ram_block_attributes_is_valid_range(attr, offset, size)) {
        error_report("%s, invalid range: offset 0x%" PRIx64 ", size "
                     "0x%" PRIx64, __func__, offset, size);
        return -EINVAL;
    }

    first_bit = offset / block_size;
    nbits = size / block_size;
    last_bit = first_bit + nbits - 1;

    /* No set bit in [first_bit, last_bit]: the range is fully discarded. */
    is_discarded = find_next_bit(attr->bitmap, attr->bitmap_size,
                                 first_bit) > last_bit;
    /* No clear bit in [first_bit, last_bit]: the range is fully populated. */
    is_populated = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
                                      first_bit) > last_bit;

    trace_ram_block_attributes_state_change(offset, size,
                                            is_discarded ? "discarded" :
                                            is_populated ? "populated" :
                                            "mixture",
                                            to_discard ? "discarded" :
                                            "populated");
    if (to_discard) {
        if (is_discarded) {
            /* Already private */
        } else if (is_populated) {
            /* Completely shared */
            bitmap_clear(attr->bitmap, first_bit, nbits);
            ram_block_attributes_notify_discard(attr, offset, size);
        } else {
            /* Unexpected mixture: process individual blocks */
            for (bit = first_bit; bit < first_bit + nbits; bit++) {
                if (!test_bit(bit, attr->bitmap)) {
                    continue;
                }
                clear_bit(bit, attr->bitmap);
                ram_block_attributes_notify_discard(attr, bit * block_size,
                                                    block_size);
            }
        }
    } else {
        if (is_populated) {
            /* Already shared */
        } else if (is_discarded) {
            /* Completely private */
            bitmap_set(attr->bitmap, first_bit, nbits);
            ret = ram_block_attributes_notify_populate(attr, offset, size);
        } else {
            /* Unexpected mixture: process individual blocks */
            for (bit = first_bit; bit < first_bit + nbits; bit++) {
                if (test_bit(bit, attr->bitmap)) {
                    continue;
                }
                set_bit(bit, attr->bitmap);
                ret = ram_block_attributes_notify_populate(attr,
                                                           bit * block_size,
                                                           block_size);
                if (ret) {
                    /* Stop at the first failure; earlier bits stay set. */
                    break;
                }
            }
        }
    }

    return ret;
}
392 
ram_block_attributes_create(RAMBlock * ram_block)393 RamBlockAttributes *ram_block_attributes_create(RAMBlock *ram_block)
394 {
395     const int block_size  = qemu_real_host_page_size();
396     RamBlockAttributes *attr;
397     MemoryRegion *mr = ram_block->mr;
398 
399     attr = RAM_BLOCK_ATTRIBUTES(object_new(TYPE_RAM_BLOCK_ATTRIBUTES));
400 
401     attr->ram_block = ram_block;
402     if (memory_region_set_ram_discard_manager(mr, RAM_DISCARD_MANAGER(attr))) {
403         object_unref(OBJECT(attr));
404         return NULL;
405     }
406     attr->bitmap_size =
407         ROUND_UP(int128_get64(mr->size), block_size) / block_size;
408     attr->bitmap = bitmap_new(attr->bitmap_size);
409 
410     return attr;
411 }
412 
/*
 * Tear down @attr (must not be NULL): free the tracking bitmap, clear the
 * RAM discard manager of the RAM block's memory region and drop the
 * reference to the object.
 */
void ram_block_attributes_destroy(RamBlockAttributes *attr)
{
    MemoryRegion *mr;

    g_assert(attr);
    mr = attr->ram_block->mr;

    g_free(attr->bitmap);
    memory_region_set_ram_discard_manager(mr, NULL);
    object_unref(OBJECT(attr));
}
421 
/* Instance initializer: start with an empty RamDiscardListener list. */
static void ram_block_attributes_init(Object *obj)
{
    RamBlockAttributes *self = RAM_BLOCK_ATTRIBUTES(obj);

    QLIST_INIT(&self->rdl_list);
}
428 
/*
 * Instance finalizer.  Intentionally empty: the bitmap is released by
 * ram_block_attributes_destroy() before the last reference is dropped.
 */
static void ram_block_attributes_finalize(Object *obj)
{
    /* Nothing to do. */
}
432 
/*
 * Class initializer: install this file's implementations of the
 * RamDiscardManager interface callbacks.
 */
static void ram_block_attributes_class_init(ObjectClass *klass,
                                            const void *data)
{
    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);

    /* Queries */
    rdmc->is_populated = ram_block_attributes_rdm_is_populated;
    rdmc->get_min_granularity = ram_block_attributes_rdm_get_min_granularity;

    /* Listener management */
    rdmc->register_listener = ram_block_attributes_rdm_register_listener;
    rdmc->unregister_listener = ram_block_attributes_rdm_unregister_listener;

    /* State replay */
    rdmc->replay_populated = ram_block_attributes_rdm_replay_populated;
    rdmc->replay_discarded = ram_block_attributes_rdm_replay_discarded;
}
445