xref: /qemu/system/dirtylimit.c (revision 513823e7521a09ed7ad1e32e6454bac3b2cbf52d)
1 /*
2  * Dirty page rate limit implementation code
3  *
4  * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
5  *
6  * Authors:
7  *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 
13 #include "qemu/osdep.h"
14 #include "qemu/main-loop.h"
15 #include "qapi/qapi-commands-migration.h"
16 #include "qobject/qdict.h"
17 #include "qapi/error.h"
18 #include "system/dirtyrate.h"
19 #include "system/dirtylimit.h"
20 #include "monitor/hmp.h"
21 #include "monitor/monitor.h"
22 #include "exec/memory.h"
23 #include "exec/target_page.h"
24 #include "hw/boards.h"
25 #include "system/kvm.h"
26 #include "trace.h"
27 #include "migration/misc.h"
28 
/*
 * The dirty limit stops adjusting a vCPU's throttle once the difference
 * between its current dirty page rate and its quota is no greater than
 * DIRTYLIMIT_TOLERANCE_RANGE.
 */
#define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
/*
 * Increase or decrease the vCPU sleep time linearly if the dirty
 * page rate error, expressed as a percentage, is over
 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
 * Otherwise, increase or decrease the sleep time by a fixed step.
 */
#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT     50
/*
 * Max vCPU sleep time percentage during a cycle
 * composed of dirty ring full time and sleep time.
 */
#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
46 
/*
 * State of the per-vCPU dirty page rate sampling:
 *  - stat:    most recently published per-vCPU dirty page rates
 *  - running: the sampling thread keeps looping while this is set
 *  - thread:  the sampling thread itself
 */
struct {
    VcpuStat stat;
    bool running;
    QemuThread thread;
} *vcpu_dirty_rate_stat;
52 
/* Dirty page rate limit configuration of a single vCPU */
typedef struct VcpuDirtyLimitState {
    /* Index of the vCPU this entry describes */
    int cpu_index;
    /* True while a dirty page rate limit is set on this vCPU */
    bool enabled;
    /*
     * Dirty page rate quota, in MB/s;
     * zero if the limit is not enabled.
     */
    uint64_t quota;
} VcpuDirtyLimitState;
62 
/*
 * Global dirty limit state. The pointer is NULL while the dirty limit
 * is not in service (see dirtylimit_in_service()).
 */
struct {
    /* One entry per possible vCPU, indexed by cpu_index */
    VcpuDirtyLimitState *states;
    /* Max number of vCPUs configured by the user */
    int max_cpus;
    /* Number of vCPUs currently under a dirty limit */
    int limited_nvcpu;
} *dirtylimit_state;
70 
/* Protects dirtylimit_state */
static QemuMutex dirtylimit_mutex;

/*
 * While dirtylimit_quit is true, dirtylimit_process() returns without
 * adjusting any vCPU. Toggled through dirtylimit_change().
 */
static bool dirtylimit_quit;
76 
77 static void vcpu_dirty_rate_stat_collect(void)
78 {
79     VcpuStat stat;
80     int i = 0;
81     int64_t period = DIRTYLIMIT_CALC_TIME_MS;
82 
83     if (migrate_dirty_limit() && migration_is_running()) {
84         period = migrate_vcpu_dirty_limit_period();
85     }
86 
87     /* calculate vcpu dirtyrate */
88     vcpu_calculate_dirtyrate(period,
89                               &stat,
90                               GLOBAL_DIRTY_LIMIT,
91                               false);
92 
93     for (i = 0; i < stat.nvcpu; i++) {
94         vcpu_dirty_rate_stat->stat.rates[i].id = i;
95         vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
96             stat.rates[i].dirty_rate;
97     }
98 
99     g_free(stat.rates);
100 }
101 
102 static void *vcpu_dirty_rate_stat_thread(void *opaque)
103 {
104     rcu_register_thread();
105 
106     /* start log sync */
107     global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
108 
109     while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
110         vcpu_dirty_rate_stat_collect();
111         if (dirtylimit_in_service()) {
112             dirtylimit_process();
113         }
114     }
115 
116     /* stop log sync */
117     global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
118 
119     rcu_unregister_thread();
120     return NULL;
121 }
122 
123 int64_t vcpu_dirty_rate_get(int cpu_index)
124 {
125     DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
126     return qatomic_read_i64(&rates[cpu_index].dirty_rate);
127 }
128 
129 void vcpu_dirty_rate_stat_start(void)
130 {
131     if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
132         return;
133     }
134 
135     qatomic_set(&vcpu_dirty_rate_stat->running, 1);
136     qemu_thread_create(&vcpu_dirty_rate_stat->thread,
137                        "dirtyrate-stat",
138                        vcpu_dirty_rate_stat_thread,
139                        NULL,
140                        QEMU_THREAD_JOINABLE);
141 }
142 
/*
 * Ask the dirty rate sampling thread to exit and wait until it has.
 *
 * The dirtylimit mutex and the BQL are both released around
 * qemu_thread_join() and re-taken afterwards, so the caller is expected
 * to hold both on entry and holds both again on return.
 */
void vcpu_dirty_rate_stat_stop(void)
{
    /* Makes the loop in vcpu_dirty_rate_stat_thread() terminate */
    qatomic_set(&vcpu_dirty_rate_stat->running, 0);
    /*
     * The sampling thread takes the dirtylimit mutex in
     * dirtylimit_process(); release it so the thread can finish its round.
     */
    dirtylimit_state_unlock();
    /* NOTE(review): presumably the thread may also need the BQL - confirm */
    bql_unlock();
    qemu_thread_join(&vcpu_dirty_rate_stat->thread);
    /* Re-acquire in the reverse order of release */
    bql_lock();
    dirtylimit_state_lock();
}
152 
153 void vcpu_dirty_rate_stat_initialize(void)
154 {
155     MachineState *ms = MACHINE(qdev_get_machine());
156     int max_cpus = ms->smp.max_cpus;
157 
158     vcpu_dirty_rate_stat =
159         g_malloc0(sizeof(*vcpu_dirty_rate_stat));
160 
161     vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
162     vcpu_dirty_rate_stat->stat.rates =
163         g_new0(DirtyRateVcpu, max_cpus);
164 
165     vcpu_dirty_rate_stat->running = false;
166 }
167 
/*
 * Free the published rate table and vcpu_dirty_rate_stat itself, and
 * reset the global pointer to NULL. This function does not stop the
 * sampling thread.
 */
void vcpu_dirty_rate_stat_finalize(void)
{
    g_free(vcpu_dirty_rate_stat->stat.rates);
    vcpu_dirty_rate_stat->stat.rates = NULL;

    g_free(vcpu_dirty_rate_stat);
    vcpu_dirty_rate_stat = NULL;
}
176 
/* Acquire the mutex that protects dirtylimit_state */
void dirtylimit_state_lock(void)
{
    qemu_mutex_lock(&dirtylimit_mutex);
}
181 
/* Release the mutex that protects dirtylimit_state */
void dirtylimit_state_unlock(void)
{
    qemu_mutex_unlock(&dirtylimit_mutex);
}
186 
/*
 * Initialize dirtylimit_mutex. Marked as a constructor, so it is not
 * called explicitly anywhere in this file.
 */
static void
__attribute__((__constructor__)) dirtylimit_mutex_init(void)
{
    qemu_mutex_init(&dirtylimit_mutex);
}
192 
193 static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
194 {
195     return &dirtylimit_state->states[cpu_index];
196 }
197 
198 void dirtylimit_state_initialize(void)
199 {
200     MachineState *ms = MACHINE(qdev_get_machine());
201     int max_cpus = ms->smp.max_cpus;
202     int i;
203 
204     dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
205 
206     dirtylimit_state->states =
207             g_new0(VcpuDirtyLimitState, max_cpus);
208 
209     for (i = 0; i < max_cpus; i++) {
210         dirtylimit_state->states[i].cpu_index = i;
211     }
212 
213     dirtylimit_state->max_cpus = max_cpus;
214     trace_dirtylimit_state_initialize(max_cpus);
215 }
216 
/*
 * Free the per-vCPU state array and dirtylimit_state itself, and reset
 * the global pointer to NULL; after this dirtylimit_in_service()
 * returns false.
 */
void dirtylimit_state_finalize(void)
{
    g_free(dirtylimit_state->states);
    dirtylimit_state->states = NULL;

    g_free(dirtylimit_state);
    dirtylimit_state = NULL;

    trace_dirtylimit_state_finalize();
}
227 
228 bool dirtylimit_in_service(void)
229 {
230     return !!dirtylimit_state;
231 }
232 
233 bool dirtylimit_vcpu_index_valid(int cpu_index)
234 {
235     MachineState *ms = MACHINE(qdev_get_machine());
236 
237     return !(cpu_index < 0 ||
238              cpu_index >= ms->smp.max_cpus);
239 }
240 
/*
 * Estimate how long it takes a vCPU to fill its dirty ring, in
 * microseconds.
 *
 * @dirtyrate: dirty page rate sample, in MB/s
 *
 * The estimate is computed from the highest rate passed to this function
 * so far (kept in a function-local static shared by all callers), not
 * from @dirtyrate alone.
 *
 * Returns 0 if no non-zero rate has been seen yet. Without that guard a
 * zero @dirtyrate on an early call would divide by zero; this can happen
 * when dirtylimit_ring_full_time() passes an average that rounds down
 * to 0.
 */
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
    static uint64_t max_dirtyrate;
    uint64_t dirty_ring_size_MiB;

    if (max_dirtyrate < dirtyrate) {
        max_dirtyrate = dirtyrate;
    }

    /* Nothing has been dirtied yet: there is no meaningful fill time */
    if (max_dirtyrate == 0) {
        return 0;
    }

    dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size());

    /* MiB / (MB/s) gives seconds; scale to microseconds */
    return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
}
254 
255 static inline bool dirtylimit_done(uint64_t quota,
256                                    uint64_t current)
257 {
258     uint64_t min, max;
259 
260     min = MIN(quota, current);
261     max = MAX(quota, current);
262 
263     return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false;
264 }
265 
266 static inline bool
267 dirtylimit_need_linear_adjustment(uint64_t quota,
268                                   uint64_t current)
269 {
270     uint64_t min, max;
271 
272     min = MIN(quota, current);
273     max = MAX(quota, current);
274 
275     return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
276 }
277 
/*
 * Recompute cpu->throttle_us_per_full from the vCPU's quota and its
 * current dirty page rate.
 *
 * @cpu: vCPU whose throttle time is adjusted
 * @quota: dirty page rate quota of the vCPU
 * @current: most recently sampled dirty page rate of the vCPU
 *
 * A zero @current resets the throttle time to 0. Otherwise the throttle
 * time is raised when @current is above @quota and lowered when it is not:
 * by an amount derived from the error percentage when
 * dirtylimit_need_linear_adjustment() returns true, else by a tenth of
 * the estimated dirty ring full time. The result is clamped to
 * [0, ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX].
 */
static void dirtylimit_set_throttle(CPUState *cpu,
                                    uint64_t quota,
                                    uint64_t current)
{
    int64_t ring_full_time_us = 0;
    uint64_t sleep_pct = 0;
    uint64_t throttle_us = 0;

    /* Also guarantees a non-zero divisor for the computations below */
    if (current == 0) {
        cpu->throttle_us_per_full = 0;
        return;
    }

    ring_full_time_us = dirtylimit_dirty_ring_full_time(current);

    if (dirtylimit_need_linear_adjustment(quota, current)) {
        if (quota < current) {
            /* Over quota: share of the rate that has to be removed */
            sleep_pct = (current - quota) * 100 / current;
            /*
             * Sleep time that makes up sleep_pct of a cycle of
             * ring full time plus sleep time.
             * NOTE(review): sleep_pct would be 100 (zero divisor) if
             * quota were 0 - confirm callers never enable a zero quota.
             */
            throttle_us =
                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
            cpu->throttle_us_per_full += throttle_us;
        } else {
            /* Under quota: share of the quota that is still unused */
            sleep_pct = (quota - current) * 100 / quota;
            throttle_us =
                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
            cpu->throttle_us_per_full -= throttle_us;
        }

        trace_dirtylimit_throttle_pct(cpu->cpu_index,
                                      sleep_pct,
                                      throttle_us);
    } else {
        /* Small error: move by a fixed step of a tenth of ring full time */
        if (quota < current) {
            cpu->throttle_us_per_full += ring_full_time_us / 10;
        } else {
            cpu->throttle_us_per_full -= ring_full_time_us / 10;
        }
    }

    /*
     * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
     *       current dirty page rate may never reach the quota, we should stop
     *       increasing sleep time?
     */
    cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
        ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);

    /* The subtractions above may have gone negative */
    cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
}
327 
328 static void dirtylimit_adjust_throttle(CPUState *cpu)
329 {
330     uint64_t quota = 0;
331     uint64_t current = 0;
332     int cpu_index = cpu->cpu_index;
333 
334     quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
335     current = vcpu_dirty_rate_get(cpu_index);
336 
337     if (!dirtylimit_done(quota, current)) {
338         dirtylimit_set_throttle(cpu, quota, current);
339     }
340 
341     return;
342 }
343 
344 void dirtylimit_process(void)
345 {
346     CPUState *cpu;
347 
348     if (!qatomic_read(&dirtylimit_quit)) {
349         dirtylimit_state_lock();
350 
351         if (!dirtylimit_in_service()) {
352             dirtylimit_state_unlock();
353             return;
354         }
355 
356         CPU_FOREACH(cpu) {
357             if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
358                 continue;
359             }
360             dirtylimit_adjust_throttle(cpu);
361         }
362         dirtylimit_state_unlock();
363     }
364 }
365 
366 void dirtylimit_change(bool start)
367 {
368     if (start) {
369         qatomic_set(&dirtylimit_quit, 0);
370     } else {
371         qatomic_set(&dirtylimit_quit, 1);
372     }
373 }
374 
375 void dirtylimit_set_vcpu(int cpu_index,
376                          uint64_t quota,
377                          bool enable)
378 {
379     trace_dirtylimit_set_vcpu(cpu_index, quota);
380 
381     if (enable) {
382         dirtylimit_state->states[cpu_index].quota = quota;
383         if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
384             dirtylimit_state->limited_nvcpu++;
385         }
386     } else {
387         dirtylimit_state->states[cpu_index].quota = 0;
388         if (dirtylimit_state->states[cpu_index].enabled) {
389             dirtylimit_state->limited_nvcpu--;
390         }
391     }
392 
393     dirtylimit_state->states[cpu_index].enabled = enable;
394 }
395 
396 void dirtylimit_set_all(uint64_t quota,
397                         bool enable)
398 {
399     MachineState *ms = MACHINE(qdev_get_machine());
400     int max_cpus = ms->smp.max_cpus;
401     int i;
402 
403     for (i = 0; i < max_cpus; i++) {
404         dirtylimit_set_vcpu(i, quota, enable);
405     }
406 }
407 
408 void dirtylimit_vcpu_execute(CPUState *cpu)
409 {
410     if (cpu->throttle_us_per_full) {
411         dirtylimit_state_lock();
412 
413         if (dirtylimit_in_service() &&
414             dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
415             dirtylimit_state_unlock();
416             trace_dirtylimit_vcpu_execute(cpu->cpu_index,
417                     cpu->throttle_us_per_full);
418 
419             g_usleep(cpu->throttle_us_per_full);
420             return;
421         }
422 
423         dirtylimit_state_unlock();
424     }
425 }
426 
/*
 * Bring the dirty limit into service: allocate the limit state, allow
 * dirtylimit_process() to run, allocate the rate statistics and start
 * the sampling thread.
 *
 * Called from qmp_set_vcpu_dirty_limit() with the dirtylimit mutex held.
 */
static void dirtylimit_init(void)
{
    dirtylimit_state_initialize();
    dirtylimit_change(true);
    vcpu_dirty_rate_stat_initialize();
    vcpu_dirty_rate_stat_start();
}
434 
/*
 * Take the dirty limit out of service, undoing dirtylimit_init().
 *
 * The sampling thread is stopped and joined before anything it uses is
 * freed. vcpu_dirty_rate_stat_stop() temporarily drops the dirtylimit
 * mutex and the BQL, so the caller is expected to hold both.
 */
static void dirtylimit_cleanup(void)
{
    vcpu_dirty_rate_stat_stop();
    vcpu_dirty_rate_stat_finalize();
    dirtylimit_change(false);
    dirtylimit_state_finalize();
}
442 
/*
 * Tell whether the dirty page rate limit may be set or cancelled now.
 *
 * It may not while all of the following hold: a migration is running,
 * the caller is not the migration thread, the migration has the
 * dirty-limit capability enabled, and the dirty limit is in service.
 */
static bool dirtylimit_is_allowed(void)
{
    bool blocked = migration_is_running() &&
                   !migration_thread_is_self() &&
                   migrate_dirty_limit() &&
                   dirtylimit_in_service();

    return !blocked;
}
457 
458 void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
459                                  int64_t cpu_index,
460                                  Error **errp)
461 {
462     if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
463         return;
464     }
465 
466     if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
467         error_setg(errp, "incorrect cpu index specified");
468         return;
469     }
470 
471     if (!dirtylimit_is_allowed()) {
472         error_setg(errp, "can't cancel dirty page rate limit while"
473                    " migration is running");
474         return;
475     }
476 
477     if (!dirtylimit_in_service()) {
478         return;
479     }
480 
481     dirtylimit_state_lock();
482 
483     if (has_cpu_index) {
484         dirtylimit_set_vcpu(cpu_index, 0, false);
485     } else {
486         dirtylimit_set_all(0, false);
487     }
488 
489     if (!dirtylimit_state->limited_nvcpu) {
490         dirtylimit_cleanup();
491     }
492 
493     dirtylimit_state_unlock();
494 }
495 
496 void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
497 {
498     int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
499     Error *err = NULL;
500 
501     qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
502     if (err) {
503         hmp_handle_error(mon, err);
504         return;
505     }
506 
507     monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
508                    "dirty limit for virtual CPU]\n");
509 }
510 
511 void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
512                               int64_t cpu_index,
513                               uint64_t dirty_rate,
514                               Error **errp)
515 {
516     if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
517         error_setg(errp, "dirty page limit feature requires KVM with"
518                    " accelerator property 'dirty-ring-size' set'");
519         return;
520     }
521 
522     if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
523         error_setg(errp, "incorrect cpu index specified");
524         return;
525     }
526 
527     if (!dirtylimit_is_allowed()) {
528         error_setg(errp, "can't set dirty page rate limit while"
529                    " migration is running");
530         return;
531     }
532 
533     if (!dirty_rate) {
534         qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
535         return;
536     }
537 
538     dirtylimit_state_lock();
539 
540     if (!dirtylimit_in_service()) {
541         dirtylimit_init();
542     }
543 
544     if (has_cpu_index) {
545         dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
546     } else {
547         dirtylimit_set_all(dirty_rate, true);
548     }
549 
550     dirtylimit_state_unlock();
551 }
552 
553 void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
554 {
555     int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
556     int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
557     Error *err = NULL;
558 
559     if (dirty_rate < 0) {
560         error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate);
561         goto out;
562     }
563 
564     qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
565 
566 out:
567     hmp_handle_error(mon, err);
568 }
569 
570 /* Return the max throttle time of each virtual CPU */
571 uint64_t dirtylimit_throttle_time_per_round(void)
572 {
573     CPUState *cpu;
574     int64_t max = 0;
575 
576     CPU_FOREACH(cpu) {
577         if (cpu->throttle_us_per_full > max) {
578             max = cpu->throttle_us_per_full;
579         }
580     }
581 
582     return max;
583 }
584 
585 /*
586  * Estimate average dirty ring full time of each virtaul CPU.
587  * Return 0 if guest doesn't dirty memory.
588  */
589 uint64_t dirtylimit_ring_full_time(void)
590 {
591     CPUState *cpu;
592     uint64_t curr_rate = 0;
593     int nvcpus = 0;
594 
595     CPU_FOREACH(cpu) {
596         if (cpu->running) {
597             nvcpus++;
598             curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
599         }
600     }
601 
602     if (!curr_rate || !nvcpus) {
603         return 0;
604     }
605 
606     return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
607 }
608 
609 static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
610 {
611     DirtyLimitInfo *info = NULL;
612 
613     info = g_malloc0(sizeof(*info));
614     info->cpu_index = cpu_index;
615     info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
616     info->current_rate = vcpu_dirty_rate_get(cpu_index);
617 
618     return info;
619 }
620 
621 static struct DirtyLimitInfoList *dirtylimit_query_all(void)
622 {
623     int i, index;
624     DirtyLimitInfo *info = NULL;
625     DirtyLimitInfoList *head = NULL, **tail = &head;
626 
627     dirtylimit_state_lock();
628 
629     if (!dirtylimit_in_service()) {
630         dirtylimit_state_unlock();
631         return NULL;
632     }
633 
634     for (i = 0; i < dirtylimit_state->max_cpus; i++) {
635         index = dirtylimit_state->states[i].cpu_index;
636         if (dirtylimit_vcpu_get_state(index)->enabled) {
637             info = dirtylimit_query_vcpu(index);
638             QAPI_LIST_APPEND(tail, info);
639         }
640     }
641 
642     dirtylimit_state_unlock();
643 
644     return head;
645 }
646 
/*
 * QMP handler: return the dirty limit information of every limited vCPU,
 * as built by dirtylimit_query_all(). @errp is never set by this function.
 */
struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
{
    return dirtylimit_query_all();
}
651 
652 void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
653 {
654     DirtyLimitInfoList *info;
655     g_autoptr(DirtyLimitInfoList) head = NULL;
656     Error *err = NULL;
657 
658     if (!dirtylimit_in_service()) {
659         monitor_printf(mon, "Dirty page limit not enabled!\n");
660         return;
661     }
662 
663     head = qmp_query_vcpu_dirty_limit(&err);
664     if (err) {
665         hmp_handle_error(mon, err);
666         return;
667     }
668 
669     for (info = head; info != NULL; info = info->next) {
670         monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
671                             " current rate %"PRIi64 " (MB/s)\n",
672                             info->value->cpu_index,
673                             info->value->limit_rate,
674                             info->value->current_rate);
675     }
676 }
677