1cc2b33eaSHyman Huang(黄勇) /* 2cc2b33eaSHyman Huang(黄勇) * Dirty page rate limit implementation code 3cc2b33eaSHyman Huang(黄勇) * 4cc2b33eaSHyman Huang(黄勇) * Copyright (c) 2022 CHINA TELECOM CO.,LTD. 5cc2b33eaSHyman Huang(黄勇) * 6cc2b33eaSHyman Huang(黄勇) * Authors: 7cc2b33eaSHyman Huang(黄勇) * Hyman Huang(黄勇) <huangy81@chinatelecom.cn> 8cc2b33eaSHyman Huang(黄勇) * 9cc2b33eaSHyman Huang(黄勇) * This work is licensed under the terms of the GNU GPL, version 2 or later. 10cc2b33eaSHyman Huang(黄勇) * See the COPYING file in the top-level directory. 11cc2b33eaSHyman Huang(黄勇) */ 12cc2b33eaSHyman Huang(黄勇) 13cc2b33eaSHyman Huang(黄勇) #include "qemu/osdep.h" 14cc2b33eaSHyman Huang(黄勇) #include "qemu/main-loop.h" 15cc2b33eaSHyman Huang(黄勇) #include "qapi/qapi-commands-migration.h" 16f3b2e38cSHyman Huang(黄勇) #include "qapi/qmp/qdict.h" 17f3b2e38cSHyman Huang(黄勇) #include "qapi/error.h" 18cc2b33eaSHyman Huang(黄勇) #include "sysemu/dirtyrate.h" 19cc2b33eaSHyman Huang(黄勇) #include "sysemu/dirtylimit.h" 20f3b2e38cSHyman Huang(黄勇) #include "monitor/hmp.h" 21f3b2e38cSHyman Huang(黄勇) #include "monitor/monitor.h" 22cc2b33eaSHyman Huang(黄勇) #include "exec/memory.h" 2330ee29fdSThomas Huth #include "exec/target_page.h" 24cc2b33eaSHyman Huang(黄勇) #include "hw/boards.h" 25baa60983SHyman Huang(黄勇) #include "sysemu/kvm.h" 26baa60983SHyman Huang(黄勇) #include "trace.h" 27baa60983SHyman Huang(黄勇) 28baa60983SHyman Huang(黄勇) /* 29baa60983SHyman Huang(黄勇) * Dirtylimit stop working if dirty page rate error 30baa60983SHyman Huang(黄勇) * value less than DIRTYLIMIT_TOLERANCE_RANGE 31baa60983SHyman Huang(黄勇) */ 32baa60983SHyman Huang(黄勇) #define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ 33baa60983SHyman Huang(黄勇) /* 34baa60983SHyman Huang(黄勇) * Plus or minus vcpu sleep time linearly if dirty 35baa60983SHyman Huang(黄勇) * page rate error value percentage over 36baa60983SHyman Huang(黄勇) * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT. 37baa60983SHyman Huang(黄勇) * Otherwise, plus or minus a fixed vcpu sleep time. 38baa60983SHyman Huang(黄勇) */ 39baa60983SHyman Huang(黄勇) #define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50 40baa60983SHyman Huang(黄勇) /* 41baa60983SHyman Huang(黄勇) * Max vcpu sleep time percentage during a cycle 42baa60983SHyman Huang(黄勇) * composed of dirty ring full and sleep time. 43baa60983SHyman Huang(黄勇) */ 44baa60983SHyman Huang(黄勇) #define DIRTYLIMIT_THROTTLE_PCT_MAX 99 45cc2b33eaSHyman Huang(黄勇) 46cc2b33eaSHyman Huang(黄勇) struct { 47cc2b33eaSHyman Huang(黄勇) VcpuStat stat; 48cc2b33eaSHyman Huang(黄勇) bool running; 49cc2b33eaSHyman Huang(黄勇) QemuThread thread; 50cc2b33eaSHyman Huang(黄勇) } *vcpu_dirty_rate_stat; 51cc2b33eaSHyman Huang(黄勇) 52baa60983SHyman Huang(黄勇) typedef struct VcpuDirtyLimitState { 53baa60983SHyman Huang(黄勇) int cpu_index; 54baa60983SHyman Huang(黄勇) bool enabled; 55baa60983SHyman Huang(黄勇) /* 56baa60983SHyman Huang(黄勇) * Quota dirty page rate, unit is MB/s 57baa60983SHyman Huang(黄勇) * zero if not enabled. 58baa60983SHyman Huang(黄勇) */ 59baa60983SHyman Huang(黄勇) uint64_t quota; 60baa60983SHyman Huang(黄勇) } VcpuDirtyLimitState; 61baa60983SHyman Huang(黄勇) 62baa60983SHyman Huang(黄勇) struct { 63baa60983SHyman Huang(黄勇) VcpuDirtyLimitState *states; 64baa60983SHyman Huang(黄勇) /* Max cpus number configured by user */ 65baa60983SHyman Huang(黄勇) int max_cpus; 66baa60983SHyman Huang(黄勇) /* Number of vcpu under dirtylimit */ 67baa60983SHyman Huang(黄勇) int limited_nvcpu; 68baa60983SHyman Huang(黄勇) } *dirtylimit_state; 69baa60983SHyman Huang(黄勇) 70baa60983SHyman Huang(黄勇) /* protect dirtylimit_state */ 71baa60983SHyman Huang(黄勇) static QemuMutex dirtylimit_mutex; 72baa60983SHyman Huang(黄勇) 73baa60983SHyman Huang(黄勇) /* dirtylimit thread quit if dirtylimit_quit is true */ 74baa60983SHyman Huang(黄勇) static bool dirtylimit_quit; 75baa60983SHyman Huang(黄勇) 76cc2b33eaSHyman Huang(黄勇) static void vcpu_dirty_rate_stat_collect(void) 77cc2b33eaSHyman Huang(黄勇) { 78cc2b33eaSHyman Huang(黄勇) VcpuStat stat; 79cc2b33eaSHyman Huang(黄勇) int i = 0; 80cc2b33eaSHyman Huang(黄勇) 81cc2b33eaSHyman Huang(黄勇) /* calculate vcpu dirtyrate */ 82cc2b33eaSHyman Huang(黄勇) vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS, 83cc2b33eaSHyman Huang(黄勇) &stat, 84cc2b33eaSHyman Huang(黄勇) GLOBAL_DIRTY_LIMIT, 85cc2b33eaSHyman Huang(黄勇) false); 86cc2b33eaSHyman Huang(黄勇) 87cc2b33eaSHyman Huang(黄勇) for (i = 0; i < stat.nvcpu; i++) { 88cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat->stat.rates[i].id = i; 89cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat->stat.rates[i].dirty_rate = 90cc2b33eaSHyman Huang(黄勇) stat.rates[i].dirty_rate; 91cc2b33eaSHyman Huang(黄勇) } 92cc2b33eaSHyman Huang(黄勇) 93cc2b33eaSHyman Huang(黄勇) free(stat.rates); 94cc2b33eaSHyman Huang(黄勇) } 95cc2b33eaSHyman Huang(黄勇) 96cc2b33eaSHyman Huang(黄勇) static void *vcpu_dirty_rate_stat_thread(void *opaque) 97cc2b33eaSHyman Huang(黄勇) { 98cc2b33eaSHyman Huang(黄勇) rcu_register_thread(); 99cc2b33eaSHyman Huang(黄勇) 100cc2b33eaSHyman Huang(黄勇) /* start log sync */ 101cc2b33eaSHyman Huang(黄勇) global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true); 102cc2b33eaSHyman Huang(黄勇) 103cc2b33eaSHyman Huang(黄勇) while (qatomic_read(&vcpu_dirty_rate_stat->running)) { 104cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat_collect(); 105baa60983SHyman Huang(黄勇) if (dirtylimit_in_service()) { 106baa60983SHyman Huang(黄勇) dirtylimit_process(); 107baa60983SHyman Huang(黄勇) } 108cc2b33eaSHyman Huang(黄勇) } 109cc2b33eaSHyman Huang(黄勇) 110cc2b33eaSHyman Huang(黄勇) /* stop log sync */ 111cc2b33eaSHyman Huang(黄勇) global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false); 112cc2b33eaSHyman Huang(黄勇) 113cc2b33eaSHyman Huang(黄勇) rcu_unregister_thread(); 114cc2b33eaSHyman Huang(黄勇) return NULL; 115cc2b33eaSHyman Huang(黄勇) } 116cc2b33eaSHyman Huang(黄勇) 117cc2b33eaSHyman Huang(黄勇) int64_t vcpu_dirty_rate_get(int cpu_index) 118cc2b33eaSHyman Huang(黄勇) { 119cc2b33eaSHyman Huang(黄勇) DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates; 120cc2b33eaSHyman Huang(黄勇) return qatomic_read_i64(&rates[cpu_index].dirty_rate); 121cc2b33eaSHyman Huang(黄勇) } 122cc2b33eaSHyman Huang(黄勇) 123cc2b33eaSHyman Huang(黄勇) void vcpu_dirty_rate_stat_start(void) 124cc2b33eaSHyman Huang(黄勇) { 125cc2b33eaSHyman Huang(黄勇) if (qatomic_read(&vcpu_dirty_rate_stat->running)) { 126cc2b33eaSHyman Huang(黄勇) return; 127cc2b33eaSHyman Huang(黄勇) } 128cc2b33eaSHyman Huang(黄勇) 129cc2b33eaSHyman Huang(黄勇) qatomic_set(&vcpu_dirty_rate_stat->running, 1); 130cc2b33eaSHyman Huang(黄勇) qemu_thread_create(&vcpu_dirty_rate_stat->thread, 131cc2b33eaSHyman Huang(黄勇) "dirtyrate-stat", 132cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat_thread, 133cc2b33eaSHyman Huang(黄勇) NULL, 134cc2b33eaSHyman Huang(黄勇) QEMU_THREAD_JOINABLE); 135cc2b33eaSHyman Huang(黄勇) } 136cc2b33eaSHyman Huang(黄勇) 137cc2b33eaSHyman Huang(黄勇) void vcpu_dirty_rate_stat_stop(void) 138cc2b33eaSHyman Huang(黄勇) { 139cc2b33eaSHyman Huang(黄勇) qatomic_set(&vcpu_dirty_rate_stat->running, 0); 140baa60983SHyman Huang(黄勇) dirtylimit_state_unlock(); 141cc2b33eaSHyman Huang(黄勇) qemu_mutex_unlock_iothread(); 142cc2b33eaSHyman Huang(黄勇) qemu_thread_join(&vcpu_dirty_rate_stat->thread); 143cc2b33eaSHyman Huang(黄勇) qemu_mutex_lock_iothread(); 144baa60983SHyman Huang(黄勇) dirtylimit_state_lock(); 145cc2b33eaSHyman Huang(黄勇) } 146cc2b33eaSHyman Huang(黄勇) 147cc2b33eaSHyman Huang(黄勇) void vcpu_dirty_rate_stat_initialize(void) 148cc2b33eaSHyman Huang(黄勇) { 149cc2b33eaSHyman Huang(黄勇) MachineState *ms = MACHINE(qdev_get_machine()); 150cc2b33eaSHyman Huang(黄勇) int max_cpus = ms->smp.max_cpus; 151cc2b33eaSHyman Huang(黄勇) 152cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat = 153cc2b33eaSHyman Huang(黄勇) g_malloc0(sizeof(*vcpu_dirty_rate_stat)); 154cc2b33eaSHyman Huang(黄勇) 155cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat->stat.nvcpu = max_cpus; 156cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat->stat.rates = 157c5e8d518SMarkus Armbruster g_new0(DirtyRateVcpu, max_cpus); 158cc2b33eaSHyman Huang(黄勇) 159cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat->running = false; 160cc2b33eaSHyman Huang(黄勇) } 161cc2b33eaSHyman Huang(黄勇) 162cc2b33eaSHyman Huang(黄勇) void vcpu_dirty_rate_stat_finalize(void) 163cc2b33eaSHyman Huang(黄勇) { 164cc2b33eaSHyman Huang(黄勇) free(vcpu_dirty_rate_stat->stat.rates); 165cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat->stat.rates = NULL; 166cc2b33eaSHyman Huang(黄勇) 167cc2b33eaSHyman Huang(黄勇) free(vcpu_dirty_rate_stat); 168cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat = NULL; 169cc2b33eaSHyman Huang(黄勇) } 170baa60983SHyman Huang(黄勇) 171baa60983SHyman Huang(黄勇) void dirtylimit_state_lock(void) 172baa60983SHyman Huang(黄勇) { 173baa60983SHyman Huang(黄勇) qemu_mutex_lock(&dirtylimit_mutex); 174baa60983SHyman Huang(黄勇) } 175baa60983SHyman Huang(黄勇) 176baa60983SHyman Huang(黄勇) void dirtylimit_state_unlock(void) 177baa60983SHyman Huang(黄勇) { 178baa60983SHyman Huang(黄勇) qemu_mutex_unlock(&dirtylimit_mutex); 179baa60983SHyman Huang(黄勇) } 180baa60983SHyman Huang(黄勇) 181baa60983SHyman Huang(黄勇) static void 182baa60983SHyman Huang(黄勇) __attribute__((__constructor__)) dirtylimit_mutex_init(void) 183baa60983SHyman Huang(黄勇) { 184baa60983SHyman Huang(黄勇) qemu_mutex_init(&dirtylimit_mutex); 185baa60983SHyman Huang(黄勇) } 186baa60983SHyman Huang(黄勇) 187baa60983SHyman Huang(黄勇) static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index) 188baa60983SHyman Huang(黄勇) { 189baa60983SHyman Huang(黄勇) return &dirtylimit_state->states[cpu_index]; 190baa60983SHyman Huang(黄勇) } 191baa60983SHyman Huang(黄勇) 192baa60983SHyman Huang(黄勇) void dirtylimit_state_initialize(void) 193baa60983SHyman Huang(黄勇) { 194baa60983SHyman Huang(黄勇) MachineState *ms = MACHINE(qdev_get_machine()); 195baa60983SHyman Huang(黄勇) int max_cpus = ms->smp.max_cpus; 196baa60983SHyman Huang(黄勇) int i; 197baa60983SHyman Huang(黄勇) 198baa60983SHyman Huang(黄勇) dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state)); 199baa60983SHyman Huang(黄勇) 200baa60983SHyman Huang(黄勇) dirtylimit_state->states = 201c5e8d518SMarkus Armbruster g_new0(VcpuDirtyLimitState, max_cpus); 202baa60983SHyman Huang(黄勇) 203baa60983SHyman Huang(黄勇) for (i = 0; i < max_cpus; i++) { 204baa60983SHyman Huang(黄勇) dirtylimit_state->states[i].cpu_index = i; 205baa60983SHyman Huang(黄勇) } 206baa60983SHyman Huang(黄勇) 207baa60983SHyman Huang(黄勇) dirtylimit_state->max_cpus = max_cpus; 208baa60983SHyman Huang(黄勇) trace_dirtylimit_state_initialize(max_cpus); 209baa60983SHyman Huang(黄勇) } 210baa60983SHyman Huang(黄勇) 211baa60983SHyman Huang(黄勇) void dirtylimit_state_finalize(void) 212baa60983SHyman Huang(黄勇) { 213baa60983SHyman Huang(黄勇) free(dirtylimit_state->states); 214baa60983SHyman Huang(黄勇) dirtylimit_state->states = NULL; 215baa60983SHyman Huang(黄勇) 216baa60983SHyman Huang(黄勇) free(dirtylimit_state); 217baa60983SHyman Huang(黄勇) dirtylimit_state = NULL; 218baa60983SHyman Huang(黄勇) 219baa60983SHyman Huang(黄勇) trace_dirtylimit_state_finalize(); 220baa60983SHyman Huang(黄勇) } 221baa60983SHyman Huang(黄勇) 222baa60983SHyman Huang(黄勇) bool dirtylimit_in_service(void) 223baa60983SHyman Huang(黄勇) { 224baa60983SHyman Huang(黄勇) return !!dirtylimit_state; 225baa60983SHyman Huang(黄勇) } 226baa60983SHyman Huang(黄勇) 227baa60983SHyman Huang(黄勇) bool dirtylimit_vcpu_index_valid(int cpu_index) 228baa60983SHyman Huang(黄勇) { 229baa60983SHyman Huang(黄勇) MachineState *ms = MACHINE(qdev_get_machine()); 230baa60983SHyman Huang(黄勇) 231baa60983SHyman Huang(黄勇) return !(cpu_index < 0 || 232baa60983SHyman Huang(黄勇) cpu_index >= ms->smp.max_cpus); 233baa60983SHyman Huang(黄勇) } 234baa60983SHyman Huang(黄勇) 2356a6447feSRichard Henderson static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) 236baa60983SHyman Huang(黄勇) { 237baa60983SHyman Huang(黄勇) static uint64_t max_dirtyrate; 238*beeda9b7SJuan Quintela uint64_t dirty_ring_size_MiB; 2396a6447feSRichard Henderson 240*beeda9b7SJuan Quintela dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size()); 241baa60983SHyman Huang(黄勇) 242baa60983SHyman Huang(黄勇) if (max_dirtyrate < dirtyrate) { 243baa60983SHyman Huang(黄勇) max_dirtyrate = dirtyrate; 244baa60983SHyman Huang(黄勇) } 245baa60983SHyman Huang(黄勇) 246*beeda9b7SJuan Quintela return dirty_ring_size_MiB * 1000000 / max_dirtyrate; 247baa60983SHyman Huang(黄勇) } 248baa60983SHyman Huang(黄勇) 249baa60983SHyman Huang(黄勇) static inline bool dirtylimit_done(uint64_t quota, 250baa60983SHyman Huang(黄勇) uint64_t current) 251baa60983SHyman Huang(黄勇) { 252baa60983SHyman Huang(黄勇) uint64_t min, max; 253baa60983SHyman Huang(黄勇) 254baa60983SHyman Huang(黄勇) min = MIN(quota, current); 255baa60983SHyman Huang(黄勇) max = MAX(quota, current); 256baa60983SHyman Huang(黄勇) 257baa60983SHyman Huang(黄勇) return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false; 258baa60983SHyman Huang(黄勇) } 259baa60983SHyman Huang(黄勇) 260baa60983SHyman Huang(黄勇) static inline bool 261baa60983SHyman Huang(黄勇) dirtylimit_need_linear_adjustment(uint64_t quota, 262baa60983SHyman Huang(黄勇) uint64_t current) 263baa60983SHyman Huang(黄勇) { 264baa60983SHyman Huang(黄勇) uint64_t min, max; 265baa60983SHyman Huang(黄勇) 266baa60983SHyman Huang(黄勇) min = MIN(quota, current); 267baa60983SHyman Huang(黄勇) max = MAX(quota, current); 268baa60983SHyman Huang(黄勇) 269baa60983SHyman Huang(黄勇) return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT; 270baa60983SHyman Huang(黄勇) } 271baa60983SHyman Huang(黄勇) 272baa60983SHyman Huang(黄勇) static void dirtylimit_set_throttle(CPUState *cpu, 273baa60983SHyman Huang(黄勇) uint64_t quota, 274baa60983SHyman Huang(黄勇) uint64_t current) 275baa60983SHyman Huang(黄勇) { 276baa60983SHyman Huang(黄勇) int64_t ring_full_time_us = 0; 277baa60983SHyman Huang(黄勇) uint64_t sleep_pct = 0; 278baa60983SHyman Huang(黄勇) uint64_t throttle_us = 0; 279baa60983SHyman Huang(黄勇) 280baa60983SHyman Huang(黄勇) if (current == 0) { 281baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full = 0; 282baa60983SHyman Huang(黄勇) return; 283baa60983SHyman Huang(黄勇) } 284baa60983SHyman Huang(黄勇) 285baa60983SHyman Huang(黄勇) ring_full_time_us = dirtylimit_dirty_ring_full_time(current); 286baa60983SHyman Huang(黄勇) 287baa60983SHyman Huang(黄勇) if (dirtylimit_need_linear_adjustment(quota, current)) { 288baa60983SHyman Huang(黄勇) if (quota < current) { 289baa60983SHyman Huang(黄勇) sleep_pct = (current - quota) * 100 / current; 290baa60983SHyman Huang(黄勇) throttle_us = 291baa60983SHyman Huang(黄勇) ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); 292baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full += throttle_us; 293baa60983SHyman Huang(黄勇) } else { 294baa60983SHyman Huang(黄勇) sleep_pct = (quota - current) * 100 / quota; 295baa60983SHyman Huang(黄勇) throttle_us = 296baa60983SHyman Huang(黄勇) ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); 297baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full -= throttle_us; 298baa60983SHyman Huang(黄勇) } 299baa60983SHyman Huang(黄勇) 300baa60983SHyman Huang(黄勇) trace_dirtylimit_throttle_pct(cpu->cpu_index, 301baa60983SHyman Huang(黄勇) sleep_pct, 302baa60983SHyman Huang(黄勇) throttle_us); 303baa60983SHyman Huang(黄勇) } else { 304baa60983SHyman Huang(黄勇) if (quota < current) { 305baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full += ring_full_time_us / 10; 306baa60983SHyman Huang(黄勇) } else { 307baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full -= ring_full_time_us / 10; 308baa60983SHyman Huang(黄勇) } 309baa60983SHyman Huang(黄勇) } 310baa60983SHyman Huang(黄勇) 311baa60983SHyman Huang(黄勇) /* 312baa60983SHyman Huang(黄勇) * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario), 313baa60983SHyman Huang(黄勇) * current dirty page rate may never reach the quota, we should stop 314baa60983SHyman Huang(黄勇) * increasing sleep time? 315baa60983SHyman Huang(黄勇) */ 316baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full, 317baa60983SHyman Huang(黄勇) ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX); 318baa60983SHyman Huang(黄勇) 319baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0); 320baa60983SHyman Huang(黄勇) } 321baa60983SHyman Huang(黄勇) 322baa60983SHyman Huang(黄勇) static void dirtylimit_adjust_throttle(CPUState *cpu) 323baa60983SHyman Huang(黄勇) { 324baa60983SHyman Huang(黄勇) uint64_t quota = 0; 325baa60983SHyman Huang(黄勇) uint64_t current = 0; 326baa60983SHyman Huang(黄勇) int cpu_index = cpu->cpu_index; 327baa60983SHyman Huang(黄勇) 328baa60983SHyman Huang(黄勇) quota = dirtylimit_vcpu_get_state(cpu_index)->quota; 329baa60983SHyman Huang(黄勇) current = vcpu_dirty_rate_get(cpu_index); 330baa60983SHyman Huang(黄勇) 331baa60983SHyman Huang(黄勇) if (!dirtylimit_done(quota, current)) { 332baa60983SHyman Huang(黄勇) dirtylimit_set_throttle(cpu, quota, current); 333baa60983SHyman Huang(黄勇) } 334baa60983SHyman Huang(黄勇) 335baa60983SHyman Huang(黄勇) return; 336baa60983SHyman Huang(黄勇) } 337baa60983SHyman Huang(黄勇) 338baa60983SHyman Huang(黄勇) void dirtylimit_process(void) 339baa60983SHyman Huang(黄勇) { 340baa60983SHyman Huang(黄勇) CPUState *cpu; 341baa60983SHyman Huang(黄勇) 342baa60983SHyman Huang(黄勇) if (!qatomic_read(&dirtylimit_quit)) { 343baa60983SHyman Huang(黄勇) dirtylimit_state_lock(); 344baa60983SHyman Huang(黄勇) 345baa60983SHyman Huang(黄勇) if (!dirtylimit_in_service()) { 346baa60983SHyman Huang(黄勇) dirtylimit_state_unlock(); 347baa60983SHyman Huang(黄勇) return; 348baa60983SHyman Huang(黄勇) } 349baa60983SHyman Huang(黄勇) 350baa60983SHyman Huang(黄勇) CPU_FOREACH(cpu) { 351baa60983SHyman Huang(黄勇) if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { 352baa60983SHyman Huang(黄勇) continue; 353baa60983SHyman Huang(黄勇) } 354baa60983SHyman Huang(黄勇) dirtylimit_adjust_throttle(cpu); 355baa60983SHyman Huang(黄勇) } 356baa60983SHyman Huang(黄勇) dirtylimit_state_unlock(); 357baa60983SHyman Huang(黄勇) } 358baa60983SHyman Huang(黄勇) } 359baa60983SHyman Huang(黄勇) 360baa60983SHyman Huang(黄勇) void dirtylimit_change(bool start) 361baa60983SHyman Huang(黄勇) { 362baa60983SHyman Huang(黄勇) if (start) { 363baa60983SHyman Huang(黄勇) qatomic_set(&dirtylimit_quit, 0); 364baa60983SHyman Huang(黄勇) } else { 365baa60983SHyman Huang(黄勇) qatomic_set(&dirtylimit_quit, 1); 366baa60983SHyman Huang(黄勇) } 367baa60983SHyman Huang(黄勇) } 368baa60983SHyman Huang(黄勇) 369baa60983SHyman Huang(黄勇) void dirtylimit_set_vcpu(int cpu_index, 370baa60983SHyman Huang(黄勇) uint64_t quota, 371baa60983SHyman Huang(黄勇) bool enable) 372baa60983SHyman Huang(黄勇) { 373baa60983SHyman Huang(黄勇) trace_dirtylimit_set_vcpu(cpu_index, quota); 374baa60983SHyman Huang(黄勇) 375baa60983SHyman Huang(黄勇) if (enable) { 376baa60983SHyman Huang(黄勇) dirtylimit_state->states[cpu_index].quota = quota; 377baa60983SHyman Huang(黄勇) if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) { 378baa60983SHyman Huang(黄勇) dirtylimit_state->limited_nvcpu++; 379baa60983SHyman Huang(黄勇) } 380baa60983SHyman Huang(黄勇) } else { 381baa60983SHyman Huang(黄勇) dirtylimit_state->states[cpu_index].quota = 0; 382baa60983SHyman Huang(黄勇) if (dirtylimit_state->states[cpu_index].enabled) { 383baa60983SHyman Huang(黄勇) dirtylimit_state->limited_nvcpu--; 384baa60983SHyman Huang(黄勇) } 385baa60983SHyman Huang(黄勇) } 386baa60983SHyman Huang(黄勇) 387baa60983SHyman Huang(黄勇) dirtylimit_state->states[cpu_index].enabled = enable; 388baa60983SHyman Huang(黄勇) } 389baa60983SHyman Huang(黄勇) 390baa60983SHyman Huang(黄勇) void dirtylimit_set_all(uint64_t quota, 391baa60983SHyman Huang(黄勇) bool enable) 392baa60983SHyman Huang(黄勇) { 393baa60983SHyman Huang(黄勇) MachineState *ms = MACHINE(qdev_get_machine()); 394baa60983SHyman Huang(黄勇) int max_cpus = ms->smp.max_cpus; 395baa60983SHyman Huang(黄勇) int i; 396baa60983SHyman Huang(黄勇) 397baa60983SHyman Huang(黄勇) for (i = 0; i < max_cpus; i++) { 398baa60983SHyman Huang(黄勇) dirtylimit_set_vcpu(i, quota, enable); 399baa60983SHyman Huang(黄勇) } 400baa60983SHyman Huang(黄勇) } 401baa60983SHyman Huang(黄勇) 402baa60983SHyman Huang(黄勇) void dirtylimit_vcpu_execute(CPUState *cpu) 403baa60983SHyman Huang(黄勇) { 404baa60983SHyman Huang(黄勇) if (dirtylimit_in_service() && 405baa60983SHyman Huang(黄勇) dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled && 406baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full) { 407baa60983SHyman Huang(黄勇) trace_dirtylimit_vcpu_execute(cpu->cpu_index, 408baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full); 409baa60983SHyman Huang(黄勇) usleep(cpu->throttle_us_per_full); 410baa60983SHyman Huang(黄勇) } 411baa60983SHyman Huang(黄勇) } 412f3b2e38cSHyman Huang(黄勇) 413f3b2e38cSHyman Huang(黄勇) static void dirtylimit_init(void) 414f3b2e38cSHyman Huang(黄勇) { 415f3b2e38cSHyman Huang(黄勇) dirtylimit_state_initialize(); 416f3b2e38cSHyman Huang(黄勇) dirtylimit_change(true); 417f3b2e38cSHyman Huang(黄勇) vcpu_dirty_rate_stat_initialize(); 418f3b2e38cSHyman Huang(黄勇) vcpu_dirty_rate_stat_start(); 419f3b2e38cSHyman Huang(黄勇) } 420f3b2e38cSHyman Huang(黄勇) 421f3b2e38cSHyman Huang(黄勇) static void dirtylimit_cleanup(void) 422f3b2e38cSHyman Huang(黄勇) { 423f3b2e38cSHyman Huang(黄勇) vcpu_dirty_rate_stat_stop(); 424f3b2e38cSHyman Huang(黄勇) vcpu_dirty_rate_stat_finalize(); 425f3b2e38cSHyman Huang(黄勇) dirtylimit_change(false); 426f3b2e38cSHyman Huang(黄勇) dirtylimit_state_finalize(); 427f3b2e38cSHyman Huang(黄勇) } 428f3b2e38cSHyman Huang(黄勇) 429f3b2e38cSHyman Huang(黄勇) void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index, 430f3b2e38cSHyman Huang(黄勇) int64_t cpu_index, 431f3b2e38cSHyman Huang(黄勇) Error **errp) 432f3b2e38cSHyman Huang(黄勇) { 433f3b2e38cSHyman Huang(黄勇) if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { 434f3b2e38cSHyman Huang(黄勇) return; 435f3b2e38cSHyman Huang(黄勇) } 436f3b2e38cSHyman Huang(黄勇) 437f3b2e38cSHyman Huang(黄勇) if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { 438f3b2e38cSHyman Huang(黄勇) error_setg(errp, "incorrect cpu index specified"); 439f3b2e38cSHyman Huang(黄勇) return; 440f3b2e38cSHyman Huang(黄勇) } 441f3b2e38cSHyman Huang(黄勇) 442f3b2e38cSHyman Huang(黄勇) if (!dirtylimit_in_service()) { 443f3b2e38cSHyman Huang(黄勇) return; 444f3b2e38cSHyman Huang(黄勇) } 445f3b2e38cSHyman Huang(黄勇) 446f3b2e38cSHyman Huang(黄勇) dirtylimit_state_lock(); 447f3b2e38cSHyman Huang(黄勇) 448f3b2e38cSHyman Huang(黄勇) if (has_cpu_index) { 449f3b2e38cSHyman Huang(黄勇) dirtylimit_set_vcpu(cpu_index, 0, false); 450f3b2e38cSHyman Huang(黄勇) } else { 451f3b2e38cSHyman Huang(黄勇) dirtylimit_set_all(0, false); 452f3b2e38cSHyman Huang(黄勇) } 453f3b2e38cSHyman Huang(黄勇) 454f3b2e38cSHyman Huang(黄勇) if (!dirtylimit_state->limited_nvcpu) { 455f3b2e38cSHyman Huang(黄勇) dirtylimit_cleanup(); 456f3b2e38cSHyman Huang(黄勇) } 457f3b2e38cSHyman Huang(黄勇) 458f3b2e38cSHyman Huang(黄勇) dirtylimit_state_unlock(); 459f3b2e38cSHyman Huang(黄勇) } 460f3b2e38cSHyman Huang(黄勇) 461f3b2e38cSHyman Huang(黄勇) void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 462f3b2e38cSHyman Huang(黄勇) { 463f3b2e38cSHyman Huang(黄勇) int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); 464f3b2e38cSHyman Huang(黄勇) Error *err = NULL; 465f3b2e38cSHyman Huang(黄勇) 466f3b2e38cSHyman Huang(黄勇) qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err); 467f3b2e38cSHyman Huang(黄勇) if (err) { 468f3b2e38cSHyman Huang(黄勇) hmp_handle_error(mon, err); 469f3b2e38cSHyman Huang(黄勇) return; 470f3b2e38cSHyman Huang(黄勇) } 471f3b2e38cSHyman Huang(黄勇) 472f3b2e38cSHyman Huang(黄勇) monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query " 473f3b2e38cSHyman Huang(黄勇) "dirty limit for virtual CPU]\n"); 474f3b2e38cSHyman Huang(黄勇) } 475f3b2e38cSHyman Huang(黄勇) 476f3b2e38cSHyman Huang(黄勇) void qmp_set_vcpu_dirty_limit(bool has_cpu_index, 477f3b2e38cSHyman Huang(黄勇) int64_t cpu_index, 478f3b2e38cSHyman Huang(黄勇) uint64_t dirty_rate, 479f3b2e38cSHyman Huang(黄勇) Error **errp) 480f3b2e38cSHyman Huang(黄勇) { 481f3b2e38cSHyman Huang(黄勇) if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { 482f3b2e38cSHyman Huang(黄勇) error_setg(errp, "dirty page limit feature requires KVM with" 483f3b2e38cSHyman Huang(黄勇) " accelerator property 'dirty-ring-size' set'"); 484f3b2e38cSHyman Huang(黄勇) return; 485f3b2e38cSHyman Huang(黄勇) } 486f3b2e38cSHyman Huang(黄勇) 487f3b2e38cSHyman Huang(黄勇) if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { 488f3b2e38cSHyman Huang(黄勇) error_setg(errp, "incorrect cpu index specified"); 489f3b2e38cSHyman Huang(黄勇) return; 490f3b2e38cSHyman Huang(黄勇) } 491f3b2e38cSHyman Huang(黄勇) 492f3b2e38cSHyman Huang(黄勇) if (!dirty_rate) { 493f3b2e38cSHyman Huang(黄勇) qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp); 494f3b2e38cSHyman Huang(黄勇) return; 495f3b2e38cSHyman Huang(黄勇) } 496f3b2e38cSHyman Huang(黄勇) 497f3b2e38cSHyman Huang(黄勇) dirtylimit_state_lock(); 498f3b2e38cSHyman Huang(黄勇) 499f3b2e38cSHyman Huang(黄勇) if (!dirtylimit_in_service()) { 500f3b2e38cSHyman Huang(黄勇) dirtylimit_init(); 501f3b2e38cSHyman Huang(黄勇) } 502f3b2e38cSHyman Huang(黄勇) 503f3b2e38cSHyman Huang(黄勇) if (has_cpu_index) { 504f3b2e38cSHyman Huang(黄勇) dirtylimit_set_vcpu(cpu_index, dirty_rate, true); 505f3b2e38cSHyman Huang(黄勇) } else { 506f3b2e38cSHyman Huang(黄勇) dirtylimit_set_all(dirty_rate, true); 507f3b2e38cSHyman Huang(黄勇) } 508f3b2e38cSHyman Huang(黄勇) 509f3b2e38cSHyman Huang(黄勇) dirtylimit_state_unlock(); 510f3b2e38cSHyman Huang(黄勇) } 511f3b2e38cSHyman Huang(黄勇) 512f3b2e38cSHyman Huang(黄勇) void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 513f3b2e38cSHyman Huang(黄勇) { 514f3b2e38cSHyman Huang(黄勇) int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate"); 515f3b2e38cSHyman Huang(黄勇) int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); 516f3b2e38cSHyman Huang(黄勇) Error *err = NULL; 517f3b2e38cSHyman Huang(黄勇) 518f3b2e38cSHyman Huang(黄勇) qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err); 519f3b2e38cSHyman Huang(黄勇) if (err) { 520f3b2e38cSHyman Huang(黄勇) hmp_handle_error(mon, err); 521f3b2e38cSHyman Huang(黄勇) return; 522f3b2e38cSHyman Huang(黄勇) } 523f3b2e38cSHyman Huang(黄勇) 524f3b2e38cSHyman Huang(黄勇) monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query " 525f3b2e38cSHyman Huang(黄勇) "dirty limit for virtual CPU]\n"); 526f3b2e38cSHyman Huang(黄勇) } 527f3b2e38cSHyman Huang(黄勇) 528f3b2e38cSHyman Huang(黄勇) static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index) 529f3b2e38cSHyman Huang(黄勇) { 530f3b2e38cSHyman Huang(黄勇) DirtyLimitInfo *info = NULL; 531f3b2e38cSHyman Huang(黄勇) 532f3b2e38cSHyman Huang(黄勇) info = g_malloc0(sizeof(*info)); 533f3b2e38cSHyman Huang(黄勇) info->cpu_index = cpu_index; 534f3b2e38cSHyman Huang(黄勇) info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota; 535f3b2e38cSHyman Huang(黄勇) info->current_rate = vcpu_dirty_rate_get(cpu_index); 536f3b2e38cSHyman Huang(黄勇) 537f3b2e38cSHyman Huang(黄勇) return info; 538f3b2e38cSHyman Huang(黄勇) } 539f3b2e38cSHyman Huang(黄勇) 540f3b2e38cSHyman Huang(黄勇) static struct DirtyLimitInfoList *dirtylimit_query_all(void) 541f3b2e38cSHyman Huang(黄勇) { 542f3b2e38cSHyman Huang(黄勇) int i, index; 543f3b2e38cSHyman Huang(黄勇) DirtyLimitInfo *info = NULL; 544f3b2e38cSHyman Huang(黄勇) DirtyLimitInfoList *head = NULL, **tail = &head; 545f3b2e38cSHyman Huang(黄勇) 546f3b2e38cSHyman Huang(黄勇) dirtylimit_state_lock(); 547f3b2e38cSHyman Huang(黄勇) 548f3b2e38cSHyman Huang(黄勇) if (!dirtylimit_in_service()) { 549f3b2e38cSHyman Huang(黄勇) dirtylimit_state_unlock(); 550f3b2e38cSHyman Huang(黄勇) return NULL; 551f3b2e38cSHyman Huang(黄勇) } 552f3b2e38cSHyman Huang(黄勇) 553f3b2e38cSHyman Huang(黄勇) for (i = 0; i < dirtylimit_state->max_cpus; i++) { 554f3b2e38cSHyman Huang(黄勇) index = dirtylimit_state->states[i].cpu_index; 555f3b2e38cSHyman Huang(黄勇) if (dirtylimit_vcpu_get_state(index)->enabled) { 556f3b2e38cSHyman Huang(黄勇) info = dirtylimit_query_vcpu(index); 557f3b2e38cSHyman Huang(黄勇) QAPI_LIST_APPEND(tail, info); 558f3b2e38cSHyman Huang(黄勇) } 559f3b2e38cSHyman Huang(黄勇) } 560f3b2e38cSHyman Huang(黄勇) 561f3b2e38cSHyman Huang(黄勇) dirtylimit_state_unlock(); 562f3b2e38cSHyman Huang(黄勇) 563f3b2e38cSHyman Huang(黄勇) return head; 564f3b2e38cSHyman Huang(黄勇) } 565f3b2e38cSHyman Huang(黄勇) 566f3b2e38cSHyman Huang(黄勇) struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp) 567f3b2e38cSHyman Huang(黄勇) { 568f3b2e38cSHyman Huang(黄勇) if (!dirtylimit_in_service()) { 569f3b2e38cSHyman Huang(黄勇) return NULL; 570f3b2e38cSHyman Huang(黄勇) } 571f3b2e38cSHyman Huang(黄勇) 572f3b2e38cSHyman Huang(黄勇) return dirtylimit_query_all(); 573f3b2e38cSHyman Huang(黄勇) } 574f3b2e38cSHyman Huang(黄勇) 575f3b2e38cSHyman Huang(黄勇) void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 576f3b2e38cSHyman Huang(黄勇) { 577f3b2e38cSHyman Huang(黄勇) DirtyLimitInfoList *limit, *head, *info = NULL; 578f3b2e38cSHyman Huang(黄勇) Error *err = NULL; 579f3b2e38cSHyman Huang(黄勇) 580f3b2e38cSHyman Huang(黄勇) if (!dirtylimit_in_service()) { 581f3b2e38cSHyman Huang(黄勇) monitor_printf(mon, "Dirty page limit not enabled!\n"); 582f3b2e38cSHyman Huang(黄勇) return; 583f3b2e38cSHyman Huang(黄勇) } 584f3b2e38cSHyman Huang(黄勇) 585f3b2e38cSHyman Huang(黄勇) info = qmp_query_vcpu_dirty_limit(&err); 586f3b2e38cSHyman Huang(黄勇) if (err) { 587f3b2e38cSHyman Huang(黄勇) hmp_handle_error(mon, err); 588f3b2e38cSHyman Huang(黄勇) return; 589f3b2e38cSHyman Huang(黄勇) } 590f3b2e38cSHyman Huang(黄勇) 591f3b2e38cSHyman Huang(黄勇) head = info; 592f3b2e38cSHyman Huang(黄勇) for (limit = head; limit != NULL; limit = limit->next) { 593f3b2e38cSHyman Huang(黄勇) monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s)," 594f3b2e38cSHyman Huang(黄勇) " current rate %"PRIi64 " (MB/s)\n", 595f3b2e38cSHyman Huang(黄勇) limit->value->cpu_index, 596f3b2e38cSHyman Huang(黄勇) limit->value->limit_rate, 597f3b2e38cSHyman Huang(黄勇) limit->value->current_rate); 598f3b2e38cSHyman Huang(黄勇) } 599f3b2e38cSHyman Huang(黄勇) 600f3b2e38cSHyman Huang(黄勇) g_free(info); 601f3b2e38cSHyman Huang(黄勇) } 602