1cc2b33eaSHyman Huang(黄勇) /* 2cc2b33eaSHyman Huang(黄勇) * Dirty page rate limit implementation code 3cc2b33eaSHyman Huang(黄勇) * 4cc2b33eaSHyman Huang(黄勇) * Copyright (c) 2022 CHINA TELECOM CO.,LTD. 5cc2b33eaSHyman Huang(黄勇) * 6cc2b33eaSHyman Huang(黄勇) * Authors: 7cc2b33eaSHyman Huang(黄勇) * Hyman Huang(黄勇) <huangy81@chinatelecom.cn> 8cc2b33eaSHyman Huang(黄勇) * 9cc2b33eaSHyman Huang(黄勇) * This work is licensed under the terms of the GNU GPL, version 2 or later. 10cc2b33eaSHyman Huang(黄勇) * See the COPYING file in the top-level directory. 11cc2b33eaSHyman Huang(黄勇) */ 12cc2b33eaSHyman Huang(黄勇) 13cc2b33eaSHyman Huang(黄勇) #include "qemu/osdep.h" 14cc2b33eaSHyman Huang(黄勇) #include "qemu/main-loop.h" 15cc2b33eaSHyman Huang(黄勇) #include "qapi/qapi-commands-migration.h" 16f3b2e38cSHyman Huang(黄勇) #include "qapi/qmp/qdict.h" 17f3b2e38cSHyman Huang(黄勇) #include "qapi/error.h" 18cc2b33eaSHyman Huang(黄勇) #include "sysemu/dirtyrate.h" 19cc2b33eaSHyman Huang(黄勇) #include "sysemu/dirtylimit.h" 20f3b2e38cSHyman Huang(黄勇) #include "monitor/hmp.h" 21f3b2e38cSHyman Huang(黄勇) #include "monitor/monitor.h" 22cc2b33eaSHyman Huang(黄勇) #include "exec/memory.h" 2330ee29fdSThomas Huth #include "exec/target_page.h" 24cc2b33eaSHyman Huang(黄勇) #include "hw/boards.h" 25baa60983SHyman Huang(黄勇) #include "sysemu/kvm.h" 26baa60983SHyman Huang(黄勇) #include "trace.h" 27baa60983SHyman Huang(黄勇) 28baa60983SHyman Huang(黄勇) /* 29baa60983SHyman Huang(黄勇) * Dirtylimit stop working if dirty page rate error 30baa60983SHyman Huang(黄勇) * value less than DIRTYLIMIT_TOLERANCE_RANGE 31baa60983SHyman Huang(黄勇) */ 32baa60983SHyman Huang(黄勇) #define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ 33baa60983SHyman Huang(黄勇) /* 34baa60983SHyman Huang(黄勇) * Plus or minus vcpu sleep time linearly if dirty 35baa60983SHyman Huang(黄勇) * page rate error value percentage over 36baa60983SHyman Huang(黄勇) * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT. 37baa60983SHyman Huang(黄勇) * Otherwise, plus or minus a fixed vcpu sleep time. 38baa60983SHyman Huang(黄勇) */ 39baa60983SHyman Huang(黄勇) #define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50 40baa60983SHyman Huang(黄勇) /* 41baa60983SHyman Huang(黄勇) * Max vcpu sleep time percentage during a cycle 42baa60983SHyman Huang(黄勇) * composed of dirty ring full and sleep time. 43baa60983SHyman Huang(黄勇) */ 44baa60983SHyman Huang(黄勇) #define DIRTYLIMIT_THROTTLE_PCT_MAX 99 45cc2b33eaSHyman Huang(黄勇) 46cc2b33eaSHyman Huang(黄勇) struct { 47cc2b33eaSHyman Huang(黄勇) VcpuStat stat; 48cc2b33eaSHyman Huang(黄勇) bool running; 49cc2b33eaSHyman Huang(黄勇) QemuThread thread; 50cc2b33eaSHyman Huang(黄勇) } *vcpu_dirty_rate_stat; 51cc2b33eaSHyman Huang(黄勇) 52baa60983SHyman Huang(黄勇) typedef struct VcpuDirtyLimitState { 53baa60983SHyman Huang(黄勇) int cpu_index; 54baa60983SHyman Huang(黄勇) bool enabled; 55baa60983SHyman Huang(黄勇) /* 56baa60983SHyman Huang(黄勇) * Quota dirty page rate, unit is MB/s 57baa60983SHyman Huang(黄勇) * zero if not enabled. 58baa60983SHyman Huang(黄勇) */ 59baa60983SHyman Huang(黄勇) uint64_t quota; 60baa60983SHyman Huang(黄勇) } VcpuDirtyLimitState; 61baa60983SHyman Huang(黄勇) 62baa60983SHyman Huang(黄勇) struct { 63baa60983SHyman Huang(黄勇) VcpuDirtyLimitState *states; 64baa60983SHyman Huang(黄勇) /* Max cpus number configured by user */ 65baa60983SHyman Huang(黄勇) int max_cpus; 66baa60983SHyman Huang(黄勇) /* Number of vcpu under dirtylimit */ 67baa60983SHyman Huang(黄勇) int limited_nvcpu; 68baa60983SHyman Huang(黄勇) } *dirtylimit_state; 69baa60983SHyman Huang(黄勇) 70baa60983SHyman Huang(黄勇) /* protect dirtylimit_state */ 71baa60983SHyman Huang(黄勇) static QemuMutex dirtylimit_mutex; 72baa60983SHyman Huang(黄勇) 73baa60983SHyman Huang(黄勇) /* dirtylimit thread quit if dirtylimit_quit is true */ 74baa60983SHyman Huang(黄勇) static bool dirtylimit_quit; 75baa60983SHyman Huang(黄勇) 76cc2b33eaSHyman Huang(黄勇) static void vcpu_dirty_rate_stat_collect(void) 77cc2b33eaSHyman Huang(黄勇) { 78cc2b33eaSHyman Huang(黄勇) VcpuStat stat; 79cc2b33eaSHyman Huang(黄勇) int i = 0; 80cc2b33eaSHyman Huang(黄勇) 81cc2b33eaSHyman Huang(黄勇) /* calculate vcpu dirtyrate */ 82cc2b33eaSHyman Huang(黄勇) vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS, 83cc2b33eaSHyman Huang(黄勇) &stat, 84cc2b33eaSHyman Huang(黄勇) GLOBAL_DIRTY_LIMIT, 85cc2b33eaSHyman Huang(黄勇) false); 86cc2b33eaSHyman Huang(黄勇) 87cc2b33eaSHyman Huang(黄勇) for (i = 0; i < stat.nvcpu; i++) { 88cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat->stat.rates[i].id = i; 89cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat->stat.rates[i].dirty_rate = 90cc2b33eaSHyman Huang(黄勇) stat.rates[i].dirty_rate; 91cc2b33eaSHyman Huang(黄勇) } 92cc2b33eaSHyman Huang(黄勇) 93cc2b33eaSHyman Huang(黄勇) free(stat.rates); 94cc2b33eaSHyman Huang(黄勇) } 95cc2b33eaSHyman Huang(黄勇) 96cc2b33eaSHyman Huang(黄勇) static void *vcpu_dirty_rate_stat_thread(void *opaque) 97cc2b33eaSHyman Huang(黄勇) { 98cc2b33eaSHyman Huang(黄勇) rcu_register_thread(); 99cc2b33eaSHyman Huang(黄勇) 100cc2b33eaSHyman Huang(黄勇) /* start log sync */ 101cc2b33eaSHyman Huang(黄勇) global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true); 102cc2b33eaSHyman Huang(黄勇) 103cc2b33eaSHyman Huang(黄勇) while (qatomic_read(&vcpu_dirty_rate_stat->running)) { 104cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat_collect(); 105baa60983SHyman Huang(黄勇) if (dirtylimit_in_service()) { 106baa60983SHyman Huang(黄勇) dirtylimit_process(); 107baa60983SHyman Huang(黄勇) } 108cc2b33eaSHyman Huang(黄勇) } 109cc2b33eaSHyman Huang(黄勇) 110cc2b33eaSHyman Huang(黄勇) /* stop log sync */ 111cc2b33eaSHyman Huang(黄勇) global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false); 112cc2b33eaSHyman Huang(黄勇) 113cc2b33eaSHyman Huang(黄勇) rcu_unregister_thread(); 114cc2b33eaSHyman Huang(黄勇) return NULL; 115cc2b33eaSHyman Huang(黄勇) } 116cc2b33eaSHyman Huang(黄勇) 117cc2b33eaSHyman Huang(黄勇) int64_t vcpu_dirty_rate_get(int cpu_index) 118cc2b33eaSHyman Huang(黄勇) { 119cc2b33eaSHyman Huang(黄勇) DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates; 120cc2b33eaSHyman Huang(黄勇) return qatomic_read_i64(&rates[cpu_index].dirty_rate); 121cc2b33eaSHyman Huang(黄勇) } 122cc2b33eaSHyman Huang(黄勇) 123cc2b33eaSHyman Huang(黄勇) void vcpu_dirty_rate_stat_start(void) 124cc2b33eaSHyman Huang(黄勇) { 125cc2b33eaSHyman Huang(黄勇) if (qatomic_read(&vcpu_dirty_rate_stat->running)) { 126cc2b33eaSHyman Huang(黄勇) return; 127cc2b33eaSHyman Huang(黄勇) } 128cc2b33eaSHyman Huang(黄勇) 129cc2b33eaSHyman Huang(黄勇) qatomic_set(&vcpu_dirty_rate_stat->running, 1); 130cc2b33eaSHyman Huang(黄勇) qemu_thread_create(&vcpu_dirty_rate_stat->thread, 131cc2b33eaSHyman Huang(黄勇) "dirtyrate-stat", 132cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat_thread, 133cc2b33eaSHyman Huang(黄勇) NULL, 134cc2b33eaSHyman Huang(黄勇) QEMU_THREAD_JOINABLE); 135cc2b33eaSHyman Huang(黄勇) } 136cc2b33eaSHyman Huang(黄勇) 137cc2b33eaSHyman Huang(黄勇) void vcpu_dirty_rate_stat_stop(void) 138cc2b33eaSHyman Huang(黄勇) { 139cc2b33eaSHyman Huang(黄勇) qatomic_set(&vcpu_dirty_rate_stat->running, 0); 140baa60983SHyman Huang(黄勇) dirtylimit_state_unlock(); 141cc2b33eaSHyman Huang(黄勇) qemu_mutex_unlock_iothread(); 142cc2b33eaSHyman Huang(黄勇) qemu_thread_join(&vcpu_dirty_rate_stat->thread); 143cc2b33eaSHyman Huang(黄勇) qemu_mutex_lock_iothread(); 144baa60983SHyman Huang(黄勇) dirtylimit_state_lock(); 145cc2b33eaSHyman Huang(黄勇) } 146cc2b33eaSHyman Huang(黄勇) 147cc2b33eaSHyman Huang(黄勇) void vcpu_dirty_rate_stat_initialize(void) 148cc2b33eaSHyman Huang(黄勇) { 149cc2b33eaSHyman Huang(黄勇) MachineState *ms = MACHINE(qdev_get_machine()); 150cc2b33eaSHyman Huang(黄勇) int max_cpus = ms->smp.max_cpus; 151cc2b33eaSHyman Huang(黄勇) 152cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat = 153cc2b33eaSHyman Huang(黄勇) g_malloc0(sizeof(*vcpu_dirty_rate_stat)); 154cc2b33eaSHyman Huang(黄勇) 155cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat->stat.nvcpu = max_cpus; 156cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat->stat.rates = 157c5e8d518SMarkus Armbruster g_new0(DirtyRateVcpu, max_cpus); 158cc2b33eaSHyman Huang(黄勇) 159cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat->running = false; 160cc2b33eaSHyman Huang(黄勇) } 161cc2b33eaSHyman Huang(黄勇) 162cc2b33eaSHyman Huang(黄勇) void vcpu_dirty_rate_stat_finalize(void) 163cc2b33eaSHyman Huang(黄勇) { 164cc2b33eaSHyman Huang(黄勇) free(vcpu_dirty_rate_stat->stat.rates); 165cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat->stat.rates = NULL; 166cc2b33eaSHyman Huang(黄勇) 167cc2b33eaSHyman Huang(黄勇) free(vcpu_dirty_rate_stat); 168cc2b33eaSHyman Huang(黄勇) vcpu_dirty_rate_stat = NULL; 169cc2b33eaSHyman Huang(黄勇) } 170baa60983SHyman Huang(黄勇) 171baa60983SHyman Huang(黄勇) void dirtylimit_state_lock(void) 172baa60983SHyman Huang(黄勇) { 173baa60983SHyman Huang(黄勇) qemu_mutex_lock(&dirtylimit_mutex); 174baa60983SHyman Huang(黄勇) } 175baa60983SHyman Huang(黄勇) 176baa60983SHyman Huang(黄勇) void dirtylimit_state_unlock(void) 177baa60983SHyman Huang(黄勇) { 178baa60983SHyman Huang(黄勇) qemu_mutex_unlock(&dirtylimit_mutex); 179baa60983SHyman Huang(黄勇) } 180baa60983SHyman Huang(黄勇) 181baa60983SHyman Huang(黄勇) static void 182baa60983SHyman Huang(黄勇) __attribute__((__constructor__)) dirtylimit_mutex_init(void) 183baa60983SHyman Huang(黄勇) { 184baa60983SHyman Huang(黄勇) qemu_mutex_init(&dirtylimit_mutex); 185baa60983SHyman Huang(黄勇) } 186baa60983SHyman Huang(黄勇) 187baa60983SHyman Huang(黄勇) static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index) 188baa60983SHyman Huang(黄勇) { 189baa60983SHyman Huang(黄勇) return &dirtylimit_state->states[cpu_index]; 190baa60983SHyman Huang(黄勇) } 191baa60983SHyman Huang(黄勇) 192baa60983SHyman Huang(黄勇) void dirtylimit_state_initialize(void) 193baa60983SHyman Huang(黄勇) { 194baa60983SHyman Huang(黄勇) MachineState *ms = MACHINE(qdev_get_machine()); 195baa60983SHyman Huang(黄勇) int max_cpus = ms->smp.max_cpus; 196baa60983SHyman Huang(黄勇) int i; 197baa60983SHyman Huang(黄勇) 198baa60983SHyman Huang(黄勇) dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state)); 199baa60983SHyman Huang(黄勇) 200baa60983SHyman Huang(黄勇) dirtylimit_state->states = 201c5e8d518SMarkus Armbruster g_new0(VcpuDirtyLimitState, max_cpus); 202baa60983SHyman Huang(黄勇) 203baa60983SHyman Huang(黄勇) for (i = 0; i < max_cpus; i++) { 204baa60983SHyman Huang(黄勇) dirtylimit_state->states[i].cpu_index = i; 205baa60983SHyman Huang(黄勇) } 206baa60983SHyman Huang(黄勇) 207baa60983SHyman Huang(黄勇) dirtylimit_state->max_cpus = max_cpus; 208baa60983SHyman Huang(黄勇) trace_dirtylimit_state_initialize(max_cpus); 209baa60983SHyman Huang(黄勇) } 210baa60983SHyman Huang(黄勇) 211baa60983SHyman Huang(黄勇) void dirtylimit_state_finalize(void) 212baa60983SHyman Huang(黄勇) { 213baa60983SHyman Huang(黄勇) free(dirtylimit_state->states); 214baa60983SHyman Huang(黄勇) dirtylimit_state->states = NULL; 215baa60983SHyman Huang(黄勇) 216baa60983SHyman Huang(黄勇) free(dirtylimit_state); 217baa60983SHyman Huang(黄勇) dirtylimit_state = NULL; 218baa60983SHyman Huang(黄勇) 219baa60983SHyman Huang(黄勇) trace_dirtylimit_state_finalize(); 220baa60983SHyman Huang(黄勇) } 221baa60983SHyman Huang(黄勇) 222baa60983SHyman Huang(黄勇) bool dirtylimit_in_service(void) 223baa60983SHyman Huang(黄勇) { 224baa60983SHyman Huang(黄勇) return !!dirtylimit_state; 225baa60983SHyman Huang(黄勇) } 226baa60983SHyman Huang(黄勇) 227baa60983SHyman Huang(黄勇) bool dirtylimit_vcpu_index_valid(int cpu_index) 228baa60983SHyman Huang(黄勇) { 229baa60983SHyman Huang(黄勇) MachineState *ms = MACHINE(qdev_get_machine()); 230baa60983SHyman Huang(黄勇) 231baa60983SHyman Huang(黄勇) return !(cpu_index < 0 || 232baa60983SHyman Huang(黄勇) cpu_index >= ms->smp.max_cpus); 233baa60983SHyman Huang(黄勇) } 234baa60983SHyman Huang(黄勇) 235*6a6447feSRichard Henderson static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) 236baa60983SHyman Huang(黄勇) { 237baa60983SHyman Huang(黄勇) static uint64_t max_dirtyrate; 238*6a6447feSRichard Henderson unsigned target_page_bits = qemu_target_page_bits(); 239*6a6447feSRichard Henderson uint64_t dirty_ring_size_MB; 240*6a6447feSRichard Henderson 241*6a6447feSRichard Henderson /* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. */ 242*6a6447feSRichard Henderson assert(target_page_bits < 20); 243*6a6447feSRichard Henderson 244*6a6447feSRichard Henderson /* Convert ring size (pages) to MiB (2**20). */ 245*6a6447feSRichard Henderson dirty_ring_size_MB = kvm_dirty_ring_size() >> (20 - target_page_bits); 246baa60983SHyman Huang(黄勇) 247baa60983SHyman Huang(黄勇) if (max_dirtyrate < dirtyrate) { 248baa60983SHyman Huang(黄勇) max_dirtyrate = dirtyrate; 249baa60983SHyman Huang(黄勇) } 250baa60983SHyman Huang(黄勇) 251*6a6447feSRichard Henderson return dirty_ring_size_MB * 1000000 / max_dirtyrate; 252baa60983SHyman Huang(黄勇) } 253baa60983SHyman Huang(黄勇) 254baa60983SHyman Huang(黄勇) static inline bool dirtylimit_done(uint64_t quota, 255baa60983SHyman Huang(黄勇) uint64_t current) 256baa60983SHyman Huang(黄勇) { 257baa60983SHyman Huang(黄勇) uint64_t min, max; 258baa60983SHyman Huang(黄勇) 259baa60983SHyman Huang(黄勇) min = MIN(quota, current); 260baa60983SHyman Huang(黄勇) max = MAX(quota, current); 261baa60983SHyman Huang(黄勇) 262baa60983SHyman Huang(黄勇) return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false; 263baa60983SHyman Huang(黄勇) } 264baa60983SHyman Huang(黄勇) 265baa60983SHyman Huang(黄勇) static inline bool 266baa60983SHyman Huang(黄勇) dirtylimit_need_linear_adjustment(uint64_t quota, 267baa60983SHyman Huang(黄勇) uint64_t current) 268baa60983SHyman Huang(黄勇) { 269baa60983SHyman Huang(黄勇) uint64_t min, max; 270baa60983SHyman Huang(黄勇) 271baa60983SHyman Huang(黄勇) min = MIN(quota, current); 272baa60983SHyman Huang(黄勇) max = MAX(quota, current); 273baa60983SHyman Huang(黄勇) 274baa60983SHyman Huang(黄勇) return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT; 275baa60983SHyman Huang(黄勇) } 276baa60983SHyman Huang(黄勇) 277baa60983SHyman Huang(黄勇) static void dirtylimit_set_throttle(CPUState *cpu, 278baa60983SHyman Huang(黄勇) uint64_t quota, 279baa60983SHyman Huang(黄勇) uint64_t current) 280baa60983SHyman Huang(黄勇) { 281baa60983SHyman Huang(黄勇) int64_t ring_full_time_us = 0; 282baa60983SHyman Huang(黄勇) uint64_t sleep_pct = 0; 283baa60983SHyman Huang(黄勇) uint64_t throttle_us = 0; 284baa60983SHyman Huang(黄勇) 285baa60983SHyman Huang(黄勇) if (current == 0) { 286baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full = 0; 287baa60983SHyman Huang(黄勇) return; 288baa60983SHyman Huang(黄勇) } 289baa60983SHyman Huang(黄勇) 290baa60983SHyman Huang(黄勇) ring_full_time_us = dirtylimit_dirty_ring_full_time(current); 291baa60983SHyman Huang(黄勇) 292baa60983SHyman Huang(黄勇) if (dirtylimit_need_linear_adjustment(quota, current)) { 293baa60983SHyman Huang(黄勇) if (quota < current) { 294baa60983SHyman Huang(黄勇) sleep_pct = (current - quota) * 100 / current; 295baa60983SHyman Huang(黄勇) throttle_us = 296baa60983SHyman Huang(黄勇) ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); 297baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full += throttle_us; 298baa60983SHyman Huang(黄勇) } else { 299baa60983SHyman Huang(黄勇) sleep_pct = (quota - current) * 100 / quota; 300baa60983SHyman Huang(黄勇) throttle_us = 301baa60983SHyman Huang(黄勇) ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); 302baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full -= throttle_us; 303baa60983SHyman Huang(黄勇) } 304baa60983SHyman Huang(黄勇) 305baa60983SHyman Huang(黄勇) trace_dirtylimit_throttle_pct(cpu->cpu_index, 306baa60983SHyman Huang(黄勇) sleep_pct, 307baa60983SHyman Huang(黄勇) throttle_us); 308baa60983SHyman Huang(黄勇) } else { 309baa60983SHyman Huang(黄勇) if (quota < current) { 310baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full += ring_full_time_us / 10; 311baa60983SHyman Huang(黄勇) } else { 312baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full -= ring_full_time_us / 10; 313baa60983SHyman Huang(黄勇) } 314baa60983SHyman Huang(黄勇) } 315baa60983SHyman Huang(黄勇) 316baa60983SHyman Huang(黄勇) /* 317baa60983SHyman Huang(黄勇) * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario), 318baa60983SHyman Huang(黄勇) * current dirty page rate may never reach the quota, we should stop 319baa60983SHyman Huang(黄勇) * increasing sleep time? 320baa60983SHyman Huang(黄勇) */ 321baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full, 322baa60983SHyman Huang(黄勇) ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX); 323baa60983SHyman Huang(黄勇) 324baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0); 325baa60983SHyman Huang(黄勇) } 326baa60983SHyman Huang(黄勇) 327baa60983SHyman Huang(黄勇) static void dirtylimit_adjust_throttle(CPUState *cpu) 328baa60983SHyman Huang(黄勇) { 329baa60983SHyman Huang(黄勇) uint64_t quota = 0; 330baa60983SHyman Huang(黄勇) uint64_t current = 0; 331baa60983SHyman Huang(黄勇) int cpu_index = cpu->cpu_index; 332baa60983SHyman Huang(黄勇) 333baa60983SHyman Huang(黄勇) quota = dirtylimit_vcpu_get_state(cpu_index)->quota; 334baa60983SHyman Huang(黄勇) current = vcpu_dirty_rate_get(cpu_index); 335baa60983SHyman Huang(黄勇) 336baa60983SHyman Huang(黄勇) if (!dirtylimit_done(quota, current)) { 337baa60983SHyman Huang(黄勇) dirtylimit_set_throttle(cpu, quota, current); 338baa60983SHyman Huang(黄勇) } 339baa60983SHyman Huang(黄勇) 340baa60983SHyman Huang(黄勇) return; 341baa60983SHyman Huang(黄勇) } 342baa60983SHyman Huang(黄勇) 343baa60983SHyman Huang(黄勇) void dirtylimit_process(void) 344baa60983SHyman Huang(黄勇) { 345baa60983SHyman Huang(黄勇) CPUState *cpu; 346baa60983SHyman Huang(黄勇) 347baa60983SHyman Huang(黄勇) if (!qatomic_read(&dirtylimit_quit)) { 348baa60983SHyman Huang(黄勇) dirtylimit_state_lock(); 349baa60983SHyman Huang(黄勇) 350baa60983SHyman Huang(黄勇) if (!dirtylimit_in_service()) { 351baa60983SHyman Huang(黄勇) dirtylimit_state_unlock(); 352baa60983SHyman Huang(黄勇) return; 353baa60983SHyman Huang(黄勇) } 354baa60983SHyman Huang(黄勇) 355baa60983SHyman Huang(黄勇) CPU_FOREACH(cpu) { 356baa60983SHyman Huang(黄勇) if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { 357baa60983SHyman Huang(黄勇) continue; 358baa60983SHyman Huang(黄勇) } 359baa60983SHyman Huang(黄勇) dirtylimit_adjust_throttle(cpu); 360baa60983SHyman Huang(黄勇) } 361baa60983SHyman Huang(黄勇) dirtylimit_state_unlock(); 362baa60983SHyman Huang(黄勇) } 363baa60983SHyman Huang(黄勇) } 364baa60983SHyman Huang(黄勇) 365baa60983SHyman Huang(黄勇) void dirtylimit_change(bool start) 366baa60983SHyman Huang(黄勇) { 367baa60983SHyman Huang(黄勇) if (start) { 368baa60983SHyman Huang(黄勇) qatomic_set(&dirtylimit_quit, 0); 369baa60983SHyman Huang(黄勇) } else { 370baa60983SHyman Huang(黄勇) qatomic_set(&dirtylimit_quit, 1); 371baa60983SHyman Huang(黄勇) } 372baa60983SHyman Huang(黄勇) } 373baa60983SHyman Huang(黄勇) 374baa60983SHyman Huang(黄勇) void dirtylimit_set_vcpu(int cpu_index, 375baa60983SHyman Huang(黄勇) uint64_t quota, 376baa60983SHyman Huang(黄勇) bool enable) 377baa60983SHyman Huang(黄勇) { 378baa60983SHyman Huang(黄勇) trace_dirtylimit_set_vcpu(cpu_index, quota); 379baa60983SHyman Huang(黄勇) 380baa60983SHyman Huang(黄勇) if (enable) { 381baa60983SHyman Huang(黄勇) dirtylimit_state->states[cpu_index].quota = quota; 382baa60983SHyman Huang(黄勇) if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) { 383baa60983SHyman Huang(黄勇) dirtylimit_state->limited_nvcpu++; 384baa60983SHyman Huang(黄勇) } 385baa60983SHyman Huang(黄勇) } else { 386baa60983SHyman Huang(黄勇) dirtylimit_state->states[cpu_index].quota = 0; 387baa60983SHyman Huang(黄勇) if (dirtylimit_state->states[cpu_index].enabled) { 388baa60983SHyman Huang(黄勇) dirtylimit_state->limited_nvcpu--; 389baa60983SHyman Huang(黄勇) } 390baa60983SHyman Huang(黄勇) } 391baa60983SHyman Huang(黄勇) 392baa60983SHyman Huang(黄勇) dirtylimit_state->states[cpu_index].enabled = enable; 393baa60983SHyman Huang(黄勇) } 394baa60983SHyman Huang(黄勇) 395baa60983SHyman Huang(黄勇) void dirtylimit_set_all(uint64_t quota, 396baa60983SHyman Huang(黄勇) bool enable) 397baa60983SHyman Huang(黄勇) { 398baa60983SHyman Huang(黄勇) MachineState *ms = MACHINE(qdev_get_machine()); 399baa60983SHyman Huang(黄勇) int max_cpus = ms->smp.max_cpus; 400baa60983SHyman Huang(黄勇) int i; 401baa60983SHyman Huang(黄勇) 402baa60983SHyman Huang(黄勇) for (i = 0; i < max_cpus; i++) { 403baa60983SHyman Huang(黄勇) dirtylimit_set_vcpu(i, quota, enable); 404baa60983SHyman Huang(黄勇) } 405baa60983SHyman Huang(黄勇) } 406baa60983SHyman Huang(黄勇) 407baa60983SHyman Huang(黄勇) void dirtylimit_vcpu_execute(CPUState *cpu) 408baa60983SHyman Huang(黄勇) { 409baa60983SHyman Huang(黄勇) if (dirtylimit_in_service() && 410baa60983SHyman Huang(黄勇) dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled && 411baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full) { 412baa60983SHyman Huang(黄勇) trace_dirtylimit_vcpu_execute(cpu->cpu_index, 413baa60983SHyman Huang(黄勇) cpu->throttle_us_per_full); 414baa60983SHyman Huang(黄勇) usleep(cpu->throttle_us_per_full); 415baa60983SHyman Huang(黄勇) } 416baa60983SHyman Huang(黄勇) } 417f3b2e38cSHyman Huang(黄勇) 418f3b2e38cSHyman Huang(黄勇) static void dirtylimit_init(void) 419f3b2e38cSHyman Huang(黄勇) { 420f3b2e38cSHyman Huang(黄勇) dirtylimit_state_initialize(); 421f3b2e38cSHyman Huang(黄勇) dirtylimit_change(true); 422f3b2e38cSHyman Huang(黄勇) vcpu_dirty_rate_stat_initialize(); 423f3b2e38cSHyman Huang(黄勇) vcpu_dirty_rate_stat_start(); 424f3b2e38cSHyman Huang(黄勇) } 425f3b2e38cSHyman Huang(黄勇) 426f3b2e38cSHyman Huang(黄勇) static void dirtylimit_cleanup(void) 427f3b2e38cSHyman Huang(黄勇) { 428f3b2e38cSHyman Huang(黄勇) vcpu_dirty_rate_stat_stop(); 429f3b2e38cSHyman Huang(黄勇) vcpu_dirty_rate_stat_finalize(); 430f3b2e38cSHyman Huang(黄勇) dirtylimit_change(false); 431f3b2e38cSHyman Huang(黄勇) dirtylimit_state_finalize(); 432f3b2e38cSHyman Huang(黄勇) } 433f3b2e38cSHyman Huang(黄勇) 434f3b2e38cSHyman Huang(黄勇) void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index, 435f3b2e38cSHyman Huang(黄勇) int64_t cpu_index, 436f3b2e38cSHyman Huang(黄勇) Error **errp) 437f3b2e38cSHyman Huang(黄勇) { 438f3b2e38cSHyman Huang(黄勇) if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { 439f3b2e38cSHyman Huang(黄勇) return; 440f3b2e38cSHyman Huang(黄勇) } 441f3b2e38cSHyman Huang(黄勇) 442f3b2e38cSHyman Huang(黄勇) if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { 443f3b2e38cSHyman Huang(黄勇) error_setg(errp, "incorrect cpu index specified"); 444f3b2e38cSHyman Huang(黄勇) return; 445f3b2e38cSHyman Huang(黄勇) } 446f3b2e38cSHyman Huang(黄勇) 447f3b2e38cSHyman Huang(黄勇) if (!dirtylimit_in_service()) { 448f3b2e38cSHyman Huang(黄勇) return; 449f3b2e38cSHyman Huang(黄勇) } 450f3b2e38cSHyman Huang(黄勇) 451f3b2e38cSHyman Huang(黄勇) dirtylimit_state_lock(); 452f3b2e38cSHyman Huang(黄勇) 453f3b2e38cSHyman Huang(黄勇) if (has_cpu_index) { 454f3b2e38cSHyman Huang(黄勇) dirtylimit_set_vcpu(cpu_index, 0, false); 455f3b2e38cSHyman Huang(黄勇) } else { 456f3b2e38cSHyman Huang(黄勇) dirtylimit_set_all(0, false); 457f3b2e38cSHyman Huang(黄勇) } 458f3b2e38cSHyman Huang(黄勇) 459f3b2e38cSHyman Huang(黄勇) if (!dirtylimit_state->limited_nvcpu) { 460f3b2e38cSHyman Huang(黄勇) dirtylimit_cleanup(); 461f3b2e38cSHyman Huang(黄勇) } 462f3b2e38cSHyman Huang(黄勇) 463f3b2e38cSHyman Huang(黄勇) dirtylimit_state_unlock(); 464f3b2e38cSHyman Huang(黄勇) } 465f3b2e38cSHyman Huang(黄勇) 466f3b2e38cSHyman Huang(黄勇) void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 467f3b2e38cSHyman Huang(黄勇) { 468f3b2e38cSHyman Huang(黄勇) int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); 469f3b2e38cSHyman Huang(黄勇) Error *err = NULL; 470f3b2e38cSHyman Huang(黄勇) 471f3b2e38cSHyman Huang(黄勇) qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err); 472f3b2e38cSHyman Huang(黄勇) if (err) { 473f3b2e38cSHyman Huang(黄勇) hmp_handle_error(mon, err); 474f3b2e38cSHyman Huang(黄勇) return; 475f3b2e38cSHyman Huang(黄勇) } 476f3b2e38cSHyman Huang(黄勇) 477f3b2e38cSHyman Huang(黄勇) monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query " 478f3b2e38cSHyman Huang(黄勇) "dirty limit for virtual CPU]\n"); 479f3b2e38cSHyman Huang(黄勇) } 480f3b2e38cSHyman Huang(黄勇) 481f3b2e38cSHyman Huang(黄勇) void qmp_set_vcpu_dirty_limit(bool has_cpu_index, 482f3b2e38cSHyman Huang(黄勇) int64_t cpu_index, 483f3b2e38cSHyman Huang(黄勇) uint64_t dirty_rate, 484f3b2e38cSHyman Huang(黄勇) Error **errp) 485f3b2e38cSHyman Huang(黄勇) { 486f3b2e38cSHyman Huang(黄勇) if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { 487f3b2e38cSHyman Huang(黄勇) error_setg(errp, "dirty page limit feature requires KVM with" 488f3b2e38cSHyman Huang(黄勇) " accelerator property 'dirty-ring-size' set'"); 489f3b2e38cSHyman Huang(黄勇) return; 490f3b2e38cSHyman Huang(黄勇) } 491f3b2e38cSHyman Huang(黄勇) 492f3b2e38cSHyman Huang(黄勇) if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { 493f3b2e38cSHyman Huang(黄勇) error_setg(errp, "incorrect cpu index specified"); 494f3b2e38cSHyman Huang(黄勇) return; 495f3b2e38cSHyman Huang(黄勇) } 496f3b2e38cSHyman Huang(黄勇) 497f3b2e38cSHyman Huang(黄勇) if (!dirty_rate) { 498f3b2e38cSHyman Huang(黄勇) qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp); 499f3b2e38cSHyman Huang(黄勇) return; 500f3b2e38cSHyman Huang(黄勇) } 501f3b2e38cSHyman Huang(黄勇) 502f3b2e38cSHyman Huang(黄勇) dirtylimit_state_lock(); 503f3b2e38cSHyman Huang(黄勇) 504f3b2e38cSHyman Huang(黄勇) if (!dirtylimit_in_service()) { 505f3b2e38cSHyman Huang(黄勇) dirtylimit_init(); 506f3b2e38cSHyman Huang(黄勇) } 507f3b2e38cSHyman Huang(黄勇) 508f3b2e38cSHyman Huang(黄勇) if (has_cpu_index) { 509f3b2e38cSHyman Huang(黄勇) dirtylimit_set_vcpu(cpu_index, dirty_rate, true); 510f3b2e38cSHyman Huang(黄勇) } else { 511f3b2e38cSHyman Huang(黄勇) dirtylimit_set_all(dirty_rate, true); 512f3b2e38cSHyman Huang(黄勇) } 513f3b2e38cSHyman Huang(黄勇) 514f3b2e38cSHyman Huang(黄勇) dirtylimit_state_unlock(); 515f3b2e38cSHyman Huang(黄勇) } 516f3b2e38cSHyman Huang(黄勇) 517f3b2e38cSHyman Huang(黄勇) void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 518f3b2e38cSHyman Huang(黄勇) { 519f3b2e38cSHyman Huang(黄勇) int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate"); 520f3b2e38cSHyman Huang(黄勇) int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); 521f3b2e38cSHyman Huang(黄勇) Error *err = NULL; 522f3b2e38cSHyman Huang(黄勇) 523f3b2e38cSHyman Huang(黄勇) qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err); 524f3b2e38cSHyman Huang(黄勇) if (err) { 525f3b2e38cSHyman Huang(黄勇) hmp_handle_error(mon, err); 526f3b2e38cSHyman Huang(黄勇) return; 527f3b2e38cSHyman Huang(黄勇) } 528f3b2e38cSHyman Huang(黄勇) 529f3b2e38cSHyman Huang(黄勇) monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query " 530f3b2e38cSHyman Huang(黄勇) "dirty limit for virtual CPU]\n"); 531f3b2e38cSHyman Huang(黄勇) } 532f3b2e38cSHyman Huang(黄勇) 533f3b2e38cSHyman Huang(黄勇) static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index) 534f3b2e38cSHyman Huang(黄勇) { 535f3b2e38cSHyman Huang(黄勇) DirtyLimitInfo *info = NULL; 536f3b2e38cSHyman Huang(黄勇) 537f3b2e38cSHyman Huang(黄勇) info = g_malloc0(sizeof(*info)); 538f3b2e38cSHyman Huang(黄勇) info->cpu_index = cpu_index; 539f3b2e38cSHyman Huang(黄勇) info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota; 540f3b2e38cSHyman Huang(黄勇) info->current_rate = vcpu_dirty_rate_get(cpu_index); 541f3b2e38cSHyman Huang(黄勇) 542f3b2e38cSHyman Huang(黄勇) return info; 543f3b2e38cSHyman Huang(黄勇) } 544f3b2e38cSHyman Huang(黄勇) 545f3b2e38cSHyman Huang(黄勇) static struct DirtyLimitInfoList *dirtylimit_query_all(void) 546f3b2e38cSHyman Huang(黄勇) { 547f3b2e38cSHyman Huang(黄勇) int i, index; 548f3b2e38cSHyman Huang(黄勇) DirtyLimitInfo *info = NULL; 549f3b2e38cSHyman Huang(黄勇) DirtyLimitInfoList *head = NULL, **tail = &head; 550f3b2e38cSHyman Huang(黄勇) 551f3b2e38cSHyman Huang(黄勇) dirtylimit_state_lock(); 552f3b2e38cSHyman Huang(黄勇) 553f3b2e38cSHyman Huang(黄勇) if (!dirtylimit_in_service()) { 554f3b2e38cSHyman Huang(黄勇) dirtylimit_state_unlock(); 555f3b2e38cSHyman Huang(黄勇) return NULL; 556f3b2e38cSHyman Huang(黄勇) } 557f3b2e38cSHyman Huang(黄勇) 558f3b2e38cSHyman Huang(黄勇) for (i = 0; i < dirtylimit_state->max_cpus; i++) { 559f3b2e38cSHyman Huang(黄勇) index = dirtylimit_state->states[i].cpu_index; 560f3b2e38cSHyman Huang(黄勇) if (dirtylimit_vcpu_get_state(index)->enabled) { 561f3b2e38cSHyman Huang(黄勇) info = dirtylimit_query_vcpu(index); 562f3b2e38cSHyman Huang(黄勇) QAPI_LIST_APPEND(tail, info); 563f3b2e38cSHyman Huang(黄勇) } 564f3b2e38cSHyman Huang(黄勇) } 565f3b2e38cSHyman Huang(黄勇) 566f3b2e38cSHyman Huang(黄勇) dirtylimit_state_unlock(); 567f3b2e38cSHyman Huang(黄勇) 568f3b2e38cSHyman Huang(黄勇) return head; 569f3b2e38cSHyman Huang(黄勇) } 570f3b2e38cSHyman Huang(黄勇) 571f3b2e38cSHyman Huang(黄勇) struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp) 572f3b2e38cSHyman Huang(黄勇) { 573f3b2e38cSHyman Huang(黄勇) if (!dirtylimit_in_service()) { 574f3b2e38cSHyman Huang(黄勇) return NULL; 575f3b2e38cSHyman Huang(黄勇) } 576f3b2e38cSHyman Huang(黄勇) 577f3b2e38cSHyman Huang(黄勇) return dirtylimit_query_all(); 578f3b2e38cSHyman Huang(黄勇) } 579f3b2e38cSHyman Huang(黄勇) 580f3b2e38cSHyman Huang(黄勇) void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 581f3b2e38cSHyman Huang(黄勇) { 582f3b2e38cSHyman Huang(黄勇) DirtyLimitInfoList *limit, *head, *info = NULL; 583f3b2e38cSHyman Huang(黄勇) Error *err = NULL; 584f3b2e38cSHyman Huang(黄勇) 585f3b2e38cSHyman Huang(黄勇) if (!dirtylimit_in_service()) { 586f3b2e38cSHyman Huang(黄勇) monitor_printf(mon, "Dirty page limit not enabled!\n"); 587f3b2e38cSHyman Huang(黄勇) return; 588f3b2e38cSHyman Huang(黄勇) } 589f3b2e38cSHyman Huang(黄勇) 590f3b2e38cSHyman Huang(黄勇) info = qmp_query_vcpu_dirty_limit(&err); 591f3b2e38cSHyman Huang(黄勇) if (err) { 592f3b2e38cSHyman Huang(黄勇) hmp_handle_error(mon, err); 593f3b2e38cSHyman Huang(黄勇) return; 594f3b2e38cSHyman Huang(黄勇) } 595f3b2e38cSHyman Huang(黄勇) 596f3b2e38cSHyman Huang(黄勇) head = info; 597f3b2e38cSHyman Huang(黄勇) for (limit = head; limit != NULL; limit = limit->next) { 598f3b2e38cSHyman Huang(黄勇) monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s)," 599f3b2e38cSHyman Huang(黄勇) " current rate %"PRIi64 " (MB/s)\n", 600f3b2e38cSHyman Huang(黄勇) limit->value->cpu_index, 601f3b2e38cSHyman Huang(黄勇) limit->value->limit_rate, 602f3b2e38cSHyman Huang(黄勇) limit->value->current_rate); 603f3b2e38cSHyman Huang(黄勇) } 604f3b2e38cSHyman Huang(黄勇) 605f3b2e38cSHyman Huang(黄勇) g_free(info); 606f3b2e38cSHyman Huang(黄勇) } 607