1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "qemu/osdep.h" 26 #include "qemu-common.h" 27 #include "qemu/cutils.h" 28 #include "migration/vmstate.h" 29 #include "qapi/error.h" 30 #include "qemu/error-report.h" 31 #include "exec/exec-all.h" 32 #include "sysemu/cpus.h" 33 #include "sysemu/qtest.h" 34 #include "qemu/main-loop.h" 35 #include "qemu/option.h" 36 #include "qemu/seqlock.h" 37 #include "sysemu/replay.h" 38 #include "sysemu/runstate.h" 39 #include "hw/core/cpu.h" 40 #include "sysemu/cpu-timers.h" 41 #include "sysemu/cpu-throttle.h" 42 #include "timers-state.h" 43 44 /* 45 * ICOUNT: Instruction Counter 46 * 47 * this module is split off from cpu-timers because the icount part 48 * is TCG-specific, and does not need to be built for other accels. 49 */ 50 static bool icount_sleep = true; 51 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */ 52 #define MAX_ICOUNT_SHIFT 10 53 54 /* 55 * 0 = Do not count executed instructions. 56 * 1 = Fixed conversion of insn to ns via "shift" option 57 * 2 = Runtime adaptive algorithm to compute shift 58 */ 59 int use_icount; 60 61 static void icount_enable_precise(void) 62 { 63 use_icount = 1; 64 } 65 66 static void icount_enable_adaptive(void) 67 { 68 use_icount = 2; 69 } 70 71 /* 72 * The current number of executed instructions is based on what we 73 * originally budgeted minus the current state of the decrementing 74 * icount counters in extra/u16.low. 75 */ 76 static int64_t icount_get_executed(CPUState *cpu) 77 { 78 return (cpu->icount_budget - 79 (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra)); 80 } 81 82 /* 83 * Update the global shared timer_state.qemu_icount to take into 84 * account executed instructions. This is done by the TCG vCPU 85 * thread so the main-loop can see time has moved forward. 86 */ 87 static void icount_update_locked(CPUState *cpu) 88 { 89 int64_t executed = icount_get_executed(cpu); 90 cpu->icount_budget -= executed; 91 92 qatomic_set_i64(&timers_state.qemu_icount, 93 timers_state.qemu_icount + executed); 94 } 95 96 /* 97 * Update the global shared timer_state.qemu_icount to take into 98 * account executed instructions. This is done by the TCG vCPU 99 * thread so the main-loop can see time has moved forward. 100 */ 101 void icount_update(CPUState *cpu) 102 { 103 seqlock_write_lock(&timers_state.vm_clock_seqlock, 104 &timers_state.vm_clock_lock); 105 icount_update_locked(cpu); 106 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 107 &timers_state.vm_clock_lock); 108 } 109 110 static int64_t icount_get_raw_locked(void) 111 { 112 CPUState *cpu = current_cpu; 113 114 if (cpu && cpu->running) { 115 if (!cpu->can_do_io) { 116 error_report("Bad icount read"); 117 exit(1); 118 } 119 /* Take into account what has run */ 120 icount_update_locked(cpu); 121 } 122 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */ 123 return qatomic_read_i64(&timers_state.qemu_icount); 124 } 125 126 static int64_t icount_get_locked(void) 127 { 128 int64_t icount = icount_get_raw_locked(); 129 return qatomic_read_i64(&timers_state.qemu_icount_bias) + 130 icount_to_ns(icount); 131 } 132 133 int64_t icount_get_raw(void) 134 { 135 int64_t icount; 136 unsigned start; 137 138 do { 139 start = seqlock_read_begin(&timers_state.vm_clock_seqlock); 140 icount = icount_get_raw_locked(); 141 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); 142 143 return icount; 144 } 145 146 /* Return the virtual CPU time, based on the instruction counter. */ 147 int64_t icount_get(void) 148 { 149 int64_t icount; 150 unsigned start; 151 152 do { 153 start = seqlock_read_begin(&timers_state.vm_clock_seqlock); 154 icount = icount_get_locked(); 155 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); 156 157 return icount; 158 } 159 160 int64_t icount_to_ns(int64_t icount) 161 { 162 return icount << qatomic_read(&timers_state.icount_time_shift); 163 } 164 165 /* 166 * Correlation between real and virtual time is always going to be 167 * fairly approximate, so ignore small variation. 168 * When the guest is idle real and virtual time will be aligned in 169 * the IO wait loop. 170 */ 171 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10) 172 173 static void icount_adjust(void) 174 { 175 int64_t cur_time; 176 int64_t cur_icount; 177 int64_t delta; 178 179 /* Protected by TimersState mutex. */ 180 static int64_t last_delta; 181 182 /* If the VM is not running, then do nothing. */ 183 if (!runstate_is_running()) { 184 return; 185 } 186 187 seqlock_write_lock(&timers_state.vm_clock_seqlock, 188 &timers_state.vm_clock_lock); 189 cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, 190 cpu_get_clock_locked()); 191 cur_icount = icount_get_locked(); 192 193 delta = cur_icount - cur_time; 194 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */ 195 if (delta > 0 196 && last_delta + ICOUNT_WOBBLE < delta * 2 197 && timers_state.icount_time_shift > 0) { 198 /* The guest is getting too far ahead. Slow time down. */ 199 qatomic_set(&timers_state.icount_time_shift, 200 timers_state.icount_time_shift - 1); 201 } 202 if (delta < 0 203 && last_delta - ICOUNT_WOBBLE > delta * 2 204 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) { 205 /* The guest is getting too far behind. Speed time up. */ 206 qatomic_set(&timers_state.icount_time_shift, 207 timers_state.icount_time_shift + 1); 208 } 209 last_delta = delta; 210 qatomic_set_i64(&timers_state.qemu_icount_bias, 211 cur_icount - (timers_state.qemu_icount 212 << timers_state.icount_time_shift)); 213 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 214 &timers_state.vm_clock_lock); 215 } 216 217 static void icount_adjust_rt(void *opaque) 218 { 219 timer_mod(timers_state.icount_rt_timer, 220 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); 221 icount_adjust(); 222 } 223 224 static void icount_adjust_vm(void *opaque) 225 { 226 timer_mod(timers_state.icount_vm_timer, 227 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 228 NANOSECONDS_PER_SECOND / 10); 229 icount_adjust(); 230 } 231 232 int64_t icount_round(int64_t count) 233 { 234 int shift = qatomic_read(&timers_state.icount_time_shift); 235 return (count + (1 << shift) - 1) >> shift; 236 } 237 238 static void icount_warp_rt(void) 239 { 240 unsigned seq; 241 int64_t warp_start; 242 243 /* 244 * The icount_warp_timer is rescheduled soon after vm_clock_warp_start 245 * changes from -1 to another value, so the race here is okay. 246 */ 247 do { 248 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock); 249 warp_start = timers_state.vm_clock_warp_start; 250 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq)); 251 252 if (warp_start == -1) { 253 return; 254 } 255 256 seqlock_write_lock(&timers_state.vm_clock_seqlock, 257 &timers_state.vm_clock_lock); 258 if (runstate_is_running()) { 259 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, 260 cpu_get_clock_locked()); 261 int64_t warp_delta; 262 263 warp_delta = clock - timers_state.vm_clock_warp_start; 264 if (icount_enabled() == 2) { 265 /* 266 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too 267 * far ahead of real time. 268 */ 269 int64_t cur_icount = icount_get_locked(); 270 int64_t delta = clock - cur_icount; 271 warp_delta = MIN(warp_delta, delta); 272 } 273 qatomic_set_i64(&timers_state.qemu_icount_bias, 274 timers_state.qemu_icount_bias + warp_delta); 275 } 276 timers_state.vm_clock_warp_start = -1; 277 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 278 &timers_state.vm_clock_lock); 279 280 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) { 281 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 282 } 283 } 284 285 static void icount_timer_cb(void *opaque) 286 { 287 /* 288 * No need for a checkpoint because the timer already synchronizes 289 * with CHECKPOINT_CLOCK_VIRTUAL_RT. 290 */ 291 icount_warp_rt(); 292 } 293 294 void icount_start_warp_timer(void) 295 { 296 int64_t clock; 297 int64_t deadline; 298 299 assert(icount_enabled()); 300 301 /* 302 * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers 303 * do not fire, so computing the deadline does not make sense. 304 */ 305 if (!runstate_is_running()) { 306 return; 307 } 308 309 if (replay_mode != REPLAY_MODE_PLAY) { 310 if (!all_cpu_threads_idle()) { 311 return; 312 } 313 314 if (qtest_enabled()) { 315 /* When testing, qtest commands advance icount. */ 316 return; 317 } 318 319 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START); 320 } else { 321 /* warp clock deterministically in record/replay mode */ 322 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) { 323 /* 324 * vCPU is sleeping and warp can't be started. 325 * It is probably a race condition: notification sent 326 * to vCPU was processed in advance and vCPU went to sleep. 327 * Therefore we have to wake it up for doing someting. 328 */ 329 if (replay_has_checkpoint()) { 330 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 331 } 332 return; 333 } 334 } 335 336 /* We want to use the earliest deadline from ALL vm_clocks */ 337 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); 338 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, 339 ~QEMU_TIMER_ATTR_EXTERNAL); 340 if (deadline < 0) { 341 static bool notified; 342 if (!icount_sleep && !notified) { 343 warn_report("icount sleep disabled and no active timers"); 344 notified = true; 345 } 346 return; 347 } 348 349 if (deadline > 0) { 350 /* 351 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to 352 * sleep. Otherwise, the CPU might be waiting for a future timer 353 * interrupt to wake it up, but the interrupt never comes because 354 * the vCPU isn't running any insns and thus doesn't advance the 355 * QEMU_CLOCK_VIRTUAL. 356 */ 357 if (!icount_sleep) { 358 /* 359 * We never let VCPUs sleep in no sleep icount mode. 360 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance 361 * to the next QEMU_CLOCK_VIRTUAL event and notify it. 362 * It is useful when we want a deterministic execution time, 363 * isolated from host latencies. 364 */ 365 seqlock_write_lock(&timers_state.vm_clock_seqlock, 366 &timers_state.vm_clock_lock); 367 qatomic_set_i64(&timers_state.qemu_icount_bias, 368 timers_state.qemu_icount_bias + deadline); 369 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 370 &timers_state.vm_clock_lock); 371 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 372 } else { 373 /* 374 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some 375 * "real" time, (related to the time left until the next event) has 376 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this. 377 * This avoids that the warps are visible externally; for example, 378 * you will not be sending network packets continuously instead of 379 * every 100ms. 380 */ 381 seqlock_write_lock(&timers_state.vm_clock_seqlock, 382 &timers_state.vm_clock_lock); 383 if (timers_state.vm_clock_warp_start == -1 384 || timers_state.vm_clock_warp_start > clock) { 385 timers_state.vm_clock_warp_start = clock; 386 } 387 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 388 &timers_state.vm_clock_lock); 389 timer_mod_anticipate(timers_state.icount_warp_timer, 390 clock + deadline); 391 } 392 } else if (deadline == 0) { 393 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 394 } 395 } 396 397 void icount_account_warp_timer(void) 398 { 399 if (!icount_sleep) { 400 return; 401 } 402 403 /* 404 * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers 405 * do not fire, so computing the deadline does not make sense. 406 */ 407 if (!runstate_is_running()) { 408 return; 409 } 410 411 /* warp clock deterministically in record/replay mode */ 412 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) { 413 return; 414 } 415 416 timer_del(timers_state.icount_warp_timer); 417 icount_warp_rt(); 418 } 419 420 void icount_configure(QemuOpts *opts, Error **errp) 421 { 422 const char *option = qemu_opt_get(opts, "shift"); 423 bool sleep = qemu_opt_get_bool(opts, "sleep", true); 424 bool align = qemu_opt_get_bool(opts, "align", false); 425 long time_shift = -1; 426 427 if (!option) { 428 if (qemu_opt_get(opts, "align") != NULL) { 429 error_setg(errp, "Please specify shift option when using align"); 430 } 431 return; 432 } 433 434 if (align && !sleep) { 435 error_setg(errp, "align=on and sleep=off are incompatible"); 436 return; 437 } 438 439 if (strcmp(option, "auto") != 0) { 440 if (qemu_strtol(option, NULL, 0, &time_shift) < 0 441 || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) { 442 error_setg(errp, "icount: Invalid shift value"); 443 return; 444 } 445 } else if (icount_align_option) { 446 error_setg(errp, "shift=auto and align=on are incompatible"); 447 return; 448 } else if (!icount_sleep) { 449 error_setg(errp, "shift=auto and sleep=off are incompatible"); 450 return; 451 } 452 453 icount_sleep = sleep; 454 if (icount_sleep) { 455 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, 456 icount_timer_cb, NULL); 457 } 458 459 icount_align_option = align; 460 461 if (time_shift >= 0) { 462 timers_state.icount_time_shift = time_shift; 463 icount_enable_precise(); 464 return; 465 } 466 467 icount_enable_adaptive(); 468 469 /* 470 * 125MIPS seems a reasonable initial guess at the guest speed. 471 * It will be corrected fairly quickly anyway. 472 */ 473 timers_state.icount_time_shift = 3; 474 475 /* 476 * Have both realtime and virtual time triggers for speed adjustment. 477 * The realtime trigger catches emulated time passing too slowly, 478 * the virtual time trigger catches emulated time passing too fast. 479 * Realtime triggers occur even when idle, so use them less frequently 480 * than VM triggers. 481 */ 482 timers_state.vm_clock_warp_start = -1; 483 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT, 484 icount_adjust_rt, NULL); 485 timer_mod(timers_state.icount_rt_timer, 486 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); 487 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 488 icount_adjust_vm, NULL); 489 timer_mod(timers_state.icount_vm_timer, 490 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 491 NANOSECONDS_PER_SECOND / 10); 492 } 493