/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "exec/exec-all.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/seqlock.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "hw/core/cpu.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/cpu-throttle.h"
#include "timers-state.h"

/*
 * ICOUNT: Instruction Counter
 *
 * this module is split off from cpu-timers because the icount part
 * is TCG-specific, and does not need to be built for other accels.
 */
static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

/*
 * 0 = Do not count executed instructions.
 * 1 = Fixed conversion of insn to ns via "shift" option
 * 2 = Runtime adaptive algorithm to compute shift
 */
int use_icount;

static void icount_enable_precise(void)
{
    use_icount = 1;
}

static void icount_enable_adaptive(void)
{
    use_icount = 2;
}

/*
 * The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t icount_get_executed(CPUState *cpu)
{
    return (cpu->icount_budget -
            (cpu->neg.icount_decr.u16.low + cpu->icount_extra));
}

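/*
 * Worked example (illustrative numbers only, not taken from a real run):
 * with icount_budget = 10000, icount_decr.u16.low = 1500 instructions still
 * pending in the current chunk and icount_extra = 2500 not yet handed out,
 * icount_get_executed() returns 10000 - (1500 + 2500) = 6000 instructions
 * actually executed so far.
 */
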
/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
static void icount_update_locked(CPUState *cpu)
{
    int64_t executed = icount_get_executed(cpu);
    cpu->icount_budget -= executed;

    qatomic_set_i64(&timers_state.qemu_icount,
                    timers_state.qemu_icount + executed);
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void icount_update(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    icount_update_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static int64_t icount_get_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->neg.can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        icount_update_locked(cpu);
    }
    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return qatomic_read_i64(&timers_state.qemu_icount);
}

static int64_t icount_get_locked(void)
{
    int64_t icount = icount_get_raw_locked();
    return qatomic_read_i64(&timers_state.qemu_icount_bias) +
        icount_to_ns(icount);
}

int64_t icount_get_raw(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = icount_get_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

/* Return the virtual CPU time, based on the instruction counter. */
int64_t icount_get(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = icount_get_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t icount_to_ns(int64_t icount)
{
    return icount << qatomic_read(&timers_state.icount_time_shift);
}

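/*
 * Illustration: icount_time_shift selects a fixed cost of 2^shift ns per
 * guest instruction.  shift=3 means 8 ns per instruction, i.e. ~125 MIPS
 * (the initial guess used by adaptive mode below), while the maximum
 * MAX_ICOUNT_SHIFT=10 means 1024 ns per instruction, i.e. roughly the
 * 1 MIPS floor mentioned above.
 */
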
/*
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so ignore small variation.
 * When the guest is idle real and virtual time will be aligned in
 * the IO wait loop.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                   cpu_get_clock_locked());
    cur_icount = icount_get_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift + 1);
    }
    timers_state.last_delta = delta;
    qatomic_set_i64(&timers_state.qemu_icount_bias,
                    cur_icount - (timers_state.qemu_icount
                                  << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

int64_t icount_round(int64_t count)
{
    int shift = qatomic_read(&timers_state.icount_time_shift);
    return (count + (1 << shift) - 1) >> shift;
}

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /*
     * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (icount_enabled() == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
             * ahead of real time (it might already be ahead so careful not
             * to go backwards).
             */
            int64_t cur_icount = icount_get_locked();
            int64_t delta = clock - cur_icount;

            if (delta < 0) {
                delta = 0;
            }
            warp_delta = MIN(warp_delta, delta);
        }
        qatomic_set_i64(&timers_state.qemu_icount_bias,
                        timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

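/*
 * Summary of the warp mechanism (derived from the comments below):
 * QEMU_CLOCK_VIRTUAL only advances while vCPUs execute instructions, so
 * when every vCPU is idle the clock would stall and the timer interrupt
 * meant to wake the guest would never fire.  Depending on icount_sleep we
 * either jump the clock to the next QEMU_CLOCK_VIRTUAL deadline right away,
 * or arm icount_warp_timer on QEMU_CLOCK_VIRTUAL_RT so the skipped interval
 * is folded into qemu_icount_bias by icount_warp_rt() once enough real time
 * has passed.
 */
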
void icount_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    assert(icount_enabled());

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount. */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /*
             * The vCPU is sleeping and the warp can't be started.
             * This is probably a race condition: the notification sent
             * to the vCPU was processed early and the vCPU went to sleep.
             * Therefore we have to wake it up so it can do something.
             */
            if (replay_has_event()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                          ~QEMU_TIMER_ATTR_EXTERNAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep. Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let vCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * This is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            qatomic_set_i64(&timers_state.qemu_icount_bias,
                            timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop vCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * we will not send network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

void icount_account_warp_timer(void)
{
    if (!icount_sleep) {
        return;
    }

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    replay_async_events();

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}

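/*
 * icount_configure() parses the -icount option group.  Illustrative
 * command lines matching the suboptions handled below (examples only,
 * not an exhaustive list):
 *
 *   -icount shift=7,sleep=on      fixed cost of 2^7 = 128 ns per insn
 *   -icount shift=auto,align=off  adaptive shift, no host/guest alignment
 *
 * align=on additionally requires a fixed shift and sleep=on, as enforced
 * by the error checks below.
 */
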
void icount_configure(QemuOpts *opts, Error **errp)
{
    const char *option = qemu_opt_get(opts, "shift");
    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
    bool align = qemu_opt_get_bool(opts, "align", false);
    long time_shift = -1;

    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    if (align && !sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
        return;
    }

    if (strcmp(option, "auto") != 0) {
        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
            error_setg(errp, "icount: Invalid shift value");
            return;
        }
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
        return;
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
        return;
    }

    icount_sleep = sleep;
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = align;

    if (time_shift >= 0) {
        timers_state.icount_time_shift = time_shift;
        icount_enable_precise();
        return;
    }

    icount_enable_adaptive();

    /*
     * 125MIPS seems a reasonable initial guess at the guest speed.
     * It will be corrected fairly quickly anyway.
     */
    timers_state.icount_time_shift = 3;

    /*
     * Have both realtime and virtual time triggers for speed adjustment.
     * The realtime trigger catches emulated time passing too slowly,
     * the virtual time trigger catches emulated time passing too fast.
     * Realtime triggers occur even when idle, so use them less frequently
     * than VM triggers.
     */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}

void icount_notify_exit(void)
{
    if (icount_enabled() && current_cpu) {
        qemu_cpu_kick(current_cpu);
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}