1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "qemu/osdep.h" 26 #include "qemu/cutils.h" 27 #include "migration/vmstate.h" 28 #include "qapi/error.h" 29 #include "qemu/error-report.h" 30 #include "system/cpus.h" 31 #include "system/qtest.h" 32 #include "qemu/main-loop.h" 33 #include "qemu/option.h" 34 #include "qemu/seqlock.h" 35 #include "system/replay.h" 36 #include "system/runstate.h" 37 #include "hw/core/cpu.h" 38 #include "exec/icount.h" 39 #include "system/cpu-timers-internal.h" 40 41 /* 42 * ICOUNT: Instruction Counter 43 * 44 * this module is split off from cpu-timers because the icount part 45 * is TCG-specific, and does not need to be built for other accels. 46 */ 47 static bool icount_sleep = true; 48 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */ 49 #define MAX_ICOUNT_SHIFT 10 50 51 bool icount_align_option; 52 53 /* Do not count executed instructions */ 54 ICountMode use_icount = ICOUNT_DISABLED; 55 56 static void icount_enable_precise(void) 57 { 58 /* Fixed conversion of insn to ns via "shift" option */ 59 use_icount = ICOUNT_PRECISE; 60 } 61 62 static void icount_enable_adaptive(void) 63 { 64 /* Runtime adaptive algorithm to compute shift */ 65 use_icount = ICOUNT_ADAPTATIVE; 66 } 67 68 /* 69 * The current number of executed instructions is based on what we 70 * originally budgeted minus the current state of the decrementing 71 * icount counters in extra/u16.low. 72 */ 73 static int64_t icount_get_executed(CPUState *cpu) 74 { 75 return (cpu->icount_budget - 76 (cpu->neg.icount_decr.u16.low + cpu->icount_extra)); 77 } 78 79 /* 80 * Update the global shared timer_state.qemu_icount to take into 81 * account executed instructions. This is done by the TCG vCPU 82 * thread so the main-loop can see time has moved forward. 83 */ 84 static void icount_update_locked(CPUState *cpu) 85 { 86 int64_t executed = icount_get_executed(cpu); 87 cpu->icount_budget -= executed; 88 89 qatomic_set_i64(&timers_state.qemu_icount, 90 timers_state.qemu_icount + executed); 91 } 92 93 /* 94 * Update the global shared timer_state.qemu_icount to take into 95 * account executed instructions. This is done by the TCG vCPU 96 * thread so the main-loop can see time has moved forward. 97 */ 98 void icount_update(CPUState *cpu) 99 { 100 seqlock_write_lock(&timers_state.vm_clock_seqlock, 101 &timers_state.vm_clock_lock); 102 icount_update_locked(cpu); 103 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 104 &timers_state.vm_clock_lock); 105 } 106 107 static int64_t icount_get_raw_locked(void) 108 { 109 CPUState *cpu = current_cpu; 110 111 if (cpu && cpu->running) { 112 if (!cpu->neg.can_do_io) { 113 error_report("Bad icount read"); 114 exit(1); 115 } 116 /* Take into account what has run */ 117 icount_update_locked(cpu); 118 } 119 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */ 120 return qatomic_read_i64(&timers_state.qemu_icount); 121 } 122 123 static int64_t icount_get_locked(void) 124 { 125 int64_t icount = icount_get_raw_locked(); 126 return qatomic_read_i64(&timers_state.qemu_icount_bias) + 127 icount_to_ns(icount); 128 } 129 130 int64_t icount_get_raw(void) 131 { 132 int64_t icount; 133 unsigned start; 134 135 do { 136 start = seqlock_read_begin(&timers_state.vm_clock_seqlock); 137 icount = icount_get_raw_locked(); 138 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); 139 140 return icount; 141 } 142 143 /* Return the virtual CPU time, based on the instruction counter. */ 144 int64_t icount_get(void) 145 { 146 int64_t icount; 147 unsigned start; 148 149 do { 150 start = seqlock_read_begin(&timers_state.vm_clock_seqlock); 151 icount = icount_get_locked(); 152 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); 153 154 return icount; 155 } 156 157 int64_t icount_to_ns(int64_t icount) 158 { 159 return icount << qatomic_read(&timers_state.icount_time_shift); 160 } 161 162 /* 163 * Correlation between real and virtual time is always going to be 164 * fairly approximate, so ignore small variation. 165 * When the guest is idle real and virtual time will be aligned in 166 * the IO wait loop. 167 */ 168 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10) 169 170 static void icount_adjust(void) 171 { 172 int64_t cur_time; 173 int64_t cur_icount; 174 int64_t delta; 175 176 /* If the VM is not running, then do nothing. */ 177 if (!runstate_is_running()) { 178 return; 179 } 180 181 seqlock_write_lock(&timers_state.vm_clock_seqlock, 182 &timers_state.vm_clock_lock); 183 cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, 184 cpu_get_clock_locked()); 185 cur_icount = icount_get_locked(); 186 187 delta = cur_icount - cur_time; 188 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */ 189 if (delta > 0 190 && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2 191 && timers_state.icount_time_shift > 0) { 192 /* The guest is getting too far ahead. Slow time down. */ 193 qatomic_set(&timers_state.icount_time_shift, 194 timers_state.icount_time_shift - 1); 195 } 196 if (delta < 0 197 && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2 198 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) { 199 /* The guest is getting too far behind. Speed time up. */ 200 qatomic_set(&timers_state.icount_time_shift, 201 timers_state.icount_time_shift + 1); 202 } 203 timers_state.last_delta = delta; 204 qatomic_set_i64(&timers_state.qemu_icount_bias, 205 cur_icount - (timers_state.qemu_icount 206 << timers_state.icount_time_shift)); 207 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 208 &timers_state.vm_clock_lock); 209 } 210 211 static void icount_adjust_rt(void *opaque) 212 { 213 timer_mod(timers_state.icount_rt_timer, 214 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); 215 icount_adjust(); 216 } 217 218 static void icount_adjust_vm(void *opaque) 219 { 220 timer_mod(timers_state.icount_vm_timer, 221 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 222 NANOSECONDS_PER_SECOND / 10); 223 icount_adjust(); 224 } 225 226 int64_t icount_round(int64_t count) 227 { 228 int shift = qatomic_read(&timers_state.icount_time_shift); 229 return (count + (1 << shift) - 1) >> shift; 230 } 231 232 static void icount_warp_rt(void) 233 { 234 unsigned seq; 235 int64_t warp_start; 236 237 /* 238 * The icount_warp_timer is rescheduled soon after vm_clock_warp_start 239 * changes from -1 to another value, so the race here is okay. 240 */ 241 do { 242 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock); 243 warp_start = timers_state.vm_clock_warp_start; 244 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq)); 245 246 if (warp_start == -1) { 247 return; 248 } 249 250 seqlock_write_lock(&timers_state.vm_clock_seqlock, 251 &timers_state.vm_clock_lock); 252 if (runstate_is_running()) { 253 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, 254 cpu_get_clock_locked()); 255 int64_t warp_delta; 256 257 warp_delta = clock - timers_state.vm_clock_warp_start; 258 if (icount_enabled() == ICOUNT_ADAPTATIVE) { 259 /* 260 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far 261 * ahead of real time (it might already be ahead so careful not 262 * to go backwards). 263 */ 264 int64_t cur_icount = icount_get_locked(); 265 int64_t delta = clock - cur_icount; 266 267 if (delta < 0) { 268 delta = 0; 269 } 270 warp_delta = MIN(warp_delta, delta); 271 } 272 qatomic_set_i64(&timers_state.qemu_icount_bias, 273 timers_state.qemu_icount_bias + warp_delta); 274 } 275 timers_state.vm_clock_warp_start = -1; 276 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 277 &timers_state.vm_clock_lock); 278 279 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) { 280 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 281 } 282 } 283 284 static void icount_timer_cb(void *opaque) 285 { 286 /* 287 * No need for a checkpoint because the timer already synchronizes 288 * with CHECKPOINT_CLOCK_VIRTUAL_RT. 289 */ 290 icount_warp_rt(); 291 } 292 293 void icount_start_warp_timer(void) 294 { 295 int64_t clock; 296 int64_t deadline; 297 298 assert(icount_enabled()); 299 300 /* 301 * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers 302 * do not fire, so computing the deadline does not make sense. 303 */ 304 if (!runstate_is_running()) { 305 return; 306 } 307 308 if (replay_mode != REPLAY_MODE_PLAY) { 309 if (!all_cpu_threads_idle()) { 310 return; 311 } 312 313 if (qtest_enabled()) { 314 /* When testing, qtest commands advance icount. */ 315 return; 316 } 317 318 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START); 319 } else { 320 /* warp clock deterministically in record/replay mode */ 321 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) { 322 /* 323 * vCPU is sleeping and warp can't be started. 324 * It is probably a race condition: notification sent 325 * to vCPU was processed in advance and vCPU went to sleep. 326 * Therefore we have to wake it up for doing something. 327 */ 328 if (replay_has_event()) { 329 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 330 } 331 return; 332 } 333 } 334 335 /* We want to use the earliest deadline from ALL vm_clocks */ 336 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); 337 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, 338 ~QEMU_TIMER_ATTR_EXTERNAL); 339 if (deadline < 0) { 340 if (!icount_sleep) { 341 warn_report_once("icount sleep disabled and no active timers"); 342 } 343 return; 344 } 345 346 if (deadline > 0) { 347 /* 348 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to 349 * sleep. Otherwise, the CPU might be waiting for a future timer 350 * interrupt to wake it up, but the interrupt never comes because 351 * the vCPU isn't running any insns and thus doesn't advance the 352 * QEMU_CLOCK_VIRTUAL. 353 */ 354 if (!icount_sleep) { 355 /* 356 * We never let VCPUs sleep in no sleep icount mode. 357 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance 358 * to the next QEMU_CLOCK_VIRTUAL event and notify it. 359 * It is useful when we want a deterministic execution time, 360 * isolated from host latencies. 361 */ 362 seqlock_write_lock(&timers_state.vm_clock_seqlock, 363 &timers_state.vm_clock_lock); 364 qatomic_set_i64(&timers_state.qemu_icount_bias, 365 timers_state.qemu_icount_bias + deadline); 366 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 367 &timers_state.vm_clock_lock); 368 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 369 } else { 370 /* 371 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some 372 * "real" time, (related to the time left until the next event) has 373 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this. 374 * This avoids that the warps are visible externally; for example, 375 * you will not be sending network packets continuously instead of 376 * every 100ms. 377 */ 378 seqlock_write_lock(&timers_state.vm_clock_seqlock, 379 &timers_state.vm_clock_lock); 380 if (timers_state.vm_clock_warp_start == -1 381 || timers_state.vm_clock_warp_start > clock) { 382 timers_state.vm_clock_warp_start = clock; 383 } 384 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 385 &timers_state.vm_clock_lock); 386 timer_mod_anticipate(timers_state.icount_warp_timer, 387 clock + deadline); 388 } 389 } else if (deadline == 0) { 390 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 391 } 392 } 393 394 void icount_account_warp_timer(void) 395 { 396 if (!icount_sleep) { 397 return; 398 } 399 400 /* 401 * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers 402 * do not fire, so computing the deadline does not make sense. 403 */ 404 if (!runstate_is_running()) { 405 return; 406 } 407 408 replay_async_events(); 409 410 /* warp clock deterministically in record/replay mode */ 411 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) { 412 return; 413 } 414 415 timer_del(timers_state.icount_warp_timer); 416 icount_warp_rt(); 417 } 418 419 bool icount_configure(QemuOpts *opts, Error **errp) 420 { 421 const char *option = qemu_opt_get(opts, "shift"); 422 bool sleep = qemu_opt_get_bool(opts, "sleep", true); 423 bool align = qemu_opt_get_bool(opts, "align", false); 424 long time_shift = -1; 425 426 if (!option) { 427 if (qemu_opt_get(opts, "align") != NULL) { 428 error_setg(errp, "Please specify shift option when using align"); 429 return false; 430 } 431 return true; 432 } 433 434 if (align && !sleep) { 435 error_setg(errp, "align=on and sleep=off are incompatible"); 436 return false; 437 } 438 439 if (strcmp(option, "auto") != 0) { 440 if (qemu_strtol(option, NULL, 0, &time_shift) < 0 441 || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) { 442 error_setg(errp, "icount: Invalid shift value"); 443 return false; 444 } 445 } else if (icount_align_option) { 446 error_setg(errp, "shift=auto and align=on are incompatible"); 447 return false; 448 } else if (!icount_sleep) { 449 error_setg(errp, "shift=auto and sleep=off are incompatible"); 450 return false; 451 } 452 453 icount_sleep = sleep; 454 if (icount_sleep) { 455 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, 456 icount_timer_cb, NULL); 457 } 458 459 icount_align_option = align; 460 461 if (time_shift >= 0) { 462 timers_state.icount_time_shift = time_shift; 463 icount_enable_precise(); 464 return true; 465 } 466 467 icount_enable_adaptive(); 468 469 /* 470 * 125MIPS seems a reasonable initial guess at the guest speed. 471 * It will be corrected fairly quickly anyway. 472 */ 473 timers_state.icount_time_shift = 3; 474 475 /* 476 * Have both realtime and virtual time triggers for speed adjustment. 477 * The realtime trigger catches emulated time passing too slowly, 478 * the virtual time trigger catches emulated time passing too fast. 479 * Realtime triggers occur even when idle, so use them less frequently 480 * than VM triggers. 481 */ 482 timers_state.vm_clock_warp_start = -1; 483 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT, 484 icount_adjust_rt, NULL); 485 timer_mod(timers_state.icount_rt_timer, 486 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); 487 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 488 icount_adjust_vm, NULL); 489 timer_mod(timers_state.icount_vm_timer, 490 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 491 NANOSECONDS_PER_SECOND / 10); 492 return true; 493 } 494 495 void icount_notify_exit(void) 496 { 497 assert(icount_enabled()); 498 499 if (current_cpu) { 500 qemu_cpu_kick(current_cpu); 501 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 502 } 503 } 504