1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu/osdep.h"
26 #include "qemu/cutils.h"
27 #include "migration/vmstate.h"
28 #include "qapi/error.h"
29 #include "qemu/error-report.h"
30 #include "system/cpus.h"
31 #include "system/qtest.h"
32 #include "qemu/main-loop.h"
33 #include "qemu/option.h"
34 #include "qemu/seqlock.h"
35 #include "system/replay.h"
36 #include "system/runstate.h"
37 #include "hw/core/cpu.h"
38 #include "exec/icount.h"
39 #include "system/cpu-timers-internal.h"
40
41 /*
42 * ICOUNT: Instruction Counter
43 *
44 * this module is split off from cpu-timers because the icount part
45 * is TCG-specific, and does not need to be built for other accels.
46 */
/*
 * Whether vCPUs may sleep while waiting for the next timer deadline;
 * cleared via the "sleep=off" suboption in icount_configure().
 */
static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

/* Set from the "align" suboption in icount_configure() */
bool icount_align_option;

/* Do not count executed instructions */
ICountMode use_icount = ICOUNT_DISABLED;
55
icount_enable_precise(void)56 static void icount_enable_precise(void)
57 {
58 /* Fixed conversion of insn to ns via "shift" option */
59 use_icount = ICOUNT_PRECISE;
60 }
61
icount_enable_adaptive(void)62 static void icount_enable_adaptive(void)
63 {
64 /* Runtime adaptive algorithm to compute shift */
65 use_icount = ICOUNT_ADAPTATIVE;
66 }
67
68 /*
69 * The current number of executed instructions is based on what we
70 * originally budgeted minus the current state of the decrementing
71 * icount counters in extra/u16.low.
72 */
/*
 * Number of instructions this vCPU has actually executed: the budget
 * it was originally given, minus what is still outstanding in the
 * decrementing u16.low counter and in icount_extra.
 */
static int64_t icount_get_executed(CPUState *cpu)
{
    int64_t outstanding = cpu->neg.icount_decr.u16.low + cpu->icount_extra;

    return cpu->icount_budget - outstanding;
}
78
79 /*
80 * Update the global shared timer_state.qemu_icount to take into
81 * account executed instructions. This is done by the TCG vCPU
82 * thread so the main-loop can see time has moved forward.
83 */
/*
 * Fold the instructions this vCPU has executed into the global shared
 * timers_state.qemu_icount, so the main loop can see time has moved
 * forward.  Caller holds the vm_clock seqlock write side.
 */
static void icount_update_locked(CPUState *cpu)
{
    int64_t done = icount_get_executed(cpu);

    cpu->icount_budget -= done;
    qatomic_set_i64(&timers_state.qemu_icount,
                    timers_state.qemu_icount + done);
}
92
93 /*
94 * Update the global shared timer_state.qemu_icount to take into
95 * account executed instructions. This is done by the TCG vCPU
96 * thread so the main-loop can see time has moved forward.
97 */
/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void icount_update(CPUState *cpu)
{
    /* Writer side of the vm_clock seqlock: excludes concurrent readers */
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    icount_update_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}
106
/*
 * Return the raw instruction counter, first folding in whatever the
 * currently running vCPU has executed since the last update.  Expects
 * the vm_clock seqlock to be held by the caller (see icount_get_raw()).
 */
static int64_t icount_get_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->neg.can_do_io) {
            /* Fatal: icount must not be read while can_do_io is clear */
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        icount_update_locked(cpu);
    }
    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return qatomic_read_i64(&timers_state.qemu_icount);
}
122
icount_get_locked(void)123 static int64_t icount_get_locked(void)
124 {
125 int64_t icount = icount_get_raw_locked();
126 return qatomic_read_i64(&timers_state.qemu_icount_bias) +
127 icount_to_ns(icount);
128 }
129
icount_get_raw(void)130 int64_t icount_get_raw(void)
131 {
132 int64_t icount;
133 unsigned start;
134
135 do {
136 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
137 icount = icount_get_raw_locked();
138 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
139
140 return icount;
141 }
142
143 /* Return the virtual CPU time, based on the instruction counter. */
icount_get(void)144 int64_t icount_get(void)
145 {
146 int64_t icount;
147 unsigned start;
148
149 do {
150 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
151 icount = icount_get_locked();
152 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
153
154 return icount;
155 }
156
icount_to_ns(int64_t icount)157 int64_t icount_to_ns(int64_t icount)
158 {
159 return icount << qatomic_read(&timers_state.icount_time_shift);
160 }
161
/*
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so ignore small variation.
 * When the guest is idle real and virtual time will be aligned in
 * the IO wait loop.
 * This tolerance (100ms) is the hysteresis used by icount_adjust().
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
169
/*
 * Periodic feedback loop for ICOUNT_ADAPTATIVE mode: compare virtual
 * time against the (replay-aware) real-time clock and nudge
 * icount_time_shift up or down to keep them roughly in step.
 */
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                   cpu_get_clock_locked());
    cur_icount = icount_get_locked();

    /* delta > 0: guest clock ahead of real time; < 0: behind */
    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift + 1);
    }
    timers_state.last_delta = delta;
    /* Re-base the bias so changing the shift does not move the clock */
    qatomic_set_i64(&timers_state.qemu_icount_bias,
                    cur_icount - (timers_state.qemu_icount
                                  << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}
210
icount_adjust_rt(void * opaque)211 static void icount_adjust_rt(void *opaque)
212 {
213 timer_mod(timers_state.icount_rt_timer,
214 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
215 icount_adjust();
216 }
217
icount_adjust_vm(void * opaque)218 static void icount_adjust_vm(void *opaque)
219 {
220 timer_mod(timers_state.icount_vm_timer,
221 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
222 NANOSECONDS_PER_SECOND / 10);
223 icount_adjust();
224 }
225
icount_round(int64_t count)226 int64_t icount_round(int64_t count)
227 {
228 int shift = qatomic_read(&timers_state.icount_time_shift);
229 return (count + (1 << shift) - 1) >> shift;
230 }
231
/*
 * Complete a pending clock warp: fold the real time slept since
 * vm_clock_warp_start into qemu_icount_bias, then reset the warp
 * start marker so a new warp can be scheduled.
 */
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /*
     * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        /* No warp in progress */
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        /* Real time elapsed since the warp was started */
        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (icount_enabled() == ICOUNT_ADAPTATIVE) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
             * ahead of real time (it might already be ahead so careful not
             * to go backwards).
             */
            int64_t cur_icount = icount_get_locked();
            int64_t delta = clock - cur_icount;

            if (delta < 0) {
                delta = 0;
            }
            warp_delta = MIN(warp_delta, delta);
        }
        qatomic_set_i64(&timers_state.qemu_icount_bias,
                        timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    /* The warp may have pushed the clock past a pending timer deadline */
    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
283
/* QEMU_CLOCK_VIRTUAL_RT callback armed by icount_start_warp_timer() */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
292
/*
 * Begin warping QEMU_CLOCK_VIRTUAL towards the next timer deadline.
 * With sleep=off the clock is advanced immediately; otherwise
 * icount_warp_timer is armed so icount_warp_rt() later accounts the
 * slept real time.  Called when vCPUs go idle (and deterministically
 * through checkpoints in record/replay mode).
 */
void icount_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    assert(icount_enabled());

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        /* Only warp when every vCPU is idle */
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount. */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /*
             * vCPU is sleeping and warp can't be started.
             * It is probably a race condition: notification sent
             * to vCPU was processed in advance and vCPU went to sleep.
             * Therefore we have to wake it up for doing something.
             */
            if (replay_has_event()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                          ~QEMU_TIMER_ATTR_EXTERNAL);
    if (deadline < 0) {
        /* No timer pending at all: nothing to warp towards */
        if (!icount_sleep) {
            warn_report_once("icount sleep disabled and no active timers");
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep. Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            qatomic_set_i64(&timers_state.qemu_icount_bias,
                            timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        /* A timer is already due: just wake up the clock's users */
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
393
/*
 * Settle any pending clock warp before vCPUs resume execution: cancel
 * the warp timer and fold the elapsed real time into the virtual clock
 * via icount_warp_rt().
 */
void icount_account_warp_timer(void)
{
    if (!icount_sleep) {
        /* sleep=off mode advances the clock immediately; no warp pending */
        return;
    }

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    replay_async_events();

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}
418
/*
 * Parse the -icount option group and initialise icount state.
 *
 * "shift" is either a fixed power-of-two exponent (ICOUNT_PRECISE mode)
 * or "auto" (ICOUNT_ADAPTATIVE mode with periodic re-tuning); "sleep"
 * controls whether vCPUs may sleep waiting for timer deadlines; "align"
 * enables host/guest clock alignment.
 *
 * Returns true on success (including the no-op case where "shift" was
 * not given); on failure sets *errp and returns false.
 */
bool icount_configure(QemuOpts *opts, Error **errp)
{
    const char *option = qemu_opt_get(opts, "shift");
    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
    bool align = qemu_opt_get_bool(opts, "align", false);
    long time_shift = -1;

    if (!option) {
        /* No "shift": icount stays disabled; "align" alone is an error */
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
            return false;
        }
        return true;
    }

    if (align && !sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
        return false;
    }

    if (strcmp(option, "auto") != 0) {
        /* Fixed shift: must parse as a number in [0, MAX_ICOUNT_SHIFT] */
        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
            error_setg(errp, "icount: Invalid shift value");
            return false;
        }
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
        return false;
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
        return false;
    }

    icount_sleep = sleep;
    if (icount_sleep) {
        /* Timer that completes warps started by icount_start_warp_timer() */
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = align;

    if (time_shift >= 0) {
        /* Fixed shift given: precise mode, no adjustment timers needed */
        timers_state.icount_time_shift = time_shift;
        icount_enable_precise();
        return true;
    }

    icount_enable_adaptive();

    /*
     * 125MIPS seems a reasonable initial guess at the guest speed.
     * It will be corrected fairly quickly anyway.
     */
    timers_state.icount_time_shift = 3;

    /*
     * Have both realtime and virtual time triggers for speed adjustment.
     * The realtime trigger catches emulated time passing too slowly,
     * the virtual time trigger catches emulated time passing too fast.
     * Realtime triggers occur even when idle, so use them less frequently
     * than VM triggers.
     */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    return true;
}
494
icount_notify_exit(void)495 void icount_notify_exit(void)
496 {
497 assert(icount_enabled());
498
499 if (current_cpu) {
500 qemu_cpu_kick(current_cpu);
501 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
502 }
503 }
504