xref: /qemu/accel/tcg/icount-common.c (revision 161f5bc8e965fa8255db435683e6b52042037bb7)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu/cutils.h"
27 #include "migration/vmstate.h"
28 #include "qapi/error.h"
29 #include "qemu/error-report.h"
30 #include "system/cpus.h"
31 #include "system/qtest.h"
32 #include "qemu/main-loop.h"
33 #include "qemu/option.h"
34 #include "qemu/seqlock.h"
35 #include "system/replay.h"
36 #include "system/runstate.h"
37 #include "hw/core/cpu.h"
38 #include "exec/icount.h"
39 #include "system/cpu-timers-internal.h"
40 
/*
 * ICOUNT: Instruction Counter
 *
 * This module is split off from cpu-timers because the icount part
 * is TCG-specific and does not need to be built for other accelerators.
 */
47 static bool icount_sleep = true;
48 /* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
49 #define MAX_ICOUNT_SHIFT 10
50 
51 bool icount_align_option;
52 
53 /* Do not count executed instructions */
54 ICountMode use_icount = ICOUNT_DISABLED;
55 
icount_enable_precise(void)56 static void icount_enable_precise(void)
57 {
58     /* Fixed conversion of insn to ns via "shift" option */
59     use_icount = ICOUNT_PRECISE;
60 }
61 
icount_enable_adaptive(void)62 static void icount_enable_adaptive(void)
63 {
64     /* Runtime adaptive algorithm to compute shift */
65     use_icount = ICOUNT_ADAPTATIVE;
66 }
67 
68 /*
69  * The current number of executed instructions is based on what we
70  * originally budgeted minus the current state of the decrementing
71  * icount counters in extra/u16.low.
72  */
icount_get_executed(CPUState * cpu)73 static int64_t icount_get_executed(CPUState *cpu)
74 {
75     return (cpu->icount_budget -
76             (cpu->neg.icount_decr.u16.low + cpu->icount_extra));
77 }
78 
79 /*
80  * Update the global shared timer_state.qemu_icount to take into
81  * account executed instructions. This is done by the TCG vCPU
82  * thread so the main-loop can see time has moved forward.
83  */
icount_update_locked(CPUState * cpu)84 static void icount_update_locked(CPUState *cpu)
85 {
86     int64_t executed = icount_get_executed(cpu);
87     cpu->icount_budget -= executed;
88 
89     qatomic_set_i64(&timers_state.qemu_icount,
90                     timers_state.qemu_icount + executed);
91 }
92 
93 /*
94  * Update the global shared timer_state.qemu_icount to take into
95  * account executed instructions. This is done by the TCG vCPU
96  * thread so the main-loop can see time has moved forward.
97  */
icount_update(CPUState * cpu)98 void icount_update(CPUState *cpu)
99 {
100     seqlock_write_lock(&timers_state.vm_clock_seqlock,
101                        &timers_state.vm_clock_lock);
102     icount_update_locked(cpu);
103     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
104                          &timers_state.vm_clock_lock);
105 }
106 
icount_get_raw_locked(void)107 static int64_t icount_get_raw_locked(void)
108 {
109     CPUState *cpu = current_cpu;
110 
111     if (cpu && cpu->running) {
112         if (!cpu->neg.can_do_io) {
113             error_report("Bad icount read");
114             exit(1);
115         }
116         /* Take into account what has run */
117         icount_update_locked(cpu);
118     }
119     /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
120     return qatomic_read_i64(&timers_state.qemu_icount);
121 }
122 
icount_get_locked(void)123 static int64_t icount_get_locked(void)
124 {
125     int64_t icount = icount_get_raw_locked();
126     return qatomic_read_i64(&timers_state.qemu_icount_bias) +
127         icount_to_ns(icount);
128 }
129 
icount_get_raw(void)130 int64_t icount_get_raw(void)
131 {
132     int64_t icount;
133     unsigned start;
134 
135     do {
136         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
137         icount = icount_get_raw_locked();
138     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
139 
140     return icount;
141 }
142 
143 /* Return the virtual CPU time, based on the instruction counter.  */
icount_get(void)144 int64_t icount_get(void)
145 {
146     int64_t icount;
147     unsigned start;
148 
149     do {
150         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
151         icount = icount_get_locked();
152     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
153 
154     return icount;
155 }
156 
icount_to_ns(int64_t icount)157 int64_t icount_to_ns(int64_t icount)
158 {
159     return icount << qatomic_read(&timers_state.icount_time_shift);
160 }
161 
162 /*
163  * Correlation between real and virtual time is always going to be
164  * fairly approximate, so ignore small variation.
165  * When the guest is idle real and virtual time will be aligned in
166  * the IO wait loop.
167  */
168 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
169 
icount_adjust(void)170 static void icount_adjust(void)
171 {
172     int64_t cur_time;
173     int64_t cur_icount;
174     int64_t delta;
175 
176     /* If the VM is not running, then do nothing.  */
177     if (!runstate_is_running()) {
178         return;
179     }
180 
181     seqlock_write_lock(&timers_state.vm_clock_seqlock,
182                        &timers_state.vm_clock_lock);
183     cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
184                                    cpu_get_clock_locked());
185     cur_icount = icount_get_locked();
186 
187     delta = cur_icount - cur_time;
188     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
189     if (delta > 0
190         && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
191         && timers_state.icount_time_shift > 0) {
192         /* The guest is getting too far ahead.  Slow time down.  */
193         qatomic_set(&timers_state.icount_time_shift,
194                     timers_state.icount_time_shift - 1);
195     }
196     if (delta < 0
197         && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
198         && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
199         /* The guest is getting too far behind.  Speed time up.  */
200         qatomic_set(&timers_state.icount_time_shift,
201                     timers_state.icount_time_shift + 1);
202     }
203     timers_state.last_delta = delta;
204     qatomic_set_i64(&timers_state.qemu_icount_bias,
205                     cur_icount - (timers_state.qemu_icount
206                                   << timers_state.icount_time_shift));
207     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
208                          &timers_state.vm_clock_lock);
209 }
210 
icount_adjust_rt(void * opaque)211 static void icount_adjust_rt(void *opaque)
212 {
213     timer_mod(timers_state.icount_rt_timer,
214               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
215     icount_adjust();
216 }
217 
icount_adjust_vm(void * opaque)218 static void icount_adjust_vm(void *opaque)
219 {
220     timer_mod(timers_state.icount_vm_timer,
221                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
222                    NANOSECONDS_PER_SECOND / 10);
223     icount_adjust();
224 }
225 
icount_round(int64_t count)226 int64_t icount_round(int64_t count)
227 {
228     int shift = qatomic_read(&timers_state.icount_time_shift);
229     return (count + (1 << shift) - 1) >> shift;
230 }
231 
icount_warp_rt(void)232 static void icount_warp_rt(void)
233 {
234     unsigned seq;
235     int64_t warp_start;
236 
237     /*
238      * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
239      * changes from -1 to another value, so the race here is okay.
240      */
241     do {
242         seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
243         warp_start = timers_state.vm_clock_warp_start;
244     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
245 
246     if (warp_start == -1) {
247         return;
248     }
249 
250     seqlock_write_lock(&timers_state.vm_clock_seqlock,
251                        &timers_state.vm_clock_lock);
252     if (runstate_is_running()) {
253         int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
254                                             cpu_get_clock_locked());
255         int64_t warp_delta;
256 
257         warp_delta = clock - timers_state.vm_clock_warp_start;
258         if (icount_enabled() == ICOUNT_ADAPTATIVE) {
259             /*
260              * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
261              * ahead of real time (it might already be ahead so careful not
262              * to go backwards).
263              */
264             int64_t cur_icount = icount_get_locked();
265             int64_t delta = clock - cur_icount;
266 
267             if (delta < 0) {
268                 delta = 0;
269             }
270             warp_delta = MIN(warp_delta, delta);
271         }
272         qatomic_set_i64(&timers_state.qemu_icount_bias,
273                         timers_state.qemu_icount_bias + warp_delta);
274     }
275     timers_state.vm_clock_warp_start = -1;
276     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
277                        &timers_state.vm_clock_lock);
278 
279     if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
280         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
281     }
282 }
283 
/*
 * Callback of icount_warp_timer: account the pending warp.  @opaque is
 * unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
292 
icount_start_warp_timer(void)293 void icount_start_warp_timer(void)
294 {
295     int64_t clock;
296     int64_t deadline;
297 
298     assert(icount_enabled());
299 
300     /*
301      * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
302      * do not fire, so computing the deadline does not make sense.
303      */
304     if (!runstate_is_running()) {
305         return;
306     }
307 
308     if (replay_mode != REPLAY_MODE_PLAY) {
309         if (!all_cpu_threads_idle()) {
310             return;
311         }
312 
313         if (qtest_enabled()) {
314             /* When testing, qtest commands advance icount.  */
315             return;
316         }
317 
318         replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
319     } else {
320         /* warp clock deterministically in record/replay mode */
321         if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
322             /*
323              * vCPU is sleeping and warp can't be started.
324              * It is probably a race condition: notification sent
325              * to vCPU was processed in advance and vCPU went to sleep.
326              * Therefore we have to wake it up for doing something.
327              */
328             if (replay_has_event()) {
329                 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
330             }
331             return;
332         }
333     }
334 
335     /* We want to use the earliest deadline from ALL vm_clocks */
336     clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
337     deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
338                                           ~QEMU_TIMER_ATTR_EXTERNAL);
339     if (deadline < 0) {
340         if (!icount_sleep) {
341             warn_report_once("icount sleep disabled and no active timers");
342         }
343         return;
344     }
345 
346     if (deadline > 0) {
347         /*
348          * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
349          * sleep.  Otherwise, the CPU might be waiting for a future timer
350          * interrupt to wake it up, but the interrupt never comes because
351          * the vCPU isn't running any insns and thus doesn't advance the
352          * QEMU_CLOCK_VIRTUAL.
353          */
354         if (!icount_sleep) {
355             /*
356              * We never let VCPUs sleep in no sleep icount mode.
357              * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
358              * to the next QEMU_CLOCK_VIRTUAL event and notify it.
359              * It is useful when we want a deterministic execution time,
360              * isolated from host latencies.
361              */
362             seqlock_write_lock(&timers_state.vm_clock_seqlock,
363                                &timers_state.vm_clock_lock);
364             qatomic_set_i64(&timers_state.qemu_icount_bias,
365                             timers_state.qemu_icount_bias + deadline);
366             seqlock_write_unlock(&timers_state.vm_clock_seqlock,
367                                  &timers_state.vm_clock_lock);
368             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
369         } else {
370             /*
371              * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
372              * "real" time, (related to the time left until the next event) has
373              * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
374              * This avoids that the warps are visible externally; for example,
375              * you will not be sending network packets continuously instead of
376              * every 100ms.
377              */
378             seqlock_write_lock(&timers_state.vm_clock_seqlock,
379                                &timers_state.vm_clock_lock);
380             if (timers_state.vm_clock_warp_start == -1
381                 || timers_state.vm_clock_warp_start > clock) {
382                 timers_state.vm_clock_warp_start = clock;
383             }
384             seqlock_write_unlock(&timers_state.vm_clock_seqlock,
385                                  &timers_state.vm_clock_lock);
386             timer_mod_anticipate(timers_state.icount_warp_timer,
387                                  clock + deadline);
388         }
389     } else if (deadline == 0) {
390         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
391     }
392 }
393 
icount_account_warp_timer(void)394 void icount_account_warp_timer(void)
395 {
396     if (!icount_sleep) {
397         return;
398     }
399 
400     /*
401      * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
402      * do not fire, so computing the deadline does not make sense.
403      */
404     if (!runstate_is_running()) {
405         return;
406     }
407 
408     replay_async_events();
409 
410     /* warp clock deterministically in record/replay mode */
411     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
412         return;
413     }
414 
415     timer_del(timers_state.icount_warp_timer);
416     icount_warp_rt();
417 }
418 
icount_configure(QemuOpts * opts,Error ** errp)419 bool icount_configure(QemuOpts *opts, Error **errp)
420 {
421     const char *option = qemu_opt_get(opts, "shift");
422     bool sleep = qemu_opt_get_bool(opts, "sleep", true);
423     bool align = qemu_opt_get_bool(opts, "align", false);
424     long time_shift = -1;
425 
426     if (!option) {
427         if (qemu_opt_get(opts, "align") != NULL) {
428             error_setg(errp, "Please specify shift option when using align");
429             return false;
430         }
431         return true;
432     }
433 
434     if (align && !sleep) {
435         error_setg(errp, "align=on and sleep=off are incompatible");
436         return false;
437     }
438 
439     if (strcmp(option, "auto") != 0) {
440         if (qemu_strtol(option, NULL, 0, &time_shift) < 0
441             || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
442             error_setg(errp, "icount: Invalid shift value");
443             return false;
444         }
445     } else if (icount_align_option) {
446         error_setg(errp, "shift=auto and align=on are incompatible");
447         return false;
448     } else if (!icount_sleep) {
449         error_setg(errp, "shift=auto and sleep=off are incompatible");
450         return false;
451     }
452 
453     icount_sleep = sleep;
454     if (icount_sleep) {
455         timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
456                                          icount_timer_cb, NULL);
457     }
458 
459     icount_align_option = align;
460 
461     if (time_shift >= 0) {
462         timers_state.icount_time_shift = time_shift;
463         icount_enable_precise();
464         return true;
465     }
466 
467     icount_enable_adaptive();
468 
469     /*
470      * 125MIPS seems a reasonable initial guess at the guest speed.
471      * It will be corrected fairly quickly anyway.
472      */
473     timers_state.icount_time_shift = 3;
474 
475     /*
476      * Have both realtime and virtual time triggers for speed adjustment.
477      * The realtime trigger catches emulated time passing too slowly,
478      * the virtual time trigger catches emulated time passing too fast.
479      * Realtime triggers occur even when idle, so use them less frequently
480      * than VM triggers.
481      */
482     timers_state.vm_clock_warp_start = -1;
483     timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
484                                    icount_adjust_rt, NULL);
485     timer_mod(timers_state.icount_rt_timer,
486                    qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
487     timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
488                                         icount_adjust_vm, NULL);
489     timer_mod(timers_state.icount_vm_timer,
490                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
491                    NANOSECONDS_PER_SECOND / 10);
492     return true;
493 }
494 
icount_notify_exit(void)495 void icount_notify_exit(void)
496 {
497     assert(icount_enabled());
498 
499     if (current_cpu) {
500         qemu_cpu_kick(current_cpu);
501         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
502     }
503 }
504