1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu-common.h"
27 #include "qemu/cutils.h"
28 #include "migration/vmstate.h"
29 #include "qapi/error.h"
30 #include "qemu/error-report.h"
31 #include "exec/exec-all.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/qtest.h"
34 #include "qemu/main-loop.h"
35 #include "qemu/option.h"
36 #include "qemu/seqlock.h"
37 #include "sysemu/replay.h"
38 #include "sysemu/runstate.h"
39 #include "hw/core/cpu.h"
40 #include "sysemu/cpu-timers.h"
41 #include "sysemu/cpu-throttle.h"
42 #include "timers-state.h"
43 
44 /*
45  * ICOUNT: Instruction Counter
46  *
47  * this module is split off from cpu-timers because the icount part
48  * is TCG-specific, and does not need to be built for other accels.
49  */
/*
 * Whether the "sleep" behaviour is active.  Defaults to on and is
 * overwritten by icount_configure() from the "sleep" option.
 */
static bool icount_sleep = true;
/*
 * Arbitrarily pick 1MIPS as the minimum allowable speed.
 * The shift is the power of two applied by icount_to_ns(), so a shift of
 * 10 corresponds to 1024 time units per instruction.
 */
#define MAX_ICOUNT_SHIFT 10

/*
 * Current instruction counting mode, set by icount_enable_precise() and
 * icount_enable_adaptive():
 *
 * 0 = Do not count executed instructions.
 * 1 = Fixed conversion of insn to ns via "shift" option
 * 2 = Runtime adaptive algorithm to compute shift
 */
int use_icount;
60 
icount_enable_precise(void)61 static void icount_enable_precise(void)
62 {
63     use_icount = 1;
64 }
65 
icount_enable_adaptive(void)66 static void icount_enable_adaptive(void)
67 {
68     use_icount = 2;
69 }
70 
71 /*
72  * The current number of executed instructions is based on what we
73  * originally budgeted minus the current state of the decrementing
74  * icount counters in extra/u16.low.
75  */
icount_get_executed(CPUState * cpu)76 static int64_t icount_get_executed(CPUState *cpu)
77 {
78     return (cpu->icount_budget -
79             (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
80 }
81 
82 /*
83  * Update the global shared timer_state.qemu_icount to take into
84  * account executed instructions. This is done by the TCG vCPU
85  * thread so the main-loop can see time has moved forward.
86  */
icount_update_locked(CPUState * cpu)87 static void icount_update_locked(CPUState *cpu)
88 {
89     int64_t executed = icount_get_executed(cpu);
90     cpu->icount_budget -= executed;
91 
92     qatomic_set_i64(&timers_state.qemu_icount,
93                     timers_state.qemu_icount + executed);
94 }
95 
96 /*
97  * Update the global shared timer_state.qemu_icount to take into
98  * account executed instructions. This is done by the TCG vCPU
99  * thread so the main-loop can see time has moved forward.
100  */
icount_update(CPUState * cpu)101 void icount_update(CPUState *cpu)
102 {
103     seqlock_write_lock(&timers_state.vm_clock_seqlock,
104                        &timers_state.vm_clock_lock);
105     icount_update_locked(cpu);
106     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
107                          &timers_state.vm_clock_lock);
108 }
109 
icount_get_raw_locked(void)110 static int64_t icount_get_raw_locked(void)
111 {
112     CPUState *cpu = current_cpu;
113 
114     if (cpu && cpu->running) {
115         if (!cpu->can_do_io) {
116             error_report("Bad icount read");
117             exit(1);
118         }
119         /* Take into account what has run */
120         icount_update_locked(cpu);
121     }
122     /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
123     return qatomic_read_i64(&timers_state.qemu_icount);
124 }
125 
icount_get_locked(void)126 static int64_t icount_get_locked(void)
127 {
128     int64_t icount = icount_get_raw_locked();
129     return qatomic_read_i64(&timers_state.qemu_icount_bias) +
130         icount_to_ns(icount);
131 }
132 
icount_get_raw(void)133 int64_t icount_get_raw(void)
134 {
135     int64_t icount;
136     unsigned start;
137 
138     do {
139         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
140         icount = icount_get_raw_locked();
141     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
142 
143     return icount;
144 }
145 
146 /* Return the virtual CPU time, based on the instruction counter.  */
icount_get(void)147 int64_t icount_get(void)
148 {
149     int64_t icount;
150     unsigned start;
151 
152     do {
153         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
154         icount = icount_get_locked();
155     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
156 
157     return icount;
158 }
159 
icount_to_ns(int64_t icount)160 int64_t icount_to_ns(int64_t icount)
161 {
162     return icount << qatomic_read(&timers_state.icount_time_shift);
163 }
164 
/*
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so ignore small variation.
 * When the guest is idle real and virtual time will be aligned in
 * the IO wait loop.
 *
 * ICOUNT_WOBBLE is the tolerance icount_adjust() applies to the previous
 * delta before it considers changing the shift: one tenth of
 * NANOSECONDS_PER_SECOND.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
172 
icount_adjust(void)173 static void icount_adjust(void)
174 {
175     int64_t cur_time;
176     int64_t cur_icount;
177     int64_t delta;
178 
179     /* If the VM is not running, then do nothing.  */
180     if (!runstate_is_running()) {
181         return;
182     }
183 
184     seqlock_write_lock(&timers_state.vm_clock_seqlock,
185                        &timers_state.vm_clock_lock);
186     cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
187                                    cpu_get_clock_locked());
188     cur_icount = icount_get_locked();
189 
190     delta = cur_icount - cur_time;
191     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
192     if (delta > 0
193         && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
194         && timers_state.icount_time_shift > 0) {
195         /* The guest is getting too far ahead.  Slow time down.  */
196         qatomic_set(&timers_state.icount_time_shift,
197                     timers_state.icount_time_shift - 1);
198     }
199     if (delta < 0
200         && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
201         && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
202         /* The guest is getting too far behind.  Speed time up.  */
203         qatomic_set(&timers_state.icount_time_shift,
204                     timers_state.icount_time_shift + 1);
205     }
206     timers_state.last_delta = delta;
207     qatomic_set_i64(&timers_state.qemu_icount_bias,
208                     cur_icount - (timers_state.qemu_icount
209                                   << timers_state.icount_time_shift));
210     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
211                          &timers_state.vm_clock_lock);
212 }
213 
icount_adjust_rt(void * opaque)214 static void icount_adjust_rt(void *opaque)
215 {
216     timer_mod(timers_state.icount_rt_timer,
217               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
218     icount_adjust();
219 }
220 
icount_adjust_vm(void * opaque)221 static void icount_adjust_vm(void *opaque)
222 {
223     timer_mod(timers_state.icount_vm_timer,
224                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
225                    NANOSECONDS_PER_SECOND / 10);
226     icount_adjust();
227 }
228 
icount_round(int64_t count)229 int64_t icount_round(int64_t count)
230 {
231     int shift = qatomic_read(&timers_state.icount_time_shift);
232     return (count + (1 << shift) - 1) >> shift;
233 }
234 
icount_warp_rt(void)235 static void icount_warp_rt(void)
236 {
237     unsigned seq;
238     int64_t warp_start;
239 
240     /*
241      * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
242      * changes from -1 to another value, so the race here is okay.
243      */
244     do {
245         seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
246         warp_start = timers_state.vm_clock_warp_start;
247     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
248 
249     if (warp_start == -1) {
250         return;
251     }
252 
253     seqlock_write_lock(&timers_state.vm_clock_seqlock,
254                        &timers_state.vm_clock_lock);
255     if (runstate_is_running()) {
256         int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
257                                             cpu_get_clock_locked());
258         int64_t warp_delta;
259 
260         warp_delta = clock - timers_state.vm_clock_warp_start;
261         if (icount_enabled() == 2) {
262             /*
263              * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
264              * far ahead of real time.
265              */
266             int64_t cur_icount = icount_get_locked();
267             int64_t delta = clock - cur_icount;
268             warp_delta = MIN(warp_delta, delta);
269         }
270         qatomic_set_i64(&timers_state.qemu_icount_bias,
271                         timers_state.qemu_icount_bias + warp_delta);
272     }
273     timers_state.vm_clock_warp_start = -1;
274     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
275                        &timers_state.vm_clock_lock);
276 
277     if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
278         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
279     }
280 }
281 
/*
 * Expiry callback of icount_warp_timer: apply the pending warp.
 * @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
290 
icount_start_warp_timer(void)291 void icount_start_warp_timer(void)
292 {
293     int64_t clock;
294     int64_t deadline;
295 
296     assert(icount_enabled());
297 
298     /*
299      * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
300      * do not fire, so computing the deadline does not make sense.
301      */
302     if (!runstate_is_running()) {
303         return;
304     }
305 
306     if (replay_mode != REPLAY_MODE_PLAY) {
307         if (!all_cpu_threads_idle()) {
308             return;
309         }
310 
311         if (qtest_enabled()) {
312             /* When testing, qtest commands advance icount.  */
313             return;
314         }
315 
316         replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
317     } else {
318         /* warp clock deterministically in record/replay mode */
319         if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
320             /*
321              * vCPU is sleeping and warp can't be started.
322              * It is probably a race condition: notification sent
323              * to vCPU was processed in advance and vCPU went to sleep.
324              * Therefore we have to wake it up for doing someting.
325              */
326             if (replay_has_checkpoint()) {
327                 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
328             }
329             return;
330         }
331     }
332 
333     /* We want to use the earliest deadline from ALL vm_clocks */
334     clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
335     deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
336                                           ~QEMU_TIMER_ATTR_EXTERNAL);
337     if (deadline < 0) {
338         static bool notified;
339         if (!icount_sleep && !notified) {
340             warn_report("icount sleep disabled and no active timers");
341             notified = true;
342         }
343         return;
344     }
345 
346     if (deadline > 0) {
347         /*
348          * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
349          * sleep.  Otherwise, the CPU might be waiting for a future timer
350          * interrupt to wake it up, but the interrupt never comes because
351          * the vCPU isn't running any insns and thus doesn't advance the
352          * QEMU_CLOCK_VIRTUAL.
353          */
354         if (!icount_sleep) {
355             /*
356              * We never let VCPUs sleep in no sleep icount mode.
357              * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
358              * to the next QEMU_CLOCK_VIRTUAL event and notify it.
359              * It is useful when we want a deterministic execution time,
360              * isolated from host latencies.
361              */
362             seqlock_write_lock(&timers_state.vm_clock_seqlock,
363                                &timers_state.vm_clock_lock);
364             qatomic_set_i64(&timers_state.qemu_icount_bias,
365                             timers_state.qemu_icount_bias + deadline);
366             seqlock_write_unlock(&timers_state.vm_clock_seqlock,
367                                  &timers_state.vm_clock_lock);
368             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
369         } else {
370             /*
371              * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
372              * "real" time, (related to the time left until the next event) has
373              * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
374              * This avoids that the warps are visible externally; for example,
375              * you will not be sending network packets continuously instead of
376              * every 100ms.
377              */
378             seqlock_write_lock(&timers_state.vm_clock_seqlock,
379                                &timers_state.vm_clock_lock);
380             if (timers_state.vm_clock_warp_start == -1
381                 || timers_state.vm_clock_warp_start > clock) {
382                 timers_state.vm_clock_warp_start = clock;
383             }
384             seqlock_write_unlock(&timers_state.vm_clock_seqlock,
385                                  &timers_state.vm_clock_lock);
386             timer_mod_anticipate(timers_state.icount_warp_timer,
387                                  clock + deadline);
388         }
389     } else if (deadline == 0) {
390         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
391     }
392 }
393 
icount_account_warp_timer(void)394 void icount_account_warp_timer(void)
395 {
396     if (!icount_sleep) {
397         return;
398     }
399 
400     /*
401      * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
402      * do not fire, so computing the deadline does not make sense.
403      */
404     if (!runstate_is_running()) {
405         return;
406     }
407 
408     /* warp clock deterministically in record/replay mode */
409     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
410         return;
411     }
412 
413     timer_del(timers_state.icount_warp_timer);
414     icount_warp_rt();
415 }
416 
icount_configure(QemuOpts * opts,Error ** errp)417 void icount_configure(QemuOpts *opts, Error **errp)
418 {
419     const char *option = qemu_opt_get(opts, "shift");
420     bool sleep = qemu_opt_get_bool(opts, "sleep", true);
421     bool align = qemu_opt_get_bool(opts, "align", false);
422     long time_shift = -1;
423 
424     if (!option) {
425         if (qemu_opt_get(opts, "align") != NULL) {
426             error_setg(errp, "Please specify shift option when using align");
427         }
428         return;
429     }
430 
431     if (align && !sleep) {
432         error_setg(errp, "align=on and sleep=off are incompatible");
433         return;
434     }
435 
436     if (strcmp(option, "auto") != 0) {
437         if (qemu_strtol(option, NULL, 0, &time_shift) < 0
438             || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
439             error_setg(errp, "icount: Invalid shift value");
440             return;
441         }
442     } else if (icount_align_option) {
443         error_setg(errp, "shift=auto and align=on are incompatible");
444         return;
445     } else if (!icount_sleep) {
446         error_setg(errp, "shift=auto and sleep=off are incompatible");
447         return;
448     }
449 
450     icount_sleep = sleep;
451     if (icount_sleep) {
452         timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
453                                          icount_timer_cb, NULL);
454     }
455 
456     icount_align_option = align;
457 
458     if (time_shift >= 0) {
459         timers_state.icount_time_shift = time_shift;
460         icount_enable_precise();
461         return;
462     }
463 
464     icount_enable_adaptive();
465 
466     /*
467      * 125MIPS seems a reasonable initial guess at the guest speed.
468      * It will be corrected fairly quickly anyway.
469      */
470     timers_state.icount_time_shift = 3;
471 
472     /*
473      * Have both realtime and virtual time triggers for speed adjustment.
474      * The realtime trigger catches emulated time passing too slowly,
475      * the virtual time trigger catches emulated time passing too fast.
476      * Realtime triggers occur even when idle, so use them less frequently
477      * than VM triggers.
478      */
479     timers_state.vm_clock_warp_start = -1;
480     timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
481                                    icount_adjust_rt, NULL);
482     timer_mod(timers_state.icount_rt_timer,
483                    qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
484     timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
485                                         icount_adjust_vm, NULL);
486     timer_mod(timers_state.icount_vm_timer,
487                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
488                    NANOSECONDS_PER_SECOND / 10);
489 }
490