1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu/osdep.h"
26 #include "qemu/cutils.h"
27 #include "migration/vmstate.h"
28 #include "qapi/error.h"
29 #include "qemu/error-report.h"
30 #include "sysemu/cpus.h"
31 #include "sysemu/qtest.h"
32 #include "qemu/main-loop.h"
33 #include "qemu/option.h"
34 #include "qemu/seqlock.h"
35 #include "sysemu/replay.h"
36 #include "sysemu/runstate.h"
37 #include "hw/core/cpu.h"
38 #include "sysemu/cpu-timers.h"
39 #include "sysemu/cpu-timers-internal.h"
40
/*
 * ICOUNT: Instruction Counter
 *
 * This module is split off from cpu-timers because the icount part
 * is TCG-specific and does not need to be built for other accelerators.
 */
/*
 * Whether the "sleep" option is on (the default).  Overwritten by
 * icount_configure().  When false, icount_start_warp_timer() advances
 * qemu_icount_bias straight to the next deadline instead of arming
 * icount_warp_timer, and icount_account_warp_timer() does nothing.
 */
static bool icount_sleep = true;
/*
 * Arbitrarily pick 1MIPS as the minimum allowable speed.
 * icount_to_ns() shifts the instruction count left by the time shift,
 * so a shift of 10 means 1024 ns of virtual time per instruction.
 */
#define MAX_ICOUNT_SHIFT 10

/*
 * Do not count executed instructions by default; icount_enable_precise()
 * and icount_enable_adaptive() switch the mode.
 */
ICountMode use_icount = ICOUNT_DISABLED;
53
icount_enable_precise(void)54 static void icount_enable_precise(void)
55 {
56 /* Fixed conversion of insn to ns via "shift" option */
57 use_icount = ICOUNT_PRECISE;
58 }
59
icount_enable_adaptive(void)60 static void icount_enable_adaptive(void)
61 {
62 /* Runtime adaptive algorithm to compute shift */
63 use_icount = ICOUNT_ADAPTATIVE;
64 }
65
66 /*
67 * The current number of executed instructions is based on what we
68 * originally budgeted minus the current state of the decrementing
69 * icount counters in extra/u16.low.
70 */
icount_get_executed(CPUState * cpu)71 static int64_t icount_get_executed(CPUState *cpu)
72 {
73 return (cpu->icount_budget -
74 (cpu->neg.icount_decr.u16.low + cpu->icount_extra));
75 }
76
77 /*
78 * Update the global shared timer_state.qemu_icount to take into
79 * account executed instructions. This is done by the TCG vCPU
80 * thread so the main-loop can see time has moved forward.
81 */
icount_update_locked(CPUState * cpu)82 static void icount_update_locked(CPUState *cpu)
83 {
84 int64_t executed = icount_get_executed(cpu);
85 cpu->icount_budget -= executed;
86
87 qatomic_set_i64(&timers_state.qemu_icount,
88 timers_state.qemu_icount + executed);
89 }
90
91 /*
92 * Update the global shared timer_state.qemu_icount to take into
93 * account executed instructions. This is done by the TCG vCPU
94 * thread so the main-loop can see time has moved forward.
95 */
icount_update(CPUState * cpu)96 void icount_update(CPUState *cpu)
97 {
98 seqlock_write_lock(&timers_state.vm_clock_seqlock,
99 &timers_state.vm_clock_lock);
100 icount_update_locked(cpu);
101 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
102 &timers_state.vm_clock_lock);
103 }
104
icount_get_raw_locked(void)105 static int64_t icount_get_raw_locked(void)
106 {
107 CPUState *cpu = current_cpu;
108
109 if (cpu && cpu->running) {
110 if (!cpu->neg.can_do_io) {
111 error_report("Bad icount read");
112 exit(1);
113 }
114 /* Take into account what has run */
115 icount_update_locked(cpu);
116 }
117 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
118 return qatomic_read_i64(&timers_state.qemu_icount);
119 }
120
icount_get_locked(void)121 static int64_t icount_get_locked(void)
122 {
123 int64_t icount = icount_get_raw_locked();
124 return qatomic_read_i64(&timers_state.qemu_icount_bias) +
125 icount_to_ns(icount);
126 }
127
icount_get_raw(void)128 int64_t icount_get_raw(void)
129 {
130 int64_t icount;
131 unsigned start;
132
133 do {
134 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
135 icount = icount_get_raw_locked();
136 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
137
138 return icount;
139 }
140
141 /* Return the virtual CPU time, based on the instruction counter. */
icount_get(void)142 int64_t icount_get(void)
143 {
144 int64_t icount;
145 unsigned start;
146
147 do {
148 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
149 icount = icount_get_locked();
150 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
151
152 return icount;
153 }
154
icount_to_ns(int64_t icount)155 int64_t icount_to_ns(int64_t icount)
156 {
157 return icount << qatomic_read(&timers_state.icount_time_shift);
158 }
159
160 /*
161 * Correlation between real and virtual time is always going to be
162 * fairly approximate, so ignore small variation.
163 * When the guest is idle real and virtual time will be aligned in
164 * the IO wait loop.
165 */
166 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
167
icount_adjust(void)168 static void icount_adjust(void)
169 {
170 int64_t cur_time;
171 int64_t cur_icount;
172 int64_t delta;
173
174 /* If the VM is not running, then do nothing. */
175 if (!runstate_is_running()) {
176 return;
177 }
178
179 seqlock_write_lock(&timers_state.vm_clock_seqlock,
180 &timers_state.vm_clock_lock);
181 cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
182 cpu_get_clock_locked());
183 cur_icount = icount_get_locked();
184
185 delta = cur_icount - cur_time;
186 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
187 if (delta > 0
188 && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
189 && timers_state.icount_time_shift > 0) {
190 /* The guest is getting too far ahead. Slow time down. */
191 qatomic_set(&timers_state.icount_time_shift,
192 timers_state.icount_time_shift - 1);
193 }
194 if (delta < 0
195 && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
196 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
197 /* The guest is getting too far behind. Speed time up. */
198 qatomic_set(&timers_state.icount_time_shift,
199 timers_state.icount_time_shift + 1);
200 }
201 timers_state.last_delta = delta;
202 qatomic_set_i64(&timers_state.qemu_icount_bias,
203 cur_icount - (timers_state.qemu_icount
204 << timers_state.icount_time_shift));
205 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
206 &timers_state.vm_clock_lock);
207 }
208
icount_adjust_rt(void * opaque)209 static void icount_adjust_rt(void *opaque)
210 {
211 timer_mod(timers_state.icount_rt_timer,
212 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
213 icount_adjust();
214 }
215
icount_adjust_vm(void * opaque)216 static void icount_adjust_vm(void *opaque)
217 {
218 timer_mod(timers_state.icount_vm_timer,
219 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
220 NANOSECONDS_PER_SECOND / 10);
221 icount_adjust();
222 }
223
icount_round(int64_t count)224 int64_t icount_round(int64_t count)
225 {
226 int shift = qatomic_read(&timers_state.icount_time_shift);
227 return (count + (1 << shift) - 1) >> shift;
228 }
229
icount_warp_rt(void)230 static void icount_warp_rt(void)
231 {
232 unsigned seq;
233 int64_t warp_start;
234
235 /*
236 * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
237 * changes from -1 to another value, so the race here is okay.
238 */
239 do {
240 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
241 warp_start = timers_state.vm_clock_warp_start;
242 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
243
244 if (warp_start == -1) {
245 return;
246 }
247
248 seqlock_write_lock(&timers_state.vm_clock_seqlock,
249 &timers_state.vm_clock_lock);
250 if (runstate_is_running()) {
251 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
252 cpu_get_clock_locked());
253 int64_t warp_delta;
254
255 warp_delta = clock - timers_state.vm_clock_warp_start;
256 if (icount_enabled() == ICOUNT_ADAPTATIVE) {
257 /*
258 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
259 * ahead of real time (it might already be ahead so careful not
260 * to go backwards).
261 */
262 int64_t cur_icount = icount_get_locked();
263 int64_t delta = clock - cur_icount;
264
265 if (delta < 0) {
266 delta = 0;
267 }
268 warp_delta = MIN(warp_delta, delta);
269 }
270 qatomic_set_i64(&timers_state.qemu_icount_bias,
271 timers_state.qemu_icount_bias + warp_delta);
272 }
273 timers_state.vm_clock_warp_start = -1;
274 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
275 &timers_state.vm_clock_lock);
276
277 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
278 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
279 }
280 }
281
/*
 * Callback of icount_warp_timer: apply the pending warp.
 * @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
290
icount_start_warp_timer(void)291 void icount_start_warp_timer(void)
292 {
293 int64_t clock;
294 int64_t deadline;
295
296 assert(icount_enabled());
297
298 /*
299 * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
300 * do not fire, so computing the deadline does not make sense.
301 */
302 if (!runstate_is_running()) {
303 return;
304 }
305
306 if (replay_mode != REPLAY_MODE_PLAY) {
307 if (!all_cpu_threads_idle()) {
308 return;
309 }
310
311 if (qtest_enabled()) {
312 /* When testing, qtest commands advance icount. */
313 return;
314 }
315
316 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
317 } else {
318 /* warp clock deterministically in record/replay mode */
319 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
320 /*
321 * vCPU is sleeping and warp can't be started.
322 * It is probably a race condition: notification sent
323 * to vCPU was processed in advance and vCPU went to sleep.
324 * Therefore we have to wake it up for doing something.
325 */
326 if (replay_has_event()) {
327 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
328 }
329 return;
330 }
331 }
332
333 /* We want to use the earliest deadline from ALL vm_clocks */
334 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
335 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
336 ~QEMU_TIMER_ATTR_EXTERNAL);
337 if (deadline < 0) {
338 if (!icount_sleep) {
339 warn_report_once("icount sleep disabled and no active timers");
340 }
341 return;
342 }
343
344 if (deadline > 0) {
345 /*
346 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
347 * sleep. Otherwise, the CPU might be waiting for a future timer
348 * interrupt to wake it up, but the interrupt never comes because
349 * the vCPU isn't running any insns and thus doesn't advance the
350 * QEMU_CLOCK_VIRTUAL.
351 */
352 if (!icount_sleep) {
353 /*
354 * We never let VCPUs sleep in no sleep icount mode.
355 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
356 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
357 * It is useful when we want a deterministic execution time,
358 * isolated from host latencies.
359 */
360 seqlock_write_lock(&timers_state.vm_clock_seqlock,
361 &timers_state.vm_clock_lock);
362 qatomic_set_i64(&timers_state.qemu_icount_bias,
363 timers_state.qemu_icount_bias + deadline);
364 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
365 &timers_state.vm_clock_lock);
366 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
367 } else {
368 /*
369 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
370 * "real" time, (related to the time left until the next event) has
371 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
372 * This avoids that the warps are visible externally; for example,
373 * you will not be sending network packets continuously instead of
374 * every 100ms.
375 */
376 seqlock_write_lock(&timers_state.vm_clock_seqlock,
377 &timers_state.vm_clock_lock);
378 if (timers_state.vm_clock_warp_start == -1
379 || timers_state.vm_clock_warp_start > clock) {
380 timers_state.vm_clock_warp_start = clock;
381 }
382 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
383 &timers_state.vm_clock_lock);
384 timer_mod_anticipate(timers_state.icount_warp_timer,
385 clock + deadline);
386 }
387 } else if (deadline == 0) {
388 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
389 }
390 }
391
icount_account_warp_timer(void)392 void icount_account_warp_timer(void)
393 {
394 if (!icount_sleep) {
395 return;
396 }
397
398 /*
399 * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
400 * do not fire, so computing the deadline does not make sense.
401 */
402 if (!runstate_is_running()) {
403 return;
404 }
405
406 replay_async_events();
407
408 /* warp clock deterministically in record/replay mode */
409 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
410 return;
411 }
412
413 timer_del(timers_state.icount_warp_timer);
414 icount_warp_rt();
415 }
416
icount_configure(QemuOpts * opts,Error ** errp)417 bool icount_configure(QemuOpts *opts, Error **errp)
418 {
419 const char *option = qemu_opt_get(opts, "shift");
420 bool sleep = qemu_opt_get_bool(opts, "sleep", true);
421 bool align = qemu_opt_get_bool(opts, "align", false);
422 long time_shift = -1;
423
424 if (!option) {
425 if (qemu_opt_get(opts, "align") != NULL) {
426 error_setg(errp, "Please specify shift option when using align");
427 return false;
428 }
429 return true;
430 }
431
432 if (align && !sleep) {
433 error_setg(errp, "align=on and sleep=off are incompatible");
434 return false;
435 }
436
437 if (strcmp(option, "auto") != 0) {
438 if (qemu_strtol(option, NULL, 0, &time_shift) < 0
439 || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
440 error_setg(errp, "icount: Invalid shift value");
441 return false;
442 }
443 } else if (icount_align_option) {
444 error_setg(errp, "shift=auto and align=on are incompatible");
445 return false;
446 } else if (!icount_sleep) {
447 error_setg(errp, "shift=auto and sleep=off are incompatible");
448 return false;
449 }
450
451 icount_sleep = sleep;
452 if (icount_sleep) {
453 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
454 icount_timer_cb, NULL);
455 }
456
457 icount_align_option = align;
458
459 if (time_shift >= 0) {
460 timers_state.icount_time_shift = time_shift;
461 icount_enable_precise();
462 return true;
463 }
464
465 icount_enable_adaptive();
466
467 /*
468 * 125MIPS seems a reasonable initial guess at the guest speed.
469 * It will be corrected fairly quickly anyway.
470 */
471 timers_state.icount_time_shift = 3;
472
473 /*
474 * Have both realtime and virtual time triggers for speed adjustment.
475 * The realtime trigger catches emulated time passing too slowly,
476 * the virtual time trigger catches emulated time passing too fast.
477 * Realtime triggers occur even when idle, so use them less frequently
478 * than VM triggers.
479 */
480 timers_state.vm_clock_warp_start = -1;
481 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
482 icount_adjust_rt, NULL);
483 timer_mod(timers_state.icount_rt_timer,
484 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
485 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
486 icount_adjust_vm, NULL);
487 timer_mod(timers_state.icount_vm_timer,
488 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
489 NANOSECONDS_PER_SECOND / 10);
490 return true;
491 }
492
icount_notify_exit(void)493 void icount_notify_exit(void)
494 {
495 assert(icount_enabled());
496
497 if (current_cpu) {
498 qemu_cpu_kick(current_cpu);
499 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
500 }
501 }
502