1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu-common.h"
27 #include "qemu/config-file.h"
28 #include "migration/vmstate.h"
29 #include "monitor/monitor.h"
30 #include "qapi/error.h"
31 #include "qapi/qapi-commands-misc.h"
32 #include "qapi/qapi-events-run-state.h"
33 #include "qapi/qmp/qerror.h"
34 #include "qemu/error-report.h"
35 #include "qemu/qemu-print.h"
36 #include "sysemu/tcg.h"
37 #include "sysemu/block-backend.h"
38 #include "exec/gdbstub.h"
39 #include "sysemu/dma.h"
40 #include "sysemu/hw_accel.h"
41 #include "sysemu/kvm.h"
42 #include "sysemu/hax.h"
43 #include "sysemu/hvf.h"
44 #include "sysemu/whpx.h"
45 #include "exec/exec-all.h"
46 
47 #include "qemu/thread.h"
48 #include "qemu/plugin.h"
49 #include "sysemu/cpus.h"
50 #include "sysemu/qtest.h"
51 #include "qemu/main-loop.h"
52 #include "qemu/option.h"
53 #include "qemu/bitmap.h"
54 #include "qemu/seqlock.h"
55 #include "qemu/guest-random.h"
56 #include "tcg/tcg.h"
57 #include "hw/nmi.h"
58 #include "sysemu/replay.h"
59 #include "sysemu/runstate.h"
60 #include "hw/boards.h"
61 #include "hw/hw.h"
62 
63 #ifdef CONFIG_LINUX
64 
65 #include <sys/prctl.h>
66 
67 #ifndef PR_MCE_KILL
68 #define PR_MCE_KILL 33
69 #endif
70 
71 #ifndef PR_MCE_KILL_SET
72 #define PR_MCE_KILL_SET 1
73 #endif
74 
75 #ifndef PR_MCE_KILL_EARLY
76 #define PR_MCE_KILL_EARLY 1
77 #endif
78 
79 #endif /* CONFIG_LINUX */
80 
81 static QemuMutex qemu_global_mutex;
82 
83 int64_t max_delay;
84 int64_t max_advance;
85 
86 /* vcpu throttling controls */
87 static QEMUTimer *throttle_timer;
88 static unsigned int throttle_percentage;
89 
90 #define CPU_THROTTLE_PCT_MIN 1
91 #define CPU_THROTTLE_PCT_MAX 99
92 #define CPU_THROTTLE_TIMESLICE_NS 10000000
93 
cpu_is_stopped(CPUState * cpu)94 bool cpu_is_stopped(CPUState *cpu)
95 {
96     return cpu->stopped || !runstate_is_running();
97 }
98 
cpu_thread_is_idle(CPUState * cpu)99 static bool cpu_thread_is_idle(CPUState *cpu)
100 {
101     if (cpu->stop || cpu->queued_work_first) {
102         return false;
103     }
104     if (cpu_is_stopped(cpu)) {
105         return true;
106     }
107     if (!cpu->halted || cpu_has_work(cpu) ||
108         kvm_halt_in_kernel()) {
109         return false;
110     }
111     return true;
112 }
113 
all_cpu_threads_idle(void)114 static bool all_cpu_threads_idle(void)
115 {
116     CPUState *cpu;
117 
118     CPU_FOREACH(cpu) {
119         if (!cpu_thread_is_idle(cpu)) {
120             return false;
121         }
122     }
123     return true;
124 }
125 
126 /***********************************************************/
127 /* guest cycle counter */
128 
129 /* Protected by TimersState seqlock */
130 
131 static bool icount_sleep = true;
132 /* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
133 #define MAX_ICOUNT_SHIFT 10
134 
135 typedef struct TimersState {
136     /* Protected by BQL.  */
137     int64_t cpu_ticks_prev;
138     int64_t cpu_ticks_offset;
139 
140     /* Protect fields that can be respectively read outside the
141      * BQL, and written from multiple threads.
142      */
143     QemuSeqLock vm_clock_seqlock;
144     QemuSpin vm_clock_lock;
145 
146     int16_t cpu_ticks_enabled;
147 
148     /* Conversion factor from emulated instructions to virtual clock ticks.  */
149     int16_t icount_time_shift;
150 
151     /* Compensate for varying guest execution speed.  */
152     int64_t qemu_icount_bias;
153 
154     int64_t vm_clock_warp_start;
155     int64_t cpu_clock_offset;
156 
157     /* Only written by TCG thread */
158     int64_t qemu_icount;
159 
160     /* for adjusting icount */
161     QEMUTimer *icount_rt_timer;
162     QEMUTimer *icount_vm_timer;
163     QEMUTimer *icount_warp_timer;
164 } TimersState;
165 
166 static TimersState timers_state;
167 bool mttcg_enabled;
168 
169 
170 /* The current number of executed instructions is based on what we
171  * originally budgeted minus the current state of the decrementing
172  * icount counters in extra/u16.low.
173  */
cpu_get_icount_executed(CPUState * cpu)174 static int64_t cpu_get_icount_executed(CPUState *cpu)
175 {
176     return (cpu->icount_budget -
177             (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
178 }
179 
180 /*
181  * Update the global shared timer_state.qemu_icount to take into
182  * account executed instructions. This is done by the TCG vCPU
183  * thread so the main-loop can see time has moved forward.
184  */
cpu_update_icount_locked(CPUState * cpu)185 static void cpu_update_icount_locked(CPUState *cpu)
186 {
187     int64_t executed = cpu_get_icount_executed(cpu);
188     cpu->icount_budget -= executed;
189 
190     atomic_set_i64(&timers_state.qemu_icount,
191                    timers_state.qemu_icount + executed);
192 }
193 
194 /*
195  * Update the global shared timer_state.qemu_icount to take into
196  * account executed instructions. This is done by the TCG vCPU
197  * thread so the main-loop can see time has moved forward.
198  */
cpu_update_icount(CPUState * cpu)199 void cpu_update_icount(CPUState *cpu)
200 {
201     seqlock_write_lock(&timers_state.vm_clock_seqlock,
202                        &timers_state.vm_clock_lock);
203     cpu_update_icount_locked(cpu);
204     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
205                          &timers_state.vm_clock_lock);
206 }
207 
cpu_get_icount_raw_locked(void)208 static int64_t cpu_get_icount_raw_locked(void)
209 {
210     CPUState *cpu = current_cpu;
211 
212     if (cpu && cpu->running) {
213         if (!cpu->can_do_io) {
214             error_report("Bad icount read");
215             exit(1);
216         }
217         /* Take into account what has run */
218         cpu_update_icount_locked(cpu);
219     }
220     /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
221     return atomic_read_i64(&timers_state.qemu_icount);
222 }
223 
cpu_get_icount_locked(void)224 static int64_t cpu_get_icount_locked(void)
225 {
226     int64_t icount = cpu_get_icount_raw_locked();
227     return atomic_read_i64(&timers_state.qemu_icount_bias) +
228         cpu_icount_to_ns(icount);
229 }
230 
cpu_get_icount_raw(void)231 int64_t cpu_get_icount_raw(void)
232 {
233     int64_t icount;
234     unsigned start;
235 
236     do {
237         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
238         icount = cpu_get_icount_raw_locked();
239     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
240 
241     return icount;
242 }
243 
244 /* Return the virtual CPU time, based on the instruction counter.  */
cpu_get_icount(void)245 int64_t cpu_get_icount(void)
246 {
247     int64_t icount;
248     unsigned start;
249 
250     do {
251         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
252         icount = cpu_get_icount_locked();
253     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
254 
255     return icount;
256 }
257 
cpu_icount_to_ns(int64_t icount)258 int64_t cpu_icount_to_ns(int64_t icount)
259 {
260     return icount << atomic_read(&timers_state.icount_time_shift);
261 }
262 
cpu_get_ticks_locked(void)263 static int64_t cpu_get_ticks_locked(void)
264 {
265     int64_t ticks = timers_state.cpu_ticks_offset;
266     if (timers_state.cpu_ticks_enabled) {
267         ticks += cpu_get_host_ticks();
268     }
269 
270     if (timers_state.cpu_ticks_prev > ticks) {
271         /* Non increasing ticks may happen if the host uses software suspend.  */
272         timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
273         ticks = timers_state.cpu_ticks_prev;
274     }
275 
276     timers_state.cpu_ticks_prev = ticks;
277     return ticks;
278 }
279 
280 /* return the time elapsed in VM between vm_start and vm_stop.  Unless
281  * icount is active, cpu_get_ticks() uses units of the host CPU cycle
282  * counter.
283  */
cpu_get_ticks(void)284 int64_t cpu_get_ticks(void)
285 {
286     int64_t ticks;
287 
288     if (use_icount) {
289         return cpu_get_icount();
290     }
291 
292     qemu_spin_lock(&timers_state.vm_clock_lock);
293     ticks = cpu_get_ticks_locked();
294     qemu_spin_unlock(&timers_state.vm_clock_lock);
295     return ticks;
296 }
297 
cpu_get_clock_locked(void)298 static int64_t cpu_get_clock_locked(void)
299 {
300     int64_t time;
301 
302     time = timers_state.cpu_clock_offset;
303     if (timers_state.cpu_ticks_enabled) {
304         time += get_clock();
305     }
306 
307     return time;
308 }
309 
310 /* Return the monotonic time elapsed in VM, i.e.,
311  * the time between vm_start and vm_stop
312  */
cpu_get_clock(void)313 int64_t cpu_get_clock(void)
314 {
315     int64_t ti;
316     unsigned start;
317 
318     do {
319         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
320         ti = cpu_get_clock_locked();
321     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
322 
323     return ti;
324 }
325 
326 /* enable cpu_get_ticks()
327  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
328  */
cpu_enable_ticks(void)329 void cpu_enable_ticks(void)
330 {
331     seqlock_write_lock(&timers_state.vm_clock_seqlock,
332                        &timers_state.vm_clock_lock);
333     if (!timers_state.cpu_ticks_enabled) {
334         timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
335         timers_state.cpu_clock_offset -= get_clock();
336         timers_state.cpu_ticks_enabled = 1;
337     }
338     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
339                        &timers_state.vm_clock_lock);
340 }
341 
342 /* disable cpu_get_ticks() : the clock is stopped. You must not call
343  * cpu_get_ticks() after that.
344  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
345  */
cpu_disable_ticks(void)346 void cpu_disable_ticks(void)
347 {
348     seqlock_write_lock(&timers_state.vm_clock_seqlock,
349                        &timers_state.vm_clock_lock);
350     if (timers_state.cpu_ticks_enabled) {
351         timers_state.cpu_ticks_offset += cpu_get_host_ticks();
352         timers_state.cpu_clock_offset = cpu_get_clock_locked();
353         timers_state.cpu_ticks_enabled = 0;
354     }
355     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
356                          &timers_state.vm_clock_lock);
357 }
358 
359 /* Correlation between real and virtual time is always going to be
360    fairly approximate, so ignore small variation.
361    When the guest is idle real and virtual time will be aligned in
362    the IO wait loop.  */
363 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
364 
icount_adjust(void)365 static void icount_adjust(void)
366 {
367     int64_t cur_time;
368     int64_t cur_icount;
369     int64_t delta;
370 
371     /* Protected by TimersState mutex.  */
372     static int64_t last_delta;
373 
374     /* If the VM is not running, then do nothing.  */
375     if (!runstate_is_running()) {
376         return;
377     }
378 
379     seqlock_write_lock(&timers_state.vm_clock_seqlock,
380                        &timers_state.vm_clock_lock);
381     cur_time = cpu_get_clock_locked();
382     cur_icount = cpu_get_icount_locked();
383 
384     delta = cur_icount - cur_time;
385     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
386     if (delta > 0
387         && last_delta + ICOUNT_WOBBLE < delta * 2
388         && timers_state.icount_time_shift > 0) {
389         /* The guest is getting too far ahead.  Slow time down.  */
390         atomic_set(&timers_state.icount_time_shift,
391                    timers_state.icount_time_shift - 1);
392     }
393     if (delta < 0
394         && last_delta - ICOUNT_WOBBLE > delta * 2
395         && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
396         /* The guest is getting too far behind.  Speed time up.  */
397         atomic_set(&timers_state.icount_time_shift,
398                    timers_state.icount_time_shift + 1);
399     }
400     last_delta = delta;
401     atomic_set_i64(&timers_state.qemu_icount_bias,
402                    cur_icount - (timers_state.qemu_icount
403                                  << timers_state.icount_time_shift));
404     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
405                          &timers_state.vm_clock_lock);
406 }
407 
icount_adjust_rt(void * opaque)408 static void icount_adjust_rt(void *opaque)
409 {
410     timer_mod(timers_state.icount_rt_timer,
411               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
412     icount_adjust();
413 }
414 
icount_adjust_vm(void * opaque)415 static void icount_adjust_vm(void *opaque)
416 {
417     timer_mod(timers_state.icount_vm_timer,
418                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
419                    NANOSECONDS_PER_SECOND / 10);
420     icount_adjust();
421 }
422 
qemu_icount_round(int64_t count)423 static int64_t qemu_icount_round(int64_t count)
424 {
425     int shift = atomic_read(&timers_state.icount_time_shift);
426     return (count + (1 << shift) - 1) >> shift;
427 }
428 
icount_warp_rt(void)429 static void icount_warp_rt(void)
430 {
431     unsigned seq;
432     int64_t warp_start;
433 
434     /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
435      * changes from -1 to another value, so the race here is okay.
436      */
437     do {
438         seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
439         warp_start = timers_state.vm_clock_warp_start;
440     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
441 
442     if (warp_start == -1) {
443         return;
444     }
445 
446     seqlock_write_lock(&timers_state.vm_clock_seqlock,
447                        &timers_state.vm_clock_lock);
448     if (runstate_is_running()) {
449         int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
450                                             cpu_get_clock_locked());
451         int64_t warp_delta;
452 
453         warp_delta = clock - timers_state.vm_clock_warp_start;
454         if (use_icount == 2) {
455             /*
456              * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
457              * far ahead of real time.
458              */
459             int64_t cur_icount = cpu_get_icount_locked();
460             int64_t delta = clock - cur_icount;
461             warp_delta = MIN(warp_delta, delta);
462         }
463         atomic_set_i64(&timers_state.qemu_icount_bias,
464                        timers_state.qemu_icount_bias + warp_delta);
465     }
466     timers_state.vm_clock_warp_start = -1;
467     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
468                        &timers_state.vm_clock_lock);
469 
470     if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
471         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
472     }
473 }
474 
icount_timer_cb(void * opaque)475 static void icount_timer_cb(void *opaque)
476 {
477     /* No need for a checkpoint because the timer already synchronizes
478      * with CHECKPOINT_CLOCK_VIRTUAL_RT.
479      */
480     icount_warp_rt();
481 }
482 
qtest_clock_warp(int64_t dest)483 void qtest_clock_warp(int64_t dest)
484 {
485     int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
486     AioContext *aio_context;
487     assert(qtest_enabled());
488     aio_context = qemu_get_aio_context();
489     while (clock < dest) {
490         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
491                                                       QEMU_TIMER_ATTR_ALL);
492         int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
493 
494         seqlock_write_lock(&timers_state.vm_clock_seqlock,
495                            &timers_state.vm_clock_lock);
496         atomic_set_i64(&timers_state.qemu_icount_bias,
497                        timers_state.qemu_icount_bias + warp);
498         seqlock_write_unlock(&timers_state.vm_clock_seqlock,
499                              &timers_state.vm_clock_lock);
500 
501         qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
502         timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
503         clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
504     }
505     qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
506 }
507 
qemu_start_warp_timer(void)508 void qemu_start_warp_timer(void)
509 {
510     int64_t clock;
511     int64_t deadline;
512 
513     if (!use_icount) {
514         return;
515     }
516 
517     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
518      * do not fire, so computing the deadline does not make sense.
519      */
520     if (!runstate_is_running()) {
521         return;
522     }
523 
524     if (replay_mode != REPLAY_MODE_PLAY) {
525         if (!all_cpu_threads_idle()) {
526             return;
527         }
528 
529         if (qtest_enabled()) {
530             /* When testing, qtest commands advance icount.  */
531             return;
532         }
533 
534         replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
535     } else {
536         /* warp clock deterministically in record/replay mode */
537         if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
538             /* vCPU is sleeping and warp can't be started.
539                It is probably a race condition: notification sent
540                to vCPU was processed in advance and vCPU went to sleep.
541                Therefore we have to wake it up for doing someting. */
542             if (replay_has_checkpoint()) {
543                 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
544             }
545             return;
546         }
547     }
548 
549     /* We want to use the earliest deadline from ALL vm_clocks */
550     clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
551     deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
552                                           ~QEMU_TIMER_ATTR_EXTERNAL);
553     if (deadline < 0) {
554         static bool notified;
555         if (!icount_sleep && !notified) {
556             warn_report("icount sleep disabled and no active timers");
557             notified = true;
558         }
559         return;
560     }
561 
562     if (deadline > 0) {
563         /*
564          * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
565          * sleep.  Otherwise, the CPU might be waiting for a future timer
566          * interrupt to wake it up, but the interrupt never comes because
567          * the vCPU isn't running any insns and thus doesn't advance the
568          * QEMU_CLOCK_VIRTUAL.
569          */
570         if (!icount_sleep) {
571             /*
572              * We never let VCPUs sleep in no sleep icount mode.
573              * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
574              * to the next QEMU_CLOCK_VIRTUAL event and notify it.
575              * It is useful when we want a deterministic execution time,
576              * isolated from host latencies.
577              */
578             seqlock_write_lock(&timers_state.vm_clock_seqlock,
579                                &timers_state.vm_clock_lock);
580             atomic_set_i64(&timers_state.qemu_icount_bias,
581                            timers_state.qemu_icount_bias + deadline);
582             seqlock_write_unlock(&timers_state.vm_clock_seqlock,
583                                  &timers_state.vm_clock_lock);
584             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
585         } else {
586             /*
587              * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
588              * "real" time, (related to the time left until the next event) has
589              * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
590              * This avoids that the warps are visible externally; for example,
591              * you will not be sending network packets continuously instead of
592              * every 100ms.
593              */
594             seqlock_write_lock(&timers_state.vm_clock_seqlock,
595                                &timers_state.vm_clock_lock);
596             if (timers_state.vm_clock_warp_start == -1
597                 || timers_state.vm_clock_warp_start > clock) {
598                 timers_state.vm_clock_warp_start = clock;
599             }
600             seqlock_write_unlock(&timers_state.vm_clock_seqlock,
601                                  &timers_state.vm_clock_lock);
602             timer_mod_anticipate(timers_state.icount_warp_timer,
603                                  clock + deadline);
604         }
605     } else if (deadline == 0) {
606         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
607     }
608 }
609 
qemu_account_warp_timer(void)610 static void qemu_account_warp_timer(void)
611 {
612     if (!use_icount || !icount_sleep) {
613         return;
614     }
615 
616     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
617      * do not fire, so computing the deadline does not make sense.
618      */
619     if (!runstate_is_running()) {
620         return;
621     }
622 
623     /* warp clock deterministically in record/replay mode */
624     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
625         return;
626     }
627 
628     timer_del(timers_state.icount_warp_timer);
629     icount_warp_rt();
630 }
631 
icount_state_needed(void * opaque)632 static bool icount_state_needed(void *opaque)
633 {
634     return use_icount;
635 }
636 
warp_timer_state_needed(void * opaque)637 static bool warp_timer_state_needed(void *opaque)
638 {
639     TimersState *s = opaque;
640     return s->icount_warp_timer != NULL;
641 }
642 
adjust_timers_state_needed(void * opaque)643 static bool adjust_timers_state_needed(void *opaque)
644 {
645     TimersState *s = opaque;
646     return s->icount_rt_timer != NULL;
647 }
648 
649 /*
650  * Subsection for warp timer migration is optional, because may not be created
651  */
652 static const VMStateDescription icount_vmstate_warp_timer = {
653     .name = "timer/icount/warp_timer",
654     .version_id = 1,
655     .minimum_version_id = 1,
656     .needed = warp_timer_state_needed,
657     .fields = (VMStateField[]) {
658         VMSTATE_INT64(vm_clock_warp_start, TimersState),
659         VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
660         VMSTATE_END_OF_LIST()
661     }
662 };
663 
664 static const VMStateDescription icount_vmstate_adjust_timers = {
665     .name = "timer/icount/timers",
666     .version_id = 1,
667     .minimum_version_id = 1,
668     .needed = adjust_timers_state_needed,
669     .fields = (VMStateField[]) {
670         VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
671         VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
672         VMSTATE_END_OF_LIST()
673     }
674 };
675 
676 /*
677  * This is a subsection for icount migration.
678  */
679 static const VMStateDescription icount_vmstate_timers = {
680     .name = "timer/icount",
681     .version_id = 1,
682     .minimum_version_id = 1,
683     .needed = icount_state_needed,
684     .fields = (VMStateField[]) {
685         VMSTATE_INT64(qemu_icount_bias, TimersState),
686         VMSTATE_INT64(qemu_icount, TimersState),
687         VMSTATE_END_OF_LIST()
688     },
689     .subsections = (const VMStateDescription*[]) {
690         &icount_vmstate_warp_timer,
691         &icount_vmstate_adjust_timers,
692         NULL
693     }
694 };
695 
696 static const VMStateDescription vmstate_timers = {
697     .name = "timer",
698     .version_id = 2,
699     .minimum_version_id = 1,
700     .fields = (VMStateField[]) {
701         VMSTATE_INT64(cpu_ticks_offset, TimersState),
702         VMSTATE_UNUSED(8),
703         VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
704         VMSTATE_END_OF_LIST()
705     },
706     .subsections = (const VMStateDescription*[]) {
707         &icount_vmstate_timers,
708         NULL
709     }
710 };
711 
cpu_throttle_thread(CPUState * cpu,run_on_cpu_data opaque)712 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
713 {
714     double pct;
715     double throttle_ratio;
716     int64_t sleeptime_ns, endtime_ns;
717 
718     if (!cpu_throttle_get_percentage()) {
719         return;
720     }
721 
722     pct = (double)cpu_throttle_get_percentage()/100;
723     throttle_ratio = pct / (1 - pct);
724     /* Add 1ns to fix double's rounding error (like 0.9999999...) */
725     sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
726     endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
727     while (sleeptime_ns > 0 && !cpu->stop) {
728         if (sleeptime_ns > SCALE_MS) {
729             qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
730                                 sleeptime_ns / SCALE_MS);
731         } else {
732             qemu_mutex_unlock_iothread();
733             g_usleep(sleeptime_ns / SCALE_US);
734             qemu_mutex_lock_iothread();
735         }
736         sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
737     }
738     atomic_set(&cpu->throttle_thread_scheduled, 0);
739 }
740 
cpu_throttle_timer_tick(void * opaque)741 static void cpu_throttle_timer_tick(void *opaque)
742 {
743     CPUState *cpu;
744     double pct;
745 
746     /* Stop the timer if needed */
747     if (!cpu_throttle_get_percentage()) {
748         return;
749     }
750     CPU_FOREACH(cpu) {
751         if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
752             async_run_on_cpu(cpu, cpu_throttle_thread,
753                              RUN_ON_CPU_NULL);
754         }
755     }
756 
757     pct = (double)cpu_throttle_get_percentage()/100;
758     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
759                                    CPU_THROTTLE_TIMESLICE_NS / (1-pct));
760 }
761 
cpu_throttle_set(int new_throttle_pct)762 void cpu_throttle_set(int new_throttle_pct)
763 {
764     /* Ensure throttle percentage is within valid range */
765     new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
766     new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
767 
768     atomic_set(&throttle_percentage, new_throttle_pct);
769 
770     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
771                                        CPU_THROTTLE_TIMESLICE_NS);
772 }
773 
cpu_throttle_stop(void)774 void cpu_throttle_stop(void)
775 {
776     atomic_set(&throttle_percentage, 0);
777 }
778 
cpu_throttle_active(void)779 bool cpu_throttle_active(void)
780 {
781     return (cpu_throttle_get_percentage() != 0);
782 }
783 
cpu_throttle_get_percentage(void)784 int cpu_throttle_get_percentage(void)
785 {
786     return atomic_read(&throttle_percentage);
787 }
788 
cpu_ticks_init(void)789 void cpu_ticks_init(void)
790 {
791     seqlock_init(&timers_state.vm_clock_seqlock);
792     qemu_spin_init(&timers_state.vm_clock_lock);
793     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
794     throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
795                                            cpu_throttle_timer_tick, NULL);
796 }
797 
configure_icount(QemuOpts * opts,Error ** errp)798 void configure_icount(QemuOpts *opts, Error **errp)
799 {
800     const char *option;
801     char *rem_str = NULL;
802 
803     option = qemu_opt_get(opts, "shift");
804     if (!option) {
805         if (qemu_opt_get(opts, "align") != NULL) {
806             error_setg(errp, "Please specify shift option when using align");
807         }
808         return;
809     }
810 
811     icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
812     if (icount_sleep) {
813         timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
814                                          icount_timer_cb, NULL);
815     }
816 
817     icount_align_option = qemu_opt_get_bool(opts, "align", false);
818 
819     if (icount_align_option && !icount_sleep) {
820         error_setg(errp, "align=on and sleep=off are incompatible");
821     }
822     if (strcmp(option, "auto") != 0) {
823         errno = 0;
824         timers_state.icount_time_shift = strtol(option, &rem_str, 0);
825         if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
826             error_setg(errp, "icount: Invalid shift value");
827         }
828         use_icount = 1;
829         return;
830     } else if (icount_align_option) {
831         error_setg(errp, "shift=auto and align=on are incompatible");
832     } else if (!icount_sleep) {
833         error_setg(errp, "shift=auto and sleep=off are incompatible");
834     }
835 
836     use_icount = 2;
837 
838     /* 125MIPS seems a reasonable initial guess at the guest speed.
839        It will be corrected fairly quickly anyway.  */
840     timers_state.icount_time_shift = 3;
841 
842     /* Have both realtime and virtual time triggers for speed adjustment.
843        The realtime trigger catches emulated time passing too slowly,
844        the virtual time trigger catches emulated time passing too fast.
845        Realtime triggers occur even when idle, so use them less frequently
846        than VM triggers.  */
847     timers_state.vm_clock_warp_start = -1;
848     timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
849                                    icount_adjust_rt, NULL);
850     timer_mod(timers_state.icount_rt_timer,
851                    qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
852     timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
853                                         icount_adjust_vm, NULL);
854     timer_mod(timers_state.icount_vm_timer,
855                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
856                    NANOSECONDS_PER_SECOND / 10);
857 }
858 
859 /***********************************************************/
860 /* TCG vCPU kick timer
861  *
862  * The kick timer is responsible for moving single threaded vCPU
863  * emulation on to the next vCPU. If more than one vCPU is running a
864  * timer event with force a cpu->exit so the next vCPU can get
865  * scheduled.
866  *
867  * The timer is removed if all vCPUs are idle and restarted again once
868  * idleness is complete.
869  */
870 
871 static QEMUTimer *tcg_kick_vcpu_timer;
872 static CPUState *tcg_current_rr_cpu;
873 
874 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
875 
qemu_tcg_next_kick(void)876 static inline int64_t qemu_tcg_next_kick(void)
877 {
878     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
879 }
880 
881 /* Kick the currently round-robin scheduled vCPU to next */
qemu_cpu_kick_rr_next_cpu(void)882 static void qemu_cpu_kick_rr_next_cpu(void)
883 {
884     CPUState *cpu;
885     do {
886         cpu = atomic_mb_read(&tcg_current_rr_cpu);
887         if (cpu) {
888             cpu_exit(cpu);
889         }
890     } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
891 }
892 
893 /* Kick all RR vCPUs */
qemu_cpu_kick_rr_cpus(void)894 static void qemu_cpu_kick_rr_cpus(void)
895 {
896     CPUState *cpu;
897 
898     CPU_FOREACH(cpu) {
899         cpu_exit(cpu);
900     };
901 }
902 
do_nothing(CPUState * cpu,run_on_cpu_data unused)903 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
904 {
905 }
906 
qemu_timer_notify_cb(void * opaque,QEMUClockType type)907 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
908 {
909     if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
910         qemu_notify_event();
911         return;
912     }
913 
914     if (qemu_in_vcpu_thread()) {
915         /* A CPU is currently running; kick it back out to the
916          * tcg_cpu_exec() loop so it will recalculate its
917          * icount deadline immediately.
918          */
919         qemu_cpu_kick(current_cpu);
920     } else if (first_cpu) {
921         /* qemu_cpu_kick is not enough to kick a halted CPU out of
922          * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
923          * causes cpu_thread_is_idle to return false.  This way,
924          * handle_icount_deadline can run.
925          * If we have no CPUs at all for some reason, we don't
926          * need to do anything.
927          */
928         async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
929     }
930 }
931 
kick_tcg_thread(void * opaque)932 static void kick_tcg_thread(void *opaque)
933 {
934     timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
935     qemu_cpu_kick_rr_next_cpu();
936 }
937 
start_tcg_kick_timer(void)938 static void start_tcg_kick_timer(void)
939 {
940     assert(!mttcg_enabled);
941     if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
942         tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
943                                            kick_tcg_thread, NULL);
944     }
945     if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
946         timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
947     }
948 }
949 
stop_tcg_kick_timer(void)950 static void stop_tcg_kick_timer(void)
951 {
952     assert(!mttcg_enabled);
953     if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
954         timer_del(tcg_kick_vcpu_timer);
955     }
956 }
957 
958 /***********************************************************/
hw_error(const char * fmt,...)959 void hw_error(const char *fmt, ...)
960 {
961     va_list ap;
962     CPUState *cpu;
963 
964     va_start(ap, fmt);
965     fprintf(stderr, "qemu: hardware error: ");
966     vfprintf(stderr, fmt, ap);
967     fprintf(stderr, "\n");
968     CPU_FOREACH(cpu) {
969         fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
970         cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
971     }
972     va_end(ap);
973     abort();
974 }
975 
cpu_synchronize_all_states(void)976 void cpu_synchronize_all_states(void)
977 {
978     CPUState *cpu;
979 
980     CPU_FOREACH(cpu) {
981         cpu_synchronize_state(cpu);
982         /* TODO: move to cpu_synchronize_state() */
983         if (hvf_enabled()) {
984             hvf_cpu_synchronize_state(cpu);
985         }
986     }
987 }
988 
cpu_synchronize_all_post_reset(void)989 void cpu_synchronize_all_post_reset(void)
990 {
991     CPUState *cpu;
992 
993     CPU_FOREACH(cpu) {
994         cpu_synchronize_post_reset(cpu);
995         /* TODO: move to cpu_synchronize_post_reset() */
996         if (hvf_enabled()) {
997             hvf_cpu_synchronize_post_reset(cpu);
998         }
999     }
1000 }
1001 
cpu_synchronize_all_post_init(void)1002 void cpu_synchronize_all_post_init(void)
1003 {
1004     CPUState *cpu;
1005 
1006     CPU_FOREACH(cpu) {
1007         cpu_synchronize_post_init(cpu);
1008         /* TODO: move to cpu_synchronize_post_init() */
1009         if (hvf_enabled()) {
1010             hvf_cpu_synchronize_post_init(cpu);
1011         }
1012     }
1013 }
1014 
cpu_synchronize_all_pre_loadvm(void)1015 void cpu_synchronize_all_pre_loadvm(void)
1016 {
1017     CPUState *cpu;
1018 
1019     CPU_FOREACH(cpu) {
1020         cpu_synchronize_pre_loadvm(cpu);
1021     }
1022 }
1023 
do_vm_stop(RunState state,bool send_stop)1024 static int do_vm_stop(RunState state, bool send_stop)
1025 {
1026     int ret = 0;
1027 
1028     if (runstate_is_running()) {
1029         runstate_set(state);
1030         cpu_disable_ticks();
1031         pause_all_vcpus();
1032         vm_state_notify(0, state);
1033         if (send_stop) {
1034             qapi_event_send_stop();
1035         }
1036     }
1037 
1038     bdrv_drain_all();
1039     ret = bdrv_flush_all();
1040 
1041     return ret;
1042 }
1043 
1044 /* Special vm_stop() variant for terminating the process.  Historically clients
1045  * did not expect a QMP STOP event and so we need to retain compatibility.
1046  */
vm_shutdown(void)1047 int vm_shutdown(void)
1048 {
1049     return do_vm_stop(RUN_STATE_SHUTDOWN, false);
1050 }
1051 
cpu_can_run(CPUState * cpu)1052 static bool cpu_can_run(CPUState *cpu)
1053 {
1054     if (cpu->stop) {
1055         return false;
1056     }
1057     if (cpu_is_stopped(cpu)) {
1058         return false;
1059     }
1060     return true;
1061 }
1062 
cpu_handle_guest_debug(CPUState * cpu)1063 static void cpu_handle_guest_debug(CPUState *cpu)
1064 {
1065     gdb_set_stop_cpu(cpu);
1066     qemu_system_debug_request();
1067     cpu->stopped = true;
1068 }
1069 
1070 #ifdef CONFIG_LINUX
sigbus_reraise(void)1071 static void sigbus_reraise(void)
1072 {
1073     sigset_t set;
1074     struct sigaction action;
1075 
1076     memset(&action, 0, sizeof(action));
1077     action.sa_handler = SIG_DFL;
1078     if (!sigaction(SIGBUS, &action, NULL)) {
1079         raise(SIGBUS);
1080         sigemptyset(&set);
1081         sigaddset(&set, SIGBUS);
1082         pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1083     }
1084     perror("Failed to re-raise SIGBUS!\n");
1085     abort();
1086 }
1087 
sigbus_handler(int n,siginfo_t * siginfo,void * ctx)1088 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1089 {
1090     if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1091         sigbus_reraise();
1092     }
1093 
1094     if (current_cpu) {
1095         /* Called asynchronously in VCPU thread.  */
1096         if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1097             sigbus_reraise();
1098         }
1099     } else {
1100         /* Called synchronously (via signalfd) in main thread.  */
1101         if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1102             sigbus_reraise();
1103         }
1104     }
1105 }
1106 
qemu_init_sigbus(void)1107 static void qemu_init_sigbus(void)
1108 {
1109     struct sigaction action;
1110 
1111     memset(&action, 0, sizeof(action));
1112     action.sa_flags = SA_SIGINFO;
1113     action.sa_sigaction = sigbus_handler;
1114     sigaction(SIGBUS, &action, NULL);
1115 
1116     prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1117 }
1118 #else /* !CONFIG_LINUX */
qemu_init_sigbus(void)1119 static void qemu_init_sigbus(void)
1120 {
1121 }
1122 #endif /* !CONFIG_LINUX */
1123 
1124 static QemuThread io_thread;
1125 
1126 /* cpu creation */
1127 static QemuCond qemu_cpu_cond;
1128 /* system init */
1129 static QemuCond qemu_pause_cond;
1130 
qemu_init_cpu_loop(void)1131 void qemu_init_cpu_loop(void)
1132 {
1133     qemu_init_sigbus();
1134     qemu_cond_init(&qemu_cpu_cond);
1135     qemu_cond_init(&qemu_pause_cond);
1136     qemu_mutex_init(&qemu_global_mutex);
1137 
1138     qemu_thread_get_self(&io_thread);
1139 }
1140 
run_on_cpu(CPUState * cpu,run_on_cpu_func func,run_on_cpu_data data)1141 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1142 {
1143     do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1144 }
1145 
qemu_kvm_destroy_vcpu(CPUState * cpu)1146 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1147 {
1148     if (kvm_destroy_vcpu(cpu) < 0) {
1149         error_report("kvm_destroy_vcpu failed");
1150         exit(EXIT_FAILURE);
1151     }
1152 }
1153 
qemu_tcg_destroy_vcpu(CPUState * cpu)1154 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1155 {
1156 }
1157 
qemu_cpu_stop(CPUState * cpu,bool exit)1158 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1159 {
1160     g_assert(qemu_cpu_is_self(cpu));
1161     cpu->stop = false;
1162     cpu->stopped = true;
1163     if (exit) {
1164         cpu_exit(cpu);
1165     }
1166     qemu_cond_broadcast(&qemu_pause_cond);
1167 }
1168 
qemu_wait_io_event_common(CPUState * cpu)1169 static void qemu_wait_io_event_common(CPUState *cpu)
1170 {
1171     atomic_mb_set(&cpu->thread_kicked, false);
1172     if (cpu->stop) {
1173         qemu_cpu_stop(cpu, false);
1174     }
1175     process_queued_cpu_work(cpu);
1176 }
1177 
qemu_tcg_rr_wait_io_event(void)1178 static void qemu_tcg_rr_wait_io_event(void)
1179 {
1180     CPUState *cpu;
1181 
1182     while (all_cpu_threads_idle()) {
1183         stop_tcg_kick_timer();
1184         qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1185     }
1186 
1187     start_tcg_kick_timer();
1188 
1189     CPU_FOREACH(cpu) {
1190         qemu_wait_io_event_common(cpu);
1191     }
1192 }
1193 
qemu_wait_io_event(CPUState * cpu)1194 static void qemu_wait_io_event(CPUState *cpu)
1195 {
1196     bool slept = false;
1197 
1198     while (cpu_thread_is_idle(cpu)) {
1199         if (!slept) {
1200             slept = true;
1201             qemu_plugin_vcpu_idle_cb(cpu);
1202         }
1203         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1204     }
1205     if (slept) {
1206         qemu_plugin_vcpu_resume_cb(cpu);
1207     }
1208 
1209 #ifdef _WIN32
1210     /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
1211     if (!tcg_enabled()) {
1212         SleepEx(0, TRUE);
1213     }
1214 #endif
1215     qemu_wait_io_event_common(cpu);
1216 }
1217 
qemu_kvm_cpu_thread_fn(void * arg)1218 static void *qemu_kvm_cpu_thread_fn(void *arg)
1219 {
1220     CPUState *cpu = arg;
1221     int r;
1222 
1223     rcu_register_thread();
1224 
1225     qemu_mutex_lock_iothread();
1226     qemu_thread_get_self(cpu->thread);
1227     cpu->thread_id = qemu_get_thread_id();
1228     cpu->can_do_io = 1;
1229     current_cpu = cpu;
1230 
1231     r = kvm_init_vcpu(cpu);
1232     if (r < 0) {
1233         error_report("kvm_init_vcpu failed: %s", strerror(-r));
1234         exit(1);
1235     }
1236 
1237     kvm_init_cpu_signals(cpu);
1238 
1239     /* signal CPU creation */
1240     cpu->created = true;
1241     qemu_cond_signal(&qemu_cpu_cond);
1242     qemu_guest_random_seed_thread_part2(cpu->random_seed);
1243 
1244     do {
1245         if (cpu_can_run(cpu)) {
1246             r = kvm_cpu_exec(cpu);
1247             if (r == EXCP_DEBUG) {
1248                 cpu_handle_guest_debug(cpu);
1249             }
1250         }
1251         qemu_wait_io_event(cpu);
1252     } while (!cpu->unplug || cpu_can_run(cpu));
1253 
1254     qemu_kvm_destroy_vcpu(cpu);
1255     cpu->created = false;
1256     qemu_cond_signal(&qemu_cpu_cond);
1257     qemu_mutex_unlock_iothread();
1258     rcu_unregister_thread();
1259     return NULL;
1260 }
1261 
qemu_dummy_cpu_thread_fn(void * arg)1262 static void *qemu_dummy_cpu_thread_fn(void *arg)
1263 {
1264 #ifdef _WIN32
1265     error_report("qtest is not supported under Windows");
1266     exit(1);
1267 #else
1268     CPUState *cpu = arg;
1269     sigset_t waitset;
1270     int r;
1271 
1272     rcu_register_thread();
1273 
1274     qemu_mutex_lock_iothread();
1275     qemu_thread_get_self(cpu->thread);
1276     cpu->thread_id = qemu_get_thread_id();
1277     cpu->can_do_io = 1;
1278     current_cpu = cpu;
1279 
1280     sigemptyset(&waitset);
1281     sigaddset(&waitset, SIG_IPI);
1282 
1283     /* signal CPU creation */
1284     cpu->created = true;
1285     qemu_cond_signal(&qemu_cpu_cond);
1286     qemu_guest_random_seed_thread_part2(cpu->random_seed);
1287 
1288     do {
1289         qemu_mutex_unlock_iothread();
1290         do {
1291             int sig;
1292             r = sigwait(&waitset, &sig);
1293         } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1294         if (r == -1) {
1295             perror("sigwait");
1296             exit(1);
1297         }
1298         qemu_mutex_lock_iothread();
1299         qemu_wait_io_event(cpu);
1300     } while (!cpu->unplug);
1301 
1302     qemu_mutex_unlock_iothread();
1303     rcu_unregister_thread();
1304     return NULL;
1305 #endif
1306 }
1307 
tcg_get_icount_limit(void)1308 static int64_t tcg_get_icount_limit(void)
1309 {
1310     int64_t deadline;
1311 
1312     if (replay_mode != REPLAY_MODE_PLAY) {
1313         /*
1314          * Include all the timers, because they may need an attention.
1315          * Too long CPU execution may create unnecessary delay in UI.
1316          */
1317         deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1318                                               QEMU_TIMER_ATTR_ALL);
1319         /* Check realtime timers, because they help with input processing */
1320         deadline = qemu_soonest_timeout(deadline,
1321                 qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
1322                                            QEMU_TIMER_ATTR_ALL));
1323 
1324         /* Maintain prior (possibly buggy) behaviour where if no deadline
1325          * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1326          * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1327          * nanoseconds.
1328          */
1329         if ((deadline < 0) || (deadline > INT32_MAX)) {
1330             deadline = INT32_MAX;
1331         }
1332 
1333         return qemu_icount_round(deadline);
1334     } else {
1335         return replay_get_instructions();
1336     }
1337 }
1338 
handle_icount_deadline(void)1339 static void handle_icount_deadline(void)
1340 {
1341     assert(qemu_in_vcpu_thread());
1342     if (use_icount) {
1343         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1344                                                       QEMU_TIMER_ATTR_ALL);
1345 
1346         if (deadline == 0) {
1347             /* Wake up other AioContexts.  */
1348             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1349             qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1350         }
1351     }
1352 }
1353 
prepare_icount_for_run(CPUState * cpu)1354 static void prepare_icount_for_run(CPUState *cpu)
1355 {
1356     if (use_icount) {
1357         int insns_left;
1358 
1359         /* These should always be cleared by process_icount_data after
1360          * each vCPU execution. However u16.high can be raised
1361          * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1362          */
1363         g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
1364         g_assert(cpu->icount_extra == 0);
1365 
1366         cpu->icount_budget = tcg_get_icount_limit();
1367         insns_left = MIN(0xffff, cpu->icount_budget);
1368         cpu_neg(cpu)->icount_decr.u16.low = insns_left;
1369         cpu->icount_extra = cpu->icount_budget - insns_left;
1370 
1371         replay_mutex_lock();
1372     }
1373 }
1374 
process_icount_data(CPUState * cpu)1375 static void process_icount_data(CPUState *cpu)
1376 {
1377     if (use_icount) {
1378         /* Account for executed instructions */
1379         cpu_update_icount(cpu);
1380 
1381         /* Reset the counters */
1382         cpu_neg(cpu)->icount_decr.u16.low = 0;
1383         cpu->icount_extra = 0;
1384         cpu->icount_budget = 0;
1385 
1386         replay_account_executed_instructions();
1387 
1388         replay_mutex_unlock();
1389     }
1390 }
1391 
1392 
tcg_cpu_exec(CPUState * cpu)1393 static int tcg_cpu_exec(CPUState *cpu)
1394 {
1395     int ret;
1396 #ifdef CONFIG_PROFILER
1397     int64_t ti;
1398 #endif
1399 
1400     assert(tcg_enabled());
1401 #ifdef CONFIG_PROFILER
1402     ti = profile_getclock();
1403 #endif
1404     cpu_exec_start(cpu);
1405     ret = cpu_exec(cpu);
1406     cpu_exec_end(cpu);
1407 #ifdef CONFIG_PROFILER
1408     atomic_set(&tcg_ctx->prof.cpu_exec_time,
1409                tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
1410 #endif
1411     return ret;
1412 }
1413 
1414 /* Destroy any remaining vCPUs which have been unplugged and have
1415  * finished running
1416  */
deal_with_unplugged_cpus(void)1417 static void deal_with_unplugged_cpus(void)
1418 {
1419     CPUState *cpu;
1420 
1421     CPU_FOREACH(cpu) {
1422         if (cpu->unplug && !cpu_can_run(cpu)) {
1423             qemu_tcg_destroy_vcpu(cpu);
1424             cpu->created = false;
1425             qemu_cond_signal(&qemu_cpu_cond);
1426             break;
1427         }
1428     }
1429 }
1430 
1431 /* Single-threaded TCG
1432  *
1433  * In the single-threaded case each vCPU is simulated in turn. If
1434  * there is more than a single vCPU we create a simple timer to kick
1435  * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1436  * This is done explicitly rather than relying on side-effects
1437  * elsewhere.
1438  */
1439 
qemu_tcg_rr_cpu_thread_fn(void * arg)1440 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1441 {
1442     CPUState *cpu = arg;
1443 
1444     assert(tcg_enabled());
1445     rcu_register_thread();
1446     tcg_register_thread();
1447 
1448     qemu_mutex_lock_iothread();
1449     qemu_thread_get_self(cpu->thread);
1450 
1451     cpu->thread_id = qemu_get_thread_id();
1452     cpu->created = true;
1453     cpu->can_do_io = 1;
1454     qemu_cond_signal(&qemu_cpu_cond);
1455     qemu_guest_random_seed_thread_part2(cpu->random_seed);
1456 
1457     /* wait for initial kick-off after machine start */
1458     while (first_cpu->stopped) {
1459         qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1460 
1461         /* process any pending work */
1462         CPU_FOREACH(cpu) {
1463             current_cpu = cpu;
1464             qemu_wait_io_event_common(cpu);
1465         }
1466     }
1467 
1468     start_tcg_kick_timer();
1469 
1470     cpu = first_cpu;
1471 
1472     /* process any pending work */
1473     cpu->exit_request = 1;
1474 
1475     while (1) {
1476         qemu_mutex_unlock_iothread();
1477         replay_mutex_lock();
1478         qemu_mutex_lock_iothread();
1479         /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
1480         qemu_account_warp_timer();
1481 
1482         /* Run the timers here.  This is much more efficient than
1483          * waking up the I/O thread and waiting for completion.
1484          */
1485         handle_icount_deadline();
1486 
1487         replay_mutex_unlock();
1488 
1489         if (!cpu) {
1490             cpu = first_cpu;
1491         }
1492 
1493         while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1494 
1495             atomic_mb_set(&tcg_current_rr_cpu, cpu);
1496             current_cpu = cpu;
1497 
1498             qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1499                               (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1500 
1501             if (cpu_can_run(cpu)) {
1502                 int r;
1503 
1504                 qemu_mutex_unlock_iothread();
1505                 prepare_icount_for_run(cpu);
1506 
1507                 r = tcg_cpu_exec(cpu);
1508 
1509                 process_icount_data(cpu);
1510                 qemu_mutex_lock_iothread();
1511 
1512                 if (r == EXCP_DEBUG) {
1513                     cpu_handle_guest_debug(cpu);
1514                     break;
1515                 } else if (r == EXCP_ATOMIC) {
1516                     qemu_mutex_unlock_iothread();
1517                     cpu_exec_step_atomic(cpu);
1518                     qemu_mutex_lock_iothread();
1519                     break;
1520                 }
1521             } else if (cpu->stop) {
1522                 if (cpu->unplug) {
1523                     cpu = CPU_NEXT(cpu);
1524                 }
1525                 break;
1526             }
1527 
1528             cpu = CPU_NEXT(cpu);
1529         } /* while (cpu && !cpu->exit_request).. */
1530 
1531         /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
1532         atomic_set(&tcg_current_rr_cpu, NULL);
1533 
1534         if (cpu && cpu->exit_request) {
1535             atomic_mb_set(&cpu->exit_request, 0);
1536         }
1537 
1538         if (use_icount && all_cpu_threads_idle()) {
1539             /*
1540              * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1541              * in the main_loop, wake it up in order to start the warp timer.
1542              */
1543             qemu_notify_event();
1544         }
1545 
1546         qemu_tcg_rr_wait_io_event();
1547         deal_with_unplugged_cpus();
1548     }
1549 
1550     rcu_unregister_thread();
1551     return NULL;
1552 }
1553 
qemu_hax_cpu_thread_fn(void * arg)1554 static void *qemu_hax_cpu_thread_fn(void *arg)
1555 {
1556     CPUState *cpu = arg;
1557     int r;
1558 
1559     rcu_register_thread();
1560     qemu_mutex_lock_iothread();
1561     qemu_thread_get_self(cpu->thread);
1562 
1563     cpu->thread_id = qemu_get_thread_id();
1564     cpu->created = true;
1565     current_cpu = cpu;
1566 
1567     hax_init_vcpu(cpu);
1568     qemu_cond_signal(&qemu_cpu_cond);
1569     qemu_guest_random_seed_thread_part2(cpu->random_seed);
1570 
1571     do {
1572         if (cpu_can_run(cpu)) {
1573             r = hax_smp_cpu_exec(cpu);
1574             if (r == EXCP_DEBUG) {
1575                 cpu_handle_guest_debug(cpu);
1576             }
1577         }
1578 
1579         qemu_wait_io_event(cpu);
1580     } while (!cpu->unplug || cpu_can_run(cpu));
1581     rcu_unregister_thread();
1582     return NULL;
1583 }
1584 
1585 /* The HVF-specific vCPU thread function. This one should only run when the host
1586  * CPU supports the VMX "unrestricted guest" feature. */
qemu_hvf_cpu_thread_fn(void * arg)1587 static void *qemu_hvf_cpu_thread_fn(void *arg)
1588 {
1589     CPUState *cpu = arg;
1590 
1591     int r;
1592 
1593     assert(hvf_enabled());
1594 
1595     rcu_register_thread();
1596 
1597     qemu_mutex_lock_iothread();
1598     qemu_thread_get_self(cpu->thread);
1599 
1600     cpu->thread_id = qemu_get_thread_id();
1601     cpu->can_do_io = 1;
1602     current_cpu = cpu;
1603 
1604     hvf_init_vcpu(cpu);
1605 
1606     /* signal CPU creation */
1607     cpu->created = true;
1608     qemu_cond_signal(&qemu_cpu_cond);
1609     qemu_guest_random_seed_thread_part2(cpu->random_seed);
1610 
1611     do {
1612         if (cpu_can_run(cpu)) {
1613             r = hvf_vcpu_exec(cpu);
1614             if (r == EXCP_DEBUG) {
1615                 cpu_handle_guest_debug(cpu);
1616             }
1617         }
1618         qemu_wait_io_event(cpu);
1619     } while (!cpu->unplug || cpu_can_run(cpu));
1620 
1621     hvf_vcpu_destroy(cpu);
1622     cpu->created = false;
1623     qemu_cond_signal(&qemu_cpu_cond);
1624     qemu_mutex_unlock_iothread();
1625     rcu_unregister_thread();
1626     return NULL;
1627 }
1628 
qemu_whpx_cpu_thread_fn(void * arg)1629 static void *qemu_whpx_cpu_thread_fn(void *arg)
1630 {
1631     CPUState *cpu = arg;
1632     int r;
1633 
1634     rcu_register_thread();
1635 
1636     qemu_mutex_lock_iothread();
1637     qemu_thread_get_self(cpu->thread);
1638     cpu->thread_id = qemu_get_thread_id();
1639     current_cpu = cpu;
1640 
1641     r = whpx_init_vcpu(cpu);
1642     if (r < 0) {
1643         fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1644         exit(1);
1645     }
1646 
1647     /* signal CPU creation */
1648     cpu->created = true;
1649     qemu_cond_signal(&qemu_cpu_cond);
1650     qemu_guest_random_seed_thread_part2(cpu->random_seed);
1651 
1652     do {
1653         if (cpu_can_run(cpu)) {
1654             r = whpx_vcpu_exec(cpu);
1655             if (r == EXCP_DEBUG) {
1656                 cpu_handle_guest_debug(cpu);
1657             }
1658         }
1659         while (cpu_thread_is_idle(cpu)) {
1660             qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1661         }
1662         qemu_wait_io_event_common(cpu);
1663     } while (!cpu->unplug || cpu_can_run(cpu));
1664 
1665     whpx_destroy_vcpu(cpu);
1666     cpu->created = false;
1667     qemu_cond_signal(&qemu_cpu_cond);
1668     qemu_mutex_unlock_iothread();
1669     rcu_unregister_thread();
1670     return NULL;
1671 }
1672 
1673 #ifdef _WIN32
dummy_apc_func(ULONG_PTR unused)1674 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1675 {
1676 }
1677 #endif
1678 
1679 /* Multi-threaded TCG
1680  *
1681  * In the multi-threaded case each vCPU has its own thread. The TLS
1682  * variable current_cpu can be used deep in the code to find the
1683  * current CPUState for a given thread.
1684  */
1685 
qemu_tcg_cpu_thread_fn(void * arg)1686 static void *qemu_tcg_cpu_thread_fn(void *arg)
1687 {
1688     CPUState *cpu = arg;
1689 
1690     assert(tcg_enabled());
1691     g_assert(!use_icount);
1692 
1693     rcu_register_thread();
1694     tcg_register_thread();
1695 
1696     qemu_mutex_lock_iothread();
1697     qemu_thread_get_self(cpu->thread);
1698 
1699     cpu->thread_id = qemu_get_thread_id();
1700     cpu->created = true;
1701     cpu->can_do_io = 1;
1702     current_cpu = cpu;
1703     qemu_cond_signal(&qemu_cpu_cond);
1704     qemu_guest_random_seed_thread_part2(cpu->random_seed);
1705 
1706     /* process any pending work */
1707     cpu->exit_request = 1;
1708 
1709     do {
1710         if (cpu_can_run(cpu)) {
1711             int r;
1712             qemu_mutex_unlock_iothread();
1713             r = tcg_cpu_exec(cpu);
1714             qemu_mutex_lock_iothread();
1715             switch (r) {
1716             case EXCP_DEBUG:
1717                 cpu_handle_guest_debug(cpu);
1718                 break;
1719             case EXCP_HALTED:
1720                 /* during start-up the vCPU is reset and the thread is
1721                  * kicked several times. If we don't ensure we go back
1722                  * to sleep in the halted state we won't cleanly
1723                  * start-up when the vCPU is enabled.
1724                  *
1725                  * cpu->halted should ensure we sleep in wait_io_event
1726                  */
1727                 g_assert(cpu->halted);
1728                 break;
1729             case EXCP_ATOMIC:
1730                 qemu_mutex_unlock_iothread();
1731                 cpu_exec_step_atomic(cpu);
1732                 qemu_mutex_lock_iothread();
1733             default:
1734                 /* Ignore everything else? */
1735                 break;
1736             }
1737         }
1738 
1739         atomic_mb_set(&cpu->exit_request, 0);
1740         qemu_wait_io_event(cpu);
1741     } while (!cpu->unplug || cpu_can_run(cpu));
1742 
1743     qemu_tcg_destroy_vcpu(cpu);
1744     cpu->created = false;
1745     qemu_cond_signal(&qemu_cpu_cond);
1746     qemu_mutex_unlock_iothread();
1747     rcu_unregister_thread();
1748     return NULL;
1749 }
1750 
qemu_cpu_kick_thread(CPUState * cpu)1751 static void qemu_cpu_kick_thread(CPUState *cpu)
1752 {
1753 #ifndef _WIN32
1754     int err;
1755 
1756     if (cpu->thread_kicked) {
1757         return;
1758     }
1759     cpu->thread_kicked = true;
1760     err = pthread_kill(cpu->thread->thread, SIG_IPI);
1761     if (err && err != ESRCH) {
1762         fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1763         exit(1);
1764     }
1765 #else /* _WIN32 */
1766     if (!qemu_cpu_is_self(cpu)) {
1767         if (whpx_enabled()) {
1768             whpx_vcpu_kick(cpu);
1769         } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1770             fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1771                     __func__, GetLastError());
1772             exit(1);
1773         }
1774     }
1775 #endif
1776 }
1777 
qemu_cpu_kick(CPUState * cpu)1778 void qemu_cpu_kick(CPUState *cpu)
1779 {
1780     qemu_cond_broadcast(cpu->halt_cond);
1781     if (tcg_enabled()) {
1782         if (qemu_tcg_mttcg_enabled()) {
1783             cpu_exit(cpu);
1784         } else {
1785             qemu_cpu_kick_rr_cpus();
1786         }
1787     } else {
1788         if (hax_enabled()) {
1789             /*
1790              * FIXME: race condition with the exit_request check in
1791              * hax_vcpu_hax_exec
1792              */
1793             cpu->exit_request = 1;
1794         }
1795         qemu_cpu_kick_thread(cpu);
1796     }
1797 }
1798 
qemu_cpu_kick_self(void)1799 void qemu_cpu_kick_self(void)
1800 {
1801     assert(current_cpu);
1802     qemu_cpu_kick_thread(current_cpu);
1803 }
1804 
qemu_cpu_is_self(CPUState * cpu)1805 bool qemu_cpu_is_self(CPUState *cpu)
1806 {
1807     return qemu_thread_is_self(cpu->thread);
1808 }
1809 
qemu_in_vcpu_thread(void)1810 bool qemu_in_vcpu_thread(void)
1811 {
1812     return current_cpu && qemu_cpu_is_self(current_cpu);
1813 }
1814 
1815 static __thread bool iothread_locked = false;
1816 
qemu_mutex_iothread_locked(void)1817 bool qemu_mutex_iothread_locked(void)
1818 {
1819     return iothread_locked;
1820 }
1821 
1822 /*
1823  * The BQL is taken from so many places that it is worth profiling the
1824  * callers directly, instead of funneling them all through a single function.
1825  */
qemu_mutex_lock_iothread_impl(const char * file,int line)1826 void qemu_mutex_lock_iothread_impl(const char *file, int line)
1827 {
1828     QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1829 
1830     g_assert(!qemu_mutex_iothread_locked());
1831     bql_lock(&qemu_global_mutex, file, line);
1832     iothread_locked = true;
1833 }
1834 
qemu_mutex_unlock_iothread(void)1835 void qemu_mutex_unlock_iothread(void)
1836 {
1837     g_assert(qemu_mutex_iothread_locked());
1838     iothread_locked = false;
1839     qemu_mutex_unlock(&qemu_global_mutex);
1840 }
1841 
qemu_cond_wait_iothread(QemuCond * cond)1842 void qemu_cond_wait_iothread(QemuCond *cond)
1843 {
1844     qemu_cond_wait(cond, &qemu_global_mutex);
1845 }
1846 
all_vcpus_paused(void)1847 static bool all_vcpus_paused(void)
1848 {
1849     CPUState *cpu;
1850 
1851     CPU_FOREACH(cpu) {
1852         if (!cpu->stopped) {
1853             return false;
1854         }
1855     }
1856 
1857     return true;
1858 }
1859 
pause_all_vcpus(void)1860 void pause_all_vcpus(void)
1861 {
1862     CPUState *cpu;
1863 
1864     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1865     CPU_FOREACH(cpu) {
1866         if (qemu_cpu_is_self(cpu)) {
1867             qemu_cpu_stop(cpu, true);
1868         } else {
1869             cpu->stop = true;
1870             qemu_cpu_kick(cpu);
1871         }
1872     }
1873 
1874     /* We need to drop the replay_lock so any vCPU threads woken up
1875      * can finish their replay tasks
1876      */
1877     replay_mutex_unlock();
1878 
1879     while (!all_vcpus_paused()) {
1880         qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1881         CPU_FOREACH(cpu) {
1882             qemu_cpu_kick(cpu);
1883         }
1884     }
1885 
1886     qemu_mutex_unlock_iothread();
1887     replay_mutex_lock();
1888     qemu_mutex_lock_iothread();
1889 }
1890 
cpu_resume(CPUState * cpu)1891 void cpu_resume(CPUState *cpu)
1892 {
1893     cpu->stop = false;
1894     cpu->stopped = false;
1895     qemu_cpu_kick(cpu);
1896 }
1897 
resume_all_vcpus(void)1898 void resume_all_vcpus(void)
1899 {
1900     CPUState *cpu;
1901 
1902     if (!runstate_is_running()) {
1903         return;
1904     }
1905 
1906     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1907     CPU_FOREACH(cpu) {
1908         cpu_resume(cpu);
1909     }
1910 }
1911 
cpu_remove_sync(CPUState * cpu)1912 void cpu_remove_sync(CPUState *cpu)
1913 {
1914     cpu->stop = true;
1915     cpu->unplug = true;
1916     qemu_cpu_kick(cpu);
1917     qemu_mutex_unlock_iothread();
1918     qemu_thread_join(cpu->thread);
1919     qemu_mutex_lock_iothread();
1920 }
1921 
1922 /* For temporary buffers for forming a name */
1923 #define VCPU_THREAD_NAME_SIZE 16
1924 
qemu_tcg_init_vcpu(CPUState * cpu)1925 static void qemu_tcg_init_vcpu(CPUState *cpu)
1926 {
1927     char thread_name[VCPU_THREAD_NAME_SIZE];
1928     static QemuCond *single_tcg_halt_cond;
1929     static QemuThread *single_tcg_cpu_thread;
1930     static int tcg_region_inited;
1931 
1932     assert(tcg_enabled());
1933     /*
1934      * Initialize TCG regions--once. Now is a good time, because:
1935      * (1) TCG's init context, prologue and target globals have been set up.
1936      * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1937      *     -accel flag is processed, so the check doesn't work then).
1938      */
1939     if (!tcg_region_inited) {
1940         tcg_region_inited = 1;
1941         tcg_region_init();
1942     }
1943 
1944     if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1945         cpu->thread = g_malloc0(sizeof(QemuThread));
1946         cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1947         qemu_cond_init(cpu->halt_cond);
1948 
1949         if (qemu_tcg_mttcg_enabled()) {
1950             /* create a thread per vCPU with TCG (MTTCG) */
1951             parallel_cpus = true;
1952             snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1953                  cpu->cpu_index);
1954 
1955             qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1956                                cpu, QEMU_THREAD_JOINABLE);
1957 
1958         } else {
1959             /* share a single thread for all cpus with TCG */
1960             snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1961             qemu_thread_create(cpu->thread, thread_name,
1962                                qemu_tcg_rr_cpu_thread_fn,
1963                                cpu, QEMU_THREAD_JOINABLE);
1964 
1965             single_tcg_halt_cond = cpu->halt_cond;
1966             single_tcg_cpu_thread = cpu->thread;
1967         }
1968 #ifdef _WIN32
1969         cpu->hThread = qemu_thread_get_handle(cpu->thread);
1970 #endif
1971     } else {
1972         /* For non-MTTCG cases we share the thread */
1973         cpu->thread = single_tcg_cpu_thread;
1974         cpu->halt_cond = single_tcg_halt_cond;
1975         cpu->thread_id = first_cpu->thread_id;
1976         cpu->can_do_io = 1;
1977         cpu->created = true;
1978     }
1979 }
1980 
qemu_hax_start_vcpu(CPUState * cpu)1981 static void qemu_hax_start_vcpu(CPUState *cpu)
1982 {
1983     char thread_name[VCPU_THREAD_NAME_SIZE];
1984 
1985     cpu->thread = g_malloc0(sizeof(QemuThread));
1986     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1987     qemu_cond_init(cpu->halt_cond);
1988 
1989     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1990              cpu->cpu_index);
1991     qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1992                        cpu, QEMU_THREAD_JOINABLE);
1993 #ifdef _WIN32
1994     cpu->hThread = qemu_thread_get_handle(cpu->thread);
1995 #endif
1996 }
1997 
qemu_kvm_start_vcpu(CPUState * cpu)1998 static void qemu_kvm_start_vcpu(CPUState *cpu)
1999 {
2000     char thread_name[VCPU_THREAD_NAME_SIZE];
2001 
2002     cpu->thread = g_malloc0(sizeof(QemuThread));
2003     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2004     qemu_cond_init(cpu->halt_cond);
2005     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2006              cpu->cpu_index);
2007     qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
2008                        cpu, QEMU_THREAD_JOINABLE);
2009 }
2010 
qemu_hvf_start_vcpu(CPUState * cpu)2011 static void qemu_hvf_start_vcpu(CPUState *cpu)
2012 {
2013     char thread_name[VCPU_THREAD_NAME_SIZE];
2014 
2015     /* HVF currently does not support TCG, and only runs in
2016      * unrestricted-guest mode. */
2017     assert(hvf_enabled());
2018 
2019     cpu->thread = g_malloc0(sizeof(QemuThread));
2020     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2021     qemu_cond_init(cpu->halt_cond);
2022 
2023     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2024              cpu->cpu_index);
2025     qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2026                        cpu, QEMU_THREAD_JOINABLE);
2027 }
2028 
qemu_whpx_start_vcpu(CPUState * cpu)2029 static void qemu_whpx_start_vcpu(CPUState *cpu)
2030 {
2031     char thread_name[VCPU_THREAD_NAME_SIZE];
2032 
2033     cpu->thread = g_malloc0(sizeof(QemuThread));
2034     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2035     qemu_cond_init(cpu->halt_cond);
2036     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2037              cpu->cpu_index);
2038     qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2039                        cpu, QEMU_THREAD_JOINABLE);
2040 #ifdef _WIN32
2041     cpu->hThread = qemu_thread_get_handle(cpu->thread);
2042 #endif
2043 }
2044 
qemu_dummy_start_vcpu(CPUState * cpu)2045 static void qemu_dummy_start_vcpu(CPUState *cpu)
2046 {
2047     char thread_name[VCPU_THREAD_NAME_SIZE];
2048 
2049     cpu->thread = g_malloc0(sizeof(QemuThread));
2050     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2051     qemu_cond_init(cpu->halt_cond);
2052     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2053              cpu->cpu_index);
2054     qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2055                        QEMU_THREAD_JOINABLE);
2056 }
2057 
qemu_init_vcpu(CPUState * cpu)2058 void qemu_init_vcpu(CPUState *cpu)
2059 {
2060     MachineState *ms = MACHINE(qdev_get_machine());
2061 
2062     cpu->nr_cores = ms->smp.cores;
2063     cpu->nr_threads =  ms->smp.threads;
2064     cpu->stopped = true;
2065     cpu->random_seed = qemu_guest_random_seed_thread_part1();
2066 
2067     if (!cpu->as) {
2068         /* If the target cpu hasn't set up any address spaces itself,
2069          * give it the default one.
2070          */
2071         cpu->num_ases = 1;
2072         cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2073     }
2074 
2075     if (kvm_enabled()) {
2076         qemu_kvm_start_vcpu(cpu);
2077     } else if (hax_enabled()) {
2078         qemu_hax_start_vcpu(cpu);
2079     } else if (hvf_enabled()) {
2080         qemu_hvf_start_vcpu(cpu);
2081     } else if (tcg_enabled()) {
2082         qemu_tcg_init_vcpu(cpu);
2083     } else if (whpx_enabled()) {
2084         qemu_whpx_start_vcpu(cpu);
2085     } else {
2086         qemu_dummy_start_vcpu(cpu);
2087     }
2088 
2089     while (!cpu->created) {
2090         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2091     }
2092 }
2093 
cpu_stop_current(void)2094 void cpu_stop_current(void)
2095 {
2096     if (current_cpu) {
2097         current_cpu->stop = true;
2098         cpu_exit(current_cpu);
2099     }
2100 }
2101 
vm_stop(RunState state)2102 int vm_stop(RunState state)
2103 {
2104     if (qemu_in_vcpu_thread()) {
2105         qemu_system_vmstop_request_prepare();
2106         qemu_system_vmstop_request(state);
2107         /*
2108          * FIXME: should not return to device code in case
2109          * vm_stop() has been requested.
2110          */
2111         cpu_stop_current();
2112         return 0;
2113     }
2114 
2115     return do_vm_stop(state, true);
2116 }
2117 
2118 /**
2119  * Prepare for (re)starting the VM.
2120  * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2121  * running or in case of an error condition), 0 otherwise.
2122  */
vm_prepare_start(void)2123 int vm_prepare_start(void)
2124 {
2125     RunState requested;
2126 
2127     qemu_vmstop_requested(&requested);
2128     if (runstate_is_running() && requested == RUN_STATE__MAX) {
2129         return -1;
2130     }
2131 
2132     /* Ensure that a STOP/RESUME pair of events is emitted if a
2133      * vmstop request was pending.  The BLOCK_IO_ERROR event, for
2134      * example, according to documentation is always followed by
2135      * the STOP event.
2136      */
2137     if (runstate_is_running()) {
2138         qapi_event_send_stop();
2139         qapi_event_send_resume();
2140         return -1;
2141     }
2142 
2143     /* We are sending this now, but the CPUs will be resumed shortly later */
2144     qapi_event_send_resume();
2145 
2146     cpu_enable_ticks();
2147     runstate_set(RUN_STATE_RUNNING);
2148     vm_state_notify(1, RUN_STATE_RUNNING);
2149     return 0;
2150 }
2151 
vm_start(void)2152 void vm_start(void)
2153 {
2154     if (!vm_prepare_start()) {
2155         resume_all_vcpus();
2156     }
2157 }
2158 
2159 /* does a state transition even if the VM is already stopped,
2160    current state is forgotten forever */
vm_stop_force_state(RunState state)2161 int vm_stop_force_state(RunState state)
2162 {
2163     if (runstate_is_running()) {
2164         return vm_stop(state);
2165     } else {
2166         runstate_set(state);
2167 
2168         bdrv_drain_all();
2169         /* Make sure to return an error if the flush in a previous vm_stop()
2170          * failed. */
2171         return bdrv_flush_all();
2172     }
2173 }
2174 
list_cpus(const char * optarg)2175 void list_cpus(const char *optarg)
2176 {
2177     /* XXX: implement xxx_cpu_list for targets that still miss it */
2178 #if defined(cpu_list)
2179     cpu_list();
2180 #endif
2181 }
2182 
qmp_memsave(int64_t addr,int64_t size,const char * filename,bool has_cpu,int64_t cpu_index,Error ** errp)2183 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2184                  bool has_cpu, int64_t cpu_index, Error **errp)
2185 {
2186     FILE *f;
2187     uint32_t l;
2188     CPUState *cpu;
2189     uint8_t buf[1024];
2190     int64_t orig_addr = addr, orig_size = size;
2191 
2192     if (!has_cpu) {
2193         cpu_index = 0;
2194     }
2195 
2196     cpu = qemu_get_cpu(cpu_index);
2197     if (cpu == NULL) {
2198         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2199                    "a CPU number");
2200         return;
2201     }
2202 
2203     f = fopen(filename, "wb");
2204     if (!f) {
2205         error_setg_file_open(errp, errno, filename);
2206         return;
2207     }
2208 
2209     while (size != 0) {
2210         l = sizeof(buf);
2211         if (l > size)
2212             l = size;
2213         if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2214             error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2215                              " specified", orig_addr, orig_size);
2216             goto exit;
2217         }
2218         if (fwrite(buf, 1, l, f) != l) {
2219             error_setg(errp, QERR_IO_ERROR);
2220             goto exit;
2221         }
2222         addr += l;
2223         size -= l;
2224     }
2225 
2226 exit:
2227     fclose(f);
2228 }
2229 
qmp_pmemsave(int64_t addr,int64_t size,const char * filename,Error ** errp)2230 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2231                   Error **errp)
2232 {
2233     FILE *f;
2234     uint32_t l;
2235     uint8_t buf[1024];
2236 
2237     f = fopen(filename, "wb");
2238     if (!f) {
2239         error_setg_file_open(errp, errno, filename);
2240         return;
2241     }
2242 
2243     while (size != 0) {
2244         l = sizeof(buf);
2245         if (l > size)
2246             l = size;
2247         cpu_physical_memory_read(addr, buf, l);
2248         if (fwrite(buf, 1, l, f) != l) {
2249             error_setg(errp, QERR_IO_ERROR);
2250             goto exit;
2251         }
2252         addr += l;
2253         size -= l;
2254     }
2255 
2256 exit:
2257     fclose(f);
2258 }
2259 
qmp_inject_nmi(Error ** errp)2260 void qmp_inject_nmi(Error **errp)
2261 {
2262     nmi_monitor_handle(monitor_get_cpu_index(), errp);
2263 }
2264 
dump_drift_info(void)2265 void dump_drift_info(void)
2266 {
2267     if (!use_icount) {
2268         return;
2269     }
2270 
2271     qemu_printf("Host - Guest clock  %"PRIi64" ms\n",
2272                 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2273     if (icount_align_option) {
2274         qemu_printf("Max guest delay     %"PRIi64" ms\n",
2275                     -max_delay / SCALE_MS);
2276         qemu_printf("Max guest advance   %"PRIi64" ms\n",
2277                     max_advance / SCALE_MS);
2278     } else {
2279         qemu_printf("Max guest delay     NA\n");
2280         qemu_printf("Max guest advance   NA\n");
2281     }
2282 }
2283