1 /*
2  * qemu_process.c: QEMU process management
3  *
4  * Copyright (C) 2006-2016 Red Hat, Inc.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library.  If not, see
18  * <http://www.gnu.org/licenses/>.
19  *
20  */
21 
22 #include <config.h>
23 
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <signal.h>
27 #include <sys/stat.h>
28 #if defined(__linux__)
29 # include <linux/capability.h>
30 #elif defined(__FreeBSD__) && !defined(__DragonFly__)
31 # include <sys/param.h>
32 # include <sys/cpuset.h>
33 #endif
34 
35 #include <sys/utsname.h>
36 
37 #if WITH_CAPNG
38 # include <cap-ng.h>
39 #endif
40 
41 #include "qemu_process.h"
42 #define LIBVIRT_QEMU_PROCESSPRIV_H_ALLOW
43 #include "qemu_processpriv.h"
44 #include "qemu_alias.h"
45 #include "qemu_block.h"
46 #include "qemu_domain.h"
47 #include "qemu_domain_address.h"
48 #include "qemu_namespace.h"
49 #include "qemu_cgroup.h"
50 #include "qemu_capabilities.h"
51 #include "qemu_monitor.h"
52 #include "qemu_command.h"
53 #include "qemu_hostdev.h"
54 #include "qemu_hotplug.h"
55 #include "qemu_migration.h"
56 #include "qemu_migration_params.h"
57 #include "qemu_interface.h"
58 #include "qemu_security.h"
59 #include "qemu_extdevice.h"
60 #include "qemu_firmware.h"
61 #include "qemu_backup.h"
62 #include "qemu_dbus.h"
63 #include "qemu_snapshot.h"
64 
65 #include "cpu/cpu.h"
66 #include "cpu/cpu_x86.h"
67 #include "datatypes.h"
68 #include "virlog.h"
69 #include "virerror.h"
70 #include "viralloc.h"
71 #include "virhook.h"
72 #include "virfile.h"
73 #include "virpidfile.h"
74 #include "virhostcpu.h"
75 #include "domain_audit.h"
76 #include "domain_nwfilter.h"
77 #include "domain_validate.h"
78 #include "locking/domain_lock.h"
79 #include "viruuid.h"
80 #include "virprocess.h"
81 #include "virtime.h"
82 #include "virnetdevtap.h"
83 #include "virnetdevopenvswitch.h"
84 #include "virnetdevmidonet.h"
85 #include "virbitmap.h"
86 #include "virnuma.h"
87 #include "virstring.h"
88 #include "virhostdev.h"
89 #include "virsecret.h"
90 #include "configmake.h"
91 #include "nwfilter_conf.h"
92 #include "netdev_bandwidth_conf.h"
93 #include "virresctrl.h"
94 #include "virvsock.h"
95 #include "viridentity.h"
96 #include "virthreadjob.h"
97 #include "virutil.h"
98 #include "storage_source.h"
99 #include "backup_conf.h"
100 
101 #define VIR_FROM_THIS VIR_FROM_QEMU
102 
103 VIR_LOG_INIT("qemu.qemu_process");
104 
105 /**
106  * qemuProcessRemoveDomainStatus
107  *
108  * remove all state files of a domain from statedir
109  */
110 static void
qemuProcessRemoveDomainStatus(virQEMUDriver * driver,virDomainObj * vm)111 qemuProcessRemoveDomainStatus(virQEMUDriver *driver,
112                               virDomainObj *vm)
113 {
114     g_autofree char *file = NULL;
115     qemuDomainObjPrivate *priv = vm->privateData;
116     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
117 
118     file = g_strdup_printf("%s/%s.xml", cfg->stateDir, vm->def->name);
119 
120     if (unlink(file) < 0 && errno != ENOENT && errno != ENOTDIR)
121         VIR_WARN("Failed to remove domain XML for %s: %s",
122                  vm->def->name, g_strerror(errno));
123 
124     if (priv->pidfile &&
125         unlink(priv->pidfile) < 0 &&
126         errno != ENOENT)
127         VIR_WARN("Failed to remove PID file for %s: %s",
128                  vm->def->name, g_strerror(errno));
129 }
130 
131 
132 /*
133  * This is a callback registered with a qemuAgent *instance,
134  * and to be invoked when the agent console hits an end of file
135  * condition, or error, thus indicating VM shutdown should be
136  * performed
137  */
138 static void
qemuProcessHandleAgentEOF(qemuAgent * agent,virDomainObj * vm)139 qemuProcessHandleAgentEOF(qemuAgent *agent,
140                           virDomainObj *vm)
141 {
142     qemuDomainObjPrivate *priv;
143 
144     VIR_DEBUG("Received EOF from agent on %p '%s'", vm, vm->def->name);
145 
146     virObjectLock(vm);
147 
148     priv = vm->privateData;
149 
150     if (!priv->agent) {
151         VIR_DEBUG("Agent freed already");
152         goto unlock;
153     }
154 
155     if (priv->beingDestroyed) {
156         VIR_DEBUG("Domain is being destroyed, agent EOF is expected");
157         goto unlock;
158     }
159 
160     qemuAgentClose(agent);
161     priv->agent = NULL;
162     priv->agentError = false;
163 
164     virObjectUnlock(vm);
165     return;
166 
167  unlock:
168     virObjectUnlock(vm);
169     return;
170 }
171 
172 
173 /*
174  * This is invoked when there is some kind of error
175  * parsing data to/from the agent. The VM can continue
176  * to run, but no further agent commands will be
177  * allowed
178  */
179 static void
qemuProcessHandleAgentError(qemuAgent * agent G_GNUC_UNUSED,virDomainObj * vm)180 qemuProcessHandleAgentError(qemuAgent *agent G_GNUC_UNUSED,
181                             virDomainObj *vm)
182 {
183     qemuDomainObjPrivate *priv;
184 
185     VIR_DEBUG("Received error from agent on %p '%s'", vm, vm->def->name);
186 
187     virObjectLock(vm);
188 
189     priv = vm->privateData;
190 
191     priv->agentError = true;
192 
193     virObjectUnlock(vm);
194 }
195 
196 
197 static qemuAgentCallbacks agentCallbacks = {
198     .eofNotify = qemuProcessHandleAgentEOF,
199     .errorNotify = qemuProcessHandleAgentError,
200 };
201 
202 
203 int
qemuConnectAgent(virQEMUDriver * driver,virDomainObj * vm)204 qemuConnectAgent(virQEMUDriver *driver, virDomainObj *vm)
205 {
206     qemuDomainObjPrivate *priv = vm->privateData;
207     qemuAgent *agent = NULL;
208     virDomainChrDef *config = qemuFindAgentConfig(vm->def);
209 
210     if (!config)
211         return 0;
212 
213     if (priv->agent)
214         return 0;
215 
216     if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VSERPORT_CHANGE) &&
217         config->state != VIR_DOMAIN_CHR_DEVICE_STATE_CONNECTED) {
218         VIR_DEBUG("Deferring connecting to guest agent");
219         return 0;
220     }
221 
222     if (qemuSecuritySetDaemonSocketLabel(driver->securityManager, vm->def) < 0) {
223         VIR_ERROR(_("Failed to set security context for agent for %s"),
224                   vm->def->name);
225         goto cleanup;
226     }
227 
228     agent = qemuAgentOpen(vm,
229                           config->source,
230                           virEventThreadGetContext(priv->eventThread),
231                           &agentCallbacks,
232                           virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VSERPORT_CHANGE));
233 
234     if (!virDomainObjIsActive(vm)) {
235         qemuAgentClose(agent);
236         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
237                        _("guest crashed while connecting to the guest agent"));
238         return -1;
239     }
240 
241     if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) {
242         VIR_ERROR(_("Failed to clear security context for agent for %s"),
243                   vm->def->name);
244         qemuAgentClose(agent);
245         goto cleanup;
246     }
247 
248     priv->agent = agent;
249     if (!priv->agent)
250         VIR_INFO("Failed to connect agent for %s", vm->def->name);
251 
252  cleanup:
253     if (!priv->agent) {
254         VIR_WARN("Cannot connect to QEMU guest agent for %s", vm->def->name);
255         priv->agentError = true;
256         virResetLastError();
257     }
258 
259     return 0;
260 }
261 
262 
263 /**
264  * qemuProcessEventSubmit:
265  * @driver: QEMU driver object
266  * @event: pointer to the variable holding the event processing data (stolen and cleared)
267  *
268  * Submits @event to be processed by the asynchronous event handling thread.
269  * In case when submission of the handling fails @event is properly freed and
270  * cleared. If (*event)->vm is non-NULL the domain object is uref'd before freeing
271  * @event.
272  */
273 static void
qemuProcessEventSubmit(virQEMUDriver * driver,struct qemuProcessEvent ** event)274 qemuProcessEventSubmit(virQEMUDriver *driver,
275                        struct qemuProcessEvent **event)
276 {
277     if (!*event)
278         return;
279 
280     if (virThreadPoolSendJob(driver->workerPool, 0, *event) < 0) {
281         if ((*event)->vm)
282             virObjectUnref((*event)->vm);
283         qemuProcessEventFree(*event);
284     }
285 
286     *event = NULL;
287 }
288 
289 
290 /*
291  * This is a callback registered with a qemuMonitor *instance,
292  * and to be invoked when the monitor console hits an end of file
293  * condition, or error, thus indicating VM shutdown should be
294  * performed
295  */
296 static void
qemuProcessHandleMonitorEOF(qemuMonitor * mon,virDomainObj * vm,void * opaque)297 qemuProcessHandleMonitorEOF(qemuMonitor *mon,
298                             virDomainObj *vm,
299                             void *opaque)
300 {
301     virQEMUDriver *driver = opaque;
302     qemuDomainObjPrivate *priv;
303     struct qemuProcessEvent *processEvent;
304 
305     virObjectLock(vm);
306 
307     VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name);
308 
309     priv = vm->privateData;
310     if (priv->beingDestroyed) {
311         VIR_DEBUG("Domain is being destroyed, EOF is expected");
312         goto cleanup;
313     }
314 
315     processEvent = g_new0(struct qemuProcessEvent, 1);
316 
317     processEvent->eventType = QEMU_PROCESS_EVENT_MONITOR_EOF;
318     processEvent->vm = virObjectRef(vm);
319 
320     qemuProcessEventSubmit(driver, &processEvent);
321 
322     /* We don't want this EOF handler to be called over and over while the
323      * thread is waiting for a job.
324      */
325     virObjectLock(mon);
326     qemuMonitorUnregister(mon);
327     virObjectUnlock(mon);
328 
329     /* We don't want any cleanup from EOF handler (or any other
330      * thread) to enter qemu namespace. */
331     qemuDomainDestroyNamespace(driver, vm);
332 
333  cleanup:
334     virObjectUnlock(vm);
335 }
336 
337 
338 /*
339  * This is invoked when there is some kind of error
340  * parsing data to/from the monitor. The VM can continue
341  * to run, but no further monitor commands will be
342  * allowed
343  */
344 static void
qemuProcessHandleMonitorError(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,void * opaque)345 qemuProcessHandleMonitorError(qemuMonitor *mon G_GNUC_UNUSED,
346                               virDomainObj *vm,
347                               void *opaque)
348 {
349     virQEMUDriver *driver = opaque;
350     virObjectEvent *event = NULL;
351 
352     VIR_DEBUG("Received error on %p '%s'", vm, vm->def->name);
353 
354     virObjectLock(vm);
355 
356     ((qemuDomainObjPrivate *) vm->privateData)->monError = true;
357     event = virDomainEventControlErrorNewFromObj(vm);
358     virObjectEventStateQueue(driver->domainEventState, event);
359 
360     virObjectUnlock(vm);
361 }
362 
363 
364 /**
365  * qemuProcessFindDomainDiskByAliasOrQOM:
366  * @vm: domain object to search for the disk
367  * @alias: -drive or -device alias of the disk
368  * @qomid: QOM tree device name
369  *
370  * Looks up a disk in the domain definition of @vm which either matches the
371  * -drive or -device alias used for the backend and frontend respectively or the
372  * QOM name. If @alias is empty it's treated as NULL as it's a mandatory field
373  * in some cases.
374  *
375  * Returns a disk from @vm or NULL if it could not be found.
376  */
377 virDomainDiskDef *
qemuProcessFindDomainDiskByAliasOrQOM(virDomainObj * vm,const char * alias,const char * qomid)378 qemuProcessFindDomainDiskByAliasOrQOM(virDomainObj *vm,
379                                       const char *alias,
380                                       const char *qomid)
381 {
382     size_t i;
383 
384     if (alias && *alias == '\0')
385         alias = NULL;
386 
387     if (alias)
388         alias = qemuAliasDiskDriveSkipPrefix(alias);
389 
390     for (i = 0; i < vm->def->ndisks; i++) {
391         virDomainDiskDef *disk = vm->def->disks[i];
392         qemuDomainDiskPrivate *diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
393 
394         if ((disk->info.alias && STREQ_NULLABLE(disk->info.alias, alias)) ||
395             (diskPriv->qomName && STREQ_NULLABLE(diskPriv->qomName, qomid)))
396             return disk;
397     }
398 
399     virReportError(VIR_ERR_INTERNAL_ERROR,
400                    _("no disk found with alias '%s' or id '%s'"),
401                    NULLSTR(alias), NULLSTR(qomid));
402     return NULL;
403 }
404 
405 
406 static void
qemuProcessHandleReset(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,void * opaque)407 qemuProcessHandleReset(qemuMonitor *mon G_GNUC_UNUSED,
408                        virDomainObj *vm,
409                        void *opaque)
410 {
411     virQEMUDriver *driver = opaque;
412     virObjectEvent *event = NULL;
413     qemuDomainObjPrivate *priv;
414     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
415     virDomainState state;
416     int reason;
417 
418     virObjectLock(vm);
419 
420     state = virDomainObjGetState(vm, &reason);
421 
422     /* ignore reset events on VM startup. Libvirt in certain instances does a
423      * reset during startup so that the ACPI tables are re-generated */
424     if (state == VIR_DOMAIN_PAUSED &&
425         reason == VIR_DOMAIN_PAUSED_STARTING_UP) {
426         VIR_DEBUG("ignoring reset event during startup");
427         goto unlock;
428     }
429 
430     event = virDomainEventRebootNewFromObj(vm);
431     priv = vm->privateData;
432     if (priv->agent)
433         qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_RESET);
434 
435     if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
436         VIR_WARN("Failed to save status on vm %s", vm->def->name);
437 
438  unlock:
439     virObjectUnlock(vm);
440     virObjectEventStateQueue(driver->domainEventState, event);
441 }
442 
443 
444 /*
445  * Since we have the '-no-shutdown' flag set, the
446  * QEMU process will currently have guest OS shutdown
447  * and the CPUS stopped. To fake the reboot, we thus
448  * want todo a reset of the virtual hardware, followed
449  * by restart of the CPUs. This should result in the
450  * guest OS booting up again
451  */
452 static void
qemuProcessFakeReboot(void * opaque)453 qemuProcessFakeReboot(void *opaque)
454 {
455     virDomainObj *vm = opaque;
456     qemuDomainObjPrivate *priv = vm->privateData;
457     virQEMUDriver *driver = priv->driver;
458     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
459     virDomainRunningReason reason = VIR_DOMAIN_RUNNING_BOOTED;
460     int ret = -1, rc;
461 
462     VIR_DEBUG("vm=%p", vm);
463     virObjectLock(vm);
464     if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
465         goto cleanup;
466 
467     if (!virDomainObjIsActive(vm)) {
468         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
469                        _("guest unexpectedly quit"));
470         goto endjob;
471     }
472 
473     qemuDomainObjEnterMonitor(driver, vm);
474     rc = qemuMonitorSystemReset(priv->mon);
475 
476     if (qemuDomainObjExitMonitor(driver, vm) < 0)
477         goto endjob;
478 
479     if (rc < 0)
480         goto endjob;
481 
482     if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_CRASHED)
483         reason = VIR_DOMAIN_RUNNING_CRASHED;
484 
485     if (qemuProcessStartCPUs(driver, vm,
486                              reason,
487                              QEMU_ASYNC_JOB_NONE) < 0) {
488         if (virGetLastErrorCode() == VIR_ERR_OK)
489             virReportError(VIR_ERR_INTERNAL_ERROR,
490                            "%s", _("resume operation failed"));
491         goto endjob;
492     }
493 
494     if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
495         VIR_WARN("Unable to save status on vm %s after state change",
496                  vm->def->name);
497     }
498 
499     ret = 0;
500 
501  endjob:
502     qemuDomainObjEndJob(driver, vm);
503 
504  cleanup:
505     priv->pausedShutdown = false;
506     qemuDomainSetFakeReboot(driver, vm, false);
507     if (ret == -1)
508         ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_FORCE));
509     virDomainObjEndAPI(&vm);
510 }
511 
512 
513 void
qemuProcessShutdownOrReboot(virQEMUDriver * driver,virDomainObj * vm)514 qemuProcessShutdownOrReboot(virQEMUDriver *driver,
515                             virDomainObj *vm)
516 {
517     qemuDomainObjPrivate *priv = vm->privateData;
518 
519     if (priv->fakeReboot ||
520         vm->def->onPoweroff == VIR_DOMAIN_LIFECYCLE_ACTION_RESTART) {
521         g_autofree char *name = g_strdup_printf("reboot-%s", vm->def->name);
522         virThread th;
523 
524         virObjectRef(vm);
525         if (virThreadCreateFull(&th,
526                                 false,
527                                 qemuProcessFakeReboot,
528                                 name,
529                                 false,
530                                 vm) < 0) {
531             VIR_ERROR(_("Failed to create reboot thread, killing domain"));
532             ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_NOWAIT));
533             priv->pausedShutdown = false;
534             qemuDomainSetFakeReboot(driver, vm, false);
535             virObjectUnref(vm);
536         }
537     } else {
538         ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_NOWAIT));
539     }
540 }
541 
542 
543 static void
qemuProcessHandleEvent(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * eventName,long long seconds,unsigned int micros,const char * details,void * opaque)544 qemuProcessHandleEvent(qemuMonitor *mon G_GNUC_UNUSED,
545                        virDomainObj *vm,
546                        const char *eventName,
547                        long long seconds,
548                        unsigned int micros,
549                        const char *details,
550                        void *opaque)
551 {
552     virQEMUDriver *driver = opaque;
553     virObjectEvent *event = NULL;
554 
555     VIR_DEBUG("vm=%p", vm);
556 
557     virObjectLock(vm);
558     event = virDomainQemuMonitorEventNew(vm->def->id, vm->def->name,
559                                          vm->def->uuid, eventName,
560                                          seconds, micros, details);
561 
562     virObjectUnlock(vm);
563     virObjectEventStateQueue(driver->domainEventState, event);
564 }
565 
566 
567 static void
qemuProcessHandleShutdown(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,virTristateBool guest_initiated,void * opaque)568 qemuProcessHandleShutdown(qemuMonitor *mon G_GNUC_UNUSED,
569                           virDomainObj *vm,
570                           virTristateBool guest_initiated,
571                           void *opaque)
572 {
573     virQEMUDriver *driver = opaque;
574     qemuDomainObjPrivate *priv;
575     virObjectEvent *event = NULL;
576     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
577     int detail = 0;
578 
579     VIR_DEBUG("vm=%p", vm);
580 
581     virObjectLock(vm);
582 
583     priv = vm->privateData;
584     if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_SHUTDOWN) {
585         VIR_DEBUG("Ignoring repeated SHUTDOWN event from domain %s",
586                   vm->def->name);
587         goto unlock;
588     } else if (!virDomainObjIsActive(vm)) {
589         VIR_DEBUG("Ignoring SHUTDOWN event from inactive domain %s",
590                   vm->def->name);
591         goto unlock;
592     }
593 
594     /* In case of fake reboot qemu shutdown state is transient so don't
595      * change domain state nor send events. */
596     if (!priv->fakeReboot &&
597         vm->def->onPoweroff != VIR_DOMAIN_LIFECYCLE_ACTION_RESTART) {
598         VIR_DEBUG("Transitioned guest %s to shutdown state",
599                   vm->def->name);
600         virDomainObjSetState(vm,
601                              VIR_DOMAIN_SHUTDOWN,
602                              VIR_DOMAIN_SHUTDOWN_UNKNOWN);
603 
604         switch (guest_initiated) {
605         case VIR_TRISTATE_BOOL_YES:
606             detail = VIR_DOMAIN_EVENT_SHUTDOWN_GUEST;
607             break;
608 
609         case VIR_TRISTATE_BOOL_NO:
610             detail = VIR_DOMAIN_EVENT_SHUTDOWN_HOST;
611             break;
612 
613         case VIR_TRISTATE_BOOL_ABSENT:
614         case VIR_TRISTATE_BOOL_LAST:
615         default:
616             detail = VIR_DOMAIN_EVENT_SHUTDOWN_FINISHED;
617             break;
618         }
619 
620         event = virDomainEventLifecycleNewFromObj(vm,
621                                                   VIR_DOMAIN_EVENT_SHUTDOWN,
622                                                   detail);
623 
624         if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
625             VIR_WARN("Unable to save status on vm %s after state change",
626                      vm->def->name);
627         }
628     } else {
629         priv->pausedShutdown = true;
630     }
631 
632     if (priv->agent)
633         qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SHUTDOWN);
634 
635     qemuProcessShutdownOrReboot(driver, vm);
636 
637  unlock:
638     virObjectUnlock(vm);
639     virObjectEventStateQueue(driver->domainEventState, event);
640 }
641 
642 
643 static void
qemuProcessHandleStop(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,void * opaque)644 qemuProcessHandleStop(qemuMonitor *mon G_GNUC_UNUSED,
645                       virDomainObj *vm,
646                       void *opaque)
647 {
648     virQEMUDriver *driver = opaque;
649     virObjectEvent *event = NULL;
650     virDomainPausedReason reason;
651     virDomainEventSuspendedDetailType detail;
652     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
653     qemuDomainObjPrivate *priv = vm->privateData;
654 
655     virObjectLock(vm);
656 
657     reason = priv->pausedReason;
658     priv->pausedReason = VIR_DOMAIN_PAUSED_UNKNOWN;
659 
660     /* In case of fake reboot qemu paused state is transient so don't
661      * reveal it in domain state nor sent events */
662     if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING &&
663         !priv->pausedShutdown) {
664         if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) {
665             if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY)
666                 reason = VIR_DOMAIN_PAUSED_POSTCOPY;
667             else
668                 reason = VIR_DOMAIN_PAUSED_MIGRATION;
669         }
670 
671         detail = qemuDomainPausedReasonToSuspendedEvent(reason);
672         VIR_DEBUG("Transitioned guest %s to paused state, "
673                   "reason %s, event detail %d",
674                   vm->def->name, virDomainPausedReasonTypeToString(reason),
675                   detail);
676 
677         if (priv->job.current)
678             ignore_value(virTimeMillisNow(&priv->job.current->stopped));
679 
680         if (priv->signalStop)
681             virDomainObjBroadcast(vm);
682 
683         virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason);
684         event = virDomainEventLifecycleNewFromObj(vm,
685                                                   VIR_DOMAIN_EVENT_SUSPENDED,
686                                                   detail);
687 
688         VIR_FREE(priv->lockState);
689         if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
690             VIR_WARN("Unable to release lease on %s", vm->def->name);
691         VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
692 
693         if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
694             VIR_WARN("Unable to save status on vm %s after state change",
695                      vm->def->name);
696         }
697     }
698 
699     virObjectUnlock(vm);
700     virObjectEventStateQueue(driver->domainEventState, event);
701 }
702 
703 
704 static void
qemuProcessHandleResume(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,void * opaque)705 qemuProcessHandleResume(qemuMonitor *mon G_GNUC_UNUSED,
706                         virDomainObj *vm,
707                         void *opaque)
708 {
709     virQEMUDriver *driver = opaque;
710     virObjectEvent *event = NULL;
711     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
712     qemuDomainObjPrivate *priv;
713     virDomainRunningReason reason = VIR_DOMAIN_RUNNING_UNPAUSED;
714     virDomainEventResumedDetailType eventDetail;
715 
716     virObjectLock(vm);
717 
718     priv = vm->privateData;
719     if (priv->runningReason != VIR_DOMAIN_RUNNING_UNKNOWN) {
720         reason = priv->runningReason;
721         priv->runningReason = VIR_DOMAIN_RUNNING_UNKNOWN;
722     }
723 
724     if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_RUNNING) {
725         eventDetail = qemuDomainRunningReasonToResumeEvent(reason);
726         VIR_DEBUG("Transitioned guest %s into running state, reason '%s', "
727                   "event detail %d",
728                   vm->def->name, virDomainRunningReasonTypeToString(reason),
729                   eventDetail);
730 
731         virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
732         event = virDomainEventLifecycleNewFromObj(vm,
733                                                   VIR_DOMAIN_EVENT_RESUMED,
734                                                   eventDetail);
735 
736         if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
737             VIR_WARN("Unable to save status on vm %s after state change",
738                      vm->def->name);
739         }
740     }
741 
742     virObjectUnlock(vm);
743     virObjectEventStateQueue(driver->domainEventState, event);
744 }
745 
746 static void
qemuProcessHandleRTCChange(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,long long offset,void * opaque)747 qemuProcessHandleRTCChange(qemuMonitor *mon G_GNUC_UNUSED,
748                            virDomainObj *vm,
749                            long long offset,
750                            void *opaque)
751 {
752     virQEMUDriver *driver = opaque;
753     virObjectEvent *event = NULL;
754     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
755 
756     virObjectLock(vm);
757 
758     if (vm->def->clock.offset == VIR_DOMAIN_CLOCK_OFFSET_VARIABLE) {
759         /* when a basedate is manually given on the qemu commandline
760          * rather than simply "-rtc base=utc", the offset sent by qemu
761          * in this event is *not* the new offset from UTC, but is
762          * instead the new offset from the *original basedate* +
763          * uptime. For example, if the original offset was 3600 and
764          * the guest clock has been advanced by 10 seconds, qemu will
765          * send "10" in the event - this means that the new offset
766          * from UTC is 3610, *not* 10. If the guest clock is advanced
767          * by another 10 seconds, qemu will now send "20" - i.e. each
768          * event is the sum of the most recent change and all previous
769          * changes since the domain was started. Fortunately, we have
770          * saved the initial offset in "adjustment0", so to arrive at
771          * the proper new "adjustment", we just add the most recent
772          * offset to adjustment0.
773          */
774         offset += vm->def->clock.data.variable.adjustment0;
775         vm->def->clock.data.variable.adjustment = offset;
776 
777         if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
778            VIR_WARN("unable to save domain status with RTC change");
779     }
780 
781     event = virDomainEventRTCChangeNewFromObj(vm, offset);
782 
783     virObjectUnlock(vm);
784 
785     virObjectEventStateQueue(driver->domainEventState, event);
786 }
787 
788 
789 static void
qemuProcessHandleWatchdog(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,int action,void * opaque)790 qemuProcessHandleWatchdog(qemuMonitor *mon G_GNUC_UNUSED,
791                           virDomainObj *vm,
792                           int action,
793                           void *opaque)
794 {
795     virQEMUDriver *driver = opaque;
796     virObjectEvent *watchdogEvent = NULL;
797     virObjectEvent *lifecycleEvent = NULL;
798     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
799 
800     virObjectLock(vm);
801     watchdogEvent = virDomainEventWatchdogNewFromObj(vm, action);
802 
803     if (action == VIR_DOMAIN_EVENT_WATCHDOG_PAUSE &&
804         virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
805         qemuDomainObjPrivate *priv = vm->privateData;
806         VIR_DEBUG("Transitioned guest %s to paused state due to watchdog", vm->def->name);
807 
808         virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_WATCHDOG);
809         lifecycleEvent = virDomainEventLifecycleNewFromObj(vm,
810                                                   VIR_DOMAIN_EVENT_SUSPENDED,
811                                                   VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG);
812 
813         VIR_FREE(priv->lockState);
814         if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
815             VIR_WARN("Unable to release lease on %s", vm->def->name);
816         VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
817 
818         if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
819             VIR_WARN("Unable to save status on vm %s after watchdog event",
820                      vm->def->name);
821         }
822     }
823 
824     if (vm->def->watchdog->action == VIR_DOMAIN_WATCHDOG_ACTION_DUMP) {
825         struct qemuProcessEvent *processEvent;
826         processEvent = g_new0(struct qemuProcessEvent, 1);
827 
828         processEvent->eventType = QEMU_PROCESS_EVENT_WATCHDOG;
829         processEvent->action = VIR_DOMAIN_WATCHDOG_ACTION_DUMP;
830         /* Hold an extra reference because we can't allow 'vm' to be
831          * deleted before handling watchdog event is finished.
832          */
833         processEvent->vm = virObjectRef(vm);
834 
835         qemuProcessEventSubmit(driver, &processEvent);
836     }
837 
838     virObjectUnlock(vm);
839     virObjectEventStateQueue(driver->domainEventState, watchdogEvent);
840     virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
841 }
842 
843 
844 static void
qemuProcessHandleIOError(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * diskAlias,const char * nodename,int action,const char * reason,void * opaque)845 qemuProcessHandleIOError(qemuMonitor *mon G_GNUC_UNUSED,
846                          virDomainObj *vm,
847                          const char *diskAlias,
848                          const char *nodename,
849                          int action,
850                          const char *reason,
851                          void *opaque)
852 {
853     virQEMUDriver *driver = opaque;
854     virObjectEvent *ioErrorEvent = NULL;
855     virObjectEvent *ioErrorEvent2 = NULL;
856     virObjectEvent *lifecycleEvent = NULL;
857     const char *srcPath;
858     const char *devAlias;
859     virDomainDiskDef *disk;
860     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
861 
862     virObjectLock(vm);
863 
864     if (*diskAlias == '\0')
865         diskAlias = NULL;
866 
867     if (diskAlias)
868         disk = qemuProcessFindDomainDiskByAliasOrQOM(vm, diskAlias, NULL);
869     else if (nodename)
870         disk = qemuDomainDiskLookupByNodename(vm->def, NULL, nodename, NULL);
871     else
872         disk = NULL;
873 
874     if (disk) {
875         srcPath = virDomainDiskGetSource(disk);
876         devAlias = disk->info.alias;
877     } else {
878         srcPath = "";
879         devAlias = "";
880     }
881 
882     ioErrorEvent = virDomainEventIOErrorNewFromObj(vm, srcPath, devAlias, action);
883     ioErrorEvent2 = virDomainEventIOErrorReasonNewFromObj(vm, srcPath, devAlias, action, reason);
884 
885     if (action == VIR_DOMAIN_EVENT_IO_ERROR_PAUSE &&
886         virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
887         qemuDomainObjPrivate *priv = vm->privateData;
888         VIR_DEBUG("Transitioned guest %s to paused state due to IO error", vm->def->name);
889 
890         if (priv->signalIOError)
891             virDomainObjBroadcast(vm);
892 
893         virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_IOERROR);
894         lifecycleEvent = virDomainEventLifecycleNewFromObj(vm,
895                                                   VIR_DOMAIN_EVENT_SUSPENDED,
896                                                   VIR_DOMAIN_EVENT_SUSPENDED_IOERROR);
897 
898         VIR_FREE(priv->lockState);
899         if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
900             VIR_WARN("Unable to release lease on %s", vm->def->name);
901         VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
902 
903         if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
904             VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name);
905     }
906     virObjectUnlock(vm);
907 
908     virObjectEventStateQueue(driver->domainEventState, ioErrorEvent);
909     virObjectEventStateQueue(driver->domainEventState, ioErrorEvent2);
910     virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
911 }
912 
913 static void
qemuProcessHandleBlockJob(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * diskAlias,int type,int status,const char * error,void * opaque)914 qemuProcessHandleBlockJob(qemuMonitor *mon G_GNUC_UNUSED,
915                           virDomainObj *vm,
916                           const char *diskAlias,
917                           int type,
918                           int status,
919                           const char *error,
920                           void *opaque)
921 {
922     qemuDomainObjPrivate *priv;
923     virQEMUDriver *driver = opaque;
924     virDomainDiskDef *disk;
925     g_autoptr(qemuBlockJobData) job = NULL;
926     char *data = NULL;
927 
928     virObjectLock(vm);
929 
930     priv = vm->privateData;
931 
932     /* with QEMU_CAPS_BLOCKDEV we handle block job events via JOB_STATUS_CHANGE */
933     if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV))
934         goto cleanup;
935 
936     VIR_DEBUG("Block job for device %s (domain: %p,%s) type %d status %d",
937               diskAlias, vm, vm->def->name, type, status);
938 
939     if (!(disk = qemuProcessFindDomainDiskByAliasOrQOM(vm, diskAlias, NULL)))
940         goto cleanup;
941 
942     job = qemuBlockJobDiskGetJob(disk);
943 
944     if (job && job->synchronous) {
945         /* We have a SYNC API waiting for this event, dispatch it back */
946         job->newstate = status;
947         VIR_FREE(job->errmsg);
948         job->errmsg = g_strdup(error);
949         virDomainObjBroadcast(vm);
950     } else {
951         /* there is no waiting SYNC API, dispatch the update to a thread */
952         struct qemuProcessEvent *processEvent = g_new0(struct qemuProcessEvent, 1);
953 
954         processEvent->eventType = QEMU_PROCESS_EVENT_BLOCK_JOB;
955         data = g_strdup(diskAlias);
956         processEvent->data = data;
957         processEvent->vm = virObjectRef(vm);
958         processEvent->action = type;
959         processEvent->status = status;
960 
961         qemuProcessEventSubmit(driver, &processEvent);
962     }
963 
964  cleanup:
965     virObjectUnlock(vm);
966 }
967 
968 
969 static void
qemuProcessHandleJobStatusChange(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * jobname,int status,void * opaque)970 qemuProcessHandleJobStatusChange(qemuMonitor *mon G_GNUC_UNUSED,
971                                  virDomainObj *vm,
972                                  const char *jobname,
973                                  int status,
974                                  void *opaque)
975 {
976     virQEMUDriver *driver = opaque;
977     qemuDomainObjPrivate *priv;
978     qemuBlockJobData *job = NULL;
979     int jobnewstate;
980 
981     virObjectLock(vm);
982     priv = vm->privateData;
983 
984     VIR_DEBUG("job '%s'(domain: %p,%s) state changed to '%s'(%d)",
985               jobname, vm, vm->def->name,
986               qemuMonitorJobStatusTypeToString(status), status);
987 
988     if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV)) {
989         VIR_DEBUG("job '%s' handled by old blockjob handler", jobname);
990         goto cleanup;
991     }
992 
993     if ((jobnewstate = qemuBlockjobConvertMonitorStatus(status)) == QEMU_BLOCKJOB_STATE_LAST)
994         goto cleanup;
995 
996     if (!(job = virHashLookup(priv->blockjobs, jobname))) {
997         VIR_DEBUG("job '%s' not registered", jobname);
998         goto cleanup;
999     }
1000 
1001     job->newstate = jobnewstate;
1002 
1003     if (job->synchronous) {
1004         VIR_DEBUG("job '%s' handled synchronously", jobname);
1005         virDomainObjBroadcast(vm);
1006     } else {
1007         struct qemuProcessEvent *processEvent = g_new0(struct qemuProcessEvent, 1);
1008 
1009         VIR_DEBUG("job '%s' handled by event thread", jobname);
1010 
1011         processEvent->eventType = QEMU_PROCESS_EVENT_JOB_STATUS_CHANGE;
1012         processEvent->vm = virObjectRef(vm);
1013         processEvent->data = virObjectRef(job);
1014 
1015         qemuProcessEventSubmit(driver, &processEvent);
1016     }
1017 
1018  cleanup:
1019     virObjectUnlock(vm);
1020 }
1021 
1022 
1023 static void
qemuProcessHandleGraphics(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,int phase,int localFamily,const char * localNode,const char * localService,int remoteFamily,const char * remoteNode,const char * remoteService,const char * authScheme,const char * x509dname,const char * saslUsername,void * opaque)1024 qemuProcessHandleGraphics(qemuMonitor *mon G_GNUC_UNUSED,
1025                           virDomainObj *vm,
1026                           int phase,
1027                           int localFamily,
1028                           const char *localNode,
1029                           const char *localService,
1030                           int remoteFamily,
1031                           const char *remoteNode,
1032                           const char *remoteService,
1033                           const char *authScheme,
1034                           const char *x509dname,
1035                           const char *saslUsername,
1036                           void *opaque)
1037 {
1038     virQEMUDriver *driver = opaque;
1039     virObjectEvent *event;
1040     virDomainEventGraphicsAddressPtr localAddr = NULL;
1041     virDomainEventGraphicsAddressPtr remoteAddr = NULL;
1042     virDomainEventGraphicsSubjectPtr subject = NULL;
1043 
1044     localAddr = g_new0(virDomainEventGraphicsAddress, 1);
1045     localAddr->family = localFamily;
1046     localAddr->service = g_strdup(localService);
1047     localAddr->node = g_strdup(localNode);
1048 
1049     remoteAddr = g_new0(virDomainEventGraphicsAddress, 1);
1050     remoteAddr->family = remoteFamily;
1051     remoteAddr->service = g_strdup(remoteService);
1052     remoteAddr->node = g_strdup(remoteNode);
1053 
1054     subject = g_new0(virDomainEventGraphicsSubject, 1);
1055     if (x509dname) {
1056         VIR_REALLOC_N(subject->identities, subject->nidentity+1);
1057         subject->nidentity++;
1058         subject->identities[subject->nidentity - 1].type = g_strdup("x509dname");
1059         subject->identities[subject->nidentity - 1].name = g_strdup(x509dname);
1060     }
1061     if (saslUsername) {
1062         VIR_REALLOC_N(subject->identities, subject->nidentity+1);
1063         subject->nidentity++;
1064         subject->identities[subject->nidentity - 1].type = g_strdup("saslUsername");
1065         subject->identities[subject->nidentity - 1].name = g_strdup(saslUsername);
1066     }
1067 
1068     virObjectLock(vm);
1069     event = virDomainEventGraphicsNewFromObj(vm, phase, localAddr, remoteAddr, authScheme, subject);
1070     virObjectUnlock(vm);
1071 
1072     virObjectEventStateQueue(driver->domainEventState, event);
1073 }
1074 
1075 static void
qemuProcessHandleTrayChange(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * devAlias,const char * devid,int reason,void * opaque)1076 qemuProcessHandleTrayChange(qemuMonitor *mon G_GNUC_UNUSED,
1077                             virDomainObj *vm,
1078                             const char *devAlias,
1079                             const char *devid,
1080                             int reason,
1081                             void *opaque)
1082 {
1083     virQEMUDriver *driver = opaque;
1084     virObjectEvent *event = NULL;
1085     virDomainDiskDef *disk;
1086     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
1087 
1088     virObjectLock(vm);
1089     disk = qemuProcessFindDomainDiskByAliasOrQOM(vm, devAlias, devid);
1090 
1091     if (disk) {
1092         event = virDomainEventTrayChangeNewFromObj(vm, disk->info.alias, reason);
1093         /* Update disk tray status */
1094         if (reason == VIR_DOMAIN_EVENT_TRAY_CHANGE_OPEN)
1095             disk->tray_status = VIR_DOMAIN_DISK_TRAY_OPEN;
1096         else if (reason == VIR_DOMAIN_EVENT_TRAY_CHANGE_CLOSE)
1097             disk->tray_status = VIR_DOMAIN_DISK_TRAY_CLOSED;
1098 
1099         if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
1100             VIR_WARN("Unable to save status on vm %s after tray moved event",
1101                      vm->def->name);
1102         }
1103 
1104         virDomainObjBroadcast(vm);
1105     }
1106 
1107     virObjectUnlock(vm);
1108     virObjectEventStateQueue(driver->domainEventState, event);
1109 }
1110 
1111 static void
qemuProcessHandlePMWakeup(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,void * opaque)1112 qemuProcessHandlePMWakeup(qemuMonitor *mon G_GNUC_UNUSED,
1113                           virDomainObj *vm,
1114                           void *opaque)
1115 {
1116     virQEMUDriver *driver = opaque;
1117     virObjectEvent *event = NULL;
1118     virObjectEvent *lifecycleEvent = NULL;
1119     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
1120 
1121     virObjectLock(vm);
1122     event = virDomainEventPMWakeupNewFromObj(vm);
1123 
1124     /* Don't set domain status back to running if it wasn't paused
1125      * from guest side, otherwise it can just cause confusion.
1126      */
1127     if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PMSUSPENDED) {
1128         VIR_DEBUG("Transitioned guest %s from pmsuspended to running "
1129                   "state due to QMP wakeup event", vm->def->name);
1130 
1131         virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
1132                              VIR_DOMAIN_RUNNING_WAKEUP);
1133         lifecycleEvent = virDomainEventLifecycleNewFromObj(vm,
1134                                                   VIR_DOMAIN_EVENT_STARTED,
1135                                                   VIR_DOMAIN_EVENT_STARTED_WAKEUP);
1136 
1137         if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
1138             VIR_WARN("Unable to save status on vm %s after wakeup event",
1139                      vm->def->name);
1140         }
1141     }
1142 
1143     virObjectUnlock(vm);
1144     virObjectEventStateQueue(driver->domainEventState, event);
1145     virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
1146 }
1147 
1148 static void
qemuProcessHandlePMSuspend(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,void * opaque)1149 qemuProcessHandlePMSuspend(qemuMonitor *mon G_GNUC_UNUSED,
1150                            virDomainObj *vm,
1151                            void *opaque)
1152 {
1153     virQEMUDriver *driver = opaque;
1154     virObjectEvent *event = NULL;
1155     virObjectEvent *lifecycleEvent = NULL;
1156     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
1157 
1158     virObjectLock(vm);
1159     event = virDomainEventPMSuspendNewFromObj(vm);
1160 
1161     if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
1162         qemuDomainObjPrivate *priv = vm->privateData;
1163         VIR_DEBUG("Transitioned guest %s to pmsuspended state due to "
1164                   "QMP suspend event", vm->def->name);
1165 
1166         virDomainObjSetState(vm, VIR_DOMAIN_PMSUSPENDED,
1167                              VIR_DOMAIN_PMSUSPENDED_UNKNOWN);
1168         lifecycleEvent =
1169             virDomainEventLifecycleNewFromObj(vm,
1170                                      VIR_DOMAIN_EVENT_PMSUSPENDED,
1171                                      VIR_DOMAIN_EVENT_PMSUSPENDED_MEMORY);
1172 
1173         if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
1174             VIR_WARN("Unable to save status on vm %s after suspend event",
1175                      vm->def->name);
1176         }
1177 
1178         if (priv->agent)
1179             qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SUSPEND);
1180     }
1181 
1182     virObjectUnlock(vm);
1183 
1184     virObjectEventStateQueue(driver->domainEventState, event);
1185     virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
1186 }
1187 
1188 static void
qemuProcessHandleBalloonChange(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,unsigned long long actual,void * opaque)1189 qemuProcessHandleBalloonChange(qemuMonitor *mon G_GNUC_UNUSED,
1190                                virDomainObj *vm,
1191                                unsigned long long actual,
1192                                void *opaque)
1193 {
1194     virQEMUDriver *driver = opaque;
1195     virObjectEvent *event = NULL;
1196     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
1197     size_t i;
1198 
1199     virObjectLock(vm);
1200     event = virDomainEventBalloonChangeNewFromObj(vm, actual);
1201 
1202     /* We want the balloon size stored in domain definition to
1203      * account for the actual size of virtio-mem too. But the
1204      * balloon size as reported by QEMU (@actual) contains just
1205      * the balloon size without any virtio-mem. Do a wee bit of
1206      * math to fix it. */
1207     VIR_DEBUG("balloon size before fix is %lld", actual);
1208     for (i = 0; i < vm->def->nmems; i++) {
1209         if (vm->def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM)
1210             actual += vm->def->mems[i]->currentsize;
1211     }
1212 
1213     VIR_DEBUG("Updating balloon from %lld to %lld kb",
1214               vm->def->mem.cur_balloon, actual);
1215     vm->def->mem.cur_balloon = actual;
1216 
1217     if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
1218         VIR_WARN("unable to save domain status with balloon change");
1219 
1220     virObjectUnlock(vm);
1221 
1222     virObjectEventStateQueue(driver->domainEventState, event);
1223 }
1224 
1225 static void
qemuProcessHandlePMSuspendDisk(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,void * opaque)1226 qemuProcessHandlePMSuspendDisk(qemuMonitor *mon G_GNUC_UNUSED,
1227                                virDomainObj *vm,
1228                                void *opaque)
1229 {
1230     virQEMUDriver *driver = opaque;
1231     virObjectEvent *event = NULL;
1232     virObjectEvent *lifecycleEvent = NULL;
1233     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
1234 
1235     virObjectLock(vm);
1236     event = virDomainEventPMSuspendDiskNewFromObj(vm);
1237 
1238     if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
1239         qemuDomainObjPrivate *priv = vm->privateData;
1240         VIR_DEBUG("Transitioned guest %s to pmsuspended state due to "
1241                   "QMP suspend_disk event", vm->def->name);
1242 
1243         virDomainObjSetState(vm, VIR_DOMAIN_PMSUSPENDED,
1244                              VIR_DOMAIN_PMSUSPENDED_UNKNOWN);
1245         lifecycleEvent =
1246             virDomainEventLifecycleNewFromObj(vm,
1247                                      VIR_DOMAIN_EVENT_PMSUSPENDED,
1248                                      VIR_DOMAIN_EVENT_PMSUSPENDED_DISK);
1249 
1250         if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
1251             VIR_WARN("Unable to save status on vm %s after suspend event",
1252                      vm->def->name);
1253         }
1254 
1255         if (priv->agent)
1256             qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SUSPEND);
1257     }
1258 
1259     virObjectUnlock(vm);
1260 
1261     virObjectEventStateQueue(driver->domainEventState, event);
1262     virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
1263 }
1264 
1265 
1266 static void
qemuProcessHandleGuestPanic(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,qemuMonitorEventPanicInfo * info,void * opaque)1267 qemuProcessHandleGuestPanic(qemuMonitor *mon G_GNUC_UNUSED,
1268                             virDomainObj *vm,
1269                             qemuMonitorEventPanicInfo *info,
1270                             void *opaque)
1271 {
1272     virQEMUDriver *driver = opaque;
1273     struct qemuProcessEvent *processEvent;
1274 
1275     virObjectLock(vm);
1276     processEvent = g_new0(struct qemuProcessEvent, 1);
1277 
1278     processEvent->eventType = QEMU_PROCESS_EVENT_GUESTPANIC;
1279     processEvent->action = vm->def->onCrash;
1280     processEvent->data = info;
1281     /* Hold an extra reference because we can't allow 'vm' to be
1282      * deleted before handling guest panic event is finished.
1283      */
1284     processEvent->vm = virObjectRef(vm);
1285 
1286     qemuProcessEventSubmit(driver, &processEvent);
1287 
1288     virObjectUnlock(vm);
1289 }
1290 
1291 
1292 void
qemuProcessHandleDeviceDeleted(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * devAlias,void * opaque)1293 qemuProcessHandleDeviceDeleted(qemuMonitor *mon G_GNUC_UNUSED,
1294                                virDomainObj *vm,
1295                                const char *devAlias,
1296                                void *opaque)
1297 {
1298     virQEMUDriver *driver = opaque;
1299     struct qemuProcessEvent *processEvent = NULL;
1300     char *data;
1301 
1302     virObjectLock(vm);
1303 
1304     VIR_DEBUG("Device %s removed from domain %p %s",
1305               devAlias, vm, vm->def->name);
1306 
1307     if (qemuDomainSignalDeviceRemoval(vm, devAlias,
1308                                       QEMU_DOMAIN_UNPLUGGING_DEVICE_STATUS_OK))
1309         goto cleanup;
1310 
1311     processEvent = g_new0(struct qemuProcessEvent, 1);
1312 
1313     processEvent->eventType = QEMU_PROCESS_EVENT_DEVICE_DELETED;
1314     data = g_strdup(devAlias);
1315     processEvent->data = data;
1316     processEvent->vm = virObjectRef(vm);
1317 
1318     qemuProcessEventSubmit(driver, &processEvent);
1319 
1320  cleanup:
1321     virObjectUnlock(vm);
1322 }
1323 
1324 
1325 static void
qemuProcessHandleDeviceUnplugErr(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * devPath,const char * devAlias,void * opaque)1326 qemuProcessHandleDeviceUnplugErr(qemuMonitor *mon G_GNUC_UNUSED,
1327                                  virDomainObj *vm,
1328                                  const char *devPath,
1329                                  const char *devAlias,
1330                                  void *opaque)
1331 {
1332     virQEMUDriver *driver = opaque;
1333     virObjectEvent *event = NULL;
1334 
1335     virObjectLock(vm);
1336 
1337     VIR_DEBUG("Device %s QOM path %s failed to be removed from domain %p %s",
1338               devAlias, devPath, vm, vm->def->name);
1339 
1340     /*
1341      * DEVICE_UNPLUG_GUEST_ERROR will always contain the QOM path
1342      * but QEMU will not guarantee that devAlias will be provided.
1343      *
1344      * However, given that all Libvirt devices have a devAlias, we
1345      * can ignore the case where QEMU emitted this event without it.
1346      */
1347     if (!devAlias)
1348         goto cleanup;
1349 
1350     qemuDomainSignalDeviceRemoval(vm, devAlias,
1351                                   QEMU_DOMAIN_UNPLUGGING_DEVICE_STATUS_GUEST_REJECTED);
1352 
1353     event = virDomainEventDeviceRemovalFailedNewFromObj(vm, devAlias);
1354 
1355  cleanup:
1356     virObjectUnlock(vm);
1357     virObjectEventStateQueue(driver->domainEventState, event);
1358 }
1359 
1360 
1361 /**
1362  *
1363  * Meaning of fields reported by the event according to the ACPI standard:
1364  * @source:
1365  *  0x00 - 0xff: Notification values, as passed at the request time
1366  *  0x100: Operating System Shutdown Processing
1367  *  0x103: Ejection processing
1368  *  0x200: Insertion processing
1369  *  other values are reserved
1370  *
1371  * @status:
1372  *   general values
1373  *     0x00: success
1374  *     0x01: non-specific failure
1375  *     0x02: unrecognized notify code
1376  *     0x03 - 0x7f: reserved
1377  *     other values are specific to the notification type (see below)
1378  *
1379  *   for the 0x100 source the following additional codes are standardized:
1380  *     0x80: OS Shutdown request denied
1381  *     0x81: OS Shutdown in progress
1382  *     0x82: OS Shutdown completed
1383  *     0x83: OS Graceful shutdown not supported
1384  *     other higher values are reserved
1385  *
1386  *  for the 0x003 (Ejection request) and 0x103 (Ejection processing) source
1387  *  the following additional codes are standardized:
1388  *     0x80: Device ejection not supported by OSPM
1389  *     0x81: Device in use by application
1390  *     0x82: Device Busy
1391  *     0x83: Ejection dependency is busy or not supported for ejection by OSPM
1392  *     0x84: Ejection is in progress (pending)
1393  *     other higher values are reserved
1394  *
1395  *  for the 0x200 source the following additional codes are standardized:
1396  *     0x80: Device insertion in progress (pending)
1397  *     0x81: Device driver load failure
1398  *     0x82: Device insertion not supported by OSPM
1399  *     0x83-0x8F: Reserved
1400  *     0x90-0x9F: Insertion failure - Resources Unavailable as described by the
1401  *                                    following bit encodings:
1402  *                                    Bit [3]: Bus or Segment Numbers
1403  *                                    Bit [2]: Interrupts
1404  *                                    Bit [1]: I/O
1405  *                                    Bit [0]: Memory
1406  *     other higher values are reserved
1407  *
1408  * Other fields and semantics are specific to the qemu handling of the event.
1409  *  - @alias may be NULL for successful unplug operations
1410  *  - @slotType describes the device type a bit more closely, currently the
1411  *    only known value is 'DIMM'
1412  *  - @slot describes the specific device
1413  *
1414  *  Note that qemu does not emit the event for all the documented sources or
1415  *  devices.
1416  */
1417 static void
qemuProcessHandleAcpiOstInfo(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * alias,const char * slotType,const char * slot,unsigned int source,unsigned int status,void * opaque)1418 qemuProcessHandleAcpiOstInfo(qemuMonitor *mon G_GNUC_UNUSED,
1419                              virDomainObj *vm,
1420                              const char *alias,
1421                              const char *slotType,
1422                              const char *slot,
1423                              unsigned int source,
1424                              unsigned int status,
1425                              void *opaque)
1426 {
1427     virQEMUDriver *driver = opaque;
1428     virObjectEvent *event = NULL;
1429 
1430     virObjectLock(vm);
1431 
1432     VIR_DEBUG("ACPI OST info for device %s domain %p %s. "
1433               "slotType='%s' slot='%s' source=%u status=%u",
1434               NULLSTR(alias), vm, vm->def->name, slotType, slot, source, status);
1435 
1436     if (!alias)
1437         goto cleanup;
1438 
1439     if (STREQ(slotType, "DIMM")) {
1440         if ((source == 0x003 || source == 0x103) &&
1441             (status == 0x01 || (status >= 0x80 && status <= 0x83))) {
1442             qemuDomainSignalDeviceRemoval(vm, alias,
1443                                           QEMU_DOMAIN_UNPLUGGING_DEVICE_STATUS_GUEST_REJECTED);
1444 
1445             event = virDomainEventDeviceRemovalFailedNewFromObj(vm, alias);
1446         }
1447     }
1448 
1449  cleanup:
1450     virObjectUnlock(vm);
1451     virObjectEventStateQueue(driver->domainEventState, event);
1452 }
1453 
1454 
1455 static void
qemuProcessHandleBlockThreshold(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * nodename,unsigned long long threshold,unsigned long long excess,void * opaque)1456 qemuProcessHandleBlockThreshold(qemuMonitor *mon G_GNUC_UNUSED,
1457                                 virDomainObj *vm,
1458                                 const char *nodename,
1459                                 unsigned long long threshold,
1460                                 unsigned long long excess,
1461                                 void *opaque)
1462 {
1463     qemuDomainObjPrivate *priv;
1464     virQEMUDriver *driver = opaque;
1465     virObjectEvent *eventSource = NULL;
1466     virObjectEvent *eventDevice = NULL;
1467     virDomainDiskDef *disk;
1468     virStorageSource *src;
1469     const char *path = NULL;
1470 
1471     virObjectLock(vm);
1472 
1473     priv  = vm->privateData;
1474 
1475     VIR_DEBUG("BLOCK_WRITE_THRESHOLD event for block node '%s' in domain %p %s:"
1476               "threshold '%llu' exceeded by '%llu'",
1477               nodename, vm, vm->def->name, threshold, excess);
1478 
1479     if ((disk = qemuDomainDiskLookupByNodename(vm->def, priv->backup, nodename, &src))) {
1480         if (virStorageSourceIsLocalStorage(src))
1481             path = src->path;
1482 
1483         if (src == disk->src &&
1484             !src->thresholdEventWithIndex) {
1485             g_autofree char *dev = qemuDomainDiskBackingStoreGetName(disk, 0);
1486 
1487             eventDevice = virDomainEventBlockThresholdNewFromObj(vm, dev, path,
1488                                                                  threshold, excess);
1489         }
1490 
1491         if (src->id != 0) {
1492             g_autofree char *dev = qemuDomainDiskBackingStoreGetName(disk, src->id);
1493 
1494             eventSource = virDomainEventBlockThresholdNewFromObj(vm, dev, path,
1495                                                                  threshold, excess);
1496         }
1497     }
1498 
1499     virObjectUnlock(vm);
1500     virObjectEventStateQueue(driver->domainEventState, eventDevice);
1501     virObjectEventStateQueue(driver->domainEventState, eventSource);
1502 }
1503 
1504 
1505 static void
qemuProcessHandleNicRxFilterChanged(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * devAlias,void * opaque)1506 qemuProcessHandleNicRxFilterChanged(qemuMonitor *mon G_GNUC_UNUSED,
1507                                     virDomainObj *vm,
1508                                     const char *devAlias,
1509                                     void *opaque)
1510 {
1511     virQEMUDriver *driver = opaque;
1512     struct qemuProcessEvent *processEvent = NULL;
1513     char *data;
1514 
1515     virObjectLock(vm);
1516 
1517     VIR_DEBUG("Device %s RX Filter changed in domain %p %s",
1518               devAlias, vm, vm->def->name);
1519 
1520     processEvent = g_new0(struct qemuProcessEvent, 1);
1521 
1522     processEvent->eventType = QEMU_PROCESS_EVENT_NIC_RX_FILTER_CHANGED;
1523     data = g_strdup(devAlias);
1524     processEvent->data = data;
1525     processEvent->vm = virObjectRef(vm);
1526 
1527     qemuProcessEventSubmit(driver, &processEvent);
1528 
1529     virObjectUnlock(vm);
1530 }
1531 
1532 
1533 static void
qemuProcessHandleSerialChanged(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * devAlias,bool connected,void * opaque)1534 qemuProcessHandleSerialChanged(qemuMonitor *mon G_GNUC_UNUSED,
1535                                virDomainObj *vm,
1536                                const char *devAlias,
1537                                bool connected,
1538                                void *opaque)
1539 {
1540     virQEMUDriver *driver = opaque;
1541     struct qemuProcessEvent *processEvent = NULL;
1542     char *data;
1543 
1544     virObjectLock(vm);
1545 
1546     VIR_DEBUG("Serial port %s state changed to '%d' in domain %p %s",
1547               devAlias, connected, vm, vm->def->name);
1548 
1549     processEvent = g_new0(struct qemuProcessEvent, 1);
1550 
1551     processEvent->eventType = QEMU_PROCESS_EVENT_SERIAL_CHANGED;
1552     data = g_strdup(devAlias);
1553     processEvent->data = data;
1554     processEvent->action = connected;
1555     processEvent->vm = virObjectRef(vm);
1556 
1557     qemuProcessEventSubmit(driver, &processEvent);
1558 
1559     virObjectUnlock(vm);
1560 }
1561 
1562 
1563 static void
qemuProcessHandleSpiceMigrated(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,void * opaque G_GNUC_UNUSED)1564 qemuProcessHandleSpiceMigrated(qemuMonitor *mon G_GNUC_UNUSED,
1565                                virDomainObj *vm,
1566                                void *opaque G_GNUC_UNUSED)
1567 {
1568     qemuDomainObjPrivate *priv;
1569     qemuDomainJobPrivate *jobPriv;
1570 
1571     virObjectLock(vm);
1572 
1573     VIR_DEBUG("Spice migration completed for domain %p %s",
1574               vm, vm->def->name);
1575 
1576     priv = vm->privateData;
1577     jobPriv = priv->job.privateData;
1578     if (priv->job.asyncJob != QEMU_ASYNC_JOB_MIGRATION_OUT) {
1579         VIR_DEBUG("got SPICE_MIGRATE_COMPLETED event without a migration job");
1580         goto cleanup;
1581     }
1582 
1583     jobPriv->spiceMigrated = true;
1584     virDomainObjBroadcast(vm);
1585 
1586  cleanup:
1587     virObjectUnlock(vm);
1588 }
1589 
1590 
1591 static void
qemuProcessHandleMigrationStatus(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,int status,void * opaque)1592 qemuProcessHandleMigrationStatus(qemuMonitor *mon G_GNUC_UNUSED,
1593                                  virDomainObj *vm,
1594                                  int status,
1595                                  void *opaque)
1596 {
1597     qemuDomainObjPrivate *priv;
1598     virQEMUDriver *driver = opaque;
1599     virObjectEvent *event = NULL;
1600     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
1601     int reason;
1602 
1603     virObjectLock(vm);
1604 
1605     VIR_DEBUG("Migration of domain %p %s changed state to %s",
1606               vm, vm->def->name,
1607               qemuMonitorMigrationStatusTypeToString(status));
1608 
1609     priv = vm->privateData;
1610     if (priv->job.asyncJob == QEMU_ASYNC_JOB_NONE) {
1611         VIR_DEBUG("got MIGRATION event without a migration job");
1612         goto cleanup;
1613     }
1614 
1615     priv->job.current->stats.mig.status = status;
1616     virDomainObjBroadcast(vm);
1617 
1618     if (status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY &&
1619         priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT &&
1620         virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED &&
1621         reason == VIR_DOMAIN_PAUSED_MIGRATION) {
1622         VIR_DEBUG("Correcting paused state reason for domain %s to %s",
1623                   vm->def->name,
1624                   virDomainPausedReasonTypeToString(VIR_DOMAIN_PAUSED_POSTCOPY));
1625 
1626         virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_POSTCOPY);
1627         event = virDomainEventLifecycleNewFromObj(vm,
1628                                                   VIR_DOMAIN_EVENT_SUSPENDED,
1629                                                   VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY);
1630 
1631         if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
1632             VIR_WARN("Unable to save status on vm %s after state change",
1633                      vm->def->name);
1634         }
1635     }
1636 
1637  cleanup:
1638     virObjectUnlock(vm);
1639     virObjectEventStateQueue(driver->domainEventState, event);
1640 }
1641 
1642 
1643 static void
qemuProcessHandleMigrationPass(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,int pass,void * opaque)1644 qemuProcessHandleMigrationPass(qemuMonitor *mon G_GNUC_UNUSED,
1645                                virDomainObj *vm,
1646                                int pass,
1647                                void *opaque)
1648 {
1649     virQEMUDriver *driver = opaque;
1650     qemuDomainObjPrivate *priv;
1651 
1652     virObjectLock(vm);
1653 
1654     VIR_DEBUG("Migrating domain %p %s, iteration %d",
1655               vm, vm->def->name, pass);
1656 
1657     priv = vm->privateData;
1658     if (priv->job.asyncJob == QEMU_ASYNC_JOB_NONE) {
1659         VIR_DEBUG("got MIGRATION_PASS event without a migration job");
1660         goto cleanup;
1661     }
1662 
1663     virObjectEventStateQueue(driver->domainEventState,
1664                          virDomainEventMigrationIterationNewFromObj(vm, pass));
1665 
1666  cleanup:
1667     virObjectUnlock(vm);
1668 }
1669 
1670 
1671 static void
qemuProcessHandleDumpCompleted(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,int status,qemuMonitorDumpStats * stats,const char * error,void * opaque G_GNUC_UNUSED)1672 qemuProcessHandleDumpCompleted(qemuMonitor *mon G_GNUC_UNUSED,
1673                                virDomainObj *vm,
1674                                int status,
1675                                qemuMonitorDumpStats *stats,
1676                                const char *error,
1677                                void *opaque G_GNUC_UNUSED)
1678 {
1679     qemuDomainObjPrivate *priv;
1680     qemuDomainJobPrivate *jobPriv;
1681 
1682     virObjectLock(vm);
1683 
1684     VIR_DEBUG("Dump completed for domain %p %s with stats=%p error='%s'",
1685               vm, vm->def->name, stats, NULLSTR(error));
1686 
1687     priv = vm->privateData;
1688     jobPriv = priv->job.privateData;
1689     if (priv->job.asyncJob == QEMU_ASYNC_JOB_NONE) {
1690         VIR_DEBUG("got DUMP_COMPLETED event without a dump_completed job");
1691         goto cleanup;
1692     }
1693     jobPriv->dumpCompleted = true;
1694     priv->job.current->stats.dump = *stats;
1695     priv->job.error = g_strdup(error);
1696 
1697     /* Force error if extracting the DUMP_COMPLETED status failed */
1698     if (!error && status < 0) {
1699         priv->job.error = g_strdup(virGetLastErrorMessage());
1700         priv->job.current->stats.dump.status = QEMU_MONITOR_DUMP_STATUS_FAILED;
1701     }
1702 
1703     virDomainObjBroadcast(vm);
1704 
1705  cleanup:
1706     virResetLastError();
1707     virObjectUnlock(vm);
1708 }
1709 
1710 
1711 static void
qemuProcessHandlePRManagerStatusChanged(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * prManager,bool connected,void * opaque)1712 qemuProcessHandlePRManagerStatusChanged(qemuMonitor *mon G_GNUC_UNUSED,
1713                                         virDomainObj *vm,
1714                                         const char *prManager,
1715                                         bool connected,
1716                                         void *opaque)
1717 {
1718     virQEMUDriver *driver = opaque;
1719     qemuDomainObjPrivate *priv;
1720     struct qemuProcessEvent *processEvent = NULL;
1721     const char *managedAlias = qemuDomainGetManagedPRAlias();
1722 
1723     virObjectLock(vm);
1724 
1725     VIR_DEBUG("pr-manager %s status changed for domain %p %s connected=%d",
1726               prManager, vm, vm->def->name, connected);
1727 
1728     /* Connect events are boring. */
1729     if (connected)
1730         goto cleanup;
1731 
1732     /* Disconnect events are more interesting. */
1733 
1734     if (STRNEQ(prManager, managedAlias)) {
1735         VIR_DEBUG("pr-manager %s not managed, ignoring event",
1736                   prManager);
1737         goto cleanup;
1738     }
1739 
1740     priv = vm->privateData;
1741     priv->prDaemonRunning = false;
1742 
1743     processEvent = g_new0(struct qemuProcessEvent, 1);
1744 
1745     processEvent->eventType = QEMU_PROCESS_EVENT_PR_DISCONNECT;
1746     processEvent->vm = virObjectRef(vm);
1747 
1748     qemuProcessEventSubmit(driver, &processEvent);
1749 
1750  cleanup:
1751     virObjectUnlock(vm);
1752 }
1753 
1754 
1755 static void
qemuProcessHandleRdmaGidStatusChanged(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * netdev,bool gid_status,unsigned long long subnet_prefix,unsigned long long interface_id,void * opaque)1756 qemuProcessHandleRdmaGidStatusChanged(qemuMonitor *mon G_GNUC_UNUSED,
1757                                       virDomainObj *vm,
1758                                       const char *netdev,
1759                                       bool gid_status,
1760                                       unsigned long long subnet_prefix,
1761                                       unsigned long long interface_id,
1762                                       void *opaque)
1763 {
1764     virQEMUDriver *driver = opaque;
1765     struct qemuProcessEvent *processEvent = NULL;
1766     qemuMonitorRdmaGidStatus *info = NULL;
1767 
1768     virObjectLock(vm);
1769 
1770     VIR_DEBUG("netdev=%s,gid_status=%d,subnet_prefix=0x%llx,interface_id=0x%llx",
1771               netdev, gid_status, subnet_prefix, interface_id);
1772 
1773     info = g_new0(qemuMonitorRdmaGidStatus, 1);
1774 
1775     info->netdev = g_strdup(netdev);
1776 
1777     info->gid_status = gid_status;
1778     info->subnet_prefix = subnet_prefix;
1779     info->interface_id = interface_id;
1780 
1781     processEvent = g_new0(struct qemuProcessEvent, 1);
1782 
1783     processEvent->eventType = QEMU_PROCESS_EVENT_RDMA_GID_STATUS_CHANGED;
1784     processEvent->vm = virObjectRef(vm);
1785     processEvent->data = g_steal_pointer(&info);
1786 
1787     qemuProcessEventSubmit(driver, &processEvent);
1788 
1789     virObjectUnlock(vm);
1790 }
1791 
1792 
1793 static void
qemuProcessHandleGuestCrashloaded(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,void * opaque)1794 qemuProcessHandleGuestCrashloaded(qemuMonitor *mon G_GNUC_UNUSED,
1795                                   virDomainObj *vm,
1796                                   void *opaque)
1797 {
1798     virQEMUDriver *driver = opaque;
1799     struct qemuProcessEvent *processEvent;
1800 
1801     virObjectLock(vm);
1802     processEvent = g_new0(struct qemuProcessEvent, 1);
1803 
1804     processEvent->eventType = QEMU_PROCESS_EVENT_GUEST_CRASHLOADED;
1805     processEvent->vm = virObjectRef(vm);
1806 
1807     qemuProcessEventSubmit(driver, &processEvent);
1808 
1809     virObjectUnlock(vm);
1810 }
1811 
1812 
1813 static void
qemuProcessHandleMemoryFailure(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,qemuMonitorEventMemoryFailure * mfp,void * opaque)1814 qemuProcessHandleMemoryFailure(qemuMonitor *mon G_GNUC_UNUSED,
1815                                virDomainObj *vm,
1816                                qemuMonitorEventMemoryFailure *mfp,
1817                                void *opaque)
1818 {
1819     virQEMUDriver *driver = opaque;
1820     virObjectEvent *event = NULL;
1821     virDomainMemoryFailureRecipientType recipient;
1822     virDomainMemoryFailureActionType action;
1823     unsigned int flags = 0;
1824 
1825     switch (mfp->recipient) {
1826     case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_HYPERVISOR:
1827         recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_HYPERVISOR;
1828         break;
1829     case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_GUEST:
1830         recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_GUEST;
1831         break;
1832     case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST:
1833     default:
1834         return;
1835     }
1836 
1837     switch (mfp->action) {
1838     case QEMU_MONITOR_MEMORY_FAILURE_ACTION_IGNORE:
1839         action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_IGNORE;
1840         break;
1841     case QEMU_MONITOR_MEMORY_FAILURE_ACTION_INJECT:
1842         action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_INJECT;
1843         break;
1844     case QEMU_MONITOR_MEMORY_FAILURE_ACTION_FATAL:
1845         action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_FATAL;
1846         break;
1847     case QEMU_MONITOR_MEMORY_FAILURE_ACTION_RESET:
1848         action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_RESET;
1849         break;
1850     case QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST:
1851     default:
1852         return;
1853     }
1854 
1855     if (mfp->action_required)
1856         flags |= VIR_DOMAIN_MEMORY_FAILURE_ACTION_REQUIRED;
1857     if (mfp->recursive)
1858         flags |= VIR_DOMAIN_MEMORY_FAILURE_RECURSIVE;
1859 
1860     event = virDomainEventMemoryFailureNewFromObj(vm, recipient, action, flags);
1861     virObjectEventStateQueue(driver->domainEventState, event);
1862 }
1863 
1864 
1865 static void
qemuProcessHandleMemoryDeviceSizeChange(qemuMonitor * mon G_GNUC_UNUSED,virDomainObj * vm,const char * devAlias,unsigned long long size,void * opaque)1866 qemuProcessHandleMemoryDeviceSizeChange(qemuMonitor *mon G_GNUC_UNUSED,
1867                                         virDomainObj *vm,
1868                                         const char *devAlias,
1869                                         unsigned long long size,
1870                                         void *opaque)
1871 {
1872     virQEMUDriver *driver = opaque;
1873     struct qemuProcessEvent *processEvent = NULL;
1874     qemuMonitorMemoryDeviceSizeChange *info = NULL;
1875 
1876     virObjectLock(vm);
1877 
1878     VIR_DEBUG("Memory device '%s' changed size to '%llu' in domain '%s'",
1879               devAlias, size, vm->def->name);
1880 
1881     info = g_new0(qemuMonitorMemoryDeviceSizeChange, 1);
1882     info->devAlias = g_strdup(devAlias);
1883     info->size = size;
1884 
1885     processEvent = g_new0(struct qemuProcessEvent, 1);
1886     processEvent->eventType = QEMU_PROCESS_EVENT_MEMORY_DEVICE_SIZE_CHANGE;
1887     processEvent->vm = virObjectRef(vm);
1888     processEvent->data = g_steal_pointer(&info);
1889 
1890     qemuProcessEventSubmit(driver, &processEvent);
1891 
1892     virObjectUnlock(vm);
1893 }
1894 
1895 
1896 static qemuMonitorCallbacks monitorCallbacks = {
1897     .eofNotify = qemuProcessHandleMonitorEOF,
1898     .errorNotify = qemuProcessHandleMonitorError,
1899     .domainEvent = qemuProcessHandleEvent,
1900     .domainShutdown = qemuProcessHandleShutdown,
1901     .domainStop = qemuProcessHandleStop,
1902     .domainResume = qemuProcessHandleResume,
1903     .domainReset = qemuProcessHandleReset,
1904     .domainRTCChange = qemuProcessHandleRTCChange,
1905     .domainWatchdog = qemuProcessHandleWatchdog,
1906     .domainIOError = qemuProcessHandleIOError,
1907     .domainGraphics = qemuProcessHandleGraphics,
1908     .domainBlockJob = qemuProcessHandleBlockJob,
1909     .jobStatusChange = qemuProcessHandleJobStatusChange,
1910     .domainTrayChange = qemuProcessHandleTrayChange,
1911     .domainPMWakeup = qemuProcessHandlePMWakeup,
1912     .domainPMSuspend = qemuProcessHandlePMSuspend,
1913     .domainBalloonChange = qemuProcessHandleBalloonChange,
1914     .domainPMSuspendDisk = qemuProcessHandlePMSuspendDisk,
1915     .domainGuestPanic = qemuProcessHandleGuestPanic,
1916     .domainDeviceDeleted = qemuProcessHandleDeviceDeleted,
1917     .domainNicRxFilterChanged = qemuProcessHandleNicRxFilterChanged,
1918     .domainSerialChange = qemuProcessHandleSerialChanged,
1919     .domainSpiceMigrated = qemuProcessHandleSpiceMigrated,
1920     .domainMigrationStatus = qemuProcessHandleMigrationStatus,
1921     .domainMigrationPass = qemuProcessHandleMigrationPass,
1922     .domainAcpiOstInfo = qemuProcessHandleAcpiOstInfo,
1923     .domainBlockThreshold = qemuProcessHandleBlockThreshold,
1924     .domainDumpCompleted = qemuProcessHandleDumpCompleted,
1925     .domainPRManagerStatusChanged = qemuProcessHandlePRManagerStatusChanged,
1926     .domainRdmaGidStatusChanged = qemuProcessHandleRdmaGidStatusChanged,
1927     .domainGuestCrashloaded = qemuProcessHandleGuestCrashloaded,
1928     .domainMemoryFailure = qemuProcessHandleMemoryFailure,
1929     .domainMemoryDeviceSizeChange = qemuProcessHandleMemoryDeviceSizeChange,
1930     .domainDeviceUnplugError = qemuProcessHandleDeviceUnplugErr,
1931 };
1932 
1933 static void
1934 qemuProcessMonitorReportLogError(qemuMonitor *mon,
1935                                  const char *msg,
1936                                  void *opaque);
1937 
1938 
/*
 * Free callback registered together with the domain log context:
 * drops one reference on the context passed as @opaque (NULL is
 * tolerated).
 */
static void
qemuProcessMonitorLogFree(void *opaque)
{
    if (opaque)
        g_object_unref(opaque);
}
1945 
1946 
1947 static int
qemuProcessInitMonitor(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob)1948 qemuProcessInitMonitor(virQEMUDriver *driver,
1949                        virDomainObj *vm,
1950                        qemuDomainAsyncJob asyncJob)
1951 {
1952     int ret;
1953 
1954     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
1955         return -1;
1956 
1957     ret = qemuMonitorSetCapabilities(QEMU_DOMAIN_PRIVATE(vm)->mon);
1958 
1959     if (qemuDomainObjExitMonitor(driver, vm) < 0)
1960         ret = -1;
1961 
1962     return ret;
1963 }
1964 
1965 
1966 static int
qemuConnectMonitor(virQEMUDriver * driver,virDomainObj * vm,int asyncJob,bool retry,qemuDomainLogContext * logCtxt)1967 qemuConnectMonitor(virQEMUDriver *driver, virDomainObj *vm, int asyncJob,
1968                    bool retry, qemuDomainLogContext *logCtxt)
1969 {
1970     qemuDomainObjPrivate *priv = vm->privateData;
1971     qemuMonitor *mon = NULL;
1972     unsigned long long timeout = 0;
1973 
1974     if (qemuSecuritySetDaemonSocketLabel(driver->securityManager, vm->def) < 0) {
1975         VIR_ERROR(_("Failed to set security context for monitor for %s"),
1976                   vm->def->name);
1977         return -1;
1978     }
1979 
1980     /* When using hugepages, kernel zeroes them out before
1981      * handing them over to qemu. This can be very time
1982      * consuming. Therefore, add a second to timeout for each
1983      * 1GiB of guest RAM. */
1984     timeout = virDomainDefGetMemoryTotal(vm->def) / (1024 * 1024);
1985 
1986     ignore_value(virTimeMillisNow(&priv->monStart));
1987 
1988     mon = qemuMonitorOpen(vm,
1989                           priv->monConfig,
1990                           retry,
1991                           timeout,
1992                           virEventThreadGetContext(priv->eventThread),
1993                           &monitorCallbacks,
1994                           driver);
1995 
1996     if (mon && logCtxt) {
1997         g_object_ref(logCtxt);
1998         qemuMonitorSetDomainLog(mon,
1999                                 qemuProcessMonitorReportLogError,
2000                                 logCtxt,
2001                                 qemuProcessMonitorLogFree);
2002     }
2003 
2004     priv->monStart = 0;
2005     priv->mon = mon;
2006 
2007     if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) {
2008         VIR_ERROR(_("Failed to clear security context for monitor for %s"),
2009                   vm->def->name);
2010         return -1;
2011     }
2012 
2013     if (priv->mon == NULL) {
2014         VIR_INFO("Failed to connect monitor for %s", vm->def->name);
2015         return -1;
2016     }
2017 
2018     if (qemuProcessInitMonitor(driver, vm, asyncJob) < 0)
2019         return -1;
2020 
2021     if (qemuMigrationCapsCheck(driver, vm, asyncJob) < 0)
2022         return -1;
2023 
2024     return 0;
2025 }
2026 
2027 
2028 /**
2029  * qemuProcessReadLog: Read log file of a qemu VM
2030  * @logCtxt: the domain log context
2031  * @msg: pointer to buffer to store the read messages in
2032  * @max: maximum length of the message returned in @msg
2033  *
2034  * Reads log of a qemu VM. Skips messages not produced by qemu or irrelevant
2035  * messages. If @max is not zero, @msg will contain at most @max characters
2036  * from the end of the log and @msg will start after a new line if possible.
2037  *
2038  * Returns 0 on success or -1 on error
2039  */
2040 static int
qemuProcessReadLog(qemuDomainLogContext * logCtxt,char ** msg,size_t max)2041 qemuProcessReadLog(qemuDomainLogContext *logCtxt,
2042                    char **msg,
2043                    size_t max)
2044 {
2045     char *buf;
2046     ssize_t got;
2047     char *eol;
2048     char *filter_next;
2049     size_t skip;
2050 
2051     if ((got = qemuDomainLogContextRead(logCtxt, &buf)) < 0)
2052         return -1;
2053 
2054     /* Filter out debug messages from intermediate libvirt process */
2055     filter_next = buf;
2056     while ((eol = strchr(filter_next, '\n'))) {
2057         *eol = '\0';
2058         if (virLogProbablyLogMessage(filter_next) ||
2059             strstr(filter_next, "char device redirected to")) {
2060             skip = (eol + 1) - filter_next;
2061             memmove(filter_next, eol + 1, buf + got - eol);
2062             got -= skip;
2063         } else {
2064             filter_next = eol + 1;
2065             *eol = '\n';
2066         }
2067     }
2068 
2069     if (got > 0 &&
2070         buf[got - 1] == '\n') {
2071         buf[got - 1] = '\0';
2072         got--;
2073     }
2074 
2075     if (max > 0 && got > max) {
2076         skip = got - max;
2077 
2078         if (buf[skip - 1] != '\n' &&
2079             (eol = strchr(buf + skip, '\n')) &&
2080             !virStringIsEmpty(eol + 1))
2081             skip = eol + 1 - buf;
2082 
2083         memmove(buf, buf + skip, got - skip + 1);
2084         got -= skip;
2085     }
2086 
2087     buf = g_renew(char, buf, got + 1);
2088     *msg = buf;
2089     return 0;
2090 }
2091 
2092 
2093 static int
qemuProcessReportLogError(qemuDomainLogContext * logCtxt,const char * msgprefix)2094 qemuProcessReportLogError(qemuDomainLogContext *logCtxt,
2095                           const char *msgprefix)
2096 {
2097     g_autofree char *logmsg = NULL;
2098 
2099     /* assume that 1024 chars of qemu log is the right balance */
2100     if (qemuProcessReadLog(logCtxt, &logmsg, 1024) < 0)
2101         return -1;
2102 
2103     virResetLastError();
2104     if (virStringIsEmpty(logmsg))
2105         virReportError(VIR_ERR_INTERNAL_ERROR, "%s", msgprefix);
2106     else
2107         virReportError(VIR_ERR_INTERNAL_ERROR, "%s: %s", /* _( silence sc_libvirt_unmarked_diagnostics */
2108                        msgprefix, logmsg);
2109 
2110     return 0;
2111 }
2112 
2113 
2114 static void
qemuProcessMonitorReportLogError(qemuMonitor * mon G_GNUC_UNUSED,const char * msg,void * opaque)2115 qemuProcessMonitorReportLogError(qemuMonitor *mon G_GNUC_UNUSED,
2116                                  const char *msg,
2117                                  void *opaque)
2118 {
2119     qemuDomainLogContext *logCtxt = opaque;
2120     qemuProcessReportLogError(logCtxt, msg);
2121 }
2122 
2123 
2124 static int
qemuProcessLookupPTYs(virDomainChrDef ** devices,int count,GHashTable * info)2125 qemuProcessLookupPTYs(virDomainChrDef **devices,
2126                       int count,
2127                       GHashTable *info)
2128 {
2129     size_t i;
2130 
2131     for (i = 0; i < count; i++) {
2132         g_autofree char *id = NULL;
2133         virDomainChrDef *chr = devices[i];
2134         if (chr->source->type == VIR_DOMAIN_CHR_TYPE_PTY) {
2135             qemuMonitorChardevInfo *entry;
2136 
2137             id = g_strdup_printf("char%s", chr->info.alias);
2138 
2139             entry = virHashLookup(info, id);
2140             if (!entry || !entry->ptyPath) {
2141                 if (chr->source->data.file.path == NULL) {
2142                     /* neither the log output nor 'info chardev' had a
2143                      * pty path for this chardev, report an error
2144                      */
2145                     virReportError(VIR_ERR_INTERNAL_ERROR,
2146                                    _("no assigned pty for device %s"), id);
2147                     return -1;
2148                 } else {
2149                     /* 'info chardev' had no pty path for this chardev,
2150                      * but the log output had, so we're fine
2151                      */
2152                     continue;
2153                 }
2154             }
2155 
2156             g_free(chr->source->data.file.path);
2157             chr->source->data.file.path = g_strdup(entry->ptyPath);
2158         }
2159     }
2160 
2161     return 0;
2162 }
2163 
2164 static int
qemuProcessFindCharDevicePTYsMonitor(virDomainObj * vm,GHashTable * info)2165 qemuProcessFindCharDevicePTYsMonitor(virDomainObj *vm,
2166                                      GHashTable *info)
2167 {
2168     size_t i = 0;
2169 
2170     if (qemuProcessLookupPTYs(vm->def->serials, vm->def->nserials, info) < 0)
2171         return -1;
2172 
2173     if (qemuProcessLookupPTYs(vm->def->parallels, vm->def->nparallels,
2174                               info) < 0)
2175         return -1;
2176 
2177     if (qemuProcessLookupPTYs(vm->def->channels, vm->def->nchannels, info) < 0)
2178         return -1;
2179     /* For historical reasons, console[0] can be just an alias
2180      * for serial[0]. That's why we need to update it as well. */
2181     if (vm->def->nconsoles) {
2182         virDomainChrDef *chr = vm->def->consoles[0];
2183 
2184         if (vm->def->nserials &&
2185             chr->deviceType == VIR_DOMAIN_CHR_DEVICE_TYPE_CONSOLE &&
2186             chr->targetType == VIR_DOMAIN_CHR_CONSOLE_TARGET_TYPE_SERIAL) {
2187             /* yes, the first console is just an alias for serials[0] */
2188             i = 1;
2189             if (virDomainChrSourceDefCopy(chr->source,
2190                                           ((vm->def->serials[0])->source)) < 0)
2191                 return -1;
2192         }
2193     }
2194 
2195     if (qemuProcessLookupPTYs(vm->def->consoles + i, vm->def->nconsoles - i,
2196                               info) < 0)
2197         return -1;
2198 
2199     return 0;
2200 }
2201 
2202 
2203 static void
qemuProcessRefreshChannelVirtioState(virQEMUDriver * driver,virDomainObj * vm,GHashTable * info,int booted)2204 qemuProcessRefreshChannelVirtioState(virQEMUDriver *driver,
2205                                      virDomainObj *vm,
2206                                      GHashTable *info,
2207                                      int booted)
2208 {
2209     size_t i;
2210     int agentReason = VIR_CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_CHANNEL;
2211     qemuMonitorChardevInfo *entry;
2212     virObjectEvent *event = NULL;
2213     g_autofree char *id = NULL;
2214 
2215     if (booted)
2216         agentReason = VIR_CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_DOMAIN_STARTED;
2217 
2218     for (i = 0; i < vm->def->nchannels; i++) {
2219         virDomainChrDef *chr = vm->def->channels[i];
2220         if (chr->targetType == VIR_DOMAIN_CHR_CHANNEL_TARGET_TYPE_VIRTIO) {
2221 
2222             VIR_FREE(id);
2223             id = g_strdup_printf("char%s", chr->info.alias);
2224 
2225             /* port state not reported */
2226             if (!(entry = virHashLookup(info, id)) ||
2227                 !entry->state)
2228                 continue;
2229 
2230             if (entry->state != VIR_DOMAIN_CHR_DEVICE_STATE_DEFAULT &&
2231                 STREQ_NULLABLE(chr->target.name, "org.qemu.guest_agent.0") &&
2232                 (event = virDomainEventAgentLifecycleNewFromObj(vm, entry->state,
2233                                                                 agentReason)))
2234                 virObjectEventStateQueue(driver->domainEventState, event);
2235 
2236             chr->state = entry->state;
2237         }
2238     }
2239 }
2240 
2241 
2242 int
qemuRefreshVirtioChannelState(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob)2243 qemuRefreshVirtioChannelState(virQEMUDriver *driver,
2244                               virDomainObj *vm,
2245                               qemuDomainAsyncJob asyncJob)
2246 {
2247     qemuDomainObjPrivate *priv = vm->privateData;
2248     GHashTable *info = NULL;
2249     int ret = -1;
2250 
2251     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
2252         goto cleanup;
2253 
2254     ret = qemuMonitorGetChardevInfo(priv->mon, &info);
2255     if (qemuDomainObjExitMonitor(driver, vm) < 0)
2256         ret = -1;
2257 
2258     if (ret < 0)
2259         goto cleanup;
2260 
2261     qemuProcessRefreshChannelVirtioState(driver, vm, info, false);
2262     ret = 0;
2263 
2264  cleanup:
2265     virHashFree(info);
2266     return ret;
2267 }
2268 
2269 
2270 static int
qemuProcessRefreshPRManagerState(virDomainObj * vm,GHashTable * info)2271 qemuProcessRefreshPRManagerState(virDomainObj *vm,
2272                                  GHashTable *info)
2273 {
2274     qemuDomainObjPrivate *priv = vm->privateData;
2275     qemuMonitorPRManagerInfo *prManagerInfo;
2276     const char *managedAlias = qemuDomainGetManagedPRAlias();
2277 
2278     if (!(prManagerInfo = virHashLookup(info, managedAlias))) {
2279         virReportError(VIR_ERR_OPERATION_FAILED,
2280                        _("missing info on pr-manager %s"),
2281                        managedAlias);
2282         return -1;
2283     }
2284 
2285     priv->prDaemonRunning = prManagerInfo->connected;
2286 
2287     if (!priv->prDaemonRunning &&
2288         qemuProcessStartManagedPRDaemon(vm) < 0)
2289         return -1;
2290 
2291     return 0;
2292 }
2293 
2294 
2295 static int
qemuRefreshPRManagerState(virQEMUDriver * driver,virDomainObj * vm)2296 qemuRefreshPRManagerState(virQEMUDriver *driver,
2297                           virDomainObj *vm)
2298 {
2299     qemuDomainObjPrivate *priv = vm->privateData;
2300     GHashTable *info = NULL;
2301     int ret = -1;
2302 
2303     if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PR_MANAGER_HELPER) ||
2304         !qemuDomainDefHasManagedPR(vm))
2305         return 0;
2306 
2307     qemuDomainObjEnterMonitor(driver, vm);
2308     ret = qemuMonitorGetPRManagerInfo(priv->mon, &info);
2309     if (qemuDomainObjExitMonitor(driver, vm) < 0)
2310         ret = -1;
2311 
2312     if (ret < 0)
2313         goto cleanup;
2314 
2315     ret = qemuProcessRefreshPRManagerState(vm, info);
2316 
2317  cleanup:
2318     virHashFree(info);
2319     return ret;
2320 }
2321 
2322 
2323 static void
qemuRefreshRTC(virQEMUDriver * driver,virDomainObj * vm)2324 qemuRefreshRTC(virQEMUDriver *driver,
2325                virDomainObj *vm)
2326 {
2327     qemuDomainObjPrivate *priv = vm->privateData;
2328     time_t now, then;
2329     struct tm thenbits;
2330     long localOffset;
2331     int rv;
2332 
2333     if (vm->def->clock.offset != VIR_DOMAIN_CLOCK_OFFSET_VARIABLE)
2334         return;
2335 
2336     memset(&thenbits, 0, sizeof(thenbits));
2337     qemuDomainObjEnterMonitor(driver, vm);
2338     now = time(NULL);
2339     rv = qemuMonitorGetRTCTime(priv->mon, &thenbits);
2340     if (qemuDomainObjExitMonitor(driver, vm) < 0)
2341         rv = -1;
2342 
2343     if (rv < 0)
2344         return;
2345 
2346     thenbits.tm_isdst = -1;
2347     if ((then = mktime(&thenbits)) == (time_t)-1) {
2348         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2349                        _("Unable to convert time"));
2350         return;
2351     }
2352 
2353     /* Thing is, @now is in local TZ but @then in UTC. */
2354     if (virTimeLocalOffsetFromUTC(&localOffset) < 0)
2355         return;
2356 
2357     vm->def->clock.data.variable.adjustment = then - now + localOffset;
2358 }
2359 
2360 int
qemuProcessRefreshBalloonState(virQEMUDriver * driver,virDomainObj * vm,int asyncJob)2361 qemuProcessRefreshBalloonState(virQEMUDriver *driver,
2362                                virDomainObj *vm,
2363                                int asyncJob)
2364 {
2365     unsigned long long balloon;
2366     size_t i;
2367     int rc;
2368 
2369     /* if no ballooning is available, the current size equals to the current
2370      * full memory size */
2371     if (!virDomainDefHasMemballoon(vm->def)) {
2372         vm->def->mem.cur_balloon = virDomainDefGetMemoryTotal(vm->def);
2373         return 0;
2374     }
2375 
2376     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
2377         return -1;
2378 
2379     rc = qemuMonitorGetBalloonInfo(qemuDomainGetMonitor(vm), &balloon);
2380     if (qemuDomainObjExitMonitor(driver, vm) < 0 || rc < 0)
2381         return -1;
2382 
2383     /* We want the balloon size stored in domain definition to
2384      * account for the actual size of virtio-mem too. But the
2385      * balloon size as reported by QEMU (@balloon) contains just
2386      * the balloon size without any virtio-mem. Do a wee bit of
2387      * math to fix it. */
2388     VIR_DEBUG("balloon size before fix is %lld", balloon);
2389     for (i = 0; i < vm->def->nmems; i++) {
2390         if (vm->def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM)
2391             balloon += vm->def->mems[i]->currentsize;
2392     }
2393     VIR_DEBUG("Updating balloon from %lld to %lld kb",
2394               vm->def->mem.cur_balloon, balloon);
2395     vm->def->mem.cur_balloon = balloon;
2396 
2397     return 0;
2398 }
2399 
2400 
2401 static int
qemuProcessWaitForMonitor(virQEMUDriver * driver,virDomainObj * vm,int asyncJob,qemuDomainLogContext * logCtxt)2402 qemuProcessWaitForMonitor(virQEMUDriver *driver,
2403                           virDomainObj *vm,
2404                           int asyncJob,
2405                           qemuDomainLogContext *logCtxt)
2406 {
2407     int ret = -1;
2408     GHashTable *info = NULL;
2409     qemuDomainObjPrivate *priv = vm->privateData;
2410     bool retry = true;
2411 
2412     if (priv->qemuCaps &&
2413         virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_CHARDEV_FD_PASS_COMMANDLINE))
2414         retry = false;
2415 
2416     VIR_DEBUG("Connect monitor to vm=%p name='%s' retry=%d",
2417               vm, vm->def->name, retry);
2418 
2419     if (qemuConnectMonitor(driver, vm, asyncJob, retry, logCtxt) < 0)
2420         goto cleanup;
2421 
2422     /* Try to get the pty path mappings again via the monitor. This is much more
2423      * reliable if it's available.
2424      * Note that the monitor itself can be on a pty, so we still need to try the
2425      * log output method. */
2426     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
2427         goto cleanup;
2428     ret = qemuMonitorGetChardevInfo(priv->mon, &info);
2429     VIR_DEBUG("qemuMonitorGetChardevInfo returned %i", ret);
2430     if (qemuDomainObjExitMonitor(driver, vm) < 0)
2431         ret = -1;
2432 
2433     if (ret == 0) {
2434         if ((ret = qemuProcessFindCharDevicePTYsMonitor(vm, info)) < 0)
2435             goto cleanup;
2436 
2437          qemuProcessRefreshChannelVirtioState(driver, vm, info, true);
2438     }
2439 
2440  cleanup:
2441     virHashFree(info);
2442 
2443     if (logCtxt && kill(vm->pid, 0) == -1 && errno == ESRCH) {
2444         qemuProcessReportLogError(logCtxt,
2445                                   _("process exited while connecting to monitor"));
2446         ret = -1;
2447     }
2448 
2449     return ret;
2450 }
2451 
2452 
2453 static int
qemuProcessDetectIOThreadPIDs(virQEMUDriver * driver,virDomainObj * vm,int asyncJob)2454 qemuProcessDetectIOThreadPIDs(virQEMUDriver *driver,
2455                               virDomainObj *vm,
2456                               int asyncJob)
2457 {
2458     qemuDomainObjPrivate *priv = vm->privateData;
2459     qemuMonitorIOThreadInfo **iothreads = NULL;
2460     int niothreads = 0;
2461     int ret = -1;
2462     size_t i;
2463 
2464     if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_OBJECT_IOTHREAD)) {
2465         ret = 0;
2466         goto cleanup;
2467     }
2468 
2469     /* Get the list of IOThreads from qemu */
2470     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
2471         goto cleanup;
2472     ret = qemuMonitorGetIOThreads(priv->mon, &iothreads, &niothreads);
2473     if (qemuDomainObjExitMonitor(driver, vm) < 0)
2474         goto cleanup;
2475     if (ret < 0)
2476         goto cleanup;
2477 
2478     if (niothreads != vm->def->niothreadids) {
2479         virReportError(VIR_ERR_INTERNAL_ERROR,
2480                        _("got wrong number of IOThread pids from QEMU monitor. "
2481                          "got %d, wanted %zu"),
2482                        niothreads, vm->def->niothreadids);
2483         goto cleanup;
2484     }
2485 
2486     /* Nothing to do */
2487     if (niothreads == 0) {
2488         ret = 0;
2489         goto cleanup;
2490     }
2491 
2492     for (i = 0; i < niothreads; i++) {
2493         virDomainIOThreadIDDef *iothrid;
2494 
2495         if (!(iothrid = virDomainIOThreadIDFind(vm->def,
2496                                                 iothreads[i]->iothread_id))) {
2497             virReportError(VIR_ERR_INTERNAL_ERROR,
2498                            _("iothread %d not found"),
2499                            iothreads[i]->iothread_id);
2500             goto cleanup;
2501         }
2502         iothrid->thread_id = iothreads[i]->thread_id;
2503     }
2504 
2505     ret = 0;
2506 
2507  cleanup:
2508     if (iothreads) {
2509         for (i = 0; i < niothreads; i++)
2510             VIR_FREE(iothreads[i]);
2511         VIR_FREE(iothreads);
2512     }
2513     return ret;
2514 }
2515 
2516 
2517 static int
qemuProcessGetAllCpuAffinity(virBitmap ** cpumapRet)2518 qemuProcessGetAllCpuAffinity(virBitmap **cpumapRet)
2519 {
2520     *cpumapRet = NULL;
2521 
2522     if (!virHostCPUHasBitmap())
2523         return 0;
2524 
2525     if (!(*cpumapRet = virHostCPUGetOnlineBitmap()))
2526         return -1;
2527 
2528     return 0;
2529 }
2530 
2531 
2532 /*
2533  * To be run between fork/exec of QEMU only
2534  */
2535 #if defined(WITH_SCHED_GETAFFINITY) || defined(WITH_BSD_CPU_AFFINITY)
2536 static int
qemuProcessInitCpuAffinity(virDomainObj * vm)2537 qemuProcessInitCpuAffinity(virDomainObj *vm)
2538 {
2539     bool settingAll = false;
2540     g_autoptr(virBitmap) cpumapToSet = NULL;
2541     virDomainNumatuneMemMode mem_mode;
2542     qemuDomainObjPrivate *priv = vm->privateData;
2543 
2544     if (!vm->pid) {
2545         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2546                        _("Cannot setup CPU affinity until process is started"));
2547         return -1;
2548     }
2549 
2550     /* Here is the deal, we can't set cpuset.mems before qemu is
2551      * started as it clashes with KVM allocation. Therefore, we
2552      * used to let qemu allocate its memory anywhere as we would
2553      * then move the memory to desired NUMA node via CGroups.
2554      * However, that might not be always possible because qemu
2555      * might lock some parts of its memory (e.g. due to VFIO).
2556      * Even if it possible, memory has to be copied between NUMA
2557      * nodes which is suboptimal.
2558      * Solution is to set affinity that matches the best what we
2559      * would have set in CGroups and then fix it later, once qemu
2560      * is already running. */
2561     if (virDomainNumaGetNodeCount(vm->def->numa) <= 1 &&
2562         virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
2563         mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
2564         virBitmap *nodeset = NULL;
2565 
2566         if (virDomainNumatuneMaybeGetNodeset(vm->def->numa,
2567                                              priv->autoNodeset,
2568                                              &nodeset,
2569                                              -1) < 0)
2570             return -1;
2571 
2572         if (virNumaNodesetToCPUset(nodeset, &cpumapToSet) < 0)
2573             return -1;
2574     } else if (vm->def->cputune.emulatorpin) {
2575         cpumapToSet = virBitmapNewCopy(vm->def->cputune.emulatorpin);
2576     } else {
2577         settingAll = true;
2578         if (qemuProcessGetAllCpuAffinity(&cpumapToSet) < 0)
2579             return -1;
2580     }
2581 
2582     /*
2583      * We only want to error out if we failed to set the affinity to
2584      * user-requested mapping.  If we are just trying to reset the affinity
2585      * to all CPUs and this fails it can only be an issue if:
2586      *  1) libvirtd does not have CAP_SYS_NICE
2587      *  2) libvirtd does not run on all CPUs
2588      *
2589      * This scenario can easily occur when libvirtd is run inside a
2590      * container with restrictive permissions and CPU pinning.
2591      *
2592      * See also: https://bugzilla.redhat.com/1819801#c2
2593      */
2594     if (cpumapToSet &&
2595         virProcessSetAffinity(vm->pid, cpumapToSet, settingAll) < 0) {
2596         return -1;
2597     }
2598 
2599     return 0;
2600 }
2601 #else /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */
2602 static int
qemuProcessInitCpuAffinity(virDomainObj * vm G_GNUC_UNUSED)2603 qemuProcessInitCpuAffinity(virDomainObj *vm G_GNUC_UNUSED)
2604 {
2605     return 0;
2606 }
2607 #endif /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */
2608 
2609 /* set link states to down on interfaces at qemu start */
2610 static int
qemuProcessSetLinkStates(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob)2611 qemuProcessSetLinkStates(virQEMUDriver *driver,
2612                          virDomainObj *vm,
2613                          qemuDomainAsyncJob asyncJob)
2614 {
2615     qemuDomainObjPrivate *priv = vm->privateData;
2616     virDomainDef *def = vm->def;
2617     size_t i;
2618     int ret = -1;
2619     int rv;
2620 
2621     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
2622         return -1;
2623 
2624     for (i = 0; i < def->nnets; i++) {
2625         if (def->nets[i]->linkstate == VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DOWN) {
2626             if (!def->nets[i]->info.alias) {
2627                 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2628                                _("missing alias for network device"));
2629                 goto cleanup;
2630             }
2631 
2632             VIR_DEBUG("Setting link state: %s", def->nets[i]->info.alias);
2633 
2634             rv = qemuMonitorSetLink(priv->mon,
2635                                     def->nets[i]->info.alias,
2636                                     VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DOWN);
2637             if (rv < 0) {
2638                 virReportError(VIR_ERR_OPERATION_FAILED,
2639                                _("Couldn't set link state on interface: %s"),
2640                                def->nets[i]->info.alias);
2641                 goto cleanup;
2642             }
2643         }
2644     }
2645 
2646     ret = 0;
2647 
2648  cleanup:
2649     if (qemuDomainObjExitMonitor(driver, vm) < 0)
2650         ret = -1;
2651     return ret;
2652 }
2653 
2654 
2655 /**
2656  * qemuProcessSetupPid:
2657  *
2658  * This function sets resource properties (affinity, cgroups,
2659  * scheduler) for any PID associated with a domain.  It should be used
2660  * to set up emulator PIDs as well as vCPU and I/O thread pids to
2661  * ensure they are all handled the same way.
2662  *
2663  * Returns 0 on success, -1 on error.
2664  */
2665 static int
qemuProcessSetupPid(virDomainObj * vm,pid_t pid,virCgroupThreadName nameval,int id,virBitmap * cpumask,unsigned long long period,long long quota,virDomainThreadSchedParam * sched)2666 qemuProcessSetupPid(virDomainObj *vm,
2667                     pid_t pid,
2668                     virCgroupThreadName nameval,
2669                     int id,
2670                     virBitmap *cpumask,
2671                     unsigned long long period,
2672                     long long quota,
2673                     virDomainThreadSchedParam *sched)
2674 {
2675     qemuDomainObjPrivate *priv = vm->privateData;
2676     virDomainNumatuneMemMode mem_mode;
2677     virCgroup *cgroup = NULL;
2678     virBitmap *use_cpumask = NULL;
2679     virBitmap *affinity_cpumask = NULL;
2680     g_autoptr(virBitmap) hostcpumap = NULL;
2681     g_autofree char *mem_mask = NULL;
2682     int ret = -1;
2683     size_t i;
2684 
2685     if ((period || quota) &&
2686         !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
2687         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
2688                        _("cgroup cpu is required for scheduler tuning"));
2689         goto cleanup;
2690     }
2691 
2692     /* Infer which cpumask shall be used. */
2693     if (cpumask) {
2694         use_cpumask = cpumask;
2695     } else if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) {
2696         use_cpumask = priv->autoCpuset;
2697     } else if (vm->def->cpumask) {
2698         use_cpumask = vm->def->cpumask;
2699     } else {
2700         /* You may think this is redundant, but we can't assume libvirtd
2701          * itself is running on all pCPUs, so we need to explicitly set
2702          * the spawned QEMU instance to all pCPUs if no map is given in
2703          * its config file */
2704         if (qemuProcessGetAllCpuAffinity(&hostcpumap) < 0)
2705             goto cleanup;
2706         affinity_cpumask = hostcpumap;
2707     }
2708 
2709     /*
2710      * If CPU cgroup controller is not initialized here, then we need
2711      * neither period nor quota settings.  And if CPUSET controller is
2712      * not initialized either, then there's nothing to do anyway.
2713      */
2714     if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) ||
2715         virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
2716 
2717         if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
2718             mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
2719             virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
2720                                                 priv->autoNodeset,
2721                                                 &mem_mask, -1) < 0)
2722             goto cleanup;
2723 
2724         /* For restrictive numatune mode we need to set cpuset.mems for vCPU
2725          * threads based on the node they are in as there is nothing else uses
2726          * for such restriction (e.g. numa_set_membind). */
2727         if (nameval == VIR_CGROUP_THREAD_VCPU) {
2728             virDomainNuma *numatune = vm->def->numa;
2729 
2730             /* Look for the guest NUMA node of this vCPU */
2731             for (i = 0; i < virDomainNumaGetNodeCount(numatune); i++) {
2732                 virBitmap *node_cpus = virDomainNumaGetNodeCpumask(numatune, i);
2733 
2734                 if (!virBitmapIsBitSet(node_cpus, id))
2735                     continue;
2736 
2737                 /* Update the mem_mask for this vCPU if the mode of its node is
2738                  * 'restrictive'. */
2739                 if (virDomainNumatuneGetMode(numatune, i, &mem_mode) == 0 &&
2740                     mem_mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) {
2741                     VIR_FREE(mem_mask);
2742 
2743                     if (virDomainNumatuneMaybeFormatNodeset(numatune,
2744                                                             priv->autoNodeset,
2745                                                             &mem_mask, i) < 0) {
2746                         goto cleanup;
2747                     }
2748                 }
2749 
2750                 break;
2751             }
2752         }
2753 
2754         if (virCgroupNewThread(priv->cgroup, nameval, id, true, &cgroup) < 0)
2755             goto cleanup;
2756 
2757         if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
2758             if (use_cpumask &&
2759                 qemuSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0)
2760                 goto cleanup;
2761 
2762             if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
2763                 goto cleanup;
2764 
2765         }
2766 
2767         if ((period || quota) &&
2768             qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0)
2769             goto cleanup;
2770 
2771         /* Move the thread to the sub dir */
2772         if (virCgroupAddThread(cgroup, pid) < 0)
2773             goto cleanup;
2774 
2775     }
2776 
2777     if (!affinity_cpumask)
2778         affinity_cpumask = use_cpumask;
2779 
2780     /* Setup legacy affinity.
2781      *
2782      * We only want to error out if we failed to set the affinity to
2783      * user-requested mapping.  If we are just trying to reset the affinity
2784      * to all CPUs and this fails it can only be an issue if:
2785      *  1) libvirtd does not have CAP_SYS_NICE
2786      *  2) libvirtd does not run on all CPUs
2787      *
2788      * This scenario can easily occur when libvirtd is run inside a
2789      * container with restrictive permissions and CPU pinning.
2790      *
2791      * See also: https://bugzilla.redhat.com/1819801#c2
2792      */
2793     if (affinity_cpumask &&
2794         virProcessSetAffinity(pid, affinity_cpumask,
2795                               affinity_cpumask == hostcpumap) < 0) {
2796         goto cleanup;
2797     }
2798 
2799     /* Set scheduler type and priority, but not for the main thread. */
2800     if (sched &&
2801         nameval != VIR_CGROUP_THREAD_EMULATOR &&
2802         virProcessSetScheduler(pid, sched->policy, sched->priority) < 0)
2803         goto cleanup;
2804 
2805     ret = 0;
2806  cleanup:
2807     if (cgroup) {
2808         if (ret < 0)
2809             virCgroupRemove(cgroup);
2810         virCgroupFree(cgroup);
2811     }
2812 
2813     return ret;
2814 }
2815 
2816 
2817 static int
qemuProcessSetupEmulator(virDomainObj * vm)2818 qemuProcessSetupEmulator(virDomainObj *vm)
2819 {
2820     return qemuProcessSetupPid(vm, vm->pid, VIR_CGROUP_THREAD_EMULATOR,
2821                                0, vm->def->cputune.emulatorpin,
2822                                vm->def->cputune.emulator_period,
2823                                vm->def->cputune.emulator_quota,
2824                                vm->def->cputune.emulatorsched);
2825 }
2826 
2827 
2828 static int
qemuProcessResctrlCreate(virQEMUDriver * driver,virDomainObj * vm)2829 qemuProcessResctrlCreate(virQEMUDriver *driver,
2830                          virDomainObj *vm)
2831 {
2832     size_t i = 0;
2833     g_autoptr(virCaps) caps = NULL;
2834     qemuDomainObjPrivate *priv = vm->privateData;
2835 
2836     if (!vm->def->nresctrls)
2837         return 0;
2838 
2839     /* Force capability refresh since resctrl info can change
2840      * XXX: move cache info into virresctrl so caps are not needed */
2841     caps = virQEMUDriverGetCapabilities(driver, true);
2842     if (!caps)
2843         return -1;
2844 
2845     for (i = 0; i < vm->def->nresctrls; i++) {
2846         size_t j = 0;
2847         if (virResctrlAllocCreate(caps->host.resctrl,
2848                                   vm->def->resctrls[i]->alloc,
2849                                   priv->machineName) < 0)
2850             return -1;
2851 
2852         for (j = 0; j < vm->def->resctrls[i]->nmonitors; j++) {
2853             virDomainResctrlMonDef *mon = NULL;
2854 
2855             mon = vm->def->resctrls[i]->monitors[j];
2856             if (virResctrlMonitorCreate(mon->instance,
2857                                         priv->machineName) < 0)
2858                 return -1;
2859         }
2860     }
2861 
2862     return 0;
2863 }
2864 
2865 
2866 static char *
qemuProcessBuildPRHelperPidfilePathOld(virDomainObj * vm)2867 qemuProcessBuildPRHelperPidfilePathOld(virDomainObj *vm)
2868 {
2869     qemuDomainObjPrivate *priv = vm->privateData;
2870     const char *prdAlias = qemuDomainGetManagedPRAlias();
2871 
2872     return virPidFileBuildPath(priv->libDir, prdAlias);
2873 }
2874 
2875 
2876 static char *
qemuProcessBuildPRHelperPidfilePath(virDomainObj * vm)2877 qemuProcessBuildPRHelperPidfilePath(virDomainObj *vm)
2878 {
2879     qemuDomainObjPrivate *priv = vm->privateData;
2880     g_autofree char *domname = virDomainDefGetShortName(vm->def);
2881     g_autofree char *prdName = g_strdup_printf("%s-%s", domname, qemuDomainGetManagedPRAlias());
2882     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
2883 
2884     return virPidFileBuildPath(cfg->stateDir, prdName);
2885 }
2886 
2887 
2888 void
qemuProcessKillManagedPRDaemon(virDomainObj * vm)2889 qemuProcessKillManagedPRDaemon(virDomainObj *vm)
2890 {
2891     qemuDomainObjPrivate *priv = vm->privateData;
2892     virErrorPtr orig_err;
2893     g_autofree char *pidfile = NULL;
2894 
2895     if (!(pidfile = qemuProcessBuildPRHelperPidfilePath(vm))) {
2896         VIR_WARN("Unable to construct pr-helper pidfile path");
2897         return;
2898     }
2899 
2900     if (!virFileExists(pidfile)) {
2901         g_free(pidfile);
2902         if (!(pidfile = qemuProcessBuildPRHelperPidfilePathOld(vm))) {
2903             VIR_WARN("Unable to construct pr-helper pidfile path");
2904             return;
2905         }
2906     }
2907 
2908     virErrorPreserveLast(&orig_err);
2909     if (virPidFileForceCleanupPath(pidfile) < 0) {
2910         VIR_WARN("Unable to kill pr-helper process");
2911     } else {
2912         priv->prDaemonRunning = false;
2913     }
2914     virErrorRestore(&orig_err);
2915 }
2916 
2917 
2918 static int
qemuProcessStartPRDaemonHook(void * opaque)2919 qemuProcessStartPRDaemonHook(void *opaque)
2920 {
2921     virDomainObj *vm = opaque;
2922     size_t i, nfds = 0;
2923     g_autofree int *fds = NULL;
2924     int ret = -1;
2925 
2926     if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) {
2927         if (virProcessGetNamespaces(vm->pid, &nfds, &fds) < 0)
2928             return ret;
2929 
2930         if (nfds > 0 &&
2931             virProcessSetNamespaces(nfds, fds) < 0)
2932             goto cleanup;
2933     }
2934 
2935     ret = 0;
2936  cleanup:
2937     for (i = 0; i < nfds; i++)
2938         VIR_FORCE_CLOSE(fds[i]);
2939     return ret;
2940 }
2941 
2942 
2943 int
qemuProcessStartManagedPRDaemon(virDomainObj * vm)2944 qemuProcessStartManagedPRDaemon(virDomainObj *vm)
2945 {
2946     qemuDomainObjPrivate *priv = vm->privateData;
2947     virQEMUDriver *driver = priv->driver;
2948     g_autoptr(virQEMUDriverConfig) cfg = NULL;
2949     int errfd = -1;
2950     g_autofree char *pidfile = NULL;
2951     g_autofree char *socketPath = NULL;
2952     pid_t cpid = -1;
2953     g_autoptr(virCommand) cmd = NULL;
2954     virTimeBackOffVar timebackoff;
2955     const unsigned long long timeout = 500000; /* ms */
2956     int ret = -1;
2957 
2958     cfg = virQEMUDriverGetConfig(driver);
2959 
2960     if (!virFileIsExecutable(cfg->prHelperName)) {
2961         virReportSystemError(errno, _("'%s' is not a suitable pr helper"),
2962                              cfg->prHelperName);
2963         goto cleanup;
2964     }
2965 
2966     if (!(pidfile = qemuProcessBuildPRHelperPidfilePath(vm)))
2967         goto cleanup;
2968 
2969     if (!(socketPath = qemuDomainGetManagedPRSocketPath(priv)))
2970         goto cleanup;
2971 
2972     /* Remove stale socket */
2973     if (unlink(socketPath) < 0 &&
2974         errno != ENOENT) {
2975         virReportSystemError(errno,
2976                              _("Unable to remove stale socket path: %s"),
2977                              socketPath);
2978         goto cleanup;
2979     }
2980 
2981     if (!(cmd = virCommandNewArgList(cfg->prHelperName,
2982                                      "-k", socketPath,
2983                                      NULL)))
2984         goto cleanup;
2985 
2986     virCommandDaemonize(cmd);
2987     virCommandSetPidFile(cmd, pidfile);
2988     virCommandSetErrorFD(cmd, &errfd);
2989 
2990     /* Place the process into the same namespace and cgroup as
2991      * qemu (so that it shares the same view of the system). */
2992     virCommandSetPreExecHook(cmd, qemuProcessStartPRDaemonHook, vm);
2993 
2994     if (virCommandRun(cmd, NULL) < 0)
2995         goto cleanup;
2996 
2997     if (virPidFileReadPath(pidfile, &cpid) < 0) {
2998         virReportError(VIR_ERR_INTERNAL_ERROR,
2999                        _("pr helper %s didn't show up"),
3000                        cfg->prHelperName);
3001         goto cleanup;
3002     }
3003 
3004     if (virTimeBackOffStart(&timebackoff, 1, timeout) < 0)
3005         goto cleanup;
3006     while (virTimeBackOffWait(&timebackoff)) {
3007         char errbuf[1024] = { 0 };
3008 
3009         if (virFileExists(socketPath))
3010             break;
3011 
3012         if (virProcessKill(cpid, 0) == 0)
3013             continue;
3014 
3015         if (saferead(errfd, errbuf, sizeof(errbuf) - 1) < 0) {
3016             virReportSystemError(errno,
3017                                  _("pr helper %s died unexpectedly"),
3018                                  cfg->prHelperName);
3019         } else {
3020             virReportError(VIR_ERR_OPERATION_FAILED,
3021                            _("pr helper died and reported: %s"), errbuf);
3022         }
3023         goto cleanup;
3024     }
3025 
3026     if (!virFileExists(socketPath)) {
3027         virReportError(VIR_ERR_OPERATION_TIMEOUT, "%s",
3028                        _("pr helper socked did not show up"));
3029         goto cleanup;
3030     }
3031 
3032     if (priv->cgroup &&
3033         virCgroupAddMachineProcess(priv->cgroup, cpid) < 0)
3034         goto cleanup;
3035 
3036     if (qemuSecurityDomainSetPathLabel(driver, vm, socketPath, true) < 0)
3037         goto cleanup;
3038 
3039     priv->prDaemonRunning = true;
3040     ret = 0;
3041  cleanup:
3042     if (ret < 0) {
3043         virCommandAbort(cmd);
3044         if (cpid >= 0)
3045             virProcessKillPainfully(cpid, true);
3046         if (pidfile)
3047             unlink(pidfile);
3048     }
3049     VIR_FORCE_CLOSE(errfd);
3050     return ret;
3051 }
3052 
3053 
3054 static int
qemuProcessInitPasswords(virQEMUDriver * driver,virDomainObj * vm,int asyncJob)3055 qemuProcessInitPasswords(virQEMUDriver *driver,
3056                          virDomainObj *vm,
3057                          int asyncJob)
3058 {
3059     int ret = 0;
3060     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
3061     size_t i;
3062 
3063     for (i = 0; i < vm->def->ngraphics; ++i) {
3064         virDomainGraphicsDef *graphics = vm->def->graphics[i];
3065         if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) {
3066             ret = qemuDomainChangeGraphicsPasswords(driver, vm,
3067                                                     VIR_DOMAIN_GRAPHICS_TYPE_VNC,
3068                                                     &graphics->data.vnc.auth,
3069                                                     cfg->vncPassword,
3070                                                     asyncJob);
3071         } else if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) {
3072             ret = qemuDomainChangeGraphicsPasswords(driver, vm,
3073                                                     VIR_DOMAIN_GRAPHICS_TYPE_SPICE,
3074                                                     &graphics->data.spice.auth,
3075                                                     cfg->spicePassword,
3076                                                     asyncJob);
3077         }
3078 
3079         if (ret < 0)
3080             return ret;
3081     }
3082 
3083     return ret;
3084 }
3085 
3086 
3087 static int
qemuProcessPrepareChardevDevice(virDomainDef * def G_GNUC_UNUSED,virDomainChrDef * dev,void * opaque G_GNUC_UNUSED)3088 qemuProcessPrepareChardevDevice(virDomainDef *def G_GNUC_UNUSED,
3089                                 virDomainChrDef *dev,
3090                                 void *opaque G_GNUC_UNUSED)
3091 {
3092     int fd;
3093     if (dev->source->type != VIR_DOMAIN_CHR_TYPE_FILE)
3094         return 0;
3095 
3096     if ((fd = open(dev->source->data.file.path,
3097                    O_CREAT | O_APPEND, S_IRUSR|S_IWUSR)) < 0) {
3098         virReportSystemError(errno,
3099                              _("Unable to pre-create chardev file '%s'"),
3100                              dev->source->data.file.path);
3101         return -1;
3102     }
3103 
3104     VIR_FORCE_CLOSE(fd);
3105 
3106     return 0;
3107 }
3108 
3109 
3110 static int
qemuProcessCleanupChardevDevice(virDomainDef * def G_GNUC_UNUSED,virDomainChrDef * dev,void * opaque G_GNUC_UNUSED)3111 qemuProcessCleanupChardevDevice(virDomainDef *def G_GNUC_UNUSED,
3112                                 virDomainChrDef *dev,
3113                                 void *opaque G_GNUC_UNUSED)
3114 {
3115     if (dev->source->type == VIR_DOMAIN_CHR_TYPE_UNIX &&
3116         dev->source->data.nix.listen &&
3117         dev->source->data.nix.path)
3118         unlink(dev->source->data.nix.path);
3119 
3120     return 0;
3121 }
3122 
3123 
3124 /**
3125  * Loads and update video memory size for video devices according to QEMU
3126  * process as the QEMU will silently update the values that we pass to QEMU
3127  * through command line.  We need to load these updated values and store them
3128  * into the status XML.
3129  *
3130  * We will fail if for some reason the values cannot be loaded from QEMU because
3131  * its mandatory to get the correct video memory size to status XML to not break
3132  * migration.
3133  */
3134 static int
qemuProcessUpdateVideoRamSize(virQEMUDriver * driver,virDomainObj * vm,int asyncJob)3135 qemuProcessUpdateVideoRamSize(virQEMUDriver *driver,
3136                               virDomainObj *vm,
3137                               int asyncJob)
3138 {
3139     int ret = -1;
3140     ssize_t i;
3141     qemuDomainObjPrivate *priv = vm->privateData;
3142     virDomainVideoDef *video = NULL;
3143     g_autoptr(virQEMUDriverConfig) cfg = NULL;
3144 
3145     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
3146         return -1;
3147 
3148     for (i = 0; i < vm->def->nvideos; i++) {
3149         video = vm->def->videos[i];
3150 
3151         switch (video->type) {
3152         case VIR_DOMAIN_VIDEO_TYPE_VGA:
3153             if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VGA_VGAMEM)) {
3154                 if (qemuMonitorUpdateVideoMemorySize(priv->mon, video, "VGA") < 0)
3155                     goto error;
3156             }
3157             break;
3158         case VIR_DOMAIN_VIDEO_TYPE_QXL:
3159             if (i == 0) {
3160                 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VGAMEM) &&
3161                     qemuMonitorUpdateVideoMemorySize(priv->mon, video,
3162                                                      "qxl-vga") < 0)
3163                         goto error;
3164 
3165                 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VRAM64) &&
3166                     qemuMonitorUpdateVideoVram64Size(priv->mon, video,
3167                                                      "qxl-vga") < 0)
3168                     goto error;
3169             } else {
3170                 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VGAMEM) &&
3171                     qemuMonitorUpdateVideoMemorySize(priv->mon, video,
3172                                                      "qxl") < 0)
3173                         goto error;
3174 
3175                 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VRAM64) &&
3176                     qemuMonitorUpdateVideoVram64Size(priv->mon, video,
3177                                                      "qxl") < 0)
3178                         goto error;
3179             }
3180             break;
3181         case VIR_DOMAIN_VIDEO_TYPE_VMVGA:
3182             if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VMWARE_SVGA_VGAMEM)) {
3183                 if (qemuMonitorUpdateVideoMemorySize(priv->mon, video,
3184                                                      "vmware-svga") < 0)
3185                     goto error;
3186             }
3187             break;
3188         case VIR_DOMAIN_VIDEO_TYPE_CIRRUS:
3189         case VIR_DOMAIN_VIDEO_TYPE_XEN:
3190         case VIR_DOMAIN_VIDEO_TYPE_VBOX:
3191         case VIR_DOMAIN_VIDEO_TYPE_LAST:
3192             break;
3193         }
3194 
3195     }
3196 
3197     if (qemuDomainObjExitMonitor(driver, vm) < 0)
3198         return -1;
3199 
3200     cfg = virQEMUDriverGetConfig(driver);
3201     ret = virDomainObjSave(vm, driver->xmlopt, cfg->stateDir);
3202 
3203     return ret;
3204 
3205  error:
3206     ignore_value(qemuDomainObjExitMonitor(driver, vm));
3207     return -1;
3208 }
3209 
3210 
/* Data handed to qemuProcessHook() through its opaque pointer. */
struct qemuProcessHookData {
    /* domain being started; the hook stores its own pid in vm->pid */
    virDomainObj *vm;
    /* supplies the security manager and lock manager used by the hook */
    virQEMUDriver *driver;
    /* driver configuration; the hook drops a reference to it when done */
    virQEMUDriverConfig *cfg;
};
3216 
qemuProcessHook(void * data)3217 static int qemuProcessHook(void *data)
3218 {
3219     struct qemuProcessHookData *h = data;
3220     qemuDomainObjPrivate *priv = h->vm->privateData;
3221     int ret = -1;
3222     int fd;
3223     virBitmap *nodeset = NULL;
3224     virDomainNumatuneMemMode mode;
3225 
3226     /* This method cannot use any mutexes, which are not
3227      * protected across fork()
3228      */
3229 
3230     qemuSecurityPostFork(h->driver->securityManager);
3231 
3232     /* Some later calls want pid present */
3233     h->vm->pid = getpid();
3234 
3235     VIR_DEBUG("Obtaining domain lock");
3236     /*
3237      * Since we're going to leak the returned FD to QEMU,
3238      * we need to make sure it gets a sensible label.
3239      * This mildly sucks, because there could be other
3240      * sockets the lock driver opens that we don't want
3241      * labelled. So far we're ok though.
3242      */
3243     if (qemuSecuritySetSocketLabel(h->driver->securityManager, h->vm->def) < 0)
3244         goto cleanup;
3245     if (virDomainLockProcessStart(h->driver->lockManager,
3246                                   h->cfg->uri,
3247                                   h->vm,
3248                                   /* QEMU is always paused initially */
3249                                   true,
3250                                   &fd) < 0)
3251         goto cleanup;
3252     if (qemuSecurityClearSocketLabel(h->driver->securityManager, h->vm->def) < 0)
3253         goto cleanup;
3254 
3255     if (qemuDomainUnshareNamespace(h->cfg, h->driver->securityManager, h->vm) < 0)
3256         goto cleanup;
3257 
3258     if (virDomainNumatuneGetMode(h->vm->def->numa, -1, &mode) == 0) {
3259         if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
3260             h->cfg->cgroupControllers & (1 << VIR_CGROUP_CONTROLLER_CPUSET) &&
3261             virCgroupControllerAvailable(VIR_CGROUP_CONTROLLER_CPUSET)) {
3262             /* Use virNuma* API iff necessary. Once set and child is exec()-ed,
3263              * there's no way for us to change it. Rely on cgroups (if available
3264              * and enabled in the config) rather than virNuma*. */
3265             VIR_DEBUG("Relying on CGroups for memory binding");
3266         } else {
3267             nodeset = virDomainNumatuneGetNodeset(h->vm->def->numa,
3268                                                   priv->autoNodeset, -1);
3269 
3270             if (virNumaSetupMemoryPolicy(mode, nodeset) < 0)
3271                 goto cleanup;
3272         }
3273     }
3274 
3275     ret = 0;
3276 
3277  cleanup:
3278     virObjectUnref(h->cfg);
3279     VIR_DEBUG("Hook complete ret=%d", ret);
3280     return ret;
3281 }
3282 
3283 int
qemuProcessPrepareMonitorChr(virDomainChrSourceDef * monConfig,const char * domainDir)3284 qemuProcessPrepareMonitorChr(virDomainChrSourceDef *monConfig,
3285                              const char *domainDir)
3286 {
3287     monConfig->type = VIR_DOMAIN_CHR_TYPE_UNIX;
3288     monConfig->data.nix.listen = true;
3289 
3290     monConfig->data.nix.path = g_strdup_printf("%s/monitor.sock", domainDir);
3291     return 0;
3292 }
3293 
3294 
3295 /*
3296  * Precondition: vm must be locked, and a job must be active.
3297  * This method will call {Enter,Exit}Monitor
3298  */
3299 int
qemuProcessStartCPUs(virQEMUDriver * driver,virDomainObj * vm,virDomainRunningReason reason,qemuDomainAsyncJob asyncJob)3300 qemuProcessStartCPUs(virQEMUDriver *driver, virDomainObj *vm,
3301                      virDomainRunningReason reason,
3302                      qemuDomainAsyncJob asyncJob)
3303 {
3304     int ret = -1;
3305     qemuDomainObjPrivate *priv = vm->privateData;
3306     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
3307 
3308     /* Bring up netdevs before starting CPUs */
3309     if (qemuInterfaceStartDevices(vm->def) < 0)
3310        return -1;
3311 
3312     VIR_DEBUG("Using lock state '%s'", NULLSTR(priv->lockState));
3313     if (virDomainLockProcessResume(driver->lockManager, cfg->uri,
3314                                    vm, priv->lockState) < 0) {
3315         /* Don't free priv->lockState on error, because we need
3316          * to make sure we have state still present if the user
3317          * tries to resume again
3318          */
3319         return -1;
3320     }
3321     VIR_FREE(priv->lockState);
3322 
3323     priv->runningReason = reason;
3324 
3325     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
3326         goto release;
3327 
3328     ret = qemuMonitorStartCPUs(priv->mon);
3329     if (qemuDomainObjExitMonitor(driver, vm) < 0)
3330         ret = -1;
3331 
3332     if (ret < 0)
3333         goto release;
3334 
3335     /* The RESUME event handler will change the domain state with the reason
3336      * saved in priv->runningReason and it will also emit corresponding domain
3337      * lifecycle event.
3338      */
3339 
3340     return ret;
3341 
3342  release:
3343     priv->runningReason = VIR_DOMAIN_RUNNING_UNKNOWN;
3344     if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
3345         VIR_WARN("Unable to release lease on %s", vm->def->name);
3346     VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
3347     return ret;
3348 }
3349 
3350 
/*
 * qemuProcessStopCPUs:
 * @driver: qemu driver
 * @vm: domain object
 * @reason: value stored in priv->pausedReason before the CPUs are stopped
 * @asyncJob: async job passed to qemuDomainObjEnterMonitorAsync()
 *
 * Drops any saved lock state, records @reason and asks the monitor to
 * stop the CPUs. On success the network devices are stopped, the stop
 * time of the current job (if there is one) is recorded and the lock
 * manager is paused with its state saved in priv->lockState. On failure
 * priv->pausedReason is reset to VIR_DOMAIN_PAUSED_UNKNOWN.
 *
 * Returns the (non-negative) result of qemuMonitorStopCPUs() on
 * success, a negative value on failure.
 */
int
qemuProcessStopCPUs(virQEMUDriver *driver,
                    virDomainObj *vm,
                    virDomainPausedReason reason,
                    qemuDomainAsyncJob asyncJob)
{
    qemuDomainObjPrivate *priv = vm->privateData;
    int rc = -1;

    VIR_FREE(priv->lockState);

    priv->pausedReason = reason;

    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) >= 0) {
        rc = qemuMonitorStopCPUs(priv->mon);
        if (qemuDomainObjExitMonitor(driver, vm) < 0)
            rc = -1;
    }

    if (rc < 0) {
        priv->pausedReason = VIR_DOMAIN_PAUSED_UNKNOWN;
        return rc;
    }

    /* Netdevs are de-activated only after the CPUs have stopped */
    ignore_value(qemuInterfaceStopDevices(vm->def));

    if (priv->job.current)
        ignore_value(virTimeMillisNow(&priv->job.current->stopped));

    /* The STOP event handler will change the domain state with the reason
     * saved in priv->pausedReason and it will also emit the corresponding
     * domain lifecycle event.
     */

    if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
        VIR_WARN("Unable to release lease on %s", vm->def->name);
    VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));

    return rc;
}
3394 
3395 
3396 
3397 static void
qemuProcessNotifyNets(virDomainDef * def)3398 qemuProcessNotifyNets(virDomainDef *def)
3399 {
3400     size_t i;
3401     g_autoptr(virConnect) conn = NULL;
3402 
3403     for (i = 0; i < def->nnets; i++) {
3404         virDomainNetDef *net = def->nets[i];
3405         /* keep others from trying to use the macvtap device name, but
3406          * don't return error if this happens, since that causes the
3407          * domain to be unceremoniously killed, which would be *very*
3408          * impolite.
3409          */
3410         switch (virDomainNetGetActualType(net)) {
3411         case VIR_DOMAIN_NET_TYPE_DIRECT:
3412             virNetDevReserveName(net->ifname);
3413             break;
3414         case VIR_DOMAIN_NET_TYPE_BRIDGE:
3415         case VIR_DOMAIN_NET_TYPE_NETWORK:
3416         case VIR_DOMAIN_NET_TYPE_ETHERNET:
3417             virNetDevReserveName(net->ifname);
3418             break;
3419         case VIR_DOMAIN_NET_TYPE_USER:
3420         case VIR_DOMAIN_NET_TYPE_VHOSTUSER:
3421         case VIR_DOMAIN_NET_TYPE_SERVER:
3422         case VIR_DOMAIN_NET_TYPE_CLIENT:
3423         case VIR_DOMAIN_NET_TYPE_MCAST:
3424         case VIR_DOMAIN_NET_TYPE_INTERNAL:
3425         case VIR_DOMAIN_NET_TYPE_HOSTDEV:
3426         case VIR_DOMAIN_NET_TYPE_UDP:
3427         case VIR_DOMAIN_NET_TYPE_VDPA:
3428         case VIR_DOMAIN_NET_TYPE_LAST:
3429             break;
3430         }
3431 
3432         if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK && !conn)
3433             conn = virGetConnectNetwork();
3434 
3435         virDomainNetNotifyActualDevice(conn, def, net);
3436     }
3437 }
3438 
3439 /* Attempt to instantiate the filters. Ignore failures because it's
3440  * possible that someone deleted a filter binding and the associated
3441  * filter while the guest was running and we don't want that action
3442  * to cause failure to keep the guest running during the reconnection
3443  * processing. Nor do we necessarily want other failures to do the
3444  * same. We'll just log the error conditions other than of course
3445  * ignoreExists possibility (e.g. the true flag) */
3446 static void
qemuProcessFiltersInstantiate(virDomainDef * def)3447 qemuProcessFiltersInstantiate(virDomainDef *def)
3448 {
3449     size_t i;
3450 
3451     for (i = 0; i < def->nnets; i++) {
3452         virDomainNetDef *net = def->nets[i];
3453         if ((net->filter) && (net->ifname)) {
3454             if (virDomainConfNWFilterInstantiate(def->name, def->uuid, net,
3455                                                  true) < 0) {
3456                 VIR_WARN("filter '%s' instantiation for '%s' failed '%s'",
3457                          net->filter, net->ifname, virGetLastErrorMessage());
3458                 virResetLastError();
3459             }
3460         }
3461     }
3462 }
3463 
3464 static int
qemuProcessUpdateState(virQEMUDriver * driver,virDomainObj * vm)3465 qemuProcessUpdateState(virQEMUDriver *driver, virDomainObj *vm)
3466 {
3467     qemuDomainObjPrivate *priv = vm->privateData;
3468     virDomainState state;
3469     virDomainPausedReason reason;
3470     virDomainState newState = VIR_DOMAIN_NOSTATE;
3471     int oldReason;
3472     int newReason;
3473     bool running;
3474     g_autofree char *msg = NULL;
3475     int ret;
3476 
3477     qemuDomainObjEnterMonitor(driver, vm);
3478     ret = qemuMonitorGetStatus(priv->mon, &running, &reason);
3479     if (qemuDomainObjExitMonitor(driver, vm) < 0)
3480         return -1;
3481 
3482     if (ret < 0)
3483         return -1;
3484 
3485     state = virDomainObjGetState(vm, &oldReason);
3486 
3487     if (running &&
3488         (state == VIR_DOMAIN_SHUTOFF ||
3489          (state == VIR_DOMAIN_PAUSED &&
3490           oldReason == VIR_DOMAIN_PAUSED_STARTING_UP))) {
3491         newState = VIR_DOMAIN_RUNNING;
3492         newReason = VIR_DOMAIN_RUNNING_BOOTED;
3493         msg = g_strdup("finished booting");
3494     } else if (state == VIR_DOMAIN_PAUSED && running) {
3495         newState = VIR_DOMAIN_RUNNING;
3496         newReason = VIR_DOMAIN_RUNNING_UNPAUSED;
3497         msg = g_strdup("was unpaused");
3498     } else if (state == VIR_DOMAIN_RUNNING && !running) {
3499         if (reason == VIR_DOMAIN_PAUSED_SHUTTING_DOWN) {
3500             newState = VIR_DOMAIN_SHUTDOWN;
3501             newReason = VIR_DOMAIN_SHUTDOWN_UNKNOWN;
3502             msg = g_strdup("shutdown");
3503         } else if (reason == VIR_DOMAIN_PAUSED_CRASHED) {
3504             newState = VIR_DOMAIN_CRASHED;
3505             newReason = VIR_DOMAIN_CRASHED_PANICKED;
3506             msg = g_strdup("crashed");
3507         } else {
3508             newState = VIR_DOMAIN_PAUSED;
3509             newReason = reason;
3510             msg = g_strdup_printf("was paused (%s)",
3511                                   virDomainPausedReasonTypeToString(reason));
3512         }
3513     }
3514 
3515     if (newState != VIR_DOMAIN_NOSTATE) {
3516         VIR_DEBUG("Domain %s %s while its monitor was disconnected;"
3517                   " changing state to %s (%s)",
3518                   vm->def->name,
3519                   NULLSTR(msg),
3520                   virDomainStateTypeToString(newState),
3521                   virDomainStateReasonToString(newState, newReason));
3522         virDomainObjSetState(vm, newState, newReason);
3523     }
3524 
3525     return 0;
3526 }
3527 
3528 static int
qemuProcessRecoverMigrationIn(virQEMUDriver * driver,virDomainObj * vm,const qemuDomainJobObj * job,virDomainState state,int reason)3529 qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
3530                               virDomainObj *vm,
3531                               const qemuDomainJobObj *job,
3532                               virDomainState state,
3533                               int reason)
3534 {
3535 
3536     qemuDomainJobPrivate *jobPriv = job->privateData;
3537     bool postcopy = (state == VIR_DOMAIN_PAUSED &&
3538                      reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED) ||
3539                     (state == VIR_DOMAIN_RUNNING &&
3540                      reason == VIR_DOMAIN_RUNNING_POSTCOPY);
3541 
3542     switch ((qemuMigrationJobPhase) job->phase) {
3543     case QEMU_MIGRATION_PHASE_NONE:
3544     case QEMU_MIGRATION_PHASE_PERFORM2:
3545     case QEMU_MIGRATION_PHASE_BEGIN3:
3546     case QEMU_MIGRATION_PHASE_PERFORM3:
3547     case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
3548     case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
3549     case QEMU_MIGRATION_PHASE_CONFIRM3:
3550     case QEMU_MIGRATION_PHASE_LAST:
3551         /* N/A for incoming migration */
3552         break;
3553 
3554     case QEMU_MIGRATION_PHASE_PREPARE:
3555         VIR_DEBUG("Killing unfinished incoming migration for domain %s",
3556                   vm->def->name);
3557         return -1;
3558 
3559     case QEMU_MIGRATION_PHASE_FINISH2:
3560         /* source domain is already killed so let's just resume the domain
3561          * and hope we are all set */
3562         VIR_DEBUG("Incoming migration finished, resuming domain %s",
3563                   vm->def->name);
3564         if (qemuProcessStartCPUs(driver, vm,
3565                                  VIR_DOMAIN_RUNNING_MIGRATED,
3566                                  QEMU_ASYNC_JOB_NONE) < 0) {
3567             VIR_WARN("Could not resume domain %s", vm->def->name);
3568         }
3569         break;
3570 
3571     case QEMU_MIGRATION_PHASE_FINISH3:
3572         /* migration finished, we started resuming the domain but didn't
3573          * confirm success or failure yet; killing it seems safest unless
3574          * we already started guest CPUs or we were in post-copy mode */
3575         if (postcopy) {
3576             qemuMigrationAnyPostcopyFailed(driver, vm);
3577         } else if (state != VIR_DOMAIN_RUNNING) {
3578             VIR_DEBUG("Killing migrated domain %s", vm->def->name);
3579             return -1;
3580         }
3581         break;
3582     }
3583 
3584     qemuMigrationParamsReset(driver, vm, QEMU_ASYNC_JOB_NONE,
3585                              jobPriv->migParams, job->apiFlags);
3586     return 0;
3587 }
3588 
3589 static int
qemuProcessRecoverMigrationOut(virQEMUDriver * driver,virDomainObj * vm,const qemuDomainJobObj * job,virDomainState state,int reason,unsigned int * stopFlags)3590 qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
3591                                virDomainObj *vm,
3592                                const qemuDomainJobObj *job,
3593                                virDomainState state,
3594                                int reason,
3595                                unsigned int *stopFlags)
3596 {
3597     qemuDomainJobPrivate *jobPriv = job->privateData;
3598     bool postcopy = state == VIR_DOMAIN_PAUSED &&
3599                     (reason == VIR_DOMAIN_PAUSED_POSTCOPY ||
3600                      reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED);
3601     bool resume = false;
3602 
3603     switch ((qemuMigrationJobPhase) job->phase) {
3604     case QEMU_MIGRATION_PHASE_NONE:
3605     case QEMU_MIGRATION_PHASE_PREPARE:
3606     case QEMU_MIGRATION_PHASE_FINISH2:
3607     case QEMU_MIGRATION_PHASE_FINISH3:
3608     case QEMU_MIGRATION_PHASE_LAST:
3609         /* N/A for outgoing migration */
3610         break;
3611 
3612     case QEMU_MIGRATION_PHASE_BEGIN3:
3613         /* nothing happened so far, just forget we were about to migrate the
3614          * domain */
3615         break;
3616 
3617     case QEMU_MIGRATION_PHASE_PERFORM2:
3618     case QEMU_MIGRATION_PHASE_PERFORM3:
3619         /* migration is still in progress, let's cancel it and resume the
3620          * domain; however we can only do that before migration enters
3621          * post-copy mode
3622          */
3623         if (postcopy) {
3624             qemuMigrationAnyPostcopyFailed(driver, vm);
3625         } else {
3626             VIR_DEBUG("Cancelling unfinished migration of domain %s",
3627                       vm->def->name);
3628             if (qemuMigrationSrcCancel(driver, vm) < 0) {
3629                 VIR_WARN("Could not cancel ongoing migration of domain %s",
3630                          vm->def->name);
3631             }
3632             resume = true;
3633         }
3634         break;
3635 
3636     case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
3637         /* migration finished but we didn't have a chance to get the result
3638          * of Finish3 step; third party needs to check what to do next; in
3639          * post-copy mode we can use PAUSED_POSTCOPY_FAILED state for this
3640          */
3641         if (postcopy)
3642             qemuMigrationAnyPostcopyFailed(driver, vm);
3643         break;
3644 
3645     case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
3646         /* Finish3 failed, we need to resume the domain, but once we enter
3647          * post-copy mode there's no way back, so let's just mark the domain
3648          * as broken in that case
3649          */
3650         if (postcopy) {
3651             qemuMigrationAnyPostcopyFailed(driver, vm);
3652         } else {
3653             VIR_DEBUG("Resuming domain %s after failed migration",
3654                       vm->def->name);
3655             resume = true;
3656         }
3657         break;
3658 
3659     case QEMU_MIGRATION_PHASE_CONFIRM3:
3660         /* migration completed, we need to kill the domain here */
3661         *stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
3662         return -1;
3663     }
3664 
3665     if (resume) {
3666         /* resume the domain but only if it was paused as a result of
3667          * migration
3668          */
3669         if (state == VIR_DOMAIN_PAUSED &&
3670             (reason == VIR_DOMAIN_PAUSED_MIGRATION ||
3671              reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
3672             if (qemuProcessStartCPUs(driver, vm,
3673                                      VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
3674                                      QEMU_ASYNC_JOB_NONE) < 0) {
3675                 VIR_WARN("Could not resume domain %s", vm->def->name);
3676             }
3677         }
3678     }
3679 
3680     qemuMigrationParamsReset(driver, vm, QEMU_ASYNC_JOB_NONE,
3681                              jobPriv->migParams, job->apiFlags);
3682     return 0;
3683 }
3684 
3685 static int
qemuProcessRecoverJob(virQEMUDriver * driver,virDomainObj * vm,const qemuDomainJobObj * job,unsigned int * stopFlags)3686 qemuProcessRecoverJob(virQEMUDriver *driver,
3687                       virDomainObj *vm,
3688                       const qemuDomainJobObj *job,
3689                       unsigned int *stopFlags)
3690 {
3691     qemuDomainObjPrivate *priv = vm->privateData;
3692     virDomainState state;
3693     int reason;
3694     unsigned long long now;
3695 
3696     state = virDomainObjGetState(vm, &reason);
3697 
3698     switch (job->asyncJob) {
3699     case QEMU_ASYNC_JOB_MIGRATION_OUT:
3700         if (qemuProcessRecoverMigrationOut(driver, vm, job,
3701                                            state, reason, stopFlags) < 0)
3702             return -1;
3703         break;
3704 
3705     case QEMU_ASYNC_JOB_MIGRATION_IN:
3706         if (qemuProcessRecoverMigrationIn(driver, vm, job,
3707                                           state, reason) < 0)
3708             return -1;
3709         break;
3710 
3711     case QEMU_ASYNC_JOB_SAVE:
3712     case QEMU_ASYNC_JOB_DUMP:
3713     case QEMU_ASYNC_JOB_SNAPSHOT:
3714         qemuDomainObjEnterMonitor(driver, vm);
3715         ignore_value(qemuMonitorMigrateCancel(priv->mon));
3716         if (qemuDomainObjExitMonitor(driver, vm) < 0)
3717             return -1;
3718         /* resume the domain but only if it was paused as a result of
3719          * running a migration-to-file operation.  Although we are
3720          * recovering an async job, this function is run at startup
3721          * and must resume things using sync monitor connections.  */
3722          if (state == VIR_DOMAIN_PAUSED &&
3723              ((job->asyncJob == QEMU_ASYNC_JOB_DUMP &&
3724                reason == VIR_DOMAIN_PAUSED_DUMP) ||
3725               (job->asyncJob == QEMU_ASYNC_JOB_SAVE &&
3726                reason == VIR_DOMAIN_PAUSED_SAVE) ||
3727               (job->asyncJob == QEMU_ASYNC_JOB_SNAPSHOT &&
3728                (reason == VIR_DOMAIN_PAUSED_SNAPSHOT ||
3729                 reason == VIR_DOMAIN_PAUSED_MIGRATION)) ||
3730               reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
3731              if (qemuProcessStartCPUs(driver, vm,
3732                                       VIR_DOMAIN_RUNNING_SAVE_CANCELED,
3733                                       QEMU_ASYNC_JOB_NONE) < 0) {
3734                  VIR_WARN("Could not resume domain '%s' after migration to file",
3735                           vm->def->name);
3736             }
3737         }
3738         break;
3739 
3740     case QEMU_ASYNC_JOB_START:
3741         /* Already handled in VIR_DOMAIN_PAUSED_STARTING_UP check. */
3742         break;
3743 
3744     case QEMU_ASYNC_JOB_BACKUP:
3745         ignore_value(virTimeMillisNow(&now));
3746 
3747         /* Restore the config of the async job which is not persisted */
3748         priv->jobs_queued++;
3749         priv->job.asyncJob = QEMU_ASYNC_JOB_BACKUP;
3750         priv->job.asyncOwnerAPI = g_strdup(virThreadJobGet());
3751         priv->job.asyncStarted = now;
3752 
3753         qemuDomainObjSetAsyncJobMask(vm, (QEMU_JOB_DEFAULT_MASK |
3754                                           JOB_MASK(QEMU_JOB_SUSPEND) |
3755                                           JOB_MASK(QEMU_JOB_MODIFY)));
3756 
3757         /* We reset the job parameters for backup so that the job will look
3758          * active. This is possible because we are able to recover the state
3759          * of blockjobs and also the backup job allows all sub-job types */
3760         priv->job.current = g_new0(qemuDomainJobInfo, 1);
3761         priv->job.current->operation = VIR_DOMAIN_JOB_OPERATION_BACKUP;
3762         priv->job.current->statsType = QEMU_DOMAIN_JOB_STATS_TYPE_BACKUP;
3763         priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE;
3764         priv->job.current->started = now;
3765         break;
3766 
3767     case QEMU_ASYNC_JOB_NONE:
3768     case QEMU_ASYNC_JOB_LAST:
3769         break;
3770     }
3771 
3772     if (!virDomainObjIsActive(vm))
3773         return -1;
3774 
3775     /* In case any special handling is added for job type that has been ignored
3776      * before, QEMU_DOMAIN_TRACK_JOBS (from qemu_domain.h) needs to be updated
3777      * for the job to be properly tracked in domain state XML.
3778      */
3779     switch (job->active) {
3780     case QEMU_JOB_QUERY:
3781         /* harmless */
3782         break;
3783 
3784     case QEMU_JOB_DESTROY:
3785         VIR_DEBUG("Domain %s should have already been destroyed",
3786                   vm->def->name);
3787         return -1;
3788 
3789     case QEMU_JOB_SUSPEND:
3790         /* mostly harmless */
3791         break;
3792 
3793     case QEMU_JOB_MODIFY:
3794         /* XXX depending on the command we may be in an inconsistent state and
3795          * we should probably fall back to "monitor error" state and refuse to
3796          */
3797         break;
3798 
3799     case QEMU_JOB_MIGRATION_OP:
3800     case QEMU_JOB_ABORT:
3801     case QEMU_JOB_ASYNC:
3802     case QEMU_JOB_ASYNC_NESTED:
3803         /* async job was already handled above */
3804     case QEMU_JOB_NONE:
3805     case QEMU_JOB_LAST:
3806         break;
3807     }
3808 
3809     return 0;
3810 }
3811 
3812 static int
qemuProcessUpdateDevices(virQEMUDriver * driver,virDomainObj * vm)3813 qemuProcessUpdateDevices(virQEMUDriver *driver,
3814                          virDomainObj *vm)
3815 {
3816     qemuDomainObjPrivate *priv = vm->privateData;
3817     virDomainDeviceDef dev;
3818     g_auto(GStrv) old = g_steal_pointer(&priv->qemuDevices);
3819     GStrv tmp;
3820 
3821     if (qemuDomainUpdateDeviceList(driver, vm, QEMU_ASYNC_JOB_NONE) < 0)
3822         return -1;
3823 
3824     if (!old)
3825         return 0;
3826 
3827     for (tmp = old; *tmp; tmp++) {
3828         if (!g_strv_contains((const char **) priv->qemuDevices, *tmp) &&
3829             virDomainDefFindDevice(vm->def, *tmp, &dev, false) == 0 &&
3830             qemuDomainRemoveDevice(driver, vm, &dev))
3831             return -1;
3832     }
3833 
3834     return 0;
3835 }
3836 
3837 static int
qemuDomainPerfRestart(virDomainObj * vm)3838 qemuDomainPerfRestart(virDomainObj *vm)
3839 {
3840     size_t i;
3841     virDomainDef *def = vm->def;
3842     qemuDomainObjPrivate *priv = vm->privateData;
3843 
3844     if (!(priv->perf = virPerfNew()))
3845         return -1;
3846 
3847     for (i = 0; i < VIR_PERF_EVENT_LAST; i++) {
3848         if (def->perf.events[i] &&
3849             def->perf.events[i] == VIR_TRISTATE_BOOL_YES) {
3850 
3851             /* Failure to re-enable the perf event should not be fatal */
3852             if (virPerfEventEnable(priv->perf, i, vm->pid) < 0)
3853                 def->perf.events[i] = VIR_TRISTATE_BOOL_NO;
3854         }
3855     }
3856 
3857     return 0;
3858 }
3859 
3860 
3861 static void
qemuProcessReconnectCheckMemAliasOrderMismatch(virDomainObj * vm)3862 qemuProcessReconnectCheckMemAliasOrderMismatch(virDomainObj *vm)
3863 {
3864     size_t i;
3865     int aliasidx;
3866     virDomainDef *def = vm->def;
3867     qemuDomainObjPrivate *priv = vm->privateData;
3868 
3869     if (!virDomainDefHasMemoryHotplug(def) || def->nmems == 0)
3870         return;
3871 
3872     for (i = 0; i < def->nmems; i++) {
3873         aliasidx = qemuDomainDeviceAliasIndex(&def->mems[i]->info, "dimm");
3874 
3875         if (def->mems[i]->info.addr.dimm.slot != aliasidx) {
3876             priv->memAliasOrderMismatch = true;
3877             break;
3878         }
3879     }
3880 }
3881 
3882 
3883 static bool
qemuProcessDomainMemoryDefNeedHugepagesPath(const virDomainMemoryDef * mem,const long system_pagesize)3884 qemuProcessDomainMemoryDefNeedHugepagesPath(const virDomainMemoryDef *mem,
3885                                             const long system_pagesize)
3886 {
3887     switch (mem->model) {
3888     case VIR_DOMAIN_MEMORY_MODEL_DIMM:
3889     case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM:
3890         return mem->pagesize && mem->pagesize != system_pagesize;
3891 
3892     case VIR_DOMAIN_MEMORY_MODEL_NONE:
3893     case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
3894     case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
3895     case VIR_DOMAIN_MEMORY_MODEL_LAST:
3896         /* None of these can be backed by hugepages. */
3897         return false;
3898     }
3899 
3900     return false;
3901 }
3902 
3903 
3904 static bool
qemuProcessNeedHugepagesPath(virDomainDef * def,virDomainMemoryDef * mem)3905 qemuProcessNeedHugepagesPath(virDomainDef *def,
3906                              virDomainMemoryDef *mem)
3907 {
3908     const long system_pagesize = virGetSystemPageSizeKB();
3909     size_t i;
3910 
3911     if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE)
3912         return true;
3913 
3914     for (i = 0; i < def->mem.nhugepages; i++) {
3915         if (def->mem.hugepages[i].size != system_pagesize)
3916             return true;
3917     }
3918 
3919     for (i = 0; i < def->nmems; i++) {
3920         if (qemuProcessDomainMemoryDefNeedHugepagesPath(def->mems[i], system_pagesize))
3921             return true;
3922     }
3923 
3924     if (mem &&
3925         qemuProcessDomainMemoryDefNeedHugepagesPath(mem, system_pagesize))
3926         return true;
3927 
3928     return false;
3929 }
3930 
3931 
3932 static bool
qemuProcessNeedMemoryBackingPath(virDomainDef * def,virDomainMemoryDef * mem)3933 qemuProcessNeedMemoryBackingPath(virDomainDef *def,
3934                                  virDomainMemoryDef *mem)
3935 {
3936     size_t i;
3937     size_t numaNodes;
3938 
3939     if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE ||
3940         def->mem.access != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT)
3941         return true;
3942 
3943     numaNodes = virDomainNumaGetNodeCount(def->numa);
3944     for (i = 0; i < numaNodes; i++) {
3945         if (virDomainNumaGetNodeMemoryAccessMode(def->numa, i)
3946             != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT)
3947             return true;
3948     }
3949 
3950     for (i = 0; i < def->nmems; i++) {
3951         if (def->mems[i]->access != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT)
3952             return true;
3953     }
3954 
3955     if (mem) {
3956         switch (mem->model) {
3957         case VIR_DOMAIN_MEMORY_MODEL_DIMM:
3958         case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM:
3959             if (mem->access != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT) {
3960                 /* No need to check for access mode on the target node,
3961                  * it was checked for in the previous loop. */
3962                 return true;
3963             }
3964             break;
3965 
3966         case VIR_DOMAIN_MEMORY_MODEL_NONE:
3967         case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
3968         case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
3969         case VIR_DOMAIN_MEMORY_MODEL_LAST:
3970             /* Backed by user provided path. Not stored in memory
3971              * backing dir anyway. */
3972             break;
3973         }
3974     }
3975 
3976     return false;
3977 }
3978 
3979 
3980 static int
qemuProcessBuildDestroyMemoryPathsImpl(virQEMUDriver * driver,virDomainObj * vm,const char * path,bool build)3981 qemuProcessBuildDestroyMemoryPathsImpl(virQEMUDriver *driver,
3982                                        virDomainObj *vm,
3983                                        const char *path,
3984                                        bool build)
3985 {
3986     if (build) {
3987         if (virFileExists(path))
3988             return 0;
3989 
3990         if (g_mkdir_with_parents(path, 0700) < 0) {
3991             virReportSystemError(errno,
3992                                  _("Unable to create %s"),
3993                                  path);
3994             return -1;
3995         }
3996 
3997         if (qemuSecurityDomainSetPathLabel(driver, vm, path, true) < 0)
3998             return -1;
3999     } else {
4000         if (virFileDeleteTree(path) < 0)
4001             return -1;
4002     }
4003 
4004     return 0;
4005 }
4006 
4007 
4008 int
qemuProcessBuildDestroyMemoryPaths(virQEMUDriver * driver,virDomainObj * vm,virDomainMemoryDef * mem,bool build)4009 qemuProcessBuildDestroyMemoryPaths(virQEMUDriver *driver,
4010                                    virDomainObj *vm,
4011                                    virDomainMemoryDef *mem,
4012                                    bool build)
4013 {
4014 
4015     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
4016     size_t i;
4017     bool shouldBuildHP = false;
4018     bool shouldBuildMB = false;
4019 
4020     if (build) {
4021         shouldBuildHP = qemuProcessNeedHugepagesPath(vm->def, mem);
4022         shouldBuildMB = qemuProcessNeedMemoryBackingPath(vm->def, mem);
4023     }
4024 
4025     if (!build || shouldBuildHP) {
4026         for (i = 0; i < cfg->nhugetlbfs; i++) {
4027             g_autofree char *path = NULL;
4028             path = qemuGetDomainHugepagePath(driver, vm->def, &cfg->hugetlbfs[i]);
4029 
4030             if (!path)
4031                 return -1;
4032 
4033             if (qemuProcessBuildDestroyMemoryPathsImpl(driver, vm,
4034                                                        path, build) < 0)
4035                 return -1;
4036         }
4037     }
4038 
4039     if (!build || shouldBuildMB) {
4040         g_autofree char *path = NULL;
4041         if (qemuGetMemoryBackingDomainPath(driver, vm->def, &path) < 0)
4042             return -1;
4043 
4044         if (qemuProcessBuildDestroyMemoryPathsImpl(driver, vm,
4045                                                    path, build) < 0)
4046             return -1;
4047     }
4048 
4049     return 0;
4050 }
4051 
4052 
4053 int
qemuProcessDestroyMemoryBackingPath(virQEMUDriver * driver,virDomainObj * vm,virDomainMemoryDef * mem)4054 qemuProcessDestroyMemoryBackingPath(virQEMUDriver *driver,
4055                                     virDomainObj *vm,
4056                                     virDomainMemoryDef *mem)
4057 {
4058     g_autofree char *path = NULL;
4059 
4060     if (qemuGetMemoryBackingPath(driver, vm->def, mem->info.alias, &path) < 0)
4061         return -1;
4062 
4063     if (unlink(path) < 0 &&
4064         errno != ENOENT) {
4065         virReportSystemError(errno, _("Unable to remove %s"), path);
4066         return -1;
4067     }
4068 
4069     return 0;
4070 }
4071 
4072 
4073 static int
qemuProcessVNCAllocatePorts(virQEMUDriver * driver,virDomainGraphicsDef * graphics,bool allocate)4074 qemuProcessVNCAllocatePorts(virQEMUDriver *driver,
4075                             virDomainGraphicsDef *graphics,
4076                             bool allocate)
4077 {
4078     unsigned short port;
4079 
4080     if (!allocate) {
4081         if (graphics->data.vnc.autoport)
4082             graphics->data.vnc.port = 5900;
4083 
4084         return 0;
4085     }
4086 
4087     if (graphics->data.vnc.autoport) {
4088         if (virPortAllocatorAcquire(driver->remotePorts, &port) < 0)
4089             return -1;
4090         graphics->data.vnc.port = port;
4091     }
4092 
4093     if (graphics->data.vnc.websocket == -1) {
4094         if (virPortAllocatorAcquire(driver->webSocketPorts, &port) < 0)
4095             return -1;
4096         graphics->data.vnc.websocket = port;
4097         graphics->data.vnc.websocketGenerated = true;
4098     }
4099 
4100     return 0;
4101 }
4102 
4103 static int
qemuProcessSPICEAllocatePorts(virQEMUDriver * driver,virDomainGraphicsDef * graphics,bool allocate)4104 qemuProcessSPICEAllocatePorts(virQEMUDriver *driver,
4105                               virDomainGraphicsDef *graphics,
4106                               bool allocate)
4107 {
4108     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
4109     unsigned short port = 0;
4110     unsigned short tlsPort;
4111     size_t i;
4112     int defaultMode = graphics->data.spice.defaultMode;
4113 
4114     bool needTLSPort = false;
4115     bool needPort = false;
4116 
4117     if (graphics->data.spice.autoport) {
4118         /* check if tlsPort or port need allocation */
4119         for (i = 0; i < VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_LAST; i++) {
4120             switch (graphics->data.spice.channels[i]) {
4121             case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_SECURE:
4122                 needTLSPort = true;
4123                 break;
4124 
4125             case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_INSECURE:
4126                 needPort = true;
4127                 break;
4128 
4129             case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_ANY:
4130                 /* default mode will be used */
4131                 break;
4132             }
4133         }
4134         switch (defaultMode) {
4135         case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_SECURE:
4136             needTLSPort = true;
4137             break;
4138 
4139         case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_INSECURE:
4140             needPort = true;
4141             break;
4142 
4143         case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_ANY:
4144             if (cfg->spiceTLS)
4145                 needTLSPort = true;
4146             needPort = true;
4147             break;
4148         }
4149     }
4150 
4151     if (!allocate) {
4152         if (needPort || graphics->data.spice.port == -1)
4153             graphics->data.spice.port = 5901;
4154 
4155         if (needTLSPort || graphics->data.spice.tlsPort == -1)
4156             graphics->data.spice.tlsPort = 5902;
4157 
4158         return 0;
4159     }
4160 
4161     if (needPort || graphics->data.spice.port == -1) {
4162         if (virPortAllocatorAcquire(driver->remotePorts, &port) < 0)
4163             return -1;
4164 
4165         graphics->data.spice.port = port;
4166 
4167         if (!graphics->data.spice.autoport)
4168             graphics->data.spice.portReserved = true;
4169     }
4170 
4171     if (needTLSPort || graphics->data.spice.tlsPort == -1) {
4172         if (!cfg->spiceTLS) {
4173             virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
4174                            _("Auto allocation of spice TLS port requested "
4175                              "but spice TLS is disabled in qemu.conf"));
4176             return -1;
4177         }
4178 
4179         if (virPortAllocatorAcquire(driver->remotePorts, &tlsPort) < 0)
4180             return -1;
4181 
4182         graphics->data.spice.tlsPort = tlsPort;
4183 
4184         if (!graphics->data.spice.autoport)
4185             graphics->data.spice.tlsPortReserved = true;
4186     }
4187 
4188     return 0;
4189 }
4190 
4191 
4192 static int
qemuProcessVerifyHypervFeatures(virDomainDef * def,virCPUData * cpu)4193 qemuProcessVerifyHypervFeatures(virDomainDef *def,
4194                                 virCPUData *cpu)
4195 {
4196     size_t i;
4197     int rc;
4198 
4199     for (i = 0; i < VIR_DOMAIN_HYPERV_LAST; i++) {
4200         g_autofree char *cpuFeature = NULL;
4201 
4202         /* always supported string property */
4203         if (i == VIR_DOMAIN_HYPERV_VENDOR_ID ||
4204             i == VIR_DOMAIN_HYPERV_SPINLOCKS)
4205             continue;
4206 
4207         if (def->hyperv_features[i] != VIR_TRISTATE_SWITCH_ON)
4208             continue;
4209 
4210         cpuFeature = g_strdup_printf("hv-%s", virDomainHypervTypeToString(i));
4211 
4212         rc = virCPUDataCheckFeature(cpu, cpuFeature);
4213 
4214         if (rc < 0) {
4215             return -1;
4216         } else if (rc == 1) {
4217             if (i == VIR_DOMAIN_HYPERV_STIMER) {
4218                 if (def->hyperv_stimer_direct != VIR_TRISTATE_SWITCH_ON)
4219                     continue;
4220 
4221                 rc = virCPUDataCheckFeature(cpu, VIR_CPU_x86_HV_STIMER_DIRECT);
4222                 if (rc < 0)
4223                     return -1;
4224                 else if (rc == 1)
4225                     continue;
4226 
4227                 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
4228                                _("host doesn't support hyperv stimer '%s' feature"),
4229                                "direct");
4230                 return -1;
4231             }
4232             continue;
4233         }
4234 
4235         switch ((virDomainHyperv) i) {
4236         case VIR_DOMAIN_HYPERV_RELAXED:
4237         case VIR_DOMAIN_HYPERV_VAPIC:
4238             VIR_WARN("host doesn't support hyperv '%s' feature",
4239                      virDomainHypervTypeToString(i));
4240             break;
4241 
4242         case VIR_DOMAIN_HYPERV_VPINDEX:
4243         case VIR_DOMAIN_HYPERV_RUNTIME:
4244         case VIR_DOMAIN_HYPERV_SYNIC:
4245         case VIR_DOMAIN_HYPERV_STIMER:
4246         case VIR_DOMAIN_HYPERV_RESET:
4247         case VIR_DOMAIN_HYPERV_FREQUENCIES:
4248         case VIR_DOMAIN_HYPERV_REENLIGHTENMENT:
4249         case VIR_DOMAIN_HYPERV_TLBFLUSH:
4250         case VIR_DOMAIN_HYPERV_IPI:
4251         case VIR_DOMAIN_HYPERV_EVMCS:
4252             virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
4253                            _("host doesn't support hyperv '%s' feature"),
4254                            virDomainHypervTypeToString(i));
4255             return -1;
4256 
4257         case VIR_DOMAIN_HYPERV_SPINLOCKS:
4258         case VIR_DOMAIN_HYPERV_VENDOR_ID:
4259         case VIR_DOMAIN_HYPERV_LAST:
4260             break;
4261         }
4262     }
4263 
4264     return 0;
4265 }
4266 
4267 
4268 static int
qemuProcessVerifyKVMFeatures(virDomainDef * def,virCPUData * cpu)4269 qemuProcessVerifyKVMFeatures(virDomainDef *def,
4270                              virCPUData *cpu)
4271 {
4272     int rc = 0;
4273 
4274     if (def->features[VIR_DOMAIN_FEATURE_PVSPINLOCK] != VIR_TRISTATE_SWITCH_ON)
4275         return 0;
4276 
4277     rc = virCPUDataCheckFeature(cpu, VIR_CPU_x86_KVM_PV_UNHALT);
4278 
4279     if (rc <= 0) {
4280         if (rc == 0)
4281             virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
4282                            _("host doesn't support paravirtual spinlocks"));
4283         return -1;
4284     }
4285 
4286     return 0;
4287 }
4288 
4289 
4290 static int
qemuProcessVerifyCPUFeatures(virDomainDef * def,virCPUData * cpu)4291 qemuProcessVerifyCPUFeatures(virDomainDef *def,
4292                              virCPUData *cpu)
4293 {
4294     int rc;
4295 
4296     rc = virCPUCheckFeature(def->os.arch, def->cpu, "invtsc");
4297 
4298     if (rc < 0) {
4299         return -1;
4300     } else if (rc == 1) {
4301         rc = virCPUDataCheckFeature(cpu, "invtsc");
4302         if (rc <= 0) {
4303             if (rc == 0) {
4304                 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
4305                                _("host doesn't support invariant TSC"));
4306             }
4307             return -1;
4308         }
4309     }
4310 
4311     return 0;
4312 }
4313 
4314 
/**
 * qemuProcessTranslateCPUFeatures:
 * @name: feature name to translate
 * @opaque: the virQEMUCaps object to translate with
 *
 * Callback adapter around virQEMUCapsCPUFeatureFromQEMU() taking the
 * capabilities through an untyped pointer.
 *
 * Returns: whatever virQEMUCapsCPUFeatureFromQEMU() returns for @name.
 */
static const char *
qemuProcessTranslateCPUFeatures(const char *name,
                                void *opaque)
{
    /* void * converts implicitly to the virQEMUCaps * parameter */
    return virQEMUCapsCPUFeatureFromQEMU(opaque, name);
}
4323 
4324 
4325 static int
qemuProcessFetchGuestCPU(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob,virCPUData ** enabled,virCPUData ** disabled)4326 qemuProcessFetchGuestCPU(virQEMUDriver *driver,
4327                          virDomainObj *vm,
4328                          qemuDomainAsyncJob asyncJob,
4329                          virCPUData **enabled,
4330                          virCPUData **disabled)
4331 {
4332     qemuDomainObjPrivate *priv = vm->privateData;
4333     g_autoptr(virCPUData) dataEnabled = NULL;
4334     g_autoptr(virCPUData) dataDisabled = NULL;
4335     bool generic;
4336     int rc;
4337 
4338     *enabled = NULL;
4339     *disabled = NULL;
4340 
4341     generic = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_CPU_UNAVAILABLE_FEATURES);
4342 
4343     if (!generic && !ARCH_IS_X86(vm->def->os.arch))
4344         return 0;
4345 
4346     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
4347         return -1;
4348 
4349     if (generic) {
4350         rc = qemuMonitorGetGuestCPU(priv->mon,
4351                                     vm->def->os.arch,
4352                                     qemuProcessTranslateCPUFeatures, priv->qemuCaps,
4353                                     &dataEnabled, &dataDisabled);
4354     } else {
4355         rc = qemuMonitorGetGuestCPUx86(priv->mon, &dataEnabled, &dataDisabled);
4356     }
4357 
4358     if (qemuDomainObjExitMonitor(driver, vm) < 0)
4359         return -1;
4360 
4361     if (rc == -1)
4362         return -1;
4363 
4364     *enabled = g_steal_pointer(&dataEnabled);
4365     *disabled = g_steal_pointer(&dataDisabled);
4366     return 0;
4367 }
4368 
4369 
4370 static int
qemuProcessVerifyCPU(virDomainObj * vm,virCPUData * cpu)4371 qemuProcessVerifyCPU(virDomainObj *vm,
4372                      virCPUData *cpu)
4373 {
4374     virDomainDef *def = vm->def;
4375 
4376     if (!cpu)
4377         return 0;
4378 
4379     if (qemuProcessVerifyKVMFeatures(def, cpu) < 0 ||
4380         qemuProcessVerifyHypervFeatures(def, cpu) < 0)
4381         return -1;
4382 
4383     if (!def->cpu ||
4384         (def->cpu->mode == VIR_CPU_MODE_CUSTOM &&
4385          !def->cpu->model))
4386         return 0;
4387 
4388     if (qemuProcessVerifyCPUFeatures(def, cpu) < 0)
4389         return -1;
4390 
4391     return 0;
4392 }
4393 
4394 
4395 static int
qemuProcessUpdateLiveGuestCPU(virDomainObj * vm,virCPUData * enabled,virCPUData * disabled)4396 qemuProcessUpdateLiveGuestCPU(virDomainObj *vm,
4397                               virCPUData *enabled,
4398                               virCPUData *disabled)
4399 {
4400     virDomainDef *def = vm->def;
4401     qemuDomainObjPrivate *priv = vm->privateData;
4402     g_autoptr(virCPUDef) orig = NULL;
4403     int rc;
4404 
4405     if (!enabled)
4406         return 0;
4407 
4408     if (!def->cpu ||
4409         (def->cpu->mode == VIR_CPU_MODE_CUSTOM &&
4410          !def->cpu->model))
4411         return 0;
4412 
4413     if (!(orig = virCPUDefCopy(def->cpu)))
4414         return -1;
4415 
4416     if ((rc = virCPUUpdateLive(def->os.arch, def->cpu, enabled, disabled)) < 0) {
4417         return -1;
4418     } else if (rc == 0) {
4419         /* Store the original CPU in priv if QEMU changed it and we didn't
4420          * get the original CPU via migration, restore, or snapshot revert.
4421          */
4422         if (!priv->origCPU && !virCPUDefIsEqual(def->cpu, orig, false))
4423             priv->origCPU = g_steal_pointer(&orig);
4424 
4425         def->cpu->check = VIR_CPU_CHECK_FULL;
4426     }
4427 
4428     return 0;
4429 }
4430 
4431 
4432 static int
qemuProcessUpdateAndVerifyCPU(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob)4433 qemuProcessUpdateAndVerifyCPU(virQEMUDriver *driver,
4434                               virDomainObj *vm,
4435                               qemuDomainAsyncJob asyncJob)
4436 {
4437     virCPUData *cpu = NULL;
4438     virCPUData *disabled = NULL;
4439     int ret = -1;
4440 
4441     if (qemuProcessFetchGuestCPU(driver, vm, asyncJob, &cpu, &disabled) < 0)
4442         goto cleanup;
4443 
4444     if (qemuProcessVerifyCPU(vm, cpu) < 0)
4445         goto cleanup;
4446 
4447     if (qemuProcessUpdateLiveGuestCPU(vm, cpu, disabled) < 0)
4448         goto cleanup;
4449 
4450     ret = 0;
4451 
4452  cleanup:
4453     virCPUDataFree(cpu);
4454     virCPUDataFree(disabled);
4455     return ret;
4456 }
4457 
4458 
4459 static int
qemuProcessFetchCPUDefinitions(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob,virDomainCapsCPUModels ** cpuModels)4460 qemuProcessFetchCPUDefinitions(virQEMUDriver *driver,
4461                                virDomainObj *vm,
4462                                qemuDomainAsyncJob asyncJob,
4463                                virDomainCapsCPUModels **cpuModels)
4464 {
4465     qemuDomainObjPrivate *priv = vm->privateData;
4466     g_autoptr(virDomainCapsCPUModels) models = NULL;
4467     int rc;
4468 
4469     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
4470         return -1;
4471 
4472     rc = virQEMUCapsFetchCPUModels(priv->mon, vm->def->os.arch, &models);
4473 
4474     if (qemuDomainObjExitMonitor(driver, vm) < 0 || rc < 0)
4475         return -1;
4476 
4477     *cpuModels = g_steal_pointer(&models);
4478     return 0;
4479 }
4480 
4481 
4482 static int
qemuProcessUpdateCPU(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob)4483 qemuProcessUpdateCPU(virQEMUDriver *driver,
4484                      virDomainObj *vm,
4485                      qemuDomainAsyncJob asyncJob)
4486 {
4487     g_autoptr(virCPUData) cpu = NULL;
4488     g_autoptr(virCPUData) disabled = NULL;
4489     g_autoptr(virDomainCapsCPUModels) models = NULL;
4490 
4491     /* The host CPU model comes from host caps rather than QEMU caps so
4492      * fallback must be allowed no matter what the user specified in the XML.
4493      */
4494     vm->def->cpu->fallback = VIR_CPU_FALLBACK_ALLOW;
4495 
4496     if (qemuProcessFetchGuestCPU(driver, vm, asyncJob, &cpu, &disabled) < 0)
4497         return -1;
4498 
4499     if (qemuProcessUpdateLiveGuestCPU(vm, cpu, disabled) < 0)
4500         return -1;
4501 
4502     if (qemuProcessFetchCPUDefinitions(driver, vm, asyncJob, &models) < 0 ||
4503         virCPUTranslate(vm->def->os.arch, vm->def->cpu, models) < 0)
4504         return -1;
4505 
4506     return 0;
4507 }
4508 
4509 
4510 static int
qemuPrepareNVRAM(virQEMUDriver * driver,virDomainObj * vm)4511 qemuPrepareNVRAM(virQEMUDriver *driver,
4512                  virDomainObj *vm)
4513 {
4514     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
4515     int ret = -1;
4516     int srcFD = -1;
4517     int dstFD = -1;
4518     virDomainLoaderDef *loader = vm->def->os.loader;
4519     bool created = false;
4520     const char *master_nvram_path;
4521     ssize_t r;
4522 
4523     if (!loader || !loader->nvram || virFileExists(loader->nvram))
4524         return 0;
4525 
4526     master_nvram_path = loader->templt;
4527     if (!loader->templt) {
4528         size_t i;
4529         for (i = 0; i < cfg->nfirmwares; i++) {
4530             if (STREQ(cfg->firmwares[i]->name, loader->path)) {
4531                 master_nvram_path = cfg->firmwares[i]->nvram;
4532                 break;
4533             }
4534         }
4535     }
4536 
4537     if (!master_nvram_path) {
4538         virReportError(VIR_ERR_OPERATION_FAILED,
4539                        _("unable to find any master var store for "
4540                          "loader: %s"), loader->path);
4541         goto cleanup;
4542     }
4543 
4544     if ((srcFD = virFileOpenAs(master_nvram_path, O_RDONLY,
4545                                0, -1, -1, 0)) < 0) {
4546         virReportSystemError(-srcFD,
4547                              _("Failed to open file '%s'"),
4548                              master_nvram_path);
4549         goto cleanup;
4550     }
4551 
4552     if ((dstFD = virFileOpenAs(loader->nvram,
4553                                O_WRONLY | O_CREAT | O_EXCL,
4554                                S_IRUSR | S_IWUSR,
4555                                cfg->user, cfg->group,
4556                                VIR_FILE_OPEN_FORCE_OWNER)) < 0) {
4557         virReportSystemError(-dstFD,
4558                              _("Failed to create file '%s'"),
4559                              loader->nvram);
4560         goto cleanup;
4561     }
4562 
4563     created = true;
4564 
4565     do {
4566         char buf[1024];
4567 
4568         if ((r = saferead(srcFD, buf, sizeof(buf))) < 0) {
4569             virReportSystemError(errno,
4570                                  _("Unable to read from file '%s'"),
4571                                  master_nvram_path);
4572             goto cleanup;
4573         }
4574 
4575         if (safewrite(dstFD, buf, r) < 0) {
4576             virReportSystemError(errno,
4577                                  _("Unable to write to file '%s'"),
4578                                  loader->nvram);
4579             goto cleanup;
4580         }
4581     } while (r);
4582 
4583     if (VIR_CLOSE(srcFD) < 0) {
4584         virReportSystemError(errno,
4585                              _("Unable to close file '%s'"),
4586                              master_nvram_path);
4587         goto cleanup;
4588     }
4589     if (VIR_CLOSE(dstFD) < 0) {
4590         virReportSystemError(errno,
4591                              _("Unable to close file '%s'"),
4592                              loader->nvram);
4593         goto cleanup;
4594     }
4595 
4596     ret = 0;
4597  cleanup:
4598     /* We successfully generated the nvram path, but failed to
4599      * copy the file content. Roll back. */
4600     if (ret < 0) {
4601         if (created)
4602             unlink(loader->nvram);
4603     }
4604 
4605     VIR_FORCE_CLOSE(srcFD);
4606     VIR_FORCE_CLOSE(dstFD);
4607     return ret;
4608 }
4609 
4610 
4611 static void
qemuLogOperation(virDomainObj * vm,const char * msg,virCommand * cmd,qemuDomainLogContext * logCtxt)4612 qemuLogOperation(virDomainObj *vm,
4613                  const char *msg,
4614                  virCommand *cmd,
4615                  qemuDomainLogContext *logCtxt)
4616 {
4617     g_autofree char *timestamp = NULL;
4618     qemuDomainObjPrivate *priv = vm->privateData;
4619     int qemuVersion = virQEMUCapsGetVersion(priv->qemuCaps);
4620     const char *package = virQEMUCapsGetPackage(priv->qemuCaps);
4621     g_autofree char *hostname = virGetHostname();
4622     struct utsname uts;
4623 
4624     uname(&uts);
4625 
4626     if ((timestamp = virTimeStringNow()) == NULL)
4627         return;
4628 
4629     if (qemuDomainLogContextWrite(logCtxt,
4630                                   "%s: %s %s, qemu version: %d.%d.%d%s, kernel: %s, hostname: %s\n",
4631                                   timestamp, msg, VIR_LOG_VERSION_STRING,
4632                                   (qemuVersion / 1000000) % 1000,
4633                                   (qemuVersion / 1000) % 1000,
4634                                   qemuVersion % 1000,
4635                                   NULLSTR_EMPTY(package),
4636                                   uts.release,
4637                                   NULLSTR_EMPTY(hostname)) < 0)
4638         return;
4639 
4640     if (cmd) {
4641         g_autofree char *args = virCommandToString(cmd, true);
4642         qemuDomainLogContextWrite(logCtxt, "%s\n", args);
4643     }
4644 }
4645 
4646 
4647 void
qemuProcessIncomingDefFree(qemuProcessIncomingDef * inc)4648 qemuProcessIncomingDefFree(qemuProcessIncomingDef *inc)
4649 {
4650     if (!inc)
4651         return;
4652 
4653     g_free(inc->address);
4654     g_free(inc->launchURI);
4655     g_free(inc->deferredURI);
4656     g_free(inc);
4657 }
4658 
4659 
4660 /*
4661  * This function does not copy @path, the caller is responsible for keeping
4662  * the @path pointer valid during the lifetime of the allocated
4663  * qemuProcessIncomingDef structure.
4664  *
4665  * The caller is responsible for closing @fd, calling
4666  * qemuProcessIncomingDefFree will NOT close it.
4667  */
4668 qemuProcessIncomingDef *
qemuProcessIncomingDefNew(virQEMUCaps * qemuCaps,const char * listenAddress,const char * migrateFrom,int fd,const char * path)4669 qemuProcessIncomingDefNew(virQEMUCaps *qemuCaps,
4670                           const char *listenAddress,
4671                           const char *migrateFrom,
4672                           int fd,
4673                           const char *path)
4674 {
4675     qemuProcessIncomingDef *inc = NULL;
4676 
4677     if (qemuMigrationDstCheckProtocol(qemuCaps, migrateFrom) < 0)
4678         return NULL;
4679 
4680     inc = g_new0(qemuProcessIncomingDef, 1);
4681 
4682     inc->address = g_strdup(listenAddress);
4683 
4684     inc->launchURI = qemuMigrationDstGetURI(migrateFrom, fd);
4685     if (!inc->launchURI)
4686         goto error;
4687 
4688     if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_INCOMING_DEFER)) {
4689         inc->deferredURI = inc->launchURI;
4690         inc->launchURI = g_strdup("defer");
4691     }
4692 
4693     inc->fd = fd;
4694     inc->path = path;
4695 
4696     return inc;
4697 
4698  error:
4699     qemuProcessIncomingDefFree(inc);
4700     return NULL;
4701 }
4702 
4703 
4704 /*
4705  * This function starts a new QEMU_ASYNC_JOB_START async job. The user is
4706  * responsible for calling qemuProcessEndJob to stop this job and for passing
4707  * QEMU_ASYNC_JOB_START as @asyncJob argument to any function requiring this
4708  * parameter between qemuProcessBeginJob and qemuProcessEndJob.
4709  */
4710 int
qemuProcessBeginJob(virQEMUDriver * driver,virDomainObj * vm,virDomainJobOperation operation,unsigned long apiFlags)4711 qemuProcessBeginJob(virQEMUDriver *driver,
4712                     virDomainObj *vm,
4713                     virDomainJobOperation operation,
4714                     unsigned long apiFlags)
4715 {
4716     if (qemuDomainObjBeginAsyncJob(driver, vm, QEMU_ASYNC_JOB_START,
4717                                    operation, apiFlags) < 0)
4718         return -1;
4719 
4720     qemuDomainObjSetAsyncJobMask(vm, QEMU_JOB_NONE);
4721     return 0;
4722 }
4723 
4724 
4725 void
qemuProcessEndJob(virQEMUDriver * driver,virDomainObj * vm)4726 qemuProcessEndJob(virQEMUDriver *driver,
4727                   virDomainObj *vm)
4728 {
4729     qemuDomainObjEndAsyncJob(driver, vm);
4730 }
4731 
4732 
4733 static int
qemuProcessStartHook(virQEMUDriver * driver,virDomainObj * vm,virHookQemuOpType op,virHookSubopType subop)4734 qemuProcessStartHook(virQEMUDriver *driver,
4735                      virDomainObj *vm,
4736                      virHookQemuOpType op,
4737                      virHookSubopType subop)
4738 {
4739     qemuDomainObjPrivate *priv = vm->privateData;
4740     g_autofree char *xml = NULL;
4741     int ret;
4742 
4743     if (!virHookPresent(VIR_HOOK_DRIVER_QEMU))
4744         return 0;
4745 
4746     if (!(xml = qemuDomainDefFormatXML(driver, priv->qemuCaps, vm->def, 0)))
4747         return -1;
4748 
4749     ret = virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name, op, subop,
4750                       NULL, xml, NULL);
4751 
4752     return ret;
4753 }
4754 
4755 
4756 static int
qemuProcessGraphicsReservePorts(virDomainGraphicsDef * graphics,bool reconnect)4757 qemuProcessGraphicsReservePorts(virDomainGraphicsDef *graphics,
4758                                 bool reconnect)
4759 {
4760     virDomainGraphicsListenDef *glisten;
4761 
4762     if (graphics->nListens <= 0)
4763         return 0;
4764 
4765     glisten = &graphics->listens[0];
4766 
4767     if (glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_ADDRESS &&
4768         glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NETWORK)
4769         return 0;
4770 
4771     switch (graphics->type) {
4772     case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
4773         if (!graphics->data.vnc.autoport ||
4774             reconnect) {
4775             if (virPortAllocatorSetUsed(graphics->data.vnc.port) < 0)
4776                 return -1;
4777             graphics->data.vnc.portReserved = true;
4778         }
4779         if (graphics->data.vnc.websocket > 0 &&
4780             virPortAllocatorSetUsed(graphics->data.vnc.websocket) < 0)
4781             return -1;
4782         break;
4783 
4784     case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
4785         if (graphics->data.spice.autoport && !reconnect)
4786             return 0;
4787 
4788         if (graphics->data.spice.port > 0) {
4789             if (virPortAllocatorSetUsed(graphics->data.spice.port) < 0)
4790                 return -1;
4791             graphics->data.spice.portReserved = true;
4792         }
4793 
4794         if (graphics->data.spice.tlsPort > 0) {
4795             if (virPortAllocatorSetUsed(graphics->data.spice.tlsPort) < 0)
4796                 return -1;
4797             graphics->data.spice.tlsPortReserved = true;
4798         }
4799         break;
4800 
4801     case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
4802     case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
4803     case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
4804     case VIR_DOMAIN_GRAPHICS_TYPE_EGL_HEADLESS:
4805     case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
4806         break;
4807     }
4808 
4809     return 0;
4810 }
4811 
4812 
4813 static int
qemuProcessGraphicsAllocatePorts(virQEMUDriver * driver,virDomainGraphicsDef * graphics,bool allocate)4814 qemuProcessGraphicsAllocatePorts(virQEMUDriver *driver,
4815                                  virDomainGraphicsDef *graphics,
4816                                  bool allocate)
4817 {
4818     virDomainGraphicsListenDef *glisten;
4819 
4820     if (graphics->nListens <= 0)
4821         return 0;
4822 
4823     glisten = &graphics->listens[0];
4824 
4825     if (glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_ADDRESS &&
4826         glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NETWORK)
4827         return 0;
4828 
4829     switch (graphics->type) {
4830     case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
4831         if (qemuProcessVNCAllocatePorts(driver, graphics, allocate) < 0)
4832             return -1;
4833         break;
4834 
4835     case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
4836         if (qemuProcessSPICEAllocatePorts(driver, graphics, allocate) < 0)
4837             return -1;
4838         break;
4839 
4840     case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
4841     case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
4842     case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
4843     case VIR_DOMAIN_GRAPHICS_TYPE_EGL_HEADLESS:
4844     case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
4845         break;
4846     }
4847 
4848     return 0;
4849 }
4850 
4851 static int
qemuProcessGetNetworkAddress(const char * netname,char ** netaddr)4852 qemuProcessGetNetworkAddress(const char *netname,
4853                              char **netaddr)
4854 {
4855     g_autoptr(virConnect) conn = NULL;
4856     int ret = -1;
4857     g_autoptr(virNetwork) net = NULL;
4858     virNetworkDef *netdef = NULL;
4859     virNetworkIPDef *ipdef;
4860     virSocketAddr addr;
4861     virSocketAddr *addrptr = NULL;
4862     char *dev_name = NULL;
4863     g_autofree char *xml = NULL;
4864 
4865     *netaddr = NULL;
4866 
4867     if (!(conn = virGetConnectNetwork()))
4868         return -1;
4869 
4870     net = virNetworkLookupByName(conn, netname);
4871     if (!net)
4872         goto cleanup;
4873 
4874     xml = virNetworkGetXMLDesc(net, 0);
4875     if (!xml)
4876         goto cleanup;
4877 
4878     netdef = virNetworkDefParseString(xml, NULL, false);
4879     if (!netdef)
4880         goto cleanup;
4881 
4882     switch ((virNetworkForwardType) netdef->forward.type) {
4883     case VIR_NETWORK_FORWARD_NONE:
4884     case VIR_NETWORK_FORWARD_NAT:
4885     case VIR_NETWORK_FORWARD_ROUTE:
4886     case VIR_NETWORK_FORWARD_OPEN:
4887         ipdef = virNetworkDefGetIPByIndex(netdef, AF_UNSPEC, 0);
4888         if (!ipdef) {
4889             virReportError(VIR_ERR_INTERNAL_ERROR,
4890                            _("network '%s' doesn't have an IP address"),
4891                            netdef->name);
4892             goto cleanup;
4893         }
4894         addrptr = &ipdef->address;
4895         break;
4896 
4897     case VIR_NETWORK_FORWARD_BRIDGE:
4898         if ((dev_name = netdef->bridge))
4899             break;
4900         /*
4901          * fall through if netdef->bridge wasn't set, since that is
4902          * macvtap bridge mode network.
4903          */
4904         G_GNUC_FALLTHROUGH;
4905 
4906     case VIR_NETWORK_FORWARD_PRIVATE:
4907     case VIR_NETWORK_FORWARD_VEPA:
4908     case VIR_NETWORK_FORWARD_PASSTHROUGH:
4909         if ((netdef->forward.nifs > 0) && netdef->forward.ifs)
4910             dev_name = netdef->forward.ifs[0].device.dev;
4911 
4912         if (!dev_name) {
4913             virReportError(VIR_ERR_INTERNAL_ERROR,
4914                            _("network '%s' has no associated interface or bridge"),
4915                            netdef->name);
4916             goto cleanup;
4917         }
4918         break;
4919 
4920     case VIR_NETWORK_FORWARD_HOSTDEV:
4921         break;
4922 
4923     case VIR_NETWORK_FORWARD_LAST:
4924     default:
4925         virReportEnumRangeError(virNetworkForwardType, netdef->forward.type);
4926         goto cleanup;
4927     }
4928 
4929     if (dev_name) {
4930         if (virNetDevIPAddrGet(dev_name, &addr) < 0)
4931             goto cleanup;
4932         addrptr = &addr;
4933     }
4934 
4935     if (!(addrptr &&
4936           (*netaddr = virSocketAddrFormat(addrptr)))) {
4937         goto cleanup;
4938     }
4939 
4940     ret = 0;
4941  cleanup:
4942     virNetworkDefFree(netdef);
4943     return ret;
4944 }
4945 
4946 
4947 static int
qemuProcessGraphicsSetupNetworkAddress(virDomainGraphicsListenDef * glisten,const char * listenAddr)4948 qemuProcessGraphicsSetupNetworkAddress(virDomainGraphicsListenDef *glisten,
4949                                        const char *listenAddr)
4950 {
4951     int rc;
4952 
4953     /* TODO: reject configuration without network specified for network listen */
4954     if (!glisten->network) {
4955         glisten->address = g_strdup(listenAddr);
4956         return 0;
4957     }
4958 
4959     rc = qemuProcessGetNetworkAddress(glisten->network, &glisten->address);
4960     if (rc <= -2) {
4961         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
4962                        _("network-based listen isn't possible, "
4963                          "network driver isn't present"));
4964         return -1;
4965     }
4966     if (rc < 0)
4967         return -1;
4968 
4969     return 0;
4970 }
4971 
4972 
4973 static int
qemuProcessGraphicsSetupListen(virQEMUDriver * driver,virDomainGraphicsDef * graphics,virDomainObj * vm)4974 qemuProcessGraphicsSetupListen(virQEMUDriver *driver,
4975                                virDomainGraphicsDef *graphics,
4976                                virDomainObj *vm)
4977 {
4978     qemuDomainObjPrivate *priv = vm->privateData;
4979     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
4980     const char *type = virDomainGraphicsTypeToString(graphics->type);
4981     char *listenAddr = NULL;
4982     bool useSocket = false;
4983     size_t i;
4984 
4985     switch (graphics->type) {
4986     case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
4987         useSocket = cfg->vncAutoUnixSocket;
4988         listenAddr = cfg->vncListen;
4989         break;
4990 
4991     case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
4992         useSocket = cfg->spiceAutoUnixSocket;
4993         listenAddr = cfg->spiceListen;
4994         break;
4995 
4996     case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
4997     case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
4998     case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
4999     case VIR_DOMAIN_GRAPHICS_TYPE_EGL_HEADLESS:
5000     case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
5001         break;
5002     }
5003 
5004     for (i = 0; i < graphics->nListens; i++) {
5005         virDomainGraphicsListenDef *glisten = &graphics->listens[i];
5006 
5007         switch (glisten->type) {
5008         case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_ADDRESS:
5009             if (!glisten->address) {
5010                 /* If there is no address specified and qemu.conf has
5011                  * *_auto_unix_socket set we should use unix socket as
5012                  * default instead of tcp listen. */
5013                 if (useSocket) {
5014                     memset(glisten, 0, sizeof(virDomainGraphicsListenDef));
5015                     glisten->socket = g_strdup_printf("%s/%s.sock", priv->libDir,
5016                                                       type);
5017                     glisten->fromConfig = true;
5018                     glisten->type = VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_SOCKET;
5019                 } else if (listenAddr) {
5020                     glisten->address = g_strdup(listenAddr);
5021                     glisten->fromConfig = true;
5022                 }
5023             }
5024             break;
5025 
5026         case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NETWORK:
5027             if (glisten->address || !listenAddr)
5028                 continue;
5029 
5030             if (qemuProcessGraphicsSetupNetworkAddress(glisten,
5031                                                        listenAddr) < 0)
5032                 return -1;
5033             break;
5034 
5035         case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_SOCKET:
5036             if (!glisten->socket) {
5037                 glisten->socket = g_strdup_printf("%s/%s.sock", priv->libDir,
5038                                                   type);
5039                 glisten->autoGenerated = true;
5040             }
5041             break;
5042 
5043         case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NONE:
5044         case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_LAST:
5045             break;
5046         }
5047     }
5048 
5049     return 0;
5050 }
5051 
5052 
5053 static int
qemuProcessGraphicsSetupRenderNode(virDomainGraphicsDef * graphics,virQEMUCaps * qemuCaps)5054 qemuProcessGraphicsSetupRenderNode(virDomainGraphicsDef *graphics,
5055                                    virQEMUCaps *qemuCaps)
5056 {
5057     char **rendernode = NULL;
5058 
5059     if (!virDomainGraphicsNeedsAutoRenderNode(graphics))
5060         return 0;
5061 
5062     /* Don't bother picking a DRM node if QEMU doesn't support it. */
5063     if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) {
5064         if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_SPICE_RENDERNODE))
5065             return 0;
5066 
5067         rendernode = &graphics->data.spice.rendernode;
5068     } else {
5069         if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_EGL_HEADLESS_RENDERNODE))
5070             return 0;
5071 
5072         rendernode = &graphics->data.egl_headless.rendernode;
5073     }
5074 
5075     if (!(*rendernode = virHostGetDRMRenderNode()))
5076         return -1;
5077 
5078     return 0;
5079 }
5080 
5081 
5082 static int
qemuProcessSetupGraphics(virQEMUDriver * driver,virDomainObj * vm,virQEMUCaps * qemuCaps,unsigned int flags)5083 qemuProcessSetupGraphics(virQEMUDriver *driver,
5084                          virDomainObj *vm,
5085                          virQEMUCaps *qemuCaps,
5086                          unsigned int flags)
5087 {
5088     virDomainGraphicsDef *graphics;
5089     bool allocate = !(flags & VIR_QEMU_PROCESS_START_PRETEND);
5090     size_t i;
5091 
5092     for (i = 0; i < vm->def->ngraphics; i++) {
5093         graphics = vm->def->graphics[i];
5094 
5095         if (qemuProcessGraphicsSetupRenderNode(graphics, qemuCaps) < 0)
5096             return -1;
5097 
5098         if (qemuProcessGraphicsSetupListen(driver, graphics, vm) < 0)
5099             return -1;
5100     }
5101 
5102     if (allocate) {
5103         for (i = 0; i < vm->def->ngraphics; i++) {
5104             graphics = vm->def->graphics[i];
5105 
5106             if (qemuProcessGraphicsReservePorts(graphics, false) < 0)
5107                 return -1;
5108         }
5109     }
5110 
5111     for (i = 0; i < vm->def->ngraphics; ++i) {
5112         graphics = vm->def->graphics[i];
5113 
5114         if (qemuProcessGraphicsAllocatePorts(driver, graphics, allocate) < 0)
5115             return -1;
5116     }
5117 
5118     return 0;
5119 }
5120 
5121 
5122 static int
qemuProcessSetupRawIO(virQEMUDriver * driver,virDomainObj * vm,virCommand * cmd G_GNUC_UNUSED)5123 qemuProcessSetupRawIO(virQEMUDriver *driver,
5124                       virDomainObj *vm,
5125                       virCommand *cmd G_GNUC_UNUSED)
5126 {
5127     bool rawio = false;
5128     size_t i;
5129     int ret = -1;
5130 
5131     /* in case a certain disk is desirous of CAP_SYS_RAWIO, add this */
5132     for (i = 0; i < vm->def->ndisks; i++) {
5133         virDomainDeviceDef dev;
5134         virDomainDiskDef *disk = vm->def->disks[i];
5135 
5136         if (disk->rawio == VIR_TRISTATE_BOOL_YES) {
5137             rawio = true;
5138 #ifndef CAP_SYS_RAWIO
5139             break;
5140 #endif
5141         }
5142 
5143         dev.type = VIR_DOMAIN_DEVICE_DISK;
5144         dev.data.disk = disk;
5145         if (qemuAddSharedDevice(driver, &dev, vm->def->name) < 0)
5146             goto cleanup;
5147 
5148         if (qemuSetUnprivSGIO(&dev) < 0)
5149             goto cleanup;
5150     }
5151 
5152     /* If rawio not already set, check hostdevs as well */
5153     if (!rawio) {
5154         for (i = 0; i < vm->def->nhostdevs; i++) {
5155             virDomainHostdevSubsysSCSI *scsisrc;
5156 
5157             if (!virHostdevIsSCSIDevice(vm->def->hostdevs[i]))
5158                 continue;
5159 
5160             scsisrc = &vm->def->hostdevs[i]->source.subsys.u.scsi;
5161             if (scsisrc->rawio == VIR_TRISTATE_BOOL_YES) {
5162                 rawio = true;
5163                 break;
5164             }
5165         }
5166     }
5167 
5168     ret = 0;
5169 
5170  cleanup:
5171     if (rawio) {
5172 #ifdef CAP_SYS_RAWIO
5173         if (ret == 0)
5174             virCommandAllowCap(cmd, CAP_SYS_RAWIO);
5175 #else
5176         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5177                        _("Raw I/O is not supported on this platform"));
5178         ret = -1;
5179 #endif
5180     }
5181     return ret;
5182 }
5183 
5184 
5185 static int
qemuProcessSetupBalloon(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob)5186 qemuProcessSetupBalloon(virQEMUDriver *driver,
5187                         virDomainObj *vm,
5188                         qemuDomainAsyncJob asyncJob)
5189 {
5190     unsigned long long balloon = vm->def->mem.cur_balloon;
5191     qemuDomainObjPrivate *priv = vm->privateData;
5192     int ret = -1;
5193 
5194     if (!virDomainDefHasMemballoon(vm->def))
5195         return 0;
5196 
5197     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
5198         return -1;
5199 
5200     if (vm->def->memballoon->period)
5201         qemuMonitorSetMemoryStatsPeriod(priv->mon, vm->def->memballoon,
5202                                         vm->def->memballoon->period);
5203     if (qemuMonitorSetBalloon(priv->mon, balloon) < 0)
5204         goto cleanup;
5205 
5206     ret = 0;
5207 
5208  cleanup:
5209     if (qemuDomainObjExitMonitor(driver, vm) < 0)
5210         ret = -1;
5211     return ret;
5212 }
5213 
5214 
5215 static int
qemuProcessMakeDir(virQEMUDriver * driver,virDomainObj * vm,const char * path)5216 qemuProcessMakeDir(virQEMUDriver *driver,
5217                    virDomainObj *vm,
5218                    const char *path)
5219 {
5220     if (g_mkdir_with_parents(path, 0750) < 0) {
5221         virReportSystemError(errno, _("Cannot create directory '%s'"), path);
5222         return -1;
5223     }
5224 
5225     if (qemuSecurityDomainSetPathLabel(driver, vm, path, true) < 0)
5226         return -1;
5227 
5228     return 0;
5229 }
5230 
5231 
5232 static void
qemuProcessStartWarnShmem(virDomainObj * vm)5233 qemuProcessStartWarnShmem(virDomainObj *vm)
5234 {
5235     size_t i;
5236     bool check_shmem = false;
5237     bool shmem = vm->def->nshmems;
5238 
5239     /*
5240      * For vhost-user to work, the domain has to have some type of
5241      * shared memory configured.  We're not the proper ones to judge
5242      * whether shared hugepages or shm are enough and will be in the
5243      * future, so we'll just warn in case neither is configured.
5244      * Moreover failing would give the false illusion that libvirt is
5245      * really checking that everything works before running the domain
5246      * and not only we are unable to do that, but it's also not our
5247      * aim to do so.
5248      */
5249     for (i = 0; i < vm->def->nnets; i++) {
5250         if (virDomainNetGetActualType(vm->def->nets[i]) ==
5251                                       VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
5252             check_shmem = true;
5253             break;
5254         }
5255     }
5256 
5257     if (!check_shmem)
5258         return;
5259 
5260     /*
5261      * This check is by no means complete.  We merely check
5262      * whether there are *some* hugepages enabled and *some* NUMA
5263      * nodes with shared memory access.
5264      */
5265     if (!shmem && vm->def->mem.nhugepages) {
5266         for (i = 0; i < virDomainNumaGetNodeCount(vm->def->numa); i++) {
5267             if (virDomainNumaGetNodeMemoryAccessMode(vm->def->numa, i) ==
5268                 VIR_DOMAIN_MEMORY_ACCESS_SHARED) {
5269                 shmem = true;
5270                 break;
5271             }
5272         }
5273     }
5274 
5275     if (!shmem) {
5276         VIR_WARN("Detected vhost-user interface without any shared memory, "
5277                  "the interface might not be operational");
5278     }
5279 }
5280 
5281 
5282 static int
qemuProcessStartValidateGraphics(virDomainObj * vm)5283 qemuProcessStartValidateGraphics(virDomainObj *vm)
5284 {
5285     size_t i;
5286 
5287     for (i = 0; i < vm->def->ngraphics; i++) {
5288         virDomainGraphicsDef *graphics = vm->def->graphics[i];
5289 
5290         switch (graphics->type) {
5291         case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
5292         case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
5293             if (graphics->nListens > 1) {
5294                 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5295                                _("QEMU does not support multiple listens for "
5296                                  "one graphics device."));
5297                 return -1;
5298             }
5299             break;
5300 
5301         case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
5302         case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
5303         case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
5304         case VIR_DOMAIN_GRAPHICS_TYPE_EGL_HEADLESS:
5305         case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
5306             break;
5307         }
5308     }
5309 
5310     return 0;
5311 }
5312 
5313 
5314 static int
qemuProcessStartValidateIOThreads(virDomainObj * vm,virQEMUCaps * qemuCaps)5315 qemuProcessStartValidateIOThreads(virDomainObj *vm,
5316                                   virQEMUCaps *qemuCaps)
5317 {
5318     size_t i;
5319 
5320     if (vm->def->niothreadids > 0 &&
5321         !virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_IOTHREAD)) {
5322         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5323                        _("IOThreads not supported for this QEMU"));
5324         return -1;
5325     }
5326 
5327     for (i = 0; i < vm->def->ncontrollers; i++) {
5328         virDomainControllerDef *cont = vm->def->controllers[i];
5329 
5330         if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_SCSI &&
5331             cont->model == VIR_DOMAIN_CONTROLLER_MODEL_SCSI_VIRTIO_SCSI &&
5332             cont->iothread > 0 &&
5333             !virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_SCSI_IOTHREAD)) {
5334             virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5335                            _("IOThreads for virtio-scsi not supported for "
5336                              "this QEMU"));
5337             return -1;
5338         }
5339     }
5340 
5341     return 0;
5342 }
5343 
5344 
5345 static int
qemuProcessStartValidateShmem(virDomainObj * vm)5346 qemuProcessStartValidateShmem(virDomainObj *vm)
5347 {
5348     size_t i;
5349 
5350     for (i = 0; i < vm->def->nshmems; i++) {
5351         virDomainShmemDef *shmem = vm->def->shmems[i];
5352 
5353         if (strchr(shmem->name, '/')) {
5354             virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
5355                            _("shmem name '%s' must not contain '/'"),
5356                            shmem->name);
5357             return -1;
5358         }
5359     }
5360 
5361     return 0;
5362 }
5363 
5364 
5365 static int
qemuProcessStartValidateDisks(virDomainObj * vm,virQEMUCaps * qemuCaps)5366 qemuProcessStartValidateDisks(virDomainObj *vm,
5367                               virQEMUCaps *qemuCaps)
5368 {
5369     size_t i;
5370 
5371     for (i = 0; i < vm->def->ndisks; i++) {
5372         virDomainDiskDef *disk = vm->def->disks[i];
5373         virStorageSource *src = disk->src;
5374 
5375         /* This is a best effort check as we can only check if the command
5376          * option exists, but we cannot determine whether the running QEMU
5377          * was build with '--enable-vxhs'. */
5378         if (src->type == VIR_STORAGE_TYPE_NETWORK &&
5379             src->protocol == VIR_STORAGE_NET_PROTOCOL_VXHS &&
5380             !virQEMUCapsGet(qemuCaps, QEMU_CAPS_VXHS)) {
5381             virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5382                            _("VxHS protocol is not supported with this "
5383                              "QEMU binary"));
5384             return -1;
5385         }
5386 
5387         /* PowerPC pseries based VMs do not support floppy device */
5388         if (disk->device == VIR_DOMAIN_DISK_DEVICE_FLOPPY &&
5389             qemuDomainIsPSeries(vm->def)) {
5390             virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5391                            _("PowerPC pseries machines do not support floppy device"));
5392             return -1;
5393         }
5394 
5395         if (src->type == VIR_STORAGE_TYPE_NVME &&
5396             !virQEMUCapsGet(qemuCaps, QEMU_CAPS_DRIVE_NVME)) {
5397             virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5398                            _("NVMe disks are not supported with this QEMU binary"));
5399             return -1;
5400         }
5401     }
5402 
5403     return 0;
5404 }
5405 
5406 
/* 250 parts per million (ppm) is half of the NTP threshold */
5408 #define TSC_TOLERANCE 250
5409 
5410 static int
qemuProcessStartValidateTSC(virQEMUDriver * driver,virDomainObj * vm)5411 qemuProcessStartValidateTSC(virQEMUDriver *driver,
5412                             virDomainObj *vm)
5413 {
5414     size_t i;
5415     unsigned long long freq = 0;
5416     unsigned long long tolerance;
5417     unsigned long long minFreq;
5418     unsigned long long maxFreq;
5419     virHostCPUTscInfo *tsc;
5420     g_autoptr(virCPUDef) cpu = NULL;
5421 
5422     for (i = 0; i < vm->def->clock.ntimers; i++) {
5423         virDomainTimerDef *timer = vm->def->clock.timers[i];
5424 
5425         if (timer->name == VIR_DOMAIN_TIMER_NAME_TSC &&
5426             timer->frequency > 0) {
5427             freq = timer->frequency;
5428             break;
5429         }
5430     }
5431 
5432     if (freq == 0)
5433         return 0;
5434 
5435     VIR_DEBUG("Requested TSC frequency %llu Hz", freq);
5436 
5437     cpu = virQEMUDriverGetHostCPU(driver);
5438     if (!cpu || !cpu->tsc) {
5439         VIR_DEBUG("Host TSC frequency could not be probed");
5440         return 0;
5441     }
5442 
5443     tsc = cpu->tsc;
5444     tolerance = tsc->frequency * TSC_TOLERANCE / 1000000;
5445     minFreq = tsc->frequency - tolerance;
5446     maxFreq = tsc->frequency + tolerance;
5447 
5448     VIR_DEBUG("Host TSC frequency %llu Hz, scaling %s, tolerance +/- %llu Hz",
5449               tsc->frequency, virTristateBoolTypeToString(tsc->scaling),
5450               tolerance);
5451 
5452     if (freq >= minFreq && freq <= maxFreq) {
5453         VIR_DEBUG("Requested TSC frequency is within tolerance interval");
5454         return 0;
5455     }
5456 
5457     if (tsc->scaling == VIR_TRISTATE_BOOL_YES)
5458         return 0;
5459 
5460     if (tsc->scaling == VIR_TRISTATE_BOOL_ABSENT) {
5461         VIR_DEBUG("Requested TSC frequency falls outside tolerance range and "
5462                   "scaling support is unknown, QEMU will try and possibly "
5463                   "fail later");
5464         return 0;
5465     }
5466 
5467     virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
5468                    _("Requested TSC frequency %llu Hz is outside tolerance "
5469                      "range ([%llu, %llu] Hz) around host frequency %llu Hz "
5470                      "and TSC scaling is not supported by the host CPU"),
5471                    freq, minFreq, maxFreq, tsc->frequency);
5472     return -1;
5473 }
5474 
5475 
5476 /**
5477  * qemuProcessStartValidate:
5478  * @vm: domain object
5479  * @qemuCaps: emulator capabilities
5480  * @migration: restoration of existing state
5481  *
5482  * This function aggregates checks done prior to start of a VM.
5483  *
5484  * Flag VIR_QEMU_PROCESS_START_PRETEND tells, that we don't want to actually
5485  * start the domain but create a valid qemu command.  If some code shouldn't be
5486  * executed in this case, make sure to check this flag.
5487  */
5488 static int
qemuProcessStartValidate(virQEMUDriver * driver,virDomainObj * vm,virQEMUCaps * qemuCaps,unsigned int flags)5489 qemuProcessStartValidate(virQEMUDriver *driver,
5490                          virDomainObj *vm,
5491                          virQEMUCaps *qemuCaps,
5492                          unsigned int flags)
5493 {
5494     if (!(flags & VIR_QEMU_PROCESS_START_PRETEND)) {
5495         if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
5496             VIR_DEBUG("Checking for KVM availability");
5497             if (!virFileExists("/dev/kvm")) {
5498                 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5499                                _("Domain requires KVM, but it is not available. "
5500                                  "Check that virtualization is enabled in the "
5501                                  "host BIOS, and host configuration is setup to "
5502                                  "load the kvm modules."));
5503                 return -1;
5504             }
5505         }
5506 
5507         VIR_DEBUG("Checking domain and device security labels");
5508         if (qemuSecurityCheckAllLabel(driver->securityManager, vm->def) < 0)
5509             return -1;
5510 
5511     }
5512 
5513     /* Checks below should not be executed when starting a qemu process for a
5514      * VM that was running before (migration, snapshots, save). It's more
5515      * important to start such VM than keep the configuration clean */
5516     if ((flags & VIR_QEMU_PROCESS_START_NEW) &&
5517         virDomainDefValidate(vm->def, 0, driver->xmlopt, qemuCaps) < 0)
5518         return -1;
5519 
5520     if (qemuProcessStartValidateGraphics(vm) < 0)
5521         return -1;
5522 
5523     if (qemuProcessStartValidateIOThreads(vm, qemuCaps) < 0)
5524         return -1;
5525 
5526     if (qemuProcessStartValidateShmem(vm) < 0)
5527         return -1;
5528 
5529     if (vm->def->cpu) {
5530         if (virCPUValidateFeatures(vm->def->os.arch, vm->def->cpu) < 0)
5531             return -1;
5532 
5533         if (ARCH_IS_X86(vm->def->os.arch) &&
5534             !virQEMUCapsGet(qemuCaps, QEMU_CAPS_CPU_UNAVAILABLE_FEATURES)) {
5535             g_auto(GStrv) features = NULL;
5536             int n;
5537 
5538             if ((n = virCPUDefCheckFeatures(vm->def->cpu,
5539                                             virCPUx86FeatureFilterSelectMSR,
5540                                             NULL,
5541                                             &features)) < 0)
5542                 return -1;
5543 
5544             if (n > 0) {
5545                 g_autofree char *str = NULL;
5546 
5547                 str = g_strjoinv(", ", features);
5548                 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
5549                                _("Some features cannot be reliably used "
5550                                  "with this QEMU: %s"), str);
5551                 return -1;
5552             }
5553         }
5554     }
5555 
5556     if (qemuProcessStartValidateDisks(vm, qemuCaps) < 0)
5557         return -1;
5558 
5559     if (qemuProcessStartValidateTSC(driver, vm) < 0)
5560         return -1;
5561 
5562     VIR_DEBUG("Checking for any possible (non-fatal) issues");
5563 
5564     qemuProcessStartWarnShmem(vm);
5565 
5566     return 0;
5567 }
5568 
5569 
5570 static int
qemuProcessStartUpdateCustomCaps(virDomainObj * vm)5571 qemuProcessStartUpdateCustomCaps(virDomainObj *vm)
5572 {
5573     qemuDomainObjPrivate *priv = vm->privateData;
5574     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
5575     qemuDomainXmlNsDef *nsdef = vm->def->namespaceData;
5576     char **next;
5577     int tmp;
5578 
5579     if (cfg->capabilityfilters) {
5580         for (next = cfg->capabilityfilters; *next; next++) {
5581             if ((tmp = virQEMUCapsTypeFromString(*next)) < 0) {
5582                 virReportError(VIR_ERR_INTERNAL_ERROR,
5583                                _("invalid capability_filters capability '%s'"),
5584                                *next);
5585                 return -1;
5586             }
5587 
5588             virQEMUCapsClear(priv->qemuCaps, tmp);
5589         }
5590     }
5591 
5592     if (nsdef) {
5593         for (next = nsdef->capsadd; next && *next; next++) {
5594             if ((tmp = virQEMUCapsTypeFromString(*next)) < 0) {
5595                 virReportError(VIR_ERR_INTERNAL_ERROR,
5596                                _("invalid qemu namespace capability '%s'"),
5597                                *next);
5598                 return -1;
5599             }
5600 
5601             virQEMUCapsSet(priv->qemuCaps, tmp);
5602         }
5603 
5604         for (next = nsdef->capsdel; next && *next; next++) {
5605             if ((tmp = virQEMUCapsTypeFromString(*next)) < 0) {
5606                 virReportError(VIR_ERR_INTERNAL_ERROR,
5607                                _("invalid qemu namespace capability '%s'"),
5608                                *next);
5609                 return -1;
5610             }
5611 
5612             virQEMUCapsClear(priv->qemuCaps, tmp);
5613         }
5614     }
5615 
5616     return 0;
5617 }
5618 
5619 
5620 /**
5621  * qemuProcessPrepareQEMUCaps:
5622  * @vm: domain object
5623  * @qemuCapsCache: cache of QEMU capabilities
5624  * @processStartFlags: flags based on the VIR_QEMU_PROCESS_START_* enum
5625  *
5626  * Prepare the capabilities of a QEMU process for startup. This includes
5627  * copying the caps to a static cache and potential post-processing depending
5628  * on the configuration of the VM and startup process.
5629  *
5630  * Returns 0 on success, -1 on error.
5631  */
5632 static int
qemuProcessPrepareQEMUCaps(virDomainObj * vm,virFileCache * qemuCapsCache,unsigned int processStartFlags)5633 qemuProcessPrepareQEMUCaps(virDomainObj *vm,
5634                            virFileCache *qemuCapsCache,
5635                            unsigned int processStartFlags)
5636 {
5637     qemuDomainObjPrivate *priv = vm->privateData;
5638 
5639     virObjectUnref(priv->qemuCaps);
5640     if (!(priv->qemuCaps = virQEMUCapsCacheLookupCopy(qemuCapsCache,
5641                                                       vm->def->virtType,
5642                                                       vm->def->emulator,
5643                                                       vm->def->os.machine)))
5644         return -1;
5645 
5646     if (processStartFlags & VIR_QEMU_PROCESS_START_STANDALONE)
5647         virQEMUCapsClear(priv->qemuCaps, QEMU_CAPS_CHARDEV_FD_PASS_COMMANDLINE);
5648 
5649     /* Update qemu capabilities according to lists passed in via namespace */
5650     if (qemuProcessStartUpdateCustomCaps(vm) < 0)
5651         return -1;
5652 
5653     /* re-process capability lockouts since we might have removed capabilities */
5654     virQEMUCapsInitProcessCapsInterlock(priv->qemuCaps);
5655 
5656     return 0;
5657 }
5658 
5659 
5660 /**
5661  * qemuProcessInit:
5662  *
5663  * Prepares the domain up to the point when priv->qemuCaps is initialized. The
5664  * function calls qemuProcessStop when needed.
5665  *
5666  * Flag VIR_QEMU_PROCESS_START_PRETEND tells, that we don't want to actually
5667  * start the domain but create a valid qemu command.  If some code shouldn't be
5668  * executed in this case, make sure to check this flag.
5669  *
5670  * Returns 0 on success, -1 on error.
5671  */
5672 int
qemuProcessInit(virQEMUDriver * driver,virDomainObj * vm,virCPUDef * updatedCPU,qemuDomainAsyncJob asyncJob,bool migration,unsigned int flags)5673 qemuProcessInit(virQEMUDriver *driver,
5674                 virDomainObj *vm,
5675                 virCPUDef *updatedCPU,
5676                 qemuDomainAsyncJob asyncJob,
5677                 bool migration,
5678                 unsigned int flags)
5679 {
5680     qemuDomainObjPrivate *priv = vm->privateData;
5681     int stopFlags;
5682     virCPUDef *origCPU = NULL;
5683     int ret = -1;
5684 
5685     VIR_DEBUG("vm=%p name=%s id=%d migration=%d",
5686               vm, vm->def->name, vm->def->id, migration);
5687 
5688     VIR_DEBUG("Beginning VM startup process");
5689 
5690     if (virDomainObjIsActive(vm)) {
5691         virReportError(VIR_ERR_OPERATION_INVALID, "%s",
5692                        _("VM is already active"));
5693         goto cleanup;
5694     }
5695 
5696     /* in case when the post parse callback failed we need to re-run it on the
5697      * old config prior we start the VM */
5698     if (vm->def->postParseFailed) {
5699         VIR_DEBUG("re-running the post parse callback");
5700 
5701         /* we don't have the private copy of qemuCaps at this point */
5702         if (virDomainDefPostParse(vm->def, 0, driver->xmlopt, NULL) < 0)
5703             goto cleanup;
5704     }
5705 
5706     VIR_DEBUG("Determining emulator version");
5707     if (qemuProcessPrepareQEMUCaps(vm, driver->qemuCapsCache, flags) < 0)
5708         goto cleanup;
5709 
5710     if (qemuDomainUpdateCPU(vm, updatedCPU, &origCPU) < 0)
5711         goto cleanup;
5712 
5713     if (qemuProcessStartValidate(driver, vm, priv->qemuCaps, flags) < 0)
5714         goto cleanup;
5715 
5716     /* Do this upfront, so any part of the startup process can add
5717      * runtime state to vm->def that won't be persisted. This let's us
5718      * report implicit runtime defaults in the XML, like vnc listen/socket
5719      */
5720     VIR_DEBUG("Setting current domain def as transient");
5721     if (virDomainObjSetDefTransient(driver->xmlopt, vm, priv->qemuCaps) < 0)
5722         goto cleanup;
5723 
5724     if (flags & VIR_QEMU_PROCESS_START_PRETEND) {
5725         if (qemuDomainSetPrivatePaths(driver, vm) < 0) {
5726             virDomainObjRemoveTransientDef(vm);
5727             goto cleanup;
5728         }
5729     } else {
5730         vm->def->id = qemuDriverAllocateID(driver);
5731         qemuDomainSetFakeReboot(driver, vm, false);
5732         virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_STARTING_UP);
5733 
5734         if (g_atomic_int_add(&driver->nactive, 1) == 0 && driver->inhibitCallback)
5735             driver->inhibitCallback(true, driver->inhibitOpaque);
5736 
5737         /* Run an early hook to set-up missing devices */
5738         if (qemuProcessStartHook(driver, vm,
5739                                  VIR_HOOK_QEMU_OP_PREPARE,
5740                                  VIR_HOOK_SUBOP_BEGIN) < 0)
5741             goto stop;
5742 
5743         if (qemuDomainSetPrivatePaths(driver, vm) < 0)
5744             goto stop;
5745 
5746         priv->origCPU = g_steal_pointer(&origCPU);
5747     }
5748 
5749     ret = 0;
5750 
5751  cleanup:
5752     virCPUDefFree(origCPU);
5753     return ret;
5754 
5755  stop:
5756     stopFlags = VIR_QEMU_PROCESS_STOP_NO_RELABEL;
5757     if (migration)
5758         stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
5759     qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
5760     goto cleanup;
5761 }
5762 
5763 
5764 /**
5765  * qemuProcessNetworkPrepareDevices
5766  */
5767 static int
qemuProcessNetworkPrepareDevices(virQEMUDriver * driver,virDomainObj * vm)5768 qemuProcessNetworkPrepareDevices(virQEMUDriver *driver,
5769                                  virDomainObj *vm)
5770 {
5771     virDomainDef *def = vm->def;
5772     qemuDomainObjPrivate *priv = vm->privateData;
5773     size_t i;
5774     g_autoptr(virConnect) conn = NULL;
5775 
5776     for (i = 0; i < def->nnets; i++) {
5777         virDomainNetDef *net = def->nets[i];
5778         virDomainNetType actualType;
5779 
5780         /* If appropriate, grab a physical device from the configured
5781          * network's pool of devices, or resolve bridge device name
5782          * to the one defined in the network definition.
5783          */
5784         if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
5785             if (!conn && !(conn = virGetConnectNetwork()))
5786                 return -1;
5787             if (virDomainNetAllocateActualDevice(conn, def, net) < 0)
5788                 return -1;
5789         }
5790 
5791         actualType = virDomainNetGetActualType(net);
5792         if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV &&
5793             net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
5794             /* Each type='hostdev' network device must also have a
5795              * corresponding entry in the hostdevs array. For netdevs
5796              * that are hardcoded as type='hostdev', this is already
5797              * done by the parser, but for those allocated from a
5798              * network / determined at runtime, we need to do it
5799              * separately.
5800              */
5801             virDomainHostdevDef *hostdev = virDomainNetGetActualHostdev(net);
5802             virDomainHostdevSubsysPCI *pcisrc = &hostdev->source.subsys.u.pci;
5803 
5804             if (virDomainHostdevFind(def, hostdev, NULL) >= 0) {
5805                 virReportError(VIR_ERR_INTERNAL_ERROR,
5806                                _("PCI device %04x:%02x:%02x.%x "
5807                                  "allocated from network %s is already "
5808                                  "in use by domain %s"),
5809                                pcisrc->addr.domain, pcisrc->addr.bus,
5810                                pcisrc->addr.slot, pcisrc->addr.function,
5811                                net->data.network.name, def->name);
5812                 return -1;
5813             }
5814             if (virDomainHostdevInsert(def, hostdev) < 0)
5815                 return -1;
5816         } else if (actualType == VIR_DOMAIN_NET_TYPE_USER &&
5817                    !priv->disableSlirp &&
5818                    virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DBUS_VMSTATE)) {
5819             qemuSlirp *slirp = NULL;
5820             int rv = qemuInterfacePrepareSlirp(driver, net, &slirp);
5821 
5822             if (rv == -1)
5823                 return -1;
5824             if (rv == 1)
5825                 QEMU_DOMAIN_NETWORK_PRIVATE(net)->slirp = slirp;
5826          }
5827 
5828     }
5829     return 0;
5830 }
5831 
5832 
5833 /**
5834  * qemuProcessSetupVcpu:
5835  * @vm: domain object
5836  * @vcpuid: id of VCPU to set defaults
5837  *
5838  * This function sets resource properties (cgroups, affinity, scheduler) for a
5839  * vCPU. This function expects that the vCPU is online and the vCPU pids were
5840  * correctly detected at the point when it's called.
5841  *
5842  * Returns 0 on success, -1 on error.
5843  */
5844 int
qemuProcessSetupVcpu(virDomainObj * vm,unsigned int vcpuid)5845 qemuProcessSetupVcpu(virDomainObj *vm,
5846                      unsigned int vcpuid)
5847 {
5848     pid_t vcpupid = qemuDomainGetVcpuPid(vm, vcpuid);
5849     virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, vcpuid);
5850     virDomainResctrlMonDef *mon = NULL;
5851     size_t i = 0;
5852 
5853     if (qemuProcessSetupPid(vm, vcpupid, VIR_CGROUP_THREAD_VCPU,
5854                             vcpuid, vcpu->cpumask,
5855                             vm->def->cputune.period,
5856                             vm->def->cputune.quota,
5857                             &vcpu->sched) < 0)
5858         return -1;
5859 
5860     for (i = 0; i < vm->def->nresctrls; i++) {
5861         size_t j = 0;
5862         virDomainResctrlDef *ct = vm->def->resctrls[i];
5863 
5864         if (virBitmapIsBitSet(ct->vcpus, vcpuid)) {
5865             if (virResctrlAllocAddPID(ct->alloc, vcpupid) < 0)
5866                 return -1;
5867 
5868             for (j = 0; j < ct->nmonitors; j++) {
5869                 mon = ct->monitors[j];
5870 
5871                 if (virBitmapEqual(ct->vcpus, mon->vcpus) &&
5872                     !virResctrlAllocIsEmpty(ct->alloc))
5873                     continue;
5874 
5875                 if (virBitmapIsBitSet(mon->vcpus, vcpuid)) {
5876                     if (virResctrlMonitorAddPID(mon->instance, vcpupid) < 0)
5877                         return -1;
5878                     break;
5879                 }
5880             }
5881 
5882             break;
5883         }
5884     }
5885 
5886     return 0;
5887 }
5888 
5889 
5890 static int
qemuProcessSetupVcpus(virDomainObj * vm)5891 qemuProcessSetupVcpus(virDomainObj *vm)
5892 {
5893     virDomainVcpuDef *vcpu;
5894     unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
5895     size_t i;
5896 
5897     if ((vm->def->cputune.period || vm->def->cputune.quota) &&
5898         !virCgroupHasController(((qemuDomainObjPrivate *) vm->privateData)->cgroup,
5899                                 VIR_CGROUP_CONTROLLER_CPU)) {
5900         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5901                        _("cgroup cpu is required for scheduler tuning"));
5902         return -1;
5903     }
5904 
5905     if (!qemuDomainHasVcpuPids(vm)) {
5906         /* If any CPU has custom affinity that differs from the
5907          * VM default affinity, we must reject it */
5908         for (i = 0; i < maxvcpus; i++) {
5909             vcpu = virDomainDefGetVcpu(vm->def, i);
5910 
5911             if (!vcpu->online)
5912                 continue;
5913 
5914             if (vcpu->cpumask &&
5915                 !virBitmapEqual(vm->def->cpumask, vcpu->cpumask)) {
5916                 virReportError(VIR_ERR_OPERATION_INVALID, "%s",
5917                                 _("cpu affinity is not supported"));
5918                 return -1;
5919             }
5920         }
5921 
5922         return 0;
5923     }
5924 
5925     for (i = 0; i < maxvcpus; i++) {
5926         vcpu = virDomainDefGetVcpu(vm->def, i);
5927 
5928         if (!vcpu->online)
5929             continue;
5930 
5931         if (qemuProcessSetupVcpu(vm, i) < 0)
5932             return -1;
5933     }
5934 
5935     return 0;
5936 }
5937 
5938 
5939 int
qemuProcessSetupIOThread(virDomainObj * vm,virDomainIOThreadIDDef * iothread)5940 qemuProcessSetupIOThread(virDomainObj *vm,
5941                          virDomainIOThreadIDDef *iothread)
5942 {
5943     return qemuProcessSetupPid(vm, iothread->thread_id,
5944                                VIR_CGROUP_THREAD_IOTHREAD,
5945                                iothread->iothread_id,
5946                                iothread->cpumask,
5947                                vm->def->cputune.iothread_period,
5948                                vm->def->cputune.iothread_quota,
5949                                &iothread->sched);
5950 }
5951 
5952 
5953 static int
qemuProcessSetupIOThreads(virDomainObj * vm)5954 qemuProcessSetupIOThreads(virDomainObj *vm)
5955 {
5956     size_t i;
5957 
5958     for (i = 0; i < vm->def->niothreadids; i++) {
5959         virDomainIOThreadIDDef *info = vm->def->iothreadids[i];
5960 
5961         if (qemuProcessSetupIOThread(vm, info) < 0)
5962             return -1;
5963     }
5964 
5965     return 0;
5966 }
5967 
5968 
5969 static int
qemuProcessValidateHotpluggableVcpus(virDomainDef * def)5970 qemuProcessValidateHotpluggableVcpus(virDomainDef *def)
5971 {
5972     virDomainVcpuDef *vcpu;
5973     virDomainVcpuDef *subvcpu;
5974     qemuDomainVcpuPrivate *vcpupriv;
5975     unsigned int maxvcpus = virDomainDefGetVcpusMax(def);
5976     size_t i = 0;
5977     size_t j;
5978     virBitmap *ordermap = virBitmapNew(maxvcpus + 1);
5979     int ret = -1;
5980 
5981     /* validate:
5982      * - all hotpluggable entities to be hotplugged have the correct data
5983      * - vcpus belonging to a hotpluggable entity share configuration
5984      * - order of the hotpluggable entities is unique
5985      */
5986     for (i = 0; i < maxvcpus; i++) {
5987         vcpu = virDomainDefGetVcpu(def, i);
5988         vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpu);
5989 
5990         /* skip over hotpluggable entities  */
5991         if (vcpupriv->vcpus == 0)
5992             continue;
5993 
5994         if (vcpu->order != 0) {
5995             if (virBitmapIsBitSet(ordermap, vcpu->order)) {
5996                 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
5997                                _("duplicate vcpu order '%u'"), vcpu->order);
5998                 goto cleanup;
5999             }
6000 
6001             if (virBitmapSetBit(ordermap, vcpu->order)) {
6002                 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6003                                _("vcpu order '%u' exceeds vcpu count"),
6004                                vcpu->order);
6005                 goto cleanup;
6006             }
6007         }
6008 
6009         for (j = i + 1; j < (i + vcpupriv->vcpus); j++) {
6010             subvcpu = virDomainDefGetVcpu(def, j);
6011             if (subvcpu->hotpluggable != vcpu->hotpluggable ||
6012                 subvcpu->online != vcpu->online ||
6013                 subvcpu->order != vcpu->order) {
6014                 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6015                                _("vcpus '%zu' and '%zu' are in the same hotplug "
6016                                  "group but differ in configuration"), i, j);
6017                 goto cleanup;
6018             }
6019         }
6020 
6021         if (vcpu->online && vcpu->hotpluggable == VIR_TRISTATE_BOOL_YES) {
6022             if ((vcpupriv->socket_id == -1 && vcpupriv->core_id == -1 &&
6023                  vcpupriv->thread_id == -1 && vcpupriv->node_id == -1) ||
6024                 !vcpupriv->type) {
6025                 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6026                                _("vcpu '%zu' is missing hotplug data"), i);
6027                 goto cleanup;
6028             }
6029         }
6030     }
6031 
6032     ret = 0;
6033  cleanup:
6034     virBitmapFree(ordermap);
6035     return ret;
6036 }
6037 
6038 
6039 static int
qemuDomainHasHotpluggableStartupVcpus(virDomainDef * def)6040 qemuDomainHasHotpluggableStartupVcpus(virDomainDef *def)
6041 {
6042     size_t maxvcpus = virDomainDefGetVcpusMax(def);
6043     virDomainVcpuDef *vcpu;
6044     size_t i;
6045 
6046     for (i = 0; i < maxvcpus; i++) {
6047         vcpu = virDomainDefGetVcpu(def, i);
6048 
6049         if (vcpu->online && vcpu->hotpluggable == VIR_TRISTATE_BOOL_YES)
6050             return true;
6051     }
6052 
6053     return false;
6054 }
6055 
6056 
6057 static int
qemuProcessVcpusSortOrder(const void * a,const void * b)6058 qemuProcessVcpusSortOrder(const void *a,
6059                           const void *b)
6060 {
6061     virDomainVcpuDef *vcpua = *((virDomainVcpuDef **)a);
6062     virDomainVcpuDef *vcpub = *((virDomainVcpuDef **)b);
6063 
6064     return vcpua->order - vcpub->order;
6065 }
6066 
6067 
6068 static int
qemuProcessSetupHotpluggableVcpus(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob)6069 qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
6070                                   virDomainObj *vm,
6071                                   qemuDomainAsyncJob asyncJob)
6072 {
6073     unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
6074     qemuDomainObjPrivate *priv = vm->privateData;
6075     qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
6076     virDomainVcpuDef *vcpu;
6077     qemuDomainVcpuPrivate *vcpupriv;
6078     size_t i;
6079     int ret = -1;
6080     int rc;
6081 
6082     g_autofree virDomainVcpuDef **bootHotplug = NULL;
6083     size_t nbootHotplug = 0;
6084 
6085     for (i = 0; i < maxvcpus; i++) {
6086         vcpu = virDomainDefGetVcpu(vm->def, i);
6087         vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpu);
6088 
6089         if (vcpu->hotpluggable == VIR_TRISTATE_BOOL_YES && vcpu->online &&
6090             vcpupriv->vcpus != 0) {
6091             vcpupriv->alias = g_strdup_printf("vcpu%zu", i);
6092 
6093             VIR_APPEND_ELEMENT(bootHotplug, nbootHotplug, vcpu);
6094         }
6095     }
6096 
6097     if (nbootHotplug == 0)
6098         return 0;
6099 
6100     qsort(bootHotplug, nbootHotplug, sizeof(*bootHotplug),
6101           qemuProcessVcpusSortOrder);
6102 
6103     if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
6104         goto cleanup;
6105 
6106     for (i = 0; i < nbootHotplug; i++) {
6107         g_autoptr(virJSONValue) vcpuprops = NULL;
6108         vcpu = bootHotplug[i];
6109 
6110         if (!(vcpuprops = qemuBuildHotpluggableCPUProps(vcpu)))
6111             goto cleanup;
6112 
6113         if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
6114             goto cleanup;
6115 
6116         rc = qemuMonitorAddDeviceProps(qemuDomainGetMonitor(vm), &vcpuprops);
6117 
6118         if (qemuDomainObjExitMonitor(driver, vm) < 0)
6119             goto cleanup;
6120 
6121         if (rc < 0)
6122             goto cleanup;
6123     }
6124 
6125     ret = 0;
6126 
6127  cleanup:
6128     qemuCgroupEmulatorAllNodesRestore(emulatorCgroup);
6129     return ret;
6130 }
6131 
6132 
6133 static bool
qemuProcessDropUnknownCPUFeatures(const char * name,virCPUFeaturePolicy policy,void * opaque)6134 qemuProcessDropUnknownCPUFeatures(const char *name,
6135                                   virCPUFeaturePolicy policy,
6136                                   void *opaque)
6137 {
6138     const char **features = opaque;
6139 
6140     if (policy != VIR_CPU_FEATURE_DISABLE &&
6141         policy != VIR_CPU_FEATURE_FORBID)
6142         return true;
6143 
6144     if (g_strv_contains(features, name))
6145         return true;
6146 
6147     /* Features unknown to QEMU are implicitly disabled, we can just drop them
6148      * from the definition. */
6149     return false;
6150 }
6151 
6152 
6153 static int
qemuProcessUpdateGuestCPU(virDomainDef * def,virQEMUCaps * qemuCaps,virArch hostarch,unsigned int flags)6154 qemuProcessUpdateGuestCPU(virDomainDef *def,
6155                           virQEMUCaps *qemuCaps,
6156                           virArch hostarch,
6157                           unsigned int flags)
6158 {
6159     if (!def->cpu)
6160         return 0;
6161 
6162     /* nothing to do if only topology part of CPU def is used */
6163     if (def->cpu->mode == VIR_CPU_MODE_CUSTOM && !def->cpu->model)
6164         return 0;
6165 
6166     /* Old libvirt added host CPU model to host-model CPUs for migrations,
6167      * while new libvirt just turns host-model into custom mode. We need
6168      * to fix the mode to maintain backward compatibility and to avoid
6169      * the CPU model to be replaced in virCPUUpdate.
6170      */
6171     if (!(flags & VIR_QEMU_PROCESS_START_NEW) &&
6172         ARCH_IS_X86(def->os.arch) &&
6173         def->cpu->mode == VIR_CPU_MODE_HOST_MODEL &&
6174         def->cpu->model) {
6175         def->cpu->mode = VIR_CPU_MODE_CUSTOM;
6176     }
6177 
6178     if (!virQEMUCapsIsCPUModeSupported(qemuCaps, hostarch, def->virtType,
6179                                        def->cpu->mode, def->os.machine)) {
6180         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6181                        _("CPU mode '%s' for %s %s domain on %s host is not "
6182                          "supported by hypervisor"),
6183                        virCPUModeTypeToString(def->cpu->mode),
6184                        virArchToString(def->os.arch),
6185                        virDomainVirtTypeToString(def->virtType),
6186                        virArchToString(hostarch));
6187         return -1;
6188     }
6189 
6190     if (virCPUConvertLegacy(hostarch, def->cpu) < 0)
6191         return -1;
6192 
6193     if (def->cpu->check != VIR_CPU_CHECK_NONE) {
6194         virCPUDef *host;
6195 
6196         host = virQEMUCapsGetHostModel(qemuCaps, def->virtType,
6197                                        VIR_QEMU_CAPS_HOST_CPU_FULL);
6198 
6199         if (host && virCPUCheckForbiddenFeatures(def->cpu, host) < 0)
6200             return -1;
6201     }
6202 
6203     /* nothing to update for host-passthrough / maximum */
6204     if (def->cpu->mode != VIR_CPU_MODE_HOST_PASSTHROUGH &&
6205         def->cpu->mode != VIR_CPU_MODE_MAXIMUM) {
6206         g_autoptr(virDomainCapsCPUModels) cpuModels = NULL;
6207 
6208         if (def->cpu->check == VIR_CPU_CHECK_PARTIAL &&
6209             virCPUCompare(hostarch,
6210                           virQEMUCapsGetHostModel(qemuCaps, def->virtType,
6211                                                   VIR_QEMU_CAPS_HOST_CPU_FULL),
6212                           def->cpu, true) < 0)
6213             return -1;
6214 
6215         if (virCPUUpdate(def->os.arch, def->cpu,
6216                          virQEMUCapsGetHostModel(qemuCaps, def->virtType,
6217                                                  VIR_QEMU_CAPS_HOST_CPU_MIGRATABLE)) < 0)
6218             return -1;
6219 
6220         cpuModels = virQEMUCapsGetCPUModels(qemuCaps, def->virtType, NULL, NULL);
6221 
6222         if (virCPUTranslate(def->os.arch, def->cpu, cpuModels) < 0)
6223             return -1;
6224 
6225         def->cpu->fallback = VIR_CPU_FALLBACK_FORBID;
6226     }
6227 
6228     if (virCPUDefFilterFeatures(def->cpu, virQEMUCapsCPUFilterFeatures,
6229                                 &def->os.arch) < 0)
6230         return -1;
6231 
6232     if (ARCH_IS_X86(def->os.arch)) {
6233         g_auto(GStrv) features = NULL;
6234 
6235         if (virQEMUCapsGetCPUFeatures(qemuCaps, def->virtType, false, &features) < 0)
6236             return -1;
6237 
6238         if (features &&
6239             virCPUDefFilterFeatures(def->cpu, qemuProcessDropUnknownCPUFeatures,
6240                                     features) < 0)
6241             return -1;
6242     }
6243 
6244     return 0;
6245 }
6246 
6247 
6248 static int
qemuProcessPrepareDomainNUMAPlacement(virDomainObj * vm)6249 qemuProcessPrepareDomainNUMAPlacement(virDomainObj *vm)
6250 {
6251     qemuDomainObjPrivate *priv = vm->privateData;
6252     g_autofree char *nodeset = NULL;
6253     g_autoptr(virBitmap) numadNodeset = NULL;
6254     g_autoptr(virBitmap) hostMemoryNodeset = NULL;
6255     g_autoptr(virCapsHostNUMA) caps = NULL;
6256 
6257     /* Get the advisory nodeset from numad if 'placement' of
6258      * either <vcpu> or <numatune> is 'auto'.
6259      */
6260     if (!virDomainDefNeedsPlacementAdvice(vm->def))
6261         return 0;
6262 
6263     nodeset = virNumaGetAutoPlacementAdvice(virDomainDefGetVcpus(vm->def),
6264                                             virDomainDefGetMemoryTotal(vm->def));
6265 
6266     if (!nodeset)
6267         return -1;
6268 
6269     if (!(hostMemoryNodeset = virNumaGetHostMemoryNodeset()))
6270         return -1;
6271 
6272     VIR_DEBUG("Nodeset returned from numad: %s", nodeset);
6273 
6274     if (virBitmapParse(nodeset, &numadNodeset, VIR_DOMAIN_CPUMASK_LEN) < 0)
6275         return -1;
6276 
6277     if (!(caps = virCapabilitiesHostNUMANewHost()))
6278         return -1;
6279 
6280     /* numad may return a nodeset that only contains cpus but cgroups don't play
6281      * well with that. Set the autoCpuset from all cpus from that nodeset, but
6282      * assign autoNodeset only with nodes containing memory. */
6283     if (!(priv->autoCpuset = virCapabilitiesHostNUMAGetCpus(caps, numadNodeset)))
6284         return -1;
6285 
6286     virBitmapIntersect(numadNodeset, hostMemoryNodeset);
6287 
6288     priv->autoNodeset = g_steal_pointer(&numadNodeset);
6289 
6290     return 0;
6291 }
6292 
6293 
6294 static void
qemuProcessPrepareDeviceBootorder(virDomainDef * def)6295 qemuProcessPrepareDeviceBootorder(virDomainDef *def)
6296 {
6297     size_t i;
6298     unsigned int bootCD = 0;
6299     unsigned int bootFloppy = 0;
6300     unsigned int bootDisk = 0;
6301     unsigned int bootNetwork = 0;
6302 
6303     if (def->os.nBootDevs == 0)
6304         return;
6305 
6306     for (i = 0; i < def->os.nBootDevs; i++) {
6307         switch ((virDomainBootOrder) def->os.bootDevs[i]) {
6308         case VIR_DOMAIN_BOOT_CDROM:
6309             bootCD = i + 1;
6310             break;
6311 
6312         case VIR_DOMAIN_BOOT_FLOPPY:
6313             bootFloppy = i + 1;
6314             break;
6315 
6316         case VIR_DOMAIN_BOOT_DISK:
6317             bootDisk = i + 1;
6318             break;
6319 
6320         case VIR_DOMAIN_BOOT_NET:
6321             bootNetwork = i + 1;
6322             break;
6323 
6324         case VIR_DOMAIN_BOOT_LAST:
6325         default:
6326             break;
6327         }
6328     }
6329 
6330     for (i = 0; i < def->ndisks; i++) {
6331         virDomainDiskDef *disk = def->disks[i];
6332 
6333         switch (disk->device) {
6334         case VIR_DOMAIN_DISK_DEVICE_CDROM:
6335             disk->info.effectiveBootIndex = bootCD;
6336             bootCD = 0;
6337             break;
6338 
6339         case VIR_DOMAIN_DISK_DEVICE_DISK:
6340         case VIR_DOMAIN_DISK_DEVICE_LUN:
6341             disk->info.effectiveBootIndex = bootDisk;
6342             bootDisk = 0;
6343             break;
6344 
6345         case VIR_DOMAIN_DISK_DEVICE_FLOPPY:
6346             disk->info.effectiveBootIndex = bootFloppy;
6347             bootFloppy = 0;
6348             break;
6349 
6350         case VIR_DOMAIN_DISK_DEVICE_LAST:
6351         default:
6352             break;
6353         }
6354     }
6355 
6356     if (def->nnets > 0 && bootNetwork > 0) {
6357         /* If network boot is enabled, the first network device gets enabled. If
6358          * that one is backed by a host device, then we need to find the first
6359          * corresponding host device */
6360         if (virDomainNetGetActualType(def->nets[0]) == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
6361             for (i = 0; i < def->nhostdevs; i++) {
6362                 virDomainHostdevDef *hostdev = def->hostdevs[i];
6363                 virDomainHostdevSubsys *subsys = &hostdev->source.subsys;
6364 
6365                 if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
6366                     subsys->type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
6367                     hostdev->info->type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_UNASSIGNED &&
6368                     hostdev->parentnet) {
6369                     hostdev->info->effectiveBootIndex = bootNetwork;
6370                     break;
6371                 }
6372             }
6373         } else {
6374             def->nets[0]->info.effectiveBootIndex = bootNetwork;
6375         }
6376     }
6377 }
6378 
6379 
6380 static int
qemuProcessPrepareDomainStorage(virQEMUDriver * driver,virDomainObj * vm,qemuDomainObjPrivate * priv,virQEMUDriverConfig * cfg,unsigned int flags)6381 qemuProcessPrepareDomainStorage(virQEMUDriver *driver,
6382                                 virDomainObj *vm,
6383                                 qemuDomainObjPrivate *priv,
6384                                 virQEMUDriverConfig *cfg,
6385                                 unsigned int flags)
6386 {
6387     size_t i;
6388     bool cold_boot = flags & VIR_QEMU_PROCESS_START_COLD;
6389 
6390     for (i = vm->def->ndisks; i > 0; i--) {
6391         size_t idx = i - 1;
6392         virDomainDiskDef *disk = vm->def->disks[idx];
6393 
6394         if (virDomainDiskTranslateSourcePool(disk) < 0) {
6395             if (qemuDomainCheckDiskStartupPolicy(driver, vm, idx, cold_boot) < 0)
6396                 return -1;
6397 
6398             /* disk source was dropped */
6399             continue;
6400         }
6401 
6402         if (qemuDomainPrepareDiskSource(disk, priv, cfg) < 0)
6403             return -1;
6404     }
6405 
6406     return 0;
6407 }
6408 
6409 
6410 static int
qemuProcessPrepareDomainHostdevs(virDomainObj * vm,qemuDomainObjPrivate * priv)6411 qemuProcessPrepareDomainHostdevs(virDomainObj *vm,
6412                                  qemuDomainObjPrivate *priv)
6413 {
6414     size_t i;
6415 
6416     for (i = 0; i < vm->def->nhostdevs; i++) {
6417         virDomainHostdevDef *hostdev = vm->def->hostdevs[i];
6418 
6419         if (qemuDomainPrepareHostdev(hostdev, priv) < 0)
6420             return -1;
6421     }
6422 
6423     return 0;
6424 }
6425 
6426 
6427 int
qemuProcessPrepareHostHostdev(virDomainHostdevDef * hostdev)6428 qemuProcessPrepareHostHostdev(virDomainHostdevDef *hostdev)
6429 {
6430     if (virHostdevIsSCSIDevice(hostdev)) {
6431         virDomainHostdevSubsysSCSI *scsisrc = &hostdev->source.subsys.u.scsi;
6432 
6433         switch ((virDomainHostdevSCSIProtocolType) scsisrc->protocol) {
6434         case VIR_DOMAIN_HOSTDEV_SCSI_PROTOCOL_TYPE_NONE: {
6435             virDomainHostdevSubsysSCSIHost *scsihostsrc = &scsisrc->u.host;
6436             virStorageSource *src = scsisrc->u.host.src;
6437             g_autofree char *devstr = NULL;
6438 
6439             if (!(devstr = virSCSIDeviceGetSgName(NULL,
6440                                                   scsihostsrc->adapter,
6441                                                   scsihostsrc->bus,
6442                                                   scsihostsrc->target,
6443                                                   scsihostsrc->unit)))
6444                 return -1;
6445 
6446             src->path = g_strdup_printf("/dev/%s", devstr);
6447             break;
6448         }
6449 
6450         case VIR_DOMAIN_HOSTDEV_SCSI_PROTOCOL_TYPE_ISCSI:
6451             break;
6452 
6453         case VIR_DOMAIN_HOSTDEV_SCSI_PROTOCOL_TYPE_LAST:
6454         default:
6455             virReportEnumRangeError(virDomainHostdevSCSIProtocolType, scsisrc->protocol);
6456             return -1;
6457         }
6458     }
6459 
6460     return 0;
6461 }
6462 
6463 
6464 static int
qemuProcessPrepareHostHostdevs(virDomainObj * vm)6465 qemuProcessPrepareHostHostdevs(virDomainObj *vm)
6466 {
6467     size_t i;
6468 
6469     for (i = 0; i < vm->def->nhostdevs; i++) {
6470         virDomainHostdevDef *hostdev = vm->def->hostdevs[i];
6471 
6472         if (qemuProcessPrepareHostHostdev(hostdev) < 0)
6473             return -1;
6474     }
6475 
6476     return 0;
6477 }
6478 
6479 
6480 /**
6481  * qemuProcessRebootAllowed:
6482  * @def: domain definition
6483  *
6484  * This function encapsulates the logic which dictated whether '-no-reboot' was
6485  * used instead of '-no-shutdown' which is used  QEMU versions which don't
6486  * support the 'set-action' QMP command.
6487  */
6488 bool
qemuProcessRebootAllowed(const virDomainDef * def)6489 qemuProcessRebootAllowed(const virDomainDef *def)
6490 {
6491     return def->onReboot != VIR_DOMAIN_LIFECYCLE_ACTION_DESTROY ||
6492            def->onPoweroff != VIR_DOMAIN_LIFECYCLE_ACTION_DESTROY ||
6493            (def->onCrash != VIR_DOMAIN_LIFECYCLE_ACTION_DESTROY &&
6494             def->onCrash != VIR_DOMAIN_LIFECYCLE_ACTION_COREDUMP_DESTROY);
6495 }
6496 
6497 
6498 static void
qemuProcessPrepareAllowReboot(virDomainObj * vm)6499 qemuProcessPrepareAllowReboot(virDomainObj *vm)
6500 {
6501     virDomainDef *def = vm->def;
6502     qemuDomainObjPrivate *priv = vm->privateData;
6503 
6504     /* with 'set-action' QMP command we don't need to keep this around as
6505      * we always update qemu with the proper state */
6506     if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_SET_ACTION))
6507         return;
6508 
6509     if (priv->allowReboot != VIR_TRISTATE_BOOL_ABSENT)
6510         return;
6511 
6512     priv->allowReboot = virTristateBoolFromBool(qemuProcessRebootAllowed(def));
6513 }
6514 
6515 
6516 static int
qemuProcessUpdateSEVInfo(virDomainObj * vm)6517 qemuProcessUpdateSEVInfo(virDomainObj *vm)
6518 {
6519     qemuDomainObjPrivate *priv = vm->privateData;
6520     virQEMUCaps *qemuCaps = priv->qemuCaps;
6521     virDomainSEVDef *sev = &vm->def->sec->data.sev;
6522     virSEVCapability *sevCaps = NULL;
6523 
6524     /* if platform specific info like 'cbitpos' and 'reducedPhysBits' have
6525      * not been supplied, we need to autofill them from caps now as both are
6526      * mandatory on QEMU cmdline
6527      */
6528     sevCaps = virQEMUCapsGetSEVCapabilities(qemuCaps);
6529     if (!sev->haveCbitpos) {
6530         sev->cbitpos = sevCaps->cbitpos;
6531         sev->haveCbitpos = true;
6532     }
6533 
6534     if (!sev->haveReducedPhysBits) {
6535         sev->reduced_phys_bits = sevCaps->reduced_phys_bits;
6536         sev->haveReducedPhysBits = true;
6537     }
6538 
6539     return 0;
6540 }
6541 
6542 
6543 /**
6544  * qemuProcessPrepareDomain:
6545  * @driver: qemu driver
6546  * @vm: domain object
6547  * @flags: qemuProcessStartFlags
6548  *
6549  * This function groups all code that modifies only live XML of a domain which
6550  * is about to start and it's the only place to do those modifications.
6551  *
6552  * Flag VIR_QEMU_PROCESS_START_PRETEND tells, that we don't want to actually
6553  * start the domain but create a valid qemu command.  If some code shouldn't be
6554  * executed in this case, make sure to check this flag.
6555  *
6556  * TODO: move all XML modification from qemuBuildCommandLine into this function
6557  */
6558 int
qemuProcessPrepareDomain(virQEMUDriver * driver,virDomainObj * vm,unsigned int flags)6559 qemuProcessPrepareDomain(virQEMUDriver *driver,
6560                          virDomainObj *vm,
6561                          unsigned int flags)
6562 {
6563     size_t i;
6564     qemuDomainObjPrivate *priv = vm->privateData;
6565     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
6566 
6567     priv->machineName = qemuDomainGetMachineName(vm);
6568     if (!priv->machineName)
6569         return -1;
6570 
6571     if (!(flags & VIR_QEMU_PROCESS_START_PRETEND)) {
6572         /* If you are using a SecurityDriver with dynamic labelling,
6573            then generate a security label for isolation */
6574         VIR_DEBUG("Generating domain security label (if required)");
6575         if (qemuSecurityGenLabel(driver->securityManager, vm->def) < 0) {
6576             virDomainAuditSecurityLabel(vm, false);
6577             return -1;
6578         }
6579         virDomainAuditSecurityLabel(vm, true);
6580 
6581         if (qemuProcessPrepareDomainNUMAPlacement(vm) < 0)
6582             return -1;
6583     }
6584 
6585     /* Whether we should use virtlogd as stdio handler for character
6586      * devices source backend. */
6587     if (cfg->stdioLogD &&
6588         virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_CHARDEV_FILE_APPEND)) {
6589         priv->chardevStdioLogd = true;
6590     }
6591 
6592     /* Track if this domain remembers original owner */
6593     priv->rememberOwner = cfg->rememberOwner;
6594 
6595     qemuProcessPrepareAllowReboot(vm);
6596 
6597     /*
6598      * Normally PCI addresses are assigned in the virDomainCreate
6599      * or virDomainDefine methods. We might still need to assign
6600      * some here to cope with the question of upgrades. Regardless
6601      * we also need to populate the PCI address set cache for later
6602      * use in hotplug
6603      */
6604     VIR_DEBUG("Assigning domain PCI addresses");
6605     if ((qemuDomainAssignAddresses(vm->def, priv->qemuCaps, driver, vm,
6606                                    !!(flags & VIR_QEMU_PROCESS_START_NEW))) < 0) {
6607         return -1;
6608     }
6609 
6610     if (qemuAssignDeviceAliases(vm->def, priv->qemuCaps) < 0)
6611         return -1;
6612 
6613     qemuProcessPrepareDeviceBootorder(vm->def);
6614 
6615     VIR_DEBUG("Setting graphics devices");
6616     if (qemuProcessSetupGraphics(driver, vm, priv->qemuCaps, flags) < 0)
6617         return -1;
6618 
6619     VIR_DEBUG("Create domain masterKey");
6620     if (qemuDomainMasterKeyCreate(vm) < 0)
6621         return -1;
6622 
6623     VIR_DEBUG("Setting up storage");
6624     if (qemuProcessPrepareDomainStorage(driver, vm, priv, cfg, flags) < 0)
6625         return -1;
6626 
6627     VIR_DEBUG("Setting up host devices");
6628     if (qemuProcessPrepareDomainHostdevs(vm, priv) < 0)
6629         return -1;
6630 
6631     VIR_DEBUG("Prepare chardev source backends for TLS");
6632     qemuDomainPrepareChardevSource(vm->def, cfg);
6633 
6634     VIR_DEBUG("Prepare device secrets");
6635     if (qemuDomainSecretPrepare(driver, vm) < 0)
6636         return -1;
6637 
6638     VIR_DEBUG("Prepare bios/uefi paths");
6639     if (qemuFirmwareFillDomain(driver, vm->def, flags) < 0)
6640         return -1;
6641     if (qemuDomainInitializePflashStorageSource(vm) < 0)
6642         return -1;
6643 
6644     VIR_DEBUG("Preparing external devices");
6645     if (qemuExtDevicesPrepareDomain(driver, vm) < 0)
6646         return -1;
6647 
6648     if (flags & VIR_QEMU_PROCESS_START_NEW) {
6649         VIR_DEBUG("Aligning guest memory");
6650         if (qemuDomainAlignMemorySizes(vm->def) < 0)
6651             return -1;
6652     }
6653 
6654     for (i = 0; i < vm->def->nchannels; i++) {
6655         if (qemuDomainPrepareChannel(vm->def->channels[i],
6656                                      priv->channelTargetDir) < 0)
6657             return -1;
6658     }
6659 
6660     if (!(priv->monConfig = virDomainChrSourceDefNew(driver->xmlopt)))
6661         return -1;
6662 
6663     VIR_DEBUG("Preparing monitor state");
6664     if (qemuProcessPrepareMonitorChr(priv->monConfig, priv->libDir) < 0)
6665         return -1;
6666 
6667     priv->monError = false;
6668     priv->monStart = 0;
6669     priv->runningReason = VIR_DOMAIN_RUNNING_UNKNOWN;
6670     priv->pausedReason = VIR_DOMAIN_PAUSED_UNKNOWN;
6671 
6672     VIR_DEBUG("Updating guest CPU definition");
6673     if (qemuProcessUpdateGuestCPU(vm->def, priv->qemuCaps, driver->hostarch, flags) < 0)
6674         return -1;
6675 
6676     for (i = 0; i < vm->def->nshmems; i++)
6677         qemuDomainPrepareShmemChardev(vm->def->shmems[i]);
6678 
6679     if (vm->def->sec &&
6680         vm->def->sec->sectype == VIR_DOMAIN_LAUNCH_SECURITY_SEV) {
6681         VIR_DEBUG("Updating SEV platform info");
6682         if (qemuProcessUpdateSEVInfo(vm) < 0)
6683             return -1;
6684     }
6685 
6686     return 0;
6687 }
6688 
6689 
6690 static int
qemuProcessSEVCreateFile(virDomainObj * vm,const char * name,const char * data)6691 qemuProcessSEVCreateFile(virDomainObj *vm,
6692                          const char *name,
6693                          const char *data)
6694 {
6695     qemuDomainObjPrivate *priv = vm->privateData;
6696     virQEMUDriver *driver = priv->driver;
6697     g_autofree char *configFile = NULL;
6698 
6699     if (!(configFile = virFileBuildPath(priv->libDir, name, ".base64")))
6700         return -1;
6701 
6702     if (virFileRewriteStr(configFile, S_IRUSR | S_IWUSR, data) < 0) {
6703         virReportSystemError(errno, _("failed to write data to config '%s'"),
6704                              configFile);
6705         return -1;
6706     }
6707 
6708     if (qemuSecurityDomainSetPathLabel(driver, vm, configFile, true) < 0)
6709         return -1;
6710 
6711     return 0;
6712 }
6713 
6714 
6715 static int
qemuProcessPrepareSEVGuestInput(virDomainObj * vm)6716 qemuProcessPrepareSEVGuestInput(virDomainObj *vm)
6717 {
6718     virDomainSEVDef *sev = &vm->def->sec->data.sev;
6719 
6720     VIR_DEBUG("Preparing SEV guest");
6721 
6722     if (sev->dh_cert) {
6723         if (qemuProcessSEVCreateFile(vm, "dh_cert", sev->dh_cert) < 0)
6724             return -1;
6725     }
6726 
6727     if (sev->session) {
6728         if (qemuProcessSEVCreateFile(vm, "session", sev->session) < 0)
6729             return -1;
6730     }
6731 
6732     return 0;
6733 }
6734 
6735 
6736 static int
qemuProcessPrepareLaunchSecurityGuestInput(virDomainObj * vm)6737 qemuProcessPrepareLaunchSecurityGuestInput(virDomainObj *vm)
6738 {
6739     virDomainSecDef *sec = vm->def->sec;
6740 
6741     if (!sec)
6742         return 0;
6743 
6744     switch ((virDomainLaunchSecurity) sec->sectype) {
6745     case VIR_DOMAIN_LAUNCH_SECURITY_SEV:
6746         return qemuProcessPrepareSEVGuestInput(vm);
6747     case VIR_DOMAIN_LAUNCH_SECURITY_PV:
6748         return 0;
6749     case VIR_DOMAIN_LAUNCH_SECURITY_NONE:
6750     case VIR_DOMAIN_LAUNCH_SECURITY_LAST:
6751         virReportEnumRangeError(virDomainLaunchSecurity, sec->sectype);
6752         return -1;
6753     }
6754 
6755     return 0;
6756 }
6757 
6758 
6759 static int
qemuProcessPrepareHostStorage(virQEMUDriver * driver,virDomainObj * vm,unsigned int flags)6760 qemuProcessPrepareHostStorage(virQEMUDriver *driver,
6761                               virDomainObj *vm,
6762                               unsigned int flags)
6763 {
6764     qemuDomainObjPrivate *priv = vm->privateData;
6765     size_t i;
6766     bool cold_boot = flags & VIR_QEMU_PROCESS_START_COLD;
6767     bool blockdev = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV);
6768 
6769     for (i = vm->def->ndisks; i > 0; i--) {
6770         size_t idx = i - 1;
6771         virDomainDiskDef *disk = vm->def->disks[idx];
6772 
6773         if (virStorageSourceIsEmpty(disk->src))
6774             continue;
6775 
6776         /* backing chain needs to be redetected if we aren't using blockdev */
6777         if (!blockdev || qemuDiskBusIsSD(disk->bus))
6778             virStorageSourceBackingStoreClear(disk->src);
6779 
6780         /*
6781          * Go to applying startup policy for optional disk with nonexistent
6782          * source file immediately as determining chain will surely fail
6783          * and we don't want noisy error notice in logs for this case.
6784          */
6785         if (qemuDomainDiskIsMissingLocalOptional(disk) && cold_boot)
6786             VIR_INFO("optional disk '%s' source file is missing, "
6787                      "skip checking disk chain", disk->dst);
6788         else if (qemuDomainDetermineDiskChain(driver, vm, disk, NULL, true) >= 0)
6789             continue;
6790 
6791         if (qemuDomainCheckDiskStartupPolicy(driver, vm, idx, cold_boot) >= 0)
6792             continue;
6793 
6794         return -1;
6795     }
6796 
6797     return 0;
6798 }
6799 
6800 
6801 int
qemuProcessOpenVhostVsock(virDomainVsockDef * vsock)6802 qemuProcessOpenVhostVsock(virDomainVsockDef *vsock)
6803 {
6804     qemuDomainVsockPrivate *priv = (qemuDomainVsockPrivate *)vsock->privateData;
6805     const char *vsock_path = "/dev/vhost-vsock";
6806     int fd;
6807 
6808     if ((fd = open(vsock_path, O_RDWR)) < 0) {
6809         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6810                        "%s", _("unable to open vhost-vsock device"));
6811         return -1;
6812     }
6813 
6814     if (vsock->auto_cid == VIR_TRISTATE_BOOL_YES) {
6815         if (virVsockAcquireGuestCid(fd, &vsock->guest_cid) < 0)
6816             goto error;
6817     } else {
6818         if (virVsockSetGuestCid(fd, vsock->guest_cid) < 0)
6819             goto error;
6820     }
6821 
6822     priv->vhostfd = fd;
6823     return 0;
6824 
6825  error:
6826     VIR_FORCE_CLOSE(fd);
6827     return -1;
6828 }
6829 
6830 
6831 /**
6832  * qemuProcessPrepareHost:
6833  * @driver: qemu driver
6834  * @vm: domain object
6835  * @flags: qemuProcessStartFlags
6836  *
6837  * This function groups all code that modifies host system (which also may
6838  * update live XML) to prepare environment for a domain which is about to start
6839  * and it's the only place to do those modifications.
6840  *
6841  * TODO: move all host modification from qemuBuildCommandLine into this function
6842  */
6843 int
qemuProcessPrepareHost(virQEMUDriver * driver,virDomainObj * vm,unsigned int flags)6844 qemuProcessPrepareHost(virQEMUDriver *driver,
6845                        virDomainObj *vm,
6846                        unsigned int flags)
6847 {
6848     unsigned int hostdev_flags = 0;
6849     qemuDomainObjPrivate *priv = vm->privateData;
6850     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
6851 
6852     if (qemuPrepareNVRAM(driver, vm) < 0)
6853         return -1;
6854 
6855     if (vm->def->vsock) {
6856         if (qemuProcessOpenVhostVsock(vm->def->vsock) < 0)
6857             return -1;
6858     }
6859     /* network devices must be "prepared" before hostdevs, because
6860      * setting up a network device might create a new hostdev that
6861      * will need to be setup.
6862      */
6863     VIR_DEBUG("Preparing network devices");
6864     if (qemuProcessNetworkPrepareDevices(driver, vm) < 0)
6865         return -1;
6866 
6867     /* Must be run before security labelling */
6868     VIR_DEBUG("Preparing host devices");
6869     if (!cfg->relaxedACS)
6870         hostdev_flags |= VIR_HOSTDEV_STRICT_ACS_CHECK;
6871     if (flags & VIR_QEMU_PROCESS_START_NEW)
6872         hostdev_flags |= VIR_HOSTDEV_COLD_BOOT;
6873     if (qemuHostdevPrepareDomainDevices(driver, vm->def, priv->qemuCaps,
6874                                         hostdev_flags) < 0)
6875         return -1;
6876 
6877     VIR_DEBUG("Preparing chr devices");
6878     if (virDomainChrDefForeach(vm->def,
6879                                true,
6880                                qemuProcessPrepareChardevDevice,
6881                                NULL) < 0)
6882         return -1;
6883 
6884     if (qemuProcessBuildDestroyMemoryPaths(driver, vm, NULL, true) < 0)
6885         return -1;
6886 
6887     /* Ensure no historical cgroup for this VM is lying around bogus
6888      * settings */
6889     VIR_DEBUG("Ensuring no historical cgroup is lying around");
6890     qemuRemoveCgroup(vm);
6891 
6892     if (g_mkdir_with_parents(cfg->logDir, 0777) < 0) {
6893         virReportSystemError(errno,
6894                              _("cannot create log directory %s"),
6895                              cfg->logDir);
6896         return -1;
6897     }
6898 
6899     VIR_FREE(priv->pidfile);
6900     if (!(priv->pidfile = virPidFileBuildPath(cfg->stateDir, vm->def->name))) {
6901         virReportSystemError(errno,
6902                              "%s", _("Failed to build pidfile path."));
6903         return -1;
6904     }
6905 
6906     if (unlink(priv->pidfile) < 0 &&
6907         errno != ENOENT) {
6908         virReportSystemError(errno,
6909                              _("Cannot remove stale PID file %s"),
6910                              priv->pidfile);
6911         return -1;
6912     }
6913 
6914     /*
6915      * Create all per-domain directories in order to make sure domain
6916      * with any possible seclabels can access it.
6917      */
6918     if (qemuProcessMakeDir(driver, vm, priv->libDir) < 0 ||
6919         qemuProcessMakeDir(driver, vm, priv->channelTargetDir) < 0)
6920         return -1;
6921 
6922     VIR_DEBUG("Write domain masterKey");
6923     if (qemuDomainWriteMasterKeyFile(driver, vm) < 0)
6924         return -1;
6925 
6926     VIR_DEBUG("Preparing disks (host)");
6927     if (qemuProcessPrepareHostStorage(driver, vm, flags) < 0)
6928         return -1;
6929 
6930     VIR_DEBUG("Preparing hostdevs (host-side)");
6931     if (qemuProcessPrepareHostHostdevs(vm) < 0)
6932         return -1;
6933 
6934     VIR_DEBUG("Preparing external devices");
6935     if (qemuExtDevicesPrepareHost(driver, vm) < 0)
6936         return -1;
6937 
6938     if (qemuProcessPrepareLaunchSecurityGuestInput(vm) < 0)
6939         return -1;
6940 
6941     return 0;
6942 }
6943 
6944 
6945 /**
6946  * qemuProcessGenID:
6947  * @vm: Pointer to domain object
6948  * @flags: qemuProcessStartFlags
6949  *
6950  * If this domain is requesting to use genid, then update the GUID
6951  * value if the VIR_QEMU_PROCESS_START_GEN_VMID flag is set. This
6952  * flag is set on specific paths during domain start processing when
6953  * there is the possibility that the VM is potentially re-executing
6954  * something that has already been executed before.
6955  */
6956 static int
qemuProcessGenID(virDomainObj * vm,unsigned int flags)6957 qemuProcessGenID(virDomainObj *vm,
6958                  unsigned int flags)
6959 {
6960     if (!vm->def->genidRequested)
6961         return 0;
6962 
6963     /* If we are coming from a path where we must provide a new gen id
6964      * value regardless of whether it was previously generated or provided,
6965      * then generate a new GUID value before we build the command line. */
6966     if (flags & VIR_QEMU_PROCESS_START_GEN_VMID) {
6967         if (virUUIDGenerate(vm->def->genid) < 0) {
6968             virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
6969                            _("failed to regenerate genid"));
6970             return -1;
6971         }
6972     }
6973 
6974     return 0;
6975 }
6976 
6977 
6978 /**
6979  * qemuProcessSetupDiskThrottlingBlockdev:
6980  *
6981  * Sets up disk trottling for -blockdev via block_set_io_throttle monitor
6982  * command. This hack should be replaced by proper use of the 'throttle'
6983  * blockdev driver in qemu once it will support changing of the throttle group.
6984  * Same hack is done in qemuDomainAttachDiskGeneric.
6985  */
6986 static int
qemuProcessSetupDiskThrottlingBlockdev(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob)6987 qemuProcessSetupDiskThrottlingBlockdev(virQEMUDriver *driver,
6988                                        virDomainObj *vm,
6989                                        qemuDomainAsyncJob asyncJob)
6990 {
6991     qemuDomainObjPrivate *priv = vm->privateData;
6992     size_t i;
6993     int ret = -1;
6994 
6995     if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV))
6996         return 0;
6997 
6998     VIR_DEBUG("Setting up disk throttling for -blockdev via block_set_io_throttle");
6999 
7000     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
7001         return -1;
7002 
7003     for (i = 0; i < vm->def->ndisks; i++) {
7004         virDomainDiskDef *disk = vm->def->disks[i];
7005         qemuDomainDiskPrivate *diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
7006 
7007         /* sd-cards are instantiated via -drive */
7008         if (qemuDiskBusIsSD(disk->bus))
7009             continue;
7010 
7011         /* Setting throttling for empty drives fails */
7012         if (virStorageSourceIsEmpty(disk->src))
7013             continue;
7014 
7015         if (!qemuDiskConfigBlkdeviotuneEnabled(disk))
7016             continue;
7017 
7018         if (qemuMonitorSetBlockIoThrottle(qemuDomainGetMonitor(vm), NULL,
7019                                           diskPriv->qomName, &disk->blkdeviotune) < 0)
7020             goto cleanup;
7021     }
7022 
7023     ret = 0;
7024 
7025  cleanup:
7026     if (qemuDomainObjExitMonitor(driver, vm) < 0)
7027         ret = -1;
7028     return ret;
7029 }
7030 
7031 
7032 static int
qemuProcessEnableDomainNamespaces(virQEMUDriver * driver,virDomainObj * vm)7033 qemuProcessEnableDomainNamespaces(virQEMUDriver *driver,
7034                                   virDomainObj *vm)
7035 {
7036     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
7037 
7038     if (virBitmapIsBitSet(cfg->namespaces, QEMU_DOMAIN_NS_MOUNT) &&
7039         qemuDomainEnableNamespace(vm, QEMU_DOMAIN_NS_MOUNT) < 0)
7040         return -1;
7041 
7042     return 0;
7043 }
7044 
7045 
7046 static int
qemuProcessEnablePerf(virDomainObj * vm)7047 qemuProcessEnablePerf(virDomainObj *vm)
7048 {
7049     qemuDomainObjPrivate *priv = vm->privateData;
7050     size_t i;
7051 
7052     if (!(priv->perf = virPerfNew()))
7053         return -1;
7054 
7055     for (i = 0; i < VIR_PERF_EVENT_LAST; i++) {
7056         if (vm->def->perf.events[i] == VIR_TRISTATE_BOOL_YES &&
7057             virPerfEventEnable(priv->perf, i, vm->pid) < 0)
7058             return -1;
7059     }
7060 
7061     return 0;
7062 }
7063 
7064 
7065 static int
qemuProcessSetupDisksTransientSnapshot(virDomainObj * vm,qemuDomainAsyncJob asyncJob)7066 qemuProcessSetupDisksTransientSnapshot(virDomainObj *vm,
7067                                        qemuDomainAsyncJob asyncJob)
7068 {
7069     g_autoptr(qemuSnapshotDiskContext) snapctxt = NULL;
7070     g_autoptr(GHashTable) blockNamedNodeData = NULL;
7071     size_t i;
7072 
7073     if (!(blockNamedNodeData = qemuBlockGetNamedNodeData(vm, asyncJob)))
7074         return -1;
7075 
7076     snapctxt = qemuSnapshotDiskContextNew(vm->def->ndisks, vm, asyncJob);
7077 
7078     for (i = 0; i < vm->def->ndisks; i++) {
7079         virDomainDiskDef *domdisk = vm->def->disks[i];
7080         g_autoptr(virDomainSnapshotDiskDef) snapdisk = NULL;
7081 
7082         if (!domdisk->transient ||
7083             domdisk->transientShareBacking == VIR_TRISTATE_BOOL_YES)
7084             continue;
7085 
7086         /* validation code makes sure that we do this only for local disks
7087          * with a file source */
7088 
7089         if (!(snapdisk = qemuSnapshotGetTransientDiskDef(domdisk, vm->def->name)))
7090             return -1;
7091 
7092         if (qemuSnapshotDiskPrepareOne(snapctxt, domdisk, snapdisk,
7093                                        blockNamedNodeData,
7094                                        false,
7095                                        false) < 0)
7096             return -1;
7097     }
7098 
7099     if (qemuSnapshotDiskCreate(snapctxt) < 0)
7100         return -1;
7101 
7102     for (i = 0; i < vm->def->ndisks; i++) {
7103         virDomainDiskDef *domdisk = vm->def->disks[i];
7104 
7105         if (!domdisk->transient ||
7106             domdisk->transientShareBacking == VIR_TRISTATE_BOOL_YES)
7107             continue;
7108 
7109         QEMU_DOMAIN_DISK_PRIVATE(domdisk)->transientOverlayCreated = true;
7110     }
7111 
7112     return 0;
7113 }
7114 
7115 
7116 static int
qemuProcessSetupDisksTransientHotplug(virDomainObj * vm,qemuDomainAsyncJob asyncJob)7117 qemuProcessSetupDisksTransientHotplug(virDomainObj *vm,
7118                                       qemuDomainAsyncJob asyncJob)
7119 {
7120     qemuDomainObjPrivate *priv = vm->privateData;
7121     bool hasHotpluggedDisk = false;
7122     size_t i;
7123 
7124     for (i = 0; i < vm->def->ndisks; i++) {
7125         virDomainDiskDef *domdisk = vm->def->disks[i];
7126 
7127         if (!domdisk->transient ||
7128             domdisk->transientShareBacking != VIR_TRISTATE_BOOL_YES)
7129             continue;
7130 
7131         if (qemuDomainAttachDiskGeneric(priv->driver, vm, domdisk, asyncJob) < 0)
7132             return -1;
7133 
7134         hasHotpluggedDisk = true;
7135     }
7136 
7137     /* in order to allow booting from such disks we need to issue a system-reset
7138      * so that the firmware tables recording bootable devices are regerated */
7139     if (hasHotpluggedDisk) {
7140         int rc;
7141 
7142         if (qemuDomainObjEnterMonitorAsync(priv->driver, vm, asyncJob) < 0)
7143             return -1;
7144 
7145         rc = qemuMonitorSystemReset(priv->mon);
7146 
7147         if (qemuDomainObjExitMonitor(priv->driver, vm) < 0 || rc < 0)
7148             return -1;
7149     }
7150 
7151     return 0;
7152 }
7153 
7154 
7155 static int
qemuProcessSetupDisksTransient(virDomainObj * vm,qemuDomainAsyncJob asyncJob)7156 qemuProcessSetupDisksTransient(virDomainObj *vm,
7157                                qemuDomainAsyncJob asyncJob)
7158 {
7159     qemuDomainObjPrivate *priv = vm->privateData;
7160 
7161     if (!(virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV)))
7162         return 0;
7163 
7164     if (qemuProcessSetupDisksTransientSnapshot(vm, asyncJob) < 0)
7165         return -1;
7166 
7167     if (qemuProcessSetupDisksTransientHotplug(vm, asyncJob) < 0)
7168         return -1;
7169 
7170     return 0;
7171 }
7172 
7173 
7174 static int
qemuProcessSetupLifecycleActions(virDomainObj * vm,qemuDomainAsyncJob asyncJob)7175 qemuProcessSetupLifecycleActions(virDomainObj *vm,
7176                                  qemuDomainAsyncJob asyncJob)
7177 {
7178     qemuDomainObjPrivate *priv = vm->privateData;
7179     int rc;
7180 
7181     if (!(virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_SET_ACTION)))
7182         return 0;
7183 
7184     /* for now we handle only onReboot->destroy here as an alternative to
7185      * '-no-reboot' on the commandline */
7186     if (vm->def->onReboot != VIR_DOMAIN_LIFECYCLE_ACTION_DESTROY)
7187         return 0;
7188 
7189     if (qemuDomainObjEnterMonitorAsync(priv->driver, vm, asyncJob) < 0)
7190         return -1;
7191 
7192     rc = qemuMonitorSetAction(priv->mon,
7193                               QEMU_MONITOR_ACTION_SHUTDOWN_KEEP,
7194                               QEMU_MONITOR_ACTION_REBOOT_SHUTDOWN,
7195                               QEMU_MONITOR_ACTION_WATCHDOG_KEEP,
7196                               QEMU_MONITOR_ACTION_PANIC_KEEP);
7197 
7198     if (qemuDomainObjExitMonitor(priv->driver, vm) < 0 || rc < 0)
7199         return -1;
7200 
7201     return 0;
7202 }
7203 
7204 
7205 /**
7206  * qemuProcessLaunch:
7207  *
7208  * Launch a new QEMU process with stopped virtual CPUs.
7209  *
7210  * The caller is supposed to call qemuProcessStop with appropriate
7211  * flags in case of failure.
7212  *
7213  * Returns 0 on success,
7214  *        -1 on error which happened before devices were labeled and thus
7215  *           there is no need to restore them,
7216  *        -2 on error requesting security labels to be restored.
7217  */
7218 int
qemuProcessLaunch(virConnectPtr conn,virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob,qemuProcessIncomingDef * incoming,virDomainMomentObj * snapshot,virNetDevVPortProfileOp vmop,unsigned int flags)7219 qemuProcessLaunch(virConnectPtr conn,
7220                   virQEMUDriver *driver,
7221                   virDomainObj *vm,
7222                   qemuDomainAsyncJob asyncJob,
7223                   qemuProcessIncomingDef *incoming,
7224                   virDomainMomentObj *snapshot,
7225                   virNetDevVPortProfileOp vmop,
7226                   unsigned int flags)
7227 {
7228     int ret = -1;
7229     int rv;
7230     int logfile = -1;
7231     g_autoptr(qemuDomainLogContext) logCtxt = NULL;
7232     qemuDomainObjPrivate *priv = vm->privateData;
7233     g_autoptr(virCommand) cmd = NULL;
7234     struct qemuProcessHookData hookData;
7235     g_autoptr(virQEMUDriverConfig) cfg = NULL;
7236     size_t nnicindexes = 0;
7237     g_autofree int *nicindexes = NULL;
7238     unsigned long long maxMemLock = 0;
7239 
7240     VIR_DEBUG("conn=%p driver=%p vm=%p name=%s if=%d asyncJob=%d "
7241               "incoming.launchURI=%s incoming.deferredURI=%s "
7242               "incoming.fd=%d incoming.path=%s "
7243               "snapshot=%p vmop=%d flags=0x%x",
7244               conn, driver, vm, vm->def->name, vm->def->id, asyncJob,
7245               NULLSTR(incoming ? incoming->launchURI : NULL),
7246               NULLSTR(incoming ? incoming->deferredURI : NULL),
7247               incoming ? incoming->fd : -1,
7248               NULLSTR(incoming ? incoming->path : NULL),
7249               snapshot, vmop, flags);
7250 
7251     /* Okay, these are just internal flags,
7252      * but doesn't hurt to check */
7253     virCheckFlags(VIR_QEMU_PROCESS_START_COLD |
7254                   VIR_QEMU_PROCESS_START_PAUSED |
7255                   VIR_QEMU_PROCESS_START_AUTODESTROY |
7256                   VIR_QEMU_PROCESS_START_NEW |
7257                   VIR_QEMU_PROCESS_START_GEN_VMID, -1);
7258 
7259     cfg = virQEMUDriverGetConfig(driver);
7260 
7261     if (flags & VIR_QEMU_PROCESS_START_AUTODESTROY) {
7262         if (!conn) {
7263             virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
7264                            _("Domain autodestroy requires a connection handle"));
7265             return -1;
7266         }
7267         if (driver->embeddedRoot) {
7268             virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
7269                            _("Domain autodestroy not supported for embedded drivers yet"));
7270             return -1;
7271         }
7272     }
7273 
7274     hookData.vm = vm;
7275     hookData.driver = driver;
7276     /* We don't increase cfg's reference counter here. */
7277     hookData.cfg = cfg;
7278 
7279     VIR_DEBUG("Creating domain log file");
7280     if (!(logCtxt = qemuDomainLogContextNew(driver, vm,
7281                                             QEMU_DOMAIN_LOG_CONTEXT_MODE_START))) {
7282         virLastErrorPrefixMessage("%s", _("can't connect to virtlogd"));
7283         goto cleanup;
7284     }
7285     logfile = qemuDomainLogContextGetWriteFD(logCtxt);
7286 
7287     if (qemuProcessGenID(vm, flags) < 0)
7288         goto cleanup;
7289 
7290     if (qemuExtDevicesStart(driver, vm, incoming != NULL) < 0)
7291         goto cleanup;
7292 
7293     VIR_DEBUG("Building emulator command line");
7294     if (!(cmd = qemuBuildCommandLine(driver,
7295                                      qemuDomainLogContextGetManager(logCtxt),
7296                                      driver->securityManager,
7297                                      vm,
7298                                      incoming ? incoming->launchURI : NULL,
7299                                      snapshot, vmop,
7300                                      false,
7301                                      qemuCheckFips(vm),
7302                                      &nnicindexes, &nicindexes, 0)))
7303         goto cleanup;
7304 
7305     if (incoming && incoming->fd != -1)
7306         virCommandPassFD(cmd, incoming->fd, 0);
7307 
7308     /* now that we know it is about to start call the hook if present */
7309     if (qemuProcessStartHook(driver, vm,
7310                              VIR_HOOK_QEMU_OP_START,
7311                              VIR_HOOK_SUBOP_BEGIN) < 0)
7312         goto cleanup;
7313 
7314     qemuLogOperation(vm, "starting up", cmd, logCtxt);
7315 
7316     qemuDomainObjCheckTaint(driver, vm, logCtxt, incoming != NULL);
7317 
7318     qemuDomainLogContextMarkPosition(logCtxt);
7319 
7320     VIR_DEBUG("Building mount namespace");
7321 
7322     if (qemuProcessEnableDomainNamespaces(driver, vm) < 0)
7323         goto cleanup;
7324 
7325     VIR_DEBUG("Setting up raw IO");
7326     if (qemuProcessSetupRawIO(driver, vm, cmd) < 0)
7327         goto cleanup;
7328 
7329     virCommandSetPreExecHook(cmd, qemuProcessHook, &hookData);
7330     virCommandSetUmask(cmd, 0x002);
7331 
7332     VIR_DEBUG("Setting up process limits");
7333 
7334     /* In some situations, eg. VFIO passthrough, QEMU might need to lock a
7335      * significant amount of memory, so we need to set the limit accordingly */
7336     maxMemLock = qemuDomainGetMemLockLimitBytes(vm->def, false);
7337 
7338     /* For all these settings, zero indicates that the limit should
7339      * not be set explicitly and the default/inherited limit should
7340      * be applied instead */
7341     if (maxMemLock > 0)
7342         virCommandSetMaxMemLock(cmd, maxMemLock);
7343     if (cfg->maxProcesses > 0)
7344         virCommandSetMaxProcesses(cmd, cfg->maxProcesses);
7345     if (cfg->maxFiles > 0)
7346         virCommandSetMaxFiles(cmd, cfg->maxFiles);
7347 
7348     /* In this case, however, zero means that core dumps should be
7349      * disabled, and so we always need to set the limit explicitly */
7350     virCommandSetMaxCoreSize(cmd, cfg->maxCore);
7351 
7352     VIR_DEBUG("Setting up security labelling");
7353     if (qemuSecuritySetChildProcessLabel(driver->securityManager,
7354                                          vm->def, cmd) < 0)
7355         goto cleanup;
7356 
7357     virCommandSetOutputFD(cmd, &logfile);
7358     virCommandSetErrorFD(cmd, &logfile);
7359     virCommandNonblockingFDs(cmd);
7360     virCommandSetPidFile(cmd, priv->pidfile);
7361     virCommandDaemonize(cmd);
7362     virCommandRequireHandshake(cmd);
7363 
7364     if (qemuSecurityPreFork(driver->securityManager) < 0)
7365         goto cleanup;
7366     rv = virCommandRun(cmd, NULL);
7367     qemuSecurityPostFork(driver->securityManager);
7368 
7369     /* wait for qemu process to show up */
7370     if (rv == 0) {
7371         if ((rv = virPidFileReadPath(priv->pidfile, &vm->pid)) < 0) {
7372             virReportSystemError(-rv,
7373                                  _("Domain %s didn't show up"),
7374                                  vm->def->name);
7375             goto cleanup;
7376         }
7377         VIR_DEBUG("QEMU vm=%p name=%s running with pid=%lld",
7378                   vm, vm->def->name, (long long)vm->pid);
7379     } else {
7380         VIR_DEBUG("QEMU vm=%p name=%s failed to spawn",
7381                   vm, vm->def->name);
7382         goto cleanup;
7383     }
7384 
7385     VIR_DEBUG("Writing early domain status to disk");
7386     if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
7387         goto cleanup;
7388 
7389     VIR_DEBUG("Waiting for handshake from child");
7390     if (virCommandHandshakeWait(cmd) < 0) {
7391         /* Read errors from child that occurred between fork and exec. */
7392         qemuProcessReportLogError(logCtxt,
7393                                   _("Process exited prior to exec"));
7394         goto cleanup;
7395     }
7396 
7397     VIR_DEBUG("Building domain mount namespace (if required)");
7398     if (qemuDomainBuildNamespace(cfg, vm) < 0)
7399         goto cleanup;
7400 
7401     VIR_DEBUG("Setting up domain cgroup (if required)");
7402     if (qemuSetupCgroup(vm, nnicindexes, nicindexes) < 0)
7403         goto cleanup;
7404 
7405     VIR_DEBUG("Setting up domain perf (if required)");
7406     if (qemuProcessEnablePerf(vm) < 0)
7407         goto cleanup;
7408 
7409     /* This must be done after cgroup placement to avoid resetting CPU
7410      * affinity */
7411     if (qemuProcessInitCpuAffinity(vm) < 0)
7412         goto cleanup;
7413 
7414     VIR_DEBUG("Setting emulator tuning/settings");
7415     if (qemuProcessSetupEmulator(vm) < 0)
7416         goto cleanup;
7417 
7418     VIR_DEBUG("Setting cgroup for external devices (if required)");
7419     if (qemuSetupCgroupForExtDevices(vm, driver) < 0)
7420         goto cleanup;
7421 
7422     VIR_DEBUG("Setting up resctrl");
7423     if (qemuProcessResctrlCreate(driver, vm) < 0)
7424         goto cleanup;
7425 
7426     VIR_DEBUG("Setting up managed PR daemon");
7427     if (virDomainDefHasManagedPR(vm->def) &&
7428         qemuProcessStartManagedPRDaemon(vm) < 0)
7429         goto cleanup;
7430 
7431     VIR_DEBUG("Setting domain security labels");
7432     if (qemuSecuritySetAllLabel(driver,
7433                                 vm,
7434                                 incoming ? incoming->path : NULL,
7435                                 incoming != NULL) < 0)
7436         goto cleanup;
7437 
7438     /* Security manager labeled all devices, therefore
7439      * if any operation from now on fails, we need to ask the caller to
7440      * restore labels.
7441      */
7442     ret = -2;
7443 
7444     if (incoming && incoming->fd != -1) {
7445         /* if there's an fd to migrate from, and it's a pipe, put the
7446          * proper security label on it
7447          */
7448         struct stat stdin_sb;
7449 
7450         VIR_DEBUG("setting security label on pipe used for migration");
7451 
7452         if (fstat(incoming->fd, &stdin_sb) < 0) {
7453             virReportSystemError(errno,
7454                                  _("cannot stat fd %d"), incoming->fd);
7455             goto cleanup;
7456         }
7457         if (S_ISFIFO(stdin_sb.st_mode) &&
7458             qemuSecuritySetImageFDLabel(driver->securityManager,
7459                                         vm->def, incoming->fd) < 0)
7460             goto cleanup;
7461     }
7462 
7463     VIR_DEBUG("Labelling done, completing handshake to child");
7464     if (virCommandHandshakeNotify(cmd) < 0)
7465         goto cleanup;
7466     VIR_DEBUG("Handshake complete, child running");
7467 
7468     if (qemuDomainObjStartWorker(vm) < 0)
7469         goto cleanup;
7470 
7471     VIR_DEBUG("Waiting for monitor to show up");
7472     if (qemuProcessWaitForMonitor(driver, vm, asyncJob, logCtxt) < 0)
7473         goto cleanup;
7474 
7475     if (qemuConnectAgent(driver, vm) < 0)
7476         goto cleanup;
7477 
7478     VIR_DEBUG("Verifying and updating provided guest CPU");
7479     if (qemuProcessUpdateAndVerifyCPU(driver, vm, asyncJob) < 0)
7480         goto cleanup;
7481 
7482     VIR_DEBUG("setting up hotpluggable cpus");
7483     if (qemuDomainHasHotpluggableStartupVcpus(vm->def)) {
7484         if (qemuDomainRefreshVcpuInfo(driver, vm, asyncJob, false) < 0)
7485             goto cleanup;
7486 
7487         if (qemuProcessValidateHotpluggableVcpus(vm->def) < 0)
7488             goto cleanup;
7489 
7490         if (qemuProcessSetupHotpluggableVcpus(driver, vm, asyncJob) < 0)
7491             goto cleanup;
7492     }
7493 
7494     VIR_DEBUG("Refreshing VCPU info");
7495     if (qemuDomainRefreshVcpuInfo(driver, vm, asyncJob, false) < 0)
7496         goto cleanup;
7497 
7498     if (qemuDomainValidateVcpuInfo(vm) < 0)
7499         goto cleanup;
7500 
7501     qemuDomainVcpuPersistOrder(vm->def);
7502 
7503     VIR_DEBUG("Detecting IOThread PIDs");
7504     if (qemuProcessDetectIOThreadPIDs(driver, vm, asyncJob) < 0)
7505         goto cleanup;
7506 
7507     VIR_DEBUG("Setting global CPU cgroup (if required)");
7508     if (qemuSetupGlobalCpuCgroup(vm) < 0)
7509         goto cleanup;
7510 
7511     VIR_DEBUG("Setting vCPU tuning/settings");
7512     if (qemuProcessSetupVcpus(vm) < 0)
7513         goto cleanup;
7514 
7515     VIR_DEBUG("Setting IOThread tuning/settings");
7516     if (qemuProcessSetupIOThreads(vm) < 0)
7517         goto cleanup;
7518 
7519     VIR_DEBUG("Setting emulator scheduler");
7520     if (vm->def->cputune.emulatorsched &&
7521         virProcessSetScheduler(vm->pid,
7522                                vm->def->cputune.emulatorsched->policy,
7523                                vm->def->cputune.emulatorsched->priority) < 0)
7524         goto cleanup;
7525 
7526     VIR_DEBUG("Setting any required VM passwords");
7527     if (qemuProcessInitPasswords(driver, vm, asyncJob) < 0)
7528         goto cleanup;
7529 
7530     /* set default link states */
7531     /* qemu doesn't support setting this on the command line, so
7532      * enter the monitor */
7533     VIR_DEBUG("Setting network link states");
7534     if (qemuProcessSetLinkStates(driver, vm, asyncJob) < 0)
7535         goto cleanup;
7536 
7537     VIR_DEBUG("Setting initial memory amount");
7538     if (qemuProcessSetupBalloon(driver, vm, asyncJob) < 0)
7539         goto cleanup;
7540 
7541     if (qemuProcessSetupDiskThrottlingBlockdev(driver, vm, asyncJob) < 0)
7542         goto cleanup;
7543 
7544     /* Since CPUs were not started yet, the balloon could not return the memory
7545      * to the host and thus cur_balloon needs to be updated so that GetXMLdesc
7546      * and friends return the correct size in case they can't grab the job */
7547     if (!incoming && !snapshot &&
7548         qemuProcessRefreshBalloonState(driver, vm, asyncJob) < 0)
7549         goto cleanup;
7550 
7551     if (flags & VIR_QEMU_PROCESS_START_AUTODESTROY &&
7552         qemuProcessAutoDestroyAdd(driver, vm, conn) < 0)
7553         goto cleanup;
7554 
7555     if (!incoming && !snapshot) {
7556         VIR_DEBUG("Setting up transient disk");
7557         if (qemuProcessSetupDisksTransient(vm, asyncJob) < 0)
7558             goto cleanup;
7559     }
7560 
7561     VIR_DEBUG("Setting handling of lifecycle actions");
7562     if (qemuProcessSetupLifecycleActions(vm, asyncJob) < 0)
7563         goto cleanup;
7564 
7565     ret = 0;
7566 
7567  cleanup:
7568     qemuDomainSecretDestroy(vm);
7569     return ret;
7570 }
7571 
7572 
7573 /**
7574  * qemuProcessRefreshState:
7575  * @driver: qemu driver data
7576  * @vm: domain to refresh
7577  * @asyncJob: async job type
7578  *
7579  * This function gathers calls to refresh qemu state after startup. This
7580  * function is called after a deferred migration finishes so that we can update
7581  * state influenced by the migration stream.
7582  */
7583 int
qemuProcessRefreshState(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob)7584 qemuProcessRefreshState(virQEMUDriver *driver,
7585                         virDomainObj *vm,
7586                         qemuDomainAsyncJob asyncJob)
7587 {
7588     qemuDomainObjPrivate *priv = vm->privateData;
7589 
7590     VIR_DEBUG("Fetching list of active devices");
7591     if (qemuDomainUpdateDeviceList(driver, vm, asyncJob) < 0)
7592         return -1;
7593 
7594     VIR_DEBUG("Updating info of memory devices");
7595     if (qemuDomainUpdateMemoryDeviceInfo(driver, vm, asyncJob) < 0)
7596         return -1;
7597 
7598     VIR_DEBUG("Detecting actual memory size for video device");
7599     if (qemuProcessUpdateVideoRamSize(driver, vm, asyncJob) < 0)
7600         return -1;
7601 
7602     VIR_DEBUG("Updating disk data");
7603     if (qemuProcessRefreshDisks(driver, vm, asyncJob) < 0)
7604         return -1;
7605     if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV) &&
7606         qemuBlockNodeNamesDetect(driver, vm, asyncJob) < 0)
7607         return -1;
7608 
7609     return 0;
7610 }
7611 
7612 
7613 /**
7614  * qemuProcessFinishStartup:
7615  *
7616  * Finish starting a new domain.
7617  */
7618 int
qemuProcessFinishStartup(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob,bool startCPUs,virDomainPausedReason pausedReason)7619 qemuProcessFinishStartup(virQEMUDriver *driver,
7620                          virDomainObj *vm,
7621                          qemuDomainAsyncJob asyncJob,
7622                          bool startCPUs,
7623                          virDomainPausedReason pausedReason)
7624 {
7625     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
7626 
7627     if (startCPUs) {
7628         VIR_DEBUG("Starting domain CPUs");
7629         if (qemuProcessStartCPUs(driver, vm,
7630                                  VIR_DOMAIN_RUNNING_BOOTED,
7631                                  asyncJob) < 0) {
7632             if (virGetLastErrorCode() == VIR_ERR_OK)
7633                 virReportError(VIR_ERR_OPERATION_FAILED, "%s",
7634                                _("resume operation failed"));
7635             return -1;
7636         }
7637     } else {
7638         virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, pausedReason);
7639     }
7640 
7641     VIR_DEBUG("Writing domain status to disk");
7642     if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
7643         return -1;
7644 
7645     if (qemuProcessStartHook(driver, vm,
7646                              VIR_HOOK_QEMU_OP_STARTED,
7647                              VIR_HOOK_SUBOP_BEGIN) < 0)
7648         return -1;
7649 
7650     return 0;
7651 }
7652 
7653 
7654 int
qemuProcessStart(virConnectPtr conn,virQEMUDriver * driver,virDomainObj * vm,virCPUDef * updatedCPU,qemuDomainAsyncJob asyncJob,const char * migrateFrom,int migrateFd,const char * migratePath,virDomainMomentObj * snapshot,virNetDevVPortProfileOp vmop,unsigned int flags)7655 qemuProcessStart(virConnectPtr conn,
7656                  virQEMUDriver *driver,
7657                  virDomainObj *vm,
7658                  virCPUDef *updatedCPU,
7659                  qemuDomainAsyncJob asyncJob,
7660                  const char *migrateFrom,
7661                  int migrateFd,
7662                  const char *migratePath,
7663                  virDomainMomentObj *snapshot,
7664                  virNetDevVPortProfileOp vmop,
7665                  unsigned int flags)
7666 {
7667     qemuDomainObjPrivate *priv = vm->privateData;
7668     qemuProcessIncomingDef *incoming = NULL;
7669     unsigned int stopFlags;
7670     bool relabel = false;
7671     bool relabelSavedState = false;
7672     int ret = -1;
7673     int rv;
7674 
7675     VIR_DEBUG("conn=%p driver=%p vm=%p name=%s id=%d asyncJob=%s "
7676               "migrateFrom=%s migrateFd=%d migratePath=%s "
7677               "snapshot=%p vmop=%d flags=0x%x",
7678               conn, driver, vm, vm->def->name, vm->def->id,
7679               qemuDomainAsyncJobTypeToString(asyncJob),
7680               NULLSTR(migrateFrom), migrateFd, NULLSTR(migratePath),
7681               snapshot, vmop, flags);
7682 
7683     virCheckFlagsGoto(VIR_QEMU_PROCESS_START_COLD |
7684                       VIR_QEMU_PROCESS_START_PAUSED |
7685                       VIR_QEMU_PROCESS_START_AUTODESTROY |
7686                       VIR_QEMU_PROCESS_START_GEN_VMID, cleanup);
7687 
7688     if (!migrateFrom && !snapshot)
7689         flags |= VIR_QEMU_PROCESS_START_NEW;
7690 
7691     if (qemuProcessInit(driver, vm, updatedCPU,
7692                         asyncJob, !!migrateFrom, flags) < 0)
7693         goto cleanup;
7694 
7695     if (migrateFrom) {
7696         incoming = qemuProcessIncomingDefNew(priv->qemuCaps, NULL, migrateFrom,
7697                                              migrateFd, migratePath);
7698         if (!incoming)
7699             goto stop;
7700     }
7701 
7702     if (qemuProcessPrepareDomain(driver, vm, flags) < 0)
7703         goto stop;
7704 
7705     if (qemuProcessPrepareHost(driver, vm, flags) < 0)
7706         goto stop;
7707 
7708     if (migratePath) {
7709         if (qemuSecuritySetSavedStateLabel(driver->securityManager,
7710                                            vm->def, migratePath) < 0)
7711             goto cleanup;
7712         relabelSavedState = true;
7713     }
7714 
7715     if ((rv = qemuProcessLaunch(conn, driver, vm, asyncJob, incoming,
7716                                 snapshot, vmop, flags)) < 0) {
7717         if (rv == -2)
7718             relabel = true;
7719         goto stop;
7720     }
7721     relabel = true;
7722 
7723     if (incoming) {
7724         if (incoming->deferredURI &&
7725             qemuMigrationDstRun(driver, vm, incoming->deferredURI, asyncJob) < 0)
7726             goto stop;
7727     } else {
7728         /* Refresh state of devices from QEMU. During migration this happens
7729          * in qemuMigrationDstFinish to ensure that state information is fully
7730          * transferred. */
7731         if (qemuProcessRefreshState(driver, vm, asyncJob) < 0)
7732             goto stop;
7733     }
7734 
7735     if (qemuProcessFinishStartup(driver, vm, asyncJob,
7736                                  !(flags & VIR_QEMU_PROCESS_START_PAUSED),
7737                                  incoming ?
7738                                  VIR_DOMAIN_PAUSED_MIGRATION :
7739                                  VIR_DOMAIN_PAUSED_USER) < 0)
7740         goto stop;
7741 
7742     if (!incoming) {
7743         /* Keep watching qemu log for errors during incoming migration, otherwise
7744          * unset reporting errors from qemu log. */
7745         qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL);
7746     }
7747 
7748     ret = 0;
7749 
7750  cleanup:
7751     if (relabelSavedState &&
7752         qemuSecurityRestoreSavedStateLabel(driver->securityManager,
7753                                            vm->def, migratePath) < 0)
7754         VIR_WARN("failed to restore save state label on %s", migratePath);
7755     qemuProcessIncomingDefFree(incoming);
7756     return ret;
7757 
7758  stop:
7759     stopFlags = 0;
7760     if (!relabel)
7761         stopFlags |= VIR_QEMU_PROCESS_STOP_NO_RELABEL;
7762     if (migrateFrom)
7763         stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
7764     if (priv->mon)
7765         qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL);
7766     qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
7767     goto cleanup;
7768 }
7769 
7770 
7771 int
qemuProcessCreatePretendCmdPrepare(virQEMUDriver * driver,virDomainObj * vm,const char * migrateURI,bool standalone,unsigned int flags)7772 qemuProcessCreatePretendCmdPrepare(virQEMUDriver *driver,
7773                                    virDomainObj *vm,
7774                                    const char *migrateURI,
7775                                    bool standalone,
7776                                    unsigned int flags)
7777 {
7778     virCheckFlags(VIR_QEMU_PROCESS_START_COLD |
7779                   VIR_QEMU_PROCESS_START_PAUSED |
7780                   VIR_QEMU_PROCESS_START_AUTODESTROY, -1);
7781 
7782     flags |= VIR_QEMU_PROCESS_START_PRETEND;
7783 
7784     if (!migrateURI)
7785         flags |= VIR_QEMU_PROCESS_START_NEW;
7786 
7787     if (standalone)
7788         flags |= VIR_QEMU_PROCESS_START_STANDALONE;
7789 
7790     if (qemuProcessInit(driver, vm, NULL, QEMU_ASYNC_JOB_NONE,
7791                         !!migrateURI, flags) < 0)
7792         return -1;
7793 
7794     if (qemuProcessPrepareDomain(driver, vm, flags) < 0)
7795         return -1;
7796 
7797     return 0;
7798 }
7799 
7800 
7801 virCommand *
qemuProcessCreatePretendCmdBuild(virQEMUDriver * driver,virDomainObj * vm,const char * migrateURI,bool enableFips,bool standalone)7802 qemuProcessCreatePretendCmdBuild(virQEMUDriver *driver,
7803                                  virDomainObj *vm,
7804                                  const char *migrateURI,
7805                                  bool enableFips,
7806                                  bool standalone)
7807 {
7808     VIR_DEBUG("Building emulator command line");
7809     return qemuBuildCommandLine(driver,
7810                                 NULL,
7811                                 driver->securityManager,
7812                                 vm,
7813                                 migrateURI,
7814                                 NULL,
7815                                 VIR_NETDEV_VPORT_PROFILE_OP_NO_OP,
7816                                 standalone,
7817                                 enableFips,
7818                                 NULL,
7819                                 NULL,
7820                                 0);
7821 }
7822 
7823 
7824 int
qemuProcessKill(virDomainObj * vm,unsigned int flags)7825 qemuProcessKill(virDomainObj *vm, unsigned int flags)
7826 {
7827     VIR_DEBUG("vm=%p name=%s pid=%lld flags=0x%x",
7828               vm, vm->def->name,
7829               (long long)vm->pid, flags);
7830 
7831     if (!(flags & VIR_QEMU_PROCESS_KILL_NOCHECK)) {
7832         if (!virDomainObjIsActive(vm)) {
7833             VIR_DEBUG("VM '%s' not active", vm->def->name);
7834             return 0;
7835         }
7836     }
7837 
7838     if (flags & VIR_QEMU_PROCESS_KILL_NOWAIT) {
7839         virProcessKill(vm->pid,
7840                        (flags & VIR_QEMU_PROCESS_KILL_FORCE) ?
7841                        SIGKILL : SIGTERM);
7842         return 0;
7843     }
7844 
7845     /* Request an extra delay of two seconds per current nhostdevs
7846      * to be safe against stalls by the kernel freeing up the resources */
7847     return virProcessKillPainfullyDelay(vm->pid,
7848                                         !!(flags & VIR_QEMU_PROCESS_KILL_FORCE),
7849                                         vm->def->nhostdevs * 2,
7850                                         false);
7851 }
7852 
7853 
7854 /**
7855  * qemuProcessBeginStopJob:
7856  *
7857  * Stop all current jobs by killing the domain and start a new one for
7858  * qemuProcessStop.
7859  */
7860 int
qemuProcessBeginStopJob(virQEMUDriver * driver,virDomainObj * vm,qemuDomainJob job,bool forceKill)7861 qemuProcessBeginStopJob(virQEMUDriver *driver,
7862                         virDomainObj *vm,
7863                         qemuDomainJob job,
7864                         bool forceKill)
7865 {
7866     qemuDomainObjPrivate *priv = vm->privateData;
7867     unsigned int killFlags = forceKill ? VIR_QEMU_PROCESS_KILL_FORCE : 0;
7868     int ret = -1;
7869 
7870     /* We need to prevent monitor EOF callback from doing our work (and
7871      * sending misleading events) while the vm is unlocked inside
7872      * BeginJob/ProcessKill API
7873      */
7874     priv->beingDestroyed = true;
7875 
7876     if (qemuProcessKill(vm, killFlags) < 0)
7877         goto cleanup;
7878 
7879     /* Wake up anything waiting on domain condition */
7880     virDomainObjBroadcast(vm);
7881 
7882     if (qemuDomainObjBeginJob(driver, vm, job) < 0)
7883         goto cleanup;
7884 
7885     ret = 0;
7886 
7887  cleanup:
7888     priv->beingDestroyed = false;
7889     return ret;
7890 }
7891 
7892 
/**
 * qemuProcessStop:
 * @driver: qemu driver data
 * @vm: domain object to shut down
 * @reason: shutoff reason written to the domain log and stored as the
 *          reason of the VIR_DOMAIN_SHUTOFF state
 * @asyncJob: async job the caller runs under; when it is not
 *            QEMU_ASYNC_JOB_NONE a nested job is begun first and ended
 *            before returning
 * @flags: VIR_QEMU_PROCESS_STOP_NO_RELABEL skips restoring security labels
 *         (and the per-disk cleanup done together with it);
 *         VIR_QEMU_PROCESS_STOP_MIGRATED is passed on to
 *         qemuSecurityRestoreAllLabel
 *
 * Kills the qemu process and releases what was set up for the running
 * domain: agent and monitor, network devices, cgroup and resctrl
 * allocations, reserved graphics ports, security labels, hooks and
 * private data. Does nothing besides ending the nested job when the
 * domain is not active, and nothing at all when the nested job cannot be
 * started.
 *
 * The error that was set on entry is preserved and restored on exit, so
 * failures of individual cleanup steps are only logged or ignored.
 */
void qemuProcessStop(virQEMUDriver *driver,
                     virDomainObj *vm,
                     virDomainShutoffReason reason,
                     qemuDomainAsyncJob asyncJob,
                     unsigned int flags)
{
    int ret;
    int retries = 0;
    qemuDomainObjPrivate *priv = vm->privateData;
    virErrorPtr orig_err;
    virDomainDef *def = vm->def;
    const virNetDevVPortProfile *vport = NULL;
    size_t i;
    g_autofree char *timestamp = NULL;
    g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
    g_autoptr(virConnect) conn = NULL;

    VIR_DEBUG("Shutting down vm=%p name=%s id=%d pid=%lld, "
              "reason=%s, asyncJob=%s, flags=0x%x",
              vm, vm->def->name, vm->def->id,
              (long long)vm->pid,
              virDomainShutoffReasonTypeToString(reason),
              qemuDomainAsyncJobTypeToString(asyncJob),
              flags);

    /* This method is routinely used in clean up paths. Disable error
     * reporting so we don't squash a legit error. */
    virErrorPreserveLast(&orig_err);

    if (asyncJob != QEMU_ASYNC_JOB_NONE) {
        if (qemuDomainObjBeginNestedJob(driver, vm, asyncJob) < 0)
            goto cleanup;
    } else if (priv->job.asyncJob != QEMU_ASYNC_JOB_NONE &&
               priv->job.asyncOwner == virThreadSelfID() &&
               priv->job.active != QEMU_JOB_ASYNC_NESTED) {
        /* @asyncJob is always QEMU_ASYNC_JOB_NONE in this branch, so report
         * the async job that is actually active on the domain. */
        VIR_WARN("qemuProcessStop called without a nested job (async=%s)",
                 qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
    }

    if (!virDomainObjIsActive(vm)) {
        VIR_DEBUG("VM '%s' not active", vm->def->name);
        goto endjob;
    }

    qemuProcessBuildDestroyMemoryPaths(driver, vm, NULL, false);

    /* The last active domain going away lifts the inhibition. */
    if (!!g_atomic_int_dec_and_test(&driver->nactive) && driver->inhibitCallback)
        driver->inhibitCallback(false, driver->inhibitOpaque);

    if ((timestamp = virTimeStringNow()) != NULL) {
        qemuDomainLogAppendMessage(driver, vm, "%s: shutting down, reason=%s\n",
                                   timestamp,
                                   virDomainShutoffReasonTypeToString(reason));
    }

    /* Clear network bandwidth */
    virDomainClearNetBandwidth(vm->def);

    virDomainConfVMNWFilterTeardown(vm);

    if (cfg->macFilter) {
        for (i = 0; i < def->nnets; i++) {
            virDomainNetDef *net = def->nets[i];
            if (net->ifname == NULL)
                continue;
            ignore_value(ebtablesRemoveForwardAllowIn(driver->ebtables,
                                                      net->ifname,
                                                      &net->mac));
        }
    }

    virPortAllocatorRelease(priv->nbdPort);
    priv->nbdPort = 0;

    if (priv->agent) {
        qemuAgentClose(priv->agent);
        priv->agent = NULL;
    }
    priv->agentError = false;

    if (priv->mon) {
        qemuMonitorClose(priv->mon);
        priv->mon = NULL;
    }

    if (priv->monConfig) {
        if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX)
            unlink(priv->monConfig->data.nix.path);
        virObjectUnref(priv->monConfig);
        priv->monConfig = NULL;
    }

    qemuDomainObjStopWorker(vm);

    /* Remove the master key */
    qemuDomainMasterKeyRemove(priv);

    /* Do this before we delete the tree and remove pidfile. */
    qemuProcessKillManagedPRDaemon(vm);

    ignore_value(virDomainChrDefForeach(vm->def,
                                        false,
                                        qemuProcessCleanupChardevDevice,
                                        NULL));


    /* shut it off for sure */
    ignore_value(qemuProcessKill(vm,
                                 VIR_QEMU_PROCESS_KILL_FORCE|
                                 VIR_QEMU_PROCESS_KILL_NOCHECK));

    qemuDomainCleanupRun(driver, vm);

    qemuExtDevicesStop(driver, vm);

    qemuDBusStop(driver, vm);

    vm->def->id = -1;

    /* Wake up anything waiting on domain condition */
    virDomainObjBroadcast(vm);

    virFileDeleteTree(priv->libDir);
    virFileDeleteTree(priv->channelTargetDir);

    /* Stop autodestroy in case guest is restarted */
    qemuProcessAutoDestroyRemove(driver, vm);

    /* now that we know it's stopped call the hook if present */
    if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
        g_autofree char *xml = qemuDomainDefFormatXML(driver, NULL, vm->def, 0);

        /* we can't stop the operation even if the script raised an error */
        ignore_value(virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
                                 VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END,
                                 NULL, xml, NULL));
    }

    /* Reset Security Labels unless caller don't want us to */
    if (!(flags & VIR_QEMU_PROCESS_STOP_NO_RELABEL))
        qemuSecurityRestoreAllLabel(driver, vm,
                                    !!(flags & VIR_QEMU_PROCESS_STOP_MIGRATED));

    for (i = 0; i < vm->def->ndisks; i++) {
        virDomainDeviceDef dev;
        virDomainDiskDef *disk = vm->def->disks[i];

        dev.type = VIR_DOMAIN_DEVICE_DISK;
        dev.data.disk = disk;
        ignore_value(qemuRemoveSharedDevice(driver, &dev, vm->def->name));
    }

    /* Clear out dynamically assigned labels */
    for (i = 0; i < vm->def->nseclabels; i++) {
        if (vm->def->seclabels[i]->type == VIR_DOMAIN_SECLABEL_DYNAMIC)
            VIR_FREE(vm->def->seclabels[i]->label);
        VIR_FREE(vm->def->seclabels[i]->imagelabel);
    }

    qemuHostdevReAttachDomainDevices(driver, vm->def);

    for (i = 0; i < def->nnets; i++) {
        virDomainNetDef *net = def->nets[i];
        vport = virDomainNetGetActualVirtPortProfile(net);
        switch (virDomainNetGetActualType(net)) {
        case VIR_DOMAIN_NET_TYPE_DIRECT:
            ignore_value(virNetDevMacVLanDeleteWithVPortProfile(
                             net->ifname, &net->mac,
                             virDomainNetGetActualDirectDev(net),
                             virDomainNetGetActualDirectMode(net),
                             virDomainNetGetActualVirtPortProfile(net),
                             cfg->stateDir));
            break;
        case VIR_DOMAIN_NET_TYPE_ETHERNET:
            if (net->managed_tap != VIR_TRISTATE_BOOL_NO && net->ifname) {
                ignore_value(virNetDevTapDelete(net->ifname, net->backend.tap));
                VIR_FREE(net->ifname);
            }
            break;
        case VIR_DOMAIN_NET_TYPE_BRIDGE:
        case VIR_DOMAIN_NET_TYPE_NETWORK:
#ifdef VIR_NETDEV_TAP_REQUIRE_MANUAL_CLEANUP
            if (!(vport && vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH))
                ignore_value(virNetDevTapDelete(net->ifname, net->backend.tap));
#endif
            break;
        case VIR_DOMAIN_NET_TYPE_USER:
        case VIR_DOMAIN_NET_TYPE_VHOSTUSER:
        case VIR_DOMAIN_NET_TYPE_SERVER:
        case VIR_DOMAIN_NET_TYPE_CLIENT:
        case VIR_DOMAIN_NET_TYPE_MCAST:
        case VIR_DOMAIN_NET_TYPE_INTERNAL:
        case VIR_DOMAIN_NET_TYPE_HOSTDEV:
        case VIR_DOMAIN_NET_TYPE_UDP:
        case VIR_DOMAIN_NET_TYPE_VDPA:
        case VIR_DOMAIN_NET_TYPE_LAST:
            /* No special cleanup procedure for these types. */
            break;
        }
        /* release the physical device (or any other resources used by
         * this interface in the network driver
         */
        if (vport) {
            if (vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_MIDONET) {
                ignore_value(virNetDevMidonetUnbindPort(vport));
            } else if (vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH) {
                ignore_value(virNetDevOpenvswitchRemovePort(
                                 virDomainNetGetActualBridgeName(net),
                                 net->ifname));
            }
        }

        /* kick the device out of the hostdev list too */
        virDomainNetRemoveHostdev(def, net);
        if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
            /* The network connection is opened on first use and reused for
             * the remaining interfaces. */
            if (conn || (conn = virGetConnectNetwork()))
                virDomainNetReleaseActualDevice(conn, vm->def, net);
            else
                VIR_WARN("Unable to release network device '%s'", NULLSTR(net->ifname));
        }

        if (virDomainNetDefIsOvsport(net) &&
            virNetDevOpenvswitchInterfaceClearQos(net->ifname, vm->def->uuid) < 0) {
            VIR_WARN("cannot clear bandwidth setting for ovs device : %s",
                     net->ifname);
        }
    }

    /* A busy cgroup is retried up to five times, 200ms apart, before
     * giving up with a warning. */
 retry:
    if ((ret = qemuRemoveCgroup(vm)) < 0) {
        if (ret == -EBUSY && (retries++ < 5)) {
            g_usleep(200*1000);
            goto retry;
        }
        VIR_WARN("Failed to remove cgroup for %s",
                 vm->def->name);
    }

    /* Remove resctrl allocation after cgroups are cleaned up which makes it
     * kind of safer (although removing the allocation should work even with
     * pids in tasks file */
    for (i = 0; i < vm->def->nresctrls; i++) {
        size_t j = 0;

        for (j = 0; j < vm->def->resctrls[i]->nmonitors; j++) {
            virDomainResctrlMonDef *mon = NULL;

            mon = vm->def->resctrls[i]->monitors[j];
            virResctrlMonitorRemove(mon->instance);
        }

        virResctrlAllocRemove(vm->def->resctrls[i]->alloc);
    }

    qemuProcessRemoveDomainStatus(driver, vm);

    /* Remove VNC and Spice ports from port reservation bitmap, but only if
       they were reserved by the driver (autoport=yes)
    */
    for (i = 0; i < vm->def->ngraphics; ++i) {
        virDomainGraphicsDef *graphics = vm->def->graphics[i];
        if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) {
            if (graphics->data.vnc.autoport) {
                virPortAllocatorRelease(graphics->data.vnc.port);
            } else if (graphics->data.vnc.portReserved) {
                virPortAllocatorRelease(graphics->data.vnc.port);
                graphics->data.vnc.portReserved = false;
            }
            if (graphics->data.vnc.websocketGenerated) {
                virPortAllocatorRelease(graphics->data.vnc.websocket);
                graphics->data.vnc.websocketGenerated = false;
                graphics->data.vnc.websocket = -1;
            } else if (graphics->data.vnc.websocket) {
                virPortAllocatorRelease(graphics->data.vnc.websocket);
            }
        }
        if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) {
            if (graphics->data.spice.autoport) {
                virPortAllocatorRelease(graphics->data.spice.port);
                virPortAllocatorRelease(graphics->data.spice.tlsPort);
            } else {
                if (graphics->data.spice.portReserved) {
                    virPortAllocatorRelease(graphics->data.spice.port);
                    graphics->data.spice.portReserved = false;
                }

                if (graphics->data.spice.tlsPortReserved) {
                    virPortAllocatorRelease(graphics->data.spice.tlsPort);
                    graphics->data.spice.tlsPortReserved = false;
                }
            }
        }
    }

    /* Reset the per-run bookkeeping kept on the domain object. */
    for (i = 0; i < vm->ndeprecations; i++)
        g_free(vm->deprecations[i]);
    g_free(vm->deprecations);
    vm->ndeprecations = 0;
    vm->deprecations = NULL;
    vm->taint = 0;
    vm->pid = -1;
    virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason);
    for (i = 0; i < vm->def->niothreadids; i++)
        vm->def->iothreadids[i]->thread_id = 0;

    /* clean up a possible backup job */
    if (priv->backup)
        qemuBackupJobTerminate(vm, QEMU_DOMAIN_JOB_STATUS_CANCELED);

    /* Do this explicitly after vm->pid is reset so that security drivers don't
     * try to enter the domain's namespace which is non-existent by now as qemu
     * is no longer running. */
    if (!(flags & VIR_QEMU_PROCESS_STOP_NO_RELABEL)) {
        for (i = 0; i < def->ndisks; i++) {
            virDomainDiskDef *disk = def->disks[i];

            if (disk->mirror) {
                if (qemuSecurityRestoreImageLabel(driver, vm, disk->mirror, false) < 0)
                    VIR_WARN("Unable to restore security label on %s", disk->dst);

                if (virStorageSourceChainHasNVMe(disk->mirror))
                    qemuHostdevReAttachOneNVMeDisk(driver, vm->def->name, disk->mirror);
            }

            qemuBlockRemoveImageMetadata(driver, vm, disk->dst, disk->src);

            /* for now transient disks are forbidden with migration so they
             * can be handled here */
            if (disk->transient &&
                QEMU_DOMAIN_DISK_PRIVATE(disk)->transientOverlayCreated) {
                VIR_DEBUG("Removing transient overlay '%s' of disk '%s'",
                          disk->src->path, disk->dst);
                if (qemuDomainStorageFileInit(driver, vm, disk->src, NULL) >= 0) {
                    virStorageSourceUnlink(disk->src);
                    virStorageSourceDeinit(disk->src);
                }
            }
        }
    }

    qemuSecurityReleaseLabel(driver->securityManager, vm->def);

    /* clear all private data entries which are no longer needed */
    qemuDomainObjPrivateDataClear(priv);

    /* The "release" hook cleans up additional resources */
    if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
        g_autofree char *xml = qemuDomainDefFormatXML(driver, NULL, vm->def, 0);

        /* we can't stop the operation even if the script raised an error */
        ignore_value(virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
                                 VIR_HOOK_QEMU_OP_RELEASE, VIR_HOOK_SUBOP_END,
                                 NULL, xml, NULL));
    }

    virDomainObjRemoveTransientDef(vm);

 endjob:
    /* Only end a job that was begun here (the nested job). */
    if (asyncJob != QEMU_ASYNC_JOB_NONE)
        qemuDomainObjEndJob(driver, vm);

 cleanup:
    virErrorRestore(&orig_err);
}
8257 
8258 
8259 static void
qemuProcessAutoDestroy(virDomainObj * dom,virConnectPtr conn,void * opaque)8260 qemuProcessAutoDestroy(virDomainObj *dom,
8261                        virConnectPtr conn,
8262                        void *opaque)
8263 {
8264     virQEMUDriver *driver = opaque;
8265     qemuDomainObjPrivate *priv = dom->privateData;
8266     virObjectEvent *event = NULL;
8267     unsigned int stopFlags = 0;
8268 
8269     VIR_DEBUG("vm=%s, conn=%p", dom->def->name, conn);
8270 
8271     if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_IN)
8272         stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
8273 
8274     if (priv->job.asyncJob) {
8275         VIR_DEBUG("vm=%s has long-term job active, cancelling",
8276                   dom->def->name);
8277         qemuDomainObjDiscardAsyncJob(driver, dom);
8278     }
8279 
8280     VIR_DEBUG("Killing domain");
8281 
8282     if (qemuProcessBeginStopJob(driver, dom, QEMU_JOB_DESTROY, true) < 0)
8283         return;
8284 
8285     qemuProcessStop(driver, dom, VIR_DOMAIN_SHUTOFF_DESTROYED,
8286                     QEMU_ASYNC_JOB_NONE, stopFlags);
8287 
8288     virDomainAuditStop(dom, "destroyed");
8289     event = virDomainEventLifecycleNewFromObj(dom,
8290                                      VIR_DOMAIN_EVENT_STOPPED,
8291                                      VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
8292 
8293     qemuDomainRemoveInactive(driver, dom);
8294 
8295     qemuDomainObjEndJob(driver, dom);
8296 
8297     virObjectEventStateQueue(driver->domainEventState, event);
8298 }
8299 
/* Registers qemuProcessAutoDestroy as the close callback of @vm for @conn;
 * returns the result of virCloseCallbacksSet(). */
int qemuProcessAutoDestroyAdd(virQEMUDriver *driver,
                              virDomainObj *vm,
                              virConnectPtr conn)
{
    int rc;

    VIR_DEBUG("vm=%s, conn=%p", vm->def->name, conn);

    rc = virCloseCallbacksSet(driver->closeCallbacks, vm, conn,
                              qemuProcessAutoDestroy);
    return rc;
}
8308 
/* Unregisters the qemuProcessAutoDestroy close callback of @vm; returns the
 * result of virCloseCallbacksUnset(). */
int qemuProcessAutoDestroyRemove(virQEMUDriver *driver,
                                 virDomainObj *vm)
{
    int rc;

    VIR_DEBUG("vm=%s", vm->def->name);

    rc = virCloseCallbacksUnset(driver->closeCallbacks, vm,
                                qemuProcessAutoDestroy);
    return rc;
}
8316 
/* Returns true when the close callback currently registered for @vm is
 * qemuProcessAutoDestroy. */
bool qemuProcessAutoDestroyActive(virQEMUDriver *driver,
                                  virDomainObj *vm)
{
    VIR_DEBUG("vm=%s", vm->def->name);

    return virCloseCallbacksGet(driver->closeCallbacks, vm, NULL) ==
           qemuProcessAutoDestroy;
}
8325 
8326 
8327 int
qemuProcessRefreshDisks(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob)8328 qemuProcessRefreshDisks(virQEMUDriver *driver,
8329                         virDomainObj *vm,
8330                         qemuDomainAsyncJob asyncJob)
8331 {
8332     qemuDomainObjPrivate *priv = vm->privateData;
8333     bool blockdev = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV);
8334     GHashTable *table = NULL;
8335     int ret = -1;
8336     size_t i;
8337 
8338     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) == 0) {
8339         table = qemuMonitorGetBlockInfo(priv->mon);
8340         if (qemuDomainObjExitMonitor(driver, vm) < 0)
8341             goto cleanup;
8342     }
8343 
8344     if (!table)
8345         goto cleanup;
8346 
8347     for (i = 0; i < vm->def->ndisks; i++) {
8348         virDomainDiskDef *disk = vm->def->disks[i];
8349         qemuDomainDiskPrivate *diskpriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
8350         struct qemuDomainDiskInfo *info;
8351         const char *entryname = disk->info.alias;
8352 
8353         if (blockdev && diskpriv->qomName)
8354             entryname = diskpriv->qomName;
8355 
8356         if (!(info = virHashLookup(table, entryname)))
8357             continue;
8358 
8359         if (info->removable) {
8360             if (info->empty)
8361                 virDomainDiskEmptySource(disk);
8362 
8363             if (info->tray) {
8364                 if (info->tray_open)
8365                     disk->tray_status = VIR_DOMAIN_DISK_TRAY_OPEN;
8366                 else
8367                     disk->tray_status = VIR_DOMAIN_DISK_TRAY_CLOSED;
8368             }
8369         }
8370 
8371         /* fill in additional data */
8372         diskpriv->removable = info->removable;
8373         diskpriv->tray = info->tray;
8374     }
8375 
8376     ret = 0;
8377 
8378  cleanup:
8379     virHashFree(table);
8380     return ret;
8381 }
8382 
8383 
8384 static int
qemuProcessRefreshCPUMigratability(virQEMUDriver * driver,virDomainObj * vm,qemuDomainAsyncJob asyncJob)8385 qemuProcessRefreshCPUMigratability(virQEMUDriver *driver,
8386                                    virDomainObj *vm,
8387                                    qemuDomainAsyncJob asyncJob)
8388 {
8389     qemuDomainObjPrivate *priv = vm->privateData;
8390     virDomainDef *def = vm->def;
8391     bool migratable;
8392     int rc;
8393 
8394     if (def->cpu->mode != VIR_CPU_MODE_HOST_PASSTHROUGH &&
8395         def->cpu->mode != VIR_CPU_MODE_MAXIMUM)
8396         return 0;
8397 
8398     /* If the cpu.migratable capability is present, the migratable attribute
8399      * is set correctly. */
8400     if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_CPU_MIGRATABLE))
8401         return 0;
8402 
8403     if (!ARCH_IS_X86(def->os.arch))
8404         return 0;
8405 
8406     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
8407         return -1;
8408 
8409     rc = qemuMonitorGetCPUMigratable(priv->mon, &migratable);
8410 
8411     if (qemuDomainObjExitMonitor(driver, vm) < 0 || rc < 0)
8412         return -1;
8413 
8414     if (rc == 1)
8415         migratable = false;
8416 
8417     /* Libvirt 6.5.0 would set migratable='off' for running domains even though
8418      * the actual default used by QEMU was 'on'. */
8419     if (def->cpu->migratable == VIR_TRISTATE_SWITCH_OFF && migratable) {
8420         VIR_DEBUG("Fixing CPU migratable attribute");
8421         def->cpu->migratable = VIR_TRISTATE_SWITCH_ON;
8422     }
8423 
8424     if (def->cpu->migratable == VIR_TRISTATE_SWITCH_ABSENT)
8425         def->cpu->migratable = virTristateSwitchFromBool(migratable);
8426 
8427     return 0;
8428 }
8429 
8430 
8431 static int
qemuProcessRefreshCPU(virQEMUDriver * driver,virDomainObj * vm)8432 qemuProcessRefreshCPU(virQEMUDriver *driver,
8433                       virDomainObj *vm)
8434 {
8435     qemuDomainObjPrivate *priv = vm->privateData;
8436     g_autoptr(virCPUDef) host = NULL;
8437     g_autoptr(virCPUDef) hostmig = NULL;
8438     g_autoptr(virCPUDef) cpu = NULL;
8439 
8440     if (!virQEMUCapsGuestIsNative(driver->hostarch, vm->def->os.arch))
8441         return 0;
8442 
8443     if (!vm->def->cpu)
8444         return 0;
8445 
8446     if (qemuProcessRefreshCPUMigratability(driver, vm, QEMU_ASYNC_JOB_NONE) < 0)
8447         return -1;
8448 
8449     if (!(host = virQEMUDriverGetHostCPU(driver))) {
8450         virResetLastError();
8451         return 0;
8452     }
8453 
8454     /* If the domain with a host-model CPU was started by an old libvirt
8455      * (< 2.3) which didn't replace the CPU with a custom one, let's do it now
8456      * since the rest of our code does not really expect a host-model CPU in a
8457      * running domain.
8458      */
8459     if (vm->def->cpu->mode == VIR_CPU_MODE_HOST_MODEL) {
8460         /*
8461          * PSeries domains are able to run with host-model CPU by design,
8462          * even on Libvirt newer than 2.3, never replacing host-model with
8463          * custom in the virCPUUpdate() call. It is not needed to call
8464          * virCPUUpdate() and qemuProcessUpdateCPU() in this case.
8465          */
8466         if (qemuDomainIsPSeries(vm->def))
8467             return 0;
8468 
8469         if (!(hostmig = virCPUCopyMigratable(host->arch, host)))
8470             return -1;
8471 
8472         if (!(cpu = virCPUDefCopyWithoutModel(hostmig)) ||
8473             virCPUDefCopyModelFilter(cpu, hostmig, false,
8474                                      virQEMUCapsCPUFilterFeatures,
8475                                      &host->arch) < 0)
8476             return -1;
8477 
8478         if (virCPUUpdate(vm->def->os.arch, vm->def->cpu, cpu) < 0)
8479             return -1;
8480 
8481         if (qemuProcessUpdateCPU(driver, vm, QEMU_ASYNC_JOB_NONE) < 0)
8482             return -1;
8483     } else if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QUERY_CPU_MODEL_EXPANSION)) {
8484         /* We only try to fix CPUs when the libvirt/QEMU combo used to start
8485          * the domain did not know about query-cpu-model-expansion in which
8486          * case the host-model is known to not contain features which QEMU
8487          * doesn't know about.
8488          */
8489         if (qemuDomainFixupCPUs(vm, &priv->origCPU) < 0)
8490             return -1;
8491     }
8492 
8493     return 0;
8494 }
8495 
8496 
8497 static int
qemuProcessRefreshLegacyBlockjob(void * payload,const char * name,void * opaque)8498 qemuProcessRefreshLegacyBlockjob(void *payload,
8499                                  const char *name,
8500                                  void *opaque)
8501 {
8502     const char *jobname = name;
8503     virDomainObj *vm = opaque;
8504     qemuMonitorBlockJobInfo *info = payload;
8505     virDomainDiskDef *disk;
8506     qemuBlockJobData *job;
8507     qemuBlockJobType jobtype = info->type;
8508     qemuDomainObjPrivate *priv = vm->privateData;
8509 
8510     if (!(disk = qemuProcessFindDomainDiskByAliasOrQOM(vm, jobname, jobname))) {
8511         VIR_DEBUG("could not find disk for block job '%s'", jobname);
8512         return 0;
8513     }
8514 
8515     if (jobtype == QEMU_BLOCKJOB_TYPE_COMMIT &&
8516         disk->mirrorJob == VIR_DOMAIN_BLOCK_JOB_TYPE_ACTIVE_COMMIT)
8517         jobtype = disk->mirrorJob;
8518 
8519     if (!(job = qemuBlockJobDiskNew(vm, disk, jobtype, jobname)))
8520         return -1;
8521 
8522     if (disk->mirror) {
8523         if ((!info->ready_present && info->end == info->cur) ||
8524             info->ready) {
8525             disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_READY;
8526             job->state = VIR_DOMAIN_BLOCK_JOB_READY;
8527         }
8528 
8529         /* Pre-blockdev block copy labelled the chain of the mirrored device
8530          * just before pivoting. At that point it was no longer known whether
8531          * it's even necessary (e.g. disk is being reused). This code fixes
8532          * the labelling in case the job was started in a libvirt version
8533          * which did not label the chain when the block copy is being started.
8534          * Note that we can't do much on failure. */
8535         if (disk->mirrorJob == VIR_DOMAIN_BLOCK_JOB_TYPE_COPY) {
8536             if (qemuDomainDetermineDiskChain(priv->driver, vm, disk,
8537                                              disk->mirror, true) < 0)
8538                 goto cleanup;
8539 
8540             if (disk->mirror->format &&
8541                 disk->mirror->format != VIR_STORAGE_FILE_RAW &&
8542                 (qemuDomainNamespaceSetupDisk(vm, disk->mirror, NULL) < 0 ||
8543                  qemuSetupImageChainCgroup(vm, disk->mirror) < 0 ||
8544                  qemuSecuritySetImageLabel(priv->driver, vm, disk->mirror,
8545                                            true, true) < 0))
8546                 goto cleanup;
8547         }
8548     }
8549 
8550     qemuBlockJobStarted(job, vm);
8551 
8552  cleanup:
8553     qemuBlockJobStartupFinalize(vm, job);
8554 
8555     return 0;
8556 }
8557 
8558 
8559 static int
qemuProcessRefreshLegacyBlockjobs(virQEMUDriver * driver,virDomainObj * vm)8560 qemuProcessRefreshLegacyBlockjobs(virQEMUDriver *driver,
8561                                   virDomainObj *vm)
8562 {
8563     GHashTable *blockJobs = NULL;
8564     int ret = -1;
8565 
8566     qemuDomainObjEnterMonitor(driver, vm);
8567     blockJobs = qemuMonitorGetAllBlockJobInfo(qemuDomainGetMonitor(vm), true);
8568     if (qemuDomainObjExitMonitor(driver, vm) < 0 || !blockJobs)
8569         goto cleanup;
8570 
8571     if (virHashForEach(blockJobs, qemuProcessRefreshLegacyBlockjob, vm) < 0)
8572         goto cleanup;
8573 
8574     ret = 0;
8575 
8576  cleanup:
8577     virHashFree(blockJobs);
8578     return ret;
8579 }
8580 
8581 
8582 static int
qemuProcessRefreshBlockjobs(virQEMUDriver * driver,virDomainObj * vm)8583 qemuProcessRefreshBlockjobs(virQEMUDriver *driver,
8584                             virDomainObj *vm)
8585 {
8586     qemuDomainObjPrivate *priv = vm->privateData;
8587 
8588     if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV))
8589         return qemuBlockJobRefreshJobs(driver, vm);
8590     else
8591         return qemuProcessRefreshLegacyBlockjobs(driver, vm);
8592 }
8593 
8594 
/* Arguments passed to qemuProcessReconnect() through its opaque pointer. */
struct qemuProcessReconnectData {
    virQEMUDriver *driver; /* QEMU driver used for the reconnect */
    virDomainObj *obj; /* domain object to reconnect to */
    virIdentity *identity; /* identity set as current in the reconnect thread */
};
/*
 * Open an existing VM's monitor, re-detect VCPU threads
 * and re-reserve the security labels in use
 *
 * This function also inherits a locked and ref'd domain object.
 *
 * This function needs to:
 * 1. Enter job
 * 2. just before monitor reconnect do lightweight MonitorEnter
 *    (increase VM refcount and unlock VM)
 * 3. reconnect to monitor
 * 4. do lightweight MonitorExit (lock VM)
 * 5. continue reconnect process
 * 6. EndJob
 *
 * We can't do normal MonitorEnter & MonitorExit because these two lock the
 * monitor lock, which does not exist in this early phase.
 *
 * @opaque is a struct qemuProcessReconnectData; it is consumed (its identity
 * reference cleared and the structure freed) before any other work is done.
 *
 * On any failure after which the domain is still active, the QEMU process is
 * stopped via qemuProcessStop().
 */
static void
qemuProcessReconnect(void *opaque)
{
    struct qemuProcessReconnectData *data = opaque;
    virQEMUDriver *driver = data->driver;
    virDomainObj *obj = data->obj;
    qemuDomainObjPrivate *priv;
    g_auto(qemuDomainJobObj) oldjob = {
      .cb = NULL,
    };
    int state;
    int reason;
    g_autoptr(virQEMUDriverConfig) cfg = NULL;
    size_t i;
    unsigned int stopFlags = 0;
    bool jobStarted = false;
    bool retry = true;
    bool tryMonReconn = false;

    /* driver and obj were copied out above, so the argument structure can be
     * released right away */
    virIdentitySetCurrent(data->identity);
    g_clear_object(&data->identity);
    VIR_FREE(data);

    cfg = virQEMUDriverGetConfig(driver);
    priv = obj->privateData;

    /* remember the job that was active before; it is passed to
     * qemuProcessRecoverJob() further below */
    qemuDomainObjRestoreJob(obj, &oldjob);
    if (oldjob.asyncJob == QEMU_ASYNC_JOB_MIGRATION_IN)
        stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
    if (oldjob.asyncJob == QEMU_ASYNC_JOB_BACKUP && priv->backup)
        priv->backup->apiFlags = oldjob.apiFlags;

    if (qemuDomainObjBeginJob(driver, obj, QEMU_JOB_MODIFY) < 0)
        goto error;
    jobStarted = true;

    /* XXX If we ever gonna change pid file pattern, come up with
     * some intelligence here to deal with old paths. */
    if (!(priv->pidfile = virPidFileBuildPath(cfg->stateDir, obj->def->name)))
        goto error;

    /* Restore the masterKey */
    if (qemuDomainMasterKeyReadFile(priv) < 0)
        goto error;

    /* If we are connecting to a guest started by old libvirt there is no
     * allowReboot in status XML and we need to initialize it. */
    qemuProcessPrepareAllowReboot(obj);

    if (qemuHostdevUpdateActiveDomainDevices(driver, obj->def) < 0)
        goto error;

    if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_CHARDEV_FD_PASS_COMMANDLINE))
        retry = false;

    if (qemuDomainObjStartWorker(obj) < 0)
        goto error;

    VIR_DEBUG("Reconnect monitor to def=%p name='%s' retry=%d",
              obj, obj->def->name, retry);

    /* from now on a missing monitor in the error path means the reconnect
     * attempt itself failed */
    tryMonReconn = true;

    /* XXX check PID liveliness & EXE path */
    if (qemuConnectMonitor(driver, obj, QEMU_ASYNC_JOB_NONE, retry, NULL) < 0)
        goto error;

    priv->machineName = qemuDomainGetMachineName(obj);
    if (!priv->machineName)
        goto error;

    if (qemuConnectCgroup(obj) < 0)
        goto error;

    if (qemuDomainPerfRestart(obj) < 0)
        goto error;

    /* recreate the pflash storage sources */
    if (qemuDomainInitializePflashStorageSource(obj) < 0)
        goto error;

    /* XXX: Need to change as long as lock is introduced for
     * qemu_driver->sharedDevices.
     */
    for (i = 0; i < obj->def->ndisks; i++) {
        virDomainDiskDef *disk = obj->def->disks[i];
        virDomainDeviceDef dev;

        if (virDomainDiskTranslateSourcePool(disk) < 0)
            goto error;

        /* backing chains need to be refreshed only if they could change */
        if (priv->reconnectBlockjobs != VIR_TRISTATE_BOOL_NO &&
            !virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV)) {
            /* This should be the only place that calls
             * qemuDomainDetermineDiskChain with @report_broken == false
             * to guarantee best-effort domain reconnect */
            virStorageSourceBackingStoreClear(disk->src);
            if (qemuDomainDetermineDiskChain(driver, obj, disk, NULL, false) < 0)
                goto error;
        } else {
            VIR_DEBUG("skipping backing chain detection for '%s'", disk->dst);
        }

        dev.type = VIR_DOMAIN_DEVICE_DISK;
        dev.data.disk = disk;
        if (qemuAddSharedDevice(driver, &dev, obj->def->name) < 0)
            goto error;
    }

    for (i = 0; i < obj->def->ngraphics; i++) {
        if (qemuProcessGraphicsReservePorts(obj->def->graphics[i], true) < 0)
            goto error;
    }

    if (qemuProcessUpdateState(driver, obj) < 0)
        goto error;

    state = virDomainObjGetState(obj, &reason);
    if (state == VIR_DOMAIN_SHUTOFF ||
        (state == VIR_DOMAIN_PAUSED &&
         reason == VIR_DOMAIN_PAUSED_STARTING_UP)) {
        VIR_DEBUG("Domain '%s' wasn't fully started yet, killing it",
                  obj->def->name);
        goto error;
    }

    if (!priv->qemuCaps) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("domain '%s' has no capabilities recorded"),
                       obj->def->name);
        goto error;
    }

    /* In case the domain shutdown or fake reboot while we were not running,
     * we need to finish the shutdown or fake reboot process. And we need to
     * do it after we have virQEMUCaps filled in.
     */
    if (state == VIR_DOMAIN_SHUTDOWN ||
        (state == VIR_DOMAIN_PAUSED &&
         reason == VIR_DOMAIN_PAUSED_SHUTTING_DOWN) ||
        (priv->fakeReboot && state == VIR_DOMAIN_PAUSED &&
         reason == VIR_DOMAIN_PAUSED_USER)) {
        VIR_DEBUG("Finishing shutdown sequence for domain %s",
                  obj->def->name);
        qemuProcessShutdownOrReboot(driver, obj);
        goto cleanup;
    }

    if (qemuProcessBuildDestroyMemoryPaths(driver, obj, NULL, true) < 0)
        goto error;

    if ((qemuDomainAssignAddresses(obj->def, priv->qemuCaps,
                                   driver, obj, false)) < 0) {
        goto error;
    }

    /* if domain requests security driver we haven't loaded, report error, but
     * do not kill the domain
     */
    ignore_value(qemuSecurityCheckAllLabel(driver->securityManager,
                                           obj->def));

    if (qemuProcessRefreshCPU(driver, obj) < 0)
        goto error;

    if (qemuDomainRefreshVcpuInfo(driver, obj, QEMU_ASYNC_JOB_NONE, true) < 0)
        goto error;

    qemuDomainVcpuPersistOrder(obj->def);

    if (qemuDomainUpdateMemoryDeviceInfo(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
        goto error;

    if (qemuProcessDetectIOThreadPIDs(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
        goto error;

    if (qemuSecurityReserveLabel(driver->securityManager, obj->def, obj->pid) < 0)
        goto error;

    qemuProcessNotifyNets(obj->def);

    qemuProcessFiltersInstantiate(obj->def);

    if (qemuProcessRefreshDisks(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
        goto error;

    /* At this point we've already checked that the startup of the VM was
     * completed successfully before, thus that also implies that all transient
     * disk overlays were created. */
    for (i = 0; i < obj->def->ndisks; i++) {
        virDomainDiskDef *disk = obj->def->disks[i];

        if (disk->transient)
            QEMU_DOMAIN_DISK_PRIVATE(disk)->transientOverlayCreated = true;
    }

    if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV) &&
        qemuBlockNodeNamesDetect(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
        goto error;

    if (qemuRefreshVirtioChannelState(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
        goto error;

    /* If querying of guest's RTC failed, report error, but do not kill the domain. */
    qemuRefreshRTC(driver, obj);

    if (qemuProcessRefreshBalloonState(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
        goto error;

    if (qemuProcessRecoverJob(driver, obj, &oldjob, &stopFlags) < 0)
        goto error;

    if (qemuProcessRefreshBlockjobs(driver, obj) < 0)
        goto error;

    if (qemuProcessUpdateDevices(driver, obj) < 0)
        goto error;

    if (qemuRefreshPRManagerState(driver, obj) < 0)
        goto error;

    qemuProcessReconnectCheckMemAliasOrderMismatch(obj);

    if (qemuConnectAgent(driver, obj) < 0)
        goto error;

    /* recompute resctrl allocation and monitor paths from the machine name */
    for (i = 0; i < obj->def->nresctrls; i++) {
        size_t j = 0;

        if (virResctrlAllocDeterminePath(obj->def->resctrls[i]->alloc,
                                         priv->machineName) < 0)
            goto error;

        for (j = 0; j < obj->def->resctrls[i]->nmonitors; j++) {
            virDomainResctrlMonDef *mon = NULL;

            mon = obj->def->resctrls[i]->monitors[j];
            if (virResctrlMonitorDeterminePath(mon->instance,
                                               priv->machineName) < 0)
                goto error;
        }
    }

    /* update domain state XML with possibly updated state in virDomainObj */
    if (virDomainObjSave(obj, driver->xmlopt, cfg->stateDir) < 0)
        goto error;

    /* Run a hook to allow admins to do some magic */
    if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
        g_autofree char *xml = qemuDomainDefFormatXML(driver,
                                                          priv->qemuCaps,
                                                          obj->def, 0);
        int hookret;

        hookret = virHookCall(VIR_HOOK_DRIVER_QEMU, obj->def->name,
                              VIR_HOOK_QEMU_OP_RECONNECT, VIR_HOOK_SUBOP_BEGIN,
                              NULL, xml, NULL);

        /*
         * If the script raised an error, abort the reconnect
         */
        if (hookret < 0)
            goto error;
    }

    /* g_atomic_int_add() returns the value before the addition, so the
     * inhibit callback fires only when the counter was previously zero */
    if (g_atomic_int_add(&driver->nactive, 1) == 0 && driver->inhibitCallback)
        driver->inhibitCallback(true, driver->inhibitOpaque);

 cleanup:
    /* Reached on success, after finishing a pending shutdown, and from the
     * error path below. A domain that is no longer active is removed, using
     * the variant matching whether we hold a job. */
    if (jobStarted) {
        if (!virDomainObjIsActive(obj))
            qemuDomainRemoveInactive(driver, obj);
        qemuDomainObjEndJob(driver, obj);
    } else {
        if (!virDomainObjIsActive(obj))
            qemuDomainRemoveInactiveJob(driver, obj);
    }
    virDomainObjEndAPI(&obj);
    virNWFilterUnlockFilterUpdates();
    virIdentitySetCurrent(NULL);
    return;

 error:
    if (virDomainObjIsActive(obj)) {
        /* We can't get the monitor back, so must kill the VM
         * to remove danger of it ending up running twice if
         * user tries to start it again later.
         *
         * If we cannot get to the monitor when the QEMU command
         * line used -no-shutdown, then we can safely say that the
         * domain crashed; otherwise, if the monitor was started,
         * then we can blame ourselves, else we failed before the
         * monitor started so we don't really know. */
        if (!priv->mon && tryMonReconn &&
            (priv->allowReboot == VIR_TRISTATE_BOOL_YES ||
             virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_SET_ACTION)))
            state = VIR_DOMAIN_SHUTOFF_CRASHED;
        else if (priv->mon)
            state = VIR_DOMAIN_SHUTOFF_DAEMON;
        else
            state = VIR_DOMAIN_SHUTOFF_UNKNOWN;

        /* If BeginJob failed, we jumped here without a job, let's hope another
         * thread didn't have a chance to start playing with the domain yet
         * (it's all we can do anyway).
         */
        qemuProcessStop(driver, obj, state, QEMU_ASYNC_JOB_NONE, stopFlags);
    }
    goto cleanup;
}
8929 
8930 static int
qemuProcessReconnectHelper(virDomainObj * obj,void * opaque)8931 qemuProcessReconnectHelper(virDomainObj *obj,
8932                            void *opaque)
8933 {
8934     virThread thread;
8935     struct qemuProcessReconnectData *src = opaque;
8936     struct qemuProcessReconnectData *data;
8937     g_autofree char *name = NULL;
8938 
8939     /* If the VM was inactive, we don't need to reconnect */
8940     if (!obj->pid)
8941         return 0;
8942 
8943     data = g_new0(struct qemuProcessReconnectData, 1);
8944 
8945     memcpy(data, src, sizeof(*data));
8946     data->obj = obj;
8947     data->identity = virIdentityGetCurrent();
8948 
8949     virNWFilterReadLockFilterUpdates();
8950 
8951     /* this lock and reference will be eventually transferred to the thread
8952      * that handles the reconnect */
8953     virObjectLock(obj);
8954     virObjectRef(obj);
8955 
8956     name = g_strdup_printf("init-%s", obj->def->name);
8957 
8958     if (virThreadCreateFull(&thread, false, qemuProcessReconnect,
8959                             name, false, data) < 0) {
8960         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
8961                        _("Could not create thread. QEMU initialization "
8962                          "might be incomplete"));
8963         /* We can't spawn a thread and thus connect to monitor. Kill qemu.
8964          * It's safe to call qemuProcessStop without a job here since there
8965          * is no thread that could be doing anything else with the same domain
8966          * object.
8967          */
8968         qemuProcessStop(src->driver, obj, VIR_DOMAIN_SHUTOFF_FAILED,
8969                         QEMU_ASYNC_JOB_NONE, 0);
8970         qemuDomainRemoveInactiveJobLocked(src->driver, obj);
8971 
8972         virDomainObjEndAPI(&obj);
8973         virNWFilterUnlockFilterUpdates();
8974         g_clear_object(&data->identity);
8975         VIR_FREE(data);
8976         return -1;
8977     }
8978 
8979     return 0;
8980 }
8981 
8982 /**
8983  * qemuProcessReconnectAll
8984  *
8985  * Try to re-open the resources for live VMs that we care
8986  * about.
8987  */
8988 void
qemuProcessReconnectAll(virQEMUDriver * driver)8989 qemuProcessReconnectAll(virQEMUDriver *driver)
8990 {
8991     struct qemuProcessReconnectData data = {.driver = driver};
8992     virDomainObjListForEach(driver->domains, true,
8993                             qemuProcessReconnectHelper, &data);
8994 }
8995 
8996 
/* Monitor notification handler which deliberately ignores the event; all
 * arguments are unused. */
static void
virQEMUCapsMonitorNotify(qemuMonitor *mon G_GNUC_UNUSED,
                         virDomainObj *vm G_GNUC_UNUSED,
                         void *opaque G_GNUC_UNUSED)
{
    /* nothing to do */
}
9002 
/* Monitor callback table routing both EOF and error notifications to the
 * no-op virQEMUCapsMonitorNotify() handler. */
static qemuMonitorCallbacks callbacks = {
    .eofNotify = virQEMUCapsMonitorNotify,
    .errorNotify = virQEMUCapsMonitorNotify,
};
9007 
9008 
9009 static void
qemuProcessQMPStop(qemuProcessQMP * proc)9010 qemuProcessQMPStop(qemuProcessQMP *proc)
9011 {
9012     if (proc->mon) {
9013         virObjectUnlock(proc->mon);
9014         qemuMonitorClose(proc->mon);
9015         proc->mon = NULL;
9016     }
9017 
9018     if (proc->cmd) {
9019         virCommandAbort(proc->cmd);
9020         virCommandFree(proc->cmd);
9021         proc->cmd = NULL;
9022     }
9023 
9024     if (proc->monpath)
9025         unlink(proc->monpath);
9026 
9027     virDomainObjEndAPI(&proc->vm);
9028 
9029     if (proc->pid != 0) {
9030         VIR_DEBUG("Killing QMP caps process %lld", (long long)proc->pid);
9031         if (virProcessKill(proc->pid, SIGKILL) < 0 && errno != ESRCH)
9032             VIR_ERROR(_("Failed to kill process %lld: %s"),
9033                       (long long)proc->pid,
9034                       g_strerror(errno));
9035 
9036         proc->pid = 0;
9037     }
9038 
9039     if (proc->pidfile)
9040         unlink(proc->pidfile);
9041 
9042     if (proc->uniqDir)
9043         rmdir(proc->uniqDir);
9044 }
9045 
9046 
9047 /**
9048  * qemuProcessQMPFree:
9049  * @proc: Stores process and connection state
9050  *
9051  * Kill QEMU process and free process data structure.
9052  */
9053 void
qemuProcessQMPFree(qemuProcessQMP * proc)9054 qemuProcessQMPFree(qemuProcessQMP *proc)
9055 {
9056     if (!proc)
9057         return;
9058 
9059     qemuProcessQMPStop(proc);
9060 
9061     g_object_unref(proc->eventThread);
9062 
9063     g_free(proc->binary);
9064     g_free(proc->libDir);
9065     g_free(proc->uniqDir);
9066     g_free(proc->monpath);
9067     g_free(proc->monarg);
9068     g_free(proc->pidfile);
9069     g_free(proc->stdErr);
9070     g_free(proc);
9071 }
9072 
9073 
9074 /**
9075  * qemuProcessQMPNew:
9076  * @binary: QEMU binary
9077  * @libDir: Directory for process and connection artifacts
9078  * @runUid: UserId for QEMU process
9079  * @runGid: GroupId for QEMU process
9080  * @forceTCG: Force TCG mode if true
9081  *
9082  * Allocate and initialize domain structure encapsulating QEMU process state
9083  * and monitor connection for completing QMP queries.
9084  */
9085 qemuProcessQMP *
qemuProcessQMPNew(const char * binary,const char * libDir,uid_t runUid,gid_t runGid,bool forceTCG)9086 qemuProcessQMPNew(const char *binary,
9087                   const char *libDir,
9088                   uid_t runUid,
9089                   gid_t runGid,
9090                   bool forceTCG)
9091 {
9092     g_autoptr(qemuProcessQMP) proc = NULL;
9093     const char *threadSuffix;
9094     g_autofree char *threadName = NULL;
9095 
9096     VIR_DEBUG("exec=%s, libDir=%s, runUid=%u, runGid=%u, forceTCG=%d",
9097               binary, libDir, runUid, runGid, forceTCG);
9098 
9099     proc = g_new0(qemuProcessQMP, 1);
9100 
9101     proc->binary = g_strdup(binary);
9102     proc->libDir = g_strdup(libDir);
9103 
9104     proc->runUid = runUid;
9105     proc->runGid = runGid;
9106     proc->forceTCG = forceTCG;
9107 
9108     threadSuffix = strrchr(binary, '-');
9109     if (threadSuffix)
9110         threadSuffix++;
9111     else
9112         threadSuffix = binary;
9113     threadName = g_strdup_printf("qmp-%s", threadSuffix);
9114 
9115     if (!(proc->eventThread = virEventThreadNew(threadName)))
9116         return NULL;
9117 
9118     return g_steal_pointer(&proc);
9119 }
9120 
9121 
9122 static int
qemuProcessQEMULabelUniqPath(qemuProcessQMP * proc)9123 qemuProcessQEMULabelUniqPath(qemuProcessQMP *proc)
9124 {
9125     /* We cannot use the security driver here, but we should not need to. */
9126     if (chown(proc->uniqDir, proc->runUid, -1) < 0) {
9127         virReportSystemError(errno,
9128                              _("Cannot chown uniq path: %s"),
9129                              proc->uniqDir);
9130         return -1;
9131     }
9132 
9133     return 0;
9134 }
9135 
9136 
9137 static int
qemuProcessQMPInit(qemuProcessQMP * proc)9138 qemuProcessQMPInit(qemuProcessQMP *proc)
9139 {
9140     g_autofree char *template = NULL;
9141 
9142     VIR_DEBUG("proc=%p, emulator=%s", proc, proc->binary);
9143 
9144     template = g_strdup_printf("%s/qmp-XXXXXX", proc->libDir);
9145 
9146     if (!(proc->uniqDir = g_mkdtemp(template))) {
9147         virReportSystemError(errno,
9148                              _("Failed to create unique directory with "
9149                                "template '%s' for probing QEMU"),
9150                              template);
9151         return -1;
9152     }
9153     /* if g_mkdtemp succeeds, proc->uniqDir is now the owner of
9154      * the string. Set template to NULL to avoid freeing
9155      * the memory in this case */
9156     template = NULL;
9157 
9158     if (qemuProcessQEMULabelUniqPath(proc) < 0)
9159         return -1;
9160 
9161     proc->monpath = g_strdup_printf("%s/%s", proc->uniqDir, "qmp.monitor");
9162 
9163     proc->monarg = g_strdup_printf("unix:%s,server=on,wait=off", proc->monpath);
9164 
9165     /*
9166      * Normally we'd use runDir for pid files, but because we're using
9167      * -daemonize we need QEMU to be allowed to create them, rather
9168      * than libvirtd. So we're using libDir which QEMU can write to
9169      */
9170     proc->pidfile = g_strdup_printf("%s/%s", proc->uniqDir, "qmp.pid");
9171 
9172     return 0;
9173 }
9174 
9175 
9176 static int
qemuProcessQMPLaunch(qemuProcessQMP * proc)9177 qemuProcessQMPLaunch(qemuProcessQMP *proc)
9178 {
9179     const char *machine;
9180     int status = 0;
9181     int rc;
9182 
9183     if (proc->forceTCG)
9184         machine = "none,accel=tcg";
9185     else
9186         machine = "none,accel=nvmm:kvm:tcg";
9187 
9188     VIR_DEBUG("Try to probe capabilities of '%s' via QMP, machine %s",
9189               proc->binary, machine);
9190 
9191     /*
9192      * We explicitly need to use -daemonize here, rather than
9193      * virCommandDaemonize, because we need to synchronize
9194      * with QEMU creating its monitor socket API. Using
9195      * daemonize guarantees control won't return to libvirt
9196      * until the socket is present.
9197      */
9198     proc->cmd = virCommandNewArgList(proc->binary,
9199                                      "-S",
9200                                      "-no-user-config",
9201                                      "-nodefaults",
9202                                      "-nographic",
9203                                      "-machine", machine,
9204                                      "-qmp", proc->monarg,
9205                                      "-pidfile", proc->pidfile,
9206                                      "-daemonize",
9207                                     NULL);
9208     virCommandAddEnvPassCommon(proc->cmd);
9209     virCommandClearCaps(proc->cmd);
9210 
9211 #if WITH_CAPNG
9212     /* QEMU might run into permission issues, e.g. /dev/sev (0600), override
9213      * them just for the purpose of probing */
9214     if (geteuid() == 0)
9215         virCommandAllowCap(proc->cmd, CAP_DAC_OVERRIDE);
9216 #endif
9217 
9218     virCommandSetGID(proc->cmd, proc->runGid);
9219     virCommandSetUID(proc->cmd, proc->runUid);
9220 
9221     virCommandSetErrorBuffer(proc->cmd, &(proc->stdErr));
9222 
9223     if (virCommandRun(proc->cmd, &status) < 0)
9224         return -1;
9225 
9226     if (status != 0) {
9227         VIR_DEBUG("QEMU %s exited with status %d", proc->binary, status);
9228         virReportError(VIR_ERR_INTERNAL_ERROR,
9229                        _("Failed to start QEMU binary %s for probing: %s"),
9230                        proc->binary,
9231                        proc->stdErr ? proc->stdErr : _("unknown error"));
9232         return -1;
9233     }
9234 
9235     if ((rc = virPidFileReadPath(proc->pidfile, &proc->pid)) < 0) {
9236         virReportSystemError(-rc, _("Failed to read pidfile %s"), proc->pidfile);
9237         return -1;
9238     }
9239 
9240     return 0;
9241 }
9242 
9243 
9244 int
qemuProcessQMPInitMonitor(qemuMonitor * mon)9245 qemuProcessQMPInitMonitor(qemuMonitor *mon)
9246 {
9247     if (qemuMonitorSetCapabilities(mon) < 0) {
9248         VIR_DEBUG("Failed to set monitor capabilities %s",
9249                   virGetLastErrorMessage());
9250         return -1;
9251     }
9252 
9253     return 0;
9254 }
9255 
9256 
9257 static int
qemuProcessQMPConnectMonitor(qemuProcessQMP * proc)9258 qemuProcessQMPConnectMonitor(qemuProcessQMP *proc)
9259 {
9260     g_autoptr(virDomainXMLOption) xmlopt = NULL;
9261     virDomainChrSourceDef monConfig;
9262 
9263     VIR_DEBUG("proc=%p, emulator=%s, proc->pid=%lld",
9264               proc, proc->binary, (long long)proc->pid);
9265 
9266     monConfig.type = VIR_DOMAIN_CHR_TYPE_UNIX;
9267     monConfig.data.nix.path = proc->monpath;
9268     monConfig.data.nix.listen = false;
9269 
9270     if (!(xmlopt = virDomainXMLOptionNew(NULL, NULL, NULL, NULL, NULL)) ||
9271         !(proc->vm = virDomainObjNew(xmlopt)) ||
9272         !(proc->vm->def = virDomainDefNew(xmlopt)))
9273         return -1;
9274 
9275     proc->vm->pid = proc->pid;
9276 
9277     if (!(proc->mon = qemuMonitorOpen(proc->vm, &monConfig, true, 0,
9278                                       virEventThreadGetContext(proc->eventThread),
9279                                       &callbacks, NULL)))
9280         return -1;
9281 
9282     virObjectLock(proc->mon);
9283 
9284     if (qemuProcessQMPInitMonitor(proc->mon) < 0)
9285         return -1;
9286 
9287     return 0;
9288 }
9289 
9290 
9291 /**
9292  * qemuProcessQMPStart:
9293  * @proc: QEMU process and connection state created by qemuProcessQMPNew()
9294  *
9295  * Start and connect to QEMU binary so QMP queries can be made.
9296  *
9297  * Usage:
9298  *   proc = qemuProcessQMPNew(binary, libDir, runUid, runGid, forceTCG);
9299  *   qemuProcessQMPStart(proc);
9300  *   ** Send QMP Queries to QEMU using monitor (proc->mon) **
9301  *   qemuProcessQMPFree(proc);
9302  *
9303  * Process error output (proc->stdErr) remains available in qemuProcessQMP
9304  * struct until qemuProcessQMPFree is called.
9305  */
9306 int
qemuProcessQMPStart(qemuProcessQMP * proc)9307 qemuProcessQMPStart(qemuProcessQMP *proc)
9308 {
9309     VIR_DEBUG("proc=%p, emulator=%s", proc, proc->binary);
9310 
9311     if (qemuProcessQMPInit(proc) < 0)
9312         return -1;
9313 
9314     if (qemuProcessQMPLaunch(proc) < 0)
9315         return -1;
9316 
9317     if (qemuProcessQMPConnectMonitor(proc) < 0)
9318         return -1;
9319 
9320     return 0;
9321 }
9322