1 /*
2 * qemu_process.c: QEMU process management
3 *
4 * Copyright (C) 2006-2016 Red Hat, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library. If not, see
18 * <http://www.gnu.org/licenses/>.
19 *
20 */
21
22 #include <config.h>
23
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <signal.h>
27 #include <sys/stat.h>
28 #if defined(__linux__)
29 # include <linux/capability.h>
30 #elif defined(__FreeBSD__) && !defined(__DragonFly__)
31 # include <sys/param.h>
32 # include <sys/cpuset.h>
33 #endif
34
35 #include <sys/utsname.h>
36
37 #if WITH_CAPNG
38 # include <cap-ng.h>
39 #endif
40
41 #include "qemu_process.h"
42 #define LIBVIRT_QEMU_PROCESSPRIV_H_ALLOW
43 #include "qemu_processpriv.h"
44 #include "qemu_alias.h"
45 #include "qemu_block.h"
46 #include "qemu_domain.h"
47 #include "qemu_domain_address.h"
48 #include "qemu_namespace.h"
49 #include "qemu_cgroup.h"
50 #include "qemu_capabilities.h"
51 #include "qemu_monitor.h"
52 #include "qemu_command.h"
53 #include "qemu_hostdev.h"
54 #include "qemu_hotplug.h"
55 #include "qemu_migration.h"
56 #include "qemu_migration_params.h"
57 #include "qemu_interface.h"
58 #include "qemu_security.h"
59 #include "qemu_extdevice.h"
60 #include "qemu_firmware.h"
61 #include "qemu_backup.h"
62 #include "qemu_dbus.h"
63 #include "qemu_snapshot.h"
64
65 #include "cpu/cpu.h"
66 #include "cpu/cpu_x86.h"
67 #include "datatypes.h"
68 #include "virlog.h"
69 #include "virerror.h"
70 #include "viralloc.h"
71 #include "virhook.h"
72 #include "virfile.h"
73 #include "virpidfile.h"
74 #include "virhostcpu.h"
75 #include "domain_audit.h"
76 #include "domain_nwfilter.h"
77 #include "domain_validate.h"
78 #include "locking/domain_lock.h"
79 #include "viruuid.h"
80 #include "virprocess.h"
81 #include "virtime.h"
82 #include "virnetdevtap.h"
83 #include "virnetdevopenvswitch.h"
84 #include "virnetdevmidonet.h"
85 #include "virbitmap.h"
86 #include "virnuma.h"
87 #include "virstring.h"
88 #include "virhostdev.h"
89 #include "virsecret.h"
90 #include "configmake.h"
91 #include "nwfilter_conf.h"
92 #include "netdev_bandwidth_conf.h"
93 #include "virresctrl.h"
94 #include "virvsock.h"
95 #include "viridentity.h"
96 #include "virthreadjob.h"
97 #include "virutil.h"
98 #include "storage_source.h"
99 #include "backup_conf.h"
100
101 #define VIR_FROM_THIS VIR_FROM_QEMU
102
103 VIR_LOG_INIT("qemu.qemu_process");
104
105 /**
106 * qemuProcessRemoveDomainStatus
107 *
108 * remove all state files of a domain from statedir
109 */
110 static void
111 qemuProcessRemoveDomainStatus(virQEMUDriver *driver,
112 virDomainObj *vm)
113 {
114 g_autofree char *file = NULL;
115 qemuDomainObjPrivate *priv = vm->privateData;
116 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
117
118 file = g_strdup_printf("%s/%s.xml", cfg->stateDir, vm->def->name);
119
120 if (unlink(file) < 0 && errno != ENOENT && errno != ENOTDIR)
121 VIR_WARN("Failed to remove domain XML for %s: %s",
122 vm->def->name, g_strerror(errno));
123
124 if (priv->pidfile &&
125 unlink(priv->pidfile) < 0 &&
126 errno != ENOENT)
127 VIR_WARN("Failed to remove PID file for %s: %s",
128 vm->def->name, g_strerror(errno));
129 }
130
131
132 /*
133 * This is a callback registered with a qemuAgent *instance,
134 * and to be invoked when the agent console hits an end of file
135 * condition, or error, thus indicating VM shutdown should be
136 * performed
137 */
138 static void
139 qemuProcessHandleAgentEOF(qemuAgent *agent,
140 virDomainObj *vm)
141 {
142 qemuDomainObjPrivate *priv;
143
144 VIR_DEBUG("Received EOF from agent on %p '%s'", vm, vm->def->name);
145
146 virObjectLock(vm);
147
148 priv = vm->privateData;
149
150 if (!priv->agent) {
151 VIR_DEBUG("Agent freed already");
152 goto unlock;
153 }
154
155 if (priv->beingDestroyed) {
156 VIR_DEBUG("Domain is being destroyed, agent EOF is expected");
157 goto unlock;
158 }
159
160 qemuAgentClose(agent);
161 priv->agent = NULL;
162 priv->agentError = false;
163
164 virObjectUnlock(vm);
165 return;
166
167 unlock:
168 virObjectUnlock(vm);
169 return;
170 }
171
172
173 /*
174 * This is invoked when there is some kind of error
175 * parsing data to/from the agent. The VM can continue
176 * to run, but no further agent commands will be
177 * allowed
178 */
179 static void
180 qemuProcessHandleAgentError(qemuAgent *agent G_GNUC_UNUSED,
181 virDomainObj *vm)
182 {
183 qemuDomainObjPrivate *priv;
184
185 VIR_DEBUG("Received error from agent on %p '%s'", vm, vm->def->name);
186
187 virObjectLock(vm);
188
189 priv = vm->privateData;
190
191 priv->agentError = true;
192
193 virObjectUnlock(vm);
194 }
195
196
197 static qemuAgentCallbacks agentCallbacks = {
198 .eofNotify = qemuProcessHandleAgentEOF,
199 .errorNotify = qemuProcessHandleAgentError,
200 };
201
202
203 int
204 qemuConnectAgent(virQEMUDriver *driver, virDomainObj *vm)
205 {
206 qemuDomainObjPrivate *priv = vm->privateData;
207 qemuAgent *agent = NULL;
208 virDomainChrDef *config = qemuFindAgentConfig(vm->def);
209
210 if (!config)
211 return 0;
212
213 if (priv->agent)
214 return 0;
215
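/* When QEMU can emit VSERPORT_CHANGE events, defer connecting to the agent
 * until the channel is reported as connected by the guest. */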
216 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VSERPORT_CHANGE) &&
217 config->state != VIR_DOMAIN_CHR_DEVICE_STATE_CONNECTED) {
218 VIR_DEBUG("Deferring connecting to guest agent");
219 return 0;
220 }
221
222 if (qemuSecuritySetDaemonSocketLabel(driver->securityManager, vm->def) < 0) {
223 VIR_ERROR(_("Failed to set security context for agent for %s"),
224 vm->def->name);
225 goto cleanup;
226 }
227
228 agent = qemuAgentOpen(vm,
229 config->source,
230 virEventThreadGetContext(priv->eventThread),
231 &agentCallbacks,
232 virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VSERPORT_CHANGE));
233
234 if (!virDomainObjIsActive(vm)) {
235 qemuAgentClose(agent);
236 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
237 _("guest crashed while connecting to the guest agent"));
238 return -1;
239 }
240
241 if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) {
242 VIR_ERROR(_("Failed to clear security context for agent for %s"),
243 vm->def->name);
244 qemuAgentClose(agent);
245 goto cleanup;
246 }
247
248 priv->agent = agent;
249 if (!priv->agent)
250 VIR_INFO("Failed to connect agent for %s", vm->def->name);
251
252 cleanup:
253 if (!priv->agent) {
254 VIR_WARN("Cannot connect to QEMU guest agent for %s", vm->def->name);
255 priv->agentError = true;
256 virResetLastError();
257 }
258
259 return 0;
260 }
261
262
263 /**
264 * qemuProcessEventSubmit:
265 * @driver: QEMU driver object
266 * @event: pointer to the variable holding the event processing data (stolen and cleared)
267 *
268 * Submits @event to be processed by the asynchronous event handling thread.
269 * If submission of the handling fails, @event is properly freed and
270 * cleared. If (*event)->vm is non-NULL the domain object is unref'd before freeing
271 * @event.
272 */
273 static void
274 qemuProcessEventSubmit(virQEMUDriver *driver,
275 struct qemuProcessEvent **event)
276 {
277 if (!*event)
278 return;
279
280 if (virThreadPoolSendJob(driver->workerPool, 0, *event) < 0) {
281 if ((*event)->vm)
282 virObjectUnref((*event)->vm);
283 qemuProcessEventFree(*event);
284 }
285
286 *event = NULL;
287 }
288
289
290 /*
291 * This is a callback registered with a qemuMonitor *instance,
292 * and to be invoked when the monitor console hits an end of file
293 * condition, or error, thus indicating VM shutdown should be
294 * performed
295 */
296 static void
297 qemuProcessHandleMonitorEOF(qemuMonitor *mon,
298 virDomainObj *vm,
299 void *opaque)
300 {
301 virQEMUDriver *driver = opaque;
302 qemuDomainObjPrivate *priv;
303 struct qemuProcessEvent *processEvent;
304
305 virObjectLock(vm);
306
307 VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name);
308
309 priv = vm->privateData;
310 if (priv->beingDestroyed) {
311 VIR_DEBUG("Domain is being destroyed, EOF is expected");
312 goto cleanup;
313 }
314
315 processEvent = g_new0(struct qemuProcessEvent, 1);
316
317 processEvent->eventType = QEMU_PROCESS_EVENT_MONITOR_EOF;
318 processEvent->vm = virObjectRef(vm);
319
320 qemuProcessEventSubmit(driver, &processEvent);
321
322 /* We don't want this EOF handler to be called over and over while the
323 * thread is waiting for a job.
324 */
325 virObjectLock(mon);
326 qemuMonitorUnregister(mon);
327 virObjectUnlock(mon);
328
329 /* We don't want any cleanup from EOF handler (or any other
330 * thread) to enter qemu namespace. */
331 qemuDomainDestroyNamespace(driver, vm);
332
333 cleanup:
334 virObjectUnlock(vm);
335 }
336
337
338 /*
339 * This is invoked when there is some kind of error
340 * parsing data to/from the monitor. The VM can continue
341 * to run, but no further monitor commands will be
342 * allowed
343 */
344 static void
345 qemuProcessHandleMonitorError(qemuMonitor *mon G_GNUC_UNUSED,
346 virDomainObj *vm,
347 void *opaque)
348 {
349 virQEMUDriver *driver = opaque;
350 virObjectEvent *event = NULL;
351
352 VIR_DEBUG("Received error on %p '%s'", vm, vm->def->name);
353
354 virObjectLock(vm);
355
356 ((qemuDomainObjPrivate *) vm->privateData)->monError = true;
357 event = virDomainEventControlErrorNewFromObj(vm);
358 virObjectEventStateQueue(driver->domainEventState, event);
359
360 virObjectUnlock(vm);
361 }
362
363
364 /**
365 * qemuProcessFindDomainDiskByAliasOrQOM:
366 * @vm: domain object to search for the disk
367 * @alias: -drive or -device alias of the disk
368 * @qomid: QOM tree device name
369 *
370 * Looks up a disk in the domain definition of @vm which either matches the
371 * -drive or -device alias used for the backend and frontend respectively or the
372 * QOM name. If @alias is an empty string it is treated as NULL, since the
373 * field is mandatory in some cases and may be sent empty.
374 *
375 * Returns a disk from @vm or NULL if it could not be found.
376 */
377 virDomainDiskDef *
378 qemuProcessFindDomainDiskByAliasOrQOM(virDomainObj *vm,
379 const char *alias,
380 const char *qomid)
381 {
382 size_t i;
383
384 if (alias && *alias == '\0')
385 alias = NULL;
386
387 if (alias)
388 alias = qemuAliasDiskDriveSkipPrefix(alias);
389
390 for (i = 0; i < vm->def->ndisks; i++) {
391 virDomainDiskDef *disk = vm->def->disks[i];
392 qemuDomainDiskPrivate *diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
393
394 if ((disk->info.alias && STREQ_NULLABLE(disk->info.alias, alias)) ||
395 (diskPriv->qomName && STREQ_NULLABLE(diskPriv->qomName, qomid)))
396 return disk;
397 }
398
399 virReportError(VIR_ERR_INTERNAL_ERROR,
400 _("no disk found with alias '%s' or id '%s'"),
401 NULLSTR(alias), NULLSTR(qomid));
402 return NULL;
403 }
404
405
406 static void
407 qemuProcessHandleReset(qemuMonitor *mon G_GNUC_UNUSED,
408 virDomainObj *vm,
409 void *opaque)
410 {
411 virQEMUDriver *driver = opaque;
412 virObjectEvent *event = NULL;
413 qemuDomainObjPrivate *priv;
414 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
415 virDomainState state;
416 int reason;
417
418 virObjectLock(vm);
419
420 state = virDomainObjGetState(vm, &reason);
421
422 /* ignore reset events on VM startup. Libvirt in certain instances does a
423 * reset during startup so that the ACPI tables are re-generated */
424 if (state == VIR_DOMAIN_PAUSED &&
425 reason == VIR_DOMAIN_PAUSED_STARTING_UP) {
426 VIR_DEBUG("ignoring reset event during startup");
427 goto unlock;
428 }
429
430 event = virDomainEventRebootNewFromObj(vm);
431 priv = vm->privateData;
432 if (priv->agent)
433 qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_RESET);
434
435 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
436 VIR_WARN("Failed to save status on vm %s", vm->def->name);
437
438 unlock:
439 virObjectUnlock(vm);
440 virObjectEventStateQueue(driver->domainEventState, event);
441 }
442
443
444 /*
445 * Since we have the '-no-shutdown' flag set, the
446 * QEMU process will currently have the guest OS shut down
447 * and the CPUs stopped. To fake the reboot, we thus
448 * want to do a reset of the virtual hardware, followed
449 * by a restart of the CPUs. This should result in the
450 * guest OS booting up again.
451 */
452 static void
453 qemuProcessFakeReboot(void *opaque)
454 {
455 virDomainObj *vm = opaque;
456 qemuDomainObjPrivate *priv = vm->privateData;
457 virQEMUDriver *driver = priv->driver;
458 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
459 virDomainRunningReason reason = VIR_DOMAIN_RUNNING_BOOTED;
460 int ret = -1, rc;
461
462 VIR_DEBUG("vm=%p", vm);
463 virObjectLock(vm);
464 if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
465 goto cleanup;
466
467 if (!virDomainObjIsActive(vm)) {
468 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
469 _("guest unexpectedly quit"));
470 goto endjob;
471 }
472
473 qemuDomainObjEnterMonitor(driver, vm);
474 rc = qemuMonitorSystemReset(priv->mon);
475
476 if (qemuDomainObjExitMonitor(driver, vm) < 0)
477 goto endjob;
478
479 if (rc < 0)
480 goto endjob;
481
482 if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_CRASHED)
483 reason = VIR_DOMAIN_RUNNING_CRASHED;
484
485 if (qemuProcessStartCPUs(driver, vm,
486 reason,
487 QEMU_ASYNC_JOB_NONE) < 0) {
488 if (virGetLastErrorCode() == VIR_ERR_OK)
489 virReportError(VIR_ERR_INTERNAL_ERROR,
490 "%s", _("resume operation failed"));
491 goto endjob;
492 }
493
494 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
495 VIR_WARN("Unable to save status on vm %s after state change",
496 vm->def->name);
497 }
498
499 ret = 0;
500
501 endjob:
502 qemuDomainObjEndJob(driver, vm);
503
504 cleanup:
505 priv->pausedShutdown = false;
506 qemuDomainSetFakeReboot(driver, vm, false);
507 if (ret == -1)
508 ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_FORCE));
509 virDomainObjEndAPI(&vm);
510 }
511
512
513 void
514 qemuProcessShutdownOrReboot(virQEMUDriver *driver,
515 virDomainObj *vm)
516 {
517 qemuDomainObjPrivate *priv = vm->privateData;
518
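/* A fake reboot (or on_poweroff='restart') is handled by resetting the
 * existing QEMU process from a separate thread rather than spawning a new one. */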
519 if (priv->fakeReboot ||
520 vm->def->onPoweroff == VIR_DOMAIN_LIFECYCLE_ACTION_RESTART) {
521 g_autofree char *name = g_strdup_printf("reboot-%s", vm->def->name);
522 virThread th;
523
524 virObjectRef(vm);
525 if (virThreadCreateFull(&th,
526 false,
527 qemuProcessFakeReboot,
528 name,
529 false,
530 vm) < 0) {
531 VIR_ERROR(_("Failed to create reboot thread, killing domain"));
532 ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_NOWAIT));
533 priv->pausedShutdown = false;
534 qemuDomainSetFakeReboot(driver, vm, false);
535 virObjectUnref(vm);
536 }
537 } else {
538 ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_NOWAIT));
539 }
540 }
541
542
543 static void
544 qemuProcessHandleEvent(qemuMonitor *mon G_GNUC_UNUSED,
545 virDomainObj *vm,
546 const char *eventName,
547 long long seconds,
548 unsigned int micros,
549 const char *details,
550 void *opaque)
551 {
552 virQEMUDriver *driver = opaque;
553 virObjectEvent *event = NULL;
554
555 VIR_DEBUG("vm=%p", vm);
556
557 virObjectLock(vm);
558 event = virDomainQemuMonitorEventNew(vm->def->id, vm->def->name,
559 vm->def->uuid, eventName,
560 seconds, micros, details);
561
562 virObjectUnlock(vm);
563 virObjectEventStateQueue(driver->domainEventState, event);
564 }
565
566
567 static void
568 qemuProcessHandleShutdown(qemuMonitor *mon G_GNUC_UNUSED,
569 virDomainObj *vm,
570 virTristateBool guest_initiated,
571 void *opaque)
572 {
573 virQEMUDriver *driver = opaque;
574 qemuDomainObjPrivate *priv;
575 virObjectEvent *event = NULL;
576 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
577 int detail = 0;
578
579 VIR_DEBUG("vm=%p", vm);
580
581 virObjectLock(vm);
582
583 priv = vm->privateData;
584 if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_SHUTDOWN) {
585 VIR_DEBUG("Ignoring repeated SHUTDOWN event from domain %s",
586 vm->def->name);
587 goto unlock;
588 } else if (!virDomainObjIsActive(vm)) {
589 VIR_DEBUG("Ignoring SHUTDOWN event from inactive domain %s",
590 vm->def->name);
591 goto unlock;
592 }
593
594 /* In case of a fake reboot the qemu shutdown state is transient, so don't
595 * change the domain state nor send events. */
596 if (!priv->fakeReboot &&
597 vm->def->onPoweroff != VIR_DOMAIN_LIFECYCLE_ACTION_RESTART) {
598 VIR_DEBUG("Transitioned guest %s to shutdown state",
599 vm->def->name);
600 virDomainObjSetState(vm,
601 VIR_DOMAIN_SHUTDOWN,
602 VIR_DOMAIN_SHUTDOWN_UNKNOWN);
603
604 switch (guest_initiated) {
605 case VIR_TRISTATE_BOOL_YES:
606 detail = VIR_DOMAIN_EVENT_SHUTDOWN_GUEST;
607 break;
608
609 case VIR_TRISTATE_BOOL_NO:
610 detail = VIR_DOMAIN_EVENT_SHUTDOWN_HOST;
611 break;
612
613 case VIR_TRISTATE_BOOL_ABSENT:
614 case VIR_TRISTATE_BOOL_LAST:
615 default:
616 detail = VIR_DOMAIN_EVENT_SHUTDOWN_FINISHED;
617 break;
618 }
619
620 event = virDomainEventLifecycleNewFromObj(vm,
621 VIR_DOMAIN_EVENT_SHUTDOWN,
622 detail);
623
624 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
625 VIR_WARN("Unable to save status on vm %s after state change",
626 vm->def->name);
627 }
628 } else {
629 priv->pausedShutdown = true;
630 }
631
632 if (priv->agent)
633 qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SHUTDOWN);
634
635 qemuProcessShutdownOrReboot(driver, vm);
636
637 unlock:
638 virObjectUnlock(vm);
639 virObjectEventStateQueue(driver->domainEventState, event);
640 }
641
642
643 static void
644 qemuProcessHandleStop(qemuMonitor *mon G_GNUC_UNUSED,
645 virDomainObj *vm,
646 void *opaque)
647 {
648 virQEMUDriver *driver = opaque;
649 virObjectEvent *event = NULL;
650 virDomainPausedReason reason;
651 virDomainEventSuspendedDetailType detail;
652 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
653 qemuDomainObjPrivate *priv = vm->privateData;
654
655 virObjectLock(vm);
656
657 reason = priv->pausedReason;
658 priv->pausedReason = VIR_DOMAIN_PAUSED_UNKNOWN;
659
660 /* In case of a fake reboot the qemu paused state is transient, so don't
661 * reveal it in the domain state nor send events */
662 if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING &&
663 !priv->pausedShutdown) {
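/* If an outgoing migration is in progress, the STOP arrived because qemu
 * paused the guest for migration (or post-copy), so record a more specific
 * pause reason. */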
664 if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) {
665 if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY)
666 reason = VIR_DOMAIN_PAUSED_POSTCOPY;
667 else
668 reason = VIR_DOMAIN_PAUSED_MIGRATION;
669 }
670
671 detail = qemuDomainPausedReasonToSuspendedEvent(reason);
672 VIR_DEBUG("Transitioned guest %s to paused state, "
673 "reason %s, event detail %d",
674 vm->def->name, virDomainPausedReasonTypeToString(reason),
675 detail);
676
677 if (priv->job.current)
678 ignore_value(virTimeMillisNow(&priv->job.current->stopped));
679
680 if (priv->signalStop)
681 virDomainObjBroadcast(vm);
682
683 virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason);
684 event = virDomainEventLifecycleNewFromObj(vm,
685 VIR_DOMAIN_EVENT_SUSPENDED,
686 detail);
687
688 VIR_FREE(priv->lockState);
689 if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
690 VIR_WARN("Unable to release lease on %s", vm->def->name);
691 VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
692
693 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
694 VIR_WARN("Unable to save status on vm %s after state change",
695 vm->def->name);
696 }
697 }
698
699 virObjectUnlock(vm);
700 virObjectEventStateQueue(driver->domainEventState, event);
701 }
702
703
704 static void
705 qemuProcessHandleResume(qemuMonitor *mon G_GNUC_UNUSED,
706 virDomainObj *vm,
707 void *opaque)
708 {
709 virQEMUDriver *driver = opaque;
710 virObjectEvent *event = NULL;
711 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
712 qemuDomainObjPrivate *priv;
713 virDomainRunningReason reason = VIR_DOMAIN_RUNNING_UNPAUSED;
714 virDomainEventResumedDetailType eventDetail;
715
716 virObjectLock(vm);
717
718 priv = vm->privateData;
719 if (priv->runningReason != VIR_DOMAIN_RUNNING_UNKNOWN) {
720 reason = priv->runningReason;
721 priv->runningReason = VIR_DOMAIN_RUNNING_UNKNOWN;
722 }
723
724 if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_RUNNING) {
725 eventDetail = qemuDomainRunningReasonToResumeEvent(reason);
726 VIR_DEBUG("Transitioned guest %s into running state, reason '%s', "
727 "event detail %d",
728 vm->def->name, virDomainRunningReasonTypeToString(reason),
729 eventDetail);
730
731 virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
732 event = virDomainEventLifecycleNewFromObj(vm,
733 VIR_DOMAIN_EVENT_RESUMED,
734 eventDetail);
735
736 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
737 VIR_WARN("Unable to save status on vm %s after state change",
738 vm->def->name);
739 }
740 }
741
742 virObjectUnlock(vm);
743 virObjectEventStateQueue(driver->domainEventState, event);
744 }
745
746 static void
747 qemuProcessHandleRTCChange(qemuMonitor *mon G_GNUC_UNUSED,
748 virDomainObj *vm,
749 long long offset,
750 void *opaque)
751 {
752 virQEMUDriver *driver = opaque;
753 virObjectEvent *event = NULL;
754 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
755
756 virObjectLock(vm);
757
758 if (vm->def->clock.offset == VIR_DOMAIN_CLOCK_OFFSET_VARIABLE) {
759 /* when a basedate is manually given on the qemu commandline
760 * rather than simply "-rtc base=utc", the offset sent by qemu
761 * in this event is *not* the new offset from UTC, but is
762 * instead the new offset from the *original basedate* +
763 * uptime. For example, if the original offset was 3600 and
764 * the guest clock has been advanced by 10 seconds, qemu will
765 * send "10" in the event - this means that the new offset
766 * from UTC is 3610, *not* 10. If the guest clock is advanced
767 * by another 10 seconds, qemu will now send "20" - i.e. each
768 * event is the sum of the most recent change and all previous
769 * changes since the domain was started. Fortunately, we have
770 * saved the initial offset in "adjustment0", so to arrive at
771 * the proper new "adjustment", we just add the most recent
772 * offset to adjustment0.
773 */
774 offset += vm->def->clock.data.variable.adjustment0;
775 vm->def->clock.data.variable.adjustment = offset;
776
777 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
778 VIR_WARN("unable to save domain status with RTC change");
779 }
780
781 event = virDomainEventRTCChangeNewFromObj(vm, offset);
782
783 virObjectUnlock(vm);
784
785 virObjectEventStateQueue(driver->domainEventState, event);
786 }
787
788
789 static void
790 qemuProcessHandleWatchdog(qemuMonitor *mon G_GNUC_UNUSED,
791 virDomainObj *vm,
792 int action,
793 void *opaque)
794 {
795 virQEMUDriver *driver = opaque;
796 virObjectEvent *watchdogEvent = NULL;
797 virObjectEvent *lifecycleEvent = NULL;
798 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
799
800 virObjectLock(vm);
801 watchdogEvent = virDomainEventWatchdogNewFromObj(vm, action);
802
803 if (action == VIR_DOMAIN_EVENT_WATCHDOG_PAUSE &&
804 virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
805 qemuDomainObjPrivate *priv = vm->privateData;
806 VIR_DEBUG("Transitioned guest %s to paused state due to watchdog", vm->def->name);
807
808 virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_WATCHDOG);
809 lifecycleEvent = virDomainEventLifecycleNewFromObj(vm,
810 VIR_DOMAIN_EVENT_SUSPENDED,
811 VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG);
812
813 VIR_FREE(priv->lockState);
814 if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
815 VIR_WARN("Unable to release lease on %s", vm->def->name);
816 VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
817
818 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
819 VIR_WARN("Unable to save status on vm %s after watchdog event",
820 vm->def->name);
821 }
822 }
823
824 if (vm->def->watchdog->action == VIR_DOMAIN_WATCHDOG_ACTION_DUMP) {
825 struct qemuProcessEvent *processEvent;
826 processEvent = g_new0(struct qemuProcessEvent, 1);
827
828 processEvent->eventType = QEMU_PROCESS_EVENT_WATCHDOG;
829 processEvent->action = VIR_DOMAIN_WATCHDOG_ACTION_DUMP;
830 /* Hold an extra reference because we can't allow 'vm' to be
831 * deleted before handling of the watchdog event is finished.
832 */
833 processEvent->vm = virObjectRef(vm);
834
835 qemuProcessEventSubmit(driver, &processEvent);
836 }
837
838 virObjectUnlock(vm);
839 virObjectEventStateQueue(driver->domainEventState, watchdogEvent);
840 virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
841 }
842
843
844 static void
845 qemuProcessHandleIOError(qemuMonitor *mon G_GNUC_UNUSED,
846 virDomainObj *vm,
847 const char *diskAlias,
848 const char *nodename,
849 int action,
850 const char *reason,
851 void *opaque)
852 {
853 virQEMUDriver *driver = opaque;
854 virObjectEvent *ioErrorEvent = NULL;
855 virObjectEvent *ioErrorEvent2 = NULL;
856 virObjectEvent *lifecycleEvent = NULL;
857 const char *srcPath;
858 const char *devAlias;
859 virDomainDiskDef *disk;
860 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
861
862 virObjectLock(vm);
863
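/* QEMU may send an empty string instead of omitting the alias; treat it as absent. */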
864 if (*diskAlias == '\0')
865 diskAlias = NULL;
866
867 if (diskAlias)
868 disk = qemuProcessFindDomainDiskByAliasOrQOM(vm, diskAlias, NULL);
869 else if (nodename)
870 disk = qemuDomainDiskLookupByNodename(vm->def, NULL, nodename, NULL);
871 else
872 disk = NULL;
873
874 if (disk) {
875 srcPath = virDomainDiskGetSource(disk);
876 devAlias = disk->info.alias;
877 } else {
878 srcPath = "";
879 devAlias = "";
880 }
881
882 ioErrorEvent = virDomainEventIOErrorNewFromObj(vm, srcPath, devAlias, action);
883 ioErrorEvent2 = virDomainEventIOErrorReasonNewFromObj(vm, srcPath, devAlias, action, reason);
884
885 if (action == VIR_DOMAIN_EVENT_IO_ERROR_PAUSE &&
886 virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
887 qemuDomainObjPrivate *priv = vm->privateData;
888 VIR_DEBUG("Transitioned guest %s to paused state due to IO error", vm->def->name);
889
890 if (priv->signalIOError)
891 virDomainObjBroadcast(vm);
892
893 virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_IOERROR);
894 lifecycleEvent = virDomainEventLifecycleNewFromObj(vm,
895 VIR_DOMAIN_EVENT_SUSPENDED,
896 VIR_DOMAIN_EVENT_SUSPENDED_IOERROR);
897
898 VIR_FREE(priv->lockState);
899 if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
900 VIR_WARN("Unable to release lease on %s", vm->def->name);
901 VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
902
903 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
904 VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name);
905 }
906 virObjectUnlock(vm);
907
908 virObjectEventStateQueue(driver->domainEventState, ioErrorEvent);
909 virObjectEventStateQueue(driver->domainEventState, ioErrorEvent2);
910 virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
911 }
912
913 static void
914 qemuProcessHandleBlockJob(qemuMonitor *mon G_GNUC_UNUSED,
915 virDomainObj *vm,
916 const char *diskAlias,
917 int type,
918 int status,
919 const char *error,
920 void *opaque)
921 {
922 qemuDomainObjPrivate *priv;
923 virQEMUDriver *driver = opaque;
924 virDomainDiskDef *disk;
925 g_autoptr(qemuBlockJobData) job = NULL;
926 char *data = NULL;
927
928 virObjectLock(vm);
929
930 priv = vm->privateData;
931
932 /* with QEMU_CAPS_BLOCKDEV we handle block job events via JOB_STATUS_CHANGE */
933 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV))
934 goto cleanup;
935
936 VIR_DEBUG("Block job for device %s (domain: %p,%s) type %d status %d",
937 diskAlias, vm, vm->def->name, type, status);
938
939 if (!(disk = qemuProcessFindDomainDiskByAliasOrQOM(vm, diskAlias, NULL)))
940 goto cleanup;
941
942 job = qemuBlockJobDiskGetJob(disk);
943
944 if (job && job->synchronous) {
945 /* We have a SYNC API waiting for this event, dispatch it back */
946 job->newstate = status;
947 VIR_FREE(job->errmsg);
948 job->errmsg = g_strdup(error);
949 virDomainObjBroadcast(vm);
950 } else {
951 /* there is no waiting SYNC API, dispatch the update to a thread */
952 struct qemuProcessEvent *processEvent = g_new0(struct qemuProcessEvent, 1);
953
954 processEvent->eventType = QEMU_PROCESS_EVENT_BLOCK_JOB;
955 data = g_strdup(diskAlias);
956 processEvent->data = data;
957 processEvent->vm = virObjectRef(vm);
958 processEvent->action = type;
959 processEvent->status = status;
960
961 qemuProcessEventSubmit(driver, &processEvent);
962 }
963
964 cleanup:
965 virObjectUnlock(vm);
966 }
967
968
969 static void
970 qemuProcessHandleJobStatusChange(qemuMonitor *mon G_GNUC_UNUSED,
971 virDomainObj *vm,
972 const char *jobname,
973 int status,
974 void *opaque)
975 {
976 virQEMUDriver *driver = opaque;
977 qemuDomainObjPrivate *priv;
978 qemuBlockJobData *job = NULL;
979 int jobnewstate;
980
981 virObjectLock(vm);
982 priv = vm->privateData;
983
984 VIR_DEBUG("job '%s'(domain: %p,%s) state changed to '%s'(%d)",
985 jobname, vm, vm->def->name,
986 qemuMonitorJobStatusTypeToString(status), status);
987
988 if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV)) {
989 VIR_DEBUG("job '%s' handled by old blockjob handler", jobname);
990 goto cleanup;
991 }
992
993 if ((jobnewstate = qemuBlockjobConvertMonitorStatus(status)) == QEMU_BLOCKJOB_STATE_LAST)
994 goto cleanup;
995
996 if (!(job = virHashLookup(priv->blockjobs, jobname))) {
997 VIR_DEBUG("job '%s' not registered", jobname);
998 goto cleanup;
999 }
1000
1001 job->newstate = jobnewstate;
1002
1003 if (job->synchronous) {
1004 VIR_DEBUG("job '%s' handled synchronously", jobname);
1005 virDomainObjBroadcast(vm);
1006 } else {
1007 struct qemuProcessEvent *processEvent = g_new0(struct qemuProcessEvent, 1);
1008
1009 VIR_DEBUG("job '%s' handled by event thread", jobname);
1010
1011 processEvent->eventType = QEMU_PROCESS_EVENT_JOB_STATUS_CHANGE;
1012 processEvent->vm = virObjectRef(vm);
1013 processEvent->data = virObjectRef(job);
1014
1015 qemuProcessEventSubmit(driver, &processEvent);
1016 }
1017
1018 cleanup:
1019 virObjectUnlock(vm);
1020 }
1021
1022
1023 static void
1024 qemuProcessHandleGraphics(qemuMonitor *mon G_GNUC_UNUSED,
1025 virDomainObj *vm,
1026 int phase,
1027 int localFamily,
1028 const char *localNode,
1029 const char *localService,
1030 int remoteFamily,
1031 const char *remoteNode,
1032 const char *remoteService,
1033 const char *authScheme,
1034 const char *x509dname,
1035 const char *saslUsername,
1036 void *opaque)
1037 {
1038 virQEMUDriver *driver = opaque;
1039 virObjectEvent *event;
1040 virDomainEventGraphicsAddressPtr localAddr = NULL;
1041 virDomainEventGraphicsAddressPtr remoteAddr = NULL;
1042 virDomainEventGraphicsSubjectPtr subject = NULL;
1043
1044 localAddr = g_new0(virDomainEventGraphicsAddress, 1);
1045 localAddr->family = localFamily;
1046 localAddr->service = g_strdup(localService);
1047 localAddr->node = g_strdup(localNode);
1048
1049 remoteAddr = g_new0(virDomainEventGraphicsAddress, 1);
1050 remoteAddr->family = remoteFamily;
1051 remoteAddr->service = g_strdup(remoteService);
1052 remoteAddr->node = g_strdup(remoteNode);
1053
1054 subject = g_new0(virDomainEventGraphicsSubject, 1);
1055 if (x509dname) {
1056 VIR_REALLOC_N(subject->identities, subject->nidentity+1);
1057 subject->nidentity++;
1058 subject->identities[subject->nidentity - 1].type = g_strdup("x509dname");
1059 subject->identities[subject->nidentity - 1].name = g_strdup(x509dname);
1060 }
1061 if (saslUsername) {
1062 VIR_REALLOC_N(subject->identities, subject->nidentity+1);
1063 subject->nidentity++;
1064 subject->identities[subject->nidentity - 1].type = g_strdup("saslUsername");
1065 subject->identities[subject->nidentity - 1].name = g_strdup(saslUsername);
1066 }
1067
1068 virObjectLock(vm);
1069 event = virDomainEventGraphicsNewFromObj(vm, phase, localAddr, remoteAddr, authScheme, subject);
1070 virObjectUnlock(vm);
1071
1072 virObjectEventStateQueue(driver->domainEventState, event);
1073 }
1074
1075 static void
1076 qemuProcessHandleTrayChange(qemuMonitor *mon G_GNUC_UNUSED,
1077 virDomainObj *vm,
1078 const char *devAlias,
1079 const char *devid,
1080 int reason,
1081 void *opaque)
1082 {
1083 virQEMUDriver *driver = opaque;
1084 virObjectEvent *event = NULL;
1085 virDomainDiskDef *disk;
1086 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
1087
1088 virObjectLock(vm);
1089 disk = qemuProcessFindDomainDiskByAliasOrQOM(vm, devAlias, devid);
1090
1091 if (disk) {
1092 event = virDomainEventTrayChangeNewFromObj(vm, disk->info.alias, reason);
1093 /* Update disk tray status */
1094 if (reason == VIR_DOMAIN_EVENT_TRAY_CHANGE_OPEN)
1095 disk->tray_status = VIR_DOMAIN_DISK_TRAY_OPEN;
1096 else if (reason == VIR_DOMAIN_EVENT_TRAY_CHANGE_CLOSE)
1097 disk->tray_status = VIR_DOMAIN_DISK_TRAY_CLOSED;
1098
1099 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
1100 VIR_WARN("Unable to save status on vm %s after tray moved event",
1101 vm->def->name);
1102 }
1103
1104 virDomainObjBroadcast(vm);
1105 }
1106
1107 virObjectUnlock(vm);
1108 virObjectEventStateQueue(driver->domainEventState, event);
1109 }
1110
1111 static void
1112 qemuProcessHandlePMWakeup(qemuMonitor *mon G_GNUC_UNUSED,
1113 virDomainObj *vm,
1114 void *opaque)
1115 {
1116 virQEMUDriver *driver = opaque;
1117 virObjectEvent *event = NULL;
1118 virObjectEvent *lifecycleEvent = NULL;
1119 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
1120
1121 virObjectLock(vm);
1122 event = virDomainEventPMWakeupNewFromObj(vm);
1123
1124 /* Don't set domain status back to running if it wasn't paused
1125 * from guest side, otherwise it can just cause confusion.
1126 */
1127 if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PMSUSPENDED) {
1128 VIR_DEBUG("Transitioned guest %s from pmsuspended to running "
1129 "state due to QMP wakeup event", vm->def->name);
1130
1131 virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
1132 VIR_DOMAIN_RUNNING_WAKEUP);
1133 lifecycleEvent = virDomainEventLifecycleNewFromObj(vm,
1134 VIR_DOMAIN_EVENT_STARTED,
1135 VIR_DOMAIN_EVENT_STARTED_WAKEUP);
1136
1137 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
1138 VIR_WARN("Unable to save status on vm %s after wakeup event",
1139 vm->def->name);
1140 }
1141 }
1142
1143 virObjectUnlock(vm);
1144 virObjectEventStateQueue(driver->domainEventState, event);
1145 virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
1146 }
1147
1148 static void
1149 qemuProcessHandlePMSuspend(qemuMonitor *mon G_GNUC_UNUSED,
1150 virDomainObj *vm,
1151 void *opaque)
1152 {
1153 virQEMUDriver *driver = opaque;
1154 virObjectEvent *event = NULL;
1155 virObjectEvent *lifecycleEvent = NULL;
1156 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
1157
1158 virObjectLock(vm);
1159 event = virDomainEventPMSuspendNewFromObj(vm);
1160
1161 if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
1162 qemuDomainObjPrivate *priv = vm->privateData;
1163 VIR_DEBUG("Transitioned guest %s to pmsuspended state due to "
1164 "QMP suspend event", vm->def->name);
1165
1166 virDomainObjSetState(vm, VIR_DOMAIN_PMSUSPENDED,
1167 VIR_DOMAIN_PMSUSPENDED_UNKNOWN);
1168 lifecycleEvent =
1169 virDomainEventLifecycleNewFromObj(vm,
1170 VIR_DOMAIN_EVENT_PMSUSPENDED,
1171 VIR_DOMAIN_EVENT_PMSUSPENDED_MEMORY);
1172
1173 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
1174 VIR_WARN("Unable to save status on vm %s after suspend event",
1175 vm->def->name);
1176 }
1177
1178 if (priv->agent)
1179 qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SUSPEND);
1180 }
1181
1182 virObjectUnlock(vm);
1183
1184 virObjectEventStateQueue(driver->domainEventState, event);
1185 virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
1186 }
1187
1188 static void
1189 qemuProcessHandleBalloonChange(qemuMonitor *mon G_GNUC_UNUSED,
1190 virDomainObj *vm,
1191 unsigned long long actual,
1192 void *opaque)
1193 {
1194 virQEMUDriver *driver = opaque;
1195 virObjectEvent *event = NULL;
1196 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
1197 size_t i;
1198
1199 virObjectLock(vm);
1200 event = virDomainEventBalloonChangeNewFromObj(vm, actual);
1201
1202 /* We want the balloon size stored in domain definition to
1203 * account for the actual size of virtio-mem too. But the
1204 * balloon size as reported by QEMU (@actual) contains just
1205 * the balloon size without any virtio-mem. Do a wee bit of
1206 * math to fix it. */
1207 VIR_DEBUG("balloon size before fix is %lld", actual);
1208 for (i = 0; i < vm->def->nmems; i++) {
1209 if (vm->def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM)
1210 actual += vm->def->mems[i]->currentsize;
1211 }
1212
1213 VIR_DEBUG("Updating balloon from %lld to %lld kb",
1214 vm->def->mem.cur_balloon, actual);
1215 vm->def->mem.cur_balloon = actual;
1216
1217 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
1218 VIR_WARN("unable to save domain status with balloon change");
1219
1220 virObjectUnlock(vm);
1221
1222 virObjectEventStateQueue(driver->domainEventState, event);
1223 }
1224
1225 static void
1226 qemuProcessHandlePMSuspendDisk(qemuMonitor *mon G_GNUC_UNUSED,
1227 virDomainObj *vm,
1228 void *opaque)
1229 {
1230 virQEMUDriver *driver = opaque;
1231 virObjectEvent *event = NULL;
1232 virObjectEvent *lifecycleEvent = NULL;
1233 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
1234
1235 virObjectLock(vm);
1236 event = virDomainEventPMSuspendDiskNewFromObj(vm);
1237
1238 if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
1239 qemuDomainObjPrivate *priv = vm->privateData;
1240 VIR_DEBUG("Transitioned guest %s to pmsuspended state due to "
1241 "QMP suspend_disk event", vm->def->name);
1242
1243 virDomainObjSetState(vm, VIR_DOMAIN_PMSUSPENDED,
1244 VIR_DOMAIN_PMSUSPENDED_UNKNOWN);
1245 lifecycleEvent =
1246 virDomainEventLifecycleNewFromObj(vm,
1247 VIR_DOMAIN_EVENT_PMSUSPENDED,
1248 VIR_DOMAIN_EVENT_PMSUSPENDED_DISK);
1249
1250 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
1251 VIR_WARN("Unable to save status on vm %s after suspend event",
1252 vm->def->name);
1253 }
1254
1255 if (priv->agent)
1256 qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SUSPEND);
1257 }
1258
1259 virObjectUnlock(vm);
1260
1261 virObjectEventStateQueue(driver->domainEventState, event);
1262 virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
1263 }
1264
1265
1266 static void
1267 qemuProcessHandleGuestPanic(qemuMonitor *mon G_GNUC_UNUSED,
1268 virDomainObj *vm,
1269 qemuMonitorEventPanicInfo *info,
1270 void *opaque)
1271 {
1272 virQEMUDriver *driver = opaque;
1273 struct qemuProcessEvent *processEvent;
1274
1275 virObjectLock(vm);
1276 processEvent = g_new0(struct qemuProcessEvent, 1);
1277
1278 processEvent->eventType = QEMU_PROCESS_EVENT_GUESTPANIC;
1279 processEvent->action = vm->def->onCrash;
1280 processEvent->data = info;
1281 /* Hold an extra reference because we can't allow 'vm' to be
1282 * deleted before handling of the guest panic event is finished.
1283 */
1284 processEvent->vm = virObjectRef(vm);
1285
1286 qemuProcessEventSubmit(driver, &processEvent);
1287
1288 virObjectUnlock(vm);
1289 }
1290
1291
1292 void
1293 qemuProcessHandleDeviceDeleted(qemuMonitor *mon G_GNUC_UNUSED,
1294 virDomainObj *vm,
1295 const char *devAlias,
1296 void *opaque)
1297 {
1298 virQEMUDriver *driver = opaque;
1299 struct qemuProcessEvent *processEvent = NULL;
1300 char *data;
1301
1302 virObjectLock(vm);
1303
1304 VIR_DEBUG("Device %s removed from domain %p %s",
1305 devAlias, vm, vm->def->name);
1306
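/* If a thread is already waiting for this device removal, signal it directly;
 * otherwise hand the event over to the worker thread below. */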
1307 if (qemuDomainSignalDeviceRemoval(vm, devAlias,
1308 QEMU_DOMAIN_UNPLUGGING_DEVICE_STATUS_OK))
1309 goto cleanup;
1310
1311 processEvent = g_new0(struct qemuProcessEvent, 1);
1312
1313 processEvent->eventType = QEMU_PROCESS_EVENT_DEVICE_DELETED;
1314 data = g_strdup(devAlias);
1315 processEvent->data = data;
1316 processEvent->vm = virObjectRef(vm);
1317
1318 qemuProcessEventSubmit(driver, &processEvent);
1319
1320 cleanup:
1321 virObjectUnlock(vm);
1322 }
1323
1324
1325 static void
1326 qemuProcessHandleDeviceUnplugErr(qemuMonitor *mon G_GNUC_UNUSED,
1327 virDomainObj *vm,
1328 const char *devPath,
1329 const char *devAlias,
1330 void *opaque)
1331 {
1332 virQEMUDriver *driver = opaque;
1333 virObjectEvent *event = NULL;
1334
1335 virObjectLock(vm);
1336
1337 VIR_DEBUG("Device %s QOM path %s failed to be removed from domain %p %s",
1338 devAlias, devPath, vm, vm->def->name);
1339
1340 /*
1341 * DEVICE_UNPLUG_GUEST_ERROR will always contain the QOM path
1342 * but QEMU will not guarantee that devAlias will be provided.
1343 *
1344 * However, given that all Libvirt devices have a devAlias, we
1345 * can ignore the case where QEMU emitted this event without it.
1346 */
1347 if (!devAlias)
1348 goto cleanup;
1349
1350 qemuDomainSignalDeviceRemoval(vm, devAlias,
1351 QEMU_DOMAIN_UNPLUGGING_DEVICE_STATUS_GUEST_REJECTED);
1352
1353 event = virDomainEventDeviceRemovalFailedNewFromObj(vm, devAlias);
1354
1355 cleanup:
1356 virObjectUnlock(vm);
1357 virObjectEventStateQueue(driver->domainEventState, event);
1358 }
1359
1360
1361 /**
1362 *
1363 * Meaning of fields reported by the event according to the ACPI standard:
1364 * @source:
1365 * 0x00 - 0xff: Notification values, as passed at the request time
1366 * 0x100: Operating System Shutdown Processing
1367 * 0x103: Ejection processing
1368 * 0x200: Insertion processing
1369 * other values are reserved
1370 *
1371 * @status:
1372 * general values
1373 * 0x00: success
1374 * 0x01: non-specific failure
1375 * 0x02: unrecognized notify code
1376 * 0x03 - 0x7f: reserved
1377 * other values are specific to the notification type (see below)
1378 *
1379 * for the 0x100 source the following additional codes are standardized:
1380 * 0x80: OS Shutdown request denied
1381 * 0x81: OS Shutdown in progress
1382 * 0x82: OS Shutdown completed
1383 * 0x83: OS Graceful shutdown not supported
1384 * other higher values are reserved
1385 *
1386 * for the 0x003 (Ejection request) and 0x103 (Ejection processing) source
1387 * the following additional codes are standardized:
1388 * 0x80: Device ejection not supported by OSPM
1389 * 0x81: Device in use by application
1390 * 0x82: Device Busy
1391 * 0x83: Ejection dependency is busy or not supported for ejection by OSPM
1392 * 0x84: Ejection is in progress (pending)
1393 * other higher values are reserved
1394 *
1395 * for the 0x200 source the following additional codes are standardized:
1396 * 0x80: Device insertion in progress (pending)
1397 * 0x81: Device driver load failure
1398 * 0x82: Device insertion not supported by OSPM
1399 * 0x83-0x8F: Reserved
1400 * 0x90-0x9F: Insertion failure - Resources Unavailable as described by the
1401 * following bit encodings:
1402 * Bit [3]: Bus or Segment Numbers
1403 * Bit [2]: Interrupts
1404 * Bit [1]: I/O
1405 * Bit [0]: Memory
1406 * other higher values are reserved
1407 *
1408 * Other fields and semantics are specific to the qemu handling of the event.
1409 * - @alias may be NULL for successful unplug operations
1410 * - @slotType describes the device type a bit more closely, currently the
1411 * only known value is 'DIMM'
1412 * - @slot describes the specific device
1413 *
1414 * Note that qemu does not emit the event for all the documented sources or
1415 * devices.
1416 */
1417 static void
1418 qemuProcessHandleAcpiOstInfo(qemuMonitor *mon G_GNUC_UNUSED,
1419 virDomainObj *vm,
1420 const char *alias,
1421 const char *slotType,
1422 const char *slot,
1423 unsigned int source,
1424 unsigned int status,
1425 void *opaque)
1426 {
1427 virQEMUDriver *driver = opaque;
1428 virObjectEvent *event = NULL;
1429
1430 virObjectLock(vm);
1431
1432 VIR_DEBUG("ACPI OST info for device %s domain %p %s. "
1433 "slotType='%s' slot='%s' source=%u status=%u",
1434 NULLSTR(alias), vm, vm->def->name, slotType, slot, source, status);
1435
1436 if (!alias)
1437 goto cleanup;
1438
1439 if (STREQ(slotType, "DIMM")) {
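/* An ejection request/processing source reporting a failure status (see the
 * codes documented above) means the guest refused to release the DIMM. */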
1440 if ((source == 0x003 || source == 0x103) &&
1441 (status == 0x01 || (status >= 0x80 && status <= 0x83))) {
1442 qemuDomainSignalDeviceRemoval(vm, alias,
1443 QEMU_DOMAIN_UNPLUGGING_DEVICE_STATUS_GUEST_REJECTED);
1444
1445 event = virDomainEventDeviceRemovalFailedNewFromObj(vm, alias);
1446 }
1447 }
1448
1449 cleanup:
1450 virObjectUnlock(vm);
1451 virObjectEventStateQueue(driver->domainEventState, event);
1452 }
1453
1454
1455 static void
1456 qemuProcessHandleBlockThreshold(qemuMonitor *mon G_GNUC_UNUSED,
1457 virDomainObj *vm,
1458 const char *nodename,
1459 unsigned long long threshold,
1460 unsigned long long excess,
1461 void *opaque)
1462 {
1463 qemuDomainObjPrivate *priv;
1464 virQEMUDriver *driver = opaque;
1465 virObjectEvent *eventSource = NULL;
1466 virObjectEvent *eventDevice = NULL;
1467 virDomainDiskDef *disk;
1468 virStorageSource *src;
1469 const char *path = NULL;
1470
1471 virObjectLock(vm);
1472
1473 priv = vm->privateData;
1474
1475 VIR_DEBUG("BLOCK_WRITE_THRESHOLD event for block node '%s' in domain %p %s:"
1476 "threshold '%llu' exceeded by '%llu'",
1477 nodename, vm, vm->def->name, threshold, excess);
1478
1479 if ((disk = qemuDomainDiskLookupByNodename(vm->def, priv->backup, nodename, &src))) {
1480 if (virStorageSourceIsLocalStorage(src))
1481 path = src->path;
1482
1483 if (src == disk->src &&
1484 !src->thresholdEventWithIndex) {
1485 g_autofree char *dev = qemuDomainDiskBackingStoreGetName(disk, 0);
1486
1487 eventDevice = virDomainEventBlockThresholdNewFromObj(vm, dev, path,
1488 threshold, excess);
1489 }
1490
1491 if (src->id != 0) {
1492 g_autofree char *dev = qemuDomainDiskBackingStoreGetName(disk, src->id);
1493
1494 eventSource = virDomainEventBlockThresholdNewFromObj(vm, dev, path,
1495 threshold, excess);
1496 }
1497 }
1498
1499 virObjectUnlock(vm);
1500 virObjectEventStateQueue(driver->domainEventState, eventDevice);
1501 virObjectEventStateQueue(driver->domainEventState, eventSource);
1502 }
1503
1504
1505 static void
1506 qemuProcessHandleNicRxFilterChanged(qemuMonitor *mon G_GNUC_UNUSED,
1507 virDomainObj *vm,
1508 const char *devAlias,
1509 void *opaque)
1510 {
1511 virQEMUDriver *driver = opaque;
1512 struct qemuProcessEvent *processEvent = NULL;
1513 char *data;
1514
1515 virObjectLock(vm);
1516
1517 VIR_DEBUG("Device %s RX Filter changed in domain %p %s",
1518 devAlias, vm, vm->def->name);
1519
1520 processEvent = g_new0(struct qemuProcessEvent, 1);
1521
1522 processEvent->eventType = QEMU_PROCESS_EVENT_NIC_RX_FILTER_CHANGED;
1523 data = g_strdup(devAlias);
1524 processEvent->data = data;
1525 processEvent->vm = virObjectRef(vm);
1526
1527 qemuProcessEventSubmit(driver, &processEvent);
1528
1529 virObjectUnlock(vm);
1530 }
1531
1532
1533 static void
1534 qemuProcessHandleSerialChanged(qemuMonitor *mon G_GNUC_UNUSED,
1535 virDomainObj *vm,
1536 const char *devAlias,
1537 bool connected,
1538 void *opaque)
1539 {
1540 virQEMUDriver *driver = opaque;
1541 struct qemuProcessEvent *processEvent = NULL;
1542 char *data;
1543
1544 virObjectLock(vm);
1545
1546 VIR_DEBUG("Serial port %s state changed to '%d' in domain %p %s",
1547 devAlias, connected, vm, vm->def->name);
1548
1549 processEvent = g_new0(struct qemuProcessEvent, 1);
1550
1551 processEvent->eventType = QEMU_PROCESS_EVENT_SERIAL_CHANGED;
1552 data = g_strdup(devAlias);
1553 processEvent->data = data;
1554 processEvent->action = connected;
1555 processEvent->vm = virObjectRef(vm);
1556
1557 qemuProcessEventSubmit(driver, &processEvent);
1558
1559 virObjectUnlock(vm);
1560 }
1561
1562
1563 static void
1564 qemuProcessHandleSpiceMigrated(qemuMonitor *mon G_GNUC_UNUSED,
1565 virDomainObj *vm,
1566 void *opaque G_GNUC_UNUSED)
1567 {
1568 qemuDomainObjPrivate *priv;
1569 qemuDomainJobPrivate *jobPriv;
1570
1571 virObjectLock(vm);
1572
1573 VIR_DEBUG("Spice migration completed for domain %p %s",
1574 vm, vm->def->name);
1575
1576 priv = vm->privateData;
1577 jobPriv = priv->job.privateData;
1578 if (priv->job.asyncJob != QEMU_ASYNC_JOB_MIGRATION_OUT) {
1579 VIR_DEBUG("got SPICE_MIGRATE_COMPLETED event without a migration job");
1580 goto cleanup;
1581 }
1582
1583 jobPriv->spiceMigrated = true;
1584 virDomainObjBroadcast(vm);
1585
1586 cleanup:
1587 virObjectUnlock(vm);
1588 }
1589
1590
1591 static void
1592 qemuProcessHandleMigrationStatus(qemuMonitor *mon G_GNUC_UNUSED,
1593 virDomainObj *vm,
1594 int status,
1595 void *opaque)
1596 {
1597 qemuDomainObjPrivate *priv;
1598 virQEMUDriver *driver = opaque;
1599 virObjectEvent *event = NULL;
1600 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
1601 int reason;
1602
1603 virObjectLock(vm);
1604
1605 VIR_DEBUG("Migration of domain %p %s changed state to %s",
1606 vm, vm->def->name,
1607 qemuMonitorMigrationStatusTypeToString(status));
1608
1609 priv = vm->privateData;
1610 if (priv->job.asyncJob == QEMU_ASYNC_JOB_NONE) {
1611 VIR_DEBUG("got MIGRATION event without a migration job");
1612 goto cleanup;
1613 }
1614
1615 priv->job.current->stats.mig.status = status;
1616 virDomainObjBroadcast(vm);
1617
1618 if (status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY &&
1619 priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT &&
1620 virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED &&
1621 reason == VIR_DOMAIN_PAUSED_MIGRATION) {
1622 VIR_DEBUG("Correcting paused state reason for domain %s to %s",
1623 vm->def->name,
1624 virDomainPausedReasonTypeToString(VIR_DOMAIN_PAUSED_POSTCOPY));
1625
1626 virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_POSTCOPY);
1627 event = virDomainEventLifecycleNewFromObj(vm,
1628 VIR_DOMAIN_EVENT_SUSPENDED,
1629 VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY);
1630
1631 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) {
1632 VIR_WARN("Unable to save status on vm %s after state change",
1633 vm->def->name);
1634 }
1635 }
1636
1637 cleanup:
1638 virObjectUnlock(vm);
1639 virObjectEventStateQueue(driver->domainEventState, event);
1640 }
1641
1642
1643 static void
1644 qemuProcessHandleMigrationPass(qemuMonitor *mon G_GNUC_UNUSED,
1645 virDomainObj *vm,
1646 int pass,
1647 void *opaque)
1648 {
1649 virQEMUDriver *driver = opaque;
1650 qemuDomainObjPrivate *priv;
1651
1652 virObjectLock(vm);
1653
1654 VIR_DEBUG("Migrating domain %p %s, iteration %d",
1655 vm, vm->def->name, pass);
1656
1657 priv = vm->privateData;
1658 if (priv->job.asyncJob == QEMU_ASYNC_JOB_NONE) {
1659 VIR_DEBUG("got MIGRATION_PASS event without a migration job");
1660 goto cleanup;
1661 }
1662
1663 virObjectEventStateQueue(driver->domainEventState,
1664 virDomainEventMigrationIterationNewFromObj(vm, pass));
1665
1666 cleanup:
1667 virObjectUnlock(vm);
1668 }
1669
1670
1671 static void
1672 qemuProcessHandleDumpCompleted(qemuMonitor *mon G_GNUC_UNUSED,
1673 virDomainObj *vm,
1674 int status,
1675 qemuMonitorDumpStats *stats,
1676 const char *error,
1677 void *opaque G_GNUC_UNUSED)
1678 {
1679 qemuDomainObjPrivate *priv;
1680 qemuDomainJobPrivate *jobPriv;
1681
1682 virObjectLock(vm);
1683
1684 VIR_DEBUG("Dump completed for domain %p %s with stats=%p error='%s'",
1685 vm, vm->def->name, stats, NULLSTR(error));
1686
1687 priv = vm->privateData;
1688 jobPriv = priv->job.privateData;
1689 if (priv->job.asyncJob == QEMU_ASYNC_JOB_NONE) {
1690 VIR_DEBUG("got DUMP_COMPLETED event without a dump_completed job");
1691 goto cleanup;
1692 }
1693 jobPriv->dumpCompleted = true;
1694 priv->job.current->stats.dump = *stats;
1695 priv->job.error = g_strdup(error);
1696
1697 /* Force error if extracting the DUMP_COMPLETED status failed */
1698 if (!error && status < 0) {
1699 priv->job.error = g_strdup(virGetLastErrorMessage());
1700 priv->job.current->stats.dump.status = QEMU_MONITOR_DUMP_STATUS_FAILED;
1701 }
1702
1703 virDomainObjBroadcast(vm);
1704
1705 cleanup:
1706 virResetLastError();
1707 virObjectUnlock(vm);
1708 }
1709
1710
1711 static void
1712 qemuProcessHandlePRManagerStatusChanged(qemuMonitor *mon G_GNUC_UNUSED,
1713 virDomainObj *vm,
1714 const char *prManager,
1715 bool connected,
1716 void *opaque)
1717 {
1718 virQEMUDriver *driver = opaque;
1719 qemuDomainObjPrivate *priv;
1720 struct qemuProcessEvent *processEvent = NULL;
1721 const char *managedAlias = qemuDomainGetManagedPRAlias();
1722
1723 virObjectLock(vm);
1724
1725 VIR_DEBUG("pr-manager %s status changed for domain %p %s connected=%d",
1726 prManager, vm, vm->def->name, connected);
1727
1728 /* Connect events are boring. */
1729 if (connected)
1730 goto cleanup;
1731
1732 /* Disconnect events are more interesting. */
1733
1734 if (STRNEQ(prManager, managedAlias)) {
1735 VIR_DEBUG("pr-manager %s not managed, ignoring event",
1736 prManager);
1737 goto cleanup;
1738 }
1739
1740 priv = vm->privateData;
1741 priv->prDaemonRunning = false;
1742
1743 processEvent = g_new0(struct qemuProcessEvent, 1);
1744
1745 processEvent->eventType = QEMU_PROCESS_EVENT_PR_DISCONNECT;
1746 processEvent->vm = virObjectRef(vm);
1747
1748 qemuProcessEventSubmit(driver, &processEvent);
1749
1750 cleanup:
1751 virObjectUnlock(vm);
1752 }
1753
1754
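/**
 * qemuProcessHandleRdmaGidStatusChanged:
 *
 * Handler for QEMU's RDMA_GID_STATUS_CHANGED event: copies the GID status
 * details and submits them as a QEMU_PROCESS_EVENT_RDMA_GID_STATUS_CHANGED
 * event for asynchronous processing.
 */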
1755 static void
1756 qemuProcessHandleRdmaGidStatusChanged(qemuMonitor *mon G_GNUC_UNUSED,
1757 virDomainObj *vm,
1758 const char *netdev,
1759 bool gid_status,
1760 unsigned long long subnet_prefix,
1761 unsigned long long interface_id,
1762 void *opaque)
1763 {
1764 virQEMUDriver *driver = opaque;
1765 struct qemuProcessEvent *processEvent = NULL;
1766 qemuMonitorRdmaGidStatus *info = NULL;
1767
1768 virObjectLock(vm);
1769
1770 VIR_DEBUG("netdev=%s,gid_status=%d,subnet_prefix=0x%llx,interface_id=0x%llx",
1771 netdev, gid_status, subnet_prefix, interface_id);
1772
1773 info = g_new0(qemuMonitorRdmaGidStatus, 1);
1774
1775 info->netdev = g_strdup(netdev);
1776
1777 info->gid_status = gid_status;
1778 info->subnet_prefix = subnet_prefix;
1779 info->interface_id = interface_id;
1780
1781 processEvent = g_new0(struct qemuProcessEvent, 1);
1782
1783 processEvent->eventType = QEMU_PROCESS_EVENT_RDMA_GID_STATUS_CHANGED;
1784 processEvent->vm = virObjectRef(vm);
1785 processEvent->data = g_steal_pointer(&info);
1786
1787 qemuProcessEventSubmit(driver, &processEvent);
1788
1789 virObjectUnlock(vm);
1790 }
1791
1792
1793 static void
1794 qemuProcessHandleGuestCrashloaded(qemuMonitor *mon G_GNUC_UNUSED,
1795 virDomainObj *vm,
1796 void *opaque)
1797 {
1798 virQEMUDriver *driver = opaque;
1799 struct qemuProcessEvent *processEvent;
1800
1801 virObjectLock(vm);
1802 processEvent = g_new0(struct qemuProcessEvent, 1);
1803
1804 processEvent->eventType = QEMU_PROCESS_EVENT_GUEST_CRASHLOADED;
1805 processEvent->vm = virObjectRef(vm);
1806
1807 qemuProcessEventSubmit(driver, &processEvent);
1808
1809 virObjectUnlock(vm);
1810 }
1811
1812
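/**
 * qemuProcessHandleMemoryFailure:
 *
 * Handler for QEMU's MEMORY_FAILURE event: translates the monitor's
 * recipient and action enums into their libvirt counterparts and queues a
 * memory failure event. Unknown values are silently ignored.
 */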
1813 static void
1814 qemuProcessHandleMemoryFailure(qemuMonitor *mon G_GNUC_UNUSED,
1815 virDomainObj *vm,
1816 qemuMonitorEventMemoryFailure *mfp,
1817 void *opaque)
1818 {
1819 virQEMUDriver *driver = opaque;
1820 virObjectEvent *event = NULL;
1821 virDomainMemoryFailureRecipientType recipient;
1822 virDomainMemoryFailureActionType action;
1823 unsigned int flags = 0;
1824
1825 switch (mfp->recipient) {
1826 case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_HYPERVISOR:
1827 recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_HYPERVISOR;
1828 break;
1829 case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_GUEST:
1830 recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_GUEST;
1831 break;
1832 case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST:
1833 default:
1834 return;
1835 }
1836
1837 switch (mfp->action) {
1838 case QEMU_MONITOR_MEMORY_FAILURE_ACTION_IGNORE:
1839 action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_IGNORE;
1840 break;
1841 case QEMU_MONITOR_MEMORY_FAILURE_ACTION_INJECT:
1842 action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_INJECT;
1843 break;
1844 case QEMU_MONITOR_MEMORY_FAILURE_ACTION_FATAL:
1845 action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_FATAL;
1846 break;
1847 case QEMU_MONITOR_MEMORY_FAILURE_ACTION_RESET:
1848 action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_RESET;
1849 break;
1850 case QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST:
1851 default:
1852 return;
1853 }
1854
1855 if (mfp->action_required)
1856 flags |= VIR_DOMAIN_MEMORY_FAILURE_ACTION_REQUIRED;
1857 if (mfp->recursive)
1858 flags |= VIR_DOMAIN_MEMORY_FAILURE_RECURSIVE;
1859
1860 event = virDomainEventMemoryFailureNewFromObj(vm, recipient, action, flags);
1861 virObjectEventStateQueue(driver->domainEventState, event);
1862 }
1863
1864
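/**
 * qemuProcessHandleMemoryDeviceSizeChange:
 *
 * Handler for QEMU's MEMORY_DEVICE_SIZE_CHANGE event (emitted e.g. for
 * virtio-mem): records the new size of the device and submits it for
 * asynchronous processing.
 */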
1865 static void
1866 qemuProcessHandleMemoryDeviceSizeChange(qemuMonitor *mon G_GNUC_UNUSED,
1867 virDomainObj *vm,
1868 const char *devAlias,
1869 unsigned long long size,
1870 void *opaque)
1871 {
1872 virQEMUDriver *driver = opaque;
1873 struct qemuProcessEvent *processEvent = NULL;
1874 qemuMonitorMemoryDeviceSizeChange *info = NULL;
1875
1876 virObjectLock(vm);
1877
1878 VIR_DEBUG("Memory device '%s' changed size to '%llu' in domain '%s'",
1879 devAlias, size, vm->def->name);
1880
1881 info = g_new0(qemuMonitorMemoryDeviceSizeChange, 1);
1882 info->devAlias = g_strdup(devAlias);
1883 info->size = size;
1884
1885 processEvent = g_new0(struct qemuProcessEvent, 1);
1886 processEvent->eventType = QEMU_PROCESS_EVENT_MEMORY_DEVICE_SIZE_CHANGE;
1887 processEvent->vm = virObjectRef(vm);
1888 processEvent->data = g_steal_pointer(&info);
1889
1890 qemuProcessEventSubmit(driver, &processEvent);
1891
1892 virObjectUnlock(vm);
1893 }
1894
1895
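/* Table mapping QMP monitor events to the handlers above; it is passed to
 * qemuMonitorOpen() when the monitor connection is established. */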
1896 static qemuMonitorCallbacks monitorCallbacks = {
1897 .eofNotify = qemuProcessHandleMonitorEOF,
1898 .errorNotify = qemuProcessHandleMonitorError,
1899 .domainEvent = qemuProcessHandleEvent,
1900 .domainShutdown = qemuProcessHandleShutdown,
1901 .domainStop = qemuProcessHandleStop,
1902 .domainResume = qemuProcessHandleResume,
1903 .domainReset = qemuProcessHandleReset,
1904 .domainRTCChange = qemuProcessHandleRTCChange,
1905 .domainWatchdog = qemuProcessHandleWatchdog,
1906 .domainIOError = qemuProcessHandleIOError,
1907 .domainGraphics = qemuProcessHandleGraphics,
1908 .domainBlockJob = qemuProcessHandleBlockJob,
1909 .jobStatusChange = qemuProcessHandleJobStatusChange,
1910 .domainTrayChange = qemuProcessHandleTrayChange,
1911 .domainPMWakeup = qemuProcessHandlePMWakeup,
1912 .domainPMSuspend = qemuProcessHandlePMSuspend,
1913 .domainBalloonChange = qemuProcessHandleBalloonChange,
1914 .domainPMSuspendDisk = qemuProcessHandlePMSuspendDisk,
1915 .domainGuestPanic = qemuProcessHandleGuestPanic,
1916 .domainDeviceDeleted = qemuProcessHandleDeviceDeleted,
1917 .domainNicRxFilterChanged = qemuProcessHandleNicRxFilterChanged,
1918 .domainSerialChange = qemuProcessHandleSerialChanged,
1919 .domainSpiceMigrated = qemuProcessHandleSpiceMigrated,
1920 .domainMigrationStatus = qemuProcessHandleMigrationStatus,
1921 .domainMigrationPass = qemuProcessHandleMigrationPass,
1922 .domainAcpiOstInfo = qemuProcessHandleAcpiOstInfo,
1923 .domainBlockThreshold = qemuProcessHandleBlockThreshold,
1924 .domainDumpCompleted = qemuProcessHandleDumpCompleted,
1925 .domainPRManagerStatusChanged = qemuProcessHandlePRManagerStatusChanged,
1926 .domainRdmaGidStatusChanged = qemuProcessHandleRdmaGidStatusChanged,
1927 .domainGuestCrashloaded = qemuProcessHandleGuestCrashloaded,
1928 .domainMemoryFailure = qemuProcessHandleMemoryFailure,
1929 .domainMemoryDeviceSizeChange = qemuProcessHandleMemoryDeviceSizeChange,
1930 .domainDeviceUnplugError = qemuProcessHandleDeviceUnplugErr,
1931 };
1932
1933 static void
1934 qemuProcessMonitorReportLogError(qemuMonitor *mon,
1935 const char *msg,
1936 void *opaque);
1937
1938
1939 static void
1940 qemuProcessMonitorLogFree(void *opaque)
1941 {
1942 qemuDomainLogContext *logCtxt = opaque;
1943 g_clear_object(&logCtxt);
1944 }
1945
1946
1947 static int
1948 qemuProcessInitMonitor(virQEMUDriver *driver,
1949 virDomainObj *vm,
1950 qemuDomainAsyncJob asyncJob)
1951 {
1952 int ret;
1953
1954 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
1955 return -1;
1956
1957 ret = qemuMonitorSetCapabilities(QEMU_DOMAIN_PRIVATE(vm)->mon);
1958
1959 if (qemuDomainObjExitMonitor(driver, vm) < 0)
1960 ret = -1;
1961
1962 return ret;
1963 }
1964
1965
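/**
 * qemuConnectMonitor:
 *
 * Open the QMP monitor connection for @vm: temporarily apply the daemon
 * socket security label, connect with a timeout scaled by the guest RAM
 * size, optionally hook up log-file based error reporting, and finally
 * negotiate QMP capabilities and migration capabilities.
 */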
1966 static int
1967 qemuConnectMonitor(virQEMUDriver *driver, virDomainObj *vm, int asyncJob,
1968 bool retry, qemuDomainLogContext *logCtxt)
1969 {
1970 qemuDomainObjPrivate *priv = vm->privateData;
1971 qemuMonitor *mon = NULL;
1972 unsigned long long timeout = 0;
1973
1974 if (qemuSecuritySetDaemonSocketLabel(driver->securityManager, vm->def) < 0) {
1975 VIR_ERROR(_("Failed to set security context for monitor for %s"),
1976 vm->def->name);
1977 return -1;
1978 }
1979
1980 /* When using hugepages, the kernel zeroes them out before
1981 * handing them over to qemu. This can be very time consuming,
1982 * so add one second to the timeout for each GiB of guest RAM
1983 * (virDomainDefGetMemoryTotal() reports KiB, hence the division below). */
1984 timeout = virDomainDefGetMemoryTotal(vm->def) / (1024 * 1024);
1985
1986 ignore_value(virTimeMillisNow(&priv->monStart));
1987
1988 mon = qemuMonitorOpen(vm,
1989 priv->monConfig,
1990 retry,
1991 timeout,
1992 virEventThreadGetContext(priv->eventThread),
1993 &monitorCallbacks,
1994 driver);
1995
1996 if (mon && logCtxt) {
1997 g_object_ref(logCtxt);
1998 qemuMonitorSetDomainLog(mon,
1999 qemuProcessMonitorReportLogError,
2000 logCtxt,
2001 qemuProcessMonitorLogFree);
2002 }
2003
2004 priv->monStart = 0;
2005 priv->mon = mon;
2006
2007 if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) {
2008 VIR_ERROR(_("Failed to clear security context for monitor for %s"),
2009 vm->def->name);
2010 return -1;
2011 }
2012
2013 if (priv->mon == NULL) {
2014 VIR_INFO("Failed to connect monitor for %s", vm->def->name);
2015 return -1;
2016 }
2017
2018 if (qemuProcessInitMonitor(driver, vm, asyncJob) < 0)
2019 return -1;
2020
2021 if (qemuMigrationCapsCheck(driver, vm, asyncJob) < 0)
2022 return -1;
2023
2024 return 0;
2025 }
2026
2027
2028 /**
2029 * qemuProcessReadLog: Read log file of a qemu VM
2030 * @logCtxt: the domain log context
2031 * @msg: pointer to buffer to store the read messages in
2032 * @max: maximum length of the message returned in @msg
2033 *
2034 * Reads log of a qemu VM. Skips messages not produced by qemu or irrelevant
2035 * messages. If @max is not zero, @msg will contain at most @max characters
2036 * from the end of the log and @msg will start after a new line if possible.
2037 *
2038 * Returns 0 on success or -1 on error
2039 */
2040 static int
2041 qemuProcessReadLog(qemuDomainLogContext *logCtxt,
2042 char **msg,
2043 size_t max)
2044 {
2045 char *buf;
2046 ssize_t got;
2047 char *eol;
2048 char *filter_next;
2049 size_t skip;
2050
2051 if ((got = qemuDomainLogContextRead(logCtxt, &buf)) < 0)
2052 return -1;
2053
2054 /* Filter out debug messages from intermediate libvirt process */
2055 filter_next = buf;
2056 while ((eol = strchr(filter_next, '\n'))) {
2057 *eol = '\0';
2058 if (virLogProbablyLogMessage(filter_next) ||
2059 strstr(filter_next, "char device redirected to")) {
2060 skip = (eol + 1) - filter_next;
2061 memmove(filter_next, eol + 1, buf + got - eol);
2062 got -= skip;
2063 } else {
2064 filter_next = eol + 1;
2065 *eol = '\n';
2066 }
2067 }
2068
2069 if (got > 0 &&
2070 buf[got - 1] == '\n') {
2071 buf[got - 1] = '\0';
2072 got--;
2073 }
2074
2075 if (max > 0 && got > max) {
2076 skip = got - max;
2077
2078 if (buf[skip - 1] != '\n' &&
2079 (eol = strchr(buf + skip, '\n')) &&
2080 !virStringIsEmpty(eol + 1))
2081 skip = eol + 1 - buf;
2082
2083 memmove(buf, buf + skip, got - skip + 1);
2084 got -= skip;
2085 }
2086
2087 buf = g_renew(char, buf, got + 1);
2088 *msg = buf;
2089 return 0;
2090 }
2091
2092
2093 static int
2094 qemuProcessReportLogError(qemuDomainLogContext *logCtxt,
2095 const char *msgprefix)
2096 {
2097 g_autofree char *logmsg = NULL;
2098
2099 /* assume that 1024 chars of qemu log is the right balance */
2100 if (qemuProcessReadLog(logCtxt, &logmsg, 1024) < 0)
2101 return -1;
2102
2103 virResetLastError();
2104 if (virStringIsEmpty(logmsg))
2105 virReportError(VIR_ERR_INTERNAL_ERROR, "%s", msgprefix);
2106 else
2107 virReportError(VIR_ERR_INTERNAL_ERROR, "%s: %s", /* _( silence sc_libvirt_unmarked_diagnostics */
2108 msgprefix, logmsg);
2109
2110 return 0;
2111 }
2112
2113
2114 static void
2115 qemuProcessMonitorReportLogError(qemuMonitor *mon G_GNUC_UNUSED,
2116 const char *msg,
2117 void *opaque)
2118 {
2119 qemuDomainLogContext *logCtxt = opaque;
2120 qemuProcessReportLogError(logCtxt, msg);
2121 }
2122
2123
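/* qemuProcessLookupPTYs: for every pty-backed chardev in @devices, fill in
 * the host pty path reported by 'info chardev' (@info). A device with no
 * path either in @info or from earlier log parsing is a fatal error. */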
2124 static int
2125 qemuProcessLookupPTYs(virDomainChrDef **devices,
2126 int count,
2127 GHashTable *info)
2128 {
2129 size_t i;
2130
2131 for (i = 0; i < count; i++) {
2132 g_autofree char *id = NULL;
2133 virDomainChrDef *chr = devices[i];
2134 if (chr->source->type == VIR_DOMAIN_CHR_TYPE_PTY) {
2135 qemuMonitorChardevInfo *entry;
2136
2137 id = g_strdup_printf("char%s", chr->info.alias);
2138
2139 entry = virHashLookup(info, id);
2140 if (!entry || !entry->ptyPath) {
2141 if (chr->source->data.file.path == NULL) {
2142 /* neither the log output nor 'info chardev' had a
2143 * pty path for this chardev, report an error
2144 */
2145 virReportError(VIR_ERR_INTERNAL_ERROR,
2146 _("no assigned pty for device %s"), id);
2147 return -1;
2148 } else {
2149 /* 'info chardev' had no pty path for this chardev,
2150 * but the log output had, so we're fine
2151 */
2152 continue;
2153 }
2154 }
2155
2156 g_free(chr->source->data.file.path);
2157 chr->source->data.file.path = g_strdup(entry->ptyPath);
2158 }
2159 }
2160
2161 return 0;
2162 }
2163
2164 static int
2165 qemuProcessFindCharDevicePTYsMonitor(virDomainObj *vm,
2166 GHashTable *info)
2167 {
2168 size_t i = 0;
2169
2170 if (qemuProcessLookupPTYs(vm->def->serials, vm->def->nserials, info) < 0)
2171 return -1;
2172
2173 if (qemuProcessLookupPTYs(vm->def->parallels, vm->def->nparallels,
2174 info) < 0)
2175 return -1;
2176
2177 if (qemuProcessLookupPTYs(vm->def->channels, vm->def->nchannels, info) < 0)
2178 return -1;
2179 /* For historical reasons, console[0] can be just an alias
2180 * for serial[0]. That's why we need to update it as well. */
2181 if (vm->def->nconsoles) {
2182 virDomainChrDef *chr = vm->def->consoles[0];
2183
2184 if (vm->def->nserials &&
2185 chr->deviceType == VIR_DOMAIN_CHR_DEVICE_TYPE_CONSOLE &&
2186 chr->targetType == VIR_DOMAIN_CHR_CONSOLE_TARGET_TYPE_SERIAL) {
2187 /* yes, the first console is just an alias for serials[0] */
2188 i = 1;
2189 if (virDomainChrSourceDefCopy(chr->source,
2190 ((vm->def->serials[0])->source)) < 0)
2191 return -1;
2192 }
2193 }
2194
2195 if (qemuProcessLookupPTYs(vm->def->consoles + i, vm->def->nconsoles - i,
2196 info) < 0)
2197 return -1;
2198
2199 return 0;
2200 }
2201
2202
2203 static void
2204 qemuProcessRefreshChannelVirtioState(virQEMUDriver *driver,
2205 virDomainObj *vm,
2206 GHashTable *info,
2207 int booted)
2208 {
2209 size_t i;
2210 int agentReason = VIR_CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_CHANNEL;
2211 qemuMonitorChardevInfo *entry;
2212 virObjectEvent *event = NULL;
2213 g_autofree char *id = NULL;
2214
2215 if (booted)
2216 agentReason = VIR_CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_DOMAIN_STARTED;
2217
2218 for (i = 0; i < vm->def->nchannels; i++) {
2219 virDomainChrDef *chr = vm->def->channels[i];
2220 if (chr->targetType == VIR_DOMAIN_CHR_CHANNEL_TARGET_TYPE_VIRTIO) {
2221
2222 VIR_FREE(id);
2223 id = g_strdup_printf("char%s", chr->info.alias);
2224
2225 /* port state not reported */
2226 if (!(entry = virHashLookup(info, id)) ||
2227 !entry->state)
2228 continue;
2229
2230 if (entry->state != VIR_DOMAIN_CHR_DEVICE_STATE_DEFAULT &&
2231 STREQ_NULLABLE(chr->target.name, "org.qemu.guest_agent.0") &&
2232 (event = virDomainEventAgentLifecycleNewFromObj(vm, entry->state,
2233 agentReason)))
2234 virObjectEventStateQueue(driver->domainEventState, event);
2235
2236 chr->state = entry->state;
2237 }
2238 }
2239 }
2240
2241
2242 int
2243 qemuRefreshVirtioChannelState(virQEMUDriver *driver,
2244 virDomainObj *vm,
2245 qemuDomainAsyncJob asyncJob)
2246 {
2247 qemuDomainObjPrivate *priv = vm->privateData;
2248 GHashTable *info = NULL;
2249 int ret = -1;
2250
2251 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
2252 goto cleanup;
2253
2254 ret = qemuMonitorGetChardevInfo(priv->mon, &info);
2255 if (qemuDomainObjExitMonitor(driver, vm) < 0)
2256 ret = -1;
2257
2258 if (ret < 0)
2259 goto cleanup;
2260
2261 qemuProcessRefreshChannelVirtioState(driver, vm, info, false);
2262 ret = 0;
2263
2264 cleanup:
2265 virHashFree(info);
2266 return ret;
2267 }
2268
2269
2270 static int
2271 qemuProcessRefreshPRManagerState(virDomainObj *vm,
2272 GHashTable *info)
2273 {
2274 qemuDomainObjPrivate *priv = vm->privateData;
2275 qemuMonitorPRManagerInfo *prManagerInfo;
2276 const char *managedAlias = qemuDomainGetManagedPRAlias();
2277
2278 if (!(prManagerInfo = virHashLookup(info, managedAlias))) {
2279 virReportError(VIR_ERR_OPERATION_FAILED,
2280 _("missing info on pr-manager %s"),
2281 managedAlias);
2282 return -1;
2283 }
2284
2285 priv->prDaemonRunning = prManagerInfo->connected;
2286
2287 if (!priv->prDaemonRunning &&
2288 qemuProcessStartManagedPRDaemon(vm) < 0)
2289 return -1;
2290
2291 return 0;
2292 }
2293
2294
2295 static int
2296 qemuRefreshPRManagerState(virQEMUDriver *driver,
2297 virDomainObj *vm)
2298 {
2299 qemuDomainObjPrivate *priv = vm->privateData;
2300 GHashTable *info = NULL;
2301 int ret = -1;
2302
2303 if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PR_MANAGER_HELPER) ||
2304 !qemuDomainDefHasManagedPR(vm))
2305 return 0;
2306
2307 qemuDomainObjEnterMonitor(driver, vm);
2308 ret = qemuMonitorGetPRManagerInfo(priv->mon, &info);
2309 if (qemuDomainObjExitMonitor(driver, vm) < 0)
2310 ret = -1;
2311
2312 if (ret < 0)
2313 goto cleanup;
2314
2315 ret = qemuProcessRefreshPRManagerState(vm, info);
2316
2317 cleanup:
2318 virHashFree(info);
2319 return ret;
2320 }
2321
2322
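/* qemuRefreshRTC: for domains with a 'variable' clock offset, query the
 * guest RTC from QEMU and recompute the adjustment as the difference
 * between guest time and host time, compensated by the host's UTC offset
 * since mktime() interprets the guest's UTC time as local time. */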
2323 static void
2324 qemuRefreshRTC(virQEMUDriver *driver,
2325 virDomainObj *vm)
2326 {
2327 qemuDomainObjPrivate *priv = vm->privateData;
2328 time_t now, then;
2329 struct tm thenbits;
2330 long localOffset;
2331 int rv;
2332
2333 if (vm->def->clock.offset != VIR_DOMAIN_CLOCK_OFFSET_VARIABLE)
2334 return;
2335
2336 memset(&thenbits, 0, sizeof(thenbits));
2337 qemuDomainObjEnterMonitor(driver, vm);
2338 now = time(NULL);
2339 rv = qemuMonitorGetRTCTime(priv->mon, &thenbits);
2340 if (qemuDomainObjExitMonitor(driver, vm) < 0)
2341 rv = -1;
2342
2343 if (rv < 0)
2344 return;
2345
2346 thenbits.tm_isdst = -1;
2347 if ((then = mktime(&thenbits)) == (time_t)-1) {
2348 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2349 _("Unable to convert time"));
2350 return;
2351 }
2352
2353 /* Thing is, @now is in local TZ but @then in UTC. */
2354 if (virTimeLocalOffsetFromUTC(&localOffset) < 0)
2355 return;
2356
2357 vm->def->clock.data.variable.adjustment = then - now + localOffset;
2358 }
2359
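/**
 * qemuProcessRefreshBalloonState:
 *
 * Query the current balloon size from QEMU and store it in the domain
 * definition. The sizes of any virtio-mem devices are added on top of the
 * reported value, since QEMU's balloon figure does not include them.
 */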
2360 int
2361 qemuProcessRefreshBalloonState(virQEMUDriver *driver,
2362 virDomainObj *vm,
2363 int asyncJob)
2364 {
2365 unsigned long long balloon;
2366 size_t i;
2367 int rc;
2368
2369 /* if no ballooning is available, the current size equals the current
2370 * full memory size */
2371 if (!virDomainDefHasMemballoon(vm->def)) {
2372 vm->def->mem.cur_balloon = virDomainDefGetMemoryTotal(vm->def);
2373 return 0;
2374 }
2375
2376 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
2377 return -1;
2378
2379 rc = qemuMonitorGetBalloonInfo(qemuDomainGetMonitor(vm), &balloon);
2380 if (qemuDomainObjExitMonitor(driver, vm) < 0 || rc < 0)
2381 return -1;
2382
2383 /* We want the balloon size stored in domain definition to
2384 * account for the actual size of virtio-mem too. But the
2385 * balloon size as reported by QEMU (@balloon) contains just
2386 * the balloon size without any virtio-mem. Do a wee bit of
2387 * math to fix it. */
2388 VIR_DEBUG("balloon size before fix is %lld", balloon);
2389 for (i = 0; i < vm->def->nmems; i++) {
2390 if (vm->def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM)
2391 balloon += vm->def->mems[i]->currentsize;
2392 }
2393 VIR_DEBUG("Updating balloon from %lld to %lld kb",
2394 vm->def->mem.cur_balloon, balloon);
2395 vm->def->mem.cur_balloon = balloon;
2396
2397 return 0;
2398 }
2399
2400
2401 static int
2402 qemuProcessWaitForMonitor(virQEMUDriver *driver,
2403 virDomainObj *vm,
2404 int asyncJob,
2405 qemuDomainLogContext *logCtxt)
2406 {
2407 int ret = -1;
2408 GHashTable *info = NULL;
2409 qemuDomainObjPrivate *priv = vm->privateData;
2410 bool retry = true;
2411
2412 if (priv->qemuCaps &&
2413 virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_CHARDEV_FD_PASS_COMMANDLINE))
2414 retry = false;
2415
2416 VIR_DEBUG("Connect monitor to vm=%p name='%s' retry=%d",
2417 vm, vm->def->name, retry);
2418
2419 if (qemuConnectMonitor(driver, vm, asyncJob, retry, logCtxt) < 0)
2420 goto cleanup;
2421
2422 /* Try to get the pty path mappings again via the monitor. This is much more
2423 * reliable if it's available.
2424 * Note that the monitor itself can be on a pty, so we still need to try the
2425 * log output method. */
2426 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
2427 goto cleanup;
2428 ret = qemuMonitorGetChardevInfo(priv->mon, &info);
2429 VIR_DEBUG("qemuMonitorGetChardevInfo returned %i", ret);
2430 if (qemuDomainObjExitMonitor(driver, vm) < 0)
2431 ret = -1;
2432
2433 if (ret == 0) {
2434 if ((ret = qemuProcessFindCharDevicePTYsMonitor(vm, info)) < 0)
2435 goto cleanup;
2436
2437 qemuProcessRefreshChannelVirtioState(driver, vm, info, true);
2438 }
2439
2440 cleanup:
2441 virHashFree(info);
2442
2443 if (logCtxt && kill(vm->pid, 0) == -1 && errno == ESRCH) {
2444 qemuProcessReportLogError(logCtxt,
2445 _("process exited while connecting to monitor"));
2446 ret = -1;
2447 }
2448
2449 return ret;
2450 }
2451
2452
2453 static int
2454 qemuProcessDetectIOThreadPIDs(virQEMUDriver *driver,
2455 virDomainObj *vm,
2456 int asyncJob)
2457 {
2458 qemuDomainObjPrivate *priv = vm->privateData;
2459 qemuMonitorIOThreadInfo **iothreads = NULL;
2460 int niothreads = 0;
2461 int ret = -1;
2462 size_t i;
2463
2464 if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_OBJECT_IOTHREAD)) {
2465 ret = 0;
2466 goto cleanup;
2467 }
2468
2469 /* Get the list of IOThreads from qemu */
2470 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
2471 goto cleanup;
2472 ret = qemuMonitorGetIOThreads(priv->mon, &iothreads, &niothreads);
2473 if (qemuDomainObjExitMonitor(driver, vm) < 0)
2474 goto cleanup;
2475 if (ret < 0)
2476 goto cleanup;
2477
2478 if (niothreads != vm->def->niothreadids) {
2479 virReportError(VIR_ERR_INTERNAL_ERROR,
2480 _("got wrong number of IOThread pids from QEMU monitor. "
2481 "got %d, wanted %zu"),
2482 niothreads, vm->def->niothreadids);
2483 goto cleanup;
2484 }
2485
2486 /* Nothing to do */
2487 if (niothreads == 0) {
2488 ret = 0;
2489 goto cleanup;
2490 }
2491
2492 for (i = 0; i < niothreads; i++) {
2493 virDomainIOThreadIDDef *iothrid;
2494
2495 if (!(iothrid = virDomainIOThreadIDFind(vm->def,
2496 iothreads[i]->iothread_id))) {
2497 virReportError(VIR_ERR_INTERNAL_ERROR,
2498 _("iothread %d not found"),
2499 iothreads[i]->iothread_id);
2500 goto cleanup;
2501 }
2502 iothrid->thread_id = iothreads[i]->thread_id;
2503 }
2504
2505 ret = 0;
2506
2507 cleanup:
2508 if (iothreads) {
2509 for (i = 0; i < niothreads; i++)
2510 VIR_FREE(iothreads[i]);
2511 VIR_FREE(iothreads);
2512 }
2513 return ret;
2514 }
2515
2516
2517 static int
2518 qemuProcessGetAllCpuAffinity(virBitmap **cpumapRet)
2519 {
2520 *cpumapRet = NULL;
2521
2522 if (!virHostCPUHasBitmap())
2523 return 0;
2524
2525 if (!(*cpumapRet = virHostCPUGetOnlineBitmap()))
2526 return -1;
2527
2528 return 0;
2529 }
2530
2531
2532 /*
2533 * To be run between fork/exec of QEMU only
2534 */
2535 #if defined(WITH_SCHED_GETAFFINITY) || defined(WITH_BSD_CPU_AFFINITY)
2536 static int
2537 qemuProcessInitCpuAffinity(virDomainObj *vm)
2538 {
2539 bool settingAll = false;
2540 g_autoptr(virBitmap) cpumapToSet = NULL;
2541 virDomainNumatuneMemMode mem_mode;
2542 qemuDomainObjPrivate *priv = vm->privateData;
2543
2544 if (!vm->pid) {
2545 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2546 _("Cannot setup CPU affinity until process is started"));
2547 return -1;
2548 }
2549
2550 /* Here is the deal: we can't set cpuset.mems before qemu is
2551 * started as it clashes with KVM allocation. Therefore, we
2552 * used to let qemu allocate its memory anywhere and then move
2553 * the memory to the desired NUMA node via CGroups.
2554 * However, that might not always be possible because qemu
2555 * might lock some parts of its memory (e.g. due to VFIO).
2556 * Even if it is possible, memory has to be copied between NUMA
2557 * nodes, which is suboptimal.
2558 * The solution is to set an affinity that best matches what we
2559 * would have set in CGroups and then fix it up later, once qemu
2560 * is already running. */
2561 if (virDomainNumaGetNodeCount(vm->def->numa) <= 1 &&
2562 virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
2563 mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
2564 virBitmap *nodeset = NULL;
2565
2566 if (virDomainNumatuneMaybeGetNodeset(vm->def->numa,
2567 priv->autoNodeset,
2568 &nodeset,
2569 -1) < 0)
2570 return -1;
2571
2572 if (virNumaNodesetToCPUset(nodeset, &cpumapToSet) < 0)
2573 return -1;
2574 } else if (vm->def->cputune.emulatorpin) {
2575 cpumapToSet = virBitmapNewCopy(vm->def->cputune.emulatorpin);
2576 } else {
2577 settingAll = true;
2578 if (qemuProcessGetAllCpuAffinity(&cpumapToSet) < 0)
2579 return -1;
2580 }
2581
2582 /*
2583 * We only want to error out if we failed to set the affinity to
2584 * user-requested mapping. If we are just trying to reset the affinity
2585 * to all CPUs and this fails it can only be an issue if:
2586 * 1) libvirtd does not have CAP_SYS_NICE
2587 * 2) libvirtd does not run on all CPUs
2588 *
2589 * This scenario can easily occur when libvirtd is run inside a
2590 * container with restrictive permissions and CPU pinning.
2591 *
2592 * See also: https://bugzilla.redhat.com/1819801#c2
2593 */
2594 if (cpumapToSet &&
2595 virProcessSetAffinity(vm->pid, cpumapToSet, settingAll) < 0) {
2596 return -1;
2597 }
2598
2599 return 0;
2600 }
2601 #else /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */
2602 static int
2603 qemuProcessInitCpuAffinity(virDomainObj *vm G_GNUC_UNUSED)
2604 {
2605 return 0;
2606 }
2607 #endif /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */
2608
2609 /* set link states to down on interfaces at qemu start */
2610 static int
2611 qemuProcessSetLinkStates(virQEMUDriver *driver,
2612 virDomainObj *vm,
2613 qemuDomainAsyncJob asyncJob)
2614 {
2615 qemuDomainObjPrivate *priv = vm->privateData;
2616 virDomainDef *def = vm->def;
2617 size_t i;
2618 int ret = -1;
2619 int rv;
2620
2621 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
2622 return -1;
2623
2624 for (i = 0; i < def->nnets; i++) {
2625 if (def->nets[i]->linkstate == VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DOWN) {
2626 if (!def->nets[i]->info.alias) {
2627 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2628 _("missing alias for network device"));
2629 goto cleanup;
2630 }
2631
2632 VIR_DEBUG("Setting link state: %s", def->nets[i]->info.alias);
2633
2634 rv = qemuMonitorSetLink(priv->mon,
2635 def->nets[i]->info.alias,
2636 VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DOWN);
2637 if (rv < 0) {
2638 virReportError(VIR_ERR_OPERATION_FAILED,
2639 _("Couldn't set link state on interface: %s"),
2640 def->nets[i]->info.alias);
2641 goto cleanup;
2642 }
2643 }
2644 }
2645
2646 ret = 0;
2647
2648 cleanup:
2649 if (qemuDomainObjExitMonitor(driver, vm) < 0)
2650 ret = -1;
2651 return ret;
2652 }
2653
2654
2655 /**
2656 * qemuProcessSetupPid:
2657 *
2658 * This function sets resource properties (affinity, cgroups,
2659 * scheduler) for any PID associated with a domain. It should be used
2660 * to set up emulator PIDs as well as vCPU and I/O thread pids to
2661 * ensure they are all handled the same way.
2662 *
2663 * Returns 0 on success, -1 on error.
2664 */
2665 static int
2666 qemuProcessSetupPid(virDomainObj *vm,
2667 pid_t pid,
2668 virCgroupThreadName nameval,
2669 int id,
2670 virBitmap *cpumask,
2671 unsigned long long period,
2672 long long quota,
2673 virDomainThreadSchedParam *sched)
2674 {
2675 qemuDomainObjPrivate *priv = vm->privateData;
2676 virDomainNumatuneMemMode mem_mode;
2677 virCgroup *cgroup = NULL;
2678 virBitmap *use_cpumask = NULL;
2679 virBitmap *affinity_cpumask = NULL;
2680 g_autoptr(virBitmap) hostcpumap = NULL;
2681 g_autofree char *mem_mask = NULL;
2682 int ret = -1;
2683 size_t i;
2684
2685 if ((period || quota) &&
2686 !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
2687 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
2688 _("cgroup cpu is required for scheduler tuning"));
2689 goto cleanup;
2690 }
2691
2692 /* Infer which cpumask shall be used. */
2693 if (cpumask) {
2694 use_cpumask = cpumask;
2695 } else if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) {
2696 use_cpumask = priv->autoCpuset;
2697 } else if (vm->def->cpumask) {
2698 use_cpumask = vm->def->cpumask;
2699 } else {
2700 /* You may think this is redundant, but we can't assume libvirtd
2701 * itself is running on all pCPUs, so we need to explicitly set
2702 * the spawned QEMU instance to all pCPUs if no map is given in
2703 * its config file */
2704 if (qemuProcessGetAllCpuAffinity(&hostcpumap) < 0)
2705 goto cleanup;
2706 affinity_cpumask = hostcpumap;
2707 }
2708
2709 /*
2710 * If CPU cgroup controller is not initialized here, then we need
2711 * neither period nor quota settings. And if CPUSET controller is
2712 * not initialized either, then there's nothing to do anyway.
2713 */
2714 if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) ||
2715 virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
2716
2717 if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
2718 mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
2719 virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
2720 priv->autoNodeset,
2721 &mem_mask, -1) < 0)
2722 goto cleanup;
2723
2724 /* For restrictive numatune mode we need to set cpuset.mems for vCPU
2725 * threads based on the node they are in, as nothing else enforces
2726 * such a restriction (e.g. numa_set_membind). */
2727 if (nameval == VIR_CGROUP_THREAD_VCPU) {
2728 virDomainNuma *numatune = vm->def->numa;
2729
2730 /* Look for the guest NUMA node of this vCPU */
2731 for (i = 0; i < virDomainNumaGetNodeCount(numatune); i++) {
2732 virBitmap *node_cpus = virDomainNumaGetNodeCpumask(numatune, i);
2733
2734 if (!virBitmapIsBitSet(node_cpus, id))
2735 continue;
2736
2737 /* Update the mem_mask for this vCPU if the mode of its node is
2738 * 'restrictive'. */
2739 if (virDomainNumatuneGetMode(numatune, i, &mem_mode) == 0 &&
2740 mem_mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) {
2741 VIR_FREE(mem_mask);
2742
2743 if (virDomainNumatuneMaybeFormatNodeset(numatune,
2744 priv->autoNodeset,
2745 &mem_mask, i) < 0) {
2746 goto cleanup;
2747 }
2748 }
2749
2750 break;
2751 }
2752 }
2753
2754 if (virCgroupNewThread(priv->cgroup, nameval, id, true, &cgroup) < 0)
2755 goto cleanup;
2756
2757 if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
2758 if (use_cpumask &&
2759 qemuSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0)
2760 goto cleanup;
2761
2762 if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
2763 goto cleanup;
2764
2765 }
2766
2767 if ((period || quota) &&
2768 qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0)
2769 goto cleanup;
2770
2771 /* Move the thread to the sub dir */
2772 if (virCgroupAddThread(cgroup, pid) < 0)
2773 goto cleanup;
2774
2775 }
2776
2777 if (!affinity_cpumask)
2778 affinity_cpumask = use_cpumask;
2779
2780 /* Setup legacy affinity.
2781 *
2782 * We only want to error out if we failed to set the affinity to
2783 * user-requested mapping. If we are just trying to reset the affinity
2784 * to all CPUs and this fails it can only be an issue if:
2785 * 1) libvirtd does not have CAP_SYS_NICE
2786 * 2) libvirtd does not run on all CPUs
2787 *
2788 * This scenario can easily occur when libvirtd is run inside a
2789 * container with restrictive permissions and CPU pinning.
2790 *
2791 * See also: https://bugzilla.redhat.com/1819801#c2
2792 */
2793 if (affinity_cpumask &&
2794 virProcessSetAffinity(pid, affinity_cpumask,
2795 affinity_cpumask == hostcpumap) < 0) {
2796 goto cleanup;
2797 }
2798
2799 /* Set scheduler type and priority, but not for the main thread. */
2800 if (sched &&
2801 nameval != VIR_CGROUP_THREAD_EMULATOR &&
2802 virProcessSetScheduler(pid, sched->policy, sched->priority) < 0)
2803 goto cleanup;
2804
2805 ret = 0;
2806 cleanup:
2807 if (cgroup) {
2808 if (ret < 0)
2809 virCgroupRemove(cgroup);
2810 virCgroupFree(cgroup);
2811 }
2812
2813 return ret;
2814 }
2815
2816
2817 static int
2818 qemuProcessSetupEmulator(virDomainObj *vm)
2819 {
2820 return qemuProcessSetupPid(vm, vm->pid, VIR_CGROUP_THREAD_EMULATOR,
2821 0, vm->def->cputune.emulatorpin,
2822 vm->def->cputune.emulator_period,
2823 vm->def->cputune.emulator_quota,
2824 vm->def->cputune.emulatorsched);
2825 }
2826
2827
2828 static int
2829 qemuProcessResctrlCreate(virQEMUDriver *driver,
2830 virDomainObj *vm)
2831 {
2832 size_t i = 0;
2833 g_autoptr(virCaps) caps = NULL;
2834 qemuDomainObjPrivate *priv = vm->privateData;
2835
2836 if (!vm->def->nresctrls)
2837 return 0;
2838
2839 /* Force capability refresh since resctrl info can change
2840 * XXX: move cache info into virresctrl so caps are not needed */
2841 caps = virQEMUDriverGetCapabilities(driver, true);
2842 if (!caps)
2843 return -1;
2844
2845 for (i = 0; i < vm->def->nresctrls; i++) {
2846 size_t j = 0;
2847 if (virResctrlAllocCreate(caps->host.resctrl,
2848 vm->def->resctrls[i]->alloc,
2849 priv->machineName) < 0)
2850 return -1;
2851
2852 for (j = 0; j < vm->def->resctrls[i]->nmonitors; j++) {
2853 virDomainResctrlMonDef *mon = NULL;
2854
2855 mon = vm->def->resctrls[i]->monitors[j];
2856 if (virResctrlMonitorCreate(mon->instance,
2857 priv->machineName) < 0)
2858 return -1;
2859 }
2860 }
2861
2862 return 0;
2863 }
2864
2865
2866 static char *
2867 qemuProcessBuildPRHelperPidfilePathOld(virDomainObj *vm)
2868 {
2869 qemuDomainObjPrivate *priv = vm->privateData;
2870 const char *prdAlias = qemuDomainGetManagedPRAlias();
2871
2872 return virPidFileBuildPath(priv->libDir, prdAlias);
2873 }
2874
2875
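/* The current pidfile lives in the driver's stateDir and is named after the
 * domain's short name plus the managed pr-manager alias; the *Old variant
 * above (per-domain libDir) is still consulted on cleanup so that helpers
 * started by an older libvirtd can be killed too. */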
2876 static char *
2877 qemuProcessBuildPRHelperPidfilePath(virDomainObj *vm)
2878 {
2879 qemuDomainObjPrivate *priv = vm->privateData;
2880 g_autofree char *domname = virDomainDefGetShortName(vm->def);
2881 g_autofree char *prdName = g_strdup_printf("%s-%s", domname, qemuDomainGetManagedPRAlias());
2882 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
2883
2884 return virPidFileBuildPath(cfg->stateDir, prdName);
2885 }
2886
2887
2888 void
2889 qemuProcessKillManagedPRDaemon(virDomainObj *vm)
2890 {
2891 qemuDomainObjPrivate *priv = vm->privateData;
2892 virErrorPtr orig_err;
2893 g_autofree char *pidfile = NULL;
2894
2895 if (!(pidfile = qemuProcessBuildPRHelperPidfilePath(vm))) {
2896 VIR_WARN("Unable to construct pr-helper pidfile path");
2897 return;
2898 }
2899
2900 if (!virFileExists(pidfile)) {
2901 g_free(pidfile);
2902 if (!(pidfile = qemuProcessBuildPRHelperPidfilePathOld(vm))) {
2903 VIR_WARN("Unable to construct pr-helper pidfile path");
2904 return;
2905 }
2906 }
2907
2908 virErrorPreserveLast(&orig_err);
2909 if (virPidFileForceCleanupPath(pidfile) < 0) {
2910 VIR_WARN("Unable to kill pr-helper process");
2911 } else {
2912 priv->prDaemonRunning = false;
2913 }
2914 virErrorRestore(&orig_err);
2915 }
2916
2917
2918 static int
2919 qemuProcessStartPRDaemonHook(void *opaque)
2920 {
2921 virDomainObj *vm = opaque;
2922 size_t i, nfds = 0;
2923 g_autofree int *fds = NULL;
2924 int ret = -1;
2925
2926 if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) {
2927 if (virProcessGetNamespaces(vm->pid, &nfds, &fds) < 0)
2928 return ret;
2929
2930 if (nfds > 0 &&
2931 virProcessSetNamespaces(nfds, fds) < 0)
2932 goto cleanup;
2933 }
2934
2935 ret = 0;
2936 cleanup:
2937 for (i = 0; i < nfds; i++)
2938 VIR_FORCE_CLOSE(fds[i]);
2939 return ret;
2940 }
2941
2942
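/**
 * qemuProcessStartManagedPRDaemon:
 *
 * Start the managed persistent-reservations helper for @vm: remove any
 * stale socket, daemonize the helper (joining the domain's namespaces via
 * the pre-exec hook), wait for its socket to appear with a back-off loop,
 * move the helper into the domain's cgroup and label its socket.
 */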
2943 int
2944 qemuProcessStartManagedPRDaemon(virDomainObj *vm)
2945 {
2946 qemuDomainObjPrivate *priv = vm->privateData;
2947 virQEMUDriver *driver = priv->driver;
2948 g_autoptr(virQEMUDriverConfig) cfg = NULL;
2949 int errfd = -1;
2950 g_autofree char *pidfile = NULL;
2951 g_autofree char *socketPath = NULL;
2952 pid_t cpid = -1;
2953 g_autoptr(virCommand) cmd = NULL;
2954 virTimeBackOffVar timebackoff;
2955 const unsigned long long timeout = 500000; /* ms */
2956 int ret = -1;
2957
2958 cfg = virQEMUDriverGetConfig(driver);
2959
2960 if (!virFileIsExecutable(cfg->prHelperName)) {
2961 virReportSystemError(errno, _("'%s' is not a suitable pr helper"),
2962 cfg->prHelperName);
2963 goto cleanup;
2964 }
2965
2966 if (!(pidfile = qemuProcessBuildPRHelperPidfilePath(vm)))
2967 goto cleanup;
2968
2969 if (!(socketPath = qemuDomainGetManagedPRSocketPath(priv)))
2970 goto cleanup;
2971
2972 /* Remove stale socket */
2973 if (unlink(socketPath) < 0 &&
2974 errno != ENOENT) {
2975 virReportSystemError(errno,
2976 _("Unable to remove stale socket path: %s"),
2977 socketPath);
2978 goto cleanup;
2979 }
2980
2981 if (!(cmd = virCommandNewArgList(cfg->prHelperName,
2982 "-k", socketPath,
2983 NULL)))
2984 goto cleanup;
2985
2986 virCommandDaemonize(cmd);
2987 virCommandSetPidFile(cmd, pidfile);
2988 virCommandSetErrorFD(cmd, &errfd);
2989
2990 /* Place the process into the same namespace and cgroup as
2991 * qemu (so that it shares the same view of the system). */
2992 virCommandSetPreExecHook(cmd, qemuProcessStartPRDaemonHook, vm);
2993
2994 if (virCommandRun(cmd, NULL) < 0)
2995 goto cleanup;
2996
2997 if (virPidFileReadPath(pidfile, &cpid) < 0) {
2998 virReportError(VIR_ERR_INTERNAL_ERROR,
2999 _("pr helper %s didn't show up"),
3000 cfg->prHelperName);
3001 goto cleanup;
3002 }
3003
3004 if (virTimeBackOffStart(&timebackoff, 1, timeout) < 0)
3005 goto cleanup;
3006 while (virTimeBackOffWait(&timebackoff)) {
3007 char errbuf[1024] = { 0 };
3008
3009 if (virFileExists(socketPath))
3010 break;
3011
3012 if (virProcessKill(cpid, 0) == 0)
3013 continue;
3014
3015 if (saferead(errfd, errbuf, sizeof(errbuf) - 1) < 0) {
3016 virReportSystemError(errno,
3017 _("pr helper %s died unexpectedly"),
3018 cfg->prHelperName);
3019 } else {
3020 virReportError(VIR_ERR_OPERATION_FAILED,
3021 _("pr helper died and reported: %s"), errbuf);
3022 }
3023 goto cleanup;
3024 }
3025
3026 if (!virFileExists(socketPath)) {
3027 virReportError(VIR_ERR_OPERATION_TIMEOUT, "%s",
3028 _("pr helper socked did not show up"));
3029 goto cleanup;
3030 }
3031
3032 if (priv->cgroup &&
3033 virCgroupAddMachineProcess(priv->cgroup, cpid) < 0)
3034 goto cleanup;
3035
3036 if (qemuSecurityDomainSetPathLabel(driver, vm, socketPath, true) < 0)
3037 goto cleanup;
3038
3039 priv->prDaemonRunning = true;
3040 ret = 0;
3041 cleanup:
3042 if (ret < 0) {
3043 virCommandAbort(cmd);
3044 if (cpid >= 0)
3045 virProcessKillPainfully(cpid, true);
3046 if (pidfile)
3047 unlink(pidfile);
3048 }
3049 VIR_FORCE_CLOSE(errfd);
3050 return ret;
3051 }
3052
3053
3054 static int
3055 qemuProcessInitPasswords(virQEMUDriver *driver,
3056 virDomainObj *vm,
3057 int asyncJob)
3058 {
3059 int ret = 0;
3060 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
3061 size_t i;
3062
3063 for (i = 0; i < vm->def->ngraphics; ++i) {
3064 virDomainGraphicsDef *graphics = vm->def->graphics[i];
3065 if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) {
3066 ret = qemuDomainChangeGraphicsPasswords(driver, vm,
3067 VIR_DOMAIN_GRAPHICS_TYPE_VNC,
3068 &graphics->data.vnc.auth,
3069 cfg->vncPassword,
3070 asyncJob);
3071 } else if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) {
3072 ret = qemuDomainChangeGraphicsPasswords(driver, vm,
3073 VIR_DOMAIN_GRAPHICS_TYPE_SPICE,
3074 &graphics->data.spice.auth,
3075 cfg->spicePassword,
3076 asyncJob);
3077 }
3078
3079 if (ret < 0)
3080 return ret;
3081 }
3082
3083 return ret;
3084 }
3085
3086
3087 static int
3088 qemuProcessPrepareChardevDevice(virDomainDef *def G_GNUC_UNUSED,
3089 virDomainChrDef *dev,
3090 void *opaque G_GNUC_UNUSED)
3091 {
3092 int fd;
3093 if (dev->source->type != VIR_DOMAIN_CHR_TYPE_FILE)
3094 return 0;
3095
3096 if ((fd = open(dev->source->data.file.path,
3097 O_CREAT | O_APPEND, S_IRUSR|S_IWUSR)) < 0) {
3098 virReportSystemError(errno,
3099 _("Unable to pre-create chardev file '%s'"),
3100 dev->source->data.file.path);
3101 return -1;
3102 }
3103
3104 VIR_FORCE_CLOSE(fd);
3105
3106 return 0;
3107 }
3108
3109
3110 static int
3111 qemuProcessCleanupChardevDevice(virDomainDef *def G_GNUC_UNUSED,
3112 virDomainChrDef *dev,
3113 void *opaque G_GNUC_UNUSED)
3114 {
3115 if (dev->source->type == VIR_DOMAIN_CHR_TYPE_UNIX &&
3116 dev->source->data.nix.listen &&
3117 dev->source->data.nix.path)
3118 unlink(dev->source->data.nix.path);
3119
3120 return 0;
3121 }
3122
3123
3124 /**
3125 * Loads and updates the video memory size for video devices from the QEMU
3126 * process, as QEMU silently adjusts the values that we pass to it on the
3127 * command line. We need to load these updated values and store them
3128 * into the status XML.
3129 *
3130 * We fail if for some reason the values cannot be loaded from QEMU, because
3131 * it is mandatory to have the correct video memory size in the status XML
3132 * so as not to break migration.
3133 */
3134 static int
3135 qemuProcessUpdateVideoRamSize(virQEMUDriver *driver,
3136 virDomainObj *vm,
3137 int asyncJob)
3138 {
3139 int ret = -1;
3140 ssize_t i;
3141 qemuDomainObjPrivate *priv = vm->privateData;
3142 virDomainVideoDef *video = NULL;
3143 g_autoptr(virQEMUDriverConfig) cfg = NULL;
3144
3145 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
3146 return -1;
3147
3148 for (i = 0; i < vm->def->nvideos; i++) {
3149 video = vm->def->videos[i];
3150
3151 switch (video->type) {
3152 case VIR_DOMAIN_VIDEO_TYPE_VGA:
3153 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VGA_VGAMEM)) {
3154 if (qemuMonitorUpdateVideoMemorySize(priv->mon, video, "VGA") < 0)
3155 goto error;
3156 }
3157 break;
3158 case VIR_DOMAIN_VIDEO_TYPE_QXL:
3159 if (i == 0) {
3160 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VGAMEM) &&
3161 qemuMonitorUpdateVideoMemorySize(priv->mon, video,
3162 "qxl-vga") < 0)
3163 goto error;
3164
3165 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VRAM64) &&
3166 qemuMonitorUpdateVideoVram64Size(priv->mon, video,
3167 "qxl-vga") < 0)
3168 goto error;
3169 } else {
3170 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VGAMEM) &&
3171 qemuMonitorUpdateVideoMemorySize(priv->mon, video,
3172 "qxl") < 0)
3173 goto error;
3174
3175 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VRAM64) &&
3176 qemuMonitorUpdateVideoVram64Size(priv->mon, video,
3177 "qxl") < 0)
3178 goto error;
3179 }
3180 break;
3181 case VIR_DOMAIN_VIDEO_TYPE_VMVGA:
3182 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VMWARE_SVGA_VGAMEM)) {
3183 if (qemuMonitorUpdateVideoMemorySize(priv->mon, video,
3184 "vmware-svga") < 0)
3185 goto error;
3186 }
3187 break;
3188 case VIR_DOMAIN_VIDEO_TYPE_CIRRUS:
3189 case VIR_DOMAIN_VIDEO_TYPE_XEN:
3190 case VIR_DOMAIN_VIDEO_TYPE_VBOX:
3191 case VIR_DOMAIN_VIDEO_TYPE_LAST:
3192 break;
3193 }
3194
3195 }
3196
3197 if (qemuDomainObjExitMonitor(driver, vm) < 0)
3198 return -1;
3199
3200 cfg = virQEMUDriverGetConfig(driver);
3201 ret = virDomainObjSave(vm, driver->xmlopt, cfg->stateDir);
3202
3203 return ret;
3204
3205 error:
3206 ignore_value(qemuDomainObjExitMonitor(driver, vm));
3207 return -1;
3208 }
3209
3210
3211 struct qemuProcessHookData {
3212 virDomainObj *vm;
3213 virQEMUDriver *driver;
3214 virQEMUDriverConfig *cfg;
3215 };
3216
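/* qemuProcessHook runs in the child process between fork() and exec() of
 * QEMU: it re-initializes security driver state, acquires the domain lock,
 * unshares namespaces and sets up the NUMA memory policy. No mutexes may
 * be used here since they are not protected across fork(). */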
3217 static int qemuProcessHook(void *data)
3218 {
3219 struct qemuProcessHookData *h = data;
3220 qemuDomainObjPrivate *priv = h->vm->privateData;
3221 int ret = -1;
3222 int fd;
3223 virBitmap *nodeset = NULL;
3224 virDomainNumatuneMemMode mode;
3225
3226 /* This method cannot use any mutexes, which are not
3227 * protected across fork()
3228 */
3229
3230 qemuSecurityPostFork(h->driver->securityManager);
3231
3232 /* Some later calls want pid present */
3233 h->vm->pid = getpid();
3234
3235 VIR_DEBUG("Obtaining domain lock");
3236 /*
3237 * Since we're going to leak the returned FD to QEMU,
3238 * we need to make sure it gets a sensible label.
3239 * This mildly sucks, because there could be other
3240 * sockets the lock driver opens that we don't want
3241 * labelled. So far we're ok though.
3242 */
3243 if (qemuSecuritySetSocketLabel(h->driver->securityManager, h->vm->def) < 0)
3244 goto cleanup;
3245 if (virDomainLockProcessStart(h->driver->lockManager,
3246 h->cfg->uri,
3247 h->vm,
3248 /* QEMU is always paused initially */
3249 true,
3250 &fd) < 0)
3251 goto cleanup;
3252 if (qemuSecurityClearSocketLabel(h->driver->securityManager, h->vm->def) < 0)
3253 goto cleanup;
3254
3255 if (qemuDomainUnshareNamespace(h->cfg, h->driver->securityManager, h->vm) < 0)
3256 goto cleanup;
3257
3258 if (virDomainNumatuneGetMode(h->vm->def->numa, -1, &mode) == 0) {
3259 if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
3260 h->cfg->cgroupControllers & (1 << VIR_CGROUP_CONTROLLER_CPUSET) &&
3261 virCgroupControllerAvailable(VIR_CGROUP_CONTROLLER_CPUSET)) {
3262 /* Use virNuma* API iff necessary. Once set and child is exec()-ed,
3263 * there's no way for us to change it. Rely on cgroups (if available
3264 * and enabled in the config) rather than virNuma*. */
3265 VIR_DEBUG("Relying on CGroups for memory binding");
3266 } else {
3267 nodeset = virDomainNumatuneGetNodeset(h->vm->def->numa,
3268 priv->autoNodeset, -1);
3269
3270 if (virNumaSetupMemoryPolicy(mode, nodeset) < 0)
3271 goto cleanup;
3272 }
3273 }
3274
3275 ret = 0;
3276
3277 cleanup:
3278 virObjectUnref(h->cfg);
3279 VIR_DEBUG("Hook complete ret=%d", ret);
3280 return ret;
3281 }
3282
3283 int
3284 qemuProcessPrepareMonitorChr(virDomainChrSourceDef *monConfig,
3285 const char *domainDir)
3286 {
3287 monConfig->type = VIR_DOMAIN_CHR_TYPE_UNIX;
3288 monConfig->data.nix.listen = true;
3289
3290 monConfig->data.nix.path = g_strdup_printf("%s/monitor.sock", domainDir);
3291 return 0;
3292 }
3293
3294
3295 /*
3296 * Precondition: vm must be locked, and a job must be active.
3297 * This method will call {Enter,Exit}Monitor
3298 */
3299 int
3300 qemuProcessStartCPUs(virQEMUDriver *driver, virDomainObj *vm,
3301 virDomainRunningReason reason,
3302 qemuDomainAsyncJob asyncJob)
3303 {
3304 int ret = -1;
3305 qemuDomainObjPrivate *priv = vm->privateData;
3306 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
3307
3308 /* Bring up netdevs before starting CPUs */
3309 if (qemuInterfaceStartDevices(vm->def) < 0)
3310 return -1;
3311
3312 VIR_DEBUG("Using lock state '%s'", NULLSTR(priv->lockState));
3313 if (virDomainLockProcessResume(driver->lockManager, cfg->uri,
3314 vm, priv->lockState) < 0) {
3315 /* Don't free priv->lockState on error, because we need
3316 * to make sure we have state still present if the user
3317 * tries to resume again
3318 */
3319 return -1;
3320 }
3321 VIR_FREE(priv->lockState);
3322
3323 priv->runningReason = reason;
3324
3325 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
3326 goto release;
3327
3328 ret = qemuMonitorStartCPUs(priv->mon);
3329 if (qemuDomainObjExitMonitor(driver, vm) < 0)
3330 ret = -1;
3331
3332 if (ret < 0)
3333 goto release;
3334
3335 /* The RESUME event handler will change the domain state with the reason
3336 * saved in priv->runningReason and it will also emit corresponding domain
3337 * lifecycle event.
3338 */
3339
3340 return ret;
3341
3342 release:
3343 priv->runningReason = VIR_DOMAIN_RUNNING_UNKNOWN;
3344 if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
3345 VIR_WARN("Unable to release lease on %s", vm->def->name);
3346 VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
3347 return ret;
3348 }
3349
3350
3351 int qemuProcessStopCPUs(virQEMUDriver *driver,
3352 virDomainObj *vm,
3353 virDomainPausedReason reason,
3354 qemuDomainAsyncJob asyncJob)
3355 {
3356 int ret = -1;
3357 qemuDomainObjPrivate *priv = vm->privateData;
3358
3359 VIR_FREE(priv->lockState);
3360
3361 priv->pausedReason = reason;
3362
3363 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
3364 goto cleanup;
3365
3366 ret = qemuMonitorStopCPUs(priv->mon);
3367 if (qemuDomainObjExitMonitor(driver, vm) < 0)
3368 ret = -1;
3369
3370 if (ret < 0)
3371 goto cleanup;
3372
3373 /* de-activate netdevs after stopping CPUs */
3374 ignore_value(qemuInterfaceStopDevices(vm->def));
3375
3376 if (priv->job.current)
3377 ignore_value(virTimeMillisNow(&priv->job.current->stopped));
3378
3379 /* The STOP event handler will change the domain state with the reason
3380 * saved in priv->pausedReason and it will also emit corresponding domain
3381 * lifecycle event.
3382 */
3383
3384 if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
3385 VIR_WARN("Unable to release lease on %s", vm->def->name);
3386 VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
3387
3388 cleanup:
3389 if (ret < 0)
3390 priv->pausedReason = VIR_DOMAIN_PAUSED_UNKNOWN;
3391
3392 return ret;
3393 }
3394
3395
3396
3397 static void
3398 qemuProcessNotifyNets(virDomainDef *def)
3399 {
3400 size_t i;
3401 g_autoptr(virConnect) conn = NULL;
3402
3403 for (i = 0; i < def->nnets; i++) {
3404 virDomainNetDef *net = def->nets[i];
3405 /* keep others from trying to use the macvtap device name, but
3406 * don't return error if this happens, since that causes the
3407 * domain to be unceremoniously killed, which would be *very*
3408 * impolite.
3409 */
3410 switch (virDomainNetGetActualType(net)) {
3411 case VIR_DOMAIN_NET_TYPE_DIRECT:
3412 virNetDevReserveName(net->ifname);
3413 break;
3414 case VIR_DOMAIN_NET_TYPE_BRIDGE:
3415 case VIR_DOMAIN_NET_TYPE_NETWORK:
3416 case VIR_DOMAIN_NET_TYPE_ETHERNET:
3417 virNetDevReserveName(net->ifname);
3418 break;
3419 case VIR_DOMAIN_NET_TYPE_USER:
3420 case VIR_DOMAIN_NET_TYPE_VHOSTUSER:
3421 case VIR_DOMAIN_NET_TYPE_SERVER:
3422 case VIR_DOMAIN_NET_TYPE_CLIENT:
3423 case VIR_DOMAIN_NET_TYPE_MCAST:
3424 case VIR_DOMAIN_NET_TYPE_INTERNAL:
3425 case VIR_DOMAIN_NET_TYPE_HOSTDEV:
3426 case VIR_DOMAIN_NET_TYPE_UDP:
3427 case VIR_DOMAIN_NET_TYPE_VDPA:
3428 case VIR_DOMAIN_NET_TYPE_LAST:
3429 break;
3430 }
3431
3432 if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK && !conn)
3433 conn = virGetConnectNetwork();
3434
3435 virDomainNetNotifyActualDevice(conn, def, net);
3436 }
3437 }
3438
3439 /* Attempt to instantiate the filters. Ignore failures because it's
3440 * possible that someone deleted a filter binding and the associated
3441 * filter while the guest was running, and we don't want that action
3442 * to prevent the guest from being kept running during reconnection
3443 * processing. Nor do we necessarily want other failures to do the
3444 * same. We just log the error conditions, except of course for the
3445 * ignoreExists case (i.e. the 'true' flag passed below). */
3446 static void
3447 qemuProcessFiltersInstantiate(virDomainDef *def)
3448 {
3449 size_t i;
3450
3451 for (i = 0; i < def->nnets; i++) {
3452 virDomainNetDef *net = def->nets[i];
3453 if ((net->filter) && (net->ifname)) {
3454 if (virDomainConfNWFilterInstantiate(def->name, def->uuid, net,
3455 true) < 0) {
3456 VIR_WARN("filter '%s' instantiation for '%s' failed '%s'",
3457 net->filter, net->ifname, virGetLastErrorMessage());
3458 virResetLastError();
3459 }
3460 }
3461 }
3462 }
3463
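/* qemuProcessUpdateState: query the current run state from the monitor and
 * reconcile it with the state last recorded by libvirt, e.g. when the
 * domain changed state while its monitor connection was down. */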
3464 static int
3465 qemuProcessUpdateState(virQEMUDriver *driver, virDomainObj *vm)
3466 {
3467 qemuDomainObjPrivate *priv = vm->privateData;
3468 virDomainState state;
3469 virDomainPausedReason reason;
3470 virDomainState newState = VIR_DOMAIN_NOSTATE;
3471 int oldReason;
3472 int newReason;
3473 bool running;
3474 g_autofree char *msg = NULL;
3475 int ret;
3476
3477 qemuDomainObjEnterMonitor(driver, vm);
3478 ret = qemuMonitorGetStatus(priv->mon, &running, &reason);
3479 if (qemuDomainObjExitMonitor(driver, vm) < 0)
3480 return -1;
3481
3482 if (ret < 0)
3483 return -1;
3484
3485 state = virDomainObjGetState(vm, &oldReason);
3486
3487 if (running &&
3488 (state == VIR_DOMAIN_SHUTOFF ||
3489 (state == VIR_DOMAIN_PAUSED &&
3490 oldReason == VIR_DOMAIN_PAUSED_STARTING_UP))) {
3491 newState = VIR_DOMAIN_RUNNING;
3492 newReason = VIR_DOMAIN_RUNNING_BOOTED;
3493 msg = g_strdup("finished booting");
3494 } else if (state == VIR_DOMAIN_PAUSED && running) {
3495 newState = VIR_DOMAIN_RUNNING;
3496 newReason = VIR_DOMAIN_RUNNING_UNPAUSED;
3497 msg = g_strdup("was unpaused");
3498 } else if (state == VIR_DOMAIN_RUNNING && !running) {
3499 if (reason == VIR_DOMAIN_PAUSED_SHUTTING_DOWN) {
3500 newState = VIR_DOMAIN_SHUTDOWN;
3501 newReason = VIR_DOMAIN_SHUTDOWN_UNKNOWN;
3502 msg = g_strdup("shutdown");
3503 } else if (reason == VIR_DOMAIN_PAUSED_CRASHED) {
3504 newState = VIR_DOMAIN_CRASHED;
3505 newReason = VIR_DOMAIN_CRASHED_PANICKED;
3506 msg = g_strdup("crashed");
3507 } else {
3508 newState = VIR_DOMAIN_PAUSED;
3509 newReason = reason;
3510 msg = g_strdup_printf("was paused (%s)",
3511 virDomainPausedReasonTypeToString(reason));
3512 }
3513 }
3514
3515 if (newState != VIR_DOMAIN_NOSTATE) {
3516 VIR_DEBUG("Domain %s %s while its monitor was disconnected;"
3517 " changing state to %s (%s)",
3518 vm->def->name,
3519 NULLSTR(msg),
3520 virDomainStateTypeToString(newState),
3521 virDomainStateReasonToString(newState, newReason));
3522 virDomainObjSetState(vm, newState, newReason);
3523 }
3524
3525 return 0;
3526 }
3527
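/**
 * qemuProcessRecoverMigrationIn
 *
 * recover an incoming migration job that was interrupted by a libvirtd
 * restart; depending on the migration phase the domain is resumed, left
 * alone, or reported as unrecoverable (returning -1 so the caller kills it)
 */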
3528 static int
3529 qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
3530 virDomainObj *vm,
3531 const qemuDomainJobObj *job,
3532 virDomainState state,
3533 int reason)
3534 {
3535
3536 qemuDomainJobPrivate *jobPriv = job->privateData;
3537 bool postcopy = (state == VIR_DOMAIN_PAUSED &&
3538 reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED) ||
3539 (state == VIR_DOMAIN_RUNNING &&
3540 reason == VIR_DOMAIN_RUNNING_POSTCOPY);
3541
3542 switch ((qemuMigrationJobPhase) job->phase) {
3543 case QEMU_MIGRATION_PHASE_NONE:
3544 case QEMU_MIGRATION_PHASE_PERFORM2:
3545 case QEMU_MIGRATION_PHASE_BEGIN3:
3546 case QEMU_MIGRATION_PHASE_PERFORM3:
3547 case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
3548 case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
3549 case QEMU_MIGRATION_PHASE_CONFIRM3:
3550 case QEMU_MIGRATION_PHASE_LAST:
3551 /* N/A for incoming migration */
3552 break;
3553
3554 case QEMU_MIGRATION_PHASE_PREPARE:
3555 VIR_DEBUG("Killing unfinished incoming migration for domain %s",
3556 vm->def->name);
3557 return -1;
3558
3559 case QEMU_MIGRATION_PHASE_FINISH2:
3560 /* source domain is already killed so let's just resume the domain
3561 * and hope we are all set */
3562 VIR_DEBUG("Incoming migration finished, resuming domain %s",
3563 vm->def->name);
3564 if (qemuProcessStartCPUs(driver, vm,
3565 VIR_DOMAIN_RUNNING_MIGRATED,
3566 QEMU_ASYNC_JOB_NONE) < 0) {
3567 VIR_WARN("Could not resume domain %s", vm->def->name);
3568 }
3569 break;
3570
3571 case QEMU_MIGRATION_PHASE_FINISH3:
3572 /* migration finished, we started resuming the domain but didn't
3573 * confirm success or failure yet; killing it seems safest unless
3574 * we already started guest CPUs or we were in post-copy mode */
3575 if (postcopy) {
3576 qemuMigrationAnyPostcopyFailed(driver, vm);
3577 } else if (state != VIR_DOMAIN_RUNNING) {
3578 VIR_DEBUG("Killing migrated domain %s", vm->def->name);
3579 return -1;
3580 }
3581 break;
3582 }
3583
3584 qemuMigrationParamsReset(driver, vm, QEMU_ASYNC_JOB_NONE,
3585 jobPriv->migParams, job->apiFlags);
3586 return 0;
3587 }
3588
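/**
 * qemuProcessRecoverMigrationOut
 *
 * recover an outgoing migration job that was interrupted by a libvirtd
 * restart; depending on the migration phase the migration is cancelled and
 * the domain resumed, marked as broken post-copy, or flagged for shutdown
 * via @stopFlags
 */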
3589 static int
3590 qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
3591 virDomainObj *vm,
3592 const qemuDomainJobObj *job,
3593 virDomainState state,
3594 int reason,
3595 unsigned int *stopFlags)
3596 {
3597 qemuDomainJobPrivate *jobPriv = job->privateData;
3598 bool postcopy = state == VIR_DOMAIN_PAUSED &&
3599 (reason == VIR_DOMAIN_PAUSED_POSTCOPY ||
3600 reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED);
3601 bool resume = false;
3602
3603 switch ((qemuMigrationJobPhase) job->phase) {
3604 case QEMU_MIGRATION_PHASE_NONE:
3605 case QEMU_MIGRATION_PHASE_PREPARE:
3606 case QEMU_MIGRATION_PHASE_FINISH2:
3607 case QEMU_MIGRATION_PHASE_FINISH3:
3608 case QEMU_MIGRATION_PHASE_LAST:
3609 /* N/A for outgoing migration */
3610 break;
3611
3612 case QEMU_MIGRATION_PHASE_BEGIN3:
3613 /* nothing happened so far, just forget we were about to migrate the
3614 * domain */
3615 break;
3616
3617 case QEMU_MIGRATION_PHASE_PERFORM2:
3618 case QEMU_MIGRATION_PHASE_PERFORM3:
3619 /* migration is still in progress, let's cancel it and resume the
3620 * domain; however we can only do that before migration enters
3621 * post-copy mode
3622 */
3623 if (postcopy) {
3624 qemuMigrationAnyPostcopyFailed(driver, vm);
3625 } else {
3626 VIR_DEBUG("Cancelling unfinished migration of domain %s",
3627 vm->def->name);
3628 if (qemuMigrationSrcCancel(driver, vm) < 0) {
3629 VIR_WARN("Could not cancel ongoing migration of domain %s",
3630 vm->def->name);
3631 }
3632 resume = true;
3633 }
3634 break;
3635
3636 case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
3637 /* migration finished but we didn't have a chance to get the result
3638 * of Finish3 step; third party needs to check what to do next; in
3639 * post-copy mode we can use PAUSED_POSTCOPY_FAILED state for this
3640 */
3641 if (postcopy)
3642 qemuMigrationAnyPostcopyFailed(driver, vm);
3643 break;
3644
3645 case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
3646 /* Finish3 failed, we need to resume the domain, but once we enter
3647 * post-copy mode there's no way back, so let's just mark the domain
3648 * as broken in that case
3649 */
3650 if (postcopy) {
3651 qemuMigrationAnyPostcopyFailed(driver, vm);
3652 } else {
3653 VIR_DEBUG("Resuming domain %s after failed migration",
3654 vm->def->name);
3655 resume = true;
3656 }
3657 break;
3658
3659 case QEMU_MIGRATION_PHASE_CONFIRM3:
3660 /* migration completed, we need to kill the domain here */
3661 *stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
3662 return -1;
3663 }
3664
3665 if (resume) {
3666 /* resume the domain but only if it was paused as a result of
3667 * migration
3668 */
3669 if (state == VIR_DOMAIN_PAUSED &&
3670 (reason == VIR_DOMAIN_PAUSED_MIGRATION ||
3671 reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
3672 if (qemuProcessStartCPUs(driver, vm,
3673 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
3674 QEMU_ASYNC_JOB_NONE) < 0) {
3675 VIR_WARN("Could not resume domain %s", vm->def->name);
3676 }
3677 }
3678 }
3679
3680 qemuMigrationParamsReset(driver, vm, QEMU_ASYNC_JOB_NONE,
3681 jobPriv->migParams, job->apiFlags);
3682 return 0;
3683 }
3684
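/**
 * qemuProcessRecoverJob
 *
 * clean up after an async job (migration, save, dump, snapshot, backup)
 * that was interrupted by a libvirtd restart, resuming or restoring the
 * domain state where possible; returns -1 if the domain cannot be kept
 */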
3685 static int
3686 qemuProcessRecoverJob(virQEMUDriver *driver,
3687 virDomainObj *vm,
3688 const qemuDomainJobObj *job,
3689 unsigned int *stopFlags)
3690 {
3691 qemuDomainObjPrivate *priv = vm->privateData;
3692 virDomainState state;
3693 int reason;
3694 unsigned long long now;
3695
3696 state = virDomainObjGetState(vm, &reason);
3697
3698 switch (job->asyncJob) {
3699 case QEMU_ASYNC_JOB_MIGRATION_OUT:
3700 if (qemuProcessRecoverMigrationOut(driver, vm, job,
3701 state, reason, stopFlags) < 0)
3702 return -1;
3703 break;
3704
3705 case QEMU_ASYNC_JOB_MIGRATION_IN:
3706 if (qemuProcessRecoverMigrationIn(driver, vm, job,
3707 state, reason) < 0)
3708 return -1;
3709 break;
3710
3711 case QEMU_ASYNC_JOB_SAVE:
3712 case QEMU_ASYNC_JOB_DUMP:
3713 case QEMU_ASYNC_JOB_SNAPSHOT:
3714 qemuDomainObjEnterMonitor(driver, vm);
3715 ignore_value(qemuMonitorMigrateCancel(priv->mon));
3716 if (qemuDomainObjExitMonitor(driver, vm) < 0)
3717 return -1;
3718 /* resume the domain but only if it was paused as a result of
3719 * running a migration-to-file operation. Although we are
3720 * recovering an async job, this function is run at startup
3721 * and must resume things using sync monitor connections. */
3722 if (state == VIR_DOMAIN_PAUSED &&
3723 ((job->asyncJob == QEMU_ASYNC_JOB_DUMP &&
3724 reason == VIR_DOMAIN_PAUSED_DUMP) ||
3725 (job->asyncJob == QEMU_ASYNC_JOB_SAVE &&
3726 reason == VIR_DOMAIN_PAUSED_SAVE) ||
3727 (job->asyncJob == QEMU_ASYNC_JOB_SNAPSHOT &&
3728 (reason == VIR_DOMAIN_PAUSED_SNAPSHOT ||
3729 reason == VIR_DOMAIN_PAUSED_MIGRATION)) ||
3730 reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
3731 if (qemuProcessStartCPUs(driver, vm,
3732 VIR_DOMAIN_RUNNING_SAVE_CANCELED,
3733 QEMU_ASYNC_JOB_NONE) < 0) {
3734 VIR_WARN("Could not resume domain '%s' after migration to file",
3735 vm->def->name);
3736 }
3737 }
3738 break;
3739
3740 case QEMU_ASYNC_JOB_START:
3741 /* Already handled in VIR_DOMAIN_PAUSED_STARTING_UP check. */
3742 break;
3743
3744 case QEMU_ASYNC_JOB_BACKUP:
3745 ignore_value(virTimeMillisNow(&now));
3746
3747 /* Restore the config of the async job which is not persisted */
3748 priv->jobs_queued++;
3749 priv->job.asyncJob = QEMU_ASYNC_JOB_BACKUP;
3750 priv->job.asyncOwnerAPI = g_strdup(virThreadJobGet());
3751 priv->job.asyncStarted = now;
3752
3753 qemuDomainObjSetAsyncJobMask(vm, (QEMU_JOB_DEFAULT_MASK |
3754 JOB_MASK(QEMU_JOB_SUSPEND) |
3755 JOB_MASK(QEMU_JOB_MODIFY)));
3756
3757 /* We reset the job parameters for backup so that the job will look
3758 * active. This is possible because we are able to recover the state
3759 * of blockjobs and also the backup job allows all sub-job types */
3760 priv->job.current = g_new0(qemuDomainJobInfo, 1);
3761 priv->job.current->operation = VIR_DOMAIN_JOB_OPERATION_BACKUP;
3762 priv->job.current->statsType = QEMU_DOMAIN_JOB_STATS_TYPE_BACKUP;
3763 priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE;
3764 priv->job.current->started = now;
3765 break;
3766
3767 case QEMU_ASYNC_JOB_NONE:
3768 case QEMU_ASYNC_JOB_LAST:
3769 break;
3770 }
3771
3772 if (!virDomainObjIsActive(vm))
3773 return -1;
3774
3775 /* In case any special handling is added for job type that has been ignored
3776 * before, QEMU_DOMAIN_TRACK_JOBS (from qemu_domain.h) needs to be updated
3777 * for the job to be properly tracked in domain state XML.
3778 */
3779 switch (job->active) {
3780 case QEMU_JOB_QUERY:
3781 /* harmless */
3782 break;
3783
3784 case QEMU_JOB_DESTROY:
3785 VIR_DEBUG("Domain %s should have already been destroyed",
3786 vm->def->name);
3787 return -1;
3788
3789 case QEMU_JOB_SUSPEND:
3790 /* mostly harmless */
3791 break;
3792
3793 case QEMU_JOB_MODIFY:
3794 /* XXX depending on the command we may be in an inconsistent state and
3795 * we should probably fall back to "monitor error" state and refuse to
3796 * continue with the domain */
3797 break;
3798
3799 case QEMU_JOB_MIGRATION_OP:
3800 case QEMU_JOB_ABORT:
3801 case QEMU_JOB_ASYNC:
3802 case QEMU_JOB_ASYNC_NESTED:
3803 /* async job was already handled above */
3804 case QEMU_JOB_NONE:
3805 case QEMU_JOB_LAST:
3806 break;
3807 }
3808
3809 return 0;
3810 }
3811
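/**
 * qemuProcessUpdateDevices
 *
 * refresh the list of devices currently known to QEMU and remove from the
 * domain definition any device that disappeared while we were not watching
 */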
3812 static int
3813 qemuProcessUpdateDevices(virQEMUDriver *driver,
3814 virDomainObj *vm)
3815 {
3816 qemuDomainObjPrivate *priv = vm->privateData;
3817 virDomainDeviceDef dev;
3818 g_auto(GStrv) old = g_steal_pointer(&priv->qemuDevices);
3819 GStrv tmp;
3820
3821 if (qemuDomainUpdateDeviceList(driver, vm, QEMU_ASYNC_JOB_NONE) < 0)
3822 return -1;
3823
3824 if (!old)
3825 return 0;
3826
3827 for (tmp = old; *tmp; tmp++) {
3828 if (!g_strv_contains((const char **) priv->qemuDevices, *tmp) &&
3829 virDomainDefFindDevice(vm->def, *tmp, &dev, false) == 0 &&
3830 qemuDomainRemoveDevice(driver, vm, &dev))
3831 return -1;
3832 }
3833
3834 return 0;
3835 }
3836
3837 static int
3838 qemuDomainPerfRestart(virDomainObj *vm)
3839 {
3840 size_t i;
3841 virDomainDef *def = vm->def;
3842 qemuDomainObjPrivate *priv = vm->privateData;
3843
3844 if (!(priv->perf = virPerfNew()))
3845 return -1;
3846
3847 for (i = 0; i < VIR_PERF_EVENT_LAST; i++) {
3848 if (def->perf.events[i] &&
3849 def->perf.events[i] == VIR_TRISTATE_BOOL_YES) {
3850
3851 /* Failure to re-enable the perf event should not be fatal */
3852 if (virPerfEventEnable(priv->perf, i, vm->pid) < 0)
3853 def->perf.events[i] = VIR_TRISTATE_BOOL_NO;
3854 }
3855 }
3856
3857 return 0;
3858 }
3859
3860
3861 static void
3862 qemuProcessReconnectCheckMemAliasOrderMismatch(virDomainObj *vm)
3863 {
3864 size_t i;
3865 int aliasidx;
3866 virDomainDef *def = vm->def;
3867 qemuDomainObjPrivate *priv = vm->privateData;
3868
3869 if (!virDomainDefHasMemoryHotplug(def) || def->nmems == 0)
3870 return;
3871
3872 for (i = 0; i < def->nmems; i++) {
3873 aliasidx = qemuDomainDeviceAliasIndex(&def->mems[i]->info, "dimm");
3874
3875 if (def->mems[i]->info.addr.dimm.slot != aliasidx) {
3876 priv->memAliasOrderMismatch = true;
3877 break;
3878 }
3879 }
3880 }
3881
3882
3883 static bool
3884 qemuProcessDomainMemoryDefNeedHugepagesPath(const virDomainMemoryDef *mem,
3885 const long system_pagesize)
3886 {
3887 switch (mem->model) {
3888 case VIR_DOMAIN_MEMORY_MODEL_DIMM:
3889 case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM:
3890 return mem->pagesize && mem->pagesize != system_pagesize;
3891
3892 case VIR_DOMAIN_MEMORY_MODEL_NONE:
3893 case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
3894 case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
3895 case VIR_DOMAIN_MEMORY_MODEL_LAST:
3896 /* None of these can be backed by hugepages. */
3897 return false;
3898 }
3899
3900 return false;
3901 }
3902
3903
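/**
 * qemuProcessNeedHugepagesPath
 *
 * returns true if the domain (or the memory device @mem that is about to be
 * plugged in) requires a hugepages mount point directory to be created
 */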
3904 static bool
3905 qemuProcessNeedHugepagesPath(virDomainDef *def,
3906 virDomainMemoryDef *mem)
3907 {
3908 const long system_pagesize = virGetSystemPageSizeKB();
3909 size_t i;
3910
3911 if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE)
3912 return true;
3913
3914 for (i = 0; i < def->mem.nhugepages; i++) {
3915 if (def->mem.hugepages[i].size != system_pagesize)
3916 return true;
3917 }
3918
3919 for (i = 0; i < def->nmems; i++) {
3920 if (qemuProcessDomainMemoryDefNeedHugepagesPath(def->mems[i], system_pagesize))
3921 return true;
3922 }
3923
3924 if (mem &&
3925 qemuProcessDomainMemoryDefNeedHugepagesPath(mem, system_pagesize))
3926 return true;
3927
3928 return false;
3929 }
3930
3931
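/**
 * qemuProcessNeedMemoryBackingPath
 *
 * returns true if the domain (or the memory device @mem that is about to be
 * plugged in) needs a file in the memory backing directory, i.e. memory is
 * file backed or uses a non-default access mode
 */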
3932 static bool
3933 qemuProcessNeedMemoryBackingPath(virDomainDef *def,
3934 virDomainMemoryDef *mem)
3935 {
3936 size_t i;
3937 size_t numaNodes;
3938
3939 if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE ||
3940 def->mem.access != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT)
3941 return true;
3942
3943 numaNodes = virDomainNumaGetNodeCount(def->numa);
3944 for (i = 0; i < numaNodes; i++) {
3945 if (virDomainNumaGetNodeMemoryAccessMode(def->numa, i)
3946 != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT)
3947 return true;
3948 }
3949
3950 for (i = 0; i < def->nmems; i++) {
3951 if (def->mems[i]->access != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT)
3952 return true;
3953 }
3954
3955 if (mem) {
3956 switch (mem->model) {
3957 case VIR_DOMAIN_MEMORY_MODEL_DIMM:
3958 case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM:
3959 if (mem->access != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT) {
3960 /* No need to check for access mode on the target node,
3961 * it was checked for in the previous loop. */
3962 return true;
3963 }
3964 break;
3965
3966 case VIR_DOMAIN_MEMORY_MODEL_NONE:
3967 case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
3968 case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
3969 case VIR_DOMAIN_MEMORY_MODEL_LAST:
3970 /* Backed by user provided path. Not stored in memory
3971 * backing dir anyway. */
3972 break;
3973 }
3974 }
3975
3976 return false;
3977 }
3978
3979
3980 static int
3981 qemuProcessBuildDestroyMemoryPathsImpl(virQEMUDriver *driver,
3982 virDomainObj *vm,
3983 const char *path,
3984 bool build)
3985 {
3986 if (build) {
3987 if (virFileExists(path))
3988 return 0;
3989
3990 if (g_mkdir_with_parents(path, 0700) < 0) {
3991 virReportSystemError(errno,
3992 _("Unable to create %s"),
3993 path);
3994 return -1;
3995 }
3996
3997 if (qemuSecurityDomainSetPathLabel(driver, vm, path, true) < 0)
3998 return -1;
3999 } else {
4000 if (virFileDeleteTree(path) < 0)
4001 return -1;
4002 }
4003
4004 return 0;
4005 }
4006
4007
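/**
 * qemuProcessBuildDestroyMemoryPaths
 *
 * create (@build == true) or remove (@build == false) the per-domain
 * hugepages and memory backing directories; newly created directories get
 * the proper security label
 */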
4008 int
4009 qemuProcessBuildDestroyMemoryPaths(virQEMUDriver *driver,
4010 virDomainObj *vm,
4011 virDomainMemoryDef *mem,
4012 bool build)
4013 {
4014
4015 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
4016 size_t i;
4017 bool shouldBuildHP = false;
4018 bool shouldBuildMB = false;
4019
4020 if (build) {
4021 shouldBuildHP = qemuProcessNeedHugepagesPath(vm->def, mem);
4022 shouldBuildMB = qemuProcessNeedMemoryBackingPath(vm->def, mem);
4023 }
4024
4025 if (!build || shouldBuildHP) {
4026 for (i = 0; i < cfg->nhugetlbfs; i++) {
4027 g_autofree char *path = NULL;
4028 path = qemuGetDomainHugepagePath(driver, vm->def, &cfg->hugetlbfs[i]);
4029
4030 if (!path)
4031 return -1;
4032
4033 if (qemuProcessBuildDestroyMemoryPathsImpl(driver, vm,
4034 path, build) < 0)
4035 return -1;
4036 }
4037 }
4038
4039 if (!build || shouldBuildMB) {
4040 g_autofree char *path = NULL;
4041 if (qemuGetMemoryBackingDomainPath(driver, vm->def, &path) < 0)
4042 return -1;
4043
4044 if (qemuProcessBuildDestroyMemoryPathsImpl(driver, vm,
4045 path, build) < 0)
4046 return -1;
4047 }
4048
4049 return 0;
4050 }
4051
4052
4053 int
4054 qemuProcessDestroyMemoryBackingPath(virQEMUDriver *driver,
4055 virDomainObj *vm,
4056 virDomainMemoryDef *mem)
4057 {
4058 g_autofree char *path = NULL;
4059
4060 if (qemuGetMemoryBackingPath(driver, vm->def, mem->info.alias, &path) < 0)
4061 return -1;
4062
4063 if (unlink(path) < 0 &&
4064 errno != ENOENT) {
4065 virReportSystemError(errno, _("Unable to remove %s"), path);
4066 return -1;
4067 }
4068
4069 return 0;
4070 }
4071
4072
4073 static int
4074 qemuProcessVNCAllocatePorts(virQEMUDriver *driver,
4075 virDomainGraphicsDef *graphics,
4076 bool allocate)
4077 {
4078 unsigned short port;
4079
4080 if (!allocate) {
4081 if (graphics->data.vnc.autoport)
4082 graphics->data.vnc.port = 5900;
4083
4084 return 0;
4085 }
4086
4087 if (graphics->data.vnc.autoport) {
4088 if (virPortAllocatorAcquire(driver->remotePorts, &port) < 0)
4089 return -1;
4090 graphics->data.vnc.port = port;
4091 }
4092
4093 if (graphics->data.vnc.websocket == -1) {
4094 if (virPortAllocatorAcquire(driver->webSocketPorts, &port) < 0)
4095 return -1;
4096 graphics->data.vnc.websocket = port;
4097 graphics->data.vnc.websocketGenerated = true;
4098 }
4099
4100 return 0;
4101 }
4102
4103 static int
4104 qemuProcessSPICEAllocatePorts(virQEMUDriver *driver,
4105 virDomainGraphicsDef *graphics,
4106 bool allocate)
4107 {
4108 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
4109 unsigned short port = 0;
4110 unsigned short tlsPort;
4111 size_t i;
4112 int defaultMode = graphics->data.spice.defaultMode;
4113
4114 bool needTLSPort = false;
4115 bool needPort = false;
4116
4117 if (graphics->data.spice.autoport) {
4118 /* check if tlsPort or port need allocation */
4119 for (i = 0; i < VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_LAST; i++) {
4120 switch (graphics->data.spice.channels[i]) {
4121 case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_SECURE:
4122 needTLSPort = true;
4123 break;
4124
4125 case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_INSECURE:
4126 needPort = true;
4127 break;
4128
4129 case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_ANY:
4130 /* default mode will be used */
4131 break;
4132 }
4133 }
4134 switch (defaultMode) {
4135 case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_SECURE:
4136 needTLSPort = true;
4137 break;
4138
4139 case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_INSECURE:
4140 needPort = true;
4141 break;
4142
4143 case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_ANY:
4144 if (cfg->spiceTLS)
4145 needTLSPort = true;
4146 needPort = true;
4147 break;
4148 }
4149 }
4150
4151 if (!allocate) {
4152 if (needPort || graphics->data.spice.port == -1)
4153 graphics->data.spice.port = 5901;
4154
4155 if (needTLSPort || graphics->data.spice.tlsPort == -1)
4156 graphics->data.spice.tlsPort = 5902;
4157
4158 return 0;
4159 }
4160
4161 if (needPort || graphics->data.spice.port == -1) {
4162 if (virPortAllocatorAcquire(driver->remotePorts, &port) < 0)
4163 return -1;
4164
4165 graphics->data.spice.port = port;
4166
4167 if (!graphics->data.spice.autoport)
4168 graphics->data.spice.portReserved = true;
4169 }
4170
4171 if (needTLSPort || graphics->data.spice.tlsPort == -1) {
4172 if (!cfg->spiceTLS) {
4173 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
4174 _("Auto allocation of spice TLS port requested "
4175 "but spice TLS is disabled in qemu.conf"));
4176 return -1;
4177 }
4178
4179 if (virPortAllocatorAcquire(driver->remotePorts, &tlsPort) < 0)
4180 return -1;
4181
4182 graphics->data.spice.tlsPort = tlsPort;
4183
4184 if (!graphics->data.spice.autoport)
4185 graphics->data.spice.tlsPortReserved = true;
4186 }
4187
4188 return 0;
4189 }
4190
4191
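/**
 * qemuProcessVerifyHypervFeatures
 *
 * check that every Hyper-V enlightenment enabled in the domain definition is
 * actually present in the guest CPU data reported by QEMU; missing features
 * are either warned about or reported as fatal errors
 */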
4192 static int
4193 qemuProcessVerifyHypervFeatures(virDomainDef *def,
4194 virCPUData *cpu)
4195 {
4196 size_t i;
4197 int rc;
4198
4199 for (i = 0; i < VIR_DOMAIN_HYPERV_LAST; i++) {
4200 g_autofree char *cpuFeature = NULL;
4201
4202 /* value-carrying properties (string/number), not plain feature flags; skip them */
4203 if (i == VIR_DOMAIN_HYPERV_VENDOR_ID ||
4204 i == VIR_DOMAIN_HYPERV_SPINLOCKS)
4205 continue;
4206
4207 if (def->hyperv_features[i] != VIR_TRISTATE_SWITCH_ON)
4208 continue;
4209
4210 cpuFeature = g_strdup_printf("hv-%s", virDomainHypervTypeToString(i));
4211
4212 rc = virCPUDataCheckFeature(cpu, cpuFeature);
4213
4214 if (rc < 0) {
4215 return -1;
4216 } else if (rc == 1) {
4217 if (i == VIR_DOMAIN_HYPERV_STIMER) {
4218 if (def->hyperv_stimer_direct != VIR_TRISTATE_SWITCH_ON)
4219 continue;
4220
4221 rc = virCPUDataCheckFeature(cpu, VIR_CPU_x86_HV_STIMER_DIRECT);
4222 if (rc < 0)
4223 return -1;
4224 else if (rc == 1)
4225 continue;
4226
4227 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
4228 _("host doesn't support hyperv stimer '%s' feature"),
4229 "direct");
4230 return -1;
4231 }
4232 continue;
4233 }
4234
4235 switch ((virDomainHyperv) i) {
4236 case VIR_DOMAIN_HYPERV_RELAXED:
4237 case VIR_DOMAIN_HYPERV_VAPIC:
4238 VIR_WARN("host doesn't support hyperv '%s' feature",
4239 virDomainHypervTypeToString(i));
4240 break;
4241
4242 case VIR_DOMAIN_HYPERV_VPINDEX:
4243 case VIR_DOMAIN_HYPERV_RUNTIME:
4244 case VIR_DOMAIN_HYPERV_SYNIC:
4245 case VIR_DOMAIN_HYPERV_STIMER:
4246 case VIR_DOMAIN_HYPERV_RESET:
4247 case VIR_DOMAIN_HYPERV_FREQUENCIES:
4248 case VIR_DOMAIN_HYPERV_REENLIGHTENMENT:
4249 case VIR_DOMAIN_HYPERV_TLBFLUSH:
4250 case VIR_DOMAIN_HYPERV_IPI:
4251 case VIR_DOMAIN_HYPERV_EVMCS:
4252 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
4253 _("host doesn't support hyperv '%s' feature"),
4254 virDomainHypervTypeToString(i));
4255 return -1;
4256
4257 case VIR_DOMAIN_HYPERV_SPINLOCKS:
4258 case VIR_DOMAIN_HYPERV_VENDOR_ID:
4259 case VIR_DOMAIN_HYPERV_LAST:
4260 break;
4261 }
4262 }
4263
4264 return 0;
4265 }
4266
4267
4268 static int
4269 qemuProcessVerifyKVMFeatures(virDomainDef *def,
4270 virCPUData *cpu)
4271 {
4272 int rc = 0;
4273
4274 if (def->features[VIR_DOMAIN_FEATURE_PVSPINLOCK] != VIR_TRISTATE_SWITCH_ON)
4275 return 0;
4276
4277 rc = virCPUDataCheckFeature(cpu, VIR_CPU_x86_KVM_PV_UNHALT);
4278
4279 if (rc <= 0) {
4280 if (rc == 0)
4281 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
4282 _("host doesn't support paravirtual spinlocks"));
4283 return -1;
4284 }
4285
4286 return 0;
4287 }
4288
4289
4290 static int
4291 qemuProcessVerifyCPUFeatures(virDomainDef *def,
4292 virCPUData *cpu)
4293 {
4294 int rc;
4295
4296 rc = virCPUCheckFeature(def->os.arch, def->cpu, "invtsc");
4297
4298 if (rc < 0) {
4299 return -1;
4300 } else if (rc == 1) {
4301 rc = virCPUDataCheckFeature(cpu, "invtsc");
4302 if (rc <= 0) {
4303 if (rc == 0) {
4304 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
4305 _("host doesn't support invariant TSC"));
4306 }
4307 return -1;
4308 }
4309 }
4310
4311 return 0;
4312 }
4313
4314
4315 static const char *
4316 qemuProcessTranslateCPUFeatures(const char *name,
4317 void *opaque)
4318 {
4319 virQEMUCaps *qemuCaps = opaque;
4320
4321 return virQEMUCapsCPUFeatureFromQEMU(qemuCaps, name);
4322 }
4323
4324
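/**
 * qemuProcessFetchGuestCPU
 *
 * query QEMU via the monitor for the sets of CPU features that were enabled
 * and disabled for the guest; on success @enabled and @disabled are filled,
 * both may be left NULL if the information is not available for the guest
 * architecture
 */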
4325 static int
4326 qemuProcessFetchGuestCPU(virQEMUDriver *driver,
4327 virDomainObj *vm,
4328 qemuDomainAsyncJob asyncJob,
4329 virCPUData **enabled,
4330 virCPUData **disabled)
4331 {
4332 qemuDomainObjPrivate *priv = vm->privateData;
4333 g_autoptr(virCPUData) dataEnabled = NULL;
4334 g_autoptr(virCPUData) dataDisabled = NULL;
4335 bool generic;
4336 int rc;
4337
4338 *enabled = NULL;
4339 *disabled = NULL;
4340
4341 generic = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_CPU_UNAVAILABLE_FEATURES);
4342
4343 if (!generic && !ARCH_IS_X86(vm->def->os.arch))
4344 return 0;
4345
4346 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
4347 return -1;
4348
4349 if (generic) {
4350 rc = qemuMonitorGetGuestCPU(priv->mon,
4351 vm->def->os.arch,
4352 qemuProcessTranslateCPUFeatures, priv->qemuCaps,
4353 &dataEnabled, &dataDisabled);
4354 } else {
4355 rc = qemuMonitorGetGuestCPUx86(priv->mon, &dataEnabled, &dataDisabled);
4356 }
4357
4358 if (qemuDomainObjExitMonitor(driver, vm) < 0)
4359 return -1;
4360
4361 if (rc == -1)
4362 return -1;
4363
4364 *enabled = g_steal_pointer(&dataEnabled);
4365 *disabled = g_steal_pointer(&dataDisabled);
4366 return 0;
4367 }
4368
4369
4370 static int
4371 qemuProcessVerifyCPU(virDomainObj *vm,
4372 virCPUData *cpu)
4373 {
4374 virDomainDef *def = vm->def;
4375
4376 if (!cpu)
4377 return 0;
4378
4379 if (qemuProcessVerifyKVMFeatures(def, cpu) < 0 ||
4380 qemuProcessVerifyHypervFeatures(def, cpu) < 0)
4381 return -1;
4382
4383 if (!def->cpu ||
4384 (def->cpu->mode == VIR_CPU_MODE_CUSTOM &&
4385 !def->cpu->model))
4386 return 0;
4387
4388 if (qemuProcessVerifyCPUFeatures(def, cpu) < 0)
4389 return -1;
4390
4391 return 0;
4392 }
4393
4394
4395 static int
4396 qemuProcessUpdateLiveGuestCPU(virDomainObj *vm,
4397 virCPUData *enabled,
4398 virCPUData *disabled)
4399 {
4400 virDomainDef *def = vm->def;
4401 qemuDomainObjPrivate *priv = vm->privateData;
4402 g_autoptr(virCPUDef) orig = NULL;
4403 int rc;
4404
4405 if (!enabled)
4406 return 0;
4407
4408 if (!def->cpu ||
4409 (def->cpu->mode == VIR_CPU_MODE_CUSTOM &&
4410 !def->cpu->model))
4411 return 0;
4412
4413 if (!(orig = virCPUDefCopy(def->cpu)))
4414 return -1;
4415
4416 if ((rc = virCPUUpdateLive(def->os.arch, def->cpu, enabled, disabled)) < 0) {
4417 return -1;
4418 } else if (rc == 0) {
4419 /* Store the original CPU in priv if QEMU changed it and we didn't
4420 * get the original CPU via migration, restore, or snapshot revert.
4421 */
4422 if (!priv->origCPU && !virCPUDefIsEqual(def->cpu, orig, false))
4423 priv->origCPU = g_steal_pointer(&orig);
4424
4425 def->cpu->check = VIR_CPU_CHECK_FULL;
4426 }
4427
4428 return 0;
4429 }
4430
4431
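/**
 * qemuProcessUpdateAndVerifyCPU
 *
 * fetch the guest CPU data from QEMU, verify that all requested features
 * (KVM, Hyper-V, invtsc) are provided, and update the live CPU definition
 * to match what QEMU actually enabled
 */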
4432 static int
4433 qemuProcessUpdateAndVerifyCPU(virQEMUDriver *driver,
4434 virDomainObj *vm,
4435 qemuDomainAsyncJob asyncJob)
4436 {
4437 virCPUData *cpu = NULL;
4438 virCPUData *disabled = NULL;
4439 int ret = -1;
4440
4441 if (qemuProcessFetchGuestCPU(driver, vm, asyncJob, &cpu, &disabled) < 0)
4442 goto cleanup;
4443
4444 if (qemuProcessVerifyCPU(vm, cpu) < 0)
4445 goto cleanup;
4446
4447 if (qemuProcessUpdateLiveGuestCPU(vm, cpu, disabled) < 0)
4448 goto cleanup;
4449
4450 ret = 0;
4451
4452 cleanup:
4453 virCPUDataFree(cpu);
4454 virCPUDataFree(disabled);
4455 return ret;
4456 }
4457
4458
4459 static int
4460 qemuProcessFetchCPUDefinitions(virQEMUDriver *driver,
4461 virDomainObj *vm,
4462 qemuDomainAsyncJob asyncJob,
4463 virDomainCapsCPUModels **cpuModels)
4464 {
4465 qemuDomainObjPrivate *priv = vm->privateData;
4466 g_autoptr(virDomainCapsCPUModels) models = NULL;
4467 int rc;
4468
4469 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
4470 return -1;
4471
4472 rc = virQEMUCapsFetchCPUModels(priv->mon, vm->def->os.arch, &models);
4473
4474 if (qemuDomainObjExitMonitor(driver, vm) < 0 || rc < 0)
4475 return -1;
4476
4477 *cpuModels = g_steal_pointer(&models);
4478 return 0;
4479 }
4480
4481
4482 static int
4483 qemuProcessUpdateCPU(virQEMUDriver *driver,
4484 virDomainObj *vm,
4485 qemuDomainAsyncJob asyncJob)
4486 {
4487 g_autoptr(virCPUData) cpu = NULL;
4488 g_autoptr(virCPUData) disabled = NULL;
4489 g_autoptr(virDomainCapsCPUModels) models = NULL;
4490
4491 /* The host CPU model comes from host caps rather than QEMU caps so
4492 * fallback must be allowed no matter what the user specified in the XML.
4493 */
4494 vm->def->cpu->fallback = VIR_CPU_FALLBACK_ALLOW;
4495
4496 if (qemuProcessFetchGuestCPU(driver, vm, asyncJob, &cpu, &disabled) < 0)
4497 return -1;
4498
4499 if (qemuProcessUpdateLiveGuestCPU(vm, cpu, disabled) < 0)
4500 return -1;
4501
4502 if (qemuProcessFetchCPUDefinitions(driver, vm, asyncJob, &models) < 0 ||
4503 virCPUTranslate(vm->def->os.arch, vm->def->cpu, models) < 0)
4504 return -1;
4505
4506 return 0;
4507 }
4508
4509
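/**
 * qemuPrepareNVRAM
 *
 * if the domain uses a per-VM NVRAM file which does not exist yet, create it
 * by copying the master NVRAM template associated with the firmware loader
 * (either the configured template or one from the qemu.conf firmware list)
 */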
4510 static int
4511 qemuPrepareNVRAM(virQEMUDriver *driver,
4512 virDomainObj *vm)
4513 {
4514 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
4515 int ret = -1;
4516 int srcFD = -1;
4517 int dstFD = -1;
4518 virDomainLoaderDef *loader = vm->def->os.loader;
4519 bool created = false;
4520 const char *master_nvram_path;
4521 ssize_t r;
4522
4523 if (!loader || !loader->nvram || virFileExists(loader->nvram))
4524 return 0;
4525
4526 master_nvram_path = loader->templt;
4527 if (!loader->templt) {
4528 size_t i;
4529 for (i = 0; i < cfg->nfirmwares; i++) {
4530 if (STREQ(cfg->firmwares[i]->name, loader->path)) {
4531 master_nvram_path = cfg->firmwares[i]->nvram;
4532 break;
4533 }
4534 }
4535 }
4536
4537 if (!master_nvram_path) {
4538 virReportError(VIR_ERR_OPERATION_FAILED,
4539 _("unable to find any master var store for "
4540 "loader: %s"), loader->path);
4541 goto cleanup;
4542 }
4543
4544 if ((srcFD = virFileOpenAs(master_nvram_path, O_RDONLY,
4545 0, -1, -1, 0)) < 0) {
4546 virReportSystemError(-srcFD,
4547 _("Failed to open file '%s'"),
4548 master_nvram_path);
4549 goto cleanup;
4550 }
4551
4552 if ((dstFD = virFileOpenAs(loader->nvram,
4553 O_WRONLY | O_CREAT | O_EXCL,
4554 S_IRUSR | S_IWUSR,
4555 cfg->user, cfg->group,
4556 VIR_FILE_OPEN_FORCE_OWNER)) < 0) {
4557 virReportSystemError(-dstFD,
4558 _("Failed to create file '%s'"),
4559 loader->nvram);
4560 goto cleanup;
4561 }
4562
4563 created = true;
4564
4565 do {
4566 char buf[1024];
4567
4568 if ((r = saferead(srcFD, buf, sizeof(buf))) < 0) {
4569 virReportSystemError(errno,
4570 _("Unable to read from file '%s'"),
4571 master_nvram_path);
4572 goto cleanup;
4573 }
4574
4575 if (safewrite(dstFD, buf, r) < 0) {
4576 virReportSystemError(errno,
4577 _("Unable to write to file '%s'"),
4578 loader->nvram);
4579 goto cleanup;
4580 }
4581 } while (r);
4582
4583 if (VIR_CLOSE(srcFD) < 0) {
4584 virReportSystemError(errno,
4585 _("Unable to close file '%s'"),
4586 master_nvram_path);
4587 goto cleanup;
4588 }
4589 if (VIR_CLOSE(dstFD) < 0) {
4590 virReportSystemError(errno,
4591 _("Unable to close file '%s'"),
4592 loader->nvram);
4593 goto cleanup;
4594 }
4595
4596 ret = 0;
4597 cleanup:
4598 /* If we created the NVRAM file but then failed to fill it with content
4599 * from the master template, roll back by removing it. */
4600 if (ret < 0) {
4601 if (created)
4602 unlink(loader->nvram);
4603 }
4604
4605 VIR_FORCE_CLOSE(srcFD);
4606 VIR_FORCE_CLOSE(dstFD);
4607 return ret;
4608 }
4609
4610
4611 static void
4612 qemuLogOperation(virDomainObj *vm,
4613 const char *msg,
4614 virCommand *cmd,
4615 qemuDomainLogContext *logCtxt)
4616 {
4617 g_autofree char *timestamp = NULL;
4618 qemuDomainObjPrivate *priv = vm->privateData;
4619 int qemuVersion = virQEMUCapsGetVersion(priv->qemuCaps);
4620 const char *package = virQEMUCapsGetPackage(priv->qemuCaps);
4621 g_autofree char *hostname = virGetHostname();
4622 struct utsname uts;
4623
4624 uname(&uts);
4625
4626 if ((timestamp = virTimeStringNow()) == NULL)
4627 return;
4628
4629 if (qemuDomainLogContextWrite(logCtxt,
4630 "%s: %s %s, qemu version: %d.%d.%d%s, kernel: %s, hostname: %s\n",
4631 timestamp, msg, VIR_LOG_VERSION_STRING,
4632 (qemuVersion / 1000000) % 1000,
4633 (qemuVersion / 1000) % 1000,
4634 qemuVersion % 1000,
4635 NULLSTR_EMPTY(package),
4636 uts.release,
4637 NULLSTR_EMPTY(hostname)) < 0)
4638 return;
4639
4640 if (cmd) {
4641 g_autofree char *args = virCommandToString(cmd, true);
4642 qemuDomainLogContextWrite(logCtxt, "%s\n", args);
4643 }
4644 }
4645
4646
4647 void
4648 qemuProcessIncomingDefFree(qemuProcessIncomingDef *inc)
4649 {
4650 if (!inc)
4651 return;
4652
4653 g_free(inc->address);
4654 g_free(inc->launchURI);
4655 g_free(inc->deferredURI);
4656 g_free(inc);
4657 }
4658
4659
4660 /*
4661 * This function does not copy @path, the caller is responsible for keeping
4662 * the @path pointer valid during the lifetime of the allocated
4663 * qemuProcessIncomingDef structure.
4664 *
4665 * The caller is responsible for closing @fd, calling
4666 * qemuProcessIncomingDefFree will NOT close it.
4667 */
4668 qemuProcessIncomingDef *
4669 qemuProcessIncomingDefNew(virQEMUCaps *qemuCaps,
4670 const char *listenAddress,
4671 const char *migrateFrom,
4672 int fd,
4673 const char *path)
4674 {
4675 qemuProcessIncomingDef *inc = NULL;
4676
4677 if (qemuMigrationDstCheckProtocol(qemuCaps, migrateFrom) < 0)
4678 return NULL;
4679
4680 inc = g_new0(qemuProcessIncomingDef, 1);
4681
4682 inc->address = g_strdup(listenAddress);
4683
4684 inc->launchURI = qemuMigrationDstGetURI(migrateFrom, fd);
4685 if (!inc->launchURI)
4686 goto error;
4687
4688 if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_INCOMING_DEFER)) {
4689 inc->deferredURI = inc->launchURI;
4690 inc->launchURI = g_strdup("defer");
4691 }
4692
4693 inc->fd = fd;
4694 inc->path = path;
4695
4696 return inc;
4697
4698 error:
4699 qemuProcessIncomingDefFree(inc);
4700 return NULL;
4701 }
4702
4703
4704 /*
4705 * This function starts a new QEMU_ASYNC_JOB_START async job. The user is
4706 * responsible for calling qemuProcessEndJob to stop this job and for passing
4707 * QEMU_ASYNC_JOB_START as @asyncJob argument to any function requiring this
4708 * parameter between qemuProcessBeginJob and qemuProcessEndJob.
4709 */
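/*
 * Typical caller pattern (illustrative sketch only, not lifted from an
 * actual caller):
 *
 *     if (qemuProcessBeginJob(driver, vm, VIR_DOMAIN_JOB_OPERATION_START,
 *                             flags) < 0)
 *         return -1;
 *
 *     ret = qemuProcessStart(..., QEMU_ASYNC_JOB_START, ...);
 *
 *     qemuProcessEndJob(driver, vm);
 */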
4710 int
4711 qemuProcessBeginJob(virQEMUDriver *driver,
4712 virDomainObj *vm,
4713 virDomainJobOperation operation,
4714 unsigned long apiFlags)
4715 {
4716 if (qemuDomainObjBeginAsyncJob(driver, vm, QEMU_ASYNC_JOB_START,
4717 operation, apiFlags) < 0)
4718 return -1;
4719
4720 qemuDomainObjSetAsyncJobMask(vm, QEMU_JOB_NONE);
4721 return 0;
4722 }
4723
4724
4725 void
4726 qemuProcessEndJob(virQEMUDriver *driver,
4727 virDomainObj *vm)
4728 {
4729 qemuDomainObjEndAsyncJob(driver, vm);
4730 }
4731
4732
4733 static int
4734 qemuProcessStartHook(virQEMUDriver *driver,
4735 virDomainObj *vm,
4736 virHookQemuOpType op,
4737 virHookSubopType subop)
4738 {
4739 qemuDomainObjPrivate *priv = vm->privateData;
4740 g_autofree char *xml = NULL;
4741 int ret;
4742
4743 if (!virHookPresent(VIR_HOOK_DRIVER_QEMU))
4744 return 0;
4745
4746 if (!(xml = qemuDomainDefFormatXML(driver, priv->qemuCaps, vm->def, 0)))
4747 return -1;
4748
4749 ret = virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name, op, subop,
4750 NULL, xml, NULL);
4751
4752 return ret;
4753 }
4754
4755
4756 static int
4757 qemuProcessGraphicsReservePorts(virDomainGraphicsDef *graphics,
4758 bool reconnect)
4759 {
4760 virDomainGraphicsListenDef *glisten;
4761
4762 if (graphics->nListens <= 0)
4763 return 0;
4764
4765 glisten = &graphics->listens[0];
4766
4767 if (glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_ADDRESS &&
4768 glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NETWORK)
4769 return 0;
4770
4771 switch (graphics->type) {
4772 case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
4773 if (!graphics->data.vnc.autoport ||
4774 reconnect) {
4775 if (virPortAllocatorSetUsed(graphics->data.vnc.port) < 0)
4776 return -1;
4777 graphics->data.vnc.portReserved = true;
4778 }
4779 if (graphics->data.vnc.websocket > 0 &&
4780 virPortAllocatorSetUsed(graphics->data.vnc.websocket) < 0)
4781 return -1;
4782 break;
4783
4784 case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
4785 if (graphics->data.spice.autoport && !reconnect)
4786 return 0;
4787
4788 if (graphics->data.spice.port > 0) {
4789 if (virPortAllocatorSetUsed(graphics->data.spice.port) < 0)
4790 return -1;
4791 graphics->data.spice.portReserved = true;
4792 }
4793
4794 if (graphics->data.spice.tlsPort > 0) {
4795 if (virPortAllocatorSetUsed(graphics->data.spice.tlsPort) < 0)
4796 return -1;
4797 graphics->data.spice.tlsPortReserved = true;
4798 }
4799 break;
4800
4801 case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
4802 case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
4803 case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
4804 case VIR_DOMAIN_GRAPHICS_TYPE_EGL_HEADLESS:
4805 case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
4806 break;
4807 }
4808
4809 return 0;
4810 }
4811
4812
4813 static int
4814 qemuProcessGraphicsAllocatePorts(virQEMUDriver *driver,
4815 virDomainGraphicsDef *graphics,
4816 bool allocate)
4817 {
4818 virDomainGraphicsListenDef *glisten;
4819
4820 if (graphics->nListens <= 0)
4821 return 0;
4822
4823 glisten = &graphics->listens[0];
4824
4825 if (glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_ADDRESS &&
4826 glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NETWORK)
4827 return 0;
4828
4829 switch (graphics->type) {
4830 case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
4831 if (qemuProcessVNCAllocatePorts(driver, graphics, allocate) < 0)
4832 return -1;
4833 break;
4834
4835 case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
4836 if (qemuProcessSPICEAllocatePorts(driver, graphics, allocate) < 0)
4837 return -1;
4838 break;
4839
4840 case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
4841 case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
4842 case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
4843 case VIR_DOMAIN_GRAPHICS_TYPE_EGL_HEADLESS:
4844 case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
4845 break;
4846 }
4847
4848 return 0;
4849 }
4850
4851 static int
4852 qemuProcessGetNetworkAddress(const char *netname,
4853 char **netaddr)
4854 {
4855 g_autoptr(virConnect) conn = NULL;
4856 int ret = -1;
4857 g_autoptr(virNetwork) net = NULL;
4858 virNetworkDef *netdef = NULL;
4859 virNetworkIPDef *ipdef;
4860 virSocketAddr addr;
4861 virSocketAddr *addrptr = NULL;
4862 char *dev_name = NULL;
4863 g_autofree char *xml = NULL;
4864
4865 *netaddr = NULL;
4866
4867 if (!(conn = virGetConnectNetwork()))
4868 return -1;
4869
4870 net = virNetworkLookupByName(conn, netname);
4871 if (!net)
4872 goto cleanup;
4873
4874 xml = virNetworkGetXMLDesc(net, 0);
4875 if (!xml)
4876 goto cleanup;
4877
4878 netdef = virNetworkDefParseString(xml, NULL, false);
4879 if (!netdef)
4880 goto cleanup;
4881
4882 switch ((virNetworkForwardType) netdef->forward.type) {
4883 case VIR_NETWORK_FORWARD_NONE:
4884 case VIR_NETWORK_FORWARD_NAT:
4885 case VIR_NETWORK_FORWARD_ROUTE:
4886 case VIR_NETWORK_FORWARD_OPEN:
4887 ipdef = virNetworkDefGetIPByIndex(netdef, AF_UNSPEC, 0);
4888 if (!ipdef) {
4889 virReportError(VIR_ERR_INTERNAL_ERROR,
4890 _("network '%s' doesn't have an IP address"),
4891 netdef->name);
4892 goto cleanup;
4893 }
4894 addrptr = &ipdef->address;
4895 break;
4896
4897 case VIR_NETWORK_FORWARD_BRIDGE:
4898 if ((dev_name = netdef->bridge))
4899 break;
4900 /*
4901 * fall through if netdef->bridge wasn't set, since that means
4902 * this is a macvtap "bridge" mode network
4903 */
4904 G_GNUC_FALLTHROUGH;
4905
4906 case VIR_NETWORK_FORWARD_PRIVATE:
4907 case VIR_NETWORK_FORWARD_VEPA:
4908 case VIR_NETWORK_FORWARD_PASSTHROUGH:
4909 if ((netdef->forward.nifs > 0) && netdef->forward.ifs)
4910 dev_name = netdef->forward.ifs[0].device.dev;
4911
4912 if (!dev_name) {
4913 virReportError(VIR_ERR_INTERNAL_ERROR,
4914 _("network '%s' has no associated interface or bridge"),
4915 netdef->name);
4916 goto cleanup;
4917 }
4918 break;
4919
4920 case VIR_NETWORK_FORWARD_HOSTDEV:
4921 break;
4922
4923 case VIR_NETWORK_FORWARD_LAST:
4924 default:
4925 virReportEnumRangeError(virNetworkForwardType, netdef->forward.type);
4926 goto cleanup;
4927 }
4928
4929 if (dev_name) {
4930 if (virNetDevIPAddrGet(dev_name, &addr) < 0)
4931 goto cleanup;
4932 addrptr = &addr;
4933 }
4934
4935 if (!(addrptr &&
4936 (*netaddr = virSocketAddrFormat(addrptr)))) {
4937 goto cleanup;
4938 }
4939
4940 ret = 0;
4941 cleanup:
4942 virNetworkDefFree(netdef);
4943 return ret;
4944 }
4945
4946
4947 static int
4948 qemuProcessGraphicsSetupNetworkAddress(virDomainGraphicsListenDef *glisten,
4949 const char *listenAddr)
4950 {
4951 int rc;
4952
4953 /* TODO: reject configuration without network specified for network listen */
4954 if (!glisten->network) {
4955 glisten->address = g_strdup(listenAddr);
4956 return 0;
4957 }
4958
4959 rc = qemuProcessGetNetworkAddress(glisten->network, &glisten->address);
4960 if (rc <= -2) {
4961 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
4962 _("network-based listen isn't possible, "
4963 "network driver isn't present"));
4964 return -1;
4965 }
4966 if (rc < 0)
4967 return -1;
4968
4969 return 0;
4970 }
4971
4972
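/**
 * qemuProcessGraphicsSetupListen
 *
 * fill in defaults for each listen definition of a graphics device: the
 * listen address or auto-generated UNIX socket path from qemu.conf, or the
 * address resolved from the configured libvirt network
 */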
4973 static int
4974 qemuProcessGraphicsSetupListen(virQEMUDriver *driver,
4975 virDomainGraphicsDef *graphics,
4976 virDomainObj *vm)
4977 {
4978 qemuDomainObjPrivate *priv = vm->privateData;
4979 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
4980 const char *type = virDomainGraphicsTypeToString(graphics->type);
4981 char *listenAddr = NULL;
4982 bool useSocket = false;
4983 size_t i;
4984
4985 switch (graphics->type) {
4986 case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
4987 useSocket = cfg->vncAutoUnixSocket;
4988 listenAddr = cfg->vncListen;
4989 break;
4990
4991 case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
4992 useSocket = cfg->spiceAutoUnixSocket;
4993 listenAddr = cfg->spiceListen;
4994 break;
4995
4996 case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
4997 case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
4998 case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
4999 case VIR_DOMAIN_GRAPHICS_TYPE_EGL_HEADLESS:
5000 case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
5001 break;
5002 }
5003
5004 for (i = 0; i < graphics->nListens; i++) {
5005 virDomainGraphicsListenDef *glisten = &graphics->listens[i];
5006
5007 switch (glisten->type) {
5008 case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_ADDRESS:
5009 if (!glisten->address) {
5010 /* If there is no address specified and qemu.conf has
5011 * *_auto_unix_socket set we should use unix socket as
5012 * default instead of tcp listen. */
5013 if (useSocket) {
5014 memset(glisten, 0, sizeof(virDomainGraphicsListenDef));
5015 glisten->socket = g_strdup_printf("%s/%s.sock", priv->libDir,
5016 type);
5017 glisten->fromConfig = true;
5018 glisten->type = VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_SOCKET;
5019 } else if (listenAddr) {
5020 glisten->address = g_strdup(listenAddr);
5021 glisten->fromConfig = true;
5022 }
5023 }
5024 break;
5025
5026 case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NETWORK:
5027 if (glisten->address || !listenAddr)
5028 continue;
5029
5030 if (qemuProcessGraphicsSetupNetworkAddress(glisten,
5031 listenAddr) < 0)
5032 return -1;
5033 break;
5034
5035 case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_SOCKET:
5036 if (!glisten->socket) {
5037 glisten->socket = g_strdup_printf("%s/%s.sock", priv->libDir,
5038 type);
5039 glisten->autoGenerated = true;
5040 }
5041 break;
5042
5043 case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NONE:
5044 case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_LAST:
5045 break;
5046 }
5047 }
5048
5049 return 0;
5050 }
5051
5052
5053 static int
5054 qemuProcessGraphicsSetupRenderNode(virDomainGraphicsDef *graphics,
5055 virQEMUCaps *qemuCaps)
5056 {
5057 char **rendernode = NULL;
5058
5059 if (!virDomainGraphicsNeedsAutoRenderNode(graphics))
5060 return 0;
5061
5062 /* Don't bother picking a DRM node if QEMU doesn't support it. */
5063 if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) {
5064 if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_SPICE_RENDERNODE))
5065 return 0;
5066
5067 rendernode = &graphics->data.spice.rendernode;
5068 } else {
5069 if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_EGL_HEADLESS_RENDERNODE))
5070 return 0;
5071
5072 rendernode = &graphics->data.egl_headless.rendernode;
5073 }
5074
5075 if (!(*rendernode = virHostGetDRMRenderNode()))
5076 return -1;
5077
5078 return 0;
5079 }
5080
5081
5082 static int
5083 qemuProcessSetupGraphics(virQEMUDriver *driver,
5084 virDomainObj *vm,
5085 virQEMUCaps *qemuCaps,
5086 unsigned int flags)
5087 {
5088 virDomainGraphicsDef *graphics;
5089 bool allocate = !(flags & VIR_QEMU_PROCESS_START_PRETEND);
5090 size_t i;
5091
5092 for (i = 0; i < vm->def->ngraphics; i++) {
5093 graphics = vm->def->graphics[i];
5094
5095 if (qemuProcessGraphicsSetupRenderNode(graphics, qemuCaps) < 0)
5096 return -1;
5097
5098 if (qemuProcessGraphicsSetupListen(driver, graphics, vm) < 0)
5099 return -1;
5100 }
5101
5102 if (allocate) {
5103 for (i = 0; i < vm->def->ngraphics; i++) {
5104 graphics = vm->def->graphics[i];
5105
5106 if (qemuProcessGraphicsReservePorts(graphics, false) < 0)
5107 return -1;
5108 }
5109 }
5110
5111 for (i = 0; i < vm->def->ngraphics; ++i) {
5112 graphics = vm->def->graphics[i];
5113
5114 if (qemuProcessGraphicsAllocatePorts(driver, graphics, allocate) < 0)
5115 return -1;
5116 }
5117
5118 return 0;
5119 }
5120
5121
5122 static int
5123 qemuProcessSetupRawIO(virQEMUDriver *driver,
5124 virDomainObj *vm,
5125 virCommand *cmd G_GNUC_UNUSED)
5126 {
5127 bool rawio = false;
5128 size_t i;
5129 int ret = -1;
5130
5131 /* if any disk requests CAP_SYS_RAWIO, grant that capability to the qemu process */
5132 for (i = 0; i < vm->def->ndisks; i++) {
5133 virDomainDeviceDef dev;
5134 virDomainDiskDef *disk = vm->def->disks[i];
5135
5136 if (disk->rawio == VIR_TRISTATE_BOOL_YES) {
5137 rawio = true;
5138 #ifndef CAP_SYS_RAWIO
5139 break;
5140 #endif
5141 }
5142
5143 dev.type = VIR_DOMAIN_DEVICE_DISK;
5144 dev.data.disk = disk;
5145 if (qemuAddSharedDevice(driver, &dev, vm->def->name) < 0)
5146 goto cleanup;
5147
5148 if (qemuSetUnprivSGIO(&dev) < 0)
5149 goto cleanup;
5150 }
5151
5152 /* If rawio not already set, check hostdevs as well */
5153 if (!rawio) {
5154 for (i = 0; i < vm->def->nhostdevs; i++) {
5155 virDomainHostdevSubsysSCSI *scsisrc;
5156
5157 if (!virHostdevIsSCSIDevice(vm->def->hostdevs[i]))
5158 continue;
5159
5160 scsisrc = &vm->def->hostdevs[i]->source.subsys.u.scsi;
5161 if (scsisrc->rawio == VIR_TRISTATE_BOOL_YES) {
5162 rawio = true;
5163 break;
5164 }
5165 }
5166 }
5167
5168 ret = 0;
5169
5170 cleanup:
5171 if (rawio) {
5172 #ifdef CAP_SYS_RAWIO
5173 if (ret == 0)
5174 virCommandAllowCap(cmd, CAP_SYS_RAWIO);
5175 #else
5176 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5177 _("Raw I/O is not supported on this platform"));
5178 ret = -1;
5179 #endif
5180 }
5181 return ret;
5182 }
5183
5184
5185 static int
5186 qemuProcessSetupBalloon(virQEMUDriver *driver,
5187 virDomainObj *vm,
5188 qemuDomainAsyncJob asyncJob)
5189 {
5190 unsigned long long balloon = vm->def->mem.cur_balloon;
5191 qemuDomainObjPrivate *priv = vm->privateData;
5192 int ret = -1;
5193
5194 if (!virDomainDefHasMemballoon(vm->def))
5195 return 0;
5196
5197 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
5198 return -1;
5199
5200 if (vm->def->memballoon->period)
5201 qemuMonitorSetMemoryStatsPeriod(priv->mon, vm->def->memballoon,
5202 vm->def->memballoon->period);
5203 if (qemuMonitorSetBalloon(priv->mon, balloon) < 0)
5204 goto cleanup;
5205
5206 ret = 0;
5207
5208 cleanup:
5209 if (qemuDomainObjExitMonitor(driver, vm) < 0)
5210 ret = -1;
5211 return ret;
5212 }
5213
5214
5215 static int
5216 qemuProcessMakeDir(virQEMUDriver *driver,
5217 virDomainObj *vm,
5218 const char *path)
5219 {
5220 if (g_mkdir_with_parents(path, 0750) < 0) {
5221 virReportSystemError(errno, _("Cannot create directory '%s'"), path);
5222 return -1;
5223 }
5224
5225 if (qemuSecurityDomainSetPathLabel(driver, vm, path, true) < 0)
5226 return -1;
5227
5228 return 0;
5229 }
5230
5231
5232 static void
5233 qemuProcessStartWarnShmem(virDomainObj *vm)
5234 {
5235 size_t i;
5236 bool check_shmem = false;
5237 bool shmem = vm->def->nshmems;
5238
5239 /*
5240 * For vhost-user to work, the domain has to have some type of
5241 * shared memory configured. We are not in a position to judge
5242 * whether shared hugepages or shm are sufficient, now or in the
5243 * future, so we only warn if neither is configured. Moreover,
5244 * failing here would give the false impression that libvirt can
5245 * verify that everything will work before running the domain;
5246 * not only are we unable to do that, it is also not our aim to
5247 * do so.
5248 */
5249 for (i = 0; i < vm->def->nnets; i++) {
5250 if (virDomainNetGetActualType(vm->def->nets[i]) ==
5251 VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
5252 check_shmem = true;
5253 break;
5254 }
5255 }
5256
5257 if (!check_shmem)
5258 return;
5259
5260 /*
5261 * This check is by no means complete. We merely check
5262 * whether there are *some* hugepages enabled and *some* NUMA
5263 * nodes with shared memory access.
5264 */
5265 if (!shmem && vm->def->mem.nhugepages) {
5266 for (i = 0; i < virDomainNumaGetNodeCount(vm->def->numa); i++) {
5267 if (virDomainNumaGetNodeMemoryAccessMode(vm->def->numa, i) ==
5268 VIR_DOMAIN_MEMORY_ACCESS_SHARED) {
5269 shmem = true;
5270 break;
5271 }
5272 }
5273 }
5274
5275 if (!shmem) {
5276 VIR_WARN("Detected vhost-user interface without any shared memory, "
5277 "the interface might not be operational");
5278 }
5279 }
5280
5281
5282 static int
5283 qemuProcessStartValidateGraphics(virDomainObj *vm)
5284 {
5285 size_t i;
5286
5287 for (i = 0; i < vm->def->ngraphics; i++) {
5288 virDomainGraphicsDef *graphics = vm->def->graphics[i];
5289
5290 switch (graphics->type) {
5291 case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
5292 case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
5293 if (graphics->nListens > 1) {
5294 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5295 _("QEMU does not support multiple listens for "
5296 "one graphics device."));
5297 return -1;
5298 }
5299 break;
5300
5301 case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
5302 case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
5303 case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
5304 case VIR_DOMAIN_GRAPHICS_TYPE_EGL_HEADLESS:
5305 case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
5306 break;
5307 }
5308 }
5309
5310 return 0;
5311 }
5312
5313
5314 static int
5315 qemuProcessStartValidateIOThreads(virDomainObj *vm,
5316 virQEMUCaps *qemuCaps)
5317 {
5318 size_t i;
5319
5320 if (vm->def->niothreadids > 0 &&
5321 !virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_IOTHREAD)) {
5322 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5323 _("IOThreads not supported for this QEMU"));
5324 return -1;
5325 }
5326
5327 for (i = 0; i < vm->def->ncontrollers; i++) {
5328 virDomainControllerDef *cont = vm->def->controllers[i];
5329
5330 if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_SCSI &&
5331 cont->model == VIR_DOMAIN_CONTROLLER_MODEL_SCSI_VIRTIO_SCSI &&
5332 cont->iothread > 0 &&
5333 !virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_SCSI_IOTHREAD)) {
5334 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5335 _("IOThreads for virtio-scsi not supported for "
5336 "this QEMU"));
5337 return -1;
5338 }
5339 }
5340
5341 return 0;
5342 }
5343
5344
5345 static int
5346 qemuProcessStartValidateShmem(virDomainObj *vm)
5347 {
5348 size_t i;
5349
5350 for (i = 0; i < vm->def->nshmems; i++) {
5351 virDomainShmemDef *shmem = vm->def->shmems[i];
5352
5353 if (strchr(shmem->name, '/')) {
5354 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
5355 _("shmem name '%s' must not contain '/'"),
5356 shmem->name);
5357 return -1;
5358 }
5359 }
5360
5361 return 0;
5362 }
5363
5364
5365 static int
5366 qemuProcessStartValidateDisks(virDomainObj *vm,
5367 virQEMUCaps *qemuCaps)
5368 {
5369 size_t i;
5370
5371 for (i = 0; i < vm->def->ndisks; i++) {
5372 virDomainDiskDef *disk = vm->def->disks[i];
5373 virStorageSource *src = disk->src;
5374
5375 /* This is a best-effort check: we can only check whether the command
5376 * option exists, but we cannot determine whether the running QEMU
5377 * was built with '--enable-vxhs'. */
5378 if (src->type == VIR_STORAGE_TYPE_NETWORK &&
5379 src->protocol == VIR_STORAGE_NET_PROTOCOL_VXHS &&
5380 !virQEMUCapsGet(qemuCaps, QEMU_CAPS_VXHS)) {
5381 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5382 _("VxHS protocol is not supported with this "
5383 "QEMU binary"));
5384 return -1;
5385 }
5386
5387 /* PowerPC pseries based VMs do not support floppy device */
5388 if (disk->device == VIR_DOMAIN_DISK_DEVICE_FLOPPY &&
5389 qemuDomainIsPSeries(vm->def)) {
5390 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5391 _("PowerPC pseries machines do not support floppy device"));
5392 return -1;
5393 }
5394
5395 if (src->type == VIR_STORAGE_TYPE_NVME &&
5396 !virQEMUCapsGet(qemuCaps, QEMU_CAPS_DRIVE_NVME)) {
5397 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5398 _("NVMe disks are not supported with this QEMU binary"));
5399 return -1;
5400 }
5401 }
5402
5403 return 0;
5404 }
5405
5406
5407 /* 250 parts per million (ppm) is half of the NTP threshold */
5408 #define TSC_TOLERANCE 250
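/* e.g. for a host TSC running at 3.0 GHz this allows a deviation of
 * 3000000000 * 250 / 1000000 = 750000 Hz (750 kHz) either way */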
5409
5410 static int
5411 qemuProcessStartValidateTSC(virQEMUDriver *driver,
5412 virDomainObj *vm)
5413 {
5414 size_t i;
5415 unsigned long long freq = 0;
5416 unsigned long long tolerance;
5417 unsigned long long minFreq;
5418 unsigned long long maxFreq;
5419 virHostCPUTscInfo *tsc;
5420 g_autoptr(virCPUDef) cpu = NULL;
5421
5422 for (i = 0; i < vm->def->clock.ntimers; i++) {
5423 virDomainTimerDef *timer = vm->def->clock.timers[i];
5424
5425 if (timer->name == VIR_DOMAIN_TIMER_NAME_TSC &&
5426 timer->frequency > 0) {
5427 freq = timer->frequency;
5428 break;
5429 }
5430 }
5431
5432 if (freq == 0)
5433 return 0;
5434
5435 VIR_DEBUG("Requested TSC frequency %llu Hz", freq);
5436
5437 cpu = virQEMUDriverGetHostCPU(driver);
5438 if (!cpu || !cpu->tsc) {
5439 VIR_DEBUG("Host TSC frequency could not be probed");
5440 return 0;
5441 }
5442
5443 tsc = cpu->tsc;
5444 tolerance = tsc->frequency * TSC_TOLERANCE / 1000000;
5445 minFreq = tsc->frequency - tolerance;
5446 maxFreq = tsc->frequency + tolerance;
5447
5448 VIR_DEBUG("Host TSC frequency %llu Hz, scaling %s, tolerance +/- %llu Hz",
5449 tsc->frequency, virTristateBoolTypeToString(tsc->scaling),
5450 tolerance);
5451
5452 if (freq >= minFreq && freq <= maxFreq) {
5453 VIR_DEBUG("Requested TSC frequency is within tolerance interval");
5454 return 0;
5455 }
5456
5457 if (tsc->scaling == VIR_TRISTATE_BOOL_YES)
5458 return 0;
5459
5460 if (tsc->scaling == VIR_TRISTATE_BOOL_ABSENT) {
5461 VIR_DEBUG("Requested TSC frequency falls outside tolerance range and "
5462 "scaling support is unknown, QEMU will try and possibly "
5463 "fail later");
5464 return 0;
5465 }
5466
5467 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
5468 _("Requested TSC frequency %llu Hz is outside tolerance "
5469 "range ([%llu, %llu] Hz) around host frequency %llu Hz "
5470 "and TSC scaling is not supported by the host CPU"),
5471 freq, minFreq, maxFreq, tsc->frequency);
5472 return -1;
5473 }
5474
5475
5476 /**
5477 * qemuProcessStartValidate:
5478 * @vm: domain object
5479 * @qemuCaps: emulator capabilities
5480 * @migration: restoration of existing state
5481 *
5482 * This function aggregates checks done prior to start of a VM.
5483 *
5484 * The VIR_QEMU_PROCESS_START_PRETEND flag indicates that we don't want to
5485 * actually start the domain, only to build a valid qemu command line. If some
5486 * code shouldn't be executed in this case, make sure to check this flag.
5487 */
5488 static int
5489 qemuProcessStartValidate(virQEMUDriver *driver,
5490 virDomainObj *vm,
5491 virQEMUCaps *qemuCaps,
5492 unsigned int flags)
5493 {
5494 if (!(flags & VIR_QEMU_PROCESS_START_PRETEND)) {
5495 if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
5496 VIR_DEBUG("Checking for KVM availability");
5497 if (!virFileExists("/dev/kvm")) {
5498 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5499 _("Domain requires KVM, but it is not available. "
5500 "Check that virtualization is enabled in the "
5501 "host BIOS, and host configuration is setup to "
5502 "load the kvm modules."));
5503 return -1;
5504 }
5505 }
5506
5507 VIR_DEBUG("Checking domain and device security labels");
5508 if (qemuSecurityCheckAllLabel(driver->securityManager, vm->def) < 0)
5509 return -1;
5510
5511 }
5512
5513 /* Checks below should not be executed when starting a qemu process for a
5514 * VM that was running before (migration, snapshots, save). It's more
5515 * important to start such a VM than to keep the configuration clean */
5516 if ((flags & VIR_QEMU_PROCESS_START_NEW) &&
5517 virDomainDefValidate(vm->def, 0, driver->xmlopt, qemuCaps) < 0)
5518 return -1;
5519
5520 if (qemuProcessStartValidateGraphics(vm) < 0)
5521 return -1;
5522
5523 if (qemuProcessStartValidateIOThreads(vm, qemuCaps) < 0)
5524 return -1;
5525
5526 if (qemuProcessStartValidateShmem(vm) < 0)
5527 return -1;
5528
5529 if (vm->def->cpu) {
5530 if (virCPUValidateFeatures(vm->def->os.arch, vm->def->cpu) < 0)
5531 return -1;
5532
5533 if (ARCH_IS_X86(vm->def->os.arch) &&
5534 !virQEMUCapsGet(qemuCaps, QEMU_CAPS_CPU_UNAVAILABLE_FEATURES)) {
5535 g_auto(GStrv) features = NULL;
5536 int n;
5537
5538 if ((n = virCPUDefCheckFeatures(vm->def->cpu,
5539 virCPUx86FeatureFilterSelectMSR,
5540 NULL,
5541 &features)) < 0)
5542 return -1;
5543
5544 if (n > 0) {
5545 g_autofree char *str = NULL;
5546
5547 str = g_strjoinv(", ", features);
5548 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
5549 _("Some features cannot be reliably used "
5550 "with this QEMU: %s"), str);
5551 return -1;
5552 }
5553 }
5554 }
5555
5556 if (qemuProcessStartValidateDisks(vm, qemuCaps) < 0)
5557 return -1;
5558
5559 if (qemuProcessStartValidateTSC(driver, vm) < 0)
5560 return -1;
5561
5562 VIR_DEBUG("Checking for any possible (non-fatal) issues");
5563
5564 qemuProcessStartWarnShmem(vm);
5565
5566 return 0;
5567 }
5568
5569
5570 static int
5571 qemuProcessStartUpdateCustomCaps(virDomainObj *vm)
5572 {
5573 qemuDomainObjPrivate *priv = vm->privateData;
5574 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
5575 qemuDomainXmlNsDef *nsdef = vm->def->namespaceData;
5576 char **next;
5577 int tmp;
5578
5579 if (cfg->capabilityfilters) {
5580 for (next = cfg->capabilityfilters; *next; next++) {
5581 if ((tmp = virQEMUCapsTypeFromString(*next)) < 0) {
5582 virReportError(VIR_ERR_INTERNAL_ERROR,
5583 _("invalid capability_filters capability '%s'"),
5584 *next);
5585 return -1;
5586 }
5587
5588 virQEMUCapsClear(priv->qemuCaps, tmp);
5589 }
5590 }
5591
5592 if (nsdef) {
5593 for (next = nsdef->capsadd; next && *next; next++) {
5594 if ((tmp = virQEMUCapsTypeFromString(*next)) < 0) {
5595 virReportError(VIR_ERR_INTERNAL_ERROR,
5596 _("invalid qemu namespace capability '%s'"),
5597 *next);
5598 return -1;
5599 }
5600
5601 virQEMUCapsSet(priv->qemuCaps, tmp);
5602 }
5603
5604 for (next = nsdef->capsdel; next && *next; next++) {
5605 if ((tmp = virQEMUCapsTypeFromString(*next)) < 0) {
5606 virReportError(VIR_ERR_INTERNAL_ERROR,
5607 _("invalid qemu namespace capability '%s'"),
5608 *next);
5609 return -1;
5610 }
5611
5612 virQEMUCapsClear(priv->qemuCaps, tmp);
5613 }
5614 }
5615
5616 return 0;
5617 }
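/*
 * Illustrative (hypothetical) configuration handled above:
 *   qemu.conf:   capability_filters = [ "some.capability" ]
 *   domain XML:  <qemu:capabilities>
 *                  <qemu:add capability='some.capability'/>
 *                  <qemu:del capability='other.capability'/>
 *                </qemu:capabilities>
 * The qemu.conf filters are applied first, then the per-domain additions and
 * removals from the qemu XML namespace.
 */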
5618
5619
5620 /**
5621 * qemuProcessPrepareQEMUCaps:
5622 * @vm: domain object
5623 * @qemuCapsCache: cache of QEMU capabilities
5624 * @processStartFlags: flags based on the VIR_QEMU_PROCESS_START_* enum
5625 *
5626 * Prepare the capabilities of a QEMU process for startup. This includes
5627 * copying the caps to a static cache and potential post-processing depending
5628 * on the configuration of the VM and startup process.
5629 *
5630 * Returns 0 on success, -1 on error.
5631 */
5632 static int
5633 qemuProcessPrepareQEMUCaps(virDomainObj *vm,
5634 virFileCache *qemuCapsCache,
5635 unsigned int processStartFlags)
5636 {
5637 qemuDomainObjPrivate *priv = vm->privateData;
5638
5639 virObjectUnref(priv->qemuCaps);
5640 if (!(priv->qemuCaps = virQEMUCapsCacheLookupCopy(qemuCapsCache,
5641 vm->def->virtType,
5642 vm->def->emulator,
5643 vm->def->os.machine)))
5644 return -1;
5645
5646 if (processStartFlags & VIR_QEMU_PROCESS_START_STANDALONE)
5647 virQEMUCapsClear(priv->qemuCaps, QEMU_CAPS_CHARDEV_FD_PASS_COMMANDLINE);
5648
5649 /* Update qemu capabilities according to lists passed in via namespace */
5650 if (qemuProcessStartUpdateCustomCaps(vm) < 0)
5651 return -1;
5652
5653 /* re-process capability lockouts since we might have removed capabilities */
5654 virQEMUCapsInitProcessCapsInterlock(priv->qemuCaps);
5655
5656 return 0;
5657 }
5658
5659
5660 /**
5661 * qemuProcessInit:
5662 *
5663 * Prepares the domain up to the point when priv->qemuCaps is initialized. The
5664 * function calls qemuProcessStop when needed.
5665 *
5666 * The VIR_QEMU_PROCESS_START_PRETEND flag indicates that we don't want to
5667 * actually start the domain, only to build a valid qemu command line. If some
5668 * code shouldn't be executed in this case, make sure to check this flag.
5669 *
5670 * Returns 0 on success, -1 on error.
5671 */
5672 int
5673 qemuProcessInit(virQEMUDriver *driver,
5674 virDomainObj *vm,
5675 virCPUDef *updatedCPU,
5676 qemuDomainAsyncJob asyncJob,
5677 bool migration,
5678 unsigned int flags)
5679 {
5680 qemuDomainObjPrivate *priv = vm->privateData;
5681 int stopFlags;
5682 virCPUDef *origCPU = NULL;
5683 int ret = -1;
5684
5685 VIR_DEBUG("vm=%p name=%s id=%d migration=%d",
5686 vm, vm->def->name, vm->def->id, migration);
5687
5688 VIR_DEBUG("Beginning VM startup process");
5689
5690 if (virDomainObjIsActive(vm)) {
5691 virReportError(VIR_ERR_OPERATION_INVALID, "%s",
5692 _("VM is already active"));
5693 goto cleanup;
5694 }
5695
5696 /* in case the post parse callback failed, we need to re-run it on the
5697 * old config before we start the VM */
5698 if (vm->def->postParseFailed) {
5699 VIR_DEBUG("re-running the post parse callback");
5700
5701 /* we don't have the private copy of qemuCaps at this point */
5702 if (virDomainDefPostParse(vm->def, 0, driver->xmlopt, NULL) < 0)
5703 goto cleanup;
5704 }
5705
5706 VIR_DEBUG("Determining emulator version");
5707 if (qemuProcessPrepareQEMUCaps(vm, driver->qemuCapsCache, flags) < 0)
5708 goto cleanup;
5709
5710 if (qemuDomainUpdateCPU(vm, updatedCPU, &origCPU) < 0)
5711 goto cleanup;
5712
5713 if (qemuProcessStartValidate(driver, vm, priv->qemuCaps, flags) < 0)
5714 goto cleanup;
5715
5716 /* Do this upfront, so any part of the startup process can add
5717 * runtime state to vm->def that won't be persisted. This lets us
5718 * report implicit runtime defaults in the XML, like vnc listen/socket
5719 */
5720 VIR_DEBUG("Setting current domain def as transient");
5721 if (virDomainObjSetDefTransient(driver->xmlopt, vm, priv->qemuCaps) < 0)
5722 goto cleanup;
5723
5724 if (flags & VIR_QEMU_PROCESS_START_PRETEND) {
5725 if (qemuDomainSetPrivatePaths(driver, vm) < 0) {
5726 virDomainObjRemoveTransientDef(vm);
5727 goto cleanup;
5728 }
5729 } else {
5730 vm->def->id = qemuDriverAllocateID(driver);
5731 qemuDomainSetFakeReboot(driver, vm, false);
5732 virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_STARTING_UP);
5733
5734 if (g_atomic_int_add(&driver->nactive, 1) == 0 && driver->inhibitCallback)
5735 driver->inhibitCallback(true, driver->inhibitOpaque);
5736
5737 /* Run an early hook to set-up missing devices */
5738 if (qemuProcessStartHook(driver, vm,
5739 VIR_HOOK_QEMU_OP_PREPARE,
5740 VIR_HOOK_SUBOP_BEGIN) < 0)
5741 goto stop;
5742
5743 if (qemuDomainSetPrivatePaths(driver, vm) < 0)
5744 goto stop;
5745
5746 priv->origCPU = g_steal_pointer(&origCPU);
5747 }
5748
5749 ret = 0;
5750
5751 cleanup:
5752 virCPUDefFree(origCPU);
5753 return ret;
5754
5755 stop:
5756 stopFlags = VIR_QEMU_PROCESS_STOP_NO_RELABEL;
5757 if (migration)
5758 stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
5759 qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
5760 goto cleanup;
5761 }
5762
5763
5764 /**
5765 * qemuProcessNetworkPrepareDevices
5766 */
5767 static int
5768 qemuProcessNetworkPrepareDevices(virQEMUDriver *driver,
5769 virDomainObj *vm)
5770 {
5771 virDomainDef *def = vm->def;
5772 qemuDomainObjPrivate *priv = vm->privateData;
5773 size_t i;
5774 g_autoptr(virConnect) conn = NULL;
5775
5776 for (i = 0; i < def->nnets; i++) {
5777 virDomainNetDef *net = def->nets[i];
5778 virDomainNetType actualType;
5779
5780 /* If appropriate, grab a physical device from the configured
5781 * network's pool of devices, or resolve bridge device name
5782 * to the one defined in the network definition.
5783 */
5784 if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
5785 if (!conn && !(conn = virGetConnectNetwork()))
5786 return -1;
5787 if (virDomainNetAllocateActualDevice(conn, def, net) < 0)
5788 return -1;
5789 }
5790
5791 actualType = virDomainNetGetActualType(net);
5792 if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV &&
5793 net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
5794 /* Each type='hostdev' network device must also have a
5795 * corresponding entry in the hostdevs array. For netdevs
5796 * that are hardcoded as type='hostdev', this is already
5797 * done by the parser, but for those allocated from a
5798 * network / determined at runtime, we need to do it
5799 * separately.
5800 */
5801 virDomainHostdevDef *hostdev = virDomainNetGetActualHostdev(net);
5802 virDomainHostdevSubsysPCI *pcisrc = &hostdev->source.subsys.u.pci;
5803
5804 if (virDomainHostdevFind(def, hostdev, NULL) >= 0) {
5805 virReportError(VIR_ERR_INTERNAL_ERROR,
5806 _("PCI device %04x:%02x:%02x.%x "
5807 "allocated from network %s is already "
5808 "in use by domain %s"),
5809 pcisrc->addr.domain, pcisrc->addr.bus,
5810 pcisrc->addr.slot, pcisrc->addr.function,
5811 net->data.network.name, def->name);
5812 return -1;
5813 }
5814 if (virDomainHostdevInsert(def, hostdev) < 0)
5815 return -1;
5816 } else if (actualType == VIR_DOMAIN_NET_TYPE_USER &&
5817 !priv->disableSlirp &&
5818 virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DBUS_VMSTATE)) {
5819 qemuSlirp *slirp = NULL;
5820 int rv = qemuInterfacePrepareSlirp(driver, net, &slirp);
5821
5822 if (rv == -1)
5823 return -1;
5824 if (rv == 1)
5825 QEMU_DOMAIN_NETWORK_PRIVATE(net)->slirp = slirp;
5826 }
5827
5828 }
5829 return 0;
5830 }
5831
5832
5833 /**
5834 * qemuProcessSetupVcpu:
5835 * @vm: domain object
5836 * @vcpuid: id of VCPU to set defaults
5837 *
5838 * This function sets resource properties (cgroups, affinity, scheduler) for a
5839 * vCPU. This function expects that the vCPU is online and the vCPU pids were
5840 * correctly detected at the point when it's called.
5841 *
5842 * Returns 0 on success, -1 on error.
5843 */
5844 int
5845 qemuProcessSetupVcpu(virDomainObj *vm,
5846 unsigned int vcpuid)
5847 {
5848 pid_t vcpupid = qemuDomainGetVcpuPid(vm, vcpuid);
5849 virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, vcpuid);
5850 virDomainResctrlMonDef *mon = NULL;
5851 size_t i = 0;
5852
5853 if (qemuProcessSetupPid(vm, vcpupid, VIR_CGROUP_THREAD_VCPU,
5854 vcpuid, vcpu->cpumask,
5855 vm->def->cputune.period,
5856 vm->def->cputune.quota,
5857 &vcpu->sched) < 0)
5858 return -1;
5859
5860 for (i = 0; i < vm->def->nresctrls; i++) {
5861 size_t j = 0;
5862 virDomainResctrlDef *ct = vm->def->resctrls[i];
5863
5864 if (virBitmapIsBitSet(ct->vcpus, vcpuid)) {
5865 if (virResctrlAllocAddPID(ct->alloc, vcpupid) < 0)
5866 return -1;
5867
5868 for (j = 0; j < ct->nmonitors; j++) {
5869 mon = ct->monitors[j];
5870
5871 if (virBitmapEqual(ct->vcpus, mon->vcpus) &&
5872 !virResctrlAllocIsEmpty(ct->alloc))
5873 continue;
5874
5875 if (virBitmapIsBitSet(mon->vcpus, vcpuid)) {
5876 if (virResctrlMonitorAddPID(mon->instance, vcpupid) < 0)
5877 return -1;
5878 break;
5879 }
5880 }
5881
5882 break;
5883 }
5884 }
5885
5886 return 0;
5887 }
5888
5889
5890 static int
5891 qemuProcessSetupVcpus(virDomainObj *vm)
5892 {
5893 virDomainVcpuDef *vcpu;
5894 unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
5895 size_t i;
5896
5897 if ((vm->def->cputune.period || vm->def->cputune.quota) &&
5898 !virCgroupHasController(((qemuDomainObjPrivate *) vm->privateData)->cgroup,
5899 VIR_CGROUP_CONTROLLER_CPU)) {
5900 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5901 _("cgroup cpu is required for scheduler tuning"));
5902 return -1;
5903 }
5904
5905 if (!qemuDomainHasVcpuPids(vm)) {
5906 /* If any CPU has custom affinity that differs from the
5907 * VM default affinity, we must reject it */
5908 for (i = 0; i < maxvcpus; i++) {
5909 vcpu = virDomainDefGetVcpu(vm->def, i);
5910
5911 if (!vcpu->online)
5912 continue;
5913
5914 if (vcpu->cpumask &&
5915 !virBitmapEqual(vm->def->cpumask, vcpu->cpumask)) {
5916 virReportError(VIR_ERR_OPERATION_INVALID, "%s",
5917 _("cpu affinity is not supported"));
5918 return -1;
5919 }
5920 }
5921
5922 return 0;
5923 }
5924
5925 for (i = 0; i < maxvcpus; i++) {
5926 vcpu = virDomainDefGetVcpu(vm->def, i);
5927
5928 if (!vcpu->online)
5929 continue;
5930
5931 if (qemuProcessSetupVcpu(vm, i) < 0)
5932 return -1;
5933 }
5934
5935 return 0;
5936 }
5937
5938
5939 int
5940 qemuProcessSetupIOThread(virDomainObj *vm,
5941 virDomainIOThreadIDDef *iothread)
5942 {
5943 return qemuProcessSetupPid(vm, iothread->thread_id,
5944 VIR_CGROUP_THREAD_IOTHREAD,
5945 iothread->iothread_id,
5946 iothread->cpumask,
5947 vm->def->cputune.iothread_period,
5948 vm->def->cputune.iothread_quota,
5949 &iothread->sched);
5950 }
5951
5952
5953 static int
5954 qemuProcessSetupIOThreads(virDomainObj *vm)
5955 {
5956 size_t i;
5957
5958 for (i = 0; i < vm->def->niothreadids; i++) {
5959 virDomainIOThreadIDDef *info = vm->def->iothreadids[i];
5960
5961 if (qemuProcessSetupIOThread(vm, info) < 0)
5962 return -1;
5963 }
5964
5965 return 0;
5966 }
5967
5968
5969 static int
5970 qemuProcessValidateHotpluggableVcpus(virDomainDef *def)
5971 {
5972 virDomainVcpuDef *vcpu;
5973 virDomainVcpuDef *subvcpu;
5974 qemuDomainVcpuPrivate *vcpupriv;
5975 unsigned int maxvcpus = virDomainDefGetVcpusMax(def);
5976 size_t i = 0;
5977 size_t j;
5978 virBitmap *ordermap = virBitmapNew(maxvcpus + 1);
5979 int ret = -1;
5980
5981 /* validate:
5982 * - all hotpluggable entities to be hotplugged have the correct data
5983 * - vcpus belonging to a hotpluggable entity share configuration
5984 * - order of the hotpluggable entities is unique
5985 */
5986 for (i = 0; i < maxvcpus; i++) {
5987 vcpu = virDomainDefGetVcpu(def, i);
5988 vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpu);
5989
5990 /* skip vcpus that are not the first in their hotpluggable entity */
5991 if (vcpupriv->vcpus == 0)
5992 continue;
5993
5994 if (vcpu->order != 0) {
5995 if (virBitmapIsBitSet(ordermap, vcpu->order)) {
5996 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
5997 _("duplicate vcpu order '%u'"), vcpu->order);
5998 goto cleanup;
5999 }
6000
6001 if (virBitmapSetBit(ordermap, vcpu->order)) {
6002 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6003 _("vcpu order '%u' exceeds vcpu count"),
6004 vcpu->order);
6005 goto cleanup;
6006 }
6007 }
6008
6009 for (j = i + 1; j < (i + vcpupriv->vcpus); j++) {
6010 subvcpu = virDomainDefGetVcpu(def, j);
6011 if (subvcpu->hotpluggable != vcpu->hotpluggable ||
6012 subvcpu->online != vcpu->online ||
6013 subvcpu->order != vcpu->order) {
6014 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6015 _("vcpus '%zu' and '%zu' are in the same hotplug "
6016 "group but differ in configuration"), i, j);
6017 goto cleanup;
6018 }
6019 }
6020
6021 if (vcpu->online && vcpu->hotpluggable == VIR_TRISTATE_BOOL_YES) {
6022 if ((vcpupriv->socket_id == -1 && vcpupriv->core_id == -1 &&
6023 vcpupriv->thread_id == -1 && vcpupriv->node_id == -1) ||
6024 !vcpupriv->type) {
6025 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6026 _("vcpu '%zu' is missing hotplug data"), i);
6027 goto cleanup;
6028 }
6029 }
6030 }
6031
6032 ret = 0;
6033 cleanup:
6034 virBitmapFree(ordermap);
6035 return ret;
6036 }
6037
6038
6039 static int
6040 qemuDomainHasHotpluggableStartupVcpus(virDomainDef *def)
6041 {
6042 size_t maxvcpus = virDomainDefGetVcpusMax(def);
6043 virDomainVcpuDef *vcpu;
6044 size_t i;
6045
6046 for (i = 0; i < maxvcpus; i++) {
6047 vcpu = virDomainDefGetVcpu(def, i);
6048
6049 if (vcpu->online && vcpu->hotpluggable == VIR_TRISTATE_BOOL_YES)
6050 return true;
6051 }
6052
6053 return false;
6054 }
6055
6056
6057 static int
6058 qemuProcessVcpusSortOrder(const void *a,
6059 const void *b)
6060 {
6061 virDomainVcpuDef *vcpua = *((virDomainVcpuDef **)a);
6062 virDomainVcpuDef *vcpub = *((virDomainVcpuDef **)b);
6063
6064 return vcpua->order - vcpub->order;
6065 }
6066
6067
6068 static int
6069 qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
6070 virDomainObj *vm,
6071 qemuDomainAsyncJob asyncJob)
6072 {
6073 unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
6074 qemuDomainObjPrivate *priv = vm->privateData;
6075 qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
6076 virDomainVcpuDef *vcpu;
6077 qemuDomainVcpuPrivate *vcpupriv;
6078 size_t i;
6079 int ret = -1;
6080 int rc;
6081
6082 g_autofree virDomainVcpuDef **bootHotplug = NULL;
6083 size_t nbootHotplug = 0;
6084
6085 for (i = 0; i < maxvcpus; i++) {
6086 vcpu = virDomainDefGetVcpu(vm->def, i);
6087 vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpu);
6088
6089 if (vcpu->hotpluggable == VIR_TRISTATE_BOOL_YES && vcpu->online &&
6090 vcpupriv->vcpus != 0) {
6091 vcpupriv->alias = g_strdup_printf("vcpu%zu", i);
6092
6093 VIR_APPEND_ELEMENT(bootHotplug, nbootHotplug, vcpu);
6094 }
6095 }
6096
6097 if (nbootHotplug == 0)
6098 return 0;
6099
6100 qsort(bootHotplug, nbootHotplug, sizeof(*bootHotplug),
6101 qemuProcessVcpusSortOrder);
6102
6103 if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
6104 goto cleanup;
6105
6106 for (i = 0; i < nbootHotplug; i++) {
6107 g_autoptr(virJSONValue) vcpuprops = NULL;
6108 vcpu = bootHotplug[i];
6109
6110 if (!(vcpuprops = qemuBuildHotpluggableCPUProps(vcpu)))
6111 goto cleanup;
6112
6113 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
6114 goto cleanup;
6115
6116 rc = qemuMonitorAddDeviceProps(qemuDomainGetMonitor(vm), &vcpuprops);
6117
6118 if (qemuDomainObjExitMonitor(driver, vm) < 0)
6119 goto cleanup;
6120
6121 if (rc < 0)
6122 goto cleanup;
6123 }
6124
6125 ret = 0;
6126
6127 cleanup:
6128 qemuCgroupEmulatorAllNodesRestore(emulatorCgroup);
6129 return ret;
6130 }
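/*
 * Sketch of the per-vcpu monitor call above (hypothetical values): the props
 * built by qemuBuildHotpluggableCPUProps() typically carry the CPU driver type
 * and topology IDs, roughly equivalent to a device_add of
 *   <cpu-driver>,id=vcpu1,socket-id=0,core-id=1,thread-id=0
 * issued once per hotpluggable entity in ascending 'order'.
 */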
6131
6132
6133 static bool
6134 qemuProcessDropUnknownCPUFeatures(const char *name,
6135 virCPUFeaturePolicy policy,
6136 void *opaque)
6137 {
6138 const char **features = opaque;
6139
6140 if (policy != VIR_CPU_FEATURE_DISABLE &&
6141 policy != VIR_CPU_FEATURE_FORBID)
6142 return true;
6143
6144 if (g_strv_contains(features, name))
6145 return true;
6146
6147 /* Features unknown to QEMU are implicitly disabled; we can just drop them
6148 * from the definition. */
6149 return false;
6150 }
6151
6152
6153 static int
6154 qemuProcessUpdateGuestCPU(virDomainDef *def,
6155 virQEMUCaps *qemuCaps,
6156 virArch hostarch,
6157 unsigned int flags)
6158 {
6159 if (!def->cpu)
6160 return 0;
6161
6162 /* nothing to do if only the topology part of the CPU def is used */
6163 if (def->cpu->mode == VIR_CPU_MODE_CUSTOM && !def->cpu->model)
6164 return 0;
6165
6166 /* Old libvirt added host CPU model to host-model CPUs for migrations,
6167 * while new libvirt just turns host-model into custom mode. We need
6168 * to fix the mode to maintain backward compatibility and to avoid
6169 * the CPU model being replaced in virCPUUpdate.
6170 */
6171 if (!(flags & VIR_QEMU_PROCESS_START_NEW) &&
6172 ARCH_IS_X86(def->os.arch) &&
6173 def->cpu->mode == VIR_CPU_MODE_HOST_MODEL &&
6174 def->cpu->model) {
6175 def->cpu->mode = VIR_CPU_MODE_CUSTOM;
6176 }
6177
6178 if (!virQEMUCapsIsCPUModeSupported(qemuCaps, hostarch, def->virtType,
6179 def->cpu->mode, def->os.machine)) {
6180 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6181 _("CPU mode '%s' for %s %s domain on %s host is not "
6182 "supported by hypervisor"),
6183 virCPUModeTypeToString(def->cpu->mode),
6184 virArchToString(def->os.arch),
6185 virDomainVirtTypeToString(def->virtType),
6186 virArchToString(hostarch));
6187 return -1;
6188 }
6189
6190 if (virCPUConvertLegacy(hostarch, def->cpu) < 0)
6191 return -1;
6192
6193 if (def->cpu->check != VIR_CPU_CHECK_NONE) {
6194 virCPUDef *host;
6195
6196 host = virQEMUCapsGetHostModel(qemuCaps, def->virtType,
6197 VIR_QEMU_CAPS_HOST_CPU_FULL);
6198
6199 if (host && virCPUCheckForbiddenFeatures(def->cpu, host) < 0)
6200 return -1;
6201 }
6202
6203 /* nothing to update for host-passthrough / maximum */
6204 if (def->cpu->mode != VIR_CPU_MODE_HOST_PASSTHROUGH &&
6205 def->cpu->mode != VIR_CPU_MODE_MAXIMUM) {
6206 g_autoptr(virDomainCapsCPUModels) cpuModels = NULL;
6207
6208 if (def->cpu->check == VIR_CPU_CHECK_PARTIAL &&
6209 virCPUCompare(hostarch,
6210 virQEMUCapsGetHostModel(qemuCaps, def->virtType,
6211 VIR_QEMU_CAPS_HOST_CPU_FULL),
6212 def->cpu, true) < 0)
6213 return -1;
6214
6215 if (virCPUUpdate(def->os.arch, def->cpu,
6216 virQEMUCapsGetHostModel(qemuCaps, def->virtType,
6217 VIR_QEMU_CAPS_HOST_CPU_MIGRATABLE)) < 0)
6218 return -1;
6219
6220 cpuModels = virQEMUCapsGetCPUModels(qemuCaps, def->virtType, NULL, NULL);
6221
6222 if (virCPUTranslate(def->os.arch, def->cpu, cpuModels) < 0)
6223 return -1;
6224
6225 def->cpu->fallback = VIR_CPU_FALLBACK_FORBID;
6226 }
6227
6228 if (virCPUDefFilterFeatures(def->cpu, virQEMUCapsCPUFilterFeatures,
6229 &def->os.arch) < 0)
6230 return -1;
6231
6232 if (ARCH_IS_X86(def->os.arch)) {
6233 g_auto(GStrv) features = NULL;
6234
6235 if (virQEMUCapsGetCPUFeatures(qemuCaps, def->virtType, false, &features) < 0)
6236 return -1;
6237
6238 if (features &&
6239 virCPUDefFilterFeatures(def->cpu, qemuProcessDropUnknownCPUFeatures,
6240 features) < 0)
6241 return -1;
6242 }
6243
6244 return 0;
6245 }
6246
6247
6248 static int
6249 qemuProcessPrepareDomainNUMAPlacement(virDomainObj *vm)
6250 {
6251 qemuDomainObjPrivate *priv = vm->privateData;
6252 g_autofree char *nodeset = NULL;
6253 g_autoptr(virBitmap) numadNodeset = NULL;
6254 g_autoptr(virBitmap) hostMemoryNodeset = NULL;
6255 g_autoptr(virCapsHostNUMA) caps = NULL;
6256
6257 /* Get the advisory nodeset from numad if 'placement' of
6258 * either <vcpu> or <numatune> is 'auto'.
6259 */
6260 if (!virDomainDefNeedsPlacementAdvice(vm->def))
6261 return 0;
6262
6263 nodeset = virNumaGetAutoPlacementAdvice(virDomainDefGetVcpus(vm->def),
6264 virDomainDefGetMemoryTotal(vm->def));
6265
6266 if (!nodeset)
6267 return -1;
6268
6269 if (!(hostMemoryNodeset = virNumaGetHostMemoryNodeset()))
6270 return -1;
6271
6272 VIR_DEBUG("Nodeset returned from numad: %s", nodeset);
6273
6274 if (virBitmapParse(nodeset, &numadNodeset, VIR_DOMAIN_CPUMASK_LEN) < 0)
6275 return -1;
6276
6277 if (!(caps = virCapabilitiesHostNUMANewHost()))
6278 return -1;
6279
6280 /* numad may return a nodeset that only contains cpus but cgroups don't play
6281 * well with that. Set the autoCpuset from all cpus from that nodeset, but
6282 * assign autoNodeset only with nodes containing memory. */
6283 if (!(priv->autoCpuset = virCapabilitiesHostNUMAGetCpus(caps, numadNodeset)))
6284 return -1;
6285
6286 virBitmapIntersect(numadNodeset, hostMemoryNodeset);
6287
6288 priv->autoNodeset = g_steal_pointer(&numadNodeset);
6289
6290 return 0;
6291 }
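/*
 * Example (hypothetical numad reply): a nodeset string such as "0-1" is parsed
 * into numadNodeset; autoCpuset then covers all CPUs of nodes 0-1, while
 * autoNodeset keeps only those of the two nodes that actually contain memory.
 */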
6292
6293
6294 static void
6295 qemuProcessPrepareDeviceBootorder(virDomainDef *def)
6296 {
6297 size_t i;
6298 unsigned int bootCD = 0;
6299 unsigned int bootFloppy = 0;
6300 unsigned int bootDisk = 0;
6301 unsigned int bootNetwork = 0;
6302
6303 if (def->os.nBootDevs == 0)
6304 return;
6305
6306 for (i = 0; i < def->os.nBootDevs; i++) {
6307 switch ((virDomainBootOrder) def->os.bootDevs[i]) {
6308 case VIR_DOMAIN_BOOT_CDROM:
6309 bootCD = i + 1;
6310 break;
6311
6312 case VIR_DOMAIN_BOOT_FLOPPY:
6313 bootFloppy = i + 1;
6314 break;
6315
6316 case VIR_DOMAIN_BOOT_DISK:
6317 bootDisk = i + 1;
6318 break;
6319
6320 case VIR_DOMAIN_BOOT_NET:
6321 bootNetwork = i + 1;
6322 break;
6323
6324 case VIR_DOMAIN_BOOT_LAST:
6325 default:
6326 break;
6327 }
6328 }
6329
6330 for (i = 0; i < def->ndisks; i++) {
6331 virDomainDiskDef *disk = def->disks[i];
6332
6333 switch (disk->device) {
6334 case VIR_DOMAIN_DISK_DEVICE_CDROM:
6335 disk->info.effectiveBootIndex = bootCD;
6336 bootCD = 0;
6337 break;
6338
6339 case VIR_DOMAIN_DISK_DEVICE_DISK:
6340 case VIR_DOMAIN_DISK_DEVICE_LUN:
6341 disk->info.effectiveBootIndex = bootDisk;
6342 bootDisk = 0;
6343 break;
6344
6345 case VIR_DOMAIN_DISK_DEVICE_FLOPPY:
6346 disk->info.effectiveBootIndex = bootFloppy;
6347 bootFloppy = 0;
6348 break;
6349
6350 case VIR_DOMAIN_DISK_DEVICE_LAST:
6351 default:
6352 break;
6353 }
6354 }
6355
6356 if (def->nnets > 0 && bootNetwork > 0) {
6357 /* If network boot is enabled, the boot index is given to the first network
6358 * device. If that one is backed by a host device, then we need to find the
6359 * first corresponding host device */
6360 if (virDomainNetGetActualType(def->nets[0]) == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
6361 for (i = 0; i < def->nhostdevs; i++) {
6362 virDomainHostdevDef *hostdev = def->hostdevs[i];
6363 virDomainHostdevSubsys *subsys = &hostdev->source.subsys;
6364
6365 if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
6366 subsys->type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
6367 hostdev->info->type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_UNASSIGNED &&
6368 hostdev->parentnet) {
6369 hostdev->info->effectiveBootIndex = bootNetwork;
6370 break;
6371 }
6372 }
6373 } else {
6374 def->nets[0]->info.effectiveBootIndex = bootNetwork;
6375 }
6376 }
6377 }
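/*
 * Worked example (hypothetical config): with <boot dev='cdrom'/> followed by
 * <boot dev='hd'/>, bootCD becomes 1 and bootDisk becomes 2, so the first
 * cdrom disk gets effectiveBootIndex=1 and the first disk/lun device gets
 * effectiveBootIndex=2; subsequent disks of the same kind keep index 0.
 */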
6378
6379
6380 static int
6381 qemuProcessPrepareDomainStorage(virQEMUDriver *driver,
6382 virDomainObj *vm,
6383 qemuDomainObjPrivate *priv,
6384 virQEMUDriverConfig *cfg,
6385 unsigned int flags)
6386 {
6387 size_t i;
6388 bool cold_boot = flags & VIR_QEMU_PROCESS_START_COLD;
6389
6390 for (i = vm->def->ndisks; i > 0; i--) {
6391 size_t idx = i - 1;
6392 virDomainDiskDef *disk = vm->def->disks[idx];
6393
6394 if (virDomainDiskTranslateSourcePool(disk) < 0) {
6395 if (qemuDomainCheckDiskStartupPolicy(driver, vm, idx, cold_boot) < 0)
6396 return -1;
6397
6398 /* disk source was dropped */
6399 continue;
6400 }
6401
6402 if (qemuDomainPrepareDiskSource(disk, priv, cfg) < 0)
6403 return -1;
6404 }
6405
6406 return 0;
6407 }
6408
6409
6410 static int
6411 qemuProcessPrepareDomainHostdevs(virDomainObj *vm,
6412 qemuDomainObjPrivate *priv)
6413 {
6414 size_t i;
6415
6416 for (i = 0; i < vm->def->nhostdevs; i++) {
6417 virDomainHostdevDef *hostdev = vm->def->hostdevs[i];
6418
6419 if (qemuDomainPrepareHostdev(hostdev, priv) < 0)
6420 return -1;
6421 }
6422
6423 return 0;
6424 }
6425
6426
6427 int
6428 qemuProcessPrepareHostHostdev(virDomainHostdevDef *hostdev)
6429 {
6430 if (virHostdevIsSCSIDevice(hostdev)) {
6431 virDomainHostdevSubsysSCSI *scsisrc = &hostdev->source.subsys.u.scsi;
6432
6433 switch ((virDomainHostdevSCSIProtocolType) scsisrc->protocol) {
6434 case VIR_DOMAIN_HOSTDEV_SCSI_PROTOCOL_TYPE_NONE: {
6435 virDomainHostdevSubsysSCSIHost *scsihostsrc = &scsisrc->u.host;
6436 virStorageSource *src = scsisrc->u.host.src;
6437 g_autofree char *devstr = NULL;
6438
6439 if (!(devstr = virSCSIDeviceGetSgName(NULL,
6440 scsihostsrc->adapter,
6441 scsihostsrc->bus,
6442 scsihostsrc->target,
6443 scsihostsrc->unit)))
6444 return -1;
6445
6446 src->path = g_strdup_printf("/dev/%s", devstr);
6447 break;
6448 }
6449
6450 case VIR_DOMAIN_HOSTDEV_SCSI_PROTOCOL_TYPE_ISCSI:
6451 break;
6452
6453 case VIR_DOMAIN_HOSTDEV_SCSI_PROTOCOL_TYPE_LAST:
6454 default:
6455 virReportEnumRangeError(virDomainHostdevSCSIProtocolType, scsisrc->protocol);
6456 return -1;
6457 }
6458 }
6459
6460 return 0;
6461 }
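/*
 * Example (hypothetical device): for a SCSI hostdev with adapter 'scsi_host3',
 * bus 0, target 1, unit 2, virSCSIDeviceGetSgName() might resolve the generic
 * device to 'sg5', so src->path above ends up as '/dev/sg5'.
 */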
6462
6463
6464 static int
6465 qemuProcessPrepareHostHostdevs(virDomainObj *vm)
6466 {
6467 size_t i;
6468
6469 for (i = 0; i < vm->def->nhostdevs; i++) {
6470 virDomainHostdevDef *hostdev = vm->def->hostdevs[i];
6471
6472 if (qemuProcessPrepareHostHostdev(hostdev) < 0)
6473 return -1;
6474 }
6475
6476 return 0;
6477 }
6478
6479
6480 /**
6481 * qemuProcessRebootAllowed:
6482 * @def: domain definition
6483 *
6484 * This function encapsulates the logic which dictates whether '-no-reboot' is
6485 * used instead of '-no-shutdown' with QEMU versions which don't support the
6486 * 'set-action' QMP command.
6487 */
6488 bool
6489 qemuProcessRebootAllowed(const virDomainDef *def)
6490 {
6491 return def->onReboot != VIR_DOMAIN_LIFECYCLE_ACTION_DESTROY ||
6492 def->onPoweroff != VIR_DOMAIN_LIFECYCLE_ACTION_DESTROY ||
6493 (def->onCrash != VIR_DOMAIN_LIFECYCLE_ACTION_DESTROY &&
6494 def->onCrash != VIR_DOMAIN_LIFECYCLE_ACTION_COREDUMP_DESTROY);
6495 }
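/*
 * Example: with <on_reboot>destroy</on_reboot>, <on_poweroff>destroy</on_poweroff>
 * and <on_crash>destroy</on_crash> (or coredump-destroy) this returns false,
 * meaning a guest-initiated reboot must terminate the QEMU process; choosing a
 * non-destroy action for any of the three keeps reboots allowed.
 */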
6496
6497
6498 static void
6499 qemuProcessPrepareAllowReboot(virDomainObj *vm)
6500 {
6501 virDomainDef *def = vm->def;
6502 qemuDomainObjPrivate *priv = vm->privateData;
6503
6504 /* with the 'set-action' QMP command we don't need to keep this around as
6505 * we always update qemu with the proper state */
6506 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_SET_ACTION))
6507 return;
6508
6509 if (priv->allowReboot != VIR_TRISTATE_BOOL_ABSENT)
6510 return;
6511
6512 priv->allowReboot = virTristateBoolFromBool(qemuProcessRebootAllowed(def));
6513 }
6514
6515
6516 static int
6517 qemuProcessUpdateSEVInfo(virDomainObj *vm)
6518 {
6519 qemuDomainObjPrivate *priv = vm->privateData;
6520 virQEMUCaps *qemuCaps = priv->qemuCaps;
6521 virDomainSEVDef *sev = &vm->def->sec->data.sev;
6522 virSEVCapability *sevCaps = NULL;
6523
6524 /* if platform specific info like 'cbitpos' and 'reducedPhysBits' have
6525 * not been supplied, we need to autofill them from caps now as both are
6526 * mandatory on QEMU cmdline
6527 */
6528 sevCaps = virQEMUCapsGetSEVCapabilities(qemuCaps);
6529 if (!sev->haveCbitpos) {
6530 sev->cbitpos = sevCaps->cbitpos;
6531 sev->haveCbitpos = true;
6532 }
6533
6534 if (!sev->haveReducedPhysBits) {
6535 sev->reduced_phys_bits = sevCaps->reduced_phys_bits;
6536 sev->haveReducedPhysBits = true;
6537 }
6538
6539 return 0;
6540 }
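/*
 * Example (hypothetical values): if the domain XML omits cbitpos and
 * reducedPhysBits, and the capabilities probed from QEMU report cbitpos=47
 * and reducedPhysBits=1, those values are copied into the definition so the
 * later command line build can emit them unconditionally.
 */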
6541
6542
6543 /**
6544 * qemuProcessPrepareDomain:
6545 * @driver: qemu driver
6546 * @vm: domain object
6547 * @flags: qemuProcessStartFlags
6548 *
6549 * This function groups all code that modifies only the live XML of a domain
6550 * which is about to start, and it is the only place to do those modifications.
6551 *
6552 * The VIR_QEMU_PROCESS_START_PRETEND flag indicates that we don't want to
6553 * actually start the domain, only to build a valid qemu command line. If some
6554 * code shouldn't be executed in this case, make sure to check this flag.
6555 *
6556 * TODO: move all XML modification from qemuBuildCommandLine into this function
6557 */
6558 int
6559 qemuProcessPrepareDomain(virQEMUDriver *driver,
6560 virDomainObj *vm,
6561 unsigned int flags)
6562 {
6563 size_t i;
6564 qemuDomainObjPrivate *priv = vm->privateData;
6565 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
6566
6567 priv->machineName = qemuDomainGetMachineName(vm);
6568 if (!priv->machineName)
6569 return -1;
6570
6571 if (!(flags & VIR_QEMU_PROCESS_START_PRETEND)) {
6572 /* If you are using a SecurityDriver with dynamic labelling,
6573 then generate a security label for isolation */
6574 VIR_DEBUG("Generating domain security label (if required)");
6575 if (qemuSecurityGenLabel(driver->securityManager, vm->def) < 0) {
6576 virDomainAuditSecurityLabel(vm, false);
6577 return -1;
6578 }
6579 virDomainAuditSecurityLabel(vm, true);
6580
6581 if (qemuProcessPrepareDomainNUMAPlacement(vm) < 0)
6582 return -1;
6583 }
6584
6585 /* Decide whether we should use virtlogd as the stdio handler for character
6586 * device source backends. */
6587 if (cfg->stdioLogD &&
6588 virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_CHARDEV_FILE_APPEND)) {
6589 priv->chardevStdioLogd = true;
6590 }
6591
6592 /* Track if this domain remembers original owner */
6593 priv->rememberOwner = cfg->rememberOwner;
6594
6595 qemuProcessPrepareAllowReboot(vm);
6596
6597 /*
6598 * Normally PCI addresses are assigned in the virDomainCreate
6599 * or virDomainDefine methods. We might still need to assign
6600 * some here to cope with the question of upgrades. Regardless
6601 * we also need to populate the PCI address set cache for later
6602 * use in hotplug
6603 */
6604 VIR_DEBUG("Assigning domain PCI addresses");
6605 if ((qemuDomainAssignAddresses(vm->def, priv->qemuCaps, driver, vm,
6606 !!(flags & VIR_QEMU_PROCESS_START_NEW))) < 0) {
6607 return -1;
6608 }
6609
6610 if (qemuAssignDeviceAliases(vm->def, priv->qemuCaps) < 0)
6611 return -1;
6612
6613 qemuProcessPrepareDeviceBootorder(vm->def);
6614
6615 VIR_DEBUG("Setting graphics devices");
6616 if (qemuProcessSetupGraphics(driver, vm, priv->qemuCaps, flags) < 0)
6617 return -1;
6618
6619 VIR_DEBUG("Create domain masterKey");
6620 if (qemuDomainMasterKeyCreate(vm) < 0)
6621 return -1;
6622
6623 VIR_DEBUG("Setting up storage");
6624 if (qemuProcessPrepareDomainStorage(driver, vm, priv, cfg, flags) < 0)
6625 return -1;
6626
6627 VIR_DEBUG("Setting up host devices");
6628 if (qemuProcessPrepareDomainHostdevs(vm, priv) < 0)
6629 return -1;
6630
6631 VIR_DEBUG("Prepare chardev source backends for TLS");
6632 qemuDomainPrepareChardevSource(vm->def, cfg);
6633
6634 VIR_DEBUG("Prepare device secrets");
6635 if (qemuDomainSecretPrepare(driver, vm) < 0)
6636 return -1;
6637
6638 VIR_DEBUG("Prepare bios/uefi paths");
6639 if (qemuFirmwareFillDomain(driver, vm->def, flags) < 0)
6640 return -1;
6641 if (qemuDomainInitializePflashStorageSource(vm) < 0)
6642 return -1;
6643
6644 VIR_DEBUG("Preparing external devices");
6645 if (qemuExtDevicesPrepareDomain(driver, vm) < 0)
6646 return -1;
6647
6648 if (flags & VIR_QEMU_PROCESS_START_NEW) {
6649 VIR_DEBUG("Aligning guest memory");
6650 if (qemuDomainAlignMemorySizes(vm->def) < 0)
6651 return -1;
6652 }
6653
6654 for (i = 0; i < vm->def->nchannels; i++) {
6655 if (qemuDomainPrepareChannel(vm->def->channels[i],
6656 priv->channelTargetDir) < 0)
6657 return -1;
6658 }
6659
6660 if (!(priv->monConfig = virDomainChrSourceDefNew(driver->xmlopt)))
6661 return -1;
6662
6663 VIR_DEBUG("Preparing monitor state");
6664 if (qemuProcessPrepareMonitorChr(priv->monConfig, priv->libDir) < 0)
6665 return -1;
6666
6667 priv->monError = false;
6668 priv->monStart = 0;
6669 priv->runningReason = VIR_DOMAIN_RUNNING_UNKNOWN;
6670 priv->pausedReason = VIR_DOMAIN_PAUSED_UNKNOWN;
6671
6672 VIR_DEBUG("Updating guest CPU definition");
6673 if (qemuProcessUpdateGuestCPU(vm->def, priv->qemuCaps, driver->hostarch, flags) < 0)
6674 return -1;
6675
6676 for (i = 0; i < vm->def->nshmems; i++)
6677 qemuDomainPrepareShmemChardev(vm->def->shmems[i]);
6678
6679 if (vm->def->sec &&
6680 vm->def->sec->sectype == VIR_DOMAIN_LAUNCH_SECURITY_SEV) {
6681 VIR_DEBUG("Updating SEV platform info");
6682 if (qemuProcessUpdateSEVInfo(vm) < 0)
6683 return -1;
6684 }
6685
6686 return 0;
6687 }
6688
6689
6690 static int
6691 qemuProcessSEVCreateFile(virDomainObj *vm,
6692 const char *name,
6693 const char *data)
6694 {
6695 qemuDomainObjPrivate *priv = vm->privateData;
6696 virQEMUDriver *driver = priv->driver;
6697 g_autofree char *configFile = NULL;
6698
6699 if (!(configFile = virFileBuildPath(priv->libDir, name, ".base64")))
6700 return -1;
6701
6702 if (virFileRewriteStr(configFile, S_IRUSR | S_IWUSR, data) < 0) {
6703 virReportSystemError(errno, _("failed to write data to config '%s'"),
6704 configFile);
6705 return -1;
6706 }
6707
6708 if (qemuSecurityDomainSetPathLabel(driver, vm, configFile, true) < 0)
6709 return -1;
6710
6711 return 0;
6712 }
6713
6714
6715 static int
6716 qemuProcessPrepareSEVGuestInput(virDomainObj *vm)
6717 {
6718 virDomainSEVDef *sev = &vm->def->sec->data.sev;
6719
6720 VIR_DEBUG("Preparing SEV guest");
6721
6722 if (sev->dh_cert) {
6723 if (qemuProcessSEVCreateFile(vm, "dh_cert", sev->dh_cert) < 0)
6724 return -1;
6725 }
6726
6727 if (sev->session) {
6728 if (qemuProcessSEVCreateFile(vm, "session", sev->session) < 0)
6729 return -1;
6730 }
6731
6732 return 0;
6733 }
6734
6735
6736 static int
6737 qemuProcessPrepareLaunchSecurityGuestInput(virDomainObj *vm)
6738 {
6739 virDomainSecDef *sec = vm->def->sec;
6740
6741 if (!sec)
6742 return 0;
6743
6744 switch ((virDomainLaunchSecurity) sec->sectype) {
6745 case VIR_DOMAIN_LAUNCH_SECURITY_SEV:
6746 return qemuProcessPrepareSEVGuestInput(vm);
6747 case VIR_DOMAIN_LAUNCH_SECURITY_PV:
6748 return 0;
6749 case VIR_DOMAIN_LAUNCH_SECURITY_NONE:
6750 case VIR_DOMAIN_LAUNCH_SECURITY_LAST:
6751 virReportEnumRangeError(virDomainLaunchSecurity, sec->sectype);
6752 return -1;
6753 }
6754
6755 return 0;
6756 }
6757
6758
6759 static int
6760 qemuProcessPrepareHostStorage(virQEMUDriver *driver,
6761 virDomainObj *vm,
6762 unsigned int flags)
6763 {
6764 qemuDomainObjPrivate *priv = vm->privateData;
6765 size_t i;
6766 bool cold_boot = flags & VIR_QEMU_PROCESS_START_COLD;
6767 bool blockdev = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV);
6768
6769 for (i = vm->def->ndisks; i > 0; i--) {
6770 size_t idx = i - 1;
6771 virDomainDiskDef *disk = vm->def->disks[idx];
6772
6773 if (virStorageSourceIsEmpty(disk->src))
6774 continue;
6775
6776 /* backing chain needs to be redetected if we aren't using blockdev */
6777 if (!blockdev || qemuDiskBusIsSD(disk->bus))
6778 virStorageSourceBackingStoreClear(disk->src);
6779
6780 /*
6781 * Apply the startup policy right away for an optional disk with a
6782 * nonexistent source file, as determining the backing chain would surely
6783 * fail and we don't want a noisy error notice in the logs for this case.
6784 */
6785 if (qemuDomainDiskIsMissingLocalOptional(disk) && cold_boot)
6786 VIR_INFO("optional disk '%s' source file is missing, "
6787 "skip checking disk chain", disk->dst);
6788 else if (qemuDomainDetermineDiskChain(driver, vm, disk, NULL, true) >= 0)
6789 continue;
6790
6791 if (qemuDomainCheckDiskStartupPolicy(driver, vm, idx, cold_boot) >= 0)
6792 continue;
6793
6794 return -1;
6795 }
6796
6797 return 0;
6798 }
6799
6800
6801 int
6802 qemuProcessOpenVhostVsock(virDomainVsockDef *vsock)
6803 {
6804 qemuDomainVsockPrivate *priv = (qemuDomainVsockPrivate *)vsock->privateData;
6805 const char *vsock_path = "/dev/vhost-vsock";
6806 int fd;
6807
6808 if ((fd = open(vsock_path, O_RDWR)) < 0) {
6809 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6810 "%s", _("unable to open vhost-vsock device"));
6811 return -1;
6812 }
6813
6814 if (vsock->auto_cid == VIR_TRISTATE_BOOL_YES) {
6815 if (virVsockAcquireGuestCid(fd, &vsock->guest_cid) < 0)
6816 goto error;
6817 } else {
6818 if (virVsockSetGuestCid(fd, vsock->guest_cid) < 0)
6819 goto error;
6820 }
6821
6822 priv->vhostfd = fd;
6823 return 0;
6824
6825 error:
6826 VIR_FORCE_CLOSE(fd);
6827 return -1;
6828 }
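/*
 * Note: with auto_cid enabled, virVsockAcquireGuestCid() is expected to probe
 * for the first free guest CID (guest CIDs below 3 are reserved), whereas an
 * explicitly configured guest_cid is set on the open /dev/vhost-vsock fd
 * as-is; the fd is kept in priv->vhostfd and later handed to QEMU.
 */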
6829
6830
6831 /**
6832 * qemuProcessPrepareHost:
6833 * @driver: qemu driver
6834 * @vm: domain object
6835 * @flags: qemuProcessStartFlags
6836 *
6837 * This function groups all code that modifies the host system (and may also
6838 * update the live XML) to prepare the environment for a domain which is about
6839 * to start, and it is the only place to do those modifications.
6840 *
6841 * TODO: move all host modification from qemuBuildCommandLine into this function
6842 */
6843 int
6844 qemuProcessPrepareHost(virQEMUDriver *driver,
6845 virDomainObj *vm,
6846 unsigned int flags)
6847 {
6848 unsigned int hostdev_flags = 0;
6849 qemuDomainObjPrivate *priv = vm->privateData;
6850 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
6851
6852 if (qemuPrepareNVRAM(driver, vm) < 0)
6853 return -1;
6854
6855 if (vm->def->vsock) {
6856 if (qemuProcessOpenVhostVsock(vm->def->vsock) < 0)
6857 return -1;
6858 }
6859 /* network devices must be "prepared" before hostdevs, because
6860 * setting up a network device might create a new hostdev that
6861 * will need to be set up.
6862 */
6863 VIR_DEBUG("Preparing network devices");
6864 if (qemuProcessNetworkPrepareDevices(driver, vm) < 0)
6865 return -1;
6866
6867 /* Must be run before security labelling */
6868 VIR_DEBUG("Preparing host devices");
6869 if (!cfg->relaxedACS)
6870 hostdev_flags |= VIR_HOSTDEV_STRICT_ACS_CHECK;
6871 if (flags & VIR_QEMU_PROCESS_START_NEW)
6872 hostdev_flags |= VIR_HOSTDEV_COLD_BOOT;
6873 if (qemuHostdevPrepareDomainDevices(driver, vm->def, priv->qemuCaps,
6874 hostdev_flags) < 0)
6875 return -1;
6876
6877 VIR_DEBUG("Preparing chr devices");
6878 if (virDomainChrDefForeach(vm->def,
6879 true,
6880 qemuProcessPrepareChardevDevice,
6881 NULL) < 0)
6882 return -1;
6883
6884 if (qemuProcessBuildDestroyMemoryPaths(driver, vm, NULL, true) < 0)
6885 return -1;
6886
6887 /* Ensure no historical cgroup for this VM is lying around with bogus
6888 * settings */
6889 VIR_DEBUG("Ensuring no historical cgroup is lying around");
6890 qemuRemoveCgroup(vm);
6891
6892 if (g_mkdir_with_parents(cfg->logDir, 0777) < 0) {
6893 virReportSystemError(errno,
6894 _("cannot create log directory %s"),
6895 cfg->logDir);
6896 return -1;
6897 }
6898
6899 VIR_FREE(priv->pidfile);
6900 if (!(priv->pidfile = virPidFileBuildPath(cfg->stateDir, vm->def->name))) {
6901 virReportSystemError(errno,
6902 "%s", _("Failed to build pidfile path."));
6903 return -1;
6904 }
6905
6906 if (unlink(priv->pidfile) < 0 &&
6907 errno != ENOENT) {
6908 virReportSystemError(errno,
6909 _("Cannot remove stale PID file %s"),
6910 priv->pidfile);
6911 return -1;
6912 }
6913
6914 /*
6915 * Create all per-domain directories in order to make sure domain
6916 * with any possible seclabels can access it.
6917 */
6918 if (qemuProcessMakeDir(driver, vm, priv->libDir) < 0 ||
6919 qemuProcessMakeDir(driver, vm, priv->channelTargetDir) < 0)
6920 return -1;
6921
6922 VIR_DEBUG("Write domain masterKey");
6923 if (qemuDomainWriteMasterKeyFile(driver, vm) < 0)
6924 return -1;
6925
6926 VIR_DEBUG("Preparing disks (host)");
6927 if (qemuProcessPrepareHostStorage(driver, vm, flags) < 0)
6928 return -1;
6929
6930 VIR_DEBUG("Preparing hostdevs (host-side)");
6931 if (qemuProcessPrepareHostHostdevs(vm) < 0)
6932 return -1;
6933
6934 VIR_DEBUG("Preparing external devices");
6935 if (qemuExtDevicesPrepareHost(driver, vm) < 0)
6936 return -1;
6937
6938 if (qemuProcessPrepareLaunchSecurityGuestInput(vm) < 0)
6939 return -1;
6940
6941 return 0;
6942 }
6943
6944
6945 /**
6946 * qemuProcessGenID:
6947 * @vm: Pointer to domain object
6948 * @flags: qemuProcessStartFlags
6949 *
6950 * If this domain is requesting to use genid, then update the GUID
6951 * value if the VIR_QEMU_PROCESS_START_GEN_VMID flag is set. This
6952 * flag is set on specific paths during domain start processing when
6953 * there is the possibility that the VM is potentially re-executing
6954 * something that has already been executed before.
6955 */
6956 static int
6957 qemuProcessGenID(virDomainObj *vm,
6958 unsigned int flags)
6959 {
6960 if (!vm->def->genidRequested)
6961 return 0;
6962
6963 /* If we are coming from a path where we must provide a new gen id
6964 * value regardless of whether it was previously generated or provided,
6965 * then generate a new GUID value before we build the command line. */
6966 if (flags & VIR_QEMU_PROCESS_START_GEN_VMID) {
6967 if (virUUIDGenerate(vm->def->genid) < 0) {
6968 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
6969 _("failed to regenerate genid"));
6970 return -1;
6971 }
6972 }
6973
6974 return 0;
6975 }
6976
6977
6978 /**
6979 * qemuProcessSetupDiskThrottlingBlockdev:
6980 *
6981 * Sets up disk throttling for -blockdev via the block_set_io_throttle monitor
6982 * command. This hack should be replaced by proper use of the 'throttle'
6983 * blockdev driver in qemu once it supports changing the throttle group.
6984 * Same hack is done in qemuDomainAttachDiskGeneric.
6985 */
6986 static int
6987 qemuProcessSetupDiskThrottlingBlockdev(virQEMUDriver *driver,
6988 virDomainObj *vm,
6989 qemuDomainAsyncJob asyncJob)
6990 {
6991 qemuDomainObjPrivate *priv = vm->privateData;
6992 size_t i;
6993 int ret = -1;
6994
6995 if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV))
6996 return 0;
6997
6998 VIR_DEBUG("Setting up disk throttling for -blockdev via block_set_io_throttle");
6999
7000 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
7001 return -1;
7002
7003 for (i = 0; i < vm->def->ndisks; i++) {
7004 virDomainDiskDef *disk = vm->def->disks[i];
7005 qemuDomainDiskPrivate *diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
7006
7007 /* sd-cards are instantiated via -drive */
7008 if (qemuDiskBusIsSD(disk->bus))
7009 continue;
7010
7011 /* Setting throttling for empty drives fails */
7012 if (virStorageSourceIsEmpty(disk->src))
7013 continue;
7014
7015 if (!qemuDiskConfigBlkdeviotuneEnabled(disk))
7016 continue;
7017
7018 if (qemuMonitorSetBlockIoThrottle(qemuDomainGetMonitor(vm), NULL,
7019 diskPriv->qomName, &disk->blkdeviotune) < 0)
7020 goto cleanup;
7021 }
7022
7023 ret = 0;
7024
7025 cleanup:
7026 if (qemuDomainObjExitMonitor(driver, vm) < 0)
7027 ret = -1;
7028 return ret;
7029 }
7030
7031
7032 static int
7033 qemuProcessEnableDomainNamespaces(virQEMUDriver *driver,
7034 virDomainObj *vm)
7035 {
7036 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
7037
7038 if (virBitmapIsBitSet(cfg->namespaces, QEMU_DOMAIN_NS_MOUNT) &&
7039 qemuDomainEnableNamespace(vm, QEMU_DOMAIN_NS_MOUNT) < 0)
7040 return -1;
7041
7042 return 0;
7043 }
7044
7045
7046 static int
7047 qemuProcessEnablePerf(virDomainObj *vm)
7048 {
7049 qemuDomainObjPrivate *priv = vm->privateData;
7050 size_t i;
7051
7052 if (!(priv->perf = virPerfNew()))
7053 return -1;
7054
7055 for (i = 0; i < VIR_PERF_EVENT_LAST; i++) {
7056 if (vm->def->perf.events[i] == VIR_TRISTATE_BOOL_YES &&
7057 virPerfEventEnable(priv->perf, i, vm->pid) < 0)
7058 return -1;
7059 }
7060
7061 return 0;
7062 }
7063
7064
7065 static int
7066 qemuProcessSetupDisksTransientSnapshot(virDomainObj *vm,
7067 qemuDomainAsyncJob asyncJob)
7068 {
7069 g_autoptr(qemuSnapshotDiskContext) snapctxt = NULL;
7070 g_autoptr(GHashTable) blockNamedNodeData = NULL;
7071 size_t i;
7072
7073 if (!(blockNamedNodeData = qemuBlockGetNamedNodeData(vm, asyncJob)))
7074 return -1;
7075
7076 snapctxt = qemuSnapshotDiskContextNew(vm->def->ndisks, vm, asyncJob);
7077
7078 for (i = 0; i < vm->def->ndisks; i++) {
7079 virDomainDiskDef *domdisk = vm->def->disks[i];
7080 g_autoptr(virDomainSnapshotDiskDef) snapdisk = NULL;
7081
7082 if (!domdisk->transient ||
7083 domdisk->transientShareBacking == VIR_TRISTATE_BOOL_YES)
7084 continue;
7085
7086 /* validation code makes sure that we do this only for local disks
7087 * with a file source */
7088
7089 if (!(snapdisk = qemuSnapshotGetTransientDiskDef(domdisk, vm->def->name)))
7090 return -1;
7091
7092 if (qemuSnapshotDiskPrepareOne(snapctxt, domdisk, snapdisk,
7093 blockNamedNodeData,
7094 false,
7095 false) < 0)
7096 return -1;
7097 }
7098
7099 if (qemuSnapshotDiskCreate(snapctxt) < 0)
7100 return -1;
7101
7102 for (i = 0; i < vm->def->ndisks; i++) {
7103 virDomainDiskDef *domdisk = vm->def->disks[i];
7104
7105 if (!domdisk->transient ||
7106 domdisk->transientShareBacking == VIR_TRISTATE_BOOL_YES)
7107 continue;
7108
7109 QEMU_DOMAIN_DISK_PRIVATE(domdisk)->transientOverlayCreated = true;
7110 }
7111
7112 return 0;
7113 }
7114
7115
7116 static int
7117 qemuProcessSetupDisksTransientHotplug(virDomainObj *vm,
7118 qemuDomainAsyncJob asyncJob)
7119 {
7120 qemuDomainObjPrivate *priv = vm->privateData;
7121 bool hasHotpluggedDisk = false;
7122 size_t i;
7123
7124 for (i = 0; i < vm->def->ndisks; i++) {
7125 virDomainDiskDef *domdisk = vm->def->disks[i];
7126
7127 if (!domdisk->transient ||
7128 domdisk->transientShareBacking != VIR_TRISTATE_BOOL_YES)
7129 continue;
7130
7131 if (qemuDomainAttachDiskGeneric(priv->driver, vm, domdisk, asyncJob) < 0)
7132 return -1;
7133
7134 hasHotpluggedDisk = true;
7135 }
7136
7137 /* in order to allow booting from such disks we need to issue a system-reset
7138 * so that the firmware tables recording bootable devices are regenerated */
7139 if (hasHotpluggedDisk) {
7140 int rc;
7141
7142 if (qemuDomainObjEnterMonitorAsync(priv->driver, vm, asyncJob) < 0)
7143 return -1;
7144
7145 rc = qemuMonitorSystemReset(priv->mon);
7146
7147 if (qemuDomainObjExitMonitor(priv->driver, vm) < 0 || rc < 0)
7148 return -1;
7149 }
7150
7151 return 0;
7152 }
7153
7154
7155 static int
7156 qemuProcessSetupDisksTransient(virDomainObj *vm,
7157 qemuDomainAsyncJob asyncJob)
7158 {
7159 qemuDomainObjPrivate *priv = vm->privateData;
7160
7161 if (!(virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV)))
7162 return 0;
7163
7164 if (qemuProcessSetupDisksTransientSnapshot(vm, asyncJob) < 0)
7165 return -1;
7166
7167 if (qemuProcessSetupDisksTransientHotplug(vm, asyncJob) < 0)
7168 return -1;
7169
7170 return 0;
7171 }
7172
7173
7174 static int
7175 qemuProcessSetupLifecycleActions(virDomainObj *vm,
7176 qemuDomainAsyncJob asyncJob)
7177 {
7178 qemuDomainObjPrivate *priv = vm->privateData;
7179 int rc;
7180
7181 if (!(virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_SET_ACTION)))
7182 return 0;
7183
7184 /* for now we handle only onReboot->destroy here as an alternative to
7185 * '-no-reboot' on the commandline */
7186 if (vm->def->onReboot != VIR_DOMAIN_LIFECYCLE_ACTION_DESTROY)
7187 return 0;
7188
7189 if (qemuDomainObjEnterMonitorAsync(priv->driver, vm, asyncJob) < 0)
7190 return -1;
7191
7192 rc = qemuMonitorSetAction(priv->mon,
7193 QEMU_MONITOR_ACTION_SHUTDOWN_KEEP,
7194 QEMU_MONITOR_ACTION_REBOOT_SHUTDOWN,
7195 QEMU_MONITOR_ACTION_WATCHDOG_KEEP,
7196 QEMU_MONITOR_ACTION_PANIC_KEEP);
7197
7198 if (qemuDomainObjExitMonitor(priv->driver, vm) < 0 || rc < 0)
7199 return -1;
7200
7201 return 0;
7202 }
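/*
 * Minimal sketch of the resulting QMP exchange, assuming QEMU's documented
 * 'set-action' syntax:
 *   {"execute": "set-action", "arguments": {"reboot": "shutdown"}}
 * The *_KEEP constants are meant to leave the other lifecycle actions at
 * their current values.
 */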
7203
7204
7205 /**
7206 * qemuProcessLaunch:
7207 *
7208 * Launch a new QEMU process with stopped virtual CPUs.
7209 *
7210 * The caller is supposed to call qemuProcessStop with appropriate
7211 * flags in case of failure.
7212 *
7213 * Returns 0 on success,
7214 * -1 on error which happened before devices were labeled and thus
7215 * there is no need to restore them,
7216 * -2 on error requesting security labels to be restored.
7217 */
7218 int
7219 qemuProcessLaunch(virConnectPtr conn,
7220 virQEMUDriver *driver,
7221 virDomainObj *vm,
7222 qemuDomainAsyncJob asyncJob,
7223 qemuProcessIncomingDef *incoming,
7224 virDomainMomentObj *snapshot,
7225 virNetDevVPortProfileOp vmop,
7226 unsigned int flags)
7227 {
7228 int ret = -1;
7229 int rv;
7230 int logfile = -1;
7231 g_autoptr(qemuDomainLogContext) logCtxt = NULL;
7232 qemuDomainObjPrivate *priv = vm->privateData;
7233 g_autoptr(virCommand) cmd = NULL;
7234 struct qemuProcessHookData hookData;
7235 g_autoptr(virQEMUDriverConfig) cfg = NULL;
7236 size_t nnicindexes = 0;
7237 g_autofree int *nicindexes = NULL;
7238 unsigned long long maxMemLock = 0;
7239
7240 VIR_DEBUG("conn=%p driver=%p vm=%p name=%s if=%d asyncJob=%d "
7241 "incoming.launchURI=%s incoming.deferredURI=%s "
7242 "incoming.fd=%d incoming.path=%s "
7243 "snapshot=%p vmop=%d flags=0x%x",
7244 conn, driver, vm, vm->def->name, vm->def->id, asyncJob,
7245 NULLSTR(incoming ? incoming->launchURI : NULL),
7246 NULLSTR(incoming ? incoming->deferredURI : NULL),
7247 incoming ? incoming->fd : -1,
7248 NULLSTR(incoming ? incoming->path : NULL),
7249 snapshot, vmop, flags);
7250
7251 /* Okay, these are just internal flags,
7252 * but it doesn't hurt to check */
7253 virCheckFlags(VIR_QEMU_PROCESS_START_COLD |
7254 VIR_QEMU_PROCESS_START_PAUSED |
7255 VIR_QEMU_PROCESS_START_AUTODESTROY |
7256 VIR_QEMU_PROCESS_START_NEW |
7257 VIR_QEMU_PROCESS_START_GEN_VMID, -1);
7258
7259 cfg = virQEMUDriverGetConfig(driver);
7260
7261 if (flags & VIR_QEMU_PROCESS_START_AUTODESTROY) {
7262 if (!conn) {
7263 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
7264 _("Domain autodestroy requires a connection handle"));
7265 return -1;
7266 }
7267 if (driver->embeddedRoot) {
7268 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
7269 _("Domain autodestroy not supported for embedded drivers yet"));
7270 return -1;
7271 }
7272 }
7273
7274 hookData.vm = vm;
7275 hookData.driver = driver;
7276 /* We don't increase cfg's reference counter here. */
7277 hookData.cfg = cfg;
7278
7279 VIR_DEBUG("Creating domain log file");
7280 if (!(logCtxt = qemuDomainLogContextNew(driver, vm,
7281 QEMU_DOMAIN_LOG_CONTEXT_MODE_START))) {
7282 virLastErrorPrefixMessage("%s", _("can't connect to virtlogd"));
7283 goto cleanup;
7284 }
7285 logfile = qemuDomainLogContextGetWriteFD(logCtxt);
7286
7287 if (qemuProcessGenID(vm, flags) < 0)
7288 goto cleanup;
7289
7290 if (qemuExtDevicesStart(driver, vm, incoming != NULL) < 0)
7291 goto cleanup;
7292
7293 VIR_DEBUG("Building emulator command line");
7294 if (!(cmd = qemuBuildCommandLine(driver,
7295 qemuDomainLogContextGetManager(logCtxt),
7296 driver->securityManager,
7297 vm,
7298 incoming ? incoming->launchURI : NULL,
7299 snapshot, vmop,
7300 false,
7301 qemuCheckFips(vm),
7302 &nnicindexes, &nicindexes, 0)))
7303 goto cleanup;
7304
7305 if (incoming && incoming->fd != -1)
7306 virCommandPassFD(cmd, incoming->fd, 0);
7307
7308 /* now that we know it is about to start call the hook if present */
7309 if (qemuProcessStartHook(driver, vm,
7310 VIR_HOOK_QEMU_OP_START,
7311 VIR_HOOK_SUBOP_BEGIN) < 0)
7312 goto cleanup;
7313
7314 qemuLogOperation(vm, "starting up", cmd, logCtxt);
7315
7316 qemuDomainObjCheckTaint(driver, vm, logCtxt, incoming != NULL);
7317
7318 qemuDomainLogContextMarkPosition(logCtxt);
7319
7320 VIR_DEBUG("Building mount namespace");
7321
7322 if (qemuProcessEnableDomainNamespaces(driver, vm) < 0)
7323 goto cleanup;
7324
7325 VIR_DEBUG("Setting up raw IO");
7326 if (qemuProcessSetupRawIO(driver, vm, cmd) < 0)
7327 goto cleanup;
7328
7329 virCommandSetPreExecHook(cmd, qemuProcessHook, &hookData);
7330 virCommandSetUmask(cmd, 0x002);
7331
7332 VIR_DEBUG("Setting up process limits");
7333
7334 /* In some situations, e.g. VFIO passthrough, QEMU might need to lock a
7335 * significant amount of memory, so we need to set the limit accordingly */
7336 maxMemLock = qemuDomainGetMemLockLimitBytes(vm->def, false);
7337
7338 /* For all these settings, zero indicates that the limit should
7339 * not be set explicitly and the default/inherited limit should
7340 * be applied instead */
7341 if (maxMemLock > 0)
7342 virCommandSetMaxMemLock(cmd, maxMemLock);
7343 if (cfg->maxProcesses > 0)
7344 virCommandSetMaxProcesses(cmd, cfg->maxProcesses);
7345 if (cfg->maxFiles > 0)
7346 virCommandSetMaxFiles(cmd, cfg->maxFiles);
7347
7348 /* In this case, however, zero means that core dumps should be
7349 * disabled, and so we always need to set the limit explicitly */
7350 virCommandSetMaxCoreSize(cmd, cfg->maxCore);
7351
7352 VIR_DEBUG("Setting up security labelling");
7353 if (qemuSecuritySetChildProcessLabel(driver->securityManager,
7354 vm->def, cmd) < 0)
7355 goto cleanup;
7356
7357 virCommandSetOutputFD(cmd, &logfile);
7358 virCommandSetErrorFD(cmd, &logfile);
7359 virCommandNonblockingFDs(cmd);
7360 virCommandSetPidFile(cmd, priv->pidfile);
7361 virCommandDaemonize(cmd);
7362 virCommandRequireHandshake(cmd);
7363
7364 if (qemuSecurityPreFork(driver->securityManager) < 0)
7365 goto cleanup;
7366 rv = virCommandRun(cmd, NULL);
7367 qemuSecurityPostFork(driver->securityManager);
7368
7369 /* wait for qemu process to show up */
7370 if (rv == 0) {
7371 if ((rv = virPidFileReadPath(priv->pidfile, &vm->pid)) < 0) {
7372 virReportSystemError(-rv,
7373 _("Domain %s didn't show up"),
7374 vm->def->name);
7375 goto cleanup;
7376 }
7377 VIR_DEBUG("QEMU vm=%p name=%s running with pid=%lld",
7378 vm, vm->def->name, (long long)vm->pid);
7379 } else {
7380 VIR_DEBUG("QEMU vm=%p name=%s failed to spawn",
7381 vm, vm->def->name);
7382 goto cleanup;
7383 }
7384
7385 VIR_DEBUG("Writing early domain status to disk");
7386 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
7387 goto cleanup;
7388
7389 VIR_DEBUG("Waiting for handshake from child");
7390 if (virCommandHandshakeWait(cmd) < 0) {
7391 /* Read errors from child that occurred between fork and exec. */
7392 qemuProcessReportLogError(logCtxt,
7393 _("Process exited prior to exec"));
7394 goto cleanup;
7395 }
7396
7397 VIR_DEBUG("Building domain mount namespace (if required)");
7398 if (qemuDomainBuildNamespace(cfg, vm) < 0)
7399 goto cleanup;
7400
7401 VIR_DEBUG("Setting up domain cgroup (if required)");
7402 if (qemuSetupCgroup(vm, nnicindexes, nicindexes) < 0)
7403 goto cleanup;
7404
7405 VIR_DEBUG("Setting up domain perf (if required)");
7406 if (qemuProcessEnablePerf(vm) < 0)
7407 goto cleanup;
7408
7409 /* This must be done after cgroup placement to avoid resetting CPU
7410 * affinity */
7411 if (qemuProcessInitCpuAffinity(vm) < 0)
7412 goto cleanup;
7413
7414 VIR_DEBUG("Setting emulator tuning/settings");
7415 if (qemuProcessSetupEmulator(vm) < 0)
7416 goto cleanup;
7417
7418 VIR_DEBUG("Setting cgroup for external devices (if required)");
7419 if (qemuSetupCgroupForExtDevices(vm, driver) < 0)
7420 goto cleanup;
7421
7422 VIR_DEBUG("Setting up resctrl");
7423 if (qemuProcessResctrlCreate(driver, vm) < 0)
7424 goto cleanup;
7425
7426 VIR_DEBUG("Setting up managed PR daemon");
7427 if (virDomainDefHasManagedPR(vm->def) &&
7428 qemuProcessStartManagedPRDaemon(vm) < 0)
7429 goto cleanup;
7430
7431 VIR_DEBUG("Setting domain security labels");
7432 if (qemuSecuritySetAllLabel(driver,
7433 vm,
7434 incoming ? incoming->path : NULL,
7435 incoming != NULL) < 0)
7436 goto cleanup;
7437
7438 /* Security manager labeled all devices, therefore
7439 * if any operation from now on fails, we need to ask the caller to
7440 * restore labels.
7441 */
7442 ret = -2;
7443
7444 if (incoming && incoming->fd != -1) {
7445 /* if there's an fd to migrate from, and it's a pipe, put the
7446 * proper security label on it
7447 */
7448 struct stat stdin_sb;
7449
7450 VIR_DEBUG("setting security label on pipe used for migration");
7451
7452 if (fstat(incoming->fd, &stdin_sb) < 0) {
7453 virReportSystemError(errno,
7454 _("cannot stat fd %d"), incoming->fd);
7455 goto cleanup;
7456 }
7457 if (S_ISFIFO(stdin_sb.st_mode) &&
7458 qemuSecuritySetImageFDLabel(driver->securityManager,
7459 vm->def, incoming->fd) < 0)
7460 goto cleanup;
7461 }
7462
7463 VIR_DEBUG("Labelling done, completing handshake to child");
7464 if (virCommandHandshakeNotify(cmd) < 0)
7465 goto cleanup;
7466 VIR_DEBUG("Handshake complete, child running");
7467
7468 if (qemuDomainObjStartWorker(vm) < 0)
7469 goto cleanup;
7470
7471 VIR_DEBUG("Waiting for monitor to show up");
7472 if (qemuProcessWaitForMonitor(driver, vm, asyncJob, logCtxt) < 0)
7473 goto cleanup;
7474
7475 if (qemuConnectAgent(driver, vm) < 0)
7476 goto cleanup;
7477
7478 VIR_DEBUG("Verifying and updating provided guest CPU");
7479 if (qemuProcessUpdateAndVerifyCPU(driver, vm, asyncJob) < 0)
7480 goto cleanup;
7481
7482 VIR_DEBUG("setting up hotpluggable cpus");
7483 if (qemuDomainHasHotpluggableStartupVcpus(vm->def)) {
7484 if (qemuDomainRefreshVcpuInfo(driver, vm, asyncJob, false) < 0)
7485 goto cleanup;
7486
7487 if (qemuProcessValidateHotpluggableVcpus(vm->def) < 0)
7488 goto cleanup;
7489
7490 if (qemuProcessSetupHotpluggableVcpus(driver, vm, asyncJob) < 0)
7491 goto cleanup;
7492 }
7493
7494 VIR_DEBUG("Refreshing VCPU info");
7495 if (qemuDomainRefreshVcpuInfo(driver, vm, asyncJob, false) < 0)
7496 goto cleanup;
7497
7498 if (qemuDomainValidateVcpuInfo(vm) < 0)
7499 goto cleanup;
7500
7501 qemuDomainVcpuPersistOrder(vm->def);
7502
7503 VIR_DEBUG("Detecting IOThread PIDs");
7504 if (qemuProcessDetectIOThreadPIDs(driver, vm, asyncJob) < 0)
7505 goto cleanup;
7506
7507 VIR_DEBUG("Setting global CPU cgroup (if required)");
7508 if (qemuSetupGlobalCpuCgroup(vm) < 0)
7509 goto cleanup;
7510
7511 VIR_DEBUG("Setting vCPU tuning/settings");
7512 if (qemuProcessSetupVcpus(vm) < 0)
7513 goto cleanup;
7514
7515 VIR_DEBUG("Setting IOThread tuning/settings");
7516 if (qemuProcessSetupIOThreads(vm) < 0)
7517 goto cleanup;
7518
7519 VIR_DEBUG("Setting emulator scheduler");
7520 if (vm->def->cputune.emulatorsched &&
7521 virProcessSetScheduler(vm->pid,
7522 vm->def->cputune.emulatorsched->policy,
7523 vm->def->cputune.emulatorsched->priority) < 0)
7524 goto cleanup;
7525
7526 VIR_DEBUG("Setting any required VM passwords");
7527 if (qemuProcessInitPasswords(driver, vm, asyncJob) < 0)
7528 goto cleanup;
7529
7530 /* set default link states */
7531 /* qemu doesn't support setting this on the command line, so
7532 * enter the monitor */
7533 VIR_DEBUG("Setting network link states");
7534 if (qemuProcessSetLinkStates(driver, vm, asyncJob) < 0)
7535 goto cleanup;
7536
7537 VIR_DEBUG("Setting initial memory amount");
7538 if (qemuProcessSetupBalloon(driver, vm, asyncJob) < 0)
7539 goto cleanup;
7540
7541 if (qemuProcessSetupDiskThrottlingBlockdev(driver, vm, asyncJob) < 0)
7542 goto cleanup;
7543
7544 /* Since CPUs were not started yet, the balloon could not return the memory
7545 * to the host and thus cur_balloon needs to be updated so that GetXMLdesc
7546 * and friends return the correct size in case they can't grab the job */
7547 if (!incoming && !snapshot &&
7548 qemuProcessRefreshBalloonState(driver, vm, asyncJob) < 0)
7549 goto cleanup;
7550
7551 if (flags & VIR_QEMU_PROCESS_START_AUTODESTROY &&
7552 qemuProcessAutoDestroyAdd(driver, vm, conn) < 0)
7553 goto cleanup;
7554
7555 if (!incoming && !snapshot) {
7556 VIR_DEBUG("Setting up transient disk");
7557 if (qemuProcessSetupDisksTransient(vm, asyncJob) < 0)
7558 goto cleanup;
7559 }
7560
7561 VIR_DEBUG("Setting handling of lifecycle actions");
7562 if (qemuProcessSetupLifecycleActions(vm, asyncJob) < 0)
7563 goto cleanup;
7564
7565 ret = 0;
7566
7567 cleanup:
7568 qemuDomainSecretDestroy(vm);
7569 return ret;
7570 }
7571
7572
7573 /**
7574 * qemuProcessRefreshState:
7575 * @driver: qemu driver data
7576 * @vm: domain to refresh
7577 * @asyncJob: async job type
7578 *
7579 * This function gathers calls to refresh qemu state after startup. This
7580 * function is called after a deferred migration finishes so that we can update
7581 * state influenced by the migration stream.
7582 */
7583 int
7584 qemuProcessRefreshState(virQEMUDriver *driver,
7585 virDomainObj *vm,
7586 qemuDomainAsyncJob asyncJob)
7587 {
7588 qemuDomainObjPrivate *priv = vm->privateData;
7589
7590 VIR_DEBUG("Fetching list of active devices");
7591 if (qemuDomainUpdateDeviceList(driver, vm, asyncJob) < 0)
7592 return -1;
7593
7594 VIR_DEBUG("Updating info of memory devices");
7595 if (qemuDomainUpdateMemoryDeviceInfo(driver, vm, asyncJob) < 0)
7596 return -1;
7597
7598 VIR_DEBUG("Detecting actual memory size for video device");
7599 if (qemuProcessUpdateVideoRamSize(driver, vm, asyncJob) < 0)
7600 return -1;
7601
7602 VIR_DEBUG("Updating disk data");
7603 if (qemuProcessRefreshDisks(driver, vm, asyncJob) < 0)
7604 return -1;
7605 if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV) &&
7606 qemuBlockNodeNamesDetect(driver, vm, asyncJob) < 0)
7607 return -1;
7608
7609 return 0;
7610 }
7611
7612
7613 /**
7614 * qemuProcessFinishStartup:
7615 *
7616 * Finish starting a new domain.
7617 */
7618 int
7619 qemuProcessFinishStartup(virQEMUDriver *driver,
7620 virDomainObj *vm,
7621 qemuDomainAsyncJob asyncJob,
7622 bool startCPUs,
7623 virDomainPausedReason pausedReason)
7624 {
7625 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
7626
7627 if (startCPUs) {
7628 VIR_DEBUG("Starting domain CPUs");
7629 if (qemuProcessStartCPUs(driver, vm,
7630 VIR_DOMAIN_RUNNING_BOOTED,
7631 asyncJob) < 0) {
7632 if (virGetLastErrorCode() == VIR_ERR_OK)
7633 virReportError(VIR_ERR_OPERATION_FAILED, "%s",
7634 _("resume operation failed"));
7635 return -1;
7636 }
7637 } else {
7638 virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, pausedReason);
7639 }
7640
7641 VIR_DEBUG("Writing domain status to disk");
7642 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
7643 return -1;
7644
7645 if (qemuProcessStartHook(driver, vm,
7646 VIR_HOOK_QEMU_OP_STARTED,
7647 VIR_HOOK_SUBOP_BEGIN) < 0)
7648 return -1;
7649
7650 return 0;
7651 }
7652
7653
7654 int
7655 qemuProcessStart(virConnectPtr conn,
7656 virQEMUDriver *driver,
7657 virDomainObj *vm,
7658 virCPUDef *updatedCPU,
7659 qemuDomainAsyncJob asyncJob,
7660 const char *migrateFrom,
7661 int migrateFd,
7662 const char *migratePath,
7663 virDomainMomentObj *snapshot,
7664 virNetDevVPortProfileOp vmop,
7665 unsigned int flags)
7666 {
7667 qemuDomainObjPrivate *priv = vm->privateData;
7668 qemuProcessIncomingDef *incoming = NULL;
7669 unsigned int stopFlags;
7670 bool relabel = false;
7671 bool relabelSavedState = false;
7672 int ret = -1;
7673 int rv;
7674
7675 VIR_DEBUG("conn=%p driver=%p vm=%p name=%s id=%d asyncJob=%s "
7676 "migrateFrom=%s migrateFd=%d migratePath=%s "
7677 "snapshot=%p vmop=%d flags=0x%x",
7678 conn, driver, vm, vm->def->name, vm->def->id,
7679 qemuDomainAsyncJobTypeToString(asyncJob),
7680 NULLSTR(migrateFrom), migrateFd, NULLSTR(migratePath),
7681 snapshot, vmop, flags);
7682
7683 virCheckFlagsGoto(VIR_QEMU_PROCESS_START_COLD |
7684 VIR_QEMU_PROCESS_START_PAUSED |
7685 VIR_QEMU_PROCESS_START_AUTODESTROY |
7686 VIR_QEMU_PROCESS_START_GEN_VMID, cleanup);
7687
7688 if (!migrateFrom && !snapshot)
7689 flags |= VIR_QEMU_PROCESS_START_NEW;
7690
7691 if (qemuProcessInit(driver, vm, updatedCPU,
7692 asyncJob, !!migrateFrom, flags) < 0)
7693 goto cleanup;
7694
7695 if (migrateFrom) {
7696 incoming = qemuProcessIncomingDefNew(priv->qemuCaps, NULL, migrateFrom,
7697 migrateFd, migratePath);
7698 if (!incoming)
7699 goto stop;
7700 }
7701
7702 if (qemuProcessPrepareDomain(driver, vm, flags) < 0)
7703 goto stop;
7704
7705 if (qemuProcessPrepareHost(driver, vm, flags) < 0)
7706 goto stop;
7707
7708 if (migratePath) {
7709 if (qemuSecuritySetSavedStateLabel(driver->securityManager,
7710 vm->def, migratePath) < 0)
7711 goto cleanup;
7712 relabelSavedState = true;
7713 }
7714
7715 if ((rv = qemuProcessLaunch(conn, driver, vm, asyncJob, incoming,
7716 snapshot, vmop, flags)) < 0) {
7717 if (rv == -2)
7718 relabel = true;
7719 goto stop;
7720 }
7721 relabel = true;
7722
7723 if (incoming) {
7724 if (incoming->deferredURI &&
7725 qemuMigrationDstRun(driver, vm, incoming->deferredURI, asyncJob) < 0)
7726 goto stop;
7727 } else {
7728 /* Refresh state of devices from QEMU. During migration this happens
7729 * in qemuMigrationDstFinish to ensure that state information is fully
7730 * transferred. */
7731 if (qemuProcessRefreshState(driver, vm, asyncJob) < 0)
7732 goto stop;
7733 }
7734
7735 if (qemuProcessFinishStartup(driver, vm, asyncJob,
7736 !(flags & VIR_QEMU_PROCESS_START_PAUSED),
7737 incoming ?
7738 VIR_DOMAIN_PAUSED_MIGRATION :
7739 VIR_DOMAIN_PAUSED_USER) < 0)
7740 goto stop;
7741
7742 if (!incoming) {
7743 /* We keep watching the qemu log for errors during incoming migration;
7744 * otherwise stop reporting errors from the qemu log. */
7745 qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL);
7746 }
7747
7748 ret = 0;
7749
7750 cleanup:
7751 if (relabelSavedState &&
7752 qemuSecurityRestoreSavedStateLabel(driver->securityManager,
7753 vm->def, migratePath) < 0)
7754 VIR_WARN("failed to restore save state label on %s", migratePath);
7755 qemuProcessIncomingDefFree(incoming);
7756 return ret;
7757
7758 stop:
7759 stopFlags = 0;
7760 if (!relabel)
7761 stopFlags |= VIR_QEMU_PROCESS_STOP_NO_RELABEL;
7762 if (migrateFrom)
7763 stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
7764 if (priv->mon)
7765 qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL);
7766 qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
7767 goto cleanup;
7768 }
7769
7770
7771 int
7772 qemuProcessCreatePretendCmdPrepare(virQEMUDriver *driver,
7773 virDomainObj *vm,
7774 const char *migrateURI,
7775 bool standalone,
7776 unsigned int flags)
7777 {
7778 virCheckFlags(VIR_QEMU_PROCESS_START_COLD |
7779 VIR_QEMU_PROCESS_START_PAUSED |
7780 VIR_QEMU_PROCESS_START_AUTODESTROY, -1);
7781
7782 flags |= VIR_QEMU_PROCESS_START_PRETEND;
7783
7784 if (!migrateURI)
7785 flags |= VIR_QEMU_PROCESS_START_NEW;
7786
7787 if (standalone)
7788 flags |= VIR_QEMU_PROCESS_START_STANDALONE;
7789
7790 if (qemuProcessInit(driver, vm, NULL, QEMU_ASYNC_JOB_NONE,
7791 !!migrateURI, flags) < 0)
7792 return -1;
7793
7794 if (qemuProcessPrepareDomain(driver, vm, flags) < 0)
7795 return -1;
7796
7797 return 0;
7798 }
7799
7800
7801 virCommand *
7802 qemuProcessCreatePretendCmdBuild(virQEMUDriver *driver,
7803 virDomainObj *vm,
7804 const char *migrateURI,
7805 bool enableFips,
7806 bool standalone)
7807 {
7808 VIR_DEBUG("Building emulator command line");
7809 return qemuBuildCommandLine(driver,
7810 NULL,
7811 driver->securityManager,
7812 vm,
7813 migrateURI,
7814 NULL,
7815 VIR_NETDEV_VPORT_PROFILE_OP_NO_OP,
7816 standalone,
7817 enableFips,
7818 NULL,
7819 NULL,
7820 0);
7821 }
7822
7823
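/**
 * qemuProcessKill:
 *
 * Kill the QEMU process of @vm according to the VIR_QEMU_PROCESS_KILL_*
 * bits in @flags: NOCHECK skips the "is the domain active" check, NOWAIT
 * sends a single SIGTERM (or SIGKILL with FORCE) and returns immediately,
 * otherwise the process is killed and waited for, with an extra delay of
 * two seconds per hostdev so the kernel has time to release resources.
 */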
7824 int
7825 qemuProcessKill(virDomainObj *vm, unsigned int flags)
7826 {
7827 VIR_DEBUG("vm=%p name=%s pid=%lld flags=0x%x",
7828 vm, vm->def->name,
7829 (long long)vm->pid, flags);
7830
7831 if (!(flags & VIR_QEMU_PROCESS_KILL_NOCHECK)) {
7832 if (!virDomainObjIsActive(vm)) {
7833 VIR_DEBUG("VM '%s' not active", vm->def->name);
7834 return 0;
7835 }
7836 }
7837
7838 if (flags & VIR_QEMU_PROCESS_KILL_NOWAIT) {
7839 virProcessKill(vm->pid,
7840 (flags & VIR_QEMU_PROCESS_KILL_FORCE) ?
7841 SIGKILL : SIGTERM);
7842 return 0;
7843 }
7844
7845 /* Request an extra delay of two seconds per current nhostdevs
7846 * to be safe against stalls by the kernel freeing up the resources */
7847 return virProcessKillPainfullyDelay(vm->pid,
7848 !!(flags & VIR_QEMU_PROCESS_KILL_FORCE),
7849 vm->def->nhostdevs * 2,
7850 false);
7851 }
7852
7853
7854 /**
7855 * qemuProcessBeginStopJob:
7856 *
7857 * Stop all current jobs by killing the domain and start a new one for
7858 * qemuProcessStop.
7859 */
7860 int
7861 qemuProcessBeginStopJob(virQEMUDriver *driver,
7862 virDomainObj *vm,
7863 qemuDomainJob job,
7864 bool forceKill)
7865 {
7866 qemuDomainObjPrivate *priv = vm->privateData;
7867 unsigned int killFlags = forceKill ? VIR_QEMU_PROCESS_KILL_FORCE : 0;
7868 int ret = -1;
7869
7870 /* We need to prevent monitor EOF callback from doing our work (and
7871 * sending misleading events) while the vm is unlocked inside
7872 * BeginJob/ProcessKill API
7873 */
7874 priv->beingDestroyed = true;
7875
7876 if (qemuProcessKill(vm, killFlags) < 0)
7877 goto cleanup;
7878
7879 /* Wake up anything waiting on domain condition */
7880 virDomainObjBroadcast(vm);
7881
7882 if (qemuDomainObjBeginJob(driver, vm, job) < 0)
7883 goto cleanup;
7884
7885 ret = 0;
7886
7887 cleanup:
7888 priv->beingDestroyed = false;
7889 return ret;
7890 }
7891
7892
7893 void qemuProcessStop(virQEMUDriver *driver,
7894 virDomainObj *vm,
7895 virDomainShutoffReason reason,
7896 qemuDomainAsyncJob asyncJob,
7897 unsigned int flags)
7898 {
7899 int ret;
7900 int retries = 0;
7901 qemuDomainObjPrivate *priv = vm->privateData;
7902 virErrorPtr orig_err;
7903 virDomainDef *def = vm->def;
7904 const virNetDevVPortProfile *vport = NULL;
7905 size_t i;
7906 g_autofree char *timestamp = NULL;
7907 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
7908 g_autoptr(virConnect) conn = NULL;
7909
7910 VIR_DEBUG("Shutting down vm=%p name=%s id=%d pid=%lld, "
7911 "reason=%s, asyncJob=%s, flags=0x%x",
7912 vm, vm->def->name, vm->def->id,
7913 (long long)vm->pid,
7914 virDomainShutoffReasonTypeToString(reason),
7915 qemuDomainAsyncJobTypeToString(asyncJob),
7916 flags);
7917
7918 /* This method is routinely used in cleanup paths. Preserve the current
7919 * error so that errors raised during cleanup don't squash a legit one. */
7920 virErrorPreserveLast(&orig_err);
7921
7922 if (asyncJob != QEMU_ASYNC_JOB_NONE) {
7923 if (qemuDomainObjBeginNestedJob(driver, vm, asyncJob) < 0)
7924 goto cleanup;
7925 } else if (priv->job.asyncJob != QEMU_ASYNC_JOB_NONE &&
7926 priv->job.asyncOwner == virThreadSelfID() &&
7927 priv->job.active != QEMU_JOB_ASYNC_NESTED) {
7928 VIR_WARN("qemuProcessStop called without a nested job (async=%s)",
7929 qemuDomainAsyncJobTypeToString(asyncJob));
7930 }
7931
7932 if (!virDomainObjIsActive(vm)) {
7933 VIR_DEBUG("VM '%s' not active", vm->def->name);
7934 goto endjob;
7935 }
7936
7937 qemuProcessBuildDestroyMemoryPaths(driver, vm, NULL, false);
7938
7939 if (!!g_atomic_int_dec_and_test(&driver->nactive) && driver->inhibitCallback)
7940 driver->inhibitCallback(false, driver->inhibitOpaque);
7941
7942 if ((timestamp = virTimeStringNow()) != NULL) {
7943 qemuDomainLogAppendMessage(driver, vm, "%s: shutting down, reason=%s\n",
7944 timestamp,
7945 virDomainShutoffReasonTypeToString(reason));
7946 }
7947
7948 /* Clear network bandwidth */
7949 virDomainClearNetBandwidth(vm->def);
7950
7951 virDomainConfVMNWFilterTeardown(vm);
7952
7953 if (cfg->macFilter) {
7954 for (i = 0; i < def->nnets; i++) {
7955 virDomainNetDef *net = def->nets[i];
7956 if (net->ifname == NULL)
7957 continue;
7958 ignore_value(ebtablesRemoveForwardAllowIn(driver->ebtables,
7959 net->ifname,
7960 &net->mac));
7961 }
7962 }
7963
7964 virPortAllocatorRelease(priv->nbdPort);
7965 priv->nbdPort = 0;
7966
7967 if (priv->agent) {
7968 qemuAgentClose(priv->agent);
7969 priv->agent = NULL;
7970 }
7971 priv->agentError = false;
7972
7973 if (priv->mon) {
7974 qemuMonitorClose(priv->mon);
7975 priv->mon = NULL;
7976 }
7977
7978 if (priv->monConfig) {
7979 if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX)
7980 unlink(priv->monConfig->data.nix.path);
7981 virObjectUnref(priv->monConfig);
7982 priv->monConfig = NULL;
7983 }
7984
7985 qemuDomainObjStopWorker(vm);
7986
7987 /* Remove the master key */
7988 qemuDomainMasterKeyRemove(priv);
7989
7990 /* Do this before we delete the tree and remove pidfile. */
7991 qemuProcessKillManagedPRDaemon(vm);
7992
7993 ignore_value(virDomainChrDefForeach(vm->def,
7994 false,
7995 qemuProcessCleanupChardevDevice,
7996 NULL));
7997
7998
7999 /* shut it off for sure */
8000 ignore_value(qemuProcessKill(vm,
8001 VIR_QEMU_PROCESS_KILL_FORCE|
8002 VIR_QEMU_PROCESS_KILL_NOCHECK));
8003
8004 qemuDomainCleanupRun(driver, vm);
8005
8006 qemuExtDevicesStop(driver, vm);
8007
8008 qemuDBusStop(driver, vm);
8009
8010 vm->def->id = -1;
8011
8012 /* Wake up anything waiting on domain condition */
8013 virDomainObjBroadcast(vm);
8014
8015 virFileDeleteTree(priv->libDir);
8016 virFileDeleteTree(priv->channelTargetDir);
8017
8018 /* Stop autodestroy in case guest is restarted */
8019 qemuProcessAutoDestroyRemove(driver, vm);
8020
8021 /* now that we know it's stopped call the hook if present */
8022 if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
8023 g_autofree char *xml = qemuDomainDefFormatXML(driver, NULL, vm->def, 0);
8024
8025 /* we can't stop the operation even if the script raised an error */
8026 ignore_value(virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
8027 VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END,
8028 NULL, xml, NULL));
8029 }
8030
8031 /* Reset security labels unless the caller doesn't want us to */
8032 if (!(flags & VIR_QEMU_PROCESS_STOP_NO_RELABEL))
8033 qemuSecurityRestoreAllLabel(driver, vm,
8034 !!(flags & VIR_QEMU_PROCESS_STOP_MIGRATED));
8035
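    /* Drop this domain's disks from the driver's table of shared devices;
     * the entries are added back by qemuAddSharedDevice() when a domain
     * using the device is started or reconnected. */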
8036 for (i = 0; i < vm->def->ndisks; i++) {
8037 virDomainDeviceDef dev;
8038 virDomainDiskDef *disk = vm->def->disks[i];
8039
8040 dev.type = VIR_DOMAIN_DEVICE_DISK;
8041 dev.data.disk = disk;
8042 ignore_value(qemuRemoveSharedDevice(driver, &dev, vm->def->name));
8043 }
8044
8045 /* Clear out dynamically assigned labels */
8046 for (i = 0; i < vm->def->nseclabels; i++) {
8047 if (vm->def->seclabels[i]->type == VIR_DOMAIN_SECLABEL_DYNAMIC)
8048 VIR_FREE(vm->def->seclabels[i]->label);
8049 VIR_FREE(vm->def->seclabels[i]->imagelabel);
8050 }
8051
8052 qemuHostdevReAttachDomainDevices(driver, vm->def);
8053
8054 for (i = 0; i < def->nnets; i++) {
8055 virDomainNetDef *net = def->nets[i];
8056 vport = virDomainNetGetActualVirtPortProfile(net);
8057 switch (virDomainNetGetActualType(net)) {
8058 case VIR_DOMAIN_NET_TYPE_DIRECT:
8059 ignore_value(virNetDevMacVLanDeleteWithVPortProfile(
8060 net->ifname, &net->mac,
8061 virDomainNetGetActualDirectDev(net),
8062 virDomainNetGetActualDirectMode(net),
8063 virDomainNetGetActualVirtPortProfile(net),
8064 cfg->stateDir));
8065 break;
8066 case VIR_DOMAIN_NET_TYPE_ETHERNET:
8067 if (net->managed_tap != VIR_TRISTATE_BOOL_NO && net->ifname) {
8068 ignore_value(virNetDevTapDelete(net->ifname, net->backend.tap));
8069 VIR_FREE(net->ifname);
8070 }
8071 break;
8072 case VIR_DOMAIN_NET_TYPE_BRIDGE:
8073 case VIR_DOMAIN_NET_TYPE_NETWORK:
8074 #ifdef VIR_NETDEV_TAP_REQUIRE_MANUAL_CLEANUP
8075 if (!(vport && vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH))
8076 ignore_value(virNetDevTapDelete(net->ifname, net->backend.tap));
8077 #endif
8078 break;
8079 case VIR_DOMAIN_NET_TYPE_USER:
8080 case VIR_DOMAIN_NET_TYPE_VHOSTUSER:
8081 case VIR_DOMAIN_NET_TYPE_SERVER:
8082 case VIR_DOMAIN_NET_TYPE_CLIENT:
8083 case VIR_DOMAIN_NET_TYPE_MCAST:
8084 case VIR_DOMAIN_NET_TYPE_INTERNAL:
8085 case VIR_DOMAIN_NET_TYPE_HOSTDEV:
8086 case VIR_DOMAIN_NET_TYPE_UDP:
8087 case VIR_DOMAIN_NET_TYPE_VDPA:
8088 case VIR_DOMAIN_NET_TYPE_LAST:
8089 /* No special cleanup procedure for these types. */
8090 break;
8091 }
8092 /* release the physical device (or any other resources used by
8093 * this interface) in the network driver
8094 */
8095 if (vport) {
8096 if (vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_MIDONET) {
8097 ignore_value(virNetDevMidonetUnbindPort(vport));
8098 } else if (vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH) {
8099 ignore_value(virNetDevOpenvswitchRemovePort(
8100 virDomainNetGetActualBridgeName(net),
8101 net->ifname));
8102 }
8103 }
8104
8105 /* kick the device out of the hostdev list too */
8106 virDomainNetRemoveHostdev(def, net);
8107 if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
8108 if (conn || (conn = virGetConnectNetwork()))
8109 virDomainNetReleaseActualDevice(conn, vm->def, net);
8110 else
8111 VIR_WARN("Unable to release network device '%s'", NULLSTR(net->ifname));
8112 }
8113
8114 if (virDomainNetDefIsOvsport(net) &&
8115 virNetDevOpenvswitchInterfaceClearQos(net->ifname, vm->def->uuid) < 0) {
8116 VIR_WARN("cannot clear bandwidth setting for ovs device : %s",
8117 net->ifname);
8118 }
8119 }
8120
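    /* Removing the cgroup can transiently fail with EBUSY while the kernel
     * is still tearing down the domain's processes, so retry a few times
     * (up to 5 attempts, 200ms apart) before warning and giving up. */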
8121 retry:
8122 if ((ret = qemuRemoveCgroup(vm)) < 0) {
8123 if (ret == -EBUSY && (retries++ < 5)) {
8124 g_usleep(200*1000);
8125 goto retry;
8126 }
8127 VIR_WARN("Failed to remove cgroup for %s",
8128 vm->def->name);
8129 }
8130
8131 /* Remove the resctrl allocation after cgroups are cleaned up, which makes
8132 * it somewhat safer (although removing the allocation should work even
8133 * with pids still listed in the tasks file). */
8134 for (i = 0; i < vm->def->nresctrls; i++) {
8135 size_t j = 0;
8136
8137 for (j = 0; j < vm->def->resctrls[i]->nmonitors; j++) {
8138 virDomainResctrlMonDef *mon = NULL;
8139
8140 mon = vm->def->resctrls[i]->monitors[j];
8141 virResctrlMonitorRemove(mon->instance);
8142 }
8143
8144 virResctrlAllocRemove(vm->def->resctrls[i]->alloc);
8145 }
8146
8147 qemuProcessRemoveDomainStatus(driver, vm);
8148
8149 /* Remove VNC and Spice ports from port reservation bitmap, but only if
8150 they were reserved by the driver (autoport=yes)
8151 */
8152 for (i = 0; i < vm->def->ngraphics; ++i) {
8153 virDomainGraphicsDef *graphics = vm->def->graphics[i];
8154 if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) {
8155 if (graphics->data.vnc.autoport) {
8156 virPortAllocatorRelease(graphics->data.vnc.port);
8157 } else if (graphics->data.vnc.portReserved) {
8158 virPortAllocatorRelease(graphics->data.vnc.port);
8159 graphics->data.vnc.portReserved = false;
8160 }
8161 if (graphics->data.vnc.websocketGenerated) {
8162 virPortAllocatorRelease(graphics->data.vnc.websocket);
8163 graphics->data.vnc.websocketGenerated = false;
8164 graphics->data.vnc.websocket = -1;
8165 } else if (graphics->data.vnc.websocket) {
8166 virPortAllocatorRelease(graphics->data.vnc.websocket);
8167 }
8168 }
8169 if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) {
8170 if (graphics->data.spice.autoport) {
8171 virPortAllocatorRelease(graphics->data.spice.port);
8172 virPortAllocatorRelease(graphics->data.spice.tlsPort);
8173 } else {
8174 if (graphics->data.spice.portReserved) {
8175 virPortAllocatorRelease(graphics->data.spice.port);
8176 graphics->data.spice.portReserved = false;
8177 }
8178
8179 if (graphics->data.spice.tlsPortReserved) {
8180 virPortAllocatorRelease(graphics->data.spice.tlsPort);
8181 graphics->data.spice.tlsPortReserved = false;
8182 }
8183 }
8184 }
8185 }
8186
8187 for (i = 0; i < vm->ndeprecations; i++)
8188 g_free(vm->deprecations[i]);
8189 g_free(vm->deprecations);
8190 vm->ndeprecations = 0;
8191 vm->deprecations = NULL;
8192 vm->taint = 0;
8193 vm->pid = -1;
8194 virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason);
8195 for (i = 0; i < vm->def->niothreadids; i++)
8196 vm->def->iothreadids[i]->thread_id = 0;
8197
8198 /* clean up a possible backup job */
8199 if (priv->backup)
8200 qemuBackupJobTerminate(vm, QEMU_DOMAIN_JOB_STATUS_CANCELED);
8201
8202 /* Do this explicitly after vm->pid is reset so that security drivers don't
8203 * try to enter the domain's namespace which is non-existent by now as qemu
8204 * is no longer running. */
8205 if (!(flags & VIR_QEMU_PROCESS_STOP_NO_RELABEL)) {
8206 for (i = 0; i < def->ndisks; i++) {
8207 virDomainDiskDef *disk = def->disks[i];
8208
8209 if (disk->mirror) {
8210 if (qemuSecurityRestoreImageLabel(driver, vm, disk->mirror, false) < 0)
8211 VIR_WARN("Unable to restore security label on %s", disk->dst);
8212
8213 if (virStorageSourceChainHasNVMe(disk->mirror))
8214 qemuHostdevReAttachOneNVMeDisk(driver, vm->def->name, disk->mirror);
8215 }
8216
8217 qemuBlockRemoveImageMetadata(driver, vm, disk->dst, disk->src);
8218
8219 /* for now transient disks are forbidden with migration so they
8220 * can be handled here */
8221 if (disk->transient &&
8222 QEMU_DOMAIN_DISK_PRIVATE(disk)->transientOverlayCreated) {
8223 VIR_DEBUG("Removing transient overlay '%s' of disk '%s'",
8224 disk->src->path, disk->dst);
8225 if (qemuDomainStorageFileInit(driver, vm, disk->src, NULL) >= 0) {
8226 virStorageSourceUnlink(disk->src);
8227 virStorageSourceDeinit(disk->src);
8228 }
8229 }
8230 }
8231 }
8232
8233 qemuSecurityReleaseLabel(driver->securityManager, vm->def);
8234
8235 /* clear all private data entries which are no longer needed */
8236 qemuDomainObjPrivateDataClear(priv);
8237
8238 /* The "release" hook cleans up additional resources */
8239 if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
8240 g_autofree char *xml = qemuDomainDefFormatXML(driver, NULL, vm->def, 0);
8241
8242 /* we can't stop the operation even if the script raised an error */
8243 virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
8244 VIR_HOOK_QEMU_OP_RELEASE, VIR_HOOK_SUBOP_END,
8245 NULL, xml, NULL);
8246 }
8247
8248 virDomainObjRemoveTransientDef(vm);
8249
8250 endjob:
8251 if (asyncJob != QEMU_ASYNC_JOB_NONE)
8252 qemuDomainObjEndJob(driver, vm);
8253
8254 cleanup:
8255 virErrorRestore(&orig_err);
8256 }
8257
8258
8259 static void
8260 qemuProcessAutoDestroy(virDomainObj *dom,
8261 virConnectPtr conn,
8262 void *opaque)
8263 {
8264 virQEMUDriver *driver = opaque;
8265 qemuDomainObjPrivate *priv = dom->privateData;
8266 virObjectEvent *event = NULL;
8267 unsigned int stopFlags = 0;
8268
8269 VIR_DEBUG("vm=%s, conn=%p", dom->def->name, conn);
8270
8271 if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_IN)
8272 stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
8273
8274 if (priv->job.asyncJob) {
8275 VIR_DEBUG("vm=%s has long-term job active, cancelling",
8276 dom->def->name);
8277 qemuDomainObjDiscardAsyncJob(driver, dom);
8278 }
8279
8280 VIR_DEBUG("Killing domain");
8281
8282 if (qemuProcessBeginStopJob(driver, dom, QEMU_JOB_DESTROY, true) < 0)
8283 return;
8284
8285 qemuProcessStop(driver, dom, VIR_DOMAIN_SHUTOFF_DESTROYED,
8286 QEMU_ASYNC_JOB_NONE, stopFlags);
8287
8288 virDomainAuditStop(dom, "destroyed");
8289 event = virDomainEventLifecycleNewFromObj(dom,
8290 VIR_DOMAIN_EVENT_STOPPED,
8291 VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
8292
8293 qemuDomainRemoveInactive(driver, dom);
8294
8295 qemuDomainObjEndJob(driver, dom);
8296
8297 virObjectEventStateQueue(driver->domainEventState, event);
8298 }
8299
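/**
 * qemuProcessAutoDestroyAdd:
 *
 * Register qemuProcessAutoDestroy() as a close callback on @conn so that
 * the domain is destroyed automatically when that connection is closed;
 * used for domains started with VIR_QEMU_PROCESS_START_AUTODESTROY.
 */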
8300 int qemuProcessAutoDestroyAdd(virQEMUDriver *driver,
8301 virDomainObj *vm,
8302 virConnectPtr conn)
8303 {
8304 VIR_DEBUG("vm=%s, conn=%p", vm->def->name, conn);
8305 return virCloseCallbacksSet(driver->closeCallbacks, vm, conn,
8306 qemuProcessAutoDestroy);
8307 }
8308
8309 int qemuProcessAutoDestroyRemove(virQEMUDriver *driver,
8310 virDomainObj *vm)
8311 {
8312 VIR_DEBUG("vm=%s", vm->def->name);
8313 return virCloseCallbacksUnset(driver->closeCallbacks, vm,
8314 qemuProcessAutoDestroy);
8315 }
8316
8317 bool qemuProcessAutoDestroyActive(virQEMUDriver *driver,
8318 virDomainObj *vm)
8319 {
8320 virCloseCallback cb;
8321 VIR_DEBUG("vm=%s", vm->def->name);
8322 cb = virCloseCallbacksGet(driver->closeCallbacks, vm, NULL);
8323 return cb == qemuProcessAutoDestroy;
8324 }
8325
8326
8327 int
8328 qemuProcessRefreshDisks(virQEMUDriver *driver,
8329 virDomainObj *vm,
8330 qemuDomainAsyncJob asyncJob)
8331 {
8332 qemuDomainObjPrivate *priv = vm->privateData;
8333 bool blockdev = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV);
8334 GHashTable *table = NULL;
8335 int ret = -1;
8336 size_t i;
8337
8338 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) == 0) {
8339 table = qemuMonitorGetBlockInfo(priv->mon);
8340 if (qemuDomainObjExitMonitor(driver, vm) < 0)
8341 goto cleanup;
8342 }
8343
8344 if (!table)
8345 goto cleanup;
8346
8347 for (i = 0; i < vm->def->ndisks; i++) {
8348 virDomainDiskDef *disk = vm->def->disks[i];
8349 qemuDomainDiskPrivate *diskpriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
8350 struct qemuDomainDiskInfo *info;
8351 const char *entryname = disk->info.alias;
8352
8353 if (blockdev && diskpriv->qomName)
8354 entryname = diskpriv->qomName;
8355
8356 if (!(info = virHashLookup(table, entryname)))
8357 continue;
8358
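        /* For removable media QEMU also reports whether the drive is empty
         * and, if there is a tray, whether it is open; mirror that state
         * into the disk definition. */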
8359 if (info->removable) {
8360 if (info->empty)
8361 virDomainDiskEmptySource(disk);
8362
8363 if (info->tray) {
8364 if (info->tray_open)
8365 disk->tray_status = VIR_DOMAIN_DISK_TRAY_OPEN;
8366 else
8367 disk->tray_status = VIR_DOMAIN_DISK_TRAY_CLOSED;
8368 }
8369 }
8370
8371 /* fill in additional data */
8372 diskpriv->removable = info->removable;
8373 diskpriv->tray = info->tray;
8374 }
8375
8376 ret = 0;
8377
8378 cleanup:
8379 virHashFree(table);
8380 return ret;
8381 }
8382
8383
8384 static int
8385 qemuProcessRefreshCPUMigratability(virQEMUDriver *driver,
8386 virDomainObj *vm,
8387 qemuDomainAsyncJob asyncJob)
8388 {
8389 qemuDomainObjPrivate *priv = vm->privateData;
8390 virDomainDef *def = vm->def;
8391 bool migratable;
8392 int rc;
8393
8394 if (def->cpu->mode != VIR_CPU_MODE_HOST_PASSTHROUGH &&
8395 def->cpu->mode != VIR_CPU_MODE_MAXIMUM)
8396 return 0;
8397
8398 /* If the cpu.migratable capability is present, the migratable attribute
8399 * is set correctly. */
8400 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_CPU_MIGRATABLE))
8401 return 0;
8402
8403 if (!ARCH_IS_X86(def->os.arch))
8404 return 0;
8405
8406 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
8407 return -1;
8408
8409 rc = qemuMonitorGetCPUMigratable(priv->mon, &migratable);
8410
8411 if (qemuDomainObjExitMonitor(driver, vm) < 0 || rc < 0)
8412 return -1;
8413
8414 if (rc == 1)
8415 migratable = false;
8416
8417 /* Libvirt 6.5.0 would set migratable='off' for running domains even though
8418 * the actual default used by QEMU was 'on'. */
8419 if (def->cpu->migratable == VIR_TRISTATE_SWITCH_OFF && migratable) {
8420 VIR_DEBUG("Fixing CPU migratable attribute");
8421 def->cpu->migratable = VIR_TRISTATE_SWITCH_ON;
8422 }
8423
8424 if (def->cpu->migratable == VIR_TRISTATE_SWITCH_ABSENT)
8425 def->cpu->migratable = virTristateSwitchFromBool(migratable);
8426
8427 return 0;
8428 }
8429
8430
8431 static int
8432 qemuProcessRefreshCPU(virQEMUDriver *driver,
8433 virDomainObj *vm)
8434 {
8435 qemuDomainObjPrivate *priv = vm->privateData;
8436 g_autoptr(virCPUDef) host = NULL;
8437 g_autoptr(virCPUDef) hostmig = NULL;
8438 g_autoptr(virCPUDef) cpu = NULL;
8439
8440 if (!virQEMUCapsGuestIsNative(driver->hostarch, vm->def->os.arch))
8441 return 0;
8442
8443 if (!vm->def->cpu)
8444 return 0;
8445
8446 if (qemuProcessRefreshCPUMigratability(driver, vm, QEMU_ASYNC_JOB_NONE) < 0)
8447 return -1;
8448
8449 if (!(host = virQEMUDriverGetHostCPU(driver))) {
8450 virResetLastError();
8451 return 0;
8452 }
8453
8454 /* If the domain with a host-model CPU was started by an old libvirt
8455 * (< 2.3) which didn't replace the CPU with a custom one, let's do it now
8456 * since the rest of our code does not really expect a host-model CPU in a
8457 * running domain.
8458 */
8459 if (vm->def->cpu->mode == VIR_CPU_MODE_HOST_MODEL) {
8460 /*
8461 * PSeries domains are able to run with host-model CPU by design,
8462 * even on Libvirt newer than 2.3, never replacing host-model with
8463 * custom in the virCPUUpdate() call. There is no need to call
8464 * virCPUUpdate() and qemuProcessUpdateCPU() in this case.
8465 */
8466 if (qemuDomainIsPSeries(vm->def))
8467 return 0;
8468
8469 if (!(hostmig = virCPUCopyMigratable(host->arch, host)))
8470 return -1;
8471
8472 if (!(cpu = virCPUDefCopyWithoutModel(hostmig)) ||
8473 virCPUDefCopyModelFilter(cpu, hostmig, false,
8474 virQEMUCapsCPUFilterFeatures,
8475 &host->arch) < 0)
8476 return -1;
8477
8478 if (virCPUUpdate(vm->def->os.arch, vm->def->cpu, cpu) < 0)
8479 return -1;
8480
8481 if (qemuProcessUpdateCPU(driver, vm, QEMU_ASYNC_JOB_NONE) < 0)
8482 return -1;
8483 } else if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QUERY_CPU_MODEL_EXPANSION)) {
8484 /* We only try to fix CPUs when the libvirt/QEMU combo used to start
8485 * the domain did not know about query-cpu-model-expansion in which
8486 * case the host-model is known to not contain features which QEMU
8487 * doesn't know about.
8488 */
8489 if (qemuDomainFixupCPUs(vm, &priv->origCPU) < 0)
8490 return -1;
8491 }
8492
8493 return 0;
8494 }
8495
8496
8497 static int
8498 qemuProcessRefreshLegacyBlockjob(void *payload,
8499 const char *name,
8500 void *opaque)
8501 {
8502 const char *jobname = name;
8503 virDomainObj *vm = opaque;
8504 qemuMonitorBlockJobInfo *info = payload;
8505 virDomainDiskDef *disk;
8506 qemuBlockJobData *job;
8507 qemuBlockJobType jobtype = info->type;
8508 qemuDomainObjPrivate *priv = vm->privateData;
8509
8510 if (!(disk = qemuProcessFindDomainDiskByAliasOrQOM(vm, jobname, jobname))) {
8511 VIR_DEBUG("could not find disk for block job '%s'", jobname);
8512 return 0;
8513 }
8514
8515 if (jobtype == QEMU_BLOCKJOB_TYPE_COMMIT &&
8516 disk->mirrorJob == VIR_DOMAIN_BLOCK_JOB_TYPE_ACTIVE_COMMIT)
8517 jobtype = disk->mirrorJob;
8518
8519 if (!(job = qemuBlockJobDiskNew(vm, disk, jobtype, jobname)))
8520 return -1;
8521
8522 if (disk->mirror) {
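        /* Older QEMU did not report the 'ready' flag for block jobs
         * (ready_present is false); in that case consider the mirror ready
         * once the job's current offset has reached the end. */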
8523 if ((!info->ready_present && info->end == info->cur) ||
8524 info->ready) {
8525 disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_READY;
8526 job->state = VIR_DOMAIN_BLOCK_JOB_READY;
8527 }
8528
8529 /* Pre-blockdev block copy labelled the chain of the mirrored device
8530 * just before pivoting. At that point it was no longer known whether
8531 * it's even necessary (e.g. disk is being reused). This code fixes
8532 * the labelling in case the job was started in a libvirt version
8533 * which did not label the chain when the block copy is being started.
8534 * Note that we can't do much on failure. */
8535 if (disk->mirrorJob == VIR_DOMAIN_BLOCK_JOB_TYPE_COPY) {
8536 if (qemuDomainDetermineDiskChain(priv->driver, vm, disk,
8537 disk->mirror, true) < 0)
8538 goto cleanup;
8539
8540 if (disk->mirror->format &&
8541 disk->mirror->format != VIR_STORAGE_FILE_RAW &&
8542 (qemuDomainNamespaceSetupDisk(vm, disk->mirror, NULL) < 0 ||
8543 qemuSetupImageChainCgroup(vm, disk->mirror) < 0 ||
8544 qemuSecuritySetImageLabel(priv->driver, vm, disk->mirror,
8545 true, true) < 0))
8546 goto cleanup;
8547 }
8548 }
8549
8550 qemuBlockJobStarted(job, vm);
8551
8552 cleanup:
8553 qemuBlockJobStartupFinalize(vm, job);
8554
8555 return 0;
8556 }
8557
8558
8559 static int
8560 qemuProcessRefreshLegacyBlockjobs(virQEMUDriver *driver,
8561 virDomainObj *vm)
8562 {
8563 GHashTable *blockJobs = NULL;
8564 int ret = -1;
8565
8566 qemuDomainObjEnterMonitor(driver, vm);
8567 blockJobs = qemuMonitorGetAllBlockJobInfo(qemuDomainGetMonitor(vm), true);
8568 if (qemuDomainObjExitMonitor(driver, vm) < 0 || !blockJobs)
8569 goto cleanup;
8570
8571 if (virHashForEach(blockJobs, qemuProcessRefreshLegacyBlockjob, vm) < 0)
8572 goto cleanup;
8573
8574 ret = 0;
8575
8576 cleanup:
8577 virHashFree(blockJobs);
8578 return ret;
8579 }
8580
8581
8582 static int
8583 qemuProcessRefreshBlockjobs(virQEMUDriver *driver,
8584 virDomainObj *vm)
8585 {
8586 qemuDomainObjPrivate *priv = vm->privateData;
8587
8588 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV))
8589 return qemuBlockJobRefreshJobs(driver, vm);
8590 else
8591 return qemuProcessRefreshLegacyBlockjobs(driver, vm);
8592 }
8593
8594
8595 struct qemuProcessReconnectData {
8596 virQEMUDriver *driver;
8597 virDomainObj *obj;
8598 virIdentity *identity;
8599 };
8600 /*
8601 * Open an existing VM's monitor, re-detect VCPU threads
8602 * and re-reserve the security labels in use
8603 *
8604 * This function also inherits a locked and ref'd domain object.
8605 *
8606 * This function needs to:
8607 * 1. Enter job
8608 * 2. just before monitor reconnect do lightweight MonitorEnter
8609 * (increase VM refcount and unlock VM)
8610 * 3. reconnect to monitor
8611 * 4. do lightweight MonitorExit (lock VM)
8612 * 5. continue reconnect process
8613 * 6. EndJob
8614 *
8615 * We can't do normal MonitorEnter & MonitorExit because these two lock the
8616 * monitor lock, which does not exist in this early phase.
8617 */
8618 static void
8619 qemuProcessReconnect(void *opaque)
8620 {
8621 struct qemuProcessReconnectData *data = opaque;
8622 virQEMUDriver *driver = data->driver;
8623 virDomainObj *obj = data->obj;
8624 qemuDomainObjPrivate *priv;
8625 g_auto(qemuDomainJobObj) oldjob = {
8626 .cb = NULL,
8627 };
8628 int state;
8629 int reason;
8630 g_autoptr(virQEMUDriverConfig) cfg = NULL;
8631 size_t i;
8632 unsigned int stopFlags = 0;
8633 bool jobStarted = false;
8634 bool retry = true;
8635 bool tryMonReconn = false;
8636
8637 virIdentitySetCurrent(data->identity);
8638 g_clear_object(&data->identity);
8639 VIR_FREE(data);
8640
8641 cfg = virQEMUDriverGetConfig(driver);
8642 priv = obj->privateData;
8643
8644 qemuDomainObjRestoreJob(obj, &oldjob);
8645 if (oldjob.asyncJob == QEMU_ASYNC_JOB_MIGRATION_IN)
8646 stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
8647 if (oldjob.asyncJob == QEMU_ASYNC_JOB_BACKUP && priv->backup)
8648 priv->backup->apiFlags = oldjob.apiFlags;
8649
8650 if (qemuDomainObjBeginJob(driver, obj, QEMU_JOB_MODIFY) < 0)
8651 goto error;
8652 jobStarted = true;
8653
8654 /* XXX If we are ever going to change the pid file pattern, come up with
8655 * some intelligence here to deal with old paths. */
8656 if (!(priv->pidfile = virPidFileBuildPath(cfg->stateDir, obj->def->name)))
8657 goto error;
8658
8659 /* Restore the masterKey */
8660 if (qemuDomainMasterKeyReadFile(priv) < 0)
8661 goto error;
8662
8663 /* If we are connecting to a guest started by old libvirt there is no
8664 * allowReboot in status XML and we need to initialize it. */
8665 qemuProcessPrepareAllowReboot(obj);
8666
8667 if (qemuHostdevUpdateActiveDomainDevices(driver, obj->def) < 0)
8668 goto error;
8669
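    /* With chardev FD passing the monitor socket was created by libvirtd
     * itself rather than by QEMU, so it is expected to exist already and
     * connecting to it should not need any retries. */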
8670 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_CHARDEV_FD_PASS_COMMANDLINE))
8671 retry = false;
8672
8673 if (qemuDomainObjStartWorker(obj) < 0)
8674 goto error;
8675
8676 VIR_DEBUG("Reconnect monitor to def=%p name='%s' retry=%d",
8677 obj, obj->def->name, retry);
8678
8679 tryMonReconn = true;
8680
8681 /* XXX check PID liveness & EXE path */
8682 if (qemuConnectMonitor(driver, obj, QEMU_ASYNC_JOB_NONE, retry, NULL) < 0)
8683 goto error;
8684
8685 priv->machineName = qemuDomainGetMachineName(obj);
8686 if (!priv->machineName)
8687 goto error;
8688
8689 if (qemuConnectCgroup(obj) < 0)
8690 goto error;
8691
8692 if (qemuDomainPerfRestart(obj) < 0)
8693 goto error;
8694
8695 /* recreate the pflash storage sources */
8696 if (qemuDomainInitializePflashStorageSource(obj) < 0)
8697 goto error;
8698
8699 /* XXX: This needs to change once locking is introduced for
8700 * qemu_driver->sharedDevices.
8701 */
8702 for (i = 0; i < obj->def->ndisks; i++) {
8703 virDomainDiskDef *disk = obj->def->disks[i];
8704 virDomainDeviceDef dev;
8705
8706 if (virDomainDiskTranslateSourcePool(disk) < 0)
8707 goto error;
8708
8709 /* backing chains need to be refreshed only if they could change */
8710 if (priv->reconnectBlockjobs != VIR_TRISTATE_BOOL_NO &&
8711 !virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV)) {
8712 /* This should be the only place that calls
8713 * qemuDomainDetermineDiskChain with @report_broken == false
8714 * to guarantee best-effort domain reconnect */
8715 virStorageSourceBackingStoreClear(disk->src);
8716 if (qemuDomainDetermineDiskChain(driver, obj, disk, NULL, false) < 0)
8717 goto error;
8718 } else {
8719 VIR_DEBUG("skipping backing chain detection for '%s'", disk->dst);
8720 }
8721
8722 dev.type = VIR_DOMAIN_DEVICE_DISK;
8723 dev.data.disk = disk;
8724 if (qemuAddSharedDevice(driver, &dev, obj->def->name) < 0)
8725 goto error;
8726 }
8727
8728 for (i = 0; i < obj->def->ngraphics; i++) {
8729 if (qemuProcessGraphicsReservePorts(obj->def->graphics[i], true) < 0)
8730 goto error;
8731 }
8732
8733 if (qemuProcessUpdateState(driver, obj) < 0)
8734 goto error;
8735
8736 state = virDomainObjGetState(obj, &reason);
8737 if (state == VIR_DOMAIN_SHUTOFF ||
8738 (state == VIR_DOMAIN_PAUSED &&
8739 reason == VIR_DOMAIN_PAUSED_STARTING_UP)) {
8740 VIR_DEBUG("Domain '%s' wasn't fully started yet, killing it",
8741 obj->def->name);
8742 goto error;
8743 }
8744
8745 if (!priv->qemuCaps) {
8746 virReportError(VIR_ERR_INTERNAL_ERROR,
8747 _("domain '%s' has no capabilities recorded"),
8748 obj->def->name);
8749 goto error;
8750 }
8751
8752 /* In case the domain shut down or fake-rebooted while we were not running,
8753 * we need to finish the shutdown or fake reboot process. We need to do it
8754 * after we have virQEMUCaps filled in.
8755 */
8756 if (state == VIR_DOMAIN_SHUTDOWN ||
8757 (state == VIR_DOMAIN_PAUSED &&
8758 reason == VIR_DOMAIN_PAUSED_SHUTTING_DOWN) ||
8759 (priv->fakeReboot && state == VIR_DOMAIN_PAUSED &&
8760 reason == VIR_DOMAIN_PAUSED_USER)) {
8761 VIR_DEBUG("Finishing shutdown sequence for domain %s",
8762 obj->def->name);
8763 qemuProcessShutdownOrReboot(driver, obj);
8764 goto cleanup;
8765 }
8766
8767 if (qemuProcessBuildDestroyMemoryPaths(driver, obj, NULL, true) < 0)
8768 goto error;
8769
8770 if ((qemuDomainAssignAddresses(obj->def, priv->qemuCaps,
8771 driver, obj, false)) < 0) {
8772 goto error;
8773 }
8774
8775 /* if the domain requests a security driver we haven't loaded, report an
8776 * error, but do not kill the domain
8777 */
8778 ignore_value(qemuSecurityCheckAllLabel(driver->securityManager,
8779 obj->def));
8780
8781 if (qemuProcessRefreshCPU(driver, obj) < 0)
8782 goto error;
8783
8784 if (qemuDomainRefreshVcpuInfo(driver, obj, QEMU_ASYNC_JOB_NONE, true) < 0)
8785 goto error;
8786
8787 qemuDomainVcpuPersistOrder(obj->def);
8788
8789 if (qemuDomainUpdateMemoryDeviceInfo(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
8790 goto error;
8791
8792 if (qemuProcessDetectIOThreadPIDs(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
8793 goto error;
8794
8795 if (qemuSecurityReserveLabel(driver->securityManager, obj->def, obj->pid) < 0)
8796 goto error;
8797
8798 qemuProcessNotifyNets(obj->def);
8799
8800 qemuProcessFiltersInstantiate(obj->def);
8801
8802 if (qemuProcessRefreshDisks(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
8803 goto error;
8804
8805 /* At this point we have already verified that the VM's startup completed
8806 * successfully earlier, which also implies that all transient disk
8807 * overlays were created. */
8808 for (i = 0; i < obj->def->ndisks; i++) {
8809 virDomainDiskDef *disk = obj->def->disks[i];
8810
8811 if (disk->transient)
8812 QEMU_DOMAIN_DISK_PRIVATE(disk)->transientOverlayCreated = true;
8813 }
8814
8815 if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV) &&
8816 qemuBlockNodeNamesDetect(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
8817 goto error;
8818
8819 if (qemuRefreshVirtioChannelState(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
8820 goto error;
8821
8822 /* If querying the guest's RTC failed, report an error, but do not kill the domain. */
8823 qemuRefreshRTC(driver, obj);
8824
8825 if (qemuProcessRefreshBalloonState(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
8826 goto error;
8827
8828 if (qemuProcessRecoverJob(driver, obj, &oldjob, &stopFlags) < 0)
8829 goto error;
8830
8831 if (qemuProcessRefreshBlockjobs(driver, obj) < 0)
8832 goto error;
8833
8834 if (qemuProcessUpdateDevices(driver, obj) < 0)
8835 goto error;
8836
8837 if (qemuRefreshPRManagerState(driver, obj) < 0)
8838 goto error;
8839
8840 qemuProcessReconnectCheckMemAliasOrderMismatch(obj);
8841
8842 if (qemuConnectAgent(driver, obj) < 0)
8843 goto error;
8844
8845 for (i = 0; i < obj->def->nresctrls; i++) {
8846 size_t j = 0;
8847
8848 if (virResctrlAllocDeterminePath(obj->def->resctrls[i]->alloc,
8849 priv->machineName) < 0)
8850 goto error;
8851
8852 for (j = 0; j < obj->def->resctrls[i]->nmonitors; j++) {
8853 virDomainResctrlMonDef *mon = NULL;
8854
8855 mon = obj->def->resctrls[i]->monitors[j];
8856 if (virResctrlMonitorDeterminePath(mon->instance,
8857 priv->machineName) < 0)
8858 goto error;
8859 }
8860 }
8861
8862 /* update domain state XML with possibly updated state in virDomainObj */
8863 if (virDomainObjSave(obj, driver->xmlopt, cfg->stateDir) < 0)
8864 goto error;
8865
8866 /* Run a hook to allow admins to do some magic */
8867 if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
8868 g_autofree char *xml = qemuDomainDefFormatXML(driver,
8869 priv->qemuCaps,
8870 obj->def, 0);
8871 int hookret;
8872
8873 hookret = virHookCall(VIR_HOOK_DRIVER_QEMU, obj->def->name,
8874 VIR_HOOK_QEMU_OP_RECONNECT, VIR_HOOK_SUBOP_BEGIN,
8875 NULL, xml, NULL);
8876
8877 /*
8878 * If the script raised an error, abort the launch
8879 */
8880 if (hookret < 0)
8881 goto error;
8882 }
8883
8884 if (g_atomic_int_add(&driver->nactive, 1) == 0 && driver->inhibitCallback)
8885 driver->inhibitCallback(true, driver->inhibitOpaque);
8886
8887 cleanup:
8888 if (jobStarted) {
8889 if (!virDomainObjIsActive(obj))
8890 qemuDomainRemoveInactive(driver, obj);
8891 qemuDomainObjEndJob(driver, obj);
8892 } else {
8893 if (!virDomainObjIsActive(obj))
8894 qemuDomainRemoveInactiveJob(driver, obj);
8895 }
8896 virDomainObjEndAPI(&obj);
8897 virNWFilterUnlockFilterUpdates();
8898 virIdentitySetCurrent(NULL);
8899 return;
8900
8901 error:
8902 if (virDomainObjIsActive(obj)) {
8903 /* We can't get the monitor back, so must kill the VM
8904 * to remove danger of it ending up running twice if
8905 * user tries to start it again later.
8906 *
8907 * If we cannot get to the monitor when the QEMU command
8908 * line used -no-shutdown, then we can safely say that the
8909 * domain crashed; otherwise, if the monitor was started,
8910 * then we can blame ourselves, else we failed before the
8911 * monitor started so we don't really know. */
8912 if (!priv->mon && tryMonReconn &&
8913 (priv->allowReboot == VIR_TRISTATE_BOOL_YES ||
8914 virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_SET_ACTION)))
8915 state = VIR_DOMAIN_SHUTOFF_CRASHED;
8916 else if (priv->mon)
8917 state = VIR_DOMAIN_SHUTOFF_DAEMON;
8918 else
8919 state = VIR_DOMAIN_SHUTOFF_UNKNOWN;
8920
8921 /* If BeginJob failed, we jumped here without a job; let's hope another
8922 * thread didn't have a chance to start playing with the domain yet
8923 * (it's all we can do anyway).
8924 */
8925 qemuProcessStop(driver, obj, state, QEMU_ASYNC_JOB_NONE, stopFlags);
8926 }
8927 goto cleanup;
8928 }
8929
8930 static int
8931 qemuProcessReconnectHelper(virDomainObj *obj,
8932 void *opaque)
8933 {
8934 virThread thread;
8935 struct qemuProcessReconnectData *src = opaque;
8936 struct qemuProcessReconnectData *data;
8937 g_autofree char *name = NULL;
8938
8939 /* If the VM was inactive, we don't need to reconnect */
8940 if (!obj->pid)
8941 return 0;
8942
8943 data = g_new0(struct qemuProcessReconnectData, 1);
8944
8945 memcpy(data, src, sizeof(*data));
8946 data->obj = obj;
8947 data->identity = virIdentityGetCurrent();
8948
8949 virNWFilterReadLockFilterUpdates();
8950
8951 /* this lock and reference will be eventually transferred to the thread
8952 * that handles the reconnect */
8953 virObjectLock(obj);
8954 virObjectRef(obj);
8955
8956 name = g_strdup_printf("init-%s", obj->def->name);
8957
8958 if (virThreadCreateFull(&thread, false, qemuProcessReconnect,
8959 name, false, data) < 0) {
8960 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
8961 _("Could not create thread. QEMU initialization "
8962 "might be incomplete"));
8963 /* We can't spawn a thread and thus connect to monitor. Kill qemu.
8964 * It's safe to call qemuProcessStop without a job here since there
8965 * is no thread that could be doing anything else with the same domain
8966 * object.
8967 */
8968 qemuProcessStop(src->driver, obj, VIR_DOMAIN_SHUTOFF_FAILED,
8969 QEMU_ASYNC_JOB_NONE, 0);
8970 qemuDomainRemoveInactiveJobLocked(src->driver, obj);
8971
8972 virDomainObjEndAPI(&obj);
8973 virNWFilterUnlockFilterUpdates();
8974 g_clear_object(&data->identity);
8975 VIR_FREE(data);
8976 return -1;
8977 }
8978
8979 return 0;
8980 }
8981
8982 /**
8983 * qemuProcessReconnectAll
8984 *
8985 * Try to re-open the resources for live VMs that we care
8986 * about.
8987 */
8988 void
8989 qemuProcessReconnectAll(virQEMUDriver *driver)
8990 {
8991 struct qemuProcessReconnectData data = {.driver = driver};
8992 virDomainObjListForEach(driver->domains, true,
8993 qemuProcessReconnectHelper, &data);
8994 }
8995
8996
8997 static void virQEMUCapsMonitorNotify(qemuMonitor *mon G_GNUC_UNUSED,
8998 virDomainObj *vm G_GNUC_UNUSED,
8999 void *opaque G_GNUC_UNUSED)
9000 {
9001 }
9002
9003 static qemuMonitorCallbacks callbacks = {
9004 .eofNotify = virQEMUCapsMonitorNotify,
9005 .errorNotify = virQEMUCapsMonitorNotify,
9006 };
9007
9008
9009 static void
9010 qemuProcessQMPStop(qemuProcessQMP *proc)
9011 {
9012 if (proc->mon) {
9013 virObjectUnlock(proc->mon);
9014 qemuMonitorClose(proc->mon);
9015 proc->mon = NULL;
9016 }
9017
9018 if (proc->cmd) {
9019 virCommandAbort(proc->cmd);
9020 virCommandFree(proc->cmd);
9021 proc->cmd = NULL;
9022 }
9023
9024 if (proc->monpath)
9025 unlink(proc->monpath);
9026
9027 virDomainObjEndAPI(&proc->vm);
9028
9029 if (proc->pid != 0) {
9030 VIR_DEBUG("Killing QMP caps process %lld", (long long)proc->pid);
9031 if (virProcessKill(proc->pid, SIGKILL) < 0 && errno != ESRCH)
9032 VIR_ERROR(_("Failed to kill process %lld: %s"),
9033 (long long)proc->pid,
9034 g_strerror(errno));
9035
9036 proc->pid = 0;
9037 }
9038
9039 if (proc->pidfile)
9040 unlink(proc->pidfile);
9041
9042 if (proc->uniqDir)
9043 rmdir(proc->uniqDir);
9044 }
9045
9046
9047 /**
9048 * qemuProcessQMPFree:
9049 * @proc: Stores process and connection state
9050 *
9051 * Kill QEMU process and free process data structure.
9052 */
9053 void
9054 qemuProcessQMPFree(qemuProcessQMP *proc)
9055 {
9056 if (!proc)
9057 return;
9058
9059 qemuProcessQMPStop(proc);
9060
9061 g_object_unref(proc->eventThread);
9062
9063 g_free(proc->binary);
9064 g_free(proc->libDir);
9065 g_free(proc->uniqDir);
9066 g_free(proc->monpath);
9067 g_free(proc->monarg);
9068 g_free(proc->pidfile);
9069 g_free(proc->stdErr);
9070 g_free(proc);
9071 }
9072
9073
9074 /**
9075 * qemuProcessQMPNew:
9076 * @binary: QEMU binary
9077 * @libDir: Directory for process and connection artifacts
9078 * @runUid: UserId for QEMU process
9079 * @runGid: GroupId for QEMU process
9080 * @forceTCG: Force TCG mode if true
9081 *
9082 * Allocate and initialize domain structure encapsulating QEMU process state
9083 * and monitor connection for completing QMP queries.
9084 */
9085 qemuProcessQMP *
9086 qemuProcessQMPNew(const char *binary,
9087 const char *libDir,
9088 uid_t runUid,
9089 gid_t runGid,
9090 bool forceTCG)
9091 {
9092 g_autoptr(qemuProcessQMP) proc = NULL;
9093 const char *threadSuffix;
9094 g_autofree char *threadName = NULL;
9095
9096 VIR_DEBUG("exec=%s, libDir=%s, runUid=%u, runGid=%u, forceTCG=%d",
9097 binary, libDir, runUid, runGid, forceTCG);
9098
9099 proc = g_new0(qemuProcessQMP, 1);
9100
9101 proc->binary = g_strdup(binary);
9102 proc->libDir = g_strdup(libDir);
9103
9104 proc->runUid = runUid;
9105 proc->runGid = runGid;
9106 proc->forceTCG = forceTCG;
9107
9108 threadSuffix = strrchr(binary, '-');
9109 if (threadSuffix)
9110 threadSuffix++;
9111 else
9112 threadSuffix = binary;
9113 threadName = g_strdup_printf("qmp-%s", threadSuffix);
9114
9115 if (!(proc->eventThread = virEventThreadNew(threadName)))
9116 return NULL;
9117
9118 return g_steal_pointer(&proc);
9119 }
9120
9121
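/* Hand ownership of the unique probing directory to the user the probe QEMU
 * will run as (the group is left unchanged), so the unprivileged process can
 * create its monitor socket and pidfile inside it. */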
9122 static int
9123 qemuProcessQEMULabelUniqPath(qemuProcessQMP *proc)
9124 {
9125 /* We cannot use the security driver here, but we should not need to. */
9126 if (chown(proc->uniqDir, proc->runUid, -1) < 0) {
9127 virReportSystemError(errno,
9128 _("Cannot chown uniq path: %s"),
9129 proc->uniqDir);
9130 return -1;
9131 }
9132
9133 return 0;
9134 }
9135
9136
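/**
 * qemuProcessQMPInit:
 * @proc: Stores process and connection state
 *
 * Create the unique temporary directory for the probe under libDir and
 * derive the monitor socket path, the -qmp argument and the pidfile path
 * from it.
 */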
9137 static int
9138 qemuProcessQMPInit(qemuProcessQMP *proc)
9139 {
9140 g_autofree char *template = NULL;
9141
9142 VIR_DEBUG("proc=%p, emulator=%s", proc, proc->binary);
9143
9144 template = g_strdup_printf("%s/qmp-XXXXXX", proc->libDir);
9145
9146 if (!(proc->uniqDir = g_mkdtemp(template))) {
9147 virReportSystemError(errno,
9148 _("Failed to create unique directory with "
9149 "template '%s' for probing QEMU"),
9150 template);
9151 return -1;
9152 }
9153 /* if g_mkdtemp succeeds, proc->uniqDir is now the owner of
9154 * the string. Set template to NULL to avoid freeing
9155 * the memory in this case */
9156 template = NULL;
9157
9158 if (qemuProcessQEMULabelUniqPath(proc) < 0)
9159 return -1;
9160
9161 proc->monpath = g_strdup_printf("%s/%s", proc->uniqDir, "qmp.monitor");
9162
9163 proc->monarg = g_strdup_printf("unix:%s,server=on,wait=off", proc->monpath);
9164
9165 /*
9166 * Normally we'd use runDir for pid files, but because we're using
9167 * -daemonize we need QEMU to be allowed to create them, rather
9168 * than libvirtd. So we're using libDir which QEMU can write to
9169 */
9170 proc->pidfile = g_strdup_printf("%s/%s", proc->uniqDir, "qmp.pid");
9171
9172 return 0;
9173 }
9174
9175
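/**
 * qemuProcessQMPLaunch:
 * @proc: Stores process and connection state
 *
 * Build the probing command line, run it synchronously (QEMU detaches itself
 * via -daemonize) and read the PID of the daemonized process back from the
 * pidfile.
 */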
9176 static int
9177 qemuProcessQMPLaunch(qemuProcessQMP *proc)
9178 {
9179 const char *machine;
9180 int status = 0;
9181 int rc;
9182
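/* A colon-separated accel list makes QEMU try each accelerator in order and
 * use the first one that initializes; forcing TCG skips the hardware
 * accelerators entirely. */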
9183 if (proc->forceTCG)
9184 machine = "none,accel=tcg";
9185 else
9186 machine = "none,accel=nvmm:kvm:tcg";
9187
9188 VIR_DEBUG("Try to probe capabilities of '%s' via QMP, machine %s",
9189 proc->binary, machine);
9190
9191 /*
9192 * We explicitly need to use -daemonize here, rather than
9193 * virCommandDaemonize, because we need to synchronize
9194 * with QEMU creating its monitor socket. Using
9195 * -daemonize guarantees control won't return to libvirt
9196 * until the socket is present.
9197 */
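/* -S keeps vCPUs paused, -no-user-config/-nodefaults/-nographic avoid
 * loading host configuration or creating default devices, and
 * "-machine none" instantiates no guest hardware; the probe only needs a
 * QMP monitor. */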
9198 proc->cmd = virCommandNewArgList(proc->binary,
9199 "-S",
9200 "-no-user-config",
9201 "-nodefaults",
9202 "-nographic",
9203 "-machine", machine,
9204 "-qmp", proc->monarg,
9205 "-pidfile", proc->pidfile,
9206 "-daemonize",
9207 NULL);
9208 virCommandAddEnvPassCommon(proc->cmd);
9209 virCommandClearCaps(proc->cmd);
9210
9211 #if WITH_CAPNG
9212 /* QEMU might run into permission issues, e.g. /dev/sev (0600), override
9213 * them just for the purpose of probing */
9214 if (geteuid() == 0)
9215 virCommandAllowCap(proc->cmd, CAP_DAC_OVERRIDE);
9216 #endif
9217
9218 virCommandSetGID(proc->cmd, proc->runGid);
9219 virCommandSetUID(proc->cmd, proc->runUid);
9220
9221 virCommandSetErrorBuffer(proc->cmd, &(proc->stdErr));
9222
9223 if (virCommandRun(proc->cmd, &status) < 0)
9224 return -1;
9225
9226 if (status != 0) {
9227 VIR_DEBUG("QEMU %s exited with status %d", proc->binary, status);
9228 virReportError(VIR_ERR_INTERNAL_ERROR,
9229 _("Failed to start QEMU binary %s for probing: %s"),
9230 proc->binary,
9231 proc->stdErr ? proc->stdErr : _("unknown error"));
9232 return -1;
9233 }
9234
9235 if ((rc = virPidFileReadPath(proc->pidfile, &proc->pid)) < 0) {
9236 virReportSystemError(-rc, _("Failed to read pidfile %s"), proc->pidfile);
9237 return -1;
9238 }
9239
9240 return 0;
9241 }
9242
9243
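/**
 * qemuProcessQMPInitMonitor:
 * @mon: freshly opened monitor connection
 *
 * Perform the QMP capabilities negotiation that must precede any other
 * command on a new QMP connection.
 */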
9244 int
9245 qemuProcessQMPInitMonitor(qemuMonitor *mon)
9246 {
9247 if (qemuMonitorSetCapabilities(mon) < 0) {
9248 VIR_DEBUG("Failed to set monitor capabilities %s",
9249 virGetLastErrorMessage());
9250 return -1;
9251 }
9252
9253 return 0;
9254 }
9255
9256
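/**
 * qemuProcessQMPConnectMonitor:
 * @proc: Stores process and connection state
 *
 * Wrap the probing process in a minimal virDomainObj, connect to its QMP
 * socket and negotiate capabilities. On success the monitor is left locked
 * and ready for queries via proc->mon.
 */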
9257 static int
9258 qemuProcessQMPConnectMonitor(qemuProcessQMP *proc)
9259 {
9260 g_autoptr(virDomainXMLOption) xmlopt = NULL;
9261 virDomainChrSourceDef monConfig;
9262
9263 VIR_DEBUG("proc=%p, emulator=%s, proc->pid=%lld",
9264 proc, proc->binary, (long long)proc->pid);
9265
9266 monConfig.type = VIR_DOMAIN_CHR_TYPE_UNIX;
9267 monConfig.data.nix.path = proc->monpath;
9268 monConfig.data.nix.listen = false;
9269
9270 if (!(xmlopt = virDomainXMLOptionNew(NULL, NULL, NULL, NULL, NULL)) ||
9271 !(proc->vm = virDomainObjNew(xmlopt)) ||
9272 !(proc->vm->def = virDomainDefNew(xmlopt)))
9273 return -1;
9274
9275 proc->vm->pid = proc->pid;
9276
9277 if (!(proc->mon = qemuMonitorOpen(proc->vm, &monConfig, true, 0,
9278 virEventThreadGetContext(proc->eventThread),
9279 &callbacks, NULL)))
9280 return -1;
9281
9282 virObjectLock(proc->mon);
9283
9284 if (qemuProcessQMPInitMonitor(proc->mon) < 0)
9285 return -1;
9286
9287 return 0;
9288 }
9289
9290
9291 /**
9292 * qemuProcessQMPStart:
9293 * @proc: QEMU process and connection state created by qemuProcessQMPNew()
9294 *
9295 * Start and connect to QEMU binary so QMP queries can be made.
9296 *
9297 * Usage:
9298 * proc = qemuProcessQMPNew(binary, libDir, runUid, runGid, forceTCG);
9299 * qemuProcessQMPStart(proc);
9300 * ** Send QMP Queries to QEMU using monitor (proc->mon) **
9301 * qemuProcessQMPFree(proc);
9302 *
9303 * Process error output (proc->stdErr) remains available in the qemuProcessQMP
9304 * struct until qemuProcessQMPFree is called.
9305 */
9306 int
9307 qemuProcessQMPStart(qemuProcessQMP *proc)
9308 {
9309 VIR_DEBUG("proc=%p, emulator=%s", proc, proc->binary);
9310
9311 if (qemuProcessQMPInit(proc) < 0)
9312 return -1;
9313
9314 if (qemuProcessQMPLaunch(proc) < 0)
9315 return -1;
9316
9317 if (qemuProcessQMPConnectMonitor(proc) < 0)
9318 return -1;
9319
9320 return 0;
9321 }
9322
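/*
 * Illustrative caller sketch (not part of this file; the binary path and the
 * cfg fields below are assumptions, and cleanup on failure is left to
 * g_autoptr via qemuProcessQMPFree):
 *
 *     g_autoptr(qemuProcessQMP) proc = NULL;
 *
 *     if (!(proc = qemuProcessQMPNew("/usr/bin/qemu-system-x86_64",
 *                                    cfg->libDir, cfg->user, cfg->group,
 *                                    false)))
 *         return -1;
 *     if (qemuProcessQMPStart(proc) < 0)
 *         return -1;
 *     ... issue QMP queries on proc->mon, e.g. qemuMonitorGetVersion() ...
 */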