1 /*
2 * Copyright (C) 2010-2016 Red Hat, Inc.
3 * Copyright IBM Corp. 2008
4 *
5 * lxc_controller.c: linux container process controller
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library. If not, see
19 * <http://www.gnu.org/licenses/>.
20 */
21
22 #include <config.h>
23
24 #include <sys/epoll.h>
25 #include <sys/wait.h>
26
27 #ifdef __linux__
28 # include <sys/sysmacros.h>
29 #endif
30
31 #include <sys/personality.h>
32 #include <unistd.h>
33 #include <fcntl.h>
34 #include <signal.h>
35 #include <getopt.h>
36 #include <sys/mount.h>
37 #include <grp.h>
38 #include <sys/stat.h>
39 #include <time.h>
40
41 #if WITH_CAPNG
42 # include <cap-ng.h>
43 #endif
44
45 #include "virerror.h"
46 #include "virlog.h"
47
48 #include "lxc_conf.h"
49 #include "lxc_container.h"
50 #include "lxc_cgroup.h"
51 #include "lxc_monitor_protocol.h"
52 #include "lxc_fuse.h"
53 #include "virnetdev.h"
54 #include "virnetdevveth.h"
55 #include "viralloc.h"
56 #include "virfile.h"
57 #include "virgdbus.h"
58 #include "virpidfile.h"
59 #include "vircommand.h"
60 #include "virhostcpu.h"
61 #include "virrandom.h"
62 #include "virprocess.h"
63 #include "virnuma.h"
64 #include "rpc/virnetdaemon.h"
65 #include "virstring.h"
66 #include "virgettext.h"
67 #include "virsocket.h"
68 #include "virutil.h"
69
70 #define VIR_FROM_THIS VIR_FROM_LXC
71
72 VIR_LOG_INIT("lxc.lxc_controller");
73
/*
 * Per-console state: a host-side and a container-side PTY file
 * descriptor, the event-loop/epoll bookkeeping for each of them and
 * the buffers for data travelling in either direction.
 */
typedef struct _virLXCControllerConsole virLXCControllerConsole;
struct _virLXCControllerConsole {
    int hostWatch; /* event loop handle for hostFd, -1 when not registered */
    int hostFd; /* PTY FD in the host OS */
    bool hostClosed; /* when set, hostFd is monitored through epollFd
                      * instead of read/write events */
    int hostEpoll; /* epoll event mask registered for hostFd, 0 if none */

    int contWatch; /* event loop handle for contFd, -1 when not registered */
    int contFd; /* PTY FD in the container */
    bool contClosed; /* when set, contFd is monitored through epollFd
                      * instead of read/write events */
    int contEpoll; /* epoll event mask registered for contFd, 0 if none */

    int epollWatch; /* event loop handle for epollFd, -1 when not registered */
    int epollFd; /* epoll FD for dealing with EOF */

    size_t fromHostLen; /* fill level of fromHostBuf; non-zero makes the
                         * container side wait for writability */
    char fromHostBuf[1024];
    size_t fromContLen; /* fill level of fromContBuf; non-zero makes the
                         * host side wait for writability */
    char fromContBuf[1024];

    virNetDaemon *daemon; /* daemon asked to quit when epoll handling fails */
};
96
/*
 * Overall state of one LXC controller process.
 */
typedef struct _virLXCController virLXCController;
struct _virLXCController {
    char *name; /* copy of the name given to virLXCControllerNew(); used to
                 * build the config file and monitor socket paths */
    virDomainObj *vm; /* domain object parsed from the config file */
    virDomainDef *def; /* shortcut for vm->def */

    int handshakeFds[2]; /* { read FD, write FD } */

    pid_t initpid; /* 0 means there is no init process to stop */

    size_t nnbdpids;
    pid_t *nbdpids; /* PIDs collected for NBD devices; they are added to
                     * the cgroup together with initpid */

    size_t nveths;
    char **veths; /* count is validated against def->nnets */

    size_t nnicindexes;
    int *nicindexes; /* ifindexes handed over when the cgroup is created */

    size_t npassFDs;
    int *passFDs; /* closed when the controller is freed */

    int *nsFDs; /* array is freed (entries are not closed) on controller free */

    size_t nconsoles;
    virLXCControllerConsole *consoles;
    char *devptmx;

    size_t nloopDevs;
    int *loopDevFds; /* FDs returned when associating loop devices */

    virSecurityManager *securityManager;

    virNetDaemon *daemon;
    bool firstClient; /* true until the first monitor client connected */
    virNetServerClient *client; /* currently connected client or NULL */
    virNetServerProgram *prog;
    bool inShutdown; /* when set, a closing client arms timerShutdown */
    int timerShutdown; /* timeout id of the quit timer, -1 when unset */

    virCgroup *cgroup;

    struct virLXCFuse *fuse;
};
141
142 #include "lxc_controller_dispatch.h"
143
144 static void virLXCControllerFree(virLXCController *ctrl);
145 static int virLXCControllerEventSendInit(virLXCController *ctrl,
146 pid_t initpid);
147
/*
 * Timeout callback registered by virLXCControllerNew().
 *
 * @timer: id of the firing timer (unused)
 * @opaque: the virLXCController owning the timer
 *
 * Tells the controller's daemon to quit.
 */
static void
virLXCControllerQuitTimer(int timer G_GNUC_UNUSED,
                          void *opaque)
{
    virLXCController *controller = opaque;

    VIR_DEBUG("Triggering event loop quit");
    virNetDaemonQuit(controller->daemon);
}
155
156
157 static virLXCDriver *
virLXCControllerDriverNew(void)158 virLXCControllerDriverNew(void)
159 {
160 virLXCDriver *driver = g_new0(virLXCDriver, 1);
161
162 if (virMutexInit(&driver->lock) < 0) {
163 virReportError(VIR_ERR_INTERNAL_ERROR,
164 "%s", _("cannot initialize mutex"));
165 g_free(driver);
166 return NULL;
167 }
168
169 driver->caps = virLXCDriverCapsInit(NULL);
170 driver->xmlopt = lxcDomainXMLConfInit(driver, NULL);
171
172 return driver;
173 }
174
175
176 static void
virLXCControllerDriverFree(virLXCDriver * driver)177 virLXCControllerDriverFree(virLXCDriver *driver)
178 {
179 if (!driver)
180 return;
181 virObjectUnref(driver->xmlopt);
182 virObjectUnref(driver->caps);
183 virMutexDestroy(&driver->lock);
184 g_free(driver);
185 }
186
187
virLXCControllerNew(const char * name)188 static virLXCController *virLXCControllerNew(const char *name)
189 {
190 virLXCController *ctrl = g_new0(virLXCController, 1);
191 virLXCDriver *driver = NULL;
192 g_autofree char *configFile = NULL;
193
194 ctrl->timerShutdown = -1;
195 ctrl->firstClient = true;
196 ctrl->name = g_strdup(name);
197 ctrl->handshakeFds[0] = -1;
198 ctrl->handshakeFds[1] = -1;
199
200 if (!(driver = virLXCControllerDriverNew()))
201 goto error;
202
203 if ((configFile = virDomainConfigFile(LXC_STATE_DIR,
204 ctrl->name)) == NULL)
205 goto error;
206
207 if ((ctrl->vm = virDomainObjParseFile(configFile,
208 driver->xmlopt,
209 0)) == NULL)
210 goto error;
211 ctrl->def = ctrl->vm->def;
212
213 if ((ctrl->timerShutdown = virEventAddTimeout(-1,
214 virLXCControllerQuitTimer, ctrl,
215 NULL)) < 0)
216 goto error;
217
218 cleanup:
219 virLXCControllerDriverFree(driver);
220 return ctrl;
221
222 error:
223 virLXCControllerFree(ctrl);
224 ctrl = NULL;
225 goto cleanup;
226 }
227
228
/*
 * Closes every loop device FD recorded in @ctrl.
 *
 * Always returns 0.
 */
static int
virLXCControllerCloseLoopDevices(virLXCController *ctrl)
{
    size_t idx = 0;

    while (idx < ctrl->nloopDevs) {
        VIR_FORCE_CLOSE(ctrl->loopDevFds[idx]);
        idx++;
    }

    return 0;
}
238
239
/*
 * Stops the container init process, if one is recorded: the loop
 * device FDs are closed, the process is aborted and initpid is
 * reset to 0.
 */
static void
virLXCControllerStopInit(virLXCController *ctrl)
{
    if (ctrl->initpid != 0) {
        virLXCControllerCloseLoopDevices(ctrl);
        virProcessAbort(ctrl->initpid);
        ctrl->initpid = 0;
    }
}
249
250
/*
 * Tears down one console: every registered event handle is removed
 * and the host, container and epoll FDs are closed.
 */
static void
virLXCControllerConsoleClose(virLXCControllerConsole *console)
{
    /* host side PTY */
    if (console->hostWatch != -1) {
        virEventRemoveHandle(console->hostWatch);
    }
    VIR_FORCE_CLOSE(console->hostFd);

    /* container side PTY */
    if (console->contWatch != -1) {
        virEventRemoveHandle(console->contWatch);
    }
    VIR_FORCE_CLOSE(console->contFd);

    /* epoll instance */
    if (console->epollWatch != -1) {
        virEventRemoveHandle(console->epollWatch);
    }
    VIR_FORCE_CLOSE(console->epollFd);
}
265
266
267 static void
virLXCControllerFreeFuse(virLXCController * ctrl)268 virLXCControllerFreeFuse(virLXCController *ctrl)
269 {
270 return lxcFreeFuse(&ctrl->fuse);
271 }
272
273
/*
 * Releases every resource owned by @ctrl and then @ctrl itself.
 * Passing NULL is a no-op.
 *
 * The init process (if still recorded) is stopped first; the
 * handshake FDs are deliberately closed last.
 */
static void virLXCControllerFree(virLXCController *ctrl)
{
    size_t i;

    if (!ctrl)
        return;

    virLXCControllerStopInit(ctrl);

    /* StopInit only closes the loop devices when an init process was
     * recorded, so make sure they are closed on every path and release
     * the array itself, which was previously leaked. Closing is
     * idempotent because VIR_FORCE_CLOSE resets each FD. */
    virLXCControllerCloseLoopDevices(ctrl);
    g_free(ctrl->loopDevFds);

    virObjectUnref(ctrl->securityManager);

    for (i = 0; i < ctrl->nveths; i++)
        g_free(ctrl->veths[i]);
    g_free(ctrl->veths);
    g_free(ctrl->nicindexes);

    for (i = 0; i < ctrl->npassFDs; i++)
        VIR_FORCE_CLOSE(ctrl->passFDs[i]);
    g_free(ctrl->passFDs);

    for (i = 0; i < ctrl->nconsoles; i++)
        virLXCControllerConsoleClose(&(ctrl->consoles[i]));
    g_free(ctrl->consoles);

    g_free(ctrl->devptmx);

    virDomainObjEndAPI(&ctrl->vm);
    g_free(ctrl->name);

    if (ctrl->timerShutdown != -1)
        virEventRemoveTimeout(ctrl->timerShutdown);

    virObjectUnref(ctrl->daemon);
    /* The RPC program reference is held only by the controller */
    virObjectUnref(ctrl->prog);
    virLXCControllerFreeFuse(ctrl);

    g_free(ctrl->nbdpids);

    g_free(ctrl->nsFDs);
    virCgroupFree(ctrl->cgroup);

    /* This must always be the last thing to be closed */
    for (i = 0; i < G_N_ELEMENTS(ctrl->handshakeFds); i++)
        VIR_FORCE_CLOSE(ctrl->handshakeFds[i]);
    g_free(ctrl);
}
319
320
/*
 * Appends a console backed by @hostFd to @ctrl.
 *
 * The container side and epoll FDs as well as all event handles of
 * the new entry start out as -1.
 *
 * Always returns 0.
 */
static int
virLXCControllerAddConsole(virLXCController *ctrl,
                           int hostFd)
{
    virLXCControllerConsole *console;

    VIR_EXPAND_N(ctrl->consoles, ctrl->nconsoles, 1);
    console = &ctrl->consoles[ctrl->nconsoles - 1];

    console->daemon = ctrl->daemon;

    console->hostFd = hostFd;
    console->hostWatch = -1;

    console->contFd = -1;
    console->contWatch = -1;

    console->epollFd = -1;
    console->epollWatch = -1;

    return 0;
}
336
337
/*
 * Switches the host and the container FD of @console to non-blocking
 * mode. The container FD is not touched when the host FD already failed.
 *
 * Returns 0 on success, -1 (with an error reported) on failure.
 */
static int
virLXCControllerConsoleSetNonblocking(virLXCControllerConsole *console)
{
    int rc = virSetBlocking(console->hostFd, false);

    if (rc >= 0)
        rc = virSetBlocking(console->contFd, false);

    if (rc >= 0)
        return 0;

    virReportSystemError(errno, "%s",
                         _("Unable to set console file descriptor non-blocking"));
    return -1;
}
349
350
/*
 * Sends the "continue" message over the write end of the handshake
 * FD pair.
 *
 * Returns 0 on success, -1 (with an error reported) on failure.
 */
static int
virLXCControllerDaemonHandshakeCont(virLXCController *ctrl)
{
    if (lxcContainerSendContinue(ctrl->handshakeFds[1]) >= 0)
        return 0;

    virReportSystemError(errno, "%s",
                         _("error sending continue signal to daemon"));
    return -1;
}
360
/*
 * Waits for the "continue" message on the read end of the handshake
 * FD pair.
 *
 * Returns 0 on success, -1 (with an error reported) on failure.
 */
static int
virLXCControllerDaemonHandshakeWait(virLXCController *ctrl)
{
    if (lxcContainerWaitForContinue(ctrl->handshakeFds[0]) >= 0)
        return 0;

    virReportSystemError(errno, "%s",
                         _("error waiting for continue signal from daemon"));
    return -1;
}
370
/*
 * Checks that the number of veths handed to the controller matches
 * the number of network interfaces in the domain definition.
 *
 * Returns 0 when they match, -1 (with an error reported) otherwise.
 */
static int
virLXCControllerValidateNICs(virLXCController *ctrl)
{
    if (ctrl->def->nnets == ctrl->nveths)
        return 0;

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("expecting %zu veths, but got %zu"),
                   ctrl->def->nnets, ctrl->nveths);
    return -1;
}
382
383
/*
 * Collects the ifindexes of the host-side ("parent") veth devices.
 *
 * Only interfaces that stay outside the container but carry its
 * traffic belong in the list; it is used later when calling
 * virCgroupNewMachine (and eventually the dbus method
 * CreateMachineWithNetwork). Child veths and macvlan interfaces
 * *should not* be listed since they get moved into the container.
 *
 * Returns 0 on success, -1 on failure or for an unsupported
 * interface type.
 */
static int
virLXCControllerGetNICIndexes(virLXCController *ctrl)
{
    size_t n;

    VIR_DEBUG("Getting nic indexes");
    for (n = 0; n < ctrl->def->nnets; n++) {
        virDomainNetType type = virDomainNetGetActualType(ctrl->def->nets[n]);
        const char *ifname = ctrl->def->nets[n]->ifname;
        int ifindex = -1;

        switch (type) {
        case VIR_DOMAIN_NET_TYPE_BRIDGE:
        case VIR_DOMAIN_NET_TYPE_NETWORK:
        case VIR_DOMAIN_NET_TYPE_ETHERNET:
            /* nothing to look up without a device name */
            if (ifname == NULL)
                continue;
            if (virNetDevGetIndex(ifname, &ifindex) < 0)
                return -1;
            VIR_EXPAND_N(ctrl->nicindexes, ctrl->nnicindexes, 1);
            VIR_DEBUG("Index %d for %s", ifindex, ifname);
            ctrl->nicindexes[ctrl->nnicindexes - 1] = ifindex;
            break;

        case VIR_DOMAIN_NET_TYPE_DIRECT:
            /* macvlan: deliberately not recorded */
            break;

        case VIR_DOMAIN_NET_TYPE_USER:
        case VIR_DOMAIN_NET_TYPE_VHOSTUSER:
        case VIR_DOMAIN_NET_TYPE_SERVER:
        case VIR_DOMAIN_NET_TYPE_CLIENT:
        case VIR_DOMAIN_NET_TYPE_MCAST:
        case VIR_DOMAIN_NET_TYPE_UDP:
        case VIR_DOMAIN_NET_TYPE_INTERNAL:
        case VIR_DOMAIN_NET_TYPE_HOSTDEV:
        case VIR_DOMAIN_NET_TYPE_VDPA:
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("Unsupported net type %s"),
                           virDomainNetTypeToString(type));
            return -1;

        case VIR_DOMAIN_NET_TYPE_LAST:
        default:
            virReportEnumRangeError(virDomainNetType, type);
            return -1;
        }
    }

    return 0;
}
443
444
/*
 * Checks that one tty FD was supplied for every console in the
 * domain definition.
 *
 * Returns 0 when the counts match, -1 (with an error reported)
 * otherwise.
 */
static int
virLXCControllerValidateConsoles(virLXCController *ctrl)
{
    if (ctrl->def->nconsoles == ctrl->nconsoles)
        return 0;

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("expecting %zu consoles, but got %zu tty file handlers"),
                   ctrl->def->nconsoles, ctrl->nconsoles);
    return -1;
}
456
457
/*
 * Associates the file backing @fs with a loop device and rewrites
 * @fs into a block-type filesystem pointing at that device.
 *
 * Returns the loop device FD on success, -1 on failure.
 */
static int
virLXCControllerSetupLoopDeviceFS(virDomainFSDef *fs)
{
    char *loopName = NULL;
    int loopFd = virFileLoopDeviceAssociate(fs->src->path, &loopName);

    if (loopFd < 0)
        return -1;

    VIR_DEBUG("Changing fs %s to use type=block for dev %s",
              fs->src->path, loopName);

    /* From here on the filesystem is a plain block device, so the
     * rest of the container setup needs no special casing. */
    fs->type = VIR_DOMAIN_FS_TYPE_BLOCK;
    g_free(fs->src->path);
    fs->src->path = g_steal_pointer(&loopName);

    return loopFd;
}
478
479
/*
 * Associates the file backing @disk with a loop device and rewrites
 * @disk into a block-type disk whose source is that device.
 *
 * Returns the loop device FD on success, -1 on failure.
 */
static int
virLXCControllerSetupLoopDeviceDisk(virDomainDiskDef *disk)
{
    g_autofree char *loopName = NULL;
    const char *origSrc = virDomainDiskGetSource(disk);
    int loopFd = virFileLoopDeviceAssociate(origSrc, &loopName);

    if (loopFd < 0)
        return -1;

    VIR_DEBUG("Changing disk %s to use type=block for dev %s",
              origSrc, loopName);

    /* From here on the disk is a plain block device, so the rest of
     * the container setup needs no special casing. */
    virDomainDiskSetType(disk, VIR_STORAGE_TYPE_BLOCK);
    virDomainDiskSetSource(disk, loopName);

    return loopFd;
}
502
503
/*
 * Exports the image backing @fs through an NBD device and rewrites
 * @fs into a block-type filesystem pointing at that device.
 *
 * An explicit image format is mandatory.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virLXCControllerSetupNBDDeviceFS(virDomainFSDef *fs)
{
    char *nbdDev = NULL;
    int rc;

    if (fs->format <= VIR_STORAGE_FILE_NONE) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("An explicit disk format must be specified"));
        return -1;
    }

    rc = virFileNBDDeviceAssociate(fs->src->path,
                                   virStorageFileFormatTypeToString(fs->format),
                                   fs->readonly,
                                   &nbdDev);
    if (rc < 0)
        return -1;

    VIR_DEBUG("Changing fs %s to use type=block for dev %s",
              fs->src->path, nbdDev);

    /* From here on the filesystem is a plain block device, so the
     * rest of the container setup needs no special casing. */
    fs->type = VIR_DOMAIN_FS_TYPE_BLOCK;
    g_free(fs->src->path);
    fs->src->path = nbdDev;

    return 0;
}
532
533
/*
 * Exports the image backing @disk through an NBD device and rewrites
 * @disk into a block-type disk whose source is that device.
 *
 * An explicit image format is mandatory.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virLXCControllerSetupNBDDeviceDisk(virDomainDiskDef *disk)
{
    g_autofree char *nbdDev = NULL;
    const char *origSrc = virDomainDiskGetSource(disk);
    int fmt = virDomainDiskGetFormat(disk);
    int rc;

    if (fmt <= VIR_STORAGE_FILE_NONE) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("An explicit disk format must be specified"));
        return -1;
    }

    rc = virFileNBDDeviceAssociate(origSrc,
                                   virStorageFileFormatTypeToString(fmt),
                                   disk->src->readonly,
                                   &nbdDev);
    if (rc < 0)
        return -1;

    VIR_DEBUG("Changing disk %s to use type=block for dev %s",
              origSrc, nbdDev);

    /* From here on the disk is a plain block device, so the rest of
     * the container setup needs no special casing. */
    virDomainDiskSetType(disk, VIR_STORAGE_TYPE_BLOCK);
    virDomainDiskSetSource(disk, nbdDev);

    return 0;
}
563
/*
 * Looks up the process serving the NBD device @dev (a "/dev/..." path)
 * through its sysfs "pid" file and appends the PIDs obtained for it
 * to ctrl->nbdpids.
 *
 * The sysfs file is polled for up to ten 100ms intervals before
 * giving up.
 *
 * Returns 0 on success, -1 (with an error reported) on failure.
 */
static int virLXCControllerAppendNBDPids(virLXCController *ctrl,
                                         const char *dev)
{
    g_autofree char *pidpath = NULL;
    g_autofree pid_t *pids = NULL;
    size_t npids = 0;
    size_t i;
    size_t loops = 0;
    pid_t pid;

    if (!STRPREFIX(dev, "/dev/")) {
        /* Previously this path failed without telling anyone why */
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("NBD device path '%s' does not start with /dev/"),
                       dev);
        return -1;
    }

    /* dev + 5 skips the "/dev/" prefix, leaving the bare device name */
    pidpath = g_strdup_printf("/sys/devices/virtual/block/%s/pid", dev + 5);

    /* Wait for the pid file to appear */
    while (!virFileExists(pidpath)) {
        /* wait for 100ms before checking again, but don't do it for ever */
        if (errno == ENOENT && loops < 10) {
            g_usleep(100 * 1000);
            loops++;
        } else {
            virReportSystemError(errno,
                                 _("Cannot check NBD device %s pid"),
                                 dev + 5);
            return -1;
        }
    }

    if (virPidFileReadPath(pidpath, &pid) < 0)
        return -1;

    if (virProcessGetPids(pid, &npids, &pids) < 0)
        return -1;

    for (i = 0; i < npids; i++) {
        VIR_APPEND_ELEMENT(ctrl->nbdpids, ctrl->nnbdpids, pids[i]);
    }

    return 0;
}
605
virLXCControllerSetupLoopDevices(virLXCController * ctrl)606 static int virLXCControllerSetupLoopDevices(virLXCController *ctrl)
607 {
608 size_t i;
609
610 VIR_DEBUG("Setting up loop devices for filesystems");
611
612 for (i = 0; i < ctrl->def->nfss; i++) {
613 virDomainFSDef *fs = ctrl->def->fss[i];
614 int fd;
615
616 if (fs->type != VIR_DOMAIN_FS_TYPE_FILE)
617 continue;
618
619 if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_DEFAULT) {
620 if (fs->format == VIR_STORAGE_FILE_RAW ||
621 fs->format == VIR_STORAGE_FILE_NONE)
622 fs->fsdriver = VIR_DOMAIN_FS_DRIVER_TYPE_LOOP;
623 else
624 fs->fsdriver = VIR_DOMAIN_FS_DRIVER_TYPE_NBD;
625 }
626
627 if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_LOOP) {
628 if (fs->format != VIR_STORAGE_FILE_RAW &&
629 fs->format != VIR_STORAGE_FILE_NONE) {
630 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
631 _("fs format %s is not supported"),
632 virStorageFileFormatTypeToString(fs->format));
633 return -1;
634 }
635
636 fd = virLXCControllerSetupLoopDeviceFS(fs);
637 if (fd < 0)
638 return -1;
639
640 VIR_DEBUG("Saving loop fd %d", fd);
641 VIR_EXPAND_N(ctrl->loopDevFds, ctrl->nloopDevs, 1);
642 ctrl->loopDevFds[ctrl->nloopDevs - 1] = fd;
643 } else if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_NBD) {
644 if (virLXCControllerSetupNBDDeviceFS(fs) < 0)
645 return -1;
646
647 /* The NBD device will be cleaned up while the cgroup will end.
648 * For this we need to remember the qemu-nbd pid and add it to
649 * the cgroup */
650 if (virLXCControllerAppendNBDPids(ctrl, fs->src->path) < 0)
651 return -1;
652 } else {
653 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
654 _("fs driver %s is not supported"),
655 virDomainFSDriverTypeToString(fs->fsdriver));
656 return -1;
657 }
658 }
659
660 VIR_DEBUG("Setting up loop devices for disks");
661
662 for (i = 0; i < ctrl->def->ndisks; i++) {
663 virDomainDiskDef *disk = ctrl->def->disks[i];
664 int fd;
665 const char *driver = virDomainDiskGetDriver(disk);
666 int format = virDomainDiskGetFormat(disk);
667
668 if (virDomainDiskGetType(disk) != VIR_STORAGE_TYPE_FILE)
669 continue;
670
671 /* If no driverName is set, we prefer 'loop' for
672 * dealing with raw or undefined formats, otherwise
673 * we use 'nbd'.
674 */
675 if (STREQ_NULLABLE(driver, "loop") ||
676 (!driver &&
677 (format == VIR_STORAGE_FILE_RAW ||
678 format == VIR_STORAGE_FILE_NONE))) {
679 if (format != VIR_STORAGE_FILE_RAW &&
680 format != VIR_STORAGE_FILE_NONE) {
681 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
682 _("disk format %s is not supported"),
683 virStorageFileFormatTypeToString(format));
684 return -1;
685 }
686
687 /* We treat 'none' as meaning 'raw' since we
688 * don't want to go into the auto-probing
689 * business for security reasons
690 */
691 fd = virLXCControllerSetupLoopDeviceDisk(disk);
692 if (fd < 0)
693 return -1;
694
695 VIR_DEBUG("Saving loop fd %d", fd);
696 VIR_EXPAND_N(ctrl->loopDevFds, ctrl->nloopDevs, 1);
697 ctrl->loopDevFds[ctrl->nloopDevs - 1] = fd;
698 } else if (!driver || STREQ(driver, "nbd")) {
699 if (disk->cachemode != VIR_DOMAIN_DISK_CACHE_DEFAULT &&
700 disk->cachemode != VIR_DOMAIN_DISK_CACHE_DISABLE) {
701 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
702 _("Disk cache mode %s is not supported"),
703 virDomainDiskCacheTypeToString(disk->cachemode));
704 return -1;
705 }
706 if (virLXCControllerSetupNBDDeviceDisk(disk) < 0)
707 return -1;
708
709 /* The NBD device will be cleaned up while the cgroup will end.
710 * For this we need to remember the qemu-nbd pid and add it to
711 * the cgroup */
712 if (virLXCControllerAppendNBDPids(ctrl, virDomainDiskGetSource(disk)) < 0)
713 return -1;
714 } else {
715 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
716 _("disk driver %s is not supported"),
717 driver);
718 return -1;
719 }
720 }
721
722 VIR_DEBUG("Setup all loop devices");
723
724 return 0;
725 }
726
727
728 /*
729 * To be run while still single threaded
730 */
virLXCControllerSetupCpuAffinity(virLXCController * ctrl)731 static int virLXCControllerSetupCpuAffinity(virLXCController *ctrl)
732 {
733 int hostcpus, maxcpu = CPU_SETSIZE;
734 virBitmap *cpumap;
735 virBitmap *cpumapToSet;
736
737 VIR_DEBUG("Setting CPU affinity");
738
739 /* setaffinity fails if you set bits for CPUs which
740 * aren't present, so we have to limit ourselves */
741 if ((hostcpus = virHostCPUGetCount()) < 0)
742 return -1;
743
744 if (maxcpu > hostcpus)
745 maxcpu = hostcpus;
746
747 cpumap = virBitmapNew(maxcpu);
748 cpumapToSet = cpumap;
749
750 if (ctrl->def->cpumask) {
751 cpumapToSet = ctrl->def->cpumask;
752 } else {
753 /* You may think this is redundant, but we can't assume libvirtd
754 * itself is running on all pCPUs, so we need to explicitly set
755 * the spawned LXC instance to all pCPUs if no map is given in
756 * its config file */
757 virBitmapSetAll(cpumap);
758 }
759
760 /* We are presuming we are running between fork/exec of LXC
761 * so use '0' to indicate our own process ID. No threads are
762 * running at this point
763 */
764 if (virProcessSetAffinity(0 /* Self */, cpumapToSet, false) < 0) {
765 virBitmapFree(cpumap);
766 return -1;
767 }
768 virBitmapFree(cpumap);
769
770 return 0;
771 }
772
773
/*
 * Fetches the advisory nodeset from numad when the 'placement' of
 * either <vcpu> or <numatune> is 'auto'.
 *
 * @mask: set to the parsed nodeset, or to NULL when no advice is
 *        needed; left untouched on failure
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virLXCControllerGetNumadAdvice(virLXCController *ctrl,
                               virBitmap **mask)
{
    g_autofree char *advice = NULL;
    virBitmap *parsed = NULL;

    if (!virDomainDefNeedsPlacementAdvice(ctrl->def)) {
        *mask = NULL;
        return 0;
    }

    advice = virNumaGetAutoPlacementAdvice(virDomainDefGetVcpus(ctrl->def),
                                           ctrl->def->mem.cur_balloon);
    if (!advice)
        return -1;

    VIR_DEBUG("Nodeset returned from numad: %s", advice);

    if (virBitmapParse(advice, &parsed, VIR_DOMAIN_CPUMASK_LEN) < 0)
        return -1;

    *mask = parsed;
    return 0;
}
799
800
801 /**
802 * virLXCControllerSetupResourceLimits
803 * @ctrl: the controller state
804 *
805 * Sets up the non-cgroup based resource limits that need
806 * to be inherited by the child process across clone()/exec().
807 * The cgroup limits are setup later
808 *
809 * Returns 0 on success or -1 in case of error
810 */
virLXCControllerSetupResourceLimits(virLXCController * ctrl)811 static int virLXCControllerSetupResourceLimits(virLXCController *ctrl)
812 {
813 virBitmap *auto_nodeset = NULL;
814 int ret = -1;
815 virBitmap *nodeset = NULL;
816 virDomainNumatuneMemMode mode;
817
818 if (virDomainNumatuneGetMode(ctrl->def->numa, -1, &mode) == 0) {
819 if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
820 virCgroupControllerAvailable(VIR_CGROUP_CONTROLLER_CPUSET)) {
821 /* Use virNuma* API iff necessary. Once set and child is exec()-ed,
822 * there's no way for us to change it. Rely on cgroups (if available
823 * and enabled in the config) rather than virNuma*. */
824 VIR_DEBUG("Relying on CGroups for memory binding");
825 } else {
826
827 VIR_DEBUG("Setting up process resource limits");
828
829 if (virLXCControllerGetNumadAdvice(ctrl, &auto_nodeset) < 0)
830 goto cleanup;
831
832 nodeset = virDomainNumatuneGetNodeset(ctrl->def->numa, auto_nodeset, -1);
833
834 if (virNumaSetupMemoryPolicy(mode, nodeset) < 0)
835 goto cleanup;
836 }
837 }
838
839 if (virLXCControllerSetupCpuAffinity(ctrl) < 0)
840 goto cleanup;
841
842 ret = 0;
843 cleanup:
844 virBitmapFree(auto_nodeset);
845 return ret;
846 }
847
848
849 /*
850 * Creates the cgroup and sets up the various limits associated
851 * with it
852 */
virLXCControllerSetupCgroupLimits(virLXCController * ctrl)853 static int virLXCControllerSetupCgroupLimits(virLXCController *ctrl)
854 {
855 virBitmap *auto_nodeset = NULL;
856 int ret = -1;
857 virBitmap *nodeset = NULL;
858 size_t i;
859
860 VIR_DEBUG("Setting up cgroup resource limits");
861
862 if (virLXCControllerGetNumadAdvice(ctrl, &auto_nodeset) < 0)
863 goto cleanup;
864
865 nodeset = virDomainNumatuneGetNodeset(ctrl->def->numa, auto_nodeset, -1);
866
867 if (!(ctrl->cgroup = virLXCCgroupCreate(ctrl->def,
868 getpid(),
869 ctrl->nnicindexes,
870 ctrl->nicindexes)))
871 goto cleanup;
872
873 if (virCgroupAddMachineProcess(ctrl->cgroup, ctrl->initpid) < 0)
874 goto cleanup;
875
876 /* Add all qemu-nbd tasks to the cgroup */
877 for (i = 0; i < ctrl->nnbdpids; i++) {
878 if (virCgroupAddMachineProcess(ctrl->cgroup, ctrl->nbdpids[i]) < 0)
879 goto cleanup;
880 }
881
882 if (virLXCCgroupSetup(ctrl->def, ctrl->cgroup, nodeset) < 0)
883 goto cleanup;
884
885 ret = 0;
886 cleanup:
887 virBitmapFree(auto_nodeset);
888 return ret;
889 }
890
891
/*
 * Close hook for monitor clients: forgets @client if it is the one
 * currently tracked and, when a shutdown is in progress, arms the
 * quit timer.
 */
static void
virLXCControllerClientCloseHook(virNetServerClient *client)
{
    virLXCController *controller = virNetServerClientGetPrivateData(client);

    VIR_DEBUG("Client %p has closed", client);

    if (controller->client == client)
        controller->client = NULL;

    if (!controller->inShutdown)
        return;

    VIR_DEBUG("Arm timer to quit event loop");
    virEventUpdateTimeout(controller->timerShutdown, 0);
}
904
virLXCControllerClientPrivateFree(void * data)905 static void virLXCControllerClientPrivateFree(void *data)
906 {
907 virLXCController *ctrl = data;
908 VIR_DEBUG("Got private data free %p", ctrl);
909 }
910
/*
 * Private data constructor for monitor clients.
 *
 * Installs the close hook, records @client as the current client and,
 * for the very first client, sends the init event if the init PID is
 * already known.
 *
 * Returns the controller (@opaque), which serves as the private data.
 */
static void *
virLXCControllerClientPrivateNew(virNetServerClient *client,
                                 void *opaque)
{
    virLXCController *controller = opaque;

    virNetServerClientSetCloseHook(client, virLXCControllerClientCloseHook);
    VIR_DEBUG("Got new client %p", client);
    controller->client = client;

    if (controller->initpid && controller->firstClient) {
        virLXCControllerEventSendInit(controller, controller->initpid);
    }
    controller->firstClient = false;

    return controller;
}
926
927
/*
 * Creates the monitor RPC server for the controller: a UNIX socket
 * service at LXC_STATE_DIR/<name>.sock, the monitor program and the
 * daemon driving them.
 *
 * The socket is created between setting and clearing the security
 * manager's socket label.
 *
 * Returns 0 on success, -1 on failure (ctrl->daemon is NULL then).
 */
static int
virLXCControllerSetupServer(virLXCController *ctrl)
{
    g_autofree char *sockpath = g_strdup_printf("%s/%s.sock",
                                                LXC_STATE_DIR, ctrl->name);
    virNetServer *srv = NULL;
    virNetServerService *svc = NULL;

    srv = virNetServerNew("LXC", 1,
                          0, 0, 0, 1,
                          0, -1, 0,
                          virLXCControllerClientPrivateNew,
                          NULL,
                          virLXCControllerClientPrivateFree,
                          ctrl);
    if (!srv)
        goto error;

    if (virSecurityManagerSetSocketLabel(ctrl->securityManager, ctrl->def) < 0)
        goto error;

    svc = virNetServerServiceNewUNIX(sockpath,
                                     0700,
                                     0,
                                     0,
                                     NULL,
                                     false,
                                     0,
                                     5);
    if (!svc)
        goto error;

    if (virSecurityManagerClearSocketLabel(ctrl->securityManager, ctrl->def) < 0)
        goto error;

    if (virNetServerAddService(srv, svc) < 0)
        goto error;
    /* Drop our reference and reset the pointer so the error path
     * cannot unref the service a second time. */
    virObjectUnref(svc);
    svc = NULL;

    ctrl->prog = virNetServerProgramNew(VIR_LXC_MONITOR_PROGRAM,
                                        VIR_LXC_MONITOR_PROGRAM_VERSION,
                                        virLXCMonitorProcs,
                                        virLXCMonitorNProcs);
    if (!ctrl->prog)
        goto error;

    ctrl->daemon = virNetDaemonNew();
    if (!ctrl->daemon)
        goto error;

    if (virNetDaemonAddServer(ctrl->daemon, srv) < 0)
        goto error;

    virNetDaemonUpdateServices(ctrl->daemon, true);
    return 0;

 error:
    virObjectUnref(srv);
    virObjectUnref(ctrl->daemon);
    ctrl->daemon = NULL;
    virObjectUnref(svc);
    return -1;
}
986
987
/*
 * Clears and applies an empty capability set through libcap-ng.
 * When built without libcap-ng only a warning is logged.
 *
 * Returns 0 on success (or without libcap-ng), -1 when applying
 * the capabilities fails.
 */
static int
lxcControllerClearCapabilities(void)
{
#if WITH_CAPNG
    int rc;

    capng_clear(CAPNG_SELECT_BOTH);

    rc = capng_apply(CAPNG_SELECT_BOTH);
    if (rc < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("failed to apply capabilities: %d"), rc);
        return -1;
    }
#else
    VIR_WARN("libcap-ng support not compiled in, unable to clear capabilities");
#endif
    return 0;
}
1005
/* Set when the container init process was terminated by SIGHUP,
 * which the SIGCHLD handler treats as a reboot request. */
static bool wantReboot;
/* Held while wantReboot is updated and while console epoll events
 * are processed. */
static virMutex lock = VIR_MUTEX_INITIALIZER;
1008
1009
/*
 * SIGCHLD handler.
 *
 * @dmn: daemon to stop once the container init process has exited
 * @info: signal details (unused)
 * @opaque: the virLXCController
 *
 * Reaps every child that has already exited. When one of them is the
 * container init process the daemon is asked to quit, and a
 * termination by SIGHUP is recorded as a reboot request.
 */
static void virLXCControllerSignalChildIO(virNetDaemon *dmn,
                                          siginfo_t *info G_GNUC_UNUSED,
                                          void *opaque)
{
    virLXCController *ctrl = opaque;
    int ret;
    int status;

    /* Several child exits can be folded into a single SIGCHLD, so keep
     * reaping until nothing is left; a single waitpid() could pick up
     * some other child and leave the init exit unnoticed. Looping on
     * 'ret > 0' also guarantees 'status' is only read after waitpid()
     * actually filled it in. */
    while ((ret = waitpid(-1, &status, WNOHANG)) > 0) {
        VIR_DEBUG("Got sig child %d vs %lld", ret, (long long)ctrl->initpid);
        if (ret != ctrl->initpid)
            continue;

        virNetDaemonQuit(dmn);
        virMutexLock(&lock);
        if (WIFSIGNALED(status) &&
            WTERMSIG(status) == SIGHUP) {
            VIR_DEBUG("Status indicates reboot");
            wantReboot = true;
        }
        virMutexUnlock(&lock);
    }
}
1031
1032
/*
 * Brings the epoll registration of one console side in line with
 * its "closed" state.
 *
 * @console: console owning the epoll FD
 * @side: label used in debug messages ("host" or "container")
 * @fd: PTY FD of that side
 * @closed: whether that side is currently marked closed
 * @pendingLen: amount of data waiting to be written to that side
 * @curEvents: epoll event mask currently registered (0 if none),
 *             updated on success
 *
 * While the side is closed, @fd is watched edge-triggered for input
 * (and for output when data is pending). Once it is open again the
 * registration is removed.
 *
 * Returns 0 on success; on failure the error is reported, the daemon
 * is asked to quit and -1 is returned.
 */
static int
virLXCControllerConsoleSyncEpoll(virLXCControllerConsole *console,
                                 const char *side,
                                 int fd,
                                 bool closed,
                                 size_t pendingLen,
                                 int *curEvents)
{
    if (closed) {
        struct epoll_event event;
        int events = EPOLLIN | EPOLLET;
        int action = EPOLL_CTL_ADD;

        if (pendingLen)
            events |= EPOLLOUT;

        if (events == *curEvents)
            return 0;

        /* A non-zero mask means the FD is already registered */
        if (*curEvents)
            action = EPOLL_CTL_MOD;

        VIR_DEBUG("%s epoll newEvents=%x oldEvents=%x",
                  side, events, *curEvents);

        event.events = events;
        event.data.fd = fd;
        if (epoll_ctl(console->epollFd, action, fd, &event) < 0) {
            virReportSystemError(errno, "%s",
                                 _("Unable to add epoll fd"));
            VIR_DEBUG(":fail");
            virNetDaemonQuit(console->daemon);
            return -1;
        }
        *curEvents = events;
        return 0;
    }

    if (*curEvents) {
        VIR_DEBUG("Stop %s epoll oldEvents=%x", side, *curEvents);
        if (epoll_ctl(console->epollFd, EPOLL_CTL_DEL, fd, NULL) < 0) {
            virReportSystemError(errno, "%s",
                                 _("Unable to remove epoll fd"));
            VIR_DEBUG(":fail");
            virNetDaemonQuit(console->daemon);
            return -1;
        }
        *curEvents = 0;
    }

    return 0;
}


/*
 * Recomputes which events the event loop and the epoll FD should
 * watch for on both sides of @console.
 *
 * An open side is watched for readability while its receive buffer
 * has room and for writability while the opposite buffer holds data.
 * A closed side is watched through epoll instead. If updating epoll
 * fails the daemon is asked to quit and the remaining updates are
 * skipped.
 */
static void virLXCControllerConsoleUpdateWatch(virLXCControllerConsole *console)
{
    int hostEvents = 0;
    int contEvents = 0;

    /* If host console is open, then we can look to read/write */
    if (!console->hostClosed) {
        if (console->fromHostLen < sizeof(console->fromHostBuf))
            hostEvents |= VIR_EVENT_HANDLE_READABLE;
        if (console->fromContLen)
            hostEvents |= VIR_EVENT_HANDLE_WRITABLE;
    }

    /* If cont console is open, then we can look to read/write */
    if (!console->contClosed) {
        if (console->fromContLen < sizeof(console->fromContBuf))
            contEvents |= VIR_EVENT_HANDLE_READABLE;
        if (console->fromHostLen)
            contEvents |= VIR_EVENT_HANDLE_WRITABLE;
    }

    VIR_DEBUG("Container watch=%d, events=%d closed=%d; host watch=%d events=%d closed=%d",
              console->contWatch, contEvents, console->contClosed,
              console->hostWatch, hostEvents, console->hostClosed);
    virEventUpdateHandle(console->contWatch, contEvents);
    virEventUpdateHandle(console->hostWatch, hostEvents);

    /* Data from the container is what gets written to the host ... */
    if (virLXCControllerConsoleSyncEpoll(console, "host",
                                         console->hostFd,
                                         console->hostClosed,
                                         console->fromContLen,
                                         &console->hostEpoll) < 0)
        return;

    /* ... and data from the host is what gets written to the container */
    if (virLXCControllerConsoleSyncEpoll(console, "container",
                                         console->contFd,
                                         console->contClosed,
                                         console->fromHostLen,
                                         &console->contEpoll) < 0)
        return;
}
1136
1137
virLXCControllerConsoleEPoll(int watch,int fd,int events,void * opaque)1138 static void virLXCControllerConsoleEPoll(int watch, int fd, int events, void *opaque)
1139 {
1140 virLXCControllerConsole *console = opaque;
1141
1142 virMutexLock(&lock);
1143 VIR_DEBUG("IO event watch=%d fd=%d events=%d fromHost=%zu fromcont=%zu",
1144 watch, fd, events,
1145 console->fromHostLen,
1146 console->fromContLen);
1147
1148 while (1) {
1149 struct epoll_event event;
1150 int ret;
1151 ret = epoll_wait(console->epollFd, &event, 1, 0);
1152 if (ret < 0) {
1153 if (errno == EINTR)
1154 continue;
1155 virReportSystemError(errno, "%s",
1156 _("Unable to wait on epoll"));
1157 virNetDaemonQuit(console->daemon);
1158 goto cleanup;
1159 }
1160
1161 if (ret == 0)
1162 break;
1163
1164 VIR_DEBUG("fd=%d hostFd=%d contFd=%d hostEpoll=%x contEpoll=%x",
1165 event.data.fd, console->hostFd, console->contFd,
1166 console->hostEpoll, console->contEpoll);
1167
1168 /* If we get HUP+dead PID, we just re-enable the main loop
1169 * which will see the PID has died and exit */
1170 if ((event.events & (EPOLLIN|EPOLLOUT))) {
1171 if (event.data.fd == console->hostFd) {
1172 console->hostClosed = false;
1173 } else {
1174 console->contClosed = false;
1175 }
1176 virLXCControllerConsoleUpdateWatch(console);
1177 break;
1178 }
1179 }
1180
1181 cleanup:
1182 virMutexUnlock(&lock);
1183 }
1184
virLXCControllerConsoleIO(int watch,int fd,int events,void * opaque)1185 static void virLXCControllerConsoleIO(int watch, int fd, int events, void *opaque)
1186 {
1187 virLXCControllerConsole *console = opaque;
1188
1189 virMutexLock(&lock);
1190 VIR_DEBUG("IO event watch=%d fd=%d events=%d fromHost=%zu fromcont=%zu",
1191 watch, fd, events,
1192 console->fromHostLen,
1193 console->fromContLen);
1194 if (events & VIR_EVENT_HANDLE_READABLE) {
1195 char *buf;
1196 size_t *len;
1197 size_t avail;
1198 ssize_t done;
1199 if (watch == console->hostWatch) {
1200 buf = console->fromHostBuf;
1201 len = &console->fromHostLen;
1202 avail = sizeof(console->fromHostBuf) - *len;
1203 } else {
1204 buf = console->fromContBuf;
1205 len = &console->fromContLen;
1206 avail = sizeof(console->fromContBuf) - *len;
1207 }
1208 reread:
1209 done = read(fd, buf + *len, avail);
1210 if (done == -1 && errno == EINTR)
1211 goto reread;
1212 if (done == -1 && errno != EAGAIN) {
1213 virReportSystemError(errno, "%s",
1214 _("Unable to read container pty"));
1215 goto error;
1216 }
1217 if (done > 0) {
1218 *len += done;
1219 } else {
1220 VIR_DEBUG("Read fd %d done %d errno %d", fd, (int)done, errno);
1221 }
1222 }
1223
1224 if (events & VIR_EVENT_HANDLE_WRITABLE) {
1225 char *buf;
1226 size_t *len;
1227 ssize_t done;
1228 if (watch == console->hostWatch) {
1229 buf = console->fromContBuf;
1230 len = &console->fromContLen;
1231 } else {
1232 buf = console->fromHostBuf;
1233 len = &console->fromHostLen;
1234 }
1235
1236 rewrite:
1237 done = write(fd, buf, *len);
1238 if (done == -1 && errno == EINTR)
1239 goto rewrite;
1240 if (done == -1 && errno != EAGAIN) {
1241 virReportSystemError(errno, "%s",
1242 _("Unable to write to container pty"));
1243 goto error;
1244 }
1245 if (done > 0) {
1246 memmove(buf, buf + done, (*len - done));
1247 *len -= done;
1248 } else {
1249 VIR_DEBUG("Write fd %d done %d errno %d", fd, (int)done, errno);
1250 }
1251 }
1252
1253 if (events & VIR_EVENT_HANDLE_HANGUP) {
1254 if (watch == console->hostWatch) {
1255 console->hostClosed = true;
1256 } else {
1257 console->contClosed = true;
1258 }
1259 VIR_DEBUG("Got EOF on %d %d", watch, fd);
1260 }
1261
1262 virLXCControllerConsoleUpdateWatch(console);
1263 virMutexUnlock(&lock);
1264 return;
1265
1266 error:
1267 virEventRemoveHandle(console->contWatch);
1268 virEventRemoveHandle(console->hostWatch);
1269 console->contWatch = console->hostWatch = -1;
1270 virNetDaemonQuit(console->daemon);
1271 virMutexUnlock(&lock);
1272 }
1273
1274
1275 /**
1276 * lxcControllerMain
1277 * @serverFd: server socket fd to accept client requests
1278 * @clientFd: initial client which is the libvirtd daemon
1279 *
1280 * Processes I/O on consoles and the monitor
1281 *
1282 * Returns 0 on success or -1 in case of error
1283 */
virLXCControllerMain(virLXCController * ctrl)1284 static int virLXCControllerMain(virLXCController *ctrl)
1285 {
1286 int rc = -1;
1287 size_t i;
1288
1289 if (virNetDaemonAddSignalHandler(ctrl->daemon,
1290 SIGCHLD,
1291 virLXCControllerSignalChildIO,
1292 ctrl) < 0)
1293 goto cleanup;
1294
1295 virResetLastError();
1296
1297 for (i = 0; i < ctrl->nconsoles; i++) {
1298 if ((ctrl->consoles[i].epollFd = epoll_create1(EPOLL_CLOEXEC)) < 0) {
1299 virReportSystemError(errno, "%s",
1300 _("Unable to create epoll fd"));
1301 goto cleanup;
1302 }
1303
1304 if ((ctrl->consoles[i].epollWatch = virEventAddHandle(ctrl->consoles[i].epollFd,
1305 VIR_EVENT_HANDLE_READABLE,
1306 virLXCControllerConsoleEPoll,
1307 &(ctrl->consoles[i]),
1308 NULL)) < 0) {
1309 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
1310 _("Unable to watch epoll FD"));
1311 goto cleanup;
1312 }
1313
1314 if ((ctrl->consoles[i].hostWatch = virEventAddHandle(ctrl->consoles[i].hostFd,
1315 VIR_EVENT_HANDLE_READABLE,
1316 virLXCControllerConsoleIO,
1317 &(ctrl->consoles[i]),
1318 NULL)) < 0) {
1319 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
1320 _("Unable to watch host console PTY"));
1321 goto cleanup;
1322 }
1323
1324 if ((ctrl->consoles[i].contWatch = virEventAddHandle(ctrl->consoles[i].contFd,
1325 VIR_EVENT_HANDLE_READABLE,
1326 virLXCControllerConsoleIO,
1327 &(ctrl->consoles[i]),
1328 NULL)) < 0) {
1329 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
1330 _("Unable to watch host console PTY"));
1331 goto cleanup;
1332 }
1333 }
1334
1335 virNetDaemonRun(ctrl->daemon);
1336
1337 if (virGetLastErrorCode() == VIR_ERR_OK)
1338 rc = wantReboot ? 1 : 0;
1339
1340 cleanup:
1341 for (i = 0; i < ctrl->nconsoles; i++)
1342 virLXCControllerConsoleClose(&(ctrl->consoles[i]));
1343
1344 return rc;
1345 }
1346
1347 static unsigned int
virLXCControllerLookupUsernsMap(virDomainIdMapEntry * map,int num,unsigned int src)1348 virLXCControllerLookupUsernsMap(virDomainIdMapEntry *map,
1349 int num,
1350 unsigned int src)
1351 {
1352 size_t i;
1353
1354 for (i = 0; i < num; i++) {
1355 if (src > map[i].start && src < map[i].start + map[i].count)
1356 return map[i].target + (src - map[i].start);
1357 }
1358
1359 return src;
1360 }
1361
1362 static int
virLXCControllerSetupUsernsMap(virDomainIdMapEntry * map,int num,char * path)1363 virLXCControllerSetupUsernsMap(virDomainIdMapEntry *map,
1364 int num,
1365 char *path)
1366 {
1367 g_auto(virBuffer) map_value = VIR_BUFFER_INITIALIZER;
1368 size_t i;
1369
1370 /* The kernel supports up to 340 lines in /proc/<pid>/{g,u}id_map */
1371 if (num > 340) {
1372 virReportError(VIR_ERR_INVALID_ARG, "%s",
1373 _("Too many id mappings defined."));
1374 return -1;
1375 }
1376
1377 for (i = 0; i < num; i++)
1378 virBufferAsprintf(&map_value, "%u %u %u\n",
1379 map[i].start, map[i].target, map[i].count);
1380
1381 VIR_DEBUG("Set '%s' to '%s'", path, virBufferCurrentContent(&map_value));
1382
1383 if (virFileWriteStr(path, virBufferCurrentContent(&map_value), 0) < 0) {
1384 virReportSystemError(errno, _("unable write to %s"), path);
1385 return -1;
1386 }
1387
1388 return 0;
1389 }
1390
1391 /**
1392 * virLXCControllerSetupUserns
1393 *
1394 * Set proc files for user namespace
1395 *
1396 * Returns 0 on success or -1 in case of error
1397 */
virLXCControllerSetupUserns(virLXCController * ctrl)1398 static int virLXCControllerSetupUserns(virLXCController *ctrl)
1399 {
1400 g_autofree char *uid_map = NULL;
1401 g_autofree char *gid_map = NULL;
1402
1403 /* User namespace is disabled for container */
1404 if (ctrl->def->idmap.nuidmap == 0) {
1405 VIR_DEBUG("No uid map, skipping userns setup");
1406 return 0;
1407 }
1408
1409 VIR_DEBUG("Setting up userns maps");
1410 uid_map = g_strdup_printf("/proc/%d/uid_map", ctrl->initpid);
1411
1412 if (virLXCControllerSetupUsernsMap(ctrl->def->idmap.uidmap,
1413 ctrl->def->idmap.nuidmap,
1414 uid_map) < 0)
1415 return -1;
1416
1417 gid_map = g_strdup_printf("/proc/%d/gid_map", ctrl->initpid);
1418
1419 if (virLXCControllerSetupUsernsMap(ctrl->def->idmap.gidmap,
1420 ctrl->def->idmap.ngidmap,
1421 gid_map) < 0)
1422 return -1;
1423
1424 return 0;
1425 }
1426
/**
 * virLXCControllerSetupDev:
 * @ctrl: the LXC controller
 *
 * Sets up the container's private /dev directory below LXC_STATE_DIR
 * via virFileSetupDev() and then calls lxcContainerChown() on it.
 *
 * Returns 0 on success or -1 in case of error
 */
static int virLXCControllerSetupDev(virLXCController *ctrl)
{
    g_autofree char *mount_options = NULL;
    g_autofree char *opts = NULL;
    g_autofree char *dev = NULL;

    VIR_DEBUG("Setting up /dev/ for container");

    mount_options = virSecurityManagerGetMountOptions(ctrl->securityManager,
                                                      ctrl->def);

    dev = g_strdup_printf("/%s/%s.dev", LXC_STATE_DIR, ctrl->def->name);

    /*
     * tmpfs is limited to 64kb, since we only have device nodes in there
     * and don't want to DOS the entire OS RAM usage
     */

    /* mount_options may be NULL, which must not be handed to %s */
    opts = g_strdup_printf("mode=755,size=65536%s",
                           NULLSTR_EMPTY(mount_options));

    if (virFileSetupDev(dev, opts) < 0)
        return -1;

    if (lxcContainerChown(ctrl->def, dev) < 0)
        return -1;

    return 0;
}
1455
/**
 * virLXCControllerPopulateDevices:
 * @ctrl: the LXC controller
 *
 * Sets up the container's private /dev directory and creates the
 * character device nodes null, zero, full, random, urandom and tty
 * in it, calling lxcContainerChown() on each of them.
 *
 * Returns 0 on success or -1 in case of error
 */
static int virLXCControllerPopulateDevices(virLXCController *ctrl)
{
    const struct {
        int maj;
        int min;
        mode_t mode;
        const char *path;
    } nodes[] = {
        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL, 0666, "/null" },
        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO, 0666, "/zero" },
        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL, 0666, "/full" },
        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM, 0666, "/random" },
        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM, 0666, "/urandom" },
        { LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY, 0666, "/tty" },
    };
    size_t n;

    if (virLXCControllerSetupDev(ctrl) < 0)
        return -1;

    /* Populate /dev/ with a few important bits */
    for (n = 0; n < G_N_ELEMENTS(nodes); n++) {
        g_autofree char *nodePath = g_strdup_printf("/%s/%s.dev/%s",
                                                    LXC_STATE_DIR,
                                                    ctrl->def->name,
                                                    nodes[n].path);
        dev_t devnum = makedev(nodes[n].maj, nodes[n].min);

        /* The node is created first and its mode is set explicitly after */
        if (mknod(nodePath, S_IFCHR, devnum) < 0 ||
            chmod(nodePath, nodes[n].mode) != 0) {
            virReportSystemError(errno,
                                 _("Failed to make device %s"),
                                 nodePath);
            return -1;
        }

        if (lxcContainerChown(ctrl->def, nodePath) < 0)
            return -1;
    }

    return 0;
}
1499
1500
1501 static int
virLXCControllerSetupTimers(virLXCController * ctrl)1502 virLXCControllerSetupTimers(virLXCController *ctrl)
1503 {
1504 virDomainDef *def = ctrl->def;
1505 size_t i;
1506
1507 /* Not sync'ed with Host clock */
1508 if (def->clock.offset != VIR_DOMAIN_CLOCK_OFFSET_LOCALTIME)
1509 return 0;
1510
1511 for (i = 0; i < def->clock.ntimers; i++) {
1512 virDomainTimerDef *timer = def->clock.timers[i];
1513 g_autofree char *path = NULL;
1514 const char *timer_dev = NULL;
1515 struct stat sb;
1516 dev_t dev;
1517
1518 /* Check if "present" is set to "no" otherwise enable it. */
1519 if (!timer->present)
1520 continue;
1521
1522 switch ((virDomainTimerNameType)timer->name) {
1523 case VIR_DOMAIN_TIMER_NAME_PLATFORM:
1524 case VIR_DOMAIN_TIMER_NAME_TSC:
1525 case VIR_DOMAIN_TIMER_NAME_KVMCLOCK:
1526 case VIR_DOMAIN_TIMER_NAME_HYPERVCLOCK:
1527 case VIR_DOMAIN_TIMER_NAME_PIT:
1528 case VIR_DOMAIN_TIMER_NAME_ARMVTIMER:
1529 case VIR_DOMAIN_TIMER_NAME_LAST:
1530 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1531 _("unsupported timer type (name) '%s'"),
1532 virDomainTimerNameTypeToString(timer->name));
1533 return -1;
1534 case VIR_DOMAIN_TIMER_NAME_RTC:
1535 timer_dev = "/dev/rtc0";
1536 path = g_strdup_printf("/%s/%s.dev/%s", LXC_STATE_DIR,
1537 def->name, "/rtc");
1538 break;
1539 case VIR_DOMAIN_TIMER_NAME_HPET:
1540 timer_dev = "/dev/hpet";
1541 path = g_strdup_printf("/%s/%s.dev/%s", LXC_STATE_DIR,
1542 ctrl->def->name, "/hpet");
1543 break;
1544 }
1545
1546 if (!timer_dev)
1547 continue;
1548
1549 if (stat(timer_dev, &sb) < 0) {
1550 virReportSystemError(errno, _("Unable to access %s"),
1551 timer_dev);
1552 return -1;
1553 }
1554
1555 dev = makedev(major(sb.st_rdev), minor(sb.st_rdev));
1556 if (mknod(path, S_IFCHR, dev) < 0 ||
1557 chmod(path, sb.st_mode)) {
1558 virReportSystemError(errno,
1559 _("Failed to make device %s"),
1560 path);
1561 return -1;
1562 }
1563
1564 if (lxcContainerChown(def, path) < 0)
1565 return -1;
1566 }
1567
1568 return 0;
1569 }
1570
1571
1572 static int
virLXCControllerSetupHostdevSubsysUSB(virDomainDef * vmDef,virDomainHostdevDef * def,virSecurityManager * securityDriver)1573 virLXCControllerSetupHostdevSubsysUSB(virDomainDef *vmDef,
1574 virDomainHostdevDef *def,
1575 virSecurityManager *securityDriver)
1576 {
1577 g_autofree char *src = NULL;
1578 g_autofree char *dstdir = NULL;
1579 g_autofree char *dstfile = NULL;
1580 g_autofree char *vroot = NULL;
1581 struct stat sb;
1582 mode_t mode;
1583 virDomainHostdevSubsysUSB *usbsrc = &def->source.subsys.u.usb;
1584
1585 src = g_strdup_printf(USB_DEVFS "/%03d/%03d", usbsrc->bus, usbsrc->device);
1586
1587 vroot = g_strdup_printf("/%s/%s.dev/bus/usb/", LXC_STATE_DIR, vmDef->name);
1588
1589 dstdir = g_strdup_printf("%s/%03d/", vroot, usbsrc->bus);
1590
1591 dstfile = g_strdup_printf("%s/%03d", dstdir, usbsrc->device);
1592
1593 if (stat(src, &sb) < 0) {
1594 virReportSystemError(errno,
1595 _("Unable to access %s"), src);
1596 return -1;
1597 }
1598
1599 if (!S_ISCHR(sb.st_mode)) {
1600 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1601 _("USB source %s was not a character device"),
1602 src);
1603 return -1;
1604 }
1605
1606 mode = 0700 | S_IFCHR;
1607
1608 if (g_mkdir_with_parents(dstdir, 0777) < 0) {
1609 virReportSystemError(errno,
1610 _("Unable to create %s"), dstdir);
1611 return -1;
1612 }
1613
1614 VIR_DEBUG("Creating dev %s (%d,%d)",
1615 dstfile, major(sb.st_rdev), minor(sb.st_rdev));
1616 if (mknod(dstfile, mode, sb.st_rdev) < 0) {
1617 virReportSystemError(errno,
1618 _("Unable to create device %s"),
1619 dstfile);
1620 return -1;
1621 }
1622
1623 if (lxcContainerChown(vmDef, dstfile) < 0)
1624 return -1;
1625
1626 if (virSecurityManagerSetHostdevLabel(securityDriver,
1627 vmDef, def, vroot) < 0)
1628 return -1;
1629
1630 return 0;
1631 }
1632
1633
1634 static int
virLXCControllerSetupHostdevCapsStorage(virDomainDef * vmDef,virDomainHostdevDef * def,virSecurityManager * securityDriver)1635 virLXCControllerSetupHostdevCapsStorage(virDomainDef *vmDef,
1636 virDomainHostdevDef *def,
1637 virSecurityManager *securityDriver)
1638 {
1639 g_autofree char *dst = NULL;
1640 g_autofree char *path = NULL;
1641 int len = 0;
1642 int ret = -1;
1643 struct stat sb;
1644 mode_t mode;
1645 char *dev = def->source.caps.u.storage.block;
1646
1647 if (dev == NULL) {
1648 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
1649 _("Missing storage host block path"));
1650 goto cleanup;
1651 }
1652
1653 path = g_strdup(dev);
1654
1655 while (*(path + len) == '/')
1656 len++;
1657
1658 dst = g_strdup_printf("/%s/%s.dev/%s", LXC_STATE_DIR, vmDef->name,
1659 strchr(path + len, '/'));
1660
1661 if (stat(dev, &sb) < 0) {
1662 virReportSystemError(errno,
1663 _("Unable to access %s"),
1664 dev);
1665 goto cleanup;
1666 }
1667
1668 if (!S_ISBLK(sb.st_mode)) {
1669 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1670 _("Storage source %s must be a block device"),
1671 dev);
1672 goto cleanup;
1673 }
1674
1675 if (lxcContainerSetupHostdevCapsMakePath(dst) < 0) {
1676 virReportError(errno,
1677 _("Failed to create directory for device %s"),
1678 dev);
1679 goto cleanup;
1680 }
1681
1682 mode = 0700 | S_IFBLK;
1683
1684 VIR_DEBUG("Creating dev %s (%d,%d)", dst,
1685 major(sb.st_rdev), minor(sb.st_rdev));
1686 if (mknod(dst, mode, sb.st_rdev) < 0) {
1687 virReportSystemError(errno,
1688 _("Unable to create device %s"),
1689 dst);
1690 goto cleanup;
1691 }
1692
1693 if (lxcContainerChown(vmDef, dst) < 0)
1694 goto cleanup;
1695
1696 def->source.caps.u.storage.block = dst;
1697 if (virSecurityManagerSetHostdevLabel(securityDriver, vmDef, def, NULL) < 0)
1698 goto cleanup;
1699
1700 ret = 0;
1701
1702 cleanup:
1703 def->source.caps.u.storage.block = dev;
1704 return ret;
1705 }
1706
1707
1708 static int
virLXCControllerSetupHostdevCapsMisc(virDomainDef * vmDef,virDomainHostdevDef * def,virSecurityManager * securityDriver)1709 virLXCControllerSetupHostdevCapsMisc(virDomainDef *vmDef,
1710 virDomainHostdevDef *def,
1711 virSecurityManager *securityDriver)
1712 {
1713 g_autofree char *dst = NULL;
1714 g_autofree char *path = NULL;
1715 int len = 0;
1716 int ret = -1;
1717 struct stat sb;
1718 mode_t mode;
1719 char *dev = def->source.caps.u.misc.chardev;
1720
1721 if (dev == NULL) {
1722 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
1723 _("Missing storage host block path"));
1724 goto cleanup;
1725 }
1726
1727 path = g_strdup(dev);
1728
1729 while (*(path + len) == '/')
1730 len++;
1731
1732 dst = g_strdup_printf("/%s/%s.dev/%s", LXC_STATE_DIR, vmDef->name,
1733 strchr(path + len, '/'));
1734
1735 if (stat(dev, &sb) < 0) {
1736 virReportSystemError(errno,
1737 _("Unable to access %s"),
1738 dev);
1739 goto cleanup;
1740 }
1741
1742 if (!S_ISCHR(sb.st_mode)) {
1743 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1744 _("Storage source %s must be a character device"),
1745 dev);
1746 goto cleanup;
1747 }
1748
1749 if (lxcContainerSetupHostdevCapsMakePath(dst) < 0) {
1750 virReportError(errno,
1751 _("Failed to create directory for device %s"),
1752 dst);
1753 goto cleanup;
1754 }
1755
1756 mode = 0700 | S_IFCHR;
1757
1758 VIR_DEBUG("Creating dev %s (%d,%d)", dst,
1759 major(sb.st_rdev), minor(sb.st_rdev));
1760 if (mknod(dst, mode, sb.st_rdev) < 0) {
1761 virReportSystemError(errno,
1762 _("Unable to create device %s"),
1763 dev);
1764 goto cleanup;
1765 }
1766
1767 if (lxcContainerChown(vmDef, dst) < 0)
1768 goto cleanup;
1769
1770 def->source.caps.u.misc.chardev = dst;
1771 if (virSecurityManagerSetHostdevLabel(securityDriver, vmDef, def, NULL) < 0)
1772 goto cleanup;
1773
1774 ret = 0;
1775
1776 cleanup:
1777 def->source.caps.u.misc.chardev = dev;
1778 return ret;
1779 }
1780
1781 static int
virLXCControllerSetupHostdevSubsys(virDomainDef * vmDef,virDomainHostdevDef * def,virSecurityManager * securityDriver)1782 virLXCControllerSetupHostdevSubsys(virDomainDef *vmDef,
1783 virDomainHostdevDef *def,
1784 virSecurityManager *securityDriver)
1785 {
1786 switch (def->source.subsys.type) {
1787 case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB:
1788 return virLXCControllerSetupHostdevSubsysUSB(vmDef,
1789 def,
1790 securityDriver);
1791
1792 default:
1793 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1794 _("Unsupported host device mode %s"),
1795 virDomainHostdevSubsysTypeToString(def->source.subsys.type));
1796 return -1;
1797 }
1798 }
1799
1800
1801 static int
virLXCControllerSetupHostdevCaps(virDomainDef * vmDef,virDomainHostdevDef * def,virSecurityManager * securityDriver)1802 virLXCControllerSetupHostdevCaps(virDomainDef *vmDef,
1803 virDomainHostdevDef *def,
1804 virSecurityManager *securityDriver)
1805 {
1806 switch (def->source.subsys.type) {
1807 case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_STORAGE:
1808 return virLXCControllerSetupHostdevCapsStorage(vmDef,
1809 def,
1810 securityDriver);
1811
1812 case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_MISC:
1813 return virLXCControllerSetupHostdevCapsMisc(vmDef,
1814 def,
1815 securityDriver);
1816
1817 case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET:
1818 return 0; /* case is handled in virLXCControllerMoveInterfaces */
1819
1820 default:
1821 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1822 _("Unsupported host device mode %s"),
1823 virDomainHostdevCapsTypeToString(def->source.subsys.type));
1824 return -1;
1825 }
1826 }
1827
1828
1829 static int
virLXCControllerSetupAllHostdevs(virLXCController * ctrl)1830 virLXCControllerSetupAllHostdevs(virLXCController *ctrl)
1831 {
1832 size_t i;
1833 virDomainDef *vmDef = ctrl->def;
1834 virSecurityManager *securityDriver = ctrl->securityManager;
1835 VIR_DEBUG("Setting up hostdevs");
1836
1837 for (i = 0; i < vmDef->nhostdevs; i++) {
1838 virDomainHostdevDef *def = vmDef->hostdevs[i];
1839 switch (def->mode) {
1840 case VIR_DOMAIN_HOSTDEV_MODE_SUBSYS:
1841 if (virLXCControllerSetupHostdevSubsys(vmDef,
1842 def,
1843 securityDriver) < 0)
1844 return -1;
1845 break;
1846 case VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES:
1847 if (virLXCControllerSetupHostdevCaps(vmDef,
1848 def,
1849 securityDriver) < 0)
1850 return -1;
1851 break;
1852 default:
1853 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1854 _("Unsupported host device mode %s"),
1855 virDomainHostdevModeTypeToString(def->mode));
1856 return -1;
1857 }
1858 }
1859
1860 VIR_DEBUG("Setup all hostdevs");
1861 return 0;
1862 }
1863
1864
/**
 * virLXCControllerSetupDisk:
 * @ctrl: the LXC controller
 * @def: disk definition
 * @securityDriver: security manager used for labelling
 *
 * Creates a device node named after the disk target in the
 * container's private /dev, carrying the device number of the disk
 * source, calls lxcContainerChown() on it and applies the image
 * security label to the new node. Only block-type disks with a source
 * path that is a character or block device are accepted.
 *
 * Returns 0 on success or -1 in case of error
 */
static int virLXCControllerSetupDisk(virLXCController *ctrl,
                                     virDomainDiskDef *def,
                                     virSecurityManager *securityDriver)
{
    g_autofree char *dst = NULL;
    char *tmpsrc = def->src->path;
    struct stat sb;
    mode_t mode = 0700;
    int rc;

    if (virDomainDiskGetType(def) != VIR_STORAGE_TYPE_BLOCK) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Can't setup disk for non-block device"));
        return -1;
    }

    if (!tmpsrc) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Can't setup disk without media"));
        return -1;
    }

    dst = g_strdup_printf("/%s/%s.dev/%s", LXC_STATE_DIR, ctrl->def->name,
                          def->dst);

    if (stat(tmpsrc, &sb) < 0) {
        virReportSystemError(errno,
                             _("Unable to access %s"), tmpsrc);
        return -1;
    }

    if (S_ISCHR(sb.st_mode)) {
        mode |= S_IFCHR;
    } else if (S_ISBLK(sb.st_mode)) {
        mode |= S_IFBLK;
    } else {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Disk source %s must be a character/block device"),
                       tmpsrc);
        return -1;
    }

    /* Yes, the device name we're creating may not
     * actually correspond to the major:minor number
     * we're using, but we've no other option at this
     * time. Just have to hope that containerized apps
     * don't get upset that the major:minor is different
     * to that normally implied by the device name
     */
    VIR_DEBUG("Creating dev %s (%d,%d) from %s",
              dst, major(sb.st_rdev), minor(sb.st_rdev), tmpsrc);
    if (mknod(dst, mode, sb.st_rdev) < 0) {
        virReportSystemError(errno,
                             _("Unable to create device %s"),
                             dst);
        return -1;
    }

    if (lxcContainerChown(ctrl->def, dst) < 0)
        return -1;

    /* Labelling normally operates on src, but we need
     * to actually label the dst here, so hack the config
     * and put the real source back right afterwards */
    def->src->path = dst;
    rc = virSecurityManagerSetImageLabel(securityDriver, ctrl->def, def->src,
                                         VIR_SECURITY_DOMAIN_IMAGE_LABEL_BACKING_CHAIN);
    def->src->path = tmpsrc;

    return rc < 0 ? -1 : 0;
}
1940
/**
 * virLXCControllerSetupAllDisks:
 * @ctrl: the LXC controller
 *
 * Runs virLXCControllerSetupDisk() for every disk of the domain,
 * stopping at the first failure.
 *
 * Returns 0 on success or -1 in case of error
 */
static int virLXCControllerSetupAllDisks(virLXCController *ctrl)
{
    size_t idx = 0;

    VIR_DEBUG("Setting up disks");

    while (idx < ctrl->def->ndisks) {
        virDomainDiskDef *disk = ctrl->def->disks[idx];

        if (virLXCControllerSetupDisk(ctrl, disk, ctrl->securityManager) < 0)
            return -1;

        idx++;
    }

    VIR_DEBUG("Setup all disks");
    return 0;
}
1955
1956
1957
1958 /**
1959 * virLXCControllerMoveInterfaces
1960 * @nveths: number of interfaces
1961 * @veths: interface names
1962 * @container: pid of container
1963 *
1964 * Moves network interfaces into a container's namespace
1965 *
1966 * Returns 0 on success or -1 in case of error
1967 */
virLXCControllerMoveInterfaces(virLXCController * ctrl)1968 static int virLXCControllerMoveInterfaces(virLXCController *ctrl)
1969 {
1970 size_t i;
1971 virDomainDef *def = ctrl->def;
1972
1973 for (i = 0; i < ctrl->nveths; i++) {
1974 if (virNetDevSetNamespace(ctrl->veths[i], ctrl->initpid) < 0)
1975 return -1;
1976 }
1977
1978 for (i = 0; i < def->nhostdevs; i ++) {
1979 virDomainHostdevDef *hdev = def->hostdevs[i];
1980 virDomainHostdevCaps hdcaps;
1981
1982 if (hdev->mode != VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES)
1983 continue;
1984
1985 hdcaps = hdev->source.caps;
1986
1987 if (hdcaps.type != VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET)
1988 continue;
1989
1990 if (virNetDevSetNamespace(hdcaps.u.net.ifname, ctrl->initpid) < 0)
1991 return -1;
1992 }
1993
1994 return 0;
1995 }
1996
1997
1998 /**
1999 * virLXCControllerDeleteInterfaces:
2000 * @ctrl: the LXC controller
2001 *
2002 * Cleans up the container interfaces by deleting the veth device pairs.
2003 *
2004 * Returns 0 on success or -1 in case of error
2005 */
virLXCControllerDeleteInterfaces(virLXCController * ctrl)2006 static int virLXCControllerDeleteInterfaces(virLXCController *ctrl)
2007 {
2008 size_t i;
2009 int ret = 0;
2010
2011 for (i = 0; i < ctrl->nveths; i++) {
2012 if (virNetDevVethDelete(ctrl->veths[i]) < 0)
2013 ret = -1;
2014 }
2015
2016 return ret;
2017 }
2018
2019
/*
 * Switches the calling process to the PER_LINUX32 personality when the
 * domain architecture equals the alternate 32-bit architecture that
 * lxcContainerGetAlt32bitArch() reports for the host architecture.
 *
 * Returns 0 on success (including when nothing had to be done)
 * or -1 in case of error
 */
static int lxcSetPersonality(virDomainDef *def)
{
    virArch altArch;

    VIR_DEBUG("Checking for 32-bit personality");
    altArch = lxcContainerGetAlt32bitArch(virArchFromHost());

    /* No alternate arch, or the domain does not use it: nothing to do */
    if (!altArch || def->os.arch != altArch)
        return 0;

    VIR_DEBUG("Setting personality to %s",
              virArchToString(altArch));

    if (personality(PER_LINUX32) >= 0)
        return 0;

    virReportSystemError(errno, _("Unable to request personality for %s on %s"),
                         virArchToString(altArch),
                         virArchToString(virArchFromHost()));
    return -1;
}
2039
2040 /* Create a private tty using the private devpts at PTMX, returning
2041 * the primary in @ttyprimary and the name of the secondary, _from the
2042 * perspective of the guest after remounting file systems_, in
2043 * @ttyName. Heavily borrowed from glibc, but doesn't require that
2044 * devpts == "/dev/pts" */
2045 static int
lxcCreateTty(virLXCController * ctrl,int * ttyprimary,char ** ttyName,char ** ttyHostPath)2046 lxcCreateTty(virLXCController *ctrl, int *ttyprimary,
2047 char **ttyName, char **ttyHostPath)
2048 {
2049 int ret = -1;
2050 int ptyno;
2051 int unlock = 0;
2052
2053 if ((*ttyprimary = open(ctrl->devptmx, O_RDWR|O_NOCTTY|O_NONBLOCK)) < 0)
2054 goto cleanup;
2055
2056 if (ioctl(*ttyprimary, TIOCSPTLCK, &unlock) < 0)
2057 goto cleanup;
2058
2059 if (ioctl(*ttyprimary, TIOCGPTN, &ptyno) < 0)
2060 goto cleanup;
2061
2062 /* If mount() succeeded at honoring newinstance, then the kernel
2063 * was new enough to also honor the mode=0620,gid=5 options, which
2064 * guarantee that the new pty already has correct permissions; so
2065 * while glibc has to fstat(), fchmod(), and fchown() for older
2066 * kernels, we can skip those steps. ptyno shouldn't currently be
2067 * anything other than 0, but let's play it safe. */
2068 *ttyName = g_strdup_printf("/dev/pts/%d", ptyno);
2069 *ttyHostPath = g_strdup_printf("/%s/%s.devpts/%d", LXC_STATE_DIR, ctrl->def->name, ptyno);
2070
2071 ret = 0;
2072
2073 cleanup:
2074 if (ret != 0) {
2075 VIR_FORCE_CLOSE(*ttyprimary);
2076 g_free(*ttyName);
2077 *ttyName = NULL;
2078 }
2079
2080 return ret;
2081 }
2082
2083
/*
 * virLXCControllerSetupPrivateNS:
 *
 * If doing a chroot style setup, a private /dev/pts has to be
 * prepared for the child now, which it will later move into position.
 *
 * This is complex because 'virsh console' needs to use /dev/pts from
 * the host OS, while the guest OS needs to use /dev/pts from the guest.
 * We (libvirt_lxc) therefore need to see and use both /dev/pts
 * instances. We are running in the host OS context though and do not
 * want to expose the guest OS /dev/pts there.
 *
 * Thus we unshare the mount namespace so that we can see the guest's
 * new /dev/pts without it becoming visible to the host OS. We also
 * disable mount propagation out of the root FS, in case it was
 * currently allowing bi-directional propagation.
 *
 * Returns the result of virProcessSetupPrivateMountNS()
 */
static int
virLXCControllerSetupPrivateNS(void)
{
    int rc = virProcessSetupPrivateMountNS();

    return rc;
}
2110
2111
2112 static int
virLXCControllerSetupDevPTS(virLXCController * ctrl)2113 virLXCControllerSetupDevPTS(virLXCController *ctrl)
2114 {
2115 g_autofree char *mount_options = NULL;
2116 g_autofree char *opts = NULL;
2117 g_autofree char *devpts = NULL;
2118 gid_t ptsgid = 5;
2119
2120 VIR_DEBUG("Setting up private /dev/pts");
2121
2122 mount_options = virSecurityManagerGetMountOptions(ctrl->securityManager,
2123 ctrl->def);
2124
2125 devpts = g_strdup_printf("%s/%s.devpts", LXC_STATE_DIR, ctrl->def->name);
2126 ctrl->devptmx = g_strdup_printf("%s/%s.devpts/ptmx", LXC_STATE_DIR, ctrl->def->name);
2127
2128 if (g_mkdir_with_parents(devpts, 0777) < 0) {
2129 virReportSystemError(errno,
2130 _("Failed to make path %s"),
2131 devpts);
2132 return -1;
2133 }
2134
2135 if (ctrl->def->idmap.ngidmap)
2136 ptsgid = virLXCControllerLookupUsernsMap(ctrl->def->idmap.gidmap,
2137 ctrl->def->idmap.ngidmap,
2138 ptsgid);
2139
2140 /* XXX should we support gid=X for X!=5 for distros which use
2141 * a different gid for tty? */
2142 opts = g_strdup_printf("newinstance,ptmxmode=0666,mode=0620,gid=%u%s", ptsgid,
2143 NULLSTR_EMPTY(mount_options));
2144
2145 VIR_DEBUG("Mount devpts on %s type=tmpfs flags=0x%x, opts=%s",
2146 devpts, MS_NOSUID, opts);
2147 if (mount("devpts", devpts, "devpts", MS_NOSUID, opts) < 0) {
2148 virReportSystemError(errno,
2149 _("Failed to mount devpts on %s"),
2150 devpts);
2151 return -1;
2152 }
2153
2154 if (access(ctrl->devptmx, R_OK) < 0) {
2155 virReportSystemError(ENOSYS, "%s",
2156 _("Kernel does not support private devpts"));
2157 return -1;
2158 }
2159
2160 if ((lxcContainerChown(ctrl->def, ctrl->devptmx) < 0) ||
2161 (lxcContainerChown(ctrl->def, devpts) < 0))
2162 return -1;
2163
2164 return 0;
2165 }
2166
2167
2168 static int
virLXCControllerSetupFuse(virLXCController * ctrl)2169 virLXCControllerSetupFuse(virLXCController *ctrl)
2170 {
2171 return lxcSetupFuse(&ctrl->fuse, ctrl->def);
2172 }
2173
2174 static int
virLXCControllerStartFuse(virLXCController * ctrl)2175 virLXCControllerStartFuse(virLXCController *ctrl)
2176 {
2177 return lxcStartFuse(ctrl->fuse);
2178 }
2179
2180 static int
virLXCControllerSetupConsoles(virLXCController * ctrl,char ** containerTTYPaths)2181 virLXCControllerSetupConsoles(virLXCController *ctrl,
2182 char **containerTTYPaths)
2183 {
2184 size_t i;
2185
2186 for (i = 0; i < ctrl->nconsoles; i++) {
2187 g_autofree char *ttyHostPath = NULL;
2188
2189 VIR_DEBUG("Opening tty on private %s", ctrl->devptmx);
2190 if (lxcCreateTty(ctrl,
2191 &ctrl->consoles[i].contFd,
2192 &containerTTYPaths[i], &ttyHostPath) < 0) {
2193 virReportSystemError(errno, "%s",
2194 _("Failed to allocate tty"));
2195 return -1;
2196 }
2197
2198 /* Change the owner of tty device to the root user of container */
2199 if (lxcContainerChown(ctrl->def, ttyHostPath) < 0)
2200 return -1;
2201 }
2202
2203 return 0;
2204 }
2205
2206
2207 static void
virLXCControllerEventSend(virLXCController * ctrl,int procnr,xdrproc_t proc,void * data)2208 virLXCControllerEventSend(virLXCController *ctrl,
2209 int procnr,
2210 xdrproc_t proc,
2211 void *data)
2212 {
2213 virNetMessage *msg;
2214
2215 if (!ctrl->client) {
2216 VIR_WARN("Dropping event %d because libvirtd is not connected", procnr);
2217 return;
2218 }
2219
2220 VIR_DEBUG("Send event %d client=%p", procnr, ctrl->client);
2221 if (!(msg = virNetMessageNew(false)))
2222 goto error;
2223
2224 msg->header.prog = virNetServerProgramGetID(ctrl->prog);
2225 msg->header.vers = virNetServerProgramGetVersion(ctrl->prog);
2226 msg->header.proc = procnr;
2227 msg->header.type = VIR_NET_MESSAGE;
2228 msg->header.serial = 1;
2229 msg->header.status = VIR_NET_OK;
2230
2231 if (virNetMessageEncodeHeader(msg) < 0)
2232 goto error;
2233
2234 if (virNetMessageEncodePayload(msg, proc, data) < 0)
2235 goto error;
2236
2237 VIR_DEBUG("Queue event %d %zu", procnr, msg->bufferLength);
2238 if (virNetServerClientSendMessage(ctrl->client, msg) < 0)
2239 goto error;
2240
2241 xdr_free(proc, data);
2242 return;
2243
2244 error:
2245 virNetMessageFree(msg);
2246 xdr_free(proc, data);
2247 }
2248
2249
2250 static int
virLXCControllerEventSendExit(virLXCController * ctrl,int exitstatus)2251 virLXCControllerEventSendExit(virLXCController *ctrl,
2252 int exitstatus)
2253 {
2254 virLXCMonitorExitEventMsg msg;
2255
2256 VIR_DEBUG("Exit status %d (client=%p)", exitstatus, ctrl->client);
2257 memset(&msg, 0, sizeof(msg));
2258 switch (exitstatus) {
2259 case 0:
2260 msg.status = VIR_LXC_MONITOR_EXIT_STATUS_SHUTDOWN;
2261 break;
2262 case 1:
2263 msg.status = VIR_LXC_MONITOR_EXIT_STATUS_REBOOT;
2264 break;
2265 default:
2266 msg.status = VIR_LXC_MONITOR_EXIT_STATUS_ERROR;
2267 break;
2268 }
2269
2270 virLXCControllerEventSend(ctrl,
2271 VIR_LXC_MONITOR_PROC_EXIT_EVENT,
2272 (xdrproc_t)xdr_virLXCMonitorExitEventMsg,
2273 (void*)&msg);
2274
2275 if (ctrl->client) {
2276 VIR_DEBUG("Waiting for client to complete dispatch");
2277 ctrl->inShutdown = true;
2278 virNetServerClientDelayedClose(ctrl->client);
2279 virNetDaemonRun(ctrl->daemon);
2280 }
2281 VIR_DEBUG("Client has gone away");
2282 return 0;
2283 }
2284
2285
2286 static int
virLXCControllerEventSendInit(virLXCController * ctrl,pid_t initpid)2287 virLXCControllerEventSendInit(virLXCController *ctrl,
2288 pid_t initpid)
2289 {
2290 virLXCMonitorInitEventMsg msg;
2291
2292 VIR_DEBUG("Init pid %lld", (long long)initpid);
2293 memset(&msg, 0, sizeof(msg));
2294 msg.initpid = initpid;
2295
2296 virLXCControllerEventSend(ctrl,
2297 VIR_LXC_MONITOR_PROC_INIT_EVENT,
2298 (xdrproc_t)xdr_virLXCMonitorInitEventMsg,
2299 (void*)&msg);
2300 return 0;
2301 }
2302
2303
2304 static int
virLXCControllerRun(virLXCController * ctrl)2305 virLXCControllerRun(virLXCController *ctrl)
2306 {
2307 int rc = -1;
2308 int control[2] = { -1, -1};
2309 int containerhandshake[2] = { -1, -1 };
2310 char **containerTTYPaths = g_new0(char *, ctrl->nconsoles);
2311 size_t i;
2312
2313 if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) {
2314 virReportSystemError(errno, "%s",
2315 _("sockpair failed"));
2316 goto cleanup;
2317 }
2318
2319 if (socketpair(PF_UNIX, SOCK_STREAM, 0, containerhandshake) < 0) {
2320 virReportSystemError(errno, "%s",
2321 _("socketpair failed"));
2322 goto cleanup;
2323 }
2324
2325 if (virLXCControllerSetupPrivateNS() < 0)
2326 goto cleanup;
2327
2328 if (virLXCControllerSetupLoopDevices(ctrl) < 0)
2329 goto cleanup;
2330
2331 if (virLXCControllerSetupResourceLimits(ctrl) < 0)
2332 goto cleanup;
2333
2334 if (virLXCControllerSetupDevPTS(ctrl) < 0)
2335 goto cleanup;
2336
2337 if (virLXCControllerPopulateDevices(ctrl) < 0)
2338 goto cleanup;
2339
2340 if (virLXCControllerSetupTimers(ctrl) < 0)
2341 goto cleanup;
2342
2343 if (virLXCControllerSetupAllDisks(ctrl) < 0)
2344 goto cleanup;
2345
2346 if (virLXCControllerSetupAllHostdevs(ctrl) < 0)
2347 goto cleanup;
2348
2349 if (virLXCControllerSetupFuse(ctrl) < 0)
2350 goto cleanup;
2351
2352 if (virLXCControllerSetupConsoles(ctrl, containerTTYPaths) < 0)
2353 goto cleanup;
2354
2355 if (lxcSetPersonality(ctrl->def) < 0)
2356 goto cleanup;
2357
2358 if ((ctrl->initpid = lxcContainerStart(ctrl->def,
2359 ctrl->securityManager,
2360 ctrl->nveths,
2361 ctrl->veths,
2362 ctrl->npassFDs,
2363 ctrl->passFDs,
2364 control[1],
2365 containerhandshake[1],
2366 ctrl->nsFDs,
2367 ctrl->nconsoles,
2368 containerTTYPaths)) < 0)
2369 goto cleanup;
2370 VIR_FORCE_CLOSE(control[1]);
2371 VIR_FORCE_CLOSE(containerhandshake[1]);
2372
2373 for (i = 0; i < ctrl->npassFDs; i++)
2374 VIR_FORCE_CLOSE(ctrl->passFDs[i]);
2375
2376 if (ctrl->nsFDs)
2377 for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++)
2378 VIR_FORCE_CLOSE(ctrl->nsFDs[i]);
2379
2380 if (virLXCControllerSetupCgroupLimits(ctrl) < 0)
2381 goto cleanup;
2382
2383 /* Allow daemon to detect CGroups. */
2384 if (virLXCControllerDaemonHandshakeCont(ctrl) < 0 ||
2385 virLXCControllerDaemonHandshakeWait(ctrl) < 0)
2386 goto cleanup;
2387
2388 if (virLXCControllerSetupUserns(ctrl) < 0)
2389 goto cleanup;
2390
2391 if (virLXCControllerMoveInterfaces(ctrl) < 0)
2392 goto cleanup;
2393
2394 if (virLXCControllerStartFuse(ctrl) < 0)
2395 goto cleanup;
2396
2397 if (lxcContainerSendContinue(control[0]) < 0) {
2398 virReportSystemError(errno, "%s",
2399 _("Unable to send container continue message"));
2400 goto cleanup;
2401 }
2402
2403 if (lxcContainerWaitForContinue(containerhandshake[0]) < 0) {
2404 virReportSystemError(errno, "%s",
2405 _("error receiving signal from container"));
2406 goto cleanup;
2407 }
2408
2409 /* ...and reduce our privileges */
2410 if (lxcControllerClearCapabilities() < 0)
2411 goto cleanup;
2412
2413 for (i = 0; i < ctrl->nconsoles; i++)
2414 if (virLXCControllerConsoleSetNonblocking(&(ctrl->consoles[i])) < 0)
2415 goto cleanup;
2416
2417 /* Allow daemon to connect to the monitor. */
2418 if (virLXCControllerDaemonHandshakeCont(ctrl) < 0)
2419 goto cleanup;
2420
2421 /* and preemptively close handshakeFds */
2422 for (i = 0; i < G_N_ELEMENTS(ctrl->handshakeFds); i++)
2423 VIR_FORCE_CLOSE(ctrl->handshakeFds[i]);
2424
2425 /* We must not hold open a dbus connection for life
2426 * of LXC instance, since dbus-daemon is limited to
2427 * only a few 100 connections by default
2428 */
2429 virGDBusCloseSystemBus();
2430
2431 rc = virLXCControllerMain(ctrl);
2432
2433 virLXCControllerEventSendExit(ctrl, rc);
2434
2435 cleanup:
2436 VIR_FORCE_CLOSE(control[0]);
2437 VIR_FORCE_CLOSE(control[1]);
2438 VIR_FORCE_CLOSE(containerhandshake[0]);
2439 VIR_FORCE_CLOSE(containerhandshake[1]);
2440
2441 for (i = 0; i < ctrl->nconsoles; i++)
2442 g_free(containerTTYPaths[i]);
2443 g_free(containerTTYPaths);
2444
2445 virLXCControllerStopInit(ctrl);
2446
2447 return rc;
2448 }
2449
2450
2451 static int
parseFDPair(const char * arg,int (* fd)[2])2452 parseFDPair(const char *arg,
2453 int (*fd)[2])
2454 {
2455 g_auto(GStrv) fds = NULL;
2456
2457 fds = g_strsplit(arg, ":", 0);
2458
2459 if (fds[0] == NULL || fds[1] == NULL || fds[2] != NULL ||
2460 virStrToLong_i(fds[0], NULL, 10, &(*fd)[0]) < 0 ||
2461 virStrToLong_i(fds[1], NULL, 10, &(*fd)[1]) < 0) {
2462 fprintf(stderr, "malformed --handshakefds argument '%s'",
2463 optarg);
2464 return -1;
2465 }
2466
2467 return 0;
2468 }
2469
2470
main(int argc,char * argv[])2471 int main(int argc, char *argv[])
2472 {
2473 pid_t pid;
2474 int rc = -1;
2475 const char *name = NULL;
2476 size_t nveths = 0;
2477 char **veths = NULL;
2478 int ns_fd[VIR_LXC_DOMAIN_NAMESPACE_LAST];
2479 int handshakeFds[2] = { -1, -1 };
2480 bool bg = false;
2481 const struct option options[] = {
2482 { "background", 0, NULL, 'b' },
2483 { "name", 1, NULL, 'n' },
2484 { "veth", 1, NULL, 'v' },
2485 { "console", 1, NULL, 'c' },
2486 { "passfd", 1, NULL, 'p' },
2487 { "handshakefds", 1, NULL, 's' },
2488 { "security", 1, NULL, 'S' },
2489 { "share-net", 1, NULL, 'N' },
2490 { "share-ipc", 1, NULL, 'I' },
2491 { "share-uts", 1, NULL, 'U' },
2492 { "help", 0, NULL, 'h' },
2493 { 0, 0, 0, 0 },
2494 };
2495 g_autofree int *ttyFDs = NULL;
2496 size_t nttyFDs = 0;
2497 g_autofree int *passFDs = NULL;
2498 size_t npassFDs = 0;
2499 virLXCController *ctrl = NULL;
2500 size_t i;
2501 const char *securityDriver = "none";
2502
2503 for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++)
2504 ns_fd[i] = -1;
2505
2506 if (virGettextInitialize() < 0 ||
2507 virErrorInitialize() < 0) {
2508 fprintf(stderr, _("%s: initialization failed\n"), argv[0]);
2509 exit(EXIT_FAILURE);
2510 }
2511
2512 /* Initialize logging */
2513 virLogSetFromEnv();
2514
2515 while (1) {
2516 int c;
2517
2518 c = getopt_long(argc, argv, "dn:v:p:m:c:s:h:S:N:I:U:",
2519 options, NULL);
2520
2521 if (c == -1)
2522 break;
2523
2524 switch (c) {
2525 case 'b':
2526 bg = true;
2527 break;
2528
2529 case 'n':
2530 name = optarg;
2531 break;
2532
2533 case 'v':
2534 veths = g_renew(char *, veths, nveths+1);
2535 veths[nveths++] = g_strdup(optarg);
2536 break;
2537
2538 case 'c':
2539 ttyFDs = g_renew(int, ttyFDs, nttyFDs + 1);
2540 if (virStrToLong_i(optarg, NULL, 10, &ttyFDs[nttyFDs++]) < 0) {
2541 fprintf(stderr, "malformed --console argument '%s'", optarg);
2542 goto cleanup;
2543 }
2544 break;
2545
2546 case 'p':
2547 passFDs = g_renew(int, passFDs, npassFDs + 1);
2548 if (virStrToLong_i(optarg, NULL, 10, &passFDs[npassFDs++]) < 0) {
2549 fprintf(stderr, "malformed --passfd argument '%s'", optarg);
2550 goto cleanup;
2551 }
2552 break;
2553
2554 case 's':
2555 if (parseFDPair(optarg, &handshakeFds) < 0)
2556 goto cleanup;
2557 break;
2558
2559 case 'N':
2560 if (virStrToLong_i(optarg, NULL, 10, &ns_fd[VIR_LXC_DOMAIN_NAMESPACE_SHARENET]) < 0) {
2561 fprintf(stderr, "malformed --share-net argument '%s'",
2562 optarg);
2563 goto cleanup;
2564 }
2565 break;
2566
2567 case 'I':
2568 if (virStrToLong_i(optarg, NULL, 10, &ns_fd[VIR_LXC_DOMAIN_NAMESPACE_SHAREIPC]) < 0) {
2569 fprintf(stderr, "malformed --share-ipc argument '%s'",
2570 optarg);
2571 goto cleanup;
2572 }
2573 break;
2574
2575 case 'U':
2576 if (virStrToLong_i(optarg, NULL, 10, &ns_fd[VIR_LXC_DOMAIN_NAMESPACE_SHAREUTS]) < 0) {
2577 fprintf(stderr, "malformed --share-uts argument '%s'",
2578 optarg);
2579 goto cleanup;
2580 }
2581 break;
2582
2583 case 'S':
2584 securityDriver = optarg;
2585 break;
2586
2587 case 'h':
2588 case '?':
2589 fprintf(stderr, "\n");
2590 fprintf(stderr, "syntax: %s [OPTIONS]\n", argv[0]);
2591 fprintf(stderr, "\n");
2592 fprintf(stderr, "Options\n");
2593 fprintf(stderr, "\n");
2594 fprintf(stderr, " -b, --background\n");
2595 fprintf(stderr, " -n NAME, --name NAME\n");
2596 fprintf(stderr, " -c FD, --console FD\n");
2597 fprintf(stderr, " -v VETH, --veth VETH\n");
2598 fprintf(stderr, " -s FD:FD, --handshakefds FD:FD (read:write)\n");
2599 fprintf(stderr, " -S NAME, --security NAME\n");
2600 fprintf(stderr, " -N FD, --share-net FD\n");
2601 fprintf(stderr, " -I FD, --share-ipc FD\n");
2602 fprintf(stderr, " -U FD, --share-uts FD\n");
2603 fprintf(stderr, " -h, --help\n");
2604 fprintf(stderr, "\n");
2605 rc = 0;
2606 goto cleanup;
2607 }
2608 }
2609
2610 if (name == NULL) {
2611 fprintf(stderr, "%s: missing --name argument for configuration\n", argv[0]);
2612 goto cleanup;
2613 }
2614
2615 if (handshakeFds[0] < 0 || handshakeFds[1] < 0) {
2616 fprintf(stderr, "%s: missing --handshakefds argument for container PTY\n",
2617 argv[0]);
2618 goto cleanup;
2619 }
2620
2621 if (geteuid() != 0) {
2622 fprintf(stderr, "%s: must be run as the 'root' user\n", argv[0]);
2623 goto cleanup;
2624 }
2625
2626 virEventRegisterDefaultImpl();
2627
2628 virGDBusSetSharedBus(false);
2629
2630 if (!(ctrl = virLXCControllerNew(name)))
2631 goto cleanup;
2632
2633 memcpy(&ctrl->handshakeFds, &handshakeFds, sizeof(handshakeFds));
2634
2635 if (!(ctrl->securityManager = virSecurityManagerNew(securityDriver,
2636 LXC_DRIVER_NAME, 0)))
2637 goto cleanup;
2638
2639 if (ctrl->def->seclabels) {
2640 VIR_DEBUG("Security model %s type %s label %s imagelabel %s",
2641 NULLSTR(ctrl->def->seclabels[0]->model),
2642 virDomainSeclabelTypeToString(ctrl->def->seclabels[0]->type),
2643 NULLSTR(ctrl->def->seclabels[0]->label),
2644 NULLSTR(ctrl->def->seclabels[0]->imagelabel));
2645 } else {
2646 VIR_DEBUG("Security model not initialized");
2647 }
2648
2649 ctrl->veths = veths;
2650 ctrl->nveths = nveths;
2651
2652 ctrl->passFDs = passFDs;
2653 ctrl->npassFDs = npassFDs;
2654
2655 for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++) {
2656 if (ns_fd[i] != -1) {
2657 if (!ctrl->nsFDs) {/*allocate only once */
2658 size_t j = 0;
2659 ctrl->nsFDs = g_new0(int, VIR_LXC_DOMAIN_NAMESPACE_LAST);
2660 for (j = 0; j < VIR_LXC_DOMAIN_NAMESPACE_LAST; j++)
2661 ctrl->nsFDs[j] = -1;
2662 }
2663 ctrl->nsFDs[i] = ns_fd[i];
2664 }
2665 }
2666
2667 for (i = 0; i < nttyFDs; i++) {
2668 if (virLXCControllerAddConsole(ctrl, ttyFDs[i]) < 0)
2669 goto cleanup;
2670 ttyFDs[i] = -1;
2671 }
2672
2673 if (virLXCControllerValidateNICs(ctrl) < 0)
2674 goto cleanup;
2675
2676 if (virLXCControllerGetNICIndexes(ctrl) < 0)
2677 goto cleanup;
2678
2679 if (virLXCControllerValidateConsoles(ctrl) < 0)
2680 goto cleanup;
2681
2682 if (virLXCControllerSetupServer(ctrl) < 0)
2683 goto cleanup;
2684
2685 if (bg) {
2686 if ((pid = fork()) < 0)
2687 goto cleanup;
2688
2689 if (pid > 0) {
2690 if ((rc = virPidFileWrite(LXC_STATE_DIR, name, pid)) < 0) {
2691 virReportSystemError(-rc,
2692 _("Unable to write pid file '%s/%s.pid'"),
2693 LXC_STATE_DIR, name);
2694 _exit(1);
2695 }
2696
2697 /* First child now exits, allowing original caller
2698 * (ie libvirtd's LXC driver to complete their
2699 * waitpid & continue */
2700 _exit(0);
2701 }
2702
2703 /* Don't hold on to any cwd we inherit from libvirtd either */
2704 if (chdir("/") < 0) {
2705 virReportSystemError(errno, "%s",
2706 _("Unable to change to root dir"));
2707 goto cleanup;
2708 }
2709
2710 if (setsid() < 0) {
2711 virReportSystemError(errno, "%s",
2712 _("Unable to become session leader"));
2713 goto cleanup;
2714 }
2715 }
2716
2717 rc = virLXCControllerRun(ctrl);
2718
2719 cleanup:
2720 if (rc < 0) {
2721 fprintf(stderr,
2722 _("Failure in libvirt_lxc startup: %s\n"),
2723 virGetLastErrorMessage());
2724 }
2725
2726 virPidFileDelete(LXC_STATE_DIR, name);
2727 if (ctrl)
2728 virLXCControllerDeleteInterfaces(ctrl);
2729 for (i = 0; i < nttyFDs; i++)
2730 VIR_FORCE_CLOSE(ttyFDs[i]);
2731 for (i = 0; i < npassFDs; i++)
2732 VIR_FORCE_CLOSE(passFDs[i]);
2733
2734 virLXCControllerFree(ctrl);
2735
2736 return rc < 0? EXIT_FAILURE : EXIT_SUCCESS;
2737 }
2738