xref: /openbsd/usr.sbin/vmd/config.c (revision a7eff89f)
1 /*	$OpenBSD: config.c,v 1.54 2018/10/26 11:24:45 reyk Exp $	*/
2 
3 /*
4  * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/queue.h>
21 #include <sys/time.h>
22 #include <sys/uio.h>
23 #include <sys/stat.h>
24 #include <sys/socket.h>
25 
26 #include <net/if.h>
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <termios.h>
31 #include <unistd.h>
32 #include <limits.h>
33 #include <string.h>
34 #include <fcntl.h>
35 #include <util.h>
36 #include <errno.h>
37 #include <imsg.h>
38 
39 #include "proc.h"
40 #include "vmd.h"
41 
/*
 * Supported bridge types.
 * The array of type names ends with a NULL sentinel entry.
 */
const char *vmd_descsw[] = { "switch", "bridge", NULL };
44 
45 int
46 config_init(struct vmd *env)
47 {
48 	struct privsep	*ps = &env->vmd_ps;
49 	unsigned int	 what;
50 
51 	/* Global configuration */
52 	ps->ps_what[PROC_PARENT] = CONFIG_ALL;
53 	ps->ps_what[PROC_VMM] = CONFIG_VMS;
54 
55 	if (host(VMD_DHCP_PREFIX, &env->vmd_cfg.cfg_localprefix) == -1)
56 		return (-1);
57 
58 	/* Other configuration */
59 	what = ps->ps_what[privsep_process];
60 	if (what & CONFIG_VMS) {
61 		if ((env->vmd_vms = calloc(1, sizeof(*env->vmd_vms))) == NULL)
62 			return (-1);
63 		TAILQ_INIT(env->vmd_vms);
64 	}
65 	if (what & CONFIG_SWITCHES) {
66 		if ((env->vmd_switches = calloc(1,
67 		    sizeof(*env->vmd_switches))) == NULL)
68 			return (-1);
69 		TAILQ_INIT(env->vmd_switches);
70 	}
71 	if (what & CONFIG_USERS) {
72 		if ((env->vmd_users = calloc(1,
73 		    sizeof(*env->vmd_users))) == NULL)
74 			return (-1);
75 		TAILQ_INIT(env->vmd_users);
76 	}
77 
78 	return (0);
79 }
80 
81 void
82 config_purge(struct vmd *env, unsigned int reset)
83 {
84 	struct privsep		*ps = &env->vmd_ps;
85 	struct vmd_vm		*vm;
86 	struct vmd_switch	*vsw;
87 	unsigned int		 what;
88 
89 	DPRINTF("%s: %s purging vms and switches",
90 	    __func__, ps->ps_title[privsep_process]);
91 
92 	/* Reset global configuration (prefix was verified before) */
93 	(void)host(VMD_DHCP_PREFIX, &env->vmd_cfg.cfg_localprefix);
94 
95 	/* Reset other configuration */
96 	what = ps->ps_what[privsep_process] & reset;
97 	if (what & CONFIG_VMS && env->vmd_vms != NULL) {
98 		while ((vm = TAILQ_FIRST(env->vmd_vms)) != NULL) {
99 			vm_remove(vm, __func__);
100 		}
101 		env->vmd_nvm = 0;
102 	}
103 	if (what & CONFIG_SWITCHES && env->vmd_switches != NULL) {
104 		while ((vsw = TAILQ_FIRST(env->vmd_switches)) != NULL)
105 			switch_remove(vsw);
106 		env->vmd_nswitches = 0;
107 	}
108 }
109 
110 int
111 config_setconfig(struct vmd *env)
112 {
113 	struct privsep	*ps = &env->vmd_ps;
114 	unsigned int	 id;
115 
116 	DPRINTF("%s: setting config", __func__);
117 
118 	for (id = 0; id < PROC_MAX; id++) {
119 		if (id == privsep_process)
120 			continue;
121 		proc_compose(ps, id, IMSG_VMDOP_CONFIG, &env->vmd_cfg,
122 		    sizeof(env->vmd_cfg));
123 	}
124 
125 	return (0);
126 }
127 
128 int
129 config_getconfig(struct vmd *env, struct imsg *imsg)
130 {
131 	struct privsep	*ps = &env->vmd_ps;
132 
133 	log_debug("%s: %s retrieving config",
134 	    __func__, ps->ps_title[privsep_process]);
135 
136 	IMSG_SIZE_CHECK(imsg, &env->vmd_cfg);
137 	memcpy(&env->vmd_cfg, imsg->data, sizeof(env->vmd_cfg));
138 
139 	return (0);
140 }
141 
142 int
143 config_setreset(struct vmd *env, unsigned int reset)
144 {
145 	struct privsep	*ps = &env->vmd_ps;
146 	unsigned int	 id;
147 
148 	DPRINTF("%s: resetting state", __func__);
149 
150 	for (id = 0; id < PROC_MAX; id++) {
151 		if ((reset & ps->ps_what[id]) == 0 ||
152 		    id == privsep_process)
153 			continue;
154 		proc_compose(ps, id, IMSG_CTL_RESET, &reset, sizeof(reset));
155 	}
156 
157 	return (0);
158 }
159 
160 int
161 config_getreset(struct vmd *env, struct imsg *imsg)
162 {
163 	unsigned int	 mode;
164 
165 	IMSG_SIZE_CHECK(imsg, &mode);
166 	memcpy(&mode, imsg->data, sizeof(mode));
167 
168 	log_debug("%s: %s resetting state",
169 	    __func__, env->vmd_ps.ps_title[privsep_process]);
170 
171 	config_purge(env, mode);
172 
173 	return (0);
174 }
175 
176 int
177 config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid)
178 {
179 	int diskfds[VMM_MAX_DISKS_PER_VM][VM_MAX_BASE_PER_DISK];
180 	struct vmd_if		*vif;
181 	struct vmop_create_params *vmc = &vm->vm_params;
182 	struct vm_create_params	*vcp = &vmc->vmc_params;
183 	unsigned int		 i, j;
184 	int			 fd = -1, vmboot = 0;
185 	int			 kernfd = -1;
186 	int			*tapfds = NULL;
187 	int			 cdromfd = -1;
188 	int			 saved_errno = 0;
189 	int			 n = 0, aflags, oflags;
190 	char			 ifname[IF_NAMESIZE], *s;
191 	char			 path[PATH_MAX];
192 	char			 base[PATH_MAX];
193 	unsigned int		 unit;
194 	struct timeval		 tv, rate, since_last;
195 
196 	errno = 0;
197 
198 	if (vm->vm_running) {
199 		log_warnx("%s: vm is already running", __func__);
200 		errno = EALREADY;
201 		return (-1);
202 	}
203 
204 	/* increase the user reference counter and check user limits */
205 	if (vm->vm_user != NULL && user_get(vm->vm_user->usr_id.uid) != NULL) {
206 		user_inc(vcp, vm->vm_user, 1);
207 		if (user_checklimit(vm->vm_user, vcp) == -1) {
208 			errno = EPERM;
209 			goto fail;
210 		}
211 	}
212 
213 	/*
214 	 * Rate-limit the VM so that it cannot restart in a loop:
215 	 * if the VM restarts after less than VM_START_RATE_SEC seconds,
216 	 * we increment the limit counter.  After VM_START_RATE_LIMIT
217 	 * of suchs fast reboots the VM is stopped.
218 	 */
219 	getmonotime(&tv);
220 	if (vm->vm_start_tv.tv_sec) {
221 		timersub(&tv, &vm->vm_start_tv, &since_last);
222 
223 		rate.tv_sec = VM_START_RATE_SEC;
224 		rate.tv_usec = 0;
225 		if (timercmp(&since_last, &rate, <))
226 			vm->vm_start_limit++;
227 		else {
228 			/* Reset counter */
229 			vm->vm_start_limit = 0;
230 		}
231 
232 		log_debug("%s: vm %u restarted after %lld.%ld seconds,"
233 		    " limit %d/%d", __func__, vcp->vcp_id, since_last.tv_sec,
234 		    since_last.tv_usec, vm->vm_start_limit,
235 		    VM_START_RATE_LIMIT);
236 
237 		if (vm->vm_start_limit >= VM_START_RATE_LIMIT) {
238 			log_warnx("%s: vm %u restarted too quickly",
239 			    __func__, vcp->vcp_id);
240 			errno = EPERM;
241 			goto fail;
242 		}
243 	}
244 	vm->vm_start_tv = tv;
245 
246 	for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++)
247 		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
248 			diskfds[i][j] = -1;
249 
250 	tapfds = reallocarray(NULL, vcp->vcp_nnics, sizeof(*tapfds));
251 	if (tapfds == NULL) {
252 		log_warn("%s: can't allocate tap fds", __func__);
253 		goto fail;
254 	}
255 	for (i = 0; i < vcp->vcp_nnics; i++)
256 		tapfds[i] = -1;
257 
258 	vm->vm_peerid = peerid;
259 	vm->vm_uid = uid;
260 
261 	if (!vm->vm_received) {
262 		if (strlen(vcp->vcp_kernel)) {
263 			/*
264 			 * Boot kernel from disk image if path matches the
265 			 * root disk.
266 			 */
267 			if (vcp->vcp_ndisks &&
268 			    strcmp(vcp->vcp_kernel, vcp->vcp_disks[0]) == 0)
269 				vmboot = 1;
270 			/* Open external kernel for child */
271 			else if ((kernfd =
272 			    open(vcp->vcp_kernel, O_RDONLY)) == -1) {
273 				log_warn("%s: can't open kernel or BIOS "
274 				    "boot image %s", __func__, vcp->vcp_kernel);
275 				goto fail;
276 			}
277 		}
278 
279 		/*
280 		 * Try to open the default BIOS image if no kernel/BIOS has been
281 		 * specified.  The BIOS is an external firmware file that is
282 		 * typically distributed separately due to an incompatible
283 		 * license.
284 		 */
285 		if (kernfd == -1 && !vmboot &&
286 		    (kernfd = open(VM_DEFAULT_BIOS, O_RDONLY)) == -1) {
287 			log_warn("%s: can't open %s", __func__,
288 			    VM_DEFAULT_BIOS);
289 			errno = VMD_BIOS_MISSING;
290 			goto fail;
291 		}
292 
293 		if (!vmboot && vm_checkaccess(kernfd,
294 		    vmc->vmc_checkaccess & VMOP_CREATE_KERNEL,
295 		    uid, R_OK) == -1) {
296 			log_warnx("vm \"%s\" no read access to kernel %s",
297 			    vcp->vcp_name, vcp->vcp_kernel);
298 			errno = EPERM;
299 			goto fail;
300 		}
301 	}
302 
303 	/* Open CDROM image for child */
304 	if (strlen(vcp->vcp_cdrom)) {
305 		/* Stat cdrom to ensure it is a regular file */
306 		if ((cdromfd =
307 		    open(vcp->vcp_cdrom, O_RDONLY)) == -1) {
308 			log_warn("%s: can't open cdrom %s", __func__,
309 			    vcp->vcp_cdrom);
310 			errno = VMD_CDROM_MISSING;
311 			goto fail;
312 		}
313 
314 		if (vm_checkaccess(cdromfd,
315 		    vmc->vmc_checkaccess & VMOP_CREATE_CDROM,
316 		    uid, R_OK) == -1) {
317 			log_warnx("vm \"%s\" no read access to cdrom %s",
318 			    vcp->vcp_name, vcp->vcp_cdrom);
319 			errno = EPERM;
320 			goto fail;
321 		}
322 	}
323 
324 	/* Open disk images for child */
325 	for (i = 0 ; i < vcp->vcp_ndisks; i++) {
326 		if (strlcpy(path, vcp->vcp_disks[i], sizeof(path))
327 		   >= sizeof(path))
328 			log_warnx("%s, disk path too long", __func__);
329 		memset(vmc->vmc_diskbases, 0, sizeof(vmc->vmc_diskbases));
330 		oflags = O_RDWR|O_EXLOCK|O_NONBLOCK;
331 		aflags = R_OK|W_OK;
332 		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
333 			/* Stat disk[i] to ensure it is a regular file */
334 			if ((diskfds[i][j] = open(path, oflags)) == -1) {
335 				log_warn("%s: can't open disk %s", __func__,
336 				    vcp->vcp_disks[i]);
337 				errno = VMD_DISK_MISSING;
338 				goto fail;
339 			}
340 
341 			if (vm_checkaccess(diskfds[i][j],
342 			    vmc->vmc_checkaccess & VMOP_CREATE_DISK,
343 			    uid, aflags) == -1) {
344 				log_warnx("vm \"%s\" unable to access "
345 				    "disk %s", vcp->vcp_name, path);
346 				errno = EPERM;
347 				goto fail;
348 			}
349 
350 			/*
351 			 * Clear the write and exclusive flags for base images.
352 			 * All writes should go to the top image, allowing them
353 			 * to be shared.
354 			 */
355 			oflags = O_RDONLY|O_NONBLOCK;
356 			aflags = R_OK;
357 			n = virtio_get_base(diskfds[i][j], base, sizeof(base),
358 			    vmc->vmc_disktypes[i], path);
359 			if (n == 0)
360 				break;
361 			if (n == -1) {
362 				log_warnx("vm \"%s\" unable to read "
363 				    "base %s for disk %s", vcp->vcp_name,
364 				    base, vcp->vcp_disks[i]);
365 				goto fail;
366 			}
367 			(void)strlcpy(path, base, sizeof(path));
368 		}
369 	}
370 
371 	/* Open network interfaces */
372 	for (i = 0 ; i < vcp->vcp_nnics; i++) {
373 		vif = &vm->vm_ifs[i];
374 
375 		/* Check if the user has requested a specific tap(4) */
376 		s = vmc->vmc_ifnames[i];
377 		if (*s != '\0' && strcmp("tap", s) != 0) {
378 			if (priv_getiftype(s, ifname, &unit) == -1 ||
379 			    strcmp(ifname, "tap") != 0) {
380 				log_warnx("%s: invalid tap name %s",
381 				    __func__, s);
382 				errno = EINVAL;
383 				goto fail;
384 			}
385 		} else
386 			s = NULL;
387 
388 		/*
389 		 * Either open the requested tap(4) device or get
390 		 * the next available one.
391 		 */
392 		if (s != NULL) {
393 			snprintf(path, PATH_MAX, "/dev/%s", s);
394 			tapfds[i] = open(path, O_RDWR | O_NONBLOCK);
395 		} else {
396 			tapfds[i] = opentap(ifname);
397 			s = ifname;
398 		}
399 		if (tapfds[i] == -1) {
400 			log_warn("%s: can't open tap %s", __func__, s);
401 			goto fail;
402 		}
403 		if ((vif->vif_name = strdup(s)) == NULL) {
404 			log_warn("%s: can't save tap %s", __func__, s);
405 			goto fail;
406 		}
407 
408 		/* Check if the the interface is attached to a switch */
409 		s = vmc->vmc_ifswitch[i];
410 		if (*s != '\0') {
411 			if ((vif->vif_switch = strdup(s)) == NULL) {
412 				log_warn("%s: can't save switch %s",
413 				    __func__, s);
414 				goto fail;
415 			}
416 		}
417 
418 		/* Check if the the interface is assigned to a group */
419 		s = vmc->vmc_ifgroup[i];
420 		if (*s != '\0') {
421 			if ((vif->vif_group = strdup(s)) == NULL) {
422 				log_warn("%s: can't save group %s",
423 				    __func__, s);
424 				goto fail;
425 			}
426 		}
427 
428 		/* non-default rdomain (requires VMIFF_RDOMAIN below) */
429 		vif->vif_rdomain = vmc->vmc_ifrdomain[i];
430 
431 		/* Set the interface status */
432 		vif->vif_flags =
433 		    vmc->vmc_ifflags[i] & (VMIFF_UP|VMIFF_OPTMASK);
434 	}
435 
436 	/* Open TTY */
437 	if (vm->vm_ttyname == NULL) {
438 		if (vm_opentty(vm) == -1) {
439 			log_warn("%s: can't open tty %s", __func__,
440 			    vm->vm_ttyname == NULL ? "" : vm->vm_ttyname);
441 			goto fail;
442 		}
443 	}
444 	if ((fd = dup(vm->vm_tty)) == -1) {
445 		log_warn("%s: can't re-open tty %s", __func__, vm->vm_ttyname);
446 		goto fail;
447 	}
448 
449 	/* Send VM information */
450 	if (vm->vm_received)
451 		proc_compose_imsg(ps, PROC_VMM, -1,
452 		    IMSG_VMDOP_RECEIVE_VM_REQUEST, vm->vm_vmid, fd,  vmc,
453 		    sizeof(struct vmop_create_params));
454 	else
455 		proc_compose_imsg(ps, PROC_VMM, -1,
456 		    IMSG_VMDOP_START_VM_REQUEST, vm->vm_vmid, kernfd,
457 		    vmc, sizeof(*vmc));
458 
459 	if (strlen(vcp->vcp_cdrom))
460 		proc_compose_imsg(ps, PROC_VMM, -1,
461 		    IMSG_VMDOP_START_VM_CDROM, vm->vm_vmid, cdromfd,
462 		    NULL, 0);
463 
464 	for (i = 0; i < vcp->vcp_ndisks; i++) {
465 		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
466 			if (diskfds[i][j] == -1)
467 				break;
468 			proc_compose_imsg(ps, PROC_VMM, -1,
469 			    IMSG_VMDOP_START_VM_DISK, vm->vm_vmid,
470 			    diskfds[i][j], &i, sizeof(i));
471 		}
472 	}
473 	for (i = 0; i < vcp->vcp_nnics; i++) {
474 		proc_compose_imsg(ps, PROC_VMM, -1,
475 		    IMSG_VMDOP_START_VM_IF, vm->vm_vmid, tapfds[i],
476 		    &i, sizeof(i));
477 	}
478 
479 	if (!vm->vm_received)
480 		proc_compose_imsg(ps, PROC_VMM, -1,
481 		    IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd,  NULL, 0);
482 
483 	free(tapfds);
484 
485 	vm->vm_running = 1;
486 	return (0);
487 
488  fail:
489 	saved_errno = errno;
490 	log_warnx("%s: failed to start vm %s", __func__, vcp->vcp_name);
491 
492 	if (kernfd != -1)
493 		close(kernfd);
494 	if (cdromfd != -1)
495 		close(cdromfd);
496 	for (i = 0; i < vcp->vcp_ndisks; i++)
497 		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
498 			if (diskfds[i][j] != -1)
499 				close(diskfds[i][j]);
500 	if (tapfds != NULL) {
501 		for (i = 0; i < vcp->vcp_nnics; i++)
502 			close(tapfds[i]);
503 		free(tapfds);
504 	}
505 
506 	if (vm->vm_from_config) {
507 		vm_stop(vm, 0, __func__);
508 	} else {
509 		vm_remove(vm, __func__);
510 	}
511 	errno = saved_errno;
512 	if (errno == 0)
513 		errno = EINVAL;
514 	return (-1);
515 }
516 
517 int
518 config_getvm(struct privsep *ps, struct imsg *imsg)
519 {
520 	struct vmop_create_params	 vmc;
521 	struct vmd_vm			*vm;
522 
523 	IMSG_SIZE_CHECK(imsg, &vmc);
524 	memcpy(&vmc, imsg->data, sizeof(vmc));
525 
526 	errno = 0;
527 	if (vm_register(ps, &vmc, &vm, imsg->hdr.peerid, 0) == -1)
528 		goto fail;
529 
530 	/* If the fd is -1, the kernel will be searched on the disk */
531 	vm->vm_kernel = imsg->fd;
532 	vm->vm_running = 1;
533 	vm->vm_peerid = (uint32_t)-1;
534 
535 	return (0);
536 
537  fail:
538 	if (imsg->fd != -1) {
539 		close(imsg->fd);
540 		imsg->fd = -1;
541 	}
542 
543 	vm_remove(vm, __func__);
544 	if (errno == 0)
545 		errno = EINVAL;
546 
547 	return (-1);
548 }
549 
550 int
551 config_getdisk(struct privsep *ps, struct imsg *imsg)
552 {
553 	struct vmd_vm	*vm;
554 	unsigned int	 n, idx;
555 
556 	errno = 0;
557 	if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
558 		errno = ENOENT;
559 		return (-1);
560 	}
561 
562 	IMSG_SIZE_CHECK(imsg, &n);
563 	memcpy(&n, imsg->data, sizeof(n));
564 
565 	if (n >= vm->vm_params.vmc_params.vcp_ndisks || imsg->fd == -1) {
566 		log_warnx("invalid disk id");
567 		errno = EINVAL;
568 		return (-1);
569 	}
570 	idx = vm->vm_params.vmc_diskbases[n]++;
571 	if (idx >= VM_MAX_BASE_PER_DISK) {
572 		log_warnx("too many bases for disk");
573 		errno = EINVAL;
574 		return (-1);
575 	}
576 	vm->vm_disks[n][idx] = imsg->fd;
577 	return (0);
578 }
579 
580 int
581 config_getif(struct privsep *ps, struct imsg *imsg)
582 {
583 	struct vmd_vm	*vm;
584 	unsigned int	 n;
585 
586 	errno = 0;
587 	if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
588 		errno = ENOENT;
589 		return (-1);
590 	}
591 
592 	IMSG_SIZE_CHECK(imsg, &n);
593 	memcpy(&n, imsg->data, sizeof(n));
594 	if (n >= vm->vm_params.vmc_params.vcp_nnics ||
595 	    vm->vm_ifs[n].vif_fd != -1 || imsg->fd == -1) {
596 		log_warnx("invalid interface id");
597 		goto fail;
598 	}
599 	vm->vm_ifs[n].vif_fd = imsg->fd;
600 	return (0);
601  fail:
602 	if (imsg->fd != -1)
603 		close(imsg->fd);
604 	errno = EINVAL;
605 	return (-1);
606 }
607 
608 int
609 config_getcdrom(struct privsep *ps, struct imsg *imsg)
610 {
611 	struct vmd_vm	*vm;
612 
613 	errno = 0;
614 	if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
615 		errno = ENOENT;
616 		return (-1);
617 	}
618 
619 	if (imsg->fd == -1) {
620 		log_warnx("invalid cdrom id");
621 		goto fail;
622 	}
623 
624 	vm->vm_cdrom = imsg->fd;
625 	return (0);
626  fail:
627 	if (imsg->fd != -1)
628 		close(imsg->fd);
629 	errno = EINVAL;
630 	return (-1);
631 }
632