xref: /openbsd/usr.sbin/vmd/vmd.c (revision e7a1ead9)
1 /*	$OpenBSD: vmd.c,v 1.163 2024/11/06 14:26:20 bluhm Exp $	*/
2 
3 /*
4  * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/queue.h>
21 #include <sys/stat.h>
22 #include <sys/sysctl.h>
23 #include <sys/tty.h>
24 #include <sys/ttycom.h>
25 #include <sys/ioctl.h>
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <errno.h>
31 #include <event.h>
32 #include <fcntl.h>
33 #include <pwd.h>
34 #include <signal.h>
35 #include <syslog.h>
36 #include <unistd.h>
37 #include <util.h>
38 #include <ctype.h>
39 #include <grp.h>
40 
41 #include <dev/vmm/vmm.h>
42 
43 #include "proc.h"
44 #include "atomicio.h"
45 #include "vmd.h"
46 
47 __dead void usage(void);
48 
49 int	 main(int, char **);
50 int	 vmd_configure(void);
51 void	 vmd_sighdlr(int sig, short event, void *arg);
52 void	 vmd_shutdown(void);
53 int	 vmd_dispatch_control(int, struct privsep_proc *, struct imsg *);
54 int	 vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *);
55 int	 vmd_dispatch_agentx(int, struct privsep_proc *, struct imsg *);
56 int	 vmd_dispatch_priv(int, struct privsep_proc *, struct imsg *);
57 int	 vmd_check_vmh(struct vm_dump_header *);
58 
59 int	 vm_instance(struct privsep *, struct vmd_vm **,
60 	    struct vmop_create_params *, uid_t);
61 int	 vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t);
62 int	 vm_claimid(const char *, int, uint32_t *);
63 void	 start_vm_batch(int, short, void*);
64 
65 static inline void vm_terminate(struct vmd_vm *, const char *);
66 
67 struct vmd	*env;
68 
69 static struct privsep_proc procs[] = {
70 	/* Keep "priv" on top as procs[0] */
71 	{ "priv",	PROC_PRIV,	vmd_dispatch_priv, priv },
72 	{ "control",	PROC_CONTROL,	vmd_dispatch_control, control },
73 	{ "vmm",	PROC_VMM,	vmd_dispatch_vmm, vmm,
74 	  vmm_shutdown, "/" },
75 	{ "agentx", 	PROC_AGENTX,	vmd_dispatch_agentx, vm_agentx,
76 	  vm_agentx_shutdown, "/" }
77 };
78 
79 enum privsep_procid privsep_process;
80 
81 struct event staggered_start_timer;
82 
83 /* For the privileged process */
84 static struct privsep_proc *proc_priv = &procs[0];
85 static struct passwd proc_privpw;
86 static const uint8_t zero_mac[ETHER_ADDR_LEN];
87 
88 const char		 default_conffile[] = VMD_CONF;
89 const char		*conffile = default_conffile;
90 
91 int
vmd_dispatch_control(int fd,struct privsep_proc * p,struct imsg * imsg)92 vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg)
93 {
94 	struct privsep			*ps = p->p_ps;
95 	int				 res = 0, ret = 0, cmd = 0, verbose;
96 	int				 ifd;
97 	unsigned int			 v = 0, flags;
98 	struct vmop_create_params	 vmc;
99 	struct vmop_id			 vid;
100 	struct vmop_result		 vmr;
101 	struct vm_dump_header		 vmh;
102 	struct vmd_vm			*vm = NULL;
103 	char				*str = NULL;
104 	uint32_t			 id = 0;
105 	struct control_sock		*rcs;
106 
107 	switch (imsg->hdr.type) {
108 	case IMSG_VMDOP_START_VM_REQUEST:
109 		IMSG_SIZE_CHECK(imsg, &vmc);
110 		memcpy(&vmc, imsg->data, sizeof(vmc));
111 		vmc.vmc_kernel = imsg_get_fd(imsg);
112 
113 		/* Try registering our VM in our list of known VMs. */
114 		if (vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid)) {
115 			res = errno;
116 
117 			/* Did we have a failure during lookup of a parent? */
118 			if (vm == NULL) {
119 				cmd = IMSG_VMDOP_START_VM_RESPONSE;
120 				break;
121 			}
122 
123 			/* Does the VM already exist? */
124 			if (res == EALREADY) {
125 				/* Is it already running? */
126 				if (vm->vm_state & VM_STATE_RUNNING) {
127 					cmd = IMSG_VMDOP_START_VM_RESPONSE;
128 					break;
129 				}
130 
131 				/* If not running, are our flags ok? */
132 				if (vmc.vmc_flags &&
133 				    vmc.vmc_flags != VMOP_CREATE_KERNEL) {
134 					cmd = IMSG_VMDOP_START_VM_RESPONSE;
135 					break;
136 				}
137 			}
138 			res = 0;
139 		}
140 
141 		/* Try to start the launch of the VM. */
142 		res = config_setvm(ps, vm, imsg->hdr.peerid,
143 		    vm->vm_params.vmc_owner.uid);
144 		if (res)
145 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
146 		break;
147 	case IMSG_VMDOP_WAIT_VM_REQUEST:
148 	case IMSG_VMDOP_TERMINATE_VM_REQUEST:
149 		IMSG_SIZE_CHECK(imsg, &vid);
150 		memcpy(&vid, imsg->data, sizeof(vid));
151 		flags = vid.vid_flags;
152 		cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
153 
154 		if ((id = vid.vid_id) == 0) {
155 			/* Lookup vm (id) by name */
156 			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
157 				res = ENOENT;
158 				break;
159 			}
160 			id = vm->vm_vmid;
161 		} else if ((vm = vm_getbyvmid(id)) == NULL) {
162 			res = ENOENT;
163 			break;
164 		}
165 
166 		/* Validate curent state of vm */
167 		if ((vm->vm_state & VM_STATE_SHUTDOWN) &&
168 			    (flags & VMOP_FORCE) == 0) {
169 				res = EALREADY;
170 				break;
171 		} else if (!(vm->vm_state & VM_STATE_RUNNING)) {
172 			res = EINVAL;
173 			break;
174 		} else if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) {
175 			res = EPERM;
176 			break;
177 		}
178 
179 		/* Only relay TERMINATION requests, not WAIT requests */
180 		if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) {
181 			memset(&vid, 0, sizeof(vid));
182 			vid.vid_id = id;
183 			vid.vid_flags = flags;
184 
185 			if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
186 				imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1)
187 				return (-1);
188 		}
189 		break;
190 	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
191 		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
192 		break;
193 	case IMSG_VMDOP_LOAD:
194 		IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */
195 		str = get_string((uint8_t *)imsg->data,
196 		    IMSG_DATA_SIZE(imsg));
197 	case IMSG_VMDOP_RELOAD:
198 		if (vmd_reload(0, str) == -1)
199 			cmd = IMSG_CTL_FAIL;
200 		else
201 			cmd = IMSG_CTL_OK;
202 		free(str);
203 		break;
204 	case IMSG_CTL_RESET:
205 		IMSG_SIZE_CHECK(imsg, &v);
206 		memcpy(&v, imsg->data, sizeof(v));
207 		if (vmd_reload(v, NULL) == -1)
208 			cmd = IMSG_CTL_FAIL;
209 		else
210 			cmd = IMSG_CTL_OK;
211 		break;
212 	case IMSG_CTL_VERBOSE:
213 		IMSG_SIZE_CHECK(imsg, &verbose);
214 		memcpy(&verbose, imsg->data, sizeof(verbose));
215 		log_setverbose(verbose);
216 
217 		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
218 		proc_forward_imsg(ps, imsg, PROC_PRIV, -1);
219 		cmd = IMSG_CTL_OK;
220 		break;
221 	case IMSG_VMDOP_PAUSE_VM:
222 	case IMSG_VMDOP_UNPAUSE_VM:
223 		IMSG_SIZE_CHECK(imsg, &vid);
224 		memcpy(&vid, imsg->data, sizeof(vid));
225 		if (vid.vid_id == 0) {
226 			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
227 				res = ENOENT;
228 				cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
229 				    ? IMSG_VMDOP_PAUSE_VM_RESPONSE
230 				    : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
231 				break;
232 			} else {
233 				vid.vid_id = vm->vm_vmid;
234 			}
235 		} else if ((vm = vm_getbyid(vid.vid_id)) == NULL) {
236 			res = ENOENT;
237 			cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
238 			    ? IMSG_VMDOP_PAUSE_VM_RESPONSE
239 			    : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
240 			break;
241 		}
242 		if (vm_checkperm(vm, &vm->vm_params.vmc_owner,
243 		    vid.vid_uid) != 0) {
244 			res = EPERM;
245 			cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
246 			    ? IMSG_VMDOP_PAUSE_VM_RESPONSE
247 			    : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
248 			break;
249 		}
250 		proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
251 		    imsg->hdr.peerid, -1, &vid, sizeof(vid));
252 		break;
253 	case IMSG_VMDOP_SEND_VM_REQUEST:
254 		IMSG_SIZE_CHECK(imsg, &vid);
255 		memcpy(&vid, imsg->data, sizeof(vid));
256 		id = vid.vid_id;
257 		ifd = imsg_get_fd(imsg);
258 		if (vid.vid_id == 0) {
259 			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
260 				res = ENOENT;
261 				cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
262 				close(ifd);
263 				break;
264 			} else {
265 				vid.vid_id = vm->vm_vmid;
266 			}
267 		} else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) {
268 			res = ENOENT;
269 			cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
270 			close(ifd);
271 			break;
272 		}
273 		vmr.vmr_id = vid.vid_id;
274 		log_debug("%s: sending fd to vmm", __func__);
275 		proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
276 		    imsg->hdr.peerid, ifd, &vid, sizeof(vid));
277 		break;
278 	case IMSG_VMDOP_RECEIVE_VM_REQUEST:
279 		IMSG_SIZE_CHECK(imsg, &vid);
280 		memcpy(&vid, imsg->data, sizeof(vid));
281 		ifd = imsg_get_fd(imsg);
282 		if (ifd == -1) {
283 			log_warnx("%s: invalid fd", __func__);
284 			return (-1);
285 		}
286 		if (atomicio(read, ifd, &vmh, sizeof(vmh)) != sizeof(vmh)) {
287 			log_warnx("%s: error reading vmh from received vm",
288 			    __func__);
289 			res = EIO;
290 			close(ifd);
291 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
292 			break;
293 		}
294 
295 		if (vmd_check_vmh(&vmh)) {
296 			res = ENOENT;
297 			close(ifd);
298 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
299 			break;
300 		}
301 		if (atomicio(read, ifd, &vmc, sizeof(vmc)) != sizeof(vmc)) {
302 			log_warnx("%s: error reading vmc from received vm",
303 			    __func__);
304 			res = EIO;
305 			close(ifd);
306 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
307 			break;
308 		}
309 		strlcpy(vmc.vmc_params.vcp_name, vid.vid_name,
310 		    sizeof(vmc.vmc_params.vcp_name));
311 		vmc.vmc_params.vcp_id = 0;
312 
313 		ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid);
314 		if (ret != 0) {
315 			res = errno;
316 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
317 			close(ifd);
318 		} else {
319 			vm->vm_state |= VM_STATE_RECEIVED;
320 			config_setvm(ps, vm, imsg->hdr.peerid,
321 			    vmc.vmc_owner.uid);
322 			log_debug("%s: sending fd to vmm", __func__);
323 			proc_compose_imsg(ps, PROC_VMM, -1,
324 			    IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, ifd,
325 			    NULL, 0);
326 		}
327 		break;
328 	case IMSG_VMDOP_DONE:
329 		control_reset(&ps->ps_csock);
330 		TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry)
331 			control_reset(rcs);
332 		cmd = 0;
333 		break;
334 	default:
335 		return (-1);
336 	}
337 
338 	switch (cmd) {
339 	case 0:
340 		break;
341 	case IMSG_VMDOP_START_VM_RESPONSE:
342 	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
343 		memset(&vmr, 0, sizeof(vmr));
344 		vmr.vmr_result = res;
345 		vmr.vmr_id = id;
346 		if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
347 		    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
348 			return (-1);
349 		break;
350 	default:
351 		if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
352 		    imsg->hdr.peerid, -1, &res, sizeof(res)) == -1)
353 			return (-1);
354 		break;
355 	}
356 
357 	return (0);
358 }
359 
360 int
vmd_dispatch_vmm(int fd,struct privsep_proc * p,struct imsg * imsg)361 vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg)
362 {
363 	struct vmop_result	 vmr;
364 	struct privsep		*ps = p->p_ps;
365 	int			 res = 0;
366 	struct vmd_vm		*vm;
367 	struct vm_create_params	*vcp;
368 	struct vmop_info_result	 vir;
369 
370 	switch (imsg->hdr.type) {
371 	case IMSG_VMDOP_PAUSE_VM_RESPONSE:
372 		IMSG_SIZE_CHECK(imsg, &vmr);
373 		memcpy(&vmr, imsg->data, sizeof(vmr));
374 		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
375 			break;
376 		proc_compose_imsg(ps, PROC_CONTROL, -1,
377 		    imsg->hdr.type, imsg->hdr.peerid, -1,
378 		    imsg->data, sizeof(imsg->data));
379 		log_info("%s: paused vm %d successfully",
380 		    vm->vm_params.vmc_params.vcp_name,
381 		    vm->vm_vmid);
382 		vm->vm_state |= VM_STATE_PAUSED;
383 		break;
384 	case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
385 		IMSG_SIZE_CHECK(imsg, &vmr);
386 		memcpy(&vmr, imsg->data, sizeof(vmr));
387 		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
388 			break;
389 		proc_compose_imsg(ps, PROC_CONTROL, -1,
390 		    imsg->hdr.type, imsg->hdr.peerid, -1,
391 		    imsg->data, sizeof(imsg->data));
392 		log_info("%s: unpaused vm %d successfully.",
393 		    vm->vm_params.vmc_params.vcp_name,
394 		    vm->vm_vmid);
395 		vm->vm_state &= ~VM_STATE_PAUSED;
396 		break;
397 	case IMSG_VMDOP_START_VM_RESPONSE:
398 		IMSG_SIZE_CHECK(imsg, &vmr);
399 		memcpy(&vmr, imsg->data, sizeof(vmr));
400 		if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL)
401 			break;
402 		vm->vm_pid = vmr.vmr_pid;
403 		vcp = &vm->vm_params.vmc_params;
404 		vcp->vcp_id = vmr.vmr_id;
405 
406 		/*
407 		 * If the peerid is not -1, forward the response back to the
408 		 * the control socket.  If it is -1, the request originated
409 		 * from the parent, not the control socket.
410 		 */
411 		if (vm->vm_peerid != (uint32_t)-1) {
412 			(void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname,
413 			    sizeof(vmr.vmr_ttyname));
414 			if (proc_compose_imsg(ps, PROC_CONTROL, -1,
415 			    imsg->hdr.type, vm->vm_peerid, -1,
416 			    &vmr, sizeof(vmr)) == -1) {
417 				errno = vmr.vmr_result;
418 				log_warn("%s: failed to forward vm result",
419 				    vcp->vcp_name);
420 				vm_terminate(vm, __func__);
421 				return (-1);
422 			}
423 		}
424 
425 		if (vmr.vmr_result) {
426 			log_warnx("%s: failed to start vm", vcp->vcp_name);
427 			vm_terminate(vm, __func__);
428 			errno = vmr.vmr_result;
429 			break;
430 		}
431 
432 		/* Now configure all the interfaces */
433 		if (vm_priv_ifconfig(ps, vm) == -1) {
434 			log_warn("%s: failed to configure vm", vcp->vcp_name);
435 			vm_terminate(vm, __func__);
436 			break;
437 		}
438 
439 		log_info("started %s (vm %d) successfully, tty %s",
440 		    vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname);
441 		break;
442 	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
443 		IMSG_SIZE_CHECK(imsg, &vmr);
444 		memcpy(&vmr, imsg->data, sizeof(vmr));
445 
446 		if (vmr.vmr_result) {
447 			DPRINTF("%s: forwarding TERMINATE VM for vm id %d",
448 			    __func__, vmr.vmr_id);
449 			proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
450 		} else {
451 			if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
452 				break;
453 			/* Mark VM as shutting down */
454 			vm->vm_state |= VM_STATE_SHUTDOWN;
455 		}
456 		break;
457 	case IMSG_VMDOP_SEND_VM_RESPONSE:
458 		IMSG_SIZE_CHECK(imsg, &vmr);
459 		memcpy(&vmr, imsg->data, sizeof(vmr));
460 		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
461 			break;
462 		if (!vmr.vmr_result) {
463 			log_info("%s: sent vm %d successfully.",
464 			    vm->vm_params.vmc_params.vcp_name,
465 			    vm->vm_vmid);
466 			vm_terminate(vm, __func__);
467 		}
468 
469 		/* Send a response if a control client is waiting for it */
470 		if (imsg->hdr.peerid != (uint32_t)-1) {
471 			/* the error is meaningless for deferred responses */
472 			vmr.vmr_result = 0;
473 
474 			if (proc_compose_imsg(ps, PROC_CONTROL, -1,
475 			    IMSG_VMDOP_SEND_VM_RESPONSE,
476 			    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
477 				return (-1);
478 		}
479 		break;
480 	case IMSG_VMDOP_TERMINATE_VM_EVENT:
481 		IMSG_SIZE_CHECK(imsg, &vmr);
482 		memcpy(&vmr, imsg->data, sizeof(vmr));
483 		DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d",
484 		    __func__, vmr.vmr_id, vmr.vmr_result);
485 		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) {
486 			log_debug("%s: vm %d is no longer available",
487 			    __func__, vmr.vmr_id);
488 			break;
489 		}
490 		if (vmr.vmr_result != EAGAIN ||
491 		    vm->vm_params.vmc_bootdevice) {
492 			vm_terminate(vm, __func__);
493 		} else {
494 			/* Stop VM instance but keep the tty open */
495 			vm_stop(vm, 1, __func__);
496 			config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid);
497 		}
498 
499 		/* The error is meaningless for deferred responses */
500 		vmr.vmr_result = 0;
501 
502 		if (proc_compose_imsg(ps, PROC_CONTROL, -1,
503 			IMSG_VMDOP_TERMINATE_VM_EVENT,
504 			imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
505 			return (-1);
506 		break;
507 	case IMSG_VMDOP_GET_INFO_VM_DATA:
508 		IMSG_SIZE_CHECK(imsg, &vir);
509 		memcpy(&vir, imsg->data, sizeof(vir));
510 		if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) {
511 			memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname));
512 			if (vm->vm_ttyname[0] != '\0')
513 				strlcpy(vir.vir_ttyname, vm->vm_ttyname,
514 				    sizeof(vir.vir_ttyname));
515 			log_debug("%s: running vm: %d, vm_state: 0x%x",
516 			    __func__, vm->vm_vmid, vm->vm_state);
517 			vir.vir_state = vm->vm_state;
518 			/* get the user id who started the vm */
519 			vir.vir_uid = vm->vm_uid;
520 			vir.vir_gid = vm->vm_params.vmc_owner.gid;
521 		}
522 		if (proc_compose_imsg(ps,
523 		    imsg->hdr.peerid == IMSG_AGENTX_PEERID ?
524 		    PROC_AGENTX : PROC_CONTROL, -1, imsg->hdr.type,
525 		    imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) {
526 			if (vm)
527 				vm_terminate(vm, __func__);
528 			return (-1);
529 		}
530 		break;
531 	case IMSG_VMDOP_GET_INFO_VM_END_DATA:
532 		/*
533 		 * PROC_VMM has responded with the *running* VMs, now we
534 		 * append the others. These use the special value 0 for their
535 		 * kernel id to indicate that they are not running.
536 		 */
537 		TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
538 			if (!(vm->vm_state & VM_STATE_RUNNING)) {
539 				memset(&vir, 0, sizeof(vir));
540 				vir.vir_info.vir_id = vm->vm_vmid;
541 				strlcpy(vir.vir_info.vir_name,
542 				    vm->vm_params.vmc_params.vcp_name,
543 				    VMM_MAX_NAME_LEN);
544 				vir.vir_info.vir_memory_size =
545 				    vm->vm_params.vmc_params.
546 				    vcp_memranges[0].vmr_size;
547 				vir.vir_info.vir_ncpus =
548 				    vm->vm_params.vmc_params.vcp_ncpus;
549 				/* get the configured user id for this vm */
550 				vir.vir_uid = vm->vm_params.vmc_owner.uid;
551 				vir.vir_gid = vm->vm_params.vmc_owner.gid;
552 				log_debug("%s: vm: %d, vm_state: 0x%x",
553 				    __func__, vm->vm_vmid, vm->vm_state);
554 				vir.vir_state = vm->vm_state;
555 				if (proc_compose_imsg(ps,
556 				    imsg->hdr.peerid == IMSG_AGENTX_PEERID ?
557 				    PROC_AGENTX : PROC_CONTROL, -1,
558 				    IMSG_VMDOP_GET_INFO_VM_DATA,
559 				    imsg->hdr.peerid, -1, &vir,
560 				    sizeof(vir)) == -1) {
561 					log_debug("%s: GET_INFO_VM_END failed",
562 					    __func__);
563 					vm_terminate(vm, __func__);
564 					return (-1);
565 				}
566 			}
567 		}
568 		IMSG_SIZE_CHECK(imsg, &res);
569 		proc_forward_imsg(ps, imsg,
570 		    imsg->hdr.peerid == IMSG_AGENTX_PEERID ?
571 		    PROC_AGENTX : PROC_CONTROL, -1);
572 		break;
573 	default:
574 		return (-1);
575 	}
576 
577 	return (0);
578 }
579 
580 int
vmd_dispatch_agentx(int fd,struct privsep_proc * p,struct imsg * imsg)581 vmd_dispatch_agentx(int fd, struct privsep_proc *p, struct imsg *imsg)
582 {
583 	struct privsep			*ps = p->p_ps;
584 
585 	switch (imsg->hdr.type) {
586 	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
587 		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
588 		return (0);
589 	default:
590 		break;
591 	}
592 	return (-1);
593 }
594 
595 int
vmd_dispatch_priv(int fd,struct privsep_proc * p,struct imsg * imsg)596 vmd_dispatch_priv(int fd, struct privsep_proc *p, struct imsg *imsg)
597 {
598 	struct vmop_addr_result	 var;
599 
600 	switch (imsg->hdr.type) {
601 	case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE:
602 		IMSG_SIZE_CHECK(imsg, &var);
603 		memcpy(&var, imsg->data, sizeof(var));
604 		proc_forward_imsg(p->p_ps, imsg, PROC_VMM, -1);
605 		break;
606 	default:
607 		return (-1);
608 	}
609 
610 	return (0);
611 }
612 
613 
614 void
vmd_sighdlr(int sig,short event,void * arg)615 vmd_sighdlr(int sig, short event, void *arg)
616 {
617 	if (privsep_process != PROC_PARENT)
618 		return;
619 	log_debug("%s: handling signal", __func__);
620 
621 	switch (sig) {
622 	case SIGHUP:
623 		log_info("%s: reload requested with SIGHUP", __func__);
624 
625 		/*
626 		 * This is safe because libevent uses async signal handlers
627 		 * that run in the event loop and not in signal context.
628 		 */
629 		(void)vmd_reload(0, NULL);
630 		break;
631 	case SIGPIPE:
632 		log_info("%s: ignoring SIGPIPE", __func__);
633 		break;
634 	case SIGUSR1:
635 		log_info("%s: ignoring SIGUSR1", __func__);
636 		break;
637 	case SIGTERM:
638 	case SIGINT:
639 		vmd_shutdown();
640 		break;
641 	default:
642 		fatalx("unexpected signal");
643 	}
644 }
645 
646 __dead void
usage(void)647 usage(void)
648 {
649 	extern char *__progname;
650 	fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n",
651 	    __progname);
652 	exit(1);
653 }
654 
655 int
main(int argc,char ** argv)656 main(int argc, char **argv)
657 {
658 	struct privsep		*ps;
659 	int			 ch;
660 	enum privsep_procid	 proc_id = PROC_PARENT;
661 	int			 proc_instance = 0, vm_launch = 0;
662 	int			 vmm_fd = -1, vm_fd = -1, psp_fd = -1;
663 	const char		*errp, *title = NULL;
664 	int			 argc0 = argc;
665 	char			 dev_type = '\0';
666 
667 	log_init(0, LOG_DAEMON);
668 
669 	if ((env = calloc(1, sizeof(*env))) == NULL)
670 		fatal("calloc: env");
671 	env->vmd_fd = -1;
672 	env->vmd_fd6 = -1;
673 
674 	while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:j:nt:vp:")) != -1) {
675 		switch (ch) {
676 		case 'D':
677 			if (cmdline_symset(optarg) < 0)
678 				log_warnx("could not parse macro definition %s",
679 				    optarg);
680 			break;
681 		case 'd':
682 			env->vmd_debug = 2;
683 			break;
684 		case 'f':
685 			conffile = optarg;
686 			break;
687 		case 'v':
688 			env->vmd_verbose++;
689 			break;
690 		/* vmd fork/exec */
691 		case 'n':
692 			env->vmd_noaction = 1;
693 			break;
694 		case 'P':
695 			title = optarg;
696 			proc_id = proc_getid(procs, nitems(procs), title);
697 			if (proc_id == PROC_MAX)
698 				fatalx("invalid process name");
699 			break;
700 		case 'I':
701 			proc_instance = strtonum(optarg, 0,
702 			    PROC_MAX_INSTANCES, &errp);
703 			if (errp)
704 				fatalx("invalid process instance");
705 			break;
706 		/* child vm and device fork/exec */
707 		case 'p':
708 			title = optarg;
709 			break;
710 		case 'V':
711 			vm_launch = VMD_LAUNCH_VM;
712 			vm_fd = strtonum(optarg, 0, 128, &errp);
713 			if (errp)
714 				fatalx("invalid vm fd");
715 			break;
716 		case 'X':
717 			vm_launch = VMD_LAUNCH_DEV;
718 			vm_fd = strtonum(optarg, 0, 128, &errp);
719 			if (errp)
720 				fatalx("invalid device fd");
721 			break;
722 		case 't':
723 			dev_type = *optarg;
724 			switch (dev_type) {
725 			case VMD_DEVTYPE_NET:
726 			case VMD_DEVTYPE_DISK:
727 				break;
728 			default: fatalx("invalid device type");
729 			}
730 			break;
731 		case 'i':
732 			vmm_fd = strtonum(optarg, 0, 128, &errp);
733 			if (errp)
734 				fatalx("invalid vmm fd");
735 			break;
736 		case 'j':
737 			/* -1 means no PSP available */
738 			psp_fd = strtonum(optarg, -1, 128, &errp);
739 			if (errp)
740 				fatalx("invalid psp fd");
741 			break;
742 		default:
743 			usage();
744 		}
745 	}
746 
747 	argc -= optind;
748 	if (argc > 0)
749 		usage();
750 
751 	if (env->vmd_noaction && !env->vmd_debug)
752 		env->vmd_debug = 1;
753 
754 	log_init(env->vmd_debug, LOG_DAEMON);
755 	log_setverbose(env->vmd_verbose);
756 
757 	/* Re-exec from the vmm child process requires an absolute path. */
758 	if (proc_id == PROC_PARENT && *argv[0] != '/' && !env->vmd_noaction)
759 		fatalx("re-exec requires execution with an absolute path");
760 	env->argv0 = argv[0];
761 
762 	/* check for root privileges */
763 	if (env->vmd_noaction == 0 && !vm_launch) {
764 		if (geteuid())
765 			fatalx("need root privileges");
766 	}
767 
768 	ps = &env->vmd_ps;
769 	ps->ps_env = env;
770 	env->vmd_psp_fd = psp_fd;
771 
772 	if (config_init(env) == -1)
773 		fatal("failed to initialize configuration");
774 
775 	if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL)
776 		fatal("unknown user %s", VMD_USER);
777 
778 	/* First proc runs as root without pledge but in default chroot */
779 	proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */
780 	proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */
781 
782 	/*
783 	 * If we're launching a new vm or its device, we short out here.
784 	 */
785 	if (vm_launch == VMD_LAUNCH_VM) {
786 		vm_main(vm_fd, vmm_fd);
787 		/* NOTREACHED */
788 	} else if (vm_launch == VMD_LAUNCH_DEV) {
789 		if (dev_type == VMD_DEVTYPE_NET) {
790 			log_procinit("vm/%s/vionet", title);
791 			vionet_main(vm_fd, vmm_fd);
792 			/* NOTREACHED */
793 		} else if (dev_type == VMD_DEVTYPE_DISK) {
794 			log_procinit("vm/%s/vioblk", title);
795 			vioblk_main(vm_fd, vmm_fd);
796 			/* NOTREACHED */
797 		}
798 		fatalx("unsupported device type '%c'", dev_type);
799 	}
800 
801 	/* Open /dev/vmm early. */
802 	if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) {
803 		env->vmd_fd = open(VMM_NODE, O_RDWR | O_CLOEXEC);
804 		if (env->vmd_fd == -1)
805 			fatal("%s", VMM_NODE);
806 	}
807 
808 	/* Configure the control socket */
809 	ps->ps_csock.cs_name = SOCKET_NAME;
810 	TAILQ_INIT(&ps->ps_rcsocks);
811 
812 	/* Configuration will be parsed after forking the children */
813 	env->vmd_conffile = conffile;
814 
815 	if (env->vmd_noaction)
816 		ps->ps_noaction = 1;
817 	ps->ps_instance = proc_instance;
818 	if (title != NULL)
819 		ps->ps_title[proc_id] = title;
820 
821 	/* only the parent returns */
822 	proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv,
823 	    proc_id);
824 
825 	if (ps->ps_noaction == 0)
826 		log_info("startup");
827 
828 	event_init();
829 
830 	signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps);
831 	signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps);
832 	signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps);
833 	signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps);
834 	signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps);
835 
836 	signal_add(&ps->ps_evsigint, NULL);
837 	signal_add(&ps->ps_evsigterm, NULL);
838 	signal_add(&ps->ps_evsighup, NULL);
839 	signal_add(&ps->ps_evsigpipe, NULL);
840 	signal_add(&ps->ps_evsigusr1, NULL);
841 
842 	if (!env->vmd_noaction)
843 		proc_connect(ps);
844 
845 	env->vmd_psp_fd = -1;
846 #ifdef __amd64__
847 	if (env->vmd_noaction == 0 && proc_id == PROC_PARENT)
848 		psp_setup();
849 #endif
850 
851 	if (vmd_configure() == -1)
852 		fatalx("configuration failed");
853 
854 	event_dispatch();
855 
856 	log_debug("exiting");
857 
858 	return (0);
859 }
860 
861 void
start_vm_batch(int fd,short type,void * args)862 start_vm_batch(int fd, short type, void *args)
863 {
864 	int		i = 0;
865 	struct vmd_vm	*vm;
866 
867 	log_debug("%s: starting batch of %d vms", __func__,
868 	    env->vmd_cfg.parallelism);
869 	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
870 		if (!(vm->vm_state & VM_STATE_WAITING)) {
871 			log_debug("%s: not starting vm %s (disabled)",
872 			    __func__,
873 			    vm->vm_params.vmc_params.vcp_name);
874 			continue;
875 		}
876 		i++;
877 		if (i > env->vmd_cfg.parallelism) {
878 			evtimer_add(&staggered_start_timer,
879 			    &env->vmd_cfg.delay);
880 			break;
881 		}
882 		vm->vm_state &= ~VM_STATE_WAITING;
883 		config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid);
884 	}
885 	log_debug("%s: done starting vms", __func__);
886 }
887 
888 int
vmd_configure(void)889 vmd_configure(void)
890 {
891 	int			ncpus;
892 	struct vmd_switch	*vsw;
893 	int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE};
894 	size_t ncpus_sz = sizeof(ncpus);
895 
896 	/*
897 	 * pledge in the parent process:
898 	 * stdio - for malloc and basic I/O including events.
899 	 * rpath - for reload to open and read the configuration files.
900 	 * wpath - for opening disk images and tap devices.
901 	 * tty - for openpty and TIOCUCNTL.
902 	 * proc - run kill to terminate its children safely.
903 	 * sendfd - for disks, interfaces and other fds.
904 	 * recvfd - for send and receive.
905 	 * getpw - lookup user or group id by name.
906 	 * chown, fattr - change tty ownership
907 	 * flock - locking disk files
908 	 */
909 	if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw"
910 	    " chown fattr flock", NULL) == -1)
911 		fatal("pledge");
912 
913 	if ((env->vmd_ptmfd = getptmfd()) == -1)
914 		fatal("getptmfd %s", PATH_PTMDEV);
915 
916 	if (parse_config(env->vmd_conffile) == -1) {
917 		proc_kill(&env->vmd_ps);
918 		exit(1);
919 	}
920 
921 	if (env->vmd_noaction) {
922 		fprintf(stderr, "configuration OK\n");
923 		proc_kill(&env->vmd_ps);
924 		exit(0);
925 	}
926 
927 	/* Send VMM device fd to vmm proc. */
928 	proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1,
929 	    IMSG_VMDOP_RECEIVE_VMM_FD, -1, env->vmd_fd, NULL, 0);
930 
931 	/* Send PSP device fd to vmm proc. */
932 	if (env->vmd_psp_fd != -1) {
933 		proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1,
934 		    IMSG_VMDOP_RECEIVE_PSP_FD, -1, env->vmd_psp_fd, NULL, 0);
935 	}
936 
937 	/* Send shared global configuration to all children */
938 	if (config_setconfig(env) == -1)
939 		return (-1);
940 
941 	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
942 		if (vsw->sw_running)
943 			continue;
944 		if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
945 			log_warn("%s: failed to create switch %s",
946 			    __func__, vsw->sw_name);
947 			switch_remove(vsw);
948 			return (-1);
949 		}
950 	}
951 
952 	if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) {
953 		env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY;
954 		if (sysctl(ncpu_mib, nitems(ncpu_mib), &ncpus, &ncpus_sz, NULL, 0) == -1)
955 			ncpus = 1;
956 		env->vmd_cfg.parallelism = ncpus;
957 		log_debug("%s: setting staggered start configuration to "
958 		    "parallelism: %d and delay: %lld",
959 		    __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec);
960 	}
961 
962 	log_debug("%s: starting vms in staggered fashion", __func__);
963 	evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
964 	/* start first batch */
965 	start_vm_batch(0, 0, NULL);
966 
967 	return (0);
968 }
969 
970 int
vmd_reload(unsigned int reset,const char * filename)971 vmd_reload(unsigned int reset, const char *filename)
972 {
973 	struct vmd_vm		*vm, *next_vm;
974 	struct vmd_switch	*vsw;
975 	int			 reload = 0;
976 
977 	/* Switch back to the default config file */
978 	if (filename == NULL || *filename == '\0') {
979 		filename = env->vmd_conffile;
980 		reload = 1;
981 	}
982 
983 	log_debug("%s: level %d config file %s", __func__, reset, filename);
984 
985 	if (reset) {
986 		/* Purge the configuration */
987 		config_purge(env, reset);
988 		config_setreset(env, reset);
989 	} else {
990 		/*
991 		 * Load or reload the configuration.
992 		 *
993 		 * Reloading removes all non-running VMs before processing the
994 		 * config file, whereas loading only adds to the existing list
995 		 * of VMs.
996 		 */
997 
998 		if (reload) {
999 			TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry,
1000 			    next_vm) {
1001 				if (!(vm->vm_state & VM_STATE_RUNNING)) {
1002 					DPRINTF("%s: calling vm_remove",
1003 					    __func__);
1004 					vm_remove(vm, __func__);
1005 				}
1006 			}
1007 		}
1008 
1009 		if (parse_config(filename) == -1) {
1010 			log_debug("%s: failed to load config file %s",
1011 			    __func__, filename);
1012 			return (-1);
1013 		}
1014 
1015 		if (reload) {
1016 			/* Update shared global configuration in all children */
1017 			if (config_setconfig(env) == -1)
1018 				return (-1);
1019 		}
1020 
1021 		TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
1022 			if (vsw->sw_running)
1023 				continue;
1024 			if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
1025 				log_warn("%s: failed to create switch %s",
1026 				    __func__, vsw->sw_name);
1027 				switch_remove(vsw);
1028 				return (-1);
1029 			}
1030 		}
1031 
1032 		log_debug("%s: starting vms in staggered fashion", __func__);
1033 		evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
1034 		/* start first batch */
1035 		start_vm_batch(0, 0, NULL);
1036 
1037 		}
1038 
1039 	return (0);
1040 }
1041 
1042 void
vmd_shutdown(void)1043 vmd_shutdown(void)
1044 {
1045 	struct vmd_vm *vm, *vm_next;
1046 
1047 	log_debug("%s: performing shutdown", __func__);
1048 
1049 	TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) {
1050 		vm_remove(vm, __func__);
1051 	}
1052 
1053 	proc_kill(&env->vmd_ps);
1054 	free(env);
1055 
1056 	log_warnx("terminating");
1057 	exit(0);
1058 }
1059 
1060 struct vmd_vm *
vm_getbyvmid(uint32_t vmid)1061 vm_getbyvmid(uint32_t vmid)
1062 {
1063 	struct vmd_vm	*vm;
1064 
1065 	if (vmid == 0)
1066 		return (NULL);
1067 	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1068 		if (vm->vm_vmid == vmid)
1069 			return (vm);
1070 	}
1071 
1072 	return (NULL);
1073 }
1074 
1075 struct vmd_vm *
vm_getbyid(uint32_t id)1076 vm_getbyid(uint32_t id)
1077 {
1078 	struct vmd_vm	*vm;
1079 
1080 	if (id == 0)
1081 		return (NULL);
1082 	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1083 		if (vm->vm_params.vmc_params.vcp_id == id)
1084 			return (vm);
1085 	}
1086 
1087 	return (NULL);
1088 }
1089 
1090 uint32_t
vm_id2vmid(uint32_t id,struct vmd_vm * vm)1091 vm_id2vmid(uint32_t id, struct vmd_vm *vm)
1092 {
1093 	if (vm == NULL && (vm = vm_getbyid(id)) == NULL)
1094 		return (0);
1095 	DPRINTF("%s: vmm id %u is vmid %u", __func__,
1096 	    id, vm->vm_vmid);
1097 	return (vm->vm_vmid);
1098 }
1099 
1100 uint32_t
vm_vmid2id(uint32_t vmid,struct vmd_vm * vm)1101 vm_vmid2id(uint32_t vmid, struct vmd_vm *vm)
1102 {
1103 	if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL)
1104 		return (0);
1105 	DPRINTF("%s: vmid %u is vmm id %u", __func__,
1106 	    vmid, vm->vm_params.vmc_params.vcp_id);
1107 	return (vm->vm_params.vmc_params.vcp_id);
1108 }
1109 
1110 struct vmd_vm *
vm_getbyname(const char * name)1111 vm_getbyname(const char *name)
1112 {
1113 	struct vmd_vm	*vm;
1114 
1115 	if (name == NULL)
1116 		return (NULL);
1117 	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1118 		if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0)
1119 			return (vm);
1120 	}
1121 
1122 	return (NULL);
1123 }
1124 
1125 struct vmd_vm *
vm_getbypid(pid_t pid)1126 vm_getbypid(pid_t pid)
1127 {
1128 	struct vmd_vm	*vm;
1129 
1130 	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1131 		if (vm->vm_pid == pid)
1132 			return (vm);
1133 	}
1134 
1135 	return (NULL);
1136 }
1137 
1138 void
vm_stop(struct vmd_vm * vm,int keeptty,const char * caller)1139 vm_stop(struct vmd_vm *vm, int keeptty, const char *caller)
1140 {
1141 	struct privsep	*ps = &env->vmd_ps;
1142 	unsigned int	 i, j;
1143 
1144 	if (vm == NULL)
1145 		return;
1146 
1147 	log_debug("%s: %s %s stopping vm %d%s",
1148 	    __func__, ps->ps_title[privsep_process], caller,
1149 	    vm->vm_vmid, keeptty ? ", keeping tty open" : "");
1150 
1151 	vm->vm_state &= ~(VM_STATE_RECEIVED | VM_STATE_RUNNING
1152 	    | VM_STATE_SHUTDOWN);
1153 
1154 	if (vm->vm_iev.ibuf.fd != -1) {
1155 		event_del(&vm->vm_iev.ev);
1156 		close(vm->vm_iev.ibuf.fd);
1157 	}
1158 	for (i = 0; i < VM_MAX_DISKS_PER_VM; i++) {
1159 		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
1160 			if (vm->vm_disks[i][j] != -1) {
1161 				close(vm->vm_disks[i][j]);
1162 				vm->vm_disks[i][j] = -1;
1163 			}
1164 		}
1165 	}
1166 	for (i = 0; i < VM_MAX_NICS_PER_VM; i++) {
1167 		if (vm->vm_ifs[i].vif_fd != -1) {
1168 			close(vm->vm_ifs[i].vif_fd);
1169 			vm->vm_ifs[i].vif_fd = -1;
1170 		}
1171 		free(vm->vm_ifs[i].vif_name);
1172 		free(vm->vm_ifs[i].vif_switch);
1173 		free(vm->vm_ifs[i].vif_group);
1174 		vm->vm_ifs[i].vif_name = NULL;
1175 		vm->vm_ifs[i].vif_switch = NULL;
1176 		vm->vm_ifs[i].vif_group = NULL;
1177 	}
1178 	if (vm->vm_kernel != -1) {
1179 		close(vm->vm_kernel);
1180 		vm->vm_kernel = -1;
1181 	}
1182 	if (vm->vm_cdrom != -1) {
1183 		close(vm->vm_cdrom);
1184 		vm->vm_cdrom = -1;
1185 	}
1186 	if (!keeptty) {
1187 		vm_closetty(vm);
1188 		vm->vm_uid = 0;
1189 	}
1190 }
1191 
1192 void
vm_remove(struct vmd_vm * vm,const char * caller)1193 vm_remove(struct vmd_vm *vm, const char *caller)
1194 {
1195 	struct privsep	*ps = &env->vmd_ps;
1196 
1197 	if (vm == NULL)
1198 		return;
1199 
1200 	log_debug("%s: %s %s removing vm %d from running config",
1201 	    __func__, ps->ps_title[privsep_process], caller,
1202 	    vm->vm_vmid);
1203 
1204 	TAILQ_REMOVE(env->vmd_vms, vm, vm_entry);
1205 
1206 	vm_stop(vm, 0, caller);
1207 	if (vm->vm_kernel_path != NULL && !vm->vm_from_config)
1208 		free(vm->vm_kernel_path);
1209 	free(vm);
1210 }
1211 
1212 int
vm_claimid(const char * name,int uid,uint32_t * id)1213 vm_claimid(const char *name, int uid, uint32_t *id)
1214 {
1215 	struct name2id *n2i = NULL;
1216 
1217 	TAILQ_FOREACH(n2i, env->vmd_known, entry)
1218 		if (strcmp(n2i->name, name) == 0 && n2i->uid == uid)
1219 			goto out;
1220 
1221 	if (++env->vmd_nvm == 0) {
1222 		log_warnx("too many vms");
1223 		return (-1);
1224 	}
1225 	if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) {
1226 		log_warnx("could not alloc vm name");
1227 		return (-1);
1228 	}
1229 	n2i->id = env->vmd_nvm;
1230 	n2i->uid = uid;
1231 	if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) {
1232 		log_warnx("vm name too long");
1233 		free(n2i);
1234 		return (-1);
1235 	}
1236 	TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry);
1237 
1238 out:
1239 	*id = n2i->id;
1240 	return (0);
1241 }
1242 
1243 int
vm_register(struct privsep * ps,struct vmop_create_params * vmc,struct vmd_vm ** ret_vm,uint32_t id,uid_t uid)1244 vm_register(struct privsep *ps, struct vmop_create_params *vmc,
1245     struct vmd_vm **ret_vm, uint32_t id, uid_t uid)
1246 {
1247 	struct vmd_vm		*vm = NULL, *vm_parent = NULL;
1248 	struct vm_create_params	*vcp = &vmc->vmc_params;
1249 	struct vmop_owner	*vmo = NULL;
1250 	uint32_t		 nid, rng;
1251 	unsigned int		 i, j;
1252 	struct vmd_switch	*sw;
1253 	char			*s;
1254 	int			 ret = 0;
1255 
1256 	/* Check if this is an instance of another VM */
1257 	if ((ret = vm_instance(ps, &vm_parent, vmc, uid)) != 0) {
1258 		errno = ret; /* XXX might set invalid errno */
1259 		return (-1);
1260 	}
1261 
1262 	errno = 0;
1263 	*ret_vm = NULL;
1264 
1265 	if ((vm = vm_getbyname(vcp->vcp_name)) != NULL ||
1266 	    (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) {
1267 		if (vm_checkperm(vm, &vm->vm_params.vmc_owner,
1268 		    uid) != 0) {
1269 			errno = EPERM;
1270 			goto fail;
1271 		}
1272 		vm->vm_kernel = vmc->vmc_kernel;
1273 		*ret_vm = vm;
1274 		errno = EALREADY;
1275 		goto fail;
1276 	}
1277 
1278 	if (vm_parent != NULL)
1279 		vmo = &vm_parent->vm_params.vmc_insowner;
1280 
1281 	/* non-root users can only start existing VMs or instances */
1282 	if (vm_checkperm(NULL, vmo, uid) != 0) {
1283 		log_warnx("permission denied");
1284 		errno = EPERM;
1285 		goto fail;
1286 	}
1287 	if (vmc->vmc_flags == 0) {
1288 		log_warnx("invalid configuration, no devices");
1289 		errno = VMD_DISK_MISSING;
1290 		goto fail;
1291 	}
1292 	if (vcp->vcp_ncpus == 0)
1293 		vcp->vcp_ncpus = 1;
1294 	if (vcp->vcp_memranges[0].vmr_size == 0)
1295 		vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY;
1296 	if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) {
1297 		log_warnx("invalid number of CPUs");
1298 		goto fail;
1299 	} else if (vmc->vmc_ndisks > VM_MAX_DISKS_PER_VM) {
1300 		log_warnx("invalid number of disks");
1301 		goto fail;
1302 	} else if (vmc->vmc_nnics > VM_MAX_NICS_PER_VM) {
1303 		log_warnx("invalid number of interfaces");
1304 		goto fail;
1305 	} else if (vmc->vmc_kernel == -1 && vmc->vmc_ndisks == 0
1306 	    && strlen(vmc->vmc_cdrom) == 0) {
1307 		log_warnx("no kernel or disk/cdrom specified");
1308 		goto fail;
1309 	} else if (strlen(vcp->vcp_name) == 0) {
1310 		log_warnx("invalid VM name");
1311 		goto fail;
1312 	} else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' ||
1313 	    *vcp->vcp_name == '_') {
1314 		log_warnx("invalid VM name");
1315 		goto fail;
1316 	} else {
1317 		for (s = vcp->vcp_name; *s != '\0'; ++s) {
1318 			if (!(isalnum((unsigned char)*s) || *s == '.' || \
1319 			    *s == '-' || *s == '_')) {
1320 				log_warnx("invalid VM name");
1321 				goto fail;
1322 			}
1323 		}
1324 	}
1325 
1326 	if ((vm = calloc(1, sizeof(*vm))) == NULL)
1327 		goto fail;
1328 
1329 	memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params));
1330 	vmc = &vm->vm_params;
1331 	vcp = &vmc->vmc_params;
1332 	vm->vm_pid = -1;
1333 	vm->vm_tty = -1;
1334 	vm->vm_receive_fd = -1;
1335 	vm->vm_kernel = -1;
1336 	vm->vm_state &= ~VM_STATE_PAUSED;
1337 
1338 	if (vmc->vmc_kernel > -1)
1339 		vm->vm_kernel = vmc->vmc_kernel;
1340 
1341 	for (i = 0; i < VM_MAX_DISKS_PER_VM; i++)
1342 		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
1343 			vm->vm_disks[i][j] = -1;
1344 	for (i = 0; i < VM_MAX_NICS_PER_VM; i++)
1345 		vm->vm_ifs[i].vif_fd = -1;
1346 	for (i = 0; i < vmc->vmc_nnics; i++) {
1347 		if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) {
1348 			/* inherit per-interface flags from the switch */
1349 			vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK);
1350 		}
1351 
1352 		/*
1353 		 * If the MAC address is zero, always randomize it in vmd(8)
1354 		 * because we cannot rely on the guest OS to do the right
1355 		 * thing like OpenBSD does.  Based on ether_fakeaddr()
1356 		 * from the kernel, incremented by one to differentiate
1357 		 * the source.
1358 		 */
1359 		if (memcmp(zero_mac, &vmc->vmc_macs[i], ETHER_ADDR_LEN) == 0) {
1360 			rng = arc4random();
1361 			vmc->vmc_macs[i][0] = 0xfe;
1362 			vmc->vmc_macs[i][1] = 0xe1;
1363 			vmc->vmc_macs[i][2] = 0xba + 1;
1364 			vmc->vmc_macs[i][3] = 0xd0 | ((i + 1) & 0xf);
1365 			vmc->vmc_macs[i][4] = rng;
1366 			vmc->vmc_macs[i][5] = rng >> 8;
1367 		}
1368 	}
1369 	vm->vm_cdrom = -1;
1370 	vm->vm_iev.ibuf.fd = -1;
1371 
1372 	/*
1373 	 * Assign a new internal Id if not specified and we succeed in
1374 	 * claiming a new Id.
1375 	 */
1376 	if (id != 0)
1377 		vm->vm_vmid = id;
1378 	else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1)
1379 		goto fail;
1380 	else
1381 		vm->vm_vmid = nid;
1382 
1383 	log_debug("%s: registering vm %d", __func__, vm->vm_vmid);
1384 	TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry);
1385 
1386 	*ret_vm = vm;
1387 	return (0);
1388  fail:
1389 	if (errno == 0)
1390 		errno = EINVAL;
1391 	return (-1);
1392 }
1393 
1394 int
vm_instance(struct privsep * ps,struct vmd_vm ** vm_parent,struct vmop_create_params * vmc,uid_t uid)1395 vm_instance(struct privsep *ps, struct vmd_vm **vm_parent,
1396     struct vmop_create_params *vmc, uid_t uid)
1397 {
1398 	char			*name;
1399 	struct vm_create_params	*vcp = &vmc->vmc_params;
1400 	struct vmop_create_params *vmcp;
1401 	struct vm_create_params	*vcpp;
1402 	unsigned int		 i, j;
1403 
1404 	/* return without error if the parent is NULL (nothing to inherit) */
1405 	if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 ||
1406 	    vmc->vmc_instance[0] == '\0')
1407 		return (0);
1408 
1409 	if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL) {
1410 		return (VMD_PARENT_INVALID);
1411 	}
1412 
1413 	vmcp = &(*vm_parent)->vm_params;
1414 	vcpp = &vmcp->vmc_params;
1415 
1416 	/* Are we allowed to create an instance from this VM? */
1417 	if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) {
1418 		log_warnx("vm \"%s\" no permission to create vm instance",
1419 		    vcpp->vcp_name);
1420 		return (ENAMETOOLONG);
1421 	}
1422 
1423 	name = vcp->vcp_name;
1424 
1425 	if (vm_getbyname(vcp->vcp_name) != NULL ||
1426 	    vm_getbyvmid(vcp->vcp_id) != NULL) {
1427 		return (EPROCLIM);
1428 	}
1429 
1430 	/* CPU */
1431 	if (vcp->vcp_ncpus == 0)
1432 		vcp->vcp_ncpus = vcpp->vcp_ncpus;
1433 	if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 &&
1434 	    vcp->vcp_ncpus != vcpp->vcp_ncpus) {
1435 		log_warnx("vm \"%s\" no permission to set cpus", name);
1436 		return (EPERM);
1437 	}
1438 
1439 	/* memory */
1440 	if (vcp->vcp_memranges[0].vmr_size == 0)
1441 		vcp->vcp_memranges[0].vmr_size =
1442 		    vcpp->vcp_memranges[0].vmr_size;
1443 	if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 &&
1444 	    vcp->vcp_memranges[0].vmr_size !=
1445 	    vcpp->vcp_memranges[0].vmr_size) {
1446 		log_warnx("vm \"%s\" no permission to set memory", name);
1447 		return (EPERM);
1448 	}
1449 
1450 	/* disks cannot be inherited */
1451 	if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 &&
1452 	    vmc->vmc_ndisks) {
1453 		log_warnx("vm \"%s\" no permission to set disks", name);
1454 		return (EPERM);
1455 	}
1456 	for (i = 0; i < vmc->vmc_ndisks; i++) {
1457 		/* Check if this disk is already used in the parent */
1458 		for (j = 0; j < vmcp->vmc_ndisks; j++) {
1459 			if (strcmp(vmc->vmc_disks[i],
1460 			    vmcp->vmc_disks[j]) == 0) {
1461 				log_warnx("vm \"%s\" disk %s cannot be reused",
1462 				    name, vmc->vmc_disks[i]);
1463 				return (EBUSY);
1464 			}
1465 		}
1466 		vmc->vmc_checkaccess |= VMOP_CREATE_DISK;
1467 	}
1468 
1469 	/* interfaces */
1470 	if (vmc->vmc_nnics > 0 &&
1471 	    vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 &&
1472 	    vmc->vmc_nnics != vmcp->vmc_nnics) {
1473 		log_warnx("vm \"%s\" no permission to set interfaces", name);
1474 		return (EPERM);
1475 	}
1476 	for (i = 0; i < vmcp->vmc_nnics; i++) {
1477 		/* Interface got overwritten */
1478 		if (i < vmc->vmc_nnics)
1479 			continue;
1480 
1481 		/* Copy interface from parent */
1482 		vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i];
1483 		(void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i],
1484 		    sizeof(vmc->vmc_ifnames[i]));
1485 		(void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i],
1486 		    sizeof(vmc->vmc_ifswitch[i]));
1487 		(void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i],
1488 		    sizeof(vmc->vmc_ifgroup[i]));
1489 		memcpy(vmc->vmc_macs[i], vmcp->vmc_macs[i],
1490 		    sizeof(vmc->vmc_macs[i]));
1491 		vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i];
1492 		vmc->vmc_nnics++;
1493 	}
1494 	for (i = 0; i < vmc->vmc_nnics; i++) {
1495 		for (j = 0; j < vmcp->vmc_nnics; j++) {
1496 			if (memcmp(zero_mac, vmc->vmc_macs[i],
1497 			    sizeof(vmc->vmc_macs[i])) != 0 &&
1498 			    memcmp(vmcp->vmc_macs[i], vmc->vmc_macs[i],
1499 			    sizeof(vmc->vmc_macs[i])) != 0) {
1500 				log_warnx("vm \"%s\" lladdr cannot be reused",
1501 				    name);
1502 				return (EBUSY);
1503 			}
1504 			if (strlen(vmc->vmc_ifnames[i]) &&
1505 			    strcmp(vmc->vmc_ifnames[i],
1506 			    vmcp->vmc_ifnames[j]) == 0) {
1507 				log_warnx("vm \"%s\" %s cannot be reused",
1508 				    vmc->vmc_ifnames[i], name);
1509 				return (EBUSY);
1510 			}
1511 		}
1512 	}
1513 
1514 	/* kernel */
1515 	if (vmc->vmc_kernel > -1 || ((*vm_parent)->vm_kernel_path != NULL &&
1516 		strnlen((*vm_parent)->vm_kernel_path, PATH_MAX) < PATH_MAX)) {
1517 		if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) {
1518 			log_warnx("vm \"%s\" no permission to set boot image",
1519 			    name);
1520 			return (EPERM);
1521 		}
1522 		vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL;
1523 	}
1524 
1525 	/* cdrom */
1526 	if (strlen(vmc->vmc_cdrom) > 0) {
1527 		if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) {
1528 			log_warnx("vm \"%s\" no permission to set cdrom", name);
1529 			return (EPERM);
1530 		}
1531 		vmc->vmc_checkaccess |= VMOP_CREATE_CDROM;
1532 	} else if (strlcpy(vmc->vmc_cdrom, vmcp->vmc_cdrom,
1533 	    sizeof(vmc->vmc_cdrom)) >= sizeof(vmc->vmc_cdrom)) {
1534 		log_warnx("vm \"%s\" cdrom name too long", name);
1535 		return (EINVAL);
1536 	}
1537 
1538 	/* user */
1539 	if (vmc->vmc_owner.uid == 0)
1540 		vmc->vmc_owner.uid = vmcp->vmc_owner.uid;
1541 	else if (vmc->vmc_owner.uid != uid &&
1542 	    vmc->vmc_owner.uid != vmcp->vmc_owner.uid) {
1543 		log_warnx("vm \"%s\" user mismatch", name);
1544 		return (EPERM);
1545 	}
1546 
1547 	/* group */
1548 	if (vmc->vmc_owner.gid == 0)
1549 		vmc->vmc_owner.gid = vmcp->vmc_owner.gid;
1550 	else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) {
1551 		log_warnx("vm \"%s\" group mismatch", name);
1552 		return (EPERM);
1553 	}
1554 
1555 	/* child instances */
1556 	if (vmc->vmc_insflags) {
1557 		log_warnx("vm \"%s\" cannot change instance permissions", name);
1558 		return (EPERM);
1559 	}
1560 	if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) {
1561 		vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid;
1562 		vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid;
1563 		vmc->vmc_insflags = vmcp->vmc_insflags;
1564 	} else {
1565 		vmc->vmc_insowner.gid = 0;
1566 		vmc->vmc_insowner.uid = 0;
1567 		vmc->vmc_insflags = 0;
1568 	}
1569 
1570 	/* finished, remove instance flags */
1571 	vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE;
1572 
1573 	return (0);
1574 }
1575 
1576 /*
1577  * vm_checkperm
1578  *
1579  * Checks if the user represented by the 'uid' parameter is allowed to
1580  * manipulate the VM described by the 'vm' parameter (or connect to said VM's
1581  * console.)
1582  *
1583  * Parameters:
1584  *  vm: the VM whose permission is to be checked
1585  *  vmo: the required uid/gid to be checked
1586  *  uid: the user ID of the user making the request
1587  *
1588  * Return values:
1589  *   0: the permission should be granted
1590  *  -1: the permission check failed (also returned if vm == null)
1591  */
1592 int
vm_checkperm(struct vmd_vm * vm,struct vmop_owner * vmo,uid_t uid)1593 vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid)
1594 {
1595 	struct group	*gr;
1596 	struct passwd	*pw;
1597 	char		**grmem;
1598 
1599 	/* root has no restrictions */
1600 	if (uid == 0)
1601 		return (0);
1602 
1603 	if (vmo == NULL)
1604 		return (-1);
1605 
1606 	/* check user */
1607 	if (vm == NULL) {
1608 		if  (vmo->uid == uid)
1609 			return (0);
1610 	} else {
1611 		/*
1612 		 * check user of running vm (the owner of a running vm can
1613 		 * be different to (or more specific than) the configured owner.
1614 		 */
1615 		if (((vm->vm_state & VM_STATE_RUNNING) && vm->vm_uid == uid) ||
1616 		    (!(vm->vm_state & VM_STATE_RUNNING) && vmo->uid == uid))
1617 			return (0);
1618 	}
1619 
1620 	/* check groups */
1621 	if (vmo->gid != -1) {
1622 		if ((pw = getpwuid(uid)) == NULL)
1623 			return (-1);
1624 		if (pw->pw_gid == vmo->gid)
1625 			return (0);
1626 		if ((gr = getgrgid(vmo->gid)) != NULL) {
1627 			for (grmem = gr->gr_mem; *grmem; grmem++)
1628 				if (strcmp(*grmem, pw->pw_name) == 0)
1629 					return (0);
1630 		}
1631 	}
1632 
1633 	return (-1);
1634 }
1635 
1636 /*
1637  * vm_checkinsflag
1638  *
1639  * Checks whether the non-root user is allowed to set an instance option.
1640  *
1641  * Parameters:
1642  *  vmc: the VM create parameters
1643  *  flag: the flag to be checked
1644  *  uid: the user ID of the user making the request
1645  *
1646  * Return values:
1647  *   0: the permission should be granted
1648  *  -1: the permission check failed (also returned if vm == null)
1649  */
1650 int
vm_checkinsflag(struct vmop_create_params * vmc,unsigned int flag,uid_t uid)1651 vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid)
1652 {
1653 	/* root has no restrictions */
1654 	if (uid == 0)
1655 		return (0);
1656 
1657 	if ((vmc->vmc_insflags & flag) == 0)
1658 		return (-1);
1659 
1660 	return (0);
1661 }
1662 
/*
 * vm_checkaccess
 *
 * Checks if the user represented by the 'uid' parameter is allowed to
 * access the already opened file behind the 'fd' parameter, based on the
 * file's mode bits, owner and group.
 *
 * Parameters:
 *  fd: the file descriptor of the opened file
 *  uflag: if zero, the per-user check is skipped and access is granted
 *  uid: the user ID of the user making the request
 *  amode: the access flags of R_OK and W_OK
 *
 * Return values:
 *   0: the permission should be granted
 *  -1: the permission check failed, fd is -1, or fd is not a regular file
 */
int
vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode)
{
	struct passwd	*pwent;
	struct group	*grent;
	char		**member;
	struct stat	 sb;
	mode_t		 need;
	int		 want_read, want_write;

	if (fd == -1)
		return (-1);

	/* File has to be accessible and a regular file */
	if (fstat(fd, &sb) == -1)
		return (-1);
	if (!S_ISREG(sb.st_mode))
		return (-1);

	/* root has no restrictions */
	if (uid == 0 || uflag == 0)
		return (0);

	want_write = (amode & W_OK) != 0;
	want_read = (amode & R_OK) != 0;

	/* check other */
	need = 0;
	if (want_write)
		need |= S_IWOTH;
	if (want_read)
		need |= S_IROTH;
	if ((sb.st_mode & need) == need)
		return (0);

	/* check user */
	need = 0;
	if (want_write)
		need |= S_IWUSR;
	if (want_read)
		need |= S_IRUSR;
	if (uid == sb.st_uid && (sb.st_mode & need) == need)
		return (0);

	/* check groups: the group bits must allow it before membership counts */
	need = 0;
	if (want_write)
		need |= S_IWGRP;
	if (want_read)
		need |= S_IRGRP;
	if ((sb.st_mode & need) != need)
		return (-1);

	pwent = getpwuid(uid);
	if (pwent == NULL)
		return (-1);
	if (pwent->pw_gid == sb.st_gid)
		return (0);

	grent = getgrgid(sb.st_gid);
	if (grent == NULL)
		return (-1);
	for (member = grent->gr_mem; *member != NULL; member++) {
		if (strcmp(*member, pwent->pw_name) == 0)
			return (0);
	}

	return (-1);
}
1730 
1731 int
vm_opentty(struct vmd_vm * vm)1732 vm_opentty(struct vmd_vm *vm)
1733 {
1734 	struct stat		 st;
1735 	struct group		*gr;
1736 	uid_t			 uid;
1737 	gid_t			 gid;
1738 	mode_t			 mode;
1739 	int			 on = 1, tty_slave;
1740 
1741 	/*
1742 	 * Open tty with pre-opened PTM fd
1743 	 */
1744 	if (fdopenpty(env->vmd_ptmfd, &vm->vm_tty, &tty_slave, vm->vm_ttyname,
1745 	    NULL, NULL) == -1) {
1746 		log_warn("fdopenpty");
1747 		return (-1);
1748 	}
1749 	close(tty_slave);
1750 
1751 	/*
1752 	 * We use user ioctl(2) mode to pass break commands.
1753 	 */
1754 	if (ioctl(vm->vm_tty, TIOCUCNTL, &on) == -1) {
1755 		log_warn("could not enable user ioctl mode on %s",
1756 		    vm->vm_ttyname);
1757 		goto fail;
1758 	}
1759 
1760 	uid = vm->vm_uid;
1761 	gid = vm->vm_params.vmc_owner.gid;
1762 
1763 	if (vm->vm_params.vmc_owner.gid != -1) {
1764 		mode = 0660;
1765 	} else if ((gr = getgrnam("tty")) != NULL) {
1766 		gid = gr->gr_gid;
1767 		mode = 0620;
1768 	} else {
1769 		mode = 0600;
1770 		gid = 0;
1771 	}
1772 
1773 	log_debug("%s: vm %s tty %s uid %d gid %d mode %o",
1774 	    __func__, vm->vm_params.vmc_params.vcp_name,
1775 	    vm->vm_ttyname, uid, gid, mode);
1776 
1777 	/*
1778 	 * Change ownership and mode of the tty as required.
1779 	 * Loosely based on the implementation of sshpty.c
1780 	 */
1781 	if (fstat(vm->vm_tty, &st) == -1) {
1782 		log_warn("fstat failed for %s", vm->vm_ttyname);
1783 		goto fail;
1784 	}
1785 
1786 	if (st.st_uid != uid || st.st_gid != gid) {
1787 		if (chown(vm->vm_ttyname, uid, gid) == -1) {
1788 			log_warn("chown %s %d %d failed, uid %d",
1789 			    vm->vm_ttyname, uid, gid, getuid());
1790 
1791 			/* Ignore failure on read-only filesystems */
1792 			if (!((errno == EROFS) &&
1793 			    (st.st_uid == uid || st.st_uid == 0)))
1794 				goto fail;
1795 		}
1796 	}
1797 
1798 	if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) {
1799 		if (chmod(vm->vm_ttyname, mode) == -1) {
1800 			log_warn("chmod %s %o failed, uid %d",
1801 			    vm->vm_ttyname, mode, getuid());
1802 
1803 			/* Ignore failure on read-only filesystems */
1804 			if (!((errno == EROFS) &&
1805 			    (st.st_uid == uid || st.st_uid == 0)))
1806 				goto fail;
1807 		}
1808 	}
1809 
1810 	return (0);
1811  fail:
1812 	vm_closetty(vm);
1813 	return (-1);
1814 }
1815 
1816 void
vm_closetty(struct vmd_vm * vm)1817 vm_closetty(struct vmd_vm *vm)
1818 {
1819 	if (vm->vm_tty != -1) {
1820 		/* Release and close the tty */
1821 		if (fchown(vm->vm_tty, 0, 0) == -1)
1822 			log_warn("chown %s 0 0 failed", vm->vm_ttyname);
1823 		if (fchmod(vm->vm_tty, 0666) == -1)
1824 			log_warn("chmod %s 0666 failed", vm->vm_ttyname);
1825 		close(vm->vm_tty);
1826 		vm->vm_tty = -1;
1827 	}
1828 	memset(&vm->vm_ttyname, 0, sizeof(vm->vm_ttyname));
1829 }
1830 
1831 void
switch_remove(struct vmd_switch * vsw)1832 switch_remove(struct vmd_switch *vsw)
1833 {
1834 	if (vsw == NULL)
1835 		return;
1836 
1837 	TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry);
1838 
1839 	free(vsw->sw_group);
1840 	free(vsw->sw_name);
1841 	free(vsw);
1842 }
1843 
1844 struct vmd_switch *
switch_getbyname(const char * name)1845 switch_getbyname(const char *name)
1846 {
1847 	struct vmd_switch	*vsw;
1848 
1849 	if (name == NULL)
1850 		return (NULL);
1851 	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
1852 		if (strcmp(vsw->sw_name, name) == 0)
1853 			return (vsw);
1854 	}
1855 
1856 	return (NULL);
1857 }
1858 
/*
 * get_string
 *
 * Returns a newly allocated, NUL-terminated copy of the leading run of
 * printable characters in a byte buffer.
 *
 * Parameters:
 *  ptr: the buffer to read from
 *  len: the maximum number of bytes to examine
 *
 * Return values:
 *  a string the caller must free(3), or NULL if strndup(3) fails
 */
char *
get_string(uint8_t *ptr, size_t len)
{
	size_t	 i;

	/* Stop at the first byte that is not printable (this includes NUL). */
	for (i = 0; i < len; i++)
		if (!isprint((unsigned char)ptr[i]))
			break;

	/* uint8_t * does not convert to const char * without a cast. */
	return strndup((const char *)ptr, i);
}
1870 
/*
 * prefixlen2mask
 *
 * Converts an IPv4 prefix length into a netmask in network byte order.
 * A prefix length of 0 gives 0; values above 32 are treated as 32.
 */
uint32_t
prefixlen2mask(uint8_t prefixlen)
{
	unsigned int	 hostbits;

	if (prefixlen == 0)
		return (0);

	/* Number of low-order bits to clear; 0 for a /32 or longer. */
	hostbits = (prefixlen >= 32) ? 0 : 32 - prefixlen;

	return (htonl(0xffffffff << hostbits));
}
1882 
/*
 * prefixlen2mask6
 *
 * Converts an IPv6 prefix length into a netmask and stores it in 'mask'.
 * Values above 128 are treated as 128.
 */
void
prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask)
{
	struct in6_addr	 out;
	unsigned int	 whole, rest;

	if (prefixlen > 128)
		prefixlen = 128;

	whole = prefixlen / 8;	/* bytes that are entirely set */
	rest = prefixlen % 8;	/* leading bits of the following byte */

	memset(&out, 0, sizeof(out));
	memset(out.s6_addr, 0xff, whole);
	if (rest != 0)
		out.s6_addr[whole] = 0xff00 >> rest;

	memcpy(mask, &out, sizeof(out));
}
1901 
1902 void
getmonotime(struct timeval * tv)1903 getmonotime(struct timeval *tv)
1904 {
1905 	struct timespec	 ts;
1906 
1907 	if (clock_gettime(CLOCK_MONOTONIC, &ts))
1908 		fatal("clock_gettime");
1909 
1910 	TIMESPEC_TO_TIMEVAL(tv, &ts);
1911 }
1912 
1913 static inline void
vm_terminate(struct vmd_vm * vm,const char * caller)1914 vm_terminate(struct vmd_vm *vm, const char *caller)
1915 {
1916 	if (vm->vm_from_config)
1917 		vm_stop(vm, 0, caller);
1918 	else {
1919 		/* vm_remove calls vm_stop */
1920 		vm_remove(vm, caller);
1921 	}
1922 }
1923 
/*
 * Utility function for closing vm file descriptors. An fd of -1 is taken
 * to be already closed or never opened and is not passed to close(2).
 * When POSIX_CLOSE_RESTART is defined, close(2) is retried on EINTR.
 * A failure with EIO is logged.
 *
 * Returns 0 on success, otherwise -1 on failure.
 */
int
close_fd(int fd)
{
	int	rc;

	if (fd == -1)
		return (0);

#ifdef POSIX_CLOSE_RESTART
	for (;;) {
		rc = close(fd);
		if (rc != -1 || errno != EINTR)
			break;
	}
#else
	rc = close(fd);
#endif /* POSIX_CLOSE_RESTART */

	if (rc == -1 && errno == EIO)
		log_warn("%s(%d)", __func__, fd);

	return (rc);
}
1949