/*	$OpenBSD: vmd.c,v 1.153 2024/01/18 14:49:59 claudio Exp $	*/

/*
 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/queue.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/tty.h>
#include <sys/ttycom.h>
#include <sys/ioctl.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <termios.h>
#include <errno.h>
#include <event.h>
#include <fcntl.h>
#include <pwd.h>
#include <signal.h>
#include <syslog.h>
#include <unistd.h>
#include <util.h>
#include <ctype.h>
#include <grp.h>

#include <machine/specialreg.h>
#include <machine/vmmvar.h>

#include "proc.h"
#include "atomicio.h"
#include "vmd.h"

__dead void usage(void);

int	 main(int, char **);
int	 vmd_configure(void);
void	 vmd_sighdlr(int sig, short event, void *arg);
void	 vmd_shutdown(void);
int	 vmd_control_run(void);
int	 vmd_dispatch_control(int, struct privsep_proc *, struct imsg *);
int	 vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *);
int	 vmd_dispatch_agentx(int, struct privsep_proc *, struct imsg *);
int	 vmd_dispatch_priv(int, struct privsep_proc *, struct imsg *);
int	 vmd_check_vmh(struct vm_dump_header *);

int	 vm_instance(struct privsep *, struct vmd_vm **,
	    struct vmop_create_params *, uid_t);
int	 vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t);
int	 vm_claimid(const char *, int, uint32_t *);
void	 start_vm_batch(int, short, void*);

static inline void vm_terminate(struct vmd_vm *, const char *);

struct vmd	*env;

static struct privsep_proc procs[] = {
	/* Keep "priv" on top as procs[0] */
	{ "priv",	PROC_PRIV,	vmd_dispatch_priv, priv },
	{ "control",	PROC_CONTROL,	vmd_dispatch_control, control },
	{ "vmm",	PROC_VMM,	vmd_dispatch_vmm, vmm,
	    vmm_shutdown, "/" },
	{ "agentx",	PROC_AGENTX,	vmd_dispatch_agentx, vm_agentx,
	    vm_agentx_shutdown, "/" }
};

enum privsep_procid privsep_process;

struct event staggered_start_timer;

/* For the privileged process */
static struct privsep_proc *proc_priv = &procs[0];
static struct passwd proc_privpw;
static const uint8_t zero_mac[ETHER_ADDR_LEN];

const char		 default_conffile[] = VMD_CONF;
const char		*conffile = default_conffile;

int
vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct privsep			*ps = p->p_ps;
	int				 res = 0, ret = 0, cmd = 0, verbose;
	int				 ifd;
	unsigned int			 v = 0, flags;
	struct vmop_create_params	 vmc;
	struct vmop_id			 vid;
	struct vmop_result		 vmr;
	struct vm_dump_header		 vmh;
	struct vmd_vm			*vm = NULL;
	char				*str = NULL;
	uint32_t			 id = 0;
	struct control_sock		*rcs;

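	/*
	 * Requests from the control process (vmctl(8) clients): most cases
	 * set "cmd" and "res" and let the second switch below compose the
	 * reply back to PROC_CONTROL.
	 */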
	switch (imsg->hdr.type) {
	case IMSG_VMDOP_START_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vmc);
		memcpy(&vmc, imsg->data, sizeof(vmc));
		vmc.vmc_kernel = imsg_get_fd(imsg);

		/* Try registering our VM in our list of known VMs. */
		if (vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid)) {
			res = errno;

			/* Did we have a failure during lookup of a parent? */
			if (vm == NULL) {
				cmd = IMSG_VMDOP_START_VM_RESPONSE;
				break;
			}

			/* Does the VM already exist? */
			if (res == EALREADY) {
				/* Is it already running? */
				if (vm->vm_state & VM_STATE_RUNNING) {
					cmd = IMSG_VMDOP_START_VM_RESPONSE;
					break;
				}

				/* If not running, are our flags ok? */
				if (vmc.vmc_flags &&
				    vmc.vmc_flags != VMOP_CREATE_KERNEL) {
					cmd = IMSG_VMDOP_START_VM_RESPONSE;
					break;
				}
			}
			res = 0;
		}

		/* Try to start the launch of the VM. */
		res = config_setvm(ps, vm, imsg->hdr.peerid,
		    vm->vm_params.vmc_owner.uid);
		if (res)
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		break;
	case IMSG_VMDOP_WAIT_VM_REQUEST:
	case IMSG_VMDOP_TERMINATE_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		flags = vid.vid_flags;
		cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;

		if ((id = vid.vid_id) == 0) {
			/* Lookup vm (id) by name */
			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
				res = ENOENT;
				break;
			}
			id = vm->vm_vmid;
		} else if ((vm = vm_getbyvmid(id)) == NULL) {
			res = ENOENT;
			break;
		}

		/* Validate current state of vm */
		if ((vm->vm_state & VM_STATE_SHUTDOWN) &&
		    (flags & VMOP_FORCE) == 0) {
			res = EALREADY;
			break;
		} else if (!(vm->vm_state & VM_STATE_RUNNING)) {
			res = EINVAL;
			break;
		} else if (vm_checkperm(vm, &vm->vm_params.vmc_owner,
		    vid.vid_uid)) {
			res = EPERM;
			break;
		}

		/* Only relay TERMINATION requests, not WAIT requests */
		if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) {
			memset(&vid, 0, sizeof(vid));
			vid.vid_id = id;
			vid.vid_flags = flags;

			if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
			    imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1)
				return (-1);
		}
		break;
	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
		break;
	case IMSG_VMDOP_LOAD:
		IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */
		str = get_string((uint8_t *)imsg->data,
		    IMSG_DATA_SIZE(imsg));
		/* FALLTHROUGH */
	case IMSG_VMDOP_RELOAD:
		if (vmd_reload(0, str) == -1)
			cmd = IMSG_CTL_FAIL;
		else
			cmd = IMSG_CTL_OK;
		free(str);
		break;
	case IMSG_CTL_RESET:
		IMSG_SIZE_CHECK(imsg, &v);
		memcpy(&v, imsg->data, sizeof(v));
		if (vmd_reload(v, NULL) == -1)
			cmd = IMSG_CTL_FAIL;
		else
			cmd = IMSG_CTL_OK;
		break;
	case IMSG_CTL_VERBOSE:
		IMSG_SIZE_CHECK(imsg, &verbose);
		memcpy(&verbose, imsg->data, sizeof(verbose));
		log_setverbose(verbose);

		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
		proc_forward_imsg(ps, imsg, PROC_PRIV, -1);
		cmd = IMSG_CTL_OK;
		break;
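	/*
	 * Pause and unpause share one code path; on error the response
	 * type sent back mirrors the request type.
	 */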
	case IMSG_VMDOP_PAUSE_VM:
	case IMSG_VMDOP_UNPAUSE_VM:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		if (vid.vid_id == 0) {
			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
				res = ENOENT;
				cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
				    ? IMSG_VMDOP_PAUSE_VM_RESPONSE
				    : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
				break;
			} else {
				vid.vid_id = vm->vm_vmid;
			}
		} else if ((vm = vm_getbyid(vid.vid_id)) == NULL) {
			res = ENOENT;
			cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
			    ? IMSG_VMDOP_PAUSE_VM_RESPONSE
			    : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
			break;
		}
		if (vm_checkperm(vm, &vm->vm_params.vmc_owner,
		    vid.vid_uid) != 0) {
			res = EPERM;
			cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
			    ? IMSG_VMDOP_PAUSE_VM_RESPONSE
			    : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
			break;
		}
		proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
		    imsg->hdr.peerid, -1, &vid, sizeof(vid));
		break;
	case IMSG_VMDOP_SEND_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		id = vid.vid_id;
		ifd = imsg_get_fd(imsg);
		if (vid.vid_id == 0) {
			if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
				res = ENOENT;
				cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
				close(ifd);
				break;
			} else {
				vid.vid_id = vm->vm_vmid;
			}
		} else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) {
			res = ENOENT;
			cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
			close(ifd);
			break;
		}
		vmr.vmr_id = vid.vid_id;
		log_debug("%s: sending fd to vmm", __func__);
		proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
		    imsg->hdr.peerid, ifd, &vid, sizeof(vid));
		break;
	case IMSG_VMDOP_RECEIVE_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		ifd = imsg_get_fd(imsg);
		if (ifd == -1) {
			log_warnx("%s: invalid fd", __func__);
			return (-1);
		}
		if (atomicio(read, ifd, &vmh, sizeof(vmh)) != sizeof(vmh)) {
			log_warnx("%s: error reading vmh from received vm",
			    __func__);
			res = EIO;
			close(ifd);
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}

		if (vmd_check_vmh(&vmh)) {
			res = ENOENT;
			close(ifd);
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}
		if (atomicio(read, ifd, &vmc, sizeof(vmc)) != sizeof(vmc)) {
			log_warnx("%s: error reading vmc from received vm",
			    __func__);
			res = EIO;
			close(ifd);
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}
		strlcpy(vmc.vmc_params.vcp_name, vid.vid_name,
		    sizeof(vmc.vmc_params.vcp_name));
		vmc.vmc_params.vcp_id = 0;

		ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid);
		if (ret != 0) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			close(ifd);
		} else {
			vm->vm_state |= VM_STATE_RECEIVED;
			config_setvm(ps, vm, imsg->hdr.peerid,
			    vmc.vmc_owner.uid);
			log_debug("%s: sending fd to vmm", __func__);
			proc_compose_imsg(ps, PROC_VMM, -1,
			    IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, ifd,
			    NULL, 0);
		}
		break;
	case IMSG_VMDOP_DONE:
		control_reset(&ps->ps_csock);
		TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry)
			control_reset(rcs);
		cmd = 0;
		break;
	default:
		return (-1);
	}

	switch (cmd) {
	case 0:
		break;
	case IMSG_VMDOP_START_VM_RESPONSE:
	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
		memset(&vmr, 0, sizeof(vmr));
		vmr.vmr_result = res;
		vmr.vmr_id = id;
		if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
		    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
			return (-1);
		break;
	default:
		if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
		    imsg->hdr.peerid, -1, &res, sizeof(res)) == -1)
			return (-1);
		break;
	}

	return (0);
}

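/*
 * Handle responses and events from the vmm process and relay them to the
 * control clients waiting for them.
 */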
int
vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct vmop_result	 vmr;
	struct privsep		*ps = p->p_ps;
	int			 res = 0;
	struct vmd_vm		*vm;
	struct vm_create_params	*vcp;
	struct vmop_info_result	 vir;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_PAUSE_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
			break;
		proc_compose_imsg(ps, PROC_CONTROL, -1,
		    imsg->hdr.type, imsg->hdr.peerid, -1,
		    imsg->data, sizeof(imsg->data));
		log_info("%s: paused vm %d successfully",
		    vm->vm_params.vmc_params.vcp_name,
		    vm->vm_vmid);
		vm->vm_state |= VM_STATE_PAUSED;
		break;
	case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
			break;
		proc_compose_imsg(ps, PROC_CONTROL, -1,
		    imsg->hdr.type, imsg->hdr.peerid, -1,
		    imsg->data, sizeof(imsg->data));
		log_info("%s: unpaused vm %d successfully.",
		    vm->vm_params.vmc_params.vcp_name,
		    vm->vm_vmid);
		vm->vm_state &= ~VM_STATE_PAUSED;
		break;
	case IMSG_VMDOP_START_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL)
			break;
		vm->vm_pid = vmr.vmr_pid;
		vcp = &vm->vm_params.vmc_params;
		vcp->vcp_id = vmr.vmr_id;

		/*
		 * If the peerid is not -1, forward the response back to the
		 * control socket.  If it is -1, the request originated from
		 * the parent, not the control socket.
		 */
		if (vm->vm_peerid != (uint32_t)-1) {
			(void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname,
			    sizeof(vmr.vmr_ttyname));
			if (proc_compose_imsg(ps, PROC_CONTROL, -1,
			    imsg->hdr.type, vm->vm_peerid, -1,
			    &vmr, sizeof(vmr)) == -1) {
				errno = vmr.vmr_result;
				log_warn("%s: failed to forward vm result",
				    vcp->vcp_name);
				vm_terminate(vm, __func__);
				return (-1);
			}
		}

		if (vmr.vmr_result) {
			log_warnx("%s: failed to start vm", vcp->vcp_name);
			vm_terminate(vm, __func__);
			errno = vmr.vmr_result;
			break;
		}

		/* Now configure all the interfaces */
		if (vm_priv_ifconfig(ps, vm) == -1) {
			log_warn("%s: failed to configure vm", vcp->vcp_name);
			vm_terminate(vm, __func__);
			break;
		}

		log_info("started %s (vm %d) successfully, tty %s",
		    vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname);
		break;
	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));

		if (vmr.vmr_result) {
			DPRINTF("%s: forwarding TERMINATE VM for vm id %d",
			    __func__, vmr.vmr_id);
			proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
		} else {
			if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
				break;
			/* Mark VM as shutting down */
			vm->vm_state |= VM_STATE_SHUTDOWN;
		}
		break;
	case IMSG_VMDOP_SEND_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
			break;
		if (!vmr.vmr_result) {
			log_info("%s: sent vm %d successfully.",
			    vm->vm_params.vmc_params.vcp_name,
			    vm->vm_vmid);
			vm_terminate(vm, __func__);
		}

		/* Send a response if a control client is waiting for it */
		if (imsg->hdr.peerid != (uint32_t)-1) {
			/* the error is meaningless for deferred responses */
			vmr.vmr_result = 0;

			if (proc_compose_imsg(ps, PROC_CONTROL, -1,
			    IMSG_VMDOP_SEND_VM_RESPONSE,
			    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
				return (-1);
		}
		break;
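	/*
	 * A VM exited or rebooted.  vmm reports EAGAIN when the guest
	 * requested a reboot; in that case the VM is restarted in place
	 * unless a boot device override is set.
	 */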
	case IMSG_VMDOP_TERMINATE_VM_EVENT:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));
		DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d",
		    __func__, vmr.vmr_id, vmr.vmr_result);
		if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) {
			log_debug("%s: vm %d is no longer available",
			    __func__, vmr.vmr_id);
			break;
		}
		if (vmr.vmr_result != EAGAIN ||
		    vm->vm_params.vmc_bootdevice) {
			vm_terminate(vm, __func__);
		} else {
			/* Stop VM instance but keep the tty open */
			vm_stop(vm, 1, __func__);
			config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid);
		}

		/* The error is meaningless for deferred responses */
		vmr.vmr_result = 0;

		if (proc_compose_imsg(ps, PROC_CONTROL, -1,
		    IMSG_VMDOP_TERMINATE_VM_EVENT,
		    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
			return (-1);
		break;
	case IMSG_VMDOP_GET_INFO_VM_DATA:
		IMSG_SIZE_CHECK(imsg, &vir);
		memcpy(&vir, imsg->data, sizeof(vir));
		if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) {
			memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname));
			if (vm->vm_ttyname[0] != '\0')
				strlcpy(vir.vir_ttyname, vm->vm_ttyname,
				    sizeof(vir.vir_ttyname));
			log_debug("%s: running vm: %d, vm_state: 0x%x",
			    __func__, vm->vm_vmid, vm->vm_state);
			vir.vir_state = vm->vm_state;
			/* get the user id who started the vm */
			vir.vir_uid = vm->vm_uid;
			vir.vir_gid = vm->vm_params.vmc_owner.gid;
		}
		if (proc_compose_imsg(ps,
		    imsg->hdr.peerid == IMSG_AGENTX_PEERID ?
		    PROC_AGENTX : PROC_CONTROL, -1, imsg->hdr.type,
		    imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) {
			log_debug("%s: GET_INFO_VM failed for vm %d, removing",
			    __func__, vm->vm_vmid);
			vm_terminate(vm, __func__);
			return (-1);
		}
		break;
	case IMSG_VMDOP_GET_INFO_VM_END_DATA:
		/*
		 * PROC_VMM has responded with the *running* VMs, now we
		 * append the others.  These use the special value 0 for their
		 * kernel id to indicate that they are not running.
		 */
		TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
			if (!(vm->vm_state & VM_STATE_RUNNING)) {
				memset(&vir, 0, sizeof(vir));
				vir.vir_info.vir_id = vm->vm_vmid;
				strlcpy(vir.vir_info.vir_name,
				    vm->vm_params.vmc_params.vcp_name,
				    VMM_MAX_NAME_LEN);
				vir.vir_info.vir_memory_size =
				    vm->vm_params.vmc_params.
				    vcp_memranges[0].vmr_size;
				vir.vir_info.vir_ncpus =
				    vm->vm_params.vmc_params.vcp_ncpus;
				/* get the configured user id for this vm */
				vir.vir_uid = vm->vm_params.vmc_owner.uid;
				vir.vir_gid = vm->vm_params.vmc_owner.gid;
				log_debug("%s: vm: %d, vm_state: 0x%x",
				    __func__, vm->vm_vmid, vm->vm_state);
				vir.vir_state = vm->vm_state;
				if (proc_compose_imsg(ps,
				    imsg->hdr.peerid == IMSG_AGENTX_PEERID ?
				    PROC_AGENTX : PROC_CONTROL, -1,
				    IMSG_VMDOP_GET_INFO_VM_DATA,
				    imsg->hdr.peerid, -1, &vir,
				    sizeof(vir)) == -1) {
					log_debug("%s: GET_INFO_VM_END failed",
					    __func__);
					vm_terminate(vm, __func__);
					return (-1);
				}
			}
		}
		IMSG_SIZE_CHECK(imsg, &res);
		proc_forward_imsg(ps, imsg,
		    imsg->hdr.peerid == IMSG_AGENTX_PEERID ?
		    PROC_AGENTX : PROC_CONTROL, -1);
		break;
	default:
		return (-1);
	}

	return (0);
}

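/* Relay VM information requests from the agentx process to vmm. */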
int
vmd_dispatch_agentx(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct privsep	*ps = p->p_ps;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
		proc_forward_imsg(ps, imsg, PROC_VMM, -1);
		return (0);
	default:
		break;
	}
	return (-1);
}

int
vmd_dispatch_priv(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct vmop_addr_result	 var;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &var);
		memcpy(&var, imsg->data, sizeof(var));
		proc_forward_imsg(p->p_ps, imsg, PROC_VMM, -1);
		break;
	default:
		return (-1);
	}

	return (0);
}

int
vmd_check_vmh(struct vm_dump_header *vmh)
{
	int i;
	unsigned int code, leaf;
	unsigned int a, b, c, d;

	if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE,
	    strlen(VM_DUMP_SIGNATURE)) != 0) {
		log_warnx("%s: incompatible dump signature", __func__);
		return (-1);
	}

	if (vmh->vmh_version != VM_DUMP_VERSION) {
		log_warnx("%s: incompatible dump version", __func__);
		return (-1);
	}

	for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
		code = vmh->vmh_cpuids[i].code;
		leaf = vmh->vmh_cpuids[i].leaf;
		if (leaf != 0x00) {
			log_debug("%s: invalid leaf 0x%x for code 0x%x",
			    __func__, leaf, code);
			return (-1);
		}

		switch (code) {
		case 0x00:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if (vmh->vmh_cpuids[i].a > a) {
				log_debug("%s: incompatible cpuid level",
				    __func__);
				return (-1);
			}
			if (!(vmh->vmh_cpuids[i].b == b &&
			    vmh->vmh_cpuids[i].c == c &&
			    vmh->vmh_cpuids[i].d == d)) {
				log_debug("%s: incompatible cpu brand",
				    __func__);
				return (-1);
			}
			break;

		case 0x01:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) !=
			    (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) !=
			    (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		case 0x07:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) !=
			    (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: b", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) !=
			    (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		case 0x0d:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if (vmh->vmh_cpuids[i].b > b) {
				log_debug("%s: incompatible cpu: insufficient "
				    "max save area for enabled XCR0 features",
				    __func__);
				return (-1);
			}
			if (vmh->vmh_cpuids[i].c > c) {
				log_debug("%s: incompatible cpu: insufficient "
				    "max save area for supported XCR0 features",
				    __func__);
				return (-1);
			}
			break;

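		/*
		 * 0x80000001: the saved extended cpu feature bits must be
		 * a subset of what the host CPU offers.
		 */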
		case 0x80000001:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].a & a) !=
			    vmh->vmh_cpuids[i].a) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: a", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].c & c) !=
			    vmh->vmh_cpuids[i].c) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].d & d) !=
			    vmh->vmh_cpuids[i].d) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		default:
			log_debug("%s: unknown code 0x%x", __func__, code);
			return (-1);
		}
	}

	return (0);
}

void
vmd_sighdlr(int sig, short event, void *arg)
{
	if (privsep_process != PROC_PARENT)
		return;
	log_debug("%s: handling signal", __func__);

	switch (sig) {
	case SIGHUP:
		log_info("%s: reload requested with SIGHUP", __func__);

		/*
		 * This is safe because libevent uses async signal handlers
		 * that run in the event loop and not in signal context.
		 */
		(void)vmd_reload(0, NULL);
		break;
	case SIGPIPE:
		log_info("%s: ignoring SIGPIPE", __func__);
		break;
	case SIGUSR1:
		log_info("%s: ignoring SIGUSR1", __func__);
		break;
	case SIGTERM:
	case SIGINT:
		vmd_shutdown();
		break;
	default:
		fatalx("unexpected signal");
	}
}

__dead void
usage(void)
{
	extern char *__progname;
	fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n",
	    __progname);
	exit(1);
}

int
main(int argc, char **argv)
{
	struct privsep		*ps;
	int			 ch;
	enum privsep_procid	 proc_id = PROC_PARENT;
	int			 proc_instance = 0, vm_launch = 0;
	int			 vmm_fd = -1, vm_fd = -1;
	const char		*errp, *title = NULL;
	int			 argc0 = argc;
	char			 dev_type = '\0';

	log_init(0, LOG_DAEMON);

	if ((env = calloc(1, sizeof(*env))) == NULL)
		fatal("calloc: env");

	while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:nt:vp:")) != -1) {
		switch (ch) {
		case 'D':
			if (cmdline_symset(optarg) < 0)
				log_warnx("could not parse macro definition %s",
				    optarg);
			break;
		case 'd':
			env->vmd_debug = 2;
			break;
		case 'f':
			conffile = optarg;
			break;
		case 'v':
			env->vmd_verbose++;
			break;
		/* vmd fork/exec */
		case 'n':
			env->vmd_noaction = 1;
			break;
		case 'P':
			title = optarg;
			proc_id = proc_getid(procs, nitems(procs), title);
			if (proc_id == PROC_MAX)
				fatalx("invalid process name");
			break;
		case 'I':
			proc_instance = strtonum(optarg, 0,
			    PROC_MAX_INSTANCES, &errp);
			if (errp)
				fatalx("invalid process instance");
			break;
		/* child vm and device fork/exec */
		case 'p':
			title = optarg;
			break;
		case 'V':
			vm_launch = VMD_LAUNCH_VM;
			vm_fd = strtonum(optarg, 0, 128, &errp);
			if (errp)
				fatalx("invalid vm fd");
			break;
		case 'X':
			vm_launch = VMD_LAUNCH_DEV;
			vm_fd = strtonum(optarg, 0, 128, &errp);
			if (errp)
				fatalx("invalid device fd");
			break;
		case 't':
			dev_type = *optarg;
			switch (dev_type) {
			case VMD_DEVTYPE_NET:
			case VMD_DEVTYPE_DISK:
				break;
			default:
				fatalx("invalid device type");
			}
			break;
		case 'i':
			vmm_fd = strtonum(optarg, 0, 128, &errp);
			if (errp)
				fatalx("invalid vmm fd");
			break;
		default:
			usage();
		}
	}

	argc -= optind;
	if (argc > 0)
		usage();

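	/*
	 * A configuration check with -n must not daemonize; force at
	 * least minimal debug mode so messages reach the terminal.
	 */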
	if (env->vmd_noaction && !env->vmd_debug)
		env->vmd_debug = 1;

	log_init(env->vmd_debug, LOG_DAEMON);
	log_setverbose(env->vmd_verbose);

	/* Re-exec from the vmm child process requires an absolute path. */
	if (proc_id == PROC_PARENT && *argv[0] != '/' && !env->vmd_noaction)
		fatalx("re-exec requires execution with an absolute path");
	env->argv0 = argv[0];

	/* check for root privileges */
	if (env->vmd_noaction == 0 && !vm_launch) {
		if (geteuid())
			fatalx("need root privileges");
	}

	ps = &env->vmd_ps;
	ps->ps_env = env;
	env->vmd_fd = vmm_fd;

	if (config_init(env) == -1)
		fatal("failed to initialize configuration");

	if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL)
		fatal("unknown user %s", VMD_USER);

	/* First proc runs as root without pledge but in default chroot */
	proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */
	proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */

	/*
	 * If we're launching a new vm or its device, we short out here.
	 */
	if (vm_launch == VMD_LAUNCH_VM) {
		vm_main(vm_fd, vmm_fd);
		/* NOTREACHED */
	} else if (vm_launch == VMD_LAUNCH_DEV) {
		if (dev_type == VMD_DEVTYPE_NET) {
			log_procinit("vm/%s/vionet", title);
			vionet_main(vm_fd, vmm_fd);
			/* NOTREACHED */
		} else if (dev_type == VMD_DEVTYPE_DISK) {
			log_procinit("vm/%s/vioblk", title);
			vioblk_main(vm_fd, vmm_fd);
			/* NOTREACHED */
		}
		fatalx("unsupported device type '%c'", dev_type);
	}

	/* Open /dev/vmm early. */
	if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) {
		env->vmd_fd = open(VMM_NODE, O_RDWR);
		if (env->vmd_fd == -1)
			fatal("%s", VMM_NODE);
	}

	/* Configure the control socket */
	ps->ps_csock.cs_name = SOCKET_NAME;
	TAILQ_INIT(&ps->ps_rcsocks);

	/* Configuration will be parsed after forking the children */
	env->vmd_conffile = conffile;

	if (env->vmd_noaction)
		ps->ps_noaction = 1;
	ps->ps_instance = proc_instance;
	if (title != NULL)
		ps->ps_title[proc_id] = title;

	/* only the parent returns */
	proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv,
	    proc_id);

	if (!env->vmd_debug && daemon(0, 0) == -1)
		fatal("can't daemonize");

	if (ps->ps_noaction == 0)
		log_info("startup");

	event_init();

	signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps);
	signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps);

	signal_add(&ps->ps_evsigint, NULL);
	signal_add(&ps->ps_evsigterm, NULL);
	signal_add(&ps->ps_evsighup, NULL);
	signal_add(&ps->ps_evsigpipe, NULL);
	signal_add(&ps->ps_evsigusr1, NULL);

	if (!env->vmd_noaction)
		proc_connect(ps);

	if (vmd_configure() == -1)
		fatalx("configuration failed");

	event_dispatch();

	log_debug("exiting");

	return (0);
}

void
start_vm_batch(int fd, short type, void *args)
{
	int		 i = 0;
	struct vmd_vm	*vm;

	log_debug("%s: starting batch of %d vms", __func__,
	    env->vmd_cfg.parallelism);
	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (!(vm->vm_state & VM_STATE_WAITING)) {
			log_debug("%s: not starting vm %s (disabled)",
			    __func__,
			    vm->vm_params.vmc_params.vcp_name);
			continue;
		}
		i++;
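		/*
		 * Only start "parallelism" VMs per batch; re-arm the timer
		 * to continue with the next batch after the delay.
		 */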
		if (i > env->vmd_cfg.parallelism) {
			evtimer_add(&staggered_start_timer,
			    &env->vmd_cfg.delay);
			break;
		}
		vm->vm_state &= ~VM_STATE_WAITING;
		config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid);
	}
	log_debug("%s: done starting vms", __func__);
}

int
vmd_configure(void)
{
	int			 ncpus;
	struct vmd_switch	*vsw;
	int			 ncpu_mib[] = {CTL_HW, HW_NCPUONLINE};
	size_t			 ncpus_sz = sizeof(ncpus);

	if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1)
		fatal("open %s", PATH_PTMDEV);

	/*
	 * pledge in the parent process:
	 * stdio - for malloc and basic I/O including events.
	 * rpath - for reload to open and read the configuration files.
	 * wpath - for opening disk images and tap devices.
	 * tty - for openpty and TIOCUCNTL.
	 * proc - run kill to terminate its children safely.
	 * sendfd - for disks, interfaces and other fds.
	 * recvfd - for send and receive.
	 * getpw - lookup user or group id by name.
	 * chown, fattr - change tty ownership
	 * flock - locking disk files
	 */
	if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw"
	    " chown fattr flock", NULL) == -1)
		fatal("pledge");

	if (parse_config(env->vmd_conffile) == -1) {
		proc_kill(&env->vmd_ps);
		exit(1);
	}

	if (env->vmd_noaction) {
		fprintf(stderr, "configuration OK\n");
		proc_kill(&env->vmd_ps);
		exit(0);
	}

	/* Send VMM device fd to vmm proc. */
	proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1,
	    IMSG_VMDOP_RECEIVE_VMM_FD, -1, env->vmd_fd, NULL, 0);

	/* Send shared global configuration to all children */
	if (config_setconfig(env) == -1)
		return (-1);

	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
		if (vsw->sw_running)
			continue;
		if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
			log_warn("%s: failed to create switch %s",
			    __func__, vsw->sw_name);
			switch_remove(vsw);
			return (-1);
		}
	}

	if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) {
		env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY;
		if (sysctl(ncpu_mib, nitems(ncpu_mib), &ncpus, &ncpus_sz,
		    NULL, 0) == -1)
			ncpus = 1;
		env->vmd_cfg.parallelism = ncpus;
		log_debug("%s: setting staggered start configuration to "
		    "parallelism: %d and delay: %lld",
		    __func__, ncpus, (long long)env->vmd_cfg.delay.tv_sec);
	}

	log_debug("%s: starting vms in staggered fashion", __func__);
	evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
	/* start first batch */
	start_vm_batch(0, 0, NULL);

	return (0);
}

int
vmd_reload(unsigned int reset, const char *filename)
{
	struct vmd_vm		*vm, *next_vm;
	struct vmd_switch	*vsw;
	int			 reload = 0;

	/* Switch back to the default config file */
	if (filename == NULL || *filename == '\0') {
		filename = env->vmd_conffile;
		reload = 1;
	}

	log_debug("%s: level %d config file %s", __func__, reset, filename);

	if (reset) {
		/* Purge the configuration */
		config_purge(env, reset);
		config_setreset(env, reset);
	} else {
		/*
		 * Load or reload the configuration.
		 *
		 * Reloading removes all non-running VMs before processing the
		 * config file, whereas loading only adds to the existing list
		 * of VMs.
		 */

		if (reload) {
			TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry,
			    next_vm) {
				if (!(vm->vm_state & VM_STATE_RUNNING)) {
					DPRINTF("%s: calling vm_remove",
					    __func__);
					vm_remove(vm, __func__);
				}
			}
		}

		if (parse_config(filename) == -1) {
			log_debug("%s: failed to load config file %s",
			    __func__, filename);
			return (-1);
		}

		if (reload) {
			/* Update shared global configuration in all children */
			if (config_setconfig(env) == -1)
				return (-1);
		}

		TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
			if (vsw->sw_running)
				continue;
			if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
				log_warn("%s: failed to create switch %s",
				    __func__, vsw->sw_name);
				switch_remove(vsw);
				return (-1);
			}
		}

		log_debug("%s: starting vms in staggered fashion", __func__);
		evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
		/* start first batch */
		start_vm_batch(0, 0, NULL);

	}

	return (0);
}

void
vmd_shutdown(void)
{
	struct vmd_vm *vm, *vm_next;

	log_debug("%s: performing shutdown", __func__);

	TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) {
		vm_remove(vm, __func__);
	}

	proc_kill(&env->vmd_ps);
	free(env);

	log_warnx("terminating");
	exit(0);
}

struct vmd_vm *
vm_getbyvmid(uint32_t vmid)
{
	struct vmd_vm	*vm;

	if (vmid == 0)
		return (NULL);
	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (vm->vm_vmid == vmid)
			return (vm);
	}

	return (NULL);
}

struct vmd_vm *
vm_getbyid(uint32_t id)
{
	struct vmd_vm	*vm;

	if (id == 0)
		return (NULL);
	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (vm->vm_params.vmc_params.vcp_id == id)
			return (vm);
	}

	return (NULL);
}

uint32_t
vm_id2vmid(uint32_t id, struct vmd_vm *vm)
{
	if (vm == NULL && (vm = vm_getbyid(id)) == NULL)
		return (0);
	DPRINTF("%s: vmm id %u is vmid %u", __func__,
	    id, vm->vm_vmid);
	return (vm->vm_vmid);
}

uint32_t
vm_vmid2id(uint32_t vmid, struct vmd_vm *vm)
{
	if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL)
		return (0);
	DPRINTF("%s: vmid %u is vmm id %u", __func__,
	    vmid, vm->vm_params.vmc_params.vcp_id);
	return (vm->vm_params.vmc_params.vcp_id);
}

struct vmd_vm *
vm_getbyname(const char *name)
{
	struct vmd_vm	*vm;

	if (name == NULL)
		return (NULL);
	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0)
			return (vm);
	}

	return (NULL);
}

struct vmd_vm *
vm_getbypid(pid_t pid)
{
	struct vmd_vm	*vm;

	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
		if (vm->vm_pid == pid)
			return (vm);
	}

	return (NULL);
}

/*
 * Tear down a VM's runtime state: close its disk, network, kernel and
 * cdrom descriptors and clear its running-state bits.  With "keeptty"
 * set the tty stays open, e.g. across a guest-initiated reboot.
 */
void
vm_stop(struct vmd_vm *vm, int keeptty, const char *caller)
{
	struct privsep	*ps = &env->vmd_ps;
	unsigned int	 i, j;

	if (vm == NULL)
		return;

	log_debug("%s: %s %s stopping vm %d%s",
	    __func__, ps->ps_title[privsep_process], caller,
	    vm->vm_vmid, keeptty ? ", keeping tty open" : "");

	vm->vm_state &= ~(VM_STATE_RECEIVED | VM_STATE_RUNNING
	    | VM_STATE_SHUTDOWN);

	if (vm->vm_iev.ibuf.fd != -1) {
		event_del(&vm->vm_iev.ev);
		close(vm->vm_iev.ibuf.fd);
	}
	for (i = 0; i < VM_MAX_DISKS_PER_VM; i++) {
		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
			if (vm->vm_disks[i][j] != -1) {
				close(vm->vm_disks[i][j]);
				vm->vm_disks[i][j] = -1;
			}
		}
	}
	for (i = 0; i < VM_MAX_NICS_PER_VM; i++) {
		if (vm->vm_ifs[i].vif_fd != -1) {
			close(vm->vm_ifs[i].vif_fd);
			vm->vm_ifs[i].vif_fd = -1;
		}
		free(vm->vm_ifs[i].vif_name);
		free(vm->vm_ifs[i].vif_switch);
		free(vm->vm_ifs[i].vif_group);
		vm->vm_ifs[i].vif_name = NULL;
		vm->vm_ifs[i].vif_switch = NULL;
		vm->vm_ifs[i].vif_group = NULL;
	}
	if (vm->vm_kernel != -1) {
		close(vm->vm_kernel);
		vm->vm_kernel = -1;
	}
	if (vm->vm_cdrom != -1) {
		close(vm->vm_cdrom);
		vm->vm_cdrom = -1;
	}
	if (!keeptty) {
		vm_closetty(vm);
		vm->vm_uid = 0;
	}
}

void
vm_remove(struct vmd_vm *vm, const char *caller)
{
	struct privsep	*ps = &env->vmd_ps;

	if (vm == NULL)
		return;

	log_debug("%s: %s %s removing vm %d from running config",
	    __func__, ps->ps_title[privsep_process], caller,
	    vm->vm_vmid);

	TAILQ_REMOVE(env->vmd_vms, vm, vm_entry);

	vm_stop(vm, 0, caller);
	if (vm->vm_kernel_path != NULL && !vm->vm_from_config)
		free(vm->vm_kernel_path);
	free(vm);
}

int
vm_claimid(const char *name, int uid, uint32_t *id)
{
	struct name2id *n2i = NULL;

	TAILQ_FOREACH(n2i, env->vmd_known, entry)
		if (strcmp(n2i->name, name) == 0 && n2i->uid == uid)
			goto out;

	if (++env->vmd_nvm == 0) {
		log_warnx("too many vms");
		return (-1);
	}
	if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) {
		log_warnx("could not alloc vm name");
		return (-1);
	}
	n2i->id = env->vmd_nvm;
	n2i->uid = uid;
	if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) {
		log_warnx("vm name too long");
		free(n2i);
		return (-1);
	}
	TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry);

out:
	*id = n2i->id;
	return (0);
}

int
vm_register(struct privsep *ps, struct vmop_create_params *vmc,
    struct vmd_vm **ret_vm, uint32_t id, uid_t uid)
{
	struct vmd_vm		*vm = NULL, *vm_parent = NULL;
	struct vm_create_params	*vcp = &vmc->vmc_params;
	struct vmop_owner	*vmo = NULL;
	uint32_t		 nid, rng;
	unsigned int		 i, j;
	struct vmd_switch	*sw;
	char			*s;
	int			 ret = 0;

	/* Check if this is an instance of another VM */
	if ((ret = vm_instance(ps, &vm_parent, vmc, uid)) != 0) {
		errno = ret; /* XXX might set invalid errno */
		return (-1);
	}

	errno = 0;
	*ret_vm = NULL;

	if ((vm = vm_getbyname(vcp->vcp_name)) != NULL ||
	    (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) {
		if (vm_checkperm(vm, &vm->vm_params.vmc_owner,
		    uid) != 0) {
			errno = EPERM;
			goto fail;
		}
		vm->vm_kernel = vmc->vmc_kernel;
		*ret_vm = vm;
		errno = EALREADY;
		goto fail;
	}

	if (vm_parent != NULL)
		vmo = &vm_parent->vm_params.vmc_insowner;

	/* non-root users can only start existing VMs or instances */
	if (vm_checkperm(NULL, vmo, uid) != 0) {
		log_warnx("permission denied");
		errno = EPERM;
		goto fail;
	}
	if (vmc->vmc_flags == 0) {
		log_warnx("invalid configuration, no devices");
		errno = VMD_DISK_MISSING;
		goto fail;
	}
	if (vcp->vcp_ncpus == 0)
		vcp->vcp_ncpus = 1;
	if (vcp->vcp_memranges[0].vmr_size == 0)
		vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY;
	if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) {
		log_warnx("invalid number of CPUs");
		goto fail;
	} else if (vmc->vmc_ndisks > VM_MAX_DISKS_PER_VM) {
		log_warnx("invalid number of disks");
		goto fail;
	} else if (vmc->vmc_nnics > VM_MAX_NICS_PER_VM) {
		log_warnx("invalid number of interfaces");
		goto fail;
	} else if (vmc->vmc_kernel == -1 && vmc->vmc_ndisks == 0
	    && strlen(vmc->vmc_cdrom) == 0) {
		log_warnx("no kernel or disk/cdrom specified");
		goto fail;
	} else if (strlen(vcp->vcp_name) == 0) {
		log_warnx("invalid VM name");
		goto fail;
	} else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' ||
	    *vcp->vcp_name == '_') {
		log_warnx("invalid VM name");
		goto fail;
	} else {
		for (s = vcp->vcp_name; *s != '\0'; ++s) {
			if (!(isalnum((unsigned char)*s) || *s == '.' ||
			    *s == '-' || *s == '_')) {
				log_warnx("invalid VM name");
				goto fail;
			}
		}
	}

	if ((vm = calloc(1, sizeof(*vm))) == NULL)
		goto fail;

	memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params));
	vmc = &vm->vm_params;
	vcp = &vmc->vmc_params;
	vm->vm_pid = -1;
	vm->vm_tty = -1;
	vm->vm_receive_fd = -1;
	vm->vm_kernel = -1;
	vm->vm_state &= ~VM_STATE_PAUSED;

	if (vmc->vmc_kernel > -1)
		vm->vm_kernel = vmc->vmc_kernel;

	for (i = 0; i < VM_MAX_DISKS_PER_VM; i++)
		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
			vm->vm_disks[i][j] = -1;
	for (i = 0; i < VM_MAX_NICS_PER_VM; i++)
		vm->vm_ifs[i].vif_fd = -1;
	for (i = 0; i < vmc->vmc_nnics; i++) {
		if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) {
			/* inherit per-interface flags from the switch */
			vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK);
		}

		/*
		 * If the MAC address is zero, always randomize it in vmd(8)
		 * because we cannot rely on the guest OS to do the right
		 * thing like OpenBSD does.  Based on ether_fakeaddr()
		 * from the kernel, incremented by one to differentiate
		 * the source.
		 */
		if (memcmp(zero_mac, &vmc->vmc_macs[i], ETHER_ADDR_LEN) == 0) {
			rng = arc4random();
			vmc->vmc_macs[i][0] = 0xfe;
			vmc->vmc_macs[i][1] = 0xe1;
			vmc->vmc_macs[i][2] = 0xba + 1;
			vmc->vmc_macs[i][3] = 0xd0 | ((i + 1) & 0xf);
			vmc->vmc_macs[i][4] = rng;
			vmc->vmc_macs[i][5] = rng >> 8;
		}
	}
	vm->vm_cdrom = -1;
	vm->vm_iev.ibuf.fd = -1;

	/*
	 * Assign a new internal Id if not specified and we succeed in
	 * claiming a new Id.
	 */
	if (id != 0)
		vm->vm_vmid = id;
	else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1)
		goto fail;
	else
		vm->vm_vmid = nid;

	log_debug("%s: registering vm %d", __func__, vm->vm_vmid);
	TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry);

	*ret_vm = vm;
	return (0);
fail:
	if (errno == 0)
		errno = EINVAL;
	return (-1);
}

int
vm_instance(struct privsep *ps, struct vmd_vm **vm_parent,
    struct vmop_create_params *vmc, uid_t uid)
{
	char			*name;
	struct vm_create_params	*vcp = &vmc->vmc_params;
	struct vmop_create_params *vmcp;
	struct vm_create_params	*vcpp;
	unsigned int		 i, j;

	/* return without error if the parent is NULL (nothing to inherit) */
	if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 ||
	    vmc->vmc_instance[0] == '\0')
		return (0);

	if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL) {
		return (VMD_PARENT_INVALID);
	}

	vmcp = &(*vm_parent)->vm_params;
	vcpp = &vmcp->vmc_params;

	/* Are we allowed to create an instance from this VM? */
	if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) {
		log_warnx("vm \"%s\" no permission to create vm instance",
		    vcpp->vcp_name);
		return (ENAMETOOLONG);
	}

	name = vcp->vcp_name;

	if (vm_getbyname(vcp->vcp_name) != NULL ||
	    vm_getbyvmid(vcp->vcp_id) != NULL) {
		return (EPROCLIM);
	}

	/* CPU */
	if (vcp->vcp_ncpus == 0)
		vcp->vcp_ncpus = vcpp->vcp_ncpus;
	if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 &&
	    vcp->vcp_ncpus != vcpp->vcp_ncpus) {
		log_warnx("vm \"%s\" no permission to set cpus", name);
		return (EPERM);
	}

	/* memory */
	if (vcp->vcp_memranges[0].vmr_size == 0)
		vcp->vcp_memranges[0].vmr_size =
		    vcpp->vcp_memranges[0].vmr_size;
	if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 &&
	    vcp->vcp_memranges[0].vmr_size !=
	    vcpp->vcp_memranges[0].vmr_size) {
		log_warnx("vm \"%s\" no permission to set memory", name);
		return (EPERM);
	}

	/* disks cannot be inherited */
	if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 &&
	    vmc->vmc_ndisks) {
		log_warnx("vm \"%s\" no permission to set disks", name);
		return (EPERM);
	}
	for (i = 0; i < vmc->vmc_ndisks; i++) {
		/* Check if this disk is already used in the parent */
		for (j = 0; j < vmcp->vmc_ndisks; j++) {
			if (strcmp(vmc->vmc_disks[i],
			    vmcp->vmc_disks[j]) == 0) {
				log_warnx("vm \"%s\" disk %s cannot be reused",
				    name, vmc->vmc_disks[i]);
				return (EBUSY);
			}
		}
		vmc->vmc_checkaccess |= VMOP_CREATE_DISK;
	}

	/* interfaces */
	if (vmc->vmc_nnics > 0 &&
	    vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 &&
	    vmc->vmc_nnics != vmcp->vmc_nnics) {
		log_warnx("vm \"%s\" no permission to set interfaces", name);
		return (EPERM);
	}
	for (i = 0; i < vmcp->vmc_nnics; i++) {
		/* Interface got overwritten */
		if (i < vmc->vmc_nnics)
			continue;

		/* Copy interface from parent */
		vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i];
		(void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i],
		    sizeof(vmc->vmc_ifnames[i]));
		(void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i],
		    sizeof(vmc->vmc_ifswitch[i]));
		(void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i],
		    sizeof(vmc->vmc_ifgroup[i]));
		memcpy(vmc->vmc_macs[i], vmcp->vmc_macs[i],
		    sizeof(vmc->vmc_macs[i]));
		vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i];
		vmc->vmc_nnics++;
	}
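	/* An instance must not reuse the parent's lladdrs or tap names. */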
	for (i = 0; i < vmc->vmc_nnics; i++) {
		for (j = 0; j < vmcp->vmc_nnics; j++) {
			if (memcmp(zero_mac, vmc->vmc_macs[i],
			    sizeof(vmc->vmc_macs[i])) != 0 &&
			    memcmp(vmcp->vmc_macs[i], vmc->vmc_macs[i],
			    sizeof(vmc->vmc_macs[i])) != 0) {
				log_warnx("vm \"%s\" lladdr cannot be reused",
				    name);
				return (EBUSY);
			}
			if (strlen(vmc->vmc_ifnames[i]) &&
			    strcmp(vmc->vmc_ifnames[i],
			    vmcp->vmc_ifnames[j]) == 0) {
				log_warnx("vm \"%s\" %s cannot be reused",
				    name, vmc->vmc_ifnames[i]);
				return (EBUSY);
			}
		}
	}

	/* kernel */
	if (vmc->vmc_kernel > -1 || ((*vm_parent)->vm_kernel_path != NULL &&
	    strnlen((*vm_parent)->vm_kernel_path, PATH_MAX) < PATH_MAX)) {
		if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) {
			log_warnx("vm \"%s\" no permission to set boot image",
			    name);
			return (EPERM);
		}
		vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL;
	}

	/* cdrom */
	if (strlen(vmc->vmc_cdrom) > 0) {
		if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) {
			log_warnx("vm \"%s\" no permission to set cdrom", name);
			return (EPERM);
		}
		vmc->vmc_checkaccess |= VMOP_CREATE_CDROM;
	} else if (strlcpy(vmc->vmc_cdrom, vmcp->vmc_cdrom,
	    sizeof(vmc->vmc_cdrom)) >= sizeof(vmc->vmc_cdrom)) {
		log_warnx("vm \"%s\" cdrom name too long", name);
		return (EINVAL);
	}

	/* user */
	if (vmc->vmc_owner.uid == 0)
		vmc->vmc_owner.uid = vmcp->vmc_owner.uid;
	else if (vmc->vmc_owner.uid != uid &&
	    vmc->vmc_owner.uid != vmcp->vmc_owner.uid) {
		log_warnx("vm \"%s\" user mismatch", name);
		return (EPERM);
	}

	/* group */
	if (vmc->vmc_owner.gid == 0)
		vmc->vmc_owner.gid = vmcp->vmc_owner.gid;
	else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) {
		log_warnx("vm \"%s\" group mismatch", name);
		return (EPERM);
	}

	/* child instances */
	if (vmc->vmc_insflags) {
		log_warnx("vm \"%s\" cannot change instance permissions", name);
		return (EPERM);
	}
	if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) {
		vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid;
		vmc->vmc_insowner.uid = vmcp->vmc_insowner.uid;
		vmc->vmc_insflags = vmcp->vmc_insflags;
	} else {
		vmc->vmc_insowner.gid = 0;
		vmc->vmc_insowner.uid = 0;
		vmc->vmc_insflags = 0;
	}

	/* finished, remove instance flags */
	vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE;

	return (0);
}

/*
 * vm_checkperm
 *
 * Checks if the user represented by the 'uid' parameter is allowed to
 * manipulate the VM described by the 'vm' parameter (or connect to said VM's
 * console.)
 *
 * Parameters:
 *  vm: the VM whose permission is to be checked
 *  vmo: the required uid/gid to be checked
 *  uid: the user ID of the user making the request
 *
 * Return values:
 *   0: the permission should be granted
 *  -1: the permission check failed (also returned if vm == null)
 */
int
vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid)
{
	struct group	*gr;
	struct passwd	*pw;
	char		**grmem;

	/* root has no restrictions */
	if (uid == 0)
		return (0);

	if (vmo == NULL)
		return (-1);

	/* check user */
	if (vm == NULL) {
		if (vmo->uid == uid)
			return (0);
	} else {
		/*
		 * Check the user of the running vm (the owner of a running
		 * vm can be different from, or more specific than, the
		 * configured owner).
		 */
		if (((vm->vm_state & VM_STATE_RUNNING) && vm->vm_uid == uid) ||
		    (!(vm->vm_state & VM_STATE_RUNNING) && vmo->uid == uid))
			return (0);
	}

	/* check groups */
	if (vmo->gid != -1) {
		if ((pw = getpwuid(uid)) == NULL)
			return (-1);
		if (pw->pw_gid == vmo->gid)
			return (0);
		if ((gr = getgrgid(vmo->gid)) != NULL) {
			for (grmem = gr->gr_mem; *grmem; grmem++)
				if (strcmp(*grmem, pw->pw_name) == 0)
					return (0);
		}
	}

	return (-1);
}

/*
 * vm_checkinsflag
 *
 * Checks whether the non-root user is allowed to set an instance option.
 *
 * Parameters:
 *  vmc: the VM create parameters
 *  flag: the flag to be checked
 *  uid: the user ID of the user making the request
 *
 * Return values:
 *   0: the permission should be granted
 *  -1: the permission check failed
 */
int
vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid)
{
	/* root has no restrictions */
	if (uid == 0)
		return (0);

	if ((vmc->vmc_insflags & flag) == 0)
		return (-1);

	return (0);
}

/*
 * vm_checkaccess
 *
 * Checks if the user represented by the 'uid' parameter is allowed to
 * access the file referenced by the 'fd' parameter.
 *
 * Parameters:
 *  fd: the file descriptor of the opened file
 *  uflag: check if the userid has access to the file
 *  uid: the user ID of the user making the request
 *  amode: the access flags of R_OK and W_OK
 *
 * Return values:
 *   0: the permission should be granted
 *  -1: the permission check failed
 */
int
vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode)
{
	struct group	*gr;
	struct passwd	*pw;
	char		**grmem;
	struct stat	 st;
	mode_t		 mode;

	if (fd == -1)
		return (-1);

	/*
	 * File has to be accessible and a regular file
	 */
	if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode))
		return (-1);

	/* root has no restrictions */
	if (uid == 0 || uflag == 0)
		return (0);

	/* check other */
	mode = amode & W_OK ? S_IWOTH : 0;
	mode |= amode & R_OK ? S_IROTH : 0;
	if ((st.st_mode & mode) == mode)
		return (0);

	/* check user */
	mode = amode & W_OK ? S_IWUSR : 0;
	mode |= amode & R_OK ? S_IRUSR : 0;
	if (uid == st.st_uid && (st.st_mode & mode) == mode)
		return (0);

	/* check groups */
	mode = amode & W_OK ? S_IWGRP : 0;
	mode |= amode & R_OK ? S_IRGRP : 0;
	if ((st.st_mode & mode) != mode)
		return (-1);
	if ((pw = getpwuid(uid)) == NULL)
		return (-1);
	if (pw->pw_gid == st.st_gid)
		return (0);
	if ((gr = getgrgid(st.st_gid)) != NULL) {
		for (grmem = gr->gr_mem; *grmem; grmem++)
			if (strcmp(*grmem, pw->pw_name) == 0)
				return (0);
	}

	return (-1);
}

int
vm_opentty(struct vmd_vm *vm)
{
	struct ptmget	 ptm;
	struct stat	 st;
	struct group	*gr;
	uid_t		 uid;
	gid_t		 gid;
	mode_t		 mode;
	int		 on;

	/*
	 * Open tty with pre-opened PTM fd
	 */
	if (ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1)
		return (-1);

	/*
	 * We use user ioctl(2) mode to pass break commands.
	 */
	on = 1;
	if (ioctl(ptm.cfd, TIOCUCNTL, &on) == -1)
		fatal("could not enable user ioctl mode");

	vm->vm_tty = ptm.cfd;
	close(ptm.sfd);
	if (strlcpy(vm->vm_ttyname, ptm.sn, sizeof(vm->vm_ttyname))
	    >= sizeof(vm->vm_ttyname)) {
		log_warnx("%s: truncated ttyname", __func__);
		goto fail;
	}

	uid = vm->vm_uid;
	gid = vm->vm_params.vmc_owner.gid;

	if (vm->vm_params.vmc_owner.gid != -1) {
		mode = 0660;
	} else if ((gr = getgrnam("tty")) != NULL) {
		gid = gr->gr_gid;
		mode = 0620;
	} else {
		mode = 0600;
		gid = 0;
	}

	log_debug("%s: vm %s tty %s uid %d gid %d mode %o",
	    __func__, vm->vm_params.vmc_params.vcp_name,
	    vm->vm_ttyname, uid, gid, mode);

	/*
	 * Change ownership and mode of the tty as required.
	 * Loosely based on the implementation of sshpty.c
	 */
	if (stat(vm->vm_ttyname, &st) == -1)
		goto fail;

	if (st.st_uid != uid || st.st_gid != gid) {
		if (chown(vm->vm_ttyname, uid, gid) == -1) {
			log_warn("chown %s %d %d failed, uid %d",
			    vm->vm_ttyname, uid, gid, getuid());

			/* Ignore failure on read-only filesystems */
			if (!((errno == EROFS) &&
			    (st.st_uid == uid || st.st_uid == 0)))
				goto fail;
		}
	}

	if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) {
		if (chmod(vm->vm_ttyname, mode) == -1) {
			log_warn("chmod %s %o failed, uid %d",
			    vm->vm_ttyname, mode, getuid());

			/* Ignore failure on read-only filesystems */
			if (!((errno == EROFS) &&
			    (st.st_uid == uid || st.st_uid == 0)))
				goto fail;
		}
	}

	return (0);
fail:
	vm_closetty(vm);
	return (-1);
}

void
vm_closetty(struct vmd_vm *vm)
{
	if (vm->vm_tty != -1) {
		/* Release and close the tty */
		if (fchown(vm->vm_tty, 0, 0) == -1)
			log_warn("chown %s 0 0 failed", vm->vm_ttyname);
		if (fchmod(vm->vm_tty, 0666) == -1)
			log_warn("chmod %s 0666 failed", vm->vm_ttyname);
		close(vm->vm_tty);
		vm->vm_tty = -1;
	}
	memset(&vm->vm_ttyname, 0, sizeof(vm->vm_ttyname));
}

void
switch_remove(struct vmd_switch *vsw)
{
	if (vsw == NULL)
		return;

	TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry);

	free(vsw->sw_group);
	free(vsw->sw_name);
	free(vsw);
}

struct vmd_switch *
switch_getbyname(const char *name)
{
	struct vmd_switch	*vsw;

	if (name == NULL)
		return (NULL);
	TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
		if (strcmp(vsw->sw_name, name) == 0)
			return (vsw);
	}

	return (NULL);
}

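/*
 * Copy the printable prefix of an untrusted buffer into a freshly
 * allocated, NUL-terminated string.
 */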
char *
get_string(uint8_t *ptr, size_t len)
{
	size_t	 i;

	for (i = 0; i < len; i++)
		if (!isprint((unsigned char)ptr[i]))
			break;

	return strndup(ptr, i);
}

uint32_t
prefixlen2mask(uint8_t prefixlen)
{
	if (prefixlen == 0)
		return (0);

	if (prefixlen > 32)
		prefixlen = 32;

	return (htonl(0xffffffff << (32 - prefixlen)));
}

void
prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask)
{
	struct in6_addr	 s6;
	int		 i;

	if (prefixlen > 128)
		prefixlen = 128;

	memset(&s6, 0, sizeof(s6));
	for (i = 0; i < prefixlen / 8; i++)
		s6.s6_addr[i] = 0xff;
	i = prefixlen % 8;
	if (i)
		s6.s6_addr[prefixlen / 8] = 0xff00 >> i;

	memcpy(mask, &s6, sizeof(s6));
}

void
getmonotime(struct timeval *tv)
{
	struct timespec	 ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts))
		fatal("clock_gettime");

	TIMESPEC_TO_TIMEVAL(tv, &ts);
}

static inline void
vm_terminate(struct vmd_vm *vm, const char *caller)
{
	if (vm->vm_from_config)
		vm_stop(vm, 0, caller);
	else {
		/* vm_remove calls vm_stop */
		vm_remove(vm, caller);
	}
}

/*
 * Utility function for closing vm file descriptors. Assumes an fd of -1 was
 * already closed or never opened.
 *
 * Returns 0 on success, otherwise -1 on failure.
 */
int
close_fd(int fd)
{
	int	 ret;

	if (fd == -1)
		return (0);

#ifdef POSIX_CLOSE_RESTART
	do { ret = close(fd); } while (ret == -1 && errno == EINTR);
#else
	ret = close(fd);
#endif /* POSIX_CLOSE_RESTART */

	if (ret == -1 && errno == EIO)
		log_warn("%s(%d)", __func__, fd);

	return (ret);
}