1 /* $OpenBSD: vmd.c,v 1.128 2021/12/13 18:28:40 deraadt Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 #include <sys/queue.h> 21 #include <sys/wait.h> 22 #include <sys/stat.h> 23 #include <sys/sysctl.h> 24 #include <sys/tty.h> 25 #include <sys/ttycom.h> 26 #include <sys/ioctl.h> 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <string.h> 31 #include <termios.h> 32 #include <errno.h> 33 #include <event.h> 34 #include <fcntl.h> 35 #include <pwd.h> 36 #include <signal.h> 37 #include <syslog.h> 38 #include <unistd.h> 39 #include <util.h> 40 #include <ctype.h> 41 #include <pwd.h> 42 #include <grp.h> 43 44 #include <machine/specialreg.h> 45 #include <machine/vmmvar.h> 46 47 #include "proc.h" 48 #include "atomicio.h" 49 #include "vmd.h" 50 51 __dead void usage(void); 52 53 int main(int, char **); 54 int vmd_configure(void); 55 void vmd_sighdlr(int sig, short event, void *arg); 56 void vmd_shutdown(void); 57 int vmd_control_run(void); 58 int vmd_dispatch_control(int, struct privsep_proc *, struct imsg *); 59 int vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *); 60 int vmd_dispatch_priv(int, struct privsep_proc *, struct imsg *); 61 int vmd_check_vmh(struct vm_dump_header *); 62 63 int vm_instance(struct privsep *, struct vmd_vm **, 64 struct vmop_create_params *, uid_t); 65 int vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t); 66 int vm_claimid(const char *, int, uint32_t *); 67 void start_vm_batch(int, short, void*); 68 69 struct vmd *env; 70 71 static struct privsep_proc procs[] = { 72 /* Keep "priv" on top as procs[0] */ 73 { "priv", PROC_PRIV, vmd_dispatch_priv, priv }, 74 { "control", PROC_CONTROL, vmd_dispatch_control, control }, 75 { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, vmm_shutdown }, 76 }; 77 78 enum privsep_procid privsep_process; 79 80 struct event staggered_start_timer; 81 82 /* For the privileged process */ 83 static struct privsep_proc *proc_priv = &procs[0]; 84 static struct passwd proc_privpw; 85 static const uint8_t zero_mac[ETHER_ADDR_LEN]; 86 87 int 88 vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg) 89 { 90 struct privsep *ps = p->p_ps; 91 int res = 0, ret = 0, cmd = 0, verbose; 92 unsigned int v = 0, flags; 93 struct vmop_create_params vmc; 94 struct vmop_id vid; 95 struct vmop_result vmr; 96 struct vm_dump_header vmh; 97 struct vmd_vm *vm = NULL; 98 char *str = NULL; 99 uint32_t id = 0; 100 struct control_sock *rcs; 101 102 switch (imsg->hdr.type) { 103 case IMSG_VMDOP_START_VM_REQUEST: 104 IMSG_SIZE_CHECK(imsg, &vmc); 105 memcpy(&vmc, imsg->data, sizeof(vmc)); 106 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 107 if (vmc.vmc_flags == 0) { 108 /* start an existing VM with pre-configured options */ 109 if (!(ret == -1 && errno == EALREADY && 110 !(vm->vm_state & VM_STATE_RUNNING))) { 111 res = errno; 112 cmd = IMSG_VMDOP_START_VM_RESPONSE; 113 } 114 } else if (ret != 0) { 115 res = errno; 116 cmd = IMSG_VMDOP_START_VM_RESPONSE; 117 } 118 if (res == 0) { 119 res = config_setvm(ps, vm, imsg->hdr.peerid, 120 vm->vm_params.vmc_owner.uid); 121 if (res) 122 cmd = IMSG_VMDOP_START_VM_RESPONSE; 123 } 124 break; 125 case IMSG_VMDOP_WAIT_VM_REQUEST: 126 case IMSG_VMDOP_TERMINATE_VM_REQUEST: 127 IMSG_SIZE_CHECK(imsg, &vid); 128 memcpy(&vid, imsg->data, sizeof(vid)); 129 flags = vid.vid_flags; 130 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 131 132 if ((id = vid.vid_id) == 0) { 133 /* Lookup vm (id) by name */ 134 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 135 res = ENOENT; 136 break; 137 } else if ((vm->vm_state & VM_STATE_SHUTDOWN) && 138 (flags & VMOP_FORCE) == 0) { 139 res = EALREADY; 140 break; 141 } else if (!(vm->vm_state & VM_STATE_RUNNING)) { 142 res = EINVAL; 143 break; 144 } 145 id = vm->vm_vmid; 146 } else if ((vm = vm_getbyvmid(id)) == NULL) { 147 res = ENOENT; 148 break; 149 } 150 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) { 151 res = EPERM; 152 break; 153 } 154 155 /* Only relay TERMINATION requests, not WAIT requests */ 156 if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) { 157 memset(&vid, 0, sizeof(vid)); 158 vid.vid_id = id; 159 vid.vid_flags = flags; 160 161 if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 162 imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1) 163 return (-1); 164 } 165 break; 166 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 167 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 168 break; 169 case IMSG_VMDOP_LOAD: 170 IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */ 171 str = get_string((uint8_t *)imsg->data, 172 IMSG_DATA_SIZE(imsg)); 173 case IMSG_VMDOP_RELOAD: 174 if (vmd_reload(0, str) == -1) 175 cmd = IMSG_CTL_FAIL; 176 else 177 cmd = IMSG_CTL_OK; 178 free(str); 179 break; 180 case IMSG_CTL_RESET: 181 IMSG_SIZE_CHECK(imsg, &v); 182 memcpy(&v, imsg->data, sizeof(v)); 183 if (vmd_reload(v, NULL) == -1) 184 cmd = IMSG_CTL_FAIL; 185 else 186 cmd = IMSG_CTL_OK; 187 break; 188 case IMSG_CTL_VERBOSE: 189 IMSG_SIZE_CHECK(imsg, &verbose); 190 memcpy(&verbose, imsg->data, sizeof(verbose)); 191 log_setverbose(verbose); 192 193 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 194 proc_forward_imsg(ps, imsg, PROC_PRIV, -1); 195 cmd = IMSG_CTL_OK; 196 break; 197 case IMSG_VMDOP_PAUSE_VM: 198 case IMSG_VMDOP_UNPAUSE_VM: 199 IMSG_SIZE_CHECK(imsg, &vid); 200 memcpy(&vid, imsg->data, sizeof(vid)); 201 if (vid.vid_id == 0) { 202 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 203 res = ENOENT; 204 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 205 ? IMSG_VMDOP_PAUSE_VM_RESPONSE 206 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 207 break; 208 } else { 209 vid.vid_id = vm->vm_vmid; 210 } 211 } else if ((vm = vm_getbyid(vid.vid_id)) == NULL) { 212 res = ENOENT; 213 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 214 ? IMSG_VMDOP_PAUSE_VM_RESPONSE 215 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 216 break; 217 } 218 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 219 vid.vid_uid) != 0) { 220 res = EPERM; 221 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 222 ? IMSG_VMDOP_PAUSE_VM_RESPONSE 223 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 224 break; 225 } 226 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 227 imsg->hdr.peerid, -1, &vid, sizeof(vid)); 228 break; 229 case IMSG_VMDOP_SEND_VM_REQUEST: 230 IMSG_SIZE_CHECK(imsg, &vid); 231 memcpy(&vid, imsg->data, sizeof(vid)); 232 id = vid.vid_id; 233 if (vid.vid_id == 0) { 234 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 235 res = ENOENT; 236 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 237 close(imsg->fd); 238 break; 239 } else { 240 vid.vid_id = vm->vm_vmid; 241 } 242 } else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) { 243 res = ENOENT; 244 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 245 close(imsg->fd); 246 break; 247 } 248 vmr.vmr_id = vid.vid_id; 249 log_debug("%s: sending fd to vmm", __func__); 250 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 251 imsg->hdr.peerid, imsg->fd, &vid, sizeof(vid)); 252 break; 253 case IMSG_VMDOP_RECEIVE_VM_REQUEST: 254 IMSG_SIZE_CHECK(imsg, &vid); 255 memcpy(&vid, imsg->data, sizeof(vid)); 256 if (imsg->fd == -1) { 257 log_warnx("%s: invalid fd", __func__); 258 return (-1); 259 } 260 if (atomicio(read, imsg->fd, &vmh, sizeof(vmh)) != 261 sizeof(vmh)) { 262 log_warnx("%s: error reading vmh from received vm", 263 __func__); 264 res = EIO; 265 close(imsg->fd); 266 cmd = IMSG_VMDOP_START_VM_RESPONSE; 267 break; 268 } 269 270 if (vmd_check_vmh(&vmh)) { 271 res = ENOENT; 272 close(imsg->fd); 273 cmd = IMSG_VMDOP_START_VM_RESPONSE; 274 break; 275 } 276 if (atomicio(read, imsg->fd, &vmc, sizeof(vmc)) != 277 sizeof(vmc)) { 278 log_warnx("%s: error reading vmc from received vm", 279 __func__); 280 res = EIO; 281 close(imsg->fd); 282 cmd = IMSG_VMDOP_START_VM_RESPONSE; 283 break; 284 } 285 strlcpy(vmc.vmc_params.vcp_name, vid.vid_name, 286 sizeof(vmc.vmc_params.vcp_name)); 287 vmc.vmc_params.vcp_id = 0; 288 289 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 290 if (ret != 0) { 291 res = errno; 292 cmd = IMSG_VMDOP_START_VM_RESPONSE; 293 close(imsg->fd); 294 } else { 295 vm->vm_state |= VM_STATE_RECEIVED; 296 config_setvm(ps, vm, imsg->hdr.peerid, 297 vmc.vmc_owner.uid); 298 log_debug("%s: sending fd to vmm", __func__); 299 proc_compose_imsg(ps, PROC_VMM, -1, 300 IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, imsg->fd, 301 NULL, 0); 302 } 303 break; 304 case IMSG_VMDOP_DONE: 305 control_reset(&ps->ps_csock); 306 TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry) 307 control_reset(rcs); 308 cmd = 0; 309 break; 310 default: 311 return (-1); 312 } 313 314 switch (cmd) { 315 case 0: 316 break; 317 case IMSG_VMDOP_START_VM_RESPONSE: 318 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 319 memset(&vmr, 0, sizeof(vmr)); 320 vmr.vmr_result = res; 321 vmr.vmr_id = id; 322 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 323 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 324 return (-1); 325 break; 326 default: 327 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 328 imsg->hdr.peerid, -1, &res, sizeof(res)) == -1) 329 return (-1); 330 break; 331 } 332 333 return (0); 334 } 335 336 int 337 vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg) 338 { 339 struct vmop_result vmr; 340 struct privsep *ps = p->p_ps; 341 int res = 0; 342 struct vmd_vm *vm; 343 struct vm_create_params *vcp; 344 struct vmop_info_result vir; 345 346 switch (imsg->hdr.type) { 347 case IMSG_VMDOP_PAUSE_VM_RESPONSE: 348 IMSG_SIZE_CHECK(imsg, &vmr); 349 memcpy(&vmr, imsg->data, sizeof(vmr)); 350 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 351 break; 352 proc_compose_imsg(ps, PROC_CONTROL, -1, 353 imsg->hdr.type, imsg->hdr.peerid, -1, 354 imsg->data, sizeof(imsg->data)); 355 log_info("%s: paused vm %d successfully", 356 vm->vm_params.vmc_params.vcp_name, 357 vm->vm_vmid); 358 vm->vm_state |= VM_STATE_PAUSED; 359 break; 360 case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: 361 IMSG_SIZE_CHECK(imsg, &vmr); 362 memcpy(&vmr, imsg->data, sizeof(vmr)); 363 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 364 break; 365 proc_compose_imsg(ps, PROC_CONTROL, -1, 366 imsg->hdr.type, imsg->hdr.peerid, -1, 367 imsg->data, sizeof(imsg->data)); 368 log_info("%s: unpaused vm %d successfully.", 369 vm->vm_params.vmc_params.vcp_name, 370 vm->vm_vmid); 371 vm->vm_state &= ~VM_STATE_PAUSED; 372 break; 373 case IMSG_VMDOP_START_VM_RESPONSE: 374 IMSG_SIZE_CHECK(imsg, &vmr); 375 memcpy(&vmr, imsg->data, sizeof(vmr)); 376 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) 377 break; 378 vm->vm_pid = vmr.vmr_pid; 379 vcp = &vm->vm_params.vmc_params; 380 vcp->vcp_id = vmr.vmr_id; 381 382 /* 383 * If the peerid is not -1, forward the response back to the 384 * the control socket. If it is -1, the request originated 385 * from the parent, not the control socket. 386 */ 387 if (vm->vm_peerid != (uint32_t)-1) { 388 (void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname, 389 sizeof(vmr.vmr_ttyname)); 390 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 391 imsg->hdr.type, vm->vm_peerid, -1, 392 &vmr, sizeof(vmr)) == -1) { 393 errno = vmr.vmr_result; 394 log_warn("%s: failed to foward vm result", 395 vcp->vcp_name); 396 vm_remove(vm, __func__); 397 return (-1); 398 } 399 } 400 401 if (vmr.vmr_result) { 402 errno = vmr.vmr_result; 403 log_warn("%s: failed to start vm", vcp->vcp_name); 404 vm_remove(vm, __func__); 405 break; 406 } 407 408 /* Now configure all the interfaces */ 409 if (vm_priv_ifconfig(ps, vm) == -1) { 410 log_warn("%s: failed to configure vm", vcp->vcp_name); 411 vm_remove(vm, __func__); 412 break; 413 } 414 415 log_info("%s: started vm %d successfully, tty %s", 416 vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname); 417 break; 418 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 419 IMSG_SIZE_CHECK(imsg, &vmr); 420 memcpy(&vmr, imsg->data, sizeof(vmr)); 421 422 if (vmr.vmr_result) { 423 DPRINTF("%s: forwarding TERMINATE VM for vm id %d", 424 __func__, vmr.vmr_id); 425 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 426 } else { 427 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 428 break; 429 /* Mark VM as shutting down */ 430 vm->vm_state |= VM_STATE_SHUTDOWN; 431 } 432 break; 433 case IMSG_VMDOP_SEND_VM_RESPONSE: 434 IMSG_SIZE_CHECK(imsg, &vmr); 435 memcpy(&vmr, imsg->data, sizeof(vmr)); 436 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 437 break; 438 if (!vmr.vmr_result) { 439 log_info("%s: sent vm %d successfully.", 440 vm->vm_params.vmc_params.vcp_name, 441 vm->vm_vmid); 442 if (vm->vm_from_config) 443 vm_stop(vm, 0, __func__); 444 else 445 vm_remove(vm, __func__); 446 } 447 448 /* Send a response if a control client is waiting for it */ 449 if (imsg->hdr.peerid != (uint32_t)-1) { 450 /* the error is meaningless for deferred responses */ 451 vmr.vmr_result = 0; 452 453 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 454 IMSG_VMDOP_SEND_VM_RESPONSE, 455 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 456 return (-1); 457 } 458 break; 459 case IMSG_VMDOP_TERMINATE_VM_EVENT: 460 IMSG_SIZE_CHECK(imsg, &vmr); 461 memcpy(&vmr, imsg->data, sizeof(vmr)); 462 DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d", 463 __func__, vmr.vmr_id, vmr.vmr_result); 464 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) { 465 log_debug("%s: vm %d is no longer available", 466 __func__, vmr.vmr_id); 467 break; 468 } 469 if (vmr.vmr_result != EAGAIN || 470 vm->vm_params.vmc_bootdevice) { 471 if (vm->vm_from_config) 472 vm_stop(vm, 0, __func__); 473 else 474 vm_remove(vm, __func__); 475 } else { 476 /* Stop VM instance but keep the tty open */ 477 vm_stop(vm, 1, __func__); 478 config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid); 479 } 480 481 /* The error is meaningless for deferred responses */ 482 vmr.vmr_result = 0; 483 484 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 485 IMSG_VMDOP_TERMINATE_VM_EVENT, 486 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 487 return (-1); 488 break; 489 case IMSG_VMDOP_GET_INFO_VM_DATA: 490 IMSG_SIZE_CHECK(imsg, &vir); 491 memcpy(&vir, imsg->data, sizeof(vir)); 492 if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) { 493 memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname)); 494 if (vm->vm_ttyname != NULL) 495 strlcpy(vir.vir_ttyname, vm->vm_ttyname, 496 sizeof(vir.vir_ttyname)); 497 log_debug("%s: running vm: %d, vm_state: 0x%x", 498 __func__, vm->vm_vmid, vm->vm_state); 499 vir.vir_state = vm->vm_state; 500 /* get the user id who started the vm */ 501 vir.vir_uid = vm->vm_uid; 502 vir.vir_gid = vm->vm_params.vmc_owner.gid; 503 } 504 if (proc_compose_imsg(ps, PROC_CONTROL, -1, imsg->hdr.type, 505 imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { 506 log_debug("%s: GET_INFO_VM failed for vm %d, removing", 507 __func__, vm->vm_vmid); 508 vm_remove(vm, __func__); 509 return (-1); 510 } 511 break; 512 case IMSG_VMDOP_GET_INFO_VM_END_DATA: 513 /* 514 * PROC_VMM has responded with the *running* VMs, now we 515 * append the others. These use the special value 0 for their 516 * kernel id to indicate that they are not running. 517 */ 518 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 519 if (!(vm->vm_state & VM_STATE_RUNNING)) { 520 memset(&vir, 0, sizeof(vir)); 521 vir.vir_info.vir_id = vm->vm_vmid; 522 strlcpy(vir.vir_info.vir_name, 523 vm->vm_params.vmc_params.vcp_name, 524 VMM_MAX_NAME_LEN); 525 vir.vir_info.vir_memory_size = 526 vm->vm_params.vmc_params. 527 vcp_memranges[0].vmr_size; 528 vir.vir_info.vir_ncpus = 529 vm->vm_params.vmc_params.vcp_ncpus; 530 /* get the configured user id for this vm */ 531 vir.vir_uid = vm->vm_params.vmc_owner.uid; 532 vir.vir_gid = vm->vm_params.vmc_owner.gid; 533 log_debug("%s: vm: %d, vm_state: 0x%x", 534 __func__, vm->vm_vmid, vm->vm_state); 535 vir.vir_state = vm->vm_state; 536 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 537 IMSG_VMDOP_GET_INFO_VM_DATA, 538 imsg->hdr.peerid, -1, &vir, 539 sizeof(vir)) == -1) { 540 log_debug("%s: GET_INFO_VM_END failed", 541 __func__); 542 vm_remove(vm, __func__); 543 return (-1); 544 } 545 } 546 } 547 IMSG_SIZE_CHECK(imsg, &res); 548 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 549 break; 550 default: 551 return (-1); 552 } 553 554 return (0); 555 } 556 557 int 558 vmd_dispatch_priv(int fd, struct privsep_proc *p, struct imsg *imsg) 559 { 560 struct vmop_addr_result var; 561 562 switch (imsg->hdr.type) { 563 case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE: 564 IMSG_SIZE_CHECK(imsg, &var); 565 memcpy(&var, imsg->data, sizeof(var)); 566 proc_forward_imsg(p->p_ps, imsg, PROC_VMM, -1); 567 break; 568 default: 569 return (-1); 570 } 571 572 return (0); 573 } 574 575 int 576 vmd_check_vmh(struct vm_dump_header *vmh) 577 { 578 int i; 579 unsigned int code, leaf; 580 unsigned int a, b, c, d; 581 582 if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE, strlen(VM_DUMP_SIGNATURE)) != 0) { 583 log_warnx("%s: incompatible dump signature", __func__); 584 return (-1); 585 } 586 587 if (vmh->vmh_version != VM_DUMP_VERSION) { 588 log_warnx("%s: incompatible dump version", __func__); 589 return (-1); 590 } 591 592 for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) { 593 code = vmh->vmh_cpuids[i].code; 594 leaf = vmh->vmh_cpuids[i].leaf; 595 if (leaf != 0x00) { 596 log_debug("%s: invalid leaf 0x%x for code 0x%x", 597 __func__, leaf, code); 598 return (-1); 599 } 600 601 switch (code) { 602 case 0x00: 603 CPUID_LEAF(code, leaf, a, b, c, d); 604 if (vmh->vmh_cpuids[i].a > a) { 605 log_debug("%s: incompatible cpuid level", 606 __func__); 607 return (-1); 608 } 609 if (!(vmh->vmh_cpuids[i].b == b && 610 vmh->vmh_cpuids[i].c == c && 611 vmh->vmh_cpuids[i].d == d)) { 612 log_debug("%s: incompatible cpu brand", 613 __func__); 614 return (-1); 615 } 616 break; 617 618 case 0x01: 619 CPUID_LEAF(code, leaf, a, b, c, d); 620 if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) != 621 (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) { 622 log_debug("%s: incompatible cpu features " 623 "code: 0x%x leaf: 0x%x reg: c", __func__, 624 code, leaf); 625 return (-1); 626 } 627 if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) != 628 (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) { 629 log_debug("%s: incompatible cpu features " 630 "code: 0x%x leaf: 0x%x reg: d", __func__, 631 code, leaf); 632 return (-1); 633 } 634 break; 635 636 case 0x07: 637 CPUID_LEAF(code, leaf, a, b, c, d); 638 if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) != 639 (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) { 640 log_debug("%s: incompatible cpu features " 641 "code: 0x%x leaf: 0x%x reg: c", __func__, 642 code, leaf); 643 return (-1); 644 } 645 if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) != 646 (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) { 647 log_debug("%s: incompatible cpu features " 648 "code: 0x%x leaf: 0x%x reg: d", __func__, 649 code, leaf); 650 return (-1); 651 } 652 break; 653 654 case 0x0d: 655 CPUID_LEAF(code, leaf, a, b, c, d); 656 if (vmh->vmh_cpuids[i].b > b) { 657 log_debug("%s: incompatible cpu: insufficient " 658 "max save area for enabled XCR0 features", 659 __func__); 660 return (-1); 661 } 662 if (vmh->vmh_cpuids[i].c > c) { 663 log_debug("%s: incompatible cpu: insufficient " 664 "max save area for supported XCR0 features", 665 __func__); 666 return (-1); 667 } 668 break; 669 670 case 0x80000001: 671 CPUID_LEAF(code, leaf, a, b, c, d); 672 if ((vmh->vmh_cpuids[i].a & a) != 673 vmh->vmh_cpuids[i].a) { 674 log_debug("%s: incompatible cpu features " 675 "code: 0x%x leaf: 0x%x reg: a", __func__, 676 code, leaf); 677 return (-1); 678 } 679 if ((vmh->vmh_cpuids[i].c & c) != 680 vmh->vmh_cpuids[i].c) { 681 log_debug("%s: incompatible cpu features " 682 "code: 0x%x leaf: 0x%x reg: c", __func__, 683 code, leaf); 684 return (-1); 685 } 686 if ((vmh->vmh_cpuids[i].d & d) != 687 vmh->vmh_cpuids[i].d) { 688 log_debug("%s: incompatible cpu features " 689 "code: 0x%x leaf: 0x%x reg: d", __func__, 690 code, leaf); 691 return (-1); 692 } 693 break; 694 695 default: 696 log_debug("%s: unknown code 0x%x", __func__, code); 697 return (-1); 698 } 699 } 700 701 return (0); 702 } 703 704 void 705 vmd_sighdlr(int sig, short event, void *arg) 706 { 707 if (privsep_process != PROC_PARENT) 708 return; 709 log_debug("%s: handling signal", __func__); 710 711 switch (sig) { 712 case SIGHUP: 713 log_info("%s: reload requested with SIGHUP", __func__); 714 715 /* 716 * This is safe because libevent uses async signal handlers 717 * that run in the event loop and not in signal context. 718 */ 719 (void)vmd_reload(0, NULL); 720 break; 721 case SIGPIPE: 722 log_info("%s: ignoring SIGPIPE", __func__); 723 break; 724 case SIGUSR1: 725 log_info("%s: ignoring SIGUSR1", __func__); 726 break; 727 case SIGTERM: 728 case SIGINT: 729 vmd_shutdown(); 730 break; 731 default: 732 fatalx("unexpected signal"); 733 } 734 } 735 736 __dead void 737 usage(void) 738 { 739 extern char *__progname; 740 fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n", 741 __progname); 742 exit(1); 743 } 744 745 int 746 main(int argc, char **argv) 747 { 748 struct privsep *ps; 749 int ch; 750 const char *conffile = VMD_CONF; 751 enum privsep_procid proc_id = PROC_PARENT; 752 int proc_instance = 0; 753 const char *errp, *title = NULL; 754 int argc0 = argc; 755 756 log_init(0, LOG_DAEMON); 757 758 if ((env = calloc(1, sizeof(*env))) == NULL) 759 fatal("calloc: env"); 760 761 while ((ch = getopt(argc, argv, "D:P:I:df:vn")) != -1) { 762 switch (ch) { 763 case 'D': 764 if (cmdline_symset(optarg) < 0) 765 log_warnx("could not parse macro definition %s", 766 optarg); 767 break; 768 case 'd': 769 env->vmd_debug = 2; 770 break; 771 case 'f': 772 conffile = optarg; 773 break; 774 case 'v': 775 env->vmd_verbose++; 776 break; 777 case 'n': 778 env->vmd_noaction = 1; 779 break; 780 case 'P': 781 title = optarg; 782 proc_id = proc_getid(procs, nitems(procs), title); 783 if (proc_id == PROC_MAX) 784 fatalx("invalid process name"); 785 break; 786 case 'I': 787 proc_instance = strtonum(optarg, 0, 788 PROC_MAX_INSTANCES, &errp); 789 if (errp) 790 fatalx("invalid process instance"); 791 break; 792 default: 793 usage(); 794 } 795 } 796 797 argc -= optind; 798 if (argc > 0) 799 usage(); 800 801 if (env->vmd_noaction && !env->vmd_debug) 802 env->vmd_debug = 1; 803 804 log_init(env->vmd_debug, LOG_DAEMON); 805 log_setverbose(env->vmd_verbose); 806 807 /* check for root privileges */ 808 if (env->vmd_noaction == 0) { 809 if (geteuid()) 810 fatalx("need root privileges"); 811 } 812 813 ps = &env->vmd_ps; 814 ps->ps_env = env; 815 env->vmd_fd = -1; 816 817 if (config_init(env) == -1) 818 fatal("failed to initialize configuration"); 819 820 if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL) 821 fatal("unknown user %s", VMD_USER); 822 823 /* First proc runs as root without pledge but in default chroot */ 824 proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */ 825 proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */ 826 827 /* Open /dev/vmm */ 828 if (env->vmd_noaction == 0) { 829 env->vmd_fd = open(VMM_NODE, O_RDWR); 830 if (env->vmd_fd == -1) 831 fatal("%s", VMM_NODE); 832 } 833 834 /* Configure the control socket */ 835 ps->ps_csock.cs_name = SOCKET_NAME; 836 TAILQ_INIT(&ps->ps_rcsocks); 837 838 /* Configuration will be parsed after forking the children */ 839 env->vmd_conffile = conffile; 840 841 if (env->vmd_noaction) 842 ps->ps_noaction = 1; 843 ps->ps_instance = proc_instance; 844 if (title != NULL) 845 ps->ps_title[proc_id] = title; 846 847 /* only the parent returns */ 848 proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv, 849 proc_id); 850 851 log_procinit("parent"); 852 if (!env->vmd_debug && daemon(0, 0) == -1) 853 fatal("can't daemonize"); 854 855 if (ps->ps_noaction == 0) 856 log_info("startup"); 857 858 event_init(); 859 860 signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps); 861 signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps); 862 signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps); 863 signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps); 864 signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps); 865 866 signal_add(&ps->ps_evsigint, NULL); 867 signal_add(&ps->ps_evsigterm, NULL); 868 signal_add(&ps->ps_evsighup, NULL); 869 signal_add(&ps->ps_evsigpipe, NULL); 870 signal_add(&ps->ps_evsigusr1, NULL); 871 872 if (!env->vmd_noaction) 873 proc_connect(ps); 874 875 if (vmd_configure() == -1) 876 fatalx("configuration failed"); 877 878 event_dispatch(); 879 880 log_debug("parent exiting"); 881 882 return (0); 883 } 884 885 void 886 start_vm_batch(int fd, short type, void *args) 887 { 888 int i = 0; 889 struct vmd_vm *vm; 890 891 log_debug("%s: starting batch of %d vms", __func__, 892 env->vmd_cfg.parallelism); 893 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 894 if (!(vm->vm_state & VM_STATE_WAITING)) { 895 log_debug("%s: not starting vm %s (disabled)", 896 __func__, 897 vm->vm_params.vmc_params.vcp_name); 898 continue; 899 } 900 i++; 901 if (i > env->vmd_cfg.parallelism) { 902 evtimer_add(&staggered_start_timer, 903 &env->vmd_cfg.delay); 904 break; 905 } 906 vm->vm_state &= ~VM_STATE_WAITING; 907 config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid); 908 } 909 log_debug("%s: done starting vms", __func__); 910 } 911 912 int 913 vmd_configure(void) 914 { 915 int ncpus; 916 struct vmd_switch *vsw; 917 int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE}; 918 size_t ncpus_sz = sizeof(ncpus); 919 920 if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1) 921 fatal("open %s", PATH_PTMDEV); 922 923 /* 924 * pledge in the parent process: 925 * stdio - for malloc and basic I/O including events. 926 * rpath - for reload to open and read the configuration files. 927 * wpath - for opening disk images and tap devices. 928 * tty - for openpty and TIOCUCNTL. 929 * proc - run kill to terminate its children safely. 930 * sendfd - for disks, interfaces and other fds. 931 * recvfd - for send and receive. 932 * getpw - lookup user or group id by name. 933 * chown, fattr - change tty ownership 934 * flock - locking disk files 935 */ 936 if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw" 937 " chown fattr flock", NULL) == -1) 938 fatal("pledge"); 939 940 if (parse_config(env->vmd_conffile) == -1) { 941 proc_kill(&env->vmd_ps); 942 exit(1); 943 } 944 945 if (env->vmd_noaction) { 946 fprintf(stderr, "configuration OK\n"); 947 proc_kill(&env->vmd_ps); 948 exit(0); 949 } 950 951 /* Send shared global configuration to all children */ 952 if (config_setconfig(env) == -1) 953 return (-1); 954 955 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 956 if (vsw->sw_running) 957 continue; 958 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 959 log_warn("%s: failed to create switch %s", 960 __func__, vsw->sw_name); 961 switch_remove(vsw); 962 return (-1); 963 } 964 } 965 966 if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) { 967 env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY; 968 if (sysctl(ncpu_mib, nitems(ncpu_mib), &ncpus, &ncpus_sz, NULL, 0) == -1) 969 ncpus = 1; 970 env->vmd_cfg.parallelism = ncpus; 971 log_debug("%s: setting staggered start configuration to " 972 "parallelism: %d and delay: %lld", 973 __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec); 974 } 975 976 log_debug("%s: starting vms in staggered fashion", __func__); 977 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 978 /* start first batch */ 979 start_vm_batch(0, 0, NULL); 980 981 return (0); 982 } 983 984 int 985 vmd_reload(unsigned int reset, const char *filename) 986 { 987 struct vmd_vm *vm, *next_vm; 988 struct vmd_switch *vsw; 989 int reload = 0; 990 991 /* Switch back to the default config file */ 992 if (filename == NULL || *filename == '\0') { 993 filename = env->vmd_conffile; 994 reload = 1; 995 } 996 997 log_debug("%s: level %d config file %s", __func__, reset, filename); 998 999 if (reset) { 1000 /* Purge the configuration */ 1001 config_purge(env, reset); 1002 config_setreset(env, reset); 1003 } else { 1004 /* 1005 * Load or reload the configuration. 1006 * 1007 * Reloading removes all non-running VMs before processing the 1008 * config file, whereas loading only adds to the existing list 1009 * of VMs. 1010 */ 1011 1012 if (reload) { 1013 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, 1014 next_vm) { 1015 if (!(vm->vm_state & VM_STATE_RUNNING)) { 1016 DPRINTF("%s: calling vm_remove", 1017 __func__); 1018 vm_remove(vm, __func__); 1019 } 1020 } 1021 } 1022 1023 if (parse_config(filename) == -1) { 1024 log_debug("%s: failed to load config file %s", 1025 __func__, filename); 1026 return (-1); 1027 } 1028 1029 if (reload) { 1030 /* Update shared global configuration in all children */ 1031 if (config_setconfig(env) == -1) 1032 return (-1); 1033 } 1034 1035 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1036 if (vsw->sw_running) 1037 continue; 1038 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 1039 log_warn("%s: failed to create switch %s", 1040 __func__, vsw->sw_name); 1041 switch_remove(vsw); 1042 return (-1); 1043 } 1044 } 1045 1046 log_debug("%s: starting vms in staggered fashion", __func__); 1047 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 1048 /* start first batch */ 1049 start_vm_batch(0, 0, NULL); 1050 1051 } 1052 1053 return (0); 1054 } 1055 1056 void 1057 vmd_shutdown(void) 1058 { 1059 struct vmd_vm *vm, *vm_next; 1060 1061 log_debug("%s: performing shutdown", __func__); 1062 1063 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) { 1064 vm_remove(vm, __func__); 1065 } 1066 1067 proc_kill(&env->vmd_ps); 1068 free(env); 1069 1070 log_warnx("parent terminating"); 1071 exit(0); 1072 } 1073 1074 struct vmd_vm * 1075 vm_getbyvmid(uint32_t vmid) 1076 { 1077 struct vmd_vm *vm; 1078 1079 if (vmid == 0) 1080 return (NULL); 1081 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1082 if (vm->vm_vmid == vmid) 1083 return (vm); 1084 } 1085 1086 return (NULL); 1087 } 1088 1089 struct vmd_vm * 1090 vm_getbyid(uint32_t id) 1091 { 1092 struct vmd_vm *vm; 1093 1094 if (id == 0) 1095 return (NULL); 1096 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1097 if (vm->vm_params.vmc_params.vcp_id == id) 1098 return (vm); 1099 } 1100 1101 return (NULL); 1102 } 1103 1104 uint32_t 1105 vm_id2vmid(uint32_t id, struct vmd_vm *vm) 1106 { 1107 if (vm == NULL && (vm = vm_getbyid(id)) == NULL) 1108 return (0); 1109 DPRINTF("%s: vmm id %u is vmid %u", __func__, 1110 id, vm->vm_vmid); 1111 return (vm->vm_vmid); 1112 } 1113 1114 uint32_t 1115 vm_vmid2id(uint32_t vmid, struct vmd_vm *vm) 1116 { 1117 if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL) 1118 return (0); 1119 DPRINTF("%s: vmid %u is vmm id %u", __func__, 1120 vmid, vm->vm_params.vmc_params.vcp_id); 1121 return (vm->vm_params.vmc_params.vcp_id); 1122 } 1123 1124 struct vmd_vm * 1125 vm_getbyname(const char *name) 1126 { 1127 struct vmd_vm *vm; 1128 1129 if (name == NULL) 1130 return (NULL); 1131 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1132 if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0) 1133 return (vm); 1134 } 1135 1136 return (NULL); 1137 } 1138 1139 struct vmd_vm * 1140 vm_getbypid(pid_t pid) 1141 { 1142 struct vmd_vm *vm; 1143 1144 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1145 if (vm->vm_pid == pid) 1146 return (vm); 1147 } 1148 1149 return (NULL); 1150 } 1151 1152 void 1153 vm_stop(struct vmd_vm *vm, int keeptty, const char *caller) 1154 { 1155 struct privsep *ps = &env->vmd_ps; 1156 unsigned int i, j; 1157 1158 if (vm == NULL) 1159 return; 1160 1161 log_debug("%s: %s %s stopping vm %d%s", 1162 __func__, ps->ps_title[privsep_process], caller, 1163 vm->vm_vmid, keeptty ? ", keeping tty open" : ""); 1164 1165 vm->vm_state &= ~(VM_STATE_RUNNING | VM_STATE_SHUTDOWN); 1166 1167 user_inc(&vm->vm_params.vmc_params, vm->vm_user, 0); 1168 user_put(vm->vm_user); 1169 1170 if (vm->vm_iev.ibuf.fd != -1) { 1171 event_del(&vm->vm_iev.ev); 1172 close(vm->vm_iev.ibuf.fd); 1173 } 1174 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) { 1175 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 1176 if (vm->vm_disks[i][j] != -1) { 1177 close(vm->vm_disks[i][j]); 1178 vm->vm_disks[i][j] = -1; 1179 } 1180 } 1181 } 1182 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) { 1183 if (vm->vm_ifs[i].vif_fd != -1) { 1184 close(vm->vm_ifs[i].vif_fd); 1185 vm->vm_ifs[i].vif_fd = -1; 1186 } 1187 free(vm->vm_ifs[i].vif_name); 1188 free(vm->vm_ifs[i].vif_switch); 1189 free(vm->vm_ifs[i].vif_group); 1190 vm->vm_ifs[i].vif_name = NULL; 1191 vm->vm_ifs[i].vif_switch = NULL; 1192 vm->vm_ifs[i].vif_group = NULL; 1193 } 1194 if (vm->vm_kernel != -1) { 1195 close(vm->vm_kernel); 1196 vm->vm_kernel = -1; 1197 } 1198 if (vm->vm_cdrom != -1) { 1199 close(vm->vm_cdrom); 1200 vm->vm_cdrom = -1; 1201 } 1202 if (!keeptty) { 1203 vm_closetty(vm); 1204 vm->vm_uid = 0; 1205 } 1206 } 1207 1208 void 1209 vm_remove(struct vmd_vm *vm, const char *caller) 1210 { 1211 struct privsep *ps = &env->vmd_ps; 1212 1213 if (vm == NULL) 1214 return; 1215 1216 log_debug("%s: %s %s removing vm %d from running config", 1217 __func__, ps->ps_title[privsep_process], caller, 1218 vm->vm_vmid); 1219 1220 TAILQ_REMOVE(env->vmd_vms, vm, vm_entry); 1221 1222 user_put(vm->vm_user); 1223 vm_stop(vm, 0, caller); 1224 free(vm); 1225 } 1226 1227 int 1228 vm_claimid(const char *name, int uid, uint32_t *id) 1229 { 1230 struct name2id *n2i = NULL; 1231 1232 TAILQ_FOREACH(n2i, env->vmd_known, entry) 1233 if (strcmp(n2i->name, name) == 0 && n2i->uid == uid) 1234 goto out; 1235 1236 if (++env->vmd_nvm == 0) { 1237 log_warnx("too many vms"); 1238 return -1; 1239 } 1240 if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) { 1241 log_warnx("could not alloc vm name"); 1242 return -1; 1243 } 1244 n2i->id = env->vmd_nvm; 1245 n2i->uid = uid; 1246 if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) { 1247 log_warnx("vm name too long"); 1248 free(n2i); 1249 return -1; 1250 } 1251 TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry); 1252 1253 out: 1254 *id = n2i->id; 1255 return 0; 1256 } 1257 1258 int 1259 vm_register(struct privsep *ps, struct vmop_create_params *vmc, 1260 struct vmd_vm **ret_vm, uint32_t id, uid_t uid) 1261 { 1262 struct vmd_vm *vm = NULL, *vm_parent = NULL; 1263 struct vm_create_params *vcp = &vmc->vmc_params; 1264 struct vmop_owner *vmo = NULL; 1265 struct vmd_user *usr = NULL; 1266 uint32_t nid, rng; 1267 unsigned int i, j; 1268 struct vmd_switch *sw; 1269 char *s; 1270 int ret = 0; 1271 1272 /* Check if this is an instance of another VM */ 1273 if ((ret = vm_instance(ps, &vm_parent, vmc, uid)) != 0) { 1274 errno = ret; /* XXX might set invalid errno */ 1275 return (-1); 1276 } 1277 1278 errno = 0; 1279 *ret_vm = NULL; 1280 1281 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1282 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1283 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 1284 uid) != 0) { 1285 errno = EPERM; 1286 goto fail; 1287 } 1288 *ret_vm = vm; 1289 errno = EALREADY; 1290 goto fail; 1291 } 1292 1293 if (vm_parent != NULL) 1294 vmo = &vm_parent->vm_params.vmc_insowner; 1295 1296 /* non-root users can only start existing VMs or instances */ 1297 if (vm_checkperm(NULL, vmo, uid) != 0) { 1298 log_warnx("permission denied"); 1299 errno = EPERM; 1300 goto fail; 1301 } 1302 if (vmc->vmc_flags == 0) { 1303 log_warnx("invalid configuration, no devices"); 1304 errno = VMD_DISK_MISSING; 1305 goto fail; 1306 } 1307 if (vcp->vcp_ncpus == 0) 1308 vcp->vcp_ncpus = 1; 1309 if (vcp->vcp_memranges[0].vmr_size == 0) 1310 vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY; 1311 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) { 1312 log_warnx("invalid number of CPUs"); 1313 goto fail; 1314 } else if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM) { 1315 log_warnx("invalid number of disks"); 1316 goto fail; 1317 } else if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM) { 1318 log_warnx("invalid number of interfaces"); 1319 goto fail; 1320 } else if (strlen(vcp->vcp_kernel) == 0 && 1321 vcp->vcp_ndisks == 0 && strlen(vcp->vcp_cdrom) == 0) { 1322 log_warnx("no kernel or disk/cdrom specified"); 1323 goto fail; 1324 } else if (strlen(vcp->vcp_name) == 0) { 1325 log_warnx("invalid VM name"); 1326 goto fail; 1327 } else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' || 1328 *vcp->vcp_name == '_') { 1329 log_warnx("invalid VM name"); 1330 goto fail; 1331 } else { 1332 for (s = vcp->vcp_name; *s != '\0'; ++s) { 1333 if (!(isalnum(*s) || *s == '.' || *s == '-' || 1334 *s == '_')) { 1335 log_warnx("invalid VM name"); 1336 goto fail; 1337 } 1338 } 1339 } 1340 1341 /* track active users */ 1342 if (uid != 0 && env->vmd_users != NULL && 1343 (usr = user_get(uid)) == NULL) { 1344 log_warnx("could not add user"); 1345 goto fail; 1346 } 1347 1348 if ((vm = calloc(1, sizeof(*vm))) == NULL) 1349 goto fail; 1350 1351 memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params)); 1352 vmc = &vm->vm_params; 1353 vcp = &vmc->vmc_params; 1354 vm->vm_pid = -1; 1355 vm->vm_tty = -1; 1356 vm->vm_receive_fd = -1; 1357 vm->vm_state &= ~VM_STATE_PAUSED; 1358 vm->vm_user = usr; 1359 1360 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) 1361 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 1362 vm->vm_disks[i][j] = -1; 1363 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) 1364 vm->vm_ifs[i].vif_fd = -1; 1365 for (i = 0; i < vcp->vcp_nnics; i++) { 1366 if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) { 1367 /* inherit per-interface flags from the switch */ 1368 vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK); 1369 } 1370 1371 /* 1372 * If the MAC address is zero, always randomize it in vmd(8) 1373 * because we cannot rely on the guest OS to do the right 1374 * thing like OpenBSD does. Based on ether_fakeaddr() 1375 * from the kernel, incremented by one to differentiate 1376 * the source. 1377 */ 1378 if (memcmp(zero_mac, &vcp->vcp_macs[i], ETHER_ADDR_LEN) == 0) { 1379 rng = arc4random(); 1380 vcp->vcp_macs[i][0] = 0xfe; 1381 vcp->vcp_macs[i][1] = 0xe1; 1382 vcp->vcp_macs[i][2] = 0xba + 1; 1383 vcp->vcp_macs[i][3] = 0xd0 | ((i + 1) & 0xf); 1384 vcp->vcp_macs[i][4] = rng; 1385 vcp->vcp_macs[i][5] = rng >> 8; 1386 } 1387 } 1388 vm->vm_kernel = -1; 1389 vm->vm_cdrom = -1; 1390 vm->vm_iev.ibuf.fd = -1; 1391 1392 /* 1393 * Assign a new internal Id if not specified and we succeed in 1394 * claiming a new Id. 1395 */ 1396 if (id != 0) 1397 vm->vm_vmid = id; 1398 else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1) 1399 goto fail; 1400 else 1401 vm->vm_vmid = nid; 1402 1403 log_debug("%s: registering vm %d", __func__, vm->vm_vmid); 1404 TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry); 1405 1406 *ret_vm = vm; 1407 return (0); 1408 fail: 1409 if (errno == 0) 1410 errno = EINVAL; 1411 return (-1); 1412 } 1413 1414 int 1415 vm_instance(struct privsep *ps, struct vmd_vm **vm_parent, 1416 struct vmop_create_params *vmc, uid_t uid) 1417 { 1418 char *name; 1419 struct vm_create_params *vcp = &vmc->vmc_params; 1420 struct vmop_create_params *vmcp; 1421 struct vm_create_params *vcpp; 1422 struct vmd_vm *vm = NULL; 1423 unsigned int i, j; 1424 uint32_t id; 1425 1426 /* return without error if the parent is NULL (nothing to inherit) */ 1427 if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 || 1428 vmc->vmc_instance[0] == '\0') 1429 return (0); 1430 1431 if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL) { 1432 return (VMD_PARENT_INVALID); 1433 } 1434 1435 vmcp = &(*vm_parent)->vm_params; 1436 vcpp = &vmcp->vmc_params; 1437 1438 /* Are we allowed to create an instance from this VM? */ 1439 if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) { 1440 log_warnx("vm \"%s\" no permission to create vm instance", 1441 vcpp->vcp_name); 1442 return (ENAMETOOLONG); 1443 } 1444 1445 id = vcp->vcp_id; 1446 name = vcp->vcp_name; 1447 1448 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1449 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1450 return (EPROCLIM); 1451 } 1452 1453 /* CPU */ 1454 if (vcp->vcp_ncpus == 0) 1455 vcp->vcp_ncpus = vcpp->vcp_ncpus; 1456 if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 && 1457 vcp->vcp_ncpus != vcpp->vcp_ncpus) { 1458 log_warnx("vm \"%s\" no permission to set cpus", name); 1459 return (EPERM); 1460 } 1461 1462 /* memory */ 1463 if (vcp->vcp_memranges[0].vmr_size == 0) 1464 vcp->vcp_memranges[0].vmr_size = 1465 vcpp->vcp_memranges[0].vmr_size; 1466 if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 && 1467 vcp->vcp_memranges[0].vmr_size != 1468 vcpp->vcp_memranges[0].vmr_size) { 1469 log_warnx("vm \"%s\" no permission to set memory", name); 1470 return (EPERM); 1471 } 1472 1473 /* disks cannot be inherited */ 1474 if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 && 1475 vcp->vcp_ndisks) { 1476 log_warnx("vm \"%s\" no permission to set disks", name); 1477 return (EPERM); 1478 } 1479 for (i = 0; i < vcp->vcp_ndisks; i++) { 1480 /* Check if this disk is already used in the parent */ 1481 for (j = 0; j < vcpp->vcp_ndisks; j++) { 1482 if (strcmp(vcp->vcp_disks[i], 1483 vcpp->vcp_disks[j]) == 0) { 1484 log_warnx("vm \"%s\" disk %s cannot be reused", 1485 name, vcp->vcp_disks[i]); 1486 return (EBUSY); 1487 } 1488 } 1489 vmc->vmc_checkaccess |= VMOP_CREATE_DISK; 1490 } 1491 1492 /* interfaces */ 1493 if (vcp->vcp_nnics > 0 && 1494 vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 && 1495 vcp->vcp_nnics != vcpp->vcp_nnics) { 1496 log_warnx("vm \"%s\" no permission to set interfaces", name); 1497 return (EPERM); 1498 } 1499 for (i = 0; i < vcpp->vcp_nnics; i++) { 1500 /* Interface got overwritten */ 1501 if (i < vcp->vcp_nnics) 1502 continue; 1503 1504 /* Copy interface from parent */ 1505 vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i]; 1506 (void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i], 1507 sizeof(vmc->vmc_ifnames[i])); 1508 (void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i], 1509 sizeof(vmc->vmc_ifswitch[i])); 1510 (void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i], 1511 sizeof(vmc->vmc_ifgroup[i])); 1512 memcpy(vcp->vcp_macs[i], vcpp->vcp_macs[i], 1513 sizeof(vcp->vcp_macs[i])); 1514 vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i]; 1515 vcp->vcp_nnics++; 1516 } 1517 for (i = 0; i < vcp->vcp_nnics; i++) { 1518 for (j = 0; j < vcpp->vcp_nnics; j++) { 1519 if (memcmp(zero_mac, vcp->vcp_macs[i], 1520 sizeof(vcp->vcp_macs[i])) != 0 && 1521 memcmp(vcpp->vcp_macs[i], vcp->vcp_macs[i], 1522 sizeof(vcp->vcp_macs[i])) != 0) { 1523 log_warnx("vm \"%s\" lladdr cannot be reused", 1524 name); 1525 return (EBUSY); 1526 } 1527 if (strlen(vmc->vmc_ifnames[i]) && 1528 strcmp(vmc->vmc_ifnames[i], 1529 vmcp->vmc_ifnames[j]) == 0) { 1530 log_warnx("vm \"%s\" %s cannot be reused", 1531 vmc->vmc_ifnames[i], name); 1532 return (EBUSY); 1533 } 1534 } 1535 } 1536 1537 /* kernel */ 1538 if (strlen(vcp->vcp_kernel) > 0) { 1539 if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) { 1540 log_warnx("vm \"%s\" no permission to set boot image", 1541 name); 1542 return (EPERM); 1543 } 1544 vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL; 1545 } else if (strlcpy(vcp->vcp_kernel, vcpp->vcp_kernel, 1546 sizeof(vcp->vcp_kernel)) >= sizeof(vcp->vcp_kernel)) { 1547 log_warnx("vm \"%s\" kernel name too long", name); 1548 return (EINVAL); 1549 } 1550 1551 /* cdrom */ 1552 if (strlen(vcp->vcp_cdrom) > 0) { 1553 if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) { 1554 log_warnx("vm \"%s\" no permission to set cdrom", name); 1555 return (EPERM); 1556 } 1557 vmc->vmc_checkaccess |= VMOP_CREATE_CDROM; 1558 } else if (strlcpy(vcp->vcp_cdrom, vcpp->vcp_cdrom, 1559 sizeof(vcp->vcp_cdrom)) >= sizeof(vcp->vcp_cdrom)) { 1560 log_warnx("vm \"%s\" cdrom name too long", name); 1561 return (EINVAL); 1562 } 1563 1564 /* user */ 1565 if (vmc->vmc_owner.uid == 0) 1566 vmc->vmc_owner.uid = vmcp->vmc_owner.uid; 1567 else if (vmc->vmc_owner.uid != uid && 1568 vmc->vmc_owner.uid != vmcp->vmc_owner.uid) { 1569 log_warnx("vm \"%s\" user mismatch", name); 1570 return (EPERM); 1571 } 1572 1573 /* group */ 1574 if (vmc->vmc_owner.gid == 0) 1575 vmc->vmc_owner.gid = vmcp->vmc_owner.gid; 1576 else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) { 1577 log_warnx("vm \"%s\" group mismatch", name); 1578 return (EPERM); 1579 } 1580 1581 /* child instances */ 1582 if (vmc->vmc_insflags) { 1583 log_warnx("vm \"%s\" cannot change instance permissions", name); 1584 return (EPERM); 1585 } 1586 if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) { 1587 vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid; 1588 vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid; 1589 vmc->vmc_insflags = vmcp->vmc_insflags; 1590 } else { 1591 vmc->vmc_insowner.gid = 0; 1592 vmc->vmc_insowner.uid = 0; 1593 vmc->vmc_insflags = 0; 1594 } 1595 1596 /* finished, remove instance flags */ 1597 vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE; 1598 1599 return (0); 1600 } 1601 1602 /* 1603 * vm_checkperm 1604 * 1605 * Checks if the user represented by the 'uid' parameter is allowed to 1606 * manipulate the VM described by the 'vm' parameter (or connect to said VM's 1607 * console.) 1608 * 1609 * Parameters: 1610 * vm: the VM whose permission is to be checked 1611 * vmo: the required uid/gid to be checked 1612 * uid: the user ID of the user making the request 1613 * 1614 * Return values: 1615 * 0: the permission should be granted 1616 * -1: the permission check failed (also returned if vm == null) 1617 */ 1618 int 1619 vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid) 1620 { 1621 struct group *gr; 1622 struct passwd *pw; 1623 char **grmem; 1624 1625 /* root has no restrictions */ 1626 if (uid == 0) 1627 return (0); 1628 1629 if (vmo == NULL) 1630 return (-1); 1631 1632 /* check user */ 1633 if (vm == NULL) { 1634 if (vmo->uid == uid) 1635 return (0); 1636 } else { 1637 /* 1638 * check user of running vm (the owner of a running vm can 1639 * be different to (or more specific than) the configured owner. 1640 */ 1641 if (((vm->vm_state & VM_STATE_RUNNING) && vm->vm_uid == uid) || 1642 (!(vm->vm_state & VM_STATE_RUNNING) && vmo->uid == uid)) 1643 return (0); 1644 } 1645 1646 /* check groups */ 1647 if (vmo->gid != -1) { 1648 if ((pw = getpwuid(uid)) == NULL) 1649 return (-1); 1650 if (pw->pw_gid == vmo->gid) 1651 return (0); 1652 if ((gr = getgrgid(vmo->gid)) != NULL) { 1653 for (grmem = gr->gr_mem; *grmem; grmem++) 1654 if (strcmp(*grmem, pw->pw_name) == 0) 1655 return (0); 1656 } 1657 } 1658 1659 return (-1); 1660 } 1661 1662 /* 1663 * vm_checkinsflag 1664 * 1665 * Checks wheter the non-root user is allowed to set an instance option. 1666 * 1667 * Parameters: 1668 * vmc: the VM create parameters 1669 * flag: the flag to be checked 1670 * uid: the user ID of the user making the request 1671 * 1672 * Return values: 1673 * 0: the permission should be granted 1674 * -1: the permission check failed (also returned if vm == null) 1675 */ 1676 int 1677 vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid) 1678 { 1679 /* root has no restrictions */ 1680 if (uid == 0) 1681 return (0); 1682 1683 if ((vmc->vmc_insflags & flag) == 0) 1684 return (-1); 1685 1686 return (0); 1687 } 1688 1689 /* 1690 * vm_checkaccess 1691 * 1692 * Checks if the user represented by the 'uid' parameter is allowed to 1693 * access the file described by the 'path' parameter. 1694 * 1695 * Parameters: 1696 * fd: the file descriptor of the opened file 1697 * uflag: check if the userid has access to the file 1698 * uid: the user ID of the user making the request 1699 * amode: the access flags of R_OK and W_OK 1700 * 1701 * Return values: 1702 * 0: the permission should be granted 1703 * -1: the permission check failed 1704 */ 1705 int 1706 vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode) 1707 { 1708 struct group *gr; 1709 struct passwd *pw; 1710 char **grmem; 1711 struct stat st; 1712 mode_t mode; 1713 1714 if (fd == -1) 1715 return (-1); 1716 1717 /* 1718 * File has to be accessible and a regular file 1719 */ 1720 if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode)) 1721 return (-1); 1722 1723 /* root has no restrictions */ 1724 if (uid == 0 || uflag == 0) 1725 return (0); 1726 1727 /* check other */ 1728 mode = amode & W_OK ? S_IWOTH : 0; 1729 mode |= amode & R_OK ? S_IROTH : 0; 1730 if ((st.st_mode & mode) == mode) 1731 return (0); 1732 1733 /* check user */ 1734 mode = amode & W_OK ? S_IWUSR : 0; 1735 mode |= amode & R_OK ? S_IRUSR : 0; 1736 if (uid == st.st_uid && (st.st_mode & mode) == mode) 1737 return (0); 1738 1739 /* check groups */ 1740 mode = amode & W_OK ? S_IWGRP : 0; 1741 mode |= amode & R_OK ? S_IRGRP : 0; 1742 if ((st.st_mode & mode) != mode) 1743 return (-1); 1744 if ((pw = getpwuid(uid)) == NULL) 1745 return (-1); 1746 if (pw->pw_gid == st.st_gid) 1747 return (0); 1748 if ((gr = getgrgid(st.st_gid)) != NULL) { 1749 for (grmem = gr->gr_mem; *grmem; grmem++) 1750 if (strcmp(*grmem, pw->pw_name) == 0) 1751 return (0); 1752 } 1753 1754 return (-1); 1755 } 1756 1757 int 1758 vm_opentty(struct vmd_vm *vm) 1759 { 1760 struct ptmget ptm; 1761 struct stat st; 1762 struct group *gr; 1763 uid_t uid; 1764 gid_t gid; 1765 mode_t mode; 1766 int on; 1767 1768 /* 1769 * Open tty with pre-opened PTM fd 1770 */ 1771 if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1)) 1772 return (-1); 1773 1774 /* 1775 * We use user ioctl(2) mode to pass break commands. 1776 */ 1777 on = 1; 1778 if (ioctl(ptm.cfd, TIOCUCNTL, &on) == -1) 1779 fatal("could not enable user ioctl mode"); 1780 1781 vm->vm_tty = ptm.cfd; 1782 close(ptm.sfd); 1783 if ((vm->vm_ttyname = strdup(ptm.sn)) == NULL) 1784 goto fail; 1785 1786 uid = vm->vm_uid; 1787 gid = vm->vm_params.vmc_owner.gid; 1788 1789 if (vm->vm_params.vmc_owner.gid != -1) { 1790 mode = 0660; 1791 } else if ((gr = getgrnam("tty")) != NULL) { 1792 gid = gr->gr_gid; 1793 mode = 0620; 1794 } else { 1795 mode = 0600; 1796 gid = 0; 1797 } 1798 1799 log_debug("%s: vm %s tty %s uid %d gid %d mode %o", 1800 __func__, vm->vm_params.vmc_params.vcp_name, 1801 vm->vm_ttyname, uid, gid, mode); 1802 1803 /* 1804 * Change ownership and mode of the tty as required. 1805 * Loosely based on the implementation of sshpty.c 1806 */ 1807 if (stat(vm->vm_ttyname, &st) == -1) 1808 goto fail; 1809 1810 if (st.st_uid != uid || st.st_gid != gid) { 1811 if (chown(vm->vm_ttyname, uid, gid) == -1) { 1812 log_warn("chown %s %d %d failed, uid %d", 1813 vm->vm_ttyname, uid, gid, getuid()); 1814 1815 /* Ignore failure on read-only filesystems */ 1816 if (!((errno == EROFS) && 1817 (st.st_uid == uid || st.st_uid == 0))) 1818 goto fail; 1819 } 1820 } 1821 1822 if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) { 1823 if (chmod(vm->vm_ttyname, mode) == -1) { 1824 log_warn("chmod %s %o failed, uid %d", 1825 vm->vm_ttyname, mode, getuid()); 1826 1827 /* Ignore failure on read-only filesystems */ 1828 if (!((errno == EROFS) && 1829 (st.st_uid == uid || st.st_uid == 0))) 1830 goto fail; 1831 } 1832 } 1833 1834 return (0); 1835 fail: 1836 vm_closetty(vm); 1837 return (-1); 1838 } 1839 1840 void 1841 vm_closetty(struct vmd_vm *vm) 1842 { 1843 if (vm->vm_tty != -1) { 1844 /* Release and close the tty */ 1845 if (fchown(vm->vm_tty, 0, 0) == -1) 1846 log_warn("chown %s 0 0 failed", vm->vm_ttyname); 1847 if (fchmod(vm->vm_tty, 0666) == -1) 1848 log_warn("chmod %s 0666 failed", vm->vm_ttyname); 1849 close(vm->vm_tty); 1850 vm->vm_tty = -1; 1851 } 1852 free(vm->vm_ttyname); 1853 vm->vm_ttyname = NULL; 1854 } 1855 1856 void 1857 switch_remove(struct vmd_switch *vsw) 1858 { 1859 if (vsw == NULL) 1860 return; 1861 1862 TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry); 1863 1864 free(vsw->sw_group); 1865 free(vsw->sw_name); 1866 free(vsw); 1867 } 1868 1869 struct vmd_switch * 1870 switch_getbyname(const char *name) 1871 { 1872 struct vmd_switch *vsw; 1873 1874 if (name == NULL) 1875 return (NULL); 1876 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1877 if (strcmp(vsw->sw_name, name) == 0) 1878 return (vsw); 1879 } 1880 1881 return (NULL); 1882 } 1883 1884 struct vmd_user * 1885 user_get(uid_t uid) 1886 { 1887 struct vmd_user *usr; 1888 1889 if (uid == 0) 1890 return (NULL); 1891 1892 /* first try to find an existing user */ 1893 TAILQ_FOREACH(usr, env->vmd_users, usr_entry) { 1894 if (usr->usr_id.uid == uid) 1895 goto done; 1896 } 1897 1898 if ((usr = calloc(1, sizeof(*usr))) == NULL) { 1899 log_warn("could not allocate user"); 1900 return (NULL); 1901 } 1902 1903 usr->usr_id.uid = uid; 1904 usr->usr_id.gid = -1; 1905 TAILQ_INSERT_TAIL(env->vmd_users, usr, usr_entry); 1906 1907 done: 1908 DPRINTF("%s: uid %d #%d +", 1909 __func__, usr->usr_id.uid, usr->usr_refcnt + 1); 1910 usr->usr_refcnt++; 1911 1912 return (usr); 1913 } 1914 1915 void 1916 user_put(struct vmd_user *usr) 1917 { 1918 if (usr == NULL) 1919 return; 1920 1921 DPRINTF("%s: uid %d #%d -", 1922 __func__, usr->usr_id.uid, usr->usr_refcnt - 1); 1923 1924 if (--usr->usr_refcnt > 0) 1925 return; 1926 1927 TAILQ_REMOVE(env->vmd_users, usr, usr_entry); 1928 free(usr); 1929 } 1930 1931 void 1932 user_inc(struct vm_create_params *vcp, struct vmd_user *usr, int inc) 1933 { 1934 char mem[FMT_SCALED_STRSIZE]; 1935 1936 if (usr == NULL) 1937 return; 1938 1939 /* increment or decrement counters */ 1940 inc = inc ? 1 : -1; 1941 1942 usr->usr_maxcpu += vcp->vcp_ncpus * inc; 1943 usr->usr_maxmem += vcp->vcp_memranges[0].vmr_size * inc; 1944 usr->usr_maxifs += vcp->vcp_nnics * inc; 1945 1946 if (log_getverbose() > 1) { 1947 (void)fmt_scaled(usr->usr_maxmem * 1024 * 1024, mem); 1948 log_debug("%s: %c uid %d ref %d cpu %llu mem %s ifs %llu", 1949 __func__, inc == 1 ? '+' : '-', 1950 usr->usr_id.uid, usr->usr_refcnt, 1951 usr->usr_maxcpu, mem, usr->usr_maxifs); 1952 } 1953 } 1954 1955 int 1956 user_checklimit(struct vmd_user *usr, struct vm_create_params *vcp) 1957 { 1958 const char *limit = ""; 1959 1960 /* XXX make the limits configurable */ 1961 if (usr->usr_maxcpu > VM_DEFAULT_USER_MAXCPU) { 1962 limit = "cpu "; 1963 goto fail; 1964 } 1965 if (usr->usr_maxmem > VM_DEFAULT_USER_MAXMEM) { 1966 limit = "memory "; 1967 goto fail; 1968 } 1969 if (usr->usr_maxifs > VM_DEFAULT_USER_MAXIFS) { 1970 limit = "interface "; 1971 goto fail; 1972 } 1973 1974 return (0); 1975 1976 fail: 1977 log_warnx("%s: user %d %slimit reached", vcp->vcp_name, 1978 usr->usr_id.uid, limit); 1979 return (-1); 1980 } 1981 1982 char * 1983 get_string(uint8_t *ptr, size_t len) 1984 { 1985 size_t i; 1986 1987 for (i = 0; i < len; i++) 1988 if (!isprint(ptr[i])) 1989 break; 1990 1991 return strndup(ptr, i); 1992 } 1993 1994 uint32_t 1995 prefixlen2mask(uint8_t prefixlen) 1996 { 1997 if (prefixlen == 0) 1998 return (0); 1999 2000 if (prefixlen > 32) 2001 prefixlen = 32; 2002 2003 return (htonl(0xffffffff << (32 - prefixlen))); 2004 } 2005 2006 void 2007 prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask) 2008 { 2009 struct in6_addr s6; 2010 int i; 2011 2012 if (prefixlen > 128) 2013 prefixlen = 128; 2014 2015 memset(&s6, 0, sizeof(s6)); 2016 for (i = 0; i < prefixlen / 8; i++) 2017 s6.s6_addr[i] = 0xff; 2018 i = prefixlen % 8; 2019 if (i) 2020 s6.s6_addr[prefixlen / 8] = 0xff00 >> i; 2021 2022 memcpy(mask, &s6, sizeof(s6)); 2023 } 2024 2025 void 2026 getmonotime(struct timeval *tv) 2027 { 2028 struct timespec ts; 2029 2030 if (clock_gettime(CLOCK_MONOTONIC, &ts)) 2031 fatal("clock_gettime"); 2032 2033 TIMESPEC_TO_TIMEVAL(tv, &ts); 2034 } 2035