1 /* $OpenBSD: vmd.c,v 1.125 2021/05/05 21:33:11 dv Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> /* nitems */ 20 #include <sys/queue.h> 21 #include <sys/wait.h> 22 #include <sys/cdefs.h> 23 #include <sys/stat.h> 24 #include <sys/sysctl.h> 25 #include <sys/tty.h> 26 #include <sys/ttycom.h> 27 #include <sys/ioctl.h> 28 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <termios.h> 33 #include <errno.h> 34 #include <event.h> 35 #include <fcntl.h> 36 #include <pwd.h> 37 #include <signal.h> 38 #include <syslog.h> 39 #include <unistd.h> 40 #include <util.h> 41 #include <ctype.h> 42 #include <pwd.h> 43 #include <grp.h> 44 45 #include <machine/specialreg.h> 46 #include <machine/vmmvar.h> 47 48 #include "proc.h" 49 #include "atomicio.h" 50 #include "vmd.h" 51 52 __dead void usage(void); 53 54 int main(int, char **); 55 int vmd_configure(void); 56 void vmd_sighdlr(int sig, short event, void *arg); 57 void vmd_shutdown(void); 58 int vmd_control_run(void); 59 int vmd_dispatch_control(int, struct privsep_proc *, struct imsg *); 60 int vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *); 61 int vmd_dispatch_priv(int, struct privsep_proc *, struct imsg *); 62 int vmd_check_vmh(struct vm_dump_header *); 63 64 int vm_instance(struct privsep *, struct vmd_vm **, 65 struct vmop_create_params *, uid_t); 66 int vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t); 67 int vm_claimid(const char *, int, uint32_t *); 68 void start_vm_batch(int, short, void*); 69 70 struct vmd *env; 71 72 static struct privsep_proc procs[] = { 73 /* Keep "priv" on top as procs[0] */ 74 { "priv", PROC_PRIV, vmd_dispatch_priv, priv }, 75 { "control", PROC_CONTROL, vmd_dispatch_control, control }, 76 { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, vmm_shutdown }, 77 }; 78 79 enum privsep_procid privsep_process; 80 81 struct event staggered_start_timer; 82 83 /* For the privileged process */ 84 static struct privsep_proc *proc_priv = &procs[0]; 85 static struct passwd proc_privpw; 86 static const uint8_t zero_mac[ETHER_ADDR_LEN]; 87 88 int 89 vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg) 90 { 91 struct privsep *ps = p->p_ps; 92 int res = 0, ret = 0, cmd = 0, verbose; 93 unsigned int v = 0, flags; 94 struct vmop_create_params vmc; 95 struct vmop_id vid; 96 struct vmop_result vmr; 97 struct vm_dump_header vmh; 98 struct vmd_vm *vm = NULL; 99 char *str = NULL; 100 uint32_t id = 0; 101 struct control_sock *rcs; 102 103 switch (imsg->hdr.type) { 104 case IMSG_VMDOP_START_VM_REQUEST: 105 IMSG_SIZE_CHECK(imsg, &vmc); 106 memcpy(&vmc, imsg->data, sizeof(vmc)); 107 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 108 if (vmc.vmc_flags == 0) { 109 /* start an existing VM with pre-configured options */ 110 if (!(ret == -1 && errno == EALREADY && 111 !(vm->vm_state & VM_STATE_RUNNING))) { 112 res = errno; 113 cmd = IMSG_VMDOP_START_VM_RESPONSE; 114 } 115 } else if (ret != 0) { 116 res = errno; 117 cmd = IMSG_VMDOP_START_VM_RESPONSE; 118 } 119 if (res == 0 && 120 config_setvm(ps, vm, 121 imsg->hdr.peerid, vm->vm_params.vmc_owner.uid) == -1) { 122 res = errno; 123 cmd = IMSG_VMDOP_START_VM_RESPONSE; 124 } 125 break; 126 case IMSG_VMDOP_WAIT_VM_REQUEST: 127 case IMSG_VMDOP_TERMINATE_VM_REQUEST: 128 IMSG_SIZE_CHECK(imsg, &vid); 129 memcpy(&vid, imsg->data, sizeof(vid)); 130 flags = vid.vid_flags; 131 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 132 133 if ((id = vid.vid_id) == 0) { 134 /* Lookup vm (id) by name */ 135 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 136 res = ENOENT; 137 break; 138 } else if ((vm->vm_state & VM_STATE_SHUTDOWN) && 139 (flags & VMOP_FORCE) == 0) { 140 res = EALREADY; 141 break; 142 } else if (!(vm->vm_state & VM_STATE_RUNNING)) { 143 res = EINVAL; 144 break; 145 } 146 id = vm->vm_vmid; 147 } else if ((vm = vm_getbyvmid(id)) == NULL) { 148 res = ENOENT; 149 break; 150 } 151 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) { 152 res = EPERM; 153 break; 154 } 155 156 /* Only relay TERMINATION requests, not WAIT requests */ 157 if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) { 158 memset(&vid, 0, sizeof(vid)); 159 vid.vid_id = id; 160 vid.vid_flags = flags; 161 162 if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 163 imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1) 164 return (-1); 165 } 166 break; 167 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 168 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 169 break; 170 case IMSG_VMDOP_LOAD: 171 IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */ 172 str = get_string((uint8_t *)imsg->data, 173 IMSG_DATA_SIZE(imsg)); 174 case IMSG_VMDOP_RELOAD: 175 if (vmd_reload(0, str) == -1) 176 cmd = IMSG_CTL_FAIL; 177 else 178 cmd = IMSG_CTL_OK; 179 free(str); 180 break; 181 case IMSG_CTL_RESET: 182 IMSG_SIZE_CHECK(imsg, &v); 183 memcpy(&v, imsg->data, sizeof(v)); 184 if (vmd_reload(v, NULL) == -1) 185 cmd = IMSG_CTL_FAIL; 186 else 187 cmd = IMSG_CTL_OK; 188 break; 189 case IMSG_CTL_VERBOSE: 190 IMSG_SIZE_CHECK(imsg, &verbose); 191 memcpy(&verbose, imsg->data, sizeof(verbose)); 192 log_setverbose(verbose); 193 194 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 195 proc_forward_imsg(ps, imsg, PROC_PRIV, -1); 196 cmd = IMSG_CTL_OK; 197 break; 198 case IMSG_VMDOP_PAUSE_VM: 199 case IMSG_VMDOP_UNPAUSE_VM: 200 IMSG_SIZE_CHECK(imsg, &vid); 201 memcpy(&vid, imsg->data, sizeof(vid)); 202 if (vid.vid_id == 0) { 203 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 204 res = ENOENT; 205 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 206 ? IMSG_VMDOP_PAUSE_VM_RESPONSE 207 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 208 break; 209 } else { 210 vid.vid_id = vm->vm_vmid; 211 } 212 } else if ((vm = vm_getbyid(vid.vid_id)) == NULL) { 213 res = ENOENT; 214 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 215 ? IMSG_VMDOP_PAUSE_VM_RESPONSE 216 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 217 break; 218 } 219 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 220 vid.vid_uid) != 0) { 221 res = EPERM; 222 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM 223 ? IMSG_VMDOP_PAUSE_VM_RESPONSE 224 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 225 break; 226 } 227 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 228 imsg->hdr.peerid, -1, &vid, sizeof(vid)); 229 break; 230 case IMSG_VMDOP_SEND_VM_REQUEST: 231 IMSG_SIZE_CHECK(imsg, &vid); 232 memcpy(&vid, imsg->data, sizeof(vid)); 233 id = vid.vid_id; 234 if (vid.vid_id == 0) { 235 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 236 res = ENOENT; 237 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 238 close(imsg->fd); 239 break; 240 } else { 241 vid.vid_id = vm->vm_vmid; 242 } 243 } else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) { 244 res = ENOENT; 245 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 246 close(imsg->fd); 247 break; 248 } 249 vmr.vmr_id = vid.vid_id; 250 log_debug("%s: sending fd to vmm", __func__); 251 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 252 imsg->hdr.peerid, imsg->fd, &vid, sizeof(vid)); 253 break; 254 case IMSG_VMDOP_RECEIVE_VM_REQUEST: 255 IMSG_SIZE_CHECK(imsg, &vid); 256 memcpy(&vid, imsg->data, sizeof(vid)); 257 if (imsg->fd == -1) { 258 log_warnx("%s: invalid fd", __func__); 259 return (-1); 260 } 261 if (atomicio(read, imsg->fd, &vmh, sizeof(vmh)) != 262 sizeof(vmh)) { 263 log_warnx("%s: error reading vmh from received vm", 264 __func__); 265 res = EIO; 266 close(imsg->fd); 267 cmd = IMSG_VMDOP_START_VM_RESPONSE; 268 break; 269 } 270 271 if (vmd_check_vmh(&vmh)) { 272 res = ENOENT; 273 close(imsg->fd); 274 cmd = IMSG_VMDOP_START_VM_RESPONSE; 275 break; 276 } 277 if (atomicio(read, imsg->fd, &vmc, sizeof(vmc)) != 278 sizeof(vmc)) { 279 log_warnx("%s: error reading vmc from received vm", 280 __func__); 281 res = EIO; 282 close(imsg->fd); 283 cmd = IMSG_VMDOP_START_VM_RESPONSE; 284 break; 285 } 286 strlcpy(vmc.vmc_params.vcp_name, vid.vid_name, 287 sizeof(vmc.vmc_params.vcp_name)); 288 vmc.vmc_params.vcp_id = 0; 289 290 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 291 if (ret != 0) { 292 res = errno; 293 cmd = IMSG_VMDOP_START_VM_RESPONSE; 294 close(imsg->fd); 295 } else { 296 vm->vm_state |= VM_STATE_RECEIVED; 297 config_setvm(ps, vm, imsg->hdr.peerid, 298 vmc.vmc_owner.uid); 299 log_debug("%s: sending fd to vmm", __func__); 300 proc_compose_imsg(ps, PROC_VMM, -1, 301 IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, imsg->fd, 302 NULL, 0); 303 } 304 break; 305 case IMSG_VMDOP_DONE: 306 control_reset(&ps->ps_csock); 307 TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry) 308 control_reset(rcs); 309 cmd = 0; 310 break; 311 default: 312 return (-1); 313 } 314 315 switch (cmd) { 316 case 0: 317 break; 318 case IMSG_VMDOP_START_VM_RESPONSE: 319 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 320 memset(&vmr, 0, sizeof(vmr)); 321 vmr.vmr_result = res; 322 vmr.vmr_id = id; 323 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 324 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 325 return (-1); 326 break; 327 default: 328 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 329 imsg->hdr.peerid, -1, &res, sizeof(res)) == -1) 330 return (-1); 331 break; 332 } 333 334 return (0); 335 } 336 337 int 338 vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg) 339 { 340 struct vmop_result vmr; 341 struct privsep *ps = p->p_ps; 342 int res = 0; 343 struct vmd_vm *vm; 344 struct vm_create_params *vcp; 345 struct vmop_info_result vir; 346 347 switch (imsg->hdr.type) { 348 case IMSG_VMDOP_PAUSE_VM_RESPONSE: 349 IMSG_SIZE_CHECK(imsg, &vmr); 350 memcpy(&vmr, imsg->data, sizeof(vmr)); 351 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 352 break; 353 proc_compose_imsg(ps, PROC_CONTROL, -1, 354 imsg->hdr.type, imsg->hdr.peerid, -1, 355 imsg->data, sizeof(imsg->data)); 356 log_info("%s: paused vm %d successfully", 357 vm->vm_params.vmc_params.vcp_name, 358 vm->vm_vmid); 359 vm->vm_state |= VM_STATE_PAUSED; 360 break; 361 case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: 362 IMSG_SIZE_CHECK(imsg, &vmr); 363 memcpy(&vmr, imsg->data, sizeof(vmr)); 364 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 365 break; 366 proc_compose_imsg(ps, PROC_CONTROL, -1, 367 imsg->hdr.type, imsg->hdr.peerid, -1, 368 imsg->data, sizeof(imsg->data)); 369 log_info("%s: unpaused vm %d successfully.", 370 vm->vm_params.vmc_params.vcp_name, 371 vm->vm_vmid); 372 vm->vm_state &= ~VM_STATE_PAUSED; 373 break; 374 case IMSG_VMDOP_START_VM_RESPONSE: 375 IMSG_SIZE_CHECK(imsg, &vmr); 376 memcpy(&vmr, imsg->data, sizeof(vmr)); 377 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) 378 break; 379 vm->vm_pid = vmr.vmr_pid; 380 vcp = &vm->vm_params.vmc_params; 381 vcp->vcp_id = vmr.vmr_id; 382 383 /* 384 * If the peerid is not -1, forward the response back to the 385 * the control socket. If it is -1, the request originated 386 * from the parent, not the control socket. 387 */ 388 if (vm->vm_peerid != (uint32_t)-1) { 389 (void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname, 390 sizeof(vmr.vmr_ttyname)); 391 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 392 imsg->hdr.type, vm->vm_peerid, -1, 393 &vmr, sizeof(vmr)) == -1) { 394 errno = vmr.vmr_result; 395 log_warn("%s: failed to foward vm result", 396 vcp->vcp_name); 397 vm_remove(vm, __func__); 398 return (-1); 399 } 400 } 401 402 if (vmr.vmr_result) { 403 errno = vmr.vmr_result; 404 log_warn("%s: failed to start vm", vcp->vcp_name); 405 vm_remove(vm, __func__); 406 break; 407 } 408 409 /* Now configure all the interfaces */ 410 if (vm_priv_ifconfig(ps, vm) == -1) { 411 log_warn("%s: failed to configure vm", vcp->vcp_name); 412 vm_remove(vm, __func__); 413 break; 414 } 415 416 log_info("%s: started vm %d successfully, tty %s", 417 vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname); 418 break; 419 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 420 IMSG_SIZE_CHECK(imsg, &vmr); 421 memcpy(&vmr, imsg->data, sizeof(vmr)); 422 423 if (vmr.vmr_result) { 424 DPRINTF("%s: forwarding TERMINATE VM for vm id %d", 425 __func__, vmr.vmr_id); 426 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 427 } else { 428 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 429 break; 430 /* Mark VM as shutting down */ 431 vm->vm_state |= VM_STATE_SHUTDOWN; 432 } 433 break; 434 case IMSG_VMDOP_SEND_VM_RESPONSE: 435 IMSG_SIZE_CHECK(imsg, &vmr); 436 memcpy(&vmr, imsg->data, sizeof(vmr)); 437 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 438 break; 439 if (!vmr.vmr_result) { 440 log_info("%s: sent vm %d successfully.", 441 vm->vm_params.vmc_params.vcp_name, 442 vm->vm_vmid); 443 if (vm->vm_from_config) 444 vm_stop(vm, 0, __func__); 445 else 446 vm_remove(vm, __func__); 447 } 448 449 /* Send a response if a control client is waiting for it */ 450 if (imsg->hdr.peerid != (uint32_t)-1) { 451 /* the error is meaningless for deferred responses */ 452 vmr.vmr_result = 0; 453 454 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 455 IMSG_VMDOP_SEND_VM_RESPONSE, 456 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 457 return (-1); 458 } 459 break; 460 case IMSG_VMDOP_TERMINATE_VM_EVENT: 461 IMSG_SIZE_CHECK(imsg, &vmr); 462 memcpy(&vmr, imsg->data, sizeof(vmr)); 463 DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d", 464 __func__, vmr.vmr_id, vmr.vmr_result); 465 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) { 466 log_debug("%s: vm %d is no longer available", 467 __func__, vmr.vmr_id); 468 break; 469 } 470 if (vmr.vmr_result != EAGAIN || 471 vm->vm_params.vmc_bootdevice) { 472 if (vm->vm_from_config) 473 vm_stop(vm, 0, __func__); 474 else 475 vm_remove(vm, __func__); 476 } else { 477 /* Stop VM instance but keep the tty open */ 478 vm_stop(vm, 1, __func__); 479 config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid); 480 } 481 482 /* The error is meaningless for deferred responses */ 483 vmr.vmr_result = 0; 484 485 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 486 IMSG_VMDOP_TERMINATE_VM_EVENT, 487 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 488 return (-1); 489 break; 490 case IMSG_VMDOP_GET_INFO_VM_DATA: 491 IMSG_SIZE_CHECK(imsg, &vir); 492 memcpy(&vir, imsg->data, sizeof(vir)); 493 if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) { 494 memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname)); 495 if (vm->vm_ttyname != NULL) 496 strlcpy(vir.vir_ttyname, vm->vm_ttyname, 497 sizeof(vir.vir_ttyname)); 498 log_debug("%s: running vm: %d, vm_state: 0x%x", 499 __func__, vm->vm_vmid, vm->vm_state); 500 vir.vir_state = vm->vm_state; 501 /* get the user id who started the vm */ 502 vir.vir_uid = vm->vm_uid; 503 vir.vir_gid = vm->vm_params.vmc_owner.gid; 504 } 505 if (proc_compose_imsg(ps, PROC_CONTROL, -1, imsg->hdr.type, 506 imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { 507 log_debug("%s: GET_INFO_VM failed for vm %d, removing", 508 __func__, vm->vm_vmid); 509 vm_remove(vm, __func__); 510 return (-1); 511 } 512 break; 513 case IMSG_VMDOP_GET_INFO_VM_END_DATA: 514 /* 515 * PROC_VMM has responded with the *running* VMs, now we 516 * append the others. These use the special value 0 for their 517 * kernel id to indicate that they are not running. 518 */ 519 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 520 if (!(vm->vm_state & VM_STATE_RUNNING)) { 521 memset(&vir, 0, sizeof(vir)); 522 vir.vir_info.vir_id = vm->vm_vmid; 523 strlcpy(vir.vir_info.vir_name, 524 vm->vm_params.vmc_params.vcp_name, 525 VMM_MAX_NAME_LEN); 526 vir.vir_info.vir_memory_size = 527 vm->vm_params.vmc_params. 528 vcp_memranges[0].vmr_size; 529 vir.vir_info.vir_ncpus = 530 vm->vm_params.vmc_params.vcp_ncpus; 531 /* get the configured user id for this vm */ 532 vir.vir_uid = vm->vm_params.vmc_owner.uid; 533 vir.vir_gid = vm->vm_params.vmc_owner.gid; 534 log_debug("%s: vm: %d, vm_state: 0x%x", 535 __func__, vm->vm_vmid, vm->vm_state); 536 vir.vir_state = vm->vm_state; 537 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 538 IMSG_VMDOP_GET_INFO_VM_DATA, 539 imsg->hdr.peerid, -1, &vir, 540 sizeof(vir)) == -1) { 541 log_debug("%s: GET_INFO_VM_END failed", 542 __func__); 543 vm_remove(vm, __func__); 544 return (-1); 545 } 546 } 547 } 548 IMSG_SIZE_CHECK(imsg, &res); 549 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 550 break; 551 default: 552 return (-1); 553 } 554 555 return (0); 556 } 557 558 int 559 vmd_dispatch_priv(int fd, struct privsep_proc *p, struct imsg *imsg) 560 { 561 struct vmop_addr_result var; 562 563 switch (imsg->hdr.type) { 564 case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE: 565 IMSG_SIZE_CHECK(imsg, &var); 566 memcpy(&var, imsg->data, sizeof(var)); 567 proc_forward_imsg(p->p_ps, imsg, PROC_VMM, -1); 568 break; 569 default: 570 return (-1); 571 } 572 573 return (0); 574 } 575 576 int 577 vmd_check_vmh(struct vm_dump_header *vmh) 578 { 579 int i; 580 unsigned int code, leaf; 581 unsigned int a, b, c, d; 582 583 if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE, strlen(VM_DUMP_SIGNATURE)) != 0) { 584 log_warnx("%s: incompatible dump signature", __func__); 585 return (-1); 586 } 587 588 if (vmh->vmh_version != VM_DUMP_VERSION) { 589 log_warnx("%s: incompatible dump version", __func__); 590 return (-1); 591 } 592 593 for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) { 594 code = vmh->vmh_cpuids[i].code; 595 leaf = vmh->vmh_cpuids[i].leaf; 596 if (leaf != 0x00) { 597 log_debug("%s: invalid leaf 0x%x for code 0x%x", 598 __func__, leaf, code); 599 return (-1); 600 } 601 602 switch (code) { 603 case 0x00: 604 CPUID_LEAF(code, leaf, a, b, c, d); 605 if (vmh->vmh_cpuids[i].a > a) { 606 log_debug("%s: incompatible cpuid level", 607 __func__); 608 return (-1); 609 } 610 if (!(vmh->vmh_cpuids[i].b == b && 611 vmh->vmh_cpuids[i].c == c && 612 vmh->vmh_cpuids[i].d == d)) { 613 log_debug("%s: incompatible cpu brand", 614 __func__); 615 return (-1); 616 } 617 break; 618 619 case 0x01: 620 CPUID_LEAF(code, leaf, a, b, c, d); 621 if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) != 622 (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) { 623 log_debug("%s: incompatible cpu features " 624 "code: 0x%x leaf: 0x%x reg: c", __func__, 625 code, leaf); 626 return (-1); 627 } 628 if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) != 629 (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) { 630 log_debug("%s: incompatible cpu features " 631 "code: 0x%x leaf: 0x%x reg: d", __func__, 632 code, leaf); 633 return (-1); 634 } 635 break; 636 637 case 0x07: 638 CPUID_LEAF(code, leaf, a, b, c, d); 639 if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) != 640 (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) { 641 log_debug("%s: incompatible cpu features " 642 "code: 0x%x leaf: 0x%x reg: c", __func__, 643 code, leaf); 644 return (-1); 645 } 646 if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) != 647 (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) { 648 log_debug("%s: incompatible cpu features " 649 "code: 0x%x leaf: 0x%x reg: d", __func__, 650 code, leaf); 651 return (-1); 652 } 653 break; 654 655 case 0x0d: 656 CPUID_LEAF(code, leaf, a, b, c, d); 657 if (vmh->vmh_cpuids[i].b > b) { 658 log_debug("%s: incompatible cpu: insufficient " 659 "max save area for enabled XCR0 features", 660 __func__); 661 return (-1); 662 } 663 if (vmh->vmh_cpuids[i].c > c) { 664 log_debug("%s: incompatible cpu: insufficient " 665 "max save area for supported XCR0 features", 666 __func__); 667 return (-1); 668 } 669 break; 670 671 case 0x80000001: 672 CPUID_LEAF(code, leaf, a, b, c, d); 673 if ((vmh->vmh_cpuids[i].a & a) != 674 vmh->vmh_cpuids[i].a) { 675 log_debug("%s: incompatible cpu features " 676 "code: 0x%x leaf: 0x%x reg: a", __func__, 677 code, leaf); 678 return (-1); 679 } 680 if ((vmh->vmh_cpuids[i].c & c) != 681 vmh->vmh_cpuids[i].c) { 682 log_debug("%s: incompatible cpu features " 683 "code: 0x%x leaf: 0x%x reg: c", __func__, 684 code, leaf); 685 return (-1); 686 } 687 if ((vmh->vmh_cpuids[i].d & d) != 688 vmh->vmh_cpuids[i].d) { 689 log_debug("%s: incompatible cpu features " 690 "code: 0x%x leaf: 0x%x reg: d", __func__, 691 code, leaf); 692 return (-1); 693 } 694 break; 695 696 default: 697 log_debug("%s: unknown code 0x%x", __func__, code); 698 return (-1); 699 } 700 } 701 702 return (0); 703 } 704 705 void 706 vmd_sighdlr(int sig, short event, void *arg) 707 { 708 if (privsep_process != PROC_PARENT) 709 return; 710 log_debug("%s: handling signal", __func__); 711 712 switch (sig) { 713 case SIGHUP: 714 log_info("%s: reload requested with SIGHUP", __func__); 715 716 /* 717 * This is safe because libevent uses async signal handlers 718 * that run in the event loop and not in signal context. 719 */ 720 (void)vmd_reload(0, NULL); 721 break; 722 case SIGPIPE: 723 log_info("%s: ignoring SIGPIPE", __func__); 724 break; 725 case SIGUSR1: 726 log_info("%s: ignoring SIGUSR1", __func__); 727 break; 728 case SIGTERM: 729 case SIGINT: 730 vmd_shutdown(); 731 break; 732 default: 733 fatalx("unexpected signal"); 734 } 735 } 736 737 __dead void 738 usage(void) 739 { 740 extern char *__progname; 741 fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n", 742 __progname); 743 exit(1); 744 } 745 746 int 747 main(int argc, char **argv) 748 { 749 struct privsep *ps; 750 int ch; 751 const char *conffile = VMD_CONF; 752 enum privsep_procid proc_id = PROC_PARENT; 753 int proc_instance = 0; 754 const char *errp, *title = NULL; 755 int argc0 = argc; 756 757 log_init(0, LOG_DAEMON); 758 759 if ((env = calloc(1, sizeof(*env))) == NULL) 760 fatal("calloc: env"); 761 762 while ((ch = getopt(argc, argv, "D:P:I:df:vn")) != -1) { 763 switch (ch) { 764 case 'D': 765 if (cmdline_symset(optarg) < 0) 766 log_warnx("could not parse macro definition %s", 767 optarg); 768 break; 769 case 'd': 770 env->vmd_debug = 2; 771 break; 772 case 'f': 773 conffile = optarg; 774 break; 775 case 'v': 776 env->vmd_verbose++; 777 break; 778 case 'n': 779 env->vmd_noaction = 1; 780 break; 781 case 'P': 782 title = optarg; 783 proc_id = proc_getid(procs, nitems(procs), title); 784 if (proc_id == PROC_MAX) 785 fatalx("invalid process name"); 786 break; 787 case 'I': 788 proc_instance = strtonum(optarg, 0, 789 PROC_MAX_INSTANCES, &errp); 790 if (errp) 791 fatalx("invalid process instance"); 792 break; 793 default: 794 usage(); 795 } 796 } 797 798 argc -= optind; 799 if (argc > 0) 800 usage(); 801 802 if (env->vmd_noaction && !env->vmd_debug) 803 env->vmd_debug = 1; 804 805 log_init(env->vmd_debug, LOG_DAEMON); 806 log_setverbose(env->vmd_verbose); 807 808 /* check for root privileges */ 809 if (env->vmd_noaction == 0) { 810 if (geteuid()) 811 fatalx("need root privileges"); 812 } 813 814 ps = &env->vmd_ps; 815 ps->ps_env = env; 816 env->vmd_fd = -1; 817 818 if (config_init(env) == -1) 819 fatal("failed to initialize configuration"); 820 821 if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL) 822 fatal("unknown user %s", VMD_USER); 823 824 /* First proc runs as root without pledge but in default chroot */ 825 proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */ 826 proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */ 827 828 /* Open /dev/vmm */ 829 if (env->vmd_noaction == 0) { 830 env->vmd_fd = open(VMM_NODE, O_RDWR); 831 if (env->vmd_fd == -1) 832 fatal("%s", VMM_NODE); 833 } 834 835 /* Configure the control socket */ 836 ps->ps_csock.cs_name = SOCKET_NAME; 837 TAILQ_INIT(&ps->ps_rcsocks); 838 839 /* Configuration will be parsed after forking the children */ 840 env->vmd_conffile = conffile; 841 842 if (env->vmd_noaction) 843 ps->ps_noaction = 1; 844 ps->ps_instance = proc_instance; 845 if (title != NULL) 846 ps->ps_title[proc_id] = title; 847 848 /* only the parent returns */ 849 proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv, 850 proc_id); 851 852 log_procinit("parent"); 853 if (!env->vmd_debug && daemon(0, 0) == -1) 854 fatal("can't daemonize"); 855 856 if (ps->ps_noaction == 0) 857 log_info("startup"); 858 859 event_init(); 860 861 signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps); 862 signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps); 863 signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps); 864 signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps); 865 signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps); 866 867 signal_add(&ps->ps_evsigint, NULL); 868 signal_add(&ps->ps_evsigterm, NULL); 869 signal_add(&ps->ps_evsighup, NULL); 870 signal_add(&ps->ps_evsigpipe, NULL); 871 signal_add(&ps->ps_evsigusr1, NULL); 872 873 if (!env->vmd_noaction) 874 proc_connect(ps); 875 876 if (vmd_configure() == -1) 877 fatalx("configuration failed"); 878 879 event_dispatch(); 880 881 log_debug("parent exiting"); 882 883 return (0); 884 } 885 886 void 887 start_vm_batch(int fd, short type, void *args) 888 { 889 int i = 0; 890 struct vmd_vm *vm; 891 892 log_debug("%s: starting batch of %d vms", __func__, 893 env->vmd_cfg.parallelism); 894 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 895 if (!(vm->vm_state & VM_STATE_WAITING)) { 896 log_debug("%s: not starting vm %s (disabled)", 897 __func__, 898 vm->vm_params.vmc_params.vcp_name); 899 continue; 900 } 901 i++; 902 if (i > env->vmd_cfg.parallelism) { 903 evtimer_add(&staggered_start_timer, 904 &env->vmd_cfg.delay); 905 break; 906 } 907 vm->vm_state &= ~VM_STATE_WAITING; 908 config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid); 909 } 910 log_debug("%s: done starting vms", __func__); 911 } 912 913 int 914 vmd_configure(void) 915 { 916 int ncpus; 917 struct vmd_switch *vsw; 918 int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE}; 919 size_t ncpus_sz = sizeof(ncpus); 920 921 if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1) 922 fatal("open %s", PATH_PTMDEV); 923 924 /* 925 * pledge in the parent process: 926 * stdio - for malloc and basic I/O including events. 927 * rpath - for reload to open and read the configuration files. 928 * wpath - for opening disk images and tap devices. 929 * tty - for openpty and TIOCUCNTL. 930 * proc - run kill to terminate its children safely. 931 * sendfd - for disks, interfaces and other fds. 932 * recvfd - for send and receive. 933 * getpw - lookup user or group id by name. 934 * chown, fattr - change tty ownership 935 * flock - locking disk files 936 */ 937 if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw" 938 " chown fattr flock", NULL) == -1) 939 fatal("pledge"); 940 941 if (parse_config(env->vmd_conffile) == -1) { 942 proc_kill(&env->vmd_ps); 943 exit(1); 944 } 945 946 if (env->vmd_noaction) { 947 fprintf(stderr, "configuration OK\n"); 948 proc_kill(&env->vmd_ps); 949 exit(0); 950 } 951 952 /* Send shared global configuration to all children */ 953 if (config_setconfig(env) == -1) 954 return (-1); 955 956 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 957 if (vsw->sw_running) 958 continue; 959 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 960 log_warn("%s: failed to create switch %s", 961 __func__, vsw->sw_name); 962 switch_remove(vsw); 963 return (-1); 964 } 965 } 966 967 if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) { 968 env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY; 969 if (sysctl(ncpu_mib, NELEM(ncpu_mib), &ncpus, &ncpus_sz, NULL, 0) == -1) 970 ncpus = 1; 971 env->vmd_cfg.parallelism = ncpus; 972 log_debug("%s: setting staggered start configuration to " 973 "parallelism: %d and delay: %lld", 974 __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec); 975 } 976 977 log_debug("%s: starting vms in staggered fashion", __func__); 978 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 979 /* start first batch */ 980 start_vm_batch(0, 0, NULL); 981 982 return (0); 983 } 984 985 int 986 vmd_reload(unsigned int reset, const char *filename) 987 { 988 struct vmd_vm *vm, *next_vm; 989 struct vmd_switch *vsw; 990 int reload = 0; 991 992 /* Switch back to the default config file */ 993 if (filename == NULL || *filename == '\0') { 994 filename = env->vmd_conffile; 995 reload = 1; 996 } 997 998 log_debug("%s: level %d config file %s", __func__, reset, filename); 999 1000 if (reset) { 1001 /* Purge the configuration */ 1002 config_purge(env, reset); 1003 config_setreset(env, reset); 1004 } else { 1005 /* 1006 * Load or reload the configuration. 1007 * 1008 * Reloading removes all non-running VMs before processing the 1009 * config file, whereas loading only adds to the existing list 1010 * of VMs. 1011 */ 1012 1013 if (reload) { 1014 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, 1015 next_vm) { 1016 if (!(vm->vm_state & VM_STATE_RUNNING)) { 1017 DPRINTF("%s: calling vm_remove", 1018 __func__); 1019 vm_remove(vm, __func__); 1020 } 1021 } 1022 } 1023 1024 if (parse_config(filename) == -1) { 1025 log_debug("%s: failed to load config file %s", 1026 __func__, filename); 1027 return (-1); 1028 } 1029 1030 if (reload) { 1031 /* Update shared global configuration in all children */ 1032 if (config_setconfig(env) == -1) 1033 return (-1); 1034 } 1035 1036 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1037 if (vsw->sw_running) 1038 continue; 1039 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 1040 log_warn("%s: failed to create switch %s", 1041 __func__, vsw->sw_name); 1042 switch_remove(vsw); 1043 return (-1); 1044 } 1045 } 1046 1047 log_debug("%s: starting vms in staggered fashion", __func__); 1048 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 1049 /* start first batch */ 1050 start_vm_batch(0, 0, NULL); 1051 1052 } 1053 1054 return (0); 1055 } 1056 1057 void 1058 vmd_shutdown(void) 1059 { 1060 struct vmd_vm *vm, *vm_next; 1061 1062 log_debug("%s: performing shutdown", __func__); 1063 1064 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) { 1065 vm_remove(vm, __func__); 1066 } 1067 1068 proc_kill(&env->vmd_ps); 1069 free(env); 1070 1071 log_warnx("parent terminating"); 1072 exit(0); 1073 } 1074 1075 struct vmd_vm * 1076 vm_getbyvmid(uint32_t vmid) 1077 { 1078 struct vmd_vm *vm; 1079 1080 if (vmid == 0) 1081 return (NULL); 1082 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1083 if (vm->vm_vmid == vmid) 1084 return (vm); 1085 } 1086 1087 return (NULL); 1088 } 1089 1090 struct vmd_vm * 1091 vm_getbyid(uint32_t id) 1092 { 1093 struct vmd_vm *vm; 1094 1095 if (id == 0) 1096 return (NULL); 1097 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1098 if (vm->vm_params.vmc_params.vcp_id == id) 1099 return (vm); 1100 } 1101 1102 return (NULL); 1103 } 1104 1105 uint32_t 1106 vm_id2vmid(uint32_t id, struct vmd_vm *vm) 1107 { 1108 if (vm == NULL && (vm = vm_getbyid(id)) == NULL) 1109 return (0); 1110 DPRINTF("%s: vmm id %u is vmid %u", __func__, 1111 id, vm->vm_vmid); 1112 return (vm->vm_vmid); 1113 } 1114 1115 uint32_t 1116 vm_vmid2id(uint32_t vmid, struct vmd_vm *vm) 1117 { 1118 if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL) 1119 return (0); 1120 DPRINTF("%s: vmid %u is vmm id %u", __func__, 1121 vmid, vm->vm_params.vmc_params.vcp_id); 1122 return (vm->vm_params.vmc_params.vcp_id); 1123 } 1124 1125 struct vmd_vm * 1126 vm_getbyname(const char *name) 1127 { 1128 struct vmd_vm *vm; 1129 1130 if (name == NULL) 1131 return (NULL); 1132 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1133 if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0) 1134 return (vm); 1135 } 1136 1137 return (NULL); 1138 } 1139 1140 struct vmd_vm * 1141 vm_getbypid(pid_t pid) 1142 { 1143 struct vmd_vm *vm; 1144 1145 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1146 if (vm->vm_pid == pid) 1147 return (vm); 1148 } 1149 1150 return (NULL); 1151 } 1152 1153 void 1154 vm_stop(struct vmd_vm *vm, int keeptty, const char *caller) 1155 { 1156 struct privsep *ps = &env->vmd_ps; 1157 unsigned int i, j; 1158 1159 if (vm == NULL) 1160 return; 1161 1162 log_debug("%s: %s %s stopping vm %d%s", 1163 __func__, ps->ps_title[privsep_process], caller, 1164 vm->vm_vmid, keeptty ? ", keeping tty open" : ""); 1165 1166 vm->vm_state &= ~(VM_STATE_RUNNING | VM_STATE_SHUTDOWN); 1167 1168 user_inc(&vm->vm_params.vmc_params, vm->vm_user, 0); 1169 user_put(vm->vm_user); 1170 1171 if (vm->vm_iev.ibuf.fd != -1) { 1172 event_del(&vm->vm_iev.ev); 1173 close(vm->vm_iev.ibuf.fd); 1174 } 1175 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) { 1176 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 1177 if (vm->vm_disks[i][j] != -1) { 1178 close(vm->vm_disks[i][j]); 1179 vm->vm_disks[i][j] = -1; 1180 } 1181 } 1182 } 1183 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) { 1184 if (vm->vm_ifs[i].vif_fd != -1) { 1185 close(vm->vm_ifs[i].vif_fd); 1186 vm->vm_ifs[i].vif_fd = -1; 1187 } 1188 free(vm->vm_ifs[i].vif_name); 1189 free(vm->vm_ifs[i].vif_switch); 1190 free(vm->vm_ifs[i].vif_group); 1191 vm->vm_ifs[i].vif_name = NULL; 1192 vm->vm_ifs[i].vif_switch = NULL; 1193 vm->vm_ifs[i].vif_group = NULL; 1194 } 1195 if (vm->vm_kernel != -1) { 1196 close(vm->vm_kernel); 1197 vm->vm_kernel = -1; 1198 } 1199 if (vm->vm_cdrom != -1) { 1200 close(vm->vm_cdrom); 1201 vm->vm_cdrom = -1; 1202 } 1203 if (!keeptty) { 1204 vm_closetty(vm); 1205 vm->vm_uid = 0; 1206 } 1207 } 1208 1209 void 1210 vm_remove(struct vmd_vm *vm, const char *caller) 1211 { 1212 struct privsep *ps = &env->vmd_ps; 1213 1214 if (vm == NULL) 1215 return; 1216 1217 log_debug("%s: %s %s removing vm %d from running config", 1218 __func__, ps->ps_title[privsep_process], caller, 1219 vm->vm_vmid); 1220 1221 TAILQ_REMOVE(env->vmd_vms, vm, vm_entry); 1222 1223 user_put(vm->vm_user); 1224 vm_stop(vm, 0, caller); 1225 free(vm); 1226 } 1227 1228 int 1229 vm_claimid(const char *name, int uid, uint32_t *id) 1230 { 1231 struct name2id *n2i = NULL; 1232 1233 TAILQ_FOREACH(n2i, env->vmd_known, entry) 1234 if (strcmp(n2i->name, name) == 0 && n2i->uid == uid) 1235 goto out; 1236 1237 if (++env->vmd_nvm == 0) { 1238 log_warnx("too many vms"); 1239 return -1; 1240 } 1241 if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) { 1242 log_warnx("could not alloc vm name"); 1243 return -1; 1244 } 1245 n2i->id = env->vmd_nvm; 1246 n2i->uid = uid; 1247 if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) { 1248 log_warnx("vm name too long"); 1249 free(n2i); 1250 return -1; 1251 } 1252 TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry); 1253 1254 out: 1255 *id = n2i->id; 1256 return 0; 1257 } 1258 1259 int 1260 vm_register(struct privsep *ps, struct vmop_create_params *vmc, 1261 struct vmd_vm **ret_vm, uint32_t id, uid_t uid) 1262 { 1263 struct vmd_vm *vm = NULL, *vm_parent = NULL; 1264 struct vm_create_params *vcp = &vmc->vmc_params; 1265 struct vmop_owner *vmo = NULL; 1266 struct vmd_user *usr = NULL; 1267 uint32_t nid, rng; 1268 unsigned int i, j; 1269 struct vmd_switch *sw; 1270 char *s; 1271 int ret = 0; 1272 1273 /* Check if this is an instance of another VM */ 1274 if ((ret = vm_instance(ps, &vm_parent, vmc, uid)) != 0) { 1275 errno = ret; /* XXX might set invalid errno */ 1276 return (-1); 1277 } 1278 1279 errno = 0; 1280 *ret_vm = NULL; 1281 1282 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1283 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1284 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 1285 uid) != 0) { 1286 errno = EPERM; 1287 goto fail; 1288 } 1289 *ret_vm = vm; 1290 errno = EALREADY; 1291 goto fail; 1292 } 1293 1294 if (vm_parent != NULL) 1295 vmo = &vm_parent->vm_params.vmc_insowner; 1296 1297 /* non-root users can only start existing VMs or instances */ 1298 if (vm_checkperm(NULL, vmo, uid) != 0) { 1299 log_warnx("permission denied"); 1300 errno = EPERM; 1301 goto fail; 1302 } 1303 if (vmc->vmc_flags == 0) { 1304 log_warnx("invalid configuration, no devices"); 1305 errno = VMD_DISK_MISSING; 1306 goto fail; 1307 } 1308 if (vcp->vcp_ncpus == 0) 1309 vcp->vcp_ncpus = 1; 1310 if (vcp->vcp_memranges[0].vmr_size == 0) 1311 vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY; 1312 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) { 1313 log_warnx("invalid number of CPUs"); 1314 goto fail; 1315 } else if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM) { 1316 log_warnx("invalid number of disks"); 1317 goto fail; 1318 } else if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM) { 1319 log_warnx("invalid number of interfaces"); 1320 goto fail; 1321 } else if (strlen(vcp->vcp_kernel) == 0 && 1322 vcp->vcp_ndisks == 0 && strlen(vcp->vcp_cdrom) == 0) { 1323 log_warnx("no kernel or disk/cdrom specified"); 1324 goto fail; 1325 } else if (strlen(vcp->vcp_name) == 0) { 1326 log_warnx("invalid VM name"); 1327 goto fail; 1328 } else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' || 1329 *vcp->vcp_name == '_') { 1330 log_warnx("invalid VM name"); 1331 goto fail; 1332 } else { 1333 for (s = vcp->vcp_name; *s != '\0'; ++s) { 1334 if (!(isalnum(*s) || *s == '.' || *s == '-' || 1335 *s == '_')) { 1336 log_warnx("invalid VM name"); 1337 goto fail; 1338 } 1339 } 1340 } 1341 1342 /* track active users */ 1343 if (uid != 0 && env->vmd_users != NULL && 1344 (usr = user_get(uid)) == NULL) { 1345 log_warnx("could not add user"); 1346 goto fail; 1347 } 1348 1349 if ((vm = calloc(1, sizeof(*vm))) == NULL) 1350 goto fail; 1351 1352 memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params)); 1353 vmc = &vm->vm_params; 1354 vcp = &vmc->vmc_params; 1355 vm->vm_pid = -1; 1356 vm->vm_tty = -1; 1357 vm->vm_receive_fd = -1; 1358 vm->vm_state &= ~VM_STATE_PAUSED; 1359 vm->vm_user = usr; 1360 1361 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) 1362 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 1363 vm->vm_disks[i][j] = -1; 1364 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) 1365 vm->vm_ifs[i].vif_fd = -1; 1366 for (i = 0; i < vcp->vcp_nnics; i++) { 1367 if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) { 1368 /* inherit per-interface flags from the switch */ 1369 vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK); 1370 } 1371 1372 /* 1373 * If the MAC address is zero, always randomize it in vmd(8) 1374 * because we cannot rely on the guest OS to do the right 1375 * thing like OpenBSD does. Based on ether_fakeaddr() 1376 * from the kernel, incremented by one to differentiate 1377 * the source. 1378 */ 1379 if (memcmp(zero_mac, &vcp->vcp_macs[i], ETHER_ADDR_LEN) == 0) { 1380 rng = arc4random(); 1381 vcp->vcp_macs[i][0] = 0xfe; 1382 vcp->vcp_macs[i][1] = 0xe1; 1383 vcp->vcp_macs[i][2] = 0xba + 1; 1384 vcp->vcp_macs[i][3] = 0xd0 | ((i + 1) & 0xf); 1385 vcp->vcp_macs[i][4] = rng; 1386 vcp->vcp_macs[i][5] = rng >> 8; 1387 } 1388 } 1389 vm->vm_kernel = -1; 1390 vm->vm_cdrom = -1; 1391 vm->vm_iev.ibuf.fd = -1; 1392 1393 /* 1394 * Assign a new internal Id if not specified and we succeed in 1395 * claiming a new Id. 1396 */ 1397 if (id != 0) 1398 vm->vm_vmid = id; 1399 else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1) 1400 goto fail; 1401 else 1402 vm->vm_vmid = nid; 1403 1404 log_debug("%s: registering vm %d", __func__, vm->vm_vmid); 1405 TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry); 1406 1407 *ret_vm = vm; 1408 return (0); 1409 fail: 1410 if (errno == 0) 1411 errno = EINVAL; 1412 return (-1); 1413 } 1414 1415 int 1416 vm_instance(struct privsep *ps, struct vmd_vm **vm_parent, 1417 struct vmop_create_params *vmc, uid_t uid) 1418 { 1419 char *name; 1420 struct vm_create_params *vcp = &vmc->vmc_params; 1421 struct vmop_create_params *vmcp; 1422 struct vm_create_params *vcpp; 1423 struct vmd_vm *vm = NULL; 1424 unsigned int i, j; 1425 uint32_t id; 1426 1427 /* return without error if the parent is NULL (nothing to inherit) */ 1428 if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 || 1429 vmc->vmc_instance[0] == '\0') 1430 return (0); 1431 1432 if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL) { 1433 return (VMD_PARENT_INVALID); 1434 } 1435 1436 vmcp = &(*vm_parent)->vm_params; 1437 vcpp = &vmcp->vmc_params; 1438 1439 /* Are we allowed to create an instance from this VM? */ 1440 if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) { 1441 log_warnx("vm \"%s\" no permission to create vm instance", 1442 vcpp->vcp_name); 1443 return (ENAMETOOLONG); 1444 } 1445 1446 id = vcp->vcp_id; 1447 name = vcp->vcp_name; 1448 1449 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1450 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1451 return (EPROCLIM); 1452 } 1453 1454 /* CPU */ 1455 if (vcp->vcp_ncpus == 0) 1456 vcp->vcp_ncpus = vcpp->vcp_ncpus; 1457 if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 && 1458 vcp->vcp_ncpus != vcpp->vcp_ncpus) { 1459 log_warnx("vm \"%s\" no permission to set cpus", name); 1460 return (EPERM); 1461 } 1462 1463 /* memory */ 1464 if (vcp->vcp_memranges[0].vmr_size == 0) 1465 vcp->vcp_memranges[0].vmr_size = 1466 vcpp->vcp_memranges[0].vmr_size; 1467 if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 && 1468 vcp->vcp_memranges[0].vmr_size != 1469 vcpp->vcp_memranges[0].vmr_size) { 1470 log_warnx("vm \"%s\" no permission to set memory", name); 1471 return (EPERM); 1472 } 1473 1474 /* disks cannot be inherited */ 1475 if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 && 1476 vcp->vcp_ndisks) { 1477 log_warnx("vm \"%s\" no permission to set disks", name); 1478 return (EPERM); 1479 } 1480 for (i = 0; i < vcp->vcp_ndisks; i++) { 1481 /* Check if this disk is already used in the parent */ 1482 for (j = 0; j < vcpp->vcp_ndisks; j++) { 1483 if (strcmp(vcp->vcp_disks[i], 1484 vcpp->vcp_disks[j]) == 0) { 1485 log_warnx("vm \"%s\" disk %s cannot be reused", 1486 name, vcp->vcp_disks[i]); 1487 return (EBUSY); 1488 } 1489 } 1490 vmc->vmc_checkaccess |= VMOP_CREATE_DISK; 1491 } 1492 1493 /* interfaces */ 1494 if (vcp->vcp_nnics > 0 && 1495 vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 && 1496 vcp->vcp_nnics != vcpp->vcp_nnics) { 1497 log_warnx("vm \"%s\" no permission to set interfaces", name); 1498 return (EPERM); 1499 } 1500 for (i = 0; i < vcpp->vcp_nnics; i++) { 1501 /* Interface got overwritten */ 1502 if (i < vcp->vcp_nnics) 1503 continue; 1504 1505 /* Copy interface from parent */ 1506 vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i]; 1507 (void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i], 1508 sizeof(vmc->vmc_ifnames[i])); 1509 (void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i], 1510 sizeof(vmc->vmc_ifswitch[i])); 1511 (void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i], 1512 sizeof(vmc->vmc_ifgroup[i])); 1513 memcpy(vcp->vcp_macs[i], vcpp->vcp_macs[i], 1514 sizeof(vcp->vcp_macs[i])); 1515 vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i]; 1516 vcp->vcp_nnics++; 1517 } 1518 for (i = 0; i < vcp->vcp_nnics; i++) { 1519 for (j = 0; j < vcpp->vcp_nnics; j++) { 1520 if (memcmp(zero_mac, vcp->vcp_macs[i], 1521 sizeof(vcp->vcp_macs[i])) != 0 && 1522 memcmp(vcpp->vcp_macs[i], vcp->vcp_macs[i], 1523 sizeof(vcp->vcp_macs[i])) != 0) { 1524 log_warnx("vm \"%s\" lladdr cannot be reused", 1525 name); 1526 return (EBUSY); 1527 } 1528 if (strlen(vmc->vmc_ifnames[i]) && 1529 strcmp(vmc->vmc_ifnames[i], 1530 vmcp->vmc_ifnames[j]) == 0) { 1531 log_warnx("vm \"%s\" %s cannot be reused", 1532 vmc->vmc_ifnames[i], name); 1533 return (EBUSY); 1534 } 1535 } 1536 } 1537 1538 /* kernel */ 1539 if (strlen(vcp->vcp_kernel) > 0) { 1540 if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) { 1541 log_warnx("vm \"%s\" no permission to set boot image", 1542 name); 1543 return (EPERM); 1544 } 1545 vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL; 1546 } else if (strlcpy(vcp->vcp_kernel, vcpp->vcp_kernel, 1547 sizeof(vcp->vcp_kernel)) >= sizeof(vcp->vcp_kernel)) { 1548 log_warnx("vm \"%s\" kernel name too long", name); 1549 return (EINVAL); 1550 } 1551 1552 /* cdrom */ 1553 if (strlen(vcp->vcp_cdrom) > 0) { 1554 if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) { 1555 log_warnx("vm \"%s\" no permission to set cdrom", name); 1556 return (EPERM); 1557 } 1558 vmc->vmc_checkaccess |= VMOP_CREATE_CDROM; 1559 } else if (strlcpy(vcp->vcp_cdrom, vcpp->vcp_cdrom, 1560 sizeof(vcp->vcp_cdrom)) >= sizeof(vcp->vcp_cdrom)) { 1561 log_warnx("vm \"%s\" cdrom name too long", name); 1562 return (EINVAL); 1563 } 1564 1565 /* user */ 1566 if (vmc->vmc_owner.uid == 0) 1567 vmc->vmc_owner.uid = vmcp->vmc_owner.uid; 1568 else if (vmc->vmc_owner.uid != uid && 1569 vmc->vmc_owner.uid != vmcp->vmc_owner.uid) { 1570 log_warnx("vm \"%s\" user mismatch", name); 1571 return (EPERM); 1572 } 1573 1574 /* group */ 1575 if (vmc->vmc_owner.gid == 0) 1576 vmc->vmc_owner.gid = vmcp->vmc_owner.gid; 1577 else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) { 1578 log_warnx("vm \"%s\" group mismatch", name); 1579 return (EPERM); 1580 } 1581 1582 /* child instances */ 1583 if (vmc->vmc_insflags) { 1584 log_warnx("vm \"%s\" cannot change instance permissions", name); 1585 return (EPERM); 1586 } 1587 if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) { 1588 vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid; 1589 vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid; 1590 vmc->vmc_insflags = vmcp->vmc_insflags; 1591 } else { 1592 vmc->vmc_insowner.gid = 0; 1593 vmc->vmc_insowner.uid = 0; 1594 vmc->vmc_insflags = 0; 1595 } 1596 1597 /* finished, remove instance flags */ 1598 vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE; 1599 1600 return (0); 1601 } 1602 1603 /* 1604 * vm_checkperm 1605 * 1606 * Checks if the user represented by the 'uid' parameter is allowed to 1607 * manipulate the VM described by the 'vm' parameter (or connect to said VM's 1608 * console.) 1609 * 1610 * Parameters: 1611 * vm: the VM whose permission is to be checked 1612 * vmo: the required uid/gid to be checked 1613 * uid: the user ID of the user making the request 1614 * 1615 * Return values: 1616 * 0: the permission should be granted 1617 * -1: the permission check failed (also returned if vm == null) 1618 */ 1619 int 1620 vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid) 1621 { 1622 struct group *gr; 1623 struct passwd *pw; 1624 char **grmem; 1625 1626 /* root has no restrictions */ 1627 if (uid == 0) 1628 return (0); 1629 1630 if (vmo == NULL) 1631 return (-1); 1632 1633 /* check user */ 1634 if (vm == NULL) { 1635 if (vmo->uid == uid) 1636 return (0); 1637 } else { 1638 /* 1639 * check user of running vm (the owner of a running vm can 1640 * be different to (or more specific than) the configured owner. 1641 */ 1642 if (((vm->vm_state & VM_STATE_RUNNING) && vm->vm_uid == uid) || 1643 (!(vm->vm_state & VM_STATE_RUNNING) && vmo->uid == uid)) 1644 return (0); 1645 } 1646 1647 /* check groups */ 1648 if (vmo->gid != -1) { 1649 if ((pw = getpwuid(uid)) == NULL) 1650 return (-1); 1651 if (pw->pw_gid == vmo->gid) 1652 return (0); 1653 if ((gr = getgrgid(vmo->gid)) != NULL) { 1654 for (grmem = gr->gr_mem; *grmem; grmem++) 1655 if (strcmp(*grmem, pw->pw_name) == 0) 1656 return (0); 1657 } 1658 } 1659 1660 return (-1); 1661 } 1662 1663 /* 1664 * vm_checkinsflag 1665 * 1666 * Checks wheter the non-root user is allowed to set an instance option. 1667 * 1668 * Parameters: 1669 * vmc: the VM create parameters 1670 * flag: the flag to be checked 1671 * uid: the user ID of the user making the request 1672 * 1673 * Return values: 1674 * 0: the permission should be granted 1675 * -1: the permission check failed (also returned if vm == null) 1676 */ 1677 int 1678 vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid) 1679 { 1680 /* root has no restrictions */ 1681 if (uid == 0) 1682 return (0); 1683 1684 if ((vmc->vmc_insflags & flag) == 0) 1685 return (-1); 1686 1687 return (0); 1688 } 1689 1690 /* 1691 * vm_checkaccess 1692 * 1693 * Checks if the user represented by the 'uid' parameter is allowed to 1694 * access the file described by the 'path' parameter. 1695 * 1696 * Parameters: 1697 * fd: the file descriptor of the opened file 1698 * uflag: check if the userid has access to the file 1699 * uid: the user ID of the user making the request 1700 * amode: the access flags of R_OK and W_OK 1701 * 1702 * Return values: 1703 * 0: the permission should be granted 1704 * -1: the permission check failed 1705 */ 1706 int 1707 vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode) 1708 { 1709 struct group *gr; 1710 struct passwd *pw; 1711 char **grmem; 1712 struct stat st; 1713 mode_t mode; 1714 1715 if (fd == -1) 1716 return (-1); 1717 1718 /* 1719 * File has to be accessible and a regular file 1720 */ 1721 if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode)) 1722 return (-1); 1723 1724 /* root has no restrictions */ 1725 if (uid == 0 || uflag == 0) 1726 return (0); 1727 1728 /* check other */ 1729 mode = amode & W_OK ? S_IWOTH : 0; 1730 mode |= amode & R_OK ? S_IROTH : 0; 1731 if ((st.st_mode & mode) == mode) 1732 return (0); 1733 1734 /* check user */ 1735 mode = amode & W_OK ? S_IWUSR : 0; 1736 mode |= amode & R_OK ? S_IRUSR : 0; 1737 if (uid == st.st_uid && (st.st_mode & mode) == mode) 1738 return (0); 1739 1740 /* check groups */ 1741 mode = amode & W_OK ? S_IWGRP : 0; 1742 mode |= amode & R_OK ? S_IRGRP : 0; 1743 if ((st.st_mode & mode) != mode) 1744 return (-1); 1745 if ((pw = getpwuid(uid)) == NULL) 1746 return (-1); 1747 if (pw->pw_gid == st.st_gid) 1748 return (0); 1749 if ((gr = getgrgid(st.st_gid)) != NULL) { 1750 for (grmem = gr->gr_mem; *grmem; grmem++) 1751 if (strcmp(*grmem, pw->pw_name) == 0) 1752 return (0); 1753 } 1754 1755 return (-1); 1756 } 1757 1758 int 1759 vm_opentty(struct vmd_vm *vm) 1760 { 1761 struct ptmget ptm; 1762 struct stat st; 1763 struct group *gr; 1764 uid_t uid; 1765 gid_t gid; 1766 mode_t mode; 1767 int on; 1768 1769 /* 1770 * Open tty with pre-opened PTM fd 1771 */ 1772 if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1)) 1773 return (-1); 1774 1775 /* 1776 * We use user ioctl(2) mode to pass break commands. 1777 */ 1778 on = 1; 1779 if (ioctl(ptm.cfd, TIOCUCNTL, &on) == -1) 1780 fatal("could not enable user ioctl mode"); 1781 1782 vm->vm_tty = ptm.cfd; 1783 close(ptm.sfd); 1784 if ((vm->vm_ttyname = strdup(ptm.sn)) == NULL) 1785 goto fail; 1786 1787 uid = vm->vm_uid; 1788 gid = vm->vm_params.vmc_owner.gid; 1789 1790 if (vm->vm_params.vmc_owner.gid != -1) { 1791 mode = 0660; 1792 } else if ((gr = getgrnam("tty")) != NULL) { 1793 gid = gr->gr_gid; 1794 mode = 0620; 1795 } else { 1796 mode = 0600; 1797 gid = 0; 1798 } 1799 1800 log_debug("%s: vm %s tty %s uid %d gid %d mode %o", 1801 __func__, vm->vm_params.vmc_params.vcp_name, 1802 vm->vm_ttyname, uid, gid, mode); 1803 1804 /* 1805 * Change ownership and mode of the tty as required. 1806 * Loosely based on the implementation of sshpty.c 1807 */ 1808 if (stat(vm->vm_ttyname, &st) == -1) 1809 goto fail; 1810 1811 if (st.st_uid != uid || st.st_gid != gid) { 1812 if (chown(vm->vm_ttyname, uid, gid) == -1) { 1813 log_warn("chown %s %d %d failed, uid %d", 1814 vm->vm_ttyname, uid, gid, getuid()); 1815 1816 /* Ignore failure on read-only filesystems */ 1817 if (!((errno == EROFS) && 1818 (st.st_uid == uid || st.st_uid == 0))) 1819 goto fail; 1820 } 1821 } 1822 1823 if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) { 1824 if (chmod(vm->vm_ttyname, mode) == -1) { 1825 log_warn("chmod %s %o failed, uid %d", 1826 vm->vm_ttyname, mode, getuid()); 1827 1828 /* Ignore failure on read-only filesystems */ 1829 if (!((errno == EROFS) && 1830 (st.st_uid == uid || st.st_uid == 0))) 1831 goto fail; 1832 } 1833 } 1834 1835 return (0); 1836 fail: 1837 vm_closetty(vm); 1838 return (-1); 1839 } 1840 1841 void 1842 vm_closetty(struct vmd_vm *vm) 1843 { 1844 if (vm->vm_tty != -1) { 1845 /* Release and close the tty */ 1846 if (fchown(vm->vm_tty, 0, 0) == -1) 1847 log_warn("chown %s 0 0 failed", vm->vm_ttyname); 1848 if (fchmod(vm->vm_tty, 0666) == -1) 1849 log_warn("chmod %s 0666 failed", vm->vm_ttyname); 1850 close(vm->vm_tty); 1851 vm->vm_tty = -1; 1852 } 1853 free(vm->vm_ttyname); 1854 vm->vm_ttyname = NULL; 1855 } 1856 1857 void 1858 switch_remove(struct vmd_switch *vsw) 1859 { 1860 if (vsw == NULL) 1861 return; 1862 1863 TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry); 1864 1865 free(vsw->sw_group); 1866 free(vsw->sw_name); 1867 free(vsw); 1868 } 1869 1870 struct vmd_switch * 1871 switch_getbyname(const char *name) 1872 { 1873 struct vmd_switch *vsw; 1874 1875 if (name == NULL) 1876 return (NULL); 1877 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1878 if (strcmp(vsw->sw_name, name) == 0) 1879 return (vsw); 1880 } 1881 1882 return (NULL); 1883 } 1884 1885 struct vmd_user * 1886 user_get(uid_t uid) 1887 { 1888 struct vmd_user *usr; 1889 1890 if (uid == 0) 1891 return (NULL); 1892 1893 /* first try to find an existing user */ 1894 TAILQ_FOREACH(usr, env->vmd_users, usr_entry) { 1895 if (usr->usr_id.uid == uid) 1896 goto done; 1897 } 1898 1899 if ((usr = calloc(1, sizeof(*usr))) == NULL) { 1900 log_warn("could not allocate user"); 1901 return (NULL); 1902 } 1903 1904 usr->usr_id.uid = uid; 1905 usr->usr_id.gid = -1; 1906 TAILQ_INSERT_TAIL(env->vmd_users, usr, usr_entry); 1907 1908 done: 1909 DPRINTF("%s: uid %d #%d +", 1910 __func__, usr->usr_id.uid, usr->usr_refcnt + 1); 1911 usr->usr_refcnt++; 1912 1913 return (usr); 1914 } 1915 1916 void 1917 user_put(struct vmd_user *usr) 1918 { 1919 if (usr == NULL) 1920 return; 1921 1922 DPRINTF("%s: uid %d #%d -", 1923 __func__, usr->usr_id.uid, usr->usr_refcnt - 1); 1924 1925 if (--usr->usr_refcnt > 0) 1926 return; 1927 1928 TAILQ_REMOVE(env->vmd_users, usr, usr_entry); 1929 free(usr); 1930 } 1931 1932 void 1933 user_inc(struct vm_create_params *vcp, struct vmd_user *usr, int inc) 1934 { 1935 char mem[FMT_SCALED_STRSIZE]; 1936 1937 if (usr == NULL) 1938 return; 1939 1940 /* increment or decrement counters */ 1941 inc = inc ? 1 : -1; 1942 1943 usr->usr_maxcpu += vcp->vcp_ncpus * inc; 1944 usr->usr_maxmem += vcp->vcp_memranges[0].vmr_size * inc; 1945 usr->usr_maxifs += vcp->vcp_nnics * inc; 1946 1947 if (log_getverbose() > 1) { 1948 (void)fmt_scaled(usr->usr_maxmem * 1024 * 1024, mem); 1949 log_debug("%s: %c uid %d ref %d cpu %llu mem %s ifs %llu", 1950 __func__, inc == 1 ? '+' : '-', 1951 usr->usr_id.uid, usr->usr_refcnt, 1952 usr->usr_maxcpu, mem, usr->usr_maxifs); 1953 } 1954 } 1955 1956 int 1957 user_checklimit(struct vmd_user *usr, struct vm_create_params *vcp) 1958 { 1959 const char *limit = ""; 1960 1961 /* XXX make the limits configurable */ 1962 if (usr->usr_maxcpu > VM_DEFAULT_USER_MAXCPU) { 1963 limit = "cpu "; 1964 goto fail; 1965 } 1966 if (usr->usr_maxmem > VM_DEFAULT_USER_MAXMEM) { 1967 limit = "memory "; 1968 goto fail; 1969 } 1970 if (usr->usr_maxifs > VM_DEFAULT_USER_MAXIFS) { 1971 limit = "interface "; 1972 goto fail; 1973 } 1974 1975 return (0); 1976 1977 fail: 1978 log_warnx("%s: user %d %slimit reached", vcp->vcp_name, 1979 usr->usr_id.uid, limit); 1980 return (-1); 1981 } 1982 1983 char * 1984 get_string(uint8_t *ptr, size_t len) 1985 { 1986 size_t i; 1987 1988 for (i = 0; i < len; i++) 1989 if (!isprint(ptr[i])) 1990 break; 1991 1992 return strndup(ptr, i); 1993 } 1994 1995 uint32_t 1996 prefixlen2mask(uint8_t prefixlen) 1997 { 1998 if (prefixlen == 0) 1999 return (0); 2000 2001 if (prefixlen > 32) 2002 prefixlen = 32; 2003 2004 return (htonl(0xffffffff << (32 - prefixlen))); 2005 } 2006 2007 void 2008 prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask) 2009 { 2010 struct in6_addr s6; 2011 int i; 2012 2013 if (prefixlen > 128) 2014 prefixlen = 128; 2015 2016 memset(&s6, 0, sizeof(s6)); 2017 for (i = 0; i < prefixlen / 8; i++) 2018 s6.s6_addr[i] = 0xff; 2019 i = prefixlen % 8; 2020 if (i) 2021 s6.s6_addr[prefixlen / 8] = 0xff00 >> i; 2022 2023 memcpy(mask, &s6, sizeof(s6)); 2024 } 2025 2026 void 2027 getmonotime(struct timeval *tv) 2028 { 2029 struct timespec ts; 2030 2031 if (clock_gettime(CLOCK_MONOTONIC, &ts)) 2032 fatal("clock_gettime"); 2033 2034 TIMESPEC_TO_TIMEVAL(tv, &ts); 2035 } 2036