1 /* $OpenBSD: vmd.c,v 1.119 2020/09/23 19:18:18 martijn Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> /* nitems */ 20 #include <sys/queue.h> 21 #include <sys/wait.h> 22 #include <sys/cdefs.h> 23 #include <sys/stat.h> 24 #include <sys/sysctl.h> 25 #include <sys/tty.h> 26 #include <sys/ttycom.h> 27 #include <sys/ioctl.h> 28 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <termios.h> 33 #include <errno.h> 34 #include <event.h> 35 #include <fcntl.h> 36 #include <pwd.h> 37 #include <signal.h> 38 #include <syslog.h> 39 #include <unistd.h> 40 #include <util.h> 41 #include <ctype.h> 42 #include <pwd.h> 43 #include <grp.h> 44 45 #include <machine/specialreg.h> 46 #include <machine/vmmvar.h> 47 48 #include "proc.h" 49 #include "atomicio.h" 50 #include "vmd.h" 51 52 __dead void usage(void); 53 54 int main(int, char **); 55 int vmd_configure(void); 56 void vmd_sighdlr(int sig, short event, void *arg); 57 void vmd_shutdown(void); 58 int vmd_control_run(void); 59 int vmd_dispatch_control(int, struct privsep_proc *, struct imsg *); 60 int vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *); 61 int vmd_check_vmh(struct vm_dump_header *); 62 63 int vm_instance(struct privsep *, struct vmd_vm **, 64 struct vmop_create_params *, uid_t); 65 int vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t); 66 int vm_claimid(const char *, int, uint32_t *); 67 void start_vm_batch(int, short, void*); 68 69 struct vmd *env; 70 71 static struct privsep_proc procs[] = { 72 /* Keep "priv" on top as procs[0] */ 73 { "priv", PROC_PRIV, NULL, priv }, 74 { "control", PROC_CONTROL, vmd_dispatch_control, control }, 75 { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, vmm_shutdown }, 76 }; 77 78 struct event staggered_start_timer; 79 80 /* For the privileged process */ 81 static struct privsep_proc *proc_priv = &procs[0]; 82 static struct passwd proc_privpw; 83 static const uint8_t zero_mac[ETHER_ADDR_LEN]; 84 85 int 86 vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg) 87 { 88 struct privsep *ps = p->p_ps; 89 int res = 0, ret = 0, cmd = 0, verbose; 90 unsigned int v = 0, flags; 91 struct vmop_create_params vmc; 92 struct vmop_id vid; 93 struct vmop_result vmr; 94 struct vm_dump_header vmh; 95 struct vmd_vm *vm = NULL; 96 char *str = NULL; 97 uint32_t id = 0; 98 struct control_sock *rcs; 99 100 switch (imsg->hdr.type) { 101 case IMSG_VMDOP_START_VM_REQUEST: 102 IMSG_SIZE_CHECK(imsg, &vmc); 103 memcpy(&vmc, imsg->data, sizeof(vmc)); 104 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 105 if (vmc.vmc_flags == 0) { 106 /* start an existing VM with pre-configured options */ 107 if (!(ret == -1 && errno == EALREADY && 108 !(vm->vm_state & VM_STATE_RUNNING))) { 109 res = errno; 110 cmd = IMSG_VMDOP_START_VM_RESPONSE; 111 } 112 } else if (ret != 0) { 113 res = errno; 114 cmd = IMSG_VMDOP_START_VM_RESPONSE; 115 } 116 if (res == 0 && 117 config_setvm(ps, vm, 118 imsg->hdr.peerid, vm->vm_params.vmc_owner.uid) == -1) { 119 res = errno; 120 cmd = IMSG_VMDOP_START_VM_RESPONSE; 121 } 122 break; 123 case IMSG_VMDOP_WAIT_VM_REQUEST: 124 case IMSG_VMDOP_TERMINATE_VM_REQUEST: 125 IMSG_SIZE_CHECK(imsg, &vid); 126 memcpy(&vid, imsg->data, sizeof(vid)); 127 flags = vid.vid_flags; 128 129 if ((id = vid.vid_id) == 0) { 130 /* Lookup vm (id) by name */ 131 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 132 res = ENOENT; 133 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 134 break; 135 } else if ((vm->vm_state & VM_STATE_SHUTDOWN) && 136 (flags & VMOP_FORCE) == 0) { 137 res = EALREADY; 138 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 139 break; 140 } else if (!(vm->vm_state & VM_STATE_RUNNING)) { 141 res = EINVAL; 142 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 143 break; 144 } 145 id = vm->vm_vmid; 146 } else if ((vm = vm_getbyvmid(id)) == NULL) { 147 res = ENOENT; 148 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 149 break; 150 } 151 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 152 vid.vid_uid) != 0) { 153 res = EPERM; 154 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 155 break; 156 } 157 158 memset(&vid, 0, sizeof(vid)); 159 vid.vid_id = id; 160 vid.vid_flags = flags; 161 if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 162 imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1) 163 return (-1); 164 break; 165 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 166 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 167 break; 168 case IMSG_VMDOP_LOAD: 169 IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */ 170 str = get_string((uint8_t *)imsg->data, 171 IMSG_DATA_SIZE(imsg)); 172 case IMSG_VMDOP_RELOAD: 173 if (vmd_reload(0, str) == -1) 174 cmd = IMSG_CTL_FAIL; 175 else 176 cmd = IMSG_CTL_OK; 177 free(str); 178 break; 179 case IMSG_CTL_RESET: 180 IMSG_SIZE_CHECK(imsg, &v); 181 memcpy(&v, imsg->data, sizeof(v)); 182 if (vmd_reload(v, NULL) == -1) 183 cmd = IMSG_CTL_FAIL; 184 else 185 cmd = IMSG_CTL_OK; 186 break; 187 case IMSG_CTL_VERBOSE: 188 IMSG_SIZE_CHECK(imsg, &verbose); 189 memcpy(&verbose, imsg->data, sizeof(verbose)); 190 log_setverbose(verbose); 191 192 proc_forward_imsg(ps, imsg, PROC_VMM, -1); 193 proc_forward_imsg(ps, imsg, PROC_PRIV, -1); 194 cmd = IMSG_CTL_OK; 195 break; 196 case IMSG_VMDOP_PAUSE_VM: 197 case IMSG_VMDOP_UNPAUSE_VM: 198 IMSG_SIZE_CHECK(imsg, &vid); 199 memcpy(&vid, imsg->data, sizeof(vid)); 200 if (vid.vid_id == 0) { 201 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 202 res = ENOENT; 203 cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE; 204 break; 205 } else { 206 vid.vid_id = vm->vm_vmid; 207 } 208 } else if ((vm = vm_getbyid(vid.vid_id)) == NULL) { 209 res = ENOENT; 210 cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE; 211 break; 212 } 213 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 214 vid.vid_uid) != 0) { 215 res = EPERM; 216 cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE; 217 break; 218 } 219 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 220 imsg->hdr.peerid, -1, &vid, sizeof(vid)); 221 break; 222 case IMSG_VMDOP_SEND_VM_REQUEST: 223 IMSG_SIZE_CHECK(imsg, &vid); 224 memcpy(&vid, imsg->data, sizeof(vid)); 225 id = vid.vid_id; 226 if (vid.vid_id == 0) { 227 if ((vm = vm_getbyname(vid.vid_name)) == NULL) { 228 res = ENOENT; 229 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 230 close(imsg->fd); 231 break; 232 } else { 233 vid.vid_id = vm->vm_vmid; 234 } 235 } else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) { 236 res = ENOENT; 237 cmd = IMSG_VMDOP_SEND_VM_RESPONSE; 238 close(imsg->fd); 239 break; 240 } 241 vmr.vmr_id = vid.vid_id; 242 log_debug("%s: sending fd to vmm", __func__); 243 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, 244 imsg->hdr.peerid, imsg->fd, &vid, sizeof(vid)); 245 break; 246 case IMSG_VMDOP_RECEIVE_VM_REQUEST: 247 IMSG_SIZE_CHECK(imsg, &vid); 248 memcpy(&vid, imsg->data, sizeof(vid)); 249 if (imsg->fd == -1) { 250 log_warnx("%s: invalid fd", __func__); 251 return (-1); 252 } 253 if (atomicio(read, imsg->fd, &vmh, sizeof(vmh)) != 254 sizeof(vmh)) { 255 log_warnx("%s: error reading vmh from received vm", 256 __func__); 257 res = EIO; 258 close(imsg->fd); 259 cmd = IMSG_VMDOP_START_VM_RESPONSE; 260 break; 261 } 262 263 if (vmd_check_vmh(&vmh)) { 264 res = ENOENT; 265 close(imsg->fd); 266 cmd = IMSG_VMDOP_START_VM_RESPONSE; 267 break; 268 } 269 if (atomicio(read, imsg->fd, &vmc, sizeof(vmc)) != 270 sizeof(vmc)) { 271 log_warnx("%s: error reading vmc from received vm", 272 __func__); 273 res = EIO; 274 close(imsg->fd); 275 cmd = IMSG_VMDOP_START_VM_RESPONSE; 276 break; 277 } 278 strlcpy(vmc.vmc_params.vcp_name, vid.vid_name, 279 sizeof(vmc.vmc_params.vcp_name)); 280 vmc.vmc_params.vcp_id = 0; 281 282 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); 283 if (ret != 0) { 284 res = errno; 285 cmd = IMSG_VMDOP_START_VM_RESPONSE; 286 close(imsg->fd); 287 } else { 288 vm->vm_state |= VM_STATE_RECEIVED; 289 config_setvm(ps, vm, imsg->hdr.peerid, 290 vmc.vmc_owner.uid); 291 log_debug("%s: sending fd to vmm", __func__); 292 proc_compose_imsg(ps, PROC_VMM, -1, 293 IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, imsg->fd, 294 NULL, 0); 295 } 296 break; 297 case IMSG_VMDOP_DONE: 298 control_reset(&ps->ps_csock); 299 TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry) 300 control_reset(rcs); 301 cmd = 0; 302 break; 303 default: 304 return (-1); 305 } 306 307 switch (cmd) { 308 case 0: 309 break; 310 case IMSG_VMDOP_START_VM_RESPONSE: 311 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 312 memset(&vmr, 0, sizeof(vmr)); 313 vmr.vmr_result = res; 314 vmr.vmr_id = id; 315 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 316 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 317 return (-1); 318 break; 319 default: 320 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, 321 imsg->hdr.peerid, -1, &res, sizeof(res)) == -1) 322 return (-1); 323 break; 324 } 325 326 return (0); 327 } 328 329 int 330 vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg) 331 { 332 struct vmop_result vmr; 333 struct privsep *ps = p->p_ps; 334 int res = 0; 335 struct vmd_vm *vm; 336 struct vm_create_params *vcp; 337 struct vmop_info_result vir; 338 339 switch (imsg->hdr.type) { 340 case IMSG_VMDOP_PAUSE_VM_RESPONSE: 341 IMSG_SIZE_CHECK(imsg, &vmr); 342 memcpy(&vmr, imsg->data, sizeof(vmr)); 343 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 344 break; 345 proc_compose_imsg(ps, PROC_CONTROL, -1, 346 imsg->hdr.type, imsg->hdr.peerid, -1, 347 imsg->data, sizeof(imsg->data)); 348 log_info("%s: paused vm %d successfully", 349 vm->vm_params.vmc_params.vcp_name, 350 vm->vm_vmid); 351 vm->vm_state |= VM_STATE_PAUSED; 352 break; 353 case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: 354 IMSG_SIZE_CHECK(imsg, &vmr); 355 memcpy(&vmr, imsg->data, sizeof(vmr)); 356 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 357 break; 358 proc_compose_imsg(ps, PROC_CONTROL, -1, 359 imsg->hdr.type, imsg->hdr.peerid, -1, 360 imsg->data, sizeof(imsg->data)); 361 log_info("%s: unpaused vm %d successfully.", 362 vm->vm_params.vmc_params.vcp_name, 363 vm->vm_vmid); 364 vm->vm_state &= ~VM_STATE_PAUSED; 365 break; 366 case IMSG_VMDOP_START_VM_RESPONSE: 367 IMSG_SIZE_CHECK(imsg, &vmr); 368 memcpy(&vmr, imsg->data, sizeof(vmr)); 369 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) 370 break; 371 vm->vm_pid = vmr.vmr_pid; 372 vcp = &vm->vm_params.vmc_params; 373 vcp->vcp_id = vmr.vmr_id; 374 375 /* 376 * If the peerid is not -1, forward the response back to the 377 * the control socket. If it is -1, the request originated 378 * from the parent, not the control socket. 379 */ 380 if (vm->vm_peerid != (uint32_t)-1) { 381 (void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname, 382 sizeof(vmr.vmr_ttyname)); 383 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 384 imsg->hdr.type, vm->vm_peerid, -1, 385 &vmr, sizeof(vmr)) == -1) { 386 errno = vmr.vmr_result; 387 log_warn("%s: failed to foward vm result", 388 vcp->vcp_name); 389 vm_remove(vm, __func__); 390 return (-1); 391 } 392 } 393 394 if (vmr.vmr_result) { 395 errno = vmr.vmr_result; 396 log_warn("%s: failed to start vm", vcp->vcp_name); 397 vm_remove(vm, __func__); 398 break; 399 } 400 401 /* Now configure all the interfaces */ 402 if (vm_priv_ifconfig(ps, vm) == -1) { 403 log_warn("%s: failed to configure vm", vcp->vcp_name); 404 vm_remove(vm, __func__); 405 break; 406 } 407 408 log_info("%s: started vm %d successfully, tty %s", 409 vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname); 410 break; 411 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 412 IMSG_SIZE_CHECK(imsg, &vmr); 413 memcpy(&vmr, imsg->data, sizeof(vmr)); 414 DPRINTF("%s: forwarding TERMINATE VM for vm id %d", 415 __func__, vmr.vmr_id); 416 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 417 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 418 break; 419 if (vmr.vmr_result == 0) { 420 /* Mark VM as shutting down */ 421 vm->vm_state |= VM_STATE_SHUTDOWN; 422 } 423 break; 424 case IMSG_VMDOP_SEND_VM_RESPONSE: 425 IMSG_SIZE_CHECK(imsg, &vmr); 426 memcpy(&vmr, imsg->data, sizeof(vmr)); 427 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) 428 break; 429 if (!vmr.vmr_result) { 430 log_info("%s: sent vm %d successfully.", 431 vm->vm_params.vmc_params.vcp_name, 432 vm->vm_vmid); 433 if (vm->vm_from_config) 434 vm_stop(vm, 0, __func__); 435 else 436 vm_remove(vm, __func__); 437 } 438 439 /* Send a response if a control client is waiting for it */ 440 if (imsg->hdr.peerid != (uint32_t)-1) { 441 /* the error is meaningless for deferred responses */ 442 vmr.vmr_result = 0; 443 444 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 445 IMSG_VMDOP_SEND_VM_RESPONSE, 446 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 447 return (-1); 448 } 449 break; 450 case IMSG_VMDOP_TERMINATE_VM_EVENT: 451 IMSG_SIZE_CHECK(imsg, &vmr); 452 memcpy(&vmr, imsg->data, sizeof(vmr)); 453 DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d", 454 __func__, vmr.vmr_id, vmr.vmr_result); 455 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) { 456 log_debug("%s: vm %d is no longer available", 457 __func__, vmr.vmr_id); 458 break; 459 } 460 if (vmr.vmr_result != EAGAIN || 461 vm->vm_params.vmc_bootdevice) { 462 if (vm->vm_from_config) 463 vm_stop(vm, 0, __func__); 464 else 465 vm_remove(vm, __func__); 466 } else { 467 /* Stop VM instance but keep the tty open */ 468 vm_stop(vm, 1, __func__); 469 config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid); 470 } 471 472 /* Send a response if a control client is waiting for it */ 473 if (imsg->hdr.peerid != (uint32_t)-1) { 474 /* the error is meaningless for deferred responses */ 475 vmr.vmr_result = 0; 476 477 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 478 IMSG_VMDOP_TERMINATE_VM_RESPONSE, 479 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) 480 return (-1); 481 } 482 break; 483 case IMSG_VMDOP_GET_INFO_VM_DATA: 484 IMSG_SIZE_CHECK(imsg, &vir); 485 memcpy(&vir, imsg->data, sizeof(vir)); 486 if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) { 487 memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname)); 488 if (vm->vm_ttyname != NULL) 489 strlcpy(vir.vir_ttyname, vm->vm_ttyname, 490 sizeof(vir.vir_ttyname)); 491 log_debug("%s: running vm: %d, vm_state: 0x%x", 492 __func__, vm->vm_vmid, vm->vm_state); 493 vir.vir_state = vm->vm_state; 494 /* get the user id who started the vm */ 495 vir.vir_uid = vm->vm_uid; 496 vir.vir_gid = vm->vm_params.vmc_owner.gid; 497 } 498 if (proc_compose_imsg(ps, PROC_CONTROL, -1, imsg->hdr.type, 499 imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { 500 log_debug("%s: GET_INFO_VM failed for vm %d, removing", 501 __func__, vm->vm_vmid); 502 vm_remove(vm, __func__); 503 return (-1); 504 } 505 break; 506 case IMSG_VMDOP_GET_INFO_VM_END_DATA: 507 /* 508 * PROC_VMM has responded with the *running* VMs, now we 509 * append the others. These use the special value 0 for their 510 * kernel id to indicate that they are not running. 511 */ 512 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 513 if (!(vm->vm_state & VM_STATE_RUNNING)) { 514 memset(&vir, 0, sizeof(vir)); 515 vir.vir_info.vir_id = vm->vm_vmid; 516 strlcpy(vir.vir_info.vir_name, 517 vm->vm_params.vmc_params.vcp_name, 518 VMM_MAX_NAME_LEN); 519 vir.vir_info.vir_memory_size = 520 vm->vm_params.vmc_params. 521 vcp_memranges[0].vmr_size; 522 vir.vir_info.vir_ncpus = 523 vm->vm_params.vmc_params.vcp_ncpus; 524 /* get the configured user id for this vm */ 525 vir.vir_uid = vm->vm_params.vmc_owner.uid; 526 vir.vir_gid = vm->vm_params.vmc_owner.gid; 527 log_debug("%s: vm: %d, vm_state: 0x%x", 528 __func__, vm->vm_vmid, vm->vm_state); 529 vir.vir_state = vm->vm_state; 530 if (proc_compose_imsg(ps, PROC_CONTROL, -1, 531 IMSG_VMDOP_GET_INFO_VM_DATA, 532 imsg->hdr.peerid, -1, &vir, 533 sizeof(vir)) == -1) { 534 log_debug("%s: GET_INFO_VM_END failed", 535 __func__); 536 vm_remove(vm, __func__); 537 return (-1); 538 } 539 } 540 } 541 IMSG_SIZE_CHECK(imsg, &res); 542 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); 543 break; 544 default: 545 return (-1); 546 } 547 548 return (0); 549 } 550 551 int 552 vmd_check_vmh(struct vm_dump_header *vmh) 553 { 554 int i; 555 unsigned int code, leaf; 556 unsigned int a, b, c, d; 557 558 if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE, strlen(VM_DUMP_SIGNATURE)) != 0) { 559 log_warnx("%s: incompatible dump signature", __func__); 560 return (-1); 561 } 562 563 if (vmh->vmh_version != VM_DUMP_VERSION) { 564 log_warnx("%s: incompatible dump version", __func__); 565 return (-1); 566 } 567 568 for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) { 569 code = vmh->vmh_cpuids[i].code; 570 leaf = vmh->vmh_cpuids[i].leaf; 571 if (leaf != 0x00) { 572 log_debug("%s: invalid leaf 0x%x for code 0x%x", 573 __func__, leaf, code); 574 return (-1); 575 } 576 577 switch (code) { 578 case 0x00: 579 CPUID_LEAF(code, leaf, a, b, c, d); 580 if (vmh->vmh_cpuids[i].a > a) { 581 log_debug("%s: incompatible cpuid level", 582 __func__); 583 return (-1); 584 } 585 if (!(vmh->vmh_cpuids[i].b == b && 586 vmh->vmh_cpuids[i].c == c && 587 vmh->vmh_cpuids[i].d == d)) { 588 log_debug("%s: incompatible cpu brand", 589 __func__); 590 return (-1); 591 } 592 break; 593 594 case 0x01: 595 CPUID_LEAF(code, leaf, a, b, c, d); 596 if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) != 597 (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) { 598 log_debug("%s: incompatible cpu features " 599 "code: 0x%x leaf: 0x%x reg: c", __func__, 600 code, leaf); 601 return (-1); 602 } 603 if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) != 604 (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) { 605 log_debug("%s: incompatible cpu features " 606 "code: 0x%x leaf: 0x%x reg: d", __func__, 607 code, leaf); 608 return (-1); 609 } 610 break; 611 612 case 0x07: 613 CPUID_LEAF(code, leaf, a, b, c, d); 614 if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) != 615 (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) { 616 log_debug("%s: incompatible cpu features " 617 "code: 0x%x leaf: 0x%x reg: c", __func__, 618 code, leaf); 619 return (-1); 620 } 621 if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) != 622 (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) { 623 log_debug("%s: incompatible cpu features " 624 "code: 0x%x leaf: 0x%x reg: d", __func__, 625 code, leaf); 626 return (-1); 627 } 628 break; 629 630 case 0x0d: 631 CPUID_LEAF(code, leaf, a, b, c, d); 632 if (vmh->vmh_cpuids[i].b > b) { 633 log_debug("%s: incompatible cpu: insufficient " 634 "max save area for enabled XCR0 features", 635 __func__); 636 return (-1); 637 } 638 if (vmh->vmh_cpuids[i].c > c) { 639 log_debug("%s: incompatible cpu: insufficient " 640 "max save area for supported XCR0 features", 641 __func__); 642 return (-1); 643 } 644 break; 645 646 case 0x80000001: 647 CPUID_LEAF(code, leaf, a, b, c, d); 648 if ((vmh->vmh_cpuids[i].a & a) != 649 vmh->vmh_cpuids[i].a) { 650 log_debug("%s: incompatible cpu features " 651 "code: 0x%x leaf: 0x%x reg: a", __func__, 652 code, leaf); 653 return (-1); 654 } 655 if ((vmh->vmh_cpuids[i].c & c) != 656 vmh->vmh_cpuids[i].c) { 657 log_debug("%s: incompatible cpu features " 658 "code: 0x%x leaf: 0x%x reg: c", __func__, 659 code, leaf); 660 return (-1); 661 } 662 if ((vmh->vmh_cpuids[i].d & d) != 663 vmh->vmh_cpuids[i].d) { 664 log_debug("%s: incompatible cpu features " 665 "code: 0x%x leaf: 0x%x reg: d", __func__, 666 code, leaf); 667 return (-1); 668 } 669 break; 670 671 default: 672 log_debug("%s: unknown code 0x%x", __func__, code); 673 return (-1); 674 } 675 } 676 677 return (0); 678 } 679 680 void 681 vmd_sighdlr(int sig, short event, void *arg) 682 { 683 if (privsep_process != PROC_PARENT) 684 return; 685 log_debug("%s: handling signal", __func__); 686 687 switch (sig) { 688 case SIGHUP: 689 log_info("%s: reload requested with SIGHUP", __func__); 690 691 /* 692 * This is safe because libevent uses async signal handlers 693 * that run in the event loop and not in signal context. 694 */ 695 (void)vmd_reload(0, NULL); 696 break; 697 case SIGPIPE: 698 log_info("%s: ignoring SIGPIPE", __func__); 699 break; 700 case SIGUSR1: 701 log_info("%s: ignoring SIGUSR1", __func__); 702 break; 703 case SIGTERM: 704 case SIGINT: 705 vmd_shutdown(); 706 break; 707 default: 708 fatalx("unexpected signal"); 709 } 710 } 711 712 __dead void 713 usage(void) 714 { 715 extern char *__progname; 716 fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n", 717 __progname); 718 exit(1); 719 } 720 721 int 722 main(int argc, char **argv) 723 { 724 struct privsep *ps; 725 int ch; 726 const char *conffile = VMD_CONF; 727 enum privsep_procid proc_id = PROC_PARENT; 728 int proc_instance = 0; 729 const char *errp, *title = NULL; 730 int argc0 = argc; 731 732 log_init(0, LOG_DAEMON); 733 734 if ((env = calloc(1, sizeof(*env))) == NULL) 735 fatal("calloc: env"); 736 737 while ((ch = getopt(argc, argv, "D:P:I:df:vn")) != -1) { 738 switch (ch) { 739 case 'D': 740 if (cmdline_symset(optarg) < 0) 741 log_warnx("could not parse macro definition %s", 742 optarg); 743 break; 744 case 'd': 745 env->vmd_debug = 2; 746 break; 747 case 'f': 748 conffile = optarg; 749 break; 750 case 'v': 751 env->vmd_verbose++; 752 break; 753 case 'n': 754 env->vmd_noaction = 1; 755 break; 756 case 'P': 757 title = optarg; 758 proc_id = proc_getid(procs, nitems(procs), title); 759 if (proc_id == PROC_MAX) 760 fatalx("invalid process name"); 761 break; 762 case 'I': 763 proc_instance = strtonum(optarg, 0, 764 PROC_MAX_INSTANCES, &errp); 765 if (errp) 766 fatalx("invalid process instance"); 767 break; 768 default: 769 usage(); 770 } 771 } 772 773 argc -= optind; 774 if (argc > 0) 775 usage(); 776 777 if (env->vmd_noaction && !env->vmd_debug) 778 env->vmd_debug = 1; 779 780 /* check for root privileges */ 781 if (env->vmd_noaction == 0) { 782 if (geteuid()) 783 fatalx("need root privileges"); 784 } 785 786 ps = &env->vmd_ps; 787 ps->ps_env = env; 788 env->vmd_fd = -1; 789 790 if (config_init(env) == -1) 791 fatal("failed to initialize configuration"); 792 793 if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL) 794 fatal("unknown user %s", VMD_USER); 795 796 /* First proc runs as root without pledge but in default chroot */ 797 proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */ 798 proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */ 799 800 /* Open /dev/vmm */ 801 if (env->vmd_noaction == 0) { 802 env->vmd_fd = open(VMM_NODE, O_RDWR); 803 if (env->vmd_fd == -1) 804 fatal("%s", VMM_NODE); 805 } 806 807 /* Configure the control socket */ 808 ps->ps_csock.cs_name = SOCKET_NAME; 809 TAILQ_INIT(&ps->ps_rcsocks); 810 811 /* Configuration will be parsed after forking the children */ 812 env->vmd_conffile = conffile; 813 814 log_init(env->vmd_debug, LOG_DAEMON); 815 log_setverbose(env->vmd_verbose); 816 817 if (env->vmd_noaction) 818 ps->ps_noaction = 1; 819 ps->ps_instance = proc_instance; 820 if (title != NULL) 821 ps->ps_title[proc_id] = title; 822 823 /* only the parent returns */ 824 proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv, 825 proc_id); 826 827 log_procinit("parent"); 828 if (!env->vmd_debug && daemon(0, 0) == -1) 829 fatal("can't daemonize"); 830 831 if (ps->ps_noaction == 0) 832 log_info("startup"); 833 834 event_init(); 835 836 signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps); 837 signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps); 838 signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps); 839 signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps); 840 signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps); 841 842 signal_add(&ps->ps_evsigint, NULL); 843 signal_add(&ps->ps_evsigterm, NULL); 844 signal_add(&ps->ps_evsighup, NULL); 845 signal_add(&ps->ps_evsigpipe, NULL); 846 signal_add(&ps->ps_evsigusr1, NULL); 847 848 if (!env->vmd_noaction) 849 proc_connect(ps); 850 851 if (vmd_configure() == -1) 852 fatalx("configuration failed"); 853 854 event_dispatch(); 855 856 log_debug("parent exiting"); 857 858 return (0); 859 } 860 861 void 862 start_vm_batch(int fd, short type, void *args) 863 { 864 int i = 0; 865 struct vmd_vm *vm; 866 867 log_debug("%s: starting batch of %d vms", __func__, 868 env->vmd_cfg.parallelism); 869 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 870 if (!(vm->vm_state & VM_STATE_WAITING)) { 871 log_debug("%s: not starting vm %s (disabled)", 872 __func__, 873 vm->vm_params.vmc_params.vcp_name); 874 continue; 875 } 876 i++; 877 if (i > env->vmd_cfg.parallelism) { 878 evtimer_add(&staggered_start_timer, 879 &env->vmd_cfg.delay); 880 break; 881 } 882 vm->vm_state &= ~VM_STATE_WAITING; 883 config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid); 884 } 885 log_debug("%s: done starting vms", __func__); 886 } 887 888 int 889 vmd_configure(void) 890 { 891 int ncpus; 892 struct vmd_switch *vsw; 893 int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE}; 894 size_t ncpus_sz = sizeof(ncpus); 895 896 if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1) 897 fatal("open %s", PATH_PTMDEV); 898 899 /* 900 * pledge in the parent process: 901 * stdio - for malloc and basic I/O including events. 902 * rpath - for reload to open and read the configuration files. 903 * wpath - for opening disk images and tap devices. 904 * tty - for openpty and TIOCUCNTL. 905 * proc - run kill to terminate its children safely. 906 * sendfd - for disks, interfaces and other fds. 907 * recvfd - for send and receive. 908 * getpw - lookup user or group id by name. 909 * chown, fattr - change tty ownership 910 * flock - locking disk files 911 */ 912 if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw" 913 " chown fattr flock", NULL) == -1) 914 fatal("pledge"); 915 916 if (parse_config(env->vmd_conffile) == -1) { 917 proc_kill(&env->vmd_ps); 918 exit(1); 919 } 920 921 if (env->vmd_noaction) { 922 fprintf(stderr, "configuration OK\n"); 923 proc_kill(&env->vmd_ps); 924 exit(0); 925 } 926 927 /* Send shared global configuration to all children */ 928 if (config_setconfig(env) == -1) 929 return (-1); 930 931 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 932 if (vsw->sw_running) 933 continue; 934 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 935 log_warn("%s: failed to create switch %s", 936 __func__, vsw->sw_name); 937 switch_remove(vsw); 938 return (-1); 939 } 940 } 941 942 if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) { 943 env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY; 944 if (sysctl(ncpu_mib, NELEM(ncpu_mib), &ncpus, &ncpus_sz, NULL, 0) == -1) 945 ncpus = 1; 946 env->vmd_cfg.parallelism = ncpus; 947 log_debug("%s: setting staggered start configuration to " 948 "parallelism: %d and delay: %lld", 949 __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec); 950 } 951 952 log_debug("%s: starting vms in staggered fashion", __func__); 953 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 954 /* start first batch */ 955 start_vm_batch(0, 0, NULL); 956 957 return (0); 958 } 959 960 int 961 vmd_reload(unsigned int reset, const char *filename) 962 { 963 struct vmd_vm *vm, *next_vm; 964 struct vmd_switch *vsw; 965 int reload = 0; 966 967 /* Switch back to the default config file */ 968 if (filename == NULL || *filename == '\0') { 969 filename = env->vmd_conffile; 970 reload = 1; 971 } 972 973 log_debug("%s: level %d config file %s", __func__, reset, filename); 974 975 if (reset) { 976 /* Purge the configuration */ 977 config_purge(env, reset); 978 config_setreset(env, reset); 979 } else { 980 /* 981 * Load or reload the configuration. 982 * 983 * Reloading removes all non-running VMs before processing the 984 * config file, whereas loading only adds to the existing list 985 * of VMs. 986 */ 987 988 if (reload) { 989 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, 990 next_vm) { 991 if (!(vm->vm_state & VM_STATE_RUNNING)) { 992 DPRINTF("%s: calling vm_remove", 993 __func__); 994 vm_remove(vm, __func__); 995 } 996 } 997 } 998 999 if (parse_config(filename) == -1) { 1000 log_debug("%s: failed to load config file %s", 1001 __func__, filename); 1002 return (-1); 1003 } 1004 1005 if (reload) { 1006 /* Update shared global configuration in all children */ 1007 if (config_setconfig(env) == -1) 1008 return (-1); 1009 } 1010 1011 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1012 if (vsw->sw_running) 1013 continue; 1014 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { 1015 log_warn("%s: failed to create switch %s", 1016 __func__, vsw->sw_name); 1017 switch_remove(vsw); 1018 return (-1); 1019 } 1020 } 1021 1022 log_debug("%s: starting vms in staggered fashion", __func__); 1023 evtimer_set(&staggered_start_timer, start_vm_batch, NULL); 1024 /* start first batch */ 1025 start_vm_batch(0, 0, NULL); 1026 1027 } 1028 1029 return (0); 1030 } 1031 1032 void 1033 vmd_shutdown(void) 1034 { 1035 struct vmd_vm *vm, *vm_next; 1036 1037 log_debug("%s: performing shutdown", __func__); 1038 1039 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) { 1040 vm_remove(vm, __func__); 1041 } 1042 1043 proc_kill(&env->vmd_ps); 1044 free(env); 1045 1046 log_warnx("parent terminating"); 1047 exit(0); 1048 } 1049 1050 struct vmd_vm * 1051 vm_getbyvmid(uint32_t vmid) 1052 { 1053 struct vmd_vm *vm; 1054 1055 if (vmid == 0) 1056 return (NULL); 1057 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1058 if (vm->vm_vmid == vmid) 1059 return (vm); 1060 } 1061 1062 return (NULL); 1063 } 1064 1065 struct vmd_vm * 1066 vm_getbyid(uint32_t id) 1067 { 1068 struct vmd_vm *vm; 1069 1070 if (id == 0) 1071 return (NULL); 1072 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1073 if (vm->vm_params.vmc_params.vcp_id == id) 1074 return (vm); 1075 } 1076 1077 return (NULL); 1078 } 1079 1080 uint32_t 1081 vm_id2vmid(uint32_t id, struct vmd_vm *vm) 1082 { 1083 if (vm == NULL && (vm = vm_getbyid(id)) == NULL) 1084 return (0); 1085 DPRINTF("%s: vmm id %u is vmid %u", __func__, 1086 id, vm->vm_vmid); 1087 return (vm->vm_vmid); 1088 } 1089 1090 uint32_t 1091 vm_vmid2id(uint32_t vmid, struct vmd_vm *vm) 1092 { 1093 if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL) 1094 return (0); 1095 DPRINTF("%s: vmid %u is vmm id %u", __func__, 1096 vmid, vm->vm_params.vmc_params.vcp_id); 1097 return (vm->vm_params.vmc_params.vcp_id); 1098 } 1099 1100 struct vmd_vm * 1101 vm_getbyname(const char *name) 1102 { 1103 struct vmd_vm *vm; 1104 1105 if (name == NULL) 1106 return (NULL); 1107 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1108 if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0) 1109 return (vm); 1110 } 1111 1112 return (NULL); 1113 } 1114 1115 struct vmd_vm * 1116 vm_getbypid(pid_t pid) 1117 { 1118 struct vmd_vm *vm; 1119 1120 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 1121 if (vm->vm_pid == pid) 1122 return (vm); 1123 } 1124 1125 return (NULL); 1126 } 1127 1128 void 1129 vm_stop(struct vmd_vm *vm, int keeptty, const char *caller) 1130 { 1131 struct privsep *ps = &env->vmd_ps; 1132 unsigned int i, j; 1133 1134 if (vm == NULL) 1135 return; 1136 1137 log_debug("%s: %s %s stopping vm %d%s", 1138 __func__, ps->ps_title[privsep_process], caller, 1139 vm->vm_vmid, keeptty ? ", keeping tty open" : ""); 1140 1141 vm->vm_state &= ~(VM_STATE_RUNNING | VM_STATE_SHUTDOWN); 1142 1143 user_inc(&vm->vm_params.vmc_params, vm->vm_user, 0); 1144 user_put(vm->vm_user); 1145 1146 if (vm->vm_iev.ibuf.fd != -1) { 1147 event_del(&vm->vm_iev.ev); 1148 close(vm->vm_iev.ibuf.fd); 1149 } 1150 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) { 1151 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 1152 if (vm->vm_disks[i][j] != -1) { 1153 close(vm->vm_disks[i][j]); 1154 vm->vm_disks[i][j] = -1; 1155 } 1156 } 1157 } 1158 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) { 1159 if (vm->vm_ifs[i].vif_fd != -1) { 1160 close(vm->vm_ifs[i].vif_fd); 1161 vm->vm_ifs[i].vif_fd = -1; 1162 } 1163 free(vm->vm_ifs[i].vif_name); 1164 free(vm->vm_ifs[i].vif_switch); 1165 free(vm->vm_ifs[i].vif_group); 1166 vm->vm_ifs[i].vif_name = NULL; 1167 vm->vm_ifs[i].vif_switch = NULL; 1168 vm->vm_ifs[i].vif_group = NULL; 1169 } 1170 if (vm->vm_kernel != -1) { 1171 close(vm->vm_kernel); 1172 vm->vm_kernel = -1; 1173 } 1174 if (vm->vm_cdrom != -1) { 1175 close(vm->vm_cdrom); 1176 vm->vm_cdrom = -1; 1177 } 1178 if (!keeptty) { 1179 vm_closetty(vm); 1180 vm->vm_uid = 0; 1181 } 1182 } 1183 1184 void 1185 vm_remove(struct vmd_vm *vm, const char *caller) 1186 { 1187 struct privsep *ps = &env->vmd_ps; 1188 1189 if (vm == NULL) 1190 return; 1191 1192 log_debug("%s: %s %s removing vm %d from running config", 1193 __func__, ps->ps_title[privsep_process], caller, 1194 vm->vm_vmid); 1195 1196 TAILQ_REMOVE(env->vmd_vms, vm, vm_entry); 1197 1198 user_put(vm->vm_user); 1199 vm_stop(vm, 0, caller); 1200 free(vm); 1201 } 1202 1203 int 1204 vm_claimid(const char *name, int uid, uint32_t *id) 1205 { 1206 struct name2id *n2i = NULL; 1207 1208 TAILQ_FOREACH(n2i, env->vmd_known, entry) 1209 if (strcmp(n2i->name, name) == 0 && n2i->uid == uid) 1210 goto out; 1211 1212 if (++env->vmd_nvm == 0) { 1213 log_warnx("too many vms"); 1214 return -1; 1215 } 1216 if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) { 1217 log_warnx("could not alloc vm name"); 1218 return -1; 1219 } 1220 n2i->id = env->vmd_nvm; 1221 n2i->uid = uid; 1222 if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) { 1223 log_warnx("vm name too long"); 1224 free(n2i); 1225 return -1; 1226 } 1227 TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry); 1228 1229 out: 1230 *id = n2i->id; 1231 return 0; 1232 } 1233 1234 int 1235 vm_register(struct privsep *ps, struct vmop_create_params *vmc, 1236 struct vmd_vm **ret_vm, uint32_t id, uid_t uid) 1237 { 1238 struct vmd_vm *vm = NULL, *vm_parent = NULL; 1239 struct vm_create_params *vcp = &vmc->vmc_params; 1240 struct vmop_owner *vmo = NULL; 1241 struct vmd_user *usr = NULL; 1242 uint32_t nid, rng; 1243 unsigned int i, j; 1244 struct vmd_switch *sw; 1245 char *s; 1246 1247 /* Check if this is an instance of another VM */ 1248 if (vm_instance(ps, &vm_parent, vmc, uid) == -1) 1249 return (-1); 1250 1251 errno = 0; 1252 *ret_vm = NULL; 1253 1254 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1255 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1256 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, 1257 uid) != 0) { 1258 errno = EPERM; 1259 goto fail; 1260 } 1261 *ret_vm = vm; 1262 errno = EALREADY; 1263 goto fail; 1264 } 1265 1266 if (vm_parent != NULL) 1267 vmo = &vm_parent->vm_params.vmc_insowner; 1268 1269 /* non-root users can only start existing VMs or instances */ 1270 if (vm_checkperm(NULL, vmo, uid) != 0) { 1271 log_warnx("permission denied"); 1272 errno = EPERM; 1273 goto fail; 1274 } 1275 if (vmc->vmc_flags == 0) { 1276 log_warnx("invalid configuration, no devices"); 1277 errno = VMD_DISK_MISSING; 1278 goto fail; 1279 } 1280 if (vcp->vcp_ncpus == 0) 1281 vcp->vcp_ncpus = 1; 1282 if (vcp->vcp_memranges[0].vmr_size == 0) 1283 vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY; 1284 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) { 1285 log_warnx("invalid number of CPUs"); 1286 goto fail; 1287 } else if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM) { 1288 log_warnx("invalid number of disks"); 1289 goto fail; 1290 } else if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM) { 1291 log_warnx("invalid number of interfaces"); 1292 goto fail; 1293 } else if (strlen(vcp->vcp_kernel) == 0 && 1294 vcp->vcp_ndisks == 0 && strlen(vcp->vcp_cdrom) == 0) { 1295 log_warnx("no kernel or disk/cdrom specified"); 1296 goto fail; 1297 } else if (strlen(vcp->vcp_name) == 0) { 1298 log_warnx("invalid VM name"); 1299 goto fail; 1300 } else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' || 1301 *vcp->vcp_name == '_') { 1302 log_warnx("invalid VM name"); 1303 goto fail; 1304 } else { 1305 for (s = vcp->vcp_name; *s != '\0'; ++s) { 1306 if (!(isalnum(*s) || *s == '.' || *s == '-' || 1307 *s == '_')) { 1308 log_warnx("invalid VM name"); 1309 goto fail; 1310 } 1311 } 1312 } 1313 1314 /* track active users */ 1315 if (uid != 0 && env->vmd_users != NULL && 1316 (usr = user_get(uid)) == NULL) { 1317 log_warnx("could not add user"); 1318 goto fail; 1319 } 1320 1321 if ((vm = calloc(1, sizeof(*vm))) == NULL) 1322 goto fail; 1323 1324 memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params)); 1325 vmc = &vm->vm_params; 1326 vcp = &vmc->vmc_params; 1327 vm->vm_pid = -1; 1328 vm->vm_tty = -1; 1329 vm->vm_receive_fd = -1; 1330 vm->vm_state &= ~VM_STATE_PAUSED; 1331 vm->vm_user = usr; 1332 1333 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) 1334 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 1335 vm->vm_disks[i][j] = -1; 1336 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) 1337 vm->vm_ifs[i].vif_fd = -1; 1338 for (i = 0; i < vcp->vcp_nnics; i++) { 1339 if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) { 1340 /* inherit per-interface flags from the switch */ 1341 vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK); 1342 } 1343 1344 /* 1345 * If the MAC address is zero, always randomize it in vmd(8) 1346 * because we cannot rely on the guest OS to do the right 1347 * thing like OpenBSD does. Based on ether_fakeaddr() 1348 * from the kernel, incremented by one to differentiate 1349 * the source. 1350 */ 1351 if (memcmp(zero_mac, &vcp->vcp_macs[i], ETHER_ADDR_LEN) == 0) { 1352 rng = arc4random(); 1353 vcp->vcp_macs[i][0] = 0xfe; 1354 vcp->vcp_macs[i][1] = 0xe1; 1355 vcp->vcp_macs[i][2] = 0xba + 1; 1356 vcp->vcp_macs[i][3] = 0xd0 | ((i + 1) & 0xf); 1357 vcp->vcp_macs[i][4] = rng; 1358 vcp->vcp_macs[i][5] = rng >> 8; 1359 } 1360 } 1361 vm->vm_kernel = -1; 1362 vm->vm_cdrom = -1; 1363 vm->vm_iev.ibuf.fd = -1; 1364 1365 /* 1366 * Assign a new internal Id if not specified and we succeed in 1367 * claiming a new Id. 1368 */ 1369 if (id != 0) 1370 vm->vm_vmid = id; 1371 else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1) 1372 goto fail; 1373 else 1374 vm->vm_vmid = nid; 1375 1376 log_debug("%s: registering vm %d", __func__, vm->vm_vmid); 1377 TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry); 1378 1379 *ret_vm = vm; 1380 return (0); 1381 fail: 1382 if (errno == 0) 1383 errno = EINVAL; 1384 return (-1); 1385 } 1386 1387 int 1388 vm_instance(struct privsep *ps, struct vmd_vm **vm_parent, 1389 struct vmop_create_params *vmc, uid_t uid) 1390 { 1391 char *name; 1392 struct vm_create_params *vcp = &vmc->vmc_params; 1393 struct vmop_create_params *vmcp; 1394 struct vm_create_params *vcpp; 1395 struct vmd_vm *vm = NULL; 1396 unsigned int i, j; 1397 uint32_t id; 1398 1399 /* return without error if the parent is NULL (nothing to inherit) */ 1400 if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 || 1401 vmc->vmc_instance[0] == '\0') 1402 return (0); 1403 1404 if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL) { 1405 errno = VMD_PARENT_INVALID; 1406 return (-1); 1407 } 1408 1409 errno = 0; 1410 vmcp = &(*vm_parent)->vm_params; 1411 vcpp = &vmcp->vmc_params; 1412 1413 /* Are we allowed to create an instance from this VM? */ 1414 if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) { 1415 log_warnx("vm \"%s\" no permission to create vm instance", 1416 vcpp->vcp_name); 1417 errno = ENAMETOOLONG; 1418 return (-1); 1419 } 1420 1421 id = vcp->vcp_id; 1422 name = vcp->vcp_name; 1423 1424 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL || 1425 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) { 1426 errno = EPROCLIM; 1427 return (-1); 1428 } 1429 1430 /* CPU */ 1431 if (vcp->vcp_ncpus == 0) 1432 vcp->vcp_ncpus = vcpp->vcp_ncpus; 1433 if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 && 1434 vcp->vcp_ncpus != vcpp->vcp_ncpus) { 1435 log_warnx("vm \"%s\" no permission to set cpus", name); 1436 errno = EPERM; 1437 return (-1); 1438 } 1439 1440 /* memory */ 1441 if (vcp->vcp_memranges[0].vmr_size == 0) 1442 vcp->vcp_memranges[0].vmr_size = 1443 vcpp->vcp_memranges[0].vmr_size; 1444 if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 && 1445 vcp->vcp_memranges[0].vmr_size != 1446 vcpp->vcp_memranges[0].vmr_size) { 1447 log_warnx("vm \"%s\" no permission to set memory", name); 1448 errno = EPERM; 1449 return (-1); 1450 } 1451 1452 /* disks cannot be inherited */ 1453 if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 && 1454 vcp->vcp_ndisks) { 1455 log_warnx("vm \"%s\" no permission to set disks", name); 1456 errno = EPERM; 1457 return (-1); 1458 } 1459 for (i = 0; i < vcp->vcp_ndisks; i++) { 1460 /* Check if this disk is already used in the parent */ 1461 for (j = 0; j < vcpp->vcp_ndisks; j++) { 1462 if (strcmp(vcp->vcp_disks[i], 1463 vcpp->vcp_disks[j]) == 0) { 1464 log_warnx("vm \"%s\" disk %s cannot be reused", 1465 name, vcp->vcp_disks[i]); 1466 errno = EBUSY; 1467 return (-1); 1468 } 1469 } 1470 vmc->vmc_checkaccess |= VMOP_CREATE_DISK; 1471 } 1472 1473 /* interfaces */ 1474 if (vcp->vcp_nnics > 0 && 1475 vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 && 1476 vcp->vcp_nnics != vcpp->vcp_nnics) { 1477 log_warnx("vm \"%s\" no permission to set interfaces", name); 1478 errno = EPERM; 1479 return (-1); 1480 } 1481 for (i = 0; i < vcpp->vcp_nnics; i++) { 1482 /* Interface got overwritten */ 1483 if (i < vcp->vcp_nnics) 1484 continue; 1485 1486 /* Copy interface from parent */ 1487 vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i]; 1488 (void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i], 1489 sizeof(vmc->vmc_ifnames[i])); 1490 (void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i], 1491 sizeof(vmc->vmc_ifswitch[i])); 1492 (void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i], 1493 sizeof(vmc->vmc_ifgroup[i])); 1494 memcpy(vcp->vcp_macs[i], vcpp->vcp_macs[i], 1495 sizeof(vcp->vcp_macs[i])); 1496 vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i]; 1497 vcp->vcp_nnics++; 1498 } 1499 for (i = 0; i < vcp->vcp_nnics; i++) { 1500 for (j = 0; j < vcpp->vcp_nnics; j++) { 1501 if (memcmp(zero_mac, vcp->vcp_macs[i], 1502 sizeof(vcp->vcp_macs[i])) != 0 && 1503 memcmp(vcpp->vcp_macs[i], vcp->vcp_macs[i], 1504 sizeof(vcp->vcp_macs[i])) != 0) { 1505 log_warnx("vm \"%s\" lladdr cannot be reused", 1506 name); 1507 errno = EBUSY; 1508 return (-1); 1509 } 1510 if (strlen(vmc->vmc_ifnames[i]) && 1511 strcmp(vmc->vmc_ifnames[i], 1512 vmcp->vmc_ifnames[j]) == 0) { 1513 log_warnx("vm \"%s\" %s cannot be reused", 1514 vmc->vmc_ifnames[i], name); 1515 errno = EBUSY; 1516 return (-1); 1517 } 1518 } 1519 } 1520 1521 /* kernel */ 1522 if (strlen(vcp->vcp_kernel) > 0) { 1523 if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) { 1524 log_warnx("vm \"%s\" no permission to set boot image", 1525 name); 1526 errno = EPERM; 1527 return (-1); 1528 } 1529 vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL; 1530 } else if (strlcpy(vcp->vcp_kernel, vcpp->vcp_kernel, 1531 sizeof(vcp->vcp_kernel)) >= sizeof(vcp->vcp_kernel)) { 1532 log_warnx("vm \"%s\" kernel name too long", name); 1533 errno = EINVAL; 1534 return (-1); 1535 } 1536 1537 /* cdrom */ 1538 if (strlen(vcp->vcp_cdrom) > 0) { 1539 if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) { 1540 log_warnx("vm \"%s\" no permission to set cdrom", name); 1541 errno = EPERM; 1542 return (-1); 1543 } 1544 vmc->vmc_checkaccess |= VMOP_CREATE_CDROM; 1545 } else if (strlcpy(vcp->vcp_cdrom, vcpp->vcp_cdrom, 1546 sizeof(vcp->vcp_cdrom)) >= sizeof(vcp->vcp_cdrom)) { 1547 log_warnx("vm \"%s\" cdrom name too long", name); 1548 errno = EINVAL; 1549 return (-1); 1550 } 1551 1552 /* user */ 1553 if (vmc->vmc_owner.uid == 0) 1554 vmc->vmc_owner.uid = vmcp->vmc_owner.uid; 1555 else if (vmc->vmc_owner.uid != uid && 1556 vmc->vmc_owner.uid != vmcp->vmc_owner.uid) { 1557 log_warnx("vm \"%s\" user mismatch", name); 1558 errno = EPERM; 1559 return (-1); 1560 } 1561 1562 /* group */ 1563 if (vmc->vmc_owner.gid == 0) 1564 vmc->vmc_owner.gid = vmcp->vmc_owner.gid; 1565 else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) { 1566 log_warnx("vm \"%s\" group mismatch", name); 1567 errno = EPERM; 1568 return (-1); 1569 } 1570 1571 /* child instances */ 1572 if (vmc->vmc_insflags) { 1573 log_warnx("vm \"%s\" cannot change instance permissions", name); 1574 errno = EPERM; 1575 return (-1); 1576 } 1577 if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) { 1578 vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid; 1579 vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid; 1580 vmc->vmc_insflags = vmcp->vmc_insflags; 1581 } else { 1582 vmc->vmc_insowner.gid = 0; 1583 vmc->vmc_insowner.uid = 0; 1584 vmc->vmc_insflags = 0; 1585 } 1586 1587 /* finished, remove instance flags */ 1588 vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE; 1589 1590 return (0); 1591 } 1592 1593 /* 1594 * vm_checkperm 1595 * 1596 * Checks if the user represented by the 'uid' parameter is allowed to 1597 * manipulate the VM described by the 'vm' parameter (or connect to said VM's 1598 * console.) 1599 * 1600 * Parameters: 1601 * vm: the VM whose permission is to be checked 1602 * vmo: the required uid/gid to be checked 1603 * uid: the user ID of the user making the request 1604 * 1605 * Return values: 1606 * 0: the permission should be granted 1607 * -1: the permission check failed (also returned if vm == null) 1608 */ 1609 int 1610 vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid) 1611 { 1612 struct group *gr; 1613 struct passwd *pw; 1614 char **grmem; 1615 1616 /* root has no restrictions */ 1617 if (uid == 0) 1618 return (0); 1619 1620 if (vmo == NULL) 1621 return (-1); 1622 1623 /* check user */ 1624 if (vm == NULL) { 1625 if (vmo->uid == uid) 1626 return (0); 1627 } else { 1628 /* 1629 * check user of running vm (the owner of a running vm can 1630 * be different to (or more specific than) the configured owner. 1631 */ 1632 if (((vm->vm_state & VM_STATE_RUNNING) && vm->vm_uid == uid) || 1633 (!(vm->vm_state & VM_STATE_RUNNING) && vmo->uid == uid)) 1634 return (0); 1635 } 1636 1637 /* check groups */ 1638 if (vmo->gid != -1) { 1639 if ((pw = getpwuid(uid)) == NULL) 1640 return (-1); 1641 if (pw->pw_gid == vmo->gid) 1642 return (0); 1643 if ((gr = getgrgid(vmo->gid)) != NULL) { 1644 for (grmem = gr->gr_mem; *grmem; grmem++) 1645 if (strcmp(*grmem, pw->pw_name) == 0) 1646 return (0); 1647 } 1648 } 1649 1650 return (-1); 1651 } 1652 1653 /* 1654 * vm_checkinsflag 1655 * 1656 * Checks wheter the non-root user is allowed to set an instance option. 1657 * 1658 * Parameters: 1659 * vmc: the VM create parameters 1660 * flag: the flag to be checked 1661 * uid: the user ID of the user making the request 1662 * 1663 * Return values: 1664 * 0: the permission should be granted 1665 * -1: the permission check failed (also returned if vm == null) 1666 */ 1667 int 1668 vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid) 1669 { 1670 /* root has no restrictions */ 1671 if (uid == 0) 1672 return (0); 1673 1674 if ((vmc->vmc_insflags & flag) == 0) 1675 return (-1); 1676 1677 return (0); 1678 } 1679 1680 /* 1681 * vm_checkaccess 1682 * 1683 * Checks if the user represented by the 'uid' parameter is allowed to 1684 * access the file described by the 'path' parameter. 1685 * 1686 * Parameters: 1687 * fd: the file descriptor of the opened file 1688 * uflag: check if the userid has access to the file 1689 * uid: the user ID of the user making the request 1690 * amode: the access flags of R_OK and W_OK 1691 * 1692 * Return values: 1693 * 0: the permission should be granted 1694 * -1: the permission check failed 1695 */ 1696 int 1697 vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode) 1698 { 1699 struct group *gr; 1700 struct passwd *pw; 1701 char **grmem; 1702 struct stat st; 1703 mode_t mode; 1704 1705 if (fd == -1) 1706 return (-1); 1707 1708 /* 1709 * File has to be accessible and a regular file 1710 */ 1711 if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode)) 1712 return (-1); 1713 1714 /* root has no restrictions */ 1715 if (uid == 0 || uflag == 0) 1716 return (0); 1717 1718 /* check other */ 1719 mode = amode & W_OK ? S_IWOTH : 0; 1720 mode |= amode & R_OK ? S_IROTH : 0; 1721 if ((st.st_mode & mode) == mode) 1722 return (0); 1723 1724 /* check user */ 1725 mode = amode & W_OK ? S_IWUSR : 0; 1726 mode |= amode & R_OK ? S_IRUSR : 0; 1727 if (uid == st.st_uid && (st.st_mode & mode) == mode) 1728 return (0); 1729 1730 /* check groups */ 1731 mode = amode & W_OK ? S_IWGRP : 0; 1732 mode |= amode & R_OK ? S_IRGRP : 0; 1733 if ((st.st_mode & mode) != mode) 1734 return (-1); 1735 if ((pw = getpwuid(uid)) == NULL) 1736 return (-1); 1737 if (pw->pw_gid == st.st_gid) 1738 return (0); 1739 if ((gr = getgrgid(st.st_gid)) != NULL) { 1740 for (grmem = gr->gr_mem; *grmem; grmem++) 1741 if (strcmp(*grmem, pw->pw_name) == 0) 1742 return (0); 1743 } 1744 1745 return (-1); 1746 } 1747 1748 int 1749 vm_opentty(struct vmd_vm *vm) 1750 { 1751 struct ptmget ptm; 1752 struct stat st; 1753 struct group *gr; 1754 uid_t uid; 1755 gid_t gid; 1756 mode_t mode; 1757 int on; 1758 1759 /* 1760 * Open tty with pre-opened PTM fd 1761 */ 1762 if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1)) 1763 return (-1); 1764 1765 /* 1766 * We use user ioctl(2) mode to pass break commands. 1767 */ 1768 on = 1; 1769 if (ioctl(ptm.cfd, TIOCUCNTL, &on) == -1) 1770 fatal("could not enable user ioctl mode"); 1771 1772 vm->vm_tty = ptm.cfd; 1773 close(ptm.sfd); 1774 if ((vm->vm_ttyname = strdup(ptm.sn)) == NULL) 1775 goto fail; 1776 1777 uid = vm->vm_uid; 1778 gid = vm->vm_params.vmc_owner.gid; 1779 1780 if (vm->vm_params.vmc_owner.gid != -1) { 1781 mode = 0660; 1782 } else if ((gr = getgrnam("tty")) != NULL) { 1783 gid = gr->gr_gid; 1784 mode = 0620; 1785 } else { 1786 mode = 0600; 1787 gid = 0; 1788 } 1789 1790 log_debug("%s: vm %s tty %s uid %d gid %d mode %o", 1791 __func__, vm->vm_params.vmc_params.vcp_name, 1792 vm->vm_ttyname, uid, gid, mode); 1793 1794 /* 1795 * Change ownership and mode of the tty as required. 1796 * Loosely based on the implementation of sshpty.c 1797 */ 1798 if (stat(vm->vm_ttyname, &st) == -1) 1799 goto fail; 1800 1801 if (st.st_uid != uid || st.st_gid != gid) { 1802 if (chown(vm->vm_ttyname, uid, gid) == -1) { 1803 log_warn("chown %s %d %d failed, uid %d", 1804 vm->vm_ttyname, uid, gid, getuid()); 1805 1806 /* Ignore failure on read-only filesystems */ 1807 if (!((errno == EROFS) && 1808 (st.st_uid == uid || st.st_uid == 0))) 1809 goto fail; 1810 } 1811 } 1812 1813 if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) { 1814 if (chmod(vm->vm_ttyname, mode) == -1) { 1815 log_warn("chmod %s %o failed, uid %d", 1816 vm->vm_ttyname, mode, getuid()); 1817 1818 /* Ignore failure on read-only filesystems */ 1819 if (!((errno == EROFS) && 1820 (st.st_uid == uid || st.st_uid == 0))) 1821 goto fail; 1822 } 1823 } 1824 1825 return (0); 1826 fail: 1827 vm_closetty(vm); 1828 return (-1); 1829 } 1830 1831 void 1832 vm_closetty(struct vmd_vm *vm) 1833 { 1834 if (vm->vm_tty != -1) { 1835 /* Release and close the tty */ 1836 if (fchown(vm->vm_tty, 0, 0) == -1) 1837 log_warn("chown %s 0 0 failed", vm->vm_ttyname); 1838 if (fchmod(vm->vm_tty, 0666) == -1) 1839 log_warn("chmod %s 0666 failed", vm->vm_ttyname); 1840 close(vm->vm_tty); 1841 vm->vm_tty = -1; 1842 } 1843 free(vm->vm_ttyname); 1844 vm->vm_ttyname = NULL; 1845 } 1846 1847 void 1848 switch_remove(struct vmd_switch *vsw) 1849 { 1850 if (vsw == NULL) 1851 return; 1852 1853 TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry); 1854 1855 free(vsw->sw_group); 1856 free(vsw->sw_name); 1857 free(vsw); 1858 } 1859 1860 struct vmd_switch * 1861 switch_getbyname(const char *name) 1862 { 1863 struct vmd_switch *vsw; 1864 1865 if (name == NULL) 1866 return (NULL); 1867 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) { 1868 if (strcmp(vsw->sw_name, name) == 0) 1869 return (vsw); 1870 } 1871 1872 return (NULL); 1873 } 1874 1875 struct vmd_user * 1876 user_get(uid_t uid) 1877 { 1878 struct vmd_user *usr; 1879 1880 if (uid == 0) 1881 return (NULL); 1882 1883 /* first try to find an existing user */ 1884 TAILQ_FOREACH(usr, env->vmd_users, usr_entry) { 1885 if (usr->usr_id.uid == uid) 1886 goto done; 1887 } 1888 1889 if ((usr = calloc(1, sizeof(*usr))) == NULL) { 1890 log_warn("could not allocate user"); 1891 return (NULL); 1892 } 1893 1894 usr->usr_id.uid = uid; 1895 usr->usr_id.gid = -1; 1896 TAILQ_INSERT_TAIL(env->vmd_users, usr, usr_entry); 1897 1898 done: 1899 DPRINTF("%s: uid %d #%d +", 1900 __func__, usr->usr_id.uid, usr->usr_refcnt + 1); 1901 usr->usr_refcnt++; 1902 1903 return (usr); 1904 } 1905 1906 void 1907 user_put(struct vmd_user *usr) 1908 { 1909 if (usr == NULL) 1910 return; 1911 1912 DPRINTF("%s: uid %d #%d -", 1913 __func__, usr->usr_id.uid, usr->usr_refcnt - 1); 1914 1915 if (--usr->usr_refcnt > 0) 1916 return; 1917 1918 TAILQ_REMOVE(env->vmd_users, usr, usr_entry); 1919 free(usr); 1920 } 1921 1922 void 1923 user_inc(struct vm_create_params *vcp, struct vmd_user *usr, int inc) 1924 { 1925 char mem[FMT_SCALED_STRSIZE]; 1926 1927 if (usr == NULL) 1928 return; 1929 1930 /* increment or decrement counters */ 1931 inc = inc ? 1 : -1; 1932 1933 usr->usr_maxcpu += vcp->vcp_ncpus * inc; 1934 usr->usr_maxmem += vcp->vcp_memranges[0].vmr_size * inc; 1935 usr->usr_maxifs += vcp->vcp_nnics * inc; 1936 1937 if (log_getverbose() > 1) { 1938 (void)fmt_scaled(usr->usr_maxmem * 1024 * 1024, mem); 1939 log_debug("%s: %c uid %d ref %d cpu %llu mem %s ifs %llu", 1940 __func__, inc == 1 ? '+' : '-', 1941 usr->usr_id.uid, usr->usr_refcnt, 1942 usr->usr_maxcpu, mem, usr->usr_maxifs); 1943 } 1944 } 1945 1946 int 1947 user_checklimit(struct vmd_user *usr, struct vm_create_params *vcp) 1948 { 1949 const char *limit = ""; 1950 1951 /* XXX make the limits configurable */ 1952 if (usr->usr_maxcpu > VM_DEFAULT_USER_MAXCPU) { 1953 limit = "cpu "; 1954 goto fail; 1955 } 1956 if (usr->usr_maxmem > VM_DEFAULT_USER_MAXMEM) { 1957 limit = "memory "; 1958 goto fail; 1959 } 1960 if (usr->usr_maxifs > VM_DEFAULT_USER_MAXIFS) { 1961 limit = "interface "; 1962 goto fail; 1963 } 1964 1965 return (0); 1966 1967 fail: 1968 log_warnx("%s: user %d %slimit reached", vcp->vcp_name, 1969 usr->usr_id.uid, limit); 1970 return (-1); 1971 } 1972 1973 char * 1974 get_string(uint8_t *ptr, size_t len) 1975 { 1976 size_t i; 1977 1978 for (i = 0; i < len; i++) 1979 if (!isprint(ptr[i])) 1980 break; 1981 1982 return strndup(ptr, i); 1983 } 1984 1985 uint32_t 1986 prefixlen2mask(uint8_t prefixlen) 1987 { 1988 if (prefixlen == 0) 1989 return (0); 1990 1991 if (prefixlen > 32) 1992 prefixlen = 32; 1993 1994 return (htonl(0xffffffff << (32 - prefixlen))); 1995 } 1996 1997 void 1998 prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask) 1999 { 2000 struct in6_addr s6; 2001 int i; 2002 2003 if (prefixlen > 128) 2004 prefixlen = 128; 2005 2006 memset(&s6, 0, sizeof(s6)); 2007 for (i = 0; i < prefixlen / 8; i++) 2008 s6.s6_addr[i] = 0xff; 2009 i = prefixlen % 8; 2010 if (i) 2011 s6.s6_addr[prefixlen / 8] = 0xff00 >> i; 2012 2013 memcpy(mask, &s6, sizeof(s6)); 2014 } 2015 2016 void 2017 getmonotime(struct timeval *tv) 2018 { 2019 struct timespec ts; 2020 2021 if (clock_gettime(CLOCK_MONOTONIC, &ts)) 2022 fatal("clock_gettime"); 2023 2024 TIMESPEC_TO_TIMEVAL(tv, &ts); 2025 } 2026