1 /* $OpenBSD: config.c,v 1.72 2023/07/13 18:31:59 dv Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 #include <sys/queue.h> 21 #include <sys/time.h> 22 #include <sys/uio.h> 23 #include <sys/stat.h> 24 #include <sys/socket.h> 25 26 #include <net/if.h> 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <termios.h> 31 #include <unistd.h> 32 #include <limits.h> 33 #include <string.h> 34 #include <fcntl.h> 35 #include <util.h> 36 #include <errno.h> 37 #include <imsg.h> 38 39 #include "proc.h" 40 #include "vmd.h" 41 42 /* Supported bridge types */ 43 const char *vmd_descsw[] = { "bridge", "veb", NULL }; 44 45 static int config_init_localprefix(struct vmd_config *); 46 47 static int 48 config_init_localprefix(struct vmd_config *cfg) 49 { 50 if (parse_prefix4(VMD_DHCP_PREFIX, &cfg->cfg_localprefix, NULL) == -1) 51 return (-1); 52 53 /* IPv6 is disabled by default */ 54 cfg->cfg_flags &= ~VMD_CFG_INET6; 55 56 /* Generate random IPv6 prefix only once */ 57 if (cfg->cfg_flags & VMD_CFG_AUTOINET6) 58 return (0); 59 if (parse_prefix6(VMD_ULA_PREFIX, &cfg->cfg_localprefix, NULL) == -1) 60 return (-1); 61 62 /* Randomize the 56 bits "Global ID" and "Subnet ID" */ 63 arc4random_buf(&cfg->cfg_localprefix.lp_in6.s6_addr[1], 7); 64 cfg->cfg_flags |= VMD_CFG_AUTOINET6; 65 66 return (0); 67 } 68 69 int 70 config_init(struct vmd *env) 71 { 72 struct privsep *ps = &env->vmd_ps; 73 unsigned int what; 74 75 /* Global configuration */ 76 ps->ps_what[PROC_PARENT] = CONFIG_ALL; 77 ps->ps_what[PROC_VMM] = CONFIG_VMS; 78 79 /* Local prefix */ 80 if (config_init_localprefix(&env->vmd_cfg) == -1) 81 return (-1); 82 83 /* Other configuration */ 84 what = ps->ps_what[privsep_process]; 85 if (what & CONFIG_VMS) { 86 if ((env->vmd_vms = calloc(1, sizeof(*env->vmd_vms))) == NULL) 87 return (-1); 88 if ((env->vmd_known = calloc(1, sizeof(*env->vmd_known))) == NULL) 89 return (-1); 90 TAILQ_INIT(env->vmd_vms); 91 TAILQ_INIT(env->vmd_known); 92 } 93 if (what & CONFIG_SWITCHES) { 94 if ((env->vmd_switches = calloc(1, 95 sizeof(*env->vmd_switches))) == NULL) 96 return (-1); 97 TAILQ_INIT(env->vmd_switches); 98 } 99 100 return (0); 101 } 102 103 void 104 config_purge(struct vmd *env, unsigned int reset) 105 { 106 struct privsep *ps = &env->vmd_ps; 107 struct name2id *n2i; 108 struct vmd_vm *vm; 109 struct vmd_switch *vsw; 110 unsigned int what; 111 112 DPRINTF("%s: %s purging vms and switches", 113 __func__, ps->ps_title[privsep_process]); 114 115 /* Reset global configuration (prefix was verified before) */ 116 config_init_localprefix(&env->vmd_cfg); 117 118 /* Reset other configuration */ 119 what = ps->ps_what[privsep_process] & reset; 120 if (what & CONFIG_VMS && env->vmd_vms != NULL) { 121 while ((vm = TAILQ_FIRST(env->vmd_vms)) != NULL) { 122 vm_remove(vm, __func__); 123 } 124 while ((n2i = TAILQ_FIRST(env->vmd_known)) != NULL) { 125 TAILQ_REMOVE(env->vmd_known, n2i, entry); 126 free(n2i); 127 } 128 env->vmd_nvm = 0; 129 } 130 if (what & CONFIG_SWITCHES && env->vmd_switches != NULL) { 131 while ((vsw = TAILQ_FIRST(env->vmd_switches)) != NULL) 132 switch_remove(vsw); 133 env->vmd_nswitches = 0; 134 } 135 } 136 137 int 138 config_setconfig(struct vmd *env) 139 { 140 struct privsep *ps = &env->vmd_ps; 141 unsigned int id; 142 143 DPRINTF("%s: setting config", __func__); 144 145 for (id = 0; id < PROC_MAX; id++) { 146 if (id == privsep_process) 147 continue; 148 proc_compose(ps, id, IMSG_VMDOP_CONFIG, &env->vmd_cfg, 149 sizeof(env->vmd_cfg)); 150 } 151 152 return (0); 153 } 154 155 int 156 config_getconfig(struct vmd *env, struct imsg *imsg) 157 { 158 struct privsep *ps = &env->vmd_ps; 159 160 log_debug("%s: %s retrieving config", 161 __func__, ps->ps_title[privsep_process]); 162 163 IMSG_SIZE_CHECK(imsg, &env->vmd_cfg); 164 memcpy(&env->vmd_cfg, imsg->data, sizeof(env->vmd_cfg)); 165 166 return (0); 167 } 168 169 int 170 config_setreset(struct vmd *env, unsigned int reset) 171 { 172 struct privsep *ps = &env->vmd_ps; 173 unsigned int id; 174 175 DPRINTF("%s: resetting state", __func__); 176 177 for (id = 0; id < PROC_MAX; id++) { 178 if ((reset & ps->ps_what[id]) == 0 || 179 id == privsep_process) 180 continue; 181 proc_compose(ps, id, IMSG_CTL_RESET, &reset, sizeof(reset)); 182 } 183 184 return (0); 185 } 186 187 int 188 config_getreset(struct vmd *env, struct imsg *imsg) 189 { 190 unsigned int mode; 191 192 IMSG_SIZE_CHECK(imsg, &mode); 193 memcpy(&mode, imsg->data, sizeof(mode)); 194 195 log_debug("%s: %s resetting state", 196 __func__, env->vmd_ps.ps_title[privsep_process]); 197 198 config_purge(env, mode); 199 200 return (0); 201 } 202 203 /* 204 * config_setvm 205 * 206 * Configure a vm, opening any required file descriptors. 207 * 208 * Returns 0 on success, error code on failure. 209 */ 210 int 211 config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) 212 { 213 int diskfds[VM_MAX_DISKS_PER_VM][VM_MAX_BASE_PER_DISK]; 214 struct vmd_if *vif; 215 struct vmop_create_params *vmc = &vm->vm_params; 216 struct vm_create_params *vcp = &vmc->vmc_params; 217 unsigned int i, j; 218 int fd = -1, cdromfd = -1, kernfd = -1; 219 int *tapfds = NULL; 220 int n = 0, aflags, oflags, ret = -1; 221 char ifname[IF_NAMESIZE], *s; 222 char path[PATH_MAX], base[PATH_MAX]; 223 unsigned int unit; 224 struct timeval tv, rate, since_last; 225 struct vmop_addr_req var; 226 size_t bytes = 0; 227 228 if (vm->vm_state & VM_STATE_RUNNING) { 229 log_warnx("%s: vm is already running", __func__); 230 return (EALREADY); 231 } 232 233 /* 234 * Rate-limit the VM so that it cannot restart in a loop: 235 * if the VM restarts after less than VM_START_RATE_SEC seconds, 236 * we increment the limit counter. After VM_START_RATE_LIMIT 237 * of suchs fast reboots the VM is stopped. 238 */ 239 getmonotime(&tv); 240 if (vm->vm_start_tv.tv_sec) { 241 timersub(&tv, &vm->vm_start_tv, &since_last); 242 243 rate.tv_sec = VM_START_RATE_SEC; 244 rate.tv_usec = 0; 245 if (timercmp(&since_last, &rate, <)) 246 vm->vm_start_limit++; 247 else { 248 /* Reset counter */ 249 vm->vm_start_limit = 0; 250 } 251 252 log_debug("%s: vm %u restarted after %lld.%ld seconds," 253 " limit %d/%d", __func__, vcp->vcp_id, since_last.tv_sec, 254 since_last.tv_usec, vm->vm_start_limit, 255 VM_START_RATE_LIMIT); 256 257 if (vm->vm_start_limit >= VM_START_RATE_LIMIT) { 258 log_warnx("%s: vm %u restarted too quickly", 259 __func__, vcp->vcp_id); 260 return (EPERM); 261 } 262 } 263 vm->vm_start_tv = tv; 264 265 for (i = 0; i < VM_MAX_DISKS_PER_VM; i++) 266 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 267 diskfds[i][j] = -1; 268 269 tapfds = reallocarray(NULL, vmc->vmc_nnics, sizeof(*tapfds)); 270 if (tapfds == NULL) { 271 ret = errno; 272 log_warn("%s: can't allocate tap fds", __func__); 273 return (ret); 274 } 275 for (i = 0; i < vmc->vmc_nnics; i++) 276 tapfds[i] = -1; 277 278 vm->vm_peerid = peerid; 279 vm->vm_uid = uid; 280 281 /* 282 * From here onward, all failures need cleanup and use goto fail 283 */ 284 if (!(vm->vm_state & VM_STATE_RECEIVED) && vm->vm_kernel == -1) { 285 if (vm->vm_kernel_path != NULL) { 286 /* Open external kernel for child */ 287 kernfd = open(vm->vm_kernel_path, O_RDONLY); 288 if (kernfd == -1) { 289 ret = errno; 290 log_warn("%s: can't open kernel or BIOS " 291 "boot image %s", __func__, 292 vm->vm_kernel_path); 293 goto fail; 294 } 295 } 296 297 /* 298 * Try to open the default BIOS image if no kernel/BIOS has been 299 * specified. The BIOS is an external firmware file that is 300 * typically distributed separately due to an incompatible 301 * license. 302 */ 303 if (kernfd == -1) { 304 if ((kernfd = open(VM_DEFAULT_BIOS, O_RDONLY)) == -1) { 305 log_warn("can't open %s", VM_DEFAULT_BIOS); 306 ret = VMD_BIOS_MISSING; 307 goto fail; 308 } 309 } 310 311 if (vm_checkaccess(kernfd, 312 vmc->vmc_checkaccess & VMOP_CREATE_KERNEL, 313 uid, R_OK) == -1) { 314 log_warnx("vm \"%s\" no read access to kernel " 315 "%s", vcp->vcp_name, vm->vm_kernel_path); 316 ret = EPERM; 317 goto fail; 318 } 319 320 vm->vm_kernel = kernfd; 321 vmc->vmc_kernel = kernfd; 322 } 323 324 /* Open CDROM image for child */ 325 if (strlen(vmc->vmc_cdrom)) { 326 /* Stat cdrom to ensure it is a regular file */ 327 if ((cdromfd = 328 open(vmc->vmc_cdrom, O_RDONLY)) == -1) { 329 log_warn("can't open cdrom %s", vmc->vmc_cdrom); 330 ret = VMD_CDROM_MISSING; 331 goto fail; 332 } 333 334 if (vm_checkaccess(cdromfd, 335 vmc->vmc_checkaccess & VMOP_CREATE_CDROM, 336 uid, R_OK) == -1) { 337 log_warnx("vm \"%s\" no read access to cdrom %s", 338 vcp->vcp_name, vmc->vmc_cdrom); 339 ret = EPERM; 340 goto fail; 341 } 342 } 343 344 /* Open disk images for child */ 345 for (i = 0 ; i < vmc->vmc_ndisks; i++) { 346 if (strlcpy(path, vmc->vmc_disks[i], sizeof(path)) 347 >= sizeof(path)) 348 log_warnx("disk path %s too long", vmc->vmc_disks[i]); 349 memset(vmc->vmc_diskbases, 0, sizeof(vmc->vmc_diskbases)); 350 oflags = O_RDWR|O_EXLOCK|O_NONBLOCK; 351 aflags = R_OK|W_OK; 352 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 353 /* Stat disk[i] to ensure it is a regular file */ 354 if ((diskfds[i][j] = open(path, oflags)) == -1) { 355 log_warn("can't open disk %s", 356 vmc->vmc_disks[i]); 357 ret = VMD_DISK_MISSING; 358 goto fail; 359 } 360 361 if (vm_checkaccess(diskfds[i][j], 362 vmc->vmc_checkaccess & VMOP_CREATE_DISK, 363 uid, aflags) == -1) { 364 log_warnx("vm \"%s\" unable to access " 365 "disk %s", vcp->vcp_name, path); 366 errno = EPERM; 367 goto fail; 368 } 369 370 /* 371 * Clear the write and exclusive flags for base images. 372 * All writes should go to the top image, allowing them 373 * to be shared. 374 */ 375 oflags = O_RDONLY|O_NONBLOCK; 376 aflags = R_OK; 377 n = virtio_get_base(diskfds[i][j], base, sizeof(base), 378 vmc->vmc_disktypes[i], path); 379 if (n == 0) 380 break; 381 if (n == -1) { 382 log_warnx("vm \"%s\" unable to read " 383 "base for disk %s", vcp->vcp_name, 384 vmc->vmc_disks[i]); 385 goto fail; 386 } 387 (void)strlcpy(path, base, sizeof(path)); 388 } 389 } 390 391 /* Open network interfaces */ 392 for (i = 0 ; i < vmc->vmc_nnics; i++) { 393 vif = &vm->vm_ifs[i]; 394 395 /* Check if the user has requested a specific tap(4) */ 396 s = vmc->vmc_ifnames[i]; 397 if (*s != '\0' && strcmp("tap", s) != 0) { 398 if (priv_getiftype(s, ifname, &unit) == -1 || 399 strcmp(ifname, "tap") != 0) { 400 log_warnx("%s: invalid tap name %s", 401 __func__, s); 402 ret = EINVAL; 403 goto fail; 404 } 405 } else 406 s = NULL; 407 408 /* 409 * Either open the requested tap(4) device or get 410 * the next available one. 411 */ 412 if (s != NULL) { 413 snprintf(path, PATH_MAX, "/dev/%s", s); 414 tapfds[i] = open(path, O_RDWR | O_NONBLOCK); 415 } else { 416 tapfds[i] = opentap(ifname); 417 s = ifname; 418 } 419 if (tapfds[i] == -1) { 420 log_warnx("%s: can't open tap %s", __func__, s); 421 goto fail; 422 } 423 if ((vif->vif_name = strdup(s)) == NULL) { 424 log_warn("%s: can't save tap %s", __func__, s); 425 goto fail; 426 } 427 428 /* Check if the the interface is attached to a switch */ 429 s = vmc->vmc_ifswitch[i]; 430 if (*s != '\0') { 431 if ((vif->vif_switch = strdup(s)) == NULL) { 432 log_warn("%s: can't save switch %s", 433 __func__, s); 434 goto fail; 435 } 436 } 437 438 /* Check if the the interface is assigned to a group */ 439 s = vmc->vmc_ifgroup[i]; 440 if (*s != '\0') { 441 if ((vif->vif_group = strdup(s)) == NULL) { 442 log_warn("%s: can't save group %s", 443 __func__, s); 444 goto fail; 445 } 446 } 447 448 /* non-default rdomain (requires VMIFF_RDOMAIN below) */ 449 vif->vif_rdomain = vmc->vmc_ifrdomain[i]; 450 451 /* Set the interface status */ 452 vif->vif_flags = 453 vmc->vmc_ifflags[i] & (VMIFF_UP|VMIFF_OPTMASK); 454 } 455 456 /* Open TTY */ 457 if (vm->vm_ttyname[0] == '\0') { 458 if (vm_opentty(vm) == -1) { 459 log_warn("%s: can't open tty %s", __func__, 460 vm->vm_ttyname[0] == '\0' ? "" : vm->vm_ttyname); 461 goto fail; 462 } 463 } 464 if ((fd = dup(vm->vm_tty)) == -1) { 465 log_warn("%s: can't re-open tty %s", __func__, vm->vm_ttyname); 466 goto fail; 467 } 468 469 /* Send VM information */ 470 /* XXX check proc_compose_imsg return values */ 471 if (vm->vm_state & VM_STATE_RECEIVED) 472 proc_compose_imsg(ps, PROC_VMM, -1, 473 IMSG_VMDOP_RECEIVE_VM_REQUEST, vm->vm_vmid, fd, vmc, 474 sizeof(struct vmop_create_params)); 475 else 476 proc_compose_imsg(ps, PROC_VMM, -1, 477 IMSG_VMDOP_START_VM_REQUEST, vm->vm_vmid, vm->vm_kernel, 478 vmc, sizeof(*vmc)); 479 480 if (strlen(vmc->vmc_cdrom)) 481 proc_compose_imsg(ps, PROC_VMM, -1, 482 IMSG_VMDOP_START_VM_CDROM, vm->vm_vmid, cdromfd, 483 NULL, 0); 484 485 for (i = 0; i < vmc->vmc_ndisks; i++) { 486 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 487 if (diskfds[i][j] == -1) 488 break; 489 proc_compose_imsg(ps, PROC_VMM, -1, 490 IMSG_VMDOP_START_VM_DISK, vm->vm_vmid, 491 diskfds[i][j], &i, sizeof(i)); 492 } 493 } 494 for (i = 0; i < vmc->vmc_nnics; i++) { 495 proc_compose_imsg(ps, PROC_VMM, -1, 496 IMSG_VMDOP_START_VM_IF, vm->vm_vmid, tapfds[i], 497 &i, sizeof(i)); 498 499 memset(&var, 0, sizeof(var)); 500 var.var_vmid = vm->vm_vmid; 501 var.var_nic_idx = i; 502 proc_compose_imsg(ps, PROC_PRIV, -1, IMSG_VMDOP_PRIV_GET_ADDR, 503 vm->vm_vmid, dup(tapfds[i]), &var, sizeof(var)); 504 } 505 506 if (!(vm->vm_state & VM_STATE_RECEIVED)) 507 proc_compose_imsg(ps, PROC_VMM, -1, 508 IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd, NULL, 0); 509 510 free(tapfds); 511 512 /* Collapse any memranges after the vm was sent to PROC_VMM */ 513 if (vcp->vcp_nmemranges > 0) { 514 for (i = 0; i < vcp->vcp_nmemranges; i++) 515 bytes += vcp->vcp_memranges[i].vmr_size; 516 memset(&vcp->vcp_memranges, 0, sizeof(vcp->vcp_memranges)); 517 vcp->vcp_nmemranges = 0; 518 vcp->vcp_memranges[0].vmr_size = bytes; 519 } 520 vm->vm_state |= VM_STATE_RUNNING; 521 return (0); 522 523 fail: 524 log_warnx("failed to start vm %s", vcp->vcp_name); 525 526 if (vm->vm_kernel != -1) 527 close(kernfd); 528 if (cdromfd != -1) 529 close(cdromfd); 530 for (i = 0; i < vmc->vmc_ndisks; i++) 531 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 532 if (diskfds[i][j] != -1) 533 close(diskfds[i][j]); 534 if (tapfds != NULL) { 535 for (i = 0; i < vmc->vmc_nnics; i++) 536 close(tapfds[i]); 537 free(tapfds); 538 } 539 540 if (vm->vm_from_config) { 541 vm_stop(vm, 0, __func__); 542 } else { 543 vm_remove(vm, __func__); 544 } 545 546 return (ret); 547 } 548 549 int 550 config_getvm(struct privsep *ps, struct imsg *imsg) 551 { 552 struct vmop_create_params vmc; 553 struct vmd_vm *vm = NULL; 554 555 IMSG_SIZE_CHECK(imsg, &vmc); 556 memcpy(&vmc, imsg->data, sizeof(vmc)); 557 vmc.vmc_kernel = imsg->fd; 558 559 errno = 0; 560 if (vm_register(ps, &vmc, &vm, imsg->hdr.peerid, 0) == -1) 561 goto fail; 562 563 vm->vm_state |= VM_STATE_RUNNING; 564 vm->vm_peerid = (uint32_t)-1; 565 vm->vm_kernel = imsg->fd; 566 return (0); 567 568 fail: 569 if (imsg->fd != -1) { 570 close(imsg->fd); 571 imsg->fd = -1; 572 } 573 574 vm_remove(vm, __func__); 575 if (errno == 0) 576 errno = EINVAL; 577 578 return (-1); 579 } 580 581 int 582 config_getdisk(struct privsep *ps, struct imsg *imsg) 583 { 584 struct vmd_vm *vm; 585 unsigned int n, idx; 586 587 errno = 0; 588 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 589 errno = ENOENT; 590 return (-1); 591 } 592 593 IMSG_SIZE_CHECK(imsg, &n); 594 memcpy(&n, imsg->data, sizeof(n)); 595 596 if (n >= vm->vm_params.vmc_ndisks || imsg->fd == -1) { 597 log_warnx("invalid disk id"); 598 errno = EINVAL; 599 return (-1); 600 } 601 idx = vm->vm_params.vmc_diskbases[n]++; 602 if (idx >= VM_MAX_BASE_PER_DISK) { 603 log_warnx("too many bases for disk"); 604 errno = EINVAL; 605 return (-1); 606 } 607 vm->vm_disks[n][idx] = imsg->fd; 608 return (0); 609 } 610 611 int 612 config_getif(struct privsep *ps, struct imsg *imsg) 613 { 614 struct vmd_vm *vm; 615 unsigned int n; 616 617 errno = 0; 618 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 619 errno = ENOENT; 620 return (-1); 621 } 622 623 IMSG_SIZE_CHECK(imsg, &n); 624 memcpy(&n, imsg->data, sizeof(n)); 625 if (n >= vm->vm_params.vmc_nnics || 626 vm->vm_ifs[n].vif_fd != -1 || imsg->fd == -1) { 627 log_warnx("invalid interface id"); 628 goto fail; 629 } 630 vm->vm_ifs[n].vif_fd = imsg->fd; 631 return (0); 632 fail: 633 if (imsg->fd != -1) 634 close(imsg->fd); 635 errno = EINVAL; 636 return (-1); 637 } 638 639 int 640 config_getcdrom(struct privsep *ps, struct imsg *imsg) 641 { 642 struct vmd_vm *vm; 643 644 errno = 0; 645 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 646 errno = ENOENT; 647 return (-1); 648 } 649 650 if (imsg->fd == -1) { 651 log_warnx("invalid cdrom id"); 652 goto fail; 653 } 654 655 vm->vm_cdrom = imsg->fd; 656 return (0); 657 fail: 658 if (imsg->fd != -1) 659 close(imsg->fd); 660 errno = EINVAL; 661 return (-1); 662 } 663