1 /* $OpenBSD: config.c,v 1.67 2023/01/28 14:40:53 dv Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 #include <sys/queue.h> 21 #include <sys/time.h> 22 #include <sys/uio.h> 23 #include <sys/stat.h> 24 #include <sys/socket.h> 25 26 #include <net/if.h> 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <termios.h> 31 #include <unistd.h> 32 #include <limits.h> 33 #include <string.h> 34 #include <fcntl.h> 35 #include <util.h> 36 #include <errno.h> 37 #include <imsg.h> 38 39 #include "proc.h" 40 #include "vmd.h" 41 42 /* Supported bridge types */ 43 const char *vmd_descsw[] = { "bridge", "veb", NULL }; 44 45 static int config_init_localprefix(struct vmd_config *); 46 47 static int 48 config_init_localprefix(struct vmd_config *cfg) 49 { 50 struct sockaddr_in6 *sin6; 51 52 if (host(VMD_DHCP_PREFIX, &cfg->cfg_localprefix) == -1) 53 return (-1); 54 55 /* IPv6 is disabled by default */ 56 cfg->cfg_flags &= ~VMD_CFG_INET6; 57 58 /* Generate random IPv6 prefix only once */ 59 if (cfg->cfg_flags & VMD_CFG_AUTOINET6) 60 return (0); 61 if (host(VMD_ULA_PREFIX, &cfg->cfg_localprefix6) == -1) 62 return (-1); 63 /* Randomize the 56 bits "Global ID" and "Subnet ID" */ 64 sin6 = ss2sin6(&cfg->cfg_localprefix6.ss); 65 arc4random_buf(&sin6->sin6_addr.s6_addr[1], 7); 66 cfg->cfg_flags |= VMD_CFG_AUTOINET6; 67 68 return (0); 69 } 70 71 int 72 config_init(struct vmd *env) 73 { 74 struct privsep *ps = &env->vmd_ps; 75 unsigned int what; 76 77 /* Global configuration */ 78 ps->ps_what[PROC_PARENT] = CONFIG_ALL; 79 ps->ps_what[PROC_VMM] = CONFIG_VMS; 80 81 /* Local prefix */ 82 if (config_init_localprefix(&env->vmd_cfg) == -1) 83 return (-1); 84 85 /* Other configuration */ 86 what = ps->ps_what[privsep_process]; 87 if (what & CONFIG_VMS) { 88 if ((env->vmd_vms = calloc(1, sizeof(*env->vmd_vms))) == NULL) 89 return (-1); 90 if ((env->vmd_known = calloc(1, sizeof(*env->vmd_known))) == NULL) 91 return (-1); 92 TAILQ_INIT(env->vmd_vms); 93 TAILQ_INIT(env->vmd_known); 94 } 95 if (what & CONFIG_SWITCHES) { 96 if ((env->vmd_switches = calloc(1, 97 sizeof(*env->vmd_switches))) == NULL) 98 return (-1); 99 TAILQ_INIT(env->vmd_switches); 100 } 101 102 return (0); 103 } 104 105 void 106 config_purge(struct vmd *env, unsigned int reset) 107 { 108 struct privsep *ps = &env->vmd_ps; 109 struct name2id *n2i; 110 struct vmd_vm *vm; 111 struct vmd_switch *vsw; 112 unsigned int what; 113 114 DPRINTF("%s: %s purging vms and switches", 115 __func__, ps->ps_title[privsep_process]); 116 117 /* Reset global configuration (prefix was verified before) */ 118 config_init_localprefix(&env->vmd_cfg); 119 120 /* Reset other configuration */ 121 what = ps->ps_what[privsep_process] & reset; 122 if (what & CONFIG_VMS && env->vmd_vms != NULL) { 123 while ((vm = TAILQ_FIRST(env->vmd_vms)) != NULL) { 124 vm_remove(vm, __func__); 125 } 126 while ((n2i = TAILQ_FIRST(env->vmd_known)) != NULL) { 127 TAILQ_REMOVE(env->vmd_known, n2i, entry); 128 free(n2i); 129 } 130 env->vmd_nvm = 0; 131 } 132 if (what & CONFIG_SWITCHES && env->vmd_switches != NULL) { 133 while ((vsw = TAILQ_FIRST(env->vmd_switches)) != NULL) 134 switch_remove(vsw); 135 env->vmd_nswitches = 0; 136 } 137 } 138 139 int 140 config_setconfig(struct vmd *env) 141 { 142 struct privsep *ps = &env->vmd_ps; 143 unsigned int id; 144 145 DPRINTF("%s: setting config", __func__); 146 147 for (id = 0; id < PROC_MAX; id++) { 148 if (id == privsep_process) 149 continue; 150 proc_compose(ps, id, IMSG_VMDOP_CONFIG, &env->vmd_cfg, 151 sizeof(env->vmd_cfg)); 152 } 153 154 return (0); 155 } 156 157 int 158 config_getconfig(struct vmd *env, struct imsg *imsg) 159 { 160 struct privsep *ps = &env->vmd_ps; 161 162 log_debug("%s: %s retrieving config", 163 __func__, ps->ps_title[privsep_process]); 164 165 IMSG_SIZE_CHECK(imsg, &env->vmd_cfg); 166 memcpy(&env->vmd_cfg, imsg->data, sizeof(env->vmd_cfg)); 167 168 return (0); 169 } 170 171 int 172 config_setreset(struct vmd *env, unsigned int reset) 173 { 174 struct privsep *ps = &env->vmd_ps; 175 unsigned int id; 176 177 DPRINTF("%s: resetting state", __func__); 178 179 for (id = 0; id < PROC_MAX; id++) { 180 if ((reset & ps->ps_what[id]) == 0 || 181 id == privsep_process) 182 continue; 183 proc_compose(ps, id, IMSG_CTL_RESET, &reset, sizeof(reset)); 184 } 185 186 return (0); 187 } 188 189 int 190 config_getreset(struct vmd *env, struct imsg *imsg) 191 { 192 unsigned int mode; 193 194 IMSG_SIZE_CHECK(imsg, &mode); 195 memcpy(&mode, imsg->data, sizeof(mode)); 196 197 log_debug("%s: %s resetting state", 198 __func__, env->vmd_ps.ps_title[privsep_process]); 199 200 config_purge(env, mode); 201 202 return (0); 203 } 204 205 /* 206 * config_setvm 207 * 208 * Configure a vm, opening any required file descriptors. 209 * 210 * Returns 0 on success, error code on failure. 211 */ 212 int 213 config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) 214 { 215 int diskfds[VM_MAX_DISKS_PER_VM][VM_MAX_BASE_PER_DISK]; 216 struct vmd_if *vif; 217 struct vmop_create_params *vmc = &vm->vm_params; 218 struct vm_create_params *vcp = &vmc->vmc_params; 219 unsigned int i, j; 220 int fd = -1, cdromfd = -1, kernfd = -1; 221 int *tapfds = NULL; 222 int n = 0, aflags, oflags, ret = -1; 223 char ifname[IF_NAMESIZE], *s; 224 char path[PATH_MAX], base[PATH_MAX]; 225 unsigned int unit; 226 struct timeval tv, rate, since_last; 227 struct vmop_addr_req var; 228 size_t bytes = 0; 229 230 if (vm->vm_state & VM_STATE_RUNNING) { 231 log_warnx("%s: vm is already running", __func__); 232 return (EALREADY); 233 } 234 235 /* 236 * Rate-limit the VM so that it cannot restart in a loop: 237 * if the VM restarts after less than VM_START_RATE_SEC seconds, 238 * we increment the limit counter. After VM_START_RATE_LIMIT 239 * of suchs fast reboots the VM is stopped. 240 */ 241 getmonotime(&tv); 242 if (vm->vm_start_tv.tv_sec) { 243 timersub(&tv, &vm->vm_start_tv, &since_last); 244 245 rate.tv_sec = VM_START_RATE_SEC; 246 rate.tv_usec = 0; 247 if (timercmp(&since_last, &rate, <)) 248 vm->vm_start_limit++; 249 else { 250 /* Reset counter */ 251 vm->vm_start_limit = 0; 252 } 253 254 log_debug("%s: vm %u restarted after %lld.%ld seconds," 255 " limit %d/%d", __func__, vcp->vcp_id, since_last.tv_sec, 256 since_last.tv_usec, vm->vm_start_limit, 257 VM_START_RATE_LIMIT); 258 259 if (vm->vm_start_limit >= VM_START_RATE_LIMIT) { 260 log_warnx("%s: vm %u restarted too quickly", 261 __func__, vcp->vcp_id); 262 return (EPERM); 263 } 264 } 265 vm->vm_start_tv = tv; 266 267 for (i = 0; i < VM_MAX_DISKS_PER_VM; i++) 268 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 269 diskfds[i][j] = -1; 270 271 tapfds = reallocarray(NULL, vcp->vcp_nnics, sizeof(*tapfds)); 272 if (tapfds == NULL) { 273 ret = errno; 274 log_warn("%s: can't allocate tap fds", __func__); 275 return (ret); 276 } 277 for (i = 0; i < vcp->vcp_nnics; i++) 278 tapfds[i] = -1; 279 280 vm->vm_peerid = peerid; 281 vm->vm_uid = uid; 282 283 /* 284 * From here onward, all failures need cleanup and use goto fail 285 */ 286 287 if (!(vm->vm_state & VM_STATE_RECEIVED)) { 288 if (strlen(vcp->vcp_kernel)) { 289 /* Open external kernel for child */ 290 if ((kernfd = open(vcp->vcp_kernel, O_RDONLY)) == -1) { 291 ret = errno; 292 log_warn("%s: can't open kernel or BIOS " 293 "boot image %s", __func__, vcp->vcp_kernel); 294 goto fail; 295 } 296 } 297 298 /* 299 * Try to open the default BIOS image if no kernel/BIOS has been 300 * specified. The BIOS is an external firmware file that is 301 * typically distributed separately due to an incompatible 302 * license. 303 */ 304 if (kernfd == -1 && 305 (kernfd = open(VM_DEFAULT_BIOS, O_RDONLY)) == -1) { 306 log_warn("can't open %s", VM_DEFAULT_BIOS); 307 ret = VMD_BIOS_MISSING; 308 goto fail; 309 } 310 311 if (vm_checkaccess(kernfd, 312 vmc->vmc_checkaccess & VMOP_CREATE_KERNEL, 313 uid, R_OK) == -1) { 314 log_warnx("vm \"%s\" no read access to kernel %s", 315 vcp->vcp_name, vcp->vcp_kernel); 316 ret = EPERM; 317 goto fail; 318 } 319 } 320 321 /* Open CDROM image for child */ 322 if (strlen(vcp->vcp_cdrom)) { 323 /* Stat cdrom to ensure it is a regular file */ 324 if ((cdromfd = 325 open(vcp->vcp_cdrom, O_RDONLY)) == -1) { 326 log_warn("can't open cdrom %s", vcp->vcp_cdrom); 327 ret = VMD_CDROM_MISSING; 328 goto fail; 329 } 330 331 if (vm_checkaccess(cdromfd, 332 vmc->vmc_checkaccess & VMOP_CREATE_CDROM, 333 uid, R_OK) == -1) { 334 log_warnx("vm \"%s\" no read access to cdrom %s", 335 vcp->vcp_name, vcp->vcp_cdrom); 336 ret = EPERM; 337 goto fail; 338 } 339 } 340 341 /* Open disk images for child */ 342 for (i = 0 ; i < vcp->vcp_ndisks; i++) { 343 if (strlcpy(path, vcp->vcp_disks[i], sizeof(path)) 344 >= sizeof(path)) 345 log_warnx("disk path %s too long", vcp->vcp_disks[i]); 346 memset(vmc->vmc_diskbases, 0, sizeof(vmc->vmc_diskbases)); 347 oflags = O_RDWR|O_EXLOCK|O_NONBLOCK; 348 aflags = R_OK|W_OK; 349 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 350 /* Stat disk[i] to ensure it is a regular file */ 351 if ((diskfds[i][j] = open(path, oflags)) == -1) { 352 log_warn("can't open disk %s", 353 vcp->vcp_disks[i]); 354 ret = VMD_DISK_MISSING; 355 goto fail; 356 } 357 358 if (vm_checkaccess(diskfds[i][j], 359 vmc->vmc_checkaccess & VMOP_CREATE_DISK, 360 uid, aflags) == -1) { 361 log_warnx("vm \"%s\" unable to access " 362 "disk %s", vcp->vcp_name, path); 363 errno = EPERM; 364 goto fail; 365 } 366 367 /* 368 * Clear the write and exclusive flags for base images. 369 * All writes should go to the top image, allowing them 370 * to be shared. 371 */ 372 oflags = O_RDONLY|O_NONBLOCK; 373 aflags = R_OK; 374 n = virtio_get_base(diskfds[i][j], base, sizeof(base), 375 vmc->vmc_disktypes[i], path); 376 if (n == 0) 377 break; 378 if (n == -1) { 379 log_warnx("vm \"%s\" unable to read " 380 "base for disk %s", vcp->vcp_name, 381 vcp->vcp_disks[i]); 382 goto fail; 383 } 384 (void)strlcpy(path, base, sizeof(path)); 385 } 386 } 387 388 /* Open network interfaces */ 389 for (i = 0 ; i < vcp->vcp_nnics; i++) { 390 vif = &vm->vm_ifs[i]; 391 392 /* Check if the user has requested a specific tap(4) */ 393 s = vmc->vmc_ifnames[i]; 394 if (*s != '\0' && strcmp("tap", s) != 0) { 395 if (priv_getiftype(s, ifname, &unit) == -1 || 396 strcmp(ifname, "tap") != 0) { 397 log_warnx("%s: invalid tap name %s", 398 __func__, s); 399 ret = EINVAL; 400 goto fail; 401 } 402 } else 403 s = NULL; 404 405 /* 406 * Either open the requested tap(4) device or get 407 * the next available one. 408 */ 409 if (s != NULL) { 410 snprintf(path, PATH_MAX, "/dev/%s", s); 411 tapfds[i] = open(path, O_RDWR | O_NONBLOCK); 412 } else { 413 tapfds[i] = opentap(ifname); 414 s = ifname; 415 } 416 if (tapfds[i] == -1) { 417 log_warnx("%s: can't open tap %s", __func__, s); 418 goto fail; 419 } 420 if ((vif->vif_name = strdup(s)) == NULL) { 421 log_warn("%s: can't save tap %s", __func__, s); 422 goto fail; 423 } 424 425 /* Check if the the interface is attached to a switch */ 426 s = vmc->vmc_ifswitch[i]; 427 if (*s != '\0') { 428 if ((vif->vif_switch = strdup(s)) == NULL) { 429 log_warn("%s: can't save switch %s", 430 __func__, s); 431 goto fail; 432 } 433 } 434 435 /* Check if the the interface is assigned to a group */ 436 s = vmc->vmc_ifgroup[i]; 437 if (*s != '\0') { 438 if ((vif->vif_group = strdup(s)) == NULL) { 439 log_warn("%s: can't save group %s", 440 __func__, s); 441 goto fail; 442 } 443 } 444 445 /* non-default rdomain (requires VMIFF_RDOMAIN below) */ 446 vif->vif_rdomain = vmc->vmc_ifrdomain[i]; 447 448 /* Set the interface status */ 449 vif->vif_flags = 450 vmc->vmc_ifflags[i] & (VMIFF_UP|VMIFF_OPTMASK); 451 } 452 453 /* Open TTY */ 454 if (vm->vm_ttyname == NULL) { 455 if (vm_opentty(vm) == -1) { 456 log_warn("%s: can't open tty %s", __func__, 457 vm->vm_ttyname == NULL ? "" : vm->vm_ttyname); 458 goto fail; 459 } 460 } 461 if ((fd = dup(vm->vm_tty)) == -1) { 462 log_warn("%s: can't re-open tty %s", __func__, vm->vm_ttyname); 463 goto fail; 464 } 465 466 /* Send VM information */ 467 /* XXX check proc_compose_imsg return values */ 468 if (vm->vm_state & VM_STATE_RECEIVED) 469 proc_compose_imsg(ps, PROC_VMM, -1, 470 IMSG_VMDOP_RECEIVE_VM_REQUEST, vm->vm_vmid, fd, vmc, 471 sizeof(struct vmop_create_params)); 472 else 473 proc_compose_imsg(ps, PROC_VMM, -1, 474 IMSG_VMDOP_START_VM_REQUEST, vm->vm_vmid, kernfd, 475 vmc, sizeof(*vmc)); 476 477 if (strlen(vcp->vcp_cdrom)) 478 proc_compose_imsg(ps, PROC_VMM, -1, 479 IMSG_VMDOP_START_VM_CDROM, vm->vm_vmid, cdromfd, 480 NULL, 0); 481 482 for (i = 0; i < vcp->vcp_ndisks; i++) { 483 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 484 if (diskfds[i][j] == -1) 485 break; 486 proc_compose_imsg(ps, PROC_VMM, -1, 487 IMSG_VMDOP_START_VM_DISK, vm->vm_vmid, 488 diskfds[i][j], &i, sizeof(i)); 489 } 490 } 491 for (i = 0; i < vcp->vcp_nnics; i++) { 492 proc_compose_imsg(ps, PROC_VMM, -1, 493 IMSG_VMDOP_START_VM_IF, vm->vm_vmid, tapfds[i], 494 &i, sizeof(i)); 495 496 memset(&var, 0, sizeof(var)); 497 var.var_vmid = vm->vm_vmid; 498 var.var_nic_idx = i; 499 proc_compose_imsg(ps, PROC_PRIV, -1, IMSG_VMDOP_PRIV_GET_ADDR, 500 vm->vm_vmid, dup(tapfds[i]), &var, sizeof(var)); 501 } 502 503 if (!(vm->vm_state & VM_STATE_RECEIVED)) 504 proc_compose_imsg(ps, PROC_VMM, -1, 505 IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd, NULL, 0); 506 507 free(tapfds); 508 509 /* Collapse any memranges after the vm was sent to PROC_VMM */ 510 if (vcp->vcp_nmemranges > 0) { 511 for (i = 0; i < vcp->vcp_nmemranges; i++) 512 bytes += vcp->vcp_memranges[i].vmr_size; 513 memset(&vcp->vcp_memranges, 0, sizeof(vcp->vcp_memranges)); 514 vcp->vcp_nmemranges = 0; 515 vcp->vcp_memranges[0].vmr_size = bytes; 516 } 517 vm->vm_state |= VM_STATE_RUNNING; 518 return (0); 519 520 fail: 521 log_warnx("failed to start vm %s", vcp->vcp_name); 522 523 if (kernfd != -1) 524 close(kernfd); 525 if (cdromfd != -1) 526 close(cdromfd); 527 for (i = 0; i < vcp->vcp_ndisks; i++) 528 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 529 if (diskfds[i][j] != -1) 530 close(diskfds[i][j]); 531 if (tapfds != NULL) { 532 for (i = 0; i < vcp->vcp_nnics; i++) 533 close(tapfds[i]); 534 free(tapfds); 535 } 536 537 if (vm->vm_from_config) { 538 vm_stop(vm, 0, __func__); 539 } else { 540 vm_remove(vm, __func__); 541 } 542 543 return (ret); 544 } 545 546 int 547 config_getvm(struct privsep *ps, struct imsg *imsg) 548 { 549 struct vmop_create_params vmc; 550 struct vmd_vm *vm; 551 552 IMSG_SIZE_CHECK(imsg, &vmc); 553 memcpy(&vmc, imsg->data, sizeof(vmc)); 554 555 errno = 0; 556 if (vm_register(ps, &vmc, &vm, imsg->hdr.peerid, 0) == -1) 557 goto fail; 558 559 /* If the fd is -1, the kernel will be searched on the disk */ 560 vm->vm_kernel = imsg->fd; 561 vm->vm_state |= VM_STATE_RUNNING; 562 vm->vm_peerid = (uint32_t)-1; 563 564 return (0); 565 566 fail: 567 if (imsg->fd != -1) { 568 close(imsg->fd); 569 imsg->fd = -1; 570 } 571 572 vm_remove(vm, __func__); 573 if (errno == 0) 574 errno = EINVAL; 575 576 return (-1); 577 } 578 579 int 580 config_getdisk(struct privsep *ps, struct imsg *imsg) 581 { 582 struct vmd_vm *vm; 583 unsigned int n, idx; 584 585 errno = 0; 586 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 587 errno = ENOENT; 588 return (-1); 589 } 590 591 IMSG_SIZE_CHECK(imsg, &n); 592 memcpy(&n, imsg->data, sizeof(n)); 593 594 if (n >= vm->vm_params.vmc_params.vcp_ndisks || imsg->fd == -1) { 595 log_warnx("invalid disk id"); 596 errno = EINVAL; 597 return (-1); 598 } 599 idx = vm->vm_params.vmc_diskbases[n]++; 600 if (idx >= VM_MAX_BASE_PER_DISK) { 601 log_warnx("too many bases for disk"); 602 errno = EINVAL; 603 return (-1); 604 } 605 vm->vm_disks[n][idx] = imsg->fd; 606 return (0); 607 } 608 609 int 610 config_getif(struct privsep *ps, struct imsg *imsg) 611 { 612 struct vmd_vm *vm; 613 unsigned int n; 614 615 errno = 0; 616 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 617 errno = ENOENT; 618 return (-1); 619 } 620 621 IMSG_SIZE_CHECK(imsg, &n); 622 memcpy(&n, imsg->data, sizeof(n)); 623 if (n >= vm->vm_params.vmc_params.vcp_nnics || 624 vm->vm_ifs[n].vif_fd != -1 || imsg->fd == -1) { 625 log_warnx("invalid interface id"); 626 goto fail; 627 } 628 vm->vm_ifs[n].vif_fd = imsg->fd; 629 return (0); 630 fail: 631 if (imsg->fd != -1) 632 close(imsg->fd); 633 errno = EINVAL; 634 return (-1); 635 } 636 637 int 638 config_getcdrom(struct privsep *ps, struct imsg *imsg) 639 { 640 struct vmd_vm *vm; 641 642 errno = 0; 643 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 644 errno = ENOENT; 645 return (-1); 646 } 647 648 if (imsg->fd == -1) { 649 log_warnx("invalid cdrom id"); 650 goto fail; 651 } 652 653 vm->vm_cdrom = imsg->fd; 654 return (0); 655 fail: 656 if (imsg->fd != -1) 657 close(imsg->fd); 658 errno = EINVAL; 659 return (-1); 660 } 661