1 /* $OpenBSD: config.c,v 1.61 2021/03/29 23:37:01 dv Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 #include <sys/queue.h> 21 #include <sys/time.h> 22 #include <sys/uio.h> 23 #include <sys/stat.h> 24 #include <sys/socket.h> 25 26 #include <net/if.h> 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <termios.h> 31 #include <unistd.h> 32 #include <limits.h> 33 #include <string.h> 34 #include <fcntl.h> 35 #include <util.h> 36 #include <errno.h> 37 #include <imsg.h> 38 39 #include "proc.h" 40 #include "vmd.h" 41 42 /* Supported bridge types */ 43 const char *vmd_descsw[] = { "switch", "bridge", "veb", NULL }; 44 45 static int config_init_localprefix(struct vmd_config *); 46 47 static int 48 config_init_localprefix(struct vmd_config *cfg) 49 { 50 struct sockaddr_in6 *sin6; 51 52 if (host(VMD_DHCP_PREFIX, &cfg->cfg_localprefix) == -1) 53 return (-1); 54 55 /* IPv6 is disabled by default */ 56 cfg->cfg_flags &= ~VMD_CFG_INET6; 57 58 /* Generate random IPv6 prefix only once */ 59 if (cfg->cfg_flags & VMD_CFG_AUTOINET6) 60 return (0); 61 if (host(VMD_ULA_PREFIX, &cfg->cfg_localprefix6) == -1) 62 return (-1); 63 /* Randomize the 56 bits "Global ID" and "Subnet ID" */ 64 sin6 = ss2sin6(&cfg->cfg_localprefix6.ss); 65 arc4random_buf(&sin6->sin6_addr.s6_addr[1], 7); 66 cfg->cfg_flags |= VMD_CFG_AUTOINET6; 67 68 return (0); 69 } 70 71 int 72 config_init(struct vmd *env) 73 { 74 struct privsep *ps = &env->vmd_ps; 75 unsigned int what; 76 77 /* Global configuration */ 78 ps->ps_what[PROC_PARENT] = CONFIG_ALL; 79 ps->ps_what[PROC_VMM] = CONFIG_VMS; 80 81 /* Local prefix */ 82 if (config_init_localprefix(&env->vmd_cfg) == -1) 83 return (-1); 84 85 /* Other configuration */ 86 what = ps->ps_what[privsep_process]; 87 if (what & CONFIG_VMS) { 88 if ((env->vmd_vms = calloc(1, sizeof(*env->vmd_vms))) == NULL) 89 return (-1); 90 if ((env->vmd_known = calloc(1, sizeof(*env->vmd_known))) == NULL) 91 return (-1); 92 TAILQ_INIT(env->vmd_vms); 93 TAILQ_INIT(env->vmd_known); 94 } 95 if (what & CONFIG_SWITCHES) { 96 if ((env->vmd_switches = calloc(1, 97 sizeof(*env->vmd_switches))) == NULL) 98 return (-1); 99 TAILQ_INIT(env->vmd_switches); 100 } 101 if (what & CONFIG_USERS) { 102 if ((env->vmd_users = calloc(1, 103 sizeof(*env->vmd_users))) == NULL) 104 return (-1); 105 TAILQ_INIT(env->vmd_users); 106 } 107 108 return (0); 109 } 110 111 void 112 config_purge(struct vmd *env, unsigned int reset) 113 { 114 struct privsep *ps = &env->vmd_ps; 115 struct name2id *n2i; 116 struct vmd_vm *vm; 117 struct vmd_switch *vsw; 118 unsigned int what; 119 120 DPRINTF("%s: %s purging vms and switches", 121 __func__, ps->ps_title[privsep_process]); 122 123 /* Reset global configuration (prefix was verified before) */ 124 config_init_localprefix(&env->vmd_cfg); 125 126 /* Reset other configuration */ 127 what = ps->ps_what[privsep_process] & reset; 128 if (what & CONFIG_VMS && env->vmd_vms != NULL) { 129 while ((vm = TAILQ_FIRST(env->vmd_vms)) != NULL) { 130 vm_remove(vm, __func__); 131 } 132 while ((n2i = TAILQ_FIRST(env->vmd_known)) != NULL) { 133 TAILQ_REMOVE(env->vmd_known, n2i, entry); 134 free(n2i); 135 } 136 env->vmd_nvm = 0; 137 } 138 if (what & CONFIG_SWITCHES && env->vmd_switches != NULL) { 139 while ((vsw = TAILQ_FIRST(env->vmd_switches)) != NULL) 140 switch_remove(vsw); 141 env->vmd_nswitches = 0; 142 } 143 } 144 145 int 146 config_setconfig(struct vmd *env) 147 { 148 struct privsep *ps = &env->vmd_ps; 149 unsigned int id; 150 151 DPRINTF("%s: setting config", __func__); 152 153 for (id = 0; id < PROC_MAX; id++) { 154 if (id == privsep_process) 155 continue; 156 proc_compose(ps, id, IMSG_VMDOP_CONFIG, &env->vmd_cfg, 157 sizeof(env->vmd_cfg)); 158 } 159 160 return (0); 161 } 162 163 int 164 config_getconfig(struct vmd *env, struct imsg *imsg) 165 { 166 struct privsep *ps = &env->vmd_ps; 167 168 log_debug("%s: %s retrieving config", 169 __func__, ps->ps_title[privsep_process]); 170 171 IMSG_SIZE_CHECK(imsg, &env->vmd_cfg); 172 memcpy(&env->vmd_cfg, imsg->data, sizeof(env->vmd_cfg)); 173 174 return (0); 175 } 176 177 int 178 config_setreset(struct vmd *env, unsigned int reset) 179 { 180 struct privsep *ps = &env->vmd_ps; 181 unsigned int id; 182 183 DPRINTF("%s: resetting state", __func__); 184 185 for (id = 0; id < PROC_MAX; id++) { 186 if ((reset & ps->ps_what[id]) == 0 || 187 id == privsep_process) 188 continue; 189 proc_compose(ps, id, IMSG_CTL_RESET, &reset, sizeof(reset)); 190 } 191 192 return (0); 193 } 194 195 int 196 config_getreset(struct vmd *env, struct imsg *imsg) 197 { 198 unsigned int mode; 199 200 IMSG_SIZE_CHECK(imsg, &mode); 201 memcpy(&mode, imsg->data, sizeof(mode)); 202 203 log_debug("%s: %s resetting state", 204 __func__, env->vmd_ps.ps_title[privsep_process]); 205 206 config_purge(env, mode); 207 208 return (0); 209 } 210 211 int 212 config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) 213 { 214 int diskfds[VMM_MAX_DISKS_PER_VM][VM_MAX_BASE_PER_DISK]; 215 struct vmd_if *vif; 216 struct vmop_create_params *vmc = &vm->vm_params; 217 struct vm_create_params *vcp = &vmc->vmc_params; 218 unsigned int i, j; 219 int fd = -1; 220 int kernfd = -1; 221 int *tapfds = NULL; 222 int cdromfd = -1; 223 int saved_errno = 0; 224 int n = 0, aflags, oflags; 225 char ifname[IF_NAMESIZE], *s; 226 char path[PATH_MAX]; 227 char base[PATH_MAX]; 228 unsigned int unit; 229 struct timeval tv, rate, since_last; 230 struct vmop_addr_req var; 231 232 errno = 0; 233 234 if (vm->vm_state & VM_STATE_RUNNING) { 235 log_warnx("%s: vm is already running", __func__); 236 errno = EALREADY; 237 return (-1); 238 } 239 240 /* increase the user reference counter and check user limits */ 241 if (vm->vm_user != NULL && user_get(vm->vm_user->usr_id.uid) != NULL) { 242 user_inc(vcp, vm->vm_user, 1); 243 if (user_checklimit(vm->vm_user, vcp) == -1) { 244 errno = EPERM; 245 goto fail; 246 } 247 } 248 249 /* 250 * Rate-limit the VM so that it cannot restart in a loop: 251 * if the VM restarts after less than VM_START_RATE_SEC seconds, 252 * we increment the limit counter. After VM_START_RATE_LIMIT 253 * of suchs fast reboots the VM is stopped. 254 */ 255 getmonotime(&tv); 256 if (vm->vm_start_tv.tv_sec) { 257 timersub(&tv, &vm->vm_start_tv, &since_last); 258 259 rate.tv_sec = VM_START_RATE_SEC; 260 rate.tv_usec = 0; 261 if (timercmp(&since_last, &rate, <)) 262 vm->vm_start_limit++; 263 else { 264 /* Reset counter */ 265 vm->vm_start_limit = 0; 266 } 267 268 log_debug("%s: vm %u restarted after %lld.%ld seconds," 269 " limit %d/%d", __func__, vcp->vcp_id, since_last.tv_sec, 270 since_last.tv_usec, vm->vm_start_limit, 271 VM_START_RATE_LIMIT); 272 273 if (vm->vm_start_limit >= VM_START_RATE_LIMIT) { 274 log_warnx("%s: vm %u restarted too quickly", 275 __func__, vcp->vcp_id); 276 errno = EPERM; 277 goto fail; 278 } 279 } 280 vm->vm_start_tv = tv; 281 282 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) 283 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 284 diskfds[i][j] = -1; 285 286 tapfds = reallocarray(NULL, vcp->vcp_nnics, sizeof(*tapfds)); 287 if (tapfds == NULL) { 288 log_warn("%s: can't allocate tap fds", __func__); 289 goto fail; 290 } 291 for (i = 0; i < vcp->vcp_nnics; i++) 292 tapfds[i] = -1; 293 294 vm->vm_peerid = peerid; 295 vm->vm_uid = uid; 296 297 if (!(vm->vm_state & VM_STATE_RECEIVED)) { 298 if (strlen(vcp->vcp_kernel)) { 299 /* Open external kernel for child */ 300 if ((kernfd = open(vcp->vcp_kernel, O_RDONLY)) == -1) { 301 log_warn("%s: can't open kernel or BIOS " 302 "boot image %s", __func__, vcp->vcp_kernel); 303 goto fail; 304 } 305 } 306 307 /* 308 * Try to open the default BIOS image if no kernel/BIOS has been 309 * specified. The BIOS is an external firmware file that is 310 * typically distributed separately due to an incompatible 311 * license. 312 */ 313 if (kernfd == -1 && 314 (kernfd = open(VM_DEFAULT_BIOS, O_RDONLY)) == -1) { 315 log_warn("can't open %s", VM_DEFAULT_BIOS); 316 errno = VMD_BIOS_MISSING; 317 goto fail; 318 } 319 320 if (vm_checkaccess(kernfd, 321 vmc->vmc_checkaccess & VMOP_CREATE_KERNEL, 322 uid, R_OK) == -1) { 323 log_warnx("vm \"%s\" no read access to kernel %s", 324 vcp->vcp_name, vcp->vcp_kernel); 325 errno = EPERM; 326 goto fail; 327 } 328 } 329 330 /* Open CDROM image for child */ 331 if (strlen(vcp->vcp_cdrom)) { 332 /* Stat cdrom to ensure it is a regular file */ 333 if ((cdromfd = 334 open(vcp->vcp_cdrom, O_RDONLY)) == -1) { 335 log_warn("can't open cdrom %s", vcp->vcp_cdrom); 336 errno = VMD_CDROM_MISSING; 337 goto fail; 338 } 339 340 if (vm_checkaccess(cdromfd, 341 vmc->vmc_checkaccess & VMOP_CREATE_CDROM, 342 uid, R_OK) == -1) { 343 log_warnx("vm \"%s\" no read access to cdrom %s", 344 vcp->vcp_name, vcp->vcp_cdrom); 345 errno = EPERM; 346 goto fail; 347 } 348 } 349 350 /* Open disk images for child */ 351 for (i = 0 ; i < vcp->vcp_ndisks; i++) { 352 if (strlcpy(path, vcp->vcp_disks[i], sizeof(path)) 353 >= sizeof(path)) 354 log_warnx("disk path %s too long", vcp->vcp_disks[i]); 355 memset(vmc->vmc_diskbases, 0, sizeof(vmc->vmc_diskbases)); 356 oflags = O_RDWR|O_EXLOCK|O_NONBLOCK; 357 aflags = R_OK|W_OK; 358 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 359 /* Stat disk[i] to ensure it is a regular file */ 360 if ((diskfds[i][j] = open(path, oflags)) == -1) { 361 log_warn("can't open disk %s", 362 vcp->vcp_disks[i]); 363 errno = VMD_DISK_MISSING; 364 goto fail; 365 } 366 367 if (vm_checkaccess(diskfds[i][j], 368 vmc->vmc_checkaccess & VMOP_CREATE_DISK, 369 uid, aflags) == -1) { 370 log_warnx("vm \"%s\" unable to access " 371 "disk %s", vcp->vcp_name, path); 372 errno = EPERM; 373 goto fail; 374 } 375 376 /* 377 * Clear the write and exclusive flags for base images. 378 * All writes should go to the top image, allowing them 379 * to be shared. 380 */ 381 oflags = O_RDONLY|O_NONBLOCK; 382 aflags = R_OK; 383 n = virtio_get_base(diskfds[i][j], base, sizeof(base), 384 vmc->vmc_disktypes[i], path); 385 if (n == 0) 386 break; 387 if (n == -1) { 388 log_warnx("vm \"%s\" unable to read " 389 "base %s for disk %s", vcp->vcp_name, 390 base, vcp->vcp_disks[i]); 391 goto fail; 392 } 393 (void)strlcpy(path, base, sizeof(path)); 394 } 395 } 396 397 /* Open network interfaces */ 398 for (i = 0 ; i < vcp->vcp_nnics; i++) { 399 vif = &vm->vm_ifs[i]; 400 401 /* Check if the user has requested a specific tap(4) */ 402 s = vmc->vmc_ifnames[i]; 403 if (*s != '\0' && strcmp("tap", s) != 0) { 404 if (priv_getiftype(s, ifname, &unit) == -1 || 405 strcmp(ifname, "tap") != 0) { 406 log_warnx("%s: invalid tap name %s", 407 __func__, s); 408 errno = EINVAL; 409 goto fail; 410 } 411 } else 412 s = NULL; 413 414 /* 415 * Either open the requested tap(4) device or get 416 * the next available one. 417 */ 418 if (s != NULL) { 419 snprintf(path, PATH_MAX, "/dev/%s", s); 420 tapfds[i] = open(path, O_RDWR | O_NONBLOCK); 421 } else { 422 tapfds[i] = opentap(ifname); 423 s = ifname; 424 } 425 if (tapfds[i] == -1) { 426 log_warn("%s: can't open tap %s", __func__, s); 427 goto fail; 428 } 429 if ((vif->vif_name = strdup(s)) == NULL) { 430 log_warn("%s: can't save tap %s", __func__, s); 431 goto fail; 432 } 433 434 /* Check if the the interface is attached to a switch */ 435 s = vmc->vmc_ifswitch[i]; 436 if (*s != '\0') { 437 if ((vif->vif_switch = strdup(s)) == NULL) { 438 log_warn("%s: can't save switch %s", 439 __func__, s); 440 goto fail; 441 } 442 } 443 444 /* Check if the the interface is assigned to a group */ 445 s = vmc->vmc_ifgroup[i]; 446 if (*s != '\0') { 447 if ((vif->vif_group = strdup(s)) == NULL) { 448 log_warn("%s: can't save group %s", 449 __func__, s); 450 goto fail; 451 } 452 } 453 454 /* non-default rdomain (requires VMIFF_RDOMAIN below) */ 455 vif->vif_rdomain = vmc->vmc_ifrdomain[i]; 456 457 /* Set the interface status */ 458 vif->vif_flags = 459 vmc->vmc_ifflags[i] & (VMIFF_UP|VMIFF_OPTMASK); 460 } 461 462 /* Open TTY */ 463 if (vm->vm_ttyname == NULL) { 464 if (vm_opentty(vm) == -1) { 465 log_warn("%s: can't open tty %s", __func__, 466 vm->vm_ttyname == NULL ? "" : vm->vm_ttyname); 467 goto fail; 468 } 469 } 470 if ((fd = dup(vm->vm_tty)) == -1) { 471 log_warn("%s: can't re-open tty %s", __func__, vm->vm_ttyname); 472 goto fail; 473 } 474 475 /* Send VM information */ 476 if (vm->vm_state & VM_STATE_RECEIVED) 477 proc_compose_imsg(ps, PROC_VMM, -1, 478 IMSG_VMDOP_RECEIVE_VM_REQUEST, vm->vm_vmid, fd, vmc, 479 sizeof(struct vmop_create_params)); 480 else 481 proc_compose_imsg(ps, PROC_VMM, -1, 482 IMSG_VMDOP_START_VM_REQUEST, vm->vm_vmid, kernfd, 483 vmc, sizeof(*vmc)); 484 485 if (strlen(vcp->vcp_cdrom)) 486 proc_compose_imsg(ps, PROC_VMM, -1, 487 IMSG_VMDOP_START_VM_CDROM, vm->vm_vmid, cdromfd, 488 NULL, 0); 489 490 for (i = 0; i < vcp->vcp_ndisks; i++) { 491 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 492 if (diskfds[i][j] == -1) 493 break; 494 proc_compose_imsg(ps, PROC_VMM, -1, 495 IMSG_VMDOP_START_VM_DISK, vm->vm_vmid, 496 diskfds[i][j], &i, sizeof(i)); 497 } 498 } 499 for (i = 0; i < vcp->vcp_nnics; i++) { 500 proc_compose_imsg(ps, PROC_VMM, -1, 501 IMSG_VMDOP_START_VM_IF, vm->vm_vmid, tapfds[i], 502 &i, sizeof(i)); 503 504 memset(&var, 0, sizeof(var)); 505 var.var_vmid = vm->vm_vmid; 506 var.var_nic_idx = i; 507 proc_compose_imsg(ps, PROC_PRIV, -1, IMSG_VMDOP_PRIV_GET_ADDR, 508 vm->vm_vmid, dup(tapfds[i]), &var, sizeof(var)); 509 } 510 511 if (!(vm->vm_state & VM_STATE_RECEIVED)) 512 proc_compose_imsg(ps, PROC_VMM, -1, 513 IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd, NULL, 0); 514 515 free(tapfds); 516 517 vm->vm_state |= VM_STATE_RUNNING; 518 return (0); 519 520 fail: 521 saved_errno = errno; 522 log_warnx("failed to start vm %s", vcp->vcp_name); 523 524 if (kernfd != -1) 525 close(kernfd); 526 if (cdromfd != -1) 527 close(cdromfd); 528 for (i = 0; i < vcp->vcp_ndisks; i++) 529 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 530 if (diskfds[i][j] != -1) 531 close(diskfds[i][j]); 532 if (tapfds != NULL) { 533 for (i = 0; i < vcp->vcp_nnics; i++) 534 close(tapfds[i]); 535 free(tapfds); 536 } 537 538 if (vm->vm_from_config) { 539 vm_stop(vm, 0, __func__); 540 } else { 541 vm_remove(vm, __func__); 542 } 543 errno = saved_errno; 544 if (errno == 0) 545 errno = EINVAL; 546 return (-1); 547 } 548 549 int 550 config_getvm(struct privsep *ps, struct imsg *imsg) 551 { 552 struct vmop_create_params vmc; 553 struct vmd_vm *vm; 554 555 IMSG_SIZE_CHECK(imsg, &vmc); 556 memcpy(&vmc, imsg->data, sizeof(vmc)); 557 558 errno = 0; 559 if (vm_register(ps, &vmc, &vm, imsg->hdr.peerid, 0) == -1) 560 goto fail; 561 562 /* If the fd is -1, the kernel will be searched on the disk */ 563 vm->vm_kernel = imsg->fd; 564 vm->vm_state |= VM_STATE_RUNNING; 565 vm->vm_peerid = (uint32_t)-1; 566 567 return (0); 568 569 fail: 570 if (imsg->fd != -1) { 571 close(imsg->fd); 572 imsg->fd = -1; 573 } 574 575 vm_remove(vm, __func__); 576 if (errno == 0) 577 errno = EINVAL; 578 579 return (-1); 580 } 581 582 int 583 config_getdisk(struct privsep *ps, struct imsg *imsg) 584 { 585 struct vmd_vm *vm; 586 unsigned int n, idx; 587 588 errno = 0; 589 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 590 errno = ENOENT; 591 return (-1); 592 } 593 594 IMSG_SIZE_CHECK(imsg, &n); 595 memcpy(&n, imsg->data, sizeof(n)); 596 597 if (n >= vm->vm_params.vmc_params.vcp_ndisks || imsg->fd == -1) { 598 log_warnx("invalid disk id"); 599 errno = EINVAL; 600 return (-1); 601 } 602 idx = vm->vm_params.vmc_diskbases[n]++; 603 if (idx >= VM_MAX_BASE_PER_DISK) { 604 log_warnx("too many bases for disk"); 605 errno = EINVAL; 606 return (-1); 607 } 608 vm->vm_disks[n][idx] = imsg->fd; 609 return (0); 610 } 611 612 int 613 config_getif(struct privsep *ps, struct imsg *imsg) 614 { 615 struct vmd_vm *vm; 616 unsigned int n; 617 618 errno = 0; 619 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 620 errno = ENOENT; 621 return (-1); 622 } 623 624 IMSG_SIZE_CHECK(imsg, &n); 625 memcpy(&n, imsg->data, sizeof(n)); 626 if (n >= vm->vm_params.vmc_params.vcp_nnics || 627 vm->vm_ifs[n].vif_fd != -1 || imsg->fd == -1) { 628 log_warnx("invalid interface id"); 629 goto fail; 630 } 631 vm->vm_ifs[n].vif_fd = imsg->fd; 632 return (0); 633 fail: 634 if (imsg->fd != -1) 635 close(imsg->fd); 636 errno = EINVAL; 637 return (-1); 638 } 639 640 int 641 config_getcdrom(struct privsep *ps, struct imsg *imsg) 642 { 643 struct vmd_vm *vm; 644 645 errno = 0; 646 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 647 errno = ENOENT; 648 return (-1); 649 } 650 651 if (imsg->fd == -1) { 652 log_warnx("invalid cdrom id"); 653 goto fail; 654 } 655 656 vm->vm_cdrom = imsg->fd; 657 return (0); 658 fail: 659 if (imsg->fd != -1) 660 close(imsg->fd); 661 errno = EINVAL; 662 return (-1); 663 } 664