1 /* $OpenBSD: config.c,v 1.54 2018/10/26 11:24:45 reyk Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 #include <sys/queue.h> 21 #include <sys/time.h> 22 #include <sys/uio.h> 23 #include <sys/stat.h> 24 #include <sys/socket.h> 25 26 #include <net/if.h> 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <termios.h> 31 #include <unistd.h> 32 #include <limits.h> 33 #include <string.h> 34 #include <fcntl.h> 35 #include <util.h> 36 #include <errno.h> 37 #include <imsg.h> 38 39 #include "proc.h" 40 #include "vmd.h" 41 42 /* Supported bridge types */ 43 const char *vmd_descsw[] = { "switch", "bridge", NULL }; 44 45 int 46 config_init(struct vmd *env) 47 { 48 struct privsep *ps = &env->vmd_ps; 49 unsigned int what; 50 51 /* Global configuration */ 52 ps->ps_what[PROC_PARENT] = CONFIG_ALL; 53 ps->ps_what[PROC_VMM] = CONFIG_VMS; 54 55 if (host(VMD_DHCP_PREFIX, &env->vmd_cfg.cfg_localprefix) == -1) 56 return (-1); 57 58 /* Other configuration */ 59 what = ps->ps_what[privsep_process]; 60 if (what & CONFIG_VMS) { 61 if ((env->vmd_vms = calloc(1, sizeof(*env->vmd_vms))) == NULL) 62 return (-1); 63 TAILQ_INIT(env->vmd_vms); 64 } 65 if (what & CONFIG_SWITCHES) { 66 if ((env->vmd_switches = calloc(1, 67 sizeof(*env->vmd_switches))) == NULL) 68 return (-1); 69 TAILQ_INIT(env->vmd_switches); 70 } 71 if (what & CONFIG_USERS) { 72 if ((env->vmd_users = calloc(1, 73 sizeof(*env->vmd_users))) == NULL) 74 return (-1); 75 TAILQ_INIT(env->vmd_users); 76 } 77 78 return (0); 79 } 80 81 void 82 config_purge(struct vmd *env, unsigned int reset) 83 { 84 struct privsep *ps = &env->vmd_ps; 85 struct vmd_vm *vm; 86 struct vmd_switch *vsw; 87 unsigned int what; 88 89 DPRINTF("%s: %s purging vms and switches", 90 __func__, ps->ps_title[privsep_process]); 91 92 /* Reset global configuration (prefix was verified before) */ 93 (void)host(VMD_DHCP_PREFIX, &env->vmd_cfg.cfg_localprefix); 94 95 /* Reset other configuration */ 96 what = ps->ps_what[privsep_process] & reset; 97 if (what & CONFIG_VMS && env->vmd_vms != NULL) { 98 while ((vm = TAILQ_FIRST(env->vmd_vms)) != NULL) { 99 vm_remove(vm, __func__); 100 } 101 env->vmd_nvm = 0; 102 } 103 if (what & CONFIG_SWITCHES && env->vmd_switches != NULL) { 104 while ((vsw = TAILQ_FIRST(env->vmd_switches)) != NULL) 105 switch_remove(vsw); 106 env->vmd_nswitches = 0; 107 } 108 } 109 110 int 111 config_setconfig(struct vmd *env) 112 { 113 struct privsep *ps = &env->vmd_ps; 114 unsigned int id; 115 116 DPRINTF("%s: setting config", __func__); 117 118 for (id = 0; id < PROC_MAX; id++) { 119 if (id == privsep_process) 120 continue; 121 proc_compose(ps, id, IMSG_VMDOP_CONFIG, &env->vmd_cfg, 122 sizeof(env->vmd_cfg)); 123 } 124 125 return (0); 126 } 127 128 int 129 config_getconfig(struct vmd *env, struct imsg *imsg) 130 { 131 struct privsep *ps = &env->vmd_ps; 132 133 log_debug("%s: %s retrieving config", 134 __func__, ps->ps_title[privsep_process]); 135 136 IMSG_SIZE_CHECK(imsg, &env->vmd_cfg); 137 memcpy(&env->vmd_cfg, imsg->data, sizeof(env->vmd_cfg)); 138 139 return (0); 140 } 141 142 int 143 config_setreset(struct vmd *env, unsigned int reset) 144 { 145 struct privsep *ps = &env->vmd_ps; 146 unsigned int id; 147 148 DPRINTF("%s: resetting state", __func__); 149 150 for (id = 0; id < PROC_MAX; id++) { 151 if ((reset & ps->ps_what[id]) == 0 || 152 id == privsep_process) 153 continue; 154 proc_compose(ps, id, IMSG_CTL_RESET, &reset, sizeof(reset)); 155 } 156 157 return (0); 158 } 159 160 int 161 config_getreset(struct vmd *env, struct imsg *imsg) 162 { 163 unsigned int mode; 164 165 IMSG_SIZE_CHECK(imsg, &mode); 166 memcpy(&mode, imsg->data, sizeof(mode)); 167 168 log_debug("%s: %s resetting state", 169 __func__, env->vmd_ps.ps_title[privsep_process]); 170 171 config_purge(env, mode); 172 173 return (0); 174 } 175 176 int 177 config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) 178 { 179 int diskfds[VMM_MAX_DISKS_PER_VM][VM_MAX_BASE_PER_DISK]; 180 struct vmd_if *vif; 181 struct vmop_create_params *vmc = &vm->vm_params; 182 struct vm_create_params *vcp = &vmc->vmc_params; 183 unsigned int i, j; 184 int fd = -1, vmboot = 0; 185 int kernfd = -1; 186 int *tapfds = NULL; 187 int cdromfd = -1; 188 int saved_errno = 0; 189 int n = 0, aflags, oflags; 190 char ifname[IF_NAMESIZE], *s; 191 char path[PATH_MAX]; 192 char base[PATH_MAX]; 193 unsigned int unit; 194 struct timeval tv, rate, since_last; 195 196 errno = 0; 197 198 if (vm->vm_running) { 199 log_warnx("%s: vm is already running", __func__); 200 errno = EALREADY; 201 return (-1); 202 } 203 204 /* increase the user reference counter and check user limits */ 205 if (vm->vm_user != NULL && user_get(vm->vm_user->usr_id.uid) != NULL) { 206 user_inc(vcp, vm->vm_user, 1); 207 if (user_checklimit(vm->vm_user, vcp) == -1) { 208 errno = EPERM; 209 goto fail; 210 } 211 } 212 213 /* 214 * Rate-limit the VM so that it cannot restart in a loop: 215 * if the VM restarts after less than VM_START_RATE_SEC seconds, 216 * we increment the limit counter. After VM_START_RATE_LIMIT 217 * of suchs fast reboots the VM is stopped. 218 */ 219 getmonotime(&tv); 220 if (vm->vm_start_tv.tv_sec) { 221 timersub(&tv, &vm->vm_start_tv, &since_last); 222 223 rate.tv_sec = VM_START_RATE_SEC; 224 rate.tv_usec = 0; 225 if (timercmp(&since_last, &rate, <)) 226 vm->vm_start_limit++; 227 else { 228 /* Reset counter */ 229 vm->vm_start_limit = 0; 230 } 231 232 log_debug("%s: vm %u restarted after %lld.%ld seconds," 233 " limit %d/%d", __func__, vcp->vcp_id, since_last.tv_sec, 234 since_last.tv_usec, vm->vm_start_limit, 235 VM_START_RATE_LIMIT); 236 237 if (vm->vm_start_limit >= VM_START_RATE_LIMIT) { 238 log_warnx("%s: vm %u restarted too quickly", 239 __func__, vcp->vcp_id); 240 errno = EPERM; 241 goto fail; 242 } 243 } 244 vm->vm_start_tv = tv; 245 246 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) 247 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 248 diskfds[i][j] = -1; 249 250 tapfds = reallocarray(NULL, vcp->vcp_nnics, sizeof(*tapfds)); 251 if (tapfds == NULL) { 252 log_warn("%s: can't allocate tap fds", __func__); 253 goto fail; 254 } 255 for (i = 0; i < vcp->vcp_nnics; i++) 256 tapfds[i] = -1; 257 258 vm->vm_peerid = peerid; 259 vm->vm_uid = uid; 260 261 if (!vm->vm_received) { 262 if (strlen(vcp->vcp_kernel)) { 263 /* 264 * Boot kernel from disk image if path matches the 265 * root disk. 266 */ 267 if (vcp->vcp_ndisks && 268 strcmp(vcp->vcp_kernel, vcp->vcp_disks[0]) == 0) 269 vmboot = 1; 270 /* Open external kernel for child */ 271 else if ((kernfd = 272 open(vcp->vcp_kernel, O_RDONLY)) == -1) { 273 log_warn("%s: can't open kernel or BIOS " 274 "boot image %s", __func__, vcp->vcp_kernel); 275 goto fail; 276 } 277 } 278 279 /* 280 * Try to open the default BIOS image if no kernel/BIOS has been 281 * specified. The BIOS is an external firmware file that is 282 * typically distributed separately due to an incompatible 283 * license. 284 */ 285 if (kernfd == -1 && !vmboot && 286 (kernfd = open(VM_DEFAULT_BIOS, O_RDONLY)) == -1) { 287 log_warn("%s: can't open %s", __func__, 288 VM_DEFAULT_BIOS); 289 errno = VMD_BIOS_MISSING; 290 goto fail; 291 } 292 293 if (!vmboot && vm_checkaccess(kernfd, 294 vmc->vmc_checkaccess & VMOP_CREATE_KERNEL, 295 uid, R_OK) == -1) { 296 log_warnx("vm \"%s\" no read access to kernel %s", 297 vcp->vcp_name, vcp->vcp_kernel); 298 errno = EPERM; 299 goto fail; 300 } 301 } 302 303 /* Open CDROM image for child */ 304 if (strlen(vcp->vcp_cdrom)) { 305 /* Stat cdrom to ensure it is a regular file */ 306 if ((cdromfd = 307 open(vcp->vcp_cdrom, O_RDONLY)) == -1) { 308 log_warn("%s: can't open cdrom %s", __func__, 309 vcp->vcp_cdrom); 310 errno = VMD_CDROM_MISSING; 311 goto fail; 312 } 313 314 if (vm_checkaccess(cdromfd, 315 vmc->vmc_checkaccess & VMOP_CREATE_CDROM, 316 uid, R_OK) == -1) { 317 log_warnx("vm \"%s\" no read access to cdrom %s", 318 vcp->vcp_name, vcp->vcp_cdrom); 319 errno = EPERM; 320 goto fail; 321 } 322 } 323 324 /* Open disk images for child */ 325 for (i = 0 ; i < vcp->vcp_ndisks; i++) { 326 if (strlcpy(path, vcp->vcp_disks[i], sizeof(path)) 327 >= sizeof(path)) 328 log_warnx("%s, disk path too long", __func__); 329 memset(vmc->vmc_diskbases, 0, sizeof(vmc->vmc_diskbases)); 330 oflags = O_RDWR|O_EXLOCK|O_NONBLOCK; 331 aflags = R_OK|W_OK; 332 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 333 /* Stat disk[i] to ensure it is a regular file */ 334 if ((diskfds[i][j] = open(path, oflags)) == -1) { 335 log_warn("%s: can't open disk %s", __func__, 336 vcp->vcp_disks[i]); 337 errno = VMD_DISK_MISSING; 338 goto fail; 339 } 340 341 if (vm_checkaccess(diskfds[i][j], 342 vmc->vmc_checkaccess & VMOP_CREATE_DISK, 343 uid, aflags) == -1) { 344 log_warnx("vm \"%s\" unable to access " 345 "disk %s", vcp->vcp_name, path); 346 errno = EPERM; 347 goto fail; 348 } 349 350 /* 351 * Clear the write and exclusive flags for base images. 352 * All writes should go to the top image, allowing them 353 * to be shared. 354 */ 355 oflags = O_RDONLY|O_NONBLOCK; 356 aflags = R_OK; 357 n = virtio_get_base(diskfds[i][j], base, sizeof(base), 358 vmc->vmc_disktypes[i], path); 359 if (n == 0) 360 break; 361 if (n == -1) { 362 log_warnx("vm \"%s\" unable to read " 363 "base %s for disk %s", vcp->vcp_name, 364 base, vcp->vcp_disks[i]); 365 goto fail; 366 } 367 (void)strlcpy(path, base, sizeof(path)); 368 } 369 } 370 371 /* Open network interfaces */ 372 for (i = 0 ; i < vcp->vcp_nnics; i++) { 373 vif = &vm->vm_ifs[i]; 374 375 /* Check if the user has requested a specific tap(4) */ 376 s = vmc->vmc_ifnames[i]; 377 if (*s != '\0' && strcmp("tap", s) != 0) { 378 if (priv_getiftype(s, ifname, &unit) == -1 || 379 strcmp(ifname, "tap") != 0) { 380 log_warnx("%s: invalid tap name %s", 381 __func__, s); 382 errno = EINVAL; 383 goto fail; 384 } 385 } else 386 s = NULL; 387 388 /* 389 * Either open the requested tap(4) device or get 390 * the next available one. 391 */ 392 if (s != NULL) { 393 snprintf(path, PATH_MAX, "/dev/%s", s); 394 tapfds[i] = open(path, O_RDWR | O_NONBLOCK); 395 } else { 396 tapfds[i] = opentap(ifname); 397 s = ifname; 398 } 399 if (tapfds[i] == -1) { 400 log_warn("%s: can't open tap %s", __func__, s); 401 goto fail; 402 } 403 if ((vif->vif_name = strdup(s)) == NULL) { 404 log_warn("%s: can't save tap %s", __func__, s); 405 goto fail; 406 } 407 408 /* Check if the the interface is attached to a switch */ 409 s = vmc->vmc_ifswitch[i]; 410 if (*s != '\0') { 411 if ((vif->vif_switch = strdup(s)) == NULL) { 412 log_warn("%s: can't save switch %s", 413 __func__, s); 414 goto fail; 415 } 416 } 417 418 /* Check if the the interface is assigned to a group */ 419 s = vmc->vmc_ifgroup[i]; 420 if (*s != '\0') { 421 if ((vif->vif_group = strdup(s)) == NULL) { 422 log_warn("%s: can't save group %s", 423 __func__, s); 424 goto fail; 425 } 426 } 427 428 /* non-default rdomain (requires VMIFF_RDOMAIN below) */ 429 vif->vif_rdomain = vmc->vmc_ifrdomain[i]; 430 431 /* Set the interface status */ 432 vif->vif_flags = 433 vmc->vmc_ifflags[i] & (VMIFF_UP|VMIFF_OPTMASK); 434 } 435 436 /* Open TTY */ 437 if (vm->vm_ttyname == NULL) { 438 if (vm_opentty(vm) == -1) { 439 log_warn("%s: can't open tty %s", __func__, 440 vm->vm_ttyname == NULL ? "" : vm->vm_ttyname); 441 goto fail; 442 } 443 } 444 if ((fd = dup(vm->vm_tty)) == -1) { 445 log_warn("%s: can't re-open tty %s", __func__, vm->vm_ttyname); 446 goto fail; 447 } 448 449 /* Send VM information */ 450 if (vm->vm_received) 451 proc_compose_imsg(ps, PROC_VMM, -1, 452 IMSG_VMDOP_RECEIVE_VM_REQUEST, vm->vm_vmid, fd, vmc, 453 sizeof(struct vmop_create_params)); 454 else 455 proc_compose_imsg(ps, PROC_VMM, -1, 456 IMSG_VMDOP_START_VM_REQUEST, vm->vm_vmid, kernfd, 457 vmc, sizeof(*vmc)); 458 459 if (strlen(vcp->vcp_cdrom)) 460 proc_compose_imsg(ps, PROC_VMM, -1, 461 IMSG_VMDOP_START_VM_CDROM, vm->vm_vmid, cdromfd, 462 NULL, 0); 463 464 for (i = 0; i < vcp->vcp_ndisks; i++) { 465 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 466 if (diskfds[i][j] == -1) 467 break; 468 proc_compose_imsg(ps, PROC_VMM, -1, 469 IMSG_VMDOP_START_VM_DISK, vm->vm_vmid, 470 diskfds[i][j], &i, sizeof(i)); 471 } 472 } 473 for (i = 0; i < vcp->vcp_nnics; i++) { 474 proc_compose_imsg(ps, PROC_VMM, -1, 475 IMSG_VMDOP_START_VM_IF, vm->vm_vmid, tapfds[i], 476 &i, sizeof(i)); 477 } 478 479 if (!vm->vm_received) 480 proc_compose_imsg(ps, PROC_VMM, -1, 481 IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd, NULL, 0); 482 483 free(tapfds); 484 485 vm->vm_running = 1; 486 return (0); 487 488 fail: 489 saved_errno = errno; 490 log_warnx("%s: failed to start vm %s", __func__, vcp->vcp_name); 491 492 if (kernfd != -1) 493 close(kernfd); 494 if (cdromfd != -1) 495 close(cdromfd); 496 for (i = 0; i < vcp->vcp_ndisks; i++) 497 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 498 if (diskfds[i][j] != -1) 499 close(diskfds[i][j]); 500 if (tapfds != NULL) { 501 for (i = 0; i < vcp->vcp_nnics; i++) 502 close(tapfds[i]); 503 free(tapfds); 504 } 505 506 if (vm->vm_from_config) { 507 vm_stop(vm, 0, __func__); 508 } else { 509 vm_remove(vm, __func__); 510 } 511 errno = saved_errno; 512 if (errno == 0) 513 errno = EINVAL; 514 return (-1); 515 } 516 517 int 518 config_getvm(struct privsep *ps, struct imsg *imsg) 519 { 520 struct vmop_create_params vmc; 521 struct vmd_vm *vm; 522 523 IMSG_SIZE_CHECK(imsg, &vmc); 524 memcpy(&vmc, imsg->data, sizeof(vmc)); 525 526 errno = 0; 527 if (vm_register(ps, &vmc, &vm, imsg->hdr.peerid, 0) == -1) 528 goto fail; 529 530 /* If the fd is -1, the kernel will be searched on the disk */ 531 vm->vm_kernel = imsg->fd; 532 vm->vm_running = 1; 533 vm->vm_peerid = (uint32_t)-1; 534 535 return (0); 536 537 fail: 538 if (imsg->fd != -1) { 539 close(imsg->fd); 540 imsg->fd = -1; 541 } 542 543 vm_remove(vm, __func__); 544 if (errno == 0) 545 errno = EINVAL; 546 547 return (-1); 548 } 549 550 int 551 config_getdisk(struct privsep *ps, struct imsg *imsg) 552 { 553 struct vmd_vm *vm; 554 unsigned int n, idx; 555 556 errno = 0; 557 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 558 errno = ENOENT; 559 return (-1); 560 } 561 562 IMSG_SIZE_CHECK(imsg, &n); 563 memcpy(&n, imsg->data, sizeof(n)); 564 565 if (n >= vm->vm_params.vmc_params.vcp_ndisks || imsg->fd == -1) { 566 log_warnx("invalid disk id"); 567 errno = EINVAL; 568 return (-1); 569 } 570 idx = vm->vm_params.vmc_diskbases[n]++; 571 if (idx >= VM_MAX_BASE_PER_DISK) { 572 log_warnx("too many bases for disk"); 573 errno = EINVAL; 574 return (-1); 575 } 576 vm->vm_disks[n][idx] = imsg->fd; 577 return (0); 578 } 579 580 int 581 config_getif(struct privsep *ps, struct imsg *imsg) 582 { 583 struct vmd_vm *vm; 584 unsigned int n; 585 586 errno = 0; 587 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 588 errno = ENOENT; 589 return (-1); 590 } 591 592 IMSG_SIZE_CHECK(imsg, &n); 593 memcpy(&n, imsg->data, sizeof(n)); 594 if (n >= vm->vm_params.vmc_params.vcp_nnics || 595 vm->vm_ifs[n].vif_fd != -1 || imsg->fd == -1) { 596 log_warnx("invalid interface id"); 597 goto fail; 598 } 599 vm->vm_ifs[n].vif_fd = imsg->fd; 600 return (0); 601 fail: 602 if (imsg->fd != -1) 603 close(imsg->fd); 604 errno = EINVAL; 605 return (-1); 606 } 607 608 int 609 config_getcdrom(struct privsep *ps, struct imsg *imsg) 610 { 611 struct vmd_vm *vm; 612 613 errno = 0; 614 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 615 errno = ENOENT; 616 return (-1); 617 } 618 619 if (imsg->fd == -1) { 620 log_warnx("invalid cdrom id"); 621 goto fail; 622 } 623 624 vm->vm_cdrom = imsg->fd; 625 return (0); 626 fail: 627 if (imsg->fd != -1) 628 close(imsg->fd); 629 errno = EINVAL; 630 return (-1); 631 } 632