1 /* $OpenBSD: virtio.c,v 1.97 2021/08/29 18:01:32 dv Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> /* PAGE_SIZE */ 20 #include <sys/socket.h> 21 22 #include <machine/vmmvar.h> 23 #include <dev/pci/pcireg.h> 24 #include <dev/pci/pcidevs.h> 25 #include <dev/pv/virtioreg.h> 26 #include <dev/pci/virtio_pcireg.h> 27 #include <dev/pv/vioblkreg.h> 28 #include <dev/pv/vioscsireg.h> 29 30 #include <net/if.h> 31 #include <netinet/in.h> 32 #include <netinet/if_ether.h> 33 #include <netinet/ip.h> 34 35 #include <errno.h> 36 #include <event.h> 37 #include <poll.h> 38 #include <stddef.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <unistd.h> 42 43 #include "atomicio.h" 44 #include "pci.h" 45 #include "vioscsi.h" 46 #include "virtio.h" 47 #include "vmd.h" 48 #include "vmm.h" 49 50 extern char *__progname; 51 struct viornd_dev viornd; 52 struct vioblk_dev *vioblk; 53 struct vionet_dev *vionet; 54 struct vioscsi_dev *vioscsi; 55 struct vmmci_dev vmmci; 56 57 int nr_vionet; 58 int nr_vioblk; 59 60 #define MAXPHYS (64 * 1024) /* max raw I/O transfer size */ 61 62 #define VIRTIO_NET_F_MAC (1<<5) 63 64 #define VMMCI_F_TIMESYNC (1<<0) 65 #define VMMCI_F_ACK (1<<1) 66 #define VMMCI_F_SYNCRTC (1<<2) 67 68 #define RXQ 0 69 #define TXQ 1 70 71 const char * 72 vioblk_cmd_name(uint32_t type) 73 { 74 switch (type) { 75 case VIRTIO_BLK_T_IN: return "read"; 76 case VIRTIO_BLK_T_OUT: return "write"; 77 case VIRTIO_BLK_T_SCSI_CMD: return "scsi read"; 78 case VIRTIO_BLK_T_SCSI_CMD_OUT: return "scsi write"; 79 case VIRTIO_BLK_T_FLUSH: return "flush"; 80 case VIRTIO_BLK_T_FLUSH_OUT: return "flush out"; 81 case VIRTIO_BLK_T_GET_ID: return "get id"; 82 default: return "unknown"; 83 } 84 } 85 86 static const char * 87 virtio_reg_name(uint8_t reg) 88 { 89 switch (reg) { 90 case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature"; 91 case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature"; 92 case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address"; 93 case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size"; 94 case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select"; 95 case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify"; 96 case VIRTIO_CONFIG_DEVICE_STATUS: return "device status"; 97 case VIRTIO_CONFIG_ISR_STATUS: return "isr status"; 98 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: return "device config 0"; 99 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: return "device config 1"; 100 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "device config 2"; 101 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: return "device config 3"; 102 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: return "device config 4"; 103 default: return "unknown"; 104 } 105 } 106 107 uint32_t 108 vring_size(uint32_t vq_size) 109 { 110 uint32_t allocsize1, 
allocsize2; 111 112 /* allocsize1: descriptor table + avail ring + pad */ 113 allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size 114 + sizeof(uint16_t) * (2 + vq_size)); 115 /* allocsize2: used ring + pad */ 116 allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * 2 117 + sizeof(struct vring_used_elem) * vq_size); 118 119 return allocsize1 + allocsize2; 120 } 121 122 /* Update queue select */ 123 void 124 viornd_update_qs(void) 125 { 126 /* Invalid queue? */ 127 if (viornd.cfg.queue_select > 0) { 128 viornd.cfg.queue_size = 0; 129 return; 130 } 131 132 /* Update queue address/size based on queue select */ 133 viornd.cfg.queue_address = viornd.vq[viornd.cfg.queue_select].qa; 134 viornd.cfg.queue_size = viornd.vq[viornd.cfg.queue_select].qs; 135 } 136 137 /* Update queue address */ 138 void 139 viornd_update_qa(void) 140 { 141 /* Invalid queue? */ 142 if (viornd.cfg.queue_select > 0) 143 return; 144 145 viornd.vq[viornd.cfg.queue_select].qa = viornd.cfg.queue_address; 146 } 147 148 int 149 viornd_notifyq(void) 150 { 151 uint64_t q_gpa; 152 uint32_t vr_sz; 153 size_t sz; 154 int dxx, ret; 155 uint16_t aidx, uidx; 156 char *buf, *rnd_data; 157 struct vring_desc *desc; 158 struct vring_avail *avail; 159 struct vring_used *used; 160 161 ret = 0; 162 163 /* Invalid queue? */ 164 if (viornd.cfg.queue_notify > 0) 165 return (0); 166 167 vr_sz = vring_size(VIORND_QUEUE_SIZE); 168 q_gpa = viornd.vq[viornd.cfg.queue_notify].qa; 169 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 170 171 buf = calloc(1, vr_sz); 172 if (buf == NULL) { 173 log_warn("calloc error getting viornd ring"); 174 return (0); 175 } 176 177 if (read_mem(q_gpa, buf, vr_sz)) { 178 free(buf); 179 return (0); 180 } 181 182 desc = (struct vring_desc *)(buf); 183 avail = (struct vring_avail *)(buf + 184 viornd.vq[viornd.cfg.queue_notify].vq_availoffset); 185 used = (struct vring_used *)(buf + 186 viornd.vq[viornd.cfg.queue_notify].vq_usedoffset); 187 188 aidx = avail->idx & VIORND_QUEUE_MASK; 189 uidx = used->idx & VIORND_QUEUE_MASK; 190 191 dxx = avail->ring[aidx] & VIORND_QUEUE_MASK; 192 193 sz = desc[dxx].len; 194 if (sz > MAXPHYS) 195 fatalx("viornd descriptor size too large (%zu)", sz); 196 197 rnd_data = malloc(sz); 198 199 if (rnd_data != NULL) { 200 arc4random_buf(rnd_data, sz); 201 if (write_mem(desc[dxx].addr, rnd_data, sz)) { 202 log_warnx("viornd: can't write random data @ " 203 "0x%llx", 204 desc[dxx].addr); 205 } else { 206 /* ret == 1 -> interrupt needed */ 207 /* XXX check VIRTIO_F_NO_INTR */ 208 ret = 1; 209 viornd.cfg.isr_status = 1; 210 used->ring[uidx].id = dxx; 211 used->ring[uidx].len = sz; 212 used->idx++; 213 214 if (write_mem(q_gpa, buf, vr_sz)) { 215 log_warnx("viornd: error writing vio ring"); 216 } 217 } 218 free(rnd_data); 219 } else 220 fatal("memory allocation error for viornd data"); 221 222 free(buf); 223 224 return (ret); 225 } 226 227 int 228 virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 229 void *unused, uint8_t sz) 230 { 231 *intr = 0xFF; 232 233 if (dir == 0) { 234 switch (reg) { 235 case VIRTIO_CONFIG_DEVICE_FEATURES: 236 case VIRTIO_CONFIG_QUEUE_SIZE: 237 case VIRTIO_CONFIG_ISR_STATUS: 238 log_warnx("%s: illegal write %x to %s", 239 __progname, *data, virtio_reg_name(reg)); 240 break; 241 case VIRTIO_CONFIG_GUEST_FEATURES: 242 viornd.cfg.guest_feature = *data; 243 break; 244 case VIRTIO_CONFIG_QUEUE_ADDRESS: 245 viornd.cfg.queue_address = *data; 246 viornd_update_qa(); 247 break; 248 case VIRTIO_CONFIG_QUEUE_SELECT: 249 viornd.cfg.queue_select = *data; 250 viornd_update_qs(); 251 
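			/*
			 * Editorial note (assumption grounded in the code
			 * above): the entropy device has a single virtqueue,
			 * so for any other selector viornd_update_qs()
			 * reports a queue size of 0, which legacy virtio
			 * drivers take to mean "no such queue".
			 */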
break; 252 case VIRTIO_CONFIG_QUEUE_NOTIFY: 253 viornd.cfg.queue_notify = *data; 254 if (viornd_notifyq()) 255 *intr = 1; 256 break; 257 case VIRTIO_CONFIG_DEVICE_STATUS: 258 viornd.cfg.device_status = *data; 259 break; 260 } 261 } else { 262 switch (reg) { 263 case VIRTIO_CONFIG_DEVICE_FEATURES: 264 *data = viornd.cfg.device_feature; 265 break; 266 case VIRTIO_CONFIG_GUEST_FEATURES: 267 *data = viornd.cfg.guest_feature; 268 break; 269 case VIRTIO_CONFIG_QUEUE_ADDRESS: 270 *data = viornd.cfg.queue_address; 271 break; 272 case VIRTIO_CONFIG_QUEUE_SIZE: 273 *data = viornd.cfg.queue_size; 274 break; 275 case VIRTIO_CONFIG_QUEUE_SELECT: 276 *data = viornd.cfg.queue_select; 277 break; 278 case VIRTIO_CONFIG_QUEUE_NOTIFY: 279 *data = viornd.cfg.queue_notify; 280 break; 281 case VIRTIO_CONFIG_DEVICE_STATUS: 282 *data = viornd.cfg.device_status; 283 break; 284 case VIRTIO_CONFIG_ISR_STATUS: 285 *data = viornd.cfg.isr_status; 286 viornd.cfg.isr_status = 0; 287 vcpu_deassert_pic_irq(viornd.vm_id, 0, viornd.irq); 288 break; 289 } 290 } 291 return (0); 292 } 293 294 void 295 vioblk_update_qa(struct vioblk_dev *dev) 296 { 297 /* Invalid queue? */ 298 if (dev->cfg.queue_select > 0) 299 return; 300 301 dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address; 302 } 303 304 void 305 vioblk_update_qs(struct vioblk_dev *dev) 306 { 307 /* Invalid queue? */ 308 if (dev->cfg.queue_select > 0) { 309 dev->cfg.queue_size = 0; 310 return; 311 } 312 313 /* Update queue address/size based on queue select */ 314 dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa; 315 dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs; 316 } 317 318 static void 319 vioblk_free_info(struct ioinfo *info) 320 { 321 if (!info) 322 return; 323 free(info->buf); 324 free(info); 325 } 326 327 static struct ioinfo * 328 vioblk_start_read(struct vioblk_dev *dev, off_t sector, size_t sz) 329 { 330 struct ioinfo *info; 331 332 /* Limit to 64M for now */ 333 if (sz > (1 << 26)) { 334 log_warnx("%s: read size exceeded 64M", __func__); 335 return (NULL); 336 } 337 338 info = calloc(1, sizeof(*info)); 339 if (!info) 340 goto nomem; 341 info->buf = malloc(sz); 342 if (info->buf == NULL) 343 goto nomem; 344 info->len = sz; 345 info->offset = sector * VIRTIO_BLK_SECTOR_SIZE; 346 info->file = &dev->file; 347 348 return info; 349 350 nomem: 351 free(info); 352 log_warn("malloc error vioblk read"); 353 return (NULL); 354 } 355 356 357 static const uint8_t * 358 vioblk_finish_read(struct ioinfo *info) 359 { 360 struct virtio_backing *file; 361 362 file = info->file; 363 if (file->pread(file->p, info->buf, info->len, info->offset) != info->len) { 364 info->error = errno; 365 log_warn("vioblk read error"); 366 return NULL; 367 } 368 369 return info->buf; 370 } 371 372 static struct ioinfo * 373 vioblk_start_write(struct vioblk_dev *dev, off_t sector, 374 paddr_t addr, size_t len) 375 { 376 struct ioinfo *info; 377 378 /* Limit to 64M for now */ 379 if (len > (1 << 26)) { 380 log_warnx("%s: write size exceeded 64M", __func__); 381 return (NULL); 382 } 383 384 info = calloc(1, sizeof(*info)); 385 if (!info) 386 goto nomem; 387 388 info->buf = malloc(len); 389 if (info->buf == NULL) 390 goto nomem; 391 info->len = len; 392 info->offset = sector * VIRTIO_BLK_SECTOR_SIZE; 393 info->file = &dev->file; 394 395 if (read_mem(addr, info->buf, info->len)) { 396 vioblk_free_info(info); 397 return NULL; 398 } 399 400 return info; 401 402 nomem: 403 free(info); 404 log_warn("malloc error vioblk write"); 405 return (NULL); 406 } 407 408 static int 409 
vioblk_finish_write(struct ioinfo *info) 410 { 411 struct virtio_backing *file; 412 413 file = info->file; 414 if (file->pwrite(file->p, info->buf, info->len, info->offset) != info->len) { 415 log_warn("vioblk write error"); 416 return EIO; 417 } 418 return 0; 419 } 420 421 /* 422 * XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR, if we can 423 */ 424 int 425 vioblk_notifyq(struct vioblk_dev *dev) 426 { 427 uint64_t q_gpa; 428 uint32_t vr_sz; 429 uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx; 430 uint8_t ds; 431 int cnt, ret; 432 off_t secbias; 433 char *vr; 434 struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc; 435 struct vring_avail *avail; 436 struct vring_used *used; 437 struct virtio_blk_req_hdr cmd; 438 439 ret = 0; 440 441 /* Invalid queue? */ 442 if (dev->cfg.queue_notify > 0) 443 return (0); 444 445 vr_sz = vring_size(VIOBLK_QUEUE_SIZE); 446 q_gpa = dev->vq[dev->cfg.queue_notify].qa; 447 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 448 449 vr = calloc(1, vr_sz); 450 if (vr == NULL) { 451 log_warn("calloc error getting vioblk ring"); 452 return (0); 453 } 454 455 if (read_mem(q_gpa, vr, vr_sz)) { 456 log_warnx("error reading gpa 0x%llx", q_gpa); 457 goto out; 458 } 459 460 /* Compute offsets in ring of descriptors, avail ring, and used ring */ 461 desc = (struct vring_desc *)(vr); 462 avail = (struct vring_avail *)(vr + 463 dev->vq[dev->cfg.queue_notify].vq_availoffset); 464 used = (struct vring_used *)(vr + 465 dev->vq[dev->cfg.queue_notify].vq_usedoffset); 466 467 idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK; 468 469 if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) { 470 log_warnx("vioblk queue notify - nothing to do?"); 471 goto out; 472 } 473 474 while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) { 475 476 cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK; 477 cmd_desc = &desc[cmd_desc_idx]; 478 479 if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) { 480 log_warnx("unchained vioblk cmd descriptor received " 481 "(idx %d)", cmd_desc_idx); 482 goto out; 483 } 484 485 /* Read command from descriptor ring */ 486 if (cmd_desc->flags & VRING_DESC_F_WRITE) { 487 log_warnx("vioblk: unexpected writable cmd descriptor " 488 "%d", cmd_desc_idx); 489 goto out; 490 } 491 if (read_mem(cmd_desc->addr, &cmd, sizeof(cmd))) { 492 log_warnx("vioblk: command read_mem error @ 0x%llx", 493 cmd_desc->addr); 494 goto out; 495 } 496 497 switch (cmd.type) { 498 case VIRTIO_BLK_T_IN: 499 /* first descriptor */ 500 secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 501 secdata_desc = &desc[secdata_desc_idx]; 502 503 if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) { 504 log_warnx("unchained vioblk data descriptor " 505 "received (idx %d)", cmd_desc_idx); 506 goto out; 507 } 508 509 cnt = 0; 510 secbias = 0; 511 do { 512 struct ioinfo *info; 513 const uint8_t *secdata; 514 515 if ((secdata_desc->flags & VRING_DESC_F_WRITE) 516 == 0) { 517 log_warnx("vioblk: unwritable data " 518 "descriptor %d", secdata_desc_idx); 519 goto out; 520 } 521 522 info = vioblk_start_read(dev, 523 cmd.sector + secbias, secdata_desc->len); 524 525 if (info == NULL) { 526 log_warnx("vioblk: can't start read"); 527 goto out; 528 } 529 530 /* read the data, use current data descriptor */ 531 secdata = vioblk_finish_read(info); 532 if (secdata == NULL) { 533 vioblk_free_info(info); 534 log_warnx("vioblk: block read error, " 535 "sector %lld", cmd.sector); 536 goto out; 537 } 538 539 if (write_mem(secdata_desc->addr, secdata, 540 secdata_desc->len)) { 541 log_warnx("can't write sector " 
542 "data to gpa @ 0x%llx", 543 secdata_desc->addr); 544 vioblk_free_info(info); 545 goto out; 546 } 547 548 vioblk_free_info(info); 549 550 secbias += (secdata_desc->len / 551 VIRTIO_BLK_SECTOR_SIZE); 552 secdata_desc_idx = secdata_desc->next & 553 VIOBLK_QUEUE_MASK; 554 secdata_desc = &desc[secdata_desc_idx]; 555 556 /* Guard against infinite chains */ 557 if (++cnt >= VIOBLK_QUEUE_SIZE) { 558 log_warnx("%s: descriptor table " 559 "invalid", __func__); 560 goto out; 561 } 562 } while (secdata_desc->flags & VRING_DESC_F_NEXT); 563 564 ds_desc_idx = secdata_desc_idx; 565 ds_desc = secdata_desc; 566 567 ds = VIRTIO_BLK_S_OK; 568 break; 569 case VIRTIO_BLK_T_OUT: 570 secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 571 secdata_desc = &desc[secdata_desc_idx]; 572 573 if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) { 574 log_warnx("wr vioblk: unchained vioblk data " 575 "descriptor received (idx %d)", 576 cmd_desc_idx); 577 goto out; 578 } 579 580 if (secdata_desc->len > dev->max_xfer) { 581 log_warnx("%s: invalid read size %d requested", 582 __func__, secdata_desc->len); 583 goto out; 584 } 585 586 cnt = 0; 587 secbias = 0; 588 do { 589 struct ioinfo *info; 590 591 if (secdata_desc->flags & VRING_DESC_F_WRITE) { 592 log_warnx("wr vioblk: unexpected " 593 "writable data descriptor %d", 594 secdata_desc_idx); 595 goto out; 596 } 597 598 info = vioblk_start_write(dev, 599 cmd.sector + secbias, 600 secdata_desc->addr, secdata_desc->len); 601 602 if (info == NULL) { 603 log_warnx("wr vioblk: can't read " 604 "sector data @ 0x%llx", 605 secdata_desc->addr); 606 goto out; 607 } 608 609 if (vioblk_finish_write(info)) { 610 log_warnx("wr vioblk: disk write " 611 "error"); 612 vioblk_free_info(info); 613 goto out; 614 } 615 616 vioblk_free_info(info); 617 618 secbias += secdata_desc->len / 619 VIRTIO_BLK_SECTOR_SIZE; 620 621 secdata_desc_idx = secdata_desc->next & 622 VIOBLK_QUEUE_MASK; 623 secdata_desc = &desc[secdata_desc_idx]; 624 625 /* Guard against infinite chains */ 626 if (++cnt >= VIOBLK_QUEUE_SIZE) { 627 log_warnx("%s: descriptor table " 628 "invalid", __func__); 629 goto out; 630 } 631 } while (secdata_desc->flags & VRING_DESC_F_NEXT); 632 633 ds_desc_idx = secdata_desc_idx; 634 ds_desc = secdata_desc; 635 636 ds = VIRTIO_BLK_S_OK; 637 break; 638 case VIRTIO_BLK_T_FLUSH: 639 case VIRTIO_BLK_T_FLUSH_OUT: 640 ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 641 ds_desc = &desc[ds_desc_idx]; 642 643 ds = VIRTIO_BLK_S_UNSUPP; 644 break; 645 case VIRTIO_BLK_T_GET_ID: 646 secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 647 secdata_desc = &desc[secdata_desc_idx]; 648 649 /* 650 * We don't support this command yet. While it's not 651 * officially part of the virtio spec (will be in v1.2) 652 * there's no feature to negotiate. Linux drivers will 653 * often send this command regardless. 654 * 655 * When the command is received, it should appear as a 656 * chain of 3 descriptors, similar to the IN/OUT 657 * commands. The middle descriptor should have have a 658 * length of VIRTIO_BLK_ID_BYTES bytes. 659 */ 660 if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) { 661 log_warnx("id vioblk: unchained vioblk data " 662 "descriptor received (idx %d)", 663 cmd_desc_idx); 664 goto out; 665 } 666 667 /* Skip the data descriptor. 
*/ 668 ds_desc_idx = secdata_desc->next & VIOBLK_QUEUE_MASK; 669 ds_desc = &desc[ds_desc_idx]; 670 671 ds = VIRTIO_BLK_S_UNSUPP; 672 break; 673 default: 674 log_warnx("%s: unsupported command 0x%x", __func__, 675 cmd.type); 676 ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 677 ds_desc = &desc[ds_desc_idx]; 678 679 ds = VIRTIO_BLK_S_UNSUPP; 680 break; 681 } 682 683 if ((ds_desc->flags & VRING_DESC_F_WRITE) == 0) { 684 log_warnx("%s: ds descriptor %d unwritable", __func__, 685 ds_desc_idx); 686 goto out; 687 } 688 if (write_mem(ds_desc->addr, &ds, sizeof(ds))) { 689 log_warnx("%s: can't write device status data @ 0x%llx", 690 __func__, ds_desc->addr); 691 goto out; 692 } 693 694 ret = 1; 695 dev->cfg.isr_status = 1; 696 used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx; 697 used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len; 698 used->idx++; 699 700 dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & 701 VIOBLK_QUEUE_MASK; 702 if (write_mem(q_gpa, vr, vr_sz)) 703 log_warnx("%s: error writing vio ring", __func__); 704 705 idx = (idx + 1) & VIOBLK_QUEUE_MASK; 706 } 707 out: 708 free(vr); 709 return (ret); 710 } 711 712 int 713 virtio_blk_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 714 void *cookie, uint8_t sz) 715 { 716 struct vioblk_dev *dev = (struct vioblk_dev *)cookie; 717 718 *intr = 0xFF; 719 720 721 if (dir == 0) { 722 switch (reg) { 723 case VIRTIO_CONFIG_DEVICE_FEATURES: 724 case VIRTIO_CONFIG_QUEUE_SIZE: 725 case VIRTIO_CONFIG_ISR_STATUS: 726 log_warnx("%s: illegal write %x to %s", 727 __progname, *data, virtio_reg_name(reg)); 728 break; 729 case VIRTIO_CONFIG_GUEST_FEATURES: 730 dev->cfg.guest_feature = *data; 731 break; 732 case VIRTIO_CONFIG_QUEUE_ADDRESS: 733 dev->cfg.queue_address = *data; 734 vioblk_update_qa(dev); 735 break; 736 case VIRTIO_CONFIG_QUEUE_SELECT: 737 dev->cfg.queue_select = *data; 738 vioblk_update_qs(dev); 739 break; 740 case VIRTIO_CONFIG_QUEUE_NOTIFY: 741 dev->cfg.queue_notify = *data; 742 if (vioblk_notifyq(dev)) 743 *intr = 1; 744 break; 745 case VIRTIO_CONFIG_DEVICE_STATUS: 746 dev->cfg.device_status = *data; 747 if (dev->cfg.device_status == 0) { 748 log_debug("%s: device reset", __func__); 749 dev->cfg.guest_feature = 0; 750 dev->cfg.queue_address = 0; 751 vioblk_update_qa(dev); 752 dev->cfg.queue_size = 0; 753 vioblk_update_qs(dev); 754 dev->cfg.queue_select = 0; 755 dev->cfg.queue_notify = 0; 756 dev->cfg.isr_status = 0; 757 dev->vq[0].last_avail = 0; 758 vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq); 759 } 760 break; 761 default: 762 break; 763 } 764 } else { 765 switch (reg) { 766 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 767 switch (sz) { 768 case 4: 769 *data = (uint32_t)(dev->sz); 770 break; 771 case 2: 772 *data &= 0xFFFF0000; 773 *data |= (uint32_t)(dev->sz) & 0xFFFF; 774 break; 775 case 1: 776 *data &= 0xFFFFFF00; 777 *data |= (uint32_t)(dev->sz) & 0xFF; 778 break; 779 } 780 /* XXX handle invalid sz */ 781 break; 782 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1: 783 if (sz == 1) { 784 *data &= 0xFFFFFF00; 785 *data |= (uint32_t)(dev->sz >> 8) & 0xFF; 786 } 787 /* XXX handle invalid sz */ 788 break; 789 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2: 790 if (sz == 1) { 791 *data &= 0xFFFFFF00; 792 *data |= (uint32_t)(dev->sz >> 16) & 0xFF; 793 } else if (sz == 2) { 794 *data &= 0xFFFF0000; 795 *data |= (uint32_t)(dev->sz >> 16) & 0xFFFF; 796 } 797 /* XXX handle invalid sz */ 798 break; 799 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3: 800 if (sz == 1) { 801 *data &= 0xFFFFFF00; 802 *data |= (uint32_t)(dev->sz >> 
24) & 0xFF; 803 } 804 /* XXX handle invalid sz */ 805 break; 806 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: 807 switch (sz) { 808 case 4: 809 *data = (uint32_t)(dev->sz >> 32); 810 break; 811 case 2: 812 *data &= 0xFFFF0000; 813 *data |= (uint32_t)(dev->sz >> 32) & 0xFFFF; 814 break; 815 case 1: 816 *data &= 0xFFFFFF00; 817 *data |= (uint32_t)(dev->sz >> 32) & 0xFF; 818 break; 819 } 820 /* XXX handle invalid sz */ 821 break; 822 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5: 823 if (sz == 1) { 824 *data &= 0xFFFFFF00; 825 *data |= (uint32_t)(dev->sz >> 40) & 0xFF; 826 } 827 /* XXX handle invalid sz */ 828 break; 829 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 6: 830 if (sz == 1) { 831 *data &= 0xFFFFFF00; 832 *data |= (uint32_t)(dev->sz >> 48) & 0xFF; 833 } else if (sz == 2) { 834 *data &= 0xFFFF0000; 835 *data |= (uint32_t)(dev->sz >> 48) & 0xFFFF; 836 } 837 /* XXX handle invalid sz */ 838 break; 839 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 7: 840 if (sz == 1) { 841 *data &= 0xFFFFFF00; 842 *data |= (uint32_t)(dev->sz >> 56) & 0xFF; 843 } 844 /* XXX handle invalid sz */ 845 break; 846 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: 847 switch (sz) { 848 case 4: 849 *data = (uint32_t)(dev->max_xfer); 850 break; 851 case 2: 852 *data &= 0xFFFF0000; 853 *data |= (uint32_t)(dev->max_xfer) & 0xFFFF; 854 break; 855 case 1: 856 *data &= 0xFFFFFF00; 857 *data |= (uint32_t)(dev->max_xfer) & 0xFF; 858 break; 859 } 860 /* XXX handle invalid sz */ 861 break; 862 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 9: 863 if (sz == 1) { 864 *data &= 0xFFFFFF00; 865 *data |= (uint32_t)(dev->max_xfer >> 8) & 0xFF; 866 } 867 /* XXX handle invalid sz */ 868 break; 869 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 10: 870 if (sz == 1) { 871 *data &= 0xFFFFFF00; 872 *data |= (uint32_t)(dev->max_xfer >> 16) & 0xFF; 873 } else if (sz == 2) { 874 *data &= 0xFFFF0000; 875 *data |= (uint32_t)(dev->max_xfer >> 16) 876 & 0xFFFF; 877 } 878 /* XXX handle invalid sz */ 879 break; 880 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 11: 881 if (sz == 1) { 882 *data &= 0xFFFFFF00; 883 *data |= (uint32_t)(dev->max_xfer >> 24) & 0xFF; 884 } 885 /* XXX handle invalid sz */ 886 break; 887 case VIRTIO_CONFIG_DEVICE_FEATURES: 888 *data = dev->cfg.device_feature; 889 break; 890 case VIRTIO_CONFIG_GUEST_FEATURES: 891 *data = dev->cfg.guest_feature; 892 break; 893 case VIRTIO_CONFIG_QUEUE_ADDRESS: 894 *data = dev->cfg.queue_address; 895 break; 896 case VIRTIO_CONFIG_QUEUE_SIZE: 897 if (sz == 4) 898 *data = dev->cfg.queue_size; 899 else if (sz == 2) { 900 *data &= 0xFFFF0000; 901 *data |= (uint16_t)dev->cfg.queue_size; 902 } else if (sz == 1) { 903 *data &= 0xFFFFFF00; 904 *data |= (uint8_t)dev->cfg.queue_size; 905 } 906 break; 907 case VIRTIO_CONFIG_QUEUE_SELECT: 908 *data = dev->cfg.queue_select; 909 break; 910 case VIRTIO_CONFIG_QUEUE_NOTIFY: 911 *data = dev->cfg.queue_notify; 912 break; 913 case VIRTIO_CONFIG_DEVICE_STATUS: 914 if (sz == 4) 915 *data = dev->cfg.device_status; 916 else if (sz == 2) { 917 *data &= 0xFFFF0000; 918 *data |= (uint16_t)dev->cfg.device_status; 919 } else if (sz == 1) { 920 *data &= 0xFFFFFF00; 921 *data |= (uint8_t)dev->cfg.device_status; 922 } 923 break; 924 case VIRTIO_CONFIG_ISR_STATUS: 925 *data = dev->cfg.isr_status; 926 dev->cfg.isr_status = 0; 927 vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq); 928 break; 929 } 930 } 931 return (0); 932 } 933 934 int 935 virtio_net_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 936 void *cookie, uint8_t sz) 937 { 938 struct vionet_dev *dev = (struct vionet_dev *)cookie; 939 940 
*intr = 0xFF; 941 mutex_lock(&dev->mutex); 942 943 if (dir == 0) { 944 switch (reg) { 945 case VIRTIO_CONFIG_DEVICE_FEATURES: 946 case VIRTIO_CONFIG_QUEUE_SIZE: 947 case VIRTIO_CONFIG_ISR_STATUS: 948 log_warnx("%s: illegal write %x to %s", 949 __progname, *data, virtio_reg_name(reg)); 950 break; 951 case VIRTIO_CONFIG_GUEST_FEATURES: 952 dev->cfg.guest_feature = *data; 953 break; 954 case VIRTIO_CONFIG_QUEUE_ADDRESS: 955 dev->cfg.queue_address = *data; 956 vionet_update_qa(dev); 957 break; 958 case VIRTIO_CONFIG_QUEUE_SELECT: 959 dev->cfg.queue_select = *data; 960 vionet_update_qs(dev); 961 break; 962 case VIRTIO_CONFIG_QUEUE_NOTIFY: 963 dev->cfg.queue_notify = *data; 964 if (vionet_notifyq(dev)) 965 *intr = 1; 966 break; 967 case VIRTIO_CONFIG_DEVICE_STATUS: 968 dev->cfg.device_status = *data; 969 if (dev->cfg.device_status == 0) { 970 log_debug("%s: device reset", __func__); 971 dev->cfg.guest_feature = 0; 972 dev->cfg.queue_address = 0; 973 vionet_update_qa(dev); 974 dev->cfg.queue_size = 0; 975 vionet_update_qs(dev); 976 dev->cfg.queue_select = 0; 977 dev->cfg.queue_notify = 0; 978 dev->cfg.isr_status = 0; 979 dev->vq[RXQ].last_avail = 0; 980 dev->vq[RXQ].notified_avail = 0; 981 dev->vq[TXQ].last_avail = 0; 982 dev->vq[TXQ].notified_avail = 0; 983 vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq); 984 } 985 break; 986 default: 987 break; 988 } 989 } else { 990 switch (reg) { 991 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 992 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1: 993 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2: 994 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3: 995 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: 996 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5: 997 *data = dev->mac[reg - 998 VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI]; 999 break; 1000 case VIRTIO_CONFIG_DEVICE_FEATURES: 1001 *data = dev->cfg.device_feature; 1002 break; 1003 case VIRTIO_CONFIG_GUEST_FEATURES: 1004 *data = dev->cfg.guest_feature; 1005 break; 1006 case VIRTIO_CONFIG_QUEUE_ADDRESS: 1007 *data = dev->cfg.queue_address; 1008 break; 1009 case VIRTIO_CONFIG_QUEUE_SIZE: 1010 *data = dev->cfg.queue_size; 1011 break; 1012 case VIRTIO_CONFIG_QUEUE_SELECT: 1013 *data = dev->cfg.queue_select; 1014 break; 1015 case VIRTIO_CONFIG_QUEUE_NOTIFY: 1016 *data = dev->cfg.queue_notify; 1017 break; 1018 case VIRTIO_CONFIG_DEVICE_STATUS: 1019 *data = dev->cfg.device_status; 1020 break; 1021 case VIRTIO_CONFIG_ISR_STATUS: 1022 *data = dev->cfg.isr_status; 1023 dev->cfg.isr_status = 0; 1024 vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq); 1025 break; 1026 } 1027 } 1028 1029 mutex_unlock(&dev->mutex); 1030 return (0); 1031 } 1032 1033 /* 1034 * Must be called with dev->mutex acquired. 1035 */ 1036 void 1037 vionet_update_qa(struct vionet_dev *dev) 1038 { 1039 /* Invalid queue? */ 1040 if (dev->cfg.queue_select > 1) 1041 return; 1042 1043 dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address; 1044 } 1045 1046 /* 1047 * Must be called with dev->mutex acquired. 1048 */ 1049 void 1050 vionet_update_qs(struct vionet_dev *dev) 1051 { 1052 /* Invalid queue? */ 1053 if (dev->cfg.queue_select > 1) { 1054 dev->cfg.queue_size = 0; 1055 return; 1056 } 1057 1058 /* Update queue address/size based on queue select */ 1059 dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa; 1060 dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs; 1061 } 1062 1063 /* 1064 * vionet_enq_rx 1065 * 1066 * Take a given packet from the host-side tap and copy it into the guest's 1067 * buffers utilizing the rx virtio ring. 
If the packet length is invalid 1068 * (too small or too large) or if there are not enough buffers available, 1069 * the packet is dropped. 1070 * 1071 * Must be called with dev->mutex acquired. 1072 */ 1073 int 1074 vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_t sz, int *spc) 1075 { 1076 uint64_t q_gpa; 1077 uint32_t vr_sz; 1078 uint16_t dxx, idx, hdr_desc_idx, chain_hdr_idx; 1079 int ret = 0; 1080 char *vr = NULL; 1081 size_t bufsz = 0, off = 0, pkt_offset = 0, chunk_size = 0; 1082 size_t chain_len = 0; 1083 struct vring_desc *desc, *pkt_desc, *hdr_desc; 1084 struct vring_avail *avail; 1085 struct vring_used *used; 1086 struct vring_used_elem *ue; 1087 struct virtio_net_hdr hdr; 1088 size_t hdr_sz; 1089 1090 if (sz < VIONET_MIN_TXLEN || sz > VIONET_MAX_TXLEN) { 1091 log_warn("%s: invalid packet size", __func__); 1092 return (0); 1093 } 1094 1095 hdr_sz = sizeof(hdr); 1096 1097 if (!(dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK)) 1098 return ret; 1099 1100 vr_sz = vring_size(VIONET_QUEUE_SIZE); 1101 q_gpa = dev->vq[RXQ].qa; 1102 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 1103 1104 vr = calloc(1, vr_sz); 1105 if (vr == NULL) { 1106 log_warn("rx enq: calloc error getting vionet ring"); 1107 return (0); 1108 } 1109 1110 if (read_mem(q_gpa, vr, vr_sz)) { 1111 log_warnx("rx enq: error reading gpa 0x%llx", q_gpa); 1112 goto out; 1113 } 1114 1115 /* Compute offsets in ring of descriptors, avail ring, and used ring */ 1116 desc = (struct vring_desc *)(vr); 1117 avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset); 1118 used = (struct vring_used *)(vr + dev->vq[RXQ].vq_usedoffset); 1119 1120 idx = dev->vq[RXQ].last_avail & VIONET_QUEUE_MASK; 1121 if ((dev->vq[RXQ].notified_avail & VIONET_QUEUE_MASK) == idx) { 1122 log_debug("%s: insufficient available buffer capacity, " 1123 "dropping packet.", __func__); 1124 goto out; 1125 } 1126 1127 hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK; 1128 hdr_desc = &desc[hdr_desc_idx]; 1129 1130 dxx = hdr_desc_idx; 1131 chain_hdr_idx = dxx; 1132 chain_len = 0; 1133 1134 /* Process the descriptor and walk any potential chain. */ 1135 do { 1136 off = 0; 1137 pkt_desc = &desc[dxx]; 1138 if (!(pkt_desc->flags & VRING_DESC_F_WRITE)) { 1139 log_warnx("%s: invalid descriptor, not writable", 1140 __func__); 1141 goto out; 1142 } 1143 1144 /* How much data do we get to write? */ 1145 if (sz - bufsz > pkt_desc->len) 1146 chunk_size = pkt_desc->len; 1147 else 1148 chunk_size = sz - bufsz; 1149 1150 if (chain_len == 0) { 1151 off = hdr_sz; 1152 if (chunk_size == pkt_desc->len) 1153 chunk_size -= off; 1154 } 1155 1156 /* Write a chunk of data if we need to */ 1157 if (chunk_size && write_mem(pkt_desc->addr + off, 1158 pkt + pkt_offset, chunk_size)) { 1159 log_warnx("%s: failed to write to buffer 0x%llx", 1160 __func__, pkt_desc->addr); 1161 goto out; 1162 } 1163 1164 chain_len += chunk_size + off; 1165 bufsz += chunk_size; 1166 pkt_offset += chunk_size; 1167 1168 dxx = pkt_desc->next & VIONET_QUEUE_MASK; 1169 } while (bufsz < sz && pkt_desc->flags & VRING_DESC_F_NEXT); 1170 1171 /* Update the list of used buffers. 
*/ 1172 ue = &used->ring[(used->idx) & VIONET_QUEUE_MASK]; 1173 ue->id = chain_hdr_idx; 1174 ue->len = chain_len; 1175 off = ((char *)ue - vr); 1176 if (write_mem(q_gpa + off, ue, sizeof(*ue))) { 1177 log_warnx("%s: error updating rx used ring", __func__); 1178 goto out; 1179 } 1180 1181 /* Move our marker in the ring...*/ 1182 used->idx++; 1183 dev->vq[RXQ].last_avail = (dev->vq[RXQ].last_avail + 1) & 1184 VIONET_QUEUE_MASK; 1185 1186 /* Prepend the virtio net header in the first buffer. */ 1187 memset(&hdr, 0, sizeof(hdr)); 1188 hdr.hdr_len = hdr_sz; 1189 if (write_mem(hdr_desc->addr, &hdr, hdr_sz)) { 1190 log_warnx("vionet: rx enq header write_mem error @ 0x%llx", 1191 hdr_desc->addr); 1192 goto out; 1193 } 1194 1195 /* Update the index field in the used ring. This must be done last. */ 1196 dev->cfg.isr_status = 1; 1197 off = (char *)&used->idx - vr; 1198 *spc = (dev->vq[RXQ].notified_avail - dev->vq[RXQ].last_avail) & 1199 VIONET_QUEUE_MASK; 1200 1201 if (write_mem(q_gpa + off, &used->idx, sizeof(used->idx))) 1202 log_warnx("vionet: error writing vio ring"); 1203 1204 ret = 1; 1205 1206 out: 1207 free(vr); 1208 return (ret); 1209 } 1210 1211 /* 1212 * vionet_rx 1213 * 1214 * Enqueue data that was received on a tap file descriptor 1215 * to the vionet device queue. 1216 * 1217 * Must be called with dev->mutex acquired. 1218 */ 1219 static int 1220 vionet_rx(struct vionet_dev *dev) 1221 { 1222 char buf[PAGE_SIZE]; 1223 int num_enq = 0, spc = 0; 1224 struct ether_header *eh; 1225 ssize_t sz; 1226 1227 do { 1228 sz = read(dev->fd, buf, sizeof(buf)); 1229 if (sz == -1) { 1230 /* 1231 * If we get EAGAIN, No data is currently available. 1232 * Do not treat this as an error. 1233 */ 1234 if (errno != EAGAIN) 1235 log_warn("unexpected read error on vionet " 1236 "device"); 1237 } else if (sz > 0) { 1238 eh = (struct ether_header *)buf; 1239 if (!dev->lockedmac || 1240 ETHER_IS_MULTICAST(eh->ether_dhost) || 1241 memcmp(eh->ether_dhost, dev->mac, 1242 sizeof(eh->ether_dhost)) == 0) 1243 num_enq += vionet_enq_rx(dev, buf, sz, &spc); 1244 } else if (sz == 0) { 1245 log_debug("process_rx: no data"); 1246 break; 1247 } 1248 } while (spc > 0 && sz > 0); 1249 1250 return (num_enq); 1251 } 1252 1253 /* 1254 * vionet_rx_event 1255 * 1256 * Called from the event handling thread when new data can be 1257 * received on the tap fd of a vionet device. 1258 */ 1259 static void 1260 vionet_rx_event(int fd, short kind, void *arg) 1261 { 1262 struct vionet_dev *dev = arg; 1263 1264 mutex_lock(&dev->mutex); 1265 1266 if (vionet_rx(dev) > 0) { 1267 /* XXX: vcpu_id */ 1268 vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq); 1269 } 1270 1271 mutex_unlock(&dev->mutex); 1272 } 1273 1274 /* 1275 * Must be called with dev->mutex acquired. 1276 */ 1277 void 1278 vionet_notify_rx(struct vionet_dev *dev) 1279 { 1280 uint64_t q_gpa; 1281 uint32_t vr_sz; 1282 char *vr; 1283 struct vring_avail *avail; 1284 1285 vr_sz = vring_size(VIONET_QUEUE_SIZE); 1286 q_gpa = dev->vq[RXQ].qa; 1287 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 1288 1289 vr = malloc(vr_sz); 1290 if (vr == NULL) { 1291 log_warn("malloc error getting vionet ring"); 1292 return; 1293 } 1294 1295 if (read_mem(q_gpa, vr, vr_sz)) { 1296 log_warnx("error reading gpa 0x%llx", q_gpa); 1297 free(vr); 1298 return; 1299 } 1300 1301 /* Compute offset into avail ring */ 1302 avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset); 1303 1304 dev->vq[RXQ].notified_avail = avail->idx - 1; 1305 1306 free(vr); 1307 } 1308 1309 /* 1310 * Must be called with dev->mutex acquired. 
1311 */ 1312 int 1313 vionet_notifyq(struct vionet_dev *dev) 1314 { 1315 int ret; 1316 1317 switch (dev->cfg.queue_notify) { 1318 case RXQ: 1319 vionet_notify_rx(dev); 1320 ret = 0; 1321 break; 1322 case TXQ: 1323 ret = vionet_notify_tx(dev); 1324 break; 1325 default: 1326 /* 1327 * Catch the unimplemented queue ID 2 (control queue) as 1328 * well as any bogus queue IDs. 1329 */ 1330 log_debug("%s: notify for unimplemented queue ID %d", 1331 __func__, dev->cfg.queue_notify); 1332 ret = 0; 1333 break; 1334 } 1335 1336 return (ret); 1337 } 1338 1339 /* 1340 * Must be called with dev->mutex acquired. 1341 */ 1342 int 1343 vionet_notify_tx(struct vionet_dev *dev) 1344 { 1345 uint64_t q_gpa; 1346 uint32_t vr_sz; 1347 uint16_t idx, pkt_desc_idx, hdr_desc_idx, dxx, cnt; 1348 size_t pktsz, chunk_size = 0; 1349 ssize_t dhcpsz; 1350 int ret, num_enq, ofs, spc; 1351 char *vr, *pkt, *dhcppkt; 1352 struct vring_desc *desc, *pkt_desc, *hdr_desc; 1353 struct vring_avail *avail; 1354 struct vring_used *used; 1355 struct ether_header *eh; 1356 1357 dhcpsz = 0; 1358 vr = pkt = dhcppkt = NULL; 1359 ret = spc = 0; 1360 1361 vr_sz = vring_size(VIONET_QUEUE_SIZE); 1362 q_gpa = dev->vq[TXQ].qa; 1363 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 1364 1365 vr = calloc(1, vr_sz); 1366 if (vr == NULL) { 1367 log_warn("calloc error getting vionet ring"); 1368 goto out; 1369 } 1370 1371 if (read_mem(q_gpa, vr, vr_sz)) { 1372 log_warnx("error reading gpa 0x%llx", q_gpa); 1373 goto out; 1374 } 1375 1376 /* Compute offsets in ring of descriptors, avail ring, and used ring */ 1377 desc = (struct vring_desc *)(vr); 1378 avail = (struct vring_avail *)(vr + dev->vq[TXQ].vq_availoffset); 1379 used = (struct vring_used *)(vr + dev->vq[TXQ].vq_usedoffset); 1380 1381 num_enq = 0; 1382 1383 idx = dev->vq[TXQ].last_avail & VIONET_QUEUE_MASK; 1384 1385 if ((avail->idx & VIONET_QUEUE_MASK) == idx) { 1386 log_warnx("vionet tx queue notify - nothing to do?"); 1387 goto out; 1388 } 1389 1390 while ((avail->idx & VIONET_QUEUE_MASK) != idx) { 1391 hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK; 1392 hdr_desc = &desc[hdr_desc_idx]; 1393 pktsz = 0; 1394 1395 cnt = 0; 1396 dxx = hdr_desc_idx; 1397 do { 1398 pktsz += desc[dxx].len; 1399 dxx = desc[dxx].next & VIONET_QUEUE_MASK; 1400 1401 /* 1402 * Virtio 1.0, cs04, section 2.4.5: 1403 * "The number of descriptors in the table is defined 1404 * by the queue size for this virtqueue: this is the 1405 * maximum possible descriptor chain length." 
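			 * Capping the walk at VIONET_QUEUE_SIZE descriptors
			 * below therefore bounds any circular chain a buggy
			 * or hostile guest could hand us.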
1406 */ 1407 if (++cnt >= VIONET_QUEUE_SIZE) { 1408 log_warnx("%s: descriptor table invalid", 1409 __func__); 1410 goto out; 1411 } 1412 } while (desc[dxx].flags & VRING_DESC_F_NEXT); 1413 1414 pktsz += desc[dxx].len; 1415 1416 /* Remove virtio header descriptor len */ 1417 pktsz -= hdr_desc->len; 1418 1419 /* Drop packets violating device MTU-based limits */ 1420 if (pktsz < VIONET_MIN_TXLEN || pktsz > VIONET_MAX_TXLEN) { 1421 log_warnx("%s: invalid packet size %lu", __func__, 1422 pktsz); 1423 goto drop_packet; 1424 } 1425 pkt = malloc(pktsz); 1426 if (pkt == NULL) { 1427 log_warn("malloc error alloc packet buf"); 1428 goto out; 1429 } 1430 1431 ofs = 0; 1432 pkt_desc_idx = hdr_desc->next & VIONET_QUEUE_MASK; 1433 pkt_desc = &desc[pkt_desc_idx]; 1434 1435 while (pkt_desc->flags & VRING_DESC_F_NEXT) { 1436 /* must be not writable */ 1437 if (pkt_desc->flags & VRING_DESC_F_WRITE) { 1438 log_warnx("unexpected writable tx desc " 1439 "%d", pkt_desc_idx); 1440 goto out; 1441 } 1442 1443 /* Check we don't read beyond allocated pktsz */ 1444 if (pkt_desc->len > pktsz - ofs) { 1445 log_warnx("%s: descriptor len past pkt len", 1446 __func__); 1447 chunk_size = pktsz - ofs; 1448 } else 1449 chunk_size = pkt_desc->len; 1450 1451 /* Read packet from descriptor ring */ 1452 if (read_mem(pkt_desc->addr, pkt + ofs, chunk_size)) { 1453 log_warnx("vionet: packet read_mem error " 1454 "@ 0x%llx", pkt_desc->addr); 1455 goto out; 1456 } 1457 1458 ofs += pkt_desc->len; 1459 pkt_desc_idx = pkt_desc->next & VIONET_QUEUE_MASK; 1460 pkt_desc = &desc[pkt_desc_idx]; 1461 } 1462 1463 /* Now handle tail descriptor - must be not writable */ 1464 if (pkt_desc->flags & VRING_DESC_F_WRITE) { 1465 log_warnx("unexpected writable tx descriptor %d", 1466 pkt_desc_idx); 1467 goto out; 1468 } 1469 1470 /* Check we don't read beyond allocated pktsz */ 1471 if (pkt_desc->len > pktsz - ofs) { 1472 log_warnx("%s: descriptor len past pkt len", __func__); 1473 chunk_size = pktsz - ofs - pkt_desc->len; 1474 } else 1475 chunk_size = pkt_desc->len; 1476 1477 /* Read packet from descriptor ring */ 1478 if (read_mem(pkt_desc->addr, pkt + ofs, chunk_size)) { 1479 log_warnx("vionet: packet read_mem error @ " 1480 "0x%llx", pkt_desc->addr); 1481 goto out; 1482 } 1483 1484 /* reject other source addresses */ 1485 if (dev->lockedmac && pktsz >= ETHER_HDR_LEN && 1486 (eh = (struct ether_header *)pkt) && 1487 memcmp(eh->ether_shost, dev->mac, 1488 sizeof(eh->ether_shost)) != 0) 1489 log_debug("vionet: wrong source address %s for vm %d", 1490 ether_ntoa((struct ether_addr *) 1491 eh->ether_shost), dev->vm_id); 1492 else if (dev->local && 1493 (dhcpsz = dhcp_request(dev, pkt, pktsz, &dhcppkt)) != -1) { 1494 log_debug("vionet: dhcp request," 1495 " local response size %zd", dhcpsz); 1496 1497 /* XXX signed vs unsigned here, funky cast */ 1498 } else if (write(dev->fd, pkt, pktsz) != (int)pktsz) { 1499 log_warnx("vionet: tx failed writing to tap: " 1500 "%d", errno); 1501 goto out; 1502 } 1503 1504 drop_packet: 1505 ret = 1; 1506 dev->cfg.isr_status = 1; 1507 used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_desc_idx; 1508 used->ring[used->idx & VIONET_QUEUE_MASK].len = hdr_desc->len; 1509 used->idx++; 1510 1511 dev->vq[TXQ].last_avail++; 1512 num_enq++; 1513 1514 idx = dev->vq[TXQ].last_avail & VIONET_QUEUE_MASK; 1515 1516 free(pkt); 1517 pkt = NULL; 1518 } 1519 1520 if (write_mem(q_gpa, vr, vr_sz)) { 1521 log_warnx("vionet: tx error writing vio ring"); 1522 } 1523 1524 if (dhcpsz > 0) { 1525 if (vionet_enq_rx(dev, dhcppkt, dhcpsz, &spc)) 1526 
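			/*
			 * Loop the locally generated DHCP reply straight back
			 * onto the guest's rx ring; returning 1 makes the
			 * caller assert the device interrupt for it as well.
			 */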
			ret = 1;
	}

out:
	free(vr);
	free(pkt);
	free(dhcppkt);

	return (ret);
}

int
vmmci_ctl(unsigned int cmd)
{
	struct timeval tv = { 0, 0 };

	if ((vmmci.cfg.device_status &
	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0)
		return (-1);

	if (cmd == vmmci.cmd)
		return (0);

	switch (cmd) {
	case VMMCI_NONE:
		break;
	case VMMCI_SHUTDOWN:
	case VMMCI_REBOOT:
		/* Update command */
		vmmci.cmd = cmd;

		/*
		 * vmm VMs do not support powerdown; send a reboot request
		 * instead and turn it off after the triple fault.
		 */
		if (cmd == VMMCI_SHUTDOWN)
			cmd = VMMCI_REBOOT;

		/* Trigger interrupt */
		vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
		vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq);

		/* Add ACK timeout */
		tv.tv_sec = VMMCI_TIMEOUT;
		evtimer_add(&vmmci.timeout, &tv);
		break;
	case VMMCI_SYNCRTC:
		if (vmmci.cfg.guest_feature & VMMCI_F_SYNCRTC) {
			/* RTC updated, request guest VM resync of its RTC */
			vmmci.cmd = cmd;

			vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
			vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq);
		} else {
			log_debug("%s: RTC sync skipped (guest does not "
			    "support RTC sync)\n", __func__);
		}
		break;
	default:
		fatalx("invalid vmmci command: %d", cmd);
	}

	return (0);
}

void
vmmci_ack(unsigned int cmd)
{
	struct timeval tv = { 0, 0 };

	switch (cmd) {
	case VMMCI_NONE:
		break;
	case VMMCI_SHUTDOWN:
		/*
		 * The shutdown was requested by the VM if we don't have
		 * a pending shutdown request. In this case add a short
		 * timeout to give the VM a chance to reboot before the
		 * timer expires.
		 */
		if (vmmci.cmd == 0) {
			log_debug("%s: vm %u requested shutdown", __func__,
			    vmmci.vm_id);
			tv.tv_sec = VMMCI_TIMEOUT;
			evtimer_add(&vmmci.timeout, &tv);
			return;
		}
		/* FALLTHROUGH */
	case VMMCI_REBOOT:
		/*
		 * If the VM acknowledged our shutdown request, give it
		 * enough time to shutdown or reboot gracefully. This
		 * might take a considerable amount of time (running
		 * rc.shutdown on the VM), so increase the timeout before
		 * killing it forcefully.
		 */
		if (cmd == vmmci.cmd &&
		    evtimer_pending(&vmmci.timeout, NULL)) {
			log_debug("%s: vm %u acknowledged shutdown request",
			    __func__, vmmci.vm_id);
			tv.tv_sec = VMMCI_SHUTDOWN_TIMEOUT;
			evtimer_add(&vmmci.timeout, &tv);
		}
		break;
	case VMMCI_SYNCRTC:
		log_debug("%s: vm %u acknowledged RTC sync request",
		    __func__, vmmci.vm_id);
		vmmci.cmd = VMMCI_NONE;
		break;
	default:
		log_warnx("%s: illegal request %u", __func__, cmd);
		break;
	}
}

void
vmmci_timeout(int fd, short type, void *arg)
{
	log_debug("%s: vm %u shutdown", __progname, vmmci.vm_id);
	vm_shutdown(vmmci.cmd == VMMCI_REBOOT ?
VMMCI_REBOOT : VMMCI_SHUTDOWN); 1646 } 1647 1648 int 1649 vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 1650 void *unused, uint8_t sz) 1651 { 1652 *intr = 0xFF; 1653 1654 if (dir == 0) { 1655 switch (reg) { 1656 case VIRTIO_CONFIG_DEVICE_FEATURES: 1657 case VIRTIO_CONFIG_QUEUE_SIZE: 1658 case VIRTIO_CONFIG_ISR_STATUS: 1659 log_warnx("%s: illegal write %x to %s", 1660 __progname, *data, virtio_reg_name(reg)); 1661 break; 1662 case VIRTIO_CONFIG_GUEST_FEATURES: 1663 vmmci.cfg.guest_feature = *data; 1664 break; 1665 case VIRTIO_CONFIG_QUEUE_ADDRESS: 1666 vmmci.cfg.queue_address = *data; 1667 break; 1668 case VIRTIO_CONFIG_QUEUE_SELECT: 1669 vmmci.cfg.queue_select = *data; 1670 break; 1671 case VIRTIO_CONFIG_QUEUE_NOTIFY: 1672 vmmci.cfg.queue_notify = *data; 1673 break; 1674 case VIRTIO_CONFIG_DEVICE_STATUS: 1675 vmmci.cfg.device_status = *data; 1676 break; 1677 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 1678 vmmci_ack(*data); 1679 break; 1680 } 1681 } else { 1682 switch (reg) { 1683 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 1684 *data = vmmci.cmd; 1685 break; 1686 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: 1687 /* Update time once when reading the first register */ 1688 gettimeofday(&vmmci.time, NULL); 1689 *data = (uint64_t)vmmci.time.tv_sec; 1690 break; 1691 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: 1692 *data = (uint64_t)vmmci.time.tv_sec << 32; 1693 break; 1694 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: 1695 *data = (uint64_t)vmmci.time.tv_usec; 1696 break; 1697 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: 1698 *data = (uint64_t)vmmci.time.tv_usec << 32; 1699 break; 1700 case VIRTIO_CONFIG_DEVICE_FEATURES: 1701 *data = vmmci.cfg.device_feature; 1702 break; 1703 case VIRTIO_CONFIG_GUEST_FEATURES: 1704 *data = vmmci.cfg.guest_feature; 1705 break; 1706 case VIRTIO_CONFIG_QUEUE_ADDRESS: 1707 *data = vmmci.cfg.queue_address; 1708 break; 1709 case VIRTIO_CONFIG_QUEUE_SIZE: 1710 *data = vmmci.cfg.queue_size; 1711 break; 1712 case VIRTIO_CONFIG_QUEUE_SELECT: 1713 *data = vmmci.cfg.queue_select; 1714 break; 1715 case VIRTIO_CONFIG_QUEUE_NOTIFY: 1716 *data = vmmci.cfg.queue_notify; 1717 break; 1718 case VIRTIO_CONFIG_DEVICE_STATUS: 1719 *data = vmmci.cfg.device_status; 1720 break; 1721 case VIRTIO_CONFIG_ISR_STATUS: 1722 *data = vmmci.cfg.isr_status; 1723 vmmci.cfg.isr_status = 0; 1724 vcpu_deassert_pic_irq(vmmci.vm_id, 0, vmmci.irq); 1725 break; 1726 } 1727 } 1728 return (0); 1729 } 1730 1731 int 1732 virtio_get_base(int fd, char *path, size_t npath, int type, const char *dpath) 1733 { 1734 switch (type) { 1735 case VMDF_RAW: 1736 return 0; 1737 case VMDF_QCOW2: 1738 return virtio_qcow2_get_base(fd, path, npath, dpath); 1739 } 1740 log_warnx("%s: invalid disk format", __func__); 1741 return -1; 1742 } 1743 1744 /* 1745 * Initializes a struct virtio_backing using the list of fds. 1746 */ 1747 static int 1748 virtio_init_disk(struct virtio_backing *file, off_t *sz, 1749 int *fd, size_t nfd, int type) 1750 { 1751 /* 1752 * probe disk types in order of preference, first one to work wins. 1753 * TODO: provide a way of specifying the type and options. 
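	 * For now 'type' is the format vmd already determined for the disk
	 * (VMDF_RAW or VMDF_QCOW2) and simply selects virtio_raw_init() or
	 * virtio_qcow2_init() below.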
1754 */ 1755 switch (type) { 1756 case VMDF_RAW: 1757 return virtio_raw_init(file, sz, fd, nfd); 1758 case VMDF_QCOW2: 1759 return virtio_qcow2_init(file, sz, fd, nfd); 1760 } 1761 log_warnx("%s: invalid disk format", __func__); 1762 return -1; 1763 } 1764 1765 void 1766 virtio_init(struct vmd_vm *vm, int child_cdrom, 1767 int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps) 1768 { 1769 struct vmop_create_params *vmc = &vm->vm_params; 1770 struct vm_create_params *vcp = &vmc->vmc_params; 1771 uint8_t id; 1772 uint8_t i; 1773 int ret; 1774 1775 /* Virtio entropy device */ 1776 if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 1777 PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM, 1778 PCI_SUBCLASS_SYSTEM_MISC, 1779 PCI_VENDOR_OPENBSD, 1780 PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) { 1781 log_warnx("%s: can't add PCI virtio rng device", 1782 __progname); 1783 return; 1784 } 1785 1786 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) { 1787 log_warnx("%s: can't add bar for virtio rng device", 1788 __progname); 1789 return; 1790 } 1791 1792 memset(&viornd, 0, sizeof(viornd)); 1793 viornd.vq[0].qs = VIORND_QUEUE_SIZE; 1794 viornd.vq[0].vq_availoffset = sizeof(struct vring_desc) * 1795 VIORND_QUEUE_SIZE; 1796 viornd.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN( 1797 sizeof(struct vring_desc) * VIORND_QUEUE_SIZE 1798 + sizeof(uint16_t) * (2 + VIORND_QUEUE_SIZE)); 1799 viornd.pci_id = id; 1800 viornd.irq = pci_get_dev_irq(id); 1801 viornd.vm_id = vcp->vcp_id; 1802 1803 if (vcp->vcp_nnics > 0) { 1804 vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev)); 1805 if (vionet == NULL) { 1806 log_warn("%s: calloc failure allocating vionets", 1807 __progname); 1808 return; 1809 } 1810 1811 nr_vionet = vcp->vcp_nnics; 1812 /* Virtio network */ 1813 for (i = 0; i < vcp->vcp_nnics; i++) { 1814 if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 1815 PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM, 1816 PCI_SUBCLASS_SYSTEM_MISC, 1817 PCI_VENDOR_OPENBSD, 1818 PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) { 1819 log_warnx("%s: can't add PCI virtio net device", 1820 __progname); 1821 return; 1822 } 1823 1824 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_net_io, 1825 &vionet[i])) { 1826 log_warnx("%s: can't add bar for virtio net " 1827 "device", __progname); 1828 return; 1829 } 1830 1831 ret = pthread_mutex_init(&vionet[i].mutex, NULL); 1832 if (ret) { 1833 errno = ret; 1834 log_warn("%s: could not initialize mutex " 1835 "for vionet device", __progname); 1836 return; 1837 } 1838 1839 vionet[i].vq[RXQ].qs = VIONET_QUEUE_SIZE; 1840 vionet[i].vq[RXQ].vq_availoffset = 1841 sizeof(struct vring_desc) * VIONET_QUEUE_SIZE; 1842 vionet[i].vq[RXQ].vq_usedoffset = VIRTQUEUE_ALIGN( 1843 sizeof(struct vring_desc) * VIONET_QUEUE_SIZE 1844 + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE)); 1845 vionet[i].vq[RXQ].last_avail = 0; 1846 vionet[i].vq[RXQ].notified_avail = 0; 1847 1848 vionet[i].vq[TXQ].qs = VIONET_QUEUE_SIZE; 1849 vionet[i].vq[TXQ].vq_availoffset = 1850 sizeof(struct vring_desc) * VIONET_QUEUE_SIZE; 1851 vionet[i].vq[TXQ].vq_usedoffset = VIRTQUEUE_ALIGN( 1852 sizeof(struct vring_desc) * VIONET_QUEUE_SIZE 1853 + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE)); 1854 vionet[i].vq[TXQ].last_avail = 0; 1855 vionet[i].vq[TXQ].notified_avail = 0; 1856 vionet[i].fd = child_taps[i]; 1857 vionet[i].vm_id = vcp->vcp_id; 1858 vionet[i].vm_vmid = vm->vm_vmid; 1859 vionet[i].irq = pci_get_dev_irq(id); 1860 1861 event_set(&vionet[i].event, vionet[i].fd, 1862 EV_READ | EV_PERSIST, vionet_rx_event, &vionet[i]); 1863 if (event_add(&vionet[i].event, 
NULL)) { 1864 log_warn("could not initialize vionet event " 1865 "handler"); 1866 return; 1867 } 1868 1869 /* MAC address has been assigned by the parent */ 1870 memcpy(&vionet[i].mac, &vcp->vcp_macs[i], 6); 1871 vionet[i].cfg.device_feature = VIRTIO_NET_F_MAC; 1872 1873 vionet[i].lockedmac = 1874 vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0; 1875 vionet[i].local = 1876 vmc->vmc_ifflags[i] & VMIFF_LOCAL ? 1 : 0; 1877 if (i == 0 && vmc->vmc_bootdevice & VMBOOTDEV_NET) 1878 vionet[i].pxeboot = 1; 1879 vionet[i].idx = i; 1880 vionet[i].pci_id = id; 1881 1882 log_debug("%s: vm \"%s\" vio%u lladdr %s%s%s%s", 1883 __func__, vcp->vcp_name, i, 1884 ether_ntoa((void *)vionet[i].mac), 1885 vionet[i].lockedmac ? ", locked" : "", 1886 vionet[i].local ? ", local" : "", 1887 vionet[i].pxeboot ? ", pxeboot" : ""); 1888 } 1889 } 1890 1891 if (vcp->vcp_ndisks > 0) { 1892 nr_vioblk = vcp->vcp_ndisks; 1893 vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev)); 1894 if (vioblk == NULL) { 1895 log_warn("%s: calloc failure allocating vioblks", 1896 __progname); 1897 return; 1898 } 1899 1900 /* One virtio block device for each disk defined in vcp */ 1901 for (i = 0; i < vcp->vcp_ndisks; i++) { 1902 if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 1903 PCI_PRODUCT_QUMRANET_VIO_BLOCK, 1904 PCI_CLASS_MASS_STORAGE, 1905 PCI_SUBCLASS_MASS_STORAGE_SCSI, 1906 PCI_VENDOR_OPENBSD, 1907 PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) { 1908 log_warnx("%s: can't add PCI virtio block " 1909 "device", __progname); 1910 return; 1911 } 1912 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_blk_io, 1913 &vioblk[i])) { 1914 log_warnx("%s: can't add bar for virtio block " 1915 "device", __progname); 1916 return; 1917 } 1918 vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE; 1919 vioblk[i].vq[0].vq_availoffset = 1920 sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE; 1921 vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN( 1922 sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE 1923 + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE)); 1924 vioblk[i].vq[0].last_avail = 0; 1925 vioblk[i].cfg.device_feature = VIRTIO_BLK_F_SIZE_MAX; 1926 vioblk[i].max_xfer = 1048576; 1927 vioblk[i].pci_id = id; 1928 vioblk[i].vm_id = vcp->vcp_id; 1929 vioblk[i].irq = pci_get_dev_irq(id); 1930 if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz, 1931 child_disks[i], vmc->vmc_diskbases[i], 1932 vmc->vmc_disktypes[i]) == -1) { 1933 log_warnx("%s: unable to determine disk format", 1934 __func__); 1935 return; 1936 } 1937 vioblk[i].sz /= 512; 1938 } 1939 } 1940 1941 /* vioscsi cdrom */ 1942 if (strlen(vcp->vcp_cdrom)) { 1943 vioscsi = calloc(1, sizeof(struct vioscsi_dev)); 1944 if (vioscsi == NULL) { 1945 log_warn("%s: calloc failure allocating vioscsi", 1946 __progname); 1947 return; 1948 } 1949 1950 if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 1951 PCI_PRODUCT_QUMRANET_VIO_SCSI, 1952 PCI_CLASS_MASS_STORAGE, 1953 PCI_SUBCLASS_MASS_STORAGE_SCSI, 1954 PCI_VENDOR_OPENBSD, 1955 PCI_PRODUCT_VIRTIO_SCSI, 1, NULL)) { 1956 log_warnx("%s: can't add PCI vioscsi device", 1957 __progname); 1958 return; 1959 } 1960 1961 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vioscsi_io, vioscsi)) { 1962 log_warnx("%s: can't add bar for vioscsi device", 1963 __progname); 1964 return; 1965 } 1966 1967 for ( i = 0; i < VIRTIO_MAX_QUEUES; i++) { 1968 vioscsi->vq[i].qs = VIOSCSI_QUEUE_SIZE; 1969 vioscsi->vq[i].vq_availoffset = 1970 sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE; 1971 vioscsi->vq[i].vq_usedoffset = VIRTQUEUE_ALIGN( 1972 sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE 1973 + sizeof(uint16_t) * (2 + 
VIOSCSI_QUEUE_SIZE)); 1974 vioscsi->vq[i].last_avail = 0; 1975 } 1976 if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, 1977 &child_cdrom, 1, VMDF_RAW) == -1) { 1978 log_warnx("%s: unable to determine iso format", 1979 __func__); 1980 return; 1981 } 1982 vioscsi->locked = 0; 1983 vioscsi->lba = 0; 1984 vioscsi->n_blocks = vioscsi->sz >> 11; /* num of 2048 blocks in file */ 1985 vioscsi->max_xfer = VIOSCSI_BLOCK_SIZE_CDROM; 1986 vioscsi->pci_id = id; 1987 vioscsi->vm_id = vcp->vcp_id; 1988 vioscsi->irq = pci_get_dev_irq(id); 1989 } 1990 1991 /* virtio control device */ 1992 if (pci_add_device(&id, PCI_VENDOR_OPENBSD, 1993 PCI_PRODUCT_OPENBSD_CONTROL, 1994 PCI_CLASS_COMMUNICATIONS, 1995 PCI_SUBCLASS_COMMUNICATIONS_MISC, 1996 PCI_VENDOR_OPENBSD, 1997 PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) { 1998 log_warnx("%s: can't add PCI vmm control device", 1999 __progname); 2000 return; 2001 } 2002 2003 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) { 2004 log_warnx("%s: can't add bar for vmm control device", 2005 __progname); 2006 return; 2007 } 2008 2009 memset(&vmmci, 0, sizeof(vmmci)); 2010 vmmci.cfg.device_feature = VMMCI_F_TIMESYNC | VMMCI_F_ACK | 2011 VMMCI_F_SYNCRTC; 2012 vmmci.vm_id = vcp->vcp_id; 2013 vmmci.irq = pci_get_dev_irq(id); 2014 vmmci.pci_id = id; 2015 2016 evtimer_set(&vmmci.timeout, vmmci_timeout, NULL); 2017 } 2018 2019 /* 2020 * vionet_set_hostmac 2021 * 2022 * Sets the hardware address for the host-side tap(4) on a vionet_dev. 2023 * 2024 * This should only be called from the event-loop thread 2025 * 2026 * vm: pointer to the current vmd_vm instance 2027 * idx: index into the array of vionet_dev's for the target vionet_dev 2028 * addr: ethernet address to set 2029 */ 2030 void 2031 vionet_set_hostmac(struct vmd_vm *vm, unsigned int idx, uint8_t *addr) 2032 { 2033 struct vmop_create_params *vmc = &vm->vm_params; 2034 struct vm_create_params *vcp = &vmc->vmc_params; 2035 struct vionet_dev *dev; 2036 2037 if (idx > vcp->vcp_nnics) 2038 fatalx("vionet_set_hostmac"); 2039 2040 dev = &vionet[idx]; 2041 memcpy(dev->hostmac, addr, sizeof(dev->hostmac)); 2042 } 2043 2044 void 2045 virtio_shutdown(struct vmd_vm *vm) 2046 { 2047 int i; 2048 2049 /* ensure that our disks are synced */ 2050 if (vioscsi != NULL) 2051 vioscsi->file.close(vioscsi->file.p, 0); 2052 2053 for (i = 0; i < nr_vioblk; i++) 2054 vioblk[i].file.close(vioblk[i].file.p, 0); 2055 } 2056 2057 int 2058 vmmci_restore(int fd, uint32_t vm_id) 2059 { 2060 log_debug("%s: receiving vmmci", __func__); 2061 if (atomicio(read, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) { 2062 log_warnx("%s: error reading vmmci from fd", __func__); 2063 return (-1); 2064 } 2065 2066 if (pci_set_bar_fn(vmmci.pci_id, 0, vmmci_io, NULL)) { 2067 log_warnx("%s: can't set bar fn for vmm control device", 2068 __progname); 2069 return (-1); 2070 } 2071 vmmci.vm_id = vm_id; 2072 vmmci.irq = pci_get_dev_irq(vmmci.pci_id); 2073 memset(&vmmci.timeout, 0, sizeof(struct event)); 2074 evtimer_set(&vmmci.timeout, vmmci_timeout, NULL); 2075 return (0); 2076 } 2077 2078 int 2079 viornd_restore(int fd, struct vm_create_params *vcp) 2080 { 2081 log_debug("%s: receiving viornd", __func__); 2082 if (atomicio(read, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) { 2083 log_warnx("%s: error reading viornd from fd", __func__); 2084 return (-1); 2085 } 2086 if (pci_set_bar_fn(viornd.pci_id, 0, virtio_rnd_io, NULL)) { 2087 log_warnx("%s: can't set bar fn for virtio rng device", 2088 __progname); 2089 return (-1); 2090 } 2091 viornd.vm_id = vcp->vcp_id; 2092 viornd.irq 
int
vionet_restore(int fd, struct vmd_vm *vm, int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	uint8_t i;
	int ret;

	nr_vionet = vcp->vcp_nnics;
	if (vcp->vcp_nnics > 0) {
		vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev));
		if (vionet == NULL) {
			log_warn("%s: calloc failure allocating vionets",
			    __progname);
			return (-1);
		}
		log_debug("%s: receiving vionet", __func__);
		if (atomicio(read, fd, vionet,
		    vcp->vcp_nnics * sizeof(struct vionet_dev)) !=
		    vcp->vcp_nnics * sizeof(struct vionet_dev)) {
			log_warnx("%s: error reading vionet from fd",
			    __func__);
			return (-1);
		}

		/* Virtio network */
		for (i = 0; i < vcp->vcp_nnics; i++) {
			if (pci_set_bar_fn(vionet[i].pci_id, 0, virtio_net_io,
			    &vionet[i])) {
				log_warnx("%s: can't set bar fn for virtio net "
				    "device", __progname);
				return (-1);
			}

			memset(&vionet[i].mutex, 0, sizeof(pthread_mutex_t));
			ret = pthread_mutex_init(&vionet[i].mutex, NULL);

			if (ret) {
				errno = ret;
				log_warn("%s: could not initialize mutex "
				    "for vionet device", __progname);
				return (-1);
			}
			vionet[i].fd = child_taps[i];
			vionet[i].vm_id = vcp->vcp_id;
			vionet[i].vm_vmid = vm->vm_vmid;
			vionet[i].irq = pci_get_dev_irq(vionet[i].pci_id);

			memset(&vionet[i].event, 0, sizeof(struct event));
			event_set(&vionet[i].event, vionet[i].fd,
			    EV_READ | EV_PERSIST, vionet_rx_event, &vionet[i]);
		}
	}
	return (0);
}

int
vioblk_restore(int fd, struct vmop_create_params *vmc,
    int child_disks[][VM_MAX_BASE_PER_DISK])
{
	struct vm_create_params *vcp = &vmc->vmc_params;
	uint8_t i;

	nr_vioblk = vcp->vcp_ndisks;
	vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev));
	if (vioblk == NULL) {
		log_warn("%s: calloc failure allocating vioblks", __progname);
		return (-1);
	}
	log_debug("%s: receiving vioblk", __func__);
	if (atomicio(read, fd, vioblk,
	    nr_vioblk * sizeof(struct vioblk_dev)) !=
	    nr_vioblk * sizeof(struct vioblk_dev)) {
		log_warnx("%s: error reading vioblk from fd", __func__);
		return (-1);
	}
	for (i = 0; i < vcp->vcp_ndisks; i++) {
		if (pci_set_bar_fn(vioblk[i].pci_id, 0, virtio_blk_io,
		    &vioblk[i])) {
			log_warnx("%s: can't set bar fn for virtio block "
			    "device", __progname);
			return (-1);
		}
		if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz,
		    child_disks[i], vmc->vmc_diskbases[i],
		    vmc->vmc_disktypes[i]) == -1) {
			log_warnx("%s: unable to determine disk format",
			    __func__);
			return (-1);
		}
		vioblk[i].vm_id = vcp->vcp_id;
		vioblk[i].irq = pci_get_dev_irq(vioblk[i].pci_id);
	}
	return (0);
}
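
/*
 * vioscsi_restore
 *
 * Restores the vioscsi cdrom device when receiving a VM.  This is a
 * no-op if the VM was created without a cdrom path.
 *
 * Returns 0 on success, -1 on failure.
 */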
int
vioscsi_restore(int fd, struct vm_create_params *vcp, int child_cdrom)
{
	if (!strlen(vcp->vcp_cdrom))
		return (0);

	vioscsi = calloc(1, sizeof(struct vioscsi_dev));
	if (vioscsi == NULL) {
		log_warn("%s: calloc failure allocating vioscsi", __progname);
		return (-1);
	}

	log_debug("%s: receiving vioscsi", __func__);

	if (atomicio(read, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
	    sizeof(struct vioscsi_dev)) {
		log_warnx("%s: error reading vioscsi from fd",
		    __func__);
		return (-1);
	}

	if (pci_set_bar_fn(vioscsi->pci_id, 0, vioscsi_io, vioscsi)) {
		log_warnx("%s: can't set bar fn for virtio scsi device",
		    __progname);
		return (-1);
	}

	if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, &child_cdrom, 1,
	    VMDF_RAW) == -1) {
		log_warnx("%s: unable to determine iso format", __func__);
		return (-1);
	}
	vioscsi->vm_id = vcp->vcp_id;
	vioscsi->irq = pci_get_dev_irq(vioscsi->pci_id);

	return (0);
}

int
virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom,
    int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	int ret;

	if ((ret = viornd_restore(fd, vcp)) == -1)
		return ret;

	if ((ret = vioblk_restore(fd, vmc, child_disks)) == -1)
		return ret;

	if ((ret = vioscsi_restore(fd, vcp, child_cdrom)) == -1)
		return ret;

	if ((ret = vionet_restore(fd, vm, child_taps)) == -1)
		return ret;

	if ((ret = vmmci_restore(fd, vcp->vcp_id)) == -1)
		return ret;

	return (0);
}

int
viornd_dump(int fd)
{
	log_debug("%s: sending viornd", __func__);
	if (atomicio(vwrite, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
		log_warnx("%s: error writing viornd to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vmmci_dump(int fd)
{
	log_debug("%s: sending vmmci", __func__);
	if (atomicio(vwrite, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
		log_warnx("%s: error writing vmmci to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vionet_dump(int fd)
{
	log_debug("%s: sending vionet", __func__);
	if (atomicio(vwrite, fd, vionet,
	    nr_vionet * sizeof(struct vionet_dev)) !=
	    nr_vionet * sizeof(struct vionet_dev)) {
		log_warnx("%s: error writing vionet to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vioblk_dump(int fd)
{
	log_debug("%s: sending vioblk", __func__);
	if (atomicio(vwrite, fd, vioblk,
	    nr_vioblk * sizeof(struct vioblk_dev)) !=
	    nr_vioblk * sizeof(struct vioblk_dev)) {
		log_warnx("%s: error writing vioblk to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vioscsi_dump(int fd)
{
	if (vioscsi == NULL)
		return (0);

	log_debug("%s: sending vioscsi", __func__);
	if (atomicio(vwrite, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
	    sizeof(struct vioscsi_dev)) {
		log_warnx("%s: error writing vioscsi to fd", __func__);
		return (-1);
	}
	return (0);
}
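
/*
 * virtio_dump
 *
 * Serializes all virtio device state to 'fd' as a flat byte stream.
 * The per-device order (viornd, vioblk, vioscsi, vionet, vmmci) must
 * match the order in which virtio_restore() reads the state back,
 * since there is no framing between the individual structures.
 */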
int
virtio_dump(int fd)
{
	int ret;

	if ((ret = viornd_dump(fd)) == -1)
		return ret;

	if ((ret = vioblk_dump(fd)) == -1)
		return ret;

	if ((ret = vioscsi_dump(fd)) == -1)
		return ret;

	if ((ret = vionet_dump(fd)) == -1)
		return ret;

	if ((ret = vmmci_dump(fd)) == -1)
		return ret;

	return (0);
}

void
virtio_stop(struct vm_create_params *vcp)
{
	uint8_t i;
	for (i = 0; i < vcp->vcp_nnics; i++) {
		if (event_del(&vionet[i].event)) {
			log_warn("could not delete vionet event handler");
			return;
		}
	}
}

void
virtio_start(struct vm_create_params *vcp)
{
	uint8_t i;
	for (i = 0; i < vcp->vcp_nnics; i++) {
		if (event_add(&vionet[i].event, NULL)) {
			log_warn("could not add vionet event handler");
			return;
		}
	}
}
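
/*
 * virtio_stop() and virtio_start() only toggle the tap(4) read events
 * of the vionet devices; queue and disk state is left untouched.  As an
 * illustrative sketch (pause and unpause requests are handled elsewhere
 * in vmd), a caller would pair them around a pause:
 *
 *	virtio_stop(vcp);	stop polling the vionet taps
 *	 ...			guest stays paused
 *	virtio_start(vcp);	resume polling the vionet taps
 */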