1 /* $OpenBSD: virtio.c,v 1.82 2019/12/11 06:45:16 pd Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> /* PAGE_SIZE */ 20 #include <sys/socket.h> 21 22 #include <machine/vmmvar.h> 23 #include <dev/pci/pcireg.h> 24 #include <dev/pci/pcidevs.h> 25 #include <dev/pv/virtioreg.h> 26 #include <dev/pci/virtio_pcireg.h> 27 #include <dev/pv/vioblkreg.h> 28 #include <dev/pv/vioscsireg.h> 29 30 #include <net/if.h> 31 #include <netinet/in.h> 32 #include <netinet/if_ether.h> 33 34 #include <errno.h> 35 #include <event.h> 36 #include <poll.h> 37 #include <stddef.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <unistd.h> 41 42 #include "pci.h" 43 #include "vmd.h" 44 #include "vmm.h" 45 #include "virtio.h" 46 #include "vioscsi.h" 47 #include "loadfile.h" 48 #include "atomicio.h" 49 50 extern char *__progname; 51 struct viornd_dev viornd; 52 struct vioblk_dev *vioblk; 53 struct vionet_dev *vionet; 54 struct vioscsi_dev *vioscsi; 55 struct vmmci_dev vmmci; 56 57 int nr_vionet; 58 int nr_vioblk; 59 60 #define MAXPHYS (64 * 1024) /* max raw I/O transfer size */ 61 62 #define VIRTIO_NET_F_MAC (1<<5) 63 64 #define VMMCI_F_TIMESYNC (1<<0) 65 #define VMMCI_F_ACK (1<<1) 66 #define VMMCI_F_SYNCRTC (1<<2) 67 68 #define RXQ 0 69 #define TXQ 1 70 71 const char * 72 vioblk_cmd_name(uint32_t type) 73 { 74 switch (type) { 75 case VIRTIO_BLK_T_IN: return "read"; 76 case VIRTIO_BLK_T_OUT: return "write"; 77 case VIRTIO_BLK_T_SCSI_CMD: return "scsi read"; 78 case VIRTIO_BLK_T_SCSI_CMD_OUT: return "scsi write"; 79 case VIRTIO_BLK_T_FLUSH: return "flush"; 80 case VIRTIO_BLK_T_FLUSH_OUT: return "flush out"; 81 case VIRTIO_BLK_T_GET_ID: return "get id"; 82 default: return "unknown"; 83 } 84 } 85 86 static void 87 dump_descriptor_chain(struct vring_desc *desc, int16_t dxx) 88 { 89 log_debug("descriptor chain @ %d", dxx); 90 do { 91 log_debug("desc @%d addr/len/flags/next = 0x%llx / 0x%x " 92 "/ 0x%x / 0x%x", 93 dxx, 94 desc[dxx].addr, 95 desc[dxx].len, 96 desc[dxx].flags, 97 desc[dxx].next); 98 dxx = desc[dxx].next; 99 } while (desc[dxx].flags & VRING_DESC_F_NEXT); 100 101 log_debug("desc @%d addr/len/flags/next = 0x%llx / 0x%x / 0x%x " 102 "/ 0x%x", 103 dxx, 104 desc[dxx].addr, 105 desc[dxx].len, 106 desc[dxx].flags, 107 desc[dxx].next); 108 } 109 110 static const char * 111 virtio_reg_name(uint8_t reg) 112 { 113 switch (reg) { 114 case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature"; 115 case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature"; 116 case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address"; 117 case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size"; 118 case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select"; 119 case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify"; 120 case VIRTIO_CONFIG_DEVICE_STATUS: 
return "device status"; 121 case VIRTIO_CONFIG_ISR_STATUS: return "isr status"; 122 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: return "device config 0"; 123 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: return "device config 1"; 124 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "device config 2"; 125 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: return "device config 3"; 126 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: return "device config 4"; 127 default: return "unknown"; 128 } 129 } 130 131 uint32_t 132 vring_size(uint32_t vq_size) 133 { 134 uint32_t allocsize1, allocsize2; 135 136 /* allocsize1: descriptor table + avail ring + pad */ 137 allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size 138 + sizeof(uint16_t) * (2 + vq_size)); 139 /* allocsize2: used ring + pad */ 140 allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * 2 141 + sizeof(struct vring_used_elem) * vq_size); 142 143 return allocsize1 + allocsize2; 144 } 145 146 /* Update queue select */ 147 void 148 viornd_update_qs(void) 149 { 150 /* Invalid queue? */ 151 if (viornd.cfg.queue_select > 0) { 152 viornd.cfg.queue_size = 0; 153 return; 154 } 155 156 /* Update queue address/size based on queue select */ 157 viornd.cfg.queue_address = viornd.vq[viornd.cfg.queue_select].qa; 158 viornd.cfg.queue_size = viornd.vq[viornd.cfg.queue_select].qs; 159 } 160 161 /* Update queue address */ 162 void 163 viornd_update_qa(void) 164 { 165 /* Invalid queue? */ 166 if (viornd.cfg.queue_select > 0) 167 return; 168 169 viornd.vq[viornd.cfg.queue_select].qa = viornd.cfg.queue_address; 170 } 171 172 int 173 viornd_notifyq(void) 174 { 175 uint64_t q_gpa; 176 uint32_t vr_sz; 177 size_t sz; 178 int ret; 179 uint16_t aidx, uidx; 180 char *buf, *rnd_data; 181 struct vring_desc *desc; 182 struct vring_avail *avail; 183 struct vring_used *used; 184 185 ret = 0; 186 187 /* Invalid queue? 
*/ 188 if (viornd.cfg.queue_notify > 0) 189 return (0); 190 191 vr_sz = vring_size(VIORND_QUEUE_SIZE); 192 q_gpa = viornd.vq[viornd.cfg.queue_notify].qa; 193 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 194 195 buf = calloc(1, vr_sz); 196 if (buf == NULL) { 197 log_warn("calloc error getting viornd ring"); 198 return (0); 199 } 200 201 if (read_mem(q_gpa, buf, vr_sz)) { 202 free(buf); 203 return (0); 204 } 205 206 desc = (struct vring_desc *)(buf); 207 avail = (struct vring_avail *)(buf + 208 viornd.vq[viornd.cfg.queue_notify].vq_availoffset); 209 used = (struct vring_used *)(buf + 210 viornd.vq[viornd.cfg.queue_notify].vq_usedoffset); 211 212 aidx = avail->idx & VIORND_QUEUE_MASK; 213 uidx = used->idx & VIORND_QUEUE_MASK; 214 215 sz = desc[avail->ring[aidx]].len; 216 if (sz > MAXPHYS) 217 fatal("viornd descriptor size too large (%zu)", sz); 218 219 rnd_data = malloc(sz); 220 221 if (rnd_data != NULL) { 222 arc4random_buf(rnd_data, desc[avail->ring[aidx]].len); 223 if (write_mem(desc[avail->ring[aidx]].addr, 224 rnd_data, desc[avail->ring[aidx]].len)) { 225 log_warnx("viornd: can't write random data @ " 226 "0x%llx", 227 desc[avail->ring[aidx]].addr); 228 } else { 229 /* ret == 1 -> interrupt needed */ 230 /* XXX check VIRTIO_F_NO_INTR */ 231 ret = 1; 232 viornd.cfg.isr_status = 1; 233 used->ring[uidx].id = avail->ring[aidx] & 234 VIORND_QUEUE_MASK; 235 used->ring[uidx].len = desc[avail->ring[aidx]].len; 236 used->idx++; 237 238 if (write_mem(q_gpa, buf, vr_sz)) { 239 log_warnx("viornd: error writing vio ring"); 240 } 241 } 242 free(rnd_data); 243 } else 244 fatal("memory allocation error for viornd data"); 245 246 free(buf); 247 248 return (ret); 249 } 250 251 int 252 virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 253 void *unused, uint8_t sz) 254 { 255 *intr = 0xFF; 256 257 if (dir == 0) { 258 switch (reg) { 259 case VIRTIO_CONFIG_DEVICE_FEATURES: 260 case VIRTIO_CONFIG_QUEUE_SIZE: 261 case VIRTIO_CONFIG_ISR_STATUS: 262 log_warnx("%s: illegal write %x to %s", 263 __progname, *data, virtio_reg_name(reg)); 264 break; 265 case VIRTIO_CONFIG_GUEST_FEATURES: 266 viornd.cfg.guest_feature = *data; 267 break; 268 case VIRTIO_CONFIG_QUEUE_ADDRESS: 269 viornd.cfg.queue_address = *data; 270 viornd_update_qa(); 271 break; 272 case VIRTIO_CONFIG_QUEUE_SELECT: 273 viornd.cfg.queue_select = *data; 274 viornd_update_qs(); 275 break; 276 case VIRTIO_CONFIG_QUEUE_NOTIFY: 277 viornd.cfg.queue_notify = *data; 278 if (viornd_notifyq()) 279 *intr = 1; 280 break; 281 case VIRTIO_CONFIG_DEVICE_STATUS: 282 viornd.cfg.device_status = *data; 283 break; 284 } 285 } else { 286 switch (reg) { 287 case VIRTIO_CONFIG_DEVICE_FEATURES: 288 *data = viornd.cfg.device_feature; 289 break; 290 case VIRTIO_CONFIG_GUEST_FEATURES: 291 *data = viornd.cfg.guest_feature; 292 break; 293 case VIRTIO_CONFIG_QUEUE_ADDRESS: 294 *data = viornd.cfg.queue_address; 295 break; 296 case VIRTIO_CONFIG_QUEUE_SIZE: 297 *data = viornd.cfg.queue_size; 298 break; 299 case VIRTIO_CONFIG_QUEUE_SELECT: 300 *data = viornd.cfg.queue_select; 301 break; 302 case VIRTIO_CONFIG_QUEUE_NOTIFY: 303 *data = viornd.cfg.queue_notify; 304 break; 305 case VIRTIO_CONFIG_DEVICE_STATUS: 306 *data = viornd.cfg.device_status; 307 break; 308 case VIRTIO_CONFIG_ISR_STATUS: 309 *data = viornd.cfg.isr_status; 310 viornd.cfg.isr_status = 0; 311 vcpu_deassert_pic_irq(viornd.vm_id, 0, viornd.irq); 312 break; 313 } 314 } 315 return (0); 316 } 317 318 void 319 vioblk_update_qa(struct vioblk_dev *dev) 320 { 321 /* Invalid queue? 
*/ 322 if (dev->cfg.queue_select > 0) 323 return; 324 325 dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address; 326 } 327 328 void 329 vioblk_update_qs(struct vioblk_dev *dev) 330 { 331 /* Invalid queue? */ 332 if (dev->cfg.queue_select > 0) { 333 dev->cfg.queue_size = 0; 334 return; 335 } 336 337 /* Update queue address/size based on queue select */ 338 dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa; 339 dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs; 340 } 341 342 static void 343 vioblk_free_info(struct ioinfo *info) 344 { 345 if (!info) 346 return; 347 free(info->buf); 348 free(info); 349 } 350 351 static struct ioinfo * 352 vioblk_start_read(struct vioblk_dev *dev, off_t sector, ssize_t sz) 353 { 354 struct ioinfo *info; 355 356 info = calloc(1, sizeof(*info)); 357 if (!info) 358 goto nomem; 359 info->buf = malloc(sz); 360 if (info->buf == NULL) 361 goto nomem; 362 info->len = sz; 363 info->offset = sector * VIRTIO_BLK_SECTOR_SIZE; 364 info->file = &dev->file; 365 366 return info; 367 368 nomem: 369 free(info); 370 log_warn("malloc error vioblk read"); 371 return (NULL); 372 } 373 374 375 static const uint8_t * 376 vioblk_finish_read(struct ioinfo *info) 377 { 378 struct virtio_backing *file; 379 380 file = info->file; 381 if (file->pread(file->p, info->buf, info->len, info->offset) != info->len) { 382 info->error = errno; 383 log_warn("vioblk read error"); 384 return NULL; 385 } 386 387 return info->buf; 388 } 389 390 static struct ioinfo * 391 vioblk_start_write(struct vioblk_dev *dev, off_t sector, 392 paddr_t addr, size_t len) 393 { 394 struct ioinfo *info; 395 396 info = calloc(1, sizeof(*info)); 397 if (!info) 398 goto nomem; 399 info->buf = malloc(len); 400 if (info->buf == NULL) 401 goto nomem; 402 info->len = len; 403 info->offset = sector * VIRTIO_BLK_SECTOR_SIZE; 404 info->file = &dev->file; 405 406 if (read_mem(addr, info->buf, len)) { 407 vioblk_free_info(info); 408 return NULL; 409 } 410 411 return info; 412 413 nomem: 414 free(info); 415 log_warn("malloc error vioblk write"); 416 return (NULL); 417 } 418 419 static int 420 vioblk_finish_write(struct ioinfo *info) 421 { 422 struct virtio_backing *file; 423 424 file = info->file; 425 if (file->pwrite(file->p, info->buf, info->len, info->offset) != info->len) { 426 log_warn("vioblk write error"); 427 return EIO; 428 } 429 return 0; 430 } 431 432 /* 433 * XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR, if we can 434 * XXX cant trust ring data from VM, be extra cautious. 435 */ 436 int 437 vioblk_notifyq(struct vioblk_dev *dev) 438 { 439 uint64_t q_gpa; 440 uint32_t vr_sz; 441 uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx; 442 uint8_t ds; 443 int ret; 444 off_t secbias; 445 char *vr; 446 struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc; 447 struct vring_avail *avail; 448 struct vring_used *used; 449 struct virtio_blk_req_hdr cmd; 450 451 ret = 0; 452 453 /* Invalid queue? 
*/ 454 if (dev->cfg.queue_notify > 0) 455 return (0); 456 457 vr_sz = vring_size(VIOBLK_QUEUE_SIZE); 458 q_gpa = dev->vq[dev->cfg.queue_notify].qa; 459 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 460 461 vr = calloc(1, vr_sz); 462 if (vr == NULL) { 463 log_warn("calloc error getting vioblk ring"); 464 return (0); 465 } 466 467 if (read_mem(q_gpa, vr, vr_sz)) { 468 log_warnx("error reading gpa 0x%llx", q_gpa); 469 goto out; 470 } 471 472 /* Compute offsets in ring of descriptors, avail ring, and used ring */ 473 desc = (struct vring_desc *)(vr); 474 avail = (struct vring_avail *)(vr + 475 dev->vq[dev->cfg.queue_notify].vq_availoffset); 476 used = (struct vring_used *)(vr + 477 dev->vq[dev->cfg.queue_notify].vq_usedoffset); 478 479 idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK; 480 481 if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) { 482 log_warnx("vioblk queue notify - nothing to do?"); 483 goto out; 484 } 485 486 while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) { 487 488 cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK; 489 cmd_desc = &desc[cmd_desc_idx]; 490 491 if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) { 492 log_warnx("unchained vioblk cmd descriptor received " 493 "(idx %d)", cmd_desc_idx); 494 goto out; 495 } 496 497 /* Read command from descriptor ring */ 498 if (read_mem(cmd_desc->addr, &cmd, cmd_desc->len)) { 499 log_warnx("vioblk: command read_mem error @ 0x%llx", 500 cmd_desc->addr); 501 goto out; 502 } 503 504 switch (cmd.type) { 505 case VIRTIO_BLK_T_IN: 506 /* first descriptor */ 507 secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 508 secdata_desc = &desc[secdata_desc_idx]; 509 510 if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) { 511 log_warnx("unchained vioblk data descriptor " 512 "received (idx %d)", cmd_desc_idx); 513 goto out; 514 } 515 516 secbias = 0; 517 do { 518 struct ioinfo *info; 519 const uint8_t *secdata; 520 521 info = vioblk_start_read(dev, 522 cmd.sector + secbias, 523 (ssize_t)secdata_desc->len); 524 525 /* read the data, use current data descriptor */ 526 secdata = vioblk_finish_read(info); 527 if (secdata == NULL) { 528 vioblk_free_info(info); 529 log_warnx("vioblk: block read error, " 530 "sector %lld", cmd.sector); 531 goto out; 532 } 533 534 if (write_mem(secdata_desc->addr, secdata, 535 secdata_desc->len)) { 536 log_warnx("can't write sector " 537 "data to gpa @ 0x%llx", 538 secdata_desc->addr); 539 dump_descriptor_chain(desc, 540 cmd_desc_idx); 541 vioblk_free_info(info); 542 goto out; 543 } 544 545 vioblk_free_info(info); 546 547 secbias += (secdata_desc->len / 548 VIRTIO_BLK_SECTOR_SIZE); 549 secdata_desc_idx = secdata_desc->next & 550 VIOBLK_QUEUE_MASK; 551 secdata_desc = &desc[secdata_desc_idx]; 552 } while (secdata_desc->flags & VRING_DESC_F_NEXT); 553 554 ds_desc_idx = secdata_desc_idx; 555 ds_desc = secdata_desc; 556 557 ds = VIRTIO_BLK_S_OK; 558 if (write_mem(ds_desc->addr, &ds, ds_desc->len)) { 559 log_warnx("can't write device status data @ " 560 "0x%llx", ds_desc->addr); 561 dump_descriptor_chain(desc, cmd_desc_idx); 562 goto out; 563 } 564 565 ret = 1; 566 dev->cfg.isr_status = 1; 567 used->ring[used->idx & VIOBLK_QUEUE_MASK].id = 568 cmd_desc_idx; 569 used->ring[used->idx & VIOBLK_QUEUE_MASK].len = 570 cmd_desc->len; 571 used->idx++; 572 573 dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & 574 VIOBLK_QUEUE_MASK; 575 576 if (write_mem(q_gpa, vr, vr_sz)) { 577 log_warnx("vioblk: error writing vio ring"); 578 } 579 break; 580 case VIRTIO_BLK_T_OUT: 581 secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 
582 secdata_desc = &desc[secdata_desc_idx]; 583 584 if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) { 585 log_warnx("wr vioblk: unchained vioblk data " 586 "descriptor received (idx %d)", 587 cmd_desc_idx); 588 goto out; 589 } 590 591 if (secdata_desc->len > dev->max_xfer) { 592 log_warnx("%s: invalid read size %d requested", 593 __func__, secdata_desc->len); 594 goto out; 595 } 596 597 secbias = 0; 598 do { 599 struct ioinfo *info; 600 601 info = vioblk_start_write(dev, 602 cmd.sector + secbias, 603 secdata_desc->addr, secdata_desc->len); 604 605 if (info == NULL) { 606 log_warnx("wr vioblk: can't read " 607 "sector data @ 0x%llx", 608 secdata_desc->addr); 609 dump_descriptor_chain(desc, 610 cmd_desc_idx); 611 goto out; 612 } 613 614 if (vioblk_finish_write(info)) { 615 log_warnx("wr vioblk: disk write " 616 "error"); 617 vioblk_free_info(info); 618 goto out; 619 } 620 621 vioblk_free_info(info); 622 623 secbias += secdata_desc->len / 624 VIRTIO_BLK_SECTOR_SIZE; 625 626 secdata_desc_idx = secdata_desc->next & 627 VIOBLK_QUEUE_MASK; 628 secdata_desc = &desc[secdata_desc_idx]; 629 } while (secdata_desc->flags & VRING_DESC_F_NEXT); 630 631 ds_desc_idx = secdata_desc_idx; 632 ds_desc = secdata_desc; 633 634 ds = VIRTIO_BLK_S_OK; 635 if (write_mem(ds_desc->addr, &ds, ds_desc->len)) { 636 log_warnx("wr vioblk: can't write device " 637 "status data @ 0x%llx", ds_desc->addr); 638 dump_descriptor_chain(desc, cmd_desc_idx); 639 goto out; 640 } 641 642 ret = 1; 643 dev->cfg.isr_status = 1; 644 used->ring[used->idx & VIOBLK_QUEUE_MASK].id = 645 cmd_desc_idx; 646 used->ring[used->idx & VIOBLK_QUEUE_MASK].len = 647 cmd_desc->len; 648 used->idx++; 649 650 dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & 651 VIOBLK_QUEUE_MASK; 652 if (write_mem(q_gpa, vr, vr_sz)) 653 log_warnx("wr vioblk: error writing vio ring"); 654 break; 655 case VIRTIO_BLK_T_FLUSH: 656 case VIRTIO_BLK_T_FLUSH_OUT: 657 ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 658 ds_desc = &desc[ds_desc_idx]; 659 660 ds = VIRTIO_BLK_S_OK; 661 if (write_mem(ds_desc->addr, &ds, ds_desc->len)) { 662 log_warnx("fl vioblk: " 663 "can't write device status " 664 "data @ 0x%llx", ds_desc->addr); 665 dump_descriptor_chain(desc, cmd_desc_idx); 666 goto out; 667 } 668 669 ret = 1; 670 dev->cfg.isr_status = 1; 671 used->ring[used->idx & VIOBLK_QUEUE_MASK].id = 672 cmd_desc_idx; 673 used->ring[used->idx & VIOBLK_QUEUE_MASK].len = 674 cmd_desc->len; 675 used->idx++; 676 677 dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & 678 VIOBLK_QUEUE_MASK; 679 if (write_mem(q_gpa, vr, vr_sz)) { 680 log_warnx("fl vioblk: error writing vio ring"); 681 } 682 break; 683 default: 684 log_warnx("%s: unsupported command 0x%x", __func__, 685 cmd.type); 686 687 ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 688 ds_desc = &desc[ds_desc_idx]; 689 690 ds = VIRTIO_BLK_S_UNSUPP; 691 if (write_mem(ds_desc->addr, &ds, ds_desc->len)) { 692 log_warnx("%s: get id : can't write device " 693 "status data @ 0x%llx", __func__, 694 ds_desc->addr); 695 dump_descriptor_chain(desc, cmd_desc_idx); 696 goto out; 697 } 698 699 ret = 1; 700 dev->cfg.isr_status = 1; 701 used->ring[used->idx & VIOBLK_QUEUE_MASK].id = 702 cmd_desc_idx; 703 used->ring[used->idx & VIOBLK_QUEUE_MASK].len = 704 cmd_desc->len; 705 used->idx++; 706 707 dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & 708 VIOBLK_QUEUE_MASK; 709 if (write_mem(q_gpa, vr, vr_sz)) { 710 log_warnx("%s: get id : error writing vio ring", 711 __func__); 712 } 713 break; 714 } 715 716 idx = (idx + 1) & 
VIOBLK_QUEUE_MASK; 717 } 718 out: 719 free(vr); 720 return (ret); 721 } 722 723 int 724 virtio_blk_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 725 void *cookie, uint8_t sz) 726 { 727 struct vioblk_dev *dev = (struct vioblk_dev *)cookie; 728 729 *intr = 0xFF; 730 731 732 if (dir == 0) { 733 switch (reg) { 734 case VIRTIO_CONFIG_DEVICE_FEATURES: 735 case VIRTIO_CONFIG_QUEUE_SIZE: 736 case VIRTIO_CONFIG_ISR_STATUS: 737 log_warnx("%s: illegal write %x to %s", 738 __progname, *data, virtio_reg_name(reg)); 739 break; 740 case VIRTIO_CONFIG_GUEST_FEATURES: 741 dev->cfg.guest_feature = *data; 742 break; 743 case VIRTIO_CONFIG_QUEUE_ADDRESS: 744 dev->cfg.queue_address = *data; 745 vioblk_update_qa(dev); 746 break; 747 case VIRTIO_CONFIG_QUEUE_SELECT: 748 dev->cfg.queue_select = *data; 749 vioblk_update_qs(dev); 750 break; 751 case VIRTIO_CONFIG_QUEUE_NOTIFY: 752 dev->cfg.queue_notify = *data; 753 if (vioblk_notifyq(dev)) 754 *intr = 1; 755 break; 756 case VIRTIO_CONFIG_DEVICE_STATUS: 757 dev->cfg.device_status = *data; 758 if (dev->cfg.device_status == 0) { 759 log_debug("%s: device reset", __func__); 760 dev->cfg.guest_feature = 0; 761 dev->cfg.queue_address = 0; 762 vioblk_update_qa(dev); 763 dev->cfg.queue_size = 0; 764 vioblk_update_qs(dev); 765 dev->cfg.queue_select = 0; 766 dev->cfg.queue_notify = 0; 767 dev->cfg.isr_status = 0; 768 dev->vq[0].last_avail = 0; 769 vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq); 770 } 771 break; 772 default: 773 break; 774 } 775 } else { 776 switch (reg) { 777 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 778 switch (sz) { 779 case 4: 780 *data = (uint32_t)(dev->sz); 781 break; 782 case 2: 783 *data &= 0xFFFF0000; 784 *data |= (uint32_t)(dev->sz) & 0xFFFF; 785 break; 786 case 1: 787 *data &= 0xFFFFFF00; 788 *data |= (uint32_t)(dev->sz) & 0xFF; 789 break; 790 } 791 /* XXX handle invalid sz */ 792 break; 793 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1: 794 if (sz == 1) { 795 *data &= 0xFFFFFF00; 796 *data |= (uint32_t)(dev->sz >> 8) & 0xFF; 797 } 798 /* XXX handle invalid sz */ 799 break; 800 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2: 801 if (sz == 1) { 802 *data &= 0xFFFFFF00; 803 *data |= (uint32_t)(dev->sz >> 16) & 0xFF; 804 } else if (sz == 2) { 805 *data &= 0xFFFF0000; 806 *data |= (uint32_t)(dev->sz >> 16) & 0xFFFF; 807 } 808 /* XXX handle invalid sz */ 809 break; 810 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3: 811 if (sz == 1) { 812 *data &= 0xFFFFFF00; 813 *data |= (uint32_t)(dev->sz >> 24) & 0xFF; 814 } 815 /* XXX handle invalid sz */ 816 break; 817 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: 818 switch (sz) { 819 case 4: 820 *data = (uint32_t)(dev->sz >> 32); 821 break; 822 case 2: 823 *data &= 0xFFFF0000; 824 *data |= (uint32_t)(dev->sz >> 32) & 0xFFFF; 825 break; 826 case 1: 827 *data &= 0xFFFFFF00; 828 *data |= (uint32_t)(dev->sz >> 32) & 0xFF; 829 break; 830 } 831 /* XXX handle invalid sz */ 832 break; 833 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5: 834 if (sz == 1) { 835 *data &= 0xFFFFFF00; 836 *data |= (uint32_t)(dev->sz >> 40) & 0xFF; 837 } 838 /* XXX handle invalid sz */ 839 break; 840 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 6: 841 if (sz == 1) { 842 *data &= 0xFFFFFF00; 843 *data |= (uint32_t)(dev->sz >> 48) & 0xFF; 844 } else if (sz == 2) { 845 *data &= 0xFFFF0000; 846 *data |= (uint32_t)(dev->sz >> 48) & 0xFFFF; 847 } 848 /* XXX handle invalid sz */ 849 break; 850 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 7: 851 if (sz == 1) { 852 *data &= 0xFFFFFF00; 853 *data |= (uint32_t)(dev->sz >> 56) & 0xFF; 854 } 855 /* XXX handle invalid sz */ 
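			/*
			 * Note: config offsets 0-7 above return the 64-bit
			 * capacity (dev->sz, in 512-byte sectors) in
			 * little-endian order, one, two or four bytes at a
			 * time depending on the access size used by the
			 * guest; offsets 8-11 below return dev->max_xfer,
			 * which the device advertises via
			 * VIRTIO_BLK_F_SIZE_MAX.
			 */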
856 break; 857 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: 858 switch (sz) { 859 case 4: 860 *data = (uint32_t)(dev->max_xfer); 861 break; 862 case 2: 863 *data &= 0xFFFF0000; 864 *data |= (uint32_t)(dev->max_xfer) & 0xFFFF; 865 break; 866 case 1: 867 *data &= 0xFFFFFF00; 868 *data |= (uint32_t)(dev->max_xfer) & 0xFF; 869 break; 870 } 871 /* XXX handle invalid sz */ 872 break; 873 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 9: 874 if (sz == 1) { 875 *data &= 0xFFFFFF00; 876 *data |= (uint32_t)(dev->max_xfer >> 8) & 0xFF; 877 } 878 /* XXX handle invalid sz */ 879 break; 880 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 10: 881 if (sz == 1) { 882 *data &= 0xFFFFFF00; 883 *data |= (uint32_t)(dev->max_xfer >> 16) & 0xFF; 884 } else if (sz == 2) { 885 *data &= 0xFFFF0000; 886 *data |= (uint32_t)(dev->max_xfer >> 16) 887 & 0xFFFF; 888 } 889 /* XXX handle invalid sz */ 890 break; 891 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 11: 892 if (sz == 1) { 893 *data &= 0xFFFFFF00; 894 *data |= (uint32_t)(dev->max_xfer >> 24) & 0xFF; 895 } 896 /* XXX handle invalid sz */ 897 break; 898 case VIRTIO_CONFIG_DEVICE_FEATURES: 899 *data = dev->cfg.device_feature; 900 break; 901 case VIRTIO_CONFIG_GUEST_FEATURES: 902 *data = dev->cfg.guest_feature; 903 break; 904 case VIRTIO_CONFIG_QUEUE_ADDRESS: 905 *data = dev->cfg.queue_address; 906 break; 907 case VIRTIO_CONFIG_QUEUE_SIZE: 908 if (sz == 4) 909 *data = dev->cfg.queue_size; 910 else if (sz == 2) { 911 *data &= 0xFFFF0000; 912 *data |= (uint16_t)dev->cfg.queue_size; 913 } else if (sz == 1) { 914 *data &= 0xFFFFFF00; 915 *data |= (uint8_t)dev->cfg.queue_size; 916 } 917 break; 918 case VIRTIO_CONFIG_QUEUE_SELECT: 919 *data = dev->cfg.queue_select; 920 break; 921 case VIRTIO_CONFIG_QUEUE_NOTIFY: 922 *data = dev->cfg.queue_notify; 923 break; 924 case VIRTIO_CONFIG_DEVICE_STATUS: 925 if (sz == 4) 926 *data = dev->cfg.device_status; 927 else if (sz == 2) { 928 *data &= 0xFFFF0000; 929 *data |= (uint16_t)dev->cfg.device_status; 930 } else if (sz == 1) { 931 *data &= 0xFFFFFF00; 932 *data |= (uint8_t)dev->cfg.device_status; 933 } 934 break; 935 case VIRTIO_CONFIG_ISR_STATUS: 936 *data = dev->cfg.isr_status; 937 dev->cfg.isr_status = 0; 938 vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq); 939 break; 940 } 941 } 942 return (0); 943 } 944 945 int 946 virtio_net_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 947 void *cookie, uint8_t sz) 948 { 949 struct vionet_dev *dev = (struct vionet_dev *)cookie; 950 951 *intr = 0xFF; 952 mutex_lock(&dev->mutex); 953 954 if (dir == 0) { 955 switch (reg) { 956 case VIRTIO_CONFIG_DEVICE_FEATURES: 957 case VIRTIO_CONFIG_QUEUE_SIZE: 958 case VIRTIO_CONFIG_ISR_STATUS: 959 log_warnx("%s: illegal write %x to %s", 960 __progname, *data, virtio_reg_name(reg)); 961 break; 962 case VIRTIO_CONFIG_GUEST_FEATURES: 963 dev->cfg.guest_feature = *data; 964 break; 965 case VIRTIO_CONFIG_QUEUE_ADDRESS: 966 dev->cfg.queue_address = *data; 967 vionet_update_qa(dev); 968 break; 969 case VIRTIO_CONFIG_QUEUE_SELECT: 970 dev->cfg.queue_select = *data; 971 vionet_update_qs(dev); 972 break; 973 case VIRTIO_CONFIG_QUEUE_NOTIFY: 974 dev->cfg.queue_notify = *data; 975 if (vionet_notifyq(dev)) 976 *intr = 1; 977 break; 978 case VIRTIO_CONFIG_DEVICE_STATUS: 979 dev->cfg.device_status = *data; 980 if (dev->cfg.device_status == 0) { 981 log_debug("%s: device reset", __func__); 982 dev->cfg.guest_feature = 0; 983 dev->cfg.queue_address = 0; 984 vionet_update_qa(dev); 985 dev->cfg.queue_size = 0; 986 vionet_update_qs(dev); 987 dev->cfg.queue_select = 0; 988 
dev->cfg.queue_notify = 0; 989 dev->cfg.isr_status = 0; 990 dev->vq[RXQ].last_avail = 0; 991 dev->vq[RXQ].notified_avail = 0; 992 dev->vq[TXQ].last_avail = 0; 993 dev->vq[TXQ].notified_avail = 0; 994 vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq); 995 } 996 break; 997 default: 998 break; 999 } 1000 } else { 1001 switch (reg) { 1002 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 1003 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1: 1004 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2: 1005 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3: 1006 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: 1007 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5: 1008 *data = dev->mac[reg - 1009 VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI]; 1010 break; 1011 case VIRTIO_CONFIG_DEVICE_FEATURES: 1012 *data = dev->cfg.device_feature; 1013 break; 1014 case VIRTIO_CONFIG_GUEST_FEATURES: 1015 *data = dev->cfg.guest_feature; 1016 break; 1017 case VIRTIO_CONFIG_QUEUE_ADDRESS: 1018 *data = dev->cfg.queue_address; 1019 break; 1020 case VIRTIO_CONFIG_QUEUE_SIZE: 1021 *data = dev->cfg.queue_size; 1022 break; 1023 case VIRTIO_CONFIG_QUEUE_SELECT: 1024 *data = dev->cfg.queue_select; 1025 break; 1026 case VIRTIO_CONFIG_QUEUE_NOTIFY: 1027 *data = dev->cfg.queue_notify; 1028 break; 1029 case VIRTIO_CONFIG_DEVICE_STATUS: 1030 *data = dev->cfg.device_status; 1031 break; 1032 case VIRTIO_CONFIG_ISR_STATUS: 1033 *data = dev->cfg.isr_status; 1034 dev->cfg.isr_status = 0; 1035 vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq); 1036 break; 1037 } 1038 } 1039 1040 mutex_unlock(&dev->mutex); 1041 return (0); 1042 } 1043 1044 /* 1045 * Must be called with dev->mutex acquired. 1046 */ 1047 void 1048 vionet_update_qa(struct vionet_dev *dev) 1049 { 1050 /* Invalid queue? */ 1051 if (dev->cfg.queue_select > 1) 1052 return; 1053 1054 dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address; 1055 } 1056 1057 /* 1058 * Must be called with dev->mutex acquired. 1059 */ 1060 void 1061 vionet_update_qs(struct vionet_dev *dev) 1062 { 1063 /* Invalid queue? */ 1064 if (dev->cfg.queue_select > 1) { 1065 dev->cfg.queue_size = 0; 1066 return; 1067 } 1068 1069 /* Update queue address/size based on queue select */ 1070 dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa; 1071 dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs; 1072 } 1073 1074 /* 1075 * Must be called with dev->mutex acquired. 
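 *
 * vionet_enq_rx() copies one received ethernet frame into the guest's RX
 * virtqueue: it reads the ring from guest memory, takes the next available
 * descriptor chain, writes a struct virtio_net_hdr (all fields zero except
 * hdr_len) followed by the packet data, and publishes a used-ring entry so
 * the guest sees the frame.  *spc is set to the number of RX slots still
 * available after this enqueue.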
1076 */ 1077 int 1078 vionet_enq_rx(struct vionet_dev *dev, char *pkt, ssize_t sz, int *spc) 1079 { 1080 uint64_t q_gpa; 1081 uint32_t vr_sz; 1082 uint16_t idx, pkt_desc_idx, hdr_desc_idx; 1083 ptrdiff_t off; 1084 int ret; 1085 char *vr; 1086 ssize_t rem; 1087 struct vring_desc *desc, *pkt_desc, *hdr_desc; 1088 struct vring_avail *avail; 1089 struct vring_used *used; 1090 struct vring_used_elem *ue; 1091 struct virtio_net_hdr hdr; 1092 1093 ret = 0; 1094 1095 if (!(dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK)) 1096 return ret; 1097 1098 vr_sz = vring_size(VIONET_QUEUE_SIZE); 1099 q_gpa = dev->vq[RXQ].qa; 1100 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 1101 1102 vr = calloc(1, vr_sz); 1103 if (vr == NULL) { 1104 log_warn("rx enq: calloc error getting vionet ring"); 1105 return (0); 1106 } 1107 1108 if (read_mem(q_gpa, vr, vr_sz)) { 1109 log_warnx("rx enq: error reading gpa 0x%llx", q_gpa); 1110 goto out; 1111 } 1112 1113 /* Compute offsets in ring of descriptors, avail ring, and used ring */ 1114 desc = (struct vring_desc *)(vr); 1115 avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset); 1116 used = (struct vring_used *)(vr + dev->vq[RXQ].vq_usedoffset); 1117 1118 idx = dev->vq[RXQ].last_avail & VIONET_QUEUE_MASK; 1119 1120 if ((dev->vq[RXQ].notified_avail & VIONET_QUEUE_MASK) == idx) { 1121 log_debug("vionet queue notify - no space, dropping packet"); 1122 goto out; 1123 } 1124 1125 hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK; 1126 hdr_desc = &desc[hdr_desc_idx]; 1127 1128 pkt_desc_idx = hdr_desc->next & VIONET_QUEUE_MASK; 1129 pkt_desc = &desc[pkt_desc_idx]; 1130 1131 /* Set up the virtio header (written first, before the packet data) */ 1132 memset(&hdr, 0, sizeof(struct virtio_net_hdr)); 1133 hdr.hdr_len = sizeof(struct virtio_net_hdr); 1134 1135 /* Check size of header descriptor */ 1136 if (hdr_desc->len < sizeof(struct virtio_net_hdr)) { 1137 log_warnx("%s: invalid header descriptor (too small)", 1138 __func__); 1139 goto out; 1140 } 1141 1142 /* Write out virtio header */ 1143 if (write_mem(hdr_desc->addr, &hdr, sizeof(struct virtio_net_hdr))) { 1144 log_warnx("vionet: rx enq header write_mem error @ " 1145 "0x%llx", hdr_desc->addr); 1146 goto out; 1147 } 1148 1149 /* 1150 * Compute remaining space in the first (header) descriptor, and 1151 * copy the packet data after if space is available. Otherwise, 1152 * copy to the pkt_desc descriptor. 
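 * The separate packet descriptor must be flagged VRING_DESC_F_WRITE
 * (device-writable); if it is not, or if it is too small for the frame,
 * the packet is dropped.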
1153 */ 1154 rem = hdr_desc->len - sizeof(struct virtio_net_hdr); 1155 1156 if (rem >= sz) { 1157 if (write_mem(hdr_desc->addr + sizeof(struct virtio_net_hdr), 1158 pkt, sz)) { 1159 log_warnx("vionet: rx enq packet write_mem error @ " 1160 "0x%llx", pkt_desc->addr); 1161 goto out; 1162 } 1163 } else { 1164 /* Fallback to pkt_desc descriptor */ 1165 if ((uint64_t)pkt_desc->len >= (uint64_t)sz) { 1166 /* Must be not readable */ 1167 if ((pkt_desc->flags & VRING_DESC_F_WRITE) == 0) { 1168 log_warnx("unexpected readable rx desc %d", 1169 pkt_desc_idx); 1170 goto out; 1171 } 1172 1173 /* Write packet to descriptor ring */ 1174 if (write_mem(pkt_desc->addr, pkt, sz)) { 1175 log_warnx("vionet: rx enq packet write_mem " 1176 "error @ 0x%llx", pkt_desc->addr); 1177 goto out; 1178 } 1179 } else { 1180 log_warnx("%s: descriptor too small for packet data", 1181 __func__); 1182 goto out; 1183 } 1184 } 1185 1186 ret = 1; 1187 dev->cfg.isr_status = 1; 1188 ue = &used->ring[used->idx & VIONET_QUEUE_MASK]; 1189 ue->id = hdr_desc_idx; 1190 ue->len = sz + sizeof(struct virtio_net_hdr); 1191 used->idx++; 1192 dev->vq[RXQ].last_avail++; 1193 *spc = dev->vq[RXQ].notified_avail - dev->vq[RXQ].last_avail; 1194 1195 off = (char *)ue - vr; 1196 if (write_mem(q_gpa + off, ue, sizeof *ue)) 1197 log_warnx("vionet: error writing vio ring"); 1198 else { 1199 off = (char *)&used->idx - vr; 1200 if (write_mem(q_gpa + off, &used->idx, sizeof used->idx)) 1201 log_warnx("vionet: error writing vio ring"); 1202 } 1203 out: 1204 free(vr); 1205 return (ret); 1206 } 1207 1208 /* 1209 * vionet_rx 1210 * 1211 * Enqueue data that was received on a tap file descriptor 1212 * to the vionet device queue. 1213 * 1214 * Must be called with dev->mutex acquired. 1215 */ 1216 static int 1217 vionet_rx(struct vionet_dev *dev) 1218 { 1219 char buf[PAGE_SIZE]; 1220 int hasdata, num_enq = 0, spc = 0; 1221 struct ether_header *eh; 1222 ssize_t sz; 1223 1224 do { 1225 sz = read(dev->fd, buf, sizeof buf); 1226 if (sz == -1) { 1227 /* 1228 * If we get EAGAIN, No data is currently available. 1229 * Do not treat this as an error. 1230 */ 1231 if (errno != EAGAIN) 1232 log_warn("unexpected read error on vionet " 1233 "device"); 1234 } else if (sz != 0) { 1235 eh = (struct ether_header *)buf; 1236 if (!dev->lockedmac || sz < ETHER_HDR_LEN || 1237 ETHER_IS_MULTICAST(eh->ether_dhost) || 1238 memcmp(eh->ether_dhost, dev->mac, 1239 sizeof(eh->ether_dhost)) == 0) 1240 num_enq += vionet_enq_rx(dev, buf, sz, &spc); 1241 } else if (sz == 0) { 1242 log_debug("process_rx: no data"); 1243 hasdata = 0; 1244 break; 1245 } 1246 1247 hasdata = fd_hasdata(dev->fd); 1248 } while (spc && hasdata); 1249 1250 dev->rx_pending = hasdata; 1251 return (num_enq); 1252 } 1253 1254 /* 1255 * vionet_rx_event 1256 * 1257 * Called from the event handling thread when new data can be 1258 * received on the tap fd of a vionet device. 1259 */ 1260 static void 1261 vionet_rx_event(int fd, short kind, void *arg) 1262 { 1263 struct vionet_dev *dev = arg; 1264 1265 mutex_lock(&dev->mutex); 1266 1267 /* 1268 * We already have other data pending to be received. The data that 1269 * has become available now will be enqueued to the vionet_dev 1270 * later. 
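 * ("Later" here means the next call to vionet_process_rx() on VCPU exit,
 * which drains any vionet device that still has rx_pending set.)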
1271 */ 1272 if (dev->rx_pending) { 1273 mutex_unlock(&dev->mutex); 1274 return; 1275 } 1276 1277 if (vionet_rx(dev) > 0) { 1278 /* XXX: vcpu_id */ 1279 vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq); 1280 } 1281 1282 mutex_unlock(&dev->mutex); 1283 } 1284 1285 /* 1286 * vionet_process_rx 1287 * 1288 * Processes any remaining pending receivable data for a vionet device. 1289 * Called on VCPU exit. Although we poll on the tap file descriptor of 1290 * a vionet_dev in a separate thread, this function still needs to be 1291 * called on VCPU exit: it can happen that not all data fits into the 1292 * receive queue of the vionet_dev immediately. So any outstanding data 1293 * is handled here. 1294 * 1295 * Parameters: 1296 * vm_id: VM ID of the VM for which to process vionet events 1297 */ 1298 void 1299 vionet_process_rx(uint32_t vm_id) 1300 { 1301 int i; 1302 1303 for (i = 0 ; i < nr_vionet; i++) { 1304 mutex_lock(&vionet[i].mutex); 1305 if (!vionet[i].rx_added) { 1306 mutex_unlock(&vionet[i].mutex); 1307 continue; 1308 } 1309 1310 if (vionet[i].rx_pending) { 1311 if (vionet_rx(&vionet[i])) { 1312 vcpu_assert_pic_irq(vm_id, 0, vionet[i].irq); 1313 } 1314 } 1315 mutex_unlock(&vionet[i].mutex); 1316 } 1317 } 1318 1319 /* 1320 * Must be called with dev->mutex acquired. 1321 */ 1322 void 1323 vionet_notify_rx(struct vionet_dev *dev) 1324 { 1325 uint64_t q_gpa; 1326 uint32_t vr_sz; 1327 char *vr; 1328 struct vring_avail *avail; 1329 1330 vr_sz = vring_size(VIONET_QUEUE_SIZE); 1331 q_gpa = dev->vq[RXQ].qa; 1332 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 1333 1334 vr = malloc(vr_sz); 1335 if (vr == NULL) { 1336 log_warn("malloc error getting vionet ring"); 1337 return; 1338 } 1339 1340 if (read_mem(q_gpa, vr, vr_sz)) { 1341 log_warnx("error reading gpa 0x%llx", q_gpa); 1342 free(vr); 1343 return; 1344 } 1345 1346 /* Compute offset into avail ring */ 1347 avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset); 1348 1349 dev->rx_added = 1; 1350 dev->vq[RXQ].notified_avail = avail->idx - 1; 1351 1352 free(vr); 1353 } 1354 1355 /* 1356 * Must be called with dev->mutex acquired. 1357 */ 1358 int 1359 vionet_notifyq(struct vionet_dev *dev) 1360 { 1361 int ret; 1362 1363 switch (dev->cfg.queue_notify) { 1364 case RXQ: 1365 vionet_notify_rx(dev); 1366 ret = 0; 1367 break; 1368 case TXQ: 1369 ret = vionet_notify_tx(dev); 1370 break; 1371 default: 1372 /* 1373 * Catch the unimplemented queue ID 2 (control queue) as 1374 * well as any bogus queue IDs. 1375 */ 1376 log_debug("%s: notify for unimplemented queue ID %d", 1377 __func__, dev->cfg.queue_notify); 1378 ret = 0; 1379 break; 1380 } 1381 1382 return (ret); 1383 } 1384 1385 /* 1386 * Must be called with dev->mutex acquired. 1387 * 1388 * XXX cant trust ring data from VM, be extra cautious. 
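 *
 * TX path: for every available descriptor chain posted by the guest, the
 * descriptor lengths are summed (minus the virtio_net_hdr descriptor), the
 * packet is gathered into a local buffer with read_mem(), optionally
 * filtered (locked MAC check, local DHCP responses), and then written to
 * the tap fd.  The used ring is updated for each chain consumed.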
1389 */ 1390 int 1391 vionet_notify_tx(struct vionet_dev *dev) 1392 { 1393 uint64_t q_gpa; 1394 uint32_t vr_sz; 1395 uint16_t idx, pkt_desc_idx, hdr_desc_idx, dxx; 1396 size_t pktsz; 1397 ssize_t dhcpsz; 1398 int ret, num_enq, ofs, spc; 1399 char *vr, *pkt, *dhcppkt; 1400 struct vring_desc *desc, *pkt_desc, *hdr_desc; 1401 struct vring_avail *avail; 1402 struct vring_used *used; 1403 struct ether_header *eh; 1404 1405 vr = pkt = dhcppkt = NULL; 1406 ret = spc = 0; 1407 dhcpsz = 0; 1408 1409 vr_sz = vring_size(VIONET_QUEUE_SIZE); 1410 q_gpa = dev->vq[TXQ].qa; 1411 q_gpa = q_gpa * VIRTIO_PAGE_SIZE; 1412 1413 vr = calloc(1, vr_sz); 1414 if (vr == NULL) { 1415 log_warn("calloc error getting vionet ring"); 1416 goto out; 1417 } 1418 1419 if (read_mem(q_gpa, vr, vr_sz)) { 1420 log_warnx("error reading gpa 0x%llx", q_gpa); 1421 goto out; 1422 } 1423 1424 /* Compute offsets in ring of descriptors, avail ring, and used ring */ 1425 desc = (struct vring_desc *)(vr); 1426 avail = (struct vring_avail *)(vr + dev->vq[TXQ].vq_availoffset); 1427 used = (struct vring_used *)(vr + dev->vq[TXQ].vq_usedoffset); 1428 1429 num_enq = 0; 1430 1431 idx = dev->vq[TXQ].last_avail & VIONET_QUEUE_MASK; 1432 1433 if ((avail->idx & VIONET_QUEUE_MASK) == idx) { 1434 log_warnx("vionet tx queue notify - nothing to do?"); 1435 goto out; 1436 } 1437 1438 while ((avail->idx & VIONET_QUEUE_MASK) != idx) { 1439 hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK; 1440 hdr_desc = &desc[hdr_desc_idx]; 1441 pktsz = 0; 1442 1443 dxx = hdr_desc_idx; 1444 do { 1445 pktsz += desc[dxx].len; 1446 dxx = desc[dxx].next; 1447 } while (desc[dxx].flags & VRING_DESC_F_NEXT); 1448 1449 pktsz += desc[dxx].len; 1450 1451 /* Remove virtio header descriptor len */ 1452 pktsz -= hdr_desc->len; 1453 1454 /* 1455 * XXX check sanity pktsz 1456 * XXX too long and > PAGE_SIZE checks 1457 * (PAGE_SIZE can be relaxed to 16384 later) 1458 */ 1459 pkt = malloc(pktsz); 1460 if (pkt == NULL) { 1461 log_warn("malloc error alloc packet buf"); 1462 goto out; 1463 } 1464 1465 ofs = 0; 1466 pkt_desc_idx = hdr_desc->next & VIONET_QUEUE_MASK; 1467 pkt_desc = &desc[pkt_desc_idx]; 1468 1469 while (pkt_desc->flags & VRING_DESC_F_NEXT) { 1470 /* must be not writable */ 1471 if (pkt_desc->flags & VRING_DESC_F_WRITE) { 1472 log_warnx("unexpected writable tx desc " 1473 "%d", pkt_desc_idx); 1474 goto out; 1475 } 1476 1477 /* Read packet from descriptor ring */ 1478 if (read_mem(pkt_desc->addr, pkt + ofs, 1479 pkt_desc->len)) { 1480 log_warnx("vionet: packet read_mem error " 1481 "@ 0x%llx", pkt_desc->addr); 1482 goto out; 1483 } 1484 1485 ofs += pkt_desc->len; 1486 pkt_desc_idx = pkt_desc->next & VIONET_QUEUE_MASK; 1487 pkt_desc = &desc[pkt_desc_idx]; 1488 } 1489 1490 /* Now handle tail descriptor - must be not writable */ 1491 if (pkt_desc->flags & VRING_DESC_F_WRITE) { 1492 log_warnx("unexpected writable tx descriptor %d", 1493 pkt_desc_idx); 1494 goto out; 1495 } 1496 1497 /* Read packet from descriptor ring */ 1498 if (read_mem(pkt_desc->addr, pkt + ofs, 1499 pkt_desc->len)) { 1500 log_warnx("vionet: packet read_mem error @ " 1501 "0x%llx", pkt_desc->addr); 1502 goto out; 1503 } 1504 1505 /* reject other source addresses */ 1506 if (dev->lockedmac && pktsz >= ETHER_HDR_LEN && 1507 (eh = (struct ether_header *)pkt) && 1508 memcmp(eh->ether_shost, dev->mac, 1509 sizeof(eh->ether_shost)) != 0) 1510 log_debug("vionet: wrong source address %s for vm %d", 1511 ether_ntoa((struct ether_addr *) 1512 eh->ether_shost), dev->vm_id); 1513 else if (dev->local && dhcpsz == 0 && 
1514 (dhcpsz = dhcp_request(dev, pkt, pktsz, &dhcppkt)) != -1) { 1515 log_debug("vionet: dhcp request," 1516 " local response size %zd", dhcpsz); 1517 1518 /* XXX signed vs unsigned here, funky cast */ 1519 } else if (write(dev->fd, pkt, pktsz) != (int)pktsz) { 1520 log_warnx("vionet: tx failed writing to tap: " 1521 "%d", errno); 1522 goto out; 1523 } 1524 1525 ret = 1; 1526 dev->cfg.isr_status = 1; 1527 used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_desc_idx; 1528 used->ring[used->idx & VIONET_QUEUE_MASK].len = hdr_desc->len; 1529 used->idx++; 1530 1531 dev->vq[TXQ].last_avail++; 1532 num_enq++; 1533 1534 idx = dev->vq[TXQ].last_avail & VIONET_QUEUE_MASK; 1535 1536 free(pkt); 1537 pkt = NULL; 1538 } 1539 1540 if (write_mem(q_gpa, vr, vr_sz)) { 1541 log_warnx("vionet: tx error writing vio ring"); 1542 } 1543 1544 if (dhcpsz > 0) { 1545 if (vionet_enq_rx(dev, dhcppkt, dhcpsz, &spc)) 1546 ret = 1; 1547 } 1548 1549 out: 1550 free(vr); 1551 free(pkt); 1552 free(dhcppkt); 1553 1554 return (ret); 1555 } 1556 1557 int 1558 vmmci_ctl(unsigned int cmd) 1559 { 1560 struct timeval tv = { 0, 0 }; 1561 1562 if ((vmmci.cfg.device_status & 1563 VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0) 1564 return (-1); 1565 1566 if (cmd == vmmci.cmd) 1567 return (0); 1568 1569 switch (cmd) { 1570 case VMMCI_NONE: 1571 break; 1572 case VMMCI_SHUTDOWN: 1573 case VMMCI_REBOOT: 1574 /* Update command */ 1575 vmmci.cmd = cmd; 1576 1577 /* 1578 * vmm VMs do not support powerdown, send a reboot request 1579 * instead and turn it off after the triple fault. 1580 */ 1581 if (cmd == VMMCI_SHUTDOWN) 1582 cmd = VMMCI_REBOOT; 1583 1584 /* Trigger interrupt */ 1585 vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE; 1586 vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq); 1587 1588 /* Add ACK timeout */ 1589 tv.tv_sec = VMMCI_TIMEOUT; 1590 evtimer_add(&vmmci.timeout, &tv); 1591 break; 1592 case VMMCI_SYNCRTC: 1593 if (vmmci.cfg.guest_feature & VMMCI_F_SYNCRTC) { 1594 /* RTC updated, request guest VM resync of its RTC */ 1595 vmmci.cmd = cmd; 1596 1597 vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE; 1598 vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq); 1599 } else { 1600 log_debug("%s: RTC sync skipped (guest does not " 1601 "support RTC sync)\n", __func__); 1602 } 1603 break; 1604 default: 1605 fatalx("invalid vmmci command: %d", cmd); 1606 } 1607 1608 return (0); 1609 } 1610 1611 void 1612 vmmci_ack(unsigned int cmd) 1613 { 1614 struct timeval tv = { 0, 0 }; 1615 1616 switch (cmd) { 1617 case VMMCI_NONE: 1618 break; 1619 case VMMCI_SHUTDOWN: 1620 /* 1621 * The shutdown was requested by the VM if we don't have 1622 * a pending shutdown request. In this case add a short 1623 * timeout to give the VM a chance to reboot before the 1624 * timer is expired. 1625 */ 1626 if (vmmci.cmd == 0) { 1627 log_debug("%s: vm %u requested shutdown", __func__, 1628 vmmci.vm_id); 1629 tv.tv_sec = VMMCI_TIMEOUT; 1630 evtimer_add(&vmmci.timeout, &tv); 1631 return; 1632 } 1633 /* FALLTHROUGH */ 1634 case VMMCI_REBOOT: 1635 /* 1636 * If the VM acknowleged our shutdown request, give it 1637 * enough time to shutdown or reboot gracefully. This 1638 * might take a considerable amount of time (running 1639 * rc.shutdown on the VM), so increase the timeout before 1640 * killing it forcefully. 
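 * The timeout is only extended (from VMMCI_TIMEOUT to
 * VMMCI_SHUTDOWN_TIMEOUT) while our own request is still pending.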
1641 */ 1642 if (cmd == vmmci.cmd && 1643 evtimer_pending(&vmmci.timeout, NULL)) { 1644 log_debug("%s: vm %u acknowledged shutdown request", 1645 __func__, vmmci.vm_id); 1646 tv.tv_sec = VMMCI_SHUTDOWN_TIMEOUT; 1647 evtimer_add(&vmmci.timeout, &tv); 1648 } 1649 break; 1650 case VMMCI_SYNCRTC: 1651 log_debug("%s: vm %u acknowledged RTC sync request", 1652 __func__, vmmci.vm_id); 1653 vmmci.cmd = VMMCI_NONE; 1654 break; 1655 default: 1656 log_warnx("%s: illegal request %u", __func__, cmd); 1657 break; 1658 } 1659 } 1660 1661 void 1662 vmmci_timeout(int fd, short type, void *arg) 1663 { 1664 log_debug("%s: vm %u shutdown", __progname, vmmci.vm_id); 1665 vm_shutdown(vmmci.cmd == VMMCI_REBOOT ? VMMCI_REBOOT : VMMCI_SHUTDOWN); 1666 } 1667 1668 int 1669 vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, 1670 void *unused, uint8_t sz) 1671 { 1672 *intr = 0xFF; 1673 1674 if (dir == 0) { 1675 switch (reg) { 1676 case VIRTIO_CONFIG_DEVICE_FEATURES: 1677 case VIRTIO_CONFIG_QUEUE_SIZE: 1678 case VIRTIO_CONFIG_ISR_STATUS: 1679 log_warnx("%s: illegal write %x to %s", 1680 __progname, *data, virtio_reg_name(reg)); 1681 break; 1682 case VIRTIO_CONFIG_GUEST_FEATURES: 1683 vmmci.cfg.guest_feature = *data; 1684 break; 1685 case VIRTIO_CONFIG_QUEUE_ADDRESS: 1686 vmmci.cfg.queue_address = *data; 1687 break; 1688 case VIRTIO_CONFIG_QUEUE_SELECT: 1689 vmmci.cfg.queue_select = *data; 1690 break; 1691 case VIRTIO_CONFIG_QUEUE_NOTIFY: 1692 vmmci.cfg.queue_notify = *data; 1693 break; 1694 case VIRTIO_CONFIG_DEVICE_STATUS: 1695 vmmci.cfg.device_status = *data; 1696 break; 1697 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 1698 vmmci_ack(*data); 1699 break; 1700 } 1701 } else { 1702 switch (reg) { 1703 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 1704 *data = vmmci.cmd; 1705 break; 1706 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: 1707 /* Update time once when reading the first register */ 1708 gettimeofday(&vmmci.time, NULL); 1709 *data = (uint64_t)vmmci.time.tv_sec; 1710 break; 1711 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: 1712 *data = (uint64_t)vmmci.time.tv_sec << 32; 1713 break; 1714 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: 1715 *data = (uint64_t)vmmci.time.tv_usec; 1716 break; 1717 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: 1718 *data = (uint64_t)vmmci.time.tv_usec << 32; 1719 break; 1720 case VIRTIO_CONFIG_DEVICE_FEATURES: 1721 *data = vmmci.cfg.device_feature; 1722 break; 1723 case VIRTIO_CONFIG_GUEST_FEATURES: 1724 *data = vmmci.cfg.guest_feature; 1725 break; 1726 case VIRTIO_CONFIG_QUEUE_ADDRESS: 1727 *data = vmmci.cfg.queue_address; 1728 break; 1729 case VIRTIO_CONFIG_QUEUE_SIZE: 1730 *data = vmmci.cfg.queue_size; 1731 break; 1732 case VIRTIO_CONFIG_QUEUE_SELECT: 1733 *data = vmmci.cfg.queue_select; 1734 break; 1735 case VIRTIO_CONFIG_QUEUE_NOTIFY: 1736 *data = vmmci.cfg.queue_notify; 1737 break; 1738 case VIRTIO_CONFIG_DEVICE_STATUS: 1739 *data = vmmci.cfg.device_status; 1740 break; 1741 case VIRTIO_CONFIG_ISR_STATUS: 1742 *data = vmmci.cfg.isr_status; 1743 vmmci.cfg.isr_status = 0; 1744 vcpu_deassert_pic_irq(vmmci.vm_id, 0, vmmci.irq); 1745 break; 1746 } 1747 } 1748 return (0); 1749 } 1750 1751 int 1752 virtio_get_base(int fd, char *path, size_t npath, int type, const char *dpath) 1753 { 1754 switch (type) { 1755 case VMDF_RAW: 1756 return 0; 1757 case VMDF_QCOW2: 1758 return virtio_qcow2_get_base(fd, path, npath, dpath); 1759 } 1760 log_warnx("%s: invalid disk format", __func__); 1761 return -1; 1762 } 1763 1764 /* 1765 * Initializes a struct virtio_backing using the list of fds. 
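 *
 * Parameters:
 *  file: virtio_backing to initialize (pread/pwrite/close callbacks)
 *  sz: receives the size of the disk image in bytes
 *  fd: array of open file descriptors for the image and any base images
 *  nfd: number of entries in fd
 *  type: disk format, VMDF_RAW or VMDF_QCOW2
 *
 * Returns 0 on success, -1 if the image could not be initialized.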
1766 */ 1767 static int 1768 virtio_init_disk(struct virtio_backing *file, off_t *sz, 1769 int *fd, size_t nfd, int type) 1770 { 1771 /* 1772 * probe disk types in order of preference, first one to work wins. 1773 * TODO: provide a way of specifying the type and options. 1774 */ 1775 switch (type) { 1776 case VMDF_RAW: 1777 return virtio_raw_init(file, sz, fd, nfd); 1778 case VMDF_QCOW2: 1779 return virtio_qcow2_init(file, sz, fd, nfd); 1780 } 1781 log_warnx("%s: invalid disk format", __func__); 1782 return -1; 1783 } 1784 1785 void 1786 virtio_init(struct vmd_vm *vm, int child_cdrom, 1787 int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps) 1788 { 1789 struct vmop_create_params *vmc = &vm->vm_params; 1790 struct vm_create_params *vcp = &vmc->vmc_params; 1791 uint8_t id; 1792 uint8_t i; 1793 int ret; 1794 1795 /* Virtio entropy device */ 1796 if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 1797 PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM, 1798 PCI_SUBCLASS_SYSTEM_MISC, 1799 PCI_VENDOR_OPENBSD, 1800 PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) { 1801 log_warnx("%s: can't add PCI virtio rng device", 1802 __progname); 1803 return; 1804 } 1805 1806 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) { 1807 log_warnx("%s: can't add bar for virtio rng device", 1808 __progname); 1809 return; 1810 } 1811 1812 memset(&viornd, 0, sizeof(viornd)); 1813 viornd.vq[0].qs = VIORND_QUEUE_SIZE; 1814 viornd.vq[0].vq_availoffset = sizeof(struct vring_desc) * 1815 VIORND_QUEUE_SIZE; 1816 viornd.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN( 1817 sizeof(struct vring_desc) * VIORND_QUEUE_SIZE 1818 + sizeof(uint16_t) * (2 + VIORND_QUEUE_SIZE)); 1819 viornd.pci_id = id; 1820 viornd.irq = pci_get_dev_irq(id); 1821 viornd.vm_id = vcp->vcp_id; 1822 1823 if (vcp->vcp_nnics > 0) { 1824 vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev)); 1825 if (vionet == NULL) { 1826 log_warn("%s: calloc failure allocating vionets", 1827 __progname); 1828 return; 1829 } 1830 1831 nr_vionet = vcp->vcp_nnics; 1832 /* Virtio network */ 1833 for (i = 0; i < vcp->vcp_nnics; i++) { 1834 if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 1835 PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM, 1836 PCI_SUBCLASS_SYSTEM_MISC, 1837 PCI_VENDOR_OPENBSD, 1838 PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) { 1839 log_warnx("%s: can't add PCI virtio net device", 1840 __progname); 1841 return; 1842 } 1843 1844 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_net_io, 1845 &vionet[i])) { 1846 log_warnx("%s: can't add bar for virtio net " 1847 "device", __progname); 1848 return; 1849 } 1850 1851 ret = pthread_mutex_init(&vionet[i].mutex, NULL); 1852 if (ret) { 1853 errno = ret; 1854 log_warn("%s: could not initialize mutex " 1855 "for vionet device", __progname); 1856 return; 1857 } 1858 1859 vionet[i].vq[RXQ].qs = VIONET_QUEUE_SIZE; 1860 vionet[i].vq[RXQ].vq_availoffset = 1861 sizeof(struct vring_desc) * VIONET_QUEUE_SIZE; 1862 vionet[i].vq[RXQ].vq_usedoffset = VIRTQUEUE_ALIGN( 1863 sizeof(struct vring_desc) * VIONET_QUEUE_SIZE 1864 + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE)); 1865 vionet[i].vq[RXQ].last_avail = 0; 1866 vionet[i].vq[TXQ].qs = VIONET_QUEUE_SIZE; 1867 vionet[i].vq[TXQ].vq_availoffset = 1868 sizeof(struct vring_desc) * VIONET_QUEUE_SIZE; 1869 vionet[i].vq[TXQ].vq_usedoffset = VIRTQUEUE_ALIGN( 1870 sizeof(struct vring_desc) * VIONET_QUEUE_SIZE 1871 + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE)); 1872 vionet[i].vq[TXQ].last_avail = 0; 1873 vionet[i].vq[TXQ].notified_avail = 0; 1874 vionet[i].fd = child_taps[i]; 1875 vionet[i].rx_pending = 0; 1876 
vionet[i].vm_id = vcp->vcp_id; 1877 vionet[i].vm_vmid = vm->vm_vmid; 1878 vionet[i].irq = pci_get_dev_irq(id); 1879 1880 event_set(&vionet[i].event, vionet[i].fd, 1881 EV_READ | EV_PERSIST, vionet_rx_event, &vionet[i]); 1882 if (event_add(&vionet[i].event, NULL)) { 1883 log_warn("could not initialize vionet event " 1884 "handler"); 1885 return; 1886 } 1887 1888 /* MAC address has been assigned by the parent */ 1889 memcpy(&vionet[i].mac, &vcp->vcp_macs[i], 6); 1890 vionet[i].cfg.device_feature = VIRTIO_NET_F_MAC; 1891 1892 vionet[i].lockedmac = 1893 vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0; 1894 vionet[i].local = 1895 vmc->vmc_ifflags[i] & VMIFF_LOCAL ? 1 : 0; 1896 if (i == 0 && vmc->vmc_bootdevice & VMBOOTDEV_NET) 1897 vionet[i].pxeboot = 1; 1898 vionet[i].idx = i; 1899 vionet[i].pci_id = id; 1900 1901 log_debug("%s: vm \"%s\" vio%u lladdr %s%s%s%s", 1902 __func__, vcp->vcp_name, i, 1903 ether_ntoa((void *)vionet[i].mac), 1904 vionet[i].lockedmac ? ", locked" : "", 1905 vionet[i].local ? ", local" : "", 1906 vionet[i].pxeboot ? ", pxeboot" : ""); 1907 } 1908 } 1909 1910 if (vcp->vcp_ndisks > 0) { 1911 nr_vioblk = vcp->vcp_ndisks; 1912 vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev)); 1913 if (vioblk == NULL) { 1914 log_warn("%s: calloc failure allocating vioblks", 1915 __progname); 1916 return; 1917 } 1918 1919 /* One virtio block device for each disk defined in vcp */ 1920 for (i = 0; i < vcp->vcp_ndisks; i++) { 1921 if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 1922 PCI_PRODUCT_QUMRANET_VIO_BLOCK, 1923 PCI_CLASS_MASS_STORAGE, 1924 PCI_SUBCLASS_MASS_STORAGE_SCSI, 1925 PCI_VENDOR_OPENBSD, 1926 PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) { 1927 log_warnx("%s: can't add PCI virtio block " 1928 "device", __progname); 1929 return; 1930 } 1931 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_blk_io, 1932 &vioblk[i])) { 1933 log_warnx("%s: can't add bar for virtio block " 1934 "device", __progname); 1935 return; 1936 } 1937 vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE; 1938 vioblk[i].vq[0].vq_availoffset = 1939 sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE; 1940 vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN( 1941 sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE 1942 + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE)); 1943 vioblk[i].vq[0].last_avail = 0; 1944 vioblk[i].cfg.device_feature = VIRTIO_BLK_F_SIZE_MAX; 1945 vioblk[i].max_xfer = 1048576; 1946 vioblk[i].pci_id = id; 1947 vioblk[i].vm_id = vcp->vcp_id; 1948 vioblk[i].irq = pci_get_dev_irq(id); 1949 if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz, 1950 child_disks[i], vmc->vmc_diskbases[i], 1951 vmc->vmc_disktypes[i]) == -1) { 1952 log_warnx("%s: unable to determine disk format", 1953 __func__); 1954 return; 1955 } 1956 vioblk[i].sz /= 512; 1957 } 1958 } 1959 1960 /* vioscsi cdrom */ 1961 if (strlen(vcp->vcp_cdrom)) { 1962 vioscsi = calloc(1, sizeof(struct vioscsi_dev)); 1963 if (vioscsi == NULL) { 1964 log_warn("%s: calloc failure allocating vioscsi", 1965 __progname); 1966 return; 1967 } 1968 1969 if (pci_add_device(&id, PCI_VENDOR_QUMRANET, 1970 PCI_PRODUCT_QUMRANET_VIO_SCSI, 1971 PCI_CLASS_MASS_STORAGE, 1972 PCI_SUBCLASS_MASS_STORAGE_SCSI, 1973 PCI_VENDOR_OPENBSD, 1974 PCI_PRODUCT_VIRTIO_SCSI, 1, NULL)) { 1975 log_warnx("%s: can't add PCI vioscsi device", 1976 __progname); 1977 return; 1978 } 1979 1980 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vioscsi_io, vioscsi)) { 1981 log_warnx("%s: can't add bar for vioscsi device", 1982 __progname); 1983 return; 1984 } 1985 1986 for ( i = 0; i < VIRTIO_MAX_QUEUES; i++) { 1987 vioscsi->vq[i].qs = 
VIOSCSI_QUEUE_SIZE; 1988 vioscsi->vq[i].vq_availoffset = 1989 sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE; 1990 vioscsi->vq[i].vq_usedoffset = VIRTQUEUE_ALIGN( 1991 sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE 1992 + sizeof(uint16_t) * (2 + VIOSCSI_QUEUE_SIZE)); 1993 vioscsi->vq[i].last_avail = 0; 1994 } 1995 if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, 1996 &child_cdrom, 1, VMDF_RAW) == -1) { 1997 log_warnx("%s: unable to determine iso format", 1998 __func__); 1999 return; 2000 } 2001 vioscsi->locked = 0; 2002 vioscsi->lba = 0; 2003 vioscsi->n_blocks = vioscsi->sz >> 11; /* num of 2048 blocks in file */ 2004 vioscsi->max_xfer = VIOSCSI_BLOCK_SIZE_CDROM; 2005 vioscsi->pci_id = id; 2006 vioscsi->vm_id = vcp->vcp_id; 2007 vioscsi->irq = pci_get_dev_irq(id); 2008 } 2009 2010 /* virtio control device */ 2011 if (pci_add_device(&id, PCI_VENDOR_OPENBSD, 2012 PCI_PRODUCT_OPENBSD_CONTROL, 2013 PCI_CLASS_COMMUNICATIONS, 2014 PCI_SUBCLASS_COMMUNICATIONS_MISC, 2015 PCI_VENDOR_OPENBSD, 2016 PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) { 2017 log_warnx("%s: can't add PCI vmm control device", 2018 __progname); 2019 return; 2020 } 2021 2022 if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) { 2023 log_warnx("%s: can't add bar for vmm control device", 2024 __progname); 2025 return; 2026 } 2027 2028 memset(&vmmci, 0, sizeof(vmmci)); 2029 vmmci.cfg.device_feature = VMMCI_F_TIMESYNC | VMMCI_F_ACK | 2030 VMMCI_F_SYNCRTC; 2031 vmmci.vm_id = vcp->vcp_id; 2032 vmmci.irq = pci_get_dev_irq(id); 2033 vmmci.pci_id = id; 2034 2035 evtimer_set(&vmmci.timeout, vmmci_timeout, NULL); 2036 } 2037 2038 void 2039 virtio_shutdown(struct vmd_vm *vm) 2040 { 2041 int i; 2042 2043 /* ensure that our disks are synced */ 2044 if (vioscsi != NULL) 2045 vioscsi->file.close(vioscsi->file.p, 0); 2046 2047 for (i = 0; i < nr_vioblk; i++) 2048 vioblk[i].file.close(vioblk[i].file.p, 0); 2049 } 2050 2051 int 2052 vmmci_restore(int fd, uint32_t vm_id) 2053 { 2054 log_debug("%s: receiving vmmci", __func__); 2055 if (atomicio(read, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) { 2056 log_warnx("%s: error reading vmmci from fd", __func__); 2057 return (-1); 2058 } 2059 2060 if (pci_set_bar_fn(vmmci.pci_id, 0, vmmci_io, NULL)) { 2061 log_warnx("%s: can't set bar fn for vmm control device", 2062 __progname); 2063 return (-1); 2064 } 2065 vmmci.vm_id = vm_id; 2066 vmmci.irq = pci_get_dev_irq(vmmci.pci_id); 2067 memset(&vmmci.timeout, 0, sizeof(struct event)); 2068 evtimer_set(&vmmci.timeout, vmmci_timeout, NULL); 2069 return (0); 2070 } 2071 2072 int 2073 viornd_restore(int fd, struct vm_create_params *vcp) 2074 { 2075 log_debug("%s: receiving viornd", __func__); 2076 if (atomicio(read, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) { 2077 log_warnx("%s: error reading viornd from fd", __func__); 2078 return (-1); 2079 } 2080 if (pci_set_bar_fn(viornd.pci_id, 0, virtio_rnd_io, NULL)) { 2081 log_warnx("%s: can't set bar fn for virtio rng device", 2082 __progname); 2083 return (-1); 2084 } 2085 viornd.vm_id = vcp->vcp_id; 2086 viornd.irq = pci_get_dev_irq(viornd.pci_id); 2087 2088 return (0); 2089 } 2090 2091 int 2092 vionet_restore(int fd, struct vmd_vm *vm, int *child_taps) 2093 { 2094 struct vmop_create_params *vmc = &vm->vm_params; 2095 struct vm_create_params *vcp = &vmc->vmc_params; 2096 uint8_t i; 2097 int ret; 2098 2099 nr_vionet = vcp->vcp_nnics; 2100 if (vcp->vcp_nnics > 0) { 2101 vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev)); 2102 if (vionet == NULL) { 2103 log_warn("%s: calloc failure allocating vionets", 2104 

int
vmmci_restore(int fd, uint32_t vm_id)
{
	log_debug("%s: receiving vmmci", __func__);
	if (atomicio(read, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
		log_warnx("%s: error reading vmmci from fd", __func__);
		return (-1);
	}

	if (pci_set_bar_fn(vmmci.pci_id, 0, vmmci_io, NULL)) {
		log_warnx("%s: can't set bar fn for vmm control device",
		    __progname);
		return (-1);
	}
	vmmci.vm_id = vm_id;
	vmmci.irq = pci_get_dev_irq(vmmci.pci_id);
	memset(&vmmci.timeout, 0, sizeof(struct event));
	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
	return (0);
}

int
viornd_restore(int fd, struct vm_create_params *vcp)
{
	log_debug("%s: receiving viornd", __func__);
	if (atomicio(read, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
		log_warnx("%s: error reading viornd from fd", __func__);
		return (-1);
	}
	if (pci_set_bar_fn(viornd.pci_id, 0, virtio_rnd_io, NULL)) {
		log_warnx("%s: can't set bar fn for virtio rng device",
		    __progname);
		return (-1);
	}
	viornd.vm_id = vcp->vcp_id;
	viornd.irq = pci_get_dev_irq(viornd.pci_id);

	return (0);
}

int
vionet_restore(int fd, struct vmd_vm *vm, int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	uint8_t i;
	int ret;

	nr_vionet = vcp->vcp_nnics;
	if (vcp->vcp_nnics > 0) {
		vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev));
		if (vionet == NULL) {
			log_warn("%s: calloc failure allocating vionets",
			    __progname);
			return (-1);
		}
		log_debug("%s: receiving vionet", __func__);
		if (atomicio(read, fd, vionet,
		    vcp->vcp_nnics * sizeof(struct vionet_dev)) !=
		    vcp->vcp_nnics * sizeof(struct vionet_dev)) {
			log_warnx("%s: error reading vionet from fd",
			    __func__);
			return (-1);
		}

		/* Virtio network */
		for (i = 0; i < vcp->vcp_nnics; i++) {
			if (pci_set_bar_fn(vionet[i].pci_id, 0, virtio_net_io,
			    &vionet[i])) {
				log_warnx("%s: can't set bar fn for virtio net "
				    "device", __progname);
				return (-1);
			}

			memset(&vionet[i].mutex, 0, sizeof(pthread_mutex_t));
			ret = pthread_mutex_init(&vionet[i].mutex, NULL);

			if (ret) {
				errno = ret;
				log_warn("%s: could not initialize mutex "
				    "for vionet device", __progname);
				return (-1);
			}
			vionet[i].fd = child_taps[i];
			vionet[i].rx_pending = 0;
			vionet[i].vm_id = vcp->vcp_id;
			vionet[i].vm_vmid = vm->vm_vmid;
			vionet[i].irq = pci_get_dev_irq(vionet[i].pci_id);

			memset(&vionet[i].event, 0, sizeof(struct event));
			event_set(&vionet[i].event, vionet[i].fd,
			    EV_READ | EV_PERSIST, vionet_rx_event, &vionet[i]);
		}
	}
	return (0);
}

int
vioblk_restore(int fd, struct vmop_create_params *vmc,
    int child_disks[][VM_MAX_BASE_PER_DISK])
{
	struct vm_create_params *vcp = &vmc->vmc_params;
	uint8_t i;

	nr_vioblk = vcp->vcp_ndisks;
	vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev));
	if (vioblk == NULL) {
		log_warn("%s: calloc failure allocating vioblks", __progname);
		return (-1);
	}
	log_debug("%s: receiving vioblk", __func__);
	if (atomicio(read, fd, vioblk,
	    nr_vioblk * sizeof(struct vioblk_dev)) !=
	    nr_vioblk * sizeof(struct vioblk_dev)) {
		log_warnx("%s: error reading vioblk from fd", __func__);
		return (-1);
	}
	for (i = 0; i < vcp->vcp_ndisks; i++) {
		if (pci_set_bar_fn(vioblk[i].pci_id, 0, virtio_blk_io,
		    &vioblk[i])) {
			log_warnx("%s: can't set bar fn for virtio block "
			    "device", __progname);
			return (-1);
		}
		if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz,
		    child_disks[i], vmc->vmc_diskbases[i],
		    vmc->vmc_disktypes[i]) == -1) {
			log_warnx("%s: unable to determine disk format",
			    __func__);
			return (-1);
		}
		vioblk[i].vm_id = vcp->vcp_id;
		vioblk[i].irq = pci_get_dev_irq(vioblk[i].pci_id);
	}
	return (0);
}

int
vioscsi_restore(int fd, struct vm_create_params *vcp, int child_cdrom)
{
	if (!strlen(vcp->vcp_cdrom))
		return (0);

	vioscsi = calloc(1, sizeof(struct vioscsi_dev));
	if (vioscsi == NULL) {
		log_warn("%s: calloc failure allocating vioscsi", __progname);
		return (-1);
	}

	log_debug("%s: receiving vioscsi", __func__);

	if (atomicio(read, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
	    sizeof(struct vioscsi_dev)) {
		log_warnx("%s: error reading vioscsi from fd", __func__);
		return (-1);
	}

	if (pci_set_bar_fn(vioscsi->pci_id, 0, vioscsi_io, vioscsi)) {
		log_warnx("%s: can't set bar fn for vioscsi device",
		    __progname);
		return (-1);
	}

	if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, &child_cdrom, 1,
	    VMDF_RAW) == -1) {
		log_warnx("%s: unable to determine iso format", __func__);
		return (-1);
	}
	vioscsi->vm_id = vcp->vcp_id;
	vioscsi->irq = pci_get_dev_irq(vioscsi->pci_id);

	return (0);
}
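
/*
 * virtio_restore() consumes the devices in the same fixed order in which
 * virtio_dump() below writes them to the stream: viornd, vioblk, vioscsi,
 * vionet and finally vmmci.  Changing the order on one side without the
 * other would corrupt the restore stream.
 */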

int
virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom,
    int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	int ret;

	if ((ret = viornd_restore(fd, vcp)) == -1)
		return ret;

	if ((ret = vioblk_restore(fd, vmc, child_disks)) == -1)
		return ret;

	if ((ret = vioscsi_restore(fd, vcp, child_cdrom)) == -1)
		return ret;

	if ((ret = vionet_restore(fd, vm, child_taps)) == -1)
		return ret;

	if ((ret = vmmci_restore(fd, vcp->vcp_id)) == -1)
		return ret;

	return (0);
}

int
viornd_dump(int fd)
{
	log_debug("%s: sending viornd", __func__);
	if (atomicio(vwrite, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
		log_warnx("%s: error writing viornd to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vmmci_dump(int fd)
{
	log_debug("%s: sending vmmci", __func__);
	if (atomicio(vwrite, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
		log_warnx("%s: error writing vmmci to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vionet_dump(int fd)
{
	log_debug("%s: sending vionet", __func__);
	if (atomicio(vwrite, fd, vionet,
	    nr_vionet * sizeof(struct vionet_dev)) !=
	    nr_vionet * sizeof(struct vionet_dev)) {
		log_warnx("%s: error writing vionet to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vioblk_dump(int fd)
{
	log_debug("%s: sending vioblk", __func__);
	if (atomicio(vwrite, fd, vioblk,
	    nr_vioblk * sizeof(struct vioblk_dev)) !=
	    nr_vioblk * sizeof(struct vioblk_dev)) {
		log_warnx("%s: error writing vioblk to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vioscsi_dump(int fd)
{
	if (vioscsi == NULL)
		return (0);

	log_debug("%s: sending vioscsi", __func__);
	if (atomicio(vwrite, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
	    sizeof(struct vioscsi_dev)) {
		log_warnx("%s: error writing vioscsi to fd", __func__);
		return (-1);
	}
	return (0);
}

int
virtio_dump(int fd)
{
	int ret;

	if ((ret = viornd_dump(fd)) == -1)
		return ret;

	if ((ret = vioblk_dump(fd)) == -1)
		return ret;

	if ((ret = vioscsi_dump(fd)) == -1)
		return ret;

	if ((ret = vionet_dump(fd)) == -1)
		return ret;

	if ((ret = vmmci_dump(fd)) == -1)
		return ret;

	return (0);
}

void
virtio_stop(struct vm_create_params *vcp)
{
	uint8_t i;
	for (i = 0; i < vcp->vcp_nnics; i++) {
		if (event_del(&vionet[i].event)) {
			log_warn("could not remove vionet event handler");
			return;
		}
	}
}

void
virtio_start(struct vm_create_params *vcp)
{
	uint8_t i;
	for (i = 0; i < vcp->vcp_nnics; i++) {
		if (event_add(&vionet[i].event, NULL)) {
			log_warn("could not add vionet event handler");
			return;
		}
	}
}