/*	$OpenBSD: virtio.c,v 1.17 2016/08/17 05:07:13 deraadt Exp $	*/

/*
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>	/* PAGE_SIZE */

#include <machine/vmmvar.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcidevs.h>
#include <dev/pci/virtioreg.h>
#include <dev/pci/vioblkreg.h>

#include <errno.h>
#include <poll.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "pci.h"
#include "vmd.h"
#include "virtio.h"
#include "loadfile.h"

extern char *__progname;

struct viornd_dev viornd;
struct vioblk_dev *vioblk;
struct vionet_dev *vionet;

int nr_vionet;

#define MAXPHYS	(64 * 1024)	/* max raw I/O transfer size */

#define VIRTIO_NET_F_MAC	(1<<5)

const char *
vioblk_cmd_name(uint32_t type)
{
	switch (type) {
	case VIRTIO_BLK_T_IN: return "read";
	case VIRTIO_BLK_T_OUT: return "write";
	case VIRTIO_BLK_T_SCSI_CMD: return "scsi read";
	case VIRTIO_BLK_T_SCSI_CMD_OUT: return "scsi write";
	case VIRTIO_BLK_T_FLUSH: return "flush";
	case VIRTIO_BLK_T_FLUSH_OUT: return "flush out";
	case VIRTIO_BLK_T_GET_ID: return "get id";
	default: return "unknown";
	}
}

static void
dump_descriptor_chain(struct vring_desc *desc, int16_t dxx)
{
	log_debug("descriptor chain @ %d", dxx);
	do {
		log_debug("desc @%d addr/len/flags/next = 0x%llx / 0x%x "
		    "/ 0x%x / 0x%x",
		    dxx,
		    desc[dxx].addr,
		    desc[dxx].len,
		    desc[dxx].flags,
		    desc[dxx].next);
		dxx = desc[dxx].next;
	} while (desc[dxx].flags & VRING_DESC_F_NEXT);

	log_debug("desc @%d addr/len/flags/next = 0x%llx / 0x%x / 0x%x "
	    "/ 0x%x",
	    dxx,
	    desc[dxx].addr,
	    desc[dxx].len,
	    desc[dxx].flags,
	    desc[dxx].next);
}

static const char *
virtio_reg_name(uint8_t reg)
{
	switch (reg) {
	case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
	case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
	case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address";
	case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
	case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
	case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
	case VIRTIO_CONFIG_DEVICE_STATUS: return "device status";
	case VIRTIO_CONFIG_ISR_STATUS: return "isr status";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: return "device config 0";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: return "device config 1";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "device config 2";
	default: return "unknown";
	}
}

uint32_t
vring_size(uint32_t vq_size)
{
	uint32_t allocsize1, allocsize2;

	/* allocsize1: descriptor table + avail ring + pad */
	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
	    + sizeof(uint16_t) * (2 + vq_size));
	/* allocsize2: used ring + pad */
	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * 2
	    + sizeof(struct vring_used_elem) * vq_size);

	return allocsize1 + allocsize2;
}

/* Update queue select */
void
viornd_update_qs(void)
{
	/* Invalid queue? */
	if (viornd.cfg.queue_select > 0)
		return;

	/* Update queue address/size based on queue select */
	viornd.cfg.queue_address = viornd.vq[viornd.cfg.queue_select].qa;
	viornd.cfg.queue_size = viornd.vq[viornd.cfg.queue_select].qs;
}

/* Update queue address */
void
viornd_update_qa(void)
{
	/* Invalid queue? */
	if (viornd.cfg.queue_select > 0)
		return;

	viornd.vq[viornd.cfg.queue_select].qa = viornd.cfg.queue_address;
}

int
viornd_notifyq(void)
{
	uint64_t q_gpa;
	uint32_t vr_sz;
	size_t sz;
	int ret;
	char *buf, *rnd_data;
	struct vring_desc *desc;
	struct vring_avail *avail;
	struct vring_used *used;

	ret = 0;

	/* Invalid queue? */
	if (viornd.cfg.queue_notify > 0)
		return (0);

	vr_sz = vring_size(VIORND_QUEUE_SIZE);
	q_gpa = viornd.vq[viornd.cfg.queue_notify].qa;
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;

	buf = calloc(1, vr_sz);
	if (buf == NULL) {
		log_warn("calloc error getting viornd ring");
		return (0);
	}

	if (read_mem(q_gpa, buf, vr_sz)) {
		free(buf);
		return (0);
	}

	desc = (struct vring_desc *)(buf);
	avail = (struct vring_avail *)(buf +
	    viornd.vq[viornd.cfg.queue_notify].vq_availoffset);
	used = (struct vring_used *)(buf +
	    viornd.vq[viornd.cfg.queue_notify].vq_usedoffset);

	sz = desc[avail->ring[avail->idx]].len;
	if (sz > MAXPHYS)
		fatal("viornd descriptor size too large (%zu)", sz);

	rnd_data = malloc(sz);

	if (rnd_data != NULL) {
		arc4random_buf(rnd_data, desc[avail->ring[avail->idx]].len);
		if (write_mem(desc[avail->ring[avail->idx]].addr,
		    rnd_data, desc[avail->ring[avail->idx]].len)) {
			log_warnx("viornd: can't write random data @ "
			    "0x%llx",
			    desc[avail->ring[avail->idx]].addr);
		} else {
			/* ret == 1 -> interrupt needed */
			/* XXX check VIRTIO_F_NO_INTR */
			ret = 1;
			viornd.cfg.isr_status = 1;
			used->ring[used->idx].id = avail->ring[avail->idx];
			used->ring[used->idx].len =
			    desc[avail->ring[avail->idx]].len;
			used->idx++;

			if (write_mem(q_gpa, buf, vr_sz)) {
				log_warnx("viornd: error writing vio ring");
			}
		}
		free(rnd_data);
	} else
		fatal("memory allocation error for viornd data");

	free(buf);

	return (ret);
}

int
virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *unused)
{
	*intr = 0xFF;

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			viornd.cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			viornd.cfg.queue_address = *data;
			viornd_update_qa();
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			viornd.cfg.queue_select = *data;
			viornd_update_qs();
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			viornd.cfg.queue_notify = *data;
			if (viornd_notifyq())
				*intr = 1;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			viornd.cfg.device_status = *data;
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = viornd.cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = viornd.cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			*data = viornd.cfg.queue_address;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			*data = viornd.cfg.queue_size;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = viornd.cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = viornd.cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			*data = viornd.cfg.device_status;
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = viornd.cfg.isr_status;
			break;
		}
	}
	return (0);
}

void
vioblk_update_qa(struct vioblk_dev *dev)
{
	/* Invalid queue? */
	if (dev->cfg.queue_select > 0)
		return;

	dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
}

void
vioblk_update_qs(struct vioblk_dev *dev)
{
	/* Invalid queue? */
	if (dev->cfg.queue_select > 0)
		return;

	/* Update queue address/size based on queue select */
	dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
	dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
}

static char *
vioblk_do_read(struct vioblk_dev *dev, off_t sector, ssize_t sz)
{
	char *buf;

	buf = malloc(sz);
	if (buf == NULL) {
		log_warn("malloc error vioblk read");
		return (NULL);
	}

	if (lseek(dev->fd, sector * VIRTIO_BLK_SECTOR_SIZE,
	    SEEK_SET) == -1) {
		log_warn("seek error in vioblk read");
		free(buf);
		return (NULL);
	}

	if (read(dev->fd, buf, sz) != sz) {
		log_warn("vioblk read error");
		free(buf);
		return (NULL);
	}

	return buf;
}

static int
vioblk_do_write(struct vioblk_dev *dev, off_t sector, char *buf, ssize_t sz)
{
	if (lseek(dev->fd, sector * VIRTIO_BLK_SECTOR_SIZE,
	    SEEK_SET) == -1) {
		log_warn("seek error in vioblk write");
		return (1);
	}

	if (write(dev->fd, buf, sz) != sz) {
		log_warn("vioblk write error");
		return (1);
	}

	return (0);
}

/*
 * XXX this function needs a cleanup block; lots of free(blah); return (0)
 * in various cases where ds should be set to VIRTIO_BLK_S_IOERR, if we can.
 * XXX can't trust ring data from VM, be extra cautious.
 */
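/*
 * vioblk_notifyq
 *
 * Handles a queue notification from the guest for the block device.
 * The descriptor chain for a request is expected to be laid out as a
 * virtio_blk_req_hdr descriptor, followed by one or more data descriptors
 * for read/write commands (none for flush), followed by a final status
 * descriptor that receives VIRTIO_BLK_S_OK. Returns 1 when the used ring
 * was updated and an interrupt should be injected into the guest.
 */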
int
vioblk_notifyq(struct vioblk_dev *dev)
{
	uint64_t q_gpa;
	uint32_t vr_sz;
	uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx;
	uint8_t ds;
	int ret;
	off_t secbias;
	char *vr, *secdata;
	struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_blk_req_hdr cmd;

	ret = 0;

	/* Invalid queue? */
	if (dev->cfg.queue_notify > 0)
		return (0);

	vr_sz = vring_size(VIOBLK_QUEUE_SIZE);
	q_gpa = dev->vq[dev->cfg.queue_notify].qa;
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;

	vr = calloc(1, vr_sz);
	if (vr == NULL) {
		log_warn("calloc error getting vioblk ring");
		return (0);
	}

	if (read_mem(q_gpa, vr, vr_sz)) {
		log_warnx("error reading gpa 0x%llx", q_gpa);
		free(vr);
		return (0);
	}

	/* Compute offsets in ring of descriptors, avail ring, and used ring */
	desc = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr +
	    dev->vq[dev->cfg.queue_notify].vq_availoffset);
	used = (struct vring_used *)(vr +
	    dev->vq[dev->cfg.queue_notify].vq_usedoffset);

	idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK;

	if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) {
		log_warnx("vioblk queue notify - nothing to do?");
		free(vr);
		return (0);
	}

	cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK;
	cmd_desc = &desc[cmd_desc_idx];

	if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) {
		log_warnx("unchained vioblk cmd descriptor received "
		    "(idx %d)", cmd_desc_idx);
		free(vr);
		return (0);
	}

	/* Read command from descriptor ring */
	if (read_mem(cmd_desc->addr, &cmd, cmd_desc->len)) {
		log_warnx("vioblk: command read_mem error @ 0x%llx",
		    cmd_desc->addr);
		free(vr);
		return (0);
	}

	switch (cmd.type) {
	case VIRTIO_BLK_T_IN:
		/* first descriptor */
		secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
		secdata_desc = &desc[secdata_desc_idx];

		if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
			log_warnx("unchained vioblk data descriptor "
			    "received (idx %d)", cmd_desc_idx);
			free(vr);
			return (0);
		}

		secbias = 0;
		do {
			/* read the data (use current data descriptor) */
			/*
			 * XXX waste to malloc secdata in vioblk_do_read
			 * and free it here over and over
			 */
			secdata = vioblk_do_read(dev, cmd.sector + secbias,
			    (ssize_t)secdata_desc->len);
			if (secdata == NULL) {
				log_warnx("vioblk: block read error, "
				    "sector %lld", cmd.sector);
				free(vr);
				return (0);
			}

			if (write_mem(secdata_desc->addr, secdata,
			    secdata_desc->len)) {
				log_warnx("can't write sector "
				    "data to gpa @ 0x%llx",
				    secdata_desc->addr);
				dump_descriptor_chain(desc, cmd_desc_idx);
				free(vr);
				free(secdata);
				return (0);
			}

			free(secdata);

			secbias += (secdata_desc->len / VIRTIO_BLK_SECTOR_SIZE);
			secdata_desc_idx = secdata_desc->next &
			    VIOBLK_QUEUE_MASK;
			secdata_desc = &desc[secdata_desc_idx];
		} while (secdata_desc->flags & VRING_DESC_F_NEXT);

		ds_desc_idx = secdata_desc_idx;
		ds_desc = secdata_desc;

		ds = VIRTIO_BLK_S_OK;
		if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
			log_warnx("can't write device status data @ "
			    "0x%llx", ds_desc->addr);
			dump_descriptor_chain(desc, cmd_desc_idx);
			free(vr);
			return (0);
		}

		ret = 1;
		dev->cfg.isr_status = 1;
		used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
		used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
		used->idx++;

		dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
		    VIOBLK_QUEUE_MASK;

		if (write_mem(q_gpa, vr, vr_sz)) {
			log_warnx("vioblk: error writing vio ring");
		}
		break;
	case VIRTIO_BLK_T_OUT:
		secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
		secdata_desc = &desc[secdata_desc_idx];

		if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
			log_warnx("wr vioblk: unchained vioblk data "
			    "descriptor received (idx %d)", cmd_desc_idx);
			free(vr);
			return (0);
		}

		secdata = malloc(MAXPHYS);
		if (secdata == NULL) {
			log_warn("wr vioblk: malloc error, len %d",
			    secdata_desc->len);
			free(vr);
			return (0);
		}

		secbias = 0;
		do {
			if (read_mem(secdata_desc->addr, secdata,
			    secdata_desc->len)) {
				log_warnx("wr vioblk: can't read "
				    "sector data @ 0x%llx",
				    secdata_desc->addr);
				dump_descriptor_chain(desc, cmd_desc_idx);
				free(vr);
				free(secdata);
				return (0);
			}

			if (vioblk_do_write(dev, cmd.sector + secbias,
			    secdata, (ssize_t)secdata_desc->len)) {
				log_warnx("wr vioblk: disk write error");
				free(vr);
				free(secdata);
				return (0);
			}

			secbias += secdata_desc->len / VIRTIO_BLK_SECTOR_SIZE;

			secdata_desc_idx = secdata_desc->next &
			    VIOBLK_QUEUE_MASK;
			secdata_desc = &desc[secdata_desc_idx];
		} while (secdata_desc->flags & VRING_DESC_F_NEXT);

		free(secdata);

		ds_desc_idx = secdata_desc_idx;
		ds_desc = secdata_desc;

		ds = VIRTIO_BLK_S_OK;
		if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
			log_warnx("wr vioblk: can't write device status "
			    "data @ 0x%llx", ds_desc->addr);
			dump_descriptor_chain(desc, cmd_desc_idx);
			free(vr);
			return (0);
		}

		ret = 1;
		dev->cfg.isr_status = 1;
		used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
		used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
		used->idx++;

		dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
		    VIOBLK_QUEUE_MASK;
		if (write_mem(q_gpa, vr, vr_sz))
			log_warnx("wr vioblk: error writing vio ring");
		break;
	case VIRTIO_BLK_T_FLUSH:
	case VIRTIO_BLK_T_FLUSH_OUT:
		ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
		ds_desc = &desc[ds_desc_idx];

		ds = VIRTIO_BLK_S_OK;
		if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
			log_warnx("fl vioblk: can't write device status "
			    "data @ 0x%llx", ds_desc->addr);
			dump_descriptor_chain(desc, cmd_desc_idx);
			free(vr);
			return (0);
		}

		ret = 1;
		dev->cfg.isr_status = 1;
		used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
		used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
		used->idx++;

		dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
		    VIOBLK_QUEUE_MASK;
		if (write_mem(q_gpa, vr, vr_sz)) {
			log_warnx("fl vioblk: error writing vio ring");
		}
		break;
	}

	free(vr);

	return (ret);
}

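/*
 * virtio_blk_io
 *
 * I/O handler for the block device's PCI I/O BAR. A 'dir' of 0 is a guest
 * write to register 'reg'; anything else is a register read returned
 * through 'data'. A write to the queue notify register kicks
 * vioblk_notifyq(); '*intr' is set to 1 when an interrupt should be
 * asserted and is left at 0xFF otherwise. The 64-bit disk capacity (in
 * 512-byte sectors) is exposed through the first two 32-bit device config
 * registers.
 */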
int
virtio_blk_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *cookie)
{
	struct vioblk_dev *dev = (struct vioblk_dev *)cookie;

	*intr = 0xFF;

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			dev->cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			dev->cfg.queue_address = *data;
			vioblk_update_qa(dev);
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			dev->cfg.queue_select = *data;
			vioblk_update_qs(dev);
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			dev->cfg.queue_notify = *data;
			if (vioblk_notifyq(dev))
				*intr = 1;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			dev->cfg.device_status = *data;
			break;
		default:
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
			*data = (uint32_t)(dev->sz >> 32);
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
			*data = (uint32_t)(dev->sz);
			break;
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = dev->cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = dev->cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			*data = dev->cfg.queue_address;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			*data = dev->cfg.queue_size;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = dev->cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = dev->cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			*data = dev->cfg.device_status;
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = dev->cfg.isr_status;
			break;
		}
	}
	return (0);
}

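/*
 * virtio_net_io
 *
 * I/O handler for the network device's PCI I/O BAR. Register semantics
 * match virtio_blk_io() above, except that reads of the first six device
 * config bytes return the corresponding byte of the device's MAC address.
 * Queue 0 is the guest's receive queue and queue 1 its transmit queue
 * (see vionet_enq_rx() and vionet_notifyq() below).
 */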
int
virtio_net_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *cookie)
{
	struct vionet_dev *dev = (struct vionet_dev *)cookie;

	*intr = 0xFF;

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			dev->cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			dev->cfg.queue_address = *data;
			vionet_update_qa(dev);
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			dev->cfg.queue_select = *data;
			vionet_update_qs(dev);
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			dev->cfg.queue_notify = *data;
			if (vionet_notifyq(dev))
				*intr = 1;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			dev->cfg.device_status = *data;
			break;
		default:
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
			*data = dev->mac[reg -
			    VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI];
			break;
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = dev->cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = dev->cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			*data = dev->cfg.queue_address;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			*data = dev->cfg.queue_size;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = dev->cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = dev->cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			*data = dev->cfg.device_status;
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = dev->cfg.isr_status;
			break;
		}
	}
	return (0);
}

void
vionet_update_qa(struct vionet_dev *dev)
{
	/* Invalid queue? */
	if (dev->cfg.queue_select > 1)
		return;

	dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
}

void
vionet_update_qs(struct vionet_dev *dev)
{
	/* Invalid queue? */
	if (dev->cfg.queue_select > 1)
		return;

	/* Update queue address/size based on queue select */
	dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
	dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
}

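/*
 * vionet_enq_rx
 *
 * Enqueue a single packet received on the tap interface into the guest's
 * receive queue (queue 0): the payload is copied into the guest buffer
 * following the virtio_net header descriptor, and the corresponding used
 * ring entry and used index are written back to guest memory. '*spc' is
 * set to the number of receive buffers the guest still has available; the
 * return value is 1 when an interrupt should be injected.
 */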
int
vionet_enq_rx(struct vionet_dev *dev, char *pkt, ssize_t sz, int *spc)
{
	uint64_t q_gpa;
	uint32_t vr_sz;
	uint16_t idx, pkt_desc_idx, hdr_desc_idx;
	ptrdiff_t off;
	int ret;
	char *vr;
	struct vring_desc *desc, *pkt_desc, *hdr_desc;
	struct vring_avail *avail;
	struct vring_used *used;
	struct vring_used_elem *ue;

	ret = 0;

	vr_sz = vring_size(VIONET_QUEUE_SIZE);
	q_gpa = dev->vq[0].qa;
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;

	vr = calloc(1, vr_sz);
	if (vr == NULL) {
		log_warn("rx enq: calloc error getting vionet ring");
		return (0);
	}

	if (read_mem(q_gpa, vr, vr_sz)) {
		log_warnx("rx enq: error reading gpa 0x%llx", q_gpa);
		free(vr);
		return (0);
	}

	/* Compute offsets in ring of descriptors, avail ring, and used ring */
	desc = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr +
	    dev->vq[0].vq_availoffset);
	used = (struct vring_used *)(vr +
	    dev->vq[0].vq_usedoffset);

	idx = dev->vq[0].last_avail & VIONET_QUEUE_MASK;

	if ((dev->vq[0].notified_avail & VIONET_QUEUE_MASK) == idx) {
		log_warnx("vionet queue notify - no space, dropping packet");
		free(vr);
		return (0);
	}

	hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK;
	hdr_desc = &desc[hdr_desc_idx];

	pkt_desc_idx = hdr_desc->next & VIONET_QUEUE_MASK;
	pkt_desc = &desc[pkt_desc_idx];

	/* The packet buffer descriptor must be guest-writable */
	if ((pkt_desc->flags & VRING_DESC_F_WRITE) == 0) {
		log_warnx("unexpected readable rx descriptor %d",
		    pkt_desc_idx);
		free(vr);
		return (0);
	}

	/* Write packet to descriptor ring */
	if (write_mem(pkt_desc->addr, pkt, sz)) {
		log_warnx("vionet: rx enq packet write_mem error @ "
		    "0x%llx", pkt_desc->addr);
		free(vr);
		return (0);
	}

	ret = 1;
	dev->cfg.isr_status = 1;
	ue = &used->ring[used->idx & VIONET_QUEUE_MASK];
	ue->id = hdr_desc_idx;
	ue->len = hdr_desc->len + sz;
	used->idx++;
	dev->vq[0].last_avail = (dev->vq[0].last_avail + 1);
	*spc = dev->vq[0].notified_avail - dev->vq[0].last_avail;

	off = (char *)ue - vr;
	if (write_mem(q_gpa + off, ue, sizeof *ue))
		log_warnx("vionet: error writing vio ring");
	else {
		off = (char *)&used->idx - vr;
		if (write_mem(q_gpa + off, &used->idx, sizeof used->idx))
			log_warnx("vionet: error writing vio ring");
	}

	free(vr);

	return (ret);
}

/*
 * vionet_rx
 *
 * Enqueue data that was received on a tap file descriptor
 * to the vionet device queue.
 */
static int
vionet_rx(struct vionet_dev *dev)
{
	char buf[PAGE_SIZE];
	int hasdata, num_enq = 0, spc = 0;
	ssize_t sz;

	do {
		sz = read(dev->fd, buf, sizeof buf);
		if (sz == -1) {
			/*
			 * If we get EAGAIN, no data is currently available.
			 * Do not treat this as an error.
			 */
			if (errno != EAGAIN)
				log_warn("unexpected read error on vionet "
				    "device");
		} else if (sz != 0)
			num_enq += vionet_enq_rx(dev, buf, sz, &spc);
		else if (sz == 0) {
			log_debug("process_rx: no data");
			hasdata = 0;
			break;
		}

		hasdata = fd_hasdata(dev->fd);
	} while (spc && hasdata);

	dev->rx_pending = hasdata;
	return (num_enq);
}

int
vionet_process_rx(void)
{
	int i, num_enq;

	num_enq = 0;
	for (i = 0; i < nr_vionet; i++) {
		if (!vionet[i].rx_added)
			continue;

		if (vionet[i].rx_pending || fd_hasdata(vionet[i].fd))
			num_enq += vionet_rx(&vionet[i]);
	}

	/*
	 * XXX returns the number of packets enqueued across all vionet, which
	 * may not be right for VMs with more than one vionet.
	 */
	return (num_enq);
}

void
vionet_notify_rx(struct vionet_dev *dev)
{
	uint64_t q_gpa;
	uint32_t vr_sz;
	char *vr;
	struct vring_avail *avail;

	vr_sz = vring_size(VIONET_QUEUE_SIZE);
	q_gpa = dev->vq[dev->cfg.queue_notify].qa;
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;

	vr = malloc(vr_sz);
	if (vr == NULL) {
		log_warn("malloc error getting vionet ring");
		return;
	}

	if (read_mem(q_gpa, vr, vr_sz)) {
		log_warnx("error reading gpa 0x%llx", q_gpa);
		free(vr);
		return;
	}

	/* Compute offset into avail ring */
	avail = (struct vring_avail *)(vr +
	    dev->vq[dev->cfg.queue_notify].vq_availoffset);

	dev->rx_added = 1;
	dev->vq[0].notified_avail = avail->idx;

	free(vr);
}

/*
 * XXX can't trust ring data from VM, be extra cautious.
 * XXX advertise link status to guest
 */
int
vionet_notifyq(struct vionet_dev *dev)
{
	uint64_t q_gpa;
	uint32_t vr_sz;
	uint16_t idx, pkt_desc_idx, hdr_desc_idx, dxx;
	size_t pktsz;
	int ret, num_enq, ofs;
	char *vr, *pkt;
	struct vring_desc *desc, *pkt_desc, *hdr_desc;
	struct vring_avail *avail;
	struct vring_used *used;

	vr = pkt = NULL;
	ret = 0;

	/* Invalid queue? */
	if (dev->cfg.queue_notify != 1) {
		vionet_notify_rx(dev);
		goto out;
	}

	vr_sz = vring_size(VIONET_QUEUE_SIZE);
	q_gpa = dev->vq[dev->cfg.queue_notify].qa;
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;

	vr = calloc(1, vr_sz);
	if (vr == NULL) {
		log_warn("calloc error getting vionet ring");
		goto out;
	}

	if (read_mem(q_gpa, vr, vr_sz)) {
		log_warnx("error reading gpa 0x%llx", q_gpa);
		goto out;
	}

	/* Compute offsets in ring of descriptors, avail ring, and used ring */
	desc = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr +
	    dev->vq[dev->cfg.queue_notify].vq_availoffset);
	used = (struct vring_used *)(vr +
	    dev->vq[dev->cfg.queue_notify].vq_usedoffset);

	num_enq = 0;

	idx = dev->vq[dev->cfg.queue_notify].last_avail & VIONET_QUEUE_MASK;

	if ((avail->idx & VIONET_QUEUE_MASK) == idx) {
		log_warnx("vionet tx queue notify - nothing to do?");
		goto out;
	}

	while ((avail->idx & VIONET_QUEUE_MASK) != idx) {
		hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK;
		hdr_desc = &desc[hdr_desc_idx];
		pktsz = 0;

		dxx = hdr_desc_idx;
		do {
			pktsz += desc[dxx].len;
			dxx = desc[dxx].next;
		} while (desc[dxx].flags & VRING_DESC_F_NEXT);

		pktsz += desc[dxx].len;

		/* Remove virtio header descriptor len */
		pktsz -= hdr_desc->len;

		/*
		 * XXX check sanity pktsz
		 * XXX too long and > PAGE_SIZE checks
		 * (PAGE_SIZE can be relaxed to 16384 later)
		 */
		pkt = malloc(pktsz);
		if (pkt == NULL) {
			log_warn("malloc error alloc packet buf");
			goto out;
		}

		ofs = 0;
		pkt_desc_idx = hdr_desc->next & VIONET_QUEUE_MASK;
		pkt_desc = &desc[pkt_desc_idx];

		while (pkt_desc->flags & VRING_DESC_F_NEXT) {
			/* tx descriptors must not be writable */
			if (pkt_desc->flags & VRING_DESC_F_WRITE) {
				log_warnx("unexpected writable tx desc "
				    "%d", pkt_desc_idx);
				goto out;
			}

			/* Read packet from descriptor ring */
			if (read_mem(pkt_desc->addr, pkt + ofs,
			    pkt_desc->len)) {
				log_warnx("vionet: packet read_mem error "
				    "@ 0x%llx", pkt_desc->addr);
				goto out;
			}

			ofs += pkt_desc->len;
			pkt_desc_idx = pkt_desc->next & VIONET_QUEUE_MASK;
			pkt_desc = &desc[pkt_desc_idx];
		}

		/* Now handle tail descriptor - must not be writable */
		if (pkt_desc->flags & VRING_DESC_F_WRITE) {
			log_warnx("unexpected writable tx descriptor %d",
			    pkt_desc_idx);
			goto out;
		}

		/* Read packet from descriptor ring */
		if (read_mem(pkt_desc->addr, pkt + ofs,
		    pkt_desc->len)) {
			log_warnx("vionet: packet read_mem error @ "
			    "0x%llx", pkt_desc->addr);
			goto out;
		}

		/* XXX signed vs unsigned here, funky cast */
		if (write(dev->fd, pkt, pktsz) != (int)pktsz) {
			log_warnx("vionet: tx failed writing to tap: "
			    "%d", errno);
			goto out;
		}

		ret = 1;
		dev->cfg.isr_status = 1;
		used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_desc_idx;
		used->ring[used->idx & VIONET_QUEUE_MASK].len = hdr_desc->len;
		used->idx++;

		dev->vq[dev->cfg.queue_notify].last_avail =
		    (dev->vq[dev->cfg.queue_notify].last_avail + 1);
		num_enq++;

		idx = dev->vq[dev->cfg.queue_notify].last_avail &
		    VIONET_QUEUE_MASK;

		/* Free the per-packet buffer before handling the next chain */
		free(pkt);
		pkt = NULL;
	}

	if (write_mem(q_gpa, vr, vr_sz)) {
		log_warnx("vionet: tx error writing vio ring");
	}

out:
	free(vr);
	free(pkt);

	return (ret);
}

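/*
 * virtio_init
 *
 * Create the PCI devices and virtqueue bookkeeping for the entropy, block
 * and network devices described by the VM create parameters. The
 * vq_availoffset/vq_usedoffset values computed below follow the same legacy
 * ring layout that vring_size() assumes: the descriptor table at offset 0,
 * the avail ring immediately after it, and the used ring starting at the
 * next VIRTIO_PAGE_SIZE boundary. As a worked example (assuming 16-byte
 * descriptors, a VIRTIO_PAGE_SIZE of 4096 and a 128-entry queue):
 *
 *	descriptor table: 128 * 16 = 2048 bytes at offset 0
 *	avail ring:       2 * (2 + 128) = 260 bytes at offset 2048
 *	used ring:        at offset VIRTQUEUE_ALIGN(2048 + 260) = 4096
 *
 * for a total vring_size() of 8192 bytes.
 */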
void
virtio_init(struct vm_create_params *vcp, int *child_disks, int *child_taps)
{
	uint8_t id;
	uint8_t i;
	off_t sz;

	/* Virtio entropy device */
	if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
	    PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM,
	    PCI_SUBCLASS_SYSTEM_MISC,
	    PCI_VENDOR_OPENBSD,
	    PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) {
		log_warnx("%s: can't add PCI virtio rng device",
		    __progname);
		return;
	}

	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) {
		log_warnx("%s: can't add bar for virtio rng device",
		    __progname);
		return;
	}

	memset(&viornd, 0, sizeof(viornd));
	viornd.vq[0].qs = VIORND_QUEUE_SIZE;
	viornd.vq[0].vq_availoffset = sizeof(struct vring_desc) *
	    VIORND_QUEUE_SIZE;
	viornd.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
	    sizeof(struct vring_desc) * VIORND_QUEUE_SIZE
	    + sizeof(uint16_t) * (2 + VIORND_QUEUE_SIZE));

	if (vcp->vcp_ndisks > 0) {
		vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev));
		if (vioblk == NULL) {
			log_warn("%s: calloc failure allocating vioblks",
			    __progname);
			return;
		}

		/* One virtio block device for each disk defined in vcp */
		for (i = 0; i < vcp->vcp_ndisks; i++) {
			if ((sz = lseek(child_disks[i], 0, SEEK_END)) == -1)
				continue;

			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
			    PCI_PRODUCT_QUMRANET_VIO_BLOCK,
			    PCI_CLASS_MASS_STORAGE,
			    PCI_SUBCLASS_MASS_STORAGE_SCSI,
			    PCI_VENDOR_OPENBSD,
			    PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) {
				log_warnx("%s: can't add PCI virtio block "
				    "device", __progname);
				return;
			}
			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_blk_io,
			    &vioblk[i])) {
				log_warnx("%s: can't add bar for virtio block "
				    "device", __progname);
				return;
			}
			vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE;
			vioblk[i].vq[0].vq_availoffset =
			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE;
			vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE));
			vioblk[i].vq[0].last_avail = 0;
			vioblk[i].fd = child_disks[i];
			vioblk[i].sz = sz / 512;
		}
	}

	if (vcp->vcp_nnics > 0) {
		vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev));
		if (vionet == NULL) {
			log_warn("%s: calloc failure allocating vionets",
			    __progname);
			return;
		}

		nr_vionet = vcp->vcp_nnics;
		/* Virtio network */
		for (i = 0; i < vcp->vcp_nnics; i++) {
			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
			    PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM,
			    PCI_SUBCLASS_SYSTEM_MISC,
			    PCI_VENDOR_OPENBSD,
			    PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) {
				log_warnx("%s: can't add PCI virtio net device",
				    __progname);
				return;
			}

			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_net_io,
			    &vionet[i])) {
				log_warnx("%s: can't add bar for virtio net "
				    "device", __progname);
				return;
			}

			vionet[i].vq[0].qs = VIONET_QUEUE_SIZE;
			vionet[i].vq[0].vq_availoffset =
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
			vionet[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
			vionet[i].vq[0].last_avail = 0;
			vionet[i].vq[1].qs = VIONET_QUEUE_SIZE;
			vionet[i].vq[1].vq_availoffset =
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
			vionet[i].vq[1].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
			vionet[i].vq[1].last_avail = 0;
			vionet[i].fd = child_taps[i];

#if 0
			/* User defined MAC */
			vionet[i].cfg.device_feature = VIRTIO_NET_F_MAC;
			bcopy(&vcp->vcp_macs[i], &vionet[i].mac, 6);
#endif
		}
	}
}