1 /* 2 * Virtio Support 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qapi/error.h" 16 #include "qemu-common.h" 17 #include "cpu.h" 18 #include "trace.h" 19 #include "exec/address-spaces.h" 20 #include "qemu/error-report.h" 21 #include "hw/virtio/virtio.h" 22 #include "qemu/atomic.h" 23 #include "hw/virtio/virtio-bus.h" 24 #include "migration/migration.h" 25 #include "hw/virtio/virtio-access.h" 26 27 /* 28 * The alignment to use between consumer and producer parts of vring. 29 * x86 pagesize again. This is the default, used by transports like PCI 30 * which don't provide a means for the guest to tell the host the alignment. 31 */ 32 #define VIRTIO_PCI_VRING_ALIGN 4096 33 34 typedef struct VRingDesc 35 { 36 uint64_t addr; 37 uint32_t len; 38 uint16_t flags; 39 uint16_t next; 40 } VRingDesc; 41 42 typedef struct VRingAvail 43 { 44 uint16_t flags; 45 uint16_t idx; 46 uint16_t ring[0]; 47 } VRingAvail; 48 49 typedef struct VRingUsedElem 50 { 51 uint32_t id; 52 uint32_t len; 53 } VRingUsedElem; 54 55 typedef struct VRingUsed 56 { 57 uint16_t flags; 58 uint16_t idx; 59 VRingUsedElem ring[0]; 60 } VRingUsed; 61 62 typedef struct VRing 63 { 64 unsigned int num; 65 unsigned int num_default; 66 unsigned int align; 67 hwaddr desc; 68 hwaddr avail; 69 hwaddr used; 70 } VRing; 71 72 struct VirtQueue 73 { 74 VRing vring; 75 76 /* Next head to pop */ 77 uint16_t last_avail_idx; 78 79 /* Last avail_idx read from VQ. */ 80 uint16_t shadow_avail_idx; 81 82 uint16_t used_idx; 83 84 /* Last used index value we have signalled on */ 85 uint16_t signalled_used; 86 87 /* Last used index value we have signalled on */ 88 bool signalled_used_valid; 89 90 /* Notification enabled? */ 91 bool notification; 92 93 uint16_t queue_index; 94 95 int inuse; 96 97 uint16_t vector; 98 VirtIOHandleOutput handle_output; 99 VirtIOHandleOutput handle_aio_output; 100 bool use_aio; 101 VirtIODevice *vdev; 102 EventNotifier guest_notifier; 103 EventNotifier host_notifier; 104 QLIST_ENTRY(VirtQueue) node; 105 }; 106 107 /* virt queue functions */ 108 void virtio_queue_update_rings(VirtIODevice *vdev, int n) 109 { 110 VRing *vring = &vdev->vq[n].vring; 111 112 if (!vring->desc) { 113 /* not yet setup -> nothing to do */ 114 return; 115 } 116 vring->avail = vring->desc + vring->num * sizeof(VRingDesc); 117 vring->used = vring_align(vring->avail + 118 offsetof(VRingAvail, ring[vring->num]), 119 vring->align); 120 } 121 122 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, 123 hwaddr desc_pa, int i) 124 { 125 address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc), 126 MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc)); 127 virtio_tswap64s(vdev, &desc->addr); 128 virtio_tswap32s(vdev, &desc->len); 129 virtio_tswap16s(vdev, &desc->flags); 130 virtio_tswap16s(vdev, &desc->next); 131 } 132 133 static inline uint16_t vring_avail_flags(VirtQueue *vq) 134 { 135 hwaddr pa; 136 pa = vq->vring.avail + offsetof(VRingAvail, flags); 137 return virtio_lduw_phys(vq->vdev, pa); 138 } 139 140 static inline uint16_t vring_avail_idx(VirtQueue *vq) 141 { 142 hwaddr pa; 143 pa = vq->vring.avail + offsetof(VRingAvail, idx); 144 vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa); 145 return vq->shadow_avail_idx; 146 } 147 148 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) 149 { 150 hwaddr pa; 151 pa = vq->vring.avail + offsetof(VRingAvail, ring[i]); 152 return virtio_lduw_phys(vq->vdev, pa); 153 } 154 155 static inline uint16_t vring_get_used_event(VirtQueue *vq) 156 { 157 return vring_avail_ring(vq, vq->vring.num); 158 } 159 160 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem, 161 int i) 162 { 163 hwaddr pa; 164 virtio_tswap32s(vq->vdev, &uelem->id); 165 virtio_tswap32s(vq->vdev, &uelem->len); 166 pa = vq->vring.used + offsetof(VRingUsed, ring[i]); 167 address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED, 168 (void *)uelem, sizeof(VRingUsedElem)); 169 } 170 171 static uint16_t vring_used_idx(VirtQueue *vq) 172 { 173 hwaddr pa; 174 pa = vq->vring.used + offsetof(VRingUsed, idx); 175 return virtio_lduw_phys(vq->vdev, pa); 176 } 177 178 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) 179 { 180 hwaddr pa; 181 pa = vq->vring.used + offsetof(VRingUsed, idx); 182 virtio_stw_phys(vq->vdev, pa, val); 183 vq->used_idx = val; 184 } 185 186 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) 187 { 188 VirtIODevice *vdev = vq->vdev; 189 hwaddr pa; 190 pa = vq->vring.used + offsetof(VRingUsed, flags); 191 virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask); 192 } 193 194 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) 195 { 196 VirtIODevice *vdev = vq->vdev; 197 hwaddr pa; 198 pa = vq->vring.used + offsetof(VRingUsed, flags); 199 virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask); 200 } 201 202 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) 203 { 204 hwaddr pa; 205 if (!vq->notification) { 206 return; 207 } 208 pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]); 209 virtio_stw_phys(vq->vdev, pa, val); 210 } 211 212 void virtio_queue_set_notification(VirtQueue *vq, int enable) 213 { 214 vq->notification = enable; 215 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { 216 vring_set_avail_event(vq, vring_avail_idx(vq)); 217 } else if (enable) { 218 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY); 219 } else { 220 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY); 221 } 222 if (enable) { 223 /* Expose avail event/used flags before caller checks the avail idx. */ 224 smp_mb(); 225 } 226 } 227 228 int virtio_queue_ready(VirtQueue *vq) 229 { 230 return vq->vring.avail != 0; 231 } 232 233 /* Fetch avail_idx from VQ memory only when we really need to know if 234 * guest has added some buffers. */ 235 int virtio_queue_empty(VirtQueue *vq) 236 { 237 if (vq->shadow_avail_idx != vq->last_avail_idx) { 238 return 0; 239 } 240 241 return vring_avail_idx(vq) == vq->last_avail_idx; 242 } 243 244 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, 245 unsigned int len) 246 { 247 unsigned int offset; 248 int i; 249 250 offset = 0; 251 for (i = 0; i < elem->in_num; i++) { 252 size_t size = MIN(len - offset, elem->in_sg[i].iov_len); 253 254 cpu_physical_memory_unmap(elem->in_sg[i].iov_base, 255 elem->in_sg[i].iov_len, 256 1, size); 257 258 offset += size; 259 } 260 261 for (i = 0; i < elem->out_num; i++) 262 cpu_physical_memory_unmap(elem->out_sg[i].iov_base, 263 elem->out_sg[i].iov_len, 264 0, elem->out_sg[i].iov_len); 265 } 266 267 void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem, 268 unsigned int len) 269 { 270 vq->last_avail_idx--; 271 virtqueue_unmap_sg(vq, elem, len); 272 } 273 274 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, 275 unsigned int len, unsigned int idx) 276 { 277 VRingUsedElem uelem; 278 279 trace_virtqueue_fill(vq, elem, len, idx); 280 281 virtqueue_unmap_sg(vq, elem, len); 282 283 idx = (idx + vq->used_idx) % vq->vring.num; 284 285 uelem.id = elem->index; 286 uelem.len = len; 287 vring_used_write(vq, &uelem, idx); 288 } 289 290 void virtqueue_flush(VirtQueue *vq, unsigned int count) 291 { 292 uint16_t old, new; 293 /* Make sure buffer is written before we update index. */ 294 smp_wmb(); 295 trace_virtqueue_flush(vq, count); 296 old = vq->used_idx; 297 new = old + count; 298 vring_used_idx_set(vq, new); 299 vq->inuse -= count; 300 if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) 301 vq->signalled_used_valid = false; 302 } 303 304 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, 305 unsigned int len) 306 { 307 virtqueue_fill(vq, elem, len, 0); 308 virtqueue_flush(vq, 1); 309 } 310 311 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) 312 { 313 uint16_t num_heads = vring_avail_idx(vq) - idx; 314 315 /* Check it isn't doing very strange things with descriptor numbers. */ 316 if (num_heads > vq->vring.num) { 317 error_report("Guest moved used index from %u to %u", 318 idx, vq->shadow_avail_idx); 319 exit(1); 320 } 321 /* On success, callers read a descriptor at vq->last_avail_idx. 322 * Make sure descriptor read does not bypass avail index read. */ 323 if (num_heads) { 324 smp_rmb(); 325 } 326 327 return num_heads; 328 } 329 330 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) 331 { 332 unsigned int head; 333 334 /* Grab the next descriptor number they're advertising, and increment 335 * the index we've seen. */ 336 head = vring_avail_ring(vq, idx % vq->vring.num); 337 338 /* If their number is silly, that's a fatal mistake. */ 339 if (head >= vq->vring.num) { 340 error_report("Guest says index %u is available", head); 341 exit(1); 342 } 343 344 return head; 345 } 346 347 static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, 348 hwaddr desc_pa, unsigned int max) 349 { 350 unsigned int next; 351 352 /* If this descriptor says it doesn't chain, we're done. */ 353 if (!(desc->flags & VRING_DESC_F_NEXT)) { 354 return max; 355 } 356 357 /* Check they're not leading us off end of descriptors. */ 358 next = desc->next; 359 /* Make sure compiler knows to grab that: we don't want it changing! */ 360 smp_wmb(); 361 362 if (next >= max) { 363 error_report("Desc next is %u", next); 364 exit(1); 365 } 366 367 vring_desc_read(vdev, desc, desc_pa, next); 368 return next; 369 } 370 371 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, 372 unsigned int *out_bytes, 373 unsigned max_in_bytes, unsigned max_out_bytes) 374 { 375 unsigned int idx; 376 unsigned int total_bufs, in_total, out_total; 377 378 idx = vq->last_avail_idx; 379 380 total_bufs = in_total = out_total = 0; 381 while (virtqueue_num_heads(vq, idx)) { 382 VirtIODevice *vdev = vq->vdev; 383 unsigned int max, num_bufs, indirect = 0; 384 VRingDesc desc; 385 hwaddr desc_pa; 386 int i; 387 388 max = vq->vring.num; 389 num_bufs = total_bufs; 390 i = virtqueue_get_head(vq, idx++); 391 desc_pa = vq->vring.desc; 392 vring_desc_read(vdev, &desc, desc_pa, i); 393 394 if (desc.flags & VRING_DESC_F_INDIRECT) { 395 if (desc.len % sizeof(VRingDesc)) { 396 error_report("Invalid size for indirect buffer table"); 397 exit(1); 398 } 399 400 /* If we've got too many, that implies a descriptor loop. */ 401 if (num_bufs >= max) { 402 error_report("Looped descriptor"); 403 exit(1); 404 } 405 406 /* loop over the indirect descriptor table */ 407 indirect = 1; 408 max = desc.len / sizeof(VRingDesc); 409 desc_pa = desc.addr; 410 num_bufs = i = 0; 411 vring_desc_read(vdev, &desc, desc_pa, i); 412 } 413 414 do { 415 /* If we've got too many, that implies a descriptor loop. */ 416 if (++num_bufs > max) { 417 error_report("Looped descriptor"); 418 exit(1); 419 } 420 421 if (desc.flags & VRING_DESC_F_WRITE) { 422 in_total += desc.len; 423 } else { 424 out_total += desc.len; 425 } 426 if (in_total >= max_in_bytes && out_total >= max_out_bytes) { 427 goto done; 428 } 429 } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max); 430 431 if (!indirect) 432 total_bufs = num_bufs; 433 else 434 total_bufs++; 435 } 436 done: 437 if (in_bytes) { 438 *in_bytes = in_total; 439 } 440 if (out_bytes) { 441 *out_bytes = out_total; 442 } 443 } 444 445 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, 446 unsigned int out_bytes) 447 { 448 unsigned int in_total, out_total; 449 450 virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes); 451 return in_bytes <= in_total && out_bytes <= out_total; 452 } 453 454 static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov, 455 unsigned int max_num_sg, bool is_write, 456 hwaddr pa, size_t sz) 457 { 458 unsigned num_sg = *p_num_sg; 459 assert(num_sg <= max_num_sg); 460 461 while (sz) { 462 hwaddr len = sz; 463 464 if (num_sg == max_num_sg) { 465 error_report("virtio: too many write descriptors in indirect table"); 466 exit(1); 467 } 468 469 iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write); 470 iov[num_sg].iov_len = len; 471 addr[num_sg] = pa; 472 473 sz -= len; 474 pa += len; 475 num_sg++; 476 } 477 *p_num_sg = num_sg; 478 } 479 480 static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr, 481 unsigned int *num_sg, unsigned int max_size, 482 int is_write) 483 { 484 unsigned int i; 485 hwaddr len; 486 487 /* Note: this function MUST validate input, some callers 488 * are passing in num_sg values received over the network. 489 */ 490 /* TODO: teach all callers that this can fail, and return failure instead 491 * of asserting here. 492 * When we do, we might be able to re-enable NDEBUG below. 493 */ 494 #ifdef NDEBUG 495 #error building with NDEBUG is not supported 496 #endif 497 assert(*num_sg <= max_size); 498 499 for (i = 0; i < *num_sg; i++) { 500 len = sg[i].iov_len; 501 sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write); 502 if (!sg[i].iov_base) { 503 error_report("virtio: error trying to map MMIO memory"); 504 exit(1); 505 } 506 if (len != sg[i].iov_len) { 507 error_report("virtio: unexpected memory split"); 508 exit(1); 509 } 510 } 511 } 512 513 void virtqueue_map(VirtQueueElement *elem) 514 { 515 virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num, 516 VIRTQUEUE_MAX_SIZE, 1); 517 virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num, 518 VIRTQUEUE_MAX_SIZE, 0); 519 } 520 521 void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num) 522 { 523 VirtQueueElement *elem; 524 size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0])); 525 size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]); 526 size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]); 527 size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0])); 528 size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]); 529 size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]); 530 531 assert(sz >= sizeof(VirtQueueElement)); 532 elem = g_malloc(out_sg_end); 533 elem->out_num = out_num; 534 elem->in_num = in_num; 535 elem->in_addr = (void *)elem + in_addr_ofs; 536 elem->out_addr = (void *)elem + out_addr_ofs; 537 elem->in_sg = (void *)elem + in_sg_ofs; 538 elem->out_sg = (void *)elem + out_sg_ofs; 539 return elem; 540 } 541 542 void *virtqueue_pop(VirtQueue *vq, size_t sz) 543 { 544 unsigned int i, head, max; 545 hwaddr desc_pa = vq->vring.desc; 546 VirtIODevice *vdev = vq->vdev; 547 VirtQueueElement *elem; 548 unsigned out_num, in_num; 549 hwaddr addr[VIRTQUEUE_MAX_SIZE]; 550 struct iovec iov[VIRTQUEUE_MAX_SIZE]; 551 VRingDesc desc; 552 553 if (virtio_queue_empty(vq)) { 554 return NULL; 555 } 556 /* Needed after virtio_queue_empty(), see comment in 557 * virtqueue_num_heads(). */ 558 smp_rmb(); 559 560 /* When we start there are none of either input nor output. */ 561 out_num = in_num = 0; 562 563 max = vq->vring.num; 564 565 i = head = virtqueue_get_head(vq, vq->last_avail_idx++); 566 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { 567 vring_set_avail_event(vq, vq->last_avail_idx); 568 } 569 570 vring_desc_read(vdev, &desc, desc_pa, i); 571 if (desc.flags & VRING_DESC_F_INDIRECT) { 572 if (desc.len % sizeof(VRingDesc)) { 573 error_report("Invalid size for indirect buffer table"); 574 exit(1); 575 } 576 577 /* loop over the indirect descriptor table */ 578 max = desc.len / sizeof(VRingDesc); 579 desc_pa = desc.addr; 580 i = 0; 581 vring_desc_read(vdev, &desc, desc_pa, i); 582 } 583 584 /* Collect all the descriptors */ 585 do { 586 if (desc.flags & VRING_DESC_F_WRITE) { 587 virtqueue_map_desc(&in_num, addr + out_num, iov + out_num, 588 VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len); 589 } else { 590 if (in_num) { 591 error_report("Incorrect order for descriptors"); 592 exit(1); 593 } 594 virtqueue_map_desc(&out_num, addr, iov, 595 VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len); 596 } 597 598 /* If we've got too many, that implies a descriptor loop. */ 599 if ((in_num + out_num) > max) { 600 error_report("Looped descriptor"); 601 exit(1); 602 } 603 } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max); 604 605 /* Now copy what we have collected and mapped */ 606 elem = virtqueue_alloc_element(sz, out_num, in_num); 607 elem->index = head; 608 for (i = 0; i < out_num; i++) { 609 elem->out_addr[i] = addr[i]; 610 elem->out_sg[i] = iov[i]; 611 } 612 for (i = 0; i < in_num; i++) { 613 elem->in_addr[i] = addr[out_num + i]; 614 elem->in_sg[i] = iov[out_num + i]; 615 } 616 617 vq->inuse++; 618 619 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); 620 return elem; 621 } 622 623 /* Reading and writing a structure directly to QEMUFile is *awful*, but 624 * it is what QEMU has always done by mistake. We can change it sooner 625 * or later by bumping the version number of the affected vm states. 626 * In the meanwhile, since the in-memory layout of VirtQueueElement 627 * has changed, we need to marshal to and from the layout that was 628 * used before the change. 629 */ 630 typedef struct VirtQueueElementOld { 631 unsigned int index; 632 unsigned int out_num; 633 unsigned int in_num; 634 hwaddr in_addr[VIRTQUEUE_MAX_SIZE]; 635 hwaddr out_addr[VIRTQUEUE_MAX_SIZE]; 636 struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; 637 struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; 638 } VirtQueueElementOld; 639 640 void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz) 641 { 642 VirtQueueElement *elem; 643 VirtQueueElementOld data; 644 int i; 645 646 qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); 647 648 elem = virtqueue_alloc_element(sz, data.out_num, data.in_num); 649 elem->index = data.index; 650 651 for (i = 0; i < elem->in_num; i++) { 652 elem->in_addr[i] = data.in_addr[i]; 653 } 654 655 for (i = 0; i < elem->out_num; i++) { 656 elem->out_addr[i] = data.out_addr[i]; 657 } 658 659 for (i = 0; i < elem->in_num; i++) { 660 /* Base is overwritten by virtqueue_map. */ 661 elem->in_sg[i].iov_base = 0; 662 elem->in_sg[i].iov_len = data.in_sg[i].iov_len; 663 } 664 665 for (i = 0; i < elem->out_num; i++) { 666 /* Base is overwritten by virtqueue_map. */ 667 elem->out_sg[i].iov_base = 0; 668 elem->out_sg[i].iov_len = data.out_sg[i].iov_len; 669 } 670 671 virtqueue_map(elem); 672 return elem; 673 } 674 675 void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem) 676 { 677 VirtQueueElementOld data; 678 int i; 679 680 memset(&data, 0, sizeof(data)); 681 data.index = elem->index; 682 data.in_num = elem->in_num; 683 data.out_num = elem->out_num; 684 685 for (i = 0; i < elem->in_num; i++) { 686 data.in_addr[i] = elem->in_addr[i]; 687 } 688 689 for (i = 0; i < elem->out_num; i++) { 690 data.out_addr[i] = elem->out_addr[i]; 691 } 692 693 for (i = 0; i < elem->in_num; i++) { 694 /* Base is overwritten by virtqueue_map when loading. Do not 695 * save it, as it would leak the QEMU address space layout. */ 696 data.in_sg[i].iov_len = elem->in_sg[i].iov_len; 697 } 698 699 for (i = 0; i < elem->out_num; i++) { 700 /* Do not save iov_base as above. */ 701 data.out_sg[i].iov_len = elem->out_sg[i].iov_len; 702 } 703 qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); 704 } 705 706 /* virtio device */ 707 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector) 708 { 709 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 710 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 711 712 if (k->notify) { 713 k->notify(qbus->parent, vector); 714 } 715 } 716 717 void virtio_update_irq(VirtIODevice *vdev) 718 { 719 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); 720 } 721 722 static int virtio_validate_features(VirtIODevice *vdev) 723 { 724 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 725 726 if (k->validate_features) { 727 return k->validate_features(vdev); 728 } else { 729 return 0; 730 } 731 } 732 733 int virtio_set_status(VirtIODevice *vdev, uint8_t val) 734 { 735 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 736 trace_virtio_set_status(vdev, val); 737 738 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 739 if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) && 740 val & VIRTIO_CONFIG_S_FEATURES_OK) { 741 int ret = virtio_validate_features(vdev); 742 743 if (ret) { 744 return ret; 745 } 746 } 747 } 748 if (k->set_status) { 749 k->set_status(vdev, val); 750 } 751 vdev->status = val; 752 return 0; 753 } 754 755 bool target_words_bigendian(void); 756 static enum virtio_device_endian virtio_default_endian(void) 757 { 758 if (target_words_bigendian()) { 759 return VIRTIO_DEVICE_ENDIAN_BIG; 760 } else { 761 return VIRTIO_DEVICE_ENDIAN_LITTLE; 762 } 763 } 764 765 static enum virtio_device_endian virtio_current_cpu_endian(void) 766 { 767 CPUClass *cc = CPU_GET_CLASS(current_cpu); 768 769 if (cc->virtio_is_big_endian(current_cpu)) { 770 return VIRTIO_DEVICE_ENDIAN_BIG; 771 } else { 772 return VIRTIO_DEVICE_ENDIAN_LITTLE; 773 } 774 } 775 776 void virtio_reset(void *opaque) 777 { 778 VirtIODevice *vdev = opaque; 779 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 780 int i; 781 782 virtio_set_status(vdev, 0); 783 if (current_cpu) { 784 /* Guest initiated reset */ 785 vdev->device_endian = virtio_current_cpu_endian(); 786 } else { 787 /* System reset */ 788 vdev->device_endian = virtio_default_endian(); 789 } 790 791 if (k->reset) { 792 k->reset(vdev); 793 } 794 795 vdev->guest_features = 0; 796 vdev->queue_sel = 0; 797 vdev->status = 0; 798 vdev->isr = 0; 799 vdev->config_vector = VIRTIO_NO_VECTOR; 800 virtio_notify_vector(vdev, vdev->config_vector); 801 802 for(i = 0; i < VIRTIO_QUEUE_MAX; i++) { 803 vdev->vq[i].vring.desc = 0; 804 vdev->vq[i].vring.avail = 0; 805 vdev->vq[i].vring.used = 0; 806 vdev->vq[i].last_avail_idx = 0; 807 vdev->vq[i].shadow_avail_idx = 0; 808 vdev->vq[i].used_idx = 0; 809 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); 810 vdev->vq[i].signalled_used = 0; 811 vdev->vq[i].signalled_used_valid = false; 812 vdev->vq[i].notification = true; 813 vdev->vq[i].vring.num = vdev->vq[i].vring.num_default; 814 } 815 } 816 817 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr) 818 { 819 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 820 uint8_t val; 821 822 if (addr + sizeof(val) > vdev->config_len) { 823 return (uint32_t)-1; 824 } 825 826 k->get_config(vdev, vdev->config); 827 828 val = ldub_p(vdev->config + addr); 829 return val; 830 } 831 832 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr) 833 { 834 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 835 uint16_t val; 836 837 if (addr + sizeof(val) > vdev->config_len) { 838 return (uint32_t)-1; 839 } 840 841 k->get_config(vdev, vdev->config); 842 843 val = lduw_p(vdev->config + addr); 844 return val; 845 } 846 847 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr) 848 { 849 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 850 uint32_t val; 851 852 if (addr + sizeof(val) > vdev->config_len) { 853 return (uint32_t)-1; 854 } 855 856 k->get_config(vdev, vdev->config); 857 858 val = ldl_p(vdev->config + addr); 859 return val; 860 } 861 862 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data) 863 { 864 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 865 uint8_t val = data; 866 867 if (addr + sizeof(val) > vdev->config_len) { 868 return; 869 } 870 871 stb_p(vdev->config + addr, val); 872 873 if (k->set_config) { 874 k->set_config(vdev, vdev->config); 875 } 876 } 877 878 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data) 879 { 880 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 881 uint16_t val = data; 882 883 if (addr + sizeof(val) > vdev->config_len) { 884 return; 885 } 886 887 stw_p(vdev->config + addr, val); 888 889 if (k->set_config) { 890 k->set_config(vdev, vdev->config); 891 } 892 } 893 894 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data) 895 { 896 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 897 uint32_t val = data; 898 899 if (addr + sizeof(val) > vdev->config_len) { 900 return; 901 } 902 903 stl_p(vdev->config + addr, val); 904 905 if (k->set_config) { 906 k->set_config(vdev, vdev->config); 907 } 908 } 909 910 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr) 911 { 912 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 913 uint8_t val; 914 915 if (addr + sizeof(val) > vdev->config_len) { 916 return (uint32_t)-1; 917 } 918 919 k->get_config(vdev, vdev->config); 920 921 val = ldub_p(vdev->config + addr); 922 return val; 923 } 924 925 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr) 926 { 927 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 928 uint16_t val; 929 930 if (addr + sizeof(val) > vdev->config_len) { 931 return (uint32_t)-1; 932 } 933 934 k->get_config(vdev, vdev->config); 935 936 val = lduw_le_p(vdev->config + addr); 937 return val; 938 } 939 940 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr) 941 { 942 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 943 uint32_t val; 944 945 if (addr + sizeof(val) > vdev->config_len) { 946 return (uint32_t)-1; 947 } 948 949 k->get_config(vdev, vdev->config); 950 951 val = ldl_le_p(vdev->config + addr); 952 return val; 953 } 954 955 void virtio_config_modern_writeb(VirtIODevice *vdev, 956 uint32_t addr, uint32_t data) 957 { 958 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 959 uint8_t val = data; 960 961 if (addr + sizeof(val) > vdev->config_len) { 962 return; 963 } 964 965 stb_p(vdev->config + addr, val); 966 967 if (k->set_config) { 968 k->set_config(vdev, vdev->config); 969 } 970 } 971 972 void virtio_config_modern_writew(VirtIODevice *vdev, 973 uint32_t addr, uint32_t data) 974 { 975 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 976 uint16_t val = data; 977 978 if (addr + sizeof(val) > vdev->config_len) { 979 return; 980 } 981 982 stw_le_p(vdev->config + addr, val); 983 984 if (k->set_config) { 985 k->set_config(vdev, vdev->config); 986 } 987 } 988 989 void virtio_config_modern_writel(VirtIODevice *vdev, 990 uint32_t addr, uint32_t data) 991 { 992 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 993 uint32_t val = data; 994 995 if (addr + sizeof(val) > vdev->config_len) { 996 return; 997 } 998 999 stl_le_p(vdev->config + addr, val); 1000 1001 if (k->set_config) { 1002 k->set_config(vdev, vdev->config); 1003 } 1004 } 1005 1006 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr) 1007 { 1008 vdev->vq[n].vring.desc = addr; 1009 virtio_queue_update_rings(vdev, n); 1010 } 1011 1012 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n) 1013 { 1014 return vdev->vq[n].vring.desc; 1015 } 1016 1017 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, 1018 hwaddr avail, hwaddr used) 1019 { 1020 vdev->vq[n].vring.desc = desc; 1021 vdev->vq[n].vring.avail = avail; 1022 vdev->vq[n].vring.used = used; 1023 } 1024 1025 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) 1026 { 1027 /* Don't allow guest to flip queue between existent and 1028 * nonexistent states, or to set it to an invalid size. 1029 */ 1030 if (!!num != !!vdev->vq[n].vring.num || 1031 num > VIRTQUEUE_MAX_SIZE || 1032 num < 0) { 1033 return; 1034 } 1035 vdev->vq[n].vring.num = num; 1036 } 1037 1038 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector) 1039 { 1040 return QLIST_FIRST(&vdev->vector_queues[vector]); 1041 } 1042 1043 VirtQueue *virtio_vector_next_queue(VirtQueue *vq) 1044 { 1045 return QLIST_NEXT(vq, node); 1046 } 1047 1048 int virtio_queue_get_num(VirtIODevice *vdev, int n) 1049 { 1050 return vdev->vq[n].vring.num; 1051 } 1052 1053 int virtio_get_num_queues(VirtIODevice *vdev) 1054 { 1055 int i; 1056 1057 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1058 if (!virtio_queue_get_num(vdev, i)) { 1059 break; 1060 } 1061 } 1062 1063 return i; 1064 } 1065 1066 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) 1067 { 1068 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1069 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1070 1071 /* virtio-1 compliant devices cannot change the alignment */ 1072 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 1073 error_report("tried to modify queue alignment for virtio-1 device"); 1074 return; 1075 } 1076 /* Check that the transport told us it was going to do this 1077 * (so a buggy transport will immediately assert rather than 1078 * silently failing to migrate this state) 1079 */ 1080 assert(k->has_variable_vring_alignment); 1081 1082 vdev->vq[n].vring.align = align; 1083 virtio_queue_update_rings(vdev, n); 1084 } 1085 1086 static void virtio_queue_notify_aio_vq(VirtQueue *vq) 1087 { 1088 if (vq->vring.desc && vq->handle_aio_output) { 1089 VirtIODevice *vdev = vq->vdev; 1090 1091 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); 1092 vq->handle_aio_output(vdev, vq); 1093 } 1094 } 1095 1096 static void virtio_queue_notify_vq(VirtQueue *vq) 1097 { 1098 if (vq->vring.desc && vq->handle_output) { 1099 VirtIODevice *vdev = vq->vdev; 1100 1101 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); 1102 vq->handle_output(vdev, vq); 1103 } 1104 } 1105 1106 void virtio_queue_notify(VirtIODevice *vdev, int n) 1107 { 1108 virtio_queue_notify_vq(&vdev->vq[n]); 1109 } 1110 1111 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) 1112 { 1113 return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector : 1114 VIRTIO_NO_VECTOR; 1115 } 1116 1117 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector) 1118 { 1119 VirtQueue *vq = &vdev->vq[n]; 1120 1121 if (n < VIRTIO_QUEUE_MAX) { 1122 if (vdev->vector_queues && 1123 vdev->vq[n].vector != VIRTIO_NO_VECTOR) { 1124 QLIST_REMOVE(vq, node); 1125 } 1126 vdev->vq[n].vector = vector; 1127 if (vdev->vector_queues && 1128 vector != VIRTIO_NO_VECTOR) { 1129 QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node); 1130 } 1131 } 1132 } 1133 1134 static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size, 1135 VirtIOHandleOutput handle_output, 1136 bool use_aio) 1137 { 1138 int i; 1139 1140 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1141 if (vdev->vq[i].vring.num == 0) 1142 break; 1143 } 1144 1145 if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) 1146 abort(); 1147 1148 vdev->vq[i].vring.num = queue_size; 1149 vdev->vq[i].vring.num_default = queue_size; 1150 vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN; 1151 vdev->vq[i].handle_output = handle_output; 1152 vdev->vq[i].handle_aio_output = NULL; 1153 vdev->vq[i].use_aio = use_aio; 1154 1155 return &vdev->vq[i]; 1156 } 1157 1158 /* Add a virt queue and mark AIO. 1159 * An AIO queue will use the AioContext based event interface instead of the 1160 * default IOHandler and EventNotifier interface. 1161 */ 1162 VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size, 1163 VirtIOHandleOutput handle_output) 1164 { 1165 return virtio_add_queue_internal(vdev, queue_size, handle_output, true); 1166 } 1167 1168 /* Add a normal virt queue (on the contrary to the AIO version above. */ 1169 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, 1170 VirtIOHandleOutput handle_output) 1171 { 1172 return virtio_add_queue_internal(vdev, queue_size, handle_output, false); 1173 } 1174 1175 void virtio_del_queue(VirtIODevice *vdev, int n) 1176 { 1177 if (n < 0 || n >= VIRTIO_QUEUE_MAX) { 1178 abort(); 1179 } 1180 1181 vdev->vq[n].vring.num = 0; 1182 vdev->vq[n].vring.num_default = 0; 1183 } 1184 1185 void virtio_irq(VirtQueue *vq) 1186 { 1187 trace_virtio_irq(vq); 1188 vq->vdev->isr |= 0x01; 1189 virtio_notify_vector(vq->vdev, vq->vector); 1190 } 1191 1192 bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) 1193 { 1194 uint16_t old, new; 1195 bool v; 1196 /* We need to expose used array entries before checking used event. */ 1197 smp_mb(); 1198 /* Always notify when queue is empty (when feature acknowledge) */ 1199 if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) && 1200 !vq->inuse && virtio_queue_empty(vq)) { 1201 return true; 1202 } 1203 1204 if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { 1205 return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT); 1206 } 1207 1208 v = vq->signalled_used_valid; 1209 vq->signalled_used_valid = true; 1210 old = vq->signalled_used; 1211 new = vq->signalled_used = vq->used_idx; 1212 return !v || vring_need_event(vring_get_used_event(vq), new, old); 1213 } 1214 1215 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) 1216 { 1217 if (!virtio_should_notify(vdev, vq)) { 1218 return; 1219 } 1220 1221 trace_virtio_notify(vdev, vq); 1222 vdev->isr |= 0x01; 1223 virtio_notify_vector(vdev, vq->vector); 1224 } 1225 1226 void virtio_notify_config(VirtIODevice *vdev) 1227 { 1228 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 1229 return; 1230 1231 vdev->isr |= 0x03; 1232 vdev->generation++; 1233 virtio_notify_vector(vdev, vdev->config_vector); 1234 } 1235 1236 static bool virtio_device_endian_needed(void *opaque) 1237 { 1238 VirtIODevice *vdev = opaque; 1239 1240 assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN); 1241 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 1242 return vdev->device_endian != virtio_default_endian(); 1243 } 1244 /* Devices conforming to VIRTIO 1.0 or later are always LE. */ 1245 return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE; 1246 } 1247 1248 static bool virtio_64bit_features_needed(void *opaque) 1249 { 1250 VirtIODevice *vdev = opaque; 1251 1252 return (vdev->host_features >> 32) != 0; 1253 } 1254 1255 static bool virtio_virtqueue_needed(void *opaque) 1256 { 1257 VirtIODevice *vdev = opaque; 1258 1259 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1); 1260 } 1261 1262 static bool virtio_ringsize_needed(void *opaque) 1263 { 1264 VirtIODevice *vdev = opaque; 1265 int i; 1266 1267 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1268 if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) { 1269 return true; 1270 } 1271 } 1272 return false; 1273 } 1274 1275 static bool virtio_extra_state_needed(void *opaque) 1276 { 1277 VirtIODevice *vdev = opaque; 1278 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1279 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1280 1281 return k->has_extra_state && 1282 k->has_extra_state(qbus->parent); 1283 } 1284 1285 static const VMStateDescription vmstate_virtqueue = { 1286 .name = "virtqueue_state", 1287 .version_id = 1, 1288 .minimum_version_id = 1, 1289 .fields = (VMStateField[]) { 1290 VMSTATE_UINT64(vring.avail, struct VirtQueue), 1291 VMSTATE_UINT64(vring.used, struct VirtQueue), 1292 VMSTATE_END_OF_LIST() 1293 } 1294 }; 1295 1296 static const VMStateDescription vmstate_virtio_virtqueues = { 1297 .name = "virtio/virtqueues", 1298 .version_id = 1, 1299 .minimum_version_id = 1, 1300 .needed = &virtio_virtqueue_needed, 1301 .fields = (VMStateField[]) { 1302 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, 1303 VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue), 1304 VMSTATE_END_OF_LIST() 1305 } 1306 }; 1307 1308 static const VMStateDescription vmstate_ringsize = { 1309 .name = "ringsize_state", 1310 .version_id = 1, 1311 .minimum_version_id = 1, 1312 .fields = (VMStateField[]) { 1313 VMSTATE_UINT32(vring.num_default, struct VirtQueue), 1314 VMSTATE_END_OF_LIST() 1315 } 1316 }; 1317 1318 static const VMStateDescription vmstate_virtio_ringsize = { 1319 .name = "virtio/ringsize", 1320 .version_id = 1, 1321 .minimum_version_id = 1, 1322 .needed = &virtio_ringsize_needed, 1323 .fields = (VMStateField[]) { 1324 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, 1325 VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue), 1326 VMSTATE_END_OF_LIST() 1327 } 1328 }; 1329 1330 static int get_extra_state(QEMUFile *f, void *pv, size_t size) 1331 { 1332 VirtIODevice *vdev = pv; 1333 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1334 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1335 1336 if (!k->load_extra_state) { 1337 return -1; 1338 } else { 1339 return k->load_extra_state(qbus->parent, f); 1340 } 1341 } 1342 1343 static void put_extra_state(QEMUFile *f, void *pv, size_t size) 1344 { 1345 VirtIODevice *vdev = pv; 1346 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1347 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1348 1349 k->save_extra_state(qbus->parent, f); 1350 } 1351 1352 static const VMStateInfo vmstate_info_extra_state = { 1353 .name = "virtqueue_extra_state", 1354 .get = get_extra_state, 1355 .put = put_extra_state, 1356 }; 1357 1358 static const VMStateDescription vmstate_virtio_extra_state = { 1359 .name = "virtio/extra_state", 1360 .version_id = 1, 1361 .minimum_version_id = 1, 1362 .needed = &virtio_extra_state_needed, 1363 .fields = (VMStateField[]) { 1364 { 1365 .name = "extra_state", 1366 .version_id = 0, 1367 .field_exists = NULL, 1368 .size = 0, 1369 .info = &vmstate_info_extra_state, 1370 .flags = VMS_SINGLE, 1371 .offset = 0, 1372 }, 1373 VMSTATE_END_OF_LIST() 1374 } 1375 }; 1376 1377 static const VMStateDescription vmstate_virtio_device_endian = { 1378 .name = "virtio/device_endian", 1379 .version_id = 1, 1380 .minimum_version_id = 1, 1381 .needed = &virtio_device_endian_needed, 1382 .fields = (VMStateField[]) { 1383 VMSTATE_UINT8(device_endian, VirtIODevice), 1384 VMSTATE_END_OF_LIST() 1385 } 1386 }; 1387 1388 static const VMStateDescription vmstate_virtio_64bit_features = { 1389 .name = "virtio/64bit_features", 1390 .version_id = 1, 1391 .minimum_version_id = 1, 1392 .needed = &virtio_64bit_features_needed, 1393 .fields = (VMStateField[]) { 1394 VMSTATE_UINT64(guest_features, VirtIODevice), 1395 VMSTATE_END_OF_LIST() 1396 } 1397 }; 1398 1399 static const VMStateDescription vmstate_virtio = { 1400 .name = "virtio", 1401 .version_id = 1, 1402 .minimum_version_id = 1, 1403 .minimum_version_id_old = 1, 1404 .fields = (VMStateField[]) { 1405 VMSTATE_END_OF_LIST() 1406 }, 1407 .subsections = (const VMStateDescription*[]) { 1408 &vmstate_virtio_device_endian, 1409 &vmstate_virtio_64bit_features, 1410 &vmstate_virtio_virtqueues, 1411 &vmstate_virtio_ringsize, 1412 &vmstate_virtio_extra_state, 1413 NULL 1414 } 1415 }; 1416 1417 void virtio_save(VirtIODevice *vdev, QEMUFile *f) 1418 { 1419 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1420 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1421 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); 1422 uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff); 1423 int i; 1424 1425 if (k->save_config) { 1426 k->save_config(qbus->parent, f); 1427 } 1428 1429 qemu_put_8s(f, &vdev->status); 1430 qemu_put_8s(f, &vdev->isr); 1431 qemu_put_be16s(f, &vdev->queue_sel); 1432 qemu_put_be32s(f, &guest_features_lo); 1433 qemu_put_be32(f, vdev->config_len); 1434 qemu_put_buffer(f, vdev->config, vdev->config_len); 1435 1436 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1437 if (vdev->vq[i].vring.num == 0) 1438 break; 1439 } 1440 1441 qemu_put_be32(f, i); 1442 1443 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1444 if (vdev->vq[i].vring.num == 0) 1445 break; 1446 1447 qemu_put_be32(f, vdev->vq[i].vring.num); 1448 if (k->has_variable_vring_alignment) { 1449 qemu_put_be32(f, vdev->vq[i].vring.align); 1450 } 1451 /* XXX virtio-1 devices */ 1452 qemu_put_be64(f, vdev->vq[i].vring.desc); 1453 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); 1454 if (k->save_queue) { 1455 k->save_queue(qbus->parent, i, f); 1456 } 1457 } 1458 1459 if (vdc->save != NULL) { 1460 vdc->save(vdev, f); 1461 } 1462 1463 /* Subsections */ 1464 vmstate_save_state(f, &vmstate_virtio, vdev, NULL); 1465 } 1466 1467 /* A wrapper for use as a VMState .put function */ 1468 void virtio_vmstate_save(QEMUFile *f, void *opaque, size_t size) 1469 { 1470 virtio_save(VIRTIO_DEVICE(opaque), f); 1471 } 1472 1473 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) 1474 { 1475 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1476 bool bad = (val & ~(vdev->host_features)) != 0; 1477 1478 val &= vdev->host_features; 1479 if (k->set_features) { 1480 k->set_features(vdev, val); 1481 } 1482 vdev->guest_features = val; 1483 return bad ? -1 : 0; 1484 } 1485 1486 int virtio_set_features(VirtIODevice *vdev, uint64_t val) 1487 { 1488 /* 1489 * The driver must not attempt to set features after feature negotiation 1490 * has finished. 1491 */ 1492 if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) { 1493 return -EINVAL; 1494 } 1495 return virtio_set_features_nocheck(vdev, val); 1496 } 1497 1498 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) 1499 { 1500 int i, ret; 1501 int32_t config_len; 1502 uint32_t num; 1503 uint32_t features; 1504 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1505 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1506 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); 1507 1508 /* 1509 * We poison the endianness to ensure it does not get used before 1510 * subsections have been loaded. 1511 */ 1512 vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN; 1513 1514 if (k->load_config) { 1515 ret = k->load_config(qbus->parent, f); 1516 if (ret) 1517 return ret; 1518 } 1519 1520 qemu_get_8s(f, &vdev->status); 1521 qemu_get_8s(f, &vdev->isr); 1522 qemu_get_be16s(f, &vdev->queue_sel); 1523 if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) { 1524 return -1; 1525 } 1526 qemu_get_be32s(f, &features); 1527 1528 /* 1529 * Temporarily set guest_features low bits - needed by 1530 * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 1531 * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ. 1532 * 1533 * Note: devices should always test host features in future - don't create 1534 * new dependencies like this. 1535 */ 1536 vdev->guest_features = features; 1537 1538 config_len = qemu_get_be32(f); 1539 1540 /* 1541 * There are cases where the incoming config can be bigger or smaller 1542 * than what we have; so load what we have space for, and skip 1543 * any excess that's in the stream. 1544 */ 1545 qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len)); 1546 1547 while (config_len > vdev->config_len) { 1548 qemu_get_byte(f); 1549 config_len--; 1550 } 1551 1552 num = qemu_get_be32(f); 1553 1554 if (num > VIRTIO_QUEUE_MAX) { 1555 error_report("Invalid number of virtqueues: 0x%x", num); 1556 return -1; 1557 } 1558 1559 for (i = 0; i < num; i++) { 1560 vdev->vq[i].vring.num = qemu_get_be32(f); 1561 if (k->has_variable_vring_alignment) { 1562 vdev->vq[i].vring.align = qemu_get_be32(f); 1563 } 1564 vdev->vq[i].vring.desc = qemu_get_be64(f); 1565 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); 1566 vdev->vq[i].signalled_used_valid = false; 1567 vdev->vq[i].notification = true; 1568 1569 if (vdev->vq[i].vring.desc) { 1570 /* XXX virtio-1 devices */ 1571 virtio_queue_update_rings(vdev, i); 1572 } else if (vdev->vq[i].last_avail_idx) { 1573 error_report("VQ %d address 0x0 " 1574 "inconsistent with Host index 0x%x", 1575 i, vdev->vq[i].last_avail_idx); 1576 return -1; 1577 } 1578 if (k->load_queue) { 1579 ret = k->load_queue(qbus->parent, i, f); 1580 if (ret) 1581 return ret; 1582 } 1583 } 1584 1585 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); 1586 1587 if (vdc->load != NULL) { 1588 ret = vdc->load(vdev, f, version_id); 1589 if (ret) { 1590 return ret; 1591 } 1592 } 1593 1594 /* Subsections */ 1595 ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1); 1596 if (ret) { 1597 return ret; 1598 } 1599 1600 if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) { 1601 vdev->device_endian = virtio_default_endian(); 1602 } 1603 1604 if (virtio_64bit_features_needed(vdev)) { 1605 /* 1606 * Subsection load filled vdev->guest_features. Run them 1607 * through virtio_set_features to sanity-check them against 1608 * host_features. 1609 */ 1610 uint64_t features64 = vdev->guest_features; 1611 if (virtio_set_features_nocheck(vdev, features64) < 0) { 1612 error_report("Features 0x%" PRIx64 " unsupported. " 1613 "Allowed features: 0x%" PRIx64, 1614 features64, vdev->host_features); 1615 return -1; 1616 } 1617 } else { 1618 if (virtio_set_features_nocheck(vdev, features) < 0) { 1619 error_report("Features 0x%x unsupported. " 1620 "Allowed features: 0x%" PRIx64, 1621 features, vdev->host_features); 1622 return -1; 1623 } 1624 } 1625 1626 for (i = 0; i < num; i++) { 1627 if (vdev->vq[i].vring.desc) { 1628 uint16_t nheads; 1629 nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx; 1630 /* Check it isn't doing strange things with descriptor numbers. */ 1631 if (nheads > vdev->vq[i].vring.num) { 1632 error_report("VQ %d size 0x%x Guest index 0x%x " 1633 "inconsistent with Host index 0x%x: delta 0x%x", 1634 i, vdev->vq[i].vring.num, 1635 vring_avail_idx(&vdev->vq[i]), 1636 vdev->vq[i].last_avail_idx, nheads); 1637 return -1; 1638 } 1639 vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]); 1640 vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]); 1641 } 1642 } 1643 1644 return 0; 1645 } 1646 1647 void virtio_cleanup(VirtIODevice *vdev) 1648 { 1649 qemu_del_vm_change_state_handler(vdev->vmstate); 1650 g_free(vdev->config); 1651 g_free(vdev->vq); 1652 g_free(vdev->vector_queues); 1653 } 1654 1655 static void virtio_vmstate_change(void *opaque, int running, RunState state) 1656 { 1657 VirtIODevice *vdev = opaque; 1658 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1659 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1660 bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); 1661 vdev->vm_running = running; 1662 1663 if (backend_run) { 1664 virtio_set_status(vdev, vdev->status); 1665 } 1666 1667 if (k->vmstate_change) { 1668 k->vmstate_change(qbus->parent, backend_run); 1669 } 1670 1671 if (!backend_run) { 1672 virtio_set_status(vdev, vdev->status); 1673 } 1674 } 1675 1676 void virtio_instance_init_common(Object *proxy_obj, void *data, 1677 size_t vdev_size, const char *vdev_name) 1678 { 1679 DeviceState *vdev = data; 1680 1681 object_initialize(vdev, vdev_size, vdev_name); 1682 object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL); 1683 object_unref(OBJECT(vdev)); 1684 qdev_alias_all_properties(vdev, proxy_obj); 1685 } 1686 1687 void virtio_init(VirtIODevice *vdev, const char *name, 1688 uint16_t device_id, size_t config_size) 1689 { 1690 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1691 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1692 int i; 1693 int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0; 1694 1695 if (nvectors) { 1696 vdev->vector_queues = 1697 g_malloc0(sizeof(*vdev->vector_queues) * nvectors); 1698 } 1699 1700 vdev->device_id = device_id; 1701 vdev->status = 0; 1702 vdev->isr = 0; 1703 vdev->queue_sel = 0; 1704 vdev->config_vector = VIRTIO_NO_VECTOR; 1705 vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX); 1706 vdev->vm_running = runstate_is_running(); 1707 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1708 vdev->vq[i].vector = VIRTIO_NO_VECTOR; 1709 vdev->vq[i].vdev = vdev; 1710 vdev->vq[i].queue_index = i; 1711 } 1712 1713 vdev->name = name; 1714 vdev->config_len = config_size; 1715 if (vdev->config_len) { 1716 vdev->config = g_malloc0(config_size); 1717 } else { 1718 vdev->config = NULL; 1719 } 1720 vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, 1721 vdev); 1722 vdev->device_endian = virtio_default_endian(); 1723 vdev->use_guest_notifier_mask = true; 1724 } 1725 1726 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) 1727 { 1728 return vdev->vq[n].vring.desc; 1729 } 1730 1731 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n) 1732 { 1733 return vdev->vq[n].vring.avail; 1734 } 1735 1736 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n) 1737 { 1738 return vdev->vq[n].vring.used; 1739 } 1740 1741 hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n) 1742 { 1743 return vdev->vq[n].vring.desc; 1744 } 1745 1746 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) 1747 { 1748 return sizeof(VRingDesc) * vdev->vq[n].vring.num; 1749 } 1750 1751 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) 1752 { 1753 return offsetof(VRingAvail, ring) + 1754 sizeof(uint16_t) * vdev->vq[n].vring.num; 1755 } 1756 1757 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) 1758 { 1759 return offsetof(VRingUsed, ring) + 1760 sizeof(VRingUsedElem) * vdev->vq[n].vring.num; 1761 } 1762 1763 hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n) 1764 { 1765 return vdev->vq[n].vring.used - vdev->vq[n].vring.desc + 1766 virtio_queue_get_used_size(vdev, n); 1767 } 1768 1769 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) 1770 { 1771 return vdev->vq[n].last_avail_idx; 1772 } 1773 1774 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx) 1775 { 1776 vdev->vq[n].last_avail_idx = idx; 1777 vdev->vq[n].shadow_avail_idx = idx; 1778 } 1779 1780 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n) 1781 { 1782 vdev->vq[n].signalled_used_valid = false; 1783 } 1784 1785 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n) 1786 { 1787 return vdev->vq + n; 1788 } 1789 1790 uint16_t virtio_get_queue_index(VirtQueue *vq) 1791 { 1792 return vq->queue_index; 1793 } 1794 1795 static void virtio_queue_guest_notifier_read(EventNotifier *n) 1796 { 1797 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier); 1798 if (event_notifier_test_and_clear(n)) { 1799 virtio_irq(vq); 1800 } 1801 } 1802 1803 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, 1804 bool with_irqfd) 1805 { 1806 if (assign && !with_irqfd) { 1807 event_notifier_set_handler(&vq->guest_notifier, false, 1808 virtio_queue_guest_notifier_read); 1809 } else { 1810 event_notifier_set_handler(&vq->guest_notifier, false, NULL); 1811 } 1812 if (!assign) { 1813 /* Test and clear notifier before closing it, 1814 * in case poll callback didn't have time to run. */ 1815 virtio_queue_guest_notifier_read(&vq->guest_notifier); 1816 } 1817 } 1818 1819 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) 1820 { 1821 return &vq->guest_notifier; 1822 } 1823 1824 static void virtio_queue_host_notifier_aio_read(EventNotifier *n) 1825 { 1826 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 1827 if (event_notifier_test_and_clear(n)) { 1828 virtio_queue_notify_aio_vq(vq); 1829 } 1830 } 1831 1832 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, 1833 VirtIOHandleOutput handle_output) 1834 { 1835 if (handle_output) { 1836 vq->handle_aio_output = handle_output; 1837 aio_set_event_notifier(ctx, &vq->host_notifier, true, 1838 virtio_queue_host_notifier_aio_read); 1839 } else { 1840 aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL); 1841 /* Test and clear notifier before after disabling event, 1842 * in case poll callback didn't have time to run. */ 1843 virtio_queue_host_notifier_aio_read(&vq->host_notifier); 1844 vq->handle_aio_output = NULL; 1845 } 1846 } 1847 1848 static void virtio_queue_host_notifier_read(EventNotifier *n) 1849 { 1850 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 1851 if (event_notifier_test_and_clear(n)) { 1852 virtio_queue_notify_vq(vq); 1853 } 1854 } 1855 1856 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, 1857 bool set_handler) 1858 { 1859 AioContext *ctx = qemu_get_aio_context(); 1860 if (assign && set_handler) { 1861 if (vq->use_aio) { 1862 aio_set_event_notifier(ctx, &vq->host_notifier, true, 1863 virtio_queue_host_notifier_read); 1864 } else { 1865 event_notifier_set_handler(&vq->host_notifier, true, 1866 virtio_queue_host_notifier_read); 1867 } 1868 } else { 1869 if (vq->use_aio) { 1870 aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL); 1871 } else { 1872 event_notifier_set_handler(&vq->host_notifier, true, NULL); 1873 } 1874 } 1875 if (!assign) { 1876 /* Test and clear notifier before after disabling event, 1877 * in case poll callback didn't have time to run. */ 1878 virtio_queue_host_notifier_read(&vq->host_notifier); 1879 } 1880 } 1881 1882 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) 1883 { 1884 return &vq->host_notifier; 1885 } 1886 1887 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) 1888 { 1889 g_free(vdev->bus_name); 1890 vdev->bus_name = g_strdup(bus_name); 1891 } 1892 1893 static void virtio_device_realize(DeviceState *dev, Error **errp) 1894 { 1895 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 1896 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 1897 Error *err = NULL; 1898 1899 if (vdc->realize != NULL) { 1900 vdc->realize(dev, &err); 1901 if (err != NULL) { 1902 error_propagate(errp, err); 1903 return; 1904 } 1905 } 1906 1907 virtio_bus_device_plugged(vdev, &err); 1908 if (err != NULL) { 1909 error_propagate(errp, err); 1910 return; 1911 } 1912 } 1913 1914 static void virtio_device_unrealize(DeviceState *dev, Error **errp) 1915 { 1916 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 1917 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 1918 Error *err = NULL; 1919 1920 virtio_bus_device_unplugged(vdev); 1921 1922 if (vdc->unrealize != NULL) { 1923 vdc->unrealize(dev, &err); 1924 if (err != NULL) { 1925 error_propagate(errp, err); 1926 return; 1927 } 1928 } 1929 1930 g_free(vdev->bus_name); 1931 vdev->bus_name = NULL; 1932 } 1933 1934 static Property virtio_properties[] = { 1935 DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features), 1936 DEFINE_PROP_END_OF_LIST(), 1937 }; 1938 1939 static void virtio_device_class_init(ObjectClass *klass, void *data) 1940 { 1941 /* Set the default value here. */ 1942 DeviceClass *dc = DEVICE_CLASS(klass); 1943 1944 dc->realize = virtio_device_realize; 1945 dc->unrealize = virtio_device_unrealize; 1946 dc->bus_type = TYPE_VIRTIO_BUS; 1947 dc->props = virtio_properties; 1948 } 1949 1950 static const TypeInfo virtio_device_info = { 1951 .name = TYPE_VIRTIO_DEVICE, 1952 .parent = TYPE_DEVICE, 1953 .instance_size = sizeof(VirtIODevice), 1954 .class_init = virtio_device_class_init, 1955 .abstract = true, 1956 .class_size = sizeof(VirtioDeviceClass), 1957 }; 1958 1959 static void virtio_register_types(void) 1960 { 1961 type_register_static(&virtio_device_info); 1962 } 1963 1964 type_init(virtio_register_types) 1965