/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/dev/virtio/virtqueue.c,v 1.2 2012/04/14 05:48:04 grehan Exp $
 */

/*
 * Implements the virtqueue interface as basically described
 * in the original VirtIO paper.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/sglist.h>
#include <sys/serialize.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/cpu.h>
#include <machine/atomic.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include "virtio.h"
#include "virtqueue.h"
#include "virtio_ring.h"

#include "virtio_bus_if.h"

struct virtqueue {
	device_t		 vq_dev;
	char			 vq_name[VIRTQUEUE_MAX_NAME_SZ];
	uint16_t		 vq_queue_index;
	uint16_t		 vq_nentries;
	uint32_t		 vq_flags;

#define	VIRTQUEUE_FLAG_EVENT_IDX	0x0002

	int			 vq_alignment;
	int			 vq_ring_size;
	void			*vq_ring_mem;

	virtqueue_intr_t	*vq_intrhand;
	void			*vq_intrhand_arg;

	struct vring		 vq_ring;
	uint16_t		 vq_free_cnt;
	uint16_t		 vq_queued_cnt;
	/*
	 * Head of the free chain in the descriptor table. If
	 * there are no free descriptors, this will be set to
	 * VQ_RING_DESC_CHAIN_END.
	 */
	uint16_t		 vq_desc_head_idx;
	/*
	 * Last consumed descriptor in the used table,
	 * trails vq_ring.used->idx.
	 */
	uint16_t		 vq_used_cons_idx;

	struct vq_desc_extra {
		void		*cookie;
		uint16_t	 ndescs;
	} vq_descx[0];
};

/*
 * The maximum virtqueue size is 2^15. Use that value as the end of
 * descriptor chain terminator since it will never be a valid index
 * in the descriptor table. This is used to verify we are correctly
 * handling vq_free_cnt.
 */
#define	VQ_RING_DESC_CHAIN_END	32768
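
/*
 * Illustrative only, compiled out: a minimal sketch of how the free
 * descriptor list is threaded through the desc[].next fields and
 * terminated by VQ_RING_DESC_CHAIN_END. This helper is hypothetical
 * (not part of the driver); it just makes the invariant concrete:
 * walking next links from vq_desc_head_idx until the sentinel should
 * visit exactly vq_free_cnt descriptors.
 */
#if 0
static int
vq_count_free_chain(struct virtqueue *vq)
{
	uint16_t idx;
	int count;

	count = 0;
	/* Walk from the head until the sentinel terminates the chain. */
	for (idx = vq->vq_desc_head_idx; idx != VQ_RING_DESC_CHAIN_END;
	     idx = vq->vq_ring.desc[idx].next)
		count++;

	/* Should always agree with the cached free count. */
	KASSERT(count == vq->vq_free_cnt, ("free chain/count mismatch"));
	return (count);
}
#endif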

#define	VQASSERT(_vq, _exp, _msg, ...)				\
	KASSERT((_exp), ("%s: %s - "_msg, __func__, (_vq)->vq_name,	\
	    ##__VA_ARGS__))

#define	VQ_RING_ASSERT_VALID_IDX(_vq, _idx)			\
	VQASSERT((_vq), (_idx) < (_vq)->vq_nentries,		\
	    "invalid ring index: %d, max: %d", (_idx),		\
	    (_vq)->vq_nentries)

#define	VQ_RING_ASSERT_CHAIN_TERM(_vq)				\
	VQASSERT((_vq), (_vq)->vq_desc_head_idx ==		\
	    VQ_RING_DESC_CHAIN_END, "full ring terminated "	\
	    "incorrectly: head idx: %d", (_vq)->vq_desc_head_idx)

static void	vq_ring_init(struct virtqueue *);
static void	vq_ring_update_avail(struct virtqueue *, uint16_t);
static uint16_t	vq_ring_enqueue_segments(struct virtqueue *,
		    struct vring_desc *, uint16_t, struct sglist *, int, int);
static int	vq_ring_must_notify_host(struct virtqueue *);
static void	vq_ring_notify_host(struct virtqueue *);
static void	vq_ring_free_chain(struct virtqueue *, uint16_t);

uint64_t
virtqueue_filter_features(uint64_t features)
{
	uint64_t mask;

	mask = (1 << VIRTIO_TRANSPORT_F_START) - 1;
	mask |= VIRTIO_RING_F_EVENT_IDX;

	return (features & mask);
}

int
virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size, int align,
    vm_paddr_t highaddr, struct vq_alloc_info *info, struct virtqueue **vqp)
{
	struct virtqueue *vq;
	int error;

	*vqp = NULL;
	error = 0;

	if (size == 0) {
		device_printf(dev,
		    "virtqueue %d (%s) does not exist (size is zero)\n",
		    queue, info->vqai_name);
		return (ENODEV);
	} else if (!powerof2(size)) {
		device_printf(dev,
		    "virtqueue %d (%s) size is not a power of 2: %d\n",
		    queue, info->vqai_name, size);
		return (ENXIO);
	}

	vq = kmalloc(sizeof(struct virtqueue) +
	    size * sizeof(struct vq_desc_extra), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vq == NULL) {
		device_printf(dev, "cannot allocate virtqueue\n");
		return (ENOMEM);
	}

	vq->vq_dev = dev;
	strlcpy(vq->vq_name, info->vqai_name, sizeof(vq->vq_name));
	vq->vq_queue_index = queue;
	vq->vq_alignment = align;
	vq->vq_nentries = size;
	vq->vq_free_cnt = size;
	vq->vq_intrhand = info->vqai_intr;
	vq->vq_intrhand_arg = info->vqai_intr_arg;

	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0)
		vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX;

	vq->vq_ring_size = round_page(vring_size(size, align));
	vq->vq_ring_mem = contigmalloc(vq->vq_ring_size, M_DEVBUF,
	    M_NOWAIT | M_ZERO, 0, highaddr, PAGE_SIZE, 0);
	if (vq->vq_ring_mem == NULL) {
		device_printf(dev,
		    "cannot allocate memory for virtqueue ring\n");
		error = ENOMEM;
		goto fail;
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	*vqp = vq;

fail:
	if (error)
		virtqueue_free(vq);

	return (error);
}
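
/*
 * Illustrative only, compiled out: how a device driver might create a
 * virtqueue with virtqueue_alloc(). The softc, handler, queue index, and
 * queue size below are hypothetical; the vq_alloc_info field names match
 * the ones consumed above (vqai_name, vqai_intr, vqai_intr_arg). The same
 * hypothetical softc is reused by the later sketches in this file.
 */
#if 0
struct mydev_softc {
	struct virtqueue	*sc_vq;
	struct lwkt_serialize	 sc_serializer;
};

static void	mydev_vq_intr(void *);	/* hypothetical handler */

static int
mydev_alloc_vq(device_t dev, struct mydev_softc *sc)
{
	struct vq_alloc_info info;

	bzero(&info, sizeof(info));
	info.vqai_name = "mydev requests";	/* assumes a string pointer */
	info.vqai_intr = mydev_vq_intr;
	info.vqai_intr_arg = sc;

	/* Queue index 0, 128 entries, page alignment, no address limit. */
	return (virtqueue_alloc(dev, 0, 128, PAGE_SIZE, BUS_SPACE_MAXADDR,
	    &info, &sc->sc_vq));
}
#endif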

int
virtqueue_reinit(struct virtqueue *vq, uint16_t size)
{
	struct vq_desc_extra *dxp;
	int i;

	if (vq->vq_nentries != size) {
		device_printf(vq->vq_dev,
		    "%s: '%s' changed size; old=%hu, new=%hu\n",
		    __func__, vq->vq_name, vq->vq_nentries, size);
		return (EINVAL);
	}

	/* Warn if the virtqueue was not properly cleaned up. */
	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev,
		    "%s: warning, '%s' virtqueue not empty, "
		    "leaking %d entries\n", __func__, vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	vq->vq_desc_head_idx = 0;
	vq->vq_used_cons_idx = 0;
	vq->vq_queued_cnt = 0;
	vq->vq_free_cnt = vq->vq_nentries;

	/* To be safe, reset all our allocated memory. */
	bzero(vq->vq_ring_mem, vq->vq_ring_size);
	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];
		dxp->cookie = NULL;
		dxp->ndescs = 0;
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	return (0);
}

void
virtqueue_free(struct virtqueue *vq)
{
	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev, "%s: freeing non-empty virtqueue, "
		    "leaking %d entries\n", vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	if (vq->vq_ring_mem != NULL) {
		contigfree(vq->vq_ring_mem, vq->vq_ring_size, M_DEVBUF);
		vq->vq_ring_size = 0;
		vq->vq_ring_mem = NULL;
	}

	kfree(vq, M_DEVBUF);
}

vm_paddr_t
virtqueue_paddr(struct virtqueue *vq)
{
	return (vtophys(vq->vq_ring_mem));
}

int
virtqueue_size(struct virtqueue *vq)
{
	return (vq->vq_nentries);
}

int
virtqueue_empty(struct virtqueue *vq)
{
	return (vq->vq_nentries == vq->vq_free_cnt);
}

int
virtqueue_full(struct virtqueue *vq)
{
	return (vq->vq_free_cnt == 0);
}

void
virtqueue_notify(struct virtqueue *vq, lwkt_serialize_t interlock)
{
	/* Ensure updated avail->idx is visible to host. */
	cpu_mfence();

	if (vq_ring_must_notify_host(vq)) {
		lwkt_serialize_exit(interlock);
		vq_ring_notify_host(vq);
		lwkt_serialize_enter(interlock);
	}
	vq->vq_queued_cnt = 0;
}

int
virtqueue_nused(struct virtqueue *vq)
{
	uint16_t used_idx, nused;

	used_idx = vq->vq_ring.used->idx;
	nused = (uint16_t)(used_idx - vq->vq_used_cons_idx);
	VQASSERT(vq, nused <= vq->vq_nentries, "used more than available");

	return (nused);
}

int
virtqueue_intr(struct virtqueue *vq)
{
	if (vq->vq_intrhand == NULL ||
	    vq->vq_used_cons_idx == vq->vq_ring.used->idx)
		return (0);

	vq->vq_intrhand(vq->vq_intrhand_arg);

	return (1);
}

/*
 * Enable interrupts on a given virtqueue. Returns 1 if there are
 * additional entries to process on the virtqueue after we return.
 */
int
virtqueue_enable_intr(struct virtqueue *vq)
{
	/*
	 * Enable interrupts, making sure we get the latest index of
	 * what's already been consumed. Clearing the NO_INTERRUPT flag
	 * covers the non-event-index case; with VIRTIO_RING_F_EVENT_IDX
	 * we additionally ask the host to interrupt us at the next
	 * used entry.
	 */
	vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
		vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx;

	cpu_mfence();

	/*
	 * Additional items may have been consumed since we last
	 * checked and enabled interrupts above. Let our caller know
	 * so it processes the new entries.
	 */
	if (vq->vq_used_cons_idx != vq->vq_ring.used->idx)
		return (1);

	return (0);
}
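
/*
 * Illustrative only, compiled out: the usual consume/re-enable pattern
 * built on virtqueue_enable_intr()'s return value. mydev_process() and
 * the softc are hypothetical; the loop structure follows directly from
 * the contract documented above (return 1 means entries raced in while
 * interrupts were off).
 */
#if 0
static void
mydev_vq_intr(void *arg)
{
	struct mydev_softc *sc = arg;
	void *cookie;
	uint32_t len;

again:
	/* Drain everything the host has completed so far. */
	while ((cookie = virtqueue_dequeue(sc->sc_vq, &len)) != NULL)
		mydev_process(sc, cookie, len);

	/* Re-enable; if more entries appeared meanwhile, go around again. */
	if (virtqueue_enable_intr(sc->sc_vq) != 0) {
		virtqueue_disable_intr(sc->sc_vq);
		goto again;
	}
}
#endif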

int
virtqueue_postpone_intr(struct virtqueue *vq)
{
	uint16_t ndesc;

	/*
	 * Postpone until at least half of the available descriptors
	 * have been consumed.
	 *
	 * XXX Adaptive factor? (Linux uses 3/4)
	 */
	ndesc = (uint16_t)(vq->vq_ring.avail->idx - vq->vq_used_cons_idx) / 2;

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
		vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc;
	else
		vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;

	cpu_mfence();

	/*
	 * Enough items may have already been consumed to meet our
	 * threshold since we last checked. Let our caller know so
	 * it processes the new entries.
	 */
	if (virtqueue_nused(vq) > ndesc)
		return (1);

	return (0);
}

void
virtqueue_disable_intr(struct virtqueue *vq)
{
	/*
	 * Note this is only considered a hint to the host.
	 */
	if ((vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) == 0)
		vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
}

int
virtqueue_enqueue(struct virtqueue *vq, void *cookie, struct sglist *sg,
    int readable, int writable)
{
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx, idx;

	needed = readable + writable;

	VQASSERT(vq, cookie != NULL, "enqueuing with no cookie");
	VQASSERT(vq, needed == sg->sg_nseg,
	    "segment count mismatch, %d, %d", needed, sg->sg_nseg);

	if (needed < 1)
		return (EINVAL);
	if (vq->vq_free_cnt == 0)
		return (ENOSPC);
	if (vq->vq_free_cnt < needed)
		return (EMSGSIZE);

	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
	dxp->ndescs = needed;

	idx = vq_ring_enqueue_segments(vq, vq->vq_ring.desc, head_idx,
	    sg, readable, writable);

	vq->vq_desc_head_idx = idx;
	vq->vq_free_cnt -= needed;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, idx);

	vq_ring_update_avail(vq, head_idx);

	return (0);
}

void *
virtqueue_dequeue(struct virtqueue *vq, uint32_t *len)
{
	struct vring_used_elem *uep;
	void *cookie;
	uint16_t used_idx, desc_idx;

	if (vq->vq_used_cons_idx == vq->vq_ring.used->idx)
		return (NULL);

	used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1);
	uep = &vq->vq_ring.used->ring[used_idx];

	cpu_mfence();
	desc_idx = (uint16_t) uep->id;
	if (len != NULL)
		*len = uep->len;

	vq_ring_free_chain(vq, desc_idx);

	cookie = vq->vq_descx[desc_idx].cookie;
	VQASSERT(vq, cookie != NULL, "no cookie for index %d", desc_idx);
	vq->vq_descx[desc_idx].cookie = NULL;

	return (cookie);
}

void *
virtqueue_poll(struct virtqueue *vq, uint32_t *len)
{
	void *cookie;

	/* We only poll the virtqueue when dumping to virtio-blk. */
	while ((cookie = virtqueue_dequeue(vq, len)) == NULL)
		;

	return (cookie);
}
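
/*
 * Illustrative only, compiled out: a synchronous request sketch in the
 * style of the virtio-blk dump path mentioned above. The request/response
 * buffers and softc are hypothetical; sglist_init()/sglist_append() are
 * the <sys/sglist.h> interfaces. Assumes the caller holds the serializer
 * (virtqueue_notify() drops and re-acquires it) and that no other
 * requests are outstanding, so virtqueue_poll() returns our cookie.
 */
#if 0
static int
mydev_request_sync(struct mydev_softc *sc, void *req, size_t reqlen,
    void *resp, size_t resplen)
{
	struct sglist_seg segs[2];
	struct sglist sg;
	uint32_t len;
	int error;

	sglist_init(&sg, 2, segs);
	error = sglist_append(&sg, req, reqlen);	/* host-readable */
	if (error == 0)
		error = sglist_append(&sg, resp, resplen); /* host-writable */
	if (error)
		return (error);

	/* One readable and one writable segment, then kick the host. */
	error = virtqueue_enqueue(sc->sc_vq, req, &sg, 1, 1);
	if (error)
		return (error);
	virtqueue_notify(sc->sc_vq, &sc->sc_serializer);

	/* Spin until the host hands the request back. */
	if (virtqueue_poll(sc->sc_vq, &len) != req)
		return (EIO);
	return (0);
}
#endif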

void *
virtqueue_drain(struct virtqueue *vq, int *last)
{
	void *cookie;
	int idx;

	cookie = NULL;
	idx = *last;

	while (idx < vq->vq_nentries && cookie == NULL) {
		if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
			vq->vq_descx[idx].cookie = NULL;
			/* Free chain to keep free count consistent. */
			vq_ring_free_chain(vq, idx);
		}
		idx++;
	}

	*last = idx;

	return (cookie);
}

void
virtqueue_dump(struct virtqueue *vq)
{
	if (vq == NULL)
		return;

	kprintf("VQ: %s - size=%d; free=%d; used=%d; queued=%d; "
	    "desc_head_idx=%d; avail.idx=%d; used_cons_idx=%d; "
	    "used.idx=%d; avail.flags=0x%x; used.flags=0x%x\n",
	    vq->vq_name, vq->vq_nentries, vq->vq_free_cnt,
	    virtqueue_nused(vq), vq->vq_queued_cnt, vq->vq_desc_head_idx,
	    vq->vq_ring.avail->idx, vq->vq_used_cons_idx,
	    vq->vq_ring.used->idx, vq->vq_ring.avail->flags,
	    vq->vq_ring.used->flags);
}

static void
vq_ring_init(struct virtqueue *vq)
{
	struct vring *vr;
	char *ring_mem;
	int i, size;

	ring_mem = vq->vq_ring_mem;
	size = vq->vq_nentries;
	vr = &vq->vq_ring;

	vring_init(vr, size, ring_mem, vq->vq_alignment);

	/* Chain every descriptor into the initial free list. */
	for (i = 0; i < size - 1; i++)
		vr->desc[i].next = i + 1;
	vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
}

static void
vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
{
	uint16_t avail_idx;

	/*
	 * Place the head of the descriptor chain into the next slot and make
	 * it usable to the host. The chain is made available now rather than
	 * deferring to virtqueue_notify() in the hopes that if the host is
	 * currently running on another CPU, we can keep it processing the new
	 * descriptor.
	 */
	avail_idx = vq->vq_ring.avail->idx & (vq->vq_nentries - 1);
	vq->vq_ring.avail->ring[avail_idx] = desc_idx;

	cpu_mfence();
	vq->vq_ring.avail->idx++;

	/* Keep pending count until virtqueue_notify() for debugging. */
	vq->vq_queued_cnt++;
}

static uint16_t
vq_ring_enqueue_segments(struct virtqueue *vq, struct vring_desc *desc,
    uint16_t head_idx, struct sglist *sg, int readable, int writable)
{
	struct sglist_seg *seg;
	struct vring_desc *dp;
	int i, needed;
	uint16_t idx;

	needed = readable + writable;

	for (i = 0, idx = head_idx, seg = sg->sg_segs;
	     i < needed;
	     i++, idx = dp->next, seg++) {
		VQASSERT(vq, idx != VQ_RING_DESC_CHAIN_END,
		    "premature end of free desc chain");

		dp = &desc[idx];
		dp->addr = seg->ss_paddr;
		dp->len = seg->ss_len;
		dp->flags = 0;

		if (i < needed - 1)
			dp->flags |= VRING_DESC_F_NEXT;
		if (i >= readable)
			dp->flags |= VRING_DESC_F_WRITE;
	}

	return (idx);
}

static int
vq_ring_must_notify_host(struct virtqueue *vq)
{
	uint16_t new_idx, prev_idx, event_idx;

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		new_idx = vq->vq_ring.avail->idx;
		prev_idx = new_idx - vq->vq_queued_cnt;
		event_idx = vring_avail_event(&vq->vq_ring);

		return (vring_need_event(event_idx, new_idx, prev_idx) != 0);
	}

	return ((vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) == 0);
}
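
/*
 * Worked example for the event-index test above. vring_need_event() in
 * virtio_ring.h reduces to:
 *
 *	(uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - prev_idx)
 *
 * i.e. notify if the host's avail event index lies within the batch of
 * entries [prev_idx, new_idx) we just made available. With prev_idx = 10
 * and vq_queued_cnt = 4 (so new_idx = 14):
 *
 *	event_idx = 12: (14 - 12 - 1) = 1 < (14 - 10) = 4  -> notify
 *	event_idx = 9:  (14 - 9 - 1)  = 4 < 4 is false     -> skip
 *
 * The unsigned 16-bit arithmetic keeps the comparison correct across
 * index wraparound.
 */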

static void
vq_ring_notify_host(struct virtqueue *vq)
{
	VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index);
}

static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;

	VQ_RING_ASSERT_VALID_IDX(vq, desc_idx);
	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];

	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);

	vq->vq_free_cnt += dxp->ndescs;
	dxp->ndescs--;

	while (dp->flags & VRING_DESC_F_NEXT) {
		VQ_RING_ASSERT_VALID_IDX(vq, dp->next);
		dp = &vq->vq_ring.desc[dp->next];
		dxp->ndescs--;
	}
	VQASSERT(vq, dxp->ndescs == 0, "failed to free entire desc chain");

	/*
	 * We must append the existing free chain, if any, to the end of
	 * the newly freed chain. If the virtqueue was completely used,
	 * then the head would be VQ_RING_DESC_CHAIN_END (asserted above).
	 */
	dp->next = vq->vq_desc_head_idx;
	vq->vq_desc_head_idx = desc_idx;
}
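
/*
 * Illustrative only, compiled out: reclaiming outstanding cookies at
 * detach time with virtqueue_drain(). mydev_free_request() and the softc
 * are hypothetical; the loop uses *last as the resume cursor, exactly as
 * virtqueue_drain() maintains it.
 */
#if 0
static void
mydev_drain_vq(struct mydev_softc *sc)
{
	void *cookie;
	int last;

	last = 0;
	while ((cookie = virtqueue_drain(sc->sc_vq, &last)) != NULL)
		mydev_free_request(sc, cookie);

	KASSERT(virtqueue_empty(sc->sc_vq), ("virtqueue not empty"));
}
#endif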