1 /*- 2 * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $FreeBSD: src/sys/dev/virtio/virtqueue.c,v 1.2 2012/04/14 05:48:04 grehan Exp $ 27 */ 28 29 /* 30 * Implements the virtqueue interface as basically described 31 * in the original VirtIO paper. 32 */ 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/kernel.h> 37 #include <sys/malloc.h> 38 #include <sys/sglist.h> 39 #include <sys/serialize.h> 40 #include <vm/vm.h> 41 #include <vm/pmap.h> 42 43 #include <machine/cpu.h> 44 #include <machine/atomic.h> 45 #include <sys/bus.h> 46 #include <sys/rman.h> 47 48 #include "virtio.h" 49 #include "virtqueue.h" 50 #include "virtio_ring.h" 51 52 #include "virtio_bus_if.h" 53 54 struct virtqueue { 55 device_t vq_dev; 56 char vq_name[VIRTQUEUE_MAX_NAME_SZ]; 57 uint16_t vq_queue_index; 58 uint16_t vq_nentries; 59 uint32_t vq_flags; 60 61 #define VIRTQUEUE_FLAG_EVENT_IDX 0x0002 62 63 int vq_alignment; 64 int vq_ring_size; 65 void *vq_ring_mem; 66 67 virtqueue_intr_t *vq_intrhand; 68 void *vq_intrhand_arg; 69 70 struct vring vq_ring; 71 uint16_t vq_free_cnt; 72 uint16_t vq_queued_cnt; 73 /* 74 * Head of the free chain in the descriptor table. If 75 * there are no free descriptors, this will be set to 76 * VQ_RING_DESC_CHAIN_END. 77 */ 78 uint16_t vq_desc_head_idx; 79 /* 80 * Last consumed descriptor in the used table, 81 * trails vq_ring.used->idx. 82 */ 83 uint16_t vq_used_cons_idx; 84 85 struct vq_desc_extra { 86 void *cookie; 87 uint16_t ndescs; 88 } vq_descx[0]; 89 }; 90 91 /* 92 * The maximum virtqueue size is 2^15. Use that value as the end of 93 * descriptor chain terminator since it will never be a valid index 94 * in the descriptor table. This is used to verify we are correctly 95 * handling vq_free_cnt. 96 */ 97 #define VQ_RING_DESC_CHAIN_END 32768 98 99 #define VQASSERT(_vq, _exp, _msg, ...) \ 100 KASSERT((_exp),("%s: %s - "_msg, __func__, (_vq)->vq_name, \ 101 ##__VA_ARGS__)) 102 103 #define VQ_RING_ASSERT_VALID_IDX(_vq, _idx) \ 104 VQASSERT((_vq), (_idx) < (_vq)->vq_nentries, \ 105 "invalid ring index: %d, max: %d", (_idx), \ 106 (_vq)->vq_nentries) 107 108 #define VQ_RING_ASSERT_CHAIN_TERM(_vq) \ 109 VQASSERT((_vq), (_vq)->vq_desc_head_idx == \ 110 VQ_RING_DESC_CHAIN_END, "full ring terminated " \ 111 "incorrectly: head idx: %d", (_vq)->vq_desc_head_idx) 112 113 static void vq_ring_init(struct virtqueue *); 114 static void vq_ring_update_avail(struct virtqueue *, uint16_t); 115 static uint16_t vq_ring_enqueue_segments(struct virtqueue *, 116 struct vring_desc *, uint16_t, struct sglist *, int, int); 117 static int vq_ring_must_notify_host(struct virtqueue *); 118 static void vq_ring_notify_host(struct virtqueue *); 119 static void vq_ring_free_chain(struct virtqueue *, uint16_t); 120 121 uint64_t 122 virtqueue_filter_features(uint64_t features) 123 { 124 uint64_t mask; 125 126 mask = (1 << VIRTIO_TRANSPORT_F_START) - 1; 127 mask |= VIRTIO_RING_F_EVENT_IDX; 128 129 return (features & mask); 130 } 131 132 int 133 virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size, int align, 134 vm_paddr_t highaddr, struct vq_alloc_info *info, struct virtqueue **vqp) 135 { 136 struct virtqueue *vq; 137 int error; 138 139 *vqp = NULL; 140 error = 0; 141 142 if (size == 0) { 143 device_printf(dev, 144 "virtqueue %d (%s) does not exist (size is zero)\n", 145 queue, info->vqai_name); 146 return (ENODEV); 147 } else if (!powerof2(size)) { 148 device_printf(dev, 149 "virtqueue %d (%s) size is not a power of 2: %d\n", 150 queue, info->vqai_name, size); 151 return (ENXIO); 152 } 153 154 vq = kmalloc(sizeof(struct virtqueue) + 155 size * sizeof(struct vq_desc_extra), M_DEVBUF, M_NOWAIT | M_ZERO); 156 if (vq == NULL) { 157 device_printf(dev, "cannot allocate virtqueue\n"); 158 return (ENOMEM); 159 } 160 161 vq->vq_dev = dev; 162 strlcpy(vq->vq_name, info->vqai_name, sizeof(vq->vq_name)); 163 vq->vq_queue_index = queue; 164 vq->vq_alignment = align; 165 vq->vq_nentries = size; 166 vq->vq_free_cnt = size; 167 vq->vq_intrhand = info->vqai_intr; 168 vq->vq_intrhand_arg = info->vqai_intr_arg; 169 170 if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0) 171 vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX; 172 173 vq->vq_ring_size = round_page(vring_size(size, align)); 174 vq->vq_ring_mem = contigmalloc(vq->vq_ring_size, M_DEVBUF, 175 M_NOWAIT | M_ZERO, 0, highaddr, PAGE_SIZE, 0); 176 if (vq->vq_ring_mem == NULL) { 177 device_printf(dev, 178 "cannot allocate memory for virtqueue ring\n"); 179 error = ENOMEM; 180 goto fail; 181 } 182 183 vq_ring_init(vq); 184 virtqueue_disable_intr(vq); 185 186 *vqp = vq; 187 188 fail: 189 if (error) 190 virtqueue_free(vq); 191 192 return (error); 193 } 194 195 int 196 virtqueue_reinit(struct virtqueue *vq, uint16_t size) 197 { 198 struct vq_desc_extra *dxp; 199 int i; 200 201 if (vq->vq_nentries != size) { 202 device_printf(vq->vq_dev, 203 "%s: '%s' changed size; old=%hu, new=%hu\n", 204 __func__, vq->vq_name, vq->vq_nentries, size); 205 return (EINVAL); 206 } 207 208 /* Warn if the virtqueue was not properly cleaned up. */ 209 if (vq->vq_free_cnt != vq->vq_nentries) { 210 device_printf(vq->vq_dev, 211 "%s: warning, '%s' virtqueue not empty, " 212 "leaking %d entries\n", __func__, vq->vq_name, 213 vq->vq_nentries - vq->vq_free_cnt); 214 } 215 216 vq->vq_desc_head_idx = 0; 217 vq->vq_used_cons_idx = 0; 218 vq->vq_queued_cnt = 0; 219 vq->vq_free_cnt = vq->vq_nentries; 220 221 /* To be safe, reset all our allocated memory. */ 222 bzero(vq->vq_ring_mem, vq->vq_ring_size); 223 for (i = 0; i < vq->vq_nentries; i++) { 224 dxp = &vq->vq_descx[i]; 225 dxp->cookie = NULL; 226 dxp->ndescs = 0; 227 } 228 229 vq_ring_init(vq); 230 virtqueue_disable_intr(vq); 231 232 return (0); 233 } 234 235 void 236 virtqueue_free(struct virtqueue *vq) 237 { 238 239 if (vq->vq_free_cnt != vq->vq_nentries) { 240 device_printf(vq->vq_dev, "%s: freeing non-empty virtqueue, " 241 "leaking %d entries\n", vq->vq_name, 242 vq->vq_nentries - vq->vq_free_cnt); 243 } 244 245 if (vq->vq_ring_mem != NULL) { 246 contigfree(vq->vq_ring_mem, vq->vq_ring_size, M_DEVBUF); 247 vq->vq_ring_size = 0; 248 vq->vq_ring_mem = NULL; 249 } 250 251 kfree(vq, M_DEVBUF); 252 } 253 254 vm_paddr_t 255 virtqueue_paddr(struct virtqueue *vq) 256 { 257 return (vtophys(vq->vq_ring_mem)); 258 } 259 260 int 261 virtqueue_size(struct virtqueue *vq) 262 { 263 return (vq->vq_nentries); 264 } 265 266 int 267 virtqueue_empty(struct virtqueue *vq) 268 { 269 270 return (vq->vq_nentries == vq->vq_free_cnt); 271 } 272 273 int 274 virtqueue_full(struct virtqueue *vq) 275 { 276 277 return (vq->vq_free_cnt == 0); 278 } 279 280 void 281 virtqueue_notify(struct virtqueue *vq, lwkt_serialize_t interlock) 282 { 283 /* Ensure updated avail->idx is visible to host. */ 284 cpu_mfence(); 285 286 if (vq_ring_must_notify_host(vq)) { 287 if (interlock != NULL) 288 lwkt_serialize_exit(interlock); 289 vq_ring_notify_host(vq); 290 if (interlock != NULL) 291 lwkt_serialize_enter(interlock); 292 } 293 vq->vq_queued_cnt = 0; 294 } 295 296 int 297 virtqueue_nused(struct virtqueue *vq) 298 { 299 uint16_t used_idx, nused; 300 301 used_idx = vq->vq_ring.used->idx; 302 nused = (uint16_t)(used_idx - vq->vq_used_cons_idx); 303 VQASSERT(vq, nused <= vq->vq_nentries, "used more than available"); 304 305 return (nused); 306 } 307 308 int 309 virtqueue_intr(struct virtqueue *vq) 310 { 311 312 if (vq->vq_intrhand == NULL || 313 vq->vq_used_cons_idx == vq->vq_ring.used->idx) 314 return (0); 315 316 vq->vq_intrhand(vq->vq_intrhand_arg); 317 318 return (1); 319 } 320 321 /* 322 * Enable interrupts on a given virtqueue. Returns 1 if there are 323 * additional entries to process on the virtqueue after we return. 324 */ 325 int 326 virtqueue_enable_intr(struct virtqueue *vq) 327 { 328 /* 329 * Enable interrupts, making sure we get the latest 330 * index of what's already been consumed. 331 */ 332 vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; 333 if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) { 334 vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx; 335 } else { 336 vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; 337 } 338 339 cpu_mfence(); 340 341 /* 342 * Additional items may have been consumed in the time between 343 * since we last checked and enabled interrupts above. Let our 344 * caller know so it processes the new entries. 345 */ 346 if (vq->vq_used_cons_idx != vq->vq_ring.used->idx) 347 return (1); 348 349 return (0); 350 } 351 352 int 353 virtqueue_postpone_intr(struct virtqueue *vq) 354 { 355 uint16_t ndesc; 356 357 /* 358 * Postpone until at least half of the available descriptors 359 * have been consumed. 360 * 361 * XXX Adaptive factor? (Linux uses 3/4) 362 */ 363 ndesc = (uint16_t)(vq->vq_ring.avail->idx - vq->vq_used_cons_idx) / 2; 364 365 if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) 366 vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc; 367 else 368 vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; 369 370 cpu_mfence(); 371 372 /* 373 * Enough items may have already been consumed to meet our 374 * threshold since we last checked. Let our caller know so 375 * it processes the new entries. 376 */ 377 if (virtqueue_nused(vq) > ndesc) 378 return (1); 379 380 return (0); 381 } 382 383 void 384 virtqueue_disable_intr(struct virtqueue *vq) 385 { 386 /* 387 * Note this is only considered a hint to the host. 388 */ 389 if ((vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) == 0) 390 vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 391 } 392 393 int 394 virtqueue_enqueue(struct virtqueue *vq, void *cookie, struct sglist *sg, 395 int readable, int writable) 396 { 397 struct vq_desc_extra *dxp; 398 int needed; 399 uint16_t head_idx, idx; 400 401 needed = readable + writable; 402 403 VQASSERT(vq, cookie != NULL, "enqueuing with no cookie"); 404 VQASSERT(vq, needed == sg->sg_nseg, 405 "segment count mismatch, %d, %d", needed, sg->sg_nseg); 406 407 if (needed < 1) 408 return (EINVAL); 409 if (vq->vq_free_cnt == 0) 410 return (ENOSPC); 411 if (vq->vq_free_cnt < needed) 412 return (EMSGSIZE); 413 414 head_idx = vq->vq_desc_head_idx; 415 VQ_RING_ASSERT_VALID_IDX(vq, head_idx); 416 dxp = &vq->vq_descx[head_idx]; 417 418 VQASSERT(vq, dxp->cookie == NULL, 419 "cookie already exists for index %d", head_idx); 420 dxp->cookie = cookie; 421 dxp->ndescs = needed; 422 423 idx = vq_ring_enqueue_segments(vq, vq->vq_ring.desc, head_idx, 424 sg, readable, writable); 425 426 vq->vq_desc_head_idx = idx; 427 vq->vq_free_cnt -= needed; 428 if (vq->vq_free_cnt == 0) 429 VQ_RING_ASSERT_CHAIN_TERM(vq); 430 else 431 VQ_RING_ASSERT_VALID_IDX(vq, idx); 432 433 vq_ring_update_avail(vq, head_idx); 434 435 return (0); 436 } 437 438 void * 439 virtqueue_dequeue(struct virtqueue *vq, uint32_t *len) 440 { 441 struct vring_used_elem *uep; 442 void *cookie; 443 uint16_t used_idx, desc_idx; 444 445 if (vq->vq_used_cons_idx == vq->vq_ring.used->idx) 446 return (NULL); 447 448 used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1); 449 uep = &vq->vq_ring.used->ring[used_idx]; 450 451 cpu_mfence(); 452 desc_idx = (uint16_t) uep->id; 453 if (len != NULL) 454 *len = uep->len; 455 456 vq_ring_free_chain(vq, desc_idx); 457 458 cookie = vq->vq_descx[desc_idx].cookie; 459 VQASSERT(vq, cookie != NULL, "no cookie for index %d", desc_idx); 460 vq->vq_descx[desc_idx].cookie = NULL; 461 462 return (cookie); 463 } 464 465 void * 466 virtqueue_poll(struct virtqueue *vq, uint32_t *len) 467 { 468 void *cookie; 469 470 /* We only poll the virtqueue when dumping to virtio-blk */ 471 while ((cookie = virtqueue_dequeue(vq, len)) == NULL) 472 ; 473 474 return (cookie); 475 } 476 477 void * 478 virtqueue_drain(struct virtqueue *vq, int *last) 479 { 480 void *cookie; 481 int idx; 482 483 cookie = NULL; 484 idx = *last; 485 486 while (idx < vq->vq_nentries && cookie == NULL) { 487 if ((cookie = vq->vq_descx[idx].cookie) != NULL) { 488 vq->vq_descx[idx].cookie = NULL; 489 /* Free chain to keep free count consistent. */ 490 vq_ring_free_chain(vq, idx); 491 } 492 idx++; 493 } 494 495 *last = idx; 496 497 return (cookie); 498 } 499 500 void 501 virtqueue_dump(struct virtqueue *vq) 502 { 503 504 if (vq == NULL) 505 return; 506 507 kprintf("VQ: %s - size=%d; free=%d; used=%d; queued=%d; " 508 "desc_head_idx=%d; avail.idx=%d; used_cons_idx=%d; " 509 "used.idx=%d; avail.flags=0x%x; used.flags=0x%x\n", 510 vq->vq_name, vq->vq_nentries, vq->vq_free_cnt, 511 virtqueue_nused(vq), vq->vq_queued_cnt, vq->vq_desc_head_idx, 512 vq->vq_ring.avail->idx, vq->vq_used_cons_idx, 513 vq->vq_ring.used->idx, vq->vq_ring.avail->flags, 514 vq->vq_ring.used->flags); 515 } 516 517 static void 518 vq_ring_init(struct virtqueue *vq) 519 { 520 struct vring *vr; 521 char *ring_mem; 522 int i, size; 523 524 ring_mem = vq->vq_ring_mem; 525 size = vq->vq_nentries; 526 vr = &vq->vq_ring; 527 528 vring_init(vr, size, ring_mem, vq->vq_alignment); 529 530 for (i = 0; i < size - 1; i++) 531 vr->desc[i].next = i + 1; 532 vr->desc[i].next = VQ_RING_DESC_CHAIN_END; 533 } 534 535 static void 536 vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx) 537 { 538 uint16_t avail_idx; 539 540 /* 541 * Place the head of the descriptor chain into the next slot and make 542 * it usable to the host. The chain is made available now rather than 543 * deferring to virtqueue_notify() in the hopes that if the host is 544 * currently running on another CPU, we can keep it processing the new 545 * descriptor. 546 */ 547 avail_idx = vq->vq_ring.avail->idx & (vq->vq_nentries - 1); 548 vq->vq_ring.avail->ring[avail_idx] = desc_idx; 549 550 cpu_mfence(); 551 vq->vq_ring.avail->idx++; 552 553 /* Keep pending count until virtqueue_notify() for debugging. */ 554 vq->vq_queued_cnt++; 555 } 556 557 static uint16_t 558 vq_ring_enqueue_segments(struct virtqueue *vq, struct vring_desc *desc, 559 uint16_t head_idx, struct sglist *sg, int readable, int writable) 560 { 561 struct sglist_seg *seg; 562 struct vring_desc *dp; 563 int i, needed; 564 uint16_t idx; 565 566 needed = readable + writable; 567 568 for (i = 0, idx = head_idx, seg = sg->sg_segs; 569 i < needed; 570 i++, idx = dp->next, seg++) { 571 VQASSERT(vq, idx != VQ_RING_DESC_CHAIN_END, 572 "premature end of free desc chain"); 573 574 dp = &desc[idx]; 575 dp->addr = seg->ss_paddr; 576 dp->len = seg->ss_len; 577 dp->flags = 0; 578 579 if (i < needed - 1) 580 dp->flags |= VRING_DESC_F_NEXT; 581 if (i >= readable) 582 dp->flags |= VRING_DESC_F_WRITE; 583 } 584 585 return (idx); 586 } 587 588 static int 589 vq_ring_must_notify_host(struct virtqueue *vq) 590 { 591 uint16_t new_idx, prev_idx, event_idx; 592 593 if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) { 594 new_idx = vq->vq_ring.avail->idx; 595 prev_idx = new_idx - vq->vq_queued_cnt; 596 event_idx = vring_avail_event(&vq->vq_ring); 597 598 return (vring_need_event(event_idx, new_idx, prev_idx) != 0); 599 } 600 601 return ((vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) == 0); 602 } 603 604 static void 605 vq_ring_notify_host(struct virtqueue *vq) 606 { 607 VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index); 608 } 609 610 static void 611 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) 612 { 613 struct vring_desc *dp; 614 struct vq_desc_extra *dxp; 615 616 VQ_RING_ASSERT_VALID_IDX(vq, desc_idx); 617 dp = &vq->vq_ring.desc[desc_idx]; 618 dxp = &vq->vq_descx[desc_idx]; 619 620 if (vq->vq_free_cnt == 0) 621 VQ_RING_ASSERT_CHAIN_TERM(vq); 622 623 vq->vq_free_cnt += dxp->ndescs; 624 dxp->ndescs--; 625 626 while (dp->flags & VRING_DESC_F_NEXT) { 627 VQ_RING_ASSERT_VALID_IDX(vq, dp->next); 628 dp = &vq->vq_ring.desc[dp->next]; 629 dxp->ndescs--; 630 } 631 VQASSERT(vq, dxp->ndescs == 0, "failed to free entire desc chain"); 632 633 /* 634 * We must append the existing free chain, if any, to the end of 635 * newly freed chain. If the virtqueue was completely used, then 636 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above). 637 */ 638 dp->next = vq->vq_desc_head_idx; 639 vq->vq_desc_head_idx = desc_idx; 640 } 641