/*
 * Virtio ring manipulation routines
 *
 * Copyright 2017 Red Hat, Inc.
 *
 * Authors:
 *  Ladi Prosek <lprosek@redhat.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of their contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "osdep.h"
#include "virtio_pci.h"
#include "VirtIO.h"
#include "kdebugprint.h"
#include "virtio_ring.h"
#include "windows/virtio_ring_allocation.h"

#define DESC_INDEX(num, i) ((i) & ((num) - 1))

/* This marks a buffer as continuing via the next field. */
#define VIRTQ_DESC_F_NEXT 1
/* This marks a buffer as write-only (otherwise read-only). */
#define VIRTQ_DESC_F_WRITE 2
/* This means the buffer contains a list of buffer descriptors. */
#define VIRTQ_DESC_F_INDIRECT 4

/* The Host uses this in used->flags to advise the Guest: don't kick me when
 * you add a buffer. It's unreliable, so it's simply an optimization. Guest
 * will still kick if it's out of buffers. */
#define VIRTQ_USED_F_NO_NOTIFY 1
/* The Guest uses this in avail->flags to advise the Host: don't interrupt me
 * when you consume a buffer. It's unreliable, so it's simply an
 * optimization. */
#define VIRTQ_AVAIL_F_NO_INTERRUPT 1

#pragma warning (push)
#pragma warning (disable:4200)

#include <pshpack1.h>

/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */
struct vring_desc {
    /* Address (guest-physical). */
    __virtio64 addr;
    /* Length. */
    __virtio32 len;
    /* The flags as indicated above. */
    __virtio16 flags;
    /* We chain unused descriptors via this, too */
    __virtio16 next;
};
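/*
 * Illustrative sketch (not part of the driver; addresses and lengths are
 * hypothetical): a two-descriptor chain as the device would see it.
 *
 *     desc[3] = { .addr = cmd_pa,    .len = 128, .flags = VIRTQ_DESC_F_NEXT,  .next = 7 };
 *     desc[7] = { .addr = status_pa, .len = 512, .flags = VIRTQ_DESC_F_WRITE, .next = 0 };
 *
 * desc[3] is read by the device (driver->device), desc[7] is written by the
 * device (device->driver), and the chain ends at the first descriptor that
 * does not have VIRTQ_DESC_F_NEXT set.
 */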
struct vring_avail {
    __virtio16 flags;
    __virtio16 idx;
    __virtio16 ring[];
};

/* u32 is used here for ids for padding reasons. */
struct vring_used_elem {
    /* Index of start of used descriptor chain. */
    __virtio32 id;
    /* Total length of the descriptor chain which was used (written to) */
    __virtio32 len;
};

struct vring_used {
    __virtio16 flags;
    __virtio16 idx;
    struct vring_used_elem ring[];
};

#include <poppack.h>

/* Alignment requirements for vring elements.
 * When using pre-virtio 1.0 layout, these fall out naturally.
 */
#define VRING_AVAIL_ALIGN_SIZE 2
#define VRING_USED_ALIGN_SIZE 4
#define VRING_DESC_ALIGN_SIZE 16

/* The standard layout for the ring is a continuous chunk of memory which looks
 * like this. We assume num is a power of 2.
 *
 * struct vring
 * {
 *     // The actual descriptors (16 bytes each)
 *     struct vring_desc desc[num];
 *
 *     // A ring of available descriptor heads with free-running index.
 *     __virtio16 avail_flags;
 *     __virtio16 avail_idx;
 *     __virtio16 available[num];
 *     __virtio16 used_event_idx;
 *
 *     // Padding to the next align boundary.
 *     char pad[];
 *
 *     // A ring of used descriptor heads with free-running index.
 *     __virtio16 used_flags;
 *     __virtio16 used_idx;
 *     struct vring_used_elem used[num];
 *     __virtio16 avail_event_idx;
 * };
 */
/* We publish the used event index at the end of the available ring, and vice
 * versa. They are at the end for backwards compatibility. */

struct vring {
    unsigned int num;

    struct vring_desc *desc;

    struct vring_avail *avail;

    struct vring_used *used;
};

#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
#define vring_avail_event(vr) (*(__virtio16 *)&(vr)->used->ring[(vr)->num])

static inline void vring_init(struct vring *vr, unsigned int num, void *p,
                              unsigned long align)
{
    vr->num = num;
    vr->desc = (struct vring_desc *)p;
    vr->avail = (struct vring_avail *)((__u8 *)p + num * sizeof(struct vring_desc));
    vr->used = (struct vring_used *)(((ULONG_PTR)&vr->avail->ring[num] + sizeof(__virtio16)
        + align - 1) & ~((ULONG_PTR)align - 1));
}

static inline unsigned vring_size_split(unsigned int num, unsigned long align)
{
#pragma warning (push)
#pragma warning (disable:4319)
    return ((sizeof(struct vring_desc) * num + sizeof(__virtio16) * (3 + num)
        + align - 1) & ~(align - 1))
        + sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num;
#pragma warning(pop)
}

/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
/* Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new_idx,
 * should we trigger an event? */
static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new_idx respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
}
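/*
 * Worked example for vring_need_event (hypothetical values): if the other side
 * last published event_idx = 5 and our free-running index moved from old = 5
 * to new_idx = 8, then (__u16)(8 - 5 - 1) = 2 < (__u16)(8 - 5) = 3, so an
 * event is needed: the index the other side asked about was crossed within
 * this batch. The unsigned 16-bit arithmetic keeps the window test correct
 * even when the indices wrap around.
 */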
struct virtqueue_split {
    struct virtqueue vq;
    struct vring vring;
    struct {
        u16 flags;
        u16 idx;
    } master_vring_avail;
    unsigned int num_unused;
    unsigned int num_added_since_kick;
    u16 first_unused;
    u16 last_used;
    void *opaque[];
};

#define splitvq(vq) ((struct virtqueue_split *)vq)

#pragma warning (pop)

/* Returns the index of the first unused descriptor */
static inline u16 get_unused_desc(struct virtqueue_split *vq)
{
    u16 idx = vq->first_unused;
    ASSERT(vq->num_unused > 0);

    vq->first_unused = vq->vring.desc[idx].next;
    vq->num_unused--;
    return idx;
}

/* Marks the descriptor chain starting at index idx as unused */
static inline void put_unused_desc_chain(struct virtqueue_split *vq, u16 idx)
{
    u16 start = idx;

    vq->opaque[idx] = NULL;
    while (vq->vring.desc[idx].flags & VIRTQ_DESC_F_NEXT) {
        idx = vq->vring.desc[idx].next;
        vq->num_unused++;
    }

    vq->vring.desc[idx].flags = VIRTQ_DESC_F_NEXT;
    vq->vring.desc[idx].next = vq->first_unused;
    vq->num_unused++;

    vq->first_unused = start;
}
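/*
 * Illustrative sketch of how a driver typically submits a request (not part of
 * this file; it assumes the usual virtqueue_* wrappers declared in VirtIO.h,
 * which dispatch to the *_split functions below, plus hypothetical buffer
 * physical addresses and a request context):
 *
 *     struct scatterlist sg[2];
 *     sg[0].physAddr.QuadPart = cmd_pa;    sg[0].length = cmd_len;    // driver->device
 *     sg[1].physAddr.QuadPart = status_pa; sg[1].length = status_len; // device->driver
 *     if (virtqueue_add_buf(vq, sg, 1, 1, request_ctx, NULL, 0) == 0 &&
 *         virtqueue_kick_prepare(vq)) {
 *         virtqueue_notify(vq);
 *     }
 */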
/* Adds a buffer to a virtqueue, returns 0 on success, negative number on error */
static int virtqueue_add_buf_split(
    struct virtqueue *_vq,    /* the queue */
    struct scatterlist sg[],  /* sg array of length out + in */
    unsigned int out,         /* number of driver->device buffer descriptors in sg */
    unsigned int in,          /* number of device->driver buffer descriptors in sg */
    void *opaque,             /* later returned from virtqueue_get_buf */
    void *va_indirect,        /* VA of the indirect page or NULL */
    ULONGLONG phys_indirect)  /* PA of the indirect page or 0 */
{
    struct virtqueue_split *vq = splitvq(_vq);
    struct vring *vring = &vq->vring;
    unsigned int i;
    u16 idx;

    if (va_indirect && (out + in) > 1 && vq->num_unused > 0) {
        /* Use one indirect descriptor */
        struct vring_desc *desc = (struct vring_desc *)va_indirect;

        for (i = 0; i < out + in; i++) {
            desc[i].flags = (i < out ? 0 : VIRTQ_DESC_F_WRITE);
            desc[i].flags |= VIRTQ_DESC_F_NEXT;
            desc[i].addr = sg[i].physAddr.QuadPart;
            desc[i].len = sg[i].length;
            desc[i].next = (u16)i + 1;
        }
        desc[i - 1].flags &= ~VIRTQ_DESC_F_NEXT;

        idx = get_unused_desc(vq);
        vq->vring.desc[idx].flags = VIRTQ_DESC_F_INDIRECT;
        vq->vring.desc[idx].addr = phys_indirect;
        vq->vring.desc[idx].len = i * sizeof(struct vring_desc);

        vq->opaque[idx] = opaque;
    } else {
        u16 last_idx;

        /* Use out + in regular descriptors */
        if (out + in > vq->num_unused) {
            return -ENOSPC;
        }

        /* First descriptor */
        idx = last_idx = get_unused_desc(vq);
        vq->opaque[idx] = opaque;

        vring->desc[idx].addr = sg[0].physAddr.QuadPart;
        vring->desc[idx].len = sg[0].length;
        vring->desc[idx].flags = VIRTQ_DESC_F_NEXT;
        if (out == 0) {
            vring->desc[idx].flags |= VIRTQ_DESC_F_WRITE;
        }
        vring->desc[idx].next = vq->first_unused;

        /* The rest of descriptors */
        for (i = 1; i < out + in; i++) {
            last_idx = get_unused_desc(vq);

            vring->desc[last_idx].addr = sg[i].physAddr.QuadPart;
            vring->desc[last_idx].len = sg[i].length;
            vring->desc[last_idx].flags = VIRTQ_DESC_F_NEXT;
            if (i >= out) {
                vring->desc[last_idx].flags |= VIRTQ_DESC_F_WRITE;
            }
            vring->desc[last_idx].next = vq->first_unused;
        }
        vring->desc[last_idx].flags &= ~VIRTQ_DESC_F_NEXT;
    }

    /* Write the first descriptor into the available ring */
    vring->avail->ring[DESC_INDEX(vring->num, vq->master_vring_avail.idx)] = idx;
    KeMemoryBarrier();
    vring->avail->idx = ++vq->master_vring_avail.idx;
    vq->num_added_since_kick++;

    return 0;
}

/* Gets the opaque pointer associated with a returned buffer, or NULL if no buffer is available */
static void *virtqueue_get_buf_split(
    struct virtqueue *_vq,  /* the queue */
    unsigned int *len)      /* number of bytes returned by the device */
{
    struct virtqueue_split *vq = splitvq(_vq);
    void *opaque;
    u16 idx;

    if (vq->last_used == (int)vq->vring.used->idx) {
        /* No descriptor index in the used ring */
        return NULL;
    }
    KeMemoryBarrier();

    idx = DESC_INDEX(vq->vring.num, vq->last_used);
    *len = vq->vring.used->ring[idx].len;

    /* Get the first used descriptor */
    idx = (u16)vq->vring.used->ring[idx].id;
    opaque = vq->opaque[idx];

    /* Put all descriptors back to the free list */
    put_unused_desc_chain(vq, idx);

    vq->last_used++;
    if (_vq->vdev->event_suppression_enabled && virtqueue_is_interrupt_enabled(_vq)) {
        vring_used_event(&vq->vring) = vq->last_used;
        KeMemoryBarrier();
    }

    ASSERT(opaque != NULL);
    return opaque;
}

/* Returns true if at least one returned buffer is available, false otherwise */
static BOOLEAN virtqueue_has_buf_split(struct virtqueue *_vq)
{
    struct virtqueue_split *vq = splitvq(_vq);
    return (vq->last_used != vq->vring.used->idx);
}
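/*
 * Illustrative sketch of the completion path a driver typically runs from its
 * interrupt/DPC handler (a common convention, not part of this file; wrapper
 * names assumed from VirtIO.h, complete_request is a hypothetical driver
 * routine). Re-checking after enable_cb closes the race where a buffer is
 * returned between the last get_buf and re-enabling interrupts:
 *
 *     void *ctx;
 *     unsigned int len;
 *     do {
 *         virtqueue_disable_cb(vq);
 *         while ((ctx = virtqueue_get_buf(vq, &len)) != NULL) {
 *             complete_request(ctx, len);
 *         }
 *     } while (!virtqueue_enable_cb(vq));
 */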
/* Returns true if the device should be notified, false otherwise */
static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
{
    struct virtqueue_split *vq = splitvq(_vq);
    bool wrap_around;
    u16 old, new;
    KeMemoryBarrier();

    wrap_around = (vq->num_added_since_kick >= (1 << 16));

    old = (u16)(vq->master_vring_avail.idx - vq->num_added_since_kick);
    new = vq->master_vring_avail.idx;
    vq->num_added_since_kick = 0;

    if (_vq->vdev->event_suppression_enabled) {
        return wrap_around || (bool)vring_need_event(vring_avail_event(&vq->vring), new, old);
    } else {
        return !(vq->vring.used->flags & VIRTQ_USED_F_NO_NOTIFY);
    }
}

/* Notifies the device even if it's not necessary according to the event suppression logic */
static void virtqueue_kick_always_split(struct virtqueue *_vq)
{
    struct virtqueue_split *vq = splitvq(_vq);
    KeMemoryBarrier();
    vq->num_added_since_kick = 0;
    virtqueue_notify(_vq);
}

/* Enables interrupts on a virtqueue and returns false if the queue has at least one returned
 * buffer available to be fetched by virtqueue_get_buf, true otherwise */
static bool virtqueue_enable_cb_split(struct virtqueue *_vq)
{
    struct virtqueue_split *vq = splitvq(_vq);
    if (!virtqueue_is_interrupt_enabled(_vq)) {
        vq->master_vring_avail.flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
        if (!_vq->vdev->event_suppression_enabled)
        {
            vq->vring.avail->flags = vq->master_vring_avail.flags;
        }
    }

    vring_used_event(&vq->vring) = vq->last_used;
    KeMemoryBarrier();
    return (vq->last_used == vq->vring.used->idx);
}

/* Enables interrupts on a virtqueue after ~3/4 of the currently pushed buffers have been
 * returned, returns false if this condition already holds, true otherwise */
static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
{
    struct virtqueue_split *vq = splitvq(_vq);
    u16 bufs;

    if (!virtqueue_is_interrupt_enabled(_vq)) {
        vq->master_vring_avail.flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
        if (!_vq->vdev->event_suppression_enabled)
        {
            vq->vring.avail->flags = vq->master_vring_avail.flags;
        }
    }

    /* Note that 3/4 is an arbitrary threshold */
    bufs = (u16)(vq->master_vring_avail.idx - vq->last_used) * 3 / 4;
    vring_used_event(&vq->vring) = vq->last_used + bufs;
    KeMemoryBarrier();
    return ((vq->vring.used->idx - vq->last_used) <= bufs);
}

/* Disables interrupts on a virtqueue */
static void virtqueue_disable_cb_split(struct virtqueue *_vq)
{
    struct virtqueue_split *vq = splitvq(_vq);
    if (virtqueue_is_interrupt_enabled(_vq)) {
        vq->master_vring_avail.flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;
        if (!_vq->vdev->event_suppression_enabled)
        {
            vq->vring.avail->flags = vq->master_vring_avail.flags;
        }
    }
}

/* Returns true if interrupts are enabled on a virtqueue, false otherwise */
static BOOLEAN virtqueue_is_interrupt_enabled_split(struct virtqueue *_vq)
{
    struct virtqueue_split *vq = splitvq(_vq);
    return !(vq->master_vring_avail.flags & VIRTQ_AVAIL_F_NO_INTERRUPT);
}
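/*
 * The shutdown path below recomputes vring_align the same way the ring was
 * presumably laid out at creation: a nonzero vdev->addr is taken to indicate a
 * legacy (I/O port based) device, which requires page alignment for the used
 * ring, while modern devices only need cache-line alignment.
 */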
/* Re-initializes an already initialized virtqueue */
static void virtqueue_shutdown_split(struct virtqueue *_vq)
{
    struct virtqueue_split *vq = splitvq(_vq);
    unsigned int num = vq->vring.num;
    void *pages = vq->vring.desc;
    unsigned int vring_align = _vq->vdev->addr ? PAGE_SIZE : SMP_CACHE_BYTES;

    RtlZeroMemory(pages, vring_size_split(num, vring_align));
    (void)vring_new_virtqueue_split(
        _vq->index,
        vq->vring.num,
        vring_align,
        _vq->vdev,
        pages,
        _vq->notification_cb,
        vq);
}

/* Gets the opaque pointer associated with a not-yet-returned buffer, or NULL if no buffer is available,
 * to aid drivers with cleaning up all data on virtqueue shutdown */
static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
{
    struct virtqueue_split *vq = splitvq(_vq);
    u16 idx;
    void *opaque = NULL;

    for (idx = 0; idx < (u16)vq->vring.num; idx++) {
        opaque = vq->opaque[idx];
        if (opaque) {
            put_unused_desc_chain(vq, idx);
            vq->vring.avail->idx = --vq->master_vring_avail.idx;
            break;
        }
    }
    return opaque;
}

/* Returns the size of the virtqueue structure including
 * additional size for per-descriptor data */
unsigned int vring_control_block_size(u16 qsize, bool packed)
{
    unsigned int res;
    if (packed) {
        return vring_control_block_size_packed(qsize);
    }
    res = sizeof(struct virtqueue_split);
    res += sizeof(void *) * qsize;
    return res;
}

/* Initializes a new virtqueue using already allocated memory */
struct virtqueue *vring_new_virtqueue_split(
    unsigned int index,                  /* virtqueue index */
    unsigned int num,                    /* virtqueue size (always a power of 2) */
    unsigned int vring_align,            /* vring alignment requirement */
    VirtIODevice *vdev,                  /* the virtio device owning the queue */
    void *pages,                         /* vring memory */
    void(*notify)(struct virtqueue *),   /* notification callback */
    void *control)                       /* virtqueue memory */
{
    struct virtqueue_split *vq = splitvq(control);
    u16 i;

    if (DESC_INDEX(num, num) != 0) {
        DPrintf(0, "Virtqueue length %u is not a power of 2\n", num);
        return NULL;
    }

    RtlZeroMemory(vq, sizeof(*vq) + num * sizeof(void *));

    vring_init(&vq->vring, num, pages, vring_align);
    vq->vq.vdev = vdev;
    vq->vq.notification_cb = notify;
    vq->vq.index = index;

    /* Build a linked list of unused descriptors */
    vq->num_unused = num;
    vq->first_unused = 0;
    for (i = 0; i < num - 1; i++) {
        vq->vring.desc[i].flags = VIRTQ_DESC_F_NEXT;
        vq->vring.desc[i].next = i + 1;
    }
    vq->vq.avail_va = vq->vring.avail;
    vq->vq.used_va = vq->vring.used;
    vq->vq.add_buf = virtqueue_add_buf_split;
    vq->vq.detach_unused_buf = virtqueue_detach_unused_buf_split;
    vq->vq.disable_cb = virtqueue_disable_cb_split;
    vq->vq.enable_cb = virtqueue_enable_cb_split;
    vq->vq.enable_cb_delayed = virtqueue_enable_cb_delayed_split;
    vq->vq.get_buf = virtqueue_get_buf_split;
    vq->vq.has_buf = virtqueue_has_buf_split;
    vq->vq.is_interrupt_enabled = virtqueue_is_interrupt_enabled_split;
    vq->vq.kick_always = virtqueue_kick_always_split;
    vq->vq.kick_prepare = virtqueue_kick_prepare_split;
    vq->vq.shutdown = virtqueue_shutdown_split;
    return &vq->vq;
}
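/*
 * Illustrative sketch of how the two allocations behind a split-ring queue fit
 * together (the allocation helpers and notify_cb are hypothetical placeholders;
 * real drivers obtain this memory through the VirtIODevice helpers):
 *
 *     unsigned int cb_size = vring_control_block_size(num, false);
 *     unsigned long ring_size = vring_size(num, vring_align, false);
 *     void *control = alloc_nonpaged(cb_size);    // virtqueue_split + opaque[]
 *     void *pages = alloc_contiguous(ring_size);  // desc/avail/used rings
 *     RtlZeroMemory(pages, ring_size);
 *     struct virtqueue *vq = vring_new_virtqueue_split(index, num, vring_align,
 *                                                      vdev, pages, notify_cb, control);
 */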
/* Negotiates virtio transport features */
void vring_transport_features(
    VirtIODevice *vdev,
    u64 *features)  /* points to device features on entry and driver accepted features on return */
{
    unsigned int i;

    for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
        if (i != VIRTIO_RING_F_INDIRECT_DESC &&
            i != VIRTIO_RING_F_EVENT_IDX &&
            i != VIRTIO_F_VERSION_1) {
            virtio_feature_disable(*features, i);
        }
    }
}

/* Returns the max number of scatter-gather elements that fit in an indirect page */
u32 virtio_get_indirect_page_capacity()
{
    return PAGE_SIZE / sizeof(struct vring_desc);
}

unsigned long vring_size(unsigned int num, unsigned long align, bool packed)
{
    if (packed) {
        return vring_size_packed(num, align);
    } else {
        return vring_size_split(num, align);
    }
}
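/*
 * Worked example (illustrative): for a hypothetical 256-entry split ring with
 * 4096-byte alignment, the descriptor table plus available ring take
 * 16 * 256 + 2 * (3 + 256) = 4614 bytes, rounded up to the next 4096-byte
 * boundary (8192), and the used ring adds 2 * 3 + 8 * 256 = 2054 bytes, so
 * vring_size(256, 4096, false) = 10246. An indirect page of 4096 bytes holds
 * 4096 / 16 = 256 descriptors, which is what virtio_get_indirect_page_capacity()
 * returns when PAGE_SIZE is 4096.
 */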