1 /*
2  * Virtio PCI driver - modern (virtio 1.0) device support
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright Red Hat, Inc. 2014
6  *
7  * Authors:
8  *  Anthony Liguori  <aliguori@us.ibm.com>
9  *  Rusty Russell <rusty@rustcorp.com.au>
10  *  Michael S. Tsirkin <mst@redhat.com>
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met :
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and / or other materials provided with the distribution.
20  * 3. Neither the names of the copyright holders nor the names of their contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include "osdep.h"
36 #define VIRTIO_PCI_NO_LEGACY
37 #include "virtio_pci.h"
38 #include "VirtIO.h"
39 #include "kdebugprint.h"
40 #include "virtio_ring.h"
41 #include "virtio_pci_common.h"
42 #include "windows/virtio_ring_allocation.h"
43 #include <stddef.h>
44 
45 #ifdef WPP_EVENT_TRACING
46 #include "VirtIOPCIModern.tmh"
47 #endif
48 
49 static void *vio_modern_map_capability(VirtIODevice *vdev, int cap_offset,
50                                        size_t minlen, u32 alignment,
51                                        u32 start, u32 size, size_t *len)
52 {
53     u8 bar;
54     u32 bar_offset, bar_length;
55     void *addr;
56 
57     pci_read_config_byte(vdev, cap_offset + offsetof(struct virtio_pci_cap, bar), &bar);
58     pci_read_config_dword(vdev, cap_offset + offsetof(struct virtio_pci_cap, offset), &bar_offset);
59     pci_read_config_dword(vdev, cap_offset + offsetof(struct virtio_pci_cap, length), &bar_length);
60 
61     if (start + minlen > bar_length) {
62         DPrintf(0, "bar %i cap is not large enough to map %zu bytes at offset %u\n", bar, minlen, start);
63         return NULL;
64     }
65 
66     bar_length -= start;
67     bar_offset += start;
68 
69     if (bar_offset & (alignment - 1)) {
70         DPrintf(0, "bar %i offset %u not aligned to %u\n", bar, bar_offset, alignment);
71         return NULL;
72     }
73 
74     if (bar_length > size) {
75         bar_length = size;
76     }
77 
78     if (len) {
79         *len = bar_length;
80     }
81 
82     if (bar_offset + minlen > pci_get_resource_len(vdev, bar)) {
83         DPrintf(0, "bar %i is not large enough to map %zu bytes at offset %u\n", bar, minlen, bar_offset);
84         return NULL;
85     }
86 
87     addr = pci_map_address_range(vdev, bar, bar_offset, bar_length);
88     if (!addr) {
89         DPrintf(0, "unable to map %u bytes at bar %i offset %u\n", bar_length, bar, bar_offset);
90     }
91     return addr;
92 }
93 
94 static void *vio_modern_map_simple_capability(VirtIODevice *vdev, int cap_offset, size_t length, u32 alignment)
95 {
96     return vio_modern_map_capability(
97         vdev,
98         cap_offset,
99         length,      // minlen
100         alignment,
101         0,           // offset
102         (u32)length, // size is equal to minlen
103         NULL);       // not interested in the full length
104 }
105 
106 static void vio_modern_get_config(VirtIODevice *vdev, unsigned offset,
107                                   void *buf, unsigned len)
108 {
109     if (!vdev->config) {
110         ASSERT(!"Device has no config to read");
111         return;
112     }
113     if (offset + len > vdev->config_len) {
114         ASSERT(!"Can't read beyond the config length");
115         return;
116     }
117 
118     switch (len) {
119     case 1:
120         *(u8 *)buf = ioread8(vdev, vdev->config + offset);
121         break;
122     case 2:
123         *(u16 *)buf = ioread16(vdev, vdev->config + offset);
124         break;
125     case 4:
126         *(u32 *)buf = ioread32(vdev, vdev->config + offset);
127         break;
128     default:
129         ASSERT(!"Only 1, 2, 4 byte config reads are supported");
130     }
131 }
132 
133 static void vio_modern_set_config(VirtIODevice *vdev, unsigned offset,
134                                   const void *buf, unsigned len)
135 {
136     if (!vdev->config) {
137         ASSERT(!"Device has no config to write");
138         return;
139     }
140     if (offset + len > vdev->config_len) {
141         ASSERT(!"Can't write beyond the config length");
142         return;
143     }
144 
145     switch (len) {
146     case 1:
147         iowrite8(vdev, *(u8 *)buf, vdev->config + offset);
148         break;
149     case 2:
150         iowrite16(vdev, *(u16 *)buf, vdev->config + offset);
151         break;
152     case 4:
153         iowrite32(vdev, *(u32 *)buf, vdev->config + offset);
154         break;
155     default:
156         ASSERT(!"Only 1, 2, 4 byte config writes are supported");
157     }
158 }
159 
160 static u32 vio_modern_get_generation(VirtIODevice *vdev)
161 {
162     return ioread8(vdev, &vdev->common->config_generation);
163 }
164 
165 static u8 vio_modern_get_status(VirtIODevice *vdev)
166 {
167     return ioread8(vdev, &vdev->common->device_status);
168 }
169 
170 static void vio_modern_set_status(VirtIODevice *vdev, u8 status)
171 {
172     /* We should never be setting status to 0. */
173     ASSERT(status != 0);
174     iowrite8(vdev, status, &vdev->common->device_status);
175 }
176 
177 static void vio_modern_reset(VirtIODevice *vdev)
178 {
179     /* 0 status means a reset. */
180     iowrite8(vdev, 0, &vdev->common->device_status);
181     /* After writing 0 to device_status, the driver MUST wait for a read of
182      * device_status to return 0 before reinitializing the device.
183      * This will flush out the status write, and flush in device writes,
184      * including MSI-X interrupts, if any.
185      */
186     while (ioread8(vdev, &vdev->common->device_status)) {
187         u16 val;
188         if (pci_read_config_word(vdev, 0, &val) || val == 0xffff) {
189             DPrintf(0, "PCI config space is not readable, probably the device is removed\n", 0);
190             break;
191         }
192         vdev_sleep(vdev, 1);
193     }
194 }
195 
196 static u64 vio_modern_get_features(VirtIODevice *vdev)
197 {
198     u64 features;
199 
200     iowrite32(vdev, 0, &vdev->common->device_feature_select);
201     features = ioread32(vdev, &vdev->common->device_feature);
202     iowrite32(vdev, 1, &vdev->common->device_feature_select);
203     features |= ((u64)ioread32(vdev, &vdev->common->device_feature) << 32);
204 
205     return features;
206 }
207 
208 static NTSTATUS vio_modern_set_features(VirtIODevice *vdev, u64 features)
209 {
210     /* Give virtio_ring a chance to accept features. */
211     vring_transport_features(vdev, &features);
212 
213     if (!virtio_is_feature_enabled(features, VIRTIO_F_VERSION_1)) {
214         DPrintf(0, "virtio: device uses modern interface but does not have VIRTIO_F_VERSION_1\n", 0);
215         return STATUS_INVALID_PARAMETER;
216     }
217 
218     iowrite32(vdev, 0, &vdev->common->guest_feature_select);
219     iowrite32(vdev, (u32)features, &vdev->common->guest_feature);
220     iowrite32(vdev, 1, &vdev->common->guest_feature_select);
221     iowrite32(vdev, features >> 32, &vdev->common->guest_feature);
222 
223     return STATUS_SUCCESS;
224 }
225 
226 static u16 vio_modern_set_config_vector(VirtIODevice *vdev, u16 vector)
227 {
228     /* Setup the vector used for configuration events */
229     iowrite16(vdev, vector, &vdev->common->msix_config);
230     /* Verify we had enough resources to assign the vector */
231     /* Will also flush the write out to device */
232     return ioread16(vdev, &vdev->common->msix_config);
233 }
234 
235 static u16 vio_modern_set_queue_vector(struct virtqueue *vq, u16 vector)
236 {
237     VirtIODevice *vdev = vq->vdev;
238     volatile struct virtio_pci_common_cfg *cfg = vdev->common;
239 
240     iowrite16(vdev, (u16)vq->index, &cfg->queue_select);
241     iowrite16(vdev, vector, &cfg->queue_msix_vector);
242     return ioread16(vdev, &cfg->queue_msix_vector);
243 }
244 
245 static size_t vring_pci_size(u16 num, bool packed)
246 {
247     /* We only need a cacheline separation. */
248     return (size_t)ROUND_TO_PAGES(vring_size(num, SMP_CACHE_BYTES, packed));
249 }
250 
251 static NTSTATUS vio_modern_query_vq_alloc(VirtIODevice *vdev,
252                                           unsigned index,
253                                           unsigned short *pNumEntries,
254                                           unsigned long *pRingSize,
255                                           unsigned long *pHeapSize)
256 {
257     volatile struct virtio_pci_common_cfg *cfg = vdev->common;
258     u16 num;
259 
260     if (index >= ioread16(vdev, &cfg->num_queues)) {
261         return STATUS_NOT_FOUND;
262     }
263 
264     /* Select the queue we're interested in */
265     iowrite16(vdev, (u16)index, &cfg->queue_select);
266 
267     /* Check if queue is either not available or already active. */
268     num = ioread16(vdev, &cfg->queue_size);
269     /* QEMU has a bug where queues don't revert to inactive on device
270      * reset. Skip checking the queue_enable field until it is fixed.
271      */
272     if (!num /*|| ioread16(vdev, &cfg->queue_enable)*/) {
273         return STATUS_NOT_FOUND;
274     }
275 
276     if (num & (num - 1)) {
277         DPrintf(0, "%p: bad queue size %u", vdev, num);
278         return STATUS_INVALID_PARAMETER;
279     }
280 
281     *pNumEntries = num;
282     *pRingSize = (unsigned long)vring_pci_size(num, vdev->packed_ring);
283     *pHeapSize = vring_control_block_size(num, vdev->packed_ring);
284 
285     return STATUS_SUCCESS;
286 }
287 
288 static NTSTATUS vio_modern_setup_vq(struct virtqueue **queue,
289                                     VirtIODevice *vdev,
290                                     VirtIOQueueInfo *info,
291                                     unsigned index,
292                                     u16 msix_vec)
293 {
294     volatile struct virtio_pci_common_cfg *cfg = vdev->common;
295     struct virtqueue *vq;
296     void *vq_addr;
297     u16 off;
298     unsigned long ring_size, heap_size;
299     NTSTATUS status;
300 
301     /* select the queue and query allocation parameters */
302     status = vio_modern_query_vq_alloc(vdev, index, &info->num, &ring_size, &heap_size);
303     if (!NT_SUCCESS(status)) {
304         return status;
305     }
306 
307     /* get offset of notification word for this vq */
308     off = ioread16(vdev, &cfg->queue_notify_off);
309 
310     /* try to allocate contiguous pages, scale down on failure */
311     while (!(info->queue = mem_alloc_contiguous_pages(vdev, vring_pci_size(info->num, vdev->packed_ring)))) {
312         if (info->num > 0) {
313             info->num /= 2;
314         } else {
315             return STATUS_INSUFFICIENT_RESOURCES;
316         }
317     }
318 
319     vq_addr = mem_alloc_nonpaged_block(vdev, heap_size);
320     if (vq_addr == NULL) {
321         return STATUS_INSUFFICIENT_RESOURCES;
322     }
323 
324     /* create the vring */
325     if (vdev->packed_ring) {
326         vq = vring_new_virtqueue_packed(index, info->num,
327             SMP_CACHE_BYTES, vdev,
328             info->queue, vp_notify, vq_addr);
329     } else {
330         vq = vring_new_virtqueue_split(index, info->num,
331             SMP_CACHE_BYTES, vdev,
332             info->queue, vp_notify, vq_addr);
333     }
334 
335     if (!vq) {
336         status = STATUS_INSUFFICIENT_RESOURCES;
337         goto err_new_queue;
338     }
339 
340     /* activate the queue */
341     iowrite16(vdev, info->num, &cfg->queue_size);
342     iowrite64_twopart(vdev, mem_get_physical_address(vdev, info->queue),
343         &cfg->queue_desc_lo, &cfg->queue_desc_hi);
344     iowrite64_twopart(vdev, mem_get_physical_address(vdev, vq->avail_va),
345         &cfg->queue_avail_lo, &cfg->queue_avail_hi);
346     iowrite64_twopart(vdev, mem_get_physical_address(vdev, vq->used_va),
347         &cfg->queue_used_lo, &cfg->queue_used_hi);
348 
349     if (vdev->notify_base) {
350         /* offset should not wrap */
351         if ((u64)off * vdev->notify_offset_multiplier + 2
352             > vdev->notify_len) {
353             DPrintf(0,
354                 "%p: bad notification offset %u (x %u) "
355                 "for queue %u > %zd",
356                 vdev,
357                 off, vdev->notify_offset_multiplier,
358                 index, vdev->notify_len);
359             status = STATUS_INVALID_PARAMETER;
360             goto err_map_notify;
361         }
362         vq->notification_addr = (void *)(vdev->notify_base +
363             off * vdev->notify_offset_multiplier);
364     } else {
365         vq->notification_addr = vio_modern_map_capability(vdev,
366             vdev->notify_map_cap, 2, 2,
367             off * vdev->notify_offset_multiplier, 2,
368             NULL);
369     }
370 
371     if (!vq->notification_addr) {
372         status = STATUS_INSUFFICIENT_RESOURCES;
373         goto err_map_notify;
374     }
375 
376     if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
377         msix_vec = vdev->device->set_queue_vector(vq, msix_vec);
378         if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
379             status = STATUS_DEVICE_BUSY;
380             goto err_assign_vector;
381         }
382     }
383 
384     /* enable the queue */
385     iowrite16(vdev, 1, &vdev->common->queue_enable);
386 
387     *queue = vq;
388     return STATUS_SUCCESS;
389 
390 err_assign_vector:
391 err_map_notify:
392     virtqueue_shutdown(vq);
393 err_new_queue:
394     mem_free_nonpaged_block(vdev, vq_addr);
395     mem_free_contiguous_pages(vdev, info->queue);
396     return status;
397 }
398 
399 static void vio_modern_del_vq(VirtIOQueueInfo *info)
400 {
401     struct virtqueue *vq = info->vq;
402     VirtIODevice *vdev = vq->vdev;
403 
404     iowrite16(vdev, (u16)vq->index, &vdev->common->queue_select);
405 
406     if (vdev->msix_used) {
407         iowrite16(vdev, VIRTIO_MSI_NO_VECTOR, &vdev->common->queue_msix_vector);
408         /* Flush the write out to device */
409         ioread16(vdev, &vdev->common->queue_msix_vector);
410     }
411 
412     virtqueue_shutdown(vq);
413 
414     mem_free_nonpaged_block(vdev, vq);
415     mem_free_contiguous_pages(vdev, info->queue);
416 }
417 
/*
 * Operation table for modern (virtio 1.0) PCI devices; installed into
 * vdev->device by vio_modern_initialize().
 *
 * The initializer is positional, so the entries must stay in the order in
 * which struct virtio_device_ops declares its members (the declaration is
 * not visible in this file; the comments name the intended member).
 */
static const struct virtio_device_ops virtio_pci_device_ops = {
    /* .get_config = */ vio_modern_get_config,
    /* .set_config = */ vio_modern_set_config,
    /* .get_config_generation = */ vio_modern_get_generation,
    /* .get_status = */ vio_modern_get_status,
    /* .set_status = */ vio_modern_set_status,
    /* .reset = */ vio_modern_reset,
    /* .get_features = */ vio_modern_get_features,
    /* .set_features = */ vio_modern_set_features,
    /* .set_config_vector = */ vio_modern_set_config_vector,
    /* .set_queue_vector = */ vio_modern_set_queue_vector,
    /* .query_queue_alloc = */ vio_modern_query_vq_alloc,
    /* .setup_queue = */ vio_modern_setup_vq,
    /* .delete_queue = */ vio_modern_del_vq,
};
433 
434 static u8 find_next_pci_vendor_capability(VirtIODevice *vdev, u8 offset)
435 {
436     u8 id = 0;
437     int iterations = 48;
438 
439     if (pci_read_config_byte(vdev, offset, &offset) != 0) {
440         return 0;
441     }
442 
443     while (iterations-- && offset >= 0x40) {
444         offset &= ~3;
445         if (pci_read_config_byte(vdev, offset + offsetof(PCI_CAPABILITIES_HEADER,
446                 CapabilityID), &id) != 0) {
447             break;
448         }
449         if (id == 0xFF) {
450             break;
451         }
452         if (id == PCI_CAPABILITY_ID_VENDOR_SPECIFIC) {
453             return offset;
454         }
455         if (pci_read_config_byte(vdev, offset + offsetof(PCI_CAPABILITIES_HEADER,
456                 Next), &offset) != 0) {
457             break;
458         }
459     }
460     return 0;
461 }
462 
463 static u8 find_first_pci_vendor_capability(VirtIODevice *vdev)
464 {
465     u8 hdr_type, offset;
466     u16 status;
467 
468     if (pci_read_config_byte(vdev, offsetof(PCI_COMMON_HEADER, HeaderType), &hdr_type) != 0) {
469         return 0;
470     }
471     if (pci_read_config_word(vdev, offsetof(PCI_COMMON_HEADER, Status), &status) != 0) {
472         return 0;
473     }
474     if ((status & PCI_STATUS_CAPABILITIES_LIST) == 0) {
475         return 0;
476     }
477 
478     switch (hdr_type & ~PCI_MULTIFUNCTION) {
479     case PCI_BRIDGE_TYPE:
480         offset = offsetof(PCI_COMMON_HEADER, u.type1.CapabilitiesPtr);
481         break;
482     case PCI_CARDBUS_BRIDGE_TYPE:
483         offset = offsetof(PCI_COMMON_HEADER, u.type2.CapabilitiesPtr);
484         break;
485     default:
486         offset = offsetof(PCI_COMMON_HEADER, u.type0.CapabilitiesPtr);
487         break;
488     }
489 
490     if (offset != 0) {
491         offset = find_next_pci_vendor_capability(vdev, offset);
492     }
493     return offset;
494 }
495 
496 /* Populate Offsets with virtio vendor capability offsets within the PCI config space */
497 static void find_pci_vendor_capabilities(VirtIODevice *vdev, int *Offsets, size_t nOffsets)
498 {
499     u8 offset = find_first_pci_vendor_capability(vdev);
500     while (offset > 0) {
501         u8 cfg_type, bar;
502         pci_read_config_byte(vdev, offset + offsetof(struct virtio_pci_cap, cfg_type), &cfg_type);
503         pci_read_config_byte(vdev, offset + offsetof(struct virtio_pci_cap, bar), &bar);
504 
505         if (bar < PCI_TYPE0_ADDRESSES &&
506             cfg_type < nOffsets &&
507             pci_get_resource_len(vdev, bar) > 0) {
508             Offsets[cfg_type] = offset;
509         }
510 
511         offset = find_next_pci_vendor_capability(vdev, offset + offsetof(PCI_CAPABILITIES_HEADER, Next));
512     }
513 }
514 
515 /* Modern device initialization */
516 NTSTATUS vio_modern_initialize(VirtIODevice *vdev)
517 {
518     int capabilities[VIRTIO_PCI_CAP_PCI_CFG];
519 
520     u32 notify_length;
521     u32 notify_offset;
522 
523     RtlZeroMemory(capabilities, sizeof(capabilities));
524     find_pci_vendor_capabilities(vdev, capabilities, VIRTIO_PCI_CAP_PCI_CFG);
525 
526     /* Check for a common config, if not found use legacy mode */
527     if (!capabilities[VIRTIO_PCI_CAP_COMMON_CFG]) {
528         DPrintf(0, "%s(%p): device not found\n", __FUNCTION__, vdev);
529         return STATUS_DEVICE_NOT_CONNECTED;
530     }
531 
532     /* Check isr and notify caps, if not found fail */
533     if (!capabilities[VIRTIO_PCI_CAP_ISR_CFG] || !capabilities[VIRTIO_PCI_CAP_NOTIFY_CFG]) {
534         DPrintf(0, "%s(%p): missing capabilities %i/%i/%i\n",
535             __FUNCTION__, vdev,
536             capabilities[VIRTIO_PCI_CAP_COMMON_CFG],
537             capabilities[VIRTIO_PCI_CAP_ISR_CFG],
538             capabilities[VIRTIO_PCI_CAP_NOTIFY_CFG]);
539         return STATUS_INVALID_PARAMETER;
540     }
541 
542     /* Map bars according to the capabilities */
543     vdev->common = vio_modern_map_simple_capability(vdev,
544         capabilities[VIRTIO_PCI_CAP_COMMON_CFG],
545         sizeof(struct virtio_pci_common_cfg), 4);
546     if (!vdev->common) {
547         return STATUS_INVALID_PARAMETER;
548     }
549 
550     vdev->isr = vio_modern_map_simple_capability(vdev,
551         capabilities[VIRTIO_PCI_CAP_ISR_CFG],
552         sizeof(u8), 1);
553     if (!vdev->isr) {
554         return STATUS_INVALID_PARAMETER;
555     }
556 
557     /* Read notify_off_multiplier from config space. */
558     pci_read_config_dword(vdev,
559         capabilities[VIRTIO_PCI_CAP_NOTIFY_CFG] + offsetof(struct virtio_pci_notify_cap,
560         notify_off_multiplier),
561         &vdev->notify_offset_multiplier);
562 
563     /* Read notify length and offset from config space. */
564     pci_read_config_dword(vdev,
565         capabilities[VIRTIO_PCI_CAP_NOTIFY_CFG] + offsetof(struct virtio_pci_notify_cap,
566         cap.length),
567         &notify_length);
568     pci_read_config_dword(vdev,
569         capabilities[VIRTIO_PCI_CAP_NOTIFY_CFG] + offsetof(struct virtio_pci_notify_cap,
570         cap.offset),
571         &notify_offset);
572 
573     /* Map the notify capability if it's small enough.
574      * Otherwise, map each VQ individually later.
575      */
576     if (notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
577         vdev->notify_base = vio_modern_map_capability(vdev,
578             capabilities[VIRTIO_PCI_CAP_NOTIFY_CFG], 2, 2,
579             0, notify_length,
580             &vdev->notify_len);
581         if (!vdev->notify_base) {
582             return STATUS_INVALID_PARAMETER;
583         }
584     } else {
585         vdev->notify_map_cap = capabilities[VIRTIO_PCI_CAP_NOTIFY_CFG];
586     }
587 
588     /* Map the device config capability, the PAGE_SIZE size is a guess */
589     if (capabilities[VIRTIO_PCI_CAP_DEVICE_CFG]) {
590         vdev->config = vio_modern_map_capability(vdev,
591             capabilities[VIRTIO_PCI_CAP_DEVICE_CFG], 0, 4,
592             0, PAGE_SIZE,
593             &vdev->config_len);
594         if (!vdev->config) {
595             return STATUS_INVALID_PARAMETER;
596         }
597     }
598 
599     vdev->device = &virtio_pci_device_ops;
600 
601     return STATUS_SUCCESS;
602 }
603