xref: /linux/drivers/vfio/pci/vfio_pci_rdwr.c (revision 1e525507)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VFIO PCI I/O Port & MMIO access
4  *
5  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
6  *     Author: Alex Williamson <alex.williamson@redhat.com>
7  *
8  * Derived from original vfio:
9  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
10  * Author: Tom Lyon, pugs@cisco.com
11  */
12 
13 #include <linux/fs.h>
14 #include <linux/pci.h>
15 #include <linux/uaccess.h>
16 #include <linux/io.h>
17 #include <linux/vfio.h>
18 #include <linux/vgaarb.h>
19 
20 #include "vfio_pci_priv.h"
21 
/*
 * Select I/O accessors that never byte-swap: the plain ioreadXX/iowriteXX
 * helpers swap on big-endian hosts, so the "be" variants are chosen there
 * to cancel that swap.  Net effect: data moves between userspace and the
 * device unmodified regardless of host endianness.
 */
#ifdef __LITTLE_ENDIAN
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#else
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#endif
/* Single-byte accesses have no endianness */
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8
39 
/*
 * VFIO_IOWRITE() - template generating vfio_pci_core_iowrite{8,16,32,64}()
 *
 * Write @val to device address @io.  When @test_mem is set, @io lies in a
 * memory BAR, so the access is performed only while holding
 * vdev->memory_lock for read with memory decoding confirmed enabled;
 * otherwise we refuse to touch the device and return -EIO.
 *
 * Return: 0 on success, -EIO if memory decoding is disabled.
 */
#define VFIO_IOWRITE(size) \
int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	vfio_iowrite##size(val, io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}									\
EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);
60 
/* Instantiate the write accessors; 64-bit only where iowrite64 exists. */
VFIO_IOWRITE(8)
VFIO_IOWRITE(16)
VFIO_IOWRITE(32)
#ifdef iowrite64
VFIO_IOWRITE(64)
#endif
67 
/*
 * VFIO_IOREAD() - template generating vfio_pci_core_ioread{8,16,32}()
 *
 * Read from device address @io into *@val.  As with VFIO_IOWRITE(), when
 * @test_mem is set the access only proceeds under vdev->memory_lock with
 * memory decoding enabled; otherwise -EIO is returned and *@val is left
 * untouched.
 *
 * Return: 0 on success, -EIO if memory decoding is disabled.
 */
#define VFIO_IOREAD(size) \
int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size *val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	*val = vfio_ioread##size(io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}									\
EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);
88 
/*
 * Instantiate the read accessors.  No 64-bit read variant is generated
 * here; in this file only the ioeventfd write path uses 64-bit access.
 */
VFIO_IOREAD(8)
VFIO_IOREAD(16)
VFIO_IOREAD(32)
92 
93 /*
94  * Read or write from an __iomem region (MMIO or I/O port) with an excluded
95  * range which is inaccessible.  The excluded range drops writes and fills
96  * reads with -1.  This is intended for handling MSI-X vector tables and
97  * leftover space for ROM BARs.
98  */
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
			       void __iomem *io, char __user *buf,
			       loff_t off, size_t count, size_t x_start,
			       size_t x_end, bool iswrite)
{
	ssize_t done = 0;
	int ret;

	while (count) {
		size_t fillable, filled;

		/*
		 * Bytes we may actually access on the device before running
		 * into the excluded [x_start, x_end) range; fillable == 0
		 * means @off is currently inside the excluded range.
		 */
		if (off < x_start)
			fillable = min(count, (size_t)(x_start - off));
		else if (off >= x_end)
			fillable = count;
		else
			fillable = 0;

		/* Use the widest naturally aligned access width possible. */
		if (fillable >= 4 && !(off % 4)) {
			u32 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 4))
					return -EFAULT;

				ret = vfio_pci_core_iowrite32(vdev, test_mem,
							      val, io + off);
				if (ret)
					return ret;
			} else {
				ret = vfio_pci_core_ioread32(vdev, test_mem,
							     &val, io + off);
				if (ret)
					return ret;

				if (copy_to_user(buf, &val, 4))
					return -EFAULT;
			}

			filled = 4;
		} else if (fillable >= 2 && !(off % 2)) {
			u16 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 2))
					return -EFAULT;

				ret = vfio_pci_core_iowrite16(vdev, test_mem,
							      val, io + off);
				if (ret)
					return ret;
			} else {
				ret = vfio_pci_core_ioread16(vdev, test_mem,
							     &val, io + off);
				if (ret)
					return ret;

				if (copy_to_user(buf, &val, 2))
					return -EFAULT;
			}

			filled = 2;
		} else if (fillable) {
			u8 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 1))
					return -EFAULT;

				ret = vfio_pci_core_iowrite8(vdev, test_mem,
							     val, io + off);
				if (ret)
					return ret;
			} else {
				ret = vfio_pci_core_ioread8(vdev, test_mem,
							    &val, io + off);
				if (ret)
					return ret;

				if (copy_to_user(buf, &val, 1))
					return -EFAULT;
			}

			filled = 1;
		} else {
			/* Fill reads with -1, drop writes */
			filled = min(count, (size_t)(x_end - off));
			if (!iswrite) {
				u8 val = 0xFF;
				size_t i;

				for (i = 0; i < filled; i++)
					if (copy_to_user(buf + i, &val, 1))
						return -EFAULT;
			}
		}

		/* Advance past whatever was accessed or skipped. */
		count -= filled;
		done += filled;
		off += filled;
		buf += filled;
	}

	return done;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
205 
206 int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
207 {
208 	struct pci_dev *pdev = vdev->pdev;
209 	int ret;
210 	void __iomem *io;
211 
212 	if (vdev->barmap[bar])
213 		return 0;
214 
215 	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
216 	if (ret)
217 		return ret;
218 
219 	io = pci_iomap(pdev, bar, 0);
220 	if (!io) {
221 		pci_release_selected_regions(pdev, 1 << bar);
222 		return -ENOMEM;
223 	}
224 
225 	vdev->barmap[bar] = io;
226 
227 	return 0;
228 }
229 EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
230 
/*
 * Read/write a PCI BAR region on behalf of userspace.  Handles the ROM
 * BAR (mapped/unmapped around each access), lazily maps other BARs via
 * vfio_pci_core_setup_barmap(), and excludes the MSI-X vector table from
 * direct access.  Advances *ppos by the number of bytes completed.
 */
ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	size_t x_start = 0, x_end = 0;
	resource_size_t end;
	void __iomem *io;
	struct resource *res = &vdev->pdev->resource[bar];
	ssize_t done;

	/*
	 * Determine the accessible extent: the BAR length, or a fixed
	 * 128KB window for a shadow ROM that has no resource of its own.
	 */
	if (pci_resource_start(pdev, bar))
		end = pci_resource_len(pdev, bar);
	else if (bar == PCI_ROM_RESOURCE &&
		 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
		end = 0x20000;
	else
		return -EINVAL;

	if (pos >= end)
		return -EINVAL;

	count = min(count, (size_t)(end - pos));

	if (bar == PCI_ROM_RESOURCE) {
		/*
		 * The ROM can fill less space than the BAR, so we start the
		 * excluded range at the end of the actual ROM.  This makes
		 * filling large ROM BARs much faster.
		 */
		io = pci_map_rom(pdev, &x_start);	/* x_start = ROM size */
		if (!io) {
			done = -ENOMEM;
			goto out;
		}
		x_end = end;
	} else {
		int ret = vfio_pci_core_setup_barmap(vdev, bar);
		if (ret) {
			done = ret;
			goto out;
		}

		io = vdev->barmap[bar];
	}

	/* MSI-X table accesses are emulated: reads return -1, writes drop. */
	if (bar == vdev->msix_bar) {
		x_start = vdev->msix_offset;
		x_end = vdev->msix_offset + vdev->msix_size;
	}

	/* Memory BARs require the memory-enable check on each access. */
	done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
				      count, x_start, x_end, iswrite);

	if (done >= 0)
		*ppos += done;

	if (bar == PCI_ROM_RESOURCE)
		pci_unmap_rom(pdev, io);
out:
	return done;
}
294 
295 #ifdef CONFIG_VFIO_PCI_VGA
296 ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
297 			       size_t count, loff_t *ppos, bool iswrite)
298 {
299 	int ret;
300 	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
301 	void __iomem *iomem = NULL;
302 	unsigned int rsrc;
303 	bool is_ioport;
304 	ssize_t done;
305 
306 	if (!vdev->has_vga)
307 		return -EINVAL;
308 
309 	if (pos > 0xbfffful)
310 		return -EINVAL;
311 
312 	switch ((u32)pos) {
313 	case 0xa0000 ... 0xbffff:
314 		count = min(count, (size_t)(0xc0000 - pos));
315 		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
316 		off = pos - 0xa0000;
317 		rsrc = VGA_RSRC_LEGACY_MEM;
318 		is_ioport = false;
319 		break;
320 	case 0x3b0 ... 0x3bb:
321 		count = min(count, (size_t)(0x3bc - pos));
322 		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
323 		off = pos - 0x3b0;
324 		rsrc = VGA_RSRC_LEGACY_IO;
325 		is_ioport = true;
326 		break;
327 	case 0x3c0 ... 0x3df:
328 		count = min(count, (size_t)(0x3e0 - pos));
329 		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
330 		off = pos - 0x3c0;
331 		rsrc = VGA_RSRC_LEGACY_IO;
332 		is_ioport = true;
333 		break;
334 	default:
335 		return -EINVAL;
336 	}
337 
338 	if (!iomem)
339 		return -ENOMEM;
340 
341 	ret = vga_get_interruptible(vdev->pdev, rsrc);
342 	if (ret) {
343 		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
344 		return ret;
345 	}
346 
347 	/*
348 	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
349 	 * probing, so we don't currently worry about access in relation
350 	 * to the memory enable bit in the command register.
351 	 */
352 	done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
353 				      0, 0, iswrite);
354 
355 	vga_put(vdev->pdev, rsrc);
356 
357 	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
358 
359 	if (done >= 0)
360 		*ppos += done;
361 
362 	return done;
363 }
364 #endif
365 
366 static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
367 					bool test_mem)
368 {
369 	switch (ioeventfd->count) {
370 	case 1:
371 		vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem,
372 				       ioeventfd->data, ioeventfd->addr);
373 		break;
374 	case 2:
375 		vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem,
376 					ioeventfd->data, ioeventfd->addr);
377 		break;
378 	case 4:
379 		vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem,
380 					ioeventfd->data, ioeventfd->addr);
381 		break;
382 #ifdef iowrite64
383 	case 8:
384 		vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem,
385 					ioeventfd->data, ioeventfd->addr);
386 		break;
387 #endif
388 	}
389 }
390 
/*
 * Fast-path eventfd handler.  Uses a trylock on memory_lock so it never
 * sleeps here; on contention it returns 1 to defer the write to the
 * thread handler.  Returns 0 when the write was performed or dropped.
 */
static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;
	struct vfio_pci_core_device *vdev = ioeventfd->vdev;

	if (ioeventfd->test_mem) {
		if (!down_read_trylock(&vdev->memory_lock))
			return 1; /* Lock contended, use thread */
		if (!__vfio_pci_memory_enabled(vdev)) {
			up_read(&vdev->memory_lock);
			return 0; /* Memory decode disabled, drop the write */
		}
	}

	/* test_mem=false: decode was already validated under the lock above */
	vfio_pci_ioeventfd_do_write(ioeventfd, false);

	if (ioeventfd->test_mem)
		up_read(&vdev->memory_lock);

	return 0;
}
412 
/*
 * Thread fallback used when the handler's trylock failed; here sleeping
 * is allowed, so the full memory-enable check is done via test_mem.
 */
static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;

	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
}
419 
/*
 * Register (or, with @fd == -1, unregister) an ioeventfd: signalling @fd
 * triggers a @count-byte write of @data to BAR @bar at the offset encoded
 * in @offset.  Duplicate registrations return -EEXIST; unregistering a
 * non-existent entry returns -ENODEV.
 */
int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
		       uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	struct vfio_pci_ioeventfd *ioeventfd;

	/* Only support ioeventfds into BARs */
	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	/* The write must lie entirely within the BAR */
	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    !(pos + count <= vdev->msix_offset ||
	      pos >= vdev->msix_offset + vdev->msix_size))
		return -EINVAL;

	/* 8-byte writes are unsupported where iowrite64 doesn't exist */
#ifndef iowrite64
	if (count == 8)
		return -EINVAL;
#endif

	ret = vfio_pci_core_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	/* Look for an existing identical registration */
	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
		    ioeventfd->data == data && ioeventfd->count == count) {
			if (fd == -1) {
				/* Match + fd == -1 means delete this entry */
				vfio_virqfd_disable(&ioeventfd->virqfd);
				list_del(&ioeventfd->next);
				vdev->ioeventfds_nr--;
				kfree(ioeventfd);
				ret = 0;
			} else
				ret = -EEXIST;

			goto out_unlock;
		}
	}

	/* Deletion request with no matching registration */
	if (fd < 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
	if (!ioeventfd) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ioeventfd->vdev = vdev;
	ioeventfd->addr = vdev->barmap[bar] + pos;
	ioeventfd->data = data;
	ioeventfd->pos = pos;
	ioeventfd->bar = bar;
	ioeventfd->count = count;
	/* Memory BARs need the memory-enable check on each write */
	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;

	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
				 vfio_pci_ioeventfd_thread, NULL,
				 &ioeventfd->virqfd, fd);
	if (ret) {
		kfree(ioeventfd);
		goto out_unlock;
	}

	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
	vdev->ioeventfds_nr++;

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}
508