/*
 * Linux UFFD-WP support
 *
 * Copyright Virtuozzo GmbH, 2020
 *
 * Authors:
 *  Andrey Gruzdev   <andrey.gruzdev@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
120e9b5cd6SAndrey Gruzdev
130e9b5cd6SAndrey Gruzdev #include "qemu/osdep.h"
140e9b5cd6SAndrey Gruzdev #include "qemu/bitops.h"
150e9b5cd6SAndrey Gruzdev #include "qemu/error-report.h"
160e9b5cd6SAndrey Gruzdev #include "qemu/userfaultfd.h"
170e9b5cd6SAndrey Gruzdev #include "trace.h"
180e9b5cd6SAndrey Gruzdev #include <poll.h>
190e9b5cd6SAndrey Gruzdev #include <sys/syscall.h>
200e9b5cd6SAndrey Gruzdev #include <sys/ioctl.h>
21c40c0463SPeter Xu
/* How uffd_open() obtains new userfaultfd descriptors */
typedef enum {
    /* Not probed yet; must stay 0 so zero-initialized state means "unknown" */
    UFFD_UNINITIALIZED = 0,
    /* Descriptors are created through the /dev/userfaultfd device */
    UFFD_USE_DEV_PATH = 1,
    /* Descriptors are created through the userfaultfd system call */
    UFFD_USE_SYSCALL = 2,
} uffd_open_mode;
270e9b5cd6SAndrey Gruzdev
uffd_open(int flags)28d5890ea0SPeter Xu int uffd_open(int flags)
29d5890ea0SPeter Xu {
30d5890ea0SPeter Xu #if defined(__NR_userfaultfd)
31c40c0463SPeter Xu static uffd_open_mode open_mode;
32c40c0463SPeter Xu static int uffd_dev;
33c40c0463SPeter Xu
34c40c0463SPeter Xu /* Detect how to generate uffd desc when run the 1st time */
35c40c0463SPeter Xu if (open_mode == UFFD_UNINITIALIZED) {
36c40c0463SPeter Xu /*
37c40c0463SPeter Xu * Make /dev/userfaultfd the default approach because it has better
38c40c0463SPeter Xu * permission controls, meanwhile allows kernel faults without any
39c40c0463SPeter Xu * privilege requirement (e.g. SYS_CAP_PTRACE).
40c40c0463SPeter Xu */
41c40c0463SPeter Xu uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
42c40c0463SPeter Xu if (uffd_dev >= 0) {
43c40c0463SPeter Xu open_mode = UFFD_USE_DEV_PATH;
44c40c0463SPeter Xu } else {
45c40c0463SPeter Xu /* Fallback to the system call */
46c40c0463SPeter Xu open_mode = UFFD_USE_SYSCALL;
47c40c0463SPeter Xu }
48c40c0463SPeter Xu trace_uffd_detect_open_mode(open_mode);
49c40c0463SPeter Xu }
50c40c0463SPeter Xu
51c40c0463SPeter Xu if (open_mode == UFFD_USE_DEV_PATH) {
52c40c0463SPeter Xu assert(uffd_dev >= 0);
53c40c0463SPeter Xu return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags);
54c40c0463SPeter Xu }
55c40c0463SPeter Xu
56d5890ea0SPeter Xu return syscall(__NR_userfaultfd, flags);
57d5890ea0SPeter Xu #else
58d5890ea0SPeter Xu return -EINVAL;
59d5890ea0SPeter Xu #endif
60d5890ea0SPeter Xu }
61d5890ea0SPeter Xu
620e9b5cd6SAndrey Gruzdev /**
630e9b5cd6SAndrey Gruzdev * uffd_query_features: query UFFD features
640e9b5cd6SAndrey Gruzdev *
650e9b5cd6SAndrey Gruzdev * Returns: 0 on success, negative value in case of an error
660e9b5cd6SAndrey Gruzdev *
670e9b5cd6SAndrey Gruzdev * @features: parameter to receive 'uffdio_api.features'
680e9b5cd6SAndrey Gruzdev */
uffd_query_features(uint64_t * features)690e9b5cd6SAndrey Gruzdev int uffd_query_features(uint64_t *features)
700e9b5cd6SAndrey Gruzdev {
710e9b5cd6SAndrey Gruzdev int uffd_fd;
720e9b5cd6SAndrey Gruzdev struct uffdio_api api_struct = { 0 };
730e9b5cd6SAndrey Gruzdev int ret = -1;
740e9b5cd6SAndrey Gruzdev
75d5890ea0SPeter Xu uffd_fd = uffd_open(O_CLOEXEC);
760e9b5cd6SAndrey Gruzdev if (uffd_fd < 0) {
770e9b5cd6SAndrey Gruzdev trace_uffd_query_features_nosys(errno);
780e9b5cd6SAndrey Gruzdev return -1;
790e9b5cd6SAndrey Gruzdev }
800e9b5cd6SAndrey Gruzdev
810e9b5cd6SAndrey Gruzdev api_struct.api = UFFD_API;
820e9b5cd6SAndrey Gruzdev api_struct.features = 0;
830e9b5cd6SAndrey Gruzdev
840e9b5cd6SAndrey Gruzdev if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
850e9b5cd6SAndrey Gruzdev trace_uffd_query_features_api_failed(errno);
860e9b5cd6SAndrey Gruzdev goto out;
870e9b5cd6SAndrey Gruzdev }
880e9b5cd6SAndrey Gruzdev *features = api_struct.features;
890e9b5cd6SAndrey Gruzdev ret = 0;
900e9b5cd6SAndrey Gruzdev
910e9b5cd6SAndrey Gruzdev out:
920e9b5cd6SAndrey Gruzdev close(uffd_fd);
930e9b5cd6SAndrey Gruzdev return ret;
940e9b5cd6SAndrey Gruzdev }
950e9b5cd6SAndrey Gruzdev
960e9b5cd6SAndrey Gruzdev /**
970e9b5cd6SAndrey Gruzdev * uffd_create_fd: create UFFD file descriptor
980e9b5cd6SAndrey Gruzdev *
990e9b5cd6SAndrey Gruzdev * Returns non-negative file descriptor or negative value in case of an error
1000e9b5cd6SAndrey Gruzdev *
1010e9b5cd6SAndrey Gruzdev * @features: UFFD features to request
1020e9b5cd6SAndrey Gruzdev * @non_blocking: create UFFD file descriptor for non-blocking operation
1030e9b5cd6SAndrey Gruzdev */
uffd_create_fd(uint64_t features,bool non_blocking)1040e9b5cd6SAndrey Gruzdev int uffd_create_fd(uint64_t features, bool non_blocking)
1050e9b5cd6SAndrey Gruzdev {
1060e9b5cd6SAndrey Gruzdev int uffd_fd;
1070e9b5cd6SAndrey Gruzdev int flags;
1080e9b5cd6SAndrey Gruzdev struct uffdio_api api_struct = { 0 };
1090e9b5cd6SAndrey Gruzdev uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER);
1100e9b5cd6SAndrey Gruzdev
1110e9b5cd6SAndrey Gruzdev flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0);
112d5890ea0SPeter Xu uffd_fd = uffd_open(flags);
1130e9b5cd6SAndrey Gruzdev if (uffd_fd < 0) {
1140e9b5cd6SAndrey Gruzdev trace_uffd_create_fd_nosys(errno);
1150e9b5cd6SAndrey Gruzdev return -1;
1160e9b5cd6SAndrey Gruzdev }
1170e9b5cd6SAndrey Gruzdev
1180e9b5cd6SAndrey Gruzdev api_struct.api = UFFD_API;
1190e9b5cd6SAndrey Gruzdev api_struct.features = features;
1200e9b5cd6SAndrey Gruzdev if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
1210e9b5cd6SAndrey Gruzdev trace_uffd_create_fd_api_failed(errno);
1220e9b5cd6SAndrey Gruzdev goto fail;
1230e9b5cd6SAndrey Gruzdev }
1240e9b5cd6SAndrey Gruzdev if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
1250e9b5cd6SAndrey Gruzdev trace_uffd_create_fd_api_noioctl(ioctl_mask, api_struct.ioctls);
1260e9b5cd6SAndrey Gruzdev goto fail;
1270e9b5cd6SAndrey Gruzdev }
1280e9b5cd6SAndrey Gruzdev
1290e9b5cd6SAndrey Gruzdev return uffd_fd;
1300e9b5cd6SAndrey Gruzdev
1310e9b5cd6SAndrey Gruzdev fail:
1320e9b5cd6SAndrey Gruzdev close(uffd_fd);
1330e9b5cd6SAndrey Gruzdev return -1;
1340e9b5cd6SAndrey Gruzdev }
1350e9b5cd6SAndrey Gruzdev
/**
 * uffd_close_fd: close UFFD file descriptor
 *
 * Asserts that the descriptor is non-negative, then closes it.  The result
 * of close() is not checked.
 *
 * @uffd_fd: UFFD file descriptor
 */
void uffd_close_fd(int uffd_fd)
{
    /* A negative descriptor here means the caller never obtained one */
    assert(uffd_fd >= 0);

    close(uffd_fd);
}
1460e9b5cd6SAndrey Gruzdev
/**
 * uffd_register_memory: register memory range via UFFD-IO
 *
 * Issues UFFDIO_REGISTER for the given range and, when requested, hands back
 * the ioctl mask reported for that range.
 *
 * Returns 0 in case of success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 * @mode: UFFD register mode (UFFDIO_REGISTER_MODE_MISSING, ...)
 * @ioctls: optional pointer to receive supported IOCTL mask (may be NULL);
 *          written only on success
 */
int uffd_register_memory(int uffd_fd, void *addr, uint64_t length,
                         uint64_t mode, uint64_t *ioctls)
{
    struct uffdio_register reg;

    reg.mode = mode;
    reg.range.len = length;
    reg.range.start = (uintptr_t) addr;

    if (ioctl(uffd_fd, UFFDIO_REGISTER, &reg) != 0) {
        trace_uffd_register_memory_failed(addr, length, mode, errno);
        return -1;
    }

    if (ioctls != NULL) {
        *ioctls = reg.ioctls;
    }
    return 0;
}
1770e9b5cd6SAndrey Gruzdev
/**
 * uffd_unregister_memory: un-register memory range with UFFD-IO
 *
 * Issues UFFDIO_UNREGISTER for the given range.
 *
 * Returns 0 in case of success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 */
int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length)
{
    struct uffdio_range range = {
        .start = (uintptr_t) addr,
        .len = length,
    };

    if (ioctl(uffd_fd, UFFDIO_UNREGISTER, &range) != 0) {
        trace_uffd_unregister_memory_failed(addr, length, errno);
        return -1;
    }
    return 0;
}
2010e9b5cd6SAndrey Gruzdev
/**
 * uffd_change_protection: protect/un-protect memory range for writes via UFFD-IO
 *
 * Issues UFFDIO_WRITEPROTECT for the given range.  A failure is reported
 * through error_report().
 *
 * Returns 0 on success, negative value in case of error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 * @wp: write-protect/unprotect
 * @dont_wake: do not wake threads waiting on wr-protected page; ignored when
 *             @wp is true
 */
int uffd_change_protection(int uffd_fd, void *addr, uint64_t length,
                           bool wp, bool dont_wake)
{
    struct uffdio_writeprotect req;
    uint64_t mode;

    if (wp) {
        mode = UFFDIO_WRITEPROTECT_MODE_WP;
    } else if (dont_wake) {
        /* DONTWAKE is meaningful only on protection release */
        mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
    } else {
        mode = 0;
    }

    req.range.start = (uintptr_t) addr;
    req.range.len = length;
    req.mode = mode;

    if (ioctl(uffd_fd, UFFDIO_WRITEPROTECT, &req) != 0) {
        error_report("uffd_change_protection() failed: addr=%p len=%" PRIu64
                     " mode=%" PRIx64 " errno=%i", addr, length,
                     mode, errno);
        return -1;
    }
    return 0;
}
2360e9b5cd6SAndrey Gruzdev
/**
 * uffd_copy_page: copy range of pages to destination via UFFD-IO
 *
 * Copy range of source pages to the destination to resolve
 * missing page fault somewhere in the destination range.
 * A failure is reported through error_report().
 *
 * Returns 0 on success, -errno in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @dst_addr: destination base address
 * @src_addr: source base address
 * @length: length of the range to copy
 * @dont_wake: do not wake threads waiting on missing page
 */
int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr,
                   uint64_t length, bool dont_wake)
{
    struct uffdio_copy req;
    int saved_errno;

    req.mode = dont_wake ? UFFDIO_COPY_MODE_DONTWAKE : 0;
    req.len = length;
    req.src = (uintptr_t) src_addr;
    req.dst = (uintptr_t) dst_addr;

    if (ioctl(uffd_fd, UFFDIO_COPY, &req) == 0) {
        return 0;
    }

    /* Capture errno first: reporting the error may overwrite it */
    saved_errno = errno;
    error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64
                 " mode=%" PRIx64 " errno=%i", dst_addr, src_addr,
                 length, (uint64_t) req.mode, saved_errno);
    return -saved_errno;
}
2710e9b5cd6SAndrey Gruzdev
/**
 * uffd_zero_page: fill range of pages with zeroes via UFFD-IO
 *
 * Fill range pages with zeroes to resolve missing page fault within the range.
 * A failure is reported through error_report().
 *
 * Returns 0 on success, -errno in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address
 * @length: length of the range to fill with zeroes
 * @dont_wake: do not wake threads waiting on missing page
 */
int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake)
{
    struct uffdio_zeropage req;
    int saved_errno;

    req.mode = dont_wake ? UFFDIO_ZEROPAGE_MODE_DONTWAKE : 0;
    req.range.len = length;
    req.range.start = (uintptr_t) addr;

    if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &req) == 0) {
        return 0;
    }

    /* Capture errno first: reporting the error may overwrite it */
    saved_errno = errno;
    error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64
                 " mode=%" PRIx64 " errno=%i", addr, length,
                 (uint64_t) req.mode, saved_errno);
    return -saved_errno;
}
3020e9b5cd6SAndrey Gruzdev
/**
 * uffd_wakeup: wake up threads waiting on page UFFD-managed page fault resolution
 *
 * Wake up threads waiting on any page/pages from the designated range.
 * The main use case is when during some period, page faults are resolved
 * via UFFD-IO IOCTLs with MODE_DONTWAKE flag set, then after that all waits
 * for the whole memory range are satisfied in a single call to uffd_wakeup().
 * A failure is reported through error_report().
 *
 * Returns 0 on success, -errno in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address
 * @length: length of the range
 */
int uffd_wakeup(int uffd_fd, void *addr, uint64_t length)
{
    struct uffdio_range range = {
        .start = (uintptr_t) addr,
        .len = length,
    };
    int saved_errno;

    if (ioctl(uffd_fd, UFFDIO_WAKE, &range) == 0) {
        return 0;
    }

    /* Capture errno first: reporting the error may overwrite it */
    saved_errno = errno;
    error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i",
                 addr, length, saved_errno);
    return -saved_errno;
}
3330e9b5cd6SAndrey Gruzdev
/**
 * uffd_read_events: read pending UFFD events
 *
 * Reads up to @count messages from the descriptor, restarting the read when
 * it is interrupted (EINTR).  EAGAIN is treated as "nothing pending"; any
 * other failure is reported through error_report().
 *
 * Returns number of fetched messages, 0 if none is available or
 * negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @msgs: pointer to message buffer
 * @count: number of messages that can fit in the buffer
 */
int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count)
{
    const size_t msg_size = sizeof(struct uffd_msg);
    ssize_t nread;

    for (;;) {
        nread = read(uffd_fd, msgs, count * msg_size);
        if (nread >= 0 || errno != EINTR) {
            break;
        }
    }

    if (nread >= 0) {
        /* Convert the byte count into a number of whole messages */
        return (int) (nread / msg_size);
    }
    if (errno == EAGAIN) {
        return 0;
    }

    error_report("uffd_read_events() failed: errno=%i", errno);
    return -1;
}
361