xref: /qemu/util/userfaultfd.c (revision ccf6b782)
10e9b5cd6SAndrey Gruzdev /*
20e9b5cd6SAndrey Gruzdev  * Linux UFFD-WP support
30e9b5cd6SAndrey Gruzdev  *
40e9b5cd6SAndrey Gruzdev  * Copyright Virtuozzo GmbH, 2020
50e9b5cd6SAndrey Gruzdev  *
60e9b5cd6SAndrey Gruzdev  * Authors:
70e9b5cd6SAndrey Gruzdev  *  Andrey Gruzdev   <andrey.gruzdev@virtuozzo.com>
80e9b5cd6SAndrey Gruzdev  *
90e9b5cd6SAndrey Gruzdev  * This work is licensed under the terms of the GNU GPL, version 2 or
100e9b5cd6SAndrey Gruzdev  * later.  See the COPYING file in the top-level directory.
110e9b5cd6SAndrey Gruzdev  */
120e9b5cd6SAndrey Gruzdev 
130e9b5cd6SAndrey Gruzdev #include "qemu/osdep.h"
140e9b5cd6SAndrey Gruzdev #include "qemu/bitops.h"
150e9b5cd6SAndrey Gruzdev #include "qemu/error-report.h"
160e9b5cd6SAndrey Gruzdev #include "qemu/userfaultfd.h"
170e9b5cd6SAndrey Gruzdev #include "trace.h"
180e9b5cd6SAndrey Gruzdev #include <poll.h>
190e9b5cd6SAndrey Gruzdev #include <sys/syscall.h>
200e9b5cd6SAndrey Gruzdev #include <sys/ioctl.h>
21c40c0463SPeter Xu 
/*
 * Strategy used by uffd_open() to obtain a userfaultfd descriptor.
 * The zero value means "not probed yet", which is what a
 * zero-initialized static of this type starts out as.
 */
typedef enum {
    UFFD_UNINITIALIZED = 0, /* probing has not been done yet */
    UFFD_USE_DEV_PATH  = 1, /* USERFAULTFD_IOC_NEW on /dev/userfaultfd */
    UFFD_USE_SYSCALL   = 2, /* userfaultfd system call */
} uffd_open_mode;
270e9b5cd6SAndrey Gruzdev 
uffd_open(int flags)28d5890ea0SPeter Xu int uffd_open(int flags)
29d5890ea0SPeter Xu {
30d5890ea0SPeter Xu #if defined(__NR_userfaultfd)
31c40c0463SPeter Xu     static uffd_open_mode open_mode;
32c40c0463SPeter Xu     static int uffd_dev;
33c40c0463SPeter Xu 
34c40c0463SPeter Xu     /* Detect how to generate uffd desc when run the 1st time */
35c40c0463SPeter Xu     if (open_mode == UFFD_UNINITIALIZED) {
36c40c0463SPeter Xu         /*
37c40c0463SPeter Xu          * Make /dev/userfaultfd the default approach because it has better
38c40c0463SPeter Xu          * permission controls, meanwhile allows kernel faults without any
39c40c0463SPeter Xu          * privilege requirement (e.g. SYS_CAP_PTRACE).
40c40c0463SPeter Xu          */
41c40c0463SPeter Xu         uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
42c40c0463SPeter Xu         if (uffd_dev >= 0) {
43c40c0463SPeter Xu             open_mode = UFFD_USE_DEV_PATH;
44c40c0463SPeter Xu         } else {
45c40c0463SPeter Xu             /* Fallback to the system call */
46c40c0463SPeter Xu             open_mode = UFFD_USE_SYSCALL;
47c40c0463SPeter Xu         }
48c40c0463SPeter Xu         trace_uffd_detect_open_mode(open_mode);
49c40c0463SPeter Xu     }
50c40c0463SPeter Xu 
51c40c0463SPeter Xu     if (open_mode == UFFD_USE_DEV_PATH) {
52c40c0463SPeter Xu         assert(uffd_dev >= 0);
53c40c0463SPeter Xu         return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags);
54c40c0463SPeter Xu     }
55c40c0463SPeter Xu 
56d5890ea0SPeter Xu     return syscall(__NR_userfaultfd, flags);
57d5890ea0SPeter Xu #else
58d5890ea0SPeter Xu     return -EINVAL;
59d5890ea0SPeter Xu #endif
60d5890ea0SPeter Xu }
61d5890ea0SPeter Xu 
620e9b5cd6SAndrey Gruzdev /**
630e9b5cd6SAndrey Gruzdev  * uffd_query_features: query UFFD features
640e9b5cd6SAndrey Gruzdev  *
650e9b5cd6SAndrey Gruzdev  * Returns: 0 on success, negative value in case of an error
660e9b5cd6SAndrey Gruzdev  *
670e9b5cd6SAndrey Gruzdev  * @features: parameter to receive 'uffdio_api.features'
680e9b5cd6SAndrey Gruzdev  */
uffd_query_features(uint64_t * features)690e9b5cd6SAndrey Gruzdev int uffd_query_features(uint64_t *features)
700e9b5cd6SAndrey Gruzdev {
710e9b5cd6SAndrey Gruzdev     int uffd_fd;
720e9b5cd6SAndrey Gruzdev     struct uffdio_api api_struct = { 0 };
730e9b5cd6SAndrey Gruzdev     int ret = -1;
740e9b5cd6SAndrey Gruzdev 
75d5890ea0SPeter Xu     uffd_fd = uffd_open(O_CLOEXEC);
760e9b5cd6SAndrey Gruzdev     if (uffd_fd < 0) {
770e9b5cd6SAndrey Gruzdev         trace_uffd_query_features_nosys(errno);
780e9b5cd6SAndrey Gruzdev         return -1;
790e9b5cd6SAndrey Gruzdev     }
800e9b5cd6SAndrey Gruzdev 
810e9b5cd6SAndrey Gruzdev     api_struct.api = UFFD_API;
820e9b5cd6SAndrey Gruzdev     api_struct.features = 0;
830e9b5cd6SAndrey Gruzdev 
840e9b5cd6SAndrey Gruzdev     if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
850e9b5cd6SAndrey Gruzdev         trace_uffd_query_features_api_failed(errno);
860e9b5cd6SAndrey Gruzdev         goto out;
870e9b5cd6SAndrey Gruzdev     }
880e9b5cd6SAndrey Gruzdev     *features = api_struct.features;
890e9b5cd6SAndrey Gruzdev     ret = 0;
900e9b5cd6SAndrey Gruzdev 
910e9b5cd6SAndrey Gruzdev out:
920e9b5cd6SAndrey Gruzdev     close(uffd_fd);
930e9b5cd6SAndrey Gruzdev     return ret;
940e9b5cd6SAndrey Gruzdev }
950e9b5cd6SAndrey Gruzdev 
960e9b5cd6SAndrey Gruzdev /**
970e9b5cd6SAndrey Gruzdev  * uffd_create_fd: create UFFD file descriptor
980e9b5cd6SAndrey Gruzdev  *
990e9b5cd6SAndrey Gruzdev  * Returns non-negative file descriptor or negative value in case of an error
1000e9b5cd6SAndrey Gruzdev  *
1010e9b5cd6SAndrey Gruzdev  * @features: UFFD features to request
1020e9b5cd6SAndrey Gruzdev  * @non_blocking: create UFFD file descriptor for non-blocking operation
1030e9b5cd6SAndrey Gruzdev  */
uffd_create_fd(uint64_t features,bool non_blocking)1040e9b5cd6SAndrey Gruzdev int uffd_create_fd(uint64_t features, bool non_blocking)
1050e9b5cd6SAndrey Gruzdev {
1060e9b5cd6SAndrey Gruzdev     int uffd_fd;
1070e9b5cd6SAndrey Gruzdev     int flags;
1080e9b5cd6SAndrey Gruzdev     struct uffdio_api api_struct = { 0 };
1090e9b5cd6SAndrey Gruzdev     uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER);
1100e9b5cd6SAndrey Gruzdev 
1110e9b5cd6SAndrey Gruzdev     flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0);
112d5890ea0SPeter Xu     uffd_fd = uffd_open(flags);
1130e9b5cd6SAndrey Gruzdev     if (uffd_fd < 0) {
1140e9b5cd6SAndrey Gruzdev         trace_uffd_create_fd_nosys(errno);
1150e9b5cd6SAndrey Gruzdev         return -1;
1160e9b5cd6SAndrey Gruzdev     }
1170e9b5cd6SAndrey Gruzdev 
1180e9b5cd6SAndrey Gruzdev     api_struct.api = UFFD_API;
1190e9b5cd6SAndrey Gruzdev     api_struct.features = features;
1200e9b5cd6SAndrey Gruzdev     if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
1210e9b5cd6SAndrey Gruzdev         trace_uffd_create_fd_api_failed(errno);
1220e9b5cd6SAndrey Gruzdev         goto fail;
1230e9b5cd6SAndrey Gruzdev     }
1240e9b5cd6SAndrey Gruzdev     if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
1250e9b5cd6SAndrey Gruzdev         trace_uffd_create_fd_api_noioctl(ioctl_mask, api_struct.ioctls);
1260e9b5cd6SAndrey Gruzdev         goto fail;
1270e9b5cd6SAndrey Gruzdev     }
1280e9b5cd6SAndrey Gruzdev 
1290e9b5cd6SAndrey Gruzdev     return uffd_fd;
1300e9b5cd6SAndrey Gruzdev 
1310e9b5cd6SAndrey Gruzdev fail:
1320e9b5cd6SAndrey Gruzdev     close(uffd_fd);
1330e9b5cd6SAndrey Gruzdev     return -1;
1340e9b5cd6SAndrey Gruzdev }
1350e9b5cd6SAndrey Gruzdev 
/**
 * uffd_close_fd: close UFFD file descriptor
 *
 * Asserts that the descriptor is non-negative, then closes it.  The
 * result of close() is not reported to the caller.
 *
 * @uffd_fd: UFFD file descriptor
 */
void uffd_close_fd(int uffd_fd)
{
    /* A negative value here is treated as a caller bug */
    assert(uffd_fd >= 0);

    (void) close(uffd_fd);
}
1460e9b5cd6SAndrey Gruzdev 
/**
 * uffd_register_memory: register memory range via UFFD-IO
 *
 * Issues UFFDIO_REGISTER for [addr, addr + length) with the given mode.
 *
 * Returns 0 in case of success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 * @mode: UFFD register mode (UFFDIO_REGISTER_MODE_MISSING, ...)
 * @ioctls: optional pointer to receive supported IOCTL mask; may be NULL,
 *          written only on success
 */
int uffd_register_memory(int uffd_fd, void *addr, uint64_t length,
        uint64_t mode, uint64_t *ioctls)
{
    struct uffdio_register reg = {
        .range = {
            .start = (uintptr_t) addr,
            .len = length,
        },
        .mode = mode,
    };

    if (ioctl(uffd_fd, UFFDIO_REGISTER, &reg) != 0) {
        trace_uffd_register_memory_failed(addr, length, mode, errno);
        return -1;
    }

    if (ioctls != NULL) {
        *ioctls = reg.ioctls;
    }
    return 0;
}
1770e9b5cd6SAndrey Gruzdev 
/**
 * uffd_unregister_memory: un-register memory range with UFFD-IO
 *
 * Issues UFFDIO_UNREGISTER for [addr, addr + length).
 *
 * Returns 0 in case of success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 */
int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length)
{
    struct uffdio_range range = {
        .start = (uintptr_t) addr,
        .len = length,
    };

    if (ioctl(uffd_fd, UFFDIO_UNREGISTER, &range) == 0) {
        return 0;
    }

    trace_uffd_unregister_memory_failed(addr, length, errno);
    return -1;
}
2010e9b5cd6SAndrey Gruzdev 
/**
 * uffd_change_protection: protect/un-protect memory range for writes via UFFD-IO
 *
 * Issues UFFDIO_WRITEPROTECT for [addr, addr + length).  A failure is
 * reported through error_report().
 *
 * Returns 0 on success, negative value in case of error
 *
 * NOTE(review): this returns -1 on failure, whereas uffd_copy_page(),
 * uffd_zero_page() and uffd_wakeup() return -errno.
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 * @wp: write-protect/unprotect
 * @dont_wake: do not wake threads waiting on wr-protected page; ignored
 *             when @wp is true
 */
int uffd_change_protection(int uffd_fd, void *addr, uint64_t length,
        bool wp, bool dont_wake)
{
    struct uffdio_writeprotect wp_req = {
        .range = {
            .start = (uintptr_t) addr,
            .len = length,
        },
    };

    if (wp) {
        wp_req.mode = UFFDIO_WRITEPROTECT_MODE_WP;
    } else if (dont_wake) {
        /* DONTWAKE is meaningful only on protection release */
        wp_req.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
    } else {
        wp_req.mode = 0;
    }

    if (ioctl(uffd_fd, UFFDIO_WRITEPROTECT, &wp_req) != 0) {
        error_report("uffd_change_protection() failed: addr=%p len=%" PRIu64
                " mode=%" PRIx64 " errno=%i", addr, length,
                (uint64_t) wp_req.mode, errno);
        return -1;
    }

    return 0;
}
2360e9b5cd6SAndrey Gruzdev 
/**
 * uffd_copy_page: copy range of pages to destination via UFFD-IO
 *
 * Issues UFFDIO_COPY to copy the source range into the destination
 * range, resolving a missing page fault somewhere in the destination.
 * A failure is reported through error_report().
 *
 * Returns 0 on success, -errno in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @dst_addr: destination base address
 * @src_addr: source base address
 * @length: length of the range to copy
 * @dont_wake: do not wake threads waiting on missing page
 */
int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr,
        uint64_t length, bool dont_wake)
{
    struct uffdio_copy copy_req = {
        .dst = (uintptr_t) dst_addr,
        .src = (uintptr_t) src_addr,
        .len = length,
        .mode = dont_wake ? UFFDIO_COPY_MODE_DONTWAKE : 0,
    };
    int saved_errno;

    if (ioctl(uffd_fd, UFFDIO_COPY, &copy_req) == 0) {
        return 0;
    }

    /* Capture errno first so the returned code is the ioctl's own */
    saved_errno = errno;
    error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64
            " mode=%" PRIx64 " errno=%i", dst_addr, src_addr,
            length, (uint64_t) copy_req.mode, saved_errno);
    return -saved_errno;
}
2710e9b5cd6SAndrey Gruzdev 
/**
 * uffd_zero_page: fill range of pages with zeroes via UFFD-IO
 *
 * Issues UFFDIO_ZEROPAGE for [addr, addr + length) to resolve a missing
 * page fault within the range.  A failure is reported through
 * error_report().
 *
 * Returns 0 on success, -errno in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address
 * @length: length of the range to fill with zeroes
 * @dont_wake: do not wake threads waiting on missing page
 */
int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake)
{
    struct uffdio_zeropage zero_req = {
        .range = {
            .start = (uintptr_t) addr,
            .len = length,
        },
        .mode = dont_wake ? UFFDIO_ZEROPAGE_MODE_DONTWAKE : 0,
    };
    int saved_errno;

    if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &zero_req) == 0) {
        return 0;
    }

    /* Capture errno first so the returned code is the ioctl's own */
    saved_errno = errno;
    error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64
            " mode=%" PRIx64 " errno=%i", addr, length,
            (uint64_t) zero_req.mode, saved_errno);
    return -saved_errno;
}
3020e9b5cd6SAndrey Gruzdev 
/**
 * uffd_wakeup: wake up threads waiting on page UFFD-managed page fault resolution
 *
 * Issues UFFDIO_WAKE for [addr, addr + length), waking threads waiting
 * on any page of that range.  The main use case is resolving a batch of
 * page faults via UFFD-IO IOCTLs with MODE_DONTWAKE set and then
 * satisfying all waits for the whole memory range with a single call to
 * uffd_wakeup().  A failure is reported through error_report().
 *
 * Returns 0 on success, -errno in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address
 * @length: length of the range
 */
int uffd_wakeup(int uffd_fd, void *addr, uint64_t length)
{
    struct uffdio_range range = {
        .start = (uintptr_t) addr,
        .len = length,
    };
    int saved_errno;

    if (ioctl(uffd_fd, UFFDIO_WAKE, &range) == 0) {
        return 0;
    }

    /* Capture errno first so the returned code is the ioctl's own */
    saved_errno = errno;
    error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i",
            addr, length, saved_errno);
    return -saved_errno;
}
3330e9b5cd6SAndrey Gruzdev 
/**
 * uffd_read_events: read pending UFFD events
 *
 * Reads up to @count messages from the descriptor in one read() call,
 * retrying when the call is interrupted (EINTR).  EAGAIN is reported as
 * "no messages"; any other failure is reported through error_report().
 *
 * Returns number of fetched messages, 0 if none is available or
 * negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @msgs: pointer to message buffer
 * @count: number of messages that can fit in the buffer
 */
int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count)
{
    ssize_t nread;

    for (;;) {
        nread = read(uffd_fd, msgs, count * sizeof(struct uffd_msg));
        if (nread >= 0) {
            /* Convert the byte count into a whole number of messages */
            return (int) (nread / sizeof(struct uffd_msg));
        }
        if (errno != EINTR) {
            break;
        }
    }

    if (errno == EAGAIN) {
        return 0;
    }

    error_report("uffd_read_events() failed: errno=%i", errno);
    return -1;
}
361