10e9b5cd6SAndrey Gruzdev /* 20e9b5cd6SAndrey Gruzdev * Linux UFFD-WP support 30e9b5cd6SAndrey Gruzdev * 40e9b5cd6SAndrey Gruzdev * Copyright Virtuozzo GmbH, 2020 50e9b5cd6SAndrey Gruzdev * 60e9b5cd6SAndrey Gruzdev * Authors: 70e9b5cd6SAndrey Gruzdev * Andrey Gruzdev <andrey.gruzdev@virtuozzo.com> 80e9b5cd6SAndrey Gruzdev * 90e9b5cd6SAndrey Gruzdev * This work is licensed under the terms of the GNU GPL, version 2 or 100e9b5cd6SAndrey Gruzdev * later. See the COPYING file in the top-level directory. 110e9b5cd6SAndrey Gruzdev */ 120e9b5cd6SAndrey Gruzdev 130e9b5cd6SAndrey Gruzdev #include "qemu/osdep.h" 140e9b5cd6SAndrey Gruzdev #include "qemu/bitops.h" 150e9b5cd6SAndrey Gruzdev #include "qemu/error-report.h" 160e9b5cd6SAndrey Gruzdev #include "qemu/userfaultfd.h" 170e9b5cd6SAndrey Gruzdev #include "trace.h" 180e9b5cd6SAndrey Gruzdev #include <poll.h> 190e9b5cd6SAndrey Gruzdev #include <sys/syscall.h> 200e9b5cd6SAndrey Gruzdev #include <sys/ioctl.h> 21c40c0463SPeter Xu #include <fcntl.h> 22c40c0463SPeter Xu 23c40c0463SPeter Xu typedef enum { 24c40c0463SPeter Xu UFFD_UNINITIALIZED = 0, 25c40c0463SPeter Xu UFFD_USE_DEV_PATH, 26c40c0463SPeter Xu UFFD_USE_SYSCALL, 27c40c0463SPeter Xu } uffd_open_mode; 280e9b5cd6SAndrey Gruzdev 29d5890ea0SPeter Xu int uffd_open(int flags) 30d5890ea0SPeter Xu { 31d5890ea0SPeter Xu #if defined(__NR_userfaultfd) 32c40c0463SPeter Xu static uffd_open_mode open_mode; 33c40c0463SPeter Xu static int uffd_dev; 34c40c0463SPeter Xu 35c40c0463SPeter Xu /* Detect how to generate uffd desc when run the 1st time */ 36c40c0463SPeter Xu if (open_mode == UFFD_UNINITIALIZED) { 37c40c0463SPeter Xu /* 38c40c0463SPeter Xu * Make /dev/userfaultfd the default approach because it has better 39c40c0463SPeter Xu * permission controls, meanwhile allows kernel faults without any 40c40c0463SPeter Xu * privilege requirement (e.g. SYS_CAP_PTRACE). 41c40c0463SPeter Xu */ 42c40c0463SPeter Xu uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); 43c40c0463SPeter Xu if (uffd_dev >= 0) { 44c40c0463SPeter Xu open_mode = UFFD_USE_DEV_PATH; 45c40c0463SPeter Xu } else { 46c40c0463SPeter Xu /* Fallback to the system call */ 47c40c0463SPeter Xu open_mode = UFFD_USE_SYSCALL; 48c40c0463SPeter Xu } 49c40c0463SPeter Xu trace_uffd_detect_open_mode(open_mode); 50c40c0463SPeter Xu } 51c40c0463SPeter Xu 52c40c0463SPeter Xu if (open_mode == UFFD_USE_DEV_PATH) { 53c40c0463SPeter Xu assert(uffd_dev >= 0); 54c40c0463SPeter Xu return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags); 55c40c0463SPeter Xu } 56c40c0463SPeter Xu 57d5890ea0SPeter Xu return syscall(__NR_userfaultfd, flags); 58d5890ea0SPeter Xu #else 59d5890ea0SPeter Xu return -EINVAL; 60d5890ea0SPeter Xu #endif 61d5890ea0SPeter Xu } 62d5890ea0SPeter Xu 630e9b5cd6SAndrey Gruzdev /** 640e9b5cd6SAndrey Gruzdev * uffd_query_features: query UFFD features 650e9b5cd6SAndrey Gruzdev * 660e9b5cd6SAndrey Gruzdev * Returns: 0 on success, negative value in case of an error 670e9b5cd6SAndrey Gruzdev * 680e9b5cd6SAndrey Gruzdev * @features: parameter to receive 'uffdio_api.features' 690e9b5cd6SAndrey Gruzdev */ 700e9b5cd6SAndrey Gruzdev int uffd_query_features(uint64_t *features) 710e9b5cd6SAndrey Gruzdev { 720e9b5cd6SAndrey Gruzdev int uffd_fd; 730e9b5cd6SAndrey Gruzdev struct uffdio_api api_struct = { 0 }; 740e9b5cd6SAndrey Gruzdev int ret = -1; 750e9b5cd6SAndrey Gruzdev 76d5890ea0SPeter Xu uffd_fd = uffd_open(O_CLOEXEC); 770e9b5cd6SAndrey Gruzdev if (uffd_fd < 0) { 780e9b5cd6SAndrey Gruzdev trace_uffd_query_features_nosys(errno); 790e9b5cd6SAndrey Gruzdev return -1; 800e9b5cd6SAndrey Gruzdev } 810e9b5cd6SAndrey Gruzdev 820e9b5cd6SAndrey Gruzdev api_struct.api = UFFD_API; 830e9b5cd6SAndrey Gruzdev api_struct.features = 0; 840e9b5cd6SAndrey Gruzdev 850e9b5cd6SAndrey Gruzdev if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) { 860e9b5cd6SAndrey Gruzdev trace_uffd_query_features_api_failed(errno); 870e9b5cd6SAndrey Gruzdev goto out; 880e9b5cd6SAndrey Gruzdev } 890e9b5cd6SAndrey Gruzdev *features = api_struct.features; 900e9b5cd6SAndrey Gruzdev ret = 0; 910e9b5cd6SAndrey Gruzdev 920e9b5cd6SAndrey Gruzdev out: 930e9b5cd6SAndrey Gruzdev close(uffd_fd); 940e9b5cd6SAndrey Gruzdev return ret; 950e9b5cd6SAndrey Gruzdev } 960e9b5cd6SAndrey Gruzdev 970e9b5cd6SAndrey Gruzdev /** 980e9b5cd6SAndrey Gruzdev * uffd_create_fd: create UFFD file descriptor 990e9b5cd6SAndrey Gruzdev * 1000e9b5cd6SAndrey Gruzdev * Returns non-negative file descriptor or negative value in case of an error 1010e9b5cd6SAndrey Gruzdev * 1020e9b5cd6SAndrey Gruzdev * @features: UFFD features to request 1030e9b5cd6SAndrey Gruzdev * @non_blocking: create UFFD file descriptor for non-blocking operation 1040e9b5cd6SAndrey Gruzdev */ 1050e9b5cd6SAndrey Gruzdev int uffd_create_fd(uint64_t features, bool non_blocking) 1060e9b5cd6SAndrey Gruzdev { 1070e9b5cd6SAndrey Gruzdev int uffd_fd; 1080e9b5cd6SAndrey Gruzdev int flags; 1090e9b5cd6SAndrey Gruzdev struct uffdio_api api_struct = { 0 }; 1100e9b5cd6SAndrey Gruzdev uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER); 1110e9b5cd6SAndrey Gruzdev 1120e9b5cd6SAndrey Gruzdev flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0); 113d5890ea0SPeter Xu uffd_fd = uffd_open(flags); 1140e9b5cd6SAndrey Gruzdev if (uffd_fd < 0) { 1150e9b5cd6SAndrey Gruzdev trace_uffd_create_fd_nosys(errno); 1160e9b5cd6SAndrey Gruzdev return -1; 1170e9b5cd6SAndrey Gruzdev } 1180e9b5cd6SAndrey Gruzdev 1190e9b5cd6SAndrey Gruzdev api_struct.api = UFFD_API; 1200e9b5cd6SAndrey Gruzdev api_struct.features = features; 1210e9b5cd6SAndrey Gruzdev if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) { 1220e9b5cd6SAndrey Gruzdev trace_uffd_create_fd_api_failed(errno); 1230e9b5cd6SAndrey Gruzdev goto fail; 1240e9b5cd6SAndrey Gruzdev } 1250e9b5cd6SAndrey Gruzdev if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) { 1260e9b5cd6SAndrey Gruzdev trace_uffd_create_fd_api_noioctl(ioctl_mask, api_struct.ioctls); 1270e9b5cd6SAndrey Gruzdev goto fail; 1280e9b5cd6SAndrey Gruzdev } 1290e9b5cd6SAndrey Gruzdev 1300e9b5cd6SAndrey Gruzdev return uffd_fd; 1310e9b5cd6SAndrey Gruzdev 1320e9b5cd6SAndrey Gruzdev fail: 1330e9b5cd6SAndrey Gruzdev close(uffd_fd); 1340e9b5cd6SAndrey Gruzdev return -1; 1350e9b5cd6SAndrey Gruzdev } 1360e9b5cd6SAndrey Gruzdev 1370e9b5cd6SAndrey Gruzdev /** 1380e9b5cd6SAndrey Gruzdev * uffd_close_fd: close UFFD file descriptor 1390e9b5cd6SAndrey Gruzdev * 1400e9b5cd6SAndrey Gruzdev * @uffd_fd: UFFD file descriptor 1410e9b5cd6SAndrey Gruzdev */ 1420e9b5cd6SAndrey Gruzdev void uffd_close_fd(int uffd_fd) 1430e9b5cd6SAndrey Gruzdev { 1440e9b5cd6SAndrey Gruzdev assert(uffd_fd >= 0); 1450e9b5cd6SAndrey Gruzdev close(uffd_fd); 1460e9b5cd6SAndrey Gruzdev } 1470e9b5cd6SAndrey Gruzdev 1480e9b5cd6SAndrey Gruzdev /** 1490e9b5cd6SAndrey Gruzdev * uffd_register_memory: register memory range via UFFD-IO 1500e9b5cd6SAndrey Gruzdev * 1510e9b5cd6SAndrey Gruzdev * Returns 0 in case of success, negative value in case of an error 1520e9b5cd6SAndrey Gruzdev * 1530e9b5cd6SAndrey Gruzdev * @uffd_fd: UFFD file descriptor 1540e9b5cd6SAndrey Gruzdev * @addr: base address of memory range 1550e9b5cd6SAndrey Gruzdev * @length: length of memory range 1560e9b5cd6SAndrey Gruzdev * @mode: UFFD register mode (UFFDIO_REGISTER_MODE_MISSING, ...) 1570e9b5cd6SAndrey Gruzdev * @ioctls: optional pointer to receive supported IOCTL mask 1580e9b5cd6SAndrey Gruzdev */ 1590e9b5cd6SAndrey Gruzdev int uffd_register_memory(int uffd_fd, void *addr, uint64_t length, 1600e9b5cd6SAndrey Gruzdev uint64_t mode, uint64_t *ioctls) 1610e9b5cd6SAndrey Gruzdev { 1620e9b5cd6SAndrey Gruzdev struct uffdio_register uffd_register; 1630e9b5cd6SAndrey Gruzdev 1640e9b5cd6SAndrey Gruzdev uffd_register.range.start = (uintptr_t) addr; 1650e9b5cd6SAndrey Gruzdev uffd_register.range.len = length; 1660e9b5cd6SAndrey Gruzdev uffd_register.mode = mode; 1670e9b5cd6SAndrey Gruzdev 1680e9b5cd6SAndrey Gruzdev if (ioctl(uffd_fd, UFFDIO_REGISTER, &uffd_register)) { 1690e9b5cd6SAndrey Gruzdev trace_uffd_register_memory_failed(addr, length, mode, errno); 1700e9b5cd6SAndrey Gruzdev return -1; 1710e9b5cd6SAndrey Gruzdev } 1720e9b5cd6SAndrey Gruzdev if (ioctls) { 1730e9b5cd6SAndrey Gruzdev *ioctls = uffd_register.ioctls; 1740e9b5cd6SAndrey Gruzdev } 1750e9b5cd6SAndrey Gruzdev 1760e9b5cd6SAndrey Gruzdev return 0; 1770e9b5cd6SAndrey Gruzdev } 1780e9b5cd6SAndrey Gruzdev 1790e9b5cd6SAndrey Gruzdev /** 1800e9b5cd6SAndrey Gruzdev * uffd_unregister_memory: un-register memory range with UFFD-IO 1810e9b5cd6SAndrey Gruzdev * 1820e9b5cd6SAndrey Gruzdev * Returns 0 in case of success, negative value in case of an error 1830e9b5cd6SAndrey Gruzdev * 1840e9b5cd6SAndrey Gruzdev * @uffd_fd: UFFD file descriptor 1850e9b5cd6SAndrey Gruzdev * @addr: base address of memory range 1860e9b5cd6SAndrey Gruzdev * @length: length of memory range 1870e9b5cd6SAndrey Gruzdev */ 1880e9b5cd6SAndrey Gruzdev int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length) 1890e9b5cd6SAndrey Gruzdev { 1900e9b5cd6SAndrey Gruzdev struct uffdio_range uffd_range; 1910e9b5cd6SAndrey Gruzdev 1920e9b5cd6SAndrey Gruzdev uffd_range.start = (uintptr_t) addr; 1930e9b5cd6SAndrey Gruzdev uffd_range.len = length; 1940e9b5cd6SAndrey Gruzdev 1950e9b5cd6SAndrey Gruzdev if (ioctl(uffd_fd, UFFDIO_UNREGISTER, &uffd_range)) { 1960e9b5cd6SAndrey Gruzdev trace_uffd_unregister_memory_failed(addr, length, errno); 1970e9b5cd6SAndrey Gruzdev return -1; 1980e9b5cd6SAndrey Gruzdev } 1990e9b5cd6SAndrey Gruzdev 2000e9b5cd6SAndrey Gruzdev return 0; 2010e9b5cd6SAndrey Gruzdev } 2020e9b5cd6SAndrey Gruzdev 2030e9b5cd6SAndrey Gruzdev /** 2040e9b5cd6SAndrey Gruzdev * uffd_change_protection: protect/un-protect memory range for writes via UFFD-IO 2050e9b5cd6SAndrey Gruzdev * 2060e9b5cd6SAndrey Gruzdev * Returns 0 on success, negative value in case of error 2070e9b5cd6SAndrey Gruzdev * 2080e9b5cd6SAndrey Gruzdev * @uffd_fd: UFFD file descriptor 2090e9b5cd6SAndrey Gruzdev * @addr: base address of memory range 2100e9b5cd6SAndrey Gruzdev * @length: length of memory range 2110e9b5cd6SAndrey Gruzdev * @wp: write-protect/unprotect 2120e9b5cd6SAndrey Gruzdev * @dont_wake: do not wake threads waiting on wr-protected page 2130e9b5cd6SAndrey Gruzdev */ 2140e9b5cd6SAndrey Gruzdev int uffd_change_protection(int uffd_fd, void *addr, uint64_t length, 2150e9b5cd6SAndrey Gruzdev bool wp, bool dont_wake) 2160e9b5cd6SAndrey Gruzdev { 2170e9b5cd6SAndrey Gruzdev struct uffdio_writeprotect uffd_writeprotect; 2180e9b5cd6SAndrey Gruzdev 2190e9b5cd6SAndrey Gruzdev uffd_writeprotect.range.start = (uintptr_t) addr; 2200e9b5cd6SAndrey Gruzdev uffd_writeprotect.range.len = length; 2210e9b5cd6SAndrey Gruzdev if (!wp && dont_wake) { 2220e9b5cd6SAndrey Gruzdev /* DONTWAKE is meaningful only on protection release */ 2230e9b5cd6SAndrey Gruzdev uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE; 2240e9b5cd6SAndrey Gruzdev } else { 2250e9b5cd6SAndrey Gruzdev uffd_writeprotect.mode = (wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0); 2260e9b5cd6SAndrey Gruzdev } 2270e9b5cd6SAndrey Gruzdev 2280e9b5cd6SAndrey Gruzdev if (ioctl(uffd_fd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) { 2290e9b5cd6SAndrey Gruzdev error_report("uffd_change_protection() failed: addr=%p len=%" PRIu64 2300e9b5cd6SAndrey Gruzdev " mode=%" PRIx64 " errno=%i", addr, length, 2310e9b5cd6SAndrey Gruzdev (uint64_t) uffd_writeprotect.mode, errno); 2320e9b5cd6SAndrey Gruzdev return -1; 2330e9b5cd6SAndrey Gruzdev } 2340e9b5cd6SAndrey Gruzdev 2350e9b5cd6SAndrey Gruzdev return 0; 2360e9b5cd6SAndrey Gruzdev } 2370e9b5cd6SAndrey Gruzdev 2380e9b5cd6SAndrey Gruzdev /** 2390e9b5cd6SAndrey Gruzdev * uffd_copy_page: copy range of pages to destination via UFFD-IO 2400e9b5cd6SAndrey Gruzdev * 2410e9b5cd6SAndrey Gruzdev * Copy range of source pages to the destination to resolve 2420e9b5cd6SAndrey Gruzdev * missing page fault somewhere in the destination range. 2430e9b5cd6SAndrey Gruzdev * 2440e9b5cd6SAndrey Gruzdev * Returns 0 on success, negative value in case of an error 2450e9b5cd6SAndrey Gruzdev * 2460e9b5cd6SAndrey Gruzdev * @uffd_fd: UFFD file descriptor 2470e9b5cd6SAndrey Gruzdev * @dst_addr: destination base address 2480e9b5cd6SAndrey Gruzdev * @src_addr: source base address 2490e9b5cd6SAndrey Gruzdev * @length: length of the range to copy 2500e9b5cd6SAndrey Gruzdev * @dont_wake: do not wake threads waiting on missing page 2510e9b5cd6SAndrey Gruzdev */ 2520e9b5cd6SAndrey Gruzdev int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr, 2530e9b5cd6SAndrey Gruzdev uint64_t length, bool dont_wake) 2540e9b5cd6SAndrey Gruzdev { 2550e9b5cd6SAndrey Gruzdev struct uffdio_copy uffd_copy; 2560e9b5cd6SAndrey Gruzdev 2570e9b5cd6SAndrey Gruzdev uffd_copy.dst = (uintptr_t) dst_addr; 2580e9b5cd6SAndrey Gruzdev uffd_copy.src = (uintptr_t) src_addr; 2590e9b5cd6SAndrey Gruzdev uffd_copy.len = length; 2600e9b5cd6SAndrey Gruzdev uffd_copy.mode = dont_wake ? UFFDIO_COPY_MODE_DONTWAKE : 0; 2610e9b5cd6SAndrey Gruzdev 2620e9b5cd6SAndrey Gruzdev if (ioctl(uffd_fd, UFFDIO_COPY, &uffd_copy)) { 2630e9b5cd6SAndrey Gruzdev error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64 2640e9b5cd6SAndrey Gruzdev " mode=%" PRIx64 " errno=%i", dst_addr, src_addr, 2650e9b5cd6SAndrey Gruzdev length, (uint64_t) uffd_copy.mode, errno); 2660e9b5cd6SAndrey Gruzdev return -1; 2670e9b5cd6SAndrey Gruzdev } 2680e9b5cd6SAndrey Gruzdev 2690e9b5cd6SAndrey Gruzdev return 0; 2700e9b5cd6SAndrey Gruzdev } 2710e9b5cd6SAndrey Gruzdev 2720e9b5cd6SAndrey Gruzdev /** 2730e9b5cd6SAndrey Gruzdev * uffd_zero_page: fill range of pages with zeroes via UFFD-IO 2740e9b5cd6SAndrey Gruzdev * 2750e9b5cd6SAndrey Gruzdev * Fill range pages with zeroes to resolve missing page fault within the range. 2760e9b5cd6SAndrey Gruzdev * 2770e9b5cd6SAndrey Gruzdev * Returns 0 on success, negative value in case of an error 2780e9b5cd6SAndrey Gruzdev * 2790e9b5cd6SAndrey Gruzdev * @uffd_fd: UFFD file descriptor 2800e9b5cd6SAndrey Gruzdev * @addr: base address 2810e9b5cd6SAndrey Gruzdev * @length: length of the range to fill with zeroes 2820e9b5cd6SAndrey Gruzdev * @dont_wake: do not wake threads waiting on missing page 2830e9b5cd6SAndrey Gruzdev */ 2840e9b5cd6SAndrey Gruzdev int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake) 2850e9b5cd6SAndrey Gruzdev { 2860e9b5cd6SAndrey Gruzdev struct uffdio_zeropage uffd_zeropage; 2870e9b5cd6SAndrey Gruzdev 2880e9b5cd6SAndrey Gruzdev uffd_zeropage.range.start = (uintptr_t) addr; 2890e9b5cd6SAndrey Gruzdev uffd_zeropage.range.len = length; 2900e9b5cd6SAndrey Gruzdev uffd_zeropage.mode = dont_wake ? UFFDIO_ZEROPAGE_MODE_DONTWAKE : 0; 2910e9b5cd6SAndrey Gruzdev 2920e9b5cd6SAndrey Gruzdev if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &uffd_zeropage)) { 2930e9b5cd6SAndrey Gruzdev error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64 2940e9b5cd6SAndrey Gruzdev " mode=%" PRIx64 " errno=%i", addr, length, 2950e9b5cd6SAndrey Gruzdev (uint64_t) uffd_zeropage.mode, errno); 2960e9b5cd6SAndrey Gruzdev return -1; 2970e9b5cd6SAndrey Gruzdev } 2980e9b5cd6SAndrey Gruzdev 2990e9b5cd6SAndrey Gruzdev return 0; 3000e9b5cd6SAndrey Gruzdev } 3010e9b5cd6SAndrey Gruzdev 3020e9b5cd6SAndrey Gruzdev /** 3030e9b5cd6SAndrey Gruzdev * uffd_wakeup: wake up threads waiting on page UFFD-managed page fault resolution 3040e9b5cd6SAndrey Gruzdev * 3050e9b5cd6SAndrey Gruzdev * Wake up threads waiting on any page/pages from the designated range. 3060e9b5cd6SAndrey Gruzdev * The main use case is when during some period, page faults are resolved 3070e9b5cd6SAndrey Gruzdev * via UFFD-IO IOCTLs with MODE_DONTWAKE flag set, then after that all waits 3080e9b5cd6SAndrey Gruzdev * for the whole memory range are satisfied in a single call to uffd_wakeup(). 3090e9b5cd6SAndrey Gruzdev * 3100e9b5cd6SAndrey Gruzdev * Returns 0 on success, negative value in case of an error 3110e9b5cd6SAndrey Gruzdev * 3120e9b5cd6SAndrey Gruzdev * @uffd_fd: UFFD file descriptor 3130e9b5cd6SAndrey Gruzdev * @addr: base address 3140e9b5cd6SAndrey Gruzdev * @length: length of the range 3150e9b5cd6SAndrey Gruzdev */ 3160e9b5cd6SAndrey Gruzdev int uffd_wakeup(int uffd_fd, void *addr, uint64_t length) 3170e9b5cd6SAndrey Gruzdev { 3180e9b5cd6SAndrey Gruzdev struct uffdio_range uffd_range; 3190e9b5cd6SAndrey Gruzdev 3200e9b5cd6SAndrey Gruzdev uffd_range.start = (uintptr_t) addr; 3210e9b5cd6SAndrey Gruzdev uffd_range.len = length; 3220e9b5cd6SAndrey Gruzdev 3230e9b5cd6SAndrey Gruzdev if (ioctl(uffd_fd, UFFDIO_WAKE, &uffd_range)) { 3240e9b5cd6SAndrey Gruzdev error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i", 3250e9b5cd6SAndrey Gruzdev addr, length, errno); 3260e9b5cd6SAndrey Gruzdev return -1; 3270e9b5cd6SAndrey Gruzdev } 3280e9b5cd6SAndrey Gruzdev 3290e9b5cd6SAndrey Gruzdev return 0; 3300e9b5cd6SAndrey Gruzdev } 3310e9b5cd6SAndrey Gruzdev 3320e9b5cd6SAndrey Gruzdev /** 3330e9b5cd6SAndrey Gruzdev * uffd_read_events: read pending UFFD events 3340e9b5cd6SAndrey Gruzdev * 3350e9b5cd6SAndrey Gruzdev * Returns number of fetched messages, 0 if non is available or 3360e9b5cd6SAndrey Gruzdev * negative value in case of an error 3370e9b5cd6SAndrey Gruzdev * 3380e9b5cd6SAndrey Gruzdev * @uffd_fd: UFFD file descriptor 3390e9b5cd6SAndrey Gruzdev * @msgs: pointer to message buffer 3400e9b5cd6SAndrey Gruzdev * @count: number of messages that can fit in the buffer 3410e9b5cd6SAndrey Gruzdev */ 3420e9b5cd6SAndrey Gruzdev int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count) 3430e9b5cd6SAndrey Gruzdev { 3440e9b5cd6SAndrey Gruzdev ssize_t res; 3450e9b5cd6SAndrey Gruzdev do { 3460e9b5cd6SAndrey Gruzdev res = read(uffd_fd, msgs, count * sizeof(struct uffd_msg)); 3470e9b5cd6SAndrey Gruzdev } while (res < 0 && errno == EINTR); 3480e9b5cd6SAndrey Gruzdev 3490e9b5cd6SAndrey Gruzdev if ((res < 0 && errno == EAGAIN)) { 3500e9b5cd6SAndrey Gruzdev return 0; 3510e9b5cd6SAndrey Gruzdev } 3520e9b5cd6SAndrey Gruzdev if (res < 0) { 3530e9b5cd6SAndrey Gruzdev error_report("uffd_read_events() failed: errno=%i", errno); 3540e9b5cd6SAndrey Gruzdev return -1; 3550e9b5cd6SAndrey Gruzdev } 3560e9b5cd6SAndrey Gruzdev 3570e9b5cd6SAndrey Gruzdev return (int) (res / sizeof(struct uffd_msg)); 3580e9b5cd6SAndrey Gruzdev } 3590e9b5cd6SAndrey Gruzdev 3600e9b5cd6SAndrey Gruzdev /** 3610e9b5cd6SAndrey Gruzdev * uffd_poll_events: poll UFFD file descriptor for read 3620e9b5cd6SAndrey Gruzdev * 3630e9b5cd6SAndrey Gruzdev * Returns true if events are available for read, false otherwise 3640e9b5cd6SAndrey Gruzdev * 3650e9b5cd6SAndrey Gruzdev * @uffd_fd: UFFD file descriptor 3660e9b5cd6SAndrey Gruzdev * @tmo: timeout value 3670e9b5cd6SAndrey Gruzdev */ 3680e9b5cd6SAndrey Gruzdev bool uffd_poll_events(int uffd_fd, int tmo) 3690e9b5cd6SAndrey Gruzdev { 3700e9b5cd6SAndrey Gruzdev int res; 3710e9b5cd6SAndrey Gruzdev struct pollfd poll_fd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 }; 3720e9b5cd6SAndrey Gruzdev 3730e9b5cd6SAndrey Gruzdev do { 3740e9b5cd6SAndrey Gruzdev res = poll(&poll_fd, 1, tmo); 3750e9b5cd6SAndrey Gruzdev } while (res < 0 && errno == EINTR); 3760e9b5cd6SAndrey Gruzdev 3770e9b5cd6SAndrey Gruzdev if (res == 0) { 3780e9b5cd6SAndrey Gruzdev return false; 3790e9b5cd6SAndrey Gruzdev } 3800e9b5cd6SAndrey Gruzdev if (res < 0) { 3810e9b5cd6SAndrey Gruzdev error_report("uffd_poll_events() failed: errno=%i", errno); 3820e9b5cd6SAndrey Gruzdev return false; 3830e9b5cd6SAndrey Gruzdev } 3840e9b5cd6SAndrey Gruzdev 3850e9b5cd6SAndrey Gruzdev return (poll_fd.revents & POLLIN) != 0; 3860e9b5cd6SAndrey Gruzdev } 387