xref: /qemu/util/userfaultfd.c (revision 0e9b5cd6)
/*
 * Linux UFFD-WP support
 *
 * Copyright Virtuozzo GmbH, 2020
 *
 * Authors:
 *  Andrey Gruzdev   <andrey.gruzdev@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
12*0e9b5cd6SAndrey Gruzdev 
13*0e9b5cd6SAndrey Gruzdev #include "qemu/osdep.h"
14*0e9b5cd6SAndrey Gruzdev #include "qemu/bitops.h"
15*0e9b5cd6SAndrey Gruzdev #include "qemu/error-report.h"
16*0e9b5cd6SAndrey Gruzdev #include "qemu/userfaultfd.h"
17*0e9b5cd6SAndrey Gruzdev #include "trace.h"
18*0e9b5cd6SAndrey Gruzdev #include <poll.h>
19*0e9b5cd6SAndrey Gruzdev #include <sys/syscall.h>
20*0e9b5cd6SAndrey Gruzdev #include <sys/ioctl.h>
21*0e9b5cd6SAndrey Gruzdev 
22*0e9b5cd6SAndrey Gruzdev /**
23*0e9b5cd6SAndrey Gruzdev  * uffd_query_features: query UFFD features
24*0e9b5cd6SAndrey Gruzdev  *
25*0e9b5cd6SAndrey Gruzdev  * Returns: 0 on success, negative value in case of an error
26*0e9b5cd6SAndrey Gruzdev  *
27*0e9b5cd6SAndrey Gruzdev  * @features: parameter to receive 'uffdio_api.features'
28*0e9b5cd6SAndrey Gruzdev  */
29*0e9b5cd6SAndrey Gruzdev int uffd_query_features(uint64_t *features)
30*0e9b5cd6SAndrey Gruzdev {
31*0e9b5cd6SAndrey Gruzdev     int uffd_fd;
32*0e9b5cd6SAndrey Gruzdev     struct uffdio_api api_struct = { 0 };
33*0e9b5cd6SAndrey Gruzdev     int ret = -1;
34*0e9b5cd6SAndrey Gruzdev 
35*0e9b5cd6SAndrey Gruzdev     uffd_fd = syscall(__NR_userfaultfd, O_CLOEXEC);
36*0e9b5cd6SAndrey Gruzdev     if (uffd_fd < 0) {
37*0e9b5cd6SAndrey Gruzdev         trace_uffd_query_features_nosys(errno);
38*0e9b5cd6SAndrey Gruzdev         return -1;
39*0e9b5cd6SAndrey Gruzdev     }
40*0e9b5cd6SAndrey Gruzdev 
41*0e9b5cd6SAndrey Gruzdev     api_struct.api = UFFD_API;
42*0e9b5cd6SAndrey Gruzdev     api_struct.features = 0;
43*0e9b5cd6SAndrey Gruzdev 
44*0e9b5cd6SAndrey Gruzdev     if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
45*0e9b5cd6SAndrey Gruzdev         trace_uffd_query_features_api_failed(errno);
46*0e9b5cd6SAndrey Gruzdev         goto out;
47*0e9b5cd6SAndrey Gruzdev     }
48*0e9b5cd6SAndrey Gruzdev     *features = api_struct.features;
49*0e9b5cd6SAndrey Gruzdev     ret = 0;
50*0e9b5cd6SAndrey Gruzdev 
51*0e9b5cd6SAndrey Gruzdev out:
52*0e9b5cd6SAndrey Gruzdev     close(uffd_fd);
53*0e9b5cd6SAndrey Gruzdev     return ret;
54*0e9b5cd6SAndrey Gruzdev }
55*0e9b5cd6SAndrey Gruzdev 
56*0e9b5cd6SAndrey Gruzdev /**
57*0e9b5cd6SAndrey Gruzdev  * uffd_create_fd: create UFFD file descriptor
58*0e9b5cd6SAndrey Gruzdev  *
59*0e9b5cd6SAndrey Gruzdev  * Returns non-negative file descriptor or negative value in case of an error
60*0e9b5cd6SAndrey Gruzdev  *
61*0e9b5cd6SAndrey Gruzdev  * @features: UFFD features to request
62*0e9b5cd6SAndrey Gruzdev  * @non_blocking: create UFFD file descriptor for non-blocking operation
63*0e9b5cd6SAndrey Gruzdev  */
64*0e9b5cd6SAndrey Gruzdev int uffd_create_fd(uint64_t features, bool non_blocking)
65*0e9b5cd6SAndrey Gruzdev {
66*0e9b5cd6SAndrey Gruzdev     int uffd_fd;
67*0e9b5cd6SAndrey Gruzdev     int flags;
68*0e9b5cd6SAndrey Gruzdev     struct uffdio_api api_struct = { 0 };
69*0e9b5cd6SAndrey Gruzdev     uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER);
70*0e9b5cd6SAndrey Gruzdev 
71*0e9b5cd6SAndrey Gruzdev     flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0);
72*0e9b5cd6SAndrey Gruzdev     uffd_fd = syscall(__NR_userfaultfd, flags);
73*0e9b5cd6SAndrey Gruzdev     if (uffd_fd < 0) {
74*0e9b5cd6SAndrey Gruzdev         trace_uffd_create_fd_nosys(errno);
75*0e9b5cd6SAndrey Gruzdev         return -1;
76*0e9b5cd6SAndrey Gruzdev     }
77*0e9b5cd6SAndrey Gruzdev 
78*0e9b5cd6SAndrey Gruzdev     api_struct.api = UFFD_API;
79*0e9b5cd6SAndrey Gruzdev     api_struct.features = features;
80*0e9b5cd6SAndrey Gruzdev     if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
81*0e9b5cd6SAndrey Gruzdev         trace_uffd_create_fd_api_failed(errno);
82*0e9b5cd6SAndrey Gruzdev         goto fail;
83*0e9b5cd6SAndrey Gruzdev     }
84*0e9b5cd6SAndrey Gruzdev     if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
85*0e9b5cd6SAndrey Gruzdev         trace_uffd_create_fd_api_noioctl(ioctl_mask, api_struct.ioctls);
86*0e9b5cd6SAndrey Gruzdev         goto fail;
87*0e9b5cd6SAndrey Gruzdev     }
88*0e9b5cd6SAndrey Gruzdev 
89*0e9b5cd6SAndrey Gruzdev     return uffd_fd;
90*0e9b5cd6SAndrey Gruzdev 
91*0e9b5cd6SAndrey Gruzdev fail:
92*0e9b5cd6SAndrey Gruzdev     close(uffd_fd);
93*0e9b5cd6SAndrey Gruzdev     return -1;
94*0e9b5cd6SAndrey Gruzdev }
95*0e9b5cd6SAndrey Gruzdev 
/**
 * uffd_close_fd: release a UFFD file descriptor
 *
 * Asserts that the descriptor is non-negative and then closes it; the
 * result of close() is not reported to the caller.
 *
 * @uffd_fd: UFFD file descriptor, must be >= 0
 */
void uffd_close_fd(int uffd_fd)
{
    /* A negative descriptor here indicates a caller bug */
    assert(uffd_fd >= 0);

    (void) close(uffd_fd);
}
106*0e9b5cd6SAndrey Gruzdev 
/**
 * uffd_register_memory: register a memory range with UFFD-IO
 *
 * Issues UFFDIO_REGISTER for [addr, addr + length) with the given mode.
 * A failure is reported through a trace event only.
 *
 * Returns 0 in case of success, -1 in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 * @mode: UFFD register mode (UFFDIO_REGISTER_MODE_MISSING, ...)
 * @ioctls: optional (may be NULL) pointer to receive the supported IOCTL
 *          mask; written only on success
 */
int uffd_register_memory(int uffd_fd, void *addr, uint64_t length,
        uint64_t mode, uint64_t *ioctls)
{
    struct uffdio_register reg;

    reg.mode = mode;
    reg.range.len = length;
    reg.range.start = (uintptr_t) addr;

    if (ioctl(uffd_fd, UFFDIO_REGISTER, &reg) != 0) {
        trace_uffd_register_memory_failed(addr, length, mode, errno);
        return -1;
    }

    if (ioctls != NULL) {
        *ioctls = reg.ioctls;
    }
    return 0;
}
137*0e9b5cd6SAndrey Gruzdev 
/**
 * uffd_unregister_memory: remove a memory range from UFFD-IO
 *
 * Issues UFFDIO_UNREGISTER for [addr, addr + length). A failure is
 * reported through a trace event only.
 *
 * Returns 0 in case of success, -1 in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 */
int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length)
{
    struct uffdio_range range;

    range.len = length;
    range.start = (uintptr_t) addr;

    if (ioctl(uffd_fd, UFFDIO_UNREGISTER, &range) == 0) {
        return 0;
    }

    trace_uffd_unregister_memory_failed(addr, length, errno);
    return -1;
}
161*0e9b5cd6SAndrey Gruzdev 
/**
 * uffd_change_protection: set or clear write protection on a memory range
 *
 * Issues UFFDIO_WRITEPROTECT for [addr, addr + length). The mode is
 * UFFDIO_WRITEPROTECT_MODE_WP when @wp is set, otherwise
 * UFFDIO_WRITEPROTECT_MODE_DONTWAKE when @dont_wake is set, otherwise 0.
 * Failures are logged with error_report().
 *
 * Returns 0 on success, -1 in case of error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 * @wp: true to write-protect, false to un-protect
 * @dont_wake: do not wake threads waiting on wr-protected page; ignored
 *             when @wp is true
 */
int uffd_change_protection(int uffd_fd, void *addr, uint64_t length,
        bool wp, bool dont_wake)
{
    struct uffdio_writeprotect wp_args;

    wp_args.range.start = (uintptr_t) addr;
    wp_args.range.len = length;
    wp_args.mode = 0;

    if (wp) {
        wp_args.mode = UFFDIO_WRITEPROTECT_MODE_WP;
    } else if (dont_wake) {
        /* DONTWAKE is meaningful only on protection release */
        wp_args.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
    }

    if (ioctl(uffd_fd, UFFDIO_WRITEPROTECT, &wp_args) == 0) {
        return 0;
    }

    error_report("uffd_change_protection() failed: addr=%p len=%" PRIu64
            " mode=%" PRIx64 " errno=%i", addr, length,
            (uint64_t) wp_args.mode, errno);
    return -1;
}
196*0e9b5cd6SAndrey Gruzdev 
/**
 * uffd_copy_page: copy range of pages to destination via UFFD-IO
 *
 * Issues UFFDIO_COPY to copy @length bytes from the source to the
 * destination, resolving a missing page fault somewhere in the
 * destination range. Failures are logged with error_report().
 *
 * Returns 0 on success, -1 in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @dst_addr: destination base address
 * @src_addr: source base address
 * @length: length of the range to copy
 * @dont_wake: request UFFDIO_COPY_MODE_DONTWAKE, i.e. do not wake threads
 *             waiting on missing page
 */
int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr,
        uint64_t length, bool dont_wake)
{
    struct uffdio_copy copy_args;

    copy_args.mode = 0;
    if (dont_wake) {
        copy_args.mode = UFFDIO_COPY_MODE_DONTWAKE;
    }
    copy_args.len = length;
    copy_args.src = (uintptr_t) src_addr;
    copy_args.dst = (uintptr_t) dst_addr;

    if (ioctl(uffd_fd, UFFDIO_COPY, &copy_args) == 0) {
        return 0;
    }

    error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64
            " mode=%" PRIx64 " errno=%i", dst_addr, src_addr,
            length, (uint64_t) copy_args.mode, errno);
    return -1;
}
230*0e9b5cd6SAndrey Gruzdev 
/**
 * uffd_zero_page: fill range of pages with zeroes via UFFD-IO
 *
 * Issues UFFDIO_ZEROPAGE for [addr, addr + length) to resolve a missing
 * page fault within the range. Failures are logged with error_report().
 *
 * Returns 0 on success, -1 in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address
 * @length: length of the range to fill with zeroes
 * @dont_wake: request UFFDIO_ZEROPAGE_MODE_DONTWAKE, i.e. do not wake
 *             threads waiting on missing page
 */
int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake)
{
    struct uffdio_zeropage zero_args;

    zero_args.mode = 0;
    if (dont_wake) {
        zero_args.mode = UFFDIO_ZEROPAGE_MODE_DONTWAKE;
    }
    zero_args.range.len = length;
    zero_args.range.start = (uintptr_t) addr;

    if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &zero_args) == 0) {
        return 0;
    }

    error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64
            " mode=%" PRIx64 " errno=%i", addr, length,
            (uint64_t) zero_args.mode, errno);
    return -1;
}
260*0e9b5cd6SAndrey Gruzdev 
/**
 * uffd_wakeup: wake up threads waiting for UFFD-managed page fault resolution
 *
 * Issues UFFDIO_WAKE for [addr, addr + length), waking threads waiting on
 * any page from that range. The main use case is batching: faults are
 * first resolved via UFFD-IO IOCTLs with the MODE_DONTWAKE flag set, and
 * afterwards all waits for the whole memory range are satisfied by a
 * single call to uffd_wakeup(). Failures are logged with error_report().
 *
 * Returns 0 on success, -1 in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address
 * @length: length of the range
 */
int uffd_wakeup(int uffd_fd, void *addr, uint64_t length)
{
    struct uffdio_range range;

    range.len = length;
    range.start = (uintptr_t) addr;

    if (ioctl(uffd_fd, UFFDIO_WAKE, &range) == 0) {
        return 0;
    }

    error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i",
            addr, length, errno);
    return -1;
}
290*0e9b5cd6SAndrey Gruzdev 
/**
 * uffd_read_events: read pending UFFD events
 *
 * Performs a single read() of up to @count messages from the descriptor,
 * retrying while the call is interrupted (EINTR). EAGAIN is treated as
 * "nothing pending" rather than as an error; any other failure is logged
 * with error_report().
 *
 * Returns number of fetched messages, 0 if none is available (EAGAIN) or
 * -1 in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @msgs: pointer to message buffer
 * @count: number of messages that can fit in the buffer
 */
int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count)
{
    ssize_t nbytes;

    for (;;) {
        nbytes = read(uffd_fd, msgs, count * sizeof(struct uffd_msg));
        if (nbytes >= 0) {
            /* Convert the byte count into whole messages */
            return (int) (nbytes / sizeof(struct uffd_msg));
        }
        if (errno != EINTR) {
            break;
        }
    }

    if (errno == EAGAIN) {
        return 0;
    }

    error_report("uffd_read_events() failed: errno=%i", errno);
    return -1;
}
318*0e9b5cd6SAndrey Gruzdev 
/**
 * uffd_poll_events: poll UFFD file descriptor for read
 *
 * Calls poll() on the descriptor for POLLIN, retrying with the same
 * timeout while the call is interrupted (EINTR). A poll() failure is
 * logged with error_report() and reported as "no events".
 *
 * Returns true if poll() flagged POLLIN on the descriptor, false on
 * timeout, on error, or when only other conditions were reported
 *
 * @uffd_fd: UFFD file descriptor
 * @tmo: timeout value, passed unchanged to poll()
 */
bool uffd_poll_events(int uffd_fd, int tmo)
{
    struct pollfd pfd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 };
    int ready;

    do {
        ready = poll(&pfd, 1, tmo);
    } while (ready < 0 && errno == EINTR);

    if (ready < 0) {
        error_report("uffd_poll_events() failed: errno=%i", errno);
        return false;
    }

    /* ready == 0 means the timeout expired with nothing to report */
    return ready > 0 && (pfd.revents & POLLIN) != 0;
}
346