/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 *  include/linux/userfaultfd.h
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *  Copyright (C) 2015  Red Hat, Inc.
 *
 */

#ifndef _LINUX_USERFAULTFD_H
#define _LINUX_USERFAULTFD_H

#include <linux/types.h>

/*
 * NOTE: the _IO(), _IOR() and _IOWR() encoders used below are not defined
 * in this file; they are expected to be provided by an ioctl header that
 * the user includes alongside this one (presumably <linux/ioctl.h> or
 * <sys/ioctl.h>).
 */

/* ioctls for /dev/userfaultfd */
#define USERFAULTFD_IOC 0xAA
#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00)

/*
 * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
 * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR.  In
 * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ
 * means the userland is reading).
 */
#define UFFD_API ((__u64)0xAA)
#define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING |	\
				 UFFDIO_REGISTER_MODE_WP |	\
				 UFFDIO_REGISTER_MODE_MINOR)
#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP |	\
			   UFFD_FEATURE_EVENT_FORK |		\
			   UFFD_FEATURE_EVENT_REMAP |		\
			   UFFD_FEATURE_EVENT_REMOVE |		\
			   UFFD_FEATURE_EVENT_UNMAP |		\
			   UFFD_FEATURE_MISSING_HUGETLBFS |	\
			   UFFD_FEATURE_MISSING_SHMEM |		\
			   UFFD_FEATURE_SIGBUS |		\
			   UFFD_FEATURE_THREAD_ID |		\
			   UFFD_FEATURE_MINOR_HUGETLBFS |	\
			   UFFD_FEATURE_MINOR_SHMEM |		\
			   UFFD_FEATURE_EXACT_ADDRESS |		\
			   UFFD_FEATURE_WP_HUGETLBFS_SHMEM |	\
			   UFFD_FEATURE_WP_UNPOPULATED |	\
			   UFFD_FEATURE_POISON |		\
			   UFFD_FEATURE_WP_ASYNC |		\
			   UFFD_FEATURE_MOVE)
#define UFFD_API_IOCTLS				\
	((__u64)1 << _UFFDIO_REGISTER |		\
	 (__u64)1 << _UFFDIO_UNREGISTER |	\
	 (__u64)1 << _UFFDIO_API)
#define UFFD_API_RANGE_IOCTLS			\
	((__u64)1 << _UFFDIO_WAKE |		\
	 (__u64)1 << _UFFDIO_COPY |		\
	 (__u64)1 << _UFFDIO_ZEROPAGE |		\
	 (__u64)1 << _UFFDIO_MOVE |		\
	 (__u64)1 << _UFFDIO_WRITEPROTECT |	\
	 (__u64)1 << _UFFDIO_CONTINUE |		\
	 (__u64)1 << _UFFDIO_POISON)
#define UFFD_API_RANGE_IOCTLS_BASIC		\
	((__u64)1 << _UFFDIO_WAKE |		\
	 (__u64)1 << _UFFDIO_COPY |		\
	 (__u64)1 << _UFFDIO_WRITEPROTECT |	\
	 (__u64)1 << _UFFDIO_CONTINUE |		\
	 (__u64)1 << _UFFDIO_POISON)

/*
 * Valid ioctl command number range with this API is from 0x00 to
 * 0x3F.  UFFDIO_API is the fixed number, everything else can be
 * changed by implementing a different UFFD_API. If sticking to the
 * same UFFD_API more ioctl can be added and userland will be aware of
 * which ioctl the running kernel implements through the ioctl command
 * bitmask written by the UFFDIO_API.
 *
 * Each number below is also used as a bit position in the
 * UFFD_API_IOCTLS / UFFD_API_RANGE_IOCTLS* bitmasks above.
 */
#define _UFFDIO_REGISTER		(0x00)
#define _UFFDIO_UNREGISTER		(0x01)
#define _UFFDIO_WAKE			(0x02)
#define _UFFDIO_COPY			(0x03)
#define _UFFDIO_ZEROPAGE		(0x04)
#define _UFFDIO_MOVE			(0x05)
#define _UFFDIO_WRITEPROTECT		(0x06)
#define _UFFDIO_CONTINUE		(0x07)
#define _UFFDIO_POISON			(0x08)
#define _UFFDIO_API			(0x3F)

/* userfaultfd ioctl ids */
#define UFFDIO 0xAA
#define UFFDIO_API		_IOWR(UFFDIO, _UFFDIO_API,	\
				      struct uffdio_api)
#define UFFDIO_REGISTER		_IOWR(UFFDIO, _UFFDIO_REGISTER, \
				      struct uffdio_register)
#define UFFDIO_UNREGISTER	_IOR(UFFDIO, _UFFDIO_UNREGISTER,	\
				     struct uffdio_range)
#define UFFDIO_WAKE		_IOR(UFFDIO, _UFFDIO_WAKE,	\
				     struct uffdio_range)
#define UFFDIO_COPY		_IOWR(UFFDIO, _UFFDIO_COPY,	\
				      struct uffdio_copy)
#define UFFDIO_ZEROPAGE		_IOWR(UFFDIO, _UFFDIO_ZEROPAGE,	\
				      struct uffdio_zeropage)
#define UFFDIO_MOVE		_IOWR(UFFDIO, _UFFDIO_MOVE,	\
				      struct uffdio_move)
#define UFFDIO_WRITEPROTECT	_IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
				      struct uffdio_writeprotect)
#define UFFDIO_CONTINUE		_IOWR(UFFDIO, _UFFDIO_CONTINUE,	\
				      struct uffdio_continue)
#define UFFDIO_POISON		_IOWR(UFFDIO, _UFFDIO_POISON, \
				      struct uffdio_poison)

/*
 * read() structure
 *
 * The structure is packed: an 8-byte header (event + reserved fields)
 * followed by a 24-byte union, 32 bytes in total.  "event" carries one
 * of the UFFD_EVENT_* values defined below and presumably selects which
 * member of "arg" is meaningful.
 */
struct uffd_msg {
	__u8	event;

	__u8	reserved1;
	__u16	reserved2;
	__u32	reserved3;

	union {
		struct {
			__u64	flags;		/* UFFD_PAGEFAULT_FLAG_* */
			__u64	address;
			union {
				__u32 ptid;
			} feat;
		} pagefault;

		struct {
			__u32	ufd;
		} fork;

		struct {
			__u64	from;
			__u64	to;
			__u64	len;
		} remap;

		struct {
			__u64	start;
			__u64	end;
		} remove;

		struct {
			/* unused reserved fields */
			__u64	reserved1;
			__u64	reserved2;
			__u64	reserved3;
		} reserved;
	} arg;
} __attribute__((packed));

/*
 * Start at 0x12 and not at 0 to be more strict against bugs.
 */
#define UFFD_EVENT_PAGEFAULT	0x12
#define UFFD_EVENT_FORK		0x13
#define UFFD_EVENT_REMAP	0x14
#define UFFD_EVENT_REMOVE	0x15
#define UFFD_EVENT_UNMAP	0x16

/* flags for UFFD_EVENT_PAGEFAULT */
#define UFFD_PAGEFAULT_FLAG_WRITE	(1<<0)	/* If this was a write fault */
#define UFFD_PAGEFAULT_FLAG_WP		(1<<1)	/* If reason is VM_UFFD_WP */
#define UFFD_PAGEFAULT_FLAG_MINOR	(1<<2)	/* If reason is VM_UFFD_MINOR */

/* Argument of the UFFDIO_API ioctl. */
struct uffdio_api {
	/* userland asks for an API number and the features to enable */
	__u64 api;
	/*
	 * Kernel answers below with all the available features for
	 * the API, this notifies userland of which events and/or
	 * which flags for each event are enabled in the current
	 * kernel.
	 *
	 * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
	 * are to be considered implicitly always enabled in all kernels as
	 * long as the uffdio_api.api requested matches UFFD_API.
	 *
	 * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER
	 * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on
	 * hugetlbfs virtual memory ranges. Adding or not adding
	 * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has
	 * no real functional effect after UFFDIO_API returns, but
	 * it's only useful for an initial feature set probe at
	 * UFFDIO_API time. There are two ways to use it:
	 *
	 * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the
	 *    uffdio_api.features before calling UFFDIO_API, an error
	 *    will be returned by UFFDIO_API on a kernel without
	 *    hugetlbfs missing support
	 *
	 * 2) UFFD_FEATURE_MISSING_HUGETLBFS need not be added to
	 *    uffdio_api.features; instead it will be set by the
	 *    kernel in the uffdio_api.features if the kernel supports
	 *    it, so userland can later check if the feature flag is
	 *    present in uffdio_api.features after UFFDIO_API
	 *    succeeded.
	 *
	 * UFFD_FEATURE_MISSING_SHMEM works the same as
	 * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
	 * (i.e. tmpfs and other shmem based APIs).
	 *
	 * UFFD_FEATURE_SIGBUS feature means no page-fault
	 * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead
	 * a SIGBUS signal will be sent to the faulting process.
	 *
	 * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will
	 * be returned, if feature is not requested 0 will be returned.
	 *
	 * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults
	 * can be intercepted (via REGISTER_MODE_MINOR) for
	 * hugetlbfs-backed pages.
	 *
	 * UFFD_FEATURE_MINOR_SHMEM indicates the same support as
	 * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead.
	 *
	 * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page
	 * faults would be provided and the offset within the page would not be
	 * masked.
	 *
	 * UFFD_FEATURE_WP_HUGETLBFS_SHMEM indicates that userfaultfd
	 * write-protection mode is supported on both shmem and hugetlbfs.
	 *
	 * UFFD_FEATURE_WP_UNPOPULATED indicates that userfaultfd
	 * write-protection mode will always apply to unpopulated pages
	 * (i.e. empty ptes).  This will be the default behavior for shmem
	 * & hugetlbfs, so this flag only affects anonymous memory behavior
	 * when userfault write-protection mode is registered.
	 *
	 * UFFD_FEATURE_POISON is not described here; NOTE(review): it
	 * presumably advertises support for the UFFDIO_POISON ioctl --
	 * confirm against the kernel documentation.
	 *
	 * UFFD_FEATURE_WP_ASYNC indicates that userfaultfd write-protection
	 * asynchronous mode is supported in which the write fault is
	 * automatically resolved and write-protection is un-set.
	 * It implies UFFD_FEATURE_WP_UNPOPULATED.
	 *
	 * UFFD_FEATURE_MOVE indicates that the kernel supports moving an
	 * existing page contents from userspace.
	 */
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
#define UFFD_FEATURE_EVENT_FORK			(1<<1)
#define UFFD_FEATURE_EVENT_REMAP		(1<<2)
#define UFFD_FEATURE_EVENT_REMOVE		(1<<3)
#define UFFD_FEATURE_MISSING_HUGETLBFS		(1<<4)
#define UFFD_FEATURE_MISSING_SHMEM		(1<<5)
#define UFFD_FEATURE_EVENT_UNMAP		(1<<6)
#define UFFD_FEATURE_SIGBUS			(1<<7)
#define UFFD_FEATURE_THREAD_ID			(1<<8)
#define UFFD_FEATURE_MINOR_HUGETLBFS		(1<<9)
#define UFFD_FEATURE_MINOR_SHMEM		(1<<10)
#define UFFD_FEATURE_EXACT_ADDRESS		(1<<11)
#define UFFD_FEATURE_WP_HUGETLBFS_SHMEM		(1<<12)
#define UFFD_FEATURE_WP_UNPOPULATED		(1<<13)
#define UFFD_FEATURE_POISON			(1<<14)
#define UFFD_FEATURE_WP_ASYNC			(1<<15)
#define UFFD_FEATURE_MOVE			(1<<16)
	__u64 features;

	__u64 ioctls;
};

/*
 * Range descriptor: the argument type of UFFDIO_UNREGISTER and
 * UFFDIO_WAKE, and the first member of several ioctl argument
 * structures below.
 */
struct uffdio_range {
	__u64 start;
	__u64 len;
};

/* Argument of the UFFDIO_REGISTER ioctl. */
struct uffdio_register {
	struct uffdio_range range;
#define UFFDIO_REGISTER_MODE_MISSING	((__u64)1<<0)
#define UFFDIO_REGISTER_MODE_WP		((__u64)1<<1)
#define UFFDIO_REGISTER_MODE_MINOR	((__u64)1<<2)
	__u64 mode;

	/*
	 * kernel answers which ioctl commands are available for the
	 * range, keep at the end as the last 8 bytes aren't read.
	 */
	__u64 ioctls;
};

/* Argument of the UFFDIO_COPY ioctl. */
struct uffdio_copy {
	__u64 dst;
	__u64 src;
	__u64 len;
#define UFFDIO_COPY_MODE_DONTWAKE		((__u64)1<<0)
	/*
	 * UFFDIO_COPY_MODE_WP will map the page write protected on
	 * the fly.  UFFDIO_COPY_MODE_WP is available only if the
	 * write protected ioctl is implemented for the range
	 * according to the uffdio_register.ioctls.
	 */
#define UFFDIO_COPY_MODE_WP			((__u64)1<<1)
	__u64 mode;

	/*
	 * "copy" is written by the ioctl and must be at the end: the
	 * copy_from_user will not read the last 8 bytes.
	 */
	__s64 copy;
};

/* Argument of the UFFDIO_ZEROPAGE ioctl. */
struct uffdio_zeropage {
	struct uffdio_range range;
#define UFFDIO_ZEROPAGE_MODE_DONTWAKE		((__u64)1<<0)
	__u64 mode;

	/*
	 * "zeropage" is written by the ioctl and must be at the end:
	 * the copy_from_user will not read the last 8 bytes.
	 */
	__s64 zeropage;
};

/* Argument of the UFFDIO_WRITEPROTECT ioctl. */
struct uffdio_writeprotect {
	struct uffdio_range range;
/*
 * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range,
 * unset the flag to undo protection of a range which was previously
 * write protected.
 *
 * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up
 * any wait thread after the operation succeeds.
 *
 * NOTE: Write protecting a region (WP=1) is unrelated to page faults,
 * therefore DONTWAKE flag is meaningless with WP=1.  Removing write
 * protection (WP=0) in response to a page fault wakes the faulting
 * task unless DONTWAKE is set.
 */
#define UFFDIO_WRITEPROTECT_MODE_WP		((__u64)1<<0)
#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE	((__u64)1<<1)
	__u64 mode;
};

/* Argument of the UFFDIO_CONTINUE ioctl. */
struct uffdio_continue {
	struct uffdio_range range;
#define UFFDIO_CONTINUE_MODE_DONTWAKE		((__u64)1<<0)
	/*
	 * UFFDIO_CONTINUE_MODE_WP will map the page write protected on
	 * the fly.  UFFDIO_CONTINUE_MODE_WP is available only if the
	 * write protected ioctl is implemented for the range
	 * according to the uffdio_register.ioctls.
	 */
#define UFFDIO_CONTINUE_MODE_WP			((__u64)1<<1)
	__u64 mode;

	/*
	 * Fields below here are written by the ioctl and must be at the end:
	 * the copy_from_user will not read past here.
	 */
	__s64 mapped;
};

/* Argument of the UFFDIO_POISON ioctl. */
struct uffdio_poison {
	struct uffdio_range range;
#define UFFDIO_POISON_MODE_DONTWAKE		((__u64)1<<0)
	__u64 mode;

	/*
	 * Fields below here are written by the ioctl and must be at the end:
	 * the copy_from_user will not read past here.
	 */
	__s64 updated;
};

/* Argument of the UFFDIO_MOVE ioctl. */
struct uffdio_move {
	__u64 dst;
	__u64 src;
	__u64 len;
	/*
	 * Especially if used to atomically remove memory from the
	 * address space the wake on the dst range is not needed.
	 */
#define UFFDIO_MOVE_MODE_DONTWAKE		((__u64)1<<0)
#define UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES	((__u64)1<<1)
	__u64 mode;
	/*
	 * "move" is written by the ioctl and must be at the end: the
	 * copy_from_user will not read the last 8 bytes.
	 */
	__s64 move;
};

/*
 * Flags for the userfaultfd(2) system call itself.
 */

/*
 * Create a userfaultfd that can handle page faults only in user mode.
 */
#define UFFD_USER_MODE_ONLY 1

#endif /* _LINUX_USERFAULTFD_H */