/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_MM_H
#define _LINUX_SCHED_MM_H

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/gfp.h>
#include <linux/sync_core.h>
#include <linux/ioasid.h>

/*
 * Routines for handling mm_structs
 */
extern struct mm_struct *mm_alloc(void);

/**
 * mmgrab() - Pin a &struct mm_struct.
 * @mm: The &struct mm_struct to pin.
 *
 * Make sure that @mm will not get freed even after the owning task
 * exits. This doesn't guarantee that the associated address space
 * will still exist later on and mmget_not_zero() has to be used before
 * accessing it.
 *
 * This is a preferred way to pin @mm for a longer/unbounded amount
 * of time.
 *
 * Use mmdrop() to release the reference acquired by mmgrab().
 *
 * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
 * of &mm_struct.mm_count vs &mm_struct.mm_users.
 */
static inline void mmgrab(struct mm_struct *mm)
{
	atomic_inc(&mm->mm_count);
}

extern void __mmdrop(struct mm_struct *mm);

static inline void mmdrop(struct mm_struct *mm)
{
	/*
	 * The implicit full barrier implied by atomic_dec_and_test() is
	 * required by the membarrier system call before returning to
	 * user-space, after storing to rq->curr.
	 */
	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
		__mmdrop(mm);
}

#ifdef CONFIG_PREEMPT_RT
/*
 * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is
 * by far the least expensive way to do that.
 */
static inline void __mmdrop_delayed(struct rcu_head *rhp)
{
	struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);

	__mmdrop(mm);
}

/*
 * Invoked from finish_task_switch(). Delegates the heavy lifting on RT
 * kernels via RCU.
 */
static inline void mmdrop_sched(struct mm_struct *mm)
{
	/* Provides a full memory barrier. See mmdrop() */
	if (atomic_dec_and_test(&mm->mm_count))
		call_rcu(&mm->delayed_drop, __mmdrop_delayed);
}
#else
static inline void mmdrop_sched(struct mm_struct *mm)
{
	mmdrop(mm);
}
#endif

/**
 * mmget() - Pin the address space associated with a &struct mm_struct.
 * @mm: The address space to pin.
 *
 * Make sure that the address space of the given &struct mm_struct doesn't
 * go away. This does not protect against parts of the address space being
 * modified or freed, however.
 *
 * Never use this function to pin this address space for an
 * unbounded/indefinite amount of time.
 *
 * Use mmput() to release the reference acquired by mmget().
 *
 * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
 * of &mm_struct.mm_count vs &mm_struct.mm_users.
 */
static inline void mmget(struct mm_struct *mm)
{
	atomic_inc(&mm->mm_users);
}

static inline bool mmget_not_zero(struct mm_struct *mm)
{
	return atomic_inc_not_zero(&mm->mm_users);
}
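
/*
 * Illustrative sketch only (not part of this header's API): a long-lived
 * user typically pins the mm_struct itself with mmgrab()/mmdrop() and only
 * elevates mm_users around actual address-space accesses. The helper name
 * walk_user_address_space() below is hypothetical.
 *
 *	mmgrab(mm);
 *	...
 *	if (mmget_not_zero(mm)) {
 *		walk_user_address_space(mm);
 *		mmput(mm);
 *	}
 *	...
 *	mmdrop(mm);
 */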

/* mmput gets rid of the mappings and all user-space */
extern void mmput(struct mm_struct *);
#ifdef CONFIG_MMU
/* Same as above, but performs the slow path from the async context.
 * Can also be called from atomic context.
 */
void mmput_async(struct mm_struct *);
#endif

/* Grab a reference to a task's mm, if it is not already going away */
extern struct mm_struct *get_task_mm(struct task_struct *task);
/*
 * Grab a reference to a task's mm, if it is not already going away
 * and ptrace_may_access() with the given mode succeeds.
 */
extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
/* Remove the current task's stale references to the old mm_struct on exit() */
extern void exit_mm_release(struct task_struct *, struct mm_struct *);
/* Remove the current task's stale references to the old mm_struct on exec() */
extern void exec_mm_release(struct task_struct *, struct mm_struct *);
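
/*
 * Illustrative sketch only: get_task_mm() returns NULL if the task has no
 * user address space (e.g. a kernel thread or a task that is already
 * exiting), so callers must check the result and pair it with mmput().
 * inspect_address_space() is a hypothetical helper.
 *
 *	struct mm_struct *mm = get_task_mm(task);
 *
 *	if (mm) {
 *		inspect_address_space(mm);
 *		mmput(mm);
 *	}
 */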

#ifdef CONFIG_MEMCG
extern void mm_update_next_owner(struct mm_struct *mm);
#else
static inline void mm_update_next_owner(struct mm_struct *mm)
{
}
#endif /* CONFIG_MEMCG */

#ifdef CONFIG_MMU
#ifndef arch_get_mmap_end
#define arch_get_mmap_end(addr)	(TASK_SIZE)
#endif

#ifndef arch_get_mmap_base
#define arch_get_mmap_base(addr, base) (base)
#endif

extern void arch_pick_mmap_layout(struct mm_struct *mm,
				  struct rlimit *rlim_stack);
extern unsigned long
arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
		       unsigned long, unsigned long);
extern unsigned long
arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
			       unsigned long len, unsigned long pgoff,
			       unsigned long flags);
#else
static inline void arch_pick_mmap_layout(struct mm_struct *mm,
					 struct rlimit *rlim_stack) {}
#endif

static inline bool in_vfork(struct task_struct *tsk)
{
	bool ret;

	/*
	 * need RCU to access ->real_parent if CLONE_VM was used along with
	 * CLONE_PARENT.
	 *
	 * We check real_parent->mm == tsk->mm because CLONE_VFORK does not
	 * imply CLONE_VM.
	 *
	 * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus
	 * ->real_parent is not necessarily the task doing vfork(), so in
	 * theory we can't rely on task_lock() if we want to dereference it.
	 *
	 * And in this case we can't trust the real_parent->mm == tsk->mm
	 * check, it can be false negative. But we do not care, if init or
	 * another oom-unkillable task does this it should blame itself.
	 */
	rcu_read_lock();
	ret = tsk->vfork_done &&
			rcu_dereference(tsk->real_parent)->mm == tsk->mm;
	rcu_read_unlock();

	return ret;
}

/*
 * Applies per-task gfp context to the given allocation flags.
 * PF_MEMALLOC_NOIO implies GFP_NOIO
 * PF_MEMALLOC_NOFS implies GFP_NOFS
 * PF_MEMALLOC_PIN  implies !GFP_MOVABLE
 */
static inline gfp_t current_gfp_context(gfp_t flags)
{
	unsigned int pflags = READ_ONCE(current->flags);

	if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) {
		/*
		 * NOIO implies both NOIO and NOFS and it is a weaker context
		 * so always make sure it takes precedence.
		 */
		if (pflags & PF_MEMALLOC_NOIO)
			flags &= ~(__GFP_IO | __GFP_FS);
		else if (pflags & PF_MEMALLOC_NOFS)
			flags &= ~__GFP_FS;

		if (pflags & PF_MEMALLOC_PIN)
			flags &= ~__GFP_MOVABLE;
	}
	return flags;
}

#ifdef CONFIG_LOCKDEP
extern void __fs_reclaim_acquire(unsigned long ip);
extern void __fs_reclaim_release(unsigned long ip);
extern void fs_reclaim_acquire(gfp_t gfp_mask);
extern void fs_reclaim_release(gfp_t gfp_mask);
#else
static inline void __fs_reclaim_acquire(unsigned long ip) { }
static inline void __fs_reclaim_release(unsigned long ip) { }
static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
static inline void fs_reclaim_release(gfp_t gfp_mask) { }
#endif

/* Any memory-allocation retry loop should use
 * memalloc_retry_wait(), and pass the flags for the most
 * constrained allocation attempt that might have failed.
 * This provides useful documentation of where loops are,
 * and a central place to fine tune the waiting as the MM
 * implementation changes.
 */
static inline void memalloc_retry_wait(gfp_t gfp_flags)
{
	/* We use io_schedule_timeout because waiting for memory
	 * typically includes waiting for dirty pages to be
	 * written out, which requires IO.
	 */
	__set_current_state(TASK_UNINTERRUPTIBLE);
	gfp_flags = current_gfp_context(gfp_flags);
	if (gfpflags_allow_blocking(gfp_flags) &&
	    !(gfp_flags & __GFP_NORETRY))
		/* Probably waited already, no need for much more */
		io_schedule_timeout(1);
	else
		/* Probably didn't wait, and has now released a lock,
		 * so now is a good time to wait
		 */
		io_schedule_timeout(HZ/50);
}
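
/*
 * Illustrative sketch only: a retry loop around a page allocation, passing
 * the same gfp flags that just failed to memalloc_retry_wait().
 *
 *	struct page *page;
 *
 *	while (!(page = alloc_pages(gfp, order)))
 *		memalloc_retry_wait(gfp);
 */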

/**
 * might_alloc - Mark possible allocation sites
 * @gfp_mask: gfp_t flags that would be used to allocate
 *
 * Similar to might_sleep() and other annotations, this can be used in functions
 * that might allocate, but often don't. Compiles to nothing without
 * CONFIG_LOCKDEP. Includes a conditional might_sleep() if @gfp_mask allows blocking.
 */
static inline void might_alloc(gfp_t gfp_mask)
{
	fs_reclaim_acquire(gfp_mask);
	fs_reclaim_release(gfp_mask);

	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
}

/**
 * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope.
 *
 * This function marks the beginning of the GFP_NOIO allocation scope.
 * All further allocations will implicitly drop the __GFP_IO flag and so
 * they are safe for the IO critical section from the allocation recursion
 * point of view. Use memalloc_noio_restore to end the scope with flags
 * returned by this function.
 *
 * This function is safe to be used from any context.
 */
static inline unsigned int memalloc_noio_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
	current->flags |= PF_MEMALLOC_NOIO;
	return flags;
}

/**
 * memalloc_noio_restore - Ends the implicit GFP_NOIO scope.
 * @flags: Flags to restore.
 *
 * Ends the implicit GFP_NOIO scope started by memalloc_noio_save function.
 * Always make sure that the given flags value is the return value from the
 * pairing memalloc_noio_save call.
 */
static inline void memalloc_noio_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
}

/**
 * memalloc_nofs_save - Marks implicit GFP_NOFS allocation scope.
 *
 * This function marks the beginning of the GFP_NOFS allocation scope.
 * All further allocations will implicitly drop the __GFP_FS flag and so
 * they are safe for the FS critical section from the allocation recursion
 * point of view. Use memalloc_nofs_restore to end the scope with flags
 * returned by this function.
 *
 * This function is safe to be used from any context.
 */
static inline unsigned int memalloc_nofs_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
	current->flags |= PF_MEMALLOC_NOFS;
	return flags;
}

/**
 * memalloc_nofs_restore - Ends the implicit GFP_NOFS scope.
 * @flags: Flags to restore.
 *
 * Ends the implicit GFP_NOFS scope started by memalloc_nofs_save function.
 * Always make sure that the given flags value is the return value from the
 * pairing memalloc_nofs_save call.
 */
static inline void memalloc_nofs_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
}

static inline unsigned int memalloc_noreclaim_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	return flags;
}

static inline void memalloc_noreclaim_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC) | flags;
}

static inline unsigned int memalloc_pin_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC_PIN;

	current->flags |= PF_MEMALLOC_PIN;
	return flags;
}

static inline void memalloc_pin_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags;
}
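
/*
 * Illustrative sketch only: a filesystem entering an FS-critical section
 * might use the scope API like this (memalloc_noio_save()/restore() works
 * the same way for the IO-critical case):
 *
 *	unsigned int nofs_flags;
 *
 *	nofs_flags = memalloc_nofs_save();
 *	... allocations here implicitly behave as if __GFP_FS was cleared ...
 *	memalloc_nofs_restore(nofs_flags);
 */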

#ifdef CONFIG_MEMCG
DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg);
/**
 * set_active_memcg - Starts the remote memcg charging scope.
 * @memcg: memcg to charge.
 *
 * This function marks the beginning of the remote memcg charging scope. All the
 * __GFP_ACCOUNT allocations till the end of the scope will be charged to the
 * given memcg.
 *
 * NOTE: This function can nest. Users must save the return value and
 * reset the previous value after their own charging scope is over.
 */
static inline struct mem_cgroup *
set_active_memcg(struct mem_cgroup *memcg)
{
	struct mem_cgroup *old;

	if (!in_task()) {
		old = this_cpu_read(int_active_memcg);
		this_cpu_write(int_active_memcg, memcg);
	} else {
		old = current->active_memcg;
		current->active_memcg = memcg;
	}

	return old;
}
#else
static inline struct mem_cgroup *
set_active_memcg(struct mem_cgroup *memcg)
{
	return NULL;
}
#endif

#ifdef CONFIG_MEMBARRIER
enum {
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY		= (1U << 0),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED			= (1U << 1),
	MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY			= (1U << 2),
	MEMBARRIER_STATE_GLOBAL_EXPEDITED			= (1U << 3),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY	= (1U << 4),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE		= (1U << 5),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY		= (1U << 6),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ			= (1U << 7),
};

enum {
	MEMBARRIER_FLAG_SYNC_CORE	= (1U << 0),
	MEMBARRIER_FLAG_RSEQ		= (1U << 1),
};

#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
#include <asm/membarrier.h>
#endif

static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
	if (current->mm != mm)
		return;
	if (likely(!(atomic_read(&mm->membarrier_state) &
		     MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
		return;
	sync_core_before_usermode();
}

extern void membarrier_exec_mmap(struct mm_struct *mm);

extern void membarrier_update_current_mm(struct mm_struct *next_mm);

#else
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
					     struct mm_struct *next,
					     struct task_struct *tsk)
{
}
#endif
static inline void membarrier_exec_mmap(struct mm_struct *mm)
{
}
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
}
static inline void membarrier_update_current_mm(struct mm_struct *next_mm)
{
}
#endif

#ifdef CONFIG_IOMMU_SVA
static inline void mm_pasid_init(struct mm_struct *mm)
{
	mm->pasid = INVALID_IOASID;
}

/* Associate a PASID with an mm_struct: */
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
{
	mm->pasid = pasid;
}

static inline void mm_pasid_drop(struct mm_struct *mm)
{
	if (pasid_valid(mm->pasid)) {
		ioasid_free(mm->pasid);
		mm->pasid = INVALID_IOASID;
	}
}
#else
static inline void mm_pasid_init(struct mm_struct *mm) {}
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {}
static inline void mm_pasid_drop(struct mm_struct *mm) {}
#endif

#endif /* _LINUX_SCHED_MM_H */