/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_VA_BLOCK_H__
#define __UVM_VA_BLOCK_H__

#include "uvm_forward_decl.h"
#include "uvm_types.h"
#include "uvm_linux.h"
#include "nv-kref.h"
#include "uvm_common.h"
#include "uvm_perf_module.h"
#include "uvm_processors.h"
#include "uvm_lock.h"
#include "uvm_test_ioctl.h"
#include "uvm_tracker.h"
#include "uvm_pmm_gpu.h"
#include "uvm_perf_thrashing.h"
#include "uvm_perf_utils.h"
#include "uvm_va_block_types.h"
#include "uvm_range_tree.h"
#include "uvm_mmu.h"
#include "nv-kthread-q.h"

#include <linux/mmu_notifier.h>
#include <linux/wait.h>

// VA blocks are the leaf nodes in the uvm_va_space tree for managed
// allocations (VA ranges with type == UVM_VA_RANGE_TYPE_MANAGED):
//
//  UVM: uvm_va_space -> uvm_va_range -> uvm_va_block
//  HMM: uvm_va_space -> uvm_va_block
//
// Each VA block is contained within a single VA range, and contains state on
// VAs covered by that block. Most importantly, the block tracks the current
// state of the virtual-to-physical mappings for all VAs within that block
// across all processors, along with the physical residency location for each
// VA.
//
// The block serializes both CPU and GPU operations on all VAs under that
// block. The CPU work is serialized with the block lock, and the GPU work is
// serialized by the block work tracker, which itself is protected by the
// block lock.
//
// The size of each block varies from the size of the smallest VA range
// (PAGE_SIZE) to the max block size specified by UVM_VA_BLOCK_BITS. No block
// will span a 2^UVM_VA_BLOCK_BITS boundary in VA space. The size of the block
// is determined by the alignment of the parent VA range and the block's
// placement within the range.
//
// Note that this means user space will get the best allocation efficiency if
// it allocates memory in 2^UVM_VA_BLOCK_BITS naturally-aligned chunks.
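
// Example (illustrative sketch, not driver code): how the bounds of the block
// containing 'addr' within a parent range could be derived from the rules
// above. UVM_ALIGN_DOWN and the kernel min/max helpers are assumptions here;
// UVM_VA_BLOCK_SIZE is assumed to equal 1ULL << UVM_VA_BLOCK_BITS.
//
//     static void example_block_bounds(NvU64 addr,
//                                      NvU64 range_start,
//                                      NvU64 range_end,
//                                      NvU64 *block_start,
//                                      NvU64 *block_end)
//     {
//         NvU64 aligned = UVM_ALIGN_DOWN(addr, UVM_VA_BLOCK_SIZE);
//
//         // The block is the intersection of the 2^UVM_VA_BLOCK_BITS-aligned
//         // region and the parent range, so poorly aligned ranges produce
//         // smaller blocks.
//         *block_start = max(aligned, range_start);
//         *block_end   = min(aligned + UVM_VA_BLOCK_SIZE - 1, range_end);
//     }
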
// Enums used for indexing into the array of pte_bits bitmaps in the VA block,
// which hold the current state of each PTE. For a given {processor, PTE}, the
// bits represented here must be enough to re-create the non-address portion
// of the PTE for that processor.

// If _READ is not set, the PTE mapping is not valid.
// If _WRITE is set, _READ is also set (_WRITE implies _READ).
typedef enum
{
    UVM_PTE_BITS_CPU_READ,
    UVM_PTE_BITS_CPU_WRITE,
    UVM_PTE_BITS_CPU_MAX
} uvm_pte_bits_cpu_t;

// If _READ is not set, the PTE mapping is not valid.
// If _WRITE is set, _READ is also set (_WRITE implies _READ).
// If _ATOMIC is set, _WRITE is also set (_ATOMIC implies _WRITE and _READ).
//
// TODO: Bug 1764925: Track volatile here too if we add GPU L2 caching
typedef enum
{
    UVM_PTE_BITS_GPU_READ,
    UVM_PTE_BITS_GPU_WRITE,
    UVM_PTE_BITS_GPU_ATOMIC,
    UVM_PTE_BITS_GPU_MAX
} uvm_pte_bits_gpu_t;
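
// Example (illustrative sketch, not part of this header): because _WRITE
// implies _READ, and _ATOMIC implies _WRITE, the current permission of a page
// can be recovered by scanning the pte_bits masks (see
// uvm_va_block_gpu_state_t below) from the most-permissive bit downward.
// uvm_page_mask_test() and the uvm_prot_t values are assumed from the
// surrounding headers; example_gpu_prot() is a hypothetical name.
//
//     static uvm_prot_t example_gpu_prot(uvm_va_block_gpu_state_t *gpu_state,
//                                        uvm_page_index_t page_index)
//     {
//         if (uvm_page_mask_test(&gpu_state->pte_bits[UVM_PTE_BITS_GPU_ATOMIC], page_index))
//             return UVM_PROT_READ_WRITE_ATOMIC;
//         if (uvm_page_mask_test(&gpu_state->pte_bits[UVM_PTE_BITS_GPU_WRITE], page_index))
//             return UVM_PROT_READ_WRITE;
//         if (uvm_page_mask_test(&gpu_state->pte_bits[UVM_PTE_BITS_GPU_READ], page_index))
//             return UVM_PROT_READ_ONLY;
//         return UVM_PROT_NONE;
//     }
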
typedef struct
{
    // Per-page residency bit vector, used for fast traversal of resident
    // pages.
    //
    // This follows the same semantics as the CPU residency bit vector and
    // notably each bit still represents a PAGE_SIZE amount of data, but the
    // physical GPU memory is tracked by an array of GPU chunks below.
    uvm_page_mask_t resident;

    // Pages that have been evicted to sysmem
    uvm_page_mask_t evicted;

    NvU64 *cpu_chunks_dma_addrs;

    // Array of naturally-aligned chunks. Each chunk has the largest possible
    // size which can fit within the block, so the chunks are not of uniform
    // size.
    //
    // The number of chunks in the array is calculated using
    // block_num_gpu_chunks. The size of each chunk is calculated using
    // block_gpu_chunk_index.
    uvm_gpu_chunk_t **chunks;

    // These page table ranges are not necessarily all used at the same time.
    // The block might also be too small or not aligned properly to use the
    // larger ranges, in which case they're never allocated.
    //
    // Once a range is allocated we keep it around to avoid constant
    // allocation overhead when doing PTE splitting and merging.
    //
    // Check range.table to see if a given range has been allocated yet.
    //
    // page_table_range_big's range covers the big PTEs which fit within the
    // interior of this block. See the big_ptes field.
    uvm_page_table_range_t page_table_range_2m;
    uvm_page_table_range_t page_table_range_big;
    uvm_page_table_range_t page_table_range_4k;

    // These flags are ignored unless the {block, gpu} pair supports a 2M page
    // size. In that case it's the responsibility of the block code to make
    // the lower page tables active by calling uvm_page_tree_write_pde.
    //
    // They can be allocated and activated separately, so we have to track
    // them separately.
    //
    // Activated only means that uvm_page_tree_write_pde has been called at
    // some point in the past with the appropriate range allocated. It does
    // not imply that the 2M entry is a PDE (see pte_is_2m).
    bool activated_big;
    bool activated_4k;

    // For {block, gpu} pairs which support the 2M page size, the page table
    // ranges are uninitialized on allocation. This flag tracks whether the
    // big PTEs have been initialized.
    //
    // We don't need an equivalent flag for the 4k range because we always
    // write just the 4k PTEs not covered by higher-level PTEs. Big PTEs
    // however can be allocated and activated late while the 4k PTEs are
    // already active, in which case we need to initialize the entire big
    // range.
    bool initialized_big;

    // Sticky state to split PTEs to 4k and keep them there. Used when a fatal
    // fault has been detected on this GPU to avoid false dependencies within
    // the uTLB between fatal and non-fatal faults on the same larger PTE,
    // which could lead to incorrect fault attribution.
    bool force_4k_ptes;

    // This table shows the HW PTE states given all permutations of pte_is_2m,
    // big_ptes, and pte_bits. Note that the first row assumes that the 4k
    // page tables have been allocated (if not, then no PDEs are allocated
    // either).
    //
    // |------------- SW state --------------|--------------- HW state ----------------|
    //  pte_is_2m  pte_is_big  pte_bits[READ]| Page size  PDE0 (2M only)  Big PTE       4k PTE
    // ------------------------------------------------------------------------------------------
    //      0          0             0       |    4k      Valid PDE       Invalid [1]   Invalid
    //      0          0             1       |    4k      Valid PDE       Invalid [1]   Valid
    //      0          1             0       |    Big     Valid PDE       Unmapped [2]  x
    //      0          1             1       |    Big     Valid PDE       Valid         x
    //      1      must be 0         0       |    2M      Invalid         x             x
    //      1      must be 0         1       |    2M      Valid PTE       x             x
    //
    // [1]: The big PTE may be unallocated, in which case its pointer won't be
    //      valid in the parent PDE. If the big PTE is allocated, it will be
    //      invalid so the 4k PTEs are active.
    //
    // [2]: The unmapped big PTE pattern differs from the invalid pattern, and
    //      it prevents HW from reading the 4k entries. See the unmapped_pte()
    //      MMU HAL function.

    // If pte_is_2m is true, there is a 2M PTE covering this VA block (valid
    // or invalid). If false then we're in one of the following scenarios:
    // 1) This {block, gpu} does not support 2M pages.
    // 2) 2M pages are supported but the page_table_range_2m has not been
    //    allocated (implying that the other page table ranges have not been
    //    allocated either).
    // 3) page_table_range_2m has been allocated, but the big_ptes bitmap
    //    should be used to determine the mix of big and 4k PTEs.
    bool pte_is_2m;

    // When pte_is_2m is false, this block consists of any possible mix of big
    // and 4k PTEs. This bitmap describes that mix. A set bit indicates that
    // the corresponding big-page-sized region of the block is covered by a
    // big PTE. A cleared bit indicates that it is covered by 4k PTEs.
    //
    // Neither setting implies that the PTE currently has a valid mapping, it
    // just indicates which PTE is read by the GPU (see the table above).
    //
    // The indices represent the corresponding big PTEs in the block's
    // interior. For example, a block with alignment and size of one 4k page
    // on either side of a big page will only use bit 0. Use
    // uvm_va_block_big_page_index to look up the big_ptes index of a page.
    //
    // The block might not be able to fit any big PTEs, in which case this
    // bitmap is always zero. Use uvm_va_block_gpu_num_big_pages to find the
    // number of valid bits in this mask.
    DECLARE_BITMAP(big_ptes, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);

    // See the comments for uvm_va_block_t::cpu.pte_bits.
    //
    // The major difference is that these bits are always accurate since,
    // unlike the CPU PTEs, the UVM driver is in full control of these
    // mappings.
    //
    // Note that the granularity is always PAGE_SIZE, not whatever GPU PTE
    // size happens to currently map these regions. PAGE_SIZE is the minimum
    // granularity of operations on the VA blocks.
    // As a future optimization we could consider sub-PAGE_SIZE operations if
    // PAGE_SIZE > 4K and the CPU isn't involved, for example false sharing
    // among peer GPUs.
    uvm_page_mask_t pte_bits[UVM_PTE_BITS_GPU_MAX];

} uvm_va_block_gpu_state_t;

// TODO: Bug 1766180: Worst-case we could have one of these per system page.
//       Options:
//       1) Rely on the OOM killer to prevent the user from trying to do that
//       2) Be much more space-conscious in this struct (difficult)
//       3) Cap the per-process range and/or block count, like vm.max_map_count
//          does for vmas
struct uvm_va_block_struct
{
    // Reference count for this block. References are held by:
    // - The parent VA range for managed blocks, or the VA space for HMM
    //   blocks
    // - The reverse map
    // - The eviction path temporarily when attempting to evict a GPU page
    //   under this block
    //
    // This isn't protected by the lock on the eviction path, so it must be
    // atomic. nv_kref provides that.
    nv_kref_t kref;

    // Lock protecting the block. See the comment at the top of uvm.c.
    uvm_mutex_t lock;

    // Parent VA range. Managed blocks have this set. HMM blocks will have
    // va_range set to NULL and hmm.va_space set instead. Dead blocks that are
    // waiting for the last ref count to be removed have va_range and
    // hmm.va_space set to NULL (could be either type of block).
    //
    // This field can be read while holding either the block lock or just the
    // VA space lock in read mode, since it can only change when the VA space
    // lock is held in write mode.
    uvm_va_range_t *va_range;

    // Virtual address [start, end] covered by this block. These fields can be
    // read while holding either the block lock or just the VA space lock in
    // read mode, since they can only change when the VA space lock is held in
    // write mode.
    NvU64 start;
    NvU64 end;

    // Per-processor residency bit vector, used for fast lookup of which
    // processors are active in this block.
    //
    // A set bit means the corresponding processor has a coherent physical
    // copy of memory somewhere in the block. The per-processor state must
    // then be inspected to find out which pages. The processor may or may not
    // have a mapping to that physical memory, however.
    //
    // A cleared bit means the corresponding processor does not have a
    // coherent physical copy of any pages under this block. The processor may
    // still have cached pages allocated for future use, however. It also may
    // have mappings to pages resident on other processors.
    uvm_processor_mask_t resident;

    // Per-processor mapping bit vector, used for fast lookup of which
    // processors have active mappings in this block.
    //
    // A set bit means the corresponding processor has an active, valid page
    // table mapping to some VA in this block. The per-processor pte_bits
    // state must then be inspected to find out the mapping address and
    // permissions.
    //
    // A cleared bit means the corresponding processor has no virtual mappings
    // within this block (all pte_bits entries are 0).
    uvm_processor_mask_t mapped;

    // Per-processor evicted bit vector, used for fast lookup of which GPUs
    // have evicted pages in this block.
    //
    // A set bit means the corresponding processor was the residency of some
    // of the pages in the block when they were evicted due to memory capacity
    // limitations.
    // The per-processor state must then be inspected to find out which pages.
    //
    // A cleared bit means the corresponding processor has no evicted pages
    // within this block (all evicted entries are 0).
    uvm_processor_mask_t evicted_gpus;

    struct
    {
        // Per-page residency bit vector, used for fast traversal of resident
        // pages.
        //
        // A set bit means the CPU has a coherent copy of the physical page
        // resident in its memory, and that the corresponding entry in the
        // pages array is present. This does not mean that the coherent copy
        // is currently mapped anywhere, however. A page may be resident on
        // multiple processors when in read-duplicate mode.
        //
        // A cleared bit means the CPU does not have a coherent copy of that
        // page resident. The corresponding entry in the pages array may or
        // may not be present. If the entry is present, it's a cached page
        // which can be reused in the future.
        //
        // Allocating PAGES_PER_UVM_VA_BLOCK is overkill when the block is
        // smaller than UVM_VA_BLOCK_SIZE, but it's not much extra memory
        // overhead on the whole.
        uvm_page_mask_t resident;

        // CPU memory chunks represent physically contiguous CPU memory
        // allocations. See uvm_pmm_sysmem.h for more details on CPU chunks.
        // This member is meant to hold an opaque value indicating the CPU
        // chunk storage method. For more details on CPU chunk storage, see
        // uvm_cpu_chunk_storage_type_t in uvm_va_block.c.
        unsigned long chunks;

        // Per-page allocation bit vector.
        //
        // A set bit means that a CPU page has been allocated for the
        // corresponding page index.
        uvm_page_mask_t allocated;

        // Per-page mapping bit vectors, one per bit we need to track. These
        // are used for fast traversal of valid mappings in the block. These
        // contain all non-address bits needed to establish a virtual mapping
        // on this processor (permissions, cacheability, etc).
        //
        // A cleared bit in UVM_PTE_BITS_CPU_READ means the CPU has no valid
        // virtual mapping to that address (the access will fault). Further,
        // UVM_PTE_BITS_CPU_WRITE is guaranteed to also be clear.
        //
        // A set bit in UVM_PTE_BITS_CPU_READ means the CPU has a valid
        // mapping at that address with at least read permissions. The
        // physical page for that mapping is contained in the pages array. If
        // UVM_PTE_BITS_CPU_WRITE is not set, the mapping is read-only.
        // Otherwise, the mapping is read-write.
        //
        // For managed allocations, this is the maximum permissions a PTE
        // could have, but not necessarily the actual current permissions of
        // the CPU PTEs. The UVM driver will never change the PTEs without
        // updating this state, but the kernel can downgrade our CPU mappings
        // at any time without notifying the UVM driver (for example in
        // response to user space calling madvise with MADV_DONTNEED).
        //
        // For HMM allocations, this is the minimum permission the CPU has,
        // since Linux can upgrade a read-only PTE to read-write without
        // notifying the UVM driver. This is why read duplication isn't
        // currently supported.
        // TODO: Bug 3660922: Need to handle read duplication at some point.
        uvm_page_mask_t pte_bits[UVM_PTE_BITS_CPU_MAX];

        // Whether the CPU has ever mapped a page on this VA block. This is
        // used to force GMMU PDE1 pre-population on ATS systems.
        // See pre_populate_gpu_pde1 in uvm_va_block.c for more information.
        NvU8 ever_mapped : 1;

        // We can get "unexpected" faults if multiple CPU threads fault on the
        // same address simultaneously and race to create the mapping. Since
        // our CPU fault handler always unmaps to handle the case where the
        // kernel downgrades our CPU mappings, we can introduce an infinite
        // stream of CPU faults in multi-threaded workloads.
        //
        // In order to handle this scenario, we keep track of the first thread
        // that faulted on a page with valid permissions, and the timestamp.
        // Then, we keep track of the subsequent faults on that page during a
        // window of time. If the first thread faults again on the page, that
        // indicates that the mapping has been downgraded by the kernel and we
        // need to remap it. Faults from the remaining threads are simply
        // ignored. The information is also cleared on the following events:
        // - The tracking window finishes
        // - The page is unmapped
        struct
        {
            // Timestamp when the first fault was detected. This also serves
            // as a flag that the contents of this struct are valid.
            NvU64 first_fault_stamp;

            // First thread that faulted while having valid permissions. We
            // don't take a reference on the pid, so we should never use it
            // for task lookup in the kernel. We only use it as a heuristic,
            // so it's OK if the pid gets destroyed or reused.
            pid_t first_pid;

            // Index of the page whose faults are being tracked
            uvm_page_index_t page_index;
        } fault_authorized;
    } cpu;

    // Per-GPU residency and mapping state
    //
    // TODO: Bug 1766180: Even though these are pointers, making this a static
    //       array will use up a non-trivial amount of storage for small
    //       blocks. In most cases we won't have anywhere near this many GPUs
    //       active anyway. Consider using a dense array of just the GPUs
    //       registered in this VA space, depending on the perf of accessing
    //       that array and on how noticeable this memory overhead actually
    //       is.
    uvm_va_block_gpu_state_t *gpus[UVM_ID_MAX_GPUS];

    // Mask to keep track of the pages that are read-duplicate
    uvm_page_mask_t read_duplicated_pages;

    // Mask to keep track of the pages that are not mapped on any non-UVM-Lite
    // processor.
    // 0: Page is definitely not mapped by any processors
    // 1: Page may or may not be mapped by a processor
    //
    // A bit is set when the page is mapped on any non-UVM-Lite processor, but
    // bits are not always cleared on unmap (to avoid a performance impact),
    // so set bits may be stale. The mask should only be used for
    // opportunistic optimizations that have a fast path for pages that are
    // not mapped anywhere (see uvm_va_block_migrate_locked, for example), but
    // not the other way around.
    uvm_page_mask_t maybe_mapped_pages;

    // Tracks all outstanding GPU work related to this block: GPU copies, PTE
    // updates, TLB invalidates, etc. The residency and mapping state is only
    // valid once this tracker is done.
    //
    // CPU operations need to wait for this tracker to be done. GPU operations
    // need to acquire it before pushing their work, then that work must be
    // added to this tracker before the block's lock is dropped.
    uvm_tracker_t tracker;

    // A queue item for establishing eviction mappings in a deferred way
    nv_kthread_q_item_t eviction_mappings_q_item;

    uvm_perf_module_data_desc_t perf_modules_data[UVM_PERF_MODULE_TYPE_COUNT];

    // Prefetch information that is updated while holding the va_block lock
    // but records state while the lock is not held.
    struct
    {
        uvm_processor_id_t last_migration_proc_id;

        NvU16 fault_migrations_to_last_proc;
    } prefetch_info;

#if UVM_IS_CONFIG_HMM()
    struct
    {
        // The MMU notifier is registered per va_block.
        struct mmu_interval_notifier notifier;

        // Wait queue for GPU atomic operations to system memory.
        struct wait_queue_head atomic_waitq;

        // Mask of pages being migrated to system memory for GPU atomic
        // access. It is used so other threads don't try to migrate those
        // pages while make_device_exclusive_range() is called without holding
        // the va_block lock.
        uvm_page_mask_t atomic_busy;

        // Sequence number to tell if any changes were made to the va_block
        // while not holding the block lock and calling hmm_range_fault().
        unsigned long changed;

        // Parent VA space pointer. It is NULL for managed blocks or if the
        // HMM block is dead. This field can be read while holding the block
        // lock and is only modified while holding the va_space write lock and
        // the va_block lock (same as the va_range pointer).
        uvm_va_space_t *va_space;

        // Tree of uvm_va_policy_node_t. The policy node ranges always cover
        // all or part of a VMA range or a contiguous range of VMAs within the
        // va_block. Policy nodes are resized or deleted when the underlying
        // VMA range is changed by Linux via the invalidate() callback.
        // Otherwise, policies could be stale after munmap().
        // Locking: The va_block lock is needed to access or modify the tree.
        uvm_range_tree_t va_policy_tree;

        // Storage node for the range tree of va_blocks.
        uvm_range_tree_node_t node;
    } hmm;
#endif
};

// We define additional per-VA-block fields for testing. When
// uvm_enable_builtin_tests is defined, all VA blocks will have
// uvm_va_block_wrapper_t size. Otherwise, the test fields are not available.
// Use the uvm_va_block_get_test function defined below to obtain a safe
// pointer to uvm_va_block_test_t from a uvm_va_block_t pointer.
struct uvm_va_block_wrapper_struct
{
    uvm_va_block_t block;

    struct uvm_va_block_test_struct
    {
        // Count of how many page table allocations should be forced to retry
        // with eviction enabled. Used for testing only.
        NvU32 page_table_allocation_retry_force_count;

        // Count of how many user page allocations should be forced to retry
        // with eviction enabled. Used for testing only.
        NvU32 user_pages_allocation_retry_force_count;

        // Mask of chunk sizes to be used for CPU chunk allocations. The
        // actual set of chunk sizes to be used will be the set resulting from
        // AND'ing this value with the value of the
        // uvm_cpu_chunk_allocation_sizes module parameter.
        NvU32 cpu_chunk_allocation_size_mask;

        // Subsequent operations that need to allocate CPU pages will fail. As
        // opposed to other error injection settings, this one fails N times
        // and then succeeds instead of failing on the Nth try. A value of ~0u
        // means fail indefinitely.
        // This is because this error is supposed to be fatal and tests verify
        // the state of the VA blocks after the failure. However, some tests
        // use kernels to trigger migrations, and a fault replay could trigger
        // a successful migration if this error flag were cleared.
        NvU32 inject_cpu_pages_allocation_error_count;

        // Force the next eviction attempt on this block to fail. Used for
        // testing only.
        bool inject_eviction_error;

        // Force the next successful chunk allocation to then fail. Used for
        // testing only, to simulate driver metadata allocation failure.
        bool inject_populate_error;

        // Force the next split on this block to fail.
        // Set by the error injection ioctl for testing purposes only.
        bool inject_split_error;
    } test;
};

// Tracking needed for supporting allocation-retry of user GPU memory
struct uvm_va_block_retry_struct
{
    // A tracker used for all allocations from PMM.
    uvm_tracker_t tracker;

    // List of allocated chunks (uvm_gpu_chunk_t). Currently all chunks are of
    // the same size, but the list can contain chunks from multiple GPUs. All
    // remaining free chunks are freed when the operation is finished with
    // uvm_va_block_retry_deinit().
    struct list_head free_chunks;

    // List of chunks allocated and used during the block operation. This list
    // can contain chunks from multiple GPUs. All the used chunks are unpinned
    // when the operation is finished with uvm_va_block_retry_deinit().
    struct list_head used_chunks;
};

// Module load/exit
NV_STATUS uvm_va_block_init(void);
void uvm_va_block_exit(void);

// Allocates and initializes the block. The block's ref count is initialized
// to 1. The caller is responsible for inserting the block into its parent
// va_range.
//
// The caller must be holding the VA space lock in at least read mode.
//
// The va_range must have type UVM_VA_RANGE_TYPE_MANAGED.
NV_STATUS uvm_va_block_create(uvm_va_range_t *va_range,
                              NvU64 start,
                              NvU64 end,
                              uvm_va_block_t **out_block);

// Internal function called only when uvm_va_block_release drops the ref count
// to 0. Do not call directly.
void uvm_va_block_destroy(nv_kref_t *kref);

static inline void uvm_va_block_retain(uvm_va_block_t *va_block)
{
    nv_kref_get(&va_block->kref);
}

// Locking: The va_block lock must not be held.
// The va_space lock must be held in write mode unless it is the special case
// that the block has no GPU state; for example, right after calling
// uvm_va_block_create(). In that case, the va_space lock can be held in read
// mode.
static inline void uvm_va_block_release(uvm_va_block_t *va_block)
{
    if (va_block) {
        // The calling thread shouldn't be holding the block's mutex when
        // releasing the block as it might get destroyed.
        uvm_assert_unlocked_order(UVM_LOCK_ORDER_VA_BLOCK);
        nv_kref_put(&va_block->kref, uvm_va_block_destroy);
    }
}

// Same as uvm_va_block_release but the caller may be holding the VA block
// lock. The caller must ensure that the ref count will not reach zero in this
// call.
static inline void uvm_va_block_release_no_destroy(uvm_va_block_t *va_block)
{
    int destroyed = nv_kref_put(&va_block->kref, uvm_va_block_destroy);
    UVM_ASSERT(!destroyed);
}
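
// Example (illustrative sketch): the eviction path described above takes a
// temporary reference so the block cannot be destroyed while it is being
// inspected, then drops it once done, without holding the block lock.
//
//     uvm_va_block_retain(va_block);
//
//     ... operate on the block, taking and dropping its lock as needed ...
//
//     uvm_va_block_release(va_block);
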
// Returns true if the block is managed by HMM.
// Locking: This can be called while holding either the block lock or just the
// VA space lock in read mode, since it can only change when the VA space lock
// is held in write mode.
static inline bool uvm_va_block_is_hmm(uvm_va_block_t *va_block)
{
#if UVM_IS_CONFIG_HMM()
    return va_block->hmm.va_space != NULL;
#else
    return false;
#endif
}

// Returns true if the block is dead.
// Locking: This can be called while holding either the block lock or just the
// VA space lock in read mode, since it can only change when the VA space lock
// is held in write mode.
static inline bool uvm_va_block_is_dead(uvm_va_block_t *va_block)
{
    if (va_block->va_range)
        return false;

#if UVM_IS_CONFIG_HMM()
    if (va_block->hmm.va_space)
        return false;
#endif

    return true;
}

static inline uvm_va_block_gpu_state_t *uvm_va_block_gpu_state_get(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id)
{
    return va_block->gpus[uvm_id_gpu_index(gpu_id)];
}

// Returns the va_space pointer of the given block, or NULL if the block is
// dead.
// Locking: This can be called while holding either the block lock or just the
// VA space lock in read mode, since it can only change when the VA space lock
// is held in write mode.
uvm_va_space_t *uvm_va_block_get_va_space_maybe_dead(uvm_va_block_t *va_block);

// Returns the va_space pointer of the given block assuming the block is not
// dead (asserts that it is not dead and that va_space is not NULL).
// Locking: This can be called while holding either the block lock or just the
// VA space lock in read mode, since it can only change when the VA space lock
// is held in write mode.
uvm_va_space_t *uvm_va_block_get_va_space(uvm_va_block_t *va_block);

// Returns true if the VA space has access counter migrations enabled and
// should remote map pages evicted to system memory. This is OK since access
// counters can pull the data back to vidmem if sufficient accesses trigger a
// migration.
// The caller must ensure that the VA space cannot go away.
bool uvm_va_space_map_remote_on_eviction(uvm_va_space_t *va_space);

// Dynamic cache-based allocation for uvm_va_block_context_t.
//
// See uvm_va_block_context_init() for a description of the mm parameter.
uvm_va_block_context_t *uvm_va_block_context_alloc(struct mm_struct *mm);
void uvm_va_block_context_free(uvm_va_block_context_t *va_block_context);

// Initialization of an already-allocated uvm_va_block_context_t.
//
// mm is used to initialize the value of va_block_context->mm. NULL is
// allowed.
static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context, struct mm_struct *mm)
{
    UVM_ASSERT(va_block_context);

    // Write garbage into the VA block context to ensure that the UVM code
    // clears masks appropriately
    if (UVM_IS_DEBUG())
        memset(va_block_context, 0xff, sizeof(*va_block_context));

    va_block_context->mm = mm;
#if UVM_IS_CONFIG_HMM()
    va_block_context->hmm.vma = NULL;
#endif
}
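
// Example (illustrative sketch): allocating a context before operating on a
// block. Callers which may create CPU mappings pass the mm that owns those
// mappings; NULL is allowed otherwise.
//
//     uvm_va_block_context_t *va_block_context = uvm_va_block_context_alloc(mm);
//     if (!va_block_context)
//         return NV_ERR_NO_MEMORY;
//
//     ... call the uvm_va_block_* APIs below with va_block_context ...
//
//     uvm_va_block_context_free(va_block_context);
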
// Check that a single policy covers the given region for the given va_block.
// This always returns true and is intended to only be used with UVM_ASSERT().
// Locking: The va_block lock must be held.
bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
                                        const uvm_va_policy_t *policy,
                                        uvm_va_block_region_t region);

// TODO: Bug 1766480: Using only page masks instead of a combination of
//       regions and page masks could simplify the below APIs and their
//       implementations at the cost of having to scan the whole mask for
//       small regions. Investigate the performance effects of doing that.

// Moves the physical pages of the given region onto the destination
// processor. If page_mask is non-NULL, the movement is further restricted to
// only those pages in the region which are present in the mask.
//
// prefetch_page_mask may be passed as a subset of page_mask when cause is
// UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT,
// UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT, or
// UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER to indicate pages that have been
// pulled due to automatic page prefetching heuristics. For pages in this
// mask, UVM_MAKE_RESIDENT_CAUSE_PREFETCH will be reported in migration events
// instead.
//
// This function breaks read duplication for all given pages even if they
// don't migrate. Pages which are not resident on the destination processor
// will also be unmapped from all existing processors, be populated in the
// destination processor's memory, and copied to the new physical location.
// Any new memory will be zeroed if it is the first allocation for that page
// in the system.
//
// This function does not create any new virtual mappings.
//
// This function acquires/waits for the va_block tracker and updates that
// tracker with any new work pushed.
//
// Allocation-retry: this operation may need to perform eviction to be able to
// allocate GPU memory successfully, and if that happens,
// NV_ERR_MORE_PROCESSING_REQUIRED will be returned. That also means that the
// block's lock has been unlocked and relocked as part of the call, and that
// the whole sequence of operations performed under the block's lock needs to
// be attempted again. To facilitate that, the caller needs to provide the
// same va_block_retry struct for each attempt; it must be initialized before
// the first attempt and deinitialized after the last one. Most callers can
// just use UVM_VA_BLOCK_LOCK_RETRY(), which takes care of that for the
// caller.
//
// If dest_id is the CPU then va_block_retry can be NULL and allocation-retry
// of user memory is guaranteed not to happen. Allocation-retry of GPU page
// tables can still occur though.
//
// va_block_context must not be NULL. This function will set a bit in
// va_block_context->make_resident.pages_changed_residency for each page that
// changed residency (due to a migration or first population) as a result of
// the operation, and a bit in
// va_block_context->make_resident.all_involved_processors for each processor
// involved in the copy. This function only sets bits in those masks. It is
// the caller's responsibility to zero those masks beforehand if needed.
//
// va_block_context->policy must also be set by the caller for the given
// region. See the comments for uvm_va_block_check_policy_is_valid().
//
// Notably any status other than NV_OK indicates that the block's lock might
// have been unlocked and relocked.
//
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
//          NULL, va_block_context->mm->mmap_lock must be held in at least
//          read mode.
NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
                                     uvm_va_block_retry_t *va_block_retry,
                                     uvm_va_block_context_t *va_block_context,
                                     uvm_processor_id_t dest_id,
                                     uvm_va_block_region_t region,
                                     const uvm_page_mask_t *page_mask,
                                     const uvm_page_mask_t *prefetch_page_mask,
                                     uvm_make_resident_cause_t cause);
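
// Example (illustrative sketch of the allocation-retry pattern above; the
// exact UVM_VA_BLOCK_LOCK_RETRY() signature and the cause value are
// assumptions): retry the whole locked sequence until an eviction-triggered
// NV_ERR_MORE_PROCESSING_REQUIRED resolves. The retry struct must outlive all
// attempts.
//
//     uvm_va_block_retry_t va_block_retry;
//     NV_STATUS status;
//
//     status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &va_block_retry,
//                  uvm_va_block_make_resident(va_block,
//                                             &va_block_retry,
//                                             va_block_context,
//                                             dest_id,
//                                             region,
//                                             NULL,
//                                             NULL,
//                                             UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE));
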
// Similar to uvm_va_block_make_resident (read the documentation there). The
// main differences are:
// - Pages are copied, not moved (i.e. other copies of the page are not
//   unmapped).
// - Processors with a resident copy of pages that migrated have write and
//   atomic access permission revoked, unlike in uvm_va_block_make_resident
//   where they are unmapped.
// - All remote mappings (due to either SetAccessedBy or performance
//   heuristics) are broken.
// - Only managed va_blocks are supported.
//   TODO: Bug 3660922: Need to implement HMM read duplication support.
NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
                                                    uvm_va_block_retry_t *va_block_retry,
                                                    uvm_va_block_context_t *va_block_context,
                                                    uvm_processor_id_t dest_id,
                                                    uvm_va_block_region_t region,
                                                    const uvm_page_mask_t *page_mask,
                                                    const uvm_page_mask_t *prefetch_page_mask,
                                                    uvm_make_resident_cause_t cause);

// Similar to uvm_va_block_make_resident() (read the documentation there). The
// difference is that source pages are only copied to the destination and the
// residency is not updated until uvm_va_block_make_resident_finish() is
// called. Otherwise, the combination of uvm_va_block_make_resident_copy() and
// uvm_va_block_make_resident_finish() is the same as just calling
// uvm_va_block_make_resident(). Note, however, that the va_block lock must be
// held across the two calls for the operation to be complete. The va_block
// lock can be dropped after calling uvm_va_block_make_resident_copy(), but
// uvm_va_block_make_resident_copy() must be called again after relocking the
// va_block lock and before calling uvm_va_block_make_resident_finish().
// This split is needed when using migrate_vma_setup() and migrate_vma_pages()
// so that when migrate_vma_pages() indicates a page is not migrating, the
// va_block state is not updated.
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_make_resident_copy(uvm_va_block_t *va_block,
                                          uvm_va_block_retry_t *va_block_retry,
                                          uvm_va_block_context_t *va_block_context,
                                          uvm_processor_id_t dest_id,
                                          uvm_va_block_region_t region,
                                          const uvm_page_mask_t *page_mask,
                                          const uvm_page_mask_t *prefetch_page_mask,
                                          uvm_make_resident_cause_t cause);

// The page_mask must be the same as, or a subset of, the page_mask passed to
// uvm_va_block_make_resident_copy(). This step updates the residency and
// breaks read duplication.
// LOCKING: The caller must hold the va_block lock.
void uvm_va_block_make_resident_finish(uvm_va_block_t *va_block,
                                       uvm_va_block_context_t *va_block_context,
                                       uvm_va_block_region_t region,
                                       const uvm_page_mask_t *page_mask);
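
// Example (illustrative sketch): the two-phase form, equivalent to a single
// uvm_va_block_make_resident() call when the block lock is held across both
// phases.
//
//     status = uvm_va_block_make_resident_copy(va_block,
//                                              &va_block_retry,
//                                              va_block_context,
//                                              dest_id,
//                                              region,
//                                              page_mask,
//                                              NULL,
//                                              cause);
//     if (status == NV_OK)
//         uvm_va_block_make_resident_finish(va_block, va_block_context, region, page_mask);
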
// Creates or upgrades a mapping from the input processor to the given virtual
// address region. Pages which already have new_prot permissions or higher are
// skipped, so this call ensures that the range is mapped with at least
// new_prot permissions. new_prot must not be UVM_PROT_NONE.
// uvm_va_block_unmap or uvm_va_block_revoke_prot should be used to downgrade
// permissions instead.
//
// The mapped pages are described by the region parameter and the map page
// mask, which allows the caller to restrict the map operation to specific
// pages within the region. If the page mask is NULL then the whole region is
// mapped.
//
// If the input processor is a GPU with no GPU VA space registered, or if the
// input processor is the CPU and this thread is not allowed to create CPU
// mappings, this function does nothing. CPU mappings are only allowed if
// uvm_va_range_vma_check(va_block_context->mm) is valid, so the caller must
// set va_block_context->mm before calling this function.
//
// cause specifies the cause to be reported in events in case a remote mapping
// is created.
//
// Any CPU mappings will wait for the va_block tracker. If this function
// pushes GPU work it will first acquire the va_block tracker, then add the
// pushed work to out_tracker. It is the caller's responsibility to add this
// work to va_block's tracker. Note that while it is generally safe to run map
// operations on different GPUs concurrently, two PTE operations (map, unmap,
// revoke) on the same GPU must be serialized even if they target different
// pages, because the earlier operation can cause a PTE split or merge which
// is assumed by the later operation.
//
// va_block_context must not be NULL and va_block_context->policy must be
// valid. See the comments for uvm_va_block_check_policy_is_valid().
//
// If allocation-retry was required as part of the operation and was
// successful, NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the
// entries in the out_tracker were added to the block's tracker and then the
// block's lock was unlocked and relocked.
//
// In general, any status other than NV_OK indicates that the block's lock
// might have been unlocked and relocked.
//
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
//          NULL, va_block_context->mm->mmap_lock must be held in at least
//          read mode.
NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
                           uvm_va_block_context_t *va_block_context,
                           uvm_processor_id_t id,
                           uvm_va_block_region_t region,
                           const uvm_page_mask_t *map_page_mask,
                           uvm_prot_t new_prot,
                           UvmEventMapRemoteCause cause,
                           uvm_tracker_t *out_tracker);

// Like uvm_va_block_map, except it maps all processors in the input mask. The
// VA block tracker contains all map operations on return.
//
// Note that this can return NV_ERR_MORE_PROCESSING_REQUIRED just like
// uvm_va_block_map(), indicating that the operation needs to be retried.
NV_STATUS uvm_va_block_map_mask(uvm_va_block_t *va_block,
                                uvm_va_block_context_t *va_block_context,
                                const uvm_processor_mask_t *map_processor_mask,
                                uvm_va_block_region_t region,
                                const uvm_page_mask_t *map_page_mask,
                                uvm_prot_t new_prot,
                                UvmEventMapRemoteCause cause);
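
// Example (illustrative sketch, assuming the tracker helpers from
// uvm_tracker.h and the UvmEventMapRemoteCausePolicy event cause): mapping a
// region read-only on a GPU, then feeding the pushed work back into the block
// tracker as required above.
//
//     uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
//
//     status = uvm_va_block_map(va_block,
//                               va_block_context,
//                               gpu->id,
//                               region,
//                               NULL,
//                               UVM_PROT_READ_ONLY,
//                               UvmEventMapRemoteCausePolicy,
//                               &local_tracker);
//     if (status == NV_OK)
//         status = uvm_tracker_add_tracker_safe(&va_block->tracker, &local_tracker);
//
//     uvm_tracker_deinit(&local_tracker);
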
// Unmaps virtual regions from a single processor. This does not free page
// tables or physical memory. This is safe to call on the eviction path, but
// the caller must ensure that the block hasn't been killed.
//
// The unmapped pages are described by the region parameter and the unmap page
// mask, which allows the caller to restrict the unmap operation to specific
// pages within the region. If the page mask is NULL then the whole region is
// unmapped.
//
// If id is UVM_ID_CPU, this is guaranteed to return NV_OK, and this is safe
// to call without holding a reference on the mm which owns the associated
// vma.
//
// Any CPU unmappings will wait for the va_block tracker. If this function
// pushes GPU work it will first acquire the va_block tracker, then add the
// pushed work to out_tracker. It is the caller's responsibility to add this
// work to va_block's tracker. Note that while it is generally safe to run
// unmap operations on different GPUs concurrently, two PTE operations (map,
// unmap, revoke) on the same GPU must be serialized even if they target
// different pages, because the earlier operation can cause a PTE split or
// merge which is assumed by the later operation.
//
// va_block_context must not be NULL. va_block_context->policy is unused.
//
// If allocation-retry was required as part of the operation and was
// successful, NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the
// entries in the out_tracker were added to the block's tracker and then the
// block's lock was unlocked and relocked. It is guaranteed that retry will
// not be required if the unmap does not cause a PTE split. Examples of
// operations which will not cause a PTE split include unmapping the entire
// block, unmapping all PTEs with matching attributes, and unmapping all PTEs
// which point to the same physical chunk.
//
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_unmap(uvm_va_block_t *va_block,
                             uvm_va_block_context_t *va_block_context,
                             uvm_processor_id_t id,
                             uvm_va_block_region_t region,
                             const uvm_page_mask_t *unmap_page_mask,
                             uvm_tracker_t *out_tracker);

// Like uvm_va_block_unmap, except it unmaps all processors in the input mask.
// The VA block tracker contains all unmap operations on return.
NV_STATUS uvm_va_block_unmap_mask(uvm_va_block_t *va_block,
                                  uvm_va_block_context_t *va_block_context,
                                  const uvm_processor_mask_t *unmap_processor_mask,
                                  uvm_va_block_region_t region,
                                  const uvm_page_mask_t *unmap_page_mask);
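
// Example (illustrative sketch; uvm_va_block_region_from_block() is assumed
// from the region helpers): unmapping an entire block from a GPU. Per the
// guarantees above, a full-block unmap cannot require a PTE-split retry.
//
//     status = uvm_va_block_unmap(va_block,
//                                 va_block_context,
//                                 gpu->id,
//                                 uvm_va_block_region_from_block(va_block),
//                                 NULL,
//                                 &local_tracker);
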
// Function called when the preferred location changes. Notably:
// - Mark all CPU pages as dirty because the new processor may not have
//   up-to-date data.
// - Unmap the preferred location's processor from any pages in this region
//   which are not resident on the preferred location.
//
// va_block_context must not be NULL and va_block_context->policy must be
// valid. See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the VA block lock.
NV_STATUS uvm_va_block_set_preferred_location_locked(uvm_va_block_t *va_block,
                                                     uvm_va_block_context_t *va_block_context,
                                                     uvm_va_block_region_t region);

// Maps the given processor to all resident pages in this block, as allowed by
// location and policy. Waits for the operation to complete before returning.
// This function should only be called with managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be
// valid. See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If
//          va_block_context->mm != NULL, va_block_context->mm->mmap_lock must
//          be held in at least read mode.
NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
                                       uvm_va_block_context_t *va_block_context,
                                       uvm_processor_id_t processor_id);

// Maps the given processor to all resident pages in this block and region, as
// allowed by location and policy. The caller is responsible for waiting for
// the tracker after all mappings have been started.
// This function can be called with both HMM and managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be
// valid. See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the va_block lock and
//          va_block_context->mm->mmap_lock must be held in at least read
//          mode.
NV_STATUS uvm_va_block_set_accessed_by_locked(uvm_va_block_t *va_block,
                                              uvm_va_block_context_t *va_block_context,
                                              uvm_processor_id_t processor_id,
                                              uvm_va_block_region_t region,
                                              uvm_tracker_t *out_tracker);

// Breaks SetAccessedBy and remote mappings.
// This function should only be called with managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be
// valid. See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If
//          va_block_context->mm != NULL, va_block_context->mm->mmap_lock must
//          be held in at least read mode.
NV_STATUS uvm_va_block_set_read_duplication(uvm_va_block_t *va_block,
                                            uvm_va_block_context_t *va_block_context);

// Restores SetAccessedBy mappings.
// This function should only be called with managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be
// valid. See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If
//          va_block_context->mm != NULL, va_block_context->mm->mmap_lock must
//          be held in at least read mode.
NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
                                              uvm_va_block_context_t *va_block_context);
// Check if processor_id is allowed to access the va_block with access_type
// permissions. Return values:
//
// NV_ERR_INVALID_ADDRESS       The VA block is logically dead (zombie)
// NV_ERR_INVALID_ACCESS_TYPE   The vma corresponding to the VA range does not
//                              allow access_type permissions, or migration is
//                              disallowed and processor_id cannot access the
//                              range remotely (UVM-Lite).
// NV_ERR_INVALID_OPERATION     The access would violate the policies
//                              specified by UvmPreventMigrationRangeGroups.
//
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if the va_block is a HMM block, va_block_context->hmm.vma must be
// valid, which also means va_block_context->mm is not NULL, retained, and
// locked for at least read.
// Locking: The va_block lock must be held.
NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
                                                 uvm_va_block_context_t *va_block_context,
                                                 uvm_processor_id_t processor_id,
                                                 uvm_page_index_t page_index,
                                                 uvm_fault_type_t access_type,
                                                 bool allow_migration);

// API for access privilege revocation
//
// Revoke prot_to_revoke access permissions for the given processor.
//
// The revoked pages are described by the region parameter and the revoke page
// mask, which allows the caller to restrict the revoke operation to specific
// pages within the region.
//
// prot_to_revoke must be greater than UVM_PROT_READ_ONLY. The caller should
// call unmap explicitly if it wants to revoke all access privileges.
//
// If id is UVM_ID_CPU and prot_to_revoke is UVM_PROT_READ_WRITE_ATOMIC, no
// action is performed. If the processor id corresponds to the CPU and the
// caller cannot establish CPU mappings because it does not have a reference
// on vma->vm_mm (va_block_context->mm != vma->vm_mm), the pages will simply
// be unmapped.
//
// Any CPU revocation will wait for the va_block tracker. If this function
// pushes GPU work it will first acquire the va_block tracker, then add the
// pushed work to out_tracker. It is the caller's responsibility to add this
// work to va_block's tracker. Note that while it is generally safe to run
// revocation operations on different GPUs concurrently, two PTE operations
// (map, unmap, revoke) on the same GPU must be serialized even if they target
// different pages, because the earlier operation can cause a PTE split or
// merge which is assumed by the later operation.
//
// va_block_context must not be NULL. va_block_context->policy is unused.
//
// If allocation-retry was required as part of the operation and was
// successful, NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the
// entries in the out_tracker were added to the block's tracker and then the
// block's lock was unlocked and relocked.
//
// In general, any status other than NV_OK indicates that the block's lock
// might have been unlocked and relocked.
//
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
//          NULL, va_block_context->mm->mmap_lock must be held in at least
//          read mode.
NV_STATUS uvm_va_block_revoke_prot(uvm_va_block_t *va_block,
                                   uvm_va_block_context_t *va_block_context,
                                   uvm_processor_id_t id,
                                   uvm_va_block_region_t region,
                                   const uvm_page_mask_t *revoke_page_mask,
                                   uvm_prot_t prot_to_revoke,
                                   uvm_tracker_t *out_tracker);
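
// Example (illustrative sketch): downgrading a GPU's mappings in a region to
// read-only by revoking write access (which also revokes atomic access, since
// _ATOMIC implies _WRITE).
//
//     status = uvm_va_block_revoke_prot(va_block,
//                                       va_block_context,
//                                       gpu->id,
//                                       region,
//                                       NULL,
//                                       UVM_PROT_READ_WRITE,
//                                       &local_tracker);
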
// Like uvm_va_block_revoke_prot(), except it revokes access for all
// processors in the input mask. The VA block tracker contains all revocation
// operations on return.
//
// Note that this can return NV_ERR_MORE_PROCESSING_REQUIRED just like
// uvm_va_block_revoke_prot(), indicating that the operation needs to be
// retried.
NV_STATUS uvm_va_block_revoke_prot_mask(uvm_va_block_t *va_block,
                                        uvm_va_block_context_t *va_block_context,
                                        const uvm_processor_mask_t *revoke_processor_mask,
                                        uvm_va_block_region_t region,
                                        const uvm_page_mask_t *revoke_page_mask,
                                        uvm_prot_t prot_to_revoke);

// Tries to map all pages in the given region and map_page_mask with at most
// max_prot privileges for the appropriate processors, as determined by the
// accessed_by mask, heuristics, and the given processor mask (excluding
// processor_id, which triggered the migration and should have already been
// mapped).
//
// va_block_context must not be NULL and va_block_context->policy must be
// valid. See the comments for uvm_va_block_check_policy_is_valid().
//
// This function acquires/waits for the va_block tracker and updates that
// tracker with any new work pushed.
//
// Note that this can return NV_ERR_MORE_PROCESSING_REQUIRED just like
// uvm_va_block_map(), indicating that the operation needs to be retried.
//
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
//          NULL, va_block_context->mm->mmap_lock must be held in at least
//          read mode.
NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
                                                    uvm_va_block_context_t *va_block_context,
                                                    uvm_processor_id_t new_residency,
                                                    uvm_processor_id_t processor_id,
                                                    uvm_va_block_region_t region,
                                                    const uvm_page_mask_t *map_page_mask,
                                                    uvm_prot_t max_prot,
                                                    const uvm_processor_mask_t *processor_mask);

// Maps processors using SetAccessedBy to all resident pages in the region
// parameter. On Volta+ it is also used to map evicted pages that can later be
// pulled back by using access counters.
//
// This function acquires/waits for the va_block tracker and updates that
// tracker with any new work pushed.
//
// Note that this can return NV_ERR_MORE_PROCESSING_REQUIRED just like
// uvm_va_block_map(), indicating that the operation needs to be retried.
//
// va_block_context must not be NULL and va_block_context->policy must be
// valid. See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
//          NULL, va_block_context->mm->mmap_lock must be held in at least
//          read mode.
NV_STATUS uvm_va_block_add_mappings(uvm_va_block_t *va_block,
                                    uvm_va_block_context_t *va_block_context,
                                    uvm_processor_id_t processor_id,
                                    uvm_va_block_region_t region,
                                    const uvm_page_mask_t *page_mask,
                                    UvmEventMapRemoteCause cause);

// Notifies the VA block that a new GPU VA space has been created.
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_add_gpu_va_space(uvm_va_block_t *va_block, uvm_gpu_va_space_t *gpu_va_space);

// Destroys the VA block's mappings and page tables on the GPU, if it has any.
//
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// va_block_context must not be NULL. va_block_context->policy is unused.
//
// LOCKING: The caller must hold the va_block lock. If block_context->mm is
//          not NULL, the caller must hold mm->mmap_lock in at least read
//          mode.
void uvm_va_block_remove_gpu_va_space(uvm_va_block_t *va_block,
                                      uvm_gpu_va_space_t *gpu_va_space,
                                      uvm_va_block_context_t *block_context);

// Creates any mappings necessary in this VA block between the two GPUs, in
// either direction.
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_enable_peer(uvm_va_block_t *va_block, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);

// Unmaps all page tables in this VA block which have peer mappings between
// the two GPUs, in either direction.
// LOCKING: The caller must hold the va_block lock.
void uvm_va_block_disable_peer(uvm_va_block_t *va_block, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);

// Unmaps any mappings from the GPU to the preferred location.
//
// The GPU has to be in UVM-Lite mode.
//
// LOCKING: The caller must hold the va_block lock.
void uvm_va_block_unmap_preferred_location_uvm_lite(uvm_va_block_t *va_block, uvm_gpu_t *gpu);

// Frees all memory under this block associated with this GPU. Any portion of
// the block which is resident on the GPU is evicted to sysmem before being
// freed.
//
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// LOCKING: This takes and releases the VA block lock. If mm != NULL, the
//          caller must hold mm->mmap_lock in at least read mode.
void uvm_va_block_unregister_gpu(uvm_va_block_t *va_block, uvm_gpu_t *gpu, struct mm_struct *mm);

// Same as uvm_va_block_unregister_gpu() but the VA block lock must be held.
// Note that this handles allocation-retry internally and hence might unlock
// and relock the block's lock.
void uvm_va_block_unregister_gpu_locked(uvm_va_block_t *va_block, uvm_gpu_t *gpu, struct mm_struct *mm);

// Unmaps all memory associated with the block and drops the ref count of the
// block. This allows the caller to free resources associated with this block
// regardless of the block's current ref count. Most importantly, it allows
// the VA covered by this block to be immediately available for other page
// table mappings upon return.
//
// This clears block->va_range, so only the VA range destroy path should call
// it. Other paths with references on this block, specifically the eviction
// path which temporarily takes a reference to the block, must always check
// the block state after taking the block lock to see if their mapping is
// still in place.
//
// All of the unmap and state destruction steps are also performed when the
// ref count goes to 0, so this function only needs to be called if the
// block's resources need to be reclaimed immediately.
//
// The caller should not lock the block before calling this function.
//
// This performs a uvm_va_block_release.
void uvm_va_block_kill(uvm_va_block_t *va_block);

// Exactly the same split semantics as uvm_va_range_split, including error
// handling. See that function's comments for details.
//
// new_va_block's va_range is set to new_va_range before any reverse mapping
// is established to the new block, but the caller is responsible for
// inserting the new block into the range.
NV_STATUS uvm_va_block_split(uvm_va_block_t *existing_va_block,
                             NvU64 new_end,
                             uvm_va_block_t **new_va_block,
                             uvm_va_range_t *new_va_range);

// Exactly the same split semantics as uvm_va_block_split, including error
// handling, except that the existing_va_block lock must be held and
// new_va_block must be preallocated.
// Also note that the existing_va_block lock may be dropped and re-acquired.
NV_STATUS uvm_va_block_split_locked(uvm_va_block_t *existing_va_block,
                                    NvU64 new_end,
                                    uvm_va_block_t *new_va_block,
                                    uvm_va_range_t *new_va_range);

// Handles a CPU fault in the given VA block, performing any operations
// necessary to establish a coherent CPU mapping (migrations, cache
// invalidates, etc.).
//
// Locking:
// - vma->vm_mm->mmap_lock must be held in at least read mode. Note that this
//   might not be the same as current->mm->mmap_lock.
// - va_space lock must be held in at least read mode
//
// service_context->block_context.mm is ignored and vma->vm_mm is used instead.
// service_context->block_context.policy is set by this function.
//
// Returns NV_ERR_INVALID_ACCESS_TYPE if a CPU mapping to fault_addr cannot be
// accessed, for example because it's within a range group which is
// non-migratable.
NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
                                 NvU64 fault_addr,
                                 bool is_write,
                                 uvm_service_block_context_t *service_context);

// Performs any operations necessary to establish a coherent mapping
// (migrations, cache invalidates, etc.) in response to the given service block
// context.
//
// service_context must not be NULL and service_context->block_context.policy
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is an HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint is set by this function.
//
// Locking:
// - service_context->block_context.mm->mmap_lock must be held in at least
//   read mode, if valid.
// - va_space lock must be held in at least read mode
// - va_block lock must be held
//
// If allocation-retry was required as part of the operation and was
// successful, NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the
// block's lock was unlocked and relocked.
//
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that thrashing has been detected
// and the performance heuristics logic decided to throttle execution.
// Any error code other than NV_OK indicates OOM or a global fatal error.
NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
                                      uvm_va_block_t *va_block,
                                      uvm_va_block_retry_t *block_retry,
                                      uvm_service_block_context_t *service_context);

// Performs population of the destination pages, unmapping and copying source
// pages to new_residency.
//
// service_context must not be NULL and service_context->block_context.policy
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is an HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint should be set before calling this function.
//
// Locking:
// - service_context->block_context.mm->mmap_lock must be held in at least
//   read mode, if valid.
// - va_space lock must be held in at least read mode
// - va_block lock must be held
//
// If allocation-retry was required as part of the operation and was
// successful, NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the
// block's lock was unlocked and relocked.
//
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that thrashing has been detected
// and the performance heuristics logic decided to throttle execution.
// Any error code other than NV_OK indicates OOM or a global fatal error.
NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
                                    uvm_processor_id_t new_residency,
                                    uvm_va_block_t *va_block,
                                    uvm_va_block_retry_t *block_retry,
                                    uvm_service_block_context_t *service_context);

// This updates the va_block residency state and maps the faulting processor_id
// to the new residency (which may be remote).
//
// service_context must not be NULL and service_context->block_context.policy
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is an HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must be initialized by calling uvm_va_block_service_copy()
// before calling this function.
//
// Locking:
// - service_context->block_context.mm->mmap_lock must be held in at least
//   read mode, if valid.
// - va_space lock must be held in at least read mode
// - va_block lock must be held
// - the mmap lock and va_space lock must be held across the calls to
//   uvm_va_block_service_copy() and this function. If the va_block lock is
//   dropped in between, special care is needed to check for eviction and
//   invalidation callbacks.
//
// If allocation-retry was required as part of the operation and was
// successful, NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the
// block's lock was unlocked and relocked.
//
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that thrashing has been detected
// and the performance heuristics logic decided to throttle execution.
// Any error code other than NV_OK indicates OOM or a global fatal error.
NV_STATUS uvm_va_block_service_finish(uvm_processor_id_t processor_id,
                                      uvm_va_block_t *va_block,
                                      uvm_service_block_context_t *service_context);

// Allocate GPU state for the given va_block and registered GPUs.
// Locking: The block lock must be held.
NV_STATUS uvm_va_block_gpu_state_alloc(uvm_va_block_t *va_block);

// Release any GPU or policy data associated with the given region in response
// to munmap().
// Locking: The va_block lock must be held.
void uvm_va_block_munmap_region(uvm_va_block_t *va_block,
                                uvm_va_block_region_t region);
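
// Example: an illustrative sketch of the two-step servicing sequence above,
// assuming the caller holds the mmap lock, the va_space lock and the va_block
// lock, and that service_context has been fully set up (including
// prefetch_hint). All variable names here are placeholders:
//
//     status = uvm_va_block_service_copy(processor_id,
//                                        new_residency,
//                                        va_block,
//                                        block_retry,
//                                        service_context);
//     if (status == NV_OK)
//         status = uvm_va_block_service_finish(processor_id, va_block, service_context);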

// Size of the block in bytes. Guaranteed to be a page-aligned value between
// PAGE_SIZE and UVM_VA_BLOCK_SIZE.
static inline NvU64 uvm_va_block_size(uvm_va_block_t *block)
{
    NvU64 size = block->end - block->start + 1;
    UVM_ASSERT(PAGE_ALIGNED(size));
    UVM_ASSERT(size >= PAGE_SIZE);
    UVM_ASSERT(size <= UVM_VA_BLOCK_SIZE);
    return size;
}

// Number of PAGE_SIZE pages in the block
static inline size_t uvm_va_block_num_cpu_pages(uvm_va_block_t *block)
{
    return uvm_va_block_size(block) / PAGE_SIZE;
}

// VA of the given page using the CPU page size. page_index must be valid.
static inline NvU64 uvm_va_block_cpu_page_address(uvm_va_block_t *block, uvm_page_index_t page_index)
{
    UVM_ASSERT(page_index < uvm_va_block_num_cpu_pages(block));
    return block->start + PAGE_SIZE * page_index;
}

// Get the physical address on the given GPU for the given residency
uvm_gpu_phys_address_t uvm_va_block_res_phys_page_address(uvm_va_block_t *va_block,
                                                          uvm_page_index_t page_index,
                                                          uvm_processor_id_t residency,
                                                          uvm_gpu_t *gpu);

// Get the page physical address on the given GPU
//
// This will assert that GPU state is indeed present.
uvm_gpu_phys_address_t uvm_va_block_gpu_phys_page_address(uvm_va_block_t *va_block,
                                                          uvm_page_index_t page_index,
                                                          uvm_gpu_t *gpu);

static bool uvm_va_block_contains_address(uvm_va_block_t *block, NvU64 address)
{
    return address >= block->start && address <= block->end;
}

// Obtain a pointer to the uvm_va_block_test_t structure for the given VA
// block. If uvm_enable_builtin_tests is unset, NULL will be returned.
static uvm_va_block_test_t *uvm_va_block_get_test(uvm_va_block_t *va_block)
{
    if (uvm_enable_builtin_tests)
        return &container_of(va_block, uvm_va_block_wrapper_t, block)->test;

    return NULL;
}

// Get the page residency mask for a processor if it's known to be there.
//
// If the processor is a GPU, this will assert that GPU state is indeed present.
uvm_page_mask_t *uvm_va_block_resident_mask_get(uvm_va_block_t *block, uvm_processor_id_t processor);

// Get the page mapped mask for a processor. The returned mask cannot be
// directly modified by the caller.
//
// If the processor is a GPU, this will assert that GPU state is indeed present.
const uvm_page_mask_t *uvm_va_block_map_mask_get(uvm_va_block_t *block, uvm_processor_id_t processor);

// VA block lookup functions. There are a number of permutations which might be
// useful, such as looking up the block from {va_space, va_range} x {addr,
// block index}. The ones implemented here and in uvm_va_range.h support the
// three primary use cases, which are:
// 1) Iterating over all VA blocks in a VA range. This uses block indices on
//    the VA range:
//      uvm_va_range_num_blocks
//      uvm_va_range_block_index
//      uvm_va_range_block
//      uvm_va_range_block_create
// 2) Operating on a single VA block (fault). This looks up the block using the
//    VA space and address:
//      uvm_va_block_find
//      uvm_va_block_find_create
// 3) Operating on a single VA block (fault) when the VA range is already
//    known. This looks up the block using the supplied VA range and address:
//      uvm_va_block_find_create_in_range
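
// Example: an illustrative sketch of use case 1), iterating over the blocks of
// a VA range. The exact signatures live in uvm_va_range.h; this assumes
// uvm_va_range_block() takes the range and a block index and returns the block
// pointer (NULL if not yet allocated), which may not match the real API
// exactly:
//
//     size_t i;
//
//     for (i = 0; i < uvm_va_range_num_blocks(va_range); i++) {
//         uvm_va_block_t *block = uvm_va_range_block(va_range, i);
//         if (!block)
//             continue; // No block allocated for this index yet
//         ...
//     }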

// Finds the UVM or HMM VA block containing addr, if any. The va_space->lock
// must be held in at least read mode. Return values:
// NV_ERR_INVALID_ADDRESS   addr is not covered by a UVM_VA_RANGE_TYPE_MANAGED
//                          va_range nor an HMM-enabled VMA.
//
// NV_ERR_OBJECT_NOT_FOUND  addr is valid but no block has been allocated to
//                          cover it yet
//
// NV_OK                    The block was returned successfully
NV_STATUS uvm_va_block_find(uvm_va_space_t *va_space, NvU64 addr, uvm_va_block_t **out_block);

// Same as uvm_va_block_find except that the block is created if not found.
// If addr is covered by a UVM_VA_RANGE_TYPE_MANAGED va_range, a managed block
// will be created. Otherwise, if addr is not covered by any va_range, HMM is
// enabled in the va_space, and va_block_context and va_block_context->mm are
// non-NULL, then an HMM block will be created and va_block_context->hmm.vma
// is set to the VMA covering 'addr'. The va_block_context->policy field is
// left unchanged.
// In either case, if va_block_context->mm is non-NULL, it must be retained and
// locked in at least read mode. Return values:
// NV_ERR_INVALID_ADDRESS   addr is not covered by a UVM_VA_RANGE_TYPE_MANAGED
//                          va_range nor an HMM-enabled VMA.
// NV_ERR_NO_MEMORY         memory could not be allocated.
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
                                   NvU64 addr,
                                   uvm_va_block_context_t *va_block_context,
                                   uvm_va_block_t **out_block);

// Same as uvm_va_block_find_create except that the va_range lookup was already
// done by the caller. If the supplied va_range is NULL, this function behaves
// just as uvm_va_block_find_create does when its internal va_range lookup
// returns NULL.
NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
                                            uvm_va_range_t *va_range,
                                            NvU64 addr,
                                            uvm_va_block_context_t *va_block_context,
                                            uvm_va_block_t **out_block);

// Same as uvm_va_block_find_create except that only managed va_blocks are
// created if not already present in the VA range.
static NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
                                                  NvU64 addr,
                                                  uvm_va_block_t **out_block)
{
    return uvm_va_block_find_create(va_space, addr, NULL, out_block);
}

// Look up a chunk backing a specific address within the VA block.
// Returns NULL if none.
uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu_t *gpu, NvU64 address);
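
// Example: an illustrative sketch of distinguishing the uvm_va_block_find()
// return values documented above, assuming the va_space lock is already held
// in read mode. The surrounding variables are placeholders:
//
//     uvm_va_block_t *block;
//     NV_STATUS status = uvm_va_block_find(va_space, addr, &block);
//
//     if (status == NV_ERR_OBJECT_NOT_FOUND) {
//         // addr is managed, but no block covers it yet
//     }
//     else if (status == NV_OK) {
//         // block can be used while the va_space lock remains held
//     }
//     else {
//         // NV_ERR_INVALID_ADDRESS: addr is not a managed address
//     }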

// Implementation of the UvmMigrate() API at the VA block scope.
//
// The out_tracker can be NULL.
//
// Whether mappings are added after the pages have been migrated is controlled
// by mode (see uvm_migrate_mode_t).
//
// The caller needs to handle allocation-retry. va_block_retry can be NULL if
// the destination is the CPU.
//
// va_block_context must not be NULL and va_block_context->policy must be
// valid. See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is an HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
//
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
//          NULL, va_block_context->mm->mmap_lock must be held in at least
//          read mode.
NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
                                      uvm_va_block_retry_t *va_block_retry,
                                      uvm_va_block_context_t *va_block_context,
                                      uvm_va_block_region_t region,
                                      uvm_processor_id_t dest_id,
                                      uvm_migrate_mode_t mode,
                                      uvm_tracker_t *out_tracker);

// Writes the block's data from a CPU buffer.
//
// The [dst, dst + size) range has to fit within a single PAGE_SIZE page.
//
// va_block_context must not be NULL. The caller is not required to set
// va_block_context->policy or va_block_context->hmm.vma.
//
// The caller needs to support allocation-retry of page tables.
//
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_write_from_cpu(uvm_va_block_t *va_block,
                                      uvm_va_block_context_t *block_context,
                                      NvU64 dst,
                                      uvm_mem_t *src,
                                      size_t size);

// Reads the block's data into a CPU buffer.
//
// The [src, src + size) range has to fit within a single PAGE_SIZE page.
//
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_read_to_cpu(uvm_va_block_t *va_block, uvm_mem_t *dst, NvU64 src, size_t size);

// Initialize va_block retry tracking.
void uvm_va_block_retry_init(uvm_va_block_retry_t *uvm_va_block_retry);

// Deinitialize va_block retry tracking after a block operation.
//
// Frees all the remaining free chunks and unpins all the used chunks.
void uvm_va_block_retry_deinit(uvm_va_block_retry_t *uvm_va_block_retry, uvm_va_block_t *va_block);

// Evict all chunks from the block that are subchunks of the passed-in
// root_chunk.
//
// Adds all the work tracking the eviction to the tracker.
//
// Returns NV_OK if the block is dead or doesn't have any subchunks of the
// root_chunk.
//
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_evict_chunks(uvm_va_block_t *va_block,
                                    uvm_gpu_t *gpu,
                                    uvm_gpu_chunk_t *root_chunk,
                                    uvm_tracker_t *tracker);

NV_STATUS uvm_test_va_block_inject_error(UVM_TEST_VA_BLOCK_INJECT_ERROR_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_change_pte_mapping(UVM_TEST_CHANGE_PTE_MAPPING_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_block_info(UVM_TEST_VA_BLOCK_INFO_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params, struct file *filp);

// Compute the offset in system pages of addr from the start of va_block.
static uvm_page_index_t uvm_va_block_cpu_page_index(uvm_va_block_t *va_block, NvU64 addr)
{
    UVM_ASSERT(addr >= va_block->start);
    UVM_ASSERT(addr <= va_block->end);
    return (addr - va_block->start) / PAGE_SIZE;
}
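
// Example: a small illustrative sketch of the page index and page address
// helpers round-tripping, assuming addr lies within the block:
//
//     uvm_page_index_t page_index = uvm_va_block_cpu_page_index(va_block, addr);
//     NvU64 page_addr = uvm_va_block_cpu_page_address(va_block, page_index);
//
//     UVM_ASSERT(page_addr == UVM_ALIGN_DOWN(addr, PAGE_SIZE));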

// Computes the size and index in the gpu_state chunks array of the GPU chunk
// which corresponds to the given page_index of the VA region.
// Note this is only used for testing and does not work on HMM va_blocks as it
// returns incorrect results for those.
size_t uvm_va_block_gpu_chunk_index_range(NvU64 start,
                                          NvU64 size,
                                          uvm_gpu_t *gpu,
                                          uvm_page_index_t page_index,
                                          uvm_chunk_size_t *out_chunk_size);

// If there are any resident CPU pages in the block, mark them as dirty.
void uvm_va_block_mark_cpu_dirty(uvm_va_block_t *va_block);

// Sets the internal state required to handle fault cancellation.
//
// This function may require allocating page tables to split big pages into 4K
// pages. If allocation-retry was required as part of the operation and was
// successful, NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case the
// block's lock was unlocked and relocked.
//
// va_block_context must not be NULL. The va_block_context->policy is unused.
//
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_set_cancel(uvm_va_block_t *va_block, uvm_va_block_context_t *block_context, uvm_gpu_t *gpu);

//
// uvm_va_block_region_t helpers
//

static uvm_va_block_region_t uvm_va_block_region(uvm_page_index_t first, uvm_page_index_t outer)
{
    BUILD_BUG_ON(PAGES_PER_UVM_VA_BLOCK >= (1 << (sizeof(first) * 8)));

    UVM_ASSERT(first <= outer);

    return (uvm_va_block_region_t){ .first = first, .outer = outer };
}

static uvm_va_block_region_t uvm_va_block_region_for_page(uvm_page_index_t page_index)
{
    return uvm_va_block_region(page_index, page_index + 1);
}

static size_t uvm_va_block_region_num_pages(uvm_va_block_region_t region)
{
    return region.outer - region.first;
}

static NvU64 uvm_va_block_region_size(uvm_va_block_region_t region)
{
    return uvm_va_block_region_num_pages(region) * PAGE_SIZE;
}

static NvU64 uvm_va_block_region_start(uvm_va_block_t *va_block, uvm_va_block_region_t region)
{
    return va_block->start + region.first * PAGE_SIZE;
}

static NvU64 uvm_va_block_region_end(uvm_va_block_t *va_block, uvm_va_block_region_t region)
{
    return va_block->start + region.outer * PAGE_SIZE - 1;
}

static bool uvm_va_block_region_contains_region(uvm_va_block_region_t region, uvm_va_block_region_t subregion)
{
    return subregion.first >= region.first && subregion.outer <= region.outer;
}

static bool uvm_va_block_region_contains_page(uvm_va_block_region_t region, uvm_page_index_t page_index)
{
    return uvm_va_block_region_contains_region(region, uvm_va_block_region_for_page(page_index));
}
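
// Example: an illustrative sketch of the region helpers above. A region is a
// half-open page index interval [first, outer), so for a hypothetical region
// covering pages 2 and 3:
//
//     uvm_va_block_region_t region = uvm_va_block_region(2, 4);
//
//     UVM_ASSERT(uvm_va_block_region_num_pages(region) == 2);
//     UVM_ASSERT(uvm_va_block_region_size(region) == 2 * PAGE_SIZE);
//     UVM_ASSERT(uvm_va_block_region_contains_page(region, 3));
//     UVM_ASSERT(!uvm_va_block_region_contains_page(region, 4));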

// Create a block region from a va_block and start and end virtual addresses
// within the block.
static uvm_va_block_region_t uvm_va_block_region_from_start_end(uvm_va_block_t *va_block, NvU64 start, NvU64 end)
{
    uvm_va_block_region_t region;

    UVM_ASSERT(start < end);
    UVM_ASSERT(start >= va_block->start);
    UVM_ASSERT(end <= va_block->end);
    UVM_ASSERT(PAGE_ALIGNED(start));
    UVM_ASSERT(PAGE_ALIGNED(end + 1));

    region.first = uvm_va_block_cpu_page_index(va_block, start);
    region.outer = uvm_va_block_cpu_page_index(va_block, end) + 1;

    return region;
}

static uvm_va_block_region_t uvm_va_block_region_from_start_size(uvm_va_block_t *va_block, NvU64 start, NvU64 size)
{
    return uvm_va_block_region_from_start_end(va_block, start, start + size - 1);
}

static uvm_va_block_region_t uvm_va_block_region_from_block(uvm_va_block_t *va_block)
{
    return uvm_va_block_region(0, uvm_va_block_num_cpu_pages(va_block));
}

// Create a block region from a va_block and page mask. Note that the region
// covers the first through the last set bit and may have unset bits in
// between.
static uvm_va_block_region_t uvm_va_block_region_from_mask(uvm_va_block_t *va_block, const uvm_page_mask_t *page_mask)
{
    uvm_va_block_region_t region;
    uvm_page_index_t outer = uvm_va_block_num_cpu_pages(va_block);

    region.first = find_first_bit(page_mask->bitmap, outer);
    if (region.first >= outer) {
        region = uvm_va_block_region(0, 0);
    }
    else {
        // At least one bit is set so find_last_bit() should not return 'outer'.
        region.outer = find_last_bit(page_mask->bitmap, outer) + 1;
        UVM_ASSERT(region.outer <= outer);
    }

    return region;
}

static bool uvm_page_mask_test(const uvm_page_mask_t *mask, uvm_page_index_t page_index)
{
    UVM_ASSERT(page_index < PAGES_PER_UVM_VA_BLOCK);

    return test_bit(page_index, mask->bitmap);
}

static bool uvm_page_mask_test_and_set(uvm_page_mask_t *mask, uvm_page_index_t page_index)
{
    UVM_ASSERT(page_index < PAGES_PER_UVM_VA_BLOCK);

    return __test_and_set_bit(page_index, mask->bitmap);
}

static bool uvm_page_mask_test_and_clear(uvm_page_mask_t *mask, uvm_page_index_t page_index)
{
    UVM_ASSERT(page_index < PAGES_PER_UVM_VA_BLOCK);

    return __test_and_clear_bit(page_index, mask->bitmap);
}

static void uvm_page_mask_set(uvm_page_mask_t *mask, uvm_page_index_t page_index)
{
    UVM_ASSERT(page_index < PAGES_PER_UVM_VA_BLOCK);

    __set_bit(page_index, mask->bitmap);
}

static void uvm_page_mask_clear(uvm_page_mask_t *mask, uvm_page_index_t page_index)
{
    UVM_ASSERT(page_index < PAGES_PER_UVM_VA_BLOCK);

    __clear_bit(page_index, mask->bitmap);
}

static bool uvm_page_mask_region_test(const uvm_page_mask_t *mask,
                                      uvm_va_block_region_t region,
                                      uvm_page_index_t page_index)
{
    if (!uvm_va_block_region_contains_page(region, page_index))
        return false;

    return !mask || uvm_page_mask_test(mask, page_index);
}

static NvU32 uvm_page_mask_region_weight(const uvm_page_mask_t *mask, uvm_va_block_region_t region)
{
    NvU32 weight_before = 0;

    if (region.first > 0)
        weight_before = bitmap_weight(mask->bitmap, region.first);

    return bitmap_weight(mask->bitmap, region.outer) - weight_before;
}
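
// Example: an illustrative sketch combining the mask and region helpers. It
// fills a hypothetical two-page region and checks the resulting population
// count, using uvm_page_mask_zero() and uvm_page_mask_region_fill() defined
// just below:
//
//     uvm_page_mask_t mask;
//     uvm_va_block_region_t region = uvm_va_block_region(2, 4);
//
//     uvm_page_mask_zero(&mask);
//     uvm_page_mask_region_fill(&mask, region);
//
//     UVM_ASSERT(uvm_page_mask_test(&mask, 2));
//     UVM_ASSERT(uvm_page_mask_region_weight(&mask, region) == 2);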

static bool uvm_page_mask_region_empty(const uvm_page_mask_t *mask,
                                       uvm_va_block_region_t region)
{
    return find_next_bit(mask->bitmap, region.outer, region.first) == region.outer;
}

static bool uvm_page_mask_region_full(const uvm_page_mask_t *mask, uvm_va_block_region_t region)
{
    return find_next_zero_bit(mask->bitmap, region.outer, region.first) == region.outer;
}

static void uvm_page_mask_region_fill(uvm_page_mask_t *mask, uvm_va_block_region_t region)
{
    bitmap_set(mask->bitmap, region.first, region.outer - region.first);
}

static void uvm_page_mask_region_clear(uvm_page_mask_t *mask, uvm_va_block_region_t region)
{
    bitmap_clear(mask->bitmap, region.first, region.outer - region.first);
}

static void uvm_page_mask_region_clear_outside(uvm_page_mask_t *mask, uvm_va_block_region_t region)
{
    if (region.first > 0)
        bitmap_clear(mask->bitmap, 0, region.first);
    if (region.outer < PAGES_PER_UVM_VA_BLOCK)
        bitmap_clear(mask->bitmap, region.outer, PAGES_PER_UVM_VA_BLOCK - region.outer);
}

static void uvm_page_mask_zero(uvm_page_mask_t *mask)
{
    bitmap_zero(mask->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static bool uvm_page_mask_empty(const uvm_page_mask_t *mask)
{
    return bitmap_empty(mask->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static bool uvm_page_mask_full(const uvm_page_mask_t *mask)
{
    return bitmap_full(mask->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static bool uvm_page_mask_and(uvm_page_mask_t *mask_out,
                              const uvm_page_mask_t *mask_in1,
                              const uvm_page_mask_t *mask_in2)
{
    return bitmap_and(mask_out->bitmap, mask_in1->bitmap, mask_in2->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static bool uvm_page_mask_andnot(uvm_page_mask_t *mask_out,
                                 const uvm_page_mask_t *mask_in1,
                                 const uvm_page_mask_t *mask_in2)
{
    return bitmap_andnot(mask_out->bitmap, mask_in1->bitmap, mask_in2->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static void uvm_page_mask_or(uvm_page_mask_t *mask_out,
                             const uvm_page_mask_t *mask_in1,
                             const uvm_page_mask_t *mask_in2)
{
    bitmap_or(mask_out->bitmap, mask_in1->bitmap, mask_in2->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static void uvm_page_mask_complement(uvm_page_mask_t *mask_out, const uvm_page_mask_t *mask_in)
{
    bitmap_complement(mask_out->bitmap, mask_in->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static void uvm_page_mask_copy(uvm_page_mask_t *mask_out, const uvm_page_mask_t *mask_in)
{
    bitmap_copy(mask_out->bitmap, mask_in->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static NvU32 uvm_page_mask_weight(const uvm_page_mask_t *mask)
{
    return bitmap_weight(mask->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static bool uvm_page_mask_subset(const uvm_page_mask_t *subset, const uvm_page_mask_t *mask)
{
    return bitmap_subset(subset->bitmap, mask->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static bool uvm_page_mask_init_from_region(uvm_page_mask_t *mask_out,
                                           uvm_va_block_region_t region,
                                           const uvm_page_mask_t *mask_in)
{
    uvm_page_mask_zero(mask_out);
    uvm_page_mask_region_fill(mask_out, region);

    if (mask_in)
        return uvm_page_mask_and(mask_out, mask_out, mask_in);

    return true;
}

static void uvm_page_mask_shift_right(uvm_page_mask_t *mask_out, const uvm_page_mask_t *mask_in, unsigned shift)
{
    bitmap_shift_right(mask_out->bitmap, mask_in->bitmap, shift, PAGES_PER_UVM_VA_BLOCK);
}

static void uvm_page_mask_shift_left(uvm_page_mask_t *mask_out, const uvm_page_mask_t *mask_in, unsigned shift)
{
    bitmap_shift_left(mask_out->bitmap, mask_in->bitmap, shift, PAGES_PER_UVM_VA_BLOCK);
}

static bool uvm_page_mask_intersects(const uvm_page_mask_t *mask1, const uvm_page_mask_t *mask2)
{
    return bitmap_intersects(mask1->bitmap, mask2->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

// Print the given page mask into the given buffer using hex digits. The
// minimum required size of the buffer is UVM_PAGE_MASK_PRINT_MIN_BUFFER_SIZE.
static void uvm_page_mask_print(const uvm_page_mask_t *mask, char *buffer)
{
    // There are two cases, which depend on PAGE_SIZE
    if (PAGES_PER_UVM_VA_BLOCK > 32) {
        NvLength current_long_idx = UVM_PAGE_MASK_WORDS - 1;
        const char *buffer_end = buffer + UVM_PAGE_MASK_PRINT_MIN_BUFFER_SIZE;

        UVM_ASSERT(sizeof(*mask->bitmap) == 8);

        // For 4KB pages, we need to iterate over multiple words
        do {
            NvU64 current_long = mask->bitmap[current_long_idx];

            buffer += sprintf(buffer, "%016llx", current_long);
            if (current_long_idx != 0)
                buffer += sprintf(buffer, ":");
        } while (current_long_idx-- != 0);

        UVM_ASSERT(buffer <= buffer_end);
    }
    else {
        NvU32 value = (unsigned)*mask->bitmap;

        UVM_ASSERT(PAGES_PER_UVM_VA_BLOCK == 32);

        // For 64KB pages, a single print suffices
        sprintf(buffer, "%08x", value);
    }
}

static uvm_va_block_region_t uvm_va_block_first_subregion_in_mask(uvm_va_block_region_t region,
                                                                  const uvm_page_mask_t *page_mask)
{
    uvm_va_block_region_t subregion;

    if (!page_mask)
        return region;

    subregion.first = find_next_bit(page_mask->bitmap, region.outer, region.first);
    subregion.outer = find_next_zero_bit(page_mask->bitmap, region.outer, subregion.first + 1);
    return subregion;
}

static uvm_va_block_region_t uvm_va_block_next_subregion_in_mask(uvm_va_block_region_t region,
                                                                 const uvm_page_mask_t *page_mask,
                                                                 uvm_va_block_region_t previous_subregion)
{
    uvm_va_block_region_t subregion;

    if (!page_mask) {
        subregion.first = region.outer;
        subregion.outer = region.outer;
        return subregion;
    }

    subregion.first = find_next_bit(page_mask->bitmap, region.outer, previous_subregion.outer + 1);
    subregion.outer = find_next_zero_bit(page_mask->bitmap, region.outer, subregion.first + 1);
    return subregion;
}
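
// Example: an illustrative sketch of printing a mask, assuming a caller-owned
// buffer of the documented minimum size. UVM_DBG_PRINT is used here purely for
// illustration:
//
//     char buffer[UVM_PAGE_MASK_PRINT_MIN_BUFFER_SIZE];
//
//     uvm_page_mask_print(&mask, buffer);
//     UVM_DBG_PRINT("mask: %s\n", buffer);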

// Iterate over contiguous subregions of the region given by the page mask.
// If the page mask is NULL then it behaves as if it were a fully set mask and
// the only subregion iterated over will be the region itself.
#define for_each_va_block_subregion_in_mask(subregion, page_mask, region)               \
    for ((subregion) = uvm_va_block_first_subregion_in_mask((region), (page_mask));     \
         (subregion).first != (region).outer;                                           \
         (subregion) = uvm_va_block_next_subregion_in_mask((region), (page_mask), (subregion)))

static uvm_page_index_t uvm_va_block_first_page_in_mask(uvm_va_block_region_t region,
                                                        const uvm_page_mask_t *page_mask)
{
    if (page_mask)
        return find_next_bit(page_mask->bitmap, region.outer, region.first);
    else
        return region.first;
}

static uvm_page_index_t uvm_va_block_next_page_in_mask(uvm_va_block_region_t region,
                                                       const uvm_page_mask_t *page_mask,
                                                       uvm_page_index_t previous_page)
{
    if (page_mask) {
        return find_next_bit(page_mask->bitmap, region.outer, previous_page + 1);
    }
    else {
        UVM_ASSERT(previous_page < region.outer);
        return previous_page + 1;
    }
}

static uvm_page_index_t uvm_va_block_first_unset_page_in_mask(uvm_va_block_region_t region,
                                                              const uvm_page_mask_t *page_mask)
{
    if (page_mask)
        return find_next_zero_bit(page_mask->bitmap, region.outer, region.first);
    else
        return region.first;
}

static uvm_page_index_t uvm_va_block_next_unset_page_in_mask(uvm_va_block_region_t region,
                                                             const uvm_page_mask_t *page_mask,
                                                             uvm_page_index_t previous_page)
{
    if (page_mask) {
        return find_next_zero_bit(page_mask->bitmap, region.outer, previous_page + 1);
    }
    else {
        UVM_ASSERT(previous_page < region.outer);
        return previous_page + 1;
    }
}

static NvU64 uvm_reverse_map_start(const uvm_reverse_map_t *reverse_map)
{
    return uvm_va_block_cpu_page_address(reverse_map->va_block, reverse_map->region.first);
}

static NvU64 uvm_reverse_map_end(const uvm_reverse_map_t *reverse_map)
{
    return uvm_va_block_cpu_page_address(reverse_map->va_block, reverse_map->region.first) +
           uvm_va_block_region_size(reverse_map->region) - 1;
}

// Iterate over contiguous pages of the region given by the page mask.
// If the page mask is NULL then it behaves as if it were a fully set mask and
// it will iterate over all pages within the region.
#define for_each_va_block_page_in_region_mask(page_index, page_mask, region)            \
    for ((page_index) = uvm_va_block_first_page_in_mask((region), (page_mask));         \
         (page_index) != (region).outer;                                                \
         (page_index) = uvm_va_block_next_page_in_mask((region), (page_mask), (page_index)))

// Same as for_each_va_block_page_in_region_mask, but the region spans the
// whole given VA block
#define for_each_va_block_page_in_mask(page_index, page_mask, va_block)                 \
    for_each_va_block_page_in_region_mask(page_index, page_mask, uvm_va_block_region_from_block(va_block))

// Similar to for_each_va_block_page_in_region_mask, but iterating over pages
// whose bit is unset.
#define for_each_va_block_unset_page_in_region_mask(page_index, page_mask, region)      \
    for ((page_index) = uvm_va_block_first_unset_page_in_mask((region), (page_mask));   \
         (page_index) != (region).outer;                                                \
         (page_index) = uvm_va_block_next_unset_page_in_mask((region), (page_mask), (page_index)))
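
// Example: an illustrative sketch of the iterator macros above, walking the
// pages and the contiguous subregions selected by a (possibly NULL) mask:
//
//     uvm_page_index_t page_index;
//     uvm_va_block_region_t subregion;
//     uvm_va_block_region_t region = uvm_va_block_region_from_block(va_block);
//
//     for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
//         // Visits every set page in region
//     }
//
//     for_each_va_block_subregion_in_mask(subregion, page_mask, region) {
//         // Visits every contiguous run of set pages in region
//     }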

// Similar to for_each_va_block_page_in_mask, but iterating over pages whose
// bit is unset.
#define for_each_va_block_unset_page_in_mask(page_index, page_mask, va_block)           \
    for_each_va_block_unset_page_in_region_mask(page_index, page_mask, uvm_va_block_region_from_block(va_block))

// Iterate over all pages within the given region
#define for_each_va_block_page_in_region(page_index, region)                            \
    for_each_va_block_page_in_region_mask((page_index), NULL, (region))

// Iterate over all pages within the given VA block
#define for_each_va_block_page(page_index, va_block)                                    \
    for_each_va_block_page_in_region((page_index), uvm_va_block_region_from_block(va_block))

// Return the block region covered by the given chunk size. page_index must be
// any page within the block known to be covered by the chunk.
static uvm_va_block_region_t uvm_va_block_chunk_region(uvm_va_block_t *block,
                                                       uvm_chunk_size_t chunk_size,
                                                       uvm_page_index_t page_index)
{
    NvU64 page_addr = uvm_va_block_cpu_page_address(block, page_index);
    NvU64 chunk_start_addr = UVM_ALIGN_DOWN(page_addr, chunk_size);
    uvm_page_index_t first = (uvm_page_index_t)((chunk_start_addr - block->start) / PAGE_SIZE);
    return uvm_va_block_region(first, first + (chunk_size / PAGE_SIZE));
}

//
// Helpers for page state (permissions, size, residency)
//

bool uvm_va_block_page_is_gpu_authorized(uvm_va_block_t *va_block,
                                         uvm_page_index_t page_index,
                                         uvm_gpu_id_t gpu_id,
                                         uvm_prot_t required_prot);

// Compute the processors that have a copy of the given page resident in their
// memory.
void uvm_va_block_page_resident_processors(uvm_va_block_t *va_block,
                                           uvm_page_index_t page_index,
                                           uvm_processor_mask_t *resident_processors);

// Count how many processors have a copy of the given page resident in their
// memory.
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block, uvm_page_index_t page_index);

// Get the processor with a resident copy of a page closest to the given
// processor.
uvm_processor_id_t uvm_va_block_page_get_closest_resident(uvm_va_block_t *va_block,
                                                          uvm_page_index_t page_index,
                                                          uvm_processor_id_t processor);

// Insert a CPU chunk at the given page_index into the va_block.
// Locking: The va_block lock must be held.
NV_STATUS uvm_cpu_chunk_insert_in_block(uvm_va_block_t *va_block,
                                        uvm_cpu_chunk_t *chunk,
                                        uvm_page_index_t page_index);

// Remove the CPU chunk at the given page_index from the va_block.
// Locking: The va_block lock must be held.
void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block,
                                     uvm_page_index_t page_index);

// Return the CPU chunk at the given page_index from the va_block.
// Locking: The va_block lock must be held.
uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *va_block,
                                                  uvm_page_index_t page_index);

// Return the struct page backing the CPU chunk at the given page_index from
// the va_block.
// Locking: The va_block lock must be held.
struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block,
                                        uvm_page_index_t page_index);

// Physically map a CPU chunk so it is DMA'able from all registered GPUs.
// Locking: The va_block lock must be held.
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
                                             uvm_page_index_t page_index);
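
// Example: an illustrative sketch of uvm_va_block_chunk_region(). Given a
// hypothetical page known to be backed by a 64K chunk, the returned region
// covers every PAGE_SIZE page of that chunk. UVM_CHUNK_SIZE_64K is assumed
// from uvm_pmm_gpu.h:
//
//     uvm_va_block_region_t chunk_region = uvm_va_block_chunk_region(block,
//                                                                    UVM_CHUNK_SIZE_64K,
//                                                                    page_index);
//
//     UVM_ASSERT(uvm_va_block_region_contains_page(chunk_region, page_index));
//     UVM_ASSERT(uvm_va_block_region_size(chunk_region) == UVM_CHUNK_SIZE_64K);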

// Physically unmap a CPU chunk from all registered GPUs.
// Locking: The va_block lock must be held.
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
                                          uvm_cpu_chunk_t *chunk,
                                          uvm_page_index_t page_index);

// Remove any CPU chunks in the given region.
// Locking: The va_block lock must be held.
void uvm_va_block_remove_cpu_chunks(uvm_va_block_t *va_block, uvm_va_block_region_t region);

// Get the CPU page size or 0 if it is not mapped
NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
                                 uvm_page_index_t page_index);

// Get the GPU page size or 0 if it is not mapped on the given GPU
NvU32 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index);

// Get the page size or 0 if it is not mapped on the given processor
static NvU32 uvm_va_block_page_size_processor(uvm_va_block_t *va_block,
                                              uvm_processor_id_t processor_id,
                                              uvm_page_index_t page_index)
{
    if (UVM_ID_IS_CPU(processor_id))
        return uvm_va_block_page_size_cpu(va_block, page_index);
    else
        return uvm_va_block_page_size_gpu(va_block, processor_id, page_index);
}

// Returns the big page size for the GPU VA space of the block
NvU32 uvm_va_block_gpu_big_page_size(uvm_va_block_t *va_block, uvm_gpu_t *gpu);

// Returns the number of big pages in the VA block for the given size
size_t uvm_va_block_num_big_pages(uvm_va_block_t *va_block, NvU32 big_page_size);

// Returns the number of big pages in the VA block for the big page size on the
// given GPU
static size_t uvm_va_block_gpu_num_big_pages(uvm_va_block_t *va_block, uvm_gpu_t *gpu)
{
    return uvm_va_block_num_big_pages(va_block, uvm_va_block_gpu_big_page_size(va_block, gpu));
}

// Returns the start address of the given big page index and big page size
NvU64 uvm_va_block_big_page_addr(uvm_va_block_t *va_block, size_t big_page_index, NvU32 big_page_size);

// Returns the region [start, end] of the given big page index and big page
// size
uvm_va_block_region_t uvm_va_block_big_page_region(uvm_va_block_t *va_block,
                                                   size_t big_page_index,
                                                   NvU32 big_page_size);

// Returns the largest sub-region of [start, end] which can fit big pages. If
// the region cannot fit any big pages, an invalid region (0, 0) is returned.
uvm_va_block_region_t uvm_va_block_big_page_region_all(uvm_va_block_t *va_block, NvU32 big_page_size);

// Returns the largest sub-region of 'region' which can fit big pages. If the
// region cannot fit any big pages, an invalid region (0, 0) is returned.
uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_block,
                                                          uvm_va_block_region_t region,
                                                          NvU32 big_page_size);

// Returns the big page index (the bit index within
// uvm_va_block_gpu_state_t::big_ptes) corresponding to page_index. If
// page_index cannot be covered by a big PTE due to alignment or block size,
// MAX_BIG_PAGES_PER_UVM_VA_BLOCK is returned.
size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t page_index, NvU32 big_page_size);
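
// Example: an illustrative sketch of walking the big pages of a block for a
// given GPU using the helpers above; the loop body is a placeholder:
//
//     NvU32 big_page_size = uvm_va_block_gpu_big_page_size(va_block, gpu);
//     size_t big_page_index;
//
//     for (big_page_index = 0;
//          big_page_index < uvm_va_block_num_big_pages(va_block, big_page_size);
//          big_page_index++) {
//         uvm_va_block_region_t region = uvm_va_block_big_page_region(va_block,
//                                                                     big_page_index,
//                                                                     big_page_size);
//         // region covers exactly one naturally-aligned big page
//     }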

// Returns the new residency for a page that faulted or triggered access
// counter notifications. The read_duplicate output parameter indicates whether
// the page meets the requirements to be read-duplicated.
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if va_block is an HMM block, va_block_context->hmm.vma must be valid,
// which also means va_block_context->mm is non-NULL, retained, and locked for
// at least read. See the comments for uvm_va_block_check_policy_is_valid()
// and uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// Locking: the va_block lock must be held.
uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
                                                 uvm_va_block_context_t *va_block_context,
                                                 uvm_page_index_t page_index,
                                                 uvm_processor_id_t processor_id,
                                                 NvU32 access_type_mask,
                                                 const uvm_va_policy_t *policy,
                                                 const uvm_perf_thrashing_hint_t *thrashing_hint,
                                                 uvm_service_operation_t operation,
                                                 bool *read_duplicate);

// Return the maximum mapping protection for processor_id that will not require
// any permission revocation on the rest of the processors.
uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block,
                                                        uvm_processor_id_t processor_id,
                                                        uvm_page_index_t page_index);

// A helper macro for handling allocation-retry
//
// The macro takes a VA block, a uvm_va_block_retry_t struct and a function
// call to retry as long as it returns NV_ERR_MORE_PROCESSING_REQUIRED.
//
// block_retry can be NULL if it's not necessary for the function call,
// otherwise it will be initialized and deinitialized by the macro.
//
// The macro also locks and unlocks the block's lock internally, because the
// block's lock is expected to have been unlocked and relocked whenever the
// function call returns NV_ERR_MORE_PROCESSING_REQUIRED. This makes it clear
// that the block's state is not locked across these calls.
#define UVM_VA_BLOCK_LOCK_RETRY(va_block, block_retry, call) ({             \
    NV_STATUS status;                                                       \
    uvm_va_block_t *__block = (va_block);                                   \
    uvm_va_block_retry_t *__retry = (block_retry);                          \
                                                                            \
    uvm_va_block_retry_init(__retry);                                       \
                                                                            \
    uvm_mutex_lock(&__block->lock);                                         \
                                                                            \
    do {                                                                    \
        status = (call);                                                    \
    } while (status == NV_ERR_MORE_PROCESSING_REQUIRED);                    \
                                                                            \
    uvm_mutex_unlock(&__block->lock);                                       \
                                                                            \
    uvm_va_block_retry_deinit(__retry, __block);                            \
                                                                            \
    status;                                                                 \
})
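
// Example: an illustrative sketch of driving uvm_va_block_migrate_locked()
// through the retry macro above. The retry struct and context setup are the
// caller's responsibility; region, dest_id and the migrate mode value are
// assumed placeholders:
//
//     uvm_va_block_retry_t va_block_retry;
//     NV_STATUS status;
//
//     status = UVM_VA_BLOCK_LOCK_RETRY(va_block,
//                                      &va_block_retry,
//                                      uvm_va_block_migrate_locked(va_block,
//                                                                  &va_block_retry,
//                                                                  va_block_context,
//                                                                  region,
//                                                                  dest_id,
//                                                                  UVM_MIGRATE_MODE_MAKE_RESIDENT_AND_MAP,
//                                                                  NULL));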

// A helper macro for handling allocation-retry
//
// The macro takes a VA block, a uvm_va_block_retry_t struct and a function
// call to retry as long as it returns NV_ERR_MORE_PROCESSING_REQUIRED.
//
// block_retry can be NULL if it's not necessary for the function call,
// otherwise it will be initialized and deinitialized by the macro.
//
// This macro, as opposed to UVM_VA_BLOCK_LOCK_RETRY(), expects the block lock
// to be already taken. Notably the block's lock might be unlocked and relocked
// as part of the call.
#define UVM_VA_BLOCK_RETRY_LOCKED(va_block, block_retry, call) ({           \
    NV_STATUS status;                                                       \
    uvm_va_block_t *__block = (va_block);                                   \
    uvm_va_block_retry_t *__retry = (block_retry);                          \
                                                                            \
    uvm_va_block_retry_init(__retry);                                       \
                                                                            \
    uvm_assert_mutex_locked(&__block->lock);                                \
                                                                            \
    do {                                                                    \
        status = (call);                                                    \
    } while (status == NV_ERR_MORE_PROCESSING_REQUIRED);                    \
                                                                            \
    uvm_va_block_retry_deinit(__retry, __block);                            \
                                                                            \
    status;                                                                 \
})

#endif // __UVM_VA_BLOCK_H__