/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_PMM_GPU_H__
#define __UVM_PMM_GPU_H__

//
// The Physical Memory Manager (PMM) manages the life cycle of GPU physical
// memory.
//
// The memory is managed in GPU chunks of different sizes (uvm_chunk_size_t)
// and users of PMM need to explicitly register the chunk sizes they need to be
// supported (see chunk_size_init_func in uvm_pmm_gpu_init()).
//
// Two memory types (uvm_pmm_gpu_memory_type_t) are supported, one for user and
// one for kernel allocations. The user memory type is used only for backing
// user data managed by VA blocks and the kernel memory type is used for
// everything else. The distinction exists to support oversubscription, which
// requires the ability to evict already allocated memory from its users
// on demand to satisfy new memory allocations when no more unused memory is
// available. Eviction is limited to the user memory type as it's a very
// complex operation requiring integration between PMM and other UVM driver
// modules. The assumption is that the vast majority of memory should be used
// for user data as everything else can be considered overhead and should be
// minimized. Two flavors of oversubscription exist: internal oversubscription
// allowing PMM allocations to evict other PMM allocations, and external
// oversubscription allowing other PMA clients to evict memory used by PMM.
//
// Both allocation and freeing of memory support asynchronous operations where
// the allocated/freed GPU memory chunks can have pending GPU operations
// returned when allocating memory and passed in when freeing it via trackers.
//

#include "uvm_forward_decl.h"
#include "uvm_lock.h"
#include "uvm_processors.h"
#include "uvm_tracker.h"
#include "uvm_va_block_types.h"
#include "uvm_linux.h"
#include "uvm_types.h"
#include "nv_uvm_types.h"
#if UVM_IS_CONFIG_HMM()
#include <linux/memremap.h>
#endif

typedef enum
{
    UVM_CHUNK_SIZE_1    = 1ULL,
    UVM_CHUNK_SIZE_2    = 2ULL,
    UVM_CHUNK_SIZE_4    = 4ULL,
    UVM_CHUNK_SIZE_8    = 8ULL,
    UVM_CHUNK_SIZE_16   = 16ULL,
    UVM_CHUNK_SIZE_32   = 32ULL,
    UVM_CHUNK_SIZE_64   = 64ULL,
    UVM_CHUNK_SIZE_128  = 128ULL,
    UVM_CHUNK_SIZE_256  = 256ULL,
    UVM_CHUNK_SIZE_512  = 512ULL,
    UVM_CHUNK_SIZE_1K   = 1024ULL,
    UVM_CHUNK_SIZE_2K   = 2*1024ULL,
    UVM_CHUNK_SIZE_4K   = 4*1024ULL,
    UVM_CHUNK_SIZE_8K   = 8*1024ULL,
    UVM_CHUNK_SIZE_16K  = 16*1024ULL,
    UVM_CHUNK_SIZE_32K  = 32*1024ULL,
    UVM_CHUNK_SIZE_64K  = 64*1024ULL,
    UVM_CHUNK_SIZE_128K = 128*1024ULL,
    UVM_CHUNK_SIZE_256K = 256*1024ULL,
    UVM_CHUNK_SIZE_512K = 512*1024ULL,
    UVM_CHUNK_SIZE_1M   = 1024*1024ULL,
    UVM_CHUNK_SIZE_2M   = 2*1024*1024ULL,
    UVM_CHUNK_SIZE_MAX  = UVM_CHUNK_SIZE_2M,
    UVM_CHUNK_SIZE_INVALID = UVM_CHUNK_SIZE_MAX * 2ULL
} uvm_chunk_size_t;

#define UVM_CHUNK_SIZES_MASK (uvm_chunk_sizes_mask_t)(UVM_CHUNK_SIZE_MAX | (UVM_CHUNK_SIZE_MAX-1))

typedef enum
{
    // Memory type for backing user pages. On Pascal+ it can be evicted.
    UVM_PMM_GPU_MEMORY_TYPE_USER,
    // When the Confidential Computing feature is enabled, the protected flavor
    // allocates memory out of the VPR region. When it's disabled, all flavors
    // have no effect and are equivalent to the base type.
    UVM_PMM_GPU_MEMORY_TYPE_USER_PROTECTED = UVM_PMM_GPU_MEMORY_TYPE_USER,
    UVM_PMM_GPU_MEMORY_TYPE_USER_UNPROTECTED,

    // Memory type for internal UVM allocations. It cannot be evicted.
    UVM_PMM_GPU_MEMORY_TYPE_KERNEL,
    // See user types for the behavior description when the Confidential
    // Computing feature is ON or OFF.
    UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED = UVM_PMM_GPU_MEMORY_TYPE_KERNEL,
    UVM_PMM_GPU_MEMORY_TYPE_KERNEL_UNPROTECTED,

    // Number of types - MUST BE LAST.
    UVM_PMM_GPU_MEMORY_TYPE_COUNT
} uvm_pmm_gpu_memory_type_t;

const char *uvm_pmm_gpu_memory_type_string(uvm_pmm_gpu_memory_type_t type);

// Returns true if the given memory type is used to back user pages.
bool uvm_pmm_gpu_memory_type_is_user(uvm_pmm_gpu_memory_type_t type);

// Returns true if the given memory type is used to back internal UVM
// allocations.
static bool uvm_pmm_gpu_memory_type_is_kernel(uvm_pmm_gpu_memory_type_t type)
{
    return !uvm_pmm_gpu_memory_type_is_user(type);
}

typedef enum
{
    // Chunk belongs to PMA. Code outside PMM should not have access to it, and
    // it is likely a bug in UVM code (either in PMM or outside) if that
    // happens.
    UVM_PMM_GPU_CHUNK_STATE_PMA_OWNED,

    // Chunk is on a free list. That is, it can be reused or returned to PMA as
    // soon as its tracker is done. Code outside PMM should not have access to
    // this chunk, and it is likely a bug in UVM code (either in PMM or
    // outside) if that happens.
    UVM_PMM_GPU_CHUNK_STATE_FREE,

    // Chunk is split into subchunks.
    UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT,

    // Chunk is temporarily pinned.
    //
    // This state is used for user memory chunks that have been allocated, but
    // haven't been unpinned yet, and also internally when a chunk is about to
    // be split.
    UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED,

    // Chunk is allocated. That is, it is backing some VA block.
    UVM_PMM_GPU_CHUNK_STATE_ALLOCATED,

    // Number of states - MUST BE LAST
    UVM_PMM_GPU_CHUNK_STATE_COUNT
} uvm_pmm_gpu_chunk_state_t;

const char *uvm_pmm_gpu_chunk_state_string(uvm_pmm_gpu_chunk_state_t state);

typedef enum
{
    // No flags passed
    UVM_PMM_ALLOC_FLAGS_NONE,

    // If there is no free memory, the allocation may evict chunks instead of
    // returning an error immediately. Therefore it must not be called under
    // the VA block lock.
    UVM_PMM_ALLOC_FLAGS_EVICT = (1 << 0),

    // Do not use batching in this call if PMA page allocation is required
    UVM_PMM_ALLOC_FLAGS_DONT_BATCH = (1 << 1),

    UVM_PMM_ALLOC_FLAGS_MASK = (1 << 2) - 1
} uvm_pmm_alloc_flags_t;

typedef enum
{
    // Identifier for lists with zeroed chunks
    UVM_PMM_LIST_ZERO,

    // Identifier for lists with non-zeroed chunks
    UVM_PMM_LIST_NO_ZERO,

    // Number of states for zeroed/non-zeroed chunk lists - MUST BE LAST
    UVM_PMM_LIST_ZERO_COUNT
} uvm_pmm_list_zero_t;

static void uvm_pmm_list_zero_checks(void)
{
    BUILD_BUG_ON(UVM_PMM_LIST_ZERO_COUNT > 2);
}

// Maximum number of chunk sizes per type of allocation in a single GPU.
// The worst case today is Maxwell with 4 allocation sizes for page tables and
// 2 page sizes used by uvm_mem_t. Notably, one of the allocations for page
// tables is 2M, which is our common root chunk size.
#define UVM_MAX_CHUNK_SIZES 6

// This specifies the maximum gap between 2 allocation levels.
#define UVM_PMM_MAX_SUBCHUNKS UVM_CHUNK_SIZE_MAX

#define UVM_PMM_CHUNK_SPLIT_CACHE_SIZES (ilog2(UVM_PMM_MAX_SUBCHUNKS) + 1)
#define UVM_CHUNK_SIZE_MASK_SIZE (ilog2(UVM_CHUNK_SIZE_MAX) + 1)

typedef uvm_chunk_size_t uvm_chunk_sizes_mask_t;

typedef struct uvm_pmm_gpu_chunk_suballoc_struct uvm_pmm_gpu_chunk_suballoc_t;

#if UVM_IS_CONFIG_HMM()

typedef struct uvm_pmm_gpu_struct uvm_pmm_gpu_t;

typedef struct
{
    struct dev_pagemap pagemap;
} uvm_pmm_gpu_devmem_t;

// Return the GPU chunk for a given device private struct page.
uvm_gpu_chunk_t *uvm_pmm_devmem_page_to_chunk(struct page *page);

// Return the GPU id for a given device private struct page.
uvm_gpu_id_t uvm_pmm_devmem_page_to_gpu_id(struct page *page);

// Return the PFN of the device private struct page for the given GPU chunk.
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);

#endif
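
// Example (illustrative sketch, not part of the API): translating a device
// private struct page back to PMM state, e.g. from an HMM callback. The
// function name is hypothetical; only the devmem helpers declared above are
// used.
//
//     #if UVM_IS_CONFIG_HMM()
//     static void example_devmem_lookup(struct page *page)
//     {
//         // Both helpers require a device private page owned by UVM.
//         uvm_gpu_chunk_t *chunk = uvm_pmm_devmem_page_to_chunk(page);
//         uvm_gpu_id_t gpu_id = uvm_pmm_devmem_page_to_gpu_id(page);
//
//         // chunk->address identifies the backing vidmem on the GPU gpu_id.
//         (void)chunk;
//         (void)gpu_id;
//     }
//     #endif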

struct uvm_gpu_chunk_struct
{
    // Physical address of the GPU chunk. This may be removed to save memory
    // if we are able to get it from the reverse map and change it into a
    // smaller index for subchunks.
    NvU64 address;

    struct
    {
        // We use +1 in the order_base_2 calls appropriately to avoid compiler
        // warnings due to the bitfields being too narrow for the values of
        // their types.
        uvm_pmm_gpu_memory_type_t type : order_base_2(UVM_PMM_GPU_MEMORY_TYPE_COUNT + 1);

        // The eviction flag is internal and used only for root chunks. It's
        // set by the eviction path once a chunk is chosen for eviction in
        // chunk_start_eviction(). Also see the (root_)chunk_is_in_eviction()
        // helpers.
        bool in_eviction : 1;

        bool inject_split_error : 1;

        // This flag is initialized when allocating a new root chunk from PMA.
        // It is set to true if PMA already scrubbed the chunk. The flag is
        // only valid at allocation time (after the uvm_pmm_gpu_alloc call),
        // and the caller is not required to clear it before freeing the chunk.
        // The VA block chunk population code can query it to skip zeroing the
        // chunk.
        bool is_zero : 1;

        // This flag indicates that an allocated chunk is referenced by a
        // device private struct page PTE and therefore expects a page_free()
        // callback.
        bool is_referenced : 1;

        uvm_pmm_gpu_chunk_state_t state : order_base_2(UVM_PMM_GPU_CHUNK_STATE_COUNT + 1);

        size_t log2_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);

        // Start page index within va_block
        uvm_page_index_t va_block_page_index : order_base_2(PAGES_PER_UVM_VA_BLOCK + 1);

        // This allows determining which PMM owns the chunk. Users of this
        // field must only use it if the owning GPU is retained.
        // TODO: Bug 2008200: Enforce single PMM instance per GPU
        NvU32 gpu_global_index : order_base_2(UVM_GLOBAL_ID_MAX_PROCESSORS);
    };

    // List entry.
    //
    // Guaranteed to be a valid list node at all times for simplicity.
    //
    // Protected by PMM's list_lock when managed by PMM. Notably the list node
    // can be used by the allocator of the chunk after alloc and before the
    // chunk is unpinned or freed.
    struct list_head list;

    // The VA block using the chunk, if any.
    // User chunks that are not backed by a VA block are considered to be
    // temporarily pinned and cannot be evicted.
    uvm_va_block_t *va_block;

    // If this is a subchunk, it points to the parent - in other words, the
    // bigger chunk that contains this chunk.
    uvm_gpu_chunk_t *parent;

    // Array describing suballocations
    uvm_pmm_gpu_chunk_suballoc_t *suballoc;
};

typedef struct uvm_gpu_root_chunk_struct
{
    uvm_gpu_chunk_t chunk;

    // Pending operations for all GPU chunks under the root chunk.
    //
    // Protected by the corresponding root chunk bit lock.
    uvm_tracker_t tracker;

    // Indirect peers which have IOMMU mappings to this root chunk. The mapped
    // addresses are stored in this root chunk's index in
    // uvm_pmm_gpu_t::root_chunks.indirect_peer[id].dma_addrs.
    //
    // Protected by the corresponding root chunk bit lock.
    //
    // We can use a regular processor id because indirect peers are not allowed
    // between partitioned GPUs when SMC is enabled.
    uvm_processor_mask_t indirect_peers_mapped;
} uvm_gpu_root_chunk_t;
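
// Example (illustrative sketch, not part of the API): locating the root chunk
// covering a given chunk. The root chunk array declared in uvm_pmm_gpu_t below
// is indexed by physical address divided by UVM_CHUNK_SIZE_MAX, so the index
// can be derived from the chunk's address. The helper name is hypothetical.
//
//     static size_t example_root_chunk_index(uvm_gpu_chunk_t *chunk)
//     {
//         // Every chunk lies within a single root-chunk-sized (2M) aligned
//         // region, so the index is the aligned-down address divided by
//         // UVM_CHUNK_SIZE_MAX.
//         return (size_t)(UVM_ALIGN_DOWN(chunk->address, UVM_CHUNK_SIZE_MAX) / UVM_CHUNK_SIZE_MAX);
//     }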

typedef struct
{
    // Indirect peers are GPUs which can coherently access this GPU's memory,
    // but are routed through an intermediate processor. Indirect peers access
    // each others' memory with the SYS aperture rather than a PEER aperture,
    // meaning they need IOMMU mappings:
    //
    //      accessing_gpu ==> IOMMU ==> CPU ==> owning_gpu (this GPU)
    //
    // This array has one entry per root chunk on this GPU. Each entry
    // contains the IOMMU address accessing_gpu needs to use in order to
    // access this GPU's root chunk. The root chunks are mapped as whole
    // regions both for tracking simplicity and to allow GPUs to map with
    // large PTEs.
    //
    // An array entry is valid iff accessing_gpu's ID is set in the
    // corresponding root chunk's indirect_peers_mapped mask.
    //
    // Management of these addresses would be simpler if they were stored
    // in the root chunks themselves, but in the common case there are only
    // a small number of indirect peers in a system. Dynamic array
    // allocation per indirect peer wastes less memory.
    NvU64 *dma_addrs;

    // Number of this GPU's root chunks mapped for each indirect peer.
    atomic64_t map_count;
} uvm_gpu_root_chunk_indirect_peer_t;

typedef struct uvm_pmm_gpu_struct
{
    // Mask of the chunk sizes supported for each memory type.
    uvm_chunk_sizes_mask_t chunk_sizes[UVM_PMM_GPU_MEMORY_TYPE_COUNT];

    // PMA (Physical Memory Allocator) opaque handle
    void *pma;

    // PMA statistics used for eviction heuristics
    const UvmPmaStatistics *pma_stats;

    struct
    {
        // Array of all root chunks indexed by their physical address divided
        // by UVM_CHUNK_SIZE_MAX.
        //
        // This array is pre-allocated during uvm_pmm_gpu_init() for all
        // possible physical addresses (based on
        // gpu::vidmem_max_physical_address).
        size_t count;
        uvm_gpu_root_chunk_t *array;

        // Bit locks for the root chunks with 1 bit per root chunk
        uvm_bit_locks_t bitlocks;

        // List of root chunks unused by VA blocks, i.e. allocated, but not
        // holding any resident pages. These take priority when evicting as no
        // data needs to be migrated for them to be evicted.
        //
        // For simplicity, the list is approximate, tracking unused chunks only
        // from root chunk sized (2M) VA blocks.
        //
        // Updated by the VA block code with
        // uvm_pmm_gpu_mark_root_chunk_(un)used().
        struct list_head va_block_unused;

        // List of root chunks used by VA blocks
        struct list_head va_block_used;

        // List of chunks needing to be lazily freed and a queue for processing
        // the list.
        // TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or
        // workqueue.
        struct list_head va_block_lazy_free;
        nv_kthread_q_item_t va_block_lazy_free_q_item;

        uvm_gpu_root_chunk_indirect_peer_t indirect_peer[UVM_ID_MAX_GPUS];
    } root_chunks;

#if UVM_IS_CONFIG_HMM()
    uvm_pmm_gpu_devmem_t devmem;
#endif

    // Lock protecting PMA allocation, freeing and eviction
    uvm_rw_semaphore_t pma_lock;

    // Lock protecting splits, merges and walks of chunks.
    uvm_mutex_t lock;

    // Lock protecting lists and chunk state transitions.
    uvm_spinlock_t list_lock;

    // Free chunk lists. There are separate lists for non-zero and zero chunks.
    struct list_head free_list[UVM_PMM_GPU_MEMORY_TYPE_COUNT][UVM_MAX_CHUNK_SIZES][UVM_PMM_LIST_ZERO_COUNT];

    // Inject an error after evicting a number of chunks. 0 means no error left
    // to be injected.
    NvU32 inject_pma_evict_error_after_num_chunks;

    // The mask of the initialized chunk sizes
    DECLARE_BITMAP(chunk_split_cache_initialized, UVM_PMM_CHUNK_SPLIT_CACHE_SIZES);

    bool initialized;

    bool pma_address_cache_initialized;
} uvm_pmm_gpu_t;

// Return the containing GPU
uvm_gpu_t *uvm_pmm_to_gpu(uvm_pmm_gpu_t *pmm);

// Initialize PMM on GPU
NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm);

// Deinitialize the PMM on GPU
void uvm_pmm_gpu_deinit(uvm_pmm_gpu_t *pmm);

static uvm_chunk_size_t uvm_gpu_chunk_get_size(uvm_gpu_chunk_t *chunk)
{
    return ((uvm_chunk_size_t)1) << chunk->log2_size;
}

static void uvm_gpu_chunk_set_size(uvm_gpu_chunk_t *chunk, uvm_chunk_size_t size)
{
    chunk->log2_size = ilog2(size);
}

// Retrieve the GPU associated with the chunk. Users of this helper must only
// use it if the owning GPU is retained.
uvm_gpu_t *uvm_gpu_chunk_get_gpu(const uvm_gpu_chunk_t *chunk);

// Return the first struct page corresponding to the physical address range
// of the given chunk.
//
// Notes:
// - The GPU must have NUMA support enabled.
// - For chunks smaller than a system page, this function returns the struct
//   page containing the chunk's starting address.
struct page *uvm_gpu_chunk_to_page(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);

// Allocates num_chunks chunks of size chunk_size into the caller-supplied
// array (chunks).
//
// Returned chunks are in the TEMP_PINNED state, requiring a call to either
// uvm_pmm_gpu_unpin_allocated, uvm_pmm_gpu_unpin_referenced, or
// uvm_pmm_gpu_free. If a tracker is passed in, all the pending operations on
// the allocated chunks will be added to it, guaranteeing that all the entries
// come from the same GPU as the PMM. Otherwise, when the tracker is NULL, all
// the pending operations will be synchronized before returning to the caller.
//
// Each of the allocated chunks' list nodes (uvm_gpu_chunk_t::list) can be used
// by the caller until the chunk is unpinned (uvm_pmm_gpu_unpin_allocated,
// uvm_pmm_gpu_unpin_referenced) or freed (uvm_pmm_gpu_free). If used, the list
// node has to be returned to a valid state before calling either of the APIs.
//
// In case of an error, the chunks array is guaranteed to be cleared.
//
// If the memory returned by the PMM allocator cannot be physically addressed,
// the MMU interface provides user chunk mapping and unmapping functions
// (uvm_mmu_chunk_map/unmap) that enable virtual addressing.
NV_STATUS uvm_pmm_gpu_alloc(uvm_pmm_gpu_t *pmm,
                            size_t num_chunks,
                            uvm_chunk_size_t chunk_size,
                            uvm_pmm_gpu_memory_type_t mem_type,
                            uvm_pmm_alloc_flags_t flags,
                            uvm_gpu_chunk_t **chunks,
                            uvm_tracker_t *out_tracker);
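
// Example (illustrative sketch, not part of the API): a typical allocate,
// unpin, and free flow for a single user chunk. The tracker helpers
// (uvm_tracker_init/uvm_tracker_wait/uvm_tracker_deinit) are assumed to come
// from uvm_tracker.h, and error handling is kept minimal.
//
//     static NV_STATUS example_alloc_one_chunk(uvm_pmm_gpu_t *pmm, uvm_va_block_t *va_block)
//     {
//         uvm_gpu_chunk_t *chunk;
//         uvm_tracker_t tracker;
//         NV_STATUS status;
//
//         uvm_tracker_init(&tracker);
//
//         // The chunk comes back TEMP_PINNED. Its pending GPU work is added
//         // to the tracker instead of being synchronized here. Note that the
//         // EVICT flag requires that the VA block lock is not held.
//         status = uvm_pmm_gpu_alloc_user(pmm, 1, UVM_CHUNK_SIZE_2M, UVM_PMM_ALLOC_FLAGS_EVICT, &chunk, &tracker);
//         if (status != NV_OK)
//             goto out;
//
//         // Wait for any pending operations before handing the chunk over.
//         status = uvm_tracker_wait(&tracker);
//         if (status != NV_OK) {
//             // No new work was pushed for the chunk, so pass a NULL tracker.
//             uvm_pmm_gpu_free(pmm, chunk, NULL);
//             goto out;
//         }
//
//         // Unpin the chunk and attach it to the VA block; it is now in the
//         // ALLOCATED state and eligible for eviction.
//         uvm_pmm_gpu_unpin_allocated(pmm, chunk, va_block);
//
//     out:
//         uvm_tracker_deinit(&tracker);
//         return status;
//     }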

// Helper for allocating kernel memory
//
// Internally calls uvm_pmm_gpu_alloc() and sets the state of all chunks to
// allocated on success.
//
// If Confidential Computing is enabled, this helper allocates protected kernel
// memory.
static NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
                                          size_t num_chunks,
                                          uvm_chunk_size_t chunk_size,
                                          uvm_pmm_alloc_flags_t flags,
                                          uvm_gpu_chunk_t **chunks,
                                          uvm_tracker_t *out_tracker)
{
    return uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, UVM_PMM_GPU_MEMORY_TYPE_KERNEL, flags, chunks, out_tracker);
}

// Helper for allocating user memory
//
// Simple wrapper that just uses UVM_PMM_GPU_MEMORY_TYPE_USER for the memory
// type.
//
// If Confidential Computing is enabled, this helper allocates protected user
// memory.
static NV_STATUS uvm_pmm_gpu_alloc_user(uvm_pmm_gpu_t *pmm,
                                        size_t num_chunks,
                                        uvm_chunk_size_t chunk_size,
                                        uvm_pmm_alloc_flags_t flags,
                                        uvm_gpu_chunk_t **chunks,
                                        uvm_tracker_t *out_tracker)
{
    return uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, UVM_PMM_GPU_MEMORY_TYPE_USER, flags, chunks, out_tracker);
}

// Unpin a temporarily pinned chunk, set its reverse map to a VA block, and
// mark it as allocated.
//
// Can only be used on user memory.
void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block);

// Unpin a temporarily pinned chunk, set its reverse map to a VA block, and
// mark it as referenced.
//
// Can only be used on user memory.
void uvm_pmm_gpu_unpin_referenced(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block);

// Frees the chunk. This also unpins the chunk if it is temporarily pinned.
//
// The tracker is optional. A NULL tracker indicates that no new operation has
// been pushed for the chunk, but the tracker returned as part of its
// allocation doesn't have to be completed as PMM will synchronize it
// internally if needed. A non-NULL tracker indicates any additional pending
// operations on the chunk pushed by the caller that need to be synchronized
// before freeing or re-using the chunk.
void uvm_pmm_gpu_free(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_tracker_t *tracker);

// Splits the input chunk in-place into smaller chunks of subchunk_size. No
// data is moved, and the smaller chunks remain allocated.
//
// If the subchunks array is non-NULL, it will be filled with
// (uvm_gpu_chunk_get_size(chunk) / subchunk_size) chunks in address order. The
// new chunks must all be freed individually.
//
// If the subchunks array is NULL, the split chunks can be retrieved later by
// passing the original parent chunk to uvm_pmm_gpu_get_subchunks.
//
// On error, the original chunk remains unmodified.
//
// The chunk must be in the ALLOCATED state with the owning VA block lock held,
// or in the TEMP_PINNED state.
//
// subchunk_size must be a valid chunk size for the given type.
//
// The chunk can be re-merged if desired using uvm_pmm_gpu_merge_chunk.
NV_STATUS uvm_pmm_gpu_split_chunk(uvm_pmm_gpu_t *pmm,
                                  uvm_gpu_chunk_t *chunk,
                                  uvm_chunk_size_t subchunk_size,
                                  uvm_gpu_chunk_t **subchunks);
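
// Example (illustrative sketch, not part of the API): splitting a 2M chunk
// into 64K subchunks and merging it back. The subchunk count follows directly
// from the sizes (2M / 64K == 32). The function name is hypothetical.
//
//     static NV_STATUS example_split_and_merge(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
//     {
//         uvm_gpu_chunk_t *subchunks[UVM_CHUNK_SIZE_2M / UVM_CHUNK_SIZE_64K];
//         NV_STATUS status;
//
//         // The chunk is assumed to be 2M and TEMP_PINNED (or ALLOCATED with
//         // the owning VA block lock held).
//         status = uvm_pmm_gpu_split_chunk(pmm, chunk, UVM_CHUNK_SIZE_64K, subchunks);
//         if (status != NV_OK)
//             return status;
//
//         // ... operate on the individual 64K subchunks in address order ...
//
//         // Re-merge the leaves back into the original 2M chunk.
//         uvm_pmm_gpu_merge_chunk(pmm, chunk);
//         return NV_OK;
//     }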

// Retrieve leaf subchunks under parent. Up to num_subchunks chunks are copied
// into the subchunks array in address order, starting with the subchunk at
// start_index. start_index can be thought of as the number of leaf subchunks
// to skip before beginning the copy.
//
// parent can be in the ALLOCATED state, in which case parent is the only chunk
// which may be copied into the subchunks array.
//
// num_subchunks may be 0.
//
// Returns the number of subchunks written to the array. This may be less than
// num_subchunks depending on the value of start_index and how many subchunks
// are present under parent.
size_t uvm_pmm_gpu_get_subchunks(uvm_pmm_gpu_t *pmm,
                                 uvm_gpu_chunk_t *parent,
                                 size_t start_index,
                                 size_t num_subchunks,
                                 uvm_gpu_chunk_t **subchunks);

// Merges a chunk previously split with uvm_pmm_gpu_split_chunk. All of chunk's
// leaf children must be allocated.
void uvm_pmm_gpu_merge_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);

// Waits for all free chunk trackers to complete, removing their completed
// entries.
//
// This inherently races with any chunks being freed to this PMM. The
// assumption is that the caller doesn't care about preventing new chunks from
// being freed, just that any already-freed chunks will be synced.
void uvm_pmm_gpu_sync(uvm_pmm_gpu_t *pmm);

// Mark an allocated chunk as evicted
void uvm_pmm_gpu_mark_chunk_evicted(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);

// Initialize indirect peer state so accessing_gpu is ready to create mappings
// to pmm's root chunks.
//
// Locking: The global lock must be held.
NV_STATUS uvm_pmm_gpu_indirect_peer_init(uvm_pmm_gpu_t *pmm, uvm_gpu_t *accessing_gpu);

// Tear down indirect peer state from other_gpu to pmm's GPU. Any existing
// IOMMU mappings from other_gpu to this GPU are torn down.
//
// Locking: The global lock must be held.
void uvm_pmm_gpu_indirect_peer_destroy(uvm_pmm_gpu_t *pmm, uvm_gpu_t *other_gpu);

// Create an IOMMU mapping to allow accessing_gpu to access chunk on pmm's GPU.
// chunk can be any size, and can be mapped more than once (the address will
// not change). The address can be retrieved using
// uvm_pmm_gpu_indirect_peer_addr.
//
// Note that there is no corresponding unmap call. The mappings will be removed
// automatically as necessary when the chunk is freed. This allows mappings to
// be reused as much as possible.
NV_STATUS uvm_pmm_gpu_indirect_peer_map(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_gpu_t *accessing_gpu);

// Retrieve the system address accessing_gpu must use to access this chunk.
// uvm_pmm_gpu_indirect_peer_map must have been called first.
NvU64 uvm_pmm_gpu_indirect_peer_addr(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_gpu_t *accessing_gpu);
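
// Example (illustrative sketch, not part of the API): the indirect peer flow
// for letting accessing_gpu reach a chunk owned by pmm's GPU. A prior
// uvm_pmm_gpu_indirect_peer_init() call with the global lock held is assumed.
// The function name is hypothetical.
//
//     static NV_STATUS example_indirect_peer_map(uvm_pmm_gpu_t *pmm,
//                                                uvm_gpu_chunk_t *chunk,
//                                                uvm_gpu_t *accessing_gpu)
//     {
//         NvU64 sys_addr;
//         NV_STATUS status;
//
//         // Map the chunk through the IOMMU. Mapping the same chunk again
//         // returns the same address, and there is no unmap call: the mapping
//         // is dropped automatically when the chunk is freed.
//         status = uvm_pmm_gpu_indirect_peer_map(pmm, chunk, accessing_gpu);
//         if (status != NV_OK)
//             return status;
//
//         // The SYS aperture address accessing_gpu must use for this chunk.
//         sys_addr = uvm_pmm_gpu_indirect_peer_addr(pmm, chunk, accessing_gpu);
//         (void)sys_addr;
//         return NV_OK;
//     }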

// Returns the physical address for use by accessing_gpu of a vidmem allocation
// on the peer pmm->gpu. This address can be used for making PTEs on
// accessing_gpu, but not for copying between the two GPUs. For that, use
// uvm_gpu_peer_copy_address.
uvm_gpu_phys_address_t uvm_pmm_gpu_peer_phys_address(uvm_pmm_gpu_t *pmm,
                                                     uvm_gpu_chunk_t *chunk,
                                                     uvm_gpu_t *accessing_gpu);

// Returns the physical or virtual address for use by accessing_gpu to copy
// to/from a vidmem allocation on the peer pmm->gpu. This may be different from
// uvm_gpu_peer_phys_address to handle CE limitations in addressing peer
// physical memory directly.
uvm_gpu_address_t uvm_pmm_gpu_peer_copy_address(uvm_pmm_gpu_t *pmm,
                                                uvm_gpu_chunk_t *chunk,
                                                uvm_gpu_t *accessing_gpu);

// Mark a user chunk as used
//
// If the chunk is pinned or selected for eviction, this won't do anything. The
// chunk can be pinned when it's being initially populated by the VA block.
// Allow that state to make this API easy to use for the caller.
void uvm_pmm_gpu_mark_root_chunk_used(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);

// Mark an allocated user chunk as unused
void uvm_pmm_gpu_mark_root_chunk_unused(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);

static bool uvm_gpu_chunk_same_root(uvm_gpu_chunk_t *chunk1, uvm_gpu_chunk_t *chunk2)
{
    return UVM_ALIGN_DOWN(chunk1->address, UVM_CHUNK_SIZE_MAX) == UVM_ALIGN_DOWN(chunk2->address, UVM_CHUNK_SIZE_MAX);
}

// Finds the first (smallest) size in the chunk_sizes mask
static uvm_chunk_size_t uvm_chunk_find_first_size(uvm_chunk_sizes_mask_t chunk_sizes)
{
    UVM_ASSERT(chunk_sizes);
    return (uvm_chunk_size_t)1 << __ffs(chunk_sizes);
}

// Finds the last (biggest) size in the chunk_sizes mask
static uvm_chunk_size_t uvm_chunk_find_last_size(uvm_chunk_sizes_mask_t chunk_sizes)
{
    UVM_ASSERT(chunk_sizes);
    return (uvm_chunk_size_t)1 << __fls(chunk_sizes);
}

// Finds the smallest size in the chunk_sizes mask which is larger than
// chunk_size. If there is no such value returns UVM_CHUNK_SIZE_INVALID.
static uvm_chunk_size_t uvm_chunk_find_next_size(uvm_chunk_sizes_mask_t chunk_sizes, uvm_chunk_size_t chunk_size)
{
    UVM_ASSERT(is_power_of_2(chunk_size));
    UVM_ASSERT(chunk_sizes & chunk_size);
    BUILD_BUG_ON(sizeof(chunk_sizes) > sizeof(unsigned long));
    return (uvm_chunk_size_t)1 << __ffs((chunk_sizes & ~((chunk_size << 1) - 1)) | UVM_CHUNK_SIZE_INVALID);
}

// Finds the largest size in the chunk_sizes mask which is smaller than
// chunk_size. If there is no such value returns UVM_CHUNK_SIZE_INVALID.
static uvm_chunk_size_t uvm_chunk_find_prev_size(uvm_chunk_sizes_mask_t chunk_sizes, uvm_chunk_size_t chunk_size)
{
    UVM_ASSERT(is_power_of_2(chunk_size));
    UVM_ASSERT(chunk_sizes & chunk_size);
    chunk_sizes = chunk_sizes & (chunk_size - 1);
    if (!chunk_sizes)
        return UVM_CHUNK_SIZE_INVALID;
    return (uvm_chunk_size_t)1 << __fls(chunk_sizes);
}
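
// Example (illustrative sketch): how the chunk size helpers above behave for a
// hypothetical mask of supported sizes. Given
//
//     uvm_chunk_sizes_mask_t sizes = UVM_CHUNK_SIZE_4K | UVM_CHUNK_SIZE_64K | UVM_CHUNK_SIZE_2M;
//
// then:
//
//     uvm_chunk_find_first_size(sizes)                   == UVM_CHUNK_SIZE_4K
//     uvm_chunk_find_last_size(sizes)                    == UVM_CHUNK_SIZE_2M
//     uvm_chunk_find_next_size(sizes, UVM_CHUNK_SIZE_4K) == UVM_CHUNK_SIZE_64K
//     uvm_chunk_find_next_size(sizes, UVM_CHUNK_SIZE_2M) == UVM_CHUNK_SIZE_INVALID
//     uvm_chunk_find_prev_size(sizes, UVM_CHUNK_SIZE_4K) == UVM_CHUNK_SIZE_INVALID
//
// The for_each_chunk_size() macros below iterate over exactly these values in
// ascending or descending order.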

// Obtain the {va_block, virt_addr} information for the chunks in the given
// [phys_addr:phys_addr + region_size) range. One entry per chunk is returned.
// phys_addr and region_size must be page-aligned.
//
// Valid translations are written to out_mappings sequentially (there are no
// gaps). The caller is required to provide enough entries in out_mappings for
// the whole region. The function returns the number of entries written to
// out_mappings.
//
// The returned reverse map is a snapshot: it is stale as soon as it is
// returned, and the caller is responsible for locking the VA block(s) and
// checking that the chunks are still there. Also, the VA block(s) are
// retained, and it's up to the caller to release them.
NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region_size, uvm_reverse_map_t *out_mappings);

// Iterates over every size in the input mask from smallest to largest
#define for_each_chunk_size(__size, __chunk_sizes)                                 \
    for ((__size) = (__chunk_sizes) ? uvm_chunk_find_first_size(__chunk_sizes) :   \
                                      UVM_CHUNK_SIZE_INVALID;                      \
         (__size) != UVM_CHUNK_SIZE_INVALID;                                       \
         (__size) = uvm_chunk_find_next_size((__chunk_sizes), (__size)))

// Iterates over every size in the input mask from largest to smallest
#define for_each_chunk_size_rev(__size, __chunk_sizes)                             \
    for ((__size) = (__chunk_sizes) ? uvm_chunk_find_last_size(__chunk_sizes) :    \
                                      UVM_CHUNK_SIZE_INVALID;                      \
         (__size) != UVM_CHUNK_SIZE_INVALID;                                       \
         (__size) = uvm_chunk_find_prev_size((__chunk_sizes), (__size)))

// Iterates over every size in the input mask from smallest to largest,
// starting from and including __size. __size must be present in the mask.
#define for_each_chunk_size_from(__size, __chunk_sizes)                            \
    for (; (__size) != UVM_CHUNK_SIZE_INVALID;                                     \
         (__size) = uvm_chunk_find_next_size((__chunk_sizes), (__size)))

// Iterates over every size in the input mask from largest to smallest,
// starting from and including __size. __size must be present in the mask.
#define for_each_chunk_size_rev_from(__size, __chunk_sizes)                        \
    for (; (__size) != UVM_CHUNK_SIZE_INVALID;                                     \
         (__size) = uvm_chunk_find_prev_size((__chunk_sizes), (__size)))

#endif