/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_PMM_GPU_H__
#define __UVM_PMM_GPU_H__

//
// The Physical Memory Manager (PMM) manages the life cycle of GPU physical
// memory.
//
// The memory is managed in GPU chunks of different sizes (uvm_chunk_size_t),
// and users of PMM need to explicitly register the chunk sizes they need
// supported (see chunk_size_init_func in uvm_pmm_gpu_init()).
//
// Two memory types (uvm_pmm_gpu_memory_type_t) are supported, one for user and
// one for kernel allocations. The user memory type is used only for backing
// user data managed by VA blocks, and the kernel memory type is used for
// everything else. The distinction exists to support oversubscription, which
// requires the ability to evict already allocated memory from its users on
// demand in order to satisfy new memory allocations when no more unused memory
// is available. Eviction is limited to the user memory type as it's a very
// complex operation requiring integration between PMM and other UVM driver
// modules. The assumption is that the vast majority of memory should be used
// for user data as everything else can be considered overhead and should be
// minimized. Two flavors of oversubscription exist: internal oversubscription
// allowing PMM allocations to evict other PMM allocations, and external
// oversubscription allowing other PMA clients to evict memory used by PMM.
//
// Both allocation and freeing of memory support asynchronous operations:
// pending GPU operations on the allocated chunks are returned via a tracker
// when allocating memory, and can be passed in via a tracker when freeing it.
//

#include "uvm_forward_decl.h"
#include "uvm_lock.h"
#include "uvm_processors.h"
#include "uvm_tracker.h"
#include "uvm_va_block_types.h"
#include "uvm_linux.h"
#include "uvm_types.h"
#include "nv_uvm_types.h"
#if UVM_IS_CONFIG_HMM()
#include <linux/memremap.h>
#endif

typedef enum
{
    UVM_CHUNK_SIZE_1       =           1ULL,
    UVM_CHUNK_SIZE_2       =           2ULL,
    UVM_CHUNK_SIZE_4       =           4ULL,
    UVM_CHUNK_SIZE_8       =           8ULL,
    UVM_CHUNK_SIZE_16      =          16ULL,
    UVM_CHUNK_SIZE_32      =          32ULL,
    UVM_CHUNK_SIZE_64      =          64ULL,
    UVM_CHUNK_SIZE_128     =         128ULL,
    UVM_CHUNK_SIZE_256     =         256ULL,
    UVM_CHUNK_SIZE_512     =         512ULL,
    UVM_CHUNK_SIZE_1K      =        1024ULL,
    UVM_CHUNK_SIZE_2K      =      2*1024ULL,
    UVM_CHUNK_SIZE_4K      =      4*1024ULL,
    UVM_CHUNK_SIZE_8K      =      8*1024ULL,
    UVM_CHUNK_SIZE_16K     =     16*1024ULL,
    UVM_CHUNK_SIZE_32K     =     32*1024ULL,
    UVM_CHUNK_SIZE_64K     =     64*1024ULL,
    UVM_CHUNK_SIZE_128K    =    128*1024ULL,
    UVM_CHUNK_SIZE_256K    =    256*1024ULL,
    UVM_CHUNK_SIZE_512K    =    512*1024ULL,
    UVM_CHUNK_SIZE_1M      =   1024*1024ULL,
    UVM_CHUNK_SIZE_2M      = 2*1024*1024ULL,
    UVM_CHUNK_SIZE_MAX     = UVM_CHUNK_SIZE_2M,
    UVM_CHUNK_SIZE_INVALID = UVM_CHUNK_SIZE_MAX * 2ULL
} uvm_chunk_size_t;

#define UVM_CHUNK_SIZES_MASK     (uvm_chunk_sizes_mask_t)(UVM_CHUNK_SIZE_MAX | (UVM_CHUNK_SIZE_MAX-1))
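
// Example (illustrative only; "my_sizes" is a hypothetical name): a PMM client
// that only works with 64K and 2M chunks could describe that with a size mask,
// which must always be a subset of UVM_CHUNK_SIZES_MASK:
//
//     uvm_chunk_sizes_mask_t my_sizes = UVM_CHUNK_SIZE_64K | UVM_CHUNK_SIZE_2M;
//
//     UVM_ASSERT((my_sizes & ~UVM_CHUNK_SIZES_MASK) == 0);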

typedef enum
{
    // Memory type for backing user pages. On Pascal+ it can be evicted.
    UVM_PMM_GPU_MEMORY_TYPE_USER,
    // When the Confidential Computing feature is enabled, the protected flavor
    // allocates memory out of the VPR region. When it's disabled, the flavors
    // have no effect and are equivalent to the base type.
    UVM_PMM_GPU_MEMORY_TYPE_USER_PROTECTED = UVM_PMM_GPU_MEMORY_TYPE_USER,
    UVM_PMM_GPU_MEMORY_TYPE_USER_UNPROTECTED,

    // Memory type for internal UVM allocations. It cannot be evicted.
    UVM_PMM_GPU_MEMORY_TYPE_KERNEL,
    // See the user types for the behavior description when the Confidential
    // Computing feature is ON or OFF.
    UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED = UVM_PMM_GPU_MEMORY_TYPE_KERNEL,
    UVM_PMM_GPU_MEMORY_TYPE_KERNEL_UNPROTECTED,

    // Number of types - MUST BE LAST.
    UVM_PMM_GPU_MEMORY_TYPE_COUNT
} uvm_pmm_gpu_memory_type_t;

const char *uvm_pmm_gpu_memory_type_string(uvm_pmm_gpu_memory_type_t type);

// Returns true if the given memory type is used to back user pages.
bool uvm_pmm_gpu_memory_type_is_user(uvm_pmm_gpu_memory_type_t type);

// Returns true if the given memory type is used to back internal UVM
// allocations.
static bool uvm_pmm_gpu_memory_type_is_kernel(uvm_pmm_gpu_memory_type_t type)
{
    return !uvm_pmm_gpu_memory_type_is_user(type);
}

typedef enum
{
    // Chunk belongs to PMA. Code outside PMM should not have access to it and
    // it is likely a bug in UVM code (either in PMM or outside) if that
    // happens.
    UVM_PMM_GPU_CHUNK_STATE_PMA_OWNED,

    // Chunk is on a free list, i.e. it can be reused or returned to PMA as
    // soon as its tracker is done. Code outside PMM should not have access to
    // this chunk and it is likely a bug in UVM code (either in PMM or outside)
    // if that happens.
    UVM_PMM_GPU_CHUNK_STATE_FREE,

    // Chunk is split into subchunks.
    UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT,

    // Chunk is temporarily pinned.
    //
    // This state is used for user memory chunks that have been allocated but
    // not yet unpinned, and also internally when a chunk is about to be split.
    UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED,

    // Chunk is allocated, i.e. it is backing some VA block.
    UVM_PMM_GPU_CHUNK_STATE_ALLOCATED,

    // Number of states - MUST BE LAST
    UVM_PMM_GPU_CHUNK_STATE_COUNT
} uvm_pmm_gpu_chunk_state_t;

const char *uvm_pmm_gpu_chunk_state_string(uvm_pmm_gpu_chunk_state_t state);

typedef enum
{
    // No flags passed
    UVM_PMM_ALLOC_FLAGS_NONE,

    // If there is no free memory, the allocation may evict chunks instead of
    // returning an error immediately. Therefore it must not be called under
    // the VA block lock.
    UVM_PMM_ALLOC_FLAGS_EVICT = (1 << 0),

    // Do not use batching in this call if PMA page allocation is required
    UVM_PMM_ALLOC_FLAGS_DONT_BATCH = (1 << 1),

    UVM_PMM_ALLOC_FLAGS_MASK = (1 << 2) - 1
} uvm_pmm_alloc_flags_t;

typedef enum
{
    // Identifier for lists with zeroed chunks
    UVM_PMM_LIST_ZERO,

    // Identifier for lists with non-zeroed chunks
    UVM_PMM_LIST_NO_ZERO,

    // Number of states for zeroed/non-zeroed chunk lists - MUST BE LAST
    UVM_PMM_LIST_ZERO_COUNT
} uvm_pmm_list_zero_t;

static void uvm_pmm_list_zero_checks(void)
{
    BUILD_BUG_ON(UVM_PMM_LIST_ZERO_COUNT > 2);
}

// Maximum number of chunk sizes per type of allocation on a single GPU.
// The worst case today is Maxwell with 4 allocation sizes for page tables and
// 2 page sizes used by uvm_mem_t. Notably, one of the page table allocation
// sizes is 2M, which is also the common root chunk size.
#define UVM_MAX_CHUNK_SIZES 6

// Maximum gap between 2 allocation levels, i.e. the maximum number of
// subchunks a chunk can be split into.
#define UVM_PMM_MAX_SUBCHUNKS UVM_CHUNK_SIZE_MAX

#define UVM_PMM_CHUNK_SPLIT_CACHE_SIZES (ilog2(UVM_PMM_MAX_SUBCHUNKS) + 1)
#define UVM_CHUNK_SIZE_MASK_SIZE (ilog2(UVM_CHUNK_SIZE_MAX) + 1)

typedef uvm_chunk_size_t uvm_chunk_sizes_mask_t;

typedef struct uvm_pmm_gpu_chunk_suballoc_struct uvm_pmm_gpu_chunk_suballoc_t;

#if UVM_IS_CONFIG_HMM()

typedef struct uvm_pmm_gpu_struct uvm_pmm_gpu_t;

typedef struct
{
    struct dev_pagemap pagemap;
} uvm_pmm_gpu_devmem_t;

// Return the GPU chunk for a given device private struct page.
uvm_gpu_chunk_t *uvm_pmm_devmem_page_to_chunk(struct page *page);

// Return the GPU id for a given device private struct page.
uvm_gpu_id_t uvm_pmm_devmem_page_to_gpu_id(struct page *page);

// Return the PFN of the device private struct page for the given GPU chunk.
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);

#endif

struct uvm_gpu_chunk_struct
{
    // Physical address of the GPU chunk. This could be removed to save memory
    // if it can be obtained from the reverse map and changed into a smaller
    // index for subchunks.
    NvU64 address;

    struct
    {
        // We use +1 in the order_base_2 calls to avoid compiler warnings due
        // to the bitfields being too narrow for the values of their types.
        uvm_pmm_gpu_memory_type_t type : order_base_2(UVM_PMM_GPU_MEMORY_TYPE_COUNT + 1);

        // The eviction flag is internal and used only for root chunks. It's
        // set by the eviction path once a chunk is chosen for eviction in
        // chunk_start_eviction(). Also see the (root_)chunk_is_in_eviction()
        // helpers.
        bool in_eviction : 1;

        bool inject_split_error : 1;

        // This flag is initialized when allocating a new root chunk from PMA.
        // It is set to true if PMA already scrubbed the chunk. The flag is
        // only valid at allocation time (after the uvm_pmm_gpu_alloc() call),
        // and the caller is not required to clear it before freeing the chunk.
        // The VA block chunk population code can query it to skip zeroing the
        // chunk.
        bool is_zero : 1;

        // This flag indicates that an allocated chunk is referenced by a
        // device private struct page PTE and therefore expects a page_free()
        // callback.
        bool is_referenced : 1;

        uvm_pmm_gpu_chunk_state_t state : order_base_2(UVM_PMM_GPU_CHUNK_STATE_COUNT + 1);

        size_t log2_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);

        // Start page index within va_block
        uvm_page_index_t va_block_page_index : order_base_2(PAGES_PER_UVM_VA_BLOCK + 1);

        // This allows determining which PMM owns the chunk. Users of this
        // field must only use it if the owning GPU is retained.
        // TODO: Bug 2008200: Enforce single PMM instance per GPU
        NvU32 gpu_global_index : order_base_2(UVM_GLOBAL_ID_MAX_PROCESSORS);
    };

    // List entry.
    //
    // Guaranteed to be a valid list node at all times for simplicity.
    //
    // Protected by PMM's list_lock when managed by PMM. Notably the list node
    // can be used by the allocator of the chunk after alloc and before the
    // chunk is unpinned or freed.
    struct list_head list;

    // The VA block using the chunk, if any.
    // User chunks that are not used by a VA block are considered to be
    // temporarily pinned and cannot be evicted.
    uvm_va_block_t *va_block;

    // If this is a subchunk, it points to the parent, i.e. the larger chunk
    // that contains this chunk.
    uvm_gpu_chunk_t *parent;

    // Array describing suballocations
    uvm_pmm_gpu_chunk_suballoc_t *suballoc;
};

typedef struct uvm_gpu_root_chunk_struct
{
    uvm_gpu_chunk_t chunk;

    // Pending operations for all GPU chunks under the root chunk.
    //
    // Protected by the corresponding root chunk bit lock.
    uvm_tracker_t tracker;

    // Indirect peers which have IOMMU mappings to this root chunk. The mapped
    // addresses are stored in this root chunk's index in
    // uvm_pmm_gpu_t::root_chunks.indirect_peer[id].dma_addrs.
    //
    // Protected by the corresponding root chunk bit lock.
    //
    // We can use a regular processor id because indirect peers are not allowed
    // between partitioned GPUs when SMC is enabled.
    uvm_processor_mask_t indirect_peers_mapped;
} uvm_gpu_root_chunk_t;

typedef struct
{
    // Indirect peers are GPUs which can coherently access this GPU's memory,
    // but are routed through an intermediate processor. Indirect peers access
    // each others' memory with the SYS aperture rather than a PEER aperture,
    // meaning they need IOMMU mappings:
    //
    // accessing_gpu ==> IOMMU ==> CPU ==> owning_gpu (this GPU)
    //
    // This array has one entry per root chunk on this GPU. Each entry
    // contains the IOMMU address accessing_gpu needs to use in order to
    // access this GPU's root chunk. The root chunks are mapped as whole
    // regions both for tracking simplicity and to allow GPUs to map with
    // large PTEs.
    //
    // An array entry is valid iff accessing_gpu's ID is set in the
    // corresponding root chunk's indirect_peers_mapped mask.
    //
    // Management of these addresses would be simpler if they were stored
    // in the root chunks themselves, but in the common case there are only
    // a small number of indirect peers in a system. Dynamic array
    // allocation per indirect peer wastes less memory.
    NvU64 *dma_addrs;

    // Number of this GPU's root chunks mapped for each indirect peer.
    atomic64_t map_count;
} uvm_gpu_root_chunk_indirect_peer_t;

typedef struct uvm_pmm_gpu_struct
{
    // Mask of the supported chunk sizes for each memory type
    uvm_chunk_sizes_mask_t chunk_sizes[UVM_PMM_GPU_MEMORY_TYPE_COUNT];

    // PMA (Physical Memory Allocator) opaque handle
    void *pma;

    // PMA statistics used for eviction heuristics
    const UvmPmaStatistics *pma_stats;

    struct
    {
        // Array of all root chunks indexed by their physical address divided
        // by UVM_CHUNK_SIZE_MAX.
        //
        // This array is pre-allocated during uvm_pmm_gpu_init() for all
        // possible physical addresses (based on
        // gpu::vidmem_max_physical_address).
        size_t count;
        uvm_gpu_root_chunk_t *array;

        // Bit locks for the root chunks, with one bit per root chunk
        uvm_bit_locks_t bitlocks;

        // List of root chunks unused by VA blocks, i.e. allocated, but not
        // holding any resident pages. These take priority when evicting as no
        // data needs to be migrated for them to be evicted.
        //
        // For simplicity, the list is approximate, tracking unused chunks only
        // from root chunk sized (2M) VA blocks.
        //
        // Updated by the VA block code with
        // uvm_pmm_gpu_mark_root_chunk_(un)used().
        struct list_head va_block_unused;

        // List of root chunks used by VA blocks
        struct list_head va_block_used;

        // List of chunks needing to be lazily freed and a queue for processing
        // the list. TODO: Bug 3881835: revisit whether to use nv_kthread_q_t
        // or workqueue.
        struct list_head va_block_lazy_free;
        nv_kthread_q_item_t va_block_lazy_free_q_item;

        uvm_gpu_root_chunk_indirect_peer_t indirect_peer[UVM_ID_MAX_GPUS];
    } root_chunks;

#if UVM_IS_CONFIG_HMM()
    uvm_pmm_gpu_devmem_t devmem;
#endif

    // Lock protecting PMA allocation, freeing and eviction
    uvm_rw_semaphore_t pma_lock;

    // Lock protecting splits, merges and walks of chunks.
    uvm_mutex_t lock;

    // Lock protecting the lists and chunk state transitions.
    uvm_spinlock_t list_lock;

    // Free chunk lists. There are separate lists for non-zero and zero chunks.
    struct list_head free_list[UVM_PMM_GPU_MEMORY_TYPE_COUNT][UVM_MAX_CHUNK_SIZES][UVM_PMM_LIST_ZERO_COUNT];

    // Inject an error after evicting a number of chunks. 0 means no error is
    // left to be injected.
    NvU32 inject_pma_evict_error_after_num_chunks;

    // The mask of the initialized chunk sizes
    DECLARE_BITMAP(chunk_split_cache_initialized, UVM_PMM_CHUNK_SPLIT_CACHE_SIZES);

    bool initialized;

    bool pma_address_cache_initialized;
} uvm_pmm_gpu_t;

// Return the containing GPU
uvm_gpu_t *uvm_pmm_to_gpu(uvm_pmm_gpu_t *pmm);

// Initialize the PMM on the GPU
NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm);

// Deinitialize the PMM on the GPU
void uvm_pmm_gpu_deinit(uvm_pmm_gpu_t *pmm);

static uvm_chunk_size_t uvm_gpu_chunk_get_size(uvm_gpu_chunk_t *chunk)
{
    return ((uvm_chunk_size_t)1) << chunk->log2_size;
}

static void uvm_gpu_chunk_set_size(uvm_gpu_chunk_t *chunk, uvm_chunk_size_t size)
{
    chunk->log2_size = ilog2(size);
}

// Retrieve the GPU associated with the chunk. Users of this helper must only
// use it if the owning GPU is retained.
uvm_gpu_t *uvm_gpu_chunk_get_gpu(const uvm_gpu_chunk_t *chunk);

// Return the first struct page corresponding to the physical address range
// of the given chunk.
//
// Notes:
// - The GPU must have NUMA support enabled.
// - For chunks smaller than a system page, this function returns the struct
//   page containing the chunk's starting address.
struct page *uvm_gpu_chunk_to_page(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);

// Allocates num_chunks chunks of size chunk_size into the caller-supplied
// array (chunks).
//
// Returned chunks are in the TEMP_PINNED state, requiring a call to either
// uvm_pmm_gpu_unpin_allocated, uvm_pmm_gpu_unpin_referenced, or
// uvm_pmm_gpu_free. If a tracker is passed in, all the pending operations on
// the allocated chunks will be added to it, with the guarantee that all the
// entries come from the same GPU as the PMM. Otherwise, when the tracker is
// NULL, all the pending operations will be synchronized before returning to
// the caller.
//
// Each of the allocated chunks' list nodes (uvm_gpu_chunk_t::list) can be used
// by the caller until the chunk is unpinned (uvm_pmm_gpu_unpin_allocated,
// uvm_pmm_gpu_unpin_referenced) or freed (uvm_pmm_gpu_free). If used, the list
// node has to be returned to a valid state before calling either of the APIs.
//
// In case of an error, the chunks array is guaranteed to be cleared.
//
// If the memory returned by the PMM allocator cannot be physically addressed,
// the MMU interface provides user chunk mapping and unmapping functions
// (uvm_mmu_chunk_map/unmap) that enable virtual addressing.
NV_STATUS uvm_pmm_gpu_alloc(uvm_pmm_gpu_t *pmm,
                            size_t num_chunks,
                            uvm_chunk_size_t chunk_size,
                            uvm_pmm_gpu_memory_type_t mem_type,
                            uvm_pmm_alloc_flags_t flags,
                            uvm_gpu_chunk_t **chunks,
                            uvm_tracker_t *out_tracker);
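
// Example (illustrative sketch only, not a prescribed usage pattern;
// "my_va_block" and the early returns are hypothetical): allocating a single
// user chunk with a NULL out_tracker so PMM synchronizes any pending
// operations before returning, attaching it to a VA block, and freeing it
// later (see uvm_pmm_gpu_unpin_allocated() and uvm_pmm_gpu_free() below):
//
//     uvm_gpu_chunk_t *chunk;
//     NV_STATUS status;
//
//     // UVM_PMM_ALLOC_FLAGS_EVICT may evict other user memory, so this must
//     // not be called with the VA block lock held.
//     status = uvm_pmm_gpu_alloc(pmm,
//                                1,
//                                UVM_CHUNK_SIZE_2M,
//                                UVM_PMM_GPU_MEMORY_TYPE_USER,
//                                UVM_PMM_ALLOC_FLAGS_EVICT,
//                                &chunk,
//                                NULL);
//     if (status != NV_OK)
//         return status;
//
//     // The chunk is TEMP_PINNED; hand it over to the VA block, which moves
//     // it to the ALLOCATED state and makes it eligible for eviction.
//     uvm_pmm_gpu_unpin_allocated(pmm, chunk, my_va_block);
//
//     // Later, when the backing memory is no longer needed:
//     uvm_pmm_gpu_free(pmm, chunk, NULL);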

// Helper for allocating kernel memory
//
// Internally calls uvm_pmm_gpu_alloc() and sets the state of all chunks to
// allocated on success.
//
// If Confidential Computing is enabled, this helper allocates protected kernel
// memory.
static NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
                                          size_t num_chunks,
                                          uvm_chunk_size_t chunk_size,
                                          uvm_pmm_alloc_flags_t flags,
                                          uvm_gpu_chunk_t **chunks,
                                          uvm_tracker_t *out_tracker)
{
    return uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, UVM_PMM_GPU_MEMORY_TYPE_KERNEL, flags, chunks, out_tracker);
}

// Helper for allocating user memory
//
// Simple wrapper that just uses UVM_PMM_GPU_MEMORY_TYPE_USER for the memory
// type.
//
// If Confidential Computing is enabled, this helper allocates protected user
// memory.
static NV_STATUS uvm_pmm_gpu_alloc_user(uvm_pmm_gpu_t *pmm,
                                        size_t num_chunks,
                                        uvm_chunk_size_t chunk_size,
                                        uvm_pmm_alloc_flags_t flags,
                                        uvm_gpu_chunk_t **chunks,
                                        uvm_tracker_t *out_tracker)
{
    return uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, UVM_PMM_GPU_MEMORY_TYPE_USER, flags, chunks, out_tracker);
}

// Unpin a temporarily pinned chunk, set its reverse map to a VA block, and
// mark it as allocated.
//
// Can only be used on user memory.
void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block);

// Unpin a temporarily pinned chunk, set its reverse map to a VA block, and
// mark it as referenced.
//
// Can only be used on user memory.
void uvm_pmm_gpu_unpin_referenced(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block);

// Frees the chunk. This also unpins the chunk if it is temporarily pinned.
//
// The tracker is optional. A NULL tracker indicates that no new operations
// have been pushed for the chunk; the tracker returned as part of the chunk's
// allocation doesn't have to be completed, as PMM will synchronize it
// internally if needed. A non-NULL tracker contains any additional pending
// operations on the chunk pushed by the caller that need to be synchronized
// before freeing or re-using the chunk.
void uvm_pmm_gpu_free(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_tracker_t *tracker);
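
// Example (illustrative sketch only; "my_tracker" stands for a tracker to
// which the caller has already pushed GPU work touching the chunk): freeing a
// chunk while handing PMM the caller's pending operations, so the memory is
// not reused or returned to PMA before that work completes:
//
//     uvm_pmm_gpu_free(pmm, chunk, &my_tracker);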

// Splits the input chunk in-place into smaller chunks of subchunk_size. No
// data is moved, and the smaller chunks remain allocated.
//
// If the subchunks array is non-NULL, it will be filled with
// (uvm_gpu_chunk_get_size(chunk) / subchunk_size) chunks in address order. The
// new chunks must all be freed individually.
//
// If the subchunks array is NULL, the split chunks can be retrieved later by
// passing the original parent chunk to uvm_pmm_gpu_get_subchunks.
//
// On error, the original chunk remains unmodified.
//
// The chunk must be in the ALLOCATED state with the owning VA block lock held,
// or the TEMP_PINNED state.
//
// subchunk_size must be a valid chunk size for the given type.
//
// The chunk can be re-merged if desired using uvm_pmm_gpu_merge_chunk.
NV_STATUS uvm_pmm_gpu_split_chunk(uvm_pmm_gpu_t *pmm,
                                  uvm_gpu_chunk_t *chunk,
                                  uvm_chunk_size_t subchunk_size,
                                  uvm_gpu_chunk_t **subchunks);

// Retrieve leaf subchunks under parent. Up to num_subchunks chunks are copied
// into the subchunks array in address order, starting with the subchunk at
// start_index. start_index can be thought of as the number of leaf subchunks
// to skip before beginning the copy.
//
// parent can be in the ALLOCATED state, in which case parent is the only chunk
// which may be copied into the subchunks array.
//
// num_subchunks may be 0.
//
// Returns the number of subchunks written to the array. This may be less than
// num_subchunks depending on the value of start_index and how many subchunks
// are present under parent.
size_t uvm_pmm_gpu_get_subchunks(uvm_pmm_gpu_t *pmm,
                                 uvm_gpu_chunk_t *parent,
                                 size_t start_index,
                                 size_t num_subchunks,
                                 uvm_gpu_chunk_t **subchunks);

// Merges a chunk previously split with uvm_pmm_gpu_split_chunk. All of chunk's
// leaf children must be allocated.
void uvm_pmm_gpu_merge_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
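
// Example (illustrative sketch only, under the state requirements documented
// above): splitting a 2M chunk into 64K subchunks, operating on them, and then
// re-merging the original chunk. The subchunk count follows from
// uvm_gpu_chunk_get_size(chunk) / subchunk_size:
//
//     uvm_gpu_chunk_t *subchunks[UVM_CHUNK_SIZE_2M / UVM_CHUNK_SIZE_64K];
//     NV_STATUS status;
//     size_t i;
//
//     status = uvm_pmm_gpu_split_chunk(pmm, chunk, UVM_CHUNK_SIZE_64K, subchunks);
//     if (status != NV_OK)
//         return status;
//
//     for (i = 0; i < ARRAY_SIZE(subchunks); i++) {
//         // Operate on subchunks[i]. No data was moved by the split.
//     }
//
//     // The same subchunks could also be retrieved later with:
//     //     uvm_pmm_gpu_get_subchunks(pmm, chunk, 0, ARRAY_SIZE(subchunks), subchunks);
//
//     uvm_pmm_gpu_merge_chunk(pmm, chunk);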

// Waits for the trackers of all free chunks to complete (removing their
// completed entries).
//
// This inherently races with any chunks being freed to this PMM. The
// assumption is that the caller doesn't care about preventing new chunks from
// being freed, just that any already-freed chunks will be synced.
void uvm_pmm_gpu_sync(uvm_pmm_gpu_t *pmm);

// Mark an allocated chunk as evicted
void uvm_pmm_gpu_mark_chunk_evicted(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);

// Initialize indirect peer state so accessing_gpu is ready to create mappings
// to pmm's root chunks.
//
// Locking: The global lock must be held.
NV_STATUS uvm_pmm_gpu_indirect_peer_init(uvm_pmm_gpu_t *pmm, uvm_gpu_t *accessing_gpu);

// Tear down indirect peer state from other_gpu to pmm's GPU. Any existing
// IOMMU mappings from other_gpu to this GPU are torn down.
//
// Locking: The global lock must be held.
void uvm_pmm_gpu_indirect_peer_destroy(uvm_pmm_gpu_t *pmm, uvm_gpu_t *other_gpu);

// Create an IOMMU mapping to allow accessing_gpu to access chunk on pmm's GPU.
// chunk can be any size, and can be mapped more than once (the address will
// not change). The address can be retrieved using
// uvm_pmm_gpu_indirect_peer_addr.
//
// Note that there is no corresponding unmap call. The mappings will be removed
// automatically as necessary when the chunk is freed. This allows mappings to
// be reused as much as possible.
NV_STATUS uvm_pmm_gpu_indirect_peer_map(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_gpu_t *accessing_gpu);

// Retrieve the system address accessing_gpu must use to access this chunk.
// uvm_pmm_gpu_indirect_peer_map must have been called first.
NvU64 uvm_pmm_gpu_indirect_peer_addr(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_gpu_t *accessing_gpu);
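
// Example (illustrative sketch only): mapping a chunk for an indirect peer and
// retrieving the system address that peer must use:
//
//     NV_STATUS status;
//     NvU64 sys_addr;
//
//     status = uvm_pmm_gpu_indirect_peer_map(pmm, chunk, accessing_gpu);
//     if (status != NV_OK)
//         return status;
//
//     // No explicit unmap is needed; the mapping is removed automatically
//     // when the chunk is freed, and mapping again returns the same address.
//     sys_addr = uvm_pmm_gpu_indirect_peer_addr(pmm, chunk, accessing_gpu);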

// Returns the physical address for use by accessing_gpu of a vidmem allocation
// on the peer pmm->gpu. This address can be used for making PTEs on
// accessing_gpu, but not for copying between the two GPUs. For that, use
// uvm_gpu_peer_copy_address.
uvm_gpu_phys_address_t uvm_pmm_gpu_peer_phys_address(uvm_pmm_gpu_t *pmm,
                                                     uvm_gpu_chunk_t *chunk,
                                                     uvm_gpu_t *accessing_gpu);

// Returns the physical or virtual address for use by accessing_gpu to copy to/
// from a vidmem allocation on the peer pmm->gpu. This may be different from
// uvm_gpu_peer_phys_address to handle CE limitations in addressing peer
// physical memory directly.
uvm_gpu_address_t uvm_pmm_gpu_peer_copy_address(uvm_pmm_gpu_t *pmm,
                                                uvm_gpu_chunk_t *chunk,
                                                uvm_gpu_t *accessing_gpu);

// Mark a user chunk as used
//
// If the chunk is pinned or selected for eviction, this won't do anything. The
// chunk can be pinned when it's being initially populated by the VA block;
// that state is allowed to make this API easy to use for the caller.
void uvm_pmm_gpu_mark_root_chunk_used(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);

// Mark an allocated user chunk as unused
void uvm_pmm_gpu_mark_root_chunk_unused(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);

static bool uvm_gpu_chunk_same_root(uvm_gpu_chunk_t *chunk1, uvm_gpu_chunk_t *chunk2)
{
    return UVM_ALIGN_DOWN(chunk1->address, UVM_CHUNK_SIZE_MAX) == UVM_ALIGN_DOWN(chunk2->address, UVM_CHUNK_SIZE_MAX);
}

// Finds the first (smallest) size in the chunk_sizes mask
static uvm_chunk_size_t uvm_chunk_find_first_size(uvm_chunk_sizes_mask_t chunk_sizes)
{
    UVM_ASSERT(chunk_sizes);
    return (uvm_chunk_size_t)1 << __ffs(chunk_sizes);
}

// Finds the last (biggest) size in the chunk_sizes mask
static uvm_chunk_size_t uvm_chunk_find_last_size(uvm_chunk_sizes_mask_t chunk_sizes)
{
    UVM_ASSERT(chunk_sizes);
    return (uvm_chunk_size_t)1 << __fls(chunk_sizes);
}

// Finds the smallest size in the chunk_sizes mask which is larger than
// chunk_size. If there is no such value returns UVM_CHUNK_SIZE_INVALID.
static uvm_chunk_size_t uvm_chunk_find_next_size(uvm_chunk_sizes_mask_t chunk_sizes, uvm_chunk_size_t chunk_size)
{
    UVM_ASSERT(is_power_of_2(chunk_size));
    UVM_ASSERT(chunk_sizes & chunk_size);
    BUILD_BUG_ON(sizeof(chunk_sizes) > sizeof(unsigned long));
    return (uvm_chunk_size_t)1 << __ffs((chunk_sizes & ~((chunk_size << 1) - 1)) | UVM_CHUNK_SIZE_INVALID);
}

// Finds the largest size in the chunk_sizes mask which is smaller than
// chunk_size. If there is no such value returns UVM_CHUNK_SIZE_INVALID.
static uvm_chunk_size_t uvm_chunk_find_prev_size(uvm_chunk_sizes_mask_t chunk_sizes, uvm_chunk_size_t chunk_size)
{
    UVM_ASSERT(is_power_of_2(chunk_size));
    UVM_ASSERT(chunk_sizes & chunk_size);
    chunk_sizes = chunk_sizes & (chunk_size - 1);
    if (!chunk_sizes)
        return UVM_CHUNK_SIZE_INVALID;
    return (uvm_chunk_size_t)1 << __fls(chunk_sizes);
}

// Obtain the {va_block, virt_addr} information for the chunks in the given
// [phys_addr:phys_addr + region_size) range. One entry per chunk is returned.
// phys_addr and region_size must be page-aligned.
//
// Valid translations are written to out_mappings sequentially (there are no
// gaps). The caller is required to provide enough entries in out_mappings for
// the whole region. The function returns the number of entries written to
// out_mappings.
//
// The returned reverse map is a snapshot: it is stale as soon as it is
// returned, and the caller is responsible for locking the VA block(s) and
// checking that the chunks are still there. Also, the VA block(s) are
// retained, and it's up to the caller to release them.
NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm,
                               NvU64 phys_addr,
                               NvU64 region_size,
                               uvm_reverse_map_t *out_mappings);
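
// Example (illustrative sketch only; MAX_TRANSLATIONS is a hypothetical bound
// chosen by the caller, and the uvm_reverse_map_t field name and the use of
// uvm_va_block_release() are assumptions based on other UVM headers):
//
//     uvm_reverse_map_t mappings[MAX_TRANSLATIONS]; // sized by the caller
//     NvU32 num_mappings;
//     NvU32 i;
//
//     num_mappings = uvm_pmm_gpu_phys_to_virt(pmm, phys_addr, region_size, mappings);
//
//     for (i = 0; i < num_mappings; i++) {
//         // Lock the VA block, re-check that the chunk is still present, do
//         // the work, then drop the reference taken by the translation.
//         uvm_va_block_release(mappings[i].va_block); // field name assumed
//     }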

// Iterates over every size in the input mask from smallest to largest
#define for_each_chunk_size(__size, __chunk_sizes)                                  \
    for ((__size) = (__chunk_sizes) ? uvm_chunk_find_first_size(__chunk_sizes) :    \
                                      UVM_CHUNK_SIZE_INVALID;                       \
         (__size) != UVM_CHUNK_SIZE_INVALID;                                        \
         (__size) = uvm_chunk_find_next_size((__chunk_sizes), (__size)))

// Iterates over every size in the input mask from largest to smallest
#define for_each_chunk_size_rev(__size, __chunk_sizes)                          \
    for ((__size) = (__chunk_sizes) ? uvm_chunk_find_last_size(__chunk_sizes) : \
                                      UVM_CHUNK_SIZE_INVALID;                   \
         (__size) != UVM_CHUNK_SIZE_INVALID;                                    \
         (__size) = uvm_chunk_find_prev_size((__chunk_sizes), (__size)))

// Iterates over every size in the input mask from smallest to largest,
// starting from and including __size. __size must be present in the mask.
#define for_each_chunk_size_from(__size, __chunk_sizes)                 \
    for (; (__size) != UVM_CHUNK_SIZE_INVALID;                          \
         (__size) = uvm_chunk_find_next_size((__chunk_sizes), (__size)))

// Iterates over every size in the input mask from largest to smallest,
// starting from and including __size. __size must be present in the mask.
#define for_each_chunk_size_rev_from(__size, __chunk_sizes)             \
    for (; (__size) != UVM_CHUNK_SIZE_INVALID;                          \
         (__size) = uvm_chunk_find_prev_size((__chunk_sizes), (__size)))
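
// Example (illustrative sketch only): walking a size mask from smallest to
// largest with for_each_chunk_size():
//
//     uvm_chunk_sizes_mask_t sizes = UVM_CHUNK_SIZE_4K | UVM_CHUNK_SIZE_64K | UVM_CHUNK_SIZE_2M;
//     uvm_chunk_size_t size;
//     unsigned count = 0;
//
//     for_each_chunk_size(size, sizes)
//         count++;
//
//     // count == 3 here. for_each_chunk_size_rev() visits the same sizes in
//     // the opposite order.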

#endif