/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_VA_RANGE_H__
#define __UVM_VA_RANGE_H__

#include "uvm_linux.h"
#include "nv-kref.h"
#include "uvm_common.h"
#include "uvm_perf_module.h"
#include "uvm_processors.h"
#include "uvm_gpu.h"
#include "uvm_lock.h"
#include "uvm_va_space.h"
#include "uvm_range_tree.h"
#include "uvm_va_policy.h"
#include "uvm_test_ioctl.h"
#include "uvm_range_group.h"
#include "uvm_forward_decl.h"
#include "uvm_mmu.h"
#include "uvm_hal_types.h"
#include "uvm_mem.h"
#include "uvm_tracker.h"
#include "uvm_ioctl.h"

// VA ranges are the UVM driver equivalent of Linux kernel vmas. They represent
// user allocations of any page-aligned size. We maintain these as a separate
// data structure from the vma tree for several reasons:
//
// 1) RM allocations mapped to the GPU by UVM don't have associated vmas.
//
// 2) We don't always have a separate reference on the vma's mm_struct, so we
//    can't always lock mmap_lock on paths where current->mm != vma->vm_mm.
//
// 3) HMM vmas aren't ours, so we can't use their vm_private_data pointers.
//
// The tree as a whole is protected by va_space->lock. Faults and mappings only
// need to take the lock in read mode. Modifications of range state (such as
// changes to logical permissions or location preferences) must take the lock
// in write mode.
//
// VA ranges with type == UVM_VA_RANGE_TYPE_MANAGED:
//     Each va_range is contained completely within a parent vma. There can be
//     multiple va_ranges under the same vma, but not vice versa. All VAs within
//     the va_range share the same policy state.
//
//     Each va_range is a collection of VA blocks. The VA blocks each have
//     individual locks, and they hold the current mapping and location state
//     for their block across all processors (CPU and all GPUs).
//
// VA ranges with type == UVM_VA_RANGE_TYPE_EXTERNAL:
//     These ranges track physical allocations made by RM. The UVM driver is
//     responsible for mapping them to the GPU(s), but not to the CPU. These
//     ranges do not support faulting or migration, and they do not necessarily
//     correspond to valid vmas.
//
//     These ranges do not have blocks. All state (page tables, mapping handles,
//     etc) is maintained within the range.
//
// VA ranges with type == UVM_VA_RANGE_TYPE_CHANNEL:
//     These are similar to EXTERNAL ranges, except they represent internal
//     allocations required for user channels to operate (context save areas,
//     for example).
//
// VA ranges with type == UVM_VA_RANGE_TYPE_SKED_REFLECTED:
//     These ranges track special SKED reflected mappings required for CNP. The
//     mappings don't have any physical backing. They just use PTEs with a
//     special kind; see make_sked_reflected_pte_pascal() for an example of the
//     PTE encoding.
//     Notably, the API that creates these ranges calls them "dynamic
//     parallelism regions", but we use "SKED reflected ranges" internally as
//     it's more descriptive.
//
// VA ranges with type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
//     These ranges track semaphore pool allocations. They are backed by sysmem
//     and persistently mapped on the CPU and all GPUs (with registered VA
//     spaces) in a user VA space. The ranges are also mapped in the UVM
//     internal VA space on the CPU and all registered GPUs.
//
//     These ranges do not have blocks.
//
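
// Illustrative sketch (hypothetical caller, not part of the driver API): a
// typical lookup takes the VA space lock in read mode, finds the range
// containing an address, and dispatches on its type. The locking helpers are
// assumed to come from uvm_va_space.h:
//
//     uvm_va_range_t *va_range;
//
//     uvm_va_space_down_read(va_space);
//     va_range = uvm_va_range_find(va_space, addr);
//     if (va_range && va_range->type == UVM_VA_RANGE_TYPE_MANAGED) {
//         // Managed ranges carry per-VA-block state; see uvm_va_range_block()
//     }
//     uvm_va_space_up_read(va_space);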

// This enum must be kept in sync with UVM_TEST_VA_RANGE_TYPE in
// uvm_test_ioctl.h
typedef enum
{
    UVM_VA_RANGE_TYPE_INVALID = 0,
    UVM_VA_RANGE_TYPE_MANAGED,
    UVM_VA_RANGE_TYPE_EXTERNAL,
    UVM_VA_RANGE_TYPE_CHANNEL,
    UVM_VA_RANGE_TYPE_SKED_REFLECTED,
    UVM_VA_RANGE_TYPE_SEMAPHORE_POOL,
    UVM_VA_RANGE_TYPE_MAX
} uvm_va_range_type_t;

// Wrapper to protect access to the VMA's vm_page_prot
typedef struct
{
    // Needed for creating CPU mappings on the va_range. Do not access this
    // directly; use uvm_va_range_vma() and friends instead.
    struct vm_area_struct *vma;

    uvm_rw_semaphore_t lock;
} uvm_vma_wrapper_t;

// TODO: Bug 1733295. VA range types should really be inverted. Instead of
//       maintaining common node state with a union of structs, we should have
//       separate C types for each VA range type. Each type would embed a common
//       VA range node.
//
//       There's a lot of state in the top-level uvm_va_range_t struct below
//       which really belongs in the per-type structs (for example, blocks).
//       We're deferring that cleanup to the full refactor.

// va_range state when va_range.type == UVM_VA_RANGE_TYPE_MANAGED
typedef struct
{
    // This is NULL in the case of a zombie allocation. Zombie allocations are
    // created from unfreed allocations at termination of a process which used
    // UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE, when at least one other
    // process is sharing the UVM file descriptor.
    uvm_vma_wrapper_t *vma_wrapper;

    // Managed allocations always use this policy; the policy stored in the
    // va_block is only used for HMM allocations.
    uvm_va_policy_t policy;

    uvm_perf_module_data_desc_t perf_modules_data[UVM_PERF_MODULE_TYPE_COUNT];
} uvm_va_range_managed_t;

typedef struct
{
    // GPU mapping the allocation. The GPU's RM address space is required when
    // releasing the handle.
    uvm_gpu_t *gpu;

    // RM handle to the physical allocation. This handle is dup'd into our
    // client once, on initial mapping of the external allocation. If the
    // allocation is ever split, its ref_count is incremented. The allocation
    // is not released until the ref_count drops to 0.
    NvHandle rm_handle;

    // Refcount for this handle/allocation. The refcount is used when external
    // ranges are split, resulting in two ranges using the same physical
    // allocation.
    nv_kref_t ref_count;
} uvm_ext_gpu_mem_handle;

typedef struct
{
    uvm_range_tree_node_t node;

    // Handle to the physical user allocation dup'd into our client. This
    // prevents the allocation from being removed until we free it, even if the
    // user frees their handle without telling us.
    // This will be NULL for sparse mappings, which don't correspond to actual
    // allocations.
    uvm_ext_gpu_mem_handle *mem_handle;

    // Tracks completion of PTE writes on pt_range_vec. The tree lock
    // protecting this ext_gpu_map may be dropped before those writes are
    // complete, so subsequent operations on this ext_gpu_map must acquire this
    // tracker before operating on pt_range_vec.
    uvm_tracker_t tracker;

    // GPU on which this allocation is mapped.
    uvm_gpu_t *gpu;

    // GPU which owns the allocation. For sysmem, this is the GPU that the
    // sysmem was originally allocated under. For the allocation to remain
    // valid we need to prevent the GPU from going away, similar to P2P mapped
    // memory. The same applies to EGM memory.
    //
    // This field is not used for sparse mappings as they don't have an
    // allocation and, hence, no owning GPU.
    //
    // TODO: Bug 1811006: The semantics of sysmem might change depending on the
    //       resolution of this bug.
    //
    // TODO: Bug 1757136: For SLI, this is any GPU in the SLI group. We may need
    //       to handle that specially.
    uvm_gpu_t *owning_gpu;

    // We need to know whether this memory is actually located on owning_gpu so
    // we know what type of membar is needed at TLB invalidate time, and
    // whether the mapping GPU has to be unmapped on UvmDisablePeerAccess.
    //
    // This field is not used for sparse mappings as they don't have physical
    // backing.
    bool is_sysmem;

    // EGM memory. If true, is_sysmem must also be true and owning_gpu must be
    // valid.
    bool is_egm;

    // GPU page tables mapping the allocation
    uvm_page_table_range_vec_t pt_range_vec;

    // Node for the deferred free list where this allocation is stored once it
    // is unmapped.
    //
    // This field is unused for sparse mappings. Since they don't have physical
    // backing there is no RM object to be freed when the mapping is unmapped.
    uvm_deferred_free_object_t deferred_free;
} uvm_ext_gpu_map_t;

typedef struct
{
    // Lock protecting the range tree.
    uvm_mutex_t lock;

    // Range tree that contains all of the mapped portions of an External VA
    // range. The tree holds uvm_ext_gpu_map_t instances.
    uvm_range_tree_t tree;
} uvm_ext_gpu_range_tree_t;

typedef struct
{
    // Mask of GPUs which have mappings to this VA range. If a bit in this mask
    // is set, the corresponding pointer in gpu_ranges is valid.
    // The bitmap can be safely accessed by following these locking rules:
    //   * If the VA space lock is held for write, the mask can be read or
    //     written normally.
    //   * If the VA space lock is held for read and one of the range tree
    //     locks is held, only the bit corresponding to that GPU range tree can
    //     be accessed. Writes must use uvm_processor_mask_set_atomic and
    //     uvm_processor_mask_clear_atomic to avoid clobbering other bits in
    //     the mask. If no range tree lock is held, the mask cannot be
    //     accessed.
    //   * If the VA space lock is not held, the mask cannot be accessed.
    uvm_processor_mask_t mapped_gpus;

    // Per-GPU tree of mapped external allocations. This has to be per-GPU in
    // the VA range because each GPU is able to map a completely different set
    // of allocations to the same VA range.
    uvm_ext_gpu_range_tree_t gpu_ranges[UVM_ID_MAX_GPUS];

    // Dynamically allocated processor mask allocated in
    // uvm_va_range_create_external() and used and freed in uvm_free().
    uvm_processor_mask_t *retained_mask;
} uvm_va_range_external_t;

// va_range state when va_range.type == UVM_VA_RANGE_TYPE_CHANNEL. This
// represents a channel buffer resource and mapping.
typedef struct
{
    // Only a single GPU can map a channel resource, so we only need one GPU
    // VA space parent.
    uvm_gpu_va_space_t *gpu_va_space;

    // Page tables mapped by this range
    uvm_page_table_range_vec_t pt_range_vec;

    // Physical location of this channel resource. All pages have the same
    // aperture.
    uvm_aperture_t aperture;

    // Note that this is not a normal RM object handle. It is a non-zero opaque
    // identifier underneath the GPU VA space which represents this channel
    // resource. Each channel using this VA range has retained this descriptor
    // and is responsible for releasing it. That's safe because channels
    // outlive their VA ranges.
    NvP64 rm_descriptor;

    // This is an ID assigned by RM to each resource descriptor.
    NvU32 rm_id;

    // The TSG which owns this mapping. Sharing of VA ranges is only allowed
    // within the same TSG. If valid == false, no sharing is allowed because
    // the channel is not in a TSG.
    struct
    {
        bool valid;
        NvU32 id;
    } tsg;

    NvU64 ref_count;

    // Storage in the corresponding uvm_gpu_va_space's channel_va_ranges list
    struct list_head list_node;
} uvm_va_range_channel_t;

// va_range state when va_range.type == UVM_VA_RANGE_TYPE_SKED_REFLECTED. This
// represents a SKED reflected mapping.
typedef struct
{
    // Each SKED reflected range is unique to a single GPU, so only a single
    // GPU VA space needs to be tracked.
    uvm_gpu_va_space_t *gpu_va_space;

    // Page tables mapped by this range
    uvm_page_table_range_vec_t pt_range_vec;
} uvm_va_range_sked_reflected_t;

typedef struct
{
    uvm_mem_t *mem;

    // The optional owner is a GPU (at most one) that has the allocation
    // cached. In that case, all writes must be done from that GPU.
    // Protected by the va_space lock.
    uvm_gpu_t *owner;

    // Per-GPU attributes
    uvm_mem_gpu_mapping_attrs_t gpu_attrs[UVM_ID_MAX_GPUS];

    // Default attributes to assign when a new GPU is registered
    uvm_mem_gpu_mapping_attrs_t default_gpu_attrs;

    // Tracks all outstanding GPU work using this allocation.
    uvm_tracker_t tracker;
    uvm_mutex_t tracker_lock;
} uvm_va_range_semaphore_pool_t;

struct uvm_va_range_struct
{
    // Parent uvm_va_space.
    uvm_va_space_t *va_space;

    // Storage in the VA range tree. Also contains the range start and end.
    // start and end + 1 have to be PAGE_SIZE aligned.
    uvm_range_tree_node_t node;

    // Force the next split on this range to fail. Set by error injection ioctl
    // (testing purposes only).
    bool inject_split_error;

    // Force the next register_gpu_va_space to fail while adding this va_range.
    // Set by error injection ioctl (testing purposes only).
    bool inject_add_gpu_va_space_error;

    // Mask of UVM-Lite GPUs for the VA range
    //
    // If the preferred location is set to a non-faultable GPU or the CPU,
    // this mask contains all non-faultable GPUs that are in the accessed-by
    // mask, plus the preferred location itself if it's a GPU. Empty otherwise.
    //
    // All UVM-Lite GPUs have mappings only to the preferred location. The
    // mappings are initially established only when the pages are resident on
    // the preferred location, but persist after that until the preferred
    // location is changed or a GPU stops being a UVM-Lite GPU.
    uvm_processor_mask_t uvm_lite_gpus;

    // This is a uvm_va_block_t ** array of all VA block pointers under this
    // range. The pointers can be accessed using the functions
    // uvm_va_range_block() and uvm_va_range_block_create(). The latter
    // allocates the block if it doesn't already exist. Once allocated, the
    // blocks persist in the array until the parent VA range is destroyed.
    //
    // Concurrent on-demand allocation requires the use of either atomics or a
    // spin lock. Given that we don't want to take a spin lock for every
    // lookup, and that the blocks are persistent, atomics are preferred.
    //
    // The number of blocks is calculated from the range size using
    // uvm_va_range_num_blocks().
    //
    // TODO: Bug 1766585: Compare perf of up-front allocation and demand-
    //       allocation of blocks in the common case (lots of accessed blocks)
    //       and the sparse case. If the common case is hurt by demand-
    //       allocation, or if the sparse case isn't helped much, just allocate
    //       them all at range allocation.
    atomic_long_t *blocks;

    uvm_va_range_type_t type;
    union
    {
        uvm_va_range_managed_t managed;
        uvm_va_range_external_t external;
        uvm_va_range_channel_t channel;
        uvm_va_range_sked_reflected_t sked_reflected;
        uvm_va_range_semaphore_pool_t semaphore_pool;
    };
};

// Module load/exit
NV_STATUS uvm_va_range_init(void);
void uvm_va_range_exit(void);

static NvU64 uvm_va_range_size(uvm_va_range_t *va_range)
{
    return uvm_range_tree_node_size(&va_range->node);
}

static bool uvm_va_range_is_aligned(uvm_va_range_t *va_range, NvU64 alignment)
{
    return IS_ALIGNED(va_range->node.start, alignment) && IS_ALIGNED(uvm_va_range_size(va_range), alignment);
}

static bool uvm_va_range_is_managed_zombie(uvm_va_range_t *va_range)
{
    return va_range->type == UVM_VA_RANGE_TYPE_MANAGED && va_range->managed.vma_wrapper == NULL;
}

// Create a va_range with type UVM_VA_RANGE_TYPE_MANAGED. The out va_range
// pointer is optional.
//
// Returns NV_ERR_UVM_ADDRESS_IN_USE if the vma overlaps with an existing range
// in the va_space tree.
NV_STATUS uvm_va_range_create_mmap(uvm_va_space_t *va_space,
                                   struct mm_struct *mm,
                                   uvm_vma_wrapper_t *vma_wrapper,
                                   uvm_va_range_t **out_va_range);

// Create a va_range with type UVM_VA_RANGE_TYPE_EXTERNAL. The out va_range
// pointer is optional.
//
// Returns NV_ERR_UVM_ADDRESS_IN_USE if the range overlaps with an existing
// range in the va_space tree.
NV_STATUS uvm_va_range_create_external(uvm_va_space_t *va_space,
                                       struct mm_struct *mm,
                                       NvU64 start,
                                       NvU64 length,
                                       uvm_va_range_t **out_va_range);

// Create a va_range with type UVM_VA_RANGE_TYPE_CHANNEL. The out va_range
// pointer is optional.
//
// Returns NV_ERR_UVM_ADDRESS_IN_USE if the range overlaps with an existing
// range in the va_space tree.
NV_STATUS uvm_va_range_create_channel(uvm_va_space_t *va_space,
                                      struct mm_struct *mm,
                                      NvU64 start,
                                      NvU64 end,
                                      uvm_va_range_t **out_va_range);

NV_STATUS uvm_va_range_create_sked_reflected(uvm_va_space_t *va_space,
                                             struct mm_struct *mm,
                                             NvU64 start,
                                             NvU64 length,
                                             uvm_va_range_t **out_va_range);

NV_STATUS uvm_va_range_create_semaphore_pool(uvm_va_space_t *va_space,
                                             struct mm_struct *mm,
                                             NvU64 start,
                                             NvU64 length,
                                             const UvmGpuMappingAttributes *per_gpu_attrs,
                                             NvU32 per_gpu_attrs_count,
                                             uvm_va_range_t **out_va_range);

// Destroys any state associated with this VA range, removes the VA range from
// the VA space, and frees the VA range.
//
// deferred_free_list may be NULL if the VA range type is known to not require
// deferred free. Otherwise this function adds entries to the list for later
// processing by uvm_deferred_free_object_list.
void uvm_va_range_destroy(uvm_va_range_t *va_range, struct list_head *deferred_free_list);

void uvm_va_range_zombify(uvm_va_range_t *va_range);

NV_STATUS uvm_api_clean_up_zombie_resources(UVM_CLEAN_UP_ZOMBIE_RESOURCES_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_validate_va_range(UVM_VALIDATE_VA_RANGE_PARAMS *params, struct file *filp);

// Inform the VA range that a GPU VA space is now available for it to map, if
// the VA range is supposed to proactively map GPUs (UvmAllocSemaphorePool,
// UvmSetAccessedBy).
//
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// LOCKING: If mm != NULL, the caller must hold mm->mmap_lock in at least read
//          mode.
NV_STATUS uvm_va_range_add_gpu_va_space(uvm_va_range_t *va_range,
                                        uvm_gpu_va_space_t *gpu_va_space,
                                        struct mm_struct *mm);

// Destroy the VA range's mappings on the GPU, if it has any
//
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// LOCKING: If mm != NULL, the caller must hold mm->mmap_lock in at least read
//          mode.
void uvm_va_range_remove_gpu_va_space(uvm_va_range_t *va_range,
                                      uvm_gpu_va_space_t *gpu_va_space,
                                      struct mm_struct *mm,
                                      struct list_head *deferred_free_list);

// Inform the VA range that peer mappings can now be established between the
// GPUs, if the VA range is supposed to proactively create them
// (UvmSetAccessedBy).
NV_STATUS uvm_va_range_enable_peer(uvm_va_range_t *va_range, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);

// Unmap all page tables in this VA range which have peer mappings between
// these two GPUs, in either direction.
void uvm_va_range_disable_peer(uvm_va_range_t *va_range,
                               uvm_gpu_t *gpu0,
                               uvm_gpu_t *gpu1,
                               struct list_head *deferred_free_list);

// Notify the VA range of a newly registered GPU.
//
// LOCKING: the lock of the enclosing VA space is held in R/W mode
NV_STATUS uvm_va_range_register_gpu(uvm_va_range_t *va_range, uvm_gpu_t *gpu);

// Unmap all page tables in this VA range which map memory owned by this GPU.
// Managed ranges will have any memory still resident on this GPU evicted to
// system memory.
//
// deferred_free_list may be NULL if the VA range type is known to not require
// deferred free. Otherwise this function adds entries to the list for later
// processing by uvm_deferred_free_object_list.
//
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// LOCKING: If mm != NULL, the caller must hold mm->mmap_lock in at least read
//          mode.
void uvm_va_range_unregister_gpu(uvm_va_range_t *va_range,
                                 uvm_gpu_t *gpu,
                                 struct mm_struct *mm,
                                 struct list_head *deferred_free_list);

// Splits existing_va_range into two pieces, with new_va_range always after
// existing. existing is updated to have new_end. new_end + 1 must be
// page-aligned.
//
// Before: [----------- existing ------------]
// After:  [---- existing ----][---- new ----]
//                            ^new_end
//
// On error, existing_va_range is still accessible and is left in its original
// functional state.
//
// The va_range must have type UVM_VA_RANGE_TYPE_MANAGED.
NV_STATUS uvm_va_range_split(uvm_va_range_t *existing_va_range,
                             NvU64 new_end,
                             uvm_va_range_t **new_va_range);
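
// Illustrative sketch (hypothetical caller, not part of the driver API):
// splitting a managed range so that the second piece starts at split_addr.
// new_end is the last byte of the first piece, i.e. split_addr - 1:
//
//     uvm_va_range_t *new_range;
//     NV_STATUS status;
//
//     UVM_ASSERT(IS_ALIGNED(split_addr, PAGE_SIZE));
//     status = uvm_va_range_split(existing, split_addr - 1, &new_range);
//     if (status != NV_OK)
//         return status; // existing is left in its original state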

// TODO: Bug 1707562: Merge va ranges

static uvm_va_range_t *uvm_va_range_container(uvm_range_tree_node_t *node)
{
    if (!node)
        return NULL;
    return container_of(node, uvm_va_range_t, node);
}

// Returns the va_range containing addr, if any
uvm_va_range_t *uvm_va_range_find(uvm_va_space_t *va_space, NvU64 addr);

static uvm_ext_gpu_map_t *uvm_ext_gpu_map_container(uvm_range_tree_node_t *node)
{
    if (!node)
        return NULL;
    return container_of(node, uvm_ext_gpu_map_t, node);
}

// Iterators for all va_ranges

#define uvm_for_each_va_range(va_range, va_space) \
    list_for_each_entry((va_range), &(va_space)->va_range_tree.head, node.list)

#define uvm_for_each_va_range_safe(va_range, va_range_next, va_space) \
    list_for_each_entry_safe((va_range), (va_range_next), &(va_space)->va_range_tree.head, node.list)


// Iterators for specific ranges

// Returns the first va_range in the range [start, end], if any
uvm_va_range_t *uvm_va_space_iter_first(uvm_va_space_t *va_space, NvU64 start, NvU64 end);

// Returns the va_range following the provided va_range in address order, if
// that va_range's start <= the provided end.
uvm_va_range_t *uvm_va_space_iter_next(uvm_va_range_t *va_range, NvU64 end);

// Like uvm_va_space_iter_next, but also returns NULL if the next va_range
// is not adjacent to the provided va_range.
static uvm_va_range_t *uvm_va_space_iter_next_contig(uvm_va_range_t *va_range, NvU64 end)
{
    uvm_va_range_t *next = uvm_va_space_iter_next(va_range, end);
    if (next && next->node.start != va_range->node.end + 1)
        return NULL;
    return next;
}

// Returns true if the range [start, end] contains no VA ranges
static bool uvm_va_space_range_empty(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
    return uvm_va_space_iter_first(va_space, start, end) == NULL;
}

#define uvm_for_each_va_range_in(va_range, va_space, start, end)            \
    for ((va_range) = uvm_va_space_iter_first((va_space), (start), (end));  \
         (va_range);                                                        \
         (va_range) = uvm_va_space_iter_next((va_range), (end)))

#define uvm_for_each_va_range_in_safe(va_range, va_range_next, va_space, start, end)    \
    for ((va_range) = uvm_va_space_iter_first((va_space), (start), (end)),              \
             (va_range_next) = uvm_va_space_iter_next((va_range), (end));               \
         (va_range);                                                                    \
         (va_range) = (va_range_next), (va_range_next) = uvm_va_space_iter_next((va_range), (end)))
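
// Illustrative sketch (hypothetical caller, not part of the driver API):
// walking every VA range overlapping [start, end] with va_space->lock held in
// at least read mode, skipping non-managed ranges:
//
//     uvm_va_range_t *va_range;
//
//     uvm_for_each_va_range_in(va_range, va_space, start, end) {
//         if (va_range->type != UVM_VA_RANGE_TYPE_MANAGED)
//             continue;
//         // Only managed ranges have VA blocks and per-range policy
//     }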

// Iterator for all contiguous VA ranges between [start, end]. If any part of
// [start, end] is not covered by a VA range, iteration stops.
#define uvm_for_each_va_range_in_contig(va_range, va_space, start, end)         \
    for ((va_range) = uvm_va_space_iter_first((va_space), (start), (start));    \
         (va_range);                                                            \
         (va_range) = uvm_va_space_iter_next_contig((va_range), (end)))

#define uvm_for_each_va_range_in_contig_from(va_range, va_space, first_va_range, end) \
    for ((va_range) = (first_va_range);                                               \
         (va_range);                                                                  \
         (va_range) = uvm_va_space_iter_next_contig((va_range), (end)))

// Like uvm_for_each_va_range_in_contig but also stops iteration if any VA
// range has a type other than UVM_VA_RANGE_TYPE_MANAGED.
#define uvm_for_each_managed_va_range_in_contig(va_range, va_space, start, end) \
    for ((va_range) = uvm_va_space_iter_first((va_space), (start), (start));    \
         (va_range) && (va_range)->type == UVM_VA_RANGE_TYPE_MANAGED;           \
         (va_range) = uvm_va_space_iter_next_contig((va_range), (end)))

#define uvm_for_each_va_range_in_vma(va_range, vma)             \
    uvm_for_each_va_range_in(va_range,                          \
                             uvm_va_space_get(vma->vm_file),    \
                             vma->vm_start,                     \
                             vma->vm_end - 1)

#define uvm_for_each_va_range_in_vma_safe(va_range, va_range_next, vma) \
    uvm_for_each_va_range_in_safe(va_range,                             \
                                  va_range_next,                        \
                                  uvm_va_space_get(vma->vm_file),       \
                                  vma->vm_start,                        \
                                  vma->vm_end - 1)

// Only call this if you're sure that either:
// 1) You have a reference on the vma's vm_mm and that vma->vm_mm's mmap_lock
//    is held; or
// 2) You won't be operating on the vma (as with vm_insert_page) or accessing
//    any fields in the vma that can change without va_space->lock being held
//    (such as vm_flags).
//
// Otherwise, use uvm_va_range_vma_current or uvm_va_range_vma_check and be
// prepared to handle a NULL return value.
static struct vm_area_struct *uvm_va_range_vma(uvm_va_range_t *va_range)
{
    struct vm_area_struct *vma;
    UVM_ASSERT_MSG(va_range->type == UVM_VA_RANGE_TYPE_MANAGED, "type: %d", va_range->type);
    UVM_ASSERT(va_range->managed.vma_wrapper);

    uvm_assert_rwsem_locked(&va_range->va_space->lock);

    // vm_file, vm_private_data, vm_start, and vm_end are all safe to access
    // here because they can't change without the kernel calling vm_ops->open
    // or vm_ops->close, which both take va_space->lock.
    vma = va_range->managed.vma_wrapper->vma;
    UVM_ASSERT(vma);
    UVM_ASSERT_MSG(vma->vm_private_data == va_range->managed.vma_wrapper,
                   "vma: 0x%llx [0x%lx, 0x%lx] has vm_private_data 0x%llx\n",
                   (NvU64)vma,
                   vma->vm_start,
                   vma->vm_end - 1,
                   (NvU64)vma->vm_private_data);
    UVM_ASSERT_MSG(va_range->va_space == uvm_va_space_get(vma->vm_file),
                   "va_range va_space: 0x%llx vm_file: 0x%llx vm_file va_space: 0x%llx",
                   (NvU64)va_range->va_space,
                   (NvU64)vma->vm_file,
                   (NvU64)uvm_va_space_get(vma->vm_file));
    UVM_ASSERT_MSG(va_range->node.start >= vma->vm_start,
                   "Range mismatch: va_range: [0x%llx, 0x%llx] vma: [0x%lx, 0x%lx]\n",
                   va_range->node.start,
                   va_range->node.end,
                   vma->vm_start,
                   vma->vm_end - 1);
    UVM_ASSERT_MSG(va_range->node.end <= vma->vm_end - 1,
                   "Range mismatch: va_range: [0x%llx, 0x%llx] vma: [0x%lx, 0x%lx]\n",
                   va_range->node.start,
                   va_range->node.end,
                   vma->vm_start,
                   vma->vm_end - 1);

    return vma;
}

// Check that the VA range's vma is safe to use under mm. If not, NULL is
// returned. If the vma is returned, there must be a reference on mm and
// mm->mmap_lock must be held.
static struct vm_area_struct *uvm_va_range_vma_check(uvm_va_range_t *va_range, struct mm_struct *mm)
{
    struct vm_area_struct *vma;

    UVM_ASSERT_MSG(va_range->type == UVM_VA_RANGE_TYPE_MANAGED, "type: %d\n", va_range->type);

    // Zombies don't have a vma_wrapper.
    if (!va_range->managed.vma_wrapper)
        return NULL;

    vma = uvm_va_range_vma(va_range);

    // Examples of mm on various paths:
    //  - CPU fault         vma->vm_mm
    //  - GPU fault         current->mm or va_space->va_space_mm.mm
    //  - IOCTL             current->mm or va_space->va_space_mm.mm
    //  - Process teardown  NULL
    //
    // Since the "safe" mm varies based on the path, we may not have a reference
    // on the vma's owning mm_struct. We won't know that until we look at the
    // vma. By then it's too late to take mmap_lock since mmap_lock is above the
    // va_space lock in our lock ordering, and we must be holding the va_space
    // lock to query the va_range. Hence the need to detect the cases in which
    // it's safe to operate on the vma.
    //
    // When we can't detect for certain that mm is safe to use, we shouldn't
    // operate on the vma at all. The vma can't be outright freed until we drop
    // the va_space lock so the pointer itself will remain valid, but its fields
    // (like vm_start and vm_end) could be modified behind our back. We also
    // aren't allowed to call vm_insert_page unless we hold the vma's mmap_lock.
    //
    // Note that if uvm_va_space_mm_enabled() is true, then vma->vm_mm must be
    // va_space->va_space_mm.mm because we enforce that at mmap.
    //
    // An interesting case is when vma->vm_mm != current->mm. This can happen
    // due to fork, ptrace, process teardown, etc. It will also be the case in
    // the GPU fault handler.
    if (mm != vma->vm_mm)
        return NULL;

    uvm_assert_mmap_lock_locked(vma->vm_mm);
    return vma;
}

// Helper for use when the only mm which is known is current->mm
static struct vm_area_struct *uvm_va_range_vma_current(uvm_va_range_t *va_range)
{
    return uvm_va_range_vma_check(va_range, current->mm);
}
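
// Illustrative sketch (hypothetical caller, not part of the driver API): a
// path that may be running outside the vma's owning process uses the checked
// accessor and tolerates NULL. This assumes the caller already holds
// current->mm's mmap_lock in at least read mode and the va_space lock:
//
//     struct vm_area_struct *vma = uvm_va_range_vma_current(va_range);
//
//     if (vma) {
//         // current->mm is the vma's mm, so vma fields and calls like
//         // vm_insert_page are safe here.
//     }
//     else {
//         // Don't touch the vma; skip any CPU mapping work.
//     }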

// Returns the maximum number of VA blocks which could be contained within the
// given va_range (the number of elements in the va_range->blocks array).
// va_range->node.start and .end must be set.
//
// The va_range must have type UVM_VA_RANGE_TYPE_MANAGED.
size_t uvm_va_range_num_blocks(uvm_va_range_t *va_range);

// Get the index within the va_range->blocks array of the VA block
// corresponding to addr. The block pointer is not guaranteed to be valid. Use
// either uvm_va_range_block or uvm_va_range_block_create to look up the block.
//
// The va_range must have type UVM_VA_RANGE_TYPE_MANAGED.
size_t uvm_va_range_block_index(uvm_va_range_t *va_range, NvU64 addr);

// Looks up the VA block at va_range->blocks[index]. If no block is present at
// that index, NULL is returned.
//
// The va_range must have type UVM_VA_RANGE_TYPE_MANAGED.
static uvm_va_block_t *uvm_va_range_block(uvm_va_range_t *va_range, size_t index)
{
    UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
    UVM_ASSERT(index < uvm_va_range_num_blocks(va_range));
    uvm_assert_rwsem_locked(&va_range->va_space->lock);

    return (uvm_va_block_t *)atomic_long_read(&va_range->blocks[index]);
}
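
// Illustrative sketch (hypothetical caller, not part of the driver API):
// finding, and allocating on demand, the VA block covering fault_addr in a
// managed range, with va_space->lock already held:
//
//     size_t index = uvm_va_range_block_index(va_range, fault_addr);
//     uvm_va_block_t *va_block = uvm_va_range_block(va_range, index);
//
//     if (!va_block) {
//         status = uvm_va_range_block_create(va_range, index, &va_block);
//         if (status != NV_OK)
//             return status;
//     }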

// Same as uvm_va_range_block except that the block is created if not already
// present in the array. If NV_OK is returned, the block has been allocated
// successfully.
//
// The va_range must have type UVM_VA_RANGE_TYPE_MANAGED.
NV_STATUS uvm_va_range_block_create(uvm_va_range_t *va_range, size_t index, uvm_va_block_t **out_block);

// Returns the first populated VA block in the VA range after the input
// va_block, or NULL if none. If the input va_block is NULL, this returns the
// first VA block in the VA range, if any exists.
uvm_va_block_t *uvm_va_range_block_next(uvm_va_range_t *va_range, uvm_va_block_t *va_block);

// Iterate over populated VA blocks in the range. Does not create new VA blocks.
#define for_each_va_block_in_va_range(__va_range, __va_block)           \
    for (__va_block = uvm_va_range_block_next(__va_range, NULL);        \
         __va_block;                                                    \
         __va_block = uvm_va_range_block_next(__va_range, __va_block))

// Iterate over populated VA blocks in the range. Does not create new VA
// blocks. Safe version.
#define for_each_va_block_in_va_range_safe(__va_range, __va_block, __va_block_next)            \
    for (__va_block = uvm_va_range_block_next(__va_range, NULL),                               \
         __va_block_next = uvm_va_range_block_next(__va_range, __va_block);                    \
         __va_block;                                                                           \
         __va_block = __va_block_next,                                                         \
         __va_block_next = __va_block ? uvm_va_range_block_next(__va_range, __va_block) : NULL)

// Set the VA range preferred location (or unset it if preferred location is
// UVM_ID_INVALID).
//
// Unsetting the preferred location potentially changes the range group
// association to UVM_RANGE_GROUP_ID_NONE if the VA range was previously
// associated with a non-migratable range group.
//
// Changing the preferred location also updates the mask and mappings of GPUs
// in UVM-Lite mode.
//
// The va_range must have type UVM_VA_RANGE_TYPE_MANAGED.
//
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// If out_tracker != NULL any block work will be added to that tracker.
//
// LOCKING: If mm != NULL, the caller must hold mm->mmap_lock in at least read
//          mode.
NV_STATUS uvm_va_range_set_preferred_location(uvm_va_range_t *va_range,
                                              uvm_processor_id_t preferred_location,
                                              int preferred_cpu_nid,
                                              struct mm_struct *mm,
                                              uvm_tracker_t *out_tracker);

// Add a processor to the accessed_by mask and establish any new required
// mappings.
//
// Also update the mask of UVM-Lite GPUs if needed.
//
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// If out_tracker != NULL any block work will be added to that tracker.
//
// LOCKING: If mm != NULL, the caller must hold mm->mmap_lock in at least read
//          mode.
NV_STATUS uvm_va_range_set_accessed_by(uvm_va_range_t *va_range,
                                       uvm_processor_id_t processor_id,
                                       struct mm_struct *mm,
                                       uvm_tracker_t *out_tracker);

// Remove a processor from the accessed_by mask
//
// If out_tracker != NULL any block work will be added to that tracker.
//
// This also updates the mask and mappings of the UVM-Lite GPUs if required.
void uvm_va_range_unset_accessed_by(uvm_va_range_t *va_range,
                                    uvm_processor_id_t processor_id,
                                    uvm_tracker_t *out_tracker);

// Set read-duplication and remove any existing accessed_by and remote mappings
//
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// LOCKING: If mm != NULL, the caller must hold mm->mmap_lock in at least read
//          mode.
NV_STATUS uvm_va_range_set_read_duplication(uvm_va_range_t *va_range, struct mm_struct *mm);

// Unset read-duplication and establish accessed_by mappings
//
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// LOCKING: If mm != NULL, the caller must hold mm->mmap_lock in at least read
//          mode.
NV_STATUS uvm_va_range_unset_read_duplication(uvm_va_range_t *va_range, struct mm_struct *mm);

// Create and destroy vma wrappers
uvm_vma_wrapper_t *uvm_vma_wrapper_alloc(struct vm_area_struct *vma);
void uvm_vma_wrapper_destroy(uvm_vma_wrapper_t *vma_wrapper);

static uvm_va_policy_t *uvm_va_range_get_policy(uvm_va_range_t *va_range)
{
    UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
    return &va_range->managed.policy;
}

NV_STATUS uvm_test_va_range_info(UVM_TEST_VA_RANGE_INFO_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_range_split(UVM_TEST_VA_RANGE_SPLIT_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_range_inject_split_error(UVM_TEST_VA_RANGE_INJECT_SPLIT_ERROR_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_range_inject_add_gpu_va_space_error(UVM_TEST_VA_RANGE_INJECT_ADD_GPU_VA_SPACE_ERROR_PARAMS *params,
                                                          struct file *filp);

#endif // __UVM_VA_RANGE_H__