1 /*******************************************************************************
2     Copyright (c) 2015-2023 NVIDIA Corporation
3 
4     Permission is hereby granted, free of charge, to any person obtaining a copy
5     of this software and associated documentation files (the "Software"), to
6     deal in the Software without restriction, including without limitation the
7     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8     sell copies of the Software, and to permit persons to whom the Software is
9     furnished to do so, subject to the following conditions:
10 
11         The above copyright notice and this permission notice shall be
12         included in all copies or substantial portions of the Software.
13 
14     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20     DEALINGS IN THE SOFTWARE.
21 
22 *******************************************************************************/
23 
24 #include "uvm_common.h"
25 #include "uvm_linux.h"
26 #include "uvm_types.h"
27 #include "uvm_api.h"
28 #include "uvm_global.h"
29 #include "uvm_hal.h"
30 #include "uvm_va_range.h"
31 #include "uvm_va_block.h"
32 #include "uvm_kvmalloc.h"
33 #include "uvm_map_external.h"
34 #include "uvm_perf_thrashing.h"
35 #include "nv_uvm_interface.h"
36 
37 static struct kmem_cache *g_uvm_va_range_cache __read_mostly;
38 static struct kmem_cache *g_uvm_vma_wrapper_cache __read_mostly;
39 
NV_STATUS uvm_va_range_init(void)
41 {
42     g_uvm_va_range_cache = NV_KMEM_CACHE_CREATE("uvm_va_range_t", uvm_va_range_t);
43     if (!g_uvm_va_range_cache)
44         return NV_ERR_NO_MEMORY;
45 
46     g_uvm_vma_wrapper_cache = NV_KMEM_CACHE_CREATE("uvm_vma_wrapper_t", uvm_vma_wrapper_t);
47     if (!g_uvm_vma_wrapper_cache)
48         return NV_ERR_NO_MEMORY;
49 
50     return uvm_va_block_init();
51 }
52 
void uvm_va_range_exit(void)
54 {
55     uvm_va_block_exit();
56     kmem_cache_destroy_safe(&g_uvm_va_range_cache);
57     kmem_cache_destroy_safe(&g_uvm_vma_wrapper_cache);
58 }
59 
static NvU64 block_calc_start(uvm_va_range_t *va_range, size_t index)
61 {
62     NvU64 range_start = UVM_VA_BLOCK_ALIGN_DOWN(va_range->node.start);
63     NvU64 block_start = range_start + index * UVM_VA_BLOCK_SIZE;
64     NvU64 start = max(va_range->node.start, block_start);
65     UVM_ASSERT(start < va_range->node.end);
66     return start;
67 }
68 
static NvU64 block_calc_end(uvm_va_range_t *va_range, size_t index)
70 {
71     NvU64 start = block_calc_start(va_range, index);
72     NvU64 block_end = UVM_VA_BLOCK_ALIGN_UP(start + 1) - 1; // Inclusive end
73     NvU64 end = min(va_range->node.end, block_end);
74     UVM_ASSERT(end > va_range->node.start);
75     return end;
76 }
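
// Example (illustrative only, not required by the code): how the helpers above
// carve a managed range into blocks, assuming the usual 2MB UVM_VA_BLOCK_SIZE.
// For a range spanning [0x100000, 0x4FFFFF], uvm_va_range_num_blocks() returns
// 3 and:
//
//     block 0: block_calc_start() == 0x100000, block_calc_end() == 0x1FFFFF
//     block 1: block_calc_start() == 0x200000, block_calc_end() == 0x3FFFFF
//     block 2: block_calc_start() == 0x400000, block_calc_end() == 0x4FFFFF
//
// Only the first and last blocks are clamped to the range bounds; interior
// blocks always cover a full, aligned UVM_VA_BLOCK_SIZE chunk.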
77 
78 // Called before the range's bounds have been adjusted. This may not actually
79 // shrink the blocks array. For example, if the shrink attempt fails then
80 // va_range's old array is left intact. This may waste memory, but it means this
81 // function cannot fail.
static void blocks_array_shrink(uvm_va_range_t *va_range, size_t new_num_blocks)
83 {
84     size_t new_size = new_num_blocks * sizeof(va_range->blocks[0]);
85     atomic_long_t *new_blocks;
86 
87     UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
88     UVM_ASSERT(va_range->blocks);
89     UVM_ASSERT(uvm_kvsize(va_range->blocks) >= uvm_va_range_num_blocks(va_range) * sizeof(va_range->blocks[0]));
90     UVM_ASSERT(new_num_blocks);
91     UVM_ASSERT(new_num_blocks <= uvm_va_range_num_blocks(va_range));
92 
93     // TODO: Bug 1766579: This could be optimized by only shrinking the array
94     //       when the new size is half of the old size or some similar
95     //       threshold. Need to profile this on real apps to see if that's worth
96     //       doing.
97 
98     new_blocks = uvm_kvrealloc(va_range->blocks, new_size);
99     if (!new_blocks) {
100         // If we failed to allocate a smaller array, just leave the old one as-is
101         UVM_DBG_PRINT("Failed to shrink range [0x%llx, 0x%llx] from %zu blocks to %zu blocks\n",
102                       va_range->node.start,
103                       va_range->node.end,
104                       uvm_kvsize(va_range->blocks) / sizeof(va_range->blocks[0]),
105                       new_num_blocks);
106         return;
107     }
108 
109     va_range->blocks = new_blocks;
110 }
111 
static uvm_va_range_t *uvm_va_range_alloc(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
113 {
114     uvm_va_range_t *va_range = nv_kmem_cache_zalloc(g_uvm_va_range_cache, NV_UVM_GFP_FLAGS);
115     if (!va_range)
116         return NULL;
117 
118     uvm_assert_rwsem_locked_write(&va_space->lock);
119 
120     va_range->va_space = va_space;
121     va_range->node.start = start;
122     va_range->node.end = end;
123 
124     // The range is inserted into the VA space tree only at the end of creation,
125     // so clear the node so the destroy path knows whether to remove it.
126     RB_CLEAR_NODE(&va_range->node.rb_node);
127 
128     return va_range;
129 }
130 
static NV_STATUS uvm_va_range_alloc_reclaim(uvm_va_space_t *va_space,
132                                             struct mm_struct *mm,
133                                             uvm_va_range_type_t type,
134                                             NvU64 start,
135                                             NvU64 end,
136                                             uvm_va_range_t **out_va_range)
137 {
138     uvm_va_range_t *va_range;
139     NV_STATUS status;
140 
141     // Check for no overlap with HMM blocks.
142     status = uvm_hmm_va_block_reclaim(va_space, mm, start, end);
143     if (status != NV_OK)
144         return status;
145 
146     va_range = uvm_va_range_alloc(va_space, start, end);
147     if (!va_range)
148         return NV_ERR_NO_MEMORY;
149 
150     va_range->type = type;
151 
152     *out_va_range = va_range;
153     return NV_OK;
154 }
155 
static uvm_va_range_t *uvm_va_range_alloc_managed(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
157 {
158     uvm_va_range_t *va_range = NULL;
159 
160     va_range = uvm_va_range_alloc(va_space, start, end);
161     if (!va_range)
162         goto error;
163 
164     va_range->type = UVM_VA_RANGE_TYPE_MANAGED;
165     va_range->managed.policy = uvm_va_policy_default;
166 
167     va_range->blocks = uvm_kvmalloc_zero(uvm_va_range_num_blocks(va_range) * sizeof(va_range->blocks[0]));
168     if (!va_range->blocks) {
169         UVM_DBG_PRINT("Failed to allocate %zu blocks\n", uvm_va_range_num_blocks(va_range));
170         goto error;
171     }
172 
173     return va_range;
174 
175 error:
176     uvm_va_range_destroy(va_range, NULL);
177     return NULL;
178 }
179 
NV_STATUS uvm_va_range_create_mmap(uvm_va_space_t *va_space,
181                                    struct mm_struct *mm,
182                                    uvm_vma_wrapper_t *vma_wrapper,
183                                    uvm_va_range_t **out_va_range)
184 {
185     NV_STATUS status;
186     struct vm_area_struct *vma = vma_wrapper->vma;
187     uvm_va_range_t *va_range = NULL;
188 
189     // Check for no overlap with HMM blocks.
190     status = uvm_hmm_va_block_reclaim(va_space, mm, vma->vm_start, vma->vm_end - 1);
191     if (status != NV_OK)
192         return status;
193 
194     // vma->vm_end is exclusive but va_range end is inclusive
195     va_range = uvm_va_range_alloc_managed(va_space, vma->vm_start, vma->vm_end - 1);
196     if (!va_range) {
197         status = NV_ERR_NO_MEMORY;
198         goto error;
199     }
200 
201     va_range->managed.vma_wrapper = vma_wrapper;
202 
203     status = uvm_range_tree_add(&va_space->va_range_tree, &va_range->node);
204     if (status != NV_OK)
205         goto error;
206 
207     if (out_va_range)
208         *out_va_range = va_range;
209 
210     return NV_OK;
211 
212 error:
213     uvm_va_range_destroy(va_range, NULL);
214     return status;
215 }
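
// Usage sketch (illustrative only): the expected call pattern for the
// constructor above from a hypothetical mmap handler. The wrapper helpers named
// here (uvm_vma_wrapper_alloc/uvm_vma_wrapper_destroy) are assumptions, not
// definitions from this file, and error handling is trimmed to the essentials.
//
//     uvm_vma_wrapper_t *vma_wrapper = uvm_vma_wrapper_alloc(vma);
//     uvm_va_range_t *va_range;
//     NV_STATUS status;
//
//     uvm_va_space_down_write(va_space);
//     status = uvm_va_range_create_mmap(va_space, mm, vma_wrapper, &va_range);
//     uvm_va_space_up_write(va_space);
//
//     if (status != NV_OK)
//         uvm_vma_wrapper_destroy(vma_wrapper);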
216 
NV_STATUS uvm_va_range_create_external(uvm_va_space_t *va_space,
218                                        struct mm_struct *mm,
219                                        NvU64 start,
220                                        NvU64 length,
221                                        uvm_va_range_t **out_va_range)
222 {
223     NV_STATUS status;
224     uvm_va_range_t *va_range = NULL;
225     uvm_processor_mask_t *retained_mask = NULL;
226     NvU32 i;
227 
228     status = uvm_va_range_alloc_reclaim(va_space,
229                                         mm,
230                                         UVM_VA_RANGE_TYPE_EXTERNAL,
231                                         start,
232                                         start + length - 1,
233                                         &va_range);
234     if (status != NV_OK)
235         return status;
236 
237     UVM_ASSERT(!va_range->external.retained_mask);
238 
239     retained_mask = uvm_processor_mask_cache_alloc();
240     if (!retained_mask) {
241         status = NV_ERR_NO_MEMORY;
242         goto error;
243     }
244 
245     va_range->external.retained_mask = retained_mask;
246 
247     for (i = 0; i < ARRAY_SIZE(va_range->external.gpu_ranges); i++) {
248         uvm_mutex_init(&va_range->external.gpu_ranges[i].lock, UVM_LOCK_ORDER_EXT_RANGE_TREE);
249         uvm_range_tree_init(&va_range->external.gpu_ranges[i].tree);
250     }
251 
252     status = uvm_range_tree_add(&va_space->va_range_tree, &va_range->node);
253     if (status != NV_OK)
254         goto error;
255 
256     if (out_va_range)
257         *out_va_range = va_range;
258 
259     return NV_OK;
260 
261 error:
262     uvm_va_range_destroy(va_range, NULL);
263 
264     return status;
265 }
266 
NV_STATUS uvm_va_range_create_channel(uvm_va_space_t *va_space,
268                                       struct mm_struct *mm,
269                                       NvU64 start,
270                                       NvU64 end,
271                                       uvm_va_range_t **out_va_range)
272 {
273     NV_STATUS status;
274     uvm_va_range_t *va_range = NULL;
275 
276     status = uvm_va_range_alloc_reclaim(va_space,
277                                         mm,
278                                         UVM_VA_RANGE_TYPE_CHANNEL,
279                                         start,
280                                         end,
281                                         &va_range);
282     if (status != NV_OK)
283         return status;
284 
285     INIT_LIST_HEAD(&va_range->channel.list_node);
286 
287     status = uvm_range_tree_add(&va_space->va_range_tree, &va_range->node);
288     if (status != NV_OK)
289         goto error;
290 
291     if (out_va_range)
292         *out_va_range = va_range;
293 
294     return NV_OK;
295 
296 error:
297     uvm_va_range_destroy(va_range, NULL);
298     return status;
299 }
300 
NV_STATUS uvm_va_range_create_sked_reflected(uvm_va_space_t *va_space,
302                                              struct mm_struct *mm,
303                                              NvU64 start,
304                                              NvU64 length,
305                                              uvm_va_range_t **out_va_range)
306 {
307     NV_STATUS status;
308     uvm_va_range_t *va_range = NULL;
309 
310     status = uvm_va_range_alloc_reclaim(va_space,
311                                         mm,
312                                         UVM_VA_RANGE_TYPE_SKED_REFLECTED,
313                                         start,
314                                         start + length - 1,
315                                         &va_range);
316     if (status != NV_OK)
317         return status;
318 
319     status = uvm_range_tree_add(&va_space->va_range_tree, &va_range->node);
320     if (status != NV_OK)
321         goto error;
322 
323     if (out_va_range)
324         *out_va_range = va_range;
325 
326     return NV_OK;
327 
328 error:
329     uvm_va_range_destroy(va_range, NULL);
330     return status;
331 }
332 
NV_STATUS uvm_va_range_create_semaphore_pool(uvm_va_space_t *va_space,
334                                              struct mm_struct *mm,
335                                              NvU64 start,
336                                              NvU64 length,
337                                              const UvmGpuMappingAttributes *per_gpu_attrs,
338                                              NvU32 per_gpu_attrs_count,
339                                              uvm_va_range_t **out_va_range)
340 {
341     static const uvm_mem_gpu_mapping_attrs_t default_attrs = {
342             .protection = UVM_PROT_READ_WRITE_ATOMIC,
343             .is_cacheable = false
344     };
345 
346     NV_STATUS status;
347     uvm_va_range_t *va_range = NULL;
348     uvm_mem_alloc_params_t mem_alloc_params = { 0 };
349     NvU32 i;
350     uvm_gpu_id_t gpu_id;
351 
352     status = uvm_va_range_alloc_reclaim(va_space,
353                                         mm,
354                                         UVM_VA_RANGE_TYPE_SEMAPHORE_POOL,
355                                         start,
356                                         start + length - 1,
357                                         &va_range);
358     if (status != NV_OK)
359         return status;
360 
361     uvm_tracker_init(&va_range->semaphore_pool.tracker);
362     uvm_mutex_init(&va_range->semaphore_pool.tracker_lock, UVM_LOCK_ORDER_SEMA_POOL_TRACKER);
363 
364     status = uvm_range_tree_add(&va_space->va_range_tree, &va_range->node);
365     if (status != NV_OK)
366         goto error;
367 
368     // The semaphore pool memory is located in sysmem, and must be zeroed upon
369     // allocation because it may be mapped on the user VA space.
370     mem_alloc_params.page_size = UVM_PAGE_SIZE_DEFAULT;
371     mem_alloc_params.size = length;
372     mem_alloc_params.zero = true;
373     mem_alloc_params.mm = mm;
374 
375     va_range->semaphore_pool.default_gpu_attrs = default_attrs;
376     va_range->semaphore_pool.owner = NULL;
377 
378     for_each_gpu_id(gpu_id)
379         va_range->semaphore_pool.gpu_attrs[uvm_id_gpu_index(gpu_id)] = default_attrs;
380 
381     for (i = 0; i < per_gpu_attrs_count; i++) {
382         uvm_gpu_t *gpu;
383         uvm_mem_gpu_mapping_attrs_t attrs = default_attrs;
384 
385         status = uvm_mem_translate_gpu_attributes(&per_gpu_attrs[i], va_space, &gpu, &attrs);
386         if (status != NV_OK)
387             goto error;
388 
389         if (i == 0 && g_uvm_global.conf_computing_enabled)
390             mem_alloc_params.dma_owner = gpu;
391 
392         if (attrs.is_cacheable) {
393             // At most 1 GPU can have this memory cached, in which case it is
394             // the 'owner' GPU.
395             if (va_range->semaphore_pool.owner != NULL) {
396                 UVM_DBG_PRINT("Caching of semaphore pool requested on >1 GPU.");
397                 status = NV_ERR_INVALID_ARGUMENT;
398                 goto error;
399             }
400 
401             va_range->semaphore_pool.owner = gpu;
402         }
403 
404         va_range->semaphore_pool.gpu_attrs[uvm_id_gpu_index(gpu->id)] = attrs;
405     }
406 
407     status = uvm_mem_alloc(&mem_alloc_params, &va_range->semaphore_pool.mem);
408     if (status != NV_OK)
409         goto error;
410 
411     status = uvm_mem_map_cpu_kernel(va_range->semaphore_pool.mem);
412     if (status != NV_OK)
413         goto error;
414 
415     if (out_va_range)
416         *out_va_range = va_range;
417 
418     return NV_OK;
419 
420 error:
421     uvm_va_range_destroy(va_range, NULL);
422     return status;
423 }
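
// Usage sketch (illustrative only; the UvmGpuMappingAttributes field and enum
// names follow the UVM user API and should be treated as assumptions here):
// creating a semaphore pool with a single cacheable GPU, which becomes the
// pool's 'owner' GPU per the logic above.
//
//     UvmGpuMappingAttributes attrs = {
//         .gpuUuid = gpu_uuid,    // hypothetical UUID of the caching GPU
//         .gpuMappingType = UvmGpuMappingTypeReadWriteAtomic,
//         .gpuCachingType = UvmGpuCachingTypeForceCached,
//     };
//
//     status = uvm_va_range_create_semaphore_pool(va_space, mm, base, length,
//                                                 &attrs, 1, &va_range);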
424 
static void uvm_va_range_destroy_managed(uvm_va_range_t *va_range)
426 {
427     uvm_va_block_t *block;
428     uvm_va_block_t *block_tmp;
429     uvm_perf_event_data_t event_data;
430     NV_STATUS status;
431 
432     UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
433 
434     if (va_range->blocks) {
435         // Unmap and drop our ref count on each block
436         for_each_va_block_in_va_range_safe(va_range, block, block_tmp)
437             uvm_va_block_kill(block);
438 
439         uvm_kvfree(va_range->blocks);
440     }
441 
442     event_data.range_destroy.range = va_range;
443     uvm_perf_event_notify(&va_range->va_space->perf_events, UVM_PERF_EVENT_RANGE_DESTROY, &event_data);
444 
445     status = uvm_range_group_assign_range(va_range->va_space, NULL, va_range->node.start, va_range->node.end);
446     UVM_ASSERT(status == NV_OK);
447 }
448 
static void uvm_va_range_destroy_external(uvm_va_range_t *va_range, struct list_head *deferred_free_list)
450 {
451     uvm_gpu_t *gpu;
452 
453     uvm_processor_mask_cache_free(va_range->external.retained_mask);
454 
455     if (uvm_processor_mask_empty(&va_range->external.mapped_gpus))
456         return;
457 
458     UVM_ASSERT(deferred_free_list);
459 
460     for_each_va_space_gpu_in_mask(gpu, va_range->va_space, &va_range->external.mapped_gpus) {
461         uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, gpu);
462         uvm_ext_gpu_map_t *ext_map, *ext_map_next;
463 
464         uvm_mutex_lock(&range_tree->lock);
465         uvm_ext_gpu_map_for_each_safe(ext_map, ext_map_next, va_range, gpu)
466             uvm_ext_gpu_map_destroy(va_range, ext_map, deferred_free_list);
467         uvm_mutex_unlock(&range_tree->lock);
468     }
469 
470     UVM_ASSERT(uvm_processor_mask_empty(&va_range->external.mapped_gpus));
471 }
472 
static void uvm_va_range_destroy_channel(uvm_va_range_t *va_range)
474 {
475     uvm_gpu_va_space_t *gpu_va_space = va_range->channel.gpu_va_space;
476     uvm_membar_t membar;
477 
478     UVM_ASSERT(va_range->channel.ref_count == 0);
479 
480     // Unmap the buffer
481     if (gpu_va_space && va_range->channel.pt_range_vec.ranges) {
482         membar = uvm_hal_downgrade_membar_type(gpu_va_space->gpu, va_range->channel.aperture == UVM_APERTURE_VID);
483         uvm_page_table_range_vec_clear_ptes(&va_range->channel.pt_range_vec, membar);
484         uvm_page_table_range_vec_deinit(&va_range->channel.pt_range_vec);
485     }
486 
487     list_del(&va_range->channel.list_node);
488 
489     // Channel unregister handles releasing this descriptor back to RM
490     va_range->channel.rm_descriptor = 0;
491 }
492 
static void uvm_va_range_destroy_sked_reflected(uvm_va_range_t *va_range)
494 {
495     uvm_gpu_va_space_t *gpu_va_space = va_range->sked_reflected.gpu_va_space;
496 
497     if (!gpu_va_space || !va_range->sked_reflected.pt_range_vec.ranges)
498         return;
499 
500     // The SKED reflected mapping has no physical backing and hence no physical
501     // accesses can be pending to it and no membar is needed.
502     uvm_page_table_range_vec_clear_ptes(&va_range->sked_reflected.pt_range_vec, UVM_MEMBAR_NONE);
503     uvm_page_table_range_vec_deinit(&va_range->sked_reflected.pt_range_vec);
504 
505     va_range->sked_reflected.gpu_va_space = NULL;
506 }
507 
static void uvm_va_range_destroy_semaphore_pool(uvm_va_range_t *va_range)
509 {
510     NV_STATUS status = uvm_tracker_wait_deinit(&va_range->semaphore_pool.tracker);
511     if (status != NV_OK) {
512         UVM_ASSERT_MSG(status == uvm_global_get_status(),
513                        "uvm_tracker_wait() returned %d (%s) in uvm_va_range_destroy_semaphore_pool()\n",
514                        status,
515                        nvstatusToString(status));
516     }
517     uvm_mem_free(va_range->semaphore_pool.mem);
518     va_range->semaphore_pool.mem = NULL;
519 }
520 
void uvm_va_range_destroy(uvm_va_range_t *va_range, struct list_head *deferred_free_list)
522 {
523     if (!va_range)
524         return;
525 
526     if (!RB_EMPTY_NODE(&va_range->node.rb_node))
527         uvm_range_tree_remove(&va_range->va_space->va_range_tree, &va_range->node);
528 
529     switch (va_range->type) {
530         case UVM_VA_RANGE_TYPE_INVALID:
531             // Skip partially-created ranges with unset types
532             break;
533         case UVM_VA_RANGE_TYPE_MANAGED:
534             uvm_va_range_destroy_managed(va_range);
535             break;
536         case UVM_VA_RANGE_TYPE_EXTERNAL:
537             uvm_va_range_destroy_external(va_range, deferred_free_list);
538             break;
539         case UVM_VA_RANGE_TYPE_CHANNEL:
540             uvm_va_range_destroy_channel(va_range);
541             break;
542         case UVM_VA_RANGE_TYPE_SKED_REFLECTED:
543             uvm_va_range_destroy_sked_reflected(va_range);
544             break;
545         case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
546             uvm_va_range_destroy_semaphore_pool(va_range);
547             break;
548         default:
549             UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
550                            va_range->node.start, va_range->node.end, va_range->type);
551     }
552 
553     kmem_cache_free(g_uvm_va_range_cache, va_range);
554 }
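
// Usage sketch (illustrative only): how callers are expected to combine
// uvm_va_range_destroy() with a deferred free list. Destruction happens under
// the VA space lock; the deferred objects are processed only after the lock is
// dropped. uvm_deferred_free_object_list() is the helper used elsewhere in the
// driver for that purpose (assumed here, not defined in this file).
//
//     LIST_HEAD(deferred_free_list);
//
//     uvm_va_space_down_write(va_space);
//     uvm_va_range_destroy(va_range, &deferred_free_list);
//     uvm_va_space_up_write(va_space);
//
//     uvm_deferred_free_object_list(&deferred_free_list);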
555 
void uvm_va_range_zombify(uvm_va_range_t *va_range)
557 {
558     if (!va_range)
559         return;
560 
561     UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
562     UVM_ASSERT(va_range->managed.vma_wrapper);
563 
564     // Destroy will be done by uvm_destroy_vma_managed
565     va_range->managed.vma_wrapper = NULL;
566 }
567 
NV_STATUS uvm_api_clean_up_zombie_resources(UVM_CLEAN_UP_ZOMBIE_RESOURCES_PARAMS *params, struct file *filp)
569 {
570     uvm_va_space_t *va_space = uvm_va_space_get(filp);
571     uvm_va_range_t *va_range, *va_range_next;
572 
573     uvm_va_space_down_write(va_space);
574 
575     uvm_for_each_va_range_safe(va_range, va_range_next, va_space) {
576         if (uvm_va_range_is_managed_zombie(va_range))
577             uvm_va_range_destroy(va_range, NULL);
578     }
579 
580     uvm_va_space_up_write(va_space);
581 
582     return NV_OK;
583 }
584 
NV_STATUS uvm_api_validate_va_range(UVM_VALIDATE_VA_RANGE_PARAMS *params, struct file *filp)
586 {
587     NV_STATUS status = NV_ERR_INVALID_ADDRESS;
588     uvm_va_space_t *va_space = uvm_va_space_get(filp);
589     uvm_va_range_t *va_range;
590 
591     uvm_va_space_down_read(va_space);
592 
593     va_range = uvm_va_range_find(va_space, params->base);
594     if (va_range && va_range->node.start == params->base && va_range->node.end + 1 == params->base + params->length)
595         status = NV_OK;
596 
597     uvm_va_space_up_read(va_space);
598 
599     return status;
600 }
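
// Example (illustrative): the check above requires an exact match of both
// endpoints. For a range covering [0x200000, 0x3FFFFF], only base == 0x200000
// together with length == 0x200000 returns NV_OK; a base or length that merely
// falls within the range still yields NV_ERR_INVALID_ADDRESS.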
601 
static NV_STATUS va_range_add_gpu_va_space_managed(uvm_va_range_t *va_range,
603                                                    uvm_gpu_va_space_t *gpu_va_space,
604                                                    struct mm_struct *mm)
605 {
606     uvm_va_space_t *va_space = va_range->va_space;
607     uvm_gpu_t *gpu = gpu_va_space->gpu;
608     NV_STATUS status = NV_OK;
609     const bool should_add_remote_mappings =
610         uvm_processor_mask_test(&uvm_va_range_get_policy(va_range)->accessed_by, gpu->id) ||
611         uvm_processor_mask_test(&va_range->uvm_lite_gpus, gpu->id);
612 
613     // By this time, the gpu is already in the registration mask.
614     const bool should_disable_read_duplication =
615         uvm_va_range_get_policy(va_range)->read_duplication == UVM_READ_DUPLICATION_ENABLED &&
616         (uvm_va_space_can_read_duplicate(va_space, NULL) != uvm_va_space_can_read_duplicate(va_space, gpu));
617 
618     // Combine conditions to perform a single VA block traversal
619     if (gpu_va_space->ats.enabled || should_add_remote_mappings || should_disable_read_duplication) {
620         uvm_va_block_t *va_block;
621         uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, mm);
622 
623 
624         // TODO: Bug 2090378. Consolidate all per-VA block operations within
625         // uvm_va_block_add_gpu_va_space so we only need to take the VA block
626         // once.
627         for_each_va_block_in_va_range(va_range, va_block) {
628             if (gpu_va_space->ats.enabled) {
629                 // Notify that a new GPU VA space has been created. This is only
630                 // currently used for PDE1 pre-population on ATS systems.
631                 status = UVM_VA_BLOCK_LOCK_RETRY(va_block, NULL, uvm_va_block_add_gpu_va_space(va_block, gpu_va_space));
632                 if (status != NV_OK)
633                     break;
634             }
635 
636             if (should_add_remote_mappings) {
637                 // Now that we have a GPU VA space, map any VA ranges for which
638                 // this GPU is a UVM-Lite GPU or has accessed_by set.
639                 status = uvm_va_block_set_accessed_by(va_block, va_block_context, gpu->id);
640                 if (status != NV_OK)
641                     break;
642             }
643 
644             if (should_disable_read_duplication) {
645                 status = uvm_va_block_unset_read_duplication(va_block, va_block_context);
646                 if (status != NV_OK)
647                     break;
648             }
649         }
650     }
651 
652     return status;
653 }
654 
static NV_STATUS va_range_add_gpu_va_space_semaphore_pool(uvm_va_range_t *va_range, uvm_gpu_t *gpu)
656 {
657     uvm_mem_gpu_mapping_attrs_t *attrs;
658 
659     UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL);
660     UVM_ASSERT(uvm_mem_mapped_on_gpu_kernel(va_range->semaphore_pool.mem, gpu));
661 
662     attrs = &va_range->semaphore_pool.gpu_attrs[uvm_id_gpu_index(gpu->id)];
663 
664     return uvm_mem_map_gpu_user(va_range->semaphore_pool.mem,
665                                 gpu,
666                                 va_range->va_space,
667                                 (void *)va_range->node.start,
668                                 attrs);
669 }
670 
NV_STATUS uvm_va_range_add_gpu_va_space(uvm_va_range_t *va_range,
672                                         uvm_gpu_va_space_t *gpu_va_space,
673                                         struct mm_struct *mm)
674 {
675     UVM_ASSERT(va_range->type < UVM_VA_RANGE_TYPE_MAX);
676 
677     if (va_range->inject_add_gpu_va_space_error) {
678         va_range->inject_add_gpu_va_space_error = false;
679         return NV_ERR_NO_MEMORY;
680     }
681 
682     switch (va_range->type) {
683         case UVM_VA_RANGE_TYPE_MANAGED:
684             return va_range_add_gpu_va_space_managed(va_range, gpu_va_space, mm);
685         case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
686             return va_range_add_gpu_va_space_semaphore_pool(va_range, gpu_va_space->gpu);
687         default:
688             return NV_OK;
689     }
690 }
691 
static void va_range_remove_gpu_va_space_managed(uvm_va_range_t *va_range,
693                                                  uvm_gpu_va_space_t *gpu_va_space,
694                                                  struct mm_struct *mm)
695 {
696     uvm_va_block_t *va_block;
697     uvm_va_space_t *va_space = va_range->va_space;
698     bool should_enable_read_duplicate;
699     uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, mm);
700 
701     should_enable_read_duplicate =
702         uvm_va_range_get_policy(va_range)->read_duplication == UVM_READ_DUPLICATION_ENABLED &&
703         uvm_va_space_can_read_duplicate(va_space, NULL) != uvm_va_space_can_read_duplicate(va_space, gpu_va_space->gpu);
704 
705     for_each_va_block_in_va_range(va_range, va_block) {
706         uvm_mutex_lock(&va_block->lock);
707         uvm_va_block_remove_gpu_va_space(va_block, gpu_va_space, va_block_context);
708         uvm_mutex_unlock(&va_block->lock);
709 
710         if (should_enable_read_duplicate)
711             uvm_va_block_set_read_duplication(va_block, va_block_context);
712     }
713 }
714 
static void va_range_remove_gpu_va_space_external(uvm_va_range_t *va_range,
716                                                   uvm_gpu_t *gpu,
717                                                   struct list_head *deferred_free_list)
718 {
719     uvm_ext_gpu_range_tree_t *range_tree;
720     uvm_ext_gpu_map_t *ext_map, *ext_map_next;
721 
722     UVM_ASSERT(deferred_free_list);
723 
724     range_tree = uvm_ext_gpu_range_tree(va_range, gpu);
725     uvm_mutex_lock(&range_tree->lock);
726 
727     uvm_ext_gpu_map_for_each_safe(ext_map, ext_map_next, va_range, gpu)
728         uvm_ext_gpu_map_destroy(va_range, ext_map, deferred_free_list);
729 
730     uvm_mutex_unlock(&range_tree->lock);
731 }
732 
static void va_range_remove_gpu_va_space_semaphore_pool(uvm_va_range_t *va_range, uvm_gpu_t *gpu)
734 {
735     UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL);
736 
737     if (g_uvm_global.conf_computing_enabled && (va_range->semaphore_pool.mem->dma_owner == gpu))
738         uvm_va_range_destroy(va_range, NULL);
739     else
740         uvm_mem_unmap_gpu_user(va_range->semaphore_pool.mem, gpu);
741 }
742 
void uvm_va_range_remove_gpu_va_space(uvm_va_range_t *va_range,
744                                       uvm_gpu_va_space_t *gpu_va_space,
745                                       struct mm_struct *mm,
746                                       struct list_head *deferred_free_list)
747 {
748     switch (va_range->type) {
749         case UVM_VA_RANGE_TYPE_MANAGED:
750             va_range_remove_gpu_va_space_managed(va_range, gpu_va_space, mm);
751             break;
752         case UVM_VA_RANGE_TYPE_EXTERNAL:
753             va_range_remove_gpu_va_space_external(va_range, gpu_va_space->gpu, deferred_free_list);
754             break;
755         case UVM_VA_RANGE_TYPE_CHANNEL:
756             // All channels under this GPU VA space should've been removed before
757             // removing the GPU VA space.
758             UVM_ASSERT(va_range->channel.gpu_va_space != gpu_va_space);
759             break;
760         case UVM_VA_RANGE_TYPE_SKED_REFLECTED:
761             if (va_range->sked_reflected.gpu_va_space == gpu_va_space)
762                 uvm_va_range_destroy_sked_reflected(va_range);
763             break;
764         case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
765             va_range_remove_gpu_va_space_semaphore_pool(va_range, gpu_va_space->gpu);
766             break;
767         default:
768             UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
769                            va_range->node.start, va_range->node.end, va_range->type);
770     }
771 }
772 
static NV_STATUS uvm_va_range_enable_peer_managed(uvm_va_range_t *va_range, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
774 {
775     NV_STATUS status;
776     uvm_va_block_t *va_block;
777     bool gpu0_accessed_by = uvm_processor_mask_test(&uvm_va_range_get_policy(va_range)->accessed_by, gpu0->id);
778     bool gpu1_accessed_by = uvm_processor_mask_test(&uvm_va_range_get_policy(va_range)->accessed_by, gpu1->id);
779     uvm_va_space_t *va_space = va_range->va_space;
780     uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, NULL);
781 
782 
783     for_each_va_block_in_va_range(va_range, va_block) {
784         // TODO: Bug 1767224: Refactor the uvm_va_block_set_accessed_by logic
785         //       into uvm_va_block_enable_peer.
786         uvm_mutex_lock(&va_block->lock);
787         status = uvm_va_block_enable_peer(va_block, gpu0, gpu1);
788         uvm_mutex_unlock(&va_block->lock);
789 
790         if (status != NV_OK)
791             return status;
792 
793         // For UVM-Lite at most one GPU needs to map the peer GPU if it's the
794         // preferred location, but it doesn't hurt to just try mapping both.
795         if (gpu0_accessed_by) {
796             status = uvm_va_block_set_accessed_by(va_block,
797                                                   va_block_context,
798                                                   gpu0->id);
799             if (status != NV_OK)
800                 return status;
801         }
802 
803         if (gpu1_accessed_by) {
804             status = uvm_va_block_set_accessed_by(va_block,
805                                                   va_block_context,
806                                                   gpu1->id);
807             if (status != NV_OK)
808                 return status;
809         }
810     }
811 
812     return NV_OK;
813 }
814 
NV_STATUS uvm_va_range_enable_peer(uvm_va_range_t *va_range, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
816 {
817     switch (va_range->type) {
818         case UVM_VA_RANGE_TYPE_MANAGED:
819             return uvm_va_range_enable_peer_managed(va_range, gpu0, gpu1);
820         case UVM_VA_RANGE_TYPE_EXTERNAL:
821             // UVM_VA_RANGE_TYPE_EXTERNAL doesn't create new mappings when enabling peer access
822             return NV_OK;
823         case UVM_VA_RANGE_TYPE_CHANNEL:
824             // UVM_VA_RANGE_TYPE_CHANNEL should never have peer mappings
825             return NV_OK;
826         case UVM_VA_RANGE_TYPE_SKED_REFLECTED:
827             // UVM_VA_RANGE_TYPE_SKED_REFLECTED should never have peer mappings
828             return NV_OK;
829         case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
830             // UVM_VA_RANGE_TYPE_SEMAPHORE_POOL should never have peer mappings
831             return NV_OK;
832         default:
833             UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
834                            va_range->node.start, va_range->node.end, va_range->type);
835             return NV_ERR_NOT_SUPPORTED;
836     }
837 }
838 
static void uvm_va_range_disable_peer_external(uvm_va_range_t *va_range,
840                                                uvm_gpu_t *mapping_gpu,
841                                                uvm_gpu_t *owning_gpu,
842                                                struct list_head *deferred_free_list)
843 {
844     uvm_ext_gpu_range_tree_t *range_tree;
845     uvm_ext_gpu_map_t *ext_map, *ext_map_next;
846 
847     range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
848     uvm_mutex_lock(&range_tree->lock);
849     uvm_ext_gpu_map_for_each_safe(ext_map, ext_map_next, va_range, mapping_gpu) {
850         if (ext_map->owning_gpu == owning_gpu && (!ext_map->is_sysmem || ext_map->is_egm)) {
851             UVM_ASSERT(deferred_free_list);
852             uvm_ext_gpu_map_destroy(va_range, ext_map, deferred_free_list);
853         }
854     }
855     uvm_mutex_unlock(&range_tree->lock);
856 }
857 
static void uvm_va_range_disable_peer_managed(uvm_va_range_t *va_range, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
859 {
860     uvm_va_block_t *va_block;
861     uvm_gpu_t *uvm_lite_gpu_to_unmap = NULL;
862 
863     bool uvm_lite_mode = uvm_processor_mask_test(&va_range->uvm_lite_gpus, gpu0->id) &&
864                          uvm_processor_mask_test(&va_range->uvm_lite_gpus, gpu1->id);
865 
866     if (uvm_lite_mode) {
867         // In UVM-Lite mode, the UVM-Lite GPUs can only have mappings to the
868         // preferred location. If peer mappings are being disabled to the
869         // preferred location, then unmap the other GPU.
870         // Nothing to do otherwise.
871         if (uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), gpu0->id, NUMA_NO_NODE))
872             uvm_lite_gpu_to_unmap = gpu1;
873         else if (uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), gpu1->id, NUMA_NO_NODE))
874             uvm_lite_gpu_to_unmap = gpu0;
875         else
876             return;
877     }
878 
879     for_each_va_block_in_va_range(va_range, va_block) {
880         uvm_mutex_lock(&va_block->lock);
881         if (uvm_lite_mode)
882             uvm_va_block_unmap_preferred_location_uvm_lite(va_block, uvm_lite_gpu_to_unmap);
883         else
884             uvm_va_block_disable_peer(va_block, gpu0, gpu1);
885         uvm_mutex_unlock(&va_block->lock);
886     }
887 
888     if (uvm_lite_mode && !uvm_range_group_all_migratable(va_range->va_space, va_range->node.start, va_range->node.end)) {
889         UVM_ASSERT(uvm_lite_gpu_to_unmap);
890 
891         // Migration is prevented, but we had to unmap a UVM-Lite GPU. Update
892         // the accessed by and UVM-Lite GPUs masks as it cannot be considered a
893         // UVM-Lite GPU any more.
894         uvm_va_range_unset_accessed_by(va_range, uvm_lite_gpu_to_unmap->id, NULL);
895     }
896 }
897 
void uvm_va_range_disable_peer(uvm_va_range_t *va_range,
899                                uvm_gpu_t *gpu0,
900                                uvm_gpu_t *gpu1,
901                                struct list_head *deferred_free_list)
902 {
903 
904     switch (va_range->type) {
905         case UVM_VA_RANGE_TYPE_MANAGED:
906             uvm_va_range_disable_peer_managed(va_range, gpu0, gpu1);
907             break;
908         case UVM_VA_RANGE_TYPE_EXTERNAL:
909             // If GPU 0 has a mapping to GPU 1, remove GPU 0's mapping
910             uvm_va_range_disable_peer_external(va_range, gpu0, gpu1, deferred_free_list);
911             // If GPU 1 has a mapping to GPU 0, remove GPU 1's mapping
912             uvm_va_range_disable_peer_external(va_range, gpu1, gpu0, deferred_free_list);
913             break;
914         case UVM_VA_RANGE_TYPE_CHANNEL:
915             // UVM_VA_RANGE_TYPE_CHANNEL should never have peer mappings
916             break;
917         case UVM_VA_RANGE_TYPE_SKED_REFLECTED:
918             // UVM_VA_RANGE_TYPE_SKED_REFLECTED should never have peer mappings
919             break;
920         case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
921             // UVM_VA_RANGE_TYPE_SEMAPHORE_POOL should never have peer mappings
922             break;
923         default:
924             UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
925                            va_range->node.start, va_range->node.end, va_range->type);
926     }
927 }
928 
static NV_STATUS va_range_register_gpu_semaphore_pool(uvm_va_range_t *va_range, uvm_gpu_t *gpu)
930 {
931     // TODO: Bug 1812419: pass GPU mapping attributes to uvm_mem_map_gpu_kernel
932     // once that function accepts them.
933     return uvm_mem_map_gpu_kernel(va_range->semaphore_pool.mem, gpu);
934 }
935 
NV_STATUS uvm_va_range_register_gpu(uvm_va_range_t *va_range, uvm_gpu_t *gpu)
937 {
938     UVM_ASSERT(va_range->type < UVM_VA_RANGE_TYPE_MAX);
939     uvm_assert_rwsem_locked_write(&va_range->va_space->lock);
940 
941     if (va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL)
942         return va_range_register_gpu_semaphore_pool(va_range, gpu);
943 
944     return NV_OK;
945 }
946 
static void va_range_unregister_gpu_managed(uvm_va_range_t *va_range, uvm_gpu_t *gpu, struct mm_struct *mm)
948 {
949     uvm_va_block_t *va_block;
950 
951     // Reset preferred location and accessed-by of VA ranges if needed
952     // Note: ignoring the return code of uvm_va_range_set_preferred_location since this
953     // will only return on error when setting a preferred location, not on a reset
954     if (uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), gpu->id, NUMA_NO_NODE))
955         (void)uvm_va_range_set_preferred_location(va_range, UVM_ID_INVALID, NUMA_NO_NODE, mm, NULL);
956 
957     uvm_va_range_unset_accessed_by(va_range, gpu->id, NULL);
958 
959     // Migrate and free any remaining resident allocations on this GPU
960     for_each_va_block_in_va_range(va_range, va_block)
961         uvm_va_block_unregister_gpu(va_block, gpu, mm);
962 }
963 
964 // The GPU being unregistered can't have any remaining mappings, since those
965 // were removed when the corresponding GPU VA space was removed. However, other
966 // GPUs could still have mappings to memory resident on this GPU, so we have to
967 // unmap those.
static void va_range_unregister_gpu_external(uvm_va_range_t *va_range,
969                                              uvm_gpu_t *gpu,
970                                              struct list_head *deferred_free_list)
971 {
972     uvm_ext_gpu_map_t *ext_map, *ext_map_next;
973     uvm_gpu_t *other_gpu;
974 
975     for_each_va_space_gpu_in_mask(other_gpu, va_range->va_space, &va_range->external.mapped_gpus) {
976         uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, other_gpu);
977         UVM_ASSERT(other_gpu != gpu);
978 
979         uvm_mutex_lock(&range_tree->lock);
980         uvm_ext_gpu_map_for_each_safe(ext_map, ext_map_next, va_range, other_gpu) {
981             if (ext_map->owning_gpu == gpu) {
982                 UVM_ASSERT(deferred_free_list);
983                 uvm_ext_gpu_map_destroy(va_range, ext_map, deferred_free_list);
984             }
985         }
986         uvm_mutex_unlock(&range_tree->lock);
987     }
988 }
989 
static void va_range_unregister_gpu_semaphore_pool(uvm_va_range_t *va_range, uvm_gpu_t *gpu)
991 {
992     NV_STATUS status;
993 
994     // Ranges for this GPU should have been previously unmapped from the user VA
995     // space during GPU VA space unregister, which should have already happened.
996     UVM_ASSERT(!uvm_mem_mapped_on_gpu_user(va_range->semaphore_pool.mem, gpu));
997     UVM_ASSERT(uvm_mem_mapped_on_gpu_kernel(va_range->semaphore_pool.mem, gpu));
998 
999     uvm_mutex_lock(&va_range->semaphore_pool.tracker_lock);
1000     status = uvm_tracker_wait(&va_range->semaphore_pool.tracker);
1001     uvm_mutex_unlock(&va_range->semaphore_pool.tracker_lock);
1002     if (status != NV_OK)
1003         UVM_ASSERT(status == uvm_global_get_status());
1004 
1005     uvm_mem_unmap_gpu_phys(va_range->semaphore_pool.mem, gpu);
1006 
1007     va_range->semaphore_pool.gpu_attrs[uvm_id_gpu_index(gpu->id)] = va_range->semaphore_pool.default_gpu_attrs;
1008     if (va_range->semaphore_pool.owner == gpu)
1009         va_range->semaphore_pool.owner = NULL;
1010 }
1011 
void uvm_va_range_unregister_gpu(uvm_va_range_t *va_range,
1013                                  uvm_gpu_t *gpu,
1014                                  struct mm_struct *mm,
1015                                  struct list_head *deferred_free_list)
1016 {
1017     switch (va_range->type) {
1018         case UVM_VA_RANGE_TYPE_MANAGED:
1019             va_range_unregister_gpu_managed(va_range, gpu, mm);
1020             break;
1021         case UVM_VA_RANGE_TYPE_EXTERNAL:
1022             va_range_unregister_gpu_external(va_range, gpu, deferred_free_list);
1023             break;
1024         case UVM_VA_RANGE_TYPE_CHANNEL:
1025             // All ranges should have been destroyed by GPU VA space unregister,
1026             // which should have already happened.
1027             UVM_ASSERT(va_range->channel.gpu_va_space->gpu != gpu);
1028             break;
1029         case UVM_VA_RANGE_TYPE_SKED_REFLECTED:
1030             // All ranges for this GPU should have been unmapped by GPU VA space
1031             // unregister (uvm_va_range_destroy_sked_reflected), which should
1032             // have already happened.
1033             if (va_range->sked_reflected.gpu_va_space != NULL)
1034                 UVM_ASSERT(va_range->sked_reflected.gpu_va_space->gpu != gpu);
1035             break;
1036         case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
1037             va_range_unregister_gpu_semaphore_pool(va_range, gpu);
1038             break;
1039         default:
1040             UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
1041                            va_range->node.start, va_range->node.end, va_range->type);
1042     }
1043 }
1044 
1045 // Split existing's blocks into new. new's blocks array has already been
1046 // allocated. This is called before existing's range node is split, so it
1047 // overlaps new. new is always in the upper region of existing.
1048 //
1049 // The caller will do the range tree split.
1050 //
1051 // If this fails it leaves existing unchanged.
static NV_STATUS uvm_va_range_split_blocks(uvm_va_range_t *existing, uvm_va_range_t *new)
1053 {
1054     uvm_va_block_t *old_block, *block = NULL;
1055     size_t existing_blocks, split_index, new_index = 0;
1056     NV_STATUS status;
1057 
1058     UVM_ASSERT(new->node.start >  existing->node.start);
1059     UVM_ASSERT(new->node.end   <= existing->node.end);
1060 
1061     split_index = uvm_va_range_block_index(existing, new->node.start);
1062 
1063     // Handle a block spanning the split point
1064     if (block_calc_start(existing, split_index) != new->node.start) {
1065         // If a populated block actually spans the split point, we have to split
1066         // the block. Otherwise just account for the extra entry in the arrays.
1067         old_block = uvm_va_range_block(existing, split_index);
1068         if (old_block) {
1069             UVM_ASSERT(old_block->start < new->node.start);
1070             status = uvm_va_block_split(old_block, new->node.start - 1, &block, new);
1071             if (status != NV_OK)
1072                 return status;
1073 
1074             // No memory barrier is needed since we're holding the va_space lock in
1075             // write mode, so no other thread can access the blocks array.
1076             atomic_long_set(&new->blocks[0], (long)block);
1077         }
1078 
1079         new_index = 1;
1080     }
1081 
1082     // uvm_va_block_split gets first crack at injecting an error. If it did so,
1083     // we wouldn't be here. However, not all va_range splits will call
1084     // uvm_va_block_split so we need an extra check here. We can't push this
1085     // injection later since all paths past this point assume success, so they
1086     // modify the state of 'existing' range.
1087     //
1088     // Even if there was no block split above, there is no guarantee that one
1089     // of our blocks doesn't have the 'inject_split_error' flag set. We clear
1090     // that here to prevent multiple errors caused by one
1091     // 'uvm_test_va_range_inject_split_error' call.
1092     if (existing->inject_split_error) {
1093         UVM_ASSERT(!block);
1094         existing->inject_split_error = false;
1095 
1096         for_each_va_block_in_va_range(existing, block) {
1097             uvm_va_block_test_t *block_test = uvm_va_block_get_test(block);
1098             if (block_test)
1099                 block_test->inject_split_error = false;
1100         }
1101 
1102         return NV_ERR_NO_MEMORY;
1103     }
1104 
1105     existing_blocks = split_index + new_index;
1106 
1107     // Copy existing's blocks over to the new range, accounting for the explicit
1108     // assignment above in case we did a block split. There are two general
1109     // cases:
1110     //
1111     // No split:
1112     //                             split_index
1113     //                                  v
1114     //  existing (before) [----- A ----][----- B ----][----- C ----]
1115     //  existing (after)  [----- A ----]
1116     //  new                             [----- B ----][----- C ----]
1117     //
1118     // Split:
1119     //                                    split_index
1120     //                                         v
1121     //  existing (before) [----- A ----][----- B ----][----- C ----]
1122     //  existing (after)  [----- A ----][- B -]
1123     //  new                                    [- N -][----- C ----]
1124     //                                            ^new->blocks[0]
1125 
1126     // Note, if we split the last block of existing, this won't iterate at all.
1127     for (; new_index < uvm_va_range_num_blocks(new); new_index++) {
1128         block = uvm_va_range_block(existing, split_index + new_index);
1129         if (!block) {
1130             // new's array was cleared at allocation
1131             UVM_ASSERT(uvm_va_range_block(new, new_index) == NULL);
1132             continue;
1133         }
1134 
1135         // As soon as we make this assignment and drop the lock, the reverse
1136         // mapping code can start looking at new, so new must be ready to go.
1137         uvm_mutex_lock(&block->lock);
1138         UVM_ASSERT(block->va_range == existing);
1139         block->va_range = new;
1140         uvm_mutex_unlock(&block->lock);
1141 
1142         // No memory barrier is needed since we're holding the va_space lock in
1143         // write mode, so no other thread can access the blocks array.
1144         atomic_long_set(&new->blocks[new_index], (long)block);
1145         atomic_long_set(&existing->blocks[split_index + new_index], (long)NULL);
1146     }
1147 
1148     blocks_array_shrink(existing, existing_blocks);
1149 
1150     return NV_OK;
1151 }
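
// Example (illustrative, assuming the usual 2MB UVM_VA_BLOCK_SIZE): for an
// existing range [0x100000, 0x4FFFFF] split so that new starts at 0x300000,
// split_index == uvm_va_range_block_index(existing, 0x300000) == 1. Since
// block_calc_start(existing, 1) == 0x200000 != 0x300000, block 1 spans the
// split point: if it is populated it is split at 0x2FFFFF and the upper piece
// becomes new->blocks[0]; block 2 is then handed over to new->blocks[1].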
1152 
NV_STATUS uvm_va_range_split(uvm_va_range_t *existing_va_range,
1154                              NvU64 new_end,
1155                              uvm_va_range_t **new_va_range)
1156 {
1157     uvm_va_space_t *va_space = existing_va_range->va_space;
1158     uvm_va_range_t *new = NULL;
1159     uvm_perf_event_data_t event_data;
1160     NV_STATUS status;
1161 
1162     UVM_ASSERT(existing_va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
1163     UVM_ASSERT(new_end > existing_va_range->node.start);
1164     UVM_ASSERT(new_end < existing_va_range->node.end);
1165     UVM_ASSERT(PAGE_ALIGNED(new_end + 1));
1166     uvm_assert_rwsem_locked_write(&va_space->lock);
1167 
1168     new = uvm_va_range_alloc_managed(va_space, new_end + 1, existing_va_range->node.end);
1169     if (!new) {
1170         status = NV_ERR_NO_MEMORY;
1171         goto error;
1172     }
1173 
1174     // The new va_range is under the same vma. If this is a uvm_vm_open, the
1175     // caller takes care of updating existing's vma_wrapper for us.
1176     new->managed.vma_wrapper = existing_va_range->managed.vma_wrapper;
1177 
1178     // Copy over state before splitting blocks so any block lookups happening
1179     // concurrently on the eviction path will see the new range's data.
1180     uvm_va_range_get_policy(new)->read_duplication = uvm_va_range_get_policy(existing_va_range)->read_duplication;
1181     uvm_va_range_get_policy(new)->preferred_location = uvm_va_range_get_policy(existing_va_range)->preferred_location;
1182     uvm_va_range_get_policy(new)->preferred_nid = uvm_va_range_get_policy(existing_va_range)->preferred_nid;
1183     uvm_processor_mask_copy(&uvm_va_range_get_policy(new)->accessed_by,
1184                             &uvm_va_range_get_policy(existing_va_range)->accessed_by);
1185     uvm_processor_mask_copy(&new->uvm_lite_gpus, &existing_va_range->uvm_lite_gpus);
1186 
1187     status = uvm_va_range_split_blocks(existing_va_range, new);
1188     if (status != NV_OK)
1189         goto error;
1190 
1191     // Finally, update the VA range tree
1192     uvm_range_tree_split(&va_space->va_range_tree, &existing_va_range->node, &new->node);
1193 
1194     if (new->type == UVM_VA_RANGE_TYPE_MANAGED) {
1195         event_data.range_shrink.range = new;
1196         uvm_perf_event_notify(&va_space->perf_events, UVM_PERF_EVENT_RANGE_SHRINK, &event_data);
1197     }
1198 
1199     if (new_va_range)
1200         *new_va_range = new;
1201     return NV_OK;
1202 
1203 error:
1204     uvm_va_range_destroy(new, NULL);
1205     return status;
1206 
1207 }
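
// Usage sketch (illustrative only): splitting a managed range at a page-aligned
// boundary. new_end must end one byte before a page boundary, so with 4K pages
// splitting [0x100000, 0x4FFFFF] at new_end == 0x2FFFFF leaves the existing
// range as [0x100000, 0x2FFFFF] and creates a new range [0x300000, 0x4FFFFF].
//
//     uvm_va_range_t *new_range;
//     NV_STATUS status = uvm_va_range_split(existing, 0x2FFFFF, &new_range);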
1208 
uvm_va_range_t *uvm_va_range_find(uvm_va_space_t *va_space, NvU64 addr)
1210 {
1211     uvm_assert_rwsem_locked(&va_space->lock);
1212     return uvm_va_range_container(uvm_range_tree_find(&va_space->va_range_tree, addr));
1213 }
1214 
uvm_va_range_t *uvm_va_space_iter_first(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
1216 {
1217     uvm_range_tree_node_t *node = uvm_range_tree_iter_first(&va_space->va_range_tree, start, end);
1218     return uvm_va_range_container(node);
1219 }
1220 
uvm_va_range_t *uvm_va_space_iter_next(uvm_va_range_t *va_range, NvU64 end)
1222 {
1223     uvm_range_tree_node_t *node;
1224 
1225     // Handling a NULL va_range here makes uvm_for_each_va_range_in_safe much
1226     // less messy
1227     if (!va_range)
1228         return NULL;
1229 
1230     node = uvm_range_tree_iter_next(&va_range->va_space->va_range_tree, &va_range->node, end);
1231     return uvm_va_range_container(node);
1232 }
1233 
size_t uvm_va_range_num_blocks(uvm_va_range_t *va_range)
1235 {
1236     NvU64 start = UVM_VA_BLOCK_ALIGN_DOWN(va_range->node.start);
1237     NvU64 end   = UVM_VA_BLOCK_ALIGN_UP(va_range->node.end); // End is inclusive
1238     return (end - start) / UVM_VA_BLOCK_SIZE;
1239 }
1240 
1241 size_t uvm_va_range_block_index(uvm_va_range_t *va_range, NvU64 addr)
1242 {
1243     size_t addr_index, start_index, index;
1244 
1245     UVM_ASSERT(addr >= va_range->node.start);
1246     UVM_ASSERT(addr <= va_range->node.end);
1247     UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
1248 
1249     // Each block will cover as much space as possible within the aligned
1250     // UVM_VA_BLOCK_SIZE, up to the parent VA range boundaries. In other words,
1251     // the entire VA space can be broken into UVM_VA_BLOCK_SIZE chunks. Even if
1252     // there are multiple ranges (and thus multiple blocks) within an actual
1253     // UVM_VA_BLOCK_SIZE chunk, no single range will have more than one block
1254     // unless it spans a UVM_VA_BLOCK_SIZE alignment boundary.
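    // Worked example (illustrative, assuming UVM_VA_BLOCK_SIZE == 2MB): for a
    // range starting at 0x100000, addr 0x250000 gives addr_index 1 and
    // start_index 0, i.e. block index 1, because the 2MB alignment boundary at
    // 0x200000 lies between node.start and addr even though they are less than
    // UVM_VA_BLOCK_SIZE apart.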
1255     addr_index = (size_t)(addr / UVM_VA_BLOCK_SIZE);
1256     start_index = (size_t)(va_range->node.start / UVM_VA_BLOCK_SIZE);
1257 
1258     index = addr_index - start_index;
1259     UVM_ASSERT(index < uvm_va_range_num_blocks(va_range));
1260     return index;
1261 }
1262 
1263 NV_STATUS uvm_va_range_block_create(uvm_va_range_t *va_range, size_t index, uvm_va_block_t **out_block)
1264 {
1265     uvm_va_block_t *block, *old;
1266     NV_STATUS status;
1267 
1268     block = uvm_va_range_block(va_range, index);
1269     if (!block) {
1270         // No block has been created here yet, so allocate one and attempt to
1271         // insert it. Note that this runs the risk of an out-of-memory error
1272         // when multiple threads race and all concurrently allocate a block for
1273         // the same address. This should be extremely rare. There is also
1274         // precedent in the Linux kernel, which does the same thing for demand-
1275         // allocation of anonymous pages.
1276         status = uvm_va_block_create(va_range,
1277                                      block_calc_start(va_range, index),
1278                                      block_calc_end(va_range, index),
1279                                      &block);
1280         if (status != NV_OK)
1281             return status;
1282 
1283         // Try to insert it
1284         old = (uvm_va_block_t *)nv_atomic_long_cmpxchg(&va_range->blocks[index],
1285                                                       (long)NULL,
1286                                                       (long)block);
1287         if (old) {
1288             // Someone else beat us on the insert
1289             uvm_va_block_release(block);
1290             block = old;
1291         }
1292     }
1293 
1294     *out_block = block;
1295     return NV_OK;
1296 }
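
// Hypothetical caller sketch (not taken from the driver): look up or lazily
// create the block covering addr, then operate on it under the block lock:
//
//     uvm_va_block_t *block;
//     NV_STATUS status = uvm_va_range_block_create(va_range,
//                                                  uvm_va_range_block_index(va_range, addr),
//                                                  &block);
//     if (status == NV_OK) {
//         uvm_mutex_lock(&block->lock);
//         // ... operate on the block ...
//         uvm_mutex_unlock(&block->lock);
//     }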
1297 
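// Returns the first populated block in va_range after va_block, or the first
// populated block in the range when va_block is NULL. Slots whose blocks have
// not been created yet are skipped.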
1298 uvm_va_block_t *uvm_va_range_block_next(uvm_va_range_t *va_range, uvm_va_block_t *va_block)
1299 {
1300     uvm_va_space_t *va_space = va_range->va_space;
1301     size_t i = 0;
1302 
1303     uvm_assert_rwsem_locked(&va_space->lock);
1304 
1305     UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
1306 
1307     if (va_block)
1308         i = uvm_va_range_block_index(va_range, va_block->start) + 1;
1309 
1310     for (; i < uvm_va_range_num_blocks(va_range); i++) {
1311         va_block = uvm_va_range_block(va_range, i);
1312         if (va_block) {
1313             UVM_ASSERT(va_block->va_range == va_range);
1314             UVM_ASSERT(uvm_va_range_block_index(va_range, va_block->start) == i);
1315             return va_block;
1316         }
1317     }
1318 
1319     return NULL;
1320 }
1321 
1322 static NV_STATUS range_unmap_mask(uvm_va_range_t *va_range,
1323                                   const uvm_processor_mask_t *mask,
1324                                   uvm_tracker_t *out_tracker)
1325 {
1326     uvm_va_space_t *va_space = va_range->va_space;
1327     uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);
1328     uvm_va_block_t *block;
1329 
1330     UVM_ASSERT_MSG(va_range->type == UVM_VA_RANGE_TYPE_MANAGED, "type 0x%x\n", va_range->type);
1331 
1332     if (uvm_processor_mask_empty(mask))
1333         return NV_OK;
1334 
1335     for_each_va_block_in_va_range(va_range, block) {
1336         NV_STATUS status;
1337         uvm_va_block_region_t region = uvm_va_block_region_from_block(block);
1338 
1339         uvm_mutex_lock(&block->lock);
1340         status = uvm_va_block_unmap_mask(block, block_context, mask, region, NULL);
1341         if (out_tracker)
1342             uvm_tracker_add_tracker_safe(out_tracker, &block->tracker);
1343 
1344         uvm_mutex_unlock(&block->lock);
1345         if (status != NV_OK)
1346             return status;
1347     }
1348 
1349     return NV_OK;
1350 }
1351 
1352 static NV_STATUS range_unmap(uvm_va_range_t *va_range, uvm_processor_id_t processor, uvm_tracker_t *out_tracker)
1353 {
1354     uvm_processor_mask_t *mask;
1355     uvm_va_space_t *va_space = va_range->va_space;
1356 
1357     uvm_assert_rwsem_locked_write(&va_space->lock);
1358 
1359     mask = &va_space->unmap_mask;
1360 
1361     UVM_ASSERT_MSG(va_range->type == UVM_VA_RANGE_TYPE_MANAGED, "type 0x%x\n", va_range->type);
1362 
1363     uvm_processor_mask_zero(mask);
1364     uvm_processor_mask_set(mask, processor);
1365 
1366     return range_unmap_mask(va_range, mask, out_tracker);
1367 }
1368 
1369 static NV_STATUS range_map_uvm_lite_gpus(uvm_va_range_t *va_range, uvm_tracker_t *out_tracker)
1370 {
1371     NV_STATUS status = NV_OK;
1372     uvm_va_block_t *va_block;
1373     uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_range->va_space, NULL);
1374 
1375     UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
1376 
1377     if (uvm_processor_mask_empty(&va_range->uvm_lite_gpus))
1378         return NV_OK;
1379 
1380 
1381     for_each_va_block_in_va_range(va_range, va_block) {
1382         // UVM-Lite GPUs always map with RWA
1383         uvm_mutex_lock(&va_block->lock);
1384         status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, NULL,
1385                 uvm_va_block_map_mask(va_block,
1386                                       va_block_context,
1387                                       &va_range->uvm_lite_gpus,
1388                                       uvm_va_block_region_from_block(va_block),
1389                                       NULL,
1390                                       UVM_PROT_READ_WRITE_ATOMIC,
1391                                       UvmEventMapRemoteCauseCoherence));
1392         if (status == NV_OK && out_tracker)
1393             status = uvm_tracker_add_tracker(out_tracker, &va_block->tracker);
1394 
1395         uvm_mutex_unlock(&va_block->lock);
1396         if (status != NV_OK)
1397             break;
1398     }
1399 
1400     return status;
1401 }
1402 
1403 // Calculate the mask of GPUs that should follow the UVM-Lite behaviour
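// For example (illustrative): with a non-faultable preferred GPU P and
// accessed_by = {CPU, non-faultable GPU A, faultable GPU B}, the resulting
// mask is {P, A}. If the preferred location is the CPU, only the non-faultable
// accessed_by GPUs are included; if it is unset or a faultable GPU, the mask
// is empty.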
1404 static void calc_uvm_lite_gpus_mask(uvm_va_space_t *va_space,
1405                                     uvm_processor_id_t preferred_location,
1406                                     const uvm_processor_mask_t *accessed_by_mask,
1407                                     uvm_processor_mask_t *uvm_lite_gpus)
1408 {
1409     uvm_gpu_id_t gpu_id;
1410 
1411     uvm_assert_rwsem_locked_write(&va_space->lock);
1412 
1413     // Zero out the mask first
1414     uvm_processor_mask_zero(uvm_lite_gpus);
1415 
1416     // If no preferred location is set then there are no GPUs following the UVM-Lite behavior
1417     if (UVM_ID_IS_INVALID(preferred_location))
1418         return;
1419 
1420     // If the preferred location is a faultable GPU, then no GPUs should follow
1421     // the UVM-Lite behaviour.
1422     if (UVM_ID_IS_GPU(preferred_location) &&
1423         uvm_processor_mask_test(&va_space->faultable_processors, preferred_location)) {
1424         return;
1425     }
1426 
1427     // Otherwise add all non-faultable GPUs to the UVM-Lite mask that have
1428     // accessed by set.
1429     for_each_gpu_id_in_mask(gpu_id, accessed_by_mask) {
1430         if (!uvm_processor_mask_test(&va_space->faultable_processors, gpu_id))
1431             uvm_processor_mask_set(uvm_lite_gpus, gpu_id);
1432     }
1433 
1434     // And the preferred location if it's a GPU
1435     if (UVM_ID_IS_GPU(preferred_location))
1436         uvm_processor_mask_set(uvm_lite_gpus, preferred_location);
1437 }
1438 
1439 // Update the mask of GPUs that follow the UVM-Lite behaviour
1440 static void range_update_uvm_lite_gpus_mask(uvm_va_range_t *va_range)
1441 {
1442     UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
1443     calc_uvm_lite_gpus_mask(va_range->va_space,
1444                             uvm_va_range_get_policy(va_range)->preferred_location,
1445                             &uvm_va_range_get_policy(va_range)->accessed_by,
1446                             &va_range->uvm_lite_gpus);
1447 }
1448 
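// Changing the preferred location proceeds in several steps (see the inline
// comments below): compute the new UVM-Lite GPU mask, unmap both the old and
// new UVM-Lite GPUs, update the policy, then for each block re-establish
// accessed_by mappings for processors that stop being UVM-Lite (and for the
// old preferred location) and apply the new preferred location, and finally
// map the UVM-Lite GPUs to memory resident on the new preferred location.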
1449 NV_STATUS uvm_va_range_set_preferred_location(uvm_va_range_t *va_range,
1450                                               uvm_processor_id_t preferred_location,
1451                                               int preferred_cpu_nid,
1452                                               struct mm_struct *mm,
1453                                               uvm_tracker_t *out_tracker)
1454 {
1455     NV_STATUS status = NV_OK;
1456     uvm_processor_mask_t *all_uvm_lite_gpus = NULL;
1457     uvm_processor_mask_t *new_uvm_lite_gpus = NULL;
1458     uvm_processor_mask_t *set_accessed_by_processors = NULL;
1459     uvm_range_group_range_iter_t iter;
1460     uvm_range_group_range_t *rgr = NULL;
1461     uvm_va_space_t *va_space = va_range->va_space;
1462     uvm_va_block_t *va_block;
1463     uvm_va_block_context_t *va_block_context;
1464     uvm_va_policy_t *va_range_policy;
1465 
1466     uvm_assert_rwsem_locked_write(&va_space->lock);
1467     UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
1468 
1469     all_uvm_lite_gpus = uvm_processor_mask_cache_alloc();
1470     if (!all_uvm_lite_gpus) {
1471         status = NV_ERR_NO_MEMORY;
1472         goto out;
1473     }
1474 
1475     new_uvm_lite_gpus = uvm_processor_mask_cache_alloc();
1476     if (!new_uvm_lite_gpus) {
1477         status = NV_ERR_NO_MEMORY;
1478         goto out;
1479     }
1480 
1481     set_accessed_by_processors = uvm_processor_mask_cache_alloc();
1482     if (!set_accessed_by_processors) {
1483         status = NV_ERR_NO_MEMORY;
1484         goto out;
1485     }
1486 
1487     va_range_policy = uvm_va_range_get_policy(va_range);
1488     if (uvm_va_policy_preferred_location_equal(va_range_policy, preferred_location, preferred_cpu_nid))
1489         goto out;
1490 
1491     // Mark all range group ranges within this VA range as migrated since the preferred location has changed.
1492     uvm_range_group_for_each_range_in(rgr, va_space, va_range->node.start, va_range->node.end) {
1493         uvm_spin_lock(&rgr->range_group->migrated_ranges_lock);
1494         if (list_empty(&rgr->range_group_migrated_list_node))
1495             list_move_tail(&rgr->range_group_migrated_list_node, &rgr->range_group->migrated_ranges);
1496         uvm_spin_unlock(&rgr->range_group->migrated_ranges_lock);
1497     }
1498 
1499     // Calculate the new UVM-Lite GPUs mask, but don't update va_range state so
1500     // that we can keep block_page_check_mappings() happy while updating the
1501     // mappings.
1502     calc_uvm_lite_gpus_mask(va_space, preferred_location, &va_range_policy->accessed_by, new_uvm_lite_gpus);
1503 
1504     // If the range contains non-migratable range groups, check that new UVM-Lite GPUs
1505     // can all map the new preferred location.
1506     if (!uvm_range_group_all_migratable(va_space, va_range->node.start, va_range->node.end) &&
1507         UVM_ID_IS_VALID(preferred_location) &&
1508         !uvm_processor_mask_subset(new_uvm_lite_gpus, &va_space->accessible_from[uvm_id_value(preferred_location)])) {
1509         status = NV_ERR_INVALID_DEVICE;
1510         goto out;
1511     }
1512 
1513     if (UVM_ID_IS_INVALID(preferred_location)) {
1514         uvm_range_group_for_each_migratability_in_safe(&iter, va_space, va_range->node.start, va_range->node.end) {
1515             if (!iter.migratable) {
1516                 // Clear the range group association for any unmigratable ranges if there is no preferred location
1517                 status = uvm_range_group_assign_range(va_space, NULL, iter.start, iter.end);
1518                 if (status != NV_OK)
1519                     goto out;
1520             }
1521         }
1522     }
1523 
1524     // Unmap all old and new UVM-Lite GPUs
1525     //  - GPUs that stop being UVM-Lite need to be unmapped so that they don't
1526     //    have stale mappings to the old preferred location.
1527     //  - GPUs that will continue to be UVM-Lite GPUs or are new UVM-Lite GPUs
1528     //    need to be unmapped so that the new preferred location can be mapped.
1529     uvm_processor_mask_or(all_uvm_lite_gpus, &va_range->uvm_lite_gpus, new_uvm_lite_gpus);
1530     status = range_unmap_mask(va_range, all_uvm_lite_gpus, out_tracker);
1531     if (status != NV_OK)
1532         goto out;
1533 
1534     // GPUs that stop being UVM-Lite, but are in the accessed_by mask need to
1535     // have any possible mappings established.
1536     uvm_processor_mask_andnot(set_accessed_by_processors, &va_range->uvm_lite_gpus, new_uvm_lite_gpus);
1537 
1538     // A GPU which had been in UVM-Lite mode before must still be in UVM-Lite
1539     // mode if it is the new preferred location. Otherwise we'd have to be more
1540     // careful below to not establish remote mappings to the new preferred
1541     // location.
1542     if (UVM_ID_IS_GPU(preferred_location))
1543         UVM_ASSERT(!uvm_processor_mask_test(set_accessed_by_processors, preferred_location));
1544 
1545     // The old preferred location should establish new remote mappings if it has
1546     // accessed-by set.
1547     if (UVM_ID_IS_VALID(va_range_policy->preferred_location))
1548         uvm_processor_mask_set(set_accessed_by_processors, va_range_policy->preferred_location);
1549 
1550     uvm_processor_mask_and(set_accessed_by_processors, set_accessed_by_processors, &va_range_policy->accessed_by);
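    // For example (illustrative): if the old UVM-Lite set is {G0, G1}, the new
    // set is {G1, G2} and the old preferred location is G0, then
    // set_accessed_by_processors is ({G0, G1} & ~{G1, G2}) | {G0} = {G0},
    // further filtered by the accessed_by mask.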
1551 
1552     // Now update the va_range state
1553     va_range_policy->preferred_location = preferred_location;
1554     va_range_policy->preferred_nid = preferred_cpu_nid;
1555     uvm_processor_mask_copy(&va_range->uvm_lite_gpus, new_uvm_lite_gpus);
1556 
1557     va_block_context = uvm_va_space_block_context(va_space, mm);
1558 
1559     for_each_va_block_in_va_range(va_range, va_block) {
1560         uvm_processor_id_t id;
1561         uvm_va_block_region_t region = uvm_va_block_region_from_block(va_block);
1562 
1563         for_each_id_in_mask(id, set_accessed_by_processors) {
1564             status = uvm_va_block_set_accessed_by(va_block, va_block_context, id);
1565             if (status != NV_OK)
1566                 goto out;
1567         }
1568 
1569         // Also, mark CPU pages as dirty and remove remote mappings from the new
1570         // preferred location
1571         uvm_mutex_lock(&va_block->lock);
1572         status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
1573                                            NULL,
1574                                            uvm_va_block_set_preferred_location_locked(va_block,
1575                                                                                       va_block_context,
1576                                                                                       region));
1577 
1578         if (out_tracker) {
1579             NV_STATUS tracker_status;
1580 
1581             tracker_status = uvm_tracker_add_tracker_safe(out_tracker, &va_block->tracker);
1582             if (status == NV_OK)
1583                 status = tracker_status;
1584         }
1585 
1586         uvm_mutex_unlock(&va_block->lock);
1587 
1588         if (status != NV_OK)
1589             goto out;
1590     }
1591 
1592     // And lastly map all of the current UVM-Lite GPUs to the resident pages on
1593     // the new preferred location. Anything that's not resident right now will
1594     // get mapped on the next PreventMigration().
1595     status = range_map_uvm_lite_gpus(va_range, out_tracker);
1596 
1597 out:
1598     uvm_processor_mask_cache_free(set_accessed_by_processors);
1599     uvm_processor_mask_cache_free(new_uvm_lite_gpus);
1600     uvm_processor_mask_cache_free(all_uvm_lite_gpus);
1601 
1602     return status;
1603 }
1604 
1605 NV_STATUS uvm_va_range_set_accessed_by(uvm_va_range_t *va_range,
1606                                        uvm_processor_id_t processor_id,
1607                                        struct mm_struct *mm,
1608                                        uvm_tracker_t *out_tracker)
1609 {
1610     NV_STATUS status = NV_OK;
1611     uvm_va_block_t *va_block;
1612     uvm_va_space_t *va_space = va_range->va_space;
1613     uvm_va_policy_t *policy = uvm_va_range_get_policy(va_range);
1614     uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, mm);
1615     uvm_processor_mask_t *new_uvm_lite_gpus;
1616 
1617     // va_block_context->scratch_processor_mask cannot be used since
1618     // range_unmap() calls uvm_va_space_block_context(), which re-
1619     // initializes the VA block context structure.
1620     new_uvm_lite_gpus = uvm_processor_mask_cache_alloc();
1621     if (!new_uvm_lite_gpus)
1622         return NV_ERR_NO_MEMORY;
1623 
1624     // If the range belongs to a non-migratable range group and processor_id is
1625     // a non-faultable GPU, check that it can map the preferred location.
1626     if (!uvm_range_group_all_migratable(va_space, va_range->node.start, va_range->node.end) &&
1627         UVM_ID_IS_GPU(processor_id) &&
1628         !uvm_processor_mask_test(&va_space->faultable_processors, processor_id) &&
1629         !uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(policy->preferred_location)], processor_id)) {
1630         status = NV_ERR_INVALID_DEVICE;
1631         goto out;
1632     }
1633 
1634     uvm_processor_mask_set(&policy->accessed_by, processor_id);
1635 
1636     // If a GPU is already a UVM-Lite GPU then there is nothing else to do.
1637     if (uvm_processor_mask_test(&va_range->uvm_lite_gpus, processor_id))
1638         goto out;
1639 
1640     // Calculate the new UVM-Lite GPUs mask, but don't update it in the va range
1641     // yet so that we can keep block_page_check_mappings() happy while updating
1642     // the mappings.
1643     calc_uvm_lite_gpus_mask(va_space, policy->preferred_location, &policy->accessed_by, new_uvm_lite_gpus);
1644 
1645     if (uvm_processor_mask_test(new_uvm_lite_gpus, processor_id)) {
1646         // GPUs that become UVM-Lite GPUs need to unmap everything so that they
1647         // can map the preferred location.
1648         status = range_unmap(va_range, processor_id, out_tracker);
1649         if (status != NV_OK)
1650             goto out;
1651     }
1652 
1653     uvm_processor_mask_copy(&va_range->uvm_lite_gpus, new_uvm_lite_gpus);
1654 
1655     for_each_va_block_in_va_range(va_range, va_block) {
1656         status = uvm_va_block_set_accessed_by(va_block, va_block_context, processor_id);
1657         if (status != NV_OK)
1658             goto out;
1659     }
1660 
1661 out:
1662     uvm_processor_mask_cache_free(new_uvm_lite_gpus);
1663     return status;
1664 }
1665 
1666 void uvm_va_range_unset_accessed_by(uvm_va_range_t *va_range,
1667                                     uvm_processor_id_t processor_id,
1668                                     uvm_tracker_t *out_tracker)
1669 {
1670     uvm_range_group_range_t *rgr = NULL;
1671 
1672     // Mark all range group ranges within this VA range as migrated. We do this to force
1673     // uvm_range_group_set_migration_policy to re-check the policy state since we're changing it here.
1674     uvm_range_group_for_each_range_in(rgr, va_range->va_space, va_range->node.start, va_range->node.end) {
1675         uvm_spin_lock(&rgr->range_group->migrated_ranges_lock);
1676         if (list_empty(&rgr->range_group_migrated_list_node))
1677             list_move_tail(&rgr->range_group_migrated_list_node, &rgr->range_group->migrated_ranges);
1678         uvm_spin_unlock(&rgr->range_group->migrated_ranges_lock);
1679     }
1680 
1681     uvm_processor_mask_clear(&uvm_va_range_get_policy(va_range)->accessed_by, processor_id);
1682 
1683     // If a UVM-Lite GPU is being removed from the accessed_by mask, it will
1684     // also stop being a UVM-Lite GPU unless it's also the preferred location.
1685     if (uvm_processor_mask_test(&va_range->uvm_lite_gpus, processor_id) &&
1686         !uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), processor_id, NUMA_NO_NODE)) {
1687         range_unmap(va_range, processor_id, out_tracker);
1688     }
1689 
1690     range_update_uvm_lite_gpus_mask(va_range);
1691 }
1692 
1693 NV_STATUS uvm_va_range_set_read_duplication(uvm_va_range_t *va_range, struct mm_struct *mm)
1694 {
1695     uvm_va_block_t *va_block;
1696     uvm_va_block_context_t *va_block_context;
1697 
1698     if (uvm_va_range_get_policy(va_range)->read_duplication == UVM_READ_DUPLICATION_ENABLED)
1699         return NV_OK;
1700 
1701     va_block_context = uvm_va_space_block_context(va_range->va_space, mm);
1702 
1703     for_each_va_block_in_va_range(va_range, va_block) {
1704         NV_STATUS status = uvm_va_block_set_read_duplication(va_block, va_block_context);
1705 
1706         if (status != NV_OK)
1707             return status;
1708     }
1709 
1710     return NV_OK;
1711 }
1712 
1713 NV_STATUS uvm_va_range_unset_read_duplication(uvm_va_range_t *va_range, struct mm_struct *mm)
1714 {
1715     uvm_va_block_t *va_block;
1716     uvm_va_block_context_t *va_block_context;
1717     NV_STATUS status;
1718 
1719     if (uvm_va_range_get_policy(va_range)->read_duplication == UVM_READ_DUPLICATION_DISABLED)
1720         return NV_OK;
1721 
1722     va_block_context = uvm_va_space_block_context(va_range->va_space, mm);
1723 
1724     for_each_va_block_in_va_range(va_range, va_block) {
1725         status = uvm_va_block_unset_read_duplication(va_block, va_block_context);
1726 
1727         if (status != NV_OK)
1728             return status;
1729     }
1730 
1731     return NV_OK;
1732 }
1733 
1734 uvm_vma_wrapper_t *uvm_vma_wrapper_alloc(struct vm_area_struct *vma)
1735 {
1736     uvm_vma_wrapper_t *vma_wrapper = nv_kmem_cache_zalloc(g_uvm_vma_wrapper_cache, NV_UVM_GFP_FLAGS);
1737     if (!vma_wrapper)
1738         return NULL;
1739 
1740     vma_wrapper->vma = vma;
1741     uvm_init_rwsem(&vma_wrapper->lock, UVM_LOCK_ORDER_LEAF);
1742 
1743     return vma_wrapper;
1744 }
1745 
1746 void uvm_vma_wrapper_destroy(uvm_vma_wrapper_t *vma_wrapper)
1747 {
1748     if (!vma_wrapper)
1749         return;
1750 
1751     uvm_assert_rwsem_unlocked(&vma_wrapper->lock);
1752 
1753     kmem_cache_free(g_uvm_vma_wrapper_cache, vma_wrapper);
1754 }
1755 
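// PTE maker callback for uvm_page_table_range_vec_write_ptes(): every page in
// the SKED reflected range gets the same HAL-provided PTE value, so both the
// offset within the range and caller_data are unused.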
1756 static NvU64 sked_reflected_pte_maker(uvm_page_table_range_vec_t *range_vec, NvU64 offset, void *caller_data)
1757 {
1758     (void)caller_data;
1759 
1760     return range_vec->tree->hal->make_sked_reflected_pte();
1761 }
1762 
1763 static NV_STATUS uvm_map_sked_reflected_range(uvm_va_space_t *va_space, UVM_MAP_DYNAMIC_PARALLELISM_REGION_PARAMS *params)
1764 {
1765     NV_STATUS status;
1766     uvm_va_range_t *va_range = NULL;
1767     uvm_gpu_t *gpu;
1768     uvm_gpu_va_space_t *gpu_va_space;
1769     uvm_page_tree_t *page_tables;
1770     struct mm_struct *mm;
1771 
1772     if (uvm_api_range_invalid_4k(params->base, params->length))
1773         return NV_ERR_INVALID_ADDRESS;
1774 
1775     // The mm needs to be locked in order to remove stale HMM va_blocks.
1776     mm = uvm_va_space_mm_or_current_retain_lock(va_space);
1777     uvm_va_space_down_write(va_space);
1778 
1779     gpu = uvm_va_space_get_gpu_by_uuid_with_gpu_va_space(va_space, &params->gpuUuid);
1780     if (!gpu) {
1781         status = NV_ERR_INVALID_DEVICE;
1782         goto done;
1783     }
1784 
1785     // Check if the GPU can access the VA
1786     if (!uvm_gpu_can_address(gpu, params->base, params->length)) {
1787         status = NV_ERR_OUT_OF_RANGE;
1788         goto done;
1789     }
1790 
1791     gpu_va_space = va_space->gpu_va_spaces[uvm_id_gpu_index(gpu->id)];
1792     page_tables = &gpu_va_space->page_tables;
1793 
1794     // The VA range must exactly cover one supported GPU page
1795     if (!is_power_of_2(params->length) ||
1796         !IS_ALIGNED(params->base, params->length) ||
1797         !uvm_mmu_page_size_supported(page_tables, params->length)) {
1798         status = NV_ERR_INVALID_ADDRESS;
1799         goto done;
1800     }
1801 
1802     status = uvm_va_range_create_sked_reflected(va_space, mm, params->base, params->length, &va_range);
1803     if (status != NV_OK) {
1804         UVM_DBG_PRINT_RL("Failed to create sked reflected VA range [0x%llx, 0x%llx)\n",
1805                 params->base, params->base + params->length);
1806         goto done;
1807     }
1808 
1809     va_range->sked_reflected.gpu_va_space = gpu_va_space;
1810 
1811     status = uvm_page_table_range_vec_init(page_tables,
1812                                            va_range->node.start,
1813                                            uvm_va_range_size(va_range),
1814                                            params->length,
1815                                            UVM_PMM_ALLOC_FLAGS_EVICT,
1816                                            &va_range->sked_reflected.pt_range_vec);
1817     if (status != NV_OK)
1818         goto done;
1819 
1820     status = uvm_page_table_range_vec_write_ptes(&va_range->sked_reflected.pt_range_vec,
1821             UVM_MEMBAR_NONE, sked_reflected_pte_maker, NULL);
1822 
1823     if (status != NV_OK)
1824         goto done;
1825 
1826 done:
1827     if (status != NV_OK && va_range != NULL)
1828         uvm_va_range_destroy(va_range, NULL);
1829 
1830     uvm_va_space_up_write(va_space);
1831     uvm_va_space_mm_or_current_release_unlock(va_space, mm);
1832 
1833     return status;
1834 }
1835 
1836 NV_STATUS uvm_api_map_dynamic_parallelism_region(UVM_MAP_DYNAMIC_PARALLELISM_REGION_PARAMS *params, struct file *filp)
1837 {
1838     uvm_va_space_t *va_space = uvm_va_space_get(filp);
1839 
1840     // Note that the ranges created by the UvmMapDynamicParallelismRegion() API
1841     // are referred to as "SKED reflected ranges" internally, as that name is
1842     // more descriptive.
1843     return uvm_map_sked_reflected_range(va_space, params);
1844 }
1845 
1846 NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params, struct file *filp)
1847 {
1848     NV_STATUS status;
1849     uvm_va_space_t *va_space = uvm_va_space_get(filp);
1850     uvm_va_range_t *va_range = NULL;
1851     uvm_gpu_t *gpu;
1852     struct mm_struct *mm;
1853 
1854     if (uvm_api_range_invalid(params->base, params->length))
1855         return NV_ERR_INVALID_ADDRESS;
1856     if (params->gpuAttributesCount > UVM_MAX_GPUS_V2)
1857         return NV_ERR_INVALID_ARGUMENT;
1858 
1859     if (g_uvm_global.conf_computing_enabled && params->gpuAttributesCount == 0)
1860         return NV_ERR_INVALID_ARGUMENT;
1861 
1862     // The mm needs to be locked in order to remove stale HMM va_blocks.
1863     mm = uvm_va_space_mm_or_current_retain_lock(va_space);
1864     uvm_va_space_down_write(va_space);
1865 
1866     status = uvm_va_range_create_semaphore_pool(va_space,
1867                                                 mm,
1868                                                 params->base,
1869                                                 params->length,
1870                                                 params->perGpuAttributes,
1871                                                 params->gpuAttributesCount,
1872                                                 &va_range);
1873     if (status != NV_OK)
1874         goto unlock;
1875 
1876     for_each_va_space_gpu(gpu, va_space) {
1877         status = va_range_register_gpu_semaphore_pool(va_range, gpu);
1878         if (status != NV_OK)
1879             goto done;
1880 
1881         if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->id))
1882             continue;
1883 
1884         status = va_range_add_gpu_va_space_semaphore_pool(va_range, gpu);
1885         if (status != NV_OK)
1886             goto done;
1887     }
1888 
1889 done:
1890     if (status != NV_OK)
1891         uvm_va_range_destroy(va_range, NULL);
1892 
1893 unlock:
1894     uvm_va_space_up_write(va_space);
1895     uvm_va_space_mm_or_current_release_unlock(va_space, mm);
1896     return status;
1897 }
1898 
1899 NV_STATUS uvm_test_va_range_info(UVM_TEST_VA_RANGE_INFO_PARAMS *params, struct file *filp)
1900 {
1901     uvm_va_space_t *va_space;
1902     uvm_va_range_t *va_range;
1903     uvm_processor_id_t processor_id;
1904     uvm_va_policy_t *policy;
1905     struct vm_area_struct *vma;
1906     NV_STATUS status = NV_OK;
1907     struct mm_struct *mm;
1908 
1909     va_space = uvm_va_space_get(filp);
1910 
1911     mm = uvm_va_space_mm_or_current_retain_lock(va_space);
1912     uvm_va_space_down_read(va_space);
1913 
1914     va_range = uvm_va_range_find(va_space, params->lookup_address);
1915     if (!va_range) {
1916         status = uvm_hmm_va_range_info(va_space, mm, params);
1917         goto out;
1918     }
1919 
1920     policy = uvm_va_range_get_policy(va_range);
1921     params->va_range_start = va_range->node.start;
1922     params->va_range_end   = va_range->node.end;
1923 
1924     // -Wall implies -Wenum-compare, so cast through int to avoid warnings
1925     BUILD_BUG_ON((int)UVM_READ_DUPLICATION_UNSET    != (int)UVM_TEST_READ_DUPLICATION_UNSET);
1926     BUILD_BUG_ON((int)UVM_READ_DUPLICATION_ENABLED  != (int)UVM_TEST_READ_DUPLICATION_ENABLED);
1927     BUILD_BUG_ON((int)UVM_READ_DUPLICATION_DISABLED != (int)UVM_TEST_READ_DUPLICATION_DISABLED);
1928     BUILD_BUG_ON((int)UVM_READ_DUPLICATION_MAX      != (int)UVM_TEST_READ_DUPLICATION_MAX);
1929     params->read_duplication = policy->read_duplication;
1930 
1931     if (UVM_ID_IS_INVALID(policy->preferred_location)) {
1932         memset(&params->preferred_location, 0, sizeof(params->preferred_location));
1933         params->preferred_cpu_nid = NUMA_NO_NODE;
1934     }
1935     else {
1936         uvm_va_space_processor_uuid(va_space, &params->preferred_location, policy->preferred_location);
1937         params->preferred_cpu_nid = policy->preferred_nid;
1938     }
1939 
1940     params->accessed_by_count = 0;
1941     for_each_id_in_mask(processor_id, &policy->accessed_by)
1942         uvm_va_space_processor_uuid(va_space, &params->accessed_by[params->accessed_by_count++], processor_id);
1943 
1944     // -Wall implies -Wenum-compare, so cast through int to avoid warnings
1945     BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_INVALID        != (int)UVM_VA_RANGE_TYPE_INVALID);
1946     BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_MANAGED        != (int)UVM_VA_RANGE_TYPE_MANAGED);
1947     BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_EXTERNAL       != (int)UVM_VA_RANGE_TYPE_EXTERNAL);
1948     BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_CHANNEL        != (int)UVM_VA_RANGE_TYPE_CHANNEL);
1949     BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_SKED_REFLECTED != (int)UVM_VA_RANGE_TYPE_SKED_REFLECTED);
1950     BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_SEMAPHORE_POOL != (int)UVM_VA_RANGE_TYPE_SEMAPHORE_POOL);
1951     BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_MAX            != (int)UVM_VA_RANGE_TYPE_MAX);
1952     params->type = va_range->type;
1953 
1954     switch (va_range->type) {
1955         case UVM_VA_RANGE_TYPE_MANAGED:
1956 
1957             params->managed.subtype = UVM_TEST_RANGE_SUBTYPE_UVM;
1958             if (!va_range->managed.vma_wrapper) {
1959                 params->managed.is_zombie = NV_TRUE;
1960                 goto out;
1961             }
1962             params->managed.is_zombie = NV_FALSE;
1963             vma = uvm_va_range_vma_check(va_range, mm);
1964             if (!vma) {
1965                 // We aren't in the same mm as the one which owns the vma, and
1966                 // we don't have that mm locked.
1967                 params->managed.owned_by_calling_process = NV_FALSE;
1968                 goto out;
1969             }
1970             params->managed.owned_by_calling_process = (mm == current->mm ? NV_TRUE : NV_FALSE);
1971             params->managed.vma_start = vma->vm_start;
1972             params->managed.vma_end   = vma->vm_end - 1;
1973             break;
1974         default:
1975             break;
1976     }
1977 
1978 out:
1979     uvm_va_space_up_read(va_space);
1980     uvm_va_space_mm_or_current_release_unlock(va_space, mm);
1981     return status;
1982 }
1983 
1984 NV_STATUS uvm_test_va_range_split(UVM_TEST_VA_RANGE_SPLIT_PARAMS *params, struct file *filp)
1985 {
1986     uvm_va_space_t *va_space = uvm_va_space_get(filp);
1987     uvm_va_range_t *va_range;
1988     NV_STATUS status = NV_OK;
1989 
1990     if (!PAGE_ALIGNED(params->split_address + 1))
1991         return NV_ERR_INVALID_ADDRESS;
1992 
1993     uvm_va_space_down_write(va_space);
1994 
1995     va_range = uvm_va_range_find(va_space, params->split_address);
1996     if (!va_range ||
1997         va_range->node.end == params->split_address ||
1998         va_range->type != UVM_VA_RANGE_TYPE_MANAGED) {
1999         status = NV_ERR_INVALID_ADDRESS;
2000         goto out;
2001     }
2002 
2003     status = uvm_va_range_split(va_range, params->split_address, NULL);
2004 
2005 out:
2006     uvm_va_space_up_write(va_space);
2007     return status;
2008 }
2009 
2010 NV_STATUS uvm_test_va_range_inject_split_error(UVM_TEST_VA_RANGE_INJECT_SPLIT_ERROR_PARAMS *params, struct file *filp)
2011 {
2012     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2013     uvm_va_range_t *va_range;
2014     struct mm_struct *mm;
2015     NV_STATUS status = NV_OK;
2016 
2017     mm = uvm_va_space_mm_or_current_retain_lock(va_space);
2018     uvm_va_space_down_write(va_space);
2019 
2020     va_range = uvm_va_range_find(va_space, params->lookup_address);
2021     if (!va_range) {
2022         if (!mm)
2023             status = NV_ERR_INVALID_ADDRESS;
2024         else
2025             status = uvm_hmm_test_va_block_inject_split_error(va_space, params->lookup_address);
2026     }
2027     else if (va_range->type != UVM_VA_RANGE_TYPE_MANAGED) {
2028         status = NV_ERR_INVALID_ADDRESS;
2029     }
2030     else {
2031         uvm_va_block_t *va_block;
2032         size_t split_index;
2033 
2034         va_range->inject_split_error = true;
2035 
2036         split_index = uvm_va_range_block_index(va_range, params->lookup_address);
2037         va_block = uvm_va_range_block(va_range, split_index);
2038         if (va_block) {
2039             uvm_va_block_test_t *block_test = uvm_va_block_get_test(va_block);
2040 
2041             if (block_test)
2042                 block_test->inject_split_error = true;
2043         }
2044     }
2045 
2046     uvm_va_space_up_write(va_space);
2047     uvm_va_space_mm_or_current_release_unlock(va_space, mm);
2048     return status;
2049 }
2050 
2051 NV_STATUS uvm_test_va_range_inject_add_gpu_va_space_error(UVM_TEST_VA_RANGE_INJECT_ADD_GPU_VA_SPACE_ERROR_PARAMS *params,
2052                                                           struct file *filp)
2053 {
2054     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2055     uvm_va_range_t *va_range;
2056     NV_STATUS status = NV_OK;
2057 
2058     uvm_va_space_down_write(va_space);
2059 
2060     va_range = uvm_va_range_find(va_space, params->lookup_address);
2061     if (!va_range) {
2062         status = NV_ERR_INVALID_ADDRESS;
2063         goto out;
2064     }
2065 
2066     va_range->inject_add_gpu_va_space_error = true;
2067 
2068 out:
2069     uvm_va_space_up_write(va_space);
2070     return status;
2071 }
2072 
2073