/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_types.h"
#include "uvm_api.h"
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_va_range.h"
#include "uvm_va_block.h"
#include "uvm_kvmalloc.h"
#include "uvm_map_external.h"
#include "uvm_perf_thrashing.h"
#include "nv_uvm_interface.h"

static struct kmem_cache *g_uvm_va_range_cache __read_mostly;
static struct kmem_cache *g_uvm_vma_wrapper_cache __read_mostly;

NV_STATUS uvm_va_range_init(void)
{
    g_uvm_va_range_cache = NV_KMEM_CACHE_CREATE("uvm_va_range_t", uvm_va_range_t);
    if (!g_uvm_va_range_cache)
        return NV_ERR_NO_MEMORY;

    g_uvm_vma_wrapper_cache = NV_KMEM_CACHE_CREATE("uvm_vma_wrapper_t", uvm_vma_wrapper_t);
    if (!g_uvm_vma_wrapper_cache)
        return NV_ERR_NO_MEMORY;

    return uvm_va_block_init();
}

void uvm_va_range_exit(void)
{
    uvm_va_block_exit();
    kmem_cache_destroy_safe(&g_uvm_va_range_cache);
    kmem_cache_destroy_safe(&g_uvm_vma_wrapper_cache);
}

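// Compute the bounds of the block with the given index within the VA range.
// Blocks are UVM_VA_BLOCK_SIZE-aligned regions clamped to the range bounds, so
// the first and last blocks of an unaligned range are shorter than
// UVM_VA_BLOCK_SIZE.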
static NvU64 block_calc_start(uvm_va_range_t *va_range, size_t index)
{
    NvU64 range_start = UVM_VA_BLOCK_ALIGN_DOWN(va_range->node.start);
    NvU64 block_start = range_start + index * UVM_VA_BLOCK_SIZE;
    NvU64 start = max(va_range->node.start, block_start);
    UVM_ASSERT(start < va_range->node.end);
    return start;
}

static NvU64 block_calc_end(uvm_va_range_t *va_range, size_t index)
{
    NvU64 start = block_calc_start(va_range, index);
    NvU64 block_end = UVM_VA_BLOCK_ALIGN_UP(start + 1) - 1; // Inclusive end
    NvU64 end = min(va_range->node.end, block_end);
    UVM_ASSERT(end > va_range->node.start);
    return end;
}

// Called before the range's bounds have been adjusted. This may not actually
// shrink the blocks array. For example, if the shrink attempt fails then
// va_range's old array is left intact. This may waste memory, but it means
// this function cannot fail.
static void blocks_array_shrink(uvm_va_range_t *va_range, size_t new_num_blocks)
{
    size_t new_size = new_num_blocks * sizeof(va_range->blocks[0]);
    atomic_long_t *new_blocks;

    UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
    UVM_ASSERT(va_range->blocks);
    UVM_ASSERT(uvm_kvsize(va_range->blocks) >= uvm_va_range_num_blocks(va_range) * sizeof(va_range->blocks[0]));
    UVM_ASSERT(new_num_blocks);
    UVM_ASSERT(new_num_blocks <= uvm_va_range_num_blocks(va_range));

    // TODO: Bug 1766579: This could be optimized by only shrinking the array
    //       when the new size is half of the old size or some similar
    //       threshold. Need to profile this on real apps to see if that's
    //       worth doing.

    new_blocks = uvm_kvrealloc(va_range->blocks, new_size);
    if (!new_blocks) {
        // If we failed to allocate a smaller array, just leave the old one as-is
        UVM_DBG_PRINT("Failed to shrink range [0x%llx, 0x%llx] from %zu blocks to %zu blocks\n",
                      va_range->node.start,
                      va_range->node.end,
                      uvm_kvsize(va_range->blocks) / sizeof(va_range->blocks[0]),
                      new_num_blocks);
        return;
    }

    va_range->blocks = new_blocks;
}

static uvm_va_range_t *uvm_va_range_alloc(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
    uvm_va_range_t *va_range = nv_kmem_cache_zalloc(g_uvm_va_range_cache, NV_UVM_GFP_FLAGS);
    if (!va_range)
        return NULL;

    uvm_assert_rwsem_locked_write(&va_space->lock);

    va_range->va_space = va_space;
    va_range->node.start = start;
    va_range->node.end = end;

    // The range is inserted into the VA space tree only at the end of
    // creation, so clear the node so the destroy path knows whether to remove
    // it.
    RB_CLEAR_NODE(&va_range->node.rb_node);

    return va_range;
}

static NV_STATUS uvm_va_range_alloc_reclaim(uvm_va_space_t *va_space,
                                            struct mm_struct *mm,
                                            uvm_va_range_type_t type,
                                            NvU64 start,
                                            NvU64 end,
                                            uvm_va_range_t **out_va_range)
{
    uvm_va_range_t *va_range;
    NV_STATUS status;

    // Check for no overlap with HMM blocks.
    status = uvm_hmm_va_block_reclaim(va_space, mm, start, end);
    if (status != NV_OK)
        return status;

    va_range = uvm_va_range_alloc(va_space, start, end);
    if (!va_range)
        return NV_ERR_NO_MEMORY;

    va_range->type = type;

    *out_va_range = va_range;
    return NV_OK;
}

static uvm_va_range_t *uvm_va_range_alloc_managed(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
    uvm_va_range_t *va_range = NULL;

    va_range = uvm_va_range_alloc(va_space, start, end);
    if (!va_range)
        goto error;

    va_range->type = UVM_VA_RANGE_TYPE_MANAGED;
    va_range->managed.policy = uvm_va_policy_default;

    va_range->blocks = uvm_kvmalloc_zero(uvm_va_range_num_blocks(va_range) * sizeof(va_range->blocks[0]));
    if (!va_range->blocks) {
        UVM_DBG_PRINT("Failed to allocate %zu blocks\n", uvm_va_range_num_blocks(va_range));
        goto error;
    }

    return va_range;

error:
    uvm_va_range_destroy(va_range, NULL);
    return NULL;
}

NV_STATUS uvm_va_range_create_mmap(uvm_va_space_t *va_space,
                                   struct mm_struct *mm,
                                   uvm_vma_wrapper_t *vma_wrapper,
                                   uvm_va_range_t **out_va_range)
{
    NV_STATUS status;
    struct vm_area_struct *vma = vma_wrapper->vma;
    uvm_va_range_t *va_range = NULL;

    // Check for no overlap with HMM blocks.
    status = uvm_hmm_va_block_reclaim(va_space, mm, vma->vm_start, vma->vm_end - 1);
    if (status != NV_OK)
        return status;

    // vma->vm_end is exclusive but va_range end is inclusive
    va_range = uvm_va_range_alloc_managed(va_space, vma->vm_start, vma->vm_end - 1);
    if (!va_range) {
        status = NV_ERR_NO_MEMORY;
        goto error;
    }

    va_range->managed.vma_wrapper = vma_wrapper;

    status = uvm_range_tree_add(&va_space->va_range_tree, &va_range->node);
    if (status != NV_OK)
        goto error;

    if (out_va_range)
        *out_va_range = va_range;

    return NV_OK;

error:
    uvm_va_range_destroy(va_range, NULL);
    return status;
}

NV_STATUS uvm_va_range_create_external(uvm_va_space_t *va_space,
                                       struct mm_struct *mm,
                                       NvU64 start,
                                       NvU64 length,
                                       uvm_va_range_t **out_va_range)
{
    NV_STATUS status;
    uvm_va_range_t *va_range = NULL;
    uvm_processor_mask_t *retained_mask = NULL;
    NvU32 i;

    status = uvm_va_range_alloc_reclaim(va_space,
                                        mm,
                                        UVM_VA_RANGE_TYPE_EXTERNAL,
                                        start,
                                        start + length - 1,
                                        &va_range);
    if (status != NV_OK)
        return status;

    UVM_ASSERT(!va_range->external.retained_mask);

    retained_mask = uvm_processor_mask_cache_alloc();
    if (!retained_mask) {
        status = NV_ERR_NO_MEMORY;
        goto error;
    }

    va_range->external.retained_mask = retained_mask;

    for (i = 0; i < ARRAY_SIZE(va_range->external.gpu_ranges); i++) {
        uvm_mutex_init(&va_range->external.gpu_ranges[i].lock, UVM_LOCK_ORDER_EXT_RANGE_TREE);
        uvm_range_tree_init(&va_range->external.gpu_ranges[i].tree);
    }

    status = uvm_range_tree_add(&va_space->va_range_tree, &va_range->node);
    if (status != NV_OK)
        goto error;

    if (out_va_range)
        *out_va_range = va_range;

    return NV_OK;

error:
    uvm_va_range_destroy(va_range, NULL);

    return status;
}

NV_STATUS uvm_va_range_create_channel(uvm_va_space_t *va_space,
                                      struct mm_struct *mm,
                                      NvU64 start,
                                      NvU64 end,
                                      uvm_va_range_t **out_va_range)
{
    NV_STATUS status;
    uvm_va_range_t *va_range = NULL;

    status = uvm_va_range_alloc_reclaim(va_space,
                                        mm,
                                        UVM_VA_RANGE_TYPE_CHANNEL,
                                        start,
                                        end,
                                        &va_range);
    if (status != NV_OK)
        return status;

    INIT_LIST_HEAD(&va_range->channel.list_node);

    status = uvm_range_tree_add(&va_space->va_range_tree, &va_range->node);
    if (status != NV_OK)
        goto error;

    if (out_va_range)
        *out_va_range = va_range;

    return NV_OK;

error:
    uvm_va_range_destroy(va_range, NULL);
    return status;
}

NV_STATUS uvm_va_range_create_sked_reflected(uvm_va_space_t *va_space,
                                             struct mm_struct *mm,
                                             NvU64 start,
                                             NvU64 length,
                                             uvm_va_range_t **out_va_range)
{
    NV_STATUS status;
    uvm_va_range_t *va_range = NULL;

    status = uvm_va_range_alloc_reclaim(va_space,
                                        mm,
                                        UVM_VA_RANGE_TYPE_SKED_REFLECTED,
                                        start,
                                        start + length - 1,
                                        &va_range);
    if (status != NV_OK)
        return status;

    status = uvm_range_tree_add(&va_space->va_range_tree, &va_range->node);
    if (status != NV_OK)
        goto error;

    if (out_va_range)
        *out_va_range = va_range;

    return NV_OK;

error:
    uvm_va_range_destroy(va_range, NULL);
    return status;
}

NV_STATUS uvm_va_range_create_semaphore_pool(uvm_va_space_t *va_space,
                                             struct mm_struct *mm,
                                             NvU64 start,
                                             NvU64 length,
                                             const UvmGpuMappingAttributes *per_gpu_attrs,
                                             NvU32 per_gpu_attrs_count,
                                             uvm_va_range_t **out_va_range)
{
    static const uvm_mem_gpu_mapping_attrs_t default_attrs = {
        .protection = UVM_PROT_READ_WRITE_ATOMIC,
        .is_cacheable = false
    };

    NV_STATUS status;
    uvm_va_range_t *va_range = NULL;
    uvm_mem_alloc_params_t mem_alloc_params = { 0 };
    NvU32 i;
    uvm_gpu_id_t gpu_id;

    status = uvm_va_range_alloc_reclaim(va_space,
                                        mm,
                                        UVM_VA_RANGE_TYPE_SEMAPHORE_POOL,
                                        start,
                                        start + length - 1,
                                        &va_range);
    if (status != NV_OK)
        return status;

    uvm_tracker_init(&va_range->semaphore_pool.tracker);
    uvm_mutex_init(&va_range->semaphore_pool.tracker_lock, UVM_LOCK_ORDER_SEMA_POOL_TRACKER);

    status = uvm_range_tree_add(&va_space->va_range_tree, &va_range->node);
    if (status != NV_OK)
        goto error;

    // The semaphore pool memory is located in sysmem, and must be zeroed upon
    // allocation because it may be mapped into the user VA space.
    mem_alloc_params.page_size = UVM_PAGE_SIZE_DEFAULT;
    mem_alloc_params.size = length;
    mem_alloc_params.zero = true;
    mem_alloc_params.mm = mm;

    va_range->semaphore_pool.default_gpu_attrs = default_attrs;
    va_range->semaphore_pool.owner = NULL;

    for_each_gpu_id(gpu_id)
        va_range->semaphore_pool.gpu_attrs[uvm_id_gpu_index(gpu_id)] = default_attrs;

    for (i = 0; i < per_gpu_attrs_count; i++) {
        uvm_gpu_t *gpu;
        uvm_mem_gpu_mapping_attrs_t attrs = default_attrs;

        status = uvm_mem_translate_gpu_attributes(&per_gpu_attrs[i], va_space, &gpu, &attrs);
        if (status != NV_OK)
            goto error;

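        // With Confidential Computing enabled, the first GPU in the attribute
        // list becomes the DMA owner of the sysmem allocation.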
        if (i == 0 && g_uvm_global.conf_computing_enabled)
            mem_alloc_params.dma_owner = gpu;

        if (attrs.is_cacheable) {
            // At most 1 GPU can have this memory cached, in which case it is
            // the 'owner' GPU.
            if (va_range->semaphore_pool.owner != NULL) {
                UVM_DBG_PRINT("Caching of semaphore pool requested on >1 GPU.");
                status = NV_ERR_INVALID_ARGUMENT;
                goto error;
            }

            va_range->semaphore_pool.owner = gpu;
        }

        va_range->semaphore_pool.gpu_attrs[uvm_id_gpu_index(gpu->id)] = attrs;
    }

    status = uvm_mem_alloc(&mem_alloc_params, &va_range->semaphore_pool.mem);
    if (status != NV_OK)
        goto error;

    status = uvm_mem_map_cpu_kernel(va_range->semaphore_pool.mem);
    if (status != NV_OK)
        goto error;

    if (out_va_range)
        *out_va_range = va_range;

    return NV_OK;

error:
    uvm_va_range_destroy(va_range, NULL);
    return status;
}

static void uvm_va_range_destroy_managed(uvm_va_range_t *va_range)
{
    uvm_va_block_t *block;
    uvm_va_block_t *block_tmp;
    uvm_perf_event_data_t event_data;
    NV_STATUS status;

    UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);

    if (va_range->blocks) {
        // Unmap and drop our ref count on each block
        for_each_va_block_in_va_range_safe(va_range, block, block_tmp)
            uvm_va_block_kill(block);

        uvm_kvfree(va_range->blocks);
    }

    event_data.range_destroy.range = va_range;
    uvm_perf_event_notify(&va_range->va_space->perf_events, UVM_PERF_EVENT_RANGE_DESTROY, &event_data);

    status = uvm_range_group_assign_range(va_range->va_space, NULL, va_range->node.start, va_range->node.end);
    UVM_ASSERT(status == NV_OK);
}

static void uvm_va_range_destroy_external(uvm_va_range_t *va_range, struct list_head *deferred_free_list)
{
    uvm_gpu_t *gpu;

    uvm_processor_mask_cache_free(va_range->external.retained_mask);

    if (uvm_processor_mask_empty(&va_range->external.mapped_gpus))
        return;

    UVM_ASSERT(deferred_free_list);

    for_each_va_space_gpu_in_mask(gpu, va_range->va_space, &va_range->external.mapped_gpus) {
        uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, gpu);
        uvm_ext_gpu_map_t *ext_map, *ext_map_next;

        uvm_mutex_lock(&range_tree->lock);
        uvm_ext_gpu_map_for_each_safe(ext_map, ext_map_next, va_range, gpu)
            uvm_ext_gpu_map_destroy(va_range, ext_map, deferred_free_list);
        uvm_mutex_unlock(&range_tree->lock);
    }

    UVM_ASSERT(uvm_processor_mask_empty(&va_range->external.mapped_gpus));
}

static void uvm_va_range_destroy_channel(uvm_va_range_t *va_range)
{
    uvm_gpu_va_space_t *gpu_va_space = va_range->channel.gpu_va_space;
    uvm_membar_t membar;

    UVM_ASSERT(va_range->channel.ref_count == 0);

    // Unmap the buffer
    if (gpu_va_space && va_range->channel.pt_range_vec.ranges) {
        membar = uvm_hal_downgrade_membar_type(gpu_va_space->gpu, va_range->channel.aperture == UVM_APERTURE_VID);
        uvm_page_table_range_vec_clear_ptes(&va_range->channel.pt_range_vec, membar);
        uvm_page_table_range_vec_deinit(&va_range->channel.pt_range_vec);
    }

    list_del(&va_range->channel.list_node);

    // Channel unregister handles releasing this descriptor back to RM
    va_range->channel.rm_descriptor = 0;
}

static void uvm_va_range_destroy_sked_reflected(uvm_va_range_t *va_range)
{
    uvm_gpu_va_space_t *gpu_va_space = va_range->sked_reflected.gpu_va_space;

    if (!gpu_va_space || !va_range->sked_reflected.pt_range_vec.ranges)
        return;

    // The SKED reflected mapping has no physical backing and hence no physical
    // accesses can be pending to it and no membar is needed.
    uvm_page_table_range_vec_clear_ptes(&va_range->sked_reflected.pt_range_vec, UVM_MEMBAR_NONE);
    uvm_page_table_range_vec_deinit(&va_range->sked_reflected.pt_range_vec);

    va_range->sked_reflected.gpu_va_space = NULL;
}

static void uvm_va_range_destroy_semaphore_pool(uvm_va_range_t *va_range)
{
    NV_STATUS status = uvm_tracker_wait_deinit(&va_range->semaphore_pool.tracker);
    if (status != NV_OK) {
        UVM_ASSERT_MSG(status == uvm_global_get_status(),
                       "uvm_tracker_wait() returned %d (%s) in uvm_va_range_destroy_semaphore_pool()\n",
                       status,
                       nvstatusToString(status));
    }
    uvm_mem_free(va_range->semaphore_pool.mem);
    va_range->semaphore_pool.mem = NULL;
}

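// deferred_free_list is required when the range may have external mappings to
// tear down (see uvm_va_range_destroy_external()); other callers may pass
// NULL.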
void uvm_va_range_destroy(uvm_va_range_t *va_range, struct list_head *deferred_free_list)
{
    if (!va_range)
        return;

    if (!RB_EMPTY_NODE(&va_range->node.rb_node))
        uvm_range_tree_remove(&va_range->va_space->va_range_tree, &va_range->node);

    switch (va_range->type) {
        case UVM_VA_RANGE_TYPE_INVALID:
            // Skip partially-created ranges with unset types
            break;
        case UVM_VA_RANGE_TYPE_MANAGED:
            uvm_va_range_destroy_managed(va_range);
            break;
        case UVM_VA_RANGE_TYPE_EXTERNAL:
            uvm_va_range_destroy_external(va_range, deferred_free_list);
            break;
        case UVM_VA_RANGE_TYPE_CHANNEL:
            uvm_va_range_destroy_channel(va_range);
            break;
        case UVM_VA_RANGE_TYPE_SKED_REFLECTED:
            uvm_va_range_destroy_sked_reflected(va_range);
            break;
        case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
            uvm_va_range_destroy_semaphore_pool(va_range);
            break;
        default:
            UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
                           va_range->node.start, va_range->node.end, va_range->type);
    }

    kmem_cache_free(g_uvm_va_range_cache, va_range);
}

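// Turn a managed range into a zombie by detaching it from its vma_wrapper.
// Zombies are freed later, either by uvm_destroy_vma_managed or by the
// clean-up ioctl below (uvm_api_clean_up_zombie_resources).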
void uvm_va_range_zombify(uvm_va_range_t *va_range)
{
    if (!va_range)
        return;

    UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
    UVM_ASSERT(va_range->managed.vma_wrapper);

    // Destroy will be done by uvm_destroy_vma_managed
    va_range->managed.vma_wrapper = NULL;
}

NV_STATUS uvm_api_clean_up_zombie_resources(UVM_CLEAN_UP_ZOMBIE_RESOURCES_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    uvm_va_range_t *va_range, *va_range_next;

    uvm_va_space_down_write(va_space);

    uvm_for_each_va_range_safe(va_range, va_range_next, va_space) {
        if (uvm_va_range_is_managed_zombie(va_range))
            uvm_va_range_destroy(va_range, NULL);
    }

    uvm_va_space_up_write(va_space);

    return NV_OK;
}

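// Succeeds only if [base, base + length) exactly matches an existing VA
// range's bounds; otherwise returns NV_ERR_INVALID_ADDRESS.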
NV_STATUS uvm_api_validate_va_range(UVM_VALIDATE_VA_RANGE_PARAMS *params, struct file *filp)
{
    NV_STATUS status = NV_ERR_INVALID_ADDRESS;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    uvm_va_range_t *va_range;

    uvm_va_space_down_read(va_space);

    va_range = uvm_va_range_find(va_space, params->base);
    if (va_range && va_range->node.start == params->base && va_range->node.end + 1 == params->base + params->length)
        status = NV_OK;

    uvm_va_space_up_read(va_space);

    return status;
}

static NV_STATUS va_range_add_gpu_va_space_managed(uvm_va_range_t *va_range,
                                                   uvm_gpu_va_space_t *gpu_va_space,
                                                   struct mm_struct *mm)
{
    uvm_va_space_t *va_space = va_range->va_space;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    NV_STATUS status = NV_OK;
    const bool should_add_remote_mappings =
        uvm_processor_mask_test(&uvm_va_range_get_policy(va_range)->accessed_by, gpu->id) ||
        uvm_processor_mask_test(&va_range->uvm_lite_gpus, gpu->id);

    // By this time, the gpu is already in the registration mask.
    const bool should_disable_read_duplication =
        uvm_va_range_get_policy(va_range)->read_duplication == UVM_READ_DUPLICATION_ENABLED &&
        (uvm_va_space_can_read_duplicate(va_space, NULL) != uvm_va_space_can_read_duplicate(va_space, gpu));

    // Combine conditions to perform a single VA block traversal
    if (gpu_va_space->ats.enabled || should_add_remote_mappings || should_disable_read_duplication) {
        uvm_va_block_t *va_block;
        uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, mm);

        // TODO: Bug 2090378. Consolidate all per-VA block operations within
        //       uvm_va_block_add_gpu_va_space so we only need to take the VA
        //       block lock once.
        for_each_va_block_in_va_range(va_range, va_block) {
            if (gpu_va_space->ats.enabled) {
                // Notify that a new GPU VA space has been created. This is
                // currently only used for PDE1 pre-population on ATS systems.
                status = UVM_VA_BLOCK_LOCK_RETRY(va_block, NULL, uvm_va_block_add_gpu_va_space(va_block, gpu_va_space));
                if (status != NV_OK)
                    break;
            }

            if (should_add_remote_mappings) {
                // Now that we have a GPU VA space, map any VA ranges for which
                // this GPU is a UVM-Lite GPU or has accessed_by set.
                status = uvm_va_block_set_accessed_by(va_block, va_block_context, gpu->id);
                if (status != NV_OK)
                    break;
            }

            if (should_disable_read_duplication) {
                status = uvm_va_block_unset_read_duplication(va_block, va_block_context);
                if (status != NV_OK)
                    break;
            }
        }
    }

    return status;
}

static NV_STATUS va_range_add_gpu_va_space_semaphore_pool(uvm_va_range_t *va_range, uvm_gpu_t *gpu)
{
    uvm_mem_gpu_mapping_attrs_t *attrs;

    UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL);
    UVM_ASSERT(uvm_mem_mapped_on_gpu_kernel(va_range->semaphore_pool.mem, gpu));

    attrs = &va_range->semaphore_pool.gpu_attrs[uvm_id_gpu_index(gpu->id)];

    return uvm_mem_map_gpu_user(va_range->semaphore_pool.mem,
                                gpu,
                                va_range->va_space,
                                (void *)va_range->node.start,
                                attrs);
}

NV_STATUS uvm_va_range_add_gpu_va_space(uvm_va_range_t *va_range,
                                        uvm_gpu_va_space_t *gpu_va_space,
                                        struct mm_struct *mm)
{
    UVM_ASSERT(va_range->type < UVM_VA_RANGE_TYPE_MAX);

    if (va_range->inject_add_gpu_va_space_error) {
        va_range->inject_add_gpu_va_space_error = false;
        return NV_ERR_NO_MEMORY;
    }

    switch (va_range->type) {
        case UVM_VA_RANGE_TYPE_MANAGED:
            return va_range_add_gpu_va_space_managed(va_range, gpu_va_space, mm);
        case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
            return va_range_add_gpu_va_space_semaphore_pool(va_range, gpu_va_space->gpu);
        default:
            return NV_OK;
    }
}

static void va_range_remove_gpu_va_space_managed(uvm_va_range_t *va_range,
                                                 uvm_gpu_va_space_t *gpu_va_space,
                                                 struct mm_struct *mm)
{
    uvm_va_block_t *va_block;
    uvm_va_space_t *va_space = va_range->va_space;
    bool should_enable_read_duplicate;
    uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, mm);

    should_enable_read_duplicate =
        uvm_va_range_get_policy(va_range)->read_duplication == UVM_READ_DUPLICATION_ENABLED &&
        uvm_va_space_can_read_duplicate(va_space, NULL) != uvm_va_space_can_read_duplicate(va_space, gpu_va_space->gpu);

    for_each_va_block_in_va_range(va_range, va_block) {
        uvm_mutex_lock(&va_block->lock);
        uvm_va_block_remove_gpu_va_space(va_block, gpu_va_space, va_block_context);
        uvm_mutex_unlock(&va_block->lock);

        if (should_enable_read_duplicate)
            uvm_va_block_set_read_duplication(va_block, va_block_context);
    }
}

static void va_range_remove_gpu_va_space_external(uvm_va_range_t *va_range,
                                                  uvm_gpu_t *gpu,
                                                  struct list_head *deferred_free_list)
{
    uvm_ext_gpu_range_tree_t *range_tree;
    uvm_ext_gpu_map_t *ext_map, *ext_map_next;

    UVM_ASSERT(deferred_free_list);

    range_tree = uvm_ext_gpu_range_tree(va_range, gpu);
    uvm_mutex_lock(&range_tree->lock);

    uvm_ext_gpu_map_for_each_safe(ext_map, ext_map_next, va_range, gpu)
        uvm_ext_gpu_map_destroy(va_range, ext_map, deferred_free_list);

    uvm_mutex_unlock(&range_tree->lock);
}

static void va_range_remove_gpu_va_space_semaphore_pool(uvm_va_range_t *va_range, uvm_gpu_t *gpu)
{
    UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL);

    if (g_uvm_global.conf_computing_enabled && (va_range->semaphore_pool.mem->dma_owner == gpu))
        uvm_va_range_destroy(va_range, NULL);
    else
        uvm_mem_unmap_gpu_user(va_range->semaphore_pool.mem, gpu);
}

void uvm_va_range_remove_gpu_va_space(uvm_va_range_t *va_range,
                                      uvm_gpu_va_space_t *gpu_va_space,
                                      struct mm_struct *mm,
                                      struct list_head *deferred_free_list)
{
    switch (va_range->type) {
        case UVM_VA_RANGE_TYPE_MANAGED:
            va_range_remove_gpu_va_space_managed(va_range, gpu_va_space, mm);
            break;
        case UVM_VA_RANGE_TYPE_EXTERNAL:
            va_range_remove_gpu_va_space_external(va_range, gpu_va_space->gpu, deferred_free_list);
            break;
        case UVM_VA_RANGE_TYPE_CHANNEL:
            // All channels under this GPU VA space should've been removed
            // before removing the GPU VA space.
            UVM_ASSERT(va_range->channel.gpu_va_space != gpu_va_space);
            break;
        case UVM_VA_RANGE_TYPE_SKED_REFLECTED:
            if (va_range->sked_reflected.gpu_va_space == gpu_va_space)
                uvm_va_range_destroy_sked_reflected(va_range);
            break;
        case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
            va_range_remove_gpu_va_space_semaphore_pool(va_range, gpu_va_space->gpu);
            break;
        default:
            UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
                           va_range->node.start, va_range->node.end, va_range->type);
    }
}

static NV_STATUS uvm_va_range_enable_peer_managed(uvm_va_range_t *va_range, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
    NV_STATUS status;
    uvm_va_block_t *va_block;
    bool gpu0_accessed_by = uvm_processor_mask_test(&uvm_va_range_get_policy(va_range)->accessed_by, gpu0->id);
    bool gpu1_accessed_by = uvm_processor_mask_test(&uvm_va_range_get_policy(va_range)->accessed_by, gpu1->id);
    uvm_va_space_t *va_space = va_range->va_space;
    uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, NULL);

    for_each_va_block_in_va_range(va_range, va_block) {
        // TODO: Bug 1767224: Refactor the uvm_va_block_set_accessed_by logic
        //       into uvm_va_block_enable_peer.
        uvm_mutex_lock(&va_block->lock);
        status = uvm_va_block_enable_peer(va_block, gpu0, gpu1);
        uvm_mutex_unlock(&va_block->lock);

        if (status != NV_OK)
            return status;

        // For UVM-Lite at most one GPU needs to map the peer GPU if it's the
        // preferred location, but it doesn't hurt to just try mapping both.
        if (gpu0_accessed_by) {
            status = uvm_va_block_set_accessed_by(va_block,
                                                  va_block_context,
                                                  gpu0->id);
            if (status != NV_OK)
                return status;
        }

        if (gpu1_accessed_by) {
            status = uvm_va_block_set_accessed_by(va_block,
                                                  va_block_context,
                                                  gpu1->id);
            if (status != NV_OK)
                return status;
        }
    }

    return NV_OK;
}

NV_STATUS uvm_va_range_enable_peer(uvm_va_range_t *va_range, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
    switch (va_range->type) {
        case UVM_VA_RANGE_TYPE_MANAGED:
            return uvm_va_range_enable_peer_managed(va_range, gpu0, gpu1);
        case UVM_VA_RANGE_TYPE_EXTERNAL:
            // UVM_VA_RANGE_TYPE_EXTERNAL doesn't create new mappings when
            // enabling peer access
            return NV_OK;
        case UVM_VA_RANGE_TYPE_CHANNEL:
            // UVM_VA_RANGE_TYPE_CHANNEL should never have peer mappings
            return NV_OK;
        case UVM_VA_RANGE_TYPE_SKED_REFLECTED:
            // UVM_VA_RANGE_TYPE_SKED_REFLECTED should never have peer mappings
            return NV_OK;
        case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
            // UVM_VA_RANGE_TYPE_SEMAPHORE_POOL should never have peer mappings
            return NV_OK;
        default:
            UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
                           va_range->node.start, va_range->node.end, va_range->type);
            return NV_ERR_NOT_SUPPORTED;
    }
}

static void uvm_va_range_disable_peer_external(uvm_va_range_t *va_range,
                                               uvm_gpu_t *mapping_gpu,
                                               uvm_gpu_t *owning_gpu,
                                               struct list_head *deferred_free_list)
{
    uvm_ext_gpu_range_tree_t *range_tree;
    uvm_ext_gpu_map_t *ext_map, *ext_map_next;

    range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
    uvm_mutex_lock(&range_tree->lock);
    uvm_ext_gpu_map_for_each_safe(ext_map, ext_map_next, va_range, mapping_gpu) {
        if (ext_map->owning_gpu == owning_gpu && (!ext_map->is_sysmem || ext_map->is_egm)) {
            UVM_ASSERT(deferred_free_list);
            uvm_ext_gpu_map_destroy(va_range, ext_map, deferred_free_list);
        }
    }
    uvm_mutex_unlock(&range_tree->lock);
}

static void uvm_va_range_disable_peer_managed(uvm_va_range_t *va_range, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
    uvm_va_block_t *va_block;
    uvm_gpu_t *uvm_lite_gpu_to_unmap = NULL;

    bool uvm_lite_mode = uvm_processor_mask_test(&va_range->uvm_lite_gpus, gpu0->id) &&
                         uvm_processor_mask_test(&va_range->uvm_lite_gpus, gpu1->id);

    if (uvm_lite_mode) {
        // In UVM-Lite mode, the UVM-Lite GPUs can only have mappings to the
        // preferred location. If peer mappings are being disabled to the
        // preferred location, then unmap the other GPU.
        // Nothing to do otherwise.
        if (uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), gpu0->id, NUMA_NO_NODE))
            uvm_lite_gpu_to_unmap = gpu1;
        else if (uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), gpu1->id, NUMA_NO_NODE))
            uvm_lite_gpu_to_unmap = gpu0;
        else
            return;
    }

    for_each_va_block_in_va_range(va_range, va_block) {
        uvm_mutex_lock(&va_block->lock);
        if (uvm_lite_mode)
            uvm_va_block_unmap_preferred_location_uvm_lite(va_block, uvm_lite_gpu_to_unmap);
        else
            uvm_va_block_disable_peer(va_block, gpu0, gpu1);
        uvm_mutex_unlock(&va_block->lock);
    }

    if (uvm_lite_mode && !uvm_range_group_all_migratable(va_range->va_space,
                                                         va_range->node.start,
                                                         va_range->node.end)) {
        UVM_ASSERT(uvm_lite_gpu_to_unmap);

        // Migration is prevented, but we had to unmap a UVM-Lite GPU. Update
        // the accessed by and UVM-Lite GPUs masks as it cannot be considered a
        // UVM-Lite GPU any more.
        uvm_va_range_unset_accessed_by(va_range, uvm_lite_gpu_to_unmap->id, NULL);
    }
}

void uvm_va_range_disable_peer(uvm_va_range_t *va_range,
                               uvm_gpu_t *gpu0,
                               uvm_gpu_t *gpu1,
                               struct list_head *deferred_free_list)
{
    switch (va_range->type) {
        case UVM_VA_RANGE_TYPE_MANAGED:
            uvm_va_range_disable_peer_managed(va_range, gpu0, gpu1);
            break;
        case UVM_VA_RANGE_TYPE_EXTERNAL:
            // If GPU 0 has a mapping to GPU 1, remove GPU 0's mapping
            uvm_va_range_disable_peer_external(va_range, gpu0, gpu1, deferred_free_list);
            // If GPU 1 has a mapping to GPU 0, remove GPU 1's mapping
            uvm_va_range_disable_peer_external(va_range, gpu1, gpu0, deferred_free_list);
            break;
        case UVM_VA_RANGE_TYPE_CHANNEL:
            // UVM_VA_RANGE_TYPE_CHANNEL should never have peer mappings
            break;
        case UVM_VA_RANGE_TYPE_SKED_REFLECTED:
            // UVM_VA_RANGE_TYPE_SKED_REFLECTED should never have peer mappings
            break;
        case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
            // UVM_VA_RANGE_TYPE_SEMAPHORE_POOL should never have peer mappings
            break;
        default:
            UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
                           va_range->node.start, va_range->node.end, va_range->type);
    }
}

static NV_STATUS va_range_register_gpu_semaphore_pool(uvm_va_range_t *va_range, uvm_gpu_t *gpu)
{
    // TODO: Bug 1812419: pass GPU mapping attributes to uvm_mem_map_gpu_kernel
    //       once that function accepts them.
    return uvm_mem_map_gpu_kernel(va_range->semaphore_pool.mem, gpu);
}

NV_STATUS uvm_va_range_register_gpu(uvm_va_range_t *va_range, uvm_gpu_t *gpu)
{
    UVM_ASSERT(va_range->type < UVM_VA_RANGE_TYPE_MAX);
    uvm_assert_rwsem_locked_write(&va_range->va_space->lock);

    if (va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL)
        return va_range_register_gpu_semaphore_pool(va_range, gpu);

    return NV_OK;
}

static void va_range_unregister_gpu_managed(uvm_va_range_t *va_range, uvm_gpu_t *gpu, struct mm_struct *mm)
{
    uvm_va_block_t *va_block;

    // Reset the preferred location and accessed-by mask of the VA range if
    // needed. Note: the return code of uvm_va_range_set_preferred_location is
    // ignored since it can only fail when setting a preferred location, not
    // when clearing one.
    if (uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), gpu->id, NUMA_NO_NODE))
        (void)uvm_va_range_set_preferred_location(va_range, UVM_ID_INVALID, NUMA_NO_NODE, mm, NULL);

    uvm_va_range_unset_accessed_by(va_range, gpu->id, NULL);

    // Migrate and free any remaining resident allocations on this GPU
    for_each_va_block_in_va_range(va_range, va_block)
        uvm_va_block_unregister_gpu(va_block, gpu, mm);
}

// The GPU being unregistered can't have any remaining mappings, since those
// were removed when the corresponding GPU VA space was removed. However, other
// GPUs could still have mappings to memory resident on this GPU, so we have to
// unmap those.
static void va_range_unregister_gpu_external(uvm_va_range_t *va_range,
                                             uvm_gpu_t *gpu,
                                             struct list_head *deferred_free_list)
{
    uvm_ext_gpu_map_t *ext_map, *ext_map_next;
    uvm_gpu_t *other_gpu;

    for_each_va_space_gpu_in_mask(other_gpu, va_range->va_space, &va_range->external.mapped_gpus) {
        uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, other_gpu);
        UVM_ASSERT(other_gpu != gpu);

        uvm_mutex_lock(&range_tree->lock);
        uvm_ext_gpu_map_for_each_safe(ext_map, ext_map_next, va_range, other_gpu) {
            if (ext_map->owning_gpu == gpu) {
                UVM_ASSERT(deferred_free_list);
                uvm_ext_gpu_map_destroy(va_range, ext_map, deferred_free_list);
            }
        }
        uvm_mutex_unlock(&range_tree->lock);
    }
}

static void va_range_unregister_gpu_semaphore_pool(uvm_va_range_t *va_range, uvm_gpu_t *gpu)
{
    NV_STATUS status;

    // This range should have been unmapped from the user VA space during GPU
    // VA space unregister, which must have already happened.
    UVM_ASSERT(!uvm_mem_mapped_on_gpu_user(va_range->semaphore_pool.mem, gpu));
    UVM_ASSERT(uvm_mem_mapped_on_gpu_kernel(va_range->semaphore_pool.mem, gpu));

    uvm_mutex_lock(&va_range->semaphore_pool.tracker_lock);
    status = uvm_tracker_wait(&va_range->semaphore_pool.tracker);
    uvm_mutex_unlock(&va_range->semaphore_pool.tracker_lock);
    if (status != NV_OK)
        UVM_ASSERT(status == uvm_global_get_status());

    uvm_mem_unmap_gpu_phys(va_range->semaphore_pool.mem, gpu);

    va_range->semaphore_pool.gpu_attrs[uvm_id_gpu_index(gpu->id)] = va_range->semaphore_pool.default_gpu_attrs;
    if (va_range->semaphore_pool.owner == gpu)
        va_range->semaphore_pool.owner = NULL;
}

void uvm_va_range_unregister_gpu(uvm_va_range_t *va_range,
                                 uvm_gpu_t *gpu,
                                 struct mm_struct *mm,
                                 struct list_head *deferred_free_list)
{
    switch (va_range->type) {
        case UVM_VA_RANGE_TYPE_MANAGED:
            va_range_unregister_gpu_managed(va_range, gpu, mm);
            break;
        case UVM_VA_RANGE_TYPE_EXTERNAL:
            va_range_unregister_gpu_external(va_range, gpu, deferred_free_list);
            break;
        case UVM_VA_RANGE_TYPE_CHANNEL:
            // All ranges should have been destroyed by GPU VA space unregister,
            // which should have already happened.
            UVM_ASSERT(va_range->channel.gpu_va_space->gpu != gpu);
            break;
        case UVM_VA_RANGE_TYPE_SKED_REFLECTED:
            // All ranges for this GPU should have been unmapped by GPU VA space
            // unregister (uvm_va_range_destroy_sked_reflected), which should
            // have already happened.
            if (va_range->sked_reflected.gpu_va_space != NULL)
                UVM_ASSERT(va_range->sked_reflected.gpu_va_space->gpu != gpu);
            break;
        case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
            va_range_unregister_gpu_semaphore_pool(va_range, gpu);
            break;
        default:
            UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
                           va_range->node.start, va_range->node.end, va_range->type);
    }
}

// Split existing's blocks into new. new's blocks array has already been
// allocated. This is called before existing's range node is split, so it
// overlaps new. new is always in the upper region of existing.
//
// The caller will do the range tree split.
//
// If this fails it leaves existing unchanged.
static NV_STATUS uvm_va_range_split_blocks(uvm_va_range_t *existing, uvm_va_range_t *new)
{
    uvm_va_block_t *old_block, *block = NULL;
    size_t existing_blocks, split_index, new_index = 0;
    NV_STATUS status;

    UVM_ASSERT(new->node.start > existing->node.start);
    UVM_ASSERT(new->node.end <= existing->node.end);

    split_index = uvm_va_range_block_index(existing, new->node.start);

    // Handle a block spanning the split point
    if (block_calc_start(existing, split_index) != new->node.start) {
        // If a populated block actually spans the split point, we have to split
        // the block. Otherwise just account for the extra entry in the arrays.
        old_block = uvm_va_range_block(existing, split_index);
        if (old_block) {
            UVM_ASSERT(old_block->start < new->node.start);
            status = uvm_va_block_split(old_block, new->node.start - 1, &block, new);
            if (status != NV_OK)
                return status;

            // No memory barrier is needed since we're holding the va_space
            // lock in write mode, so no other thread can access the blocks
            // array.
            atomic_long_set(&new->blocks[0], (long)block);
        }

        new_index = 1;
    }

    // uvm_va_block_split gets first crack at injecting an error. If it did so,
    // we wouldn't be here. However, not all va_range splits will call
    // uvm_va_block_split so we need an extra check here. We can't push this
    // injection later since all paths past this point assume success, so they
    // modify the state of the 'existing' range.
    //
    // Even if there was no block split above, there is no guarantee that one
    // of our blocks doesn't have the 'inject_split_error' flag set. We clear
    // that here to prevent multiple errors caused by one
    // 'uvm_test_va_range_inject_split_error' call.
    if (existing->inject_split_error) {
        UVM_ASSERT(!block);
        existing->inject_split_error = false;

        for_each_va_block_in_va_range(existing, block) {
            uvm_va_block_test_t *block_test = uvm_va_block_get_test(block);
            if (block_test)
                block_test->inject_split_error = false;
        }

        return NV_ERR_NO_MEMORY;
    }

    existing_blocks = split_index + new_index;

    // Copy existing's blocks over to the new range, accounting for the
    // explicit assignment above in case we did a block split. There are two
    // general cases:
    //
    // No split:
    //                                 split_index
    //                                      v
    // existing (before) [----- A ----][----- B ----][----- C ----]
    // existing (after)  [----- A ----]
    // new                             [----- B ----][----- C ----]
    //
    // Split:
    //                                 split_index
    //                                      v
    // existing (before) [----- A ----][----- B ----][----- C ----]
    // existing (after)  [----- A ----][- B -]
    // new                                    [- N -][----- C ----]
    //                                        ^new->blocks[0]

    // Note, if we split the last block of existing, this won't iterate at all.
    for (; new_index < uvm_va_range_num_blocks(new); new_index++) {
        block = uvm_va_range_block(existing, split_index + new_index);
        if (!block) {
            // new's array was cleared at allocation
            UVM_ASSERT(uvm_va_range_block(new, new_index) == NULL);
            continue;
        }

        // As soon as we make this assignment and drop the lock, the reverse
        // mapping code can start looking at new, so new must be ready to go.
        uvm_mutex_lock(&block->lock);
        UVM_ASSERT(block->va_range == existing);
        block->va_range = new;
        uvm_mutex_unlock(&block->lock);

        // No memory barrier is needed since we're holding the va_space lock in
        // write mode, so no other thread can access the blocks array.
        atomic_long_set(&new->blocks[new_index], (long)block);
        atomic_long_set(&existing->blocks[split_index + new_index], (long)NULL);
    }

    blocks_array_shrink(existing, existing_blocks);

    return NV_OK;
}

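// Split existing_va_range into [start, new_end] and [new_end + 1, end],
// returning the new upper range. On error the existing range is left intact.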
NV_STATUS uvm_va_range_split(uvm_va_range_t *existing_va_range,
                             NvU64 new_end,
                             uvm_va_range_t **new_va_range)
{
    uvm_va_space_t *va_space = existing_va_range->va_space;
    uvm_va_range_t *new = NULL;
    uvm_perf_event_data_t event_data;
    NV_STATUS status;

    UVM_ASSERT(existing_va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
    UVM_ASSERT(new_end > existing_va_range->node.start);
    UVM_ASSERT(new_end < existing_va_range->node.end);
    UVM_ASSERT(PAGE_ALIGNED(new_end + 1));
    uvm_assert_rwsem_locked_write(&va_space->lock);

    new = uvm_va_range_alloc_managed(va_space, new_end + 1, existing_va_range->node.end);
    if (!new) {
        status = NV_ERR_NO_MEMORY;
        goto error;
    }

    // The new va_range is under the same vma. If this is a uvm_vm_open, the
    // caller takes care of updating existing's vma_wrapper for us.
    new->managed.vma_wrapper = existing_va_range->managed.vma_wrapper;

    // Copy over state before splitting blocks so any block lookups happening
    // concurrently on the eviction path will see the new range's data.
    uvm_va_range_get_policy(new)->read_duplication = uvm_va_range_get_policy(existing_va_range)->read_duplication;
    uvm_va_range_get_policy(new)->preferred_location = uvm_va_range_get_policy(existing_va_range)->preferred_location;
    uvm_va_range_get_policy(new)->preferred_nid = uvm_va_range_get_policy(existing_va_range)->preferred_nid;
    uvm_processor_mask_copy(&uvm_va_range_get_policy(new)->accessed_by,
                            &uvm_va_range_get_policy(existing_va_range)->accessed_by);
    uvm_processor_mask_copy(&new->uvm_lite_gpus, &existing_va_range->uvm_lite_gpus);

    status = uvm_va_range_split_blocks(existing_va_range, new);
    if (status != NV_OK)
        goto error;

    // Finally, update the VA range tree
    uvm_range_tree_split(&va_space->va_range_tree, &existing_va_range->node, &new->node);

    if (new->type == UVM_VA_RANGE_TYPE_MANAGED) {
        event_data.range_shrink.range = new;
        uvm_perf_event_notify(&va_space->perf_events, UVM_PERF_EVENT_RANGE_SHRINK, &event_data);
    }

    if (new_va_range)
        *new_va_range = new;
    return NV_OK;

error:
    uvm_va_range_destroy(new, NULL);
    return status;
}

uvm_va_range_t *uvm_va_range_find(uvm_va_space_t *va_space, NvU64 addr)
{
    uvm_assert_rwsem_locked(&va_space->lock);
    return uvm_va_range_container(uvm_range_tree_find(&va_space->va_range_tree, addr));
}

uvm_va_range_t *uvm_va_space_iter_first(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
    uvm_range_tree_node_t *node = uvm_range_tree_iter_first(&va_space->va_range_tree, start, end);
    return uvm_va_range_container(node);
}

uvm_va_range_t *uvm_va_space_iter_next(uvm_va_range_t *va_range, NvU64 end)
{
    uvm_range_tree_node_t *node;

    // Handling a NULL va_range here makes uvm_for_each_va_range_in_safe much
    // less messy
    if (!va_range)
        return NULL;

    node = uvm_range_tree_iter_next(&va_range->va_space->va_range_tree, &va_range->node, end);
    return uvm_va_range_container(node);
}

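// Return the number of UVM_VA_BLOCK_SIZE-aligned slots the range spans. For
// illustration, assuming a 2MB block size, a range covering [1MB, 5MB - 1]
// aligns out to [0, 6MB) and therefore spans 3 blocks: [1MB, 2MB - 1],
// [2MB, 4MB - 1] and [4MB, 5MB - 1] after clamping to the range bounds.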
size_t uvm_va_range_num_blocks(uvm_va_range_t *va_range)
{
    NvU64 start = UVM_VA_BLOCK_ALIGN_DOWN(va_range->node.start);
    NvU64 end = UVM_VA_BLOCK_ALIGN_UP(va_range->node.end); // End is inclusive
    return (end - start) / UVM_VA_BLOCK_SIZE;
}

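// Map an address within the range to the index of its block in the blocks
// array. Continuing the hypothetical 2MB-block example above: in a range
// starting at 1MB, an address of 3MB has addr_index 1 and start_index 0, so it
// falls in block index 1.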
size_t uvm_va_range_block_index(uvm_va_range_t *va_range, NvU64 addr)
{
    size_t addr_index, start_index, index;

    UVM_ASSERT(addr >= va_range->node.start);
    UVM_ASSERT(addr <= va_range->node.end);
    UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);

    // Each block will cover as much space as possible within the aligned
    // UVM_VA_BLOCK_SIZE, up to the parent VA range boundaries. In other words,
    // the entire VA space can be broken into UVM_VA_BLOCK_SIZE chunks. Even if
    // there are multiple ranges (and thus multiple blocks) per actual
    // UVM_VA_BLOCK_SIZE chunk, none of those will have more than 1 block unless
    // they span a UVM_VA_BLOCK_SIZE alignment boundary.
    addr_index = (size_t)(addr / UVM_VA_BLOCK_SIZE);
    start_index = (size_t)(va_range->node.start / UVM_VA_BLOCK_SIZE);

    index = addr_index - start_index;
    UVM_ASSERT(index < uvm_va_range_num_blocks(va_range));
    return index;
}

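// Look up the block at the given index, allocating and inserting it if it
// doesn't exist yet. A typical lookup-or-create sequence from a (hypothetical)
// caller holding the va_space lock:
//
//     size_t index = uvm_va_range_block_index(va_range, addr);
//     uvm_va_block_t *block;
//     NV_STATUS status = uvm_va_range_block_create(va_range, index, &block);
//     if (status != NV_OK)
//         return status;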
NV_STATUS uvm_va_range_block_create(uvm_va_range_t *va_range, size_t index, uvm_va_block_t **out_block)
{
    uvm_va_block_t *block, *old;
    NV_STATUS status;

    block = uvm_va_range_block(va_range, index);
    if (!block) {
        // No block has been created here yet, so allocate one and attempt to
        // insert it. Note that this runs the risk of an out-of-memory error
        // when multiple threads race and all concurrently allocate a block for
        // the same address. This should be extremely rare. There is also
        // precedent in the Linux kernel, which does the same thing for demand-
        // allocation of anonymous pages.
        status = uvm_va_block_create(va_range,
                                     block_calc_start(va_range, index),
                                     block_calc_end(va_range, index),
                                     &block);
        if (status != NV_OK)
            return status;

        // Try to insert it
        old = (uvm_va_block_t *)nv_atomic_long_cmpxchg(&va_range->blocks[index],
                                                       (long)NULL,
                                                       (long)block);
        if (old) {
            // Someone else beat us on the insert
            uvm_va_block_release(block);
            block = old;
        }
    }

    *out_block = block;
    return NV_OK;
}

uvm_va_block_t *uvm_va_range_block_next(uvm_va_range_t *va_range, uvm_va_block_t *va_block)
{
    uvm_va_space_t *va_space = va_range->va_space;
    size_t i = 0;

    uvm_assert_rwsem_locked(&va_space->lock);

    UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);

    if (va_block)
        i = uvm_va_range_block_index(va_range, va_block->start) + 1;

    for (; i < uvm_va_range_num_blocks(va_range); i++) {
        va_block = uvm_va_range_block(va_range, i);
        if (va_block) {
            UVM_ASSERT(va_block->va_range == va_range);
            UVM_ASSERT(uvm_va_range_block_index(va_range, va_block->start) == i);
            return va_block;
        }
    }

    return NULL;
}

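// Unmap the processors in mask from every block in the range, collecting each
// block's tracker into out_tracker when one is provided.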
static NV_STATUS range_unmap_mask(uvm_va_range_t *va_range,
                                  const uvm_processor_mask_t *mask,
                                  uvm_tracker_t *out_tracker)
{
    uvm_va_space_t *va_space = va_range->va_space;
    uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);
    uvm_va_block_t *block;

    UVM_ASSERT_MSG(va_range->type == UVM_VA_RANGE_TYPE_MANAGED, "type 0x%x\n", va_range->type);

    if (uvm_processor_mask_empty(mask))
        return NV_OK;

    for_each_va_block_in_va_range(va_range, block) {
        NV_STATUS status;
        uvm_va_block_region_t region = uvm_va_block_region_from_block(block);

        uvm_mutex_lock(&block->lock);
        status = uvm_va_block_unmap_mask(block, block_context, mask, region, NULL);
        if (out_tracker)
            uvm_tracker_add_tracker_safe(out_tracker, &block->tracker);

        uvm_mutex_unlock(&block->lock);
        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}

static NV_STATUS range_unmap(uvm_va_range_t *va_range, uvm_processor_id_t processor, uvm_tracker_t *out_tracker)
{
    uvm_processor_mask_t *mask;
    uvm_va_space_t *va_space = va_range->va_space;

    uvm_assert_rwsem_locked_write(&va_space->lock);

    mask = &va_space->unmap_mask;

    UVM_ASSERT_MSG(va_range->type == UVM_VA_RANGE_TYPE_MANAGED, "type 0x%x\n", va_range->type);

    uvm_processor_mask_zero(mask);
    uvm_processor_mask_set(mask, processor);

    return range_unmap_mask(va_range, mask, out_tracker);
}

static NV_STATUS range_map_uvm_lite_gpus(uvm_va_range_t *va_range, uvm_tracker_t *out_tracker)
{
    NV_STATUS status = NV_OK;
    uvm_va_block_t *va_block;
    uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_range->va_space, NULL);

    UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);

    if (uvm_processor_mask_empty(&va_range->uvm_lite_gpus))
        return NV_OK;

    for_each_va_block_in_va_range(va_range, va_block) {
        // UVM-Lite GPUs always map with RWA
        uvm_mutex_lock(&va_block->lock);
        status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, NULL,
                                           uvm_va_block_map_mask(va_block,
                                                                 va_block_context,
                                                                 &va_range->uvm_lite_gpus,
                                                                 uvm_va_block_region_from_block(va_block),
                                                                 NULL,
                                                                 UVM_PROT_READ_WRITE_ATOMIC,
                                                                 UvmEventMapRemoteCauseCoherence));
        if (status == NV_OK && out_tracker)
            status = uvm_tracker_add_tracker(out_tracker, &va_block->tracker);

        uvm_mutex_unlock(&va_block->lock);
        if (status != NV_OK)
            break;
    }

    return status;
}

// Calculate the mask of GPUs that should follow the UVM-Lite behaviour
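// A GPU follows the UVM-Lite behaviour when the range has a non-faultable
// preferred location and the GPU either is that preferred location or has
// accessed_by set on it, as computed below.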
static void calc_uvm_lite_gpus_mask(uvm_va_space_t *va_space,
                                    uvm_processor_id_t preferred_location,
                                    const uvm_processor_mask_t *accessed_by_mask,
                                    uvm_processor_mask_t *uvm_lite_gpus)
{
    uvm_gpu_id_t gpu_id;

    uvm_assert_rwsem_locked_write(&va_space->lock);

    // Zero out the mask first
    uvm_processor_mask_zero(uvm_lite_gpus);

    // If no preferred location is set then there are no GPUs following the
    // UVM-Lite behaviour
    if (UVM_ID_IS_INVALID(preferred_location))
        return;

    // If the preferred location is a faultable GPU, then no GPUs should follow
    // the UVM-Lite behaviour.
    if (UVM_ID_IS_GPU(preferred_location) &&
        uvm_processor_mask_test(&va_space->faultable_processors, preferred_location)) {
        return;
    }

    // Otherwise add all non-faultable GPUs that have accessed_by set to the
    // UVM-Lite mask.
    for_each_gpu_id_in_mask(gpu_id, accessed_by_mask) {
        if (!uvm_processor_mask_test(&va_space->faultable_processors, gpu_id))
            uvm_processor_mask_set(uvm_lite_gpus, gpu_id);
    }

    // And the preferred location if it's a GPU
    if (UVM_ID_IS_GPU(preferred_location))
        uvm_processor_mask_set(uvm_lite_gpus, preferred_location);
}

// Update the mask of GPUs that follow the UVM-Lite behaviour
static void range_update_uvm_lite_gpus_mask(uvm_va_range_t *va_range)
{
    UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
    calc_uvm_lite_gpus_mask(va_range->va_space,
                            uvm_va_range_get_policy(va_range)->preferred_location,
                            &uvm_va_range_get_policy(va_range)->accessed_by,
                            &va_range->uvm_lite_gpus);
}

NV_STATUS uvm_va_range_set_preferred_location(uvm_va_range_t *va_range,
                                              uvm_processor_id_t preferred_location,
                                              int preferred_cpu_nid,
                                              struct mm_struct *mm,
                                              uvm_tracker_t *out_tracker)
{
    NV_STATUS status = NV_OK;
    uvm_processor_mask_t *all_uvm_lite_gpus = NULL;
    uvm_processor_mask_t *new_uvm_lite_gpus = NULL;
    uvm_processor_mask_t *set_accessed_by_processors = NULL;
    uvm_range_group_range_iter_t iter;
    uvm_range_group_range_t *rgr = NULL;
    uvm_va_space_t *va_space = va_range->va_space;
    uvm_va_block_t *va_block;
    uvm_va_block_context_t *va_block_context;
    uvm_va_policy_t *va_range_policy;

    uvm_assert_rwsem_locked_write(&va_space->lock);
    UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);

    all_uvm_lite_gpus = uvm_processor_mask_cache_alloc();
    if (!all_uvm_lite_gpus) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    new_uvm_lite_gpus = uvm_processor_mask_cache_alloc();
    if (!new_uvm_lite_gpus) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    set_accessed_by_processors = uvm_processor_mask_cache_alloc();
    if (!set_accessed_by_processors) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    va_range_policy = uvm_va_range_get_policy(va_range);
    if (uvm_va_policy_preferred_location_equal(va_range_policy, preferred_location, preferred_cpu_nid))
        goto out;

    // Mark all range group ranges within this VA range as migrated since the
    // preferred location has changed.
    uvm_range_group_for_each_range_in(rgr, va_space, va_range->node.start, va_range->node.end) {
        uvm_spin_lock(&rgr->range_group->migrated_ranges_lock);
        if (list_empty(&rgr->range_group_migrated_list_node))
            list_move_tail(&rgr->range_group_migrated_list_node, &rgr->range_group->migrated_ranges);
        uvm_spin_unlock(&rgr->range_group->migrated_ranges_lock);
    }

    // Calculate the new UVM-Lite GPUs mask, but don't update va_range state so
    // that we can keep block_page_check_mappings() happy while updating the
    // mappings.
    calc_uvm_lite_gpus_mask(va_space, preferred_location, &va_range_policy->accessed_by, new_uvm_lite_gpus);

    // If the range contains non-migratable range groups, check that the new
    // UVM-Lite GPUs can all map the new preferred location.
    if (!uvm_range_group_all_migratable(va_space, va_range->node.start, va_range->node.end) &&
        UVM_ID_IS_VALID(preferred_location) &&
        !uvm_processor_mask_subset(new_uvm_lite_gpus, &va_space->accessible_from[uvm_id_value(preferred_location)])) {
        status = NV_ERR_INVALID_DEVICE;
        goto out;
    }

    if (UVM_ID_IS_INVALID(preferred_location)) {
        uvm_range_group_for_each_migratability_in_safe(&iter, va_space, va_range->node.start, va_range->node.end) {
            if (!iter.migratable) {
                // Clear the range group association for any unmigratable
                // ranges if there is no preferred location.
                status = uvm_range_group_assign_range(va_space, NULL, iter.start, iter.end);
                if (status != NV_OK)
                    goto out;
            }
        }
    }

    // Unmap all old and new UVM-Lite GPUs:
    // - GPUs that stop being UVM-Lite need to be unmapped so that they don't
    //   have stale mappings to the old preferred location.
    // - GPUs that will continue to be UVM-Lite GPUs or are new UVM-Lite GPUs
    //   need to be unmapped so that the new preferred location can be mapped.
    uvm_processor_mask_or(all_uvm_lite_gpus, &va_range->uvm_lite_gpus, new_uvm_lite_gpus);
    status = range_unmap_mask(va_range, all_uvm_lite_gpus, out_tracker);
    if (status != NV_OK)
        goto out;

    // GPUs that stop being UVM-Lite, but are in the accessed_by mask, need to
    // have any possible mappings established.
    uvm_processor_mask_andnot(set_accessed_by_processors, &va_range->uvm_lite_gpus, new_uvm_lite_gpus);

    // A GPU which had been in UVM-Lite mode before must still be in UVM-Lite
    // mode if it is the new preferred location. Otherwise we'd have to be more
    // careful below to not establish remote mappings to the new preferred
    // location.
    if (UVM_ID_IS_GPU(preferred_location))
        UVM_ASSERT(!uvm_processor_mask_test(set_accessed_by_processors, preferred_location));

    // The old preferred location should establish new remote mappings if it
    // has accessed-by set.
    if (UVM_ID_IS_VALID(va_range_policy->preferred_location))
        uvm_processor_mask_set(set_accessed_by_processors, va_range_policy->preferred_location);

    uvm_processor_mask_and(set_accessed_by_processors, set_accessed_by_processors, &va_range_policy->accessed_by);

    // Now update the va_range state
    va_range_policy->preferred_location = preferred_location;
    va_range_policy->preferred_nid = preferred_cpu_nid;
    uvm_processor_mask_copy(&va_range->uvm_lite_gpus, new_uvm_lite_gpus);

    va_block_context = uvm_va_space_block_context(va_space, mm);

    for_each_va_block_in_va_range(va_range, va_block) {
        uvm_processor_id_t id;
        uvm_va_block_region_t region = uvm_va_block_region_from_block(va_block);

        for_each_id_in_mask(id, set_accessed_by_processors) {
            status = uvm_va_block_set_accessed_by(va_block, va_block_context, id);
            if (status != NV_OK)
                goto out;
        }

        // Also mark CPU pages as dirty and remove remote mappings from the
        // new preferred location.
        uvm_mutex_lock(&va_block->lock);
        status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
                                           NULL,
                                           uvm_va_block_set_preferred_location_locked(va_block,
                                                                                      va_block_context,
                                                                                      region));

        if (out_tracker) {
            NV_STATUS tracker_status;

            tracker_status = uvm_tracker_add_tracker_safe(out_tracker, &va_block->tracker);
            if (status == NV_OK)
                status = tracker_status;
        }

        uvm_mutex_unlock(&va_block->lock);

        if (status != NV_OK)
            goto out;
    }

    // And lastly map all of the current UVM-Lite GPUs to the resident pages on
    // the new preferred location. Anything that's not resident right now will
    // get mapped on the next PreventMigration().
    status = range_map_uvm_lite_gpus(va_range, out_tracker);

out:
    uvm_processor_mask_cache_free(set_accessed_by_processors);
    uvm_processor_mask_cache_free(new_uvm_lite_gpus);
    uvm_processor_mask_cache_free(all_uvm_lite_gpus);

    return status;
}
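
// To summarize the ordering above: (1) unmap the union of the old and new
// UVM-Lite GPU sets, (2) compute the processors that drop out of UVM-Lite
// mode but keep accessed-by, (3) commit the new policy and uvm_lite_gpus
// mask, (4) re-establish accessed-by mappings and apply the new preferred
// location per block, and (5) map the UVM-Lite GPUs to whatever is already
// resident on the new preferred location. Committing the policy only after
// the unmap keeps block_page_check_mappings() consistent throughout.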

NV_STATUS uvm_va_range_set_accessed_by(uvm_va_range_t *va_range,
                                       uvm_processor_id_t processor_id,
                                       struct mm_struct *mm,
                                       uvm_tracker_t *out_tracker)
{
    NV_STATUS status = NV_OK;
    uvm_va_block_t *va_block;
    uvm_va_space_t *va_space = va_range->va_space;
    uvm_va_policy_t *policy = uvm_va_range_get_policy(va_range);
    uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, mm);
    uvm_processor_mask_t *new_uvm_lite_gpus;

    // va_block_context->scratch_processor_mask cannot be used since
    // range_unmap() calls uvm_va_space_block_context(), which re-initializes
    // the VA block context structure.
    new_uvm_lite_gpus = uvm_processor_mask_cache_alloc();
    if (!new_uvm_lite_gpus)
        return NV_ERR_NO_MEMORY;

    // If the range belongs to a non-migratable range group and processor_id is
    // a non-faultable GPU, check that it can map the preferred location.
    if (!uvm_range_group_all_migratable(va_space, va_range->node.start, va_range->node.end) &&
        UVM_ID_IS_GPU(processor_id) &&
        !uvm_processor_mask_test(&va_space->faultable_processors, processor_id) &&
        !uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(policy->preferred_location)], processor_id)) {
        status = NV_ERR_INVALID_DEVICE;
        goto out;
    }

    uvm_processor_mask_set(&policy->accessed_by, processor_id);

    // If the GPU is already a UVM-Lite GPU then there is nothing else to do.
    if (uvm_processor_mask_test(&va_range->uvm_lite_gpus, processor_id))
        goto out;

    // Calculate the new UVM-Lite GPUs mask, but don't update it in the
    // va_range yet so that we can keep block_page_check_mappings() happy while
    // updating the mappings.
    calc_uvm_lite_gpus_mask(va_space, policy->preferred_location, &policy->accessed_by, new_uvm_lite_gpus);

    if (uvm_processor_mask_test(new_uvm_lite_gpus, processor_id)) {
        // GPUs that become UVM-Lite GPUs need to unmap everything so that they
        // can map the preferred location.
        status = range_unmap(va_range, processor_id, out_tracker);
        if (status != NV_OK)
            goto out;
    }

    uvm_processor_mask_copy(&va_range->uvm_lite_gpus, new_uvm_lite_gpus);

    for_each_va_block_in_va_range(va_range, va_block) {
        status = uvm_va_block_set_accessed_by(va_block, va_block_context, processor_id);
        if (status != NV_OK)
            goto out;
    }

out:
    uvm_processor_mask_cache_free(new_uvm_lite_gpus);
    return status;
}

void uvm_va_range_unset_accessed_by(uvm_va_range_t *va_range,
                                    uvm_processor_id_t processor_id,
                                    uvm_tracker_t *out_tracker)
{
    uvm_range_group_range_t *rgr = NULL;

    // Mark all range group ranges within this VA range as migrated. We do this
    // to force uvm_range_group_set_migration_policy() to re-check the policy
    // state, since we're changing it here.
    uvm_range_group_for_each_range_in(rgr, va_range->va_space, va_range->node.start, va_range->node.end) {
        uvm_spin_lock(&rgr->range_group->migrated_ranges_lock);
        if (list_empty(&rgr->range_group_migrated_list_node))
            list_move_tail(&rgr->range_group_migrated_list_node, &rgr->range_group->migrated_ranges);
        uvm_spin_unlock(&rgr->range_group->migrated_ranges_lock);
    }

    uvm_processor_mask_clear(&uvm_va_range_get_policy(va_range)->accessed_by, processor_id);

    // If a UVM-Lite GPU is being removed from the accessed_by mask, it will
    // also stop being a UVM-Lite GPU unless it's also the preferred location.
    if (uvm_processor_mask_test(&va_range->uvm_lite_gpus, processor_id) &&
        !uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), processor_id, NUMA_NO_NODE)) {
        range_unmap(va_range, processor_id, out_tracker);
    }

    range_update_uvm_lite_gpus_mask(va_range);
}
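
// Note that, unlike uvm_va_range_set_accessed_by(), this function returns
// void: the NV_STATUS of the range_unmap() call above is dropped, and callers
// observe the pending unmap work only through out_tracker, which range_unmap()
// takes as a parameter.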

NV_STATUS uvm_va_range_set_read_duplication(uvm_va_range_t *va_range, struct mm_struct *mm)
{
    uvm_va_block_t *va_block;
    uvm_va_block_context_t *va_block_context;

    if (uvm_va_range_get_policy(va_range)->read_duplication == UVM_READ_DUPLICATION_ENABLED)
        return NV_OK;

    va_block_context = uvm_va_space_block_context(va_range->va_space, mm);

    for_each_va_block_in_va_range(va_range, va_block) {
        NV_STATUS status = uvm_va_block_set_read_duplication(va_block, va_block_context);

        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}

NV_STATUS uvm_va_range_unset_read_duplication(uvm_va_range_t *va_range, struct mm_struct *mm)
{
    uvm_va_block_t *va_block;
    uvm_va_block_context_t *va_block_context;
    NV_STATUS status;

    if (uvm_va_range_get_policy(va_range)->read_duplication == UVM_READ_DUPLICATION_DISABLED)
        return NV_OK;

    va_block_context = uvm_va_space_block_context(va_range->va_space, mm);

    for_each_va_block_in_va_range(va_range, va_block) {
        status = uvm_va_block_unset_read_duplication(va_block, va_block_context);

        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}

uvm_vma_wrapper_t *uvm_vma_wrapper_alloc(struct vm_area_struct *vma)
{
    uvm_vma_wrapper_t *vma_wrapper = nv_kmem_cache_zalloc(g_uvm_vma_wrapper_cache, NV_UVM_GFP_FLAGS);
    if (!vma_wrapper)
        return NULL;

    vma_wrapper->vma = vma;
    uvm_init_rwsem(&vma_wrapper->lock, UVM_LOCK_ORDER_LEAF);

    return vma_wrapper;
}

void uvm_vma_wrapper_destroy(uvm_vma_wrapper_t *vma_wrapper)
{
    if (!vma_wrapper)
        return;

    uvm_assert_rwsem_unlocked(&vma_wrapper->lock);

    kmem_cache_free(g_uvm_vma_wrapper_cache, vma_wrapper);
}

static NvU64 sked_reflected_pte_maker(uvm_page_table_range_vec_t *range_vec, NvU64 offset, void *caller_data)
{
    (void)caller_data;

    return range_vec->tree->hal->make_sked_reflected_pte();
}
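
// sked_reflected_pte_maker() ignores its offset argument because every PTE in
// the range receives the same SKED reflected value. For contrast, here is a
// sketch of a pte_maker callback that does use the offset, for a hypothetical
// physically contiguous sysmem mapping. It is illustrative only and not used
// in this file, and it assumes a make_pte() HAL hook taking (aperture,
// address, prot, flags):
//
//     static NvU64 contig_pte_maker(uvm_page_table_range_vec_t *range_vec,
//                                   NvU64 offset,
//                                   void *caller_data)
//     {
//         NvU64 base_pa = *(NvU64 *)caller_data;
//
//         // The callback is invoked once per page-sized offset within the
//         // range vector and returns the raw PTE value to write there.
//         return range_vec->tree->hal->make_pte(UVM_APERTURE_SYS,
//                                               base_pa + offset,
//                                               UVM_PROT_READ_WRITE_ATOMIC,
//                                               UVM_MMU_PTE_FLAGS_NONE);
//     }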

static NV_STATUS uvm_map_sked_reflected_range(uvm_va_space_t *va_space, UVM_MAP_DYNAMIC_PARALLELISM_REGION_PARAMS *params)
{
    NV_STATUS status;
    uvm_va_range_t *va_range = NULL;
    uvm_gpu_t *gpu;
    uvm_gpu_va_space_t *gpu_va_space;
    uvm_page_tree_t *page_tables;
    struct mm_struct *mm;

    if (uvm_api_range_invalid_4k(params->base, params->length))
        return NV_ERR_INVALID_ADDRESS;

    // The mm needs to be locked in order to remove stale HMM va_blocks.
    mm = uvm_va_space_mm_or_current_retain_lock(va_space);
    uvm_va_space_down_write(va_space);

    gpu = uvm_va_space_get_gpu_by_uuid_with_gpu_va_space(va_space, &params->gpuUuid);
    if (!gpu) {
        status = NV_ERR_INVALID_DEVICE;
        goto done;
    }

    // Check if the GPU can access the VA
    if (!uvm_gpu_can_address(gpu, params->base, params->length)) {
        status = NV_ERR_OUT_OF_RANGE;
        goto done;
    }

    gpu_va_space = va_space->gpu_va_spaces[uvm_id_gpu_index(gpu->id)];
    page_tables = &gpu_va_space->page_tables;

    // The VA range must exactly cover one supported GPU page
    if (!is_power_of_2(params->length) ||
        !IS_ALIGNED(params->base, params->length) ||
        !uvm_mmu_page_size_supported(page_tables, params->length)) {
        status = NV_ERR_INVALID_ADDRESS;
        goto done;
    }
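
    // For example, with a GPU supporting 4K, 64K and 2M page sizes, base =
    // 0x7f0000200000 with length = 0x200000 (2MB) passes all three checks,
    // while length = 0x300000 fails is_power_of_2() and base = 0x7f0000210000
    // with a 2MB length fails IS_ALIGNED().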

    status = uvm_va_range_create_sked_reflected(va_space, mm, params->base, params->length, &va_range);
    if (status != NV_OK) {
        UVM_DBG_PRINT_RL("Failed to create sked reflected VA range [0x%llx, 0x%llx)\n",
                         params->base, params->base + params->length);
        goto done;
    }

    va_range->sked_reflected.gpu_va_space = gpu_va_space;

    status = uvm_page_table_range_vec_init(page_tables,
                                           va_range->node.start,
                                           uvm_va_range_size(va_range),
                                           params->length,
                                           UVM_PMM_ALLOC_FLAGS_EVICT,
                                           &va_range->sked_reflected.pt_range_vec);
    if (status != NV_OK)
        goto done;

    status = uvm_page_table_range_vec_write_ptes(&va_range->sked_reflected.pt_range_vec,
                                                 UVM_MEMBAR_NONE, sked_reflected_pte_maker, NULL);

    if (status != NV_OK)
        goto done;

done:
    if (status != NV_OK && va_range != NULL)
        uvm_va_range_destroy(va_range, NULL);

    uvm_va_space_up_write(va_space);
    uvm_va_space_mm_or_current_release_unlock(va_space, mm);

    return status;
}

NV_STATUS uvm_api_map_dynamic_parallelism_region(UVM_MAP_DYNAMIC_PARALLELISM_REGION_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    // Notably, the ranges created by the UvmMapDynamicParallelismRegion() API
    // are referred to as "SKED reflected ranges" internally, as that name is
    // more descriptive.
    return uvm_map_sked_reflected_range(va_space, params);
}

NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params, struct file *filp)
{
    NV_STATUS status;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    uvm_va_range_t *va_range = NULL;
    uvm_gpu_t *gpu;
    struct mm_struct *mm;

    if (uvm_api_range_invalid(params->base, params->length))
        return NV_ERR_INVALID_ADDRESS;
    if (params->gpuAttributesCount > UVM_MAX_GPUS_V2)
        return NV_ERR_INVALID_ARGUMENT;

    if (g_uvm_global.conf_computing_enabled && params->gpuAttributesCount == 0)
        return NV_ERR_INVALID_ARGUMENT;

    // The mm needs to be locked in order to remove stale HMM va_blocks.
    mm = uvm_va_space_mm_or_current_retain_lock(va_space);
    uvm_va_space_down_write(va_space);

    status = uvm_va_range_create_semaphore_pool(va_space,
                                                mm,
                                                params->base,
                                                params->length,
                                                params->perGpuAttributes,
                                                params->gpuAttributesCount,
                                                &va_range);
    if (status != NV_OK)
        goto unlock;

    for_each_va_space_gpu(gpu, va_space) {
        status = va_range_register_gpu_semaphore_pool(va_range, gpu);
        if (status != NV_OK)
            goto done;

        if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->id))
            continue;

        status = va_range_add_gpu_va_space_semaphore_pool(va_range, gpu);
        if (status != NV_OK)
            goto done;
    }

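// Note the two cleanup labels below: "done" additionally destroys the
// partially constructed va_range when per-GPU registration fails above, while
// "unlock" is reached when uvm_va_range_create_semaphore_pool() itself failed
// and there is no va_range to tear down.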
done:
    if (status != NV_OK)
        uvm_va_range_destroy(va_range, NULL);

unlock:
    uvm_va_space_up_write(va_space);
    uvm_va_space_mm_or_current_release_unlock(va_space, mm);
    return status;
}

NV_STATUS uvm_test_va_range_info(UVM_TEST_VA_RANGE_INFO_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space;
    uvm_va_range_t *va_range;
    uvm_processor_id_t processor_id;
    uvm_va_policy_t *policy;
    struct vm_area_struct *vma;
    NV_STATUS status = NV_OK;
    struct mm_struct *mm;

    va_space = uvm_va_space_get(filp);

    mm = uvm_va_space_mm_or_current_retain_lock(va_space);
    uvm_va_space_down_read(va_space);

    va_range = uvm_va_range_find(va_space, params->lookup_address);
    if (!va_range) {
        status = uvm_hmm_va_range_info(va_space, mm, params);
        goto out;
    }

    policy = uvm_va_range_get_policy(va_range);
    params->va_range_start = va_range->node.start;
    params->va_range_end = va_range->node.end;

    // -Wall implies -Wenum-compare, so cast through int to avoid warnings
    BUILD_BUG_ON((int)UVM_READ_DUPLICATION_UNSET != (int)UVM_TEST_READ_DUPLICATION_UNSET);
    BUILD_BUG_ON((int)UVM_READ_DUPLICATION_ENABLED != (int)UVM_TEST_READ_DUPLICATION_ENABLED);
    BUILD_BUG_ON((int)UVM_READ_DUPLICATION_DISABLED != (int)UVM_TEST_READ_DUPLICATION_DISABLED);
    BUILD_BUG_ON((int)UVM_READ_DUPLICATION_MAX != (int)UVM_TEST_READ_DUPLICATION_MAX);
    params->read_duplication = policy->read_duplication;

    if (UVM_ID_IS_INVALID(policy->preferred_location)) {
        memset(&params->preferred_location, 0, sizeof(params->preferred_location));
        params->preferred_cpu_nid = NUMA_NO_NODE;
    }
    else {
        uvm_va_space_processor_uuid(va_space, &params->preferred_location, policy->preferred_location);
        params->preferred_cpu_nid = policy->preferred_nid;
    }

    params->accessed_by_count = 0;
    for_each_id_in_mask(processor_id, &policy->accessed_by)
        uvm_va_space_processor_uuid(va_space, &params->accessed_by[params->accessed_by_count++], processor_id);

    // -Wall implies -Wenum-compare, so cast through int to avoid warnings
    BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_INVALID != (int)UVM_VA_RANGE_TYPE_INVALID);
    BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_MANAGED != (int)UVM_VA_RANGE_TYPE_MANAGED);
    BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_EXTERNAL != (int)UVM_VA_RANGE_TYPE_EXTERNAL);
    BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_CHANNEL != (int)UVM_VA_RANGE_TYPE_CHANNEL);
    BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_SKED_REFLECTED != (int)UVM_VA_RANGE_TYPE_SKED_REFLECTED);
    BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_SEMAPHORE_POOL != (int)UVM_VA_RANGE_TYPE_SEMAPHORE_POOL);
    BUILD_BUG_ON((int)UVM_TEST_VA_RANGE_TYPE_MAX != (int)UVM_VA_RANGE_TYPE_MAX);
    params->type = va_range->type;

    switch (va_range->type) {
        case UVM_VA_RANGE_TYPE_MANAGED:
            params->managed.subtype = UVM_TEST_RANGE_SUBTYPE_UVM;
            if (!va_range->managed.vma_wrapper) {
                params->managed.is_zombie = NV_TRUE;
                goto out;
            }
            params->managed.is_zombie = NV_FALSE;
            vma = uvm_va_range_vma_check(va_range, mm);
            if (!vma) {
                // We aren't in the same mm as the one which owns the vma, and
                // we don't have that mm locked.
                params->managed.owned_by_calling_process = NV_FALSE;
                goto out;
            }
            params->managed.owned_by_calling_process = (mm == current->mm ? NV_TRUE : NV_FALSE);
            params->managed.vma_start = vma->vm_start;
            params->managed.vma_end = vma->vm_end - 1;
            break;
        default:
            break;
    }

out:
    uvm_va_space_up_read(va_space);
    uvm_va_space_mm_or_current_release_unlock(va_space, mm);
    return status;
}

NV_STATUS uvm_test_va_range_split(UVM_TEST_VA_RANGE_SPLIT_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    uvm_va_range_t *va_range;
    NV_STATUS status = NV_OK;

    if (!PAGE_ALIGNED(params->split_address + 1))
        return NV_ERR_INVALID_ADDRESS;
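
    // With 4KB pages, for example, split_address = 0x20ffff (the last byte of
    // a page) passes the check above, while a page-aligned 0x210000 does not:
    // split_address is interpreted as the inclusive end of the lower range
    // produced by the split.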

    uvm_va_space_down_write(va_space);

    va_range = uvm_va_range_find(va_space, params->split_address);
    if (!va_range ||
        va_range->node.end == params->split_address ||
        va_range->type != UVM_VA_RANGE_TYPE_MANAGED) {
        status = NV_ERR_INVALID_ADDRESS;
        goto out;
    }

    status = uvm_va_range_split(va_range, params->split_address, NULL);

out:
    uvm_va_space_up_write(va_space);
    return status;
}

NV_STATUS uvm_test_va_range_inject_split_error(UVM_TEST_VA_RANGE_INJECT_SPLIT_ERROR_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    uvm_va_range_t *va_range;
    struct mm_struct *mm;
    NV_STATUS status = NV_OK;

    mm = uvm_va_space_mm_or_current_retain_lock(va_space);
    uvm_va_space_down_write(va_space);

    va_range = uvm_va_range_find(va_space, params->lookup_address);
    if (!va_range) {
        if (!mm)
            status = NV_ERR_INVALID_ADDRESS;
        else
            status = uvm_hmm_test_va_block_inject_split_error(va_space, params->lookup_address);
    }
    else if (va_range->type != UVM_VA_RANGE_TYPE_MANAGED) {
        status = NV_ERR_INVALID_ADDRESS;
    }
    else {
        uvm_va_block_t *va_block;
        size_t split_index;

        va_range->inject_split_error = true;

        split_index = uvm_va_range_block_index(va_range, params->lookup_address);
        va_block = uvm_va_range_block(va_range, split_index);
        if (va_block) {
            uvm_va_block_test_t *block_test = uvm_va_block_get_test(va_block);

            if (block_test)
                block_test->inject_split_error = true;
        }
    }

    uvm_va_space_up_write(va_space);
    uvm_va_space_mm_or_current_release_unlock(va_space, mm);
    return status;
}

NV_STATUS uvm_test_va_range_inject_add_gpu_va_space_error(UVM_TEST_VA_RANGE_INJECT_ADD_GPU_VA_SPACE_ERROR_PARAMS *params,
                                                          struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    uvm_va_range_t *va_range;
    NV_STATUS status = NV_OK;

    uvm_va_space_down_write(va_space);

    va_range = uvm_va_range_find(va_space, params->lookup_address);
    if (!va_range) {
        status = NV_ERR_INVALID_ADDRESS;
        goto out;
    }

    va_range->inject_add_gpu_va_space_error = true;

out:
    uvm_va_space_up_write(va_space);
    return status;
}