1 /*******************************************************************************
2 Copyright (c) 2017-2023 NVIDIA Corporation
3
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to
6 deal in the Software without restriction, including without limitation the
7 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 sell copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 DEALINGS IN THE SOFTWARE.
21
22 *******************************************************************************/
23
24 #include "uvm_test.h"
25 #include "uvm_test_ioctl.h"
26
27 #include "uvm_global.h"
28 #include "uvm_gpu.h"
29 #include "uvm_pmm_sysmem.h"
30 #include "uvm_va_block.h"
31 #include "uvm_va_range.h"
32 #include "uvm_va_space.h"
33 #include "uvm_kvmalloc.h"
34 #include "uvm_hal.h"
35 #include "uvm_push.h"
36 #include "uvm_processors.h"
37
38 // Pre-allocated array used for dma-to-virt translations
39 static uvm_reverse_map_t g_sysmem_translations[PAGES_PER_UVM_VA_BLOCK];
40
41 // We use our own separate reverse map to easily specify contiguous DMA
42 // address ranges
43 static uvm_pmm_sysmem_mappings_t g_reverse_map;
44
// Check that the DMA addresses in the range defined by
// [base_dma_addr:base_dma_addr + uvm_va_block_size(va_block)] and page_mask
// are registered in the reverse map, using one call per entry. The returned
// virtual addresses must belong to va_block. The function assumes a 1:1
// dma-to-virt mapping for the whole VA block
static NV_STATUS check_reverse_map_block_page(uvm_va_block_t *va_block,
                                              NvU64 base_dma_addr,
                                              const uvm_page_mask_t *page_mask)
{
    uvm_page_index_t page_index;

    // Translate a single PAGE_SIZE DMA range per iteration so every page of
    // the block is validated individually
    for_each_va_block_page(page_index, va_block) {
        size_t num_pages;

        memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
        num_pages = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
                                                        base_dma_addr + page_index * PAGE_SIZE,
                                                        PAGE_SIZE,
                                                        g_sysmem_translations,
                                                        PAGES_PER_UVM_VA_BLOCK);
        if (!page_mask || uvm_page_mask_test(page_mask, page_index)) {
            // A page selected by the mask (or any page, when there is no
            // mask) must translate to exactly one entry pointing back at the
            // matching page of va_block
            TEST_CHECK_RET(num_pages == 1);
            TEST_CHECK_RET(g_sysmem_translations[0].va_block == va_block);
            // The translation holds a reference on the block (released
            // below), so at least two references must be live here
            TEST_CHECK_RET(nv_kref_read(&va_block->kref) >= 2);
            TEST_CHECK_RET(uvm_reverse_map_start(&g_sysmem_translations[0]) == uvm_va_block_cpu_page_address(va_block, page_index));
            TEST_CHECK_RET(uvm_va_block_region_num_pages(g_sysmem_translations[0].region) == 1);
            TEST_CHECK_RET(UVM_ID_IS_CPU(g_sysmem_translations[0].owner));
            // Drop the reference taken by the translation
            uvm_va_block_release(g_sysmem_translations[0].va_block);
        }
        else {
            // Pages not selected by the mask must not be in the reverse map
            TEST_CHECK_RET(num_pages == 0);
        }
    }

    return NV_OK;
}
81
82 // Check that the DMA addresses in the range defined by
83 // [base_dma_addr:base_dma_addr + uvm_va_block_size(va_block)] and page_mask
84 // are registered in the reverse map, using a single translation call. The
85 // returned virtual addresses must belong to va_block. The function assumes a
86 // 1:1 dma-to-virt mapping for the whole VA block
check_reverse_map_block_batch(uvm_va_block_t * va_block,NvU64 base_dma_addr,const uvm_page_mask_t * page_mask)87 static NV_STATUS check_reverse_map_block_batch(uvm_va_block_t *va_block,
88 NvU64 base_dma_addr,
89 const uvm_page_mask_t *page_mask)
90 {
91 size_t num_translations;
92 size_t num_pages;
93 size_t reverse_map_index;
94
95 memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
96 num_translations = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
97 base_dma_addr,
98 uvm_va_block_size(va_block),
99 g_sysmem_translations,
100 PAGES_PER_UVM_VA_BLOCK);
101 if (num_translations == 0 && page_mask)
102 TEST_CHECK_RET(uvm_page_mask_empty(page_mask));
103
104 num_pages = 0;
105 for (reverse_map_index = 0; reverse_map_index < num_translations; ++reverse_map_index) {
106 uvm_reverse_map_t *reverse_map = &g_sysmem_translations[reverse_map_index];
107 size_t num_reverse_map_pages = uvm_va_block_region_num_pages(reverse_map->region);
108
109 num_pages += num_reverse_map_pages;
110
111 TEST_CHECK_RET(reverse_map->va_block == va_block);
112 TEST_CHECK_RET(nv_kref_read(&va_block->kref) >= 2);
113 uvm_va_block_release(reverse_map->va_block);
114 TEST_CHECK_RET(UVM_ID_IS_CPU(reverse_map->owner));
115 }
116
117 if (page_mask)
118 TEST_CHECK_RET(num_pages == uvm_page_mask_weight(page_mask));
119 else
120 TEST_CHECK_RET(num_pages == uvm_va_block_num_cpu_pages(va_block));
121
122 return NV_OK;
123 }
124
// Check that the DMA addresses for all the CPU pages of the two given VA blocks
// are registered in the reverse map, using a single translation call. The
// returned virtual addresses must belong to one of the blocks. The function
// assumes a 1:1 dma-to-virt mapping for each VA block and that va_block1 is
// mapped behind va_block0.
static NV_STATUS check_reverse_map_two_blocks_batch(NvU64 base_dma_addr,
                                                    uvm_va_block_t *va_block0,
                                                    uvm_va_block_t *va_block1)
{
    size_t num_pages;
    size_t num_translations;
    size_t reverse_map_index;

    // Translate a whole VA-block-sized DMA window in a single call
    memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
    num_translations = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
                                                           base_dma_addr,
                                                           UVM_VA_BLOCK_SIZE,
                                                           g_sysmem_translations,
                                                           PAGES_PER_UVM_VA_BLOCK);
    // One translation entry per block is expected
    TEST_CHECK_RET(num_translations == 2);

    num_pages = 0;
    for (reverse_map_index = 0; reverse_map_index < num_translations; ++reverse_map_index) {
        uvm_va_block_t *block;
        uvm_reverse_map_t *reverse_map = &g_sysmem_translations[reverse_map_index];
        NvU64 virt_addr = uvm_reverse_map_start(reverse_map);
        size_t num_reverse_map_pages = uvm_va_block_region_num_pages(reverse_map->region);

        // Since va_block1 is mapped behind va_block0, the first entry must
        // belong to va_block0 (assumes translations are returned in DMA
        // address order — TODO confirm against the reverse map API)
        if (reverse_map_index == 0)
            block = va_block0;
        else
            block = va_block1;

        TEST_CHECK_RET(reverse_map->va_block == block);
        // The translation holds a reference on the block; release it after
        // the refcount check
        TEST_CHECK_RET(nv_kref_read(&block->kref) >= 2);
        uvm_va_block_release(reverse_map->va_block);
        // Each entry must cover its whole block, starting at the block start
        TEST_CHECK_RET(num_reverse_map_pages == uvm_va_block_num_cpu_pages(block));
        TEST_CHECK_RET(virt_addr == block->start);
        TEST_CHECK_RET(UVM_ID_IS_CPU(reverse_map->owner));

        num_pages += num_reverse_map_pages;
    }

    // Together the entries must cover both blocks completely
    TEST_CHECK_RET(num_pages == uvm_va_block_num_cpu_pages(va_block0) + uvm_va_block_num_cpu_pages(va_block1));

    return NV_OK;
}
172
// Base DMA address used for the mappings registered by the tests below
static const NvU64 g_base_dma_addr = UVM_VA_BLOCK_SIZE;
174
// This function adds the mappings for all the subregions in va_block defined
// by page_mask. g_base_dma_addr is used as the base DMA address for the whole
// VA block. The mappings are then optionally split to split_size (and merged
// back when merge is true), with the reverse map checked after every step.
// All the mappings are removed before returning and page_mask is cleared.
static NV_STATUS test_pmm_sysmem_reverse_map_single(uvm_va_block_t *va_block,
                                                    uvm_page_mask_t *page_mask,
                                                    uvm_chunk_size_t split_size,
                                                    bool merge)
{
    NV_STATUS status = NV_OK;
    uvm_va_block_region_t subregion;

    TEST_CHECK_RET(is_power_of_2(split_size));
    TEST_CHECK_RET(split_size >= PAGE_SIZE);

    // Add one mapping per masked subregion, using a 1:1 dma-to-virt layout
    for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
        TEST_CHECK_RET(is_power_of_2(uvm_va_block_region_size(subregion)));
        uvm_mutex_lock(&va_block->lock);
        status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
                                                         g_base_dma_addr + subregion.first * PAGE_SIZE,
                                                         va_block->start + subregion.first * PAGE_SIZE,
                                                         uvm_va_block_region_size(subregion),
                                                         va_block,
                                                         UVM_ID_CPU);
        uvm_mutex_unlock(&va_block->lock);
        if (status != NV_OK)
            return status;
    }

    TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
    TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);

    // Split each subregion's mapping into split_size pieces and re-check.
    // UVM_CHUNK_SIZE_MAX means "do not split"
    if (split_size != UVM_CHUNK_SIZE_MAX) {
        for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
            TEST_CHECK_RET(uvm_va_block_region_size(subregion) > split_size);

            uvm_mutex_lock(&va_block->lock);
            status = uvm_pmm_sysmem_mappings_split_gpu_mappings(&g_reverse_map,
                                                                g_base_dma_addr + subregion.first * PAGE_SIZE,
                                                                split_size);
            uvm_mutex_unlock(&va_block->lock);
            TEST_CHECK_RET(status == NV_OK);
        }

        TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
        TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
    }

    // Merge the split mappings back into one mapping per subregion and
    // re-check
    if (split_size != UVM_CHUNK_SIZE_MAX && merge) {
        for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
            // NOTE(review): unlike the split and remove calls, this merge is
            // done without holding va_block->lock — confirm that is
            // intentional
            uvm_pmm_sysmem_mappings_merge_gpu_mappings(&g_reverse_map,
                                                       g_base_dma_addr + subregion.first * PAGE_SIZE,
                                                       uvm_va_block_region_size(subregion));
        }

        TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
        TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
    }

    // Remove all the mappings. If the mappings were split and not merged
    // back, each split_size piece has to be removed individually
    for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
        NvU64 subregion_dma_addr = g_base_dma_addr + subregion.first * PAGE_SIZE;

        if (split_size == UVM_CHUNK_SIZE_MAX || merge) {
            uvm_mutex_lock(&va_block->lock);
            uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, subregion_dma_addr);
            uvm_mutex_unlock(&va_block->lock);
        }
        else {
            size_t chunk;
            size_t num_chunks = uvm_va_block_region_size(subregion) / split_size;
            TEST_CHECK_RET(num_chunks > 1);

            uvm_mutex_lock(&va_block->lock);

            for (chunk = 0; chunk < num_chunks; ++chunk)
                uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, subregion_dma_addr + chunk * split_size);

            uvm_mutex_unlock(&va_block->lock);
        }
    }

    // With an empty mask, the checkers must find no translations left
    uvm_page_mask_zero(page_mask);

    TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
    TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);

    return status;
}
262
// Scratch page mask shared by the tests below. Concurrent test instances are
// serialized by the global lock taken in uvm_test_pmm_sysmem
static uvm_page_mask_t g_page_mask;
264
test_pmm_sysmem_reverse_map_single_whole(uvm_va_space_t * va_space,NvU64 addr)265 static NV_STATUS test_pmm_sysmem_reverse_map_single_whole(uvm_va_space_t *va_space, NvU64 addr)
266 {
267 NV_STATUS status;
268 uvm_va_block_t *va_block;
269 const bool merge_array[] = {false, true};
270 const uvm_chunk_size_t chunk_split_array[] = { UVM_CHUNK_SIZE_4K, UVM_CHUNK_SIZE_64K, UVM_CHUNK_SIZE_MAX };
271 unsigned merge_index;
272 unsigned chunk_split_index;
273
274 status = uvm_va_block_find(va_space, addr, &va_block);
275 if (status != NV_OK)
276 return status;
277
278 TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block)));
279
280 for (merge_index = 0; merge_index < ARRAY_SIZE(merge_array); ++merge_index) {
281 for (chunk_split_index = 0; chunk_split_index < ARRAY_SIZE(chunk_split_array); ++chunk_split_index) {
282 // The reverse map has PAGE_SIZE granularity
283 if (chunk_split_array[chunk_split_index] < PAGE_SIZE)
284 continue;
285
286 uvm_page_mask_region_fill(&g_page_mask, uvm_va_block_region_from_block(va_block));
287
288 TEST_CHECK_RET(test_pmm_sysmem_reverse_map_single(va_block,
289 &g_page_mask,
290 chunk_split_array[chunk_split_index],
291 merge_array[merge_index]) == NV_OK);
292 }
293 }
294
295 return status;
296 }
297
test_pmm_sysmem_reverse_map_single_pattern(uvm_va_space_t * va_space,NvU64 addr)298 static NV_STATUS test_pmm_sysmem_reverse_map_single_pattern(uvm_va_space_t *va_space, NvU64 addr)
299 {
300 NV_STATUS status;
301 uvm_va_block_t *va_block;
302 uvm_page_index_t page_index;
303
304 status = uvm_va_block_find(va_space, addr, &va_block);
305 if (status != NV_OK)
306 return status;
307
308 uvm_page_mask_zero(&g_page_mask);
309
310 for_each_va_block_page(page_index, va_block) {
311 if (page_index % 2 == 0)
312 uvm_page_mask_set(&g_page_mask, page_index);
313 }
314
315 return test_pmm_sysmem_reverse_map_single(va_block, &g_page_mask, UVM_CHUNK_SIZE_MAX, false);
316 }
317
// This function maps two distinct VA blocks back to back in the reverse map
// and checks both the individual and the combined (batch) translations. It
// assumes that addr points at a VA range with 4 VA blocks
// with size UVM_VA_BLOCK_SIZE / 4.
static NV_STATUS test_pmm_sysmem_reverse_map_many_blocks(uvm_va_space_t *va_space, NvU64 addr)
{
    NV_STATUS status;
    uvm_va_block_t *va_block0;
    uvm_va_block_t *va_block1;
    NvU64 base_dma_addr0;
    NvU64 base_dma_addr1;

    // Pick two different blocks of the range (second and fourth quarters)
    status = uvm_va_block_find(va_space, addr + UVM_VA_BLOCK_SIZE / 4, &va_block0);
    if (status != NV_OK)
        return status;

    status = uvm_va_block_find(va_space, addr + 3 * UVM_VA_BLOCK_SIZE / 4, &va_block1);
    if (status != NV_OK)
        return status;

    TEST_CHECK_RET(va_block0 != va_block1);

    // Map va_block1 contiguously behind va_block0, both inside the
    // UVM_VA_BLOCK_SIZE window starting at g_base_dma_addr that
    // check_reverse_map_two_blocks_batch translates
    base_dma_addr0 = g_base_dma_addr + uvm_va_block_size(va_block0);
    base_dma_addr1 = base_dma_addr0 + uvm_va_block_size(va_block0);

    TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block0)));
    TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block1)));

    uvm_mutex_lock(&va_block0->lock);
    status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
                                                     base_dma_addr0,
                                                     va_block0->start,
                                                     uvm_va_block_size(va_block0),
                                                     va_block0,
                                                     UVM_ID_CPU);
    uvm_mutex_unlock(&va_block0->lock);
    TEST_CHECK_RET(status == NV_OK);

    uvm_mutex_lock(&va_block1->lock);
    status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
                                                     base_dma_addr1,
                                                     va_block1->start,
                                                     uvm_va_block_size(va_block1),
                                                     va_block1,
                                                     UVM_ID_CPU);
    uvm_mutex_unlock(&va_block1->lock);

    // Check each VA block individually
    if (status == NV_OK) {
        TEST_CHECK_GOTO(check_reverse_map_block_page(va_block0, base_dma_addr0, NULL) == NV_OK, error);
        TEST_CHECK_GOTO(check_reverse_map_block_batch(va_block0, base_dma_addr0, NULL) == NV_OK, error);
        TEST_CHECK_GOTO(check_reverse_map_block_page(va_block1, base_dma_addr1, NULL) == NV_OK, error);
        TEST_CHECK_GOTO(check_reverse_map_block_batch(va_block1, base_dma_addr1, NULL) == NV_OK, error);

        // Check both VA blocks at the same time
        TEST_CHECK_GOTO(check_reverse_map_two_blocks_batch(g_base_dma_addr, va_block0, va_block1) == NV_OK, error);

    // Both the success and the failure paths fall through to removing
    // va_block1's mapping; va_block0's mapping is removed unconditionally
    // below
    error:
        uvm_mutex_lock(&va_block1->lock);
        uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, base_dma_addr1);
        uvm_mutex_unlock(&va_block1->lock);
    }

    uvm_mutex_lock(&va_block0->lock);
    uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, base_dma_addr0);
    uvm_mutex_unlock(&va_block0->lock);

    return status;
}
385
386 // This function registers a non-uniform distribution of chunks (mixing 4K and 64K chunks)
387 // and merges them back to verify that the logic is working.
test_pmm_sysmem_reverse_map_merge(uvm_va_space_t * va_space,NvU64 addr)388 static NV_STATUS test_pmm_sysmem_reverse_map_merge(uvm_va_space_t *va_space, NvU64 addr)
389 {
390 NV_STATUS status = NV_OK;
391 uvm_va_block_t *va_block;
392 const unsigned chunks_64k_pos[] =
393 {
394 16,
395 64,
396 96,
397 192,
398 208,
399 224,
400 288,
401 320,
402 384,
403 480
404 };
405 uvm_page_index_t page_index;
406 unsigned i;
407
408 if (PAGE_SIZE != UVM_PAGE_SIZE_4K)
409 return NV_OK;
410
411 status = uvm_va_block_find(va_space, addr, &va_block);
412 if (status != NV_OK)
413 return status;
414
415 TEST_CHECK_RET(uvm_va_block_size(va_block) == UVM_VA_BLOCK_SIZE);
416
417 page_index = 0;
418 for (i = 0; i < ARRAY_SIZE(chunks_64k_pos); ++i) {
419 // Fill with 4K mappings until the next 64K mapping
420 while (page_index < chunks_64k_pos[i]) {
421 uvm_mutex_lock(&va_block->lock);
422 status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
423 g_base_dma_addr + page_index * PAGE_SIZE,
424 uvm_va_block_cpu_page_address(va_block, page_index),
425 PAGE_SIZE,
426 va_block,
427 UVM_ID_CPU);
428 uvm_mutex_unlock(&va_block->lock);
429 TEST_CHECK_RET(status == NV_OK);
430
431 ++page_index;
432 }
433
434 // Register the 64K mapping
435 uvm_mutex_lock(&va_block->lock);
436 status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
437 g_base_dma_addr + page_index * PAGE_SIZE,
438 uvm_va_block_cpu_page_address(va_block, page_index),
439 UVM_CHUNK_SIZE_64K,
440 va_block,
441 UVM_ID_CPU);
442 uvm_mutex_unlock(&va_block->lock);
443 TEST_CHECK_RET(status == NV_OK);
444
445 page_index += UVM_PAGE_SIZE_64K / PAGE_SIZE;
446 }
447
448 // Fill the tail with 4K mappings, too
449 while (page_index < PAGES_PER_UVM_VA_BLOCK) {
450 uvm_mutex_lock(&va_block->lock);
451 status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
452 g_base_dma_addr + page_index * PAGE_SIZE,
453 uvm_va_block_cpu_page_address(va_block, page_index),
454 PAGE_SIZE,
455 va_block,
456 UVM_ID_CPU);
457 uvm_mutex_unlock(&va_block->lock);
458 TEST_CHECK_RET(status == NV_OK);
459
460 ++page_index;
461 }
462
463 TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, NULL) == NV_OK);
464 TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, NULL) == NV_OK);
465
466 uvm_mutex_lock(&va_block->lock);
467 uvm_pmm_sysmem_mappings_merge_gpu_mappings(&g_reverse_map,
468 g_base_dma_addr,
469 uvm_va_block_size(va_block));
470 uvm_mutex_unlock(&va_block->lock);
471
472 TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, NULL) == NV_OK);
473 TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, NULL) == NV_OK);
474
475 uvm_mutex_lock(&va_block->lock);
476 uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, g_base_dma_addr);
477 uvm_mutex_unlock(&va_block->lock);
478
479 return status;
480 }
481
// Check that removing a mapping on eviction is harmless even when the
// mapping has already been removed through the regular path
static NV_STATUS test_pmm_sysmem_reverse_map_remove_on_eviction(uvm_va_space_t *va_space, NvU64 addr)
{
    uvm_va_block_t *va_block;
    NV_STATUS status = uvm_va_block_find(va_space, addr, &va_block);

    if (status != NV_OK)
        return status;

    TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block)));

    // Map the whole block at g_base_dma_addr
    uvm_mutex_lock(&va_block->lock);
    status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
                                                     g_base_dma_addr,
                                                     addr,
                                                     uvm_va_block_size(va_block),
                                                     va_block,
                                                     UVM_ID_CPU);
    uvm_mutex_unlock(&va_block->lock);

    // NOTE(review): this removal runs even if the add above failed —
    // presumably remove is a no-op for absent entries; confirm
    uvm_mutex_lock(&va_block->lock);
    uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, g_base_dma_addr);
    uvm_mutex_unlock(&va_block->lock);

    TEST_CHECK_RET(status == NV_OK);

    // The mapping was already removed above, so both calls below exercise
    // eviction-removal of an absent mapping; calling it twice checks the
    // operation is idempotent
    uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(&g_reverse_map, g_base_dma_addr);
    uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(&g_reverse_map, g_base_dma_addr);

    return NV_OK;
}
512
test_pmm_sysmem_reverse_map(uvm_va_space_t * va_space,NvU64 addr1,NvU64 addr2)513 static NV_STATUS test_pmm_sysmem_reverse_map(uvm_va_space_t *va_space, NvU64 addr1, NvU64 addr2)
514 {
515 NV_STATUS status = NV_OK;
516 uvm_gpu_t *volta_gpu = NULL;
517 uvm_gpu_t *gpu;
518
519 // Find a GPU with support for access counters with physical address
520 // notifications, since it is required to add or remove entries to the
521 // reverse map.
522 for_each_va_space_gpu(gpu, va_space) {
523 if (gpu->parent->access_counters_can_use_physical_addresses) {
524 // Initialize the reverse map.
525 status = uvm_pmm_sysmem_mappings_init(gpu, &g_reverse_map);
526 if (status != NV_OK)
527 return status;
528
529 volta_gpu = gpu;
530 break;
531 }
532 }
533
534 if (!volta_gpu)
535 return NV_ERR_INVALID_DEVICE;
536
537 status = test_pmm_sysmem_reverse_map_single_whole(va_space, addr1);
538
539 if (status == NV_OK)
540 status = test_pmm_sysmem_reverse_map_single_pattern(va_space, addr1);
541
542 if (status == NV_OK)
543 status = test_pmm_sysmem_reverse_map_many_blocks(va_space, addr2);
544
545 if (status == NV_OK)
546 status = test_pmm_sysmem_reverse_map_merge(va_space, addr1);
547
548 if (status == NV_OK)
549 status = test_pmm_sysmem_reverse_map_remove_on_eviction(va_space, addr1);
550
551 uvm_pmm_sysmem_mappings_deinit(&g_reverse_map);
552
553 return status;
554 }
555
uvm_test_pmm_sysmem(UVM_TEST_PMM_SYSMEM_PARAMS * params,struct file * filp)556 NV_STATUS uvm_test_pmm_sysmem(UVM_TEST_PMM_SYSMEM_PARAMS *params, struct file *filp)
557 {
558 NV_STATUS status;
559 uvm_va_space_t *va_space;
560
561 va_space = uvm_va_space_get(filp);
562
563 // Take the global lock to void interferences from different instances of
564 // the test, since we use a bunch of global variables
565 uvm_mutex_lock(&g_uvm_global.global_lock);
566 uvm_va_space_down_write(va_space);
567
568 if (uvm_pmm_sysmem_mappings_indirect_supported()) {
569 status = test_pmm_sysmem_reverse_map(va_space, params->range_address1, params->range_address2);
570 }
571 else {
572 UVM_TEST_PRINT("Skipping kernel_driver_pmm_sysmem test due to lack of support for radix_tree_replace_slot in Linux 4.10");
573 status = NV_OK;
574 }
575
576 uvm_va_space_up_write(va_space);
577 uvm_mutex_unlock(&g_uvm_global.global_lock);
578
579 return status;
580 }
581
cpu_chunk_map_on_cpu(uvm_cpu_chunk_t * chunk,void ** cpu_addr)582 static NV_STATUS cpu_chunk_map_on_cpu(uvm_cpu_chunk_t *chunk, void **cpu_addr)
583 {
584 struct page **pages;
585 uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);
586 size_t num_pages = uvm_cpu_chunk_num_pages(chunk);
587 NV_STATUS status = NV_OK;
588
589 UVM_ASSERT(cpu_addr);
590
591 // Map the CPU chunk on the CPU.
592 if (chunk_size > PAGE_SIZE) {
593 size_t i;
594
595 pages = uvm_kvmalloc(num_pages * sizeof(*pages));
596 if (!pages)
597 return NV_ERR_NO_MEMORY;
598
599 for (i = 0; i < num_pages; i++)
600 pages[i] = chunk->page + i;
601 }
602 else {
603 pages = &chunk->page;
604 }
605
606 *cpu_addr = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
607 if (!*cpu_addr)
608 status = NV_ERR_NO_MEMORY;
609
610 if (chunk_size > PAGE_SIZE)
611 uvm_kvfree(pages);
612
613 return status;
614 }
615
// Check that the given GPU can access the chunk through its DMA mapping:
// clear the chunk through a CPU mapping, write a pattern to it from the GPU,
// then verify the pattern through the CPU mapping
static NV_STATUS test_cpu_chunk_mapping_access(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
    NvU64 dma_addr;
    uvm_gpu_address_t gpu_addr;
    uvm_push_t push;
    NvU32 *cpu_addr;
    uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);
    size_t i;
    NV_STATUS status = NV_OK;

    // Map the chunk on the CPU and zero it so the GPU-written pattern is
    // unambiguous
    TEST_NV_CHECK_RET(cpu_chunk_map_on_cpu(chunk, (void **)&cpu_addr));
    memset(cpu_addr, 0, chunk_size);

    // Build a GPU-visible address for the chunk's DMA mapping
    dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
    gpu_addr = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_addr));

    // Write the pattern from the GPU and wait for the push to complete
    TEST_NV_CHECK_GOTO(uvm_push_begin_acquire(gpu->channel_manager,
                                              UVM_CHANNEL_TYPE_GPU_TO_CPU,
                                              NULL,
                                              &push,
                                              "GPU -> CPU {%s, %llx} %u bytes",
                                              uvm_gpu_address_aperture_string(gpu_addr),
                                              gpu_addr.address,
                                              chunk_size),
                       done);
    gpu->parent->ce_hal->memset_4(&push, gpu_addr, 0xdeadc0de, chunk_size);
    TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), done);

    // Every 32-bit word of the chunk must now hold the pattern
    for (i = 0; i < chunk_size / sizeof(*cpu_addr); i++) {
        if (cpu_addr[i] != 0xdeadc0de) {
            UVM_TEST_PRINT("GPU write of {%s, 0x%llx} %u bytes expected pattern 0x%08x, but offset %zu is 0x%08x\n",
                           uvm_gpu_address_aperture_string(gpu_addr),
                           gpu_addr.address,
                           chunk_size,
                           0xdeadc0de,
                           i * sizeof(*cpu_addr),
                           cpu_addr[i]);
            status = NV_ERR_INVALID_STATE;
            break;
        }
    }

done:
    // Drop the CPU mapping on both the success and the failure paths
    vunmap(cpu_addr);
    return status;
}
662
test_cpu_chunk_alloc(uvm_chunk_size_t size,uvm_cpu_chunk_alloc_flags_t flags,int nid,uvm_cpu_chunk_t ** out_chunk)663 static NV_STATUS test_cpu_chunk_alloc(uvm_chunk_size_t size,
664 uvm_cpu_chunk_alloc_flags_t flags,
665 int nid,
666 uvm_cpu_chunk_t **out_chunk)
667 {
668 uvm_cpu_chunk_t *chunk;
669 NV_STATUS status = NV_OK;
670 size_t i;
671
672 UVM_ASSERT(out_chunk);
673
674 // It is possible that the allocation fails due to lack of large pages
675 // rather than an API issue, which will result in a false negative.
676 // However, that should be very rare.
677 TEST_NV_CHECK_RET(uvm_cpu_chunk_alloc(size, flags, nid, &chunk));
678
679 // Check general state of the chunk:
680 // - chunk should be a physical chunk,
681 // - chunk should have the correct size,
682 // - chunk should have the correct number of base pages, and
683 TEST_CHECK_GOTO(uvm_cpu_chunk_is_physical(chunk), done);
684 TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(chunk) == size, done);
685 TEST_CHECK_GOTO(uvm_cpu_chunk_num_pages(chunk) == size / PAGE_SIZE, done);
686
687 // It is possible for the kernel to allocate a chunk on a NUMA node other
688 // than the one requested. However, that should not be an issue with
689 // sufficient memory on each NUMA node.
690 if (nid != NUMA_NO_NODE)
691 TEST_CHECK_GOTO(uvm_cpu_chunk_get_numa_node(chunk) == nid, done);
692
693 if (flags & UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO) {
694 NvU64 *cpu_addr;
695
696 TEST_NV_CHECK_GOTO(cpu_chunk_map_on_cpu(chunk, (void **)&cpu_addr), done);
697 for (i = 0; i < size / sizeof(*cpu_addr); i++)
698 TEST_CHECK_GOTO(cpu_addr[i] == 0, done);
699 vunmap(cpu_addr);
700 }
701
702 for (i = 0; i < size / PAGE_SIZE; i++) {
703 if (flags & UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO)
704 TEST_CHECK_GOTO(uvm_cpu_chunk_is_dirty(chunk, i), done);
705 else
706 TEST_CHECK_GOTO(!uvm_cpu_chunk_is_dirty(chunk, i), done);
707 }
708
709 done:
710 if (status == NV_OK)
711 *out_chunk = chunk;
712 else
713 uvm_cpu_chunk_free(chunk);
714
715 return status;
716 }
717
test_cpu_chunk_mapping_basic_verify(uvm_gpu_t * gpu,uvm_cpu_chunk_alloc_flags_t flags,uvm_chunk_size_t size)718 static NV_STATUS test_cpu_chunk_mapping_basic_verify(uvm_gpu_t *gpu,
719 uvm_cpu_chunk_alloc_flags_t flags,
720 uvm_chunk_size_t size)
721 {
722 uvm_cpu_chunk_t *chunk;
723 uvm_cpu_physical_chunk_t *phys_chunk;
724 NvU64 dma_addr;
725 NV_STATUS status = NV_OK;
726
727 TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, flags, NUMA_NO_NODE, &chunk));
728 phys_chunk = uvm_cpu_chunk_to_physical(chunk);
729
730 // Check state of the physical chunk:
731 // - gpu_mappings.max_entries should be 1 (for the static entry),
732 // - gpu_mappings.dma_addrs_mask should be 0.
733 // - no GPU mapping address.
734 TEST_CHECK_GOTO(phys_chunk->gpu_mappings.max_entries == 1, done);
735 TEST_CHECK_GOTO(uvm_parent_processor_mask_get_gpu_count(&phys_chunk->gpu_mappings.dma_addrs_mask) == 0, done);
736 TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == 0, done);
737 TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);
738
739 // Test basic access.
740 TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);
741
742 // Test double map is harmless.
743 dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
744 TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);
745 TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == dma_addr, done);
746 TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);
747
748 // Test unmap, remap.
749 uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu->parent);
750 TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == 0, done);
751 TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);
752 TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);
753
754 done:
755 // Test free with mapped GPUs still works.
756 uvm_cpu_chunk_free(chunk);
757 return status;;
758 }
759
test_cpu_chunk_mapping_basic(uvm_gpu_t * gpu,uvm_cpu_chunk_alloc_flags_t flags)760 static NV_STATUS test_cpu_chunk_mapping_basic(uvm_gpu_t *gpu, uvm_cpu_chunk_alloc_flags_t flags)
761 {
762 uvm_chunk_sizes_mask_t chunk_sizes = uvm_cpu_chunk_get_allocation_sizes();
763 uvm_chunk_size_t size;
764
765 for_each_chunk_size(size, chunk_sizes)
766 TEST_NV_CHECK_RET(test_cpu_chunk_mapping_basic_verify(gpu, flags, size));
767
768 return NV_OK;
769 }
770
// Map a single-page chunk on several GPUs and check that mapping/unmapping
// on one GPU does not disturb the mappings of the others
static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_t *gpu2)
{
    NV_STATUS status = NV_OK;
    uvm_cpu_chunk_t *chunk;
    uvm_cpu_physical_chunk_t *phys_chunk;
    NvU64 dma_addr_gpu1;

    TEST_NV_CHECK_RET(test_cpu_chunk_alloc(PAGE_SIZE, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
    phys_chunk = uvm_cpu_chunk_to_physical(chunk);

    // Map on gpu1 and gpu2, record gpu1's DMA address, then unmap gpu2 and
    // map gpu0
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu1), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu2), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu2), done);
    dma_addr_gpu1 = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu1->parent);
    uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu2->parent);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu0), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu0), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);

    // DMA mapping addresses for different GPUs live in different IOMMU spaces,
    // so it would be perfectly legal for them to have the same IOVA, and even
    // if they lived in the same space we freed gpu2's address so it would be
    // available for reuse.
    // What we need to ensure is that gpu1's address didn't change after we
    // mapped gpu0. It's true that we may get a false negative if both
    // addresses happened to alias and we had a bug in how the addresses are
    // shifted in the dense array, but that's better than intermittent failure.
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu1->parent) == dma_addr_gpu1, done);

done:
    uvm_cpu_chunk_free(chunk);
    return status;
}
807
// Split a CPU chunk into the next smaller allocation size, verify GPU mapping
// behavior across split and merge, then merge the pieces back and verify the
// original chunk is recovered. Recurses on the split chunks until the split
// size reaches PAGE_SIZE.
//
// Ownership: the input chunk is not freed here; the caller still owns it on
// return regardless of status.
static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
    NV_STATUS status = NV_OK;
    uvm_chunk_size_t size = uvm_cpu_chunk_get_size(chunk);
    uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
    size_t num_split_chunks;
    uvm_cpu_chunk_t **split_chunks;
    uvm_cpu_chunk_t *merged_chunk;
    uvm_chunk_size_t split_size;
    NvU64 phys_dma_addr;
    size_t map_chunk;
    size_t i;

    // Split into the next smaller supported allocation size.
    split_size = uvm_chunk_find_prev_size(alloc_sizes, size);
    UVM_ASSERT(split_size != UVM_CHUNK_SIZE_INVALID);
    num_split_chunks = size / split_size;
    split_chunks = uvm_kvmalloc_zero(num_split_chunks * sizeof(*split_chunks));

    if (!split_chunks)
        return NV_ERR_NO_MEMORY;

    // Verify the chunk can be mapped and accessed, then unmap it so the
    // split below starts from an unmapped parent.
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done_free);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done_free);
    uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu->parent);

    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);

    // Each logical chunk holds a reference on the parent chunk.
    TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == num_split_chunks, done);

    // Every logical chunk should be individually mappable and accessible.
    for (i = 0; i < num_split_chunks; i++) {
        TEST_CHECK_GOTO(split_chunks[i], done);
        TEST_CHECK_GOTO(uvm_cpu_chunk_is_logical(split_chunks[i]), done);
        TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(split_chunks[i]) == split_size, done);
        TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu), done);
        TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(split_chunks[i], gpu), done);
    }

    // Test CPU chunk merging.
    // NOTE: from here until the re-split below the split_chunks[] entries are
    // stale, so failures must jump to done_free (not done) to avoid freeing
    // them.
    merged_chunk = uvm_cpu_chunk_merge(split_chunks);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(merged_chunk) == size, done_free);
    TEST_CHECK_GOTO(merged_chunk == chunk, done_free);

    // Since all logical chunks were mapped, the entire merged chunk should
    // be accessible without needing to map it.
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu), done_free);

    // Test that GPU mappings are transferred after a split
    phys_dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);

    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);

    for (i = 0; i < num_split_chunks; i++) {
        NvU64 dma_addr;

        TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(split_chunks[i], gpu), done);

        // Each logical chunk's DMA address should be offset from the parent's
        // address by its position within the parent.
        dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[i], gpu->parent);
        TEST_CHECK_GOTO(dma_addr == phys_dma_addr + (i * split_size), done);
        uvm_cpu_chunk_unmap_parent_gpu_phys(split_chunks[i], gpu->parent);
    }

    // Test that mapping one logical chunk does not affect others.
    map_chunk = num_split_chunks / 2;
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[map_chunk], gpu), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(split_chunks[map_chunk], gpu), done);

    // All other logical chunks were unmapped above and should still report a
    // zero (unmapped) DMA address.
    for (i = 0; i < num_split_chunks; i++) {
        if (i != map_chunk)
            TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[i], gpu->parent) == 0, done);
    }

    // Recurse on each logical chunk until the split size reaches PAGE_SIZE.
    if (split_size > PAGE_SIZE) {
        for (i = 0; i < num_split_chunks; i++)
            TEST_NV_CHECK_GOTO(do_test_cpu_chunk_split_and_merge(split_chunks[i], gpu), done);
    }

    // Map all chunks before merging.
    for (i = 0; i < num_split_chunks; i++)
        TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu), done);

    // Test CPU chunk merging.
    merged_chunk = uvm_cpu_chunk_merge(split_chunks);

    // At this point, all split chunks have been merged.
    // Zeroing the count makes the cleanup loop at done: a no-op, since the
    // split_chunks[] entries are stale again.
    num_split_chunks = 0;

    TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(merged_chunk) == size, done_free);
    TEST_CHECK_GOTO(merged_chunk == chunk, done_free);

    // Since all logical chunks were mapped, the entire merged chunk should
    // be accessible without needing to map it.
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu), done_free);

done:
    for (i = 0; i < num_split_chunks; i++)
        uvm_cpu_chunk_free(split_chunks[i]);

done_free:
    uvm_kvfree(split_chunks);

    return status;
}
908
test_cpu_chunk_split_and_merge(uvm_gpu_t * gpu)909 static NV_STATUS test_cpu_chunk_split_and_merge(uvm_gpu_t *gpu)
910 {
911 uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
912 uvm_chunk_size_t size;
913
914 size = uvm_chunk_find_next_size(alloc_sizes, PAGE_SIZE);
915 for_each_chunk_size_from(size, alloc_sizes) {
916 uvm_cpu_chunk_t *chunk;
917 NV_STATUS status;
918
919 TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
920 status = do_test_cpu_chunk_split_and_merge(chunk, gpu);
921 uvm_cpu_chunk_free(chunk);
922
923 if (status != NV_OK)
924 return status;
925 }
926
927 return NV_OK;
928 }
929
// Verify that per-page dirty state survives a chunk split and a subsequent
// merge. On entry the chunk is expected to have exactly its even pages dirty
// (the pattern set up by test_cpu_chunk_dirty()). Recurses on the split
// chunks while the split size is larger than PAGE_SIZE.
//
// Ownership: the caller keeps ownership of the input chunk; on success it is
// merged back to its original size.
static NV_STATUS test_cpu_chunk_dirty_split(uvm_cpu_chunk_t *chunk)
{
    uvm_chunk_size_t size = uvm_cpu_chunk_get_size(chunk);
    uvm_chunk_size_t split_size;
    uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
    uvm_cpu_chunk_t **split_chunks;
    uvm_cpu_chunk_t *merged_chunk;
    size_t num_pages = size / PAGE_SIZE;
    size_t num_split_chunks;
    size_t num_split_chunk_pages;
    size_t i;
    NV_STATUS status = NV_OK;

    split_size = uvm_chunk_find_prev_size(alloc_sizes, size);
    UVM_ASSERT(split_size != UVM_CHUNK_SIZE_INVALID);
    num_split_chunks = size / split_size;
    num_split_chunk_pages = split_size / PAGE_SIZE;
    split_chunks = uvm_kvmalloc_zero(num_split_chunks * sizeof(*split_chunks));
    if (!split_chunks)
        return NV_ERR_NO_MEMORY;

    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);

    // The parent chunk had only the even pages set as dirty. Make sure
    // that's still the case after the split. Page indices are relative to
    // each logical chunk, so convert to parent-relative indices before the
    // even/odd check.
    for (i = 0; i < num_split_chunks; i++) {
        uvm_page_index_t chunk_page;

        for (chunk_page = 0; chunk_page < num_split_chunk_pages; chunk_page++) {
            if (((i * num_split_chunk_pages) + chunk_page) % 2)
                TEST_CHECK_GOTO(!uvm_cpu_chunk_is_dirty(split_chunks[i], chunk_page), done);
            else
                TEST_CHECK_GOTO(uvm_cpu_chunk_is_dirty(split_chunks[i], chunk_page), done);
        }
    }

    // Recurse while the split chunks can themselves be split.
    if (split_size > PAGE_SIZE) {
        for (i = 0; i < num_split_chunks; i++)
            TEST_NV_CHECK_GOTO(test_cpu_chunk_dirty_split(split_chunks[i]), done);
    }

    // Merge back and verify the dirty pattern is intact on the merged chunk.
    // Zeroing num_split_chunks makes the cleanup loop at done: a no-op,
    // since the split_chunks[] entries are stale after the merge.
    merged_chunk = uvm_cpu_chunk_merge(split_chunks);
    num_split_chunks = 0;
    for (i = 0; i < num_pages; i++) {
        if (i % 2)
            TEST_CHECK_GOTO(!uvm_cpu_chunk_is_dirty(merged_chunk, i), done_free);
        else
            TEST_CHECK_GOTO(uvm_cpu_chunk_is_dirty(merged_chunk, i), done_free);
    }

done:
    for (i = 0; i < num_split_chunks; i++)
        uvm_cpu_chunk_free(split_chunks[i]);

done_free:
    uvm_kvfree(split_chunks);
    return status;
}
988
// Exercise the per-page dirty tracking API on CPU chunks of every supported
// size: initial state after plain and zeroed allocations, marking individual
// pages clean/dirty, and (via test_cpu_chunk_dirty_split()) preservation of
// dirty state across split/merge.
//
// Note: the gpu parameter is currently unused by this test's body.
static NV_STATUS test_cpu_chunk_dirty(uvm_gpu_t *gpu)
{
    NV_STATUS status = NV_OK;
    uvm_cpu_chunk_t *chunk;
    uvm_chunk_size_t size;
    uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
    size_t i;

    for_each_chunk_size(size, alloc_sizes) {
        uvm_cpu_physical_chunk_t *phys_chunk;
        size_t num_pages;

        TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
        phys_chunk = uvm_cpu_chunk_to_physical(chunk);
        num_pages = uvm_cpu_chunk_num_pages(chunk);

        // A freshly allocated (non-zeroed) chunk starts fully clean.
        for (i = 0; i < num_pages; i++)
            TEST_CHECK_GOTO(!uvm_cpu_chunk_is_dirty(chunk, i), done);

        // The backing dirty_bitmap is only checked for multi-page chunks.
        if (size > PAGE_SIZE)
            TEST_CHECK_GOTO(bitmap_empty(phys_chunk->dirty_bitmap, num_pages), done);

        uvm_cpu_chunk_free(chunk);

        TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO, NUMA_NO_NODE, &chunk));
        phys_chunk = uvm_cpu_chunk_to_physical(chunk);
        num_pages = uvm_cpu_chunk_num_pages(chunk);

        // Allocating the chunk with UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO will set the
        // entire chunk as dirty.
        for (i = 0; i < num_pages; i++)
            TEST_CHECK_GOTO(uvm_cpu_chunk_is_dirty(chunk, i), done);

        if (size > PAGE_SIZE)
            TEST_CHECK_GOTO(bitmap_full(phys_chunk->dirty_bitmap, num_pages), done);

        // For chunks larger than PAGE_SIZE, marking individual pages in a
        // physical chunk should not affect the entire chunk. Clean the pages
        // one at a time, checking the bitmap population after each step.
        for (i = 0; i < num_pages; i++) {
            uvm_cpu_chunk_mark_clean(chunk, i);
            TEST_CHECK_GOTO(!uvm_cpu_chunk_is_dirty(chunk, i), done);
            if (size > PAGE_SIZE) {
                TEST_CHECK_GOTO(bitmap_empty(phys_chunk->dirty_bitmap, i + 1), done);
                TEST_CHECK_GOTO(bitmap_weight(phys_chunk->dirty_bitmap, num_pages) == num_pages - (i + 1), done);
            }
        }

        // Now mark the pages dirty one at a time, again checking the bitmap
        // population after each step.
        for (i = 0; i < num_pages; i++) {
            uvm_cpu_chunk_mark_dirty(chunk, i);
            TEST_CHECK_GOTO(uvm_cpu_chunk_is_dirty(chunk, i), done);
            if (size > PAGE_SIZE) {
                TEST_CHECK_GOTO(bitmap_full(phys_chunk->dirty_bitmap, i + 1), done);
                TEST_CHECK_GOTO(bitmap_weight(phys_chunk->dirty_bitmap, num_pages) == i + 1, done);
            }
        }

        // Leave only even pages as dirty
        for (i = 1; i < num_pages; i += 2)
            uvm_cpu_chunk_mark_clean(chunk, i);

        for (i = 0; i < num_pages; i++) {
            if (i % 2) {
                TEST_CHECK_GOTO(!uvm_cpu_chunk_is_dirty(chunk, i), done);
                if (size > PAGE_SIZE)
                    TEST_CHECK_GOTO(!test_bit(i, phys_chunk->dirty_bitmap), done);
            }
            else {
                TEST_CHECK_GOTO(uvm_cpu_chunk_is_dirty(chunk, i), done);
                if (size > PAGE_SIZE)
                    TEST_CHECK_GOTO(test_bit(i, phys_chunk->dirty_bitmap), done);
            }
        }

        // test_cpu_chunk_dirty_split() relies on the even-pages-dirty
        // pattern established just above.
        if (size > PAGE_SIZE)
            TEST_NV_CHECK_GOTO(test_cpu_chunk_dirty_split(chunk), done);

    // NOTE: this label is intentionally inside the loop so that each
    // iteration's chunk is freed both on failure and on the fall-through
    // (success) path.
    done:
        uvm_cpu_chunk_free(chunk);

        if (status != NV_OK)
            break;
    }

    return status;
}
1074
// Split the given chunk and verify that freeing logical chunks one at a time
// leaves the surviving siblings intact: still logical, still the split size,
// and with their per-GPU mapping state unchanged.
//
// Mapping pattern: logical chunk i is mapped on a given GPU iff bit
// uvm_id_gpu_index(gpu->id) of i is set, giving a distinct mapped/unmapped
// pattern per GPU.
//
// Assumes the chunk size is above PAGE_SIZE so a previous (smaller) split
// size exists; callers start from the second smallest allocation size.
//
// Ownership: this function consumes the input chunk. It is freed either
// directly (on early failure) or indirectly by freeing all of its split
// chunks, so the caller must not free it regardless of the return status.
NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk, uvm_va_space_t *va_space, uvm_processor_mask_t *test_gpus)
{
    NV_STATUS status = NV_OK;
    uvm_cpu_chunk_t **split_chunks;
    uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
    size_t size = uvm_cpu_chunk_get_size(chunk);
    uvm_chunk_size_t split_size = uvm_chunk_find_prev_size(alloc_sizes, size);
    size_t num_split_chunks = size / split_size;
    uvm_gpu_t *gpu;
    size_t i;
    size_t j;

    split_chunks = uvm_kvmalloc_zero(num_split_chunks * sizeof(*split_chunks));
    if (!split_chunks) {
        UVM_TEST_PRINT("Failed to allocate split chunk array memory");
        status = NV_ERR_NO_MEMORY;
        goto done_free;
    }

    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);

    // The caller does not free the input chunk.
    // So, we have to do it in this function. However, beyond this point
    // the input chunk will be freed by freeing the split chunks.
    chunk = NULL;

    // Map every other chunk.
    // The call to uvm_cpu_chunk_unmap_parent_gpu_phys() is here in case this
    // is part of a double split (see below). In that case, the parent chunk
    // would be either mapped or unmapped.
    //
    // If it is mapped, we have to unmap the subchunks in
    // order for the mapping check below to succeed. If it is unmapped, the
    // calls are noops.
    for (i = 0; i < num_split_chunks; i++) {
        for_each_va_space_gpu_in_mask(gpu, va_space, test_gpus) {
            if (i & (1 << uvm_id_gpu_index(gpu->id)))
                TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu), done);
            else
                uvm_cpu_chunk_unmap_parent_gpu_phys(split_chunks[i], gpu->parent);
        }
    }

    // Do a double split if we can
    if (split_size > PAGE_SIZE) {
        size_t chunk_to_be_resplit;

        // Test an even (mapped) chunk.
        chunk_to_be_resplit = num_split_chunks / 2;
        TEST_NV_CHECK_GOTO(do_test_cpu_chunk_free(split_chunks[chunk_to_be_resplit], va_space, test_gpus), done);

        // The chunk would have been freed by do_test_cpu_chunk_free().
        split_chunks[chunk_to_be_resplit] = NULL;

        // Test an odd (unmapped) chunk.
        chunk_to_be_resplit += 1;
        TEST_NV_CHECK_GOTO(do_test_cpu_chunk_free(split_chunks[chunk_to_be_resplit], va_space, test_gpus), done);
        split_chunks[chunk_to_be_resplit] = NULL;
    }

    // Free the remaining chunks one at a time; after each free, re-check
    // every surviving sibling. NULL entries mark chunks already consumed.
    for (i = 0; i < num_split_chunks; i++) {
        if (!split_chunks[i])
            continue;

        uvm_cpu_chunk_free(split_chunks[i]);
        split_chunks[i] = NULL;

        for (j = i + 1; j < num_split_chunks; j++) {
            if (!split_chunks[j])
                continue;

            TEST_CHECK_GOTO(uvm_cpu_chunk_is_logical(split_chunks[j]), done);
            TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(split_chunks[j]) == split_size, done);
            for_each_va_space_gpu_in_mask(gpu, va_space, test_gpus) {
                // A non-zero DMA address means the chunk is mapped on that GPU.
                if (j & (1 << uvm_id_gpu_index(gpu->id)))
                    TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[j], gpu->parent), done);
                else
                    TEST_CHECK_GOTO(!uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[j], gpu->parent), done);
            }
        }
    }

done:
    for (i = 0; i < num_split_chunks; i++) {
        if (split_chunks[i])
            uvm_cpu_chunk_free(split_chunks[i]);
    }

done_free:
    // chunk is non-NULL only if the split never happened.
    if (chunk)
        uvm_cpu_chunk_free(chunk);

    uvm_kvfree(split_chunks);
    return status;
}
1170
test_cpu_chunk_free(uvm_va_space_t * va_space,uvm_processor_mask_t * test_gpus)1171 NV_STATUS test_cpu_chunk_free(uvm_va_space_t *va_space, uvm_processor_mask_t *test_gpus)
1172 {
1173 uvm_cpu_chunk_t *chunk;
1174 uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
1175 size_t size = uvm_chunk_find_next_size(alloc_sizes, PAGE_SIZE);
1176
1177 for_each_chunk_size_from(size, alloc_sizes) {
1178 TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
1179 TEST_NV_CHECK_RET(do_test_cpu_chunk_free(chunk, va_space, test_gpus));
1180 }
1181
1182 return NV_OK;
1183 }
1184
test_cpu_chunk_numa_alloc(uvm_va_space_t * va_space)1185 static NV_STATUS test_cpu_chunk_numa_alloc(uvm_va_space_t *va_space)
1186 {
1187 uvm_cpu_chunk_t *chunk;
1188 uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
1189 size_t size;
1190
1191 for_each_chunk_size(size, alloc_sizes) {
1192 int nid;
1193
1194 for_each_possible_uvm_node(nid) {
1195 // Do not test CPU allocation on nodes that have no memory or CPU
1196 if (!node_state(nid, N_MEMORY) || !node_state(nid, N_CPU))
1197 continue;
1198
1199 TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, nid, &chunk));
1200 uvm_cpu_chunk_free(chunk);
1201 }
1202 }
1203
1204 return NV_OK;
1205 }
1206
// UVM_TEST_CPU_CHUNK_API ioctl handler: runs the CPU chunk API tests defined
// in this file against the caller's VA space.
//
// Tests run only on GPUs that are both registered in the VA space and
// accessible from the CPU. The VA space lock is held in read mode for the
// duration of the tests.
//
// Note: params is currently unused.
NV_STATUS uvm_test_cpu_chunk_api(UVM_TEST_CPU_CHUNK_API_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    uvm_processor_mask_t *test_gpus;
    uvm_gpu_t *gpu;
    NV_STATUS status = NV_OK;

    test_gpus = uvm_processor_mask_cache_alloc();
    if (!test_gpus)
        return NV_ERR_NO_MEMORY;

    uvm_va_space_down_read(va_space);

    // Restrict testing to GPUs the CPU can access.
    uvm_processor_mask_and(test_gpus, &va_space->registered_gpus, &va_space->accessible_from[uvm_id_value(UVM_ID_CPU)]);

    // Per-GPU tests.
    for_each_va_space_gpu_in_mask(gpu, va_space, test_gpus) {
        TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_basic(gpu, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE), done);
        TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_basic(gpu, UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO), done);
        TEST_NV_CHECK_GOTO(test_cpu_chunk_split_and_merge(gpu), done);
        TEST_NV_CHECK_GOTO(test_cpu_chunk_dirty(gpu), done);
    }

    // Multi-GPU and CPU-only tests.
    TEST_NV_CHECK_GOTO(test_cpu_chunk_free(va_space, test_gpus), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_numa_alloc(va_space), done);

    // The chunk mapping-array test needs at least three GPUs.
    if (uvm_processor_mask_get_gpu_count(test_gpus) >= 3) {
        uvm_gpu_t *gpu2, *gpu3;

        gpu = uvm_processor_mask_find_first_va_space_gpu(test_gpus, va_space);
        gpu2 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu);
        gpu3 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu2);
        TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_array(gpu, gpu2, gpu3), done);
    }

done:
    uvm_va_space_up_read(va_space);
    uvm_processor_mask_cache_free(test_gpus);
    return status;
}
1245