/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_global.h"
#include "uvm_channel.h"
#include "uvm_hal.h"
#include "uvm_mem.h"
#include "uvm_push.h"
#include "uvm_test.h"
#include "uvm_test_rng.h"
#include "uvm_va_space.h"
#include "uvm_tracker.h"
#include "uvm_thread_context.h"
#include "uvm_gpu_semaphore.h"
#include "uvm_kvmalloc.h"

#define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU     1024
#define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU_EMU 64
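
// Simulated/emulated devices process channel work much more slowly than real
// silicon, so presumably the smaller _EMU iteration count keeps the test's
// runtime reasonable there (see the num_simulated_devices check in
// test_ordering() below).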

// Schedule pushes one after another on all GPUs and channel types. Each push
// copies the latest counter value into the adjacent memory location in a
// buffer and then increments it there. Afterwards, verify on the CPU that all
// the values are correct.
static NV_STATUS test_ordering(uvm_va_space_t *va_space)
{
    NV_STATUS status;
    uvm_gpu_t *gpu;
    bool exclude_proxy_channel_type;
    NvU32 i, j;
    uvm_rm_mem_t *mem = NULL;
    NvU32 *host_mem;
    uvm_push_t push;
    NvU64 gpu_va;
    uvm_tracker_t tracker = UVM_TRACKER_INIT();
    NvU32 value = 0;
    const NvU32 iters_per_channel_type_per_gpu = g_uvm_global.num_simulated_devices > 0 ?
                                                     TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU_EMU :
                                                     TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU;
    const NvU32 values_count = iters_per_channel_type_per_gpu;
    const size_t buffer_size = sizeof(NvU32) * values_count;

    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that the GPU can access system memory without using
    // encryption.
    if (g_uvm_global.conf_computing_enabled)
        return NV_OK;

    gpu = uvm_va_space_find_first_gpu(va_space);
    TEST_CHECK_RET(gpu != NULL);

    status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, 0, &mem);
    TEST_CHECK_GOTO(status == NV_OK, done);

    host_mem = (NvU32 *)uvm_rm_mem_get_cpu_va(mem);
    memset(host_mem, 0, buffer_size);

    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Initial memset");
    TEST_CHECK_GOTO(status == NV_OK, done);

    gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel)).address;

    // The semaphore release as part of uvm_push_end() will do the membar
    uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
    gpu->parent->ce_hal->memset_v_4(&push, gpu_va, 0, buffer_size);

    uvm_push_end(&push);

    TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), done);

    exclude_proxy_channel_type = uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent);

    for (i = 0; i < iters_per_channel_type_per_gpu; ++i) {
        for (j = 0; j < UVM_CHANNEL_TYPE_CE_COUNT; ++j) {
            uvm_channel_type_t channel_type = j;

            // Proxy channels don't support the virtual memcopies that are
            // about to be pushed, so don't test the proxy channel type on any
            // of the GPUs.
            if (exclude_proxy_channel_type && (channel_type == uvm_channel_proxy_channel_type()))
                continue;

            for_each_va_space_gpu(gpu, va_space) {
                NvU64 gpu_va_base;
                NvU64 gpu_va_src;
                NvU64 gpu_va_dst;

                status = uvm_push_begin_acquire(gpu->channel_manager,
                                                channel_type,
                                                &tracker,
                                                &push,
                                                "memcpy and inc to %u",
                                                value + 1);
                TEST_CHECK_GOTO(status == NV_OK, done);

                gpu_va_base = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel)).address;
                gpu_va_src = gpu_va_base + (value % values_count) * sizeof(NvU32);
                gpu_va_dst = gpu_va_base + ((value + 1) % values_count) * sizeof(NvU32);

                // The semaphore reduction will do a membar before the
                // reduction
                uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
                gpu->parent->ce_hal->memcopy_v_to_v(&push, gpu_va_dst, gpu_va_src, sizeof(NvU32));

                // The following reduction is done from the same GPU, but the
                // previous memcpy is to uncached sysmem and that bypasses L2
                // and hence requires a SYSMEMBAR to be ordered.
                gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va_dst, ++value);

                uvm_push_end(&push);

                uvm_tracker_clear(&tracker);
                TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), done);
            }
        }
    }
    status = uvm_tracker_wait(&tracker);
    TEST_CHECK_GOTO(status == NV_OK, done);

    // At this point, the following should hold:
    //   mem[value % values_count] == value
    //   mem[(value + 1) % values_count] == value + 1 - values_count
    // And in general, for i in [0, values_count):
    //   mem[(value + 1 + i) % values_count] == value + 1 - values_count + i
    // Verify that below.
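    // As a hypothetical worked example (values not from a real run): with
    // values_count == 4 and a final counter of value == 10, the buffer is
    // expected to contain:
    //   mem[2] == 10 (the final value), mem[3] == 7, mem[0] == 8, mem[1] == 9
    // i.e. the last values_count increments laid out modulo values_count.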

    for (i = 0; i < values_count; ++i) {
        NvU32 index = (value + 1 + i) % values_count;
        NvU32 expected = (value + 1 + i) - values_count;
        if (host_mem[index] != expected) {
            UVM_TEST_PRINT("Bad value at host_mem[%u] = %u instead of %u\n", index, host_mem[index], expected);
            status = NV_ERR_INVALID_STATE;
            goto done;
        }
    }

done:
    uvm_tracker_wait(&tracker);
    uvm_rm_mem_free(mem);

    return status;
}

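// Bump a channel's tracking semaphore payload past the last completed value
// and verify that updating channel progress flags this as a fatal
// NV_ERR_INVALID_STATE. The channel manager is left broken afterwards, so it
// is destroyed and re-created before moving on to the next GPU.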
static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_t *channel;
        NvU64 completed_value;

        // The GPU channel manager is destroyed and then re-created after
        // the test, so this test requires exclusive access to the GPU.
        TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);

        channel = &gpu->channel_manager->channel_pools[0].channels[0];
        completed_value = uvm_channel_update_completed_value(channel);
        uvm_gpu_semaphore_set_payload(&channel->tracking_sem.semaphore, (NvU32)completed_value + 1);

        TEST_NV_CHECK_RET(uvm_global_get_status());
        uvm_channel_update_progress_all(channel);
        TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);

        uvm_channel_manager_destroy(gpu->channel_manager);

        // Destruction will hit the error again, so clear it one more time.
        uvm_global_reset_fatal_error();

        TEST_NV_CHECK_RET(uvm_channel_manager_create(gpu, &gpu->channel_manager));
    }

    return NV_OK;
}

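// Force a fatal channel (RC) error and verify that the error is reported
// consistently through the channel status, the fatal GPFIFO entry's push
// info, tracker waits, and the global error status.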
static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
{
    uvm_push_t push;
    uvm_channel_pool_t *pool;
    uvm_gpfifo_entry_t *fatal_entry;
    uvm_push_info_t *push_info;
    int fatal_line;
    uvm_tracker_entry_t tracker_entry;
    NV_STATUS status;
    uvm_tracker_t tracker = UVM_TRACKER_INIT();
    uvm_channel_manager_t *manager = gpu->channel_manager;

    // Submit a bunch of successful pushes on each channel first so that the
    // fatal one is behind a bunch of work (notably more than
    // uvm_channel_update_progress() completes by default).
    uvm_for_each_pool(pool, manager) {
        uvm_channel_t *channel;

        // Skip LCIC channels as those can't accept any pushes
        if (uvm_channel_pool_is_lcic(pool))
            continue;

        uvm_for_each_channel_in_pool(channel, pool) {
            NvU32 i;
            for (i = 0; i < 512; ++i) {
                status = uvm_push_begin_on_channel(channel, &push, "Non-faulting push");
                TEST_CHECK_RET(status == NV_OK);

                uvm_push_end(&push);
            }
        }
    }

    // Check RC on a proxy channel (SR-IOV heavy) or an internal channel (any
    // other mode). It is not allowed to use a virtual address in a memset
    // pushed to a proxy channel, so we use a physical address instead.
    if (uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent)) {
        uvm_gpu_address_t dst_address;

        // Save the line number the push that's supposed to fail was started on
        fatal_line = __LINE__ + 1;
        TEST_NV_CHECK_RET(uvm_push_begin(manager, uvm_channel_proxy_channel_type(), &push, "Fatal push 0x%X", 0xBAD));

        // Memset targeting a physical address beyond the vidmem size. The
        // passed physical address is not the vidmem size reported by RM,
        // because the reported size can be smaller than the actual physical
        // size, such that accessing a GPA at the reported size may be allowed
        // by VMMU.
        //
        // GA100 GPUs have way less than UVM_GPU_MAX_PHYS_MEM vidmem, so using
        // that value as the physical address should result in an error.
        dst_address = uvm_gpu_address_physical(UVM_APERTURE_VID, UVM_GPU_MAX_PHYS_MEM - 8);
        gpu->parent->ce_hal->memset_8(&push, dst_address, 0, 8);
    }
    else {
        fatal_line = __LINE__ + 1;
        TEST_NV_CHECK_RET(uvm_push_begin(manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Fatal push 0x%X", 0xBAD));

        // Memset that should fault on 0xFFFFFFFF
        gpu->parent->ce_hal->memset_v_4(&push, 0xFFFFFFFF, 0, 4);
    }

    uvm_push_end(&push);

    uvm_push_get_tracker_entry(&push, &tracker_entry);
    uvm_tracker_overwrite_with_push(&tracker, &push);

    status = uvm_channel_manager_wait(manager);
    TEST_CHECK_RET(status == NV_ERR_RC_ERROR);

    TEST_CHECK_RET(uvm_channel_get_status(push.channel) == NV_ERR_RC_ERROR);
    fatal_entry = uvm_channel_get_fatal_entry(push.channel);
    TEST_CHECK_RET(fatal_entry != NULL);

    push_info = fatal_entry->push_info;
    TEST_CHECK_RET(push_info != NULL);
    TEST_CHECK_RET(push_info->line == fatal_line);
    TEST_CHECK_RET(strcmp(push_info->function, __FUNCTION__) == 0);
    TEST_CHECK_RET(strcmp(push_info->filename, kbasename(__FILE__)) == 0);
    if (uvm_push_info_is_tracking_descriptions())
        TEST_CHECK_RET(strcmp(push_info->description, "Fatal push 0xBAD") == 0);

    TEST_CHECK_RET(uvm_global_get_status() == NV_ERR_RC_ERROR);

    // Check that waiting for an entry after a global fatal error makes the
    // entry completed.
    TEST_CHECK_RET(!uvm_tracker_is_entry_completed(&tracker_entry));
    TEST_CHECK_RET(uvm_tracker_wait_for_entry(&tracker_entry) == NV_ERR_RC_ERROR);
    TEST_CHECK_RET(uvm_tracker_is_entry_completed(&tracker_entry));

    // Check that waiting for a tracker after a global fatal error clears all
    // the entries from the tracker.
    TEST_CHECK_RET(!uvm_tracker_is_empty(&tracker));
    TEST_CHECK_RET(uvm_tracker_wait(&tracker) == NV_ERR_RC_ERROR);
    TEST_CHECK_RET(uvm_tracker_is_empty(&tracker));

    TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_RC_ERROR);

    return NV_OK;
}

static NV_STATUS test_rc(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    for_each_va_space_gpu(gpu, va_space) {
        NV_STATUS test_status, create_status;

        // The GPU channel manager is destroyed and then re-created after
        // testing RC, so this test requires exclusive access to the GPU.
        TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);

        g_uvm_global.disable_fatal_error_assert = true;
        test_status = uvm_test_rc_for_gpu(gpu);
        g_uvm_global.disable_fatal_error_assert = false;

        uvm_channel_manager_destroy(gpu->channel_manager);
        create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);

        TEST_NV_CHECK_RET(test_status);
        TEST_NV_CHECK_RET(create_status);
    }

    return NV_OK;
}

static NV_STATUS uvm_test_iommu_rc_for_gpu(uvm_gpu_t *gpu)
{
    NV_STATUS status = NV_OK;

#if defined(NV_IOMMU_IS_DMA_DOMAIN_PRESENT) && defined(CONFIG_IOMMU_DEFAULT_DMA_STRICT)
    // This test needs the DMA API to immediately invalidate IOMMU mappings on
    // DMA unmap (as opposed to lazy invalidation). The policy can be changed
    // on boot (e.g. iommu.strict=1), but there isn't a good way to check for
    // the runtime setting. CONFIG_IOMMU_DEFAULT_DMA_STRICT checks for the
    // default value.

    uvm_push_t push;
    uvm_mem_t *sysmem;
    uvm_gpu_address_t sysmem_dma_addr;
    char *cpu_ptr = NULL;
    const size_t data_size = PAGE_SIZE;
    size_t i;

    struct device *dev = &gpu->parent->pci_dev->dev;
    struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

    // Check that the IOMMU domain is controlled by the Linux DMA API
    if (!domain || !iommu_is_dma_domain(domain))
        return NV_OK;

    // Only run if ATS is enabled with a 64kB base page. Otherwise the CE
    // doesn't get a response when writing to an unmapped location.
    if (!g_uvm_global.ats.enabled || PAGE_SIZE != UVM_PAGE_SIZE_64K)
        return NV_OK;

    status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(data_size, NULL, &sysmem);
    TEST_NV_CHECK_RET(status);

    status = uvm_mem_map_gpu_phys(sysmem, gpu);
    TEST_NV_CHECK_GOTO(status, done);

    cpu_ptr = uvm_mem_get_cpu_addr_kernel(sysmem);
    sysmem_dma_addr = uvm_mem_gpu_address_physical(sysmem, gpu, 0, data_size);

    status = uvm_push_begin(gpu->channel_manager,
                            UVM_CHANNEL_TYPE_GPU_TO_CPU,
                            &push,
                            "Test memset to IOMMU mapped sysmem");
    TEST_NV_CHECK_GOTO(status, done);

    gpu->parent->ce_hal->memset_8(&push, sysmem_dma_addr, 0, data_size);

    status = uvm_push_end_and_wait(&push);
    TEST_NV_CHECK_GOTO(status, done);

    // Check that we have zeroed the memory
    for (i = 0; i < data_size; ++i)
        TEST_CHECK_GOTO(cpu_ptr[i] == 0, done);

    // Unmap the buffer and try writing again to the same address
    uvm_mem_unmap_gpu_phys(sysmem, gpu);

    status = uvm_push_begin(gpu->channel_manager,
                            UVM_CHANNEL_TYPE_GPU_TO_CPU,
                            &push,
                            "Test memset after IOMMU unmap");
    TEST_NV_CHECK_GOTO(status, done);

    gpu->parent->ce_hal->memset_4(&push, sysmem_dma_addr, 0xffffffff, data_size);

    status = uvm_push_end_and_wait(&push);

    TEST_CHECK_GOTO(status == NV_ERR_RC_ERROR, done);
    TEST_CHECK_GOTO(uvm_channel_get_status(push.channel) == NV_ERR_RC_ERROR, done);
    TEST_CHECK_GOTO(uvm_global_reset_fatal_error() == NV_ERR_RC_ERROR, done);

    // Check that the writes after the unmap did not succeed
    for (i = 0; i < data_size; ++i)
        TEST_CHECK_GOTO(cpu_ptr[i] == 0, done);

    status = NV_OK;

done:
    uvm_mem_free(sysmem);
#endif
    return status;
}

static NV_STATUS test_iommu(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    for_each_va_space_gpu(gpu, va_space) {
        NV_STATUS test_status, create_status;

        // The GPU channel manager is destroyed and then re-created after
        // testing the ATS RC fault, so this test requires exclusive access
        // to the GPU.
        TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);

        g_uvm_global.disable_fatal_error_assert = true;
        test_status = uvm_test_iommu_rc_for_gpu(gpu);
        g_uvm_global.disable_fatal_error_assert = false;

        uvm_channel_manager_destroy(gpu->channel_manager);
        create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);

        TEST_NV_CHECK_RET(test_status);
        TEST_NV_CHECK_RET(create_status);
    }

    return NV_OK;
}

typedef struct
{
    uvm_push_t push;
    uvm_tracker_t tracker;
    uvm_gpu_semaphore_t semaphore;

    // Last counter value queued for this stream and the number of copies of
    // it written by set_counter()
    NvU32 queued_counter_value;
    NvU32 queued_counter_repeat;

    // Buffer of counters set by this stream, plus snapshot buffers used to
    // verify this stream's counters and those of a randomly picked other
    // stream
    uvm_rm_mem_t *counter_mem;
    uvm_rm_mem_t *counter_snapshots_mem;
    uvm_rm_mem_t *other_stream_counter_snapshots_mem;
    NvU32 *counter_snapshots;
    NvU32 *other_stream_counter_snapshots;
    NvU32 *other_stream_counter_expected;
} uvm_test_stream_t;

#define MAX_COUNTER_REPEAT_COUNT (10 * 1024)

// For each iteration, snapshot the first and last counter value
#define TEST_SNAPSHOT_SIZE(it) (2 * (it) * sizeof(NvU32))
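
// Snapshot layout written by snapshot_counter() for iteration i:
//   snapshot[2*i]     holds the first word of the counter buffer
//   snapshot[2*i + 1] holds the last word of the counter buffer
// When all copies of a counter land in order, both words read back the same
// value, which is what the stress test verification below relies on.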

static void snapshot_counter(uvm_push_t *push,
                             uvm_rm_mem_t *counter_mem,
                             uvm_rm_mem_t *snapshot_mem,
                             NvU32 index,
                             NvU32 counters_count)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    NvU64 counter_gpu_va;
    NvU64 snapshot_gpu_va;
    bool is_proxy_channel;
    NvU32 last_counter_offset = (counters_count - 1) * sizeof(NvU32);

    if (counters_count == 0)
        return;

    is_proxy_channel = uvm_channel_is_proxy(push->channel);
    counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel).address;
    snapshot_gpu_va = uvm_rm_mem_get_gpu_va(snapshot_mem, gpu, is_proxy_channel).address + index * 2 * sizeof(NvU32);

    // Copy the last and first counter to a snapshot for later verification.

    // Membar will be done by uvm_push_end()
    uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
    uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
    gpu->parent->ce_hal->memcopy_v_to_v(push,
                                        snapshot_gpu_va + sizeof(NvU32),
                                        counter_gpu_va + last_counter_offset,
                                        sizeof(NvU32));

    // Membar will be done by uvm_push_end()
    uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
    uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
    gpu->parent->ce_hal->memcopy_v_to_v(push, snapshot_gpu_va, counter_gpu_va, sizeof(NvU32));
}

static void set_counter(uvm_push_t *push, uvm_rm_mem_t *counter_mem, NvU32 value, NvU32 count)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    NvU64 counter_gpu_va;
    bool is_proxy_channel;

    is_proxy_channel = uvm_channel_is_proxy(push->channel);
    counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel).address;

    gpu->parent->ce_hal->memset_v_4(push, counter_gpu_va, value, count * sizeof(NvU32));
}

static uvm_channel_type_t random_ce_channel_type(uvm_test_rng_t *rng)
{
    return (uvm_channel_type_t)uvm_test_rng_range_32(rng, 0, UVM_CHANNEL_TYPE_CE_COUNT - 1);
}

static uvm_channel_type_t random_ce_channel_type_except(uvm_test_rng_t *rng, uvm_channel_type_t exception)
{
    uvm_channel_type_t channel_type;

    UVM_ASSERT(exception < UVM_CHANNEL_TYPE_CE_COUNT);

    channel_type = (uvm_channel_type_t)uvm_test_rng_range_32(rng, 0, UVM_CHANNEL_TYPE_CE_COUNT - 2);

    if (channel_type >= exception)
        channel_type++;

    UVM_ASSERT(channel_type < UVM_CHANNEL_TYPE_CE_COUNT);

    return channel_type;
}
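
// For illustration: if UVM_CHANNEL_TYPE_CE_COUNT were 5 and the exception
// were 2, random_ce_channel_type_except() would draw uniformly from [0, 3]
// and map the draws {0, 1, 2, 3} to {0, 1, 3, 4}: uniform over the remaining
// types and never the excluded one.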

static uvm_channel_type_t gpu_random_internal_ce_channel_type(uvm_gpu_t *gpu, uvm_test_rng_t *rng)
{
    if (uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent))
        return random_ce_channel_type_except(rng, uvm_channel_proxy_channel_type());

    return random_ce_channel_type(rng);
}

static uvm_gpu_t *random_va_space_gpu(uvm_test_rng_t *rng, uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;
    NvU32 gpu_count = uvm_processor_mask_get_gpu_count(&va_space->registered_gpus);
    NvU32 gpu_index = uvm_test_rng_range_32(rng, 0, gpu_count - 1);

    UVM_ASSERT(gpu_count > 0);

    for_each_va_space_gpu(gpu, va_space) {
        if (gpu_index-- == 0)
            return gpu;
    }

    UVM_ASSERT(0);
    return NULL;
}

static void test_memset_rm_mem(uvm_push_t *push, uvm_rm_mem_t *rm_mem, NvU32 value)
{
    uvm_gpu_t *gpu;
    NvU64 gpu_va;

    UVM_ASSERT(rm_mem->size % 4 == 0);

    gpu = uvm_push_get_gpu(push);
    gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_channel_is_proxy(push->channel)).address;

    gpu->parent->ce_hal->memset_v_4(push, gpu_va, value, rm_mem->size);
}

// This test schedules a randomly sized memset on a random channel and GPU in
// a "stream" whose operations are ordered by acquiring the tracker of the
// previous operation. It also snapshots the memset done by the previous
// operation in the stream to verify it later on the CPU. Each iteration also
// optionally acquires a different stream and snapshots its memset.
// The test ioctl is expected to be called at the same time from multiple
// threads and contains some schedule() calls to help get as many threads as
// possible through the init phase before other threads continue. It also has
// a random schedule() call in the main loop scheduling GPU work.
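// Per stream and iteration, the push built in the main loop below:
//   1) acquires the stream's own tracker, ordering it after the stream's
//      previous push,
//   2) snapshots the counters written by the previous iteration via
//      snapshot_counter(),
//   3) writes the next counter value with a random repeat count via
//      set_counter(),
//   4) optionally acquires a random other stream's tracker and snapshots that
//      stream's counters.
// The CPU verification pass then expects each own-stream snapshot pair for
// iteration i to equal i, and each cross-stream snapshot to be at least the
// corresponding other_stream_counter_expected value.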
static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
                                            NvU32 num_streams,
                                            NvU32 iterations_per_stream,
                                            NvU32 seed,
                                            NvU32 verbose)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu;
    NvU32 i, j;
    uvm_test_stream_t *streams;
    uvm_test_rng_t rng;

    uvm_test_rng_init(&rng, seed);

    gpu = uvm_va_space_find_first_gpu(va_space);
    TEST_CHECK_RET(gpu != NULL);

    streams = uvm_kvmalloc_zero(sizeof(*streams) * num_streams);
    TEST_CHECK_RET(streams != NULL);

    // Initialize all the trackers first so that clean-up on error can always
    // wait for them.
    for (i = 0; i < num_streams; ++i) {
        uvm_test_stream_t *stream = &streams[i];
        uvm_tracker_init(&stream->tracker);
    }

    for (i = 0; i < num_streams; ++i) {
        uvm_test_stream_t *stream = &streams[i];

        status = uvm_gpu_semaphore_alloc(gpu->semaphore_pool, &stream->semaphore);
        if (status != NV_OK)
            goto done;

        stream->queued_counter_value = 0;

        status = uvm_rm_mem_alloc_and_map_all(gpu,
                                              UVM_RM_MEM_TYPE_SYS,
                                              MAX_COUNTER_REPEAT_COUNT * sizeof(NvU32),
                                              0,
                                              &stream->counter_mem);
        TEST_CHECK_GOTO(status == NV_OK, done);

        status = uvm_rm_mem_alloc_and_map_all(gpu,
                                              UVM_RM_MEM_TYPE_SYS,
                                              TEST_SNAPSHOT_SIZE(iterations_per_stream),
                                              0,
                                              &stream->counter_snapshots_mem);
        TEST_CHECK_GOTO(status == NV_OK, done);

        stream->counter_snapshots = (NvU32 *)uvm_rm_mem_get_cpu_va(stream->counter_snapshots_mem);

        status = uvm_rm_mem_alloc_and_map_all(gpu,
                                              UVM_RM_MEM_TYPE_SYS,
                                              TEST_SNAPSHOT_SIZE(iterations_per_stream),
                                              0,
                                              &stream->other_stream_counter_snapshots_mem);
        TEST_CHECK_GOTO(status == NV_OK, done);

        stream->other_stream_counter_snapshots = (NvU32 *)uvm_rm_mem_get_cpu_va(stream->other_stream_counter_snapshots_mem);

        stream->other_stream_counter_expected = uvm_kvmalloc_zero(sizeof(NvU32) * iterations_per_stream);
        if (stream->other_stream_counter_expected == NULL) {
            status = NV_ERR_NO_MEMORY;
            goto done;
        }

        status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, &stream->push, "stream %u init", i);
        TEST_CHECK_GOTO(status == NV_OK, done);

        test_memset_rm_mem(&stream->push, stream->counter_mem, 0);
        test_memset_rm_mem(&stream->push, stream->counter_snapshots_mem, 0);
        test_memset_rm_mem(&stream->push, stream->other_stream_counter_snapshots_mem, 0);

        status = uvm_push_end_and_wait(&stream->push);
        TEST_CHECK_GOTO(status == NV_OK, done);

        if (fatal_signal_pending(current)) {
            status = NV_ERR_SIGNAL_PENDING;
            goto done;
        }

        // Let other threads run
        schedule();
    }

    if (verbose > 0) {
        UVM_TEST_PRINT("Init done, seed %u, GPUs:\n", seed);
        for_each_va_space_gpu(gpu, va_space) {
            UVM_TEST_PRINT(" GPU %s\n", uvm_gpu_name(gpu));
        }
    }

    for (i = 0; i < iterations_per_stream; ++i) {
        for (j = 0; j < num_streams; ++j) {
            uvm_test_stream_t *stream = &streams[j];
            uvm_channel_type_t channel_type;
            gpu = random_va_space_gpu(&rng, va_space);

            if (fatal_signal_pending(current)) {
                status = NV_ERR_SIGNAL_PENDING;
                goto done;
            }

            // Select a random channel type. In SR-IOV heavy the selection has
            // to exclude the type associated with proxy channels, because
            // they do not support the virtual memcopies/memsets pushed by
            // snapshot_counter() and set_counter().
            channel_type = gpu_random_internal_ce_channel_type(gpu, &rng);

            status = uvm_push_begin_acquire(gpu->channel_manager,
                                            channel_type,
                                            &stream->tracker,
                                            &stream->push,
                                            "stream %u payload %u gid %u channel_type %u",
                                            j,
                                            stream->queued_counter_value,
                                            uvm_id_value(gpu->id),
                                            channel_type);
            TEST_CHECK_GOTO(status == NV_OK, done);

            snapshot_counter(&stream->push,
                             stream->counter_mem,
                             stream->counter_snapshots_mem,
                             i,
                             stream->queued_counter_repeat);

            // Set a random number [2, MAX_COUNTER_REPEAT_COUNT] of counters
            stream->queued_counter_repeat = uvm_test_rng_range_32(&rng, 2, MAX_COUNTER_REPEAT_COUNT);
            set_counter(&stream->push,
                        stream->counter_mem,
                        ++stream->queued_counter_value,
                        stream->queued_counter_repeat);

            if (uvm_test_rng_range_32(&rng, 0, 1) == 0) {
                NvU32 random_stream_index = uvm_test_rng_range_32(&rng, 0, num_streams - 1);
                uvm_test_stream_t *random_stream = &streams[random_stream_index];

                if ((random_stream->push.gpu == gpu) || uvm_push_allow_dependencies_across_gpus()) {
                    uvm_push_acquire_tracker(&stream->push, &random_stream->tracker);

                    snapshot_counter(&stream->push,
                                     random_stream->counter_mem,
                                     stream->other_stream_counter_snapshots_mem,
                                     i,
                                     random_stream->queued_counter_repeat);
                }
            }

            uvm_push_end(&stream->push);
            uvm_tracker_clear(&stream->tracker);
            TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&stream->tracker, &stream->push), done);
        }

        // Randomly schedule other threads
        if (uvm_test_rng_range_32(&rng, 0, 9) == 0)
            schedule();
    }

    if (verbose > 0)
        UVM_TEST_PRINT("All work scheduled\n");

    // Let other threads run
    schedule();

    for (i = 0; i < num_streams; ++i) {
        uvm_test_stream_t *stream = &streams[i];
        status = uvm_tracker_wait(&stream->tracker);
        if (status != NV_OK) {
            UVM_TEST_PRINT("Failed to wait for the tracker for stream %u: %s\n", i, nvstatusToString(status));
            goto done;
        }
        for (j = 0; j < iterations_per_stream; ++j) {
            NvU32 snapshot_last = stream->counter_snapshots[j * 2];
            NvU32 snapshot_first = stream->counter_snapshots[j * 2 + 1];
            if (snapshot_last != j || snapshot_first != j) {
                UVM_TEST_PRINT("Stream %u counter snapshot[%u] = %u,%u instead of %u,%u\n",
                               i,
                               j,
                               snapshot_last,
                               snapshot_first,
                               j,
                               j);
                status = NV_ERR_INVALID_STATE;
                goto done;
            }
        }
        for (j = 0; j < iterations_per_stream; ++j) {
            NvU32 snapshot_last = stream->other_stream_counter_snapshots[j * 2];
            NvU32 snapshot_first = stream->other_stream_counter_snapshots[j * 2 + 1];
            NvU32 expected = stream->other_stream_counter_expected[j];
            if (snapshot_last < expected || snapshot_first < expected) {
                UVM_TEST_PRINT("Stream %u other_counter snapshot[%u] = %u,%u, less than the expected %u,%u\n",
                               i,
                               j,
                               snapshot_last,
                               snapshot_first,
                               expected,
                               expected);
                status = NV_ERR_INVALID_STATE;
                goto done;
            }
        }
    }

    if (verbose > 0)
        UVM_TEST_PRINT("Verification done\n");

    schedule();

done:
    // Wait for all the trackers first before freeing up memory, as streams
    // reference each other's buffers.
    for (i = 0; i < num_streams; ++i) {
        uvm_test_stream_t *stream = &streams[i];
        uvm_tracker_wait(&stream->tracker);
    }

    for (i = 0; i < num_streams; ++i) {
        uvm_test_stream_t *stream = &streams[i];
        uvm_gpu_semaphore_free(&stream->semaphore);
        uvm_rm_mem_free(stream->other_stream_counter_snapshots_mem);
        uvm_rm_mem_free(stream->counter_snapshots_mem);
        uvm_rm_mem_free(stream->counter_mem);
        uvm_tracker_deinit(&stream->tracker);
        uvm_kvfree(stream->other_stream_counter_expected);
    }
    uvm_kvfree(streams);

    if (verbose > 0)
        UVM_TEST_PRINT("Cleanup done\n");

    return status;
}

// The following test is inspired by uvm_push_test.c:test_concurrent_pushes.
// This test verifies that concurrent pushes using the same channel pool
// select different channels, when the Confidential Computing feature is
// enabled.
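// The test holds up to UVM_PUSH_MAX_CONCURRENT_PUSHES pushes open at once per
// pool; with the Confidential Computing feature enabled a channel is expected
// to stay reserved while a push to it is on-going, so consecutive
// uvm_push_begin() calls should return different channels, which is what the
// check below asserts.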
static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
{
    NV_STATUS status = NV_OK;
    uvm_channel_pool_t *pool;
    uvm_push_t *pushes;
    uvm_gpu_t *gpu;
    NvU32 i;
    NvU32 num_pushes;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;

    uvm_thread_context_lock_disable_tracking();

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_type_t channel_type;

        for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
            pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
            TEST_CHECK_RET(pool != NULL);

            // Skip LCIC channels as those can't accept any pushes
            if (uvm_channel_pool_is_lcic(pool))
                continue;

            if (pool->num_channels < 2)
                continue;

            num_pushes = min(pool->num_channels, (NvU32)UVM_PUSH_MAX_CONCURRENT_PUSHES);

            pushes = uvm_kvmalloc_zero(sizeof(*pushes) * num_pushes);
            TEST_CHECK_RET(pushes != NULL);

            for (i = 0; i < num_pushes; i++) {
                uvm_push_t *push = &pushes[i];
                status = uvm_push_begin(gpu->channel_manager, channel_type, push, "concurrent push %u", i);
                TEST_NV_CHECK_GOTO(status, error);
                if (i > 0)
                    TEST_CHECK_GOTO(pushes[i - 1].channel != push->channel, error);
            }
            for (i = 0; i < num_pushes; i++) {
                uvm_push_t *push = &pushes[i];
                status = uvm_push_end_and_wait(push);
                TEST_NV_CHECK_GOTO(status, error);
            }

            uvm_kvfree(pushes);
        }
    }

    uvm_thread_context_lock_enable_tracking();

    return status;

error:
    uvm_thread_context_lock_enable_tracking();
    uvm_kvfree(pushes);

    return status;
}

static NV_STATUS test_channel_iv_rotation(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_pool_t *pool;

        uvm_for_each_pool(pool, gpu->channel_manager) {
            NvU64 before_rotation_enc, before_rotation_dec, after_rotation_enc, after_rotation_dec;
            NV_STATUS status = NV_OK;

            // Check one (the first) channel per pool
            uvm_channel_t *channel = pool->channels;

            // Create a dummy encrypt/decrypt push to consume a few IVs.
            // SEC2 channels used encryption during initialization, so no
            // dummy push is needed for them.
            if (!uvm_channel_is_sec2(channel)) {
                uvm_push_t push;
                size_t data_size;
                uvm_conf_computing_dma_buffer_t *cipher_text;
                void *cipher_cpu_va, *plain_cpu_va, *tag_cpu_va;
                uvm_gpu_address_t cipher_gpu_address, plain_gpu_address, tag_gpu_address;
                uvm_channel_t *work_channel = uvm_channel_is_lcic(channel) ? uvm_channel_lcic_get_paired_wlc(channel) :
                                                                             channel;

                plain_cpu_va = &status;
                data_size = sizeof(status);

                TEST_NV_CHECK_RET(uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool,
                                                                      &cipher_text,
                                                                      NULL));
                cipher_cpu_va = uvm_mem_get_cpu_addr_kernel(cipher_text->alloc);
                tag_cpu_va = uvm_mem_get_cpu_addr_kernel(cipher_text->auth_tag);

                cipher_gpu_address = uvm_mem_gpu_address_virtual_kernel(cipher_text->alloc, gpu);
                tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(cipher_text->auth_tag, gpu);

                TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(work_channel, &push, "Dummy push for IV rotation"), free);

                (void)uvm_push_get_single_inline_buffer(&push,
                                                        data_size,
                                                        UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                                                        &plain_gpu_address);

                uvm_conf_computing_cpu_encrypt(work_channel, cipher_cpu_va, plain_cpu_va, NULL, data_size, tag_cpu_va);
                gpu->parent->ce_hal->decrypt(&push, plain_gpu_address, cipher_gpu_address, data_size, tag_gpu_address);

                TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), free);

            free:
                uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, cipher_text, NULL);

                if (status != NV_OK)
                    return status;
            }

            // Reserve a channel to hold the push lock during rotation
            if (!uvm_channel_is_lcic(channel))
                TEST_NV_CHECK_RET(uvm_channel_reserve(channel, 1));

            uvm_conf_computing_query_message_pools(channel, &before_rotation_enc, &before_rotation_dec);
            TEST_NV_CHECK_GOTO(uvm_conf_computing_rotate_channel_ivs_below_limit(channel, -1, true), release);
            uvm_conf_computing_query_message_pools(channel, &after_rotation_enc, &after_rotation_dec);

        release:
            if (!uvm_channel_is_lcic(channel))
                uvm_channel_release(channel, 1);

            if (status != NV_OK)
                return status;

            // All channels except SEC2 used at least a single IV for tracking
            // semaphore releases. SEC2 doesn't support the decrypt direction.
            if (uvm_channel_is_sec2(channel))
                TEST_CHECK_RET(before_rotation_dec == after_rotation_dec);
            else
                TEST_CHECK_RET(before_rotation_dec < after_rotation_dec);

            // All channels used one CPU encrypt/GPU decrypt, either during
            // initialization or in the push above, with the exception of
            // LCIC. LCIC is used in tandem with WLC, but it never uses CPU
            // encrypt/GPU decrypt ops.
            if (uvm_channel_is_lcic(channel))
                TEST_CHECK_RET(before_rotation_enc == after_rotation_enc);
            else
                TEST_CHECK_RET(before_rotation_enc < after_rotation_enc);
        }
    }

    return NV_OK;
}

static NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_manager_t *manager = gpu->channel_manager;
        uvm_channel_pool_t *pool;

        uvm_for_each_pool(pool, manager) {
            uvm_channel_t *channel;

            // Skip LCIC channels as those can't accept any pushes
            if (uvm_channel_pool_is_lcic(pool))
                continue;

            // Skip WLC channels as those can't accept ctrl gpfifos
            // after their schedule is set up
            if (uvm_channel_pool_is_wlc(pool))
                continue;

            uvm_for_each_channel_in_pool(channel, pool) {
                NvU32 i;

                if (uvm_channel_is_proxy(channel))
                    continue;

                // We submit 8x the channel's GPFIFO entries to force a few
                // complete loops in the GPFIFO circular buffer.
                for (i = 0; i < 8 * channel->num_gpfifo_entries; i++) {
                    NvU64 entry;
                    gpu->parent->host_hal->set_gpfifo_noop(&entry);
                    TEST_NV_CHECK_RET(uvm_channel_write_ctrl_gpfifo(channel, entry));
                }
            }
        }
    }

    return NV_OK;
}

static NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_manager_t *manager = gpu->channel_manager;
        uvm_channel_pool_t *pool;

        uvm_for_each_pool(pool, manager) {
            uvm_channel_t *channel;

            // Skip LCIC channels as those can't accept any pushes
            if (uvm_channel_pool_is_lcic(pool))
                continue;

            // Skip WLC channels as those can't accept ctrl gpfifos
            // after their schedule is set up
            if (uvm_channel_pool_is_wlc(pool))
                continue;

            uvm_for_each_channel_in_pool(channel, pool) {
                NvU32 i;
                uvm_push_t push;

                if (uvm_channel_is_proxy(channel))
                    continue;

                // We submit 8x the channel's GPFIFO entries to force a few
                // complete loops in the GPFIFO circular buffer.
                for (i = 0; i < 8 * channel->num_gpfifo_entries; i++) {
                    if (i % 2 == 0) {
                        NvU64 entry;
                        gpu->parent->host_hal->set_gpfifo_noop(&entry);
                        TEST_NV_CHECK_RET(uvm_channel_write_ctrl_gpfifo(channel, entry));
                    }
                    else {
                        TEST_NV_CHECK_RET(uvm_push_begin_on_channel(channel, &push, "gpfifo ctrl and push test"));
                        uvm_push_end(&push);
                    }
                }

                TEST_NV_CHECK_RET(uvm_push_wait(&push));
            }
        }
    }

    return NV_OK;
}

static NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu;
    uvm_channel_t *channel;
    uvm_rm_mem_t *mem;
    NvU32 *cpu_ptr;
    NvU64 gpu_va;
    NvU32 i;
    NvU64 entry;
    uvm_push_t push;

    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that the GPU can access system memory without using
    // encryption.
    if (g_uvm_global.conf_computing_enabled)
        return NV_OK;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_manager_t *manager = gpu->channel_manager;

        TEST_NV_CHECK_RET(uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(*cpu_ptr), 0, &mem));
        cpu_ptr = uvm_rm_mem_get_cpu_va(mem);
        gpu_va = uvm_rm_mem_get_gpu_uvm_va(mem, gpu);

        *cpu_ptr = 0;

        // This semaphore acquire takes 1 GPFIFO entry.
        TEST_NV_CHECK_GOTO(uvm_push_begin(manager, UVM_CHANNEL_TYPE_GPU_TO_GPU, &push, "gpfifo ctrl tight test acq"),
                           error);

        channel = push.channel;
        UVM_ASSERT(!uvm_channel_is_proxy(channel));

        gpu->parent->host_hal->semaphore_acquire(&push, gpu_va, 1);
        uvm_push_end(&push);

        // Flush all completed entries from the GPFIFO ring buffer. This test
        // requires this flush because we verify (below with
        // uvm_channel_get_available_gpfifo_entries) the number of free
        // entries in the channel.
        uvm_channel_update_progress_all(channel);

        // Populate the remaining GPFIFO entries, leaving 2 slots available.
        // 2 available entries + 1 semaphore acquire (above) + 1 spare entry
        // to indicate a terminal condition for the GPFIFO ring buffer,
        // therefore we push num_gpfifo_entries - 4 pushes.
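        //
        // As a hypothetical worked example: if num_gpfifo_entries were 32,
        // the loop below would submit 28 pushes, which together with the
        // 1 acquire above and the 1 terminal slot leaves exactly the 2 free
        // entries asserted afterwards.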
        for (i = 0; i < channel->num_gpfifo_entries - 4; i++) {
            TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(channel, &push, "gpfifo ctrl tight test populate"), error);
            uvm_push_end(&push);
        }

        TEST_CHECK_GOTO(uvm_channel_get_available_gpfifo_entries(channel) == 2, error);

        // We should have room for the control GPFIFO and the subsequent
        // semaphore release.
        gpu->parent->host_hal->set_gpfifo_noop(&entry);
        TEST_NV_CHECK_GOTO(uvm_channel_write_ctrl_gpfifo(channel, entry), error);

        // Release the semaphore.
        UVM_WRITE_ONCE(*cpu_ptr, 1);

        TEST_NV_CHECK_GOTO(uvm_push_wait(&push), error);

        uvm_rm_mem_free(mem);
    }

    return NV_OK;

error:
    uvm_rm_mem_free(mem);

    return status;
}

// This test is inspired by test_rc (above).
// The test re-creates the GPU's channel manager, forcing its pushbuffer to be
// mapped on a non-zero 1TB segment. This exercises work submission from
// pushbuffers whose VAs are greater than 1TB.
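// On GPUs that require pushbuffer segments
// (uvm_parent_gpu_needs_pushbuffer_segments), forcing the upper segment is
// expected to place the pushbuffer VA base at or above 1TB (1ull << 40),
// which is what the uvm_pushbuffer_get_gpu_va_base() check below asserts.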
static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;
    NV_STATUS status = NV_OK;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_manager_t *manager;
        uvm_channel_pool_t *pool;

        if (!uvm_parent_gpu_needs_pushbuffer_segments(gpu->parent))
            continue;

        // The GPU channel manager pushbuffer is destroyed and then re-created
        // after testing a non-zero pushbuffer extension base, so this test
        // requires exclusive access to the GPU.
        TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);

        gpu->uvm_test_force_upper_pushbuffer_segment = 1;
        uvm_channel_manager_destroy(gpu->channel_manager);
        TEST_NV_CHECK_GOTO(uvm_channel_manager_create(gpu, &gpu->channel_manager), error);
        gpu->uvm_test_force_upper_pushbuffer_segment = 0;

        manager = gpu->channel_manager;
        TEST_CHECK_GOTO(uvm_pushbuffer_get_gpu_va_base(manager->pushbuffer) >= (1ull << 40), error);

        // Submit a few pushes with the recently allocated
        // channel_manager->pushbuffer.
        uvm_for_each_pool(pool, manager) {
            uvm_channel_t *channel;

            // Skip LCIC channels as those can't accept any pushes
            if (uvm_channel_pool_is_lcic(pool))
                continue;

            uvm_for_each_channel_in_pool(channel, pool) {
                NvU32 i;
                uvm_push_t push;

                for (i = 0; i < channel->num_gpfifo_entries; i++) {
                    TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(channel, &push, "pushbuffer extension push test"),
                                       error);
                    uvm_push_end(&push);
                }

                TEST_NV_CHECK_GOTO(uvm_push_wait(&push), error);
            }
        }
    }

    return NV_OK;

error:
    gpu->uvm_test_force_upper_pushbuffer_segment = 0;

    return status;
}

NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct file *filp)
{
    NV_STATUS status;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    uvm_mutex_lock(&g_uvm_global.global_lock);
    uvm_va_space_down_read_rm(va_space);

    status = test_ordering(va_space);
    if (status != NV_OK)
        goto done;

    status = test_write_ctrl_gpfifo_noop(va_space);
    if (status != NV_OK)
        goto done;

    status = test_write_ctrl_gpfifo_and_pushes(va_space);
    if (status != NV_OK)
        goto done;

    status = test_write_ctrl_gpfifo_tight(va_space);
    if (status != NV_OK)
        goto done;

    status = test_conf_computing_channel_selection(va_space);
    if (status != NV_OK)
        goto done;

    status = test_channel_iv_rotation(va_space);
    if (status != NV_OK)
        goto done;

    // The following tests have side effects: they reset the GPU's
    // channel_manager.
    status = test_channel_pushbuffer_extension_base(va_space);
    if (status != NV_OK)
        goto done;

    g_uvm_global.disable_fatal_error_assert = true;
    uvm_release_asserts_set_global_error_for_tests = true;
    status = test_unexpected_completed_values(va_space);
    uvm_release_asserts_set_global_error_for_tests = false;
    g_uvm_global.disable_fatal_error_assert = false;
    if (status != NV_OK)
        goto done;

    if (g_uvm_global.num_simulated_devices == 0) {
        status = test_rc(va_space);
        if (status != NV_OK)
            goto done;
    }

    status = test_iommu(va_space);
    if (status != NV_OK)
        goto done;

done:
    uvm_va_space_up_read_rm(va_space);
    uvm_mutex_unlock(&g_uvm_global.global_lock);

    return status;
}

static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
                                                const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    NV_STATUS status = NV_OK;

    if (params->iterations == 0 || params->num_streams == 0)
        return NV_ERR_INVALID_PARAMETER;

    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that the GPU can access system memory without using
    // encryption.
    if (g_uvm_global.conf_computing_enabled)
        return NV_OK;

    // TODO: Bug 1764963: Rework the test to not rely on the global lock as
    // that serializes all the threads calling this at the same time.
    uvm_mutex_lock(&g_uvm_global.global_lock);
    uvm_va_space_down_read_rm(va_space);

    status = stress_test_all_gpus_in_va(va_space,
                                        params->num_streams,
                                        params->iterations,
                                        params->seed,
                                        params->verbose);

    uvm_va_space_up_read_rm(va_space);
    uvm_mutex_unlock(&g_uvm_global.global_lock);

    return status;
}

static NV_STATUS uvm_test_channel_stress_update_channels(uvm_va_space_t *va_space,
                                                         const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    NV_STATUS status = NV_OK;
    uvm_test_rng_t rng;
    NvU32 i;

    uvm_test_rng_init(&rng, params->seed);

    uvm_va_space_down_read(va_space);

    for (i = 0; i < params->iterations; ++i) {
        uvm_gpu_t *gpu = random_va_space_gpu(&rng, va_space);
        uvm_channel_manager_update_progress(gpu->channel_manager);

        if (fatal_signal_pending(current)) {
            status = NV_ERR_SIGNAL_PENDING;
            goto done;
        }
    }

done:
    uvm_va_space_up_read(va_space);

    return status;
}

static NV_STATUS uvm_test_channel_noop_push(uvm_va_space_t *va_space,
                                            const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    NV_STATUS status = NV_OK;
    uvm_push_t push;
    uvm_test_rng_t rng;
    uvm_gpu_t *gpu;
    NvU32 i;

    uvm_test_rng_init(&rng, params->seed);

    uvm_va_space_down_read(va_space);

    for (i = 0; i < params->iterations; ++i) {
        uvm_channel_type_t channel_type = random_ce_channel_type(&rng);
        gpu = random_va_space_gpu(&rng, va_space);

        status = uvm_push_begin(gpu->channel_manager, channel_type, &push, "noop push");
        if (status != NV_OK)
            goto done;

        // Push an actual noop method so that the push doesn't get optimized
        // away if we ever detect empty pushes.
        gpu->parent->host_hal->noop(&push, UVM_METHOD_SIZE);

        uvm_push_end(&push);

        if (fatal_signal_pending(current)) {
            status = NV_ERR_SIGNAL_PENDING;
            goto done;
        }
    }
    if (params->verbose > 0)
        UVM_TEST_PRINT("Noop pushes: completed %u pushes, seed: %u\n", i, params->seed);

    for_each_va_space_gpu_in_mask(gpu, va_space, &va_space->registered_gpu_va_spaces) {
        NV_STATUS wait_status = uvm_channel_manager_wait(gpu->channel_manager);
        if (status == NV_OK)
            status = wait_status;
    }

done:
    uvm_va_space_up_read(va_space);

    return status;
}

NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    switch (params->mode) {
        case UVM_TEST_CHANNEL_STRESS_MODE_STREAM:
            return uvm_test_channel_stress_stream(va_space, params);
        case UVM_TEST_CHANNEL_STRESS_MODE_UPDATE_CHANNELS:
            return uvm_test_channel_stress_update_channels(va_space, params);
        case UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH:
            return uvm_test_channel_noop_push(va_space, params);
        default:
            return NV_ERR_INVALID_PARAMETER;
    }
}