1 /*******************************************************************************
2     Copyright (c) 2015-2023 NVIDIA Corporation
3 
4     Permission is hereby granted, free of charge, to any person obtaining a copy
5     of this software and associated documentation files (the "Software"), to
6     deal in the Software without restriction, including without limitation the
7     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8     sell copies of the Software, and to permit persons to whom the Software is
9     furnished to do so, subject to the following conditions:
10 
11         The above copyright notice and this permission notice shall be
12         included in all copies or substantial portions of the Software.
13 
14     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20     DEALINGS IN THE SOFTWARE.
21 
22 *******************************************************************************/
23 
24 #include "uvm_global.h"
25 #include "uvm_channel.h"
26 #include "uvm_hal.h"
27 #include "uvm_mem.h"
28 #include "uvm_push.h"
29 #include "uvm_test.h"
30 #include "uvm_test_rng.h"
31 #include "uvm_va_space.h"
32 #include "uvm_tracker.h"
33 #include "uvm_thread_context.h"
34 #include "uvm_gpu_semaphore.h"
35 #include "uvm_kvmalloc.h"
36 
37 #define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU     1024
38 #define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU_EMU 64
39 
// Schedule pushes one after another on all GPUs and channel types. Each push
// copies a counter into the adjacent memory location of a buffer and then
// increments it. Finally, verify on the CPU that all the values are correct.
static NV_STATUS test_ordering(uvm_va_space_t *va_space)
{
    NV_STATUS status;
    uvm_gpu_t *gpu;
    bool exclude_proxy_channel_type;
    NvU32 i, j;
    uvm_rm_mem_t *mem = NULL;
    NvU32 *host_mem;
    uvm_push_t push;
    NvU64 gpu_va;
    uvm_tracker_t tracker = UVM_TRACKER_INIT();
    NvU32 value = 0;
    // Fewer iterations on simulated devices, where work executes much slower
    const NvU32 iters_per_channel_type_per_gpu = g_uvm_global.num_simulated_devices > 0 ?
                                                     TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU_EMU :
                                                     TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU;
    const NvU32 values_count = iters_per_channel_type_per_gpu;
    const size_t buffer_size = sizeof(NvU32) * values_count;

    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that GPU can access system memory without using encryption.
    if (g_uvm_global.conf_computing_enabled)
        return NV_OK;

    gpu = uvm_va_space_find_first_gpu(va_space);
    TEST_CHECK_RET(gpu != NULL);

    // Sysmem buffer of values_count NvU32 slots, mapped on all GPUs and the CPU
    status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, 0, &mem);
    TEST_CHECK_GOTO(status == NV_OK, done);

    host_mem = (NvU32*)uvm_rm_mem_get_cpu_va(mem);
    memset(host_mem, 0, buffer_size);

    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Initial memset");
    TEST_CHECK_GOTO(status == NV_OK, done);

    gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel)).address;

    // Semaphore release as part of uvm_push_end() will do the membar
    uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
    gpu->parent->ce_hal->memset_v_4(&push, gpu_va, 0, buffer_size);

    uvm_push_end(&push);

    TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), done);

    exclude_proxy_channel_type = uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent);

    // Build a single dependency chain of pushes: each push acquires the
    // tracker holding the previous one, copies the last written slot into the
    // next slot (wrapping around the buffer), and then increments that
    // destination slot via a semaphore reduction. The chain walks over every
    // CE channel type on every GPU in the VA space.
    for (i = 0; i < iters_per_channel_type_per_gpu; ++i) {
        for (j = 0; j < UVM_CHANNEL_TYPE_CE_COUNT; ++j) {
            uvm_channel_type_t channel_type = j;

            // Proxy channels don't support the virtual memcopies that are about
            // to be pushed, so don't test the proxy channel type in any of the
            // GPUs.
            if (exclude_proxy_channel_type && (channel_type == uvm_channel_proxy_channel_type()))
                continue;

            for_each_va_space_gpu(gpu, va_space) {
                NvU64 gpu_va_base;
                NvU64 gpu_va_src;
                NvU64 gpu_va_dst;

                status = uvm_push_begin_acquire(gpu->channel_manager,
                                                channel_type,
                                                &tracker,
                                                &push,
                                                "memcpy and inc to %u",
                                                value + 1);
                TEST_CHECK_GOTO(status == NV_OK, done);

                // The GPU VA differs per GPU (and between proxy and internal
                // channels), so recompute it for each push
                gpu_va_base = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel)).address;
                gpu_va_src = gpu_va_base + (value % values_count) * sizeof(NvU32);
                gpu_va_dst = gpu_va_base + ((value + 1) % values_count) * sizeof(NvU32);

                // The semaphore reduction will do a membar before the reduction
                uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
                gpu->parent->ce_hal->memcopy_v_to_v(&push, gpu_va_dst, gpu_va_src, sizeof(NvU32));

                // The following reduction is done from the same GPU, but the
                // previous memcpy is to uncached sysmem and that bypasses L2
                // and hence requires a SYSMEMBAR to be ordered.
                gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va_dst, ++value);

                uvm_push_end(&push);

                // Keep only the newest push in the tracker; the chain ordering
                // is already guaranteed by the acquire above
                uvm_tracker_clear(&tracker);
                TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), done);
            }
        }
    }
    status = uvm_tracker_wait(&tracker);
    TEST_CHECK_GOTO(status == NV_OK, done);

    // At this moment, this should hold:
    // mem[value % values_count] == value
    // mem[(value + 1) % values_count]  == value + 1 - values_count
    // And in general, for i=[0, values_count):
    // mem[(value + 1 + i) % values_count]  == value + 1 - values_count + i
    // Verify that

    for (i = 0; i < values_count; ++i) {
        NvU32 index = (value + 1 + i) % values_count;
        NvU32 expected = (value + 1 + i) - values_count;
        if (host_mem[index] != expected) {
            UVM_TEST_PRINT("Bad value at host_mem[%u] = %u instead of %u\n", index, host_mem[index], expected);
            status = NV_ERR_INVALID_STATE;
            goto done;
        }
    }

done:
    // Wait for any outstanding work before freeing the buffer it targets
    uvm_tracker_wait(&tracker);
    uvm_rm_mem_free(mem);

    return status;
}
159 
// For each GPU, force a channel's tracking semaphore to report a payload ahead
// of what was actually submitted, and verify that channel progress tracking
// flags the inconsistency as a global fatal error.
static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_t *channel;
        NvU64 completed_value;

        // The GPU channel manager is destroyed and then re-created after
        // the test, so this test requires exclusive access to the GPU.
        TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);

        // Bump the tracking semaphore payload one past the last completed
        // value, even though no push actually completed.
        channel = &gpu->channel_manager->channel_pools[0].channels[0];
        completed_value = uvm_channel_update_completed_value(channel);
        uvm_gpu_semaphore_set_payload(&channel->tracking_sem.semaphore, (NvU32)completed_value + 1);

        // No global error is expected before progress is updated...
        TEST_NV_CHECK_RET(uvm_global_get_status());
        uvm_channel_update_progress_all(channel);
        // ...but observing the bogus payload should trip a fatal error.
        TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);

        uvm_channel_manager_destroy(gpu->channel_manager);

        // Destruction will hit the error again, so clear one more time.
        uvm_global_reset_fatal_error();

        // Re-create the channel manager so the GPU is left in a usable state
        TEST_NV_CHECK_RET(uvm_channel_manager_create(gpu, &gpu->channel_manager));
    }

    return NV_OK;
}
190 
// Deliberately trigger an RC (robust channel) error on one of the GPU's
// channels, then verify that the error is reported by the channel manager,
// attributed to the correct fatal push, and that trackers referencing the
// failed work complete with NV_ERR_RC_ERROR.
//
// Callers are expected to destroy and re-create the GPU's channel manager
// afterwards (see test_rc()).
static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
{
    uvm_push_t push;
    uvm_channel_pool_t *pool;
    uvm_gpfifo_entry_t *fatal_entry;
    uvm_push_info_t *push_info;
    int fatal_line;
    uvm_tracker_entry_t tracker_entry;
    NV_STATUS status;
    uvm_tracker_t tracker = UVM_TRACKER_INIT();
    uvm_channel_manager_t *manager = gpu->channel_manager;

    // Submit a bunch of successful pushes on each channel first so that the
    // fatal one is behind a bunch of work (notably more than
    // uvm_channel_update_progress() completes by default).
    uvm_for_each_pool(pool, manager) {
        uvm_channel_t *channel;

        // Skip LCIC channels as those can't accept any pushes
        if (uvm_channel_pool_is_lcic(pool))
            continue;

        uvm_for_each_channel_in_pool(channel, pool) {
            NvU32 i;
            for (i = 0; i < 512; ++i) {
                status = uvm_push_begin_on_channel(channel, &push, "Non-faulting push");
                TEST_CHECK_RET(status == NV_OK);

                uvm_push_end(&push);
            }
        }
    }

    // Check RC on a proxy channel (SR-IOV heavy) or internal channel (any other
    // mode). It is not allowed to use a virtual address in a memset pushed to
    // a proxy channel, so we use a physical address instead.
    if (uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent)) {
        uvm_gpu_address_t dst_address;

        // Save the line number the push that's supposed to fail was started on
        fatal_line = __LINE__ + 1;
        TEST_NV_CHECK_RET(uvm_push_begin(manager, uvm_channel_proxy_channel_type(), &push, "Fatal push 0x%X", 0xBAD));

        // Memset targeting a physical address beyond the vidmem size. The
        // passed physical address is not the vidmem size reported by RM
        // because the reported size can be smaller than the actual physical
        // size, such that accessing a GPA at the reported size may be allowed
        // by VMMU.
        //
        // GA100 GPUs have way less than UVM_GPU_MAX_PHYS_MEM vidmem, so using
        // that value as physical address should result on an error
        dst_address = uvm_gpu_address_physical(UVM_APERTURE_VID, UVM_GPU_MAX_PHYS_MEM - 8);
        gpu->parent->ce_hal->memset_8(&push, dst_address, 0, 8);
    }
    else {
        fatal_line = __LINE__ + 1;
        TEST_NV_CHECK_RET(uvm_push_begin(manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Fatal push 0x%X", 0xBAD));

        // Memset that should fault on 0xFFFFFFFF
        gpu->parent->ce_hal->memset_v_4(&push, 0xFFFFFFFF, 0, 4);
    }

    uvm_push_end(&push);

    // Capture the fatal push in both a tracker entry and a tracker so the
    // post-error completion semantics of each can be verified below
    uvm_push_get_tracker_entry(&push, &tracker_entry);
    uvm_tracker_overwrite_with_push(&tracker, &push);

    // Waiting for all channels must surface the RC error
    status = uvm_channel_manager_wait(manager);
    TEST_CHECK_RET(status == NV_ERR_RC_ERROR);

    TEST_CHECK_RET(uvm_channel_get_status(push.channel) == NV_ERR_RC_ERROR);
    fatal_entry = uvm_channel_get_fatal_entry(push.channel);
    TEST_CHECK_RET(fatal_entry != NULL);

    // The fatal GPFIFO entry must be attributed to the push started at
    // fatal_line in this function and file
    push_info = fatal_entry->push_info;
    TEST_CHECK_RET(push_info != NULL);
    TEST_CHECK_RET(push_info->line == fatal_line);
    TEST_CHECK_RET(strcmp(push_info->function, __FUNCTION__) == 0);
    TEST_CHECK_RET(strcmp(push_info->filename, kbasename(__FILE__)) == 0);
    if (uvm_push_info_is_tracking_descriptions())
        TEST_CHECK_RET(strcmp(push_info->description, "Fatal push 0xBAD") == 0);

    TEST_CHECK_RET(uvm_global_get_status() == NV_ERR_RC_ERROR);

    // Check that waiting for an entry after a global fatal error makes the
    // entry completed.
    TEST_CHECK_RET(!uvm_tracker_is_entry_completed(&tracker_entry));
    TEST_CHECK_RET(uvm_tracker_wait_for_entry(&tracker_entry) == NV_ERR_RC_ERROR);
    TEST_CHECK_RET(uvm_tracker_is_entry_completed(&tracker_entry));

    // Check that waiting for a tracker after a global fatal error, clears all
    // the entries from the tracker.
    TEST_CHECK_RET(!uvm_tracker_is_empty(&tracker));
    TEST_CHECK_RET(uvm_tracker_wait(&tracker) == NV_ERR_RC_ERROR);
    TEST_CHECK_RET(uvm_tracker_is_empty(&tracker));

    // Clear the injected global error so the rest of the driver can proceed
    TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_RC_ERROR);

    return NV_OK;
}
290 
test_rc(uvm_va_space_t * va_space)291 static NV_STATUS test_rc(uvm_va_space_t *va_space)
292 {
293     uvm_gpu_t *gpu;
294 
295     uvm_assert_mutex_locked(&g_uvm_global.global_lock);
296 
297     for_each_va_space_gpu(gpu, va_space) {
298         NV_STATUS test_status, create_status;
299 
300         // The GPU channel manager is destroyed and then re-created after
301         // testing RC, so this test requires exclusive access to the GPU.
302         TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);
303 
304         g_uvm_global.disable_fatal_error_assert = true;
305         test_status = uvm_test_rc_for_gpu(gpu);
306         g_uvm_global.disable_fatal_error_assert = false;
307 
308         uvm_channel_manager_destroy(gpu->channel_manager);
309         create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);
310 
311         TEST_NV_CHECK_RET(test_status);
312         TEST_NV_CHECK_RET(create_status);
313     }
314 
315     return NV_OK;
316 }
317 
// Verify that a CE write to a sysmem address whose IOMMU mapping has been
// removed triggers an RC error, while a write to the mapped address succeeds.
// Returns NV_OK (skipping the test) when the required IOMMU/ATS configuration
// is not present or the supporting kernel features are not compiled in.
static NV_STATUS uvm_test_iommu_rc_for_gpu(uvm_gpu_t *gpu)
{
    NV_STATUS status = NV_OK;

#if defined(NV_IOMMU_IS_DMA_DOMAIN_PRESENT) && defined(CONFIG_IOMMU_DEFAULT_DMA_STRICT)
    // This test needs the DMA API to immediately invalidate IOMMU mappings on
    // DMA unmap (as opposed to lazy invalidation). The policy can be changed
    // on boot (e.g. iommu.strict=1), but there isn't a good way to check for
    // the runtime setting. CONFIG_IOMMU_DEFAULT_DMA_STRICT checks for the
    // default value.

    uvm_push_t push;
    uvm_mem_t *sysmem;
    uvm_gpu_address_t sysmem_dma_addr;
    char *cpu_ptr = NULL;
    const size_t data_size = PAGE_SIZE;
    size_t i;

    struct device *dev = &gpu->parent->pci_dev->dev;
    struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

    // Check that the iommu domain is controlled by linux DMA API
    if (!domain || !iommu_is_dma_domain(domain))
        return NV_OK;

    // Only run if ATS is enabled with 64kB base page.
    // Otherwise the CE doesn't get response on writing to unmapped location.
    if (!g_uvm_global.ats.enabled || PAGE_SIZE != UVM_PAGE_SIZE_64K)
        return NV_OK;

    status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(data_size, NULL, &sysmem);
    TEST_NV_CHECK_RET(status);

    status = uvm_mem_map_gpu_phys(sysmem, gpu);
    TEST_NV_CHECK_GOTO(status, done);

    cpu_ptr = uvm_mem_get_cpu_addr_kernel(sysmem);
    sysmem_dma_addr = uvm_mem_gpu_address_physical(sysmem, gpu, 0, data_size);

    // First, a memset through the valid IOMMU mapping must succeed
    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Test memset to IOMMU mapped sysmem");
    TEST_NV_CHECK_GOTO(status, done);

    gpu->parent->ce_hal->memset_8(&push, sysmem_dma_addr, 0, data_size);

    status = uvm_push_end_and_wait(&push);
    TEST_NV_CHECK_GOTO(status, done);

    // Check that we have zeroed the memory
    for (i = 0; i < data_size; ++i)
        TEST_CHECK_GOTO(cpu_ptr[i] == 0, done);

    // Unmap the buffer and try write again to the same address
    uvm_mem_unmap_gpu_phys(sysmem, gpu);

    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Test memset after IOMMU unmap");
    TEST_NV_CHECK_GOTO(status, done);

    gpu->parent->ce_hal->memset_4(&push, sysmem_dma_addr, 0xffffffff, data_size);

    status = uvm_push_end_and_wait(&push);

    // The write through the stale address must have caused an RC error, both
    // globally and on the pushing channel
    TEST_CHECK_GOTO(status == NV_ERR_RC_ERROR, done);
    TEST_CHECK_GOTO(uvm_channel_get_status(push.channel) == NV_ERR_RC_ERROR, done);
    TEST_CHECK_GOTO(uvm_global_reset_fatal_error() == NV_ERR_RC_ERROR, done);

    // Check that writes after unmap did not succeed
    for (i = 0; i < data_size; ++i)
        TEST_CHECK_GOTO(cpu_ptr[i] == 0, done);

    status = NV_OK;

done:
    uvm_mem_free(sysmem);
#endif
    return status;
}
394 
test_iommu(uvm_va_space_t * va_space)395 static NV_STATUS test_iommu(uvm_va_space_t *va_space)
396 {
397     uvm_gpu_t *gpu;
398 
399     uvm_assert_mutex_locked(&g_uvm_global.global_lock);
400 
401     for_each_va_space_gpu(gpu, va_space) {
402         NV_STATUS test_status, create_status;
403 
404         // The GPU channel manager is destroyed and then re-created after
405         // testing ATS RC fault, so this test requires exclusive access to the GPU.
406         TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);
407 
408         g_uvm_global.disable_fatal_error_assert = true;
409         test_status = uvm_test_iommu_rc_for_gpu(gpu);
410         g_uvm_global.disable_fatal_error_assert = false;
411 
412         uvm_channel_manager_destroy(gpu->channel_manager);
413         create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);
414 
415         TEST_NV_CHECK_RET(test_status);
416         TEST_NV_CHECK_RET(create_status);
417     }
418 
419     return NV_OK;
420 }
421 
// Per-stream state for the multi-GPU stress test. Each stream issues a chain
// of pushes ordered through its tracker, repeatedly overwriting a counter
// buffer and snapshotting it for later CPU-side verification.
typedef struct
{
    // Push currently being built for this stream
    uvm_push_t push;

    // Tracker holding the stream's latest push; each new push acquires it
    uvm_tracker_t tracker;

    // Semaphore allocated at init from the first GPU's semaphore pool
    uvm_gpu_semaphore_t semaphore;

    // Last counter value queued via set_counter() for this stream
    NvU32 queued_counter_value;

    // Number of counter slots written by the latest set_counter()
    NvU32 queued_counter_repeat;

    // Sysmem buffer holding the stream's counters (up to
    // MAX_COUNTER_REPEAT_COUNT NvU32 values)
    uvm_rm_mem_t *counter_mem;

    // Per-iteration snapshots (first and last counter) of this stream's own
    // counter buffer
    uvm_rm_mem_t *counter_snapshots_mem;

    // Per-iteration snapshots taken from a randomly chosen other stream's
    // counter buffer
    uvm_rm_mem_t *other_stream_counter_snapshots_mem;

    // CPU mapping of counter_snapshots_mem
    NvU32 *counter_snapshots;

    // CPU mapping of other_stream_counter_snapshots_mem
    NvU32 *other_stream_counter_snapshots;

    // CPU-side lower bounds expected for the other-stream snapshots
    NvU32 *other_stream_counter_expected;
} uvm_test_stream_t;
436 
// Maximum number of NvU32 counters a single set_counter() may write.
// Parenthesized so the macro expands safely inside larger expressions.
#define MAX_COUNTER_REPEAT_COUNT (10 * 1024)

// For each iter, snapshot the first and last counter value. The argument is
// parenthesized so expressions like TEST_SNAPSHOT_SIZE(a + b) expand correctly.
#define TEST_SNAPSHOT_SIZE(it) (2 * (it) * sizeof(NvU32))
440 
// Push two 4-byte copies recording the current last and first values of the
// counter buffer into the snapshot slot pair for iteration 'index'. No-op
// when there are no counters to snapshot.
static void snapshot_counter(uvm_push_t *push,
                             uvm_rm_mem_t *counter_mem,
                             uvm_rm_mem_t *snapshot_mem,
                             NvU32 index,
                             NvU32 counters_count)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    bool on_proxy;
    NvU64 counters_va;
    NvU64 slot_va;

    if (counters_count == 0)
        return;

    on_proxy = uvm_channel_is_proxy(push->channel);
    counters_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, on_proxy).address;
    slot_va = uvm_rm_mem_get_gpu_va(snapshot_mem, gpu, on_proxy).address + index * 2 * sizeof(NvU32);

    // Record the last counter, then the first, for later CPU verification.
    // No membar is needed here: uvm_push_end() provides it.
    uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
    uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
    gpu->parent->ce_hal->memcopy_v_to_v(push,
                                        slot_va + sizeof(NvU32),
                                        counters_va + (counters_count - 1) * sizeof(NvU32),
                                        sizeof(NvU32));

    uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
    uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
    gpu->parent->ce_hal->memcopy_v_to_v(push, slot_va, counters_va, sizeof(NvU32));
}
475 
// Push a memset filling 'count' consecutive NvU32 counter slots with 'value'.
static void set_counter(uvm_push_t *push, uvm_rm_mem_t *counter_mem, NvU32 value, NvU32 count)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    NvU64 counters_va = uvm_rm_mem_get_gpu_va(counter_mem,
                                              gpu,
                                              uvm_channel_is_proxy(push->channel)).address;

    gpu->parent->ce_hal->memset_v_4(push, counters_va, value, count * sizeof(NvU32));
}
487 
random_ce_channel_type(uvm_test_rng_t * rng)488 static uvm_channel_type_t random_ce_channel_type(uvm_test_rng_t *rng)
489 {
490     return (uvm_channel_type_t)uvm_test_rng_range_32(rng, 0, UVM_CHANNEL_TYPE_CE_COUNT - 1);
491 }
492 
// Draw a CE channel type uniformly from all CE channel types other than
// 'exception'.
static uvm_channel_type_t random_ce_channel_type_except(uvm_test_rng_t *rng, uvm_channel_type_t exception)
{
    uvm_channel_type_t picked;

    UVM_ASSERT(exception < UVM_CHANNEL_TYPE_CE_COUNT);

    // Draw from a range that is one smaller than the full set, then shift
    // every value at or above the excluded type up by one. This skips the
    // exception while keeping the selection uniform.
    picked = (uvm_channel_type_t)uvm_test_rng_range_32(rng, 0, UVM_CHANNEL_TYPE_CE_COUNT - 2);
    if (picked >= exception)
        picked = (uvm_channel_type_t)(picked + 1);

    UVM_ASSERT(picked < UVM_CHANNEL_TYPE_CE_COUNT);

    return picked;
}
508 
gpu_random_internal_ce_channel_type(uvm_gpu_t * gpu,uvm_test_rng_t * rng)509 static uvm_channel_type_t gpu_random_internal_ce_channel_type(uvm_gpu_t *gpu, uvm_test_rng_t *rng)
510 {
511     if (uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent))
512         return random_ce_channel_type_except(rng, uvm_channel_proxy_channel_type());
513 
514     return random_ce_channel_type(rng);
515 }
516 
random_va_space_gpu(uvm_test_rng_t * rng,uvm_va_space_t * va_space)517 static uvm_gpu_t *random_va_space_gpu(uvm_test_rng_t *rng, uvm_va_space_t *va_space)
518 {
519     uvm_gpu_t *gpu;
520     NvU32 gpu_count = uvm_processor_mask_get_gpu_count(&va_space->registered_gpus);
521     NvU32 gpu_index = uvm_test_rng_range_32(rng, 0, gpu_count - 1);
522 
523     UVM_ASSERT(gpu_count > 0);
524 
525     for_each_va_space_gpu(gpu, va_space) {
526         if (gpu_index-- == 0)
527             return gpu;
528     }
529 
530     UVM_ASSERT(0);
531     return NULL;
532 }
533 
534 
// Push a memset of the entire rm_mem allocation with the given 32-bit value.
// The allocation size must be a multiple of 4 bytes.
static void test_memset_rm_mem(uvm_push_t *push, uvm_rm_mem_t *rm_mem, NvU32 value)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    NvU64 dst_va;

    UVM_ASSERT(rm_mem->size % 4 == 0);

    dst_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_channel_is_proxy(push->channel)).address;
    gpu->parent->ce_hal->memset_v_4(push, dst_va, value, rm_mem->size);
}
547 
548 // This test schedules a randomly sized memset on a random channel and GPU in a
549 // "stream" that has operations ordered by acquiring the tracker of the previous
550 // operation. It also snapshots the memset done by the previous operation in the
551 // stream to verify it later on the CPU. Each iteration also optionally acquires
552 // a different stream and snapshots its memset.
553 // The test ioctl is expected to be called at the same time from multiple
554 // threads and contains some schedule() calls to help get as many threads
555 // through the init phase before other threads continue. It also has a random
556 // schedule() call in the main loop scheduling GPU work.
stress_test_all_gpus_in_va(uvm_va_space_t * va_space,NvU32 num_streams,NvU32 iterations_per_stream,NvU32 seed,NvU32 verbose)557 static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
558                                             NvU32 num_streams,
559                                             NvU32 iterations_per_stream,
560                                             NvU32 seed,
561                                             NvU32 verbose)
562 {
563     NV_STATUS status = NV_OK;
564     uvm_gpu_t *gpu;
565     NvU32 i, j;
566     uvm_test_stream_t *streams;
567     uvm_test_rng_t rng;
568 
569     uvm_test_rng_init(&rng, seed);
570 
571     gpu = uvm_va_space_find_first_gpu(va_space);
572     TEST_CHECK_RET(gpu != NULL);
573 
574     streams = uvm_kvmalloc_zero(sizeof(*streams) * num_streams);
575     TEST_CHECK_RET(streams != NULL);
576 
577     // Initialize all the trackers first so that clean up on error can always
578     // wait for them.
579     for (i = 0; i < num_streams; ++i) {
580         uvm_test_stream_t *stream = &streams[i];
581         uvm_tracker_init(&stream->tracker);
582     }
583 
584     for (i = 0; i < num_streams; ++i) {
585         uvm_test_stream_t *stream = &streams[i];
586 
587         status = uvm_gpu_semaphore_alloc(gpu->semaphore_pool, &stream->semaphore);
588         if (status != NV_OK)
589             goto done;
590 
591         stream->queued_counter_value = 0;
592 
593         status = uvm_rm_mem_alloc_and_map_all(gpu,
594                                               UVM_RM_MEM_TYPE_SYS,
595                                               MAX_COUNTER_REPEAT_COUNT * sizeof(NvU32),
596                                               0,
597                                               &stream->counter_mem);
598         TEST_CHECK_GOTO(status == NV_OK, done);
599 
600         status = uvm_rm_mem_alloc_and_map_all(gpu,
601                                               UVM_RM_MEM_TYPE_SYS,
602                                               TEST_SNAPSHOT_SIZE(iterations_per_stream),
603                                               0,
604                                               &stream->counter_snapshots_mem);
605         TEST_CHECK_GOTO(status == NV_OK, done);
606 
607         stream->counter_snapshots = (NvU32*)uvm_rm_mem_get_cpu_va(stream->counter_snapshots_mem);
608 
609         status = uvm_rm_mem_alloc_and_map_all(gpu,
610                                               UVM_RM_MEM_TYPE_SYS,
611                                               TEST_SNAPSHOT_SIZE(iterations_per_stream),
612                                               0,
613                                               &stream->other_stream_counter_snapshots_mem);
614         TEST_CHECK_GOTO(status == NV_OK, done);
615 
616         stream->other_stream_counter_snapshots = (NvU32*)uvm_rm_mem_get_cpu_va(stream->other_stream_counter_snapshots_mem);
617 
618         stream->other_stream_counter_expected = uvm_kvmalloc_zero(sizeof(NvU32) * iterations_per_stream);
619         if (stream->other_stream_counter_expected == NULL) {
620             status = NV_ERR_NO_MEMORY;
621             goto done;
622         }
623 
624         status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, &stream->push, "stream %u init", i);
625         TEST_CHECK_GOTO(status == NV_OK, done);
626 
627         test_memset_rm_mem(&stream->push, stream->counter_mem, 0);
628         test_memset_rm_mem(&stream->push, stream->counter_snapshots_mem, 0);
629         test_memset_rm_mem(&stream->push, stream->other_stream_counter_snapshots_mem, 0);
630 
631         status = uvm_push_end_and_wait(&stream->push);
632         TEST_CHECK_GOTO(status == NV_OK, done);
633 
634         if (fatal_signal_pending(current)) {
635             status = NV_ERR_SIGNAL_PENDING;
636             goto done;
637         }
638 
639         // Let other threads run
640         schedule();
641     }
642 
643     if (verbose > 0) {
644         UVM_TEST_PRINT("Init done, seed %u, GPUs:\n", seed);
645         for_each_va_space_gpu(gpu, va_space) {
646             UVM_TEST_PRINT(" GPU %s\n", uvm_gpu_name(gpu));
647         }
648     }
649 
650     for (i = 0; i < iterations_per_stream; ++i) {
651         for (j = 0; j < num_streams; ++j) {
652             uvm_test_stream_t *stream = &streams[j];
653             uvm_channel_type_t channel_type;
654             gpu = random_va_space_gpu(&rng, va_space);
655 
656             if (fatal_signal_pending(current)) {
657                 status = NV_ERR_SIGNAL_PENDING;
658                 goto done;
659             }
660 
661             // Select a random channel type. In SR-IOV heavy the selection has
662             // to exclude the type associated with proxy channels, because they
663             // do not support the virtual memcopies/memsets pushed by
664             // snapshot_counter and set_counter
665             channel_type = gpu_random_internal_ce_channel_type(gpu, &rng);
666 
667             status = uvm_push_begin_acquire(gpu->channel_manager,
668                                             channel_type,
669                                             &stream->tracker,
670                                             &stream->push,
671                                             "stream %u payload %u gid %u channel_type %u",
672                                             j,
673                                             stream->queued_counter_value,
674                                             uvm_id_value(gpu->id),
675                                             channel_type);
676             TEST_CHECK_GOTO(status == NV_OK, done);
677 
678             snapshot_counter(&stream->push,
679                              stream->counter_mem,
680                              stream->counter_snapshots_mem,
681                              i,
682                              stream->queued_counter_repeat);
683 
684             // Set a random number [2, MAX_COUNTER_REPEAT_COUNT] of counters
685             stream->queued_counter_repeat = uvm_test_rng_range_32(&rng, 2, MAX_COUNTER_REPEAT_COUNT);
686             set_counter(&stream->push,
687                         stream->counter_mem,
688                         ++stream->queued_counter_value,
689                         stream->queued_counter_repeat);
690 
691             if (uvm_test_rng_range_32(&rng, 0, 1) == 0) {
692                 NvU32 random_stream_index = uvm_test_rng_range_32(&rng, 0, num_streams - 1);
693                 uvm_test_stream_t *random_stream = &streams[random_stream_index];
694 
695                 if ((random_stream->push.gpu == gpu) || uvm_push_allow_dependencies_across_gpus()) {
696                     uvm_push_acquire_tracker(&stream->push, &random_stream->tracker);
697 
698                     snapshot_counter(&stream->push,
699                                      random_stream->counter_mem,
700                                      stream->other_stream_counter_snapshots_mem,
701                                      i,
702                                      random_stream->queued_counter_repeat);
703                 }
704             }
705 
706             uvm_push_end(&stream->push);
707             uvm_tracker_clear(&stream->tracker);
708             TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&stream->tracker, &stream->push), done);
709         }
710 
711         // Randomly schedule other threads
712         if (uvm_test_rng_range_32(&rng, 0, 9) == 0)
713             schedule();
714     }
715 
716     if (verbose > 0)
717         UVM_TEST_PRINT("All work scheduled\n");
718 
719     // Let other threads run
720     schedule();
721 
722     for (i = 0; i < num_streams; ++i) {
723         uvm_test_stream_t *stream = &streams[i];
724         status = uvm_tracker_wait(&stream->tracker);
725         if (status != NV_OK) {
726             UVM_TEST_PRINT("Failed to wait for the tracker for stream %u: %s\n", i, nvstatusToString(status));
727             goto done;
728         }
729         for (j = 0; j < iterations_per_stream; ++j) {
730             NvU32 snapshot_last = stream->counter_snapshots[j * 2];
731             NvU32 snapshot_first = stream->counter_snapshots[j * 2 + 1];
732             if (snapshot_last != j || snapshot_first != j) {
733                 UVM_TEST_PRINT("Stream %u counter snapshot[%u] = %u,%u instead of %u,%u\n",
734                                i,
735                                j,
736                                snapshot_last,
737                                snapshot_first,
738                                j,
739                                j);
740                 status = NV_ERR_INVALID_STATE;
741                 goto done;
742             }
743         }
744         for (j = 0; j < iterations_per_stream; ++j) {
745             NvU32 snapshot_last = stream->other_stream_counter_snapshots[j * 2];
746             NvU32 snapshot_first = stream->other_stream_counter_snapshots[j * 2 + 1];
747             NvU32 expected = stream->other_stream_counter_expected[j];
748             if (snapshot_last < expected || snapshot_first < expected) {
749                 UVM_TEST_PRINT("Stream %u other_counter snapshot[%u] = %u,%u which is < of %u,%u\n",
750                                i,
751                                j,
752                                snapshot_last,
753                                snapshot_first,
754                                expected,
755                                expected);
756                 status = NV_ERR_INVALID_STATE;
757                 goto done;
758             }
759         }
760     }
761 
762     if (verbose > 0)
763         UVM_TEST_PRINT("Verification done\n");
764 
765     schedule();
766 
767 done:
768     // Wait for all the trackers first before freeing up memory as streams
769     // references each other's buffers.
770     for (i = 0; i < num_streams; ++i) {
771         uvm_test_stream_t *stream = &streams[i];
772         uvm_tracker_wait(&stream->tracker);
773     }
774 
775     for (i = 0; i < num_streams; ++i) {
776         uvm_test_stream_t *stream = &streams[i];
777         uvm_gpu_semaphore_free(&stream->semaphore);
778         uvm_rm_mem_free(stream->other_stream_counter_snapshots_mem);
779         uvm_rm_mem_free(stream->counter_snapshots_mem);
780         uvm_rm_mem_free(stream->counter_mem);
781         uvm_tracker_deinit(&stream->tracker);
782         uvm_kvfree(stream->other_stream_counter_expected);
783     }
784     uvm_kvfree(streams);
785 
786     if (verbose > 0)
787         UVM_TEST_PRINT("Cleanup done\n");
788 
789     return status;
790 }
791 
792 // The following test is inspired by uvm_push_test.c:test_concurrent_pushes.
793 // This test verifies that concurrent pushes using the same channel pool
794 // select different channels, when the Confidential Computing feature is
795 // enabled.
// Verify that concurrent pushes against the same channel pool are distributed
// across distinct channels when the Confidential Computing feature is enabled.
// No-op (returns NV_OK) when the feature is disabled.
NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
{
    NV_STATUS status = NV_OK;
    uvm_channel_pool_t *pool;
    uvm_push_t *pushes;
    uvm_gpu_t *gpu;
    NvU32 i;
    NvU32 num_pushes;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;

    // Multiple pushes are kept open at the same time below, which would
    // otherwise trip lock-order tracking.
    // NOTE(review): the TEST_CHECK_RET exits inside the loops return without
    // re-enabling tracking — confirm whether that is acceptable on test
    // failure paths.
    uvm_thread_context_lock_disable_tracking();

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_type_t channel_type;

        for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
            pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
            TEST_CHECK_RET(pool != NULL);

            // Skip LCIC channels as those can't accept any pushes
            if (uvm_channel_pool_is_lcic(pool))
                continue;

            // With fewer than two channels there is nothing to distribute.
            if (pool->num_channels < 2)
                continue;

            num_pushes = min(pool->num_channels, (NvU32)UVM_PUSH_MAX_CONCURRENT_PUSHES);

            pushes = uvm_kvmalloc_zero(sizeof(*pushes) * num_pushes);
            TEST_CHECK_RET(pushes != NULL);

            for (i = 0; i < num_pushes; i++) {
                uvm_push_t *push = &pushes[i];
                status = uvm_push_begin(gpu->channel_manager, channel_type, push, "concurrent push %u", i);
                TEST_NV_CHECK_GOTO(status, error);

                // While earlier pushes are still open, each new begin must
                // land on a channel different from the previous one.
                if (i > 0)
                    TEST_CHECK_GOTO(pushes[i-1].channel != push->channel, error);
            }
            for (i = 0; i < num_pushes; i++) {
                uvm_push_t *push = &pushes[i];
                status = uvm_push_end_and_wait(push);
                TEST_NV_CHECK_GOTO(status, error);
            }

            uvm_kvfree(pushes);
        }
    }

    uvm_thread_context_lock_enable_tracking();

    return status;
error:
    // NOTE(review): pushes successfully begun before the failure are not
    // ended here, so their channel state is not released — presumably fine
    // because the test run is aborted, but verify.
    uvm_thread_context_lock_enable_tracking();
    uvm_kvfree(pushes);

    return status;
}
855 
// Verify that rotating a channel's IVs advances the encrypt/decrypt counters
// as expected for each channel flavor (CE, SEC2, WLC, LCIC). Only meaningful
// when Confidential Computing is enabled; otherwise returns NV_OK immediately.
NV_STATUS test_channel_iv_rotation(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_pool_t *pool;

        uvm_for_each_pool(pool, gpu->channel_manager) {
            NvU64 before_rotation_enc, before_rotation_dec, after_rotation_enc, after_rotation_dec;
            NV_STATUS status = NV_OK;

            // Check one (the first) channel per pool
            uvm_channel_t *channel = pool->channels;

            // Create a dummy encrypt/decrypt push to use few IVs.
            // SEC2 used encrypt during initialization, no need to use a dummy
            // push.
            if (!uvm_channel_is_sec2(channel)) {
                uvm_push_t push;
                size_t data_size;
                uvm_conf_computing_dma_buffer_t *cipher_text;
                void *cipher_cpu_va, *plain_cpu_va, *tag_cpu_va;
                uvm_gpu_address_t cipher_gpu_address, plain_gpu_address, tag_gpu_address;

                // LCIC channels can't accept pushes, so run the dummy push on
                // the paired WLC channel instead.
                uvm_channel_t *work_channel = uvm_channel_is_lcic(channel) ? uvm_channel_lcic_get_paired_wlc(channel) : channel;

                // Payload contents are irrelevant; the local status variable
                // doubles as a tiny plaintext source.
                plain_cpu_va = &status;
                data_size = sizeof(status);

                TEST_NV_CHECK_RET(uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool,
                                                                      &cipher_text,
                                                                      NULL));
                cipher_cpu_va = uvm_mem_get_cpu_addr_kernel(cipher_text->alloc);
                tag_cpu_va = uvm_mem_get_cpu_addr_kernel(cipher_text->auth_tag);

                cipher_gpu_address = uvm_mem_gpu_address_virtual_kernel(cipher_text->alloc, gpu);
                tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(cipher_text->auth_tag, gpu);

                TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(work_channel, &push, "Dummy push for IV rotation"), free);

                (void)uvm_push_get_single_inline_buffer(&push,
                                                        data_size,
                                                        UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                                                        &plain_gpu_address);

                // One CPU encrypt plus one GPU decrypt consumes IVs in both
                // directions.
                uvm_conf_computing_cpu_encrypt(work_channel, cipher_cpu_va, plain_cpu_va, NULL, data_size, tag_cpu_va);
                gpu->parent->ce_hal->decrypt(&push, plain_gpu_address, cipher_gpu_address, data_size, tag_gpu_address);

                TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), free);

free:
                uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, cipher_text, NULL);

                if (status != NV_OK)
                    return status;
            }

            // Reserve a channel to hold the push lock during rotation
            if (!uvm_channel_is_lcic(channel))
                TEST_NV_CHECK_RET(uvm_channel_reserve(channel, 1));

            // Snapshot the IV counters around the forced rotation.
            uvm_conf_computing_query_message_pools(channel, &before_rotation_enc, &before_rotation_dec);
            TEST_NV_CHECK_GOTO(uvm_conf_computing_rotate_channel_ivs_below_limit(channel, -1, true), release);
            uvm_conf_computing_query_message_pools(channel, &after_rotation_enc, &after_rotation_dec);

release:
            if (!uvm_channel_is_lcic(channel))
                uvm_channel_release(channel, 1);

            if (status != NV_OK)
                return status;

            // All channels except SEC2 used at least a single IV to release tracking.
            // SEC2 doesn't support decrypt direction.
            if (uvm_channel_is_sec2(channel))
                TEST_CHECK_RET(before_rotation_dec == after_rotation_dec);
            else
                TEST_CHECK_RET(before_rotation_dec < after_rotation_dec);

            // All channels used one CPU encrypt/GPU decrypt, either during
            // initialization or in the push above, with the exception of LCIC.
            // LCIC is used in tandem with WLC, but it never uses CPU encrypt/
            // GPU decrypt ops.
            if (uvm_channel_is_lcic(channel))
                TEST_CHECK_RET(before_rotation_enc == after_rotation_enc);
            else
                TEST_CHECK_RET(before_rotation_enc < after_rotation_enc);
        }
    }

    return NV_OK;
}
950 
test_write_ctrl_gpfifo_noop(uvm_va_space_t * va_space)951 NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
952 {
953     uvm_gpu_t *gpu;
954 
955     for_each_va_space_gpu(gpu, va_space) {
956         uvm_channel_manager_t *manager = gpu->channel_manager;
957         uvm_channel_pool_t *pool;
958 
959         uvm_for_each_pool(pool, manager) {
960             uvm_channel_t *channel;
961 
962             // Skip LCIC channels as those can't accept any pushes
963             if (uvm_channel_pool_is_lcic(pool))
964                 continue;
965 
966             // Skip WLC channels as those can't accept ctrl gpfifos
967             // after their schedule is set up
968             if (uvm_channel_pool_is_wlc(pool))
969                 continue;
970             uvm_for_each_channel_in_pool(channel, pool) {
971                 NvU32 i;
972 
973                 if (uvm_channel_is_proxy(channel))
974                     continue;
975 
976                 // We submit 8x the channel's GPFIFO entries to force a few
977                 // complete loops in the GPFIFO circular buffer.
978                 for (i = 0; i < 8 * channel->num_gpfifo_entries; i++) {
979                     NvU64 entry;
980                     gpu->parent->host_hal->set_gpfifo_noop(&entry);
981                     TEST_NV_CHECK_RET(uvm_channel_write_ctrl_gpfifo(channel, entry));
982                 }
983             }
984         }
985     }
986 
987     return NV_OK;
988 }
989 
test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t * va_space)990 NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
991 {
992     uvm_gpu_t *gpu;
993 
994     for_each_va_space_gpu(gpu, va_space) {
995         uvm_channel_manager_t *manager = gpu->channel_manager;
996         uvm_channel_pool_t *pool;
997 
998         uvm_for_each_pool(pool, manager) {
999             uvm_channel_t *channel;
1000 
1001             // Skip LCIC channels as those can't accept any pushes
1002             if (uvm_channel_pool_is_lcic(pool))
1003                 continue;
1004 
1005             // Skip WLC channels as those can't accept ctrl gpfifos
1006             // after their schedule is set up
1007             if (uvm_channel_pool_is_wlc(pool))
1008                 continue;
1009             uvm_for_each_channel_in_pool(channel, pool) {
1010                 NvU32 i;
1011                 uvm_push_t push;
1012 
1013                 if (uvm_channel_is_proxy(channel))
1014                     continue;
1015 
1016                 // We submit 8x the channel's GPFIFO entries to force a few
1017                 // complete loops in the GPFIFO circular buffer.
1018                 for (i = 0; i < 8 * channel->num_gpfifo_entries; i++) {
1019                     if (i % 2 == 0) {
1020                         NvU64 entry;
1021                         gpu->parent->host_hal->set_gpfifo_noop(&entry);
1022                         TEST_NV_CHECK_RET(uvm_channel_write_ctrl_gpfifo(channel, entry));
1023                     }
1024                     else {
1025                         TEST_NV_CHECK_RET(uvm_push_begin_on_channel(channel, &push, "gpfifo ctrl and push test"));
1026                         uvm_push_end(&push);
1027                     }
1028                 }
1029 
1030                 TEST_NV_CHECK_RET(uvm_push_wait(&push));
1031             }
1032         }
1033     }
1034 
1035     return NV_OK;
1036 }
1037 
// Fill a channel's GPFIFO almost completely while a semaphore acquire blocks
// its progress, then verify that a control GPFIFO entry can still be written
// when only two slots remain free.
NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu;
    uvm_channel_t *channel;
    uvm_rm_mem_t *mem;
    NvU32 *cpu_ptr;
    NvU64 gpu_va;
    NvU32 i;
    NvU64 entry;
    uvm_push_t push;

    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that GPU can access system memory without using encryption.
    if (g_uvm_global.conf_computing_enabled)
        return NV_OK;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_manager_t *manager = gpu->channel_manager;

        // Sysmem word the GPU-side semaphore acquire below will spin on.
        TEST_NV_CHECK_RET(uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(*cpu_ptr), 0, &mem));
        cpu_ptr = uvm_rm_mem_get_cpu_va(mem);
        gpu_va = uvm_rm_mem_get_gpu_uvm_va(mem, gpu);

        *cpu_ptr = 0;

        // This semaphore acquire takes 1 GPFIFO entries.
        TEST_NV_CHECK_GOTO(uvm_push_begin(manager, UVM_CHANNEL_TYPE_GPU_TO_GPU, &push, "gpfifo ctrl tight test acq"),
                           error);

        channel = push.channel;
        UVM_ASSERT(!uvm_channel_is_proxy(channel));

        // The acquire blocks the channel until *cpu_ptr is set to 1 below, so
        // every entry submitted afterwards stays pending.
        gpu->parent->host_hal->semaphore_acquire(&push, gpu_va, 1);
        uvm_push_end(&push);

        // Flush all completed entries from the GPFIFO ring buffer. This test
        // requires this flush because we verify (below with
        // uvm_channel_get_available_gpfifo_entries) the number of free entries
        // in the channel.
        uvm_channel_update_progress_all(channel);

        // Populate the remaining GPFIFO entries, leaving 2 slots available.
        // 2 available entries + 1 semaphore acquire (above) + 1 spare entry to
        // indicate a terminal condition for the GPFIFO ringbuffer, therefore we
        // push num_gpfifo_entries-4.
        for (i = 0; i < channel->num_gpfifo_entries - 4; i++) {
            TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(channel, &push, "gpfifo ctrl tight test populate"), error);
            uvm_push_end(&push);
        }

        TEST_CHECK_GOTO(uvm_channel_get_available_gpfifo_entries(channel) == 2, error);

        // We should have room for the control GPFIFO and the subsequent
        // semaphore release.
        gpu->parent->host_hal->set_gpfifo_noop(&entry);
        TEST_NV_CHECK_GOTO(uvm_channel_write_ctrl_gpfifo(channel, entry), error);

        // Release the semaphore.
        UVM_WRITE_ONCE(*cpu_ptr, 1);

        // Wait on the last push; all earlier entries must drain first.
        TEST_NV_CHECK_GOTO(uvm_push_wait(&push), error);

        uvm_rm_mem_free(mem);
    }

    return NV_OK;

error:
    // error is only reached after mem was successfully allocated for the
    // current GPU iteration.
    uvm_rm_mem_free(mem);

    return status;
}
1111 
1112 // This test is inspired by the test_rc (above).
1113 // The test recreates the GPU's channel manager forcing its pushbuffer to be
1114 // mapped on a non-zero 1TB segment. This exercises work submission from
1115 // pushbuffers whose VAs are greater than 1TB.
static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;
    NV_STATUS status = NV_OK;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_manager_t *manager;
        uvm_channel_pool_t *pool;

        // Only relevant for GPUs whose pushbuffer VA space is segmented.
        if (!uvm_parent_gpu_needs_pushbuffer_segments(gpu->parent))
            continue;

        // The GPU channel manager pushbuffer is destroyed and then re-created
        // after testing a non-zero pushbuffer extension base, so this test
        // requires exclusive access to the GPU.
        TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);

        // Force the next channel manager to place its pushbuffer in an upper
        // segment, then rebuild it.
        gpu->uvm_test_force_upper_pushbuffer_segment = 1;
        uvm_channel_manager_destroy(gpu->channel_manager);
        TEST_NV_CHECK_GOTO(uvm_channel_manager_create(gpu, &gpu->channel_manager), error);
        gpu->uvm_test_force_upper_pushbuffer_segment = 0;

        // Confirm the pushbuffer base actually landed at or above 1TB (2^40).
        manager = gpu->channel_manager;
        TEST_CHECK_GOTO(uvm_pushbuffer_get_gpu_va_base(manager->pushbuffer) >= (1ull << 40), error);

        // Submit a few pushes with the recently allocated
        // channel_manager->pushbuffer.
        uvm_for_each_pool(pool, manager) {
            uvm_channel_t *channel;

            // Skip LCIC channels as those can't accept any pushes
            if (uvm_channel_pool_is_lcic(pool))
                continue;
            uvm_for_each_channel_in_pool(channel, pool) {
                NvU32 i;
                uvm_push_t push;

                for (i = 0; i < channel->num_gpfifo_entries; i++) {
                    TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(channel, &push, "pushbuffer extension push test"),
                                       error);
                    uvm_push_end(&push);
                }

                TEST_NV_CHECK_GOTO(uvm_push_wait(&push), error);
            }
        }
    }

    return NV_OK;

error:
    // Always clear the debug override, even when re-creation failed.
    gpu->uvm_test_force_upper_pushbuffer_segment = 0;

    return status;
}
1173 
uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS * params,struct file * filp)1174 NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct file *filp)
1175 {
1176     NV_STATUS status;
1177     uvm_va_space_t *va_space = uvm_va_space_get(filp);
1178 
1179     uvm_mutex_lock(&g_uvm_global.global_lock);
1180     uvm_va_space_down_read_rm(va_space);
1181 
1182     status = test_ordering(va_space);
1183     if (status != NV_OK)
1184         goto done;
1185 
1186     status = test_write_ctrl_gpfifo_noop(va_space);
1187     if (status != NV_OK)
1188         goto done;
1189 
1190     status = test_write_ctrl_gpfifo_and_pushes(va_space);
1191     if (status != NV_OK)
1192         goto done;
1193 
1194     status = test_write_ctrl_gpfifo_tight(va_space);
1195     if (status != NV_OK)
1196         goto done;
1197 
1198     status = test_conf_computing_channel_selection(va_space);
1199     if (status != NV_OK)
1200         goto done;
1201 
1202     status = test_channel_iv_rotation(va_space);
1203     if (status != NV_OK)
1204         goto done;
1205 
1206     // The following tests have side effects, they reset the GPU's
1207     // channel_manager.
1208     status = test_channel_pushbuffer_extension_base(va_space);
1209     if (status != NV_OK)
1210         goto done;
1211 
1212     g_uvm_global.disable_fatal_error_assert = true;
1213     uvm_release_asserts_set_global_error_for_tests = true;
1214     status = test_unexpected_completed_values(va_space);
1215     uvm_release_asserts_set_global_error_for_tests = false;
1216     g_uvm_global.disable_fatal_error_assert = false;
1217     if (status != NV_OK)
1218         goto done;
1219 
1220     if (g_uvm_global.num_simulated_devices == 0) {
1221         status = test_rc(va_space);
1222         if (status != NV_OK)
1223             goto done;
1224     }
1225 
1226     status = test_iommu(va_space);
1227     if (status != NV_OK)
1228         goto done;
1229 
1230 done:
1231     uvm_va_space_up_read_rm(va_space);
1232     uvm_mutex_unlock(&g_uvm_global.global_lock);
1233 
1234     return status;
1235 }
1236 
uvm_test_channel_stress_stream(uvm_va_space_t * va_space,const UVM_TEST_CHANNEL_STRESS_PARAMS * params)1237 static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
1238                                                 const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
1239 {
1240     NV_STATUS status = NV_OK;
1241 
1242     if (params->iterations == 0 || params->num_streams == 0)
1243         return NV_ERR_INVALID_PARAMETER;
1244 
1245     // TODO: Bug 3839176: the test is waived on Confidential Computing because
1246     // it assumes that GPU can access system memory without using encryption.
1247     if (g_uvm_global.conf_computing_enabled)
1248         return NV_OK;
1249 
1250     // TODO: Bug 1764963: Rework the test to not rely on the global lock as that
1251     // serializes all the threads calling this at the same time.
1252     uvm_mutex_lock(&g_uvm_global.global_lock);
1253     uvm_va_space_down_read_rm(va_space);
1254 
1255     status = stress_test_all_gpus_in_va(va_space,
1256                                         params->num_streams,
1257                                         params->iterations,
1258                                         params->seed,
1259                                         params->verbose);
1260 
1261     uvm_va_space_up_read_rm(va_space);
1262     uvm_mutex_unlock(&g_uvm_global.global_lock);
1263 
1264     return status;
1265 }
1266 
uvm_test_channel_stress_update_channels(uvm_va_space_t * va_space,const UVM_TEST_CHANNEL_STRESS_PARAMS * params)1267 static NV_STATUS uvm_test_channel_stress_update_channels(uvm_va_space_t *va_space,
1268                                                          const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
1269 {
1270     NV_STATUS status = NV_OK;
1271     uvm_test_rng_t rng;
1272     NvU32 i;
1273 
1274     uvm_test_rng_init(&rng, params->seed);
1275 
1276     uvm_va_space_down_read(va_space);
1277 
1278     for (i = 0; i < params->iterations; ++i) {
1279         uvm_gpu_t *gpu = random_va_space_gpu(&rng, va_space);
1280         uvm_channel_manager_update_progress(gpu->channel_manager);
1281 
1282         if (fatal_signal_pending(current)) {
1283             status = NV_ERR_SIGNAL_PENDING;
1284             goto done;
1285         }
1286     }
1287 
1288 done:
1289     uvm_va_space_up_read(va_space);
1290 
1291     return status;
1292 }
1293 
uvm_test_channel_noop_push(uvm_va_space_t * va_space,const UVM_TEST_CHANNEL_STRESS_PARAMS * params)1294 static NV_STATUS uvm_test_channel_noop_push(uvm_va_space_t *va_space,
1295                                             const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
1296 {
1297     NV_STATUS status = NV_OK;
1298     uvm_push_t push;
1299     uvm_test_rng_t rng;
1300     uvm_gpu_t *gpu;
1301     NvU32 i;
1302 
1303     uvm_test_rng_init(&rng, params->seed);
1304 
1305     uvm_va_space_down_read(va_space);
1306 
1307     for (i = 0; i < params->iterations; ++i) {
1308         uvm_channel_type_t channel_type = random_ce_channel_type(&rng);
1309         gpu = random_va_space_gpu(&rng, va_space);
1310 
1311         status = uvm_push_begin(gpu->channel_manager, channel_type, &push, "noop push");
1312         if (status != NV_OK)
1313             goto done;
1314 
1315         // Push an actual noop method so that the push doesn't get optimized
1316         // away if we ever detect empty pushes.
1317         gpu->parent->host_hal->noop(&push, UVM_METHOD_SIZE);
1318 
1319         uvm_push_end(&push);
1320 
1321         if (fatal_signal_pending(current)) {
1322             status = NV_ERR_SIGNAL_PENDING;
1323             goto done;
1324         }
1325     }
1326     if (params->verbose > 0)
1327         UVM_TEST_PRINT("Noop pushes: completed %u pushes seed: %u\n", i, params->seed);
1328 
1329     for_each_va_space_gpu_in_mask(gpu, va_space, &va_space->registered_gpu_va_spaces) {
1330         NV_STATUS wait_status = uvm_channel_manager_wait(gpu->channel_manager);
1331         if (status == NV_OK)
1332             status = wait_status;
1333     }
1334 
1335 done:
1336     uvm_va_space_up_read(va_space);
1337 
1338     return status;
1339 }
1340 
uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS * params,struct file * filp)1341 NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct file *filp)
1342 {
1343     uvm_va_space_t *va_space = uvm_va_space_get(filp);
1344 
1345     switch (params->mode) {
1346         case UVM_TEST_CHANNEL_STRESS_MODE_STREAM:
1347             return uvm_test_channel_stress_stream(va_space, params);
1348         case UVM_TEST_CHANNEL_STRESS_MODE_UPDATE_CHANNELS:
1349             return uvm_test_channel_stress_update_channels(va_space, params);
1350         case UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH:
1351             return uvm_test_channel_noop_push(va_space, params);
1352         default:
1353             return NV_ERR_INVALID_PARAMETER;
1354     }
1355 }
1356