/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_api.h"
#include "uvm_pushbuffer.h"
#include "uvm_channel.h"
#include "uvm_global.h"
#include "uvm_lock.h"
#include "uvm_procfs.h"
#include "uvm_push.h"
#include "uvm_kvmalloc.h"
#include "uvm_gpu.h"
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_conf_computing.h"

// Print pushbuffer state into a seq_file if provided or with UVM_DBG_PRINT() if not.
static void uvm_pushbuffer_print_common(uvm_pushbuffer_t *pushbuffer, struct seq_file *s);

static int nv_procfs_read_pushbuffer_info(struct seq_file *s, void *v)
{
    uvm_pushbuffer_t *pushbuffer = (uvm_pushbuffer_t *)s->private;

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;

    uvm_pushbuffer_print_common(pushbuffer, s);

    uvm_up_read(&g_uvm_global.pm.lock);

    return 0;
}

static int nv_procfs_read_pushbuffer_info_entry(struct seq_file *s, void *v)
{
    UVM_ENTRY_RET(nv_procfs_read_pushbuffer_info(s, v));
}

UVM_DEFINE_SINGLE_PROCFS_FILE(pushbuffer_info_entry);

static NV_STATUS create_procfs(uvm_pushbuffer_t *pushbuffer)
{
    uvm_gpu_t *gpu = pushbuffer->channel_manager->gpu;

    // The pushbuffer info file is for debug only
    if (!uvm_procfs_is_debug_enabled())
        return NV_OK;

    pushbuffer->procfs.info_file = NV_CREATE_PROC_FILE("pushbuffer",
                                                       gpu->procfs.dir,
                                                       pushbuffer_info_entry,
                                                       pushbuffer);
    if (pushbuffer->procfs.info_file == NULL)
        return NV_ERR_OPERATING_SYSTEM;

    return NV_OK;
}

NV_STATUS uvm_pushbuffer_create(uvm_channel_manager_t *channel_manager, uvm_pushbuffer_t **pushbuffer_out)
{
    NV_STATUS status;
    int i;
    uvm_gpu_t *gpu = channel_manager->gpu;
    NvU64 pushbuffer_alignment;

    uvm_pushbuffer_t *pushbuffer = uvm_kvmalloc_zero(sizeof(*pushbuffer));
    if (pushbuffer == NULL)
        return NV_ERR_NO_MEMORY;

    pushbuffer->channel_manager = channel_manager;

    uvm_spin_lock_init(&pushbuffer->lock, UVM_LOCK_ORDER_LEAF);

    // Currently the pushbuffer supports up to UVM_PUSHBUFFER_CHUNKS concurrent
    // pushes.
    uvm_sema_init(&pushbuffer->concurrent_pushes_sema, UVM_PUSHBUFFER_CHUNKS, UVM_LOCK_ORDER_PUSH);

    UVM_ASSERT(channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS ||
               channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_VID);

    // The pushbuffer allocation is aligned to UVM_PUSHBUFFER_SIZE and its size
    // (UVM_PUSHBUFFER_SIZE) is a power of 2. These constraints guarantee that
    // the entire pushbuffer belongs to a 1TB (2^40) segment. Thus, we can set
    // the Esched/PBDMA segment base for all channels during their
    // initialization and it is immutable for the entire channels' lifetime.
    BUILD_BUG_ON_NOT_POWER_OF_2(UVM_PUSHBUFFER_SIZE);
    BUILD_BUG_ON(UVM_PUSHBUFFER_SIZE >= (1ull << 40));

    if (gpu->uvm_test_force_upper_pushbuffer_segment)
        pushbuffer_alignment = (1ull << 40);
    else
        pushbuffer_alignment = UVM_PUSHBUFFER_SIZE;

    status = uvm_rm_mem_alloc_and_map_cpu(gpu,
                                          (channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS) ?
                                              UVM_RM_MEM_TYPE_SYS:
                                              UVM_RM_MEM_TYPE_GPU,
                                          UVM_PUSHBUFFER_SIZE,
                                          pushbuffer_alignment,
                                          &pushbuffer->memory);
    if (status != NV_OK)
        goto error;

    if (g_uvm_global.conf_computing_enabled) {
        UVM_ASSERT(channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS);

        // Move the above allocation to memory_unprotected_sysmem
        pushbuffer->memory_unprotected_sysmem = pushbuffer->memory;
        pushbuffer->memory = NULL;

        // Make sure the base is at least 4KB aligned. Pushes can include inline
        // buffers with specific alignment requirements, and a different base
        // alignment between backing memory locations would change that.
        pushbuffer->memory_protected_sysmem = uvm_kvmalloc_zero(UVM_PUSHBUFFER_SIZE + UVM_PAGE_SIZE_4K);
        if (!pushbuffer->memory_protected_sysmem) {
            status = NV_ERR_NO_MEMORY;
            goto error;
        }

        status = uvm_rm_mem_alloc(gpu,
                                  UVM_RM_MEM_TYPE_GPU,
                                  UVM_PUSHBUFFER_SIZE,
                                  pushbuffer_alignment,
                                  &pushbuffer->memory);
        if (status != NV_OK)
            goto error;

        status = uvm_rm_mem_map_gpu(pushbuffer->memory_unprotected_sysmem, gpu, pushbuffer_alignment);
        if (status != NV_OK)
            goto error;
    }

    // Verify the GPU can access the pushbuffer.
    UVM_ASSERT((uvm_pushbuffer_get_gpu_va_base(pushbuffer) + UVM_PUSHBUFFER_SIZE - 1) < gpu->parent->max_host_va);

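    // Every chunk starts out both idle (no pending GPFIFO entries) and
    // available (room for a new push), so both bitmaps begin fully set.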
    bitmap_fill(pushbuffer->idle_chunks, UVM_PUSHBUFFER_CHUNKS);
    bitmap_fill(pushbuffer->available_chunks, UVM_PUSHBUFFER_CHUNKS);

    for (i = 0; i < UVM_PUSHBUFFER_CHUNKS; ++i)
        INIT_LIST_HEAD(&pushbuffer->chunks[i].pending_gpfifos);

    status = create_procfs(pushbuffer);
    if (status != NV_OK)
        goto error;

    *pushbuffer_out = pushbuffer;

    return status;

error:
    uvm_pushbuffer_destroy(pushbuffer);
    return status;
}

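// Return the chunk corresponding to the first bit set in the given mask, or
// NULL if no bit is set. Callers must hold the pushbuffer lock.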
static uvm_pushbuffer_chunk_t *get_chunk_in_mask(uvm_pushbuffer_t *pushbuffer, unsigned long *mask)
{
    NvU32 index = find_first_bit(mask, UVM_PUSHBUFFER_CHUNKS);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (index == UVM_PUSHBUFFER_CHUNKS)
        return NULL;

    return &pushbuffer->chunks[index];
}

static uvm_pushbuffer_chunk_t *get_available_chunk(uvm_pushbuffer_t *pushbuffer)
{
    return get_chunk_in_mask(pushbuffer, pushbuffer->available_chunks);
}

static uvm_pushbuffer_chunk_t *get_idle_chunk(uvm_pushbuffer_t *pushbuffer)
{
    return get_chunk_in_mask(pushbuffer, pushbuffer->idle_chunks);
}

static NvU32 chunk_get_index(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    NvU32 index = chunk - pushbuffer->chunks;
    UVM_ASSERT(index < UVM_PUSHBUFFER_CHUNKS);
    return index;
}

static NvU32 chunk_get_offset(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    return chunk_get_index(pushbuffer, chunk) * UVM_PUSHBUFFER_CHUNK_SIZE;
}

static void set_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk, unsigned long *mask)
{
    NvU32 index = chunk_get_index(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    __set_bit(index, mask);
}

static void clear_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk, unsigned long *mask)
{
    NvU32 index = chunk_get_index(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    __clear_bit(index, mask);
}

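// Pick a chunk for a new push, preferring a completely idle chunk over one
// that merely has enough space available.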
static uvm_pushbuffer_chunk_t *pick_chunk(uvm_pushbuffer_t *pushbuffer)
{
    uvm_pushbuffer_chunk_t *chunk = get_idle_chunk(pushbuffer);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (chunk == NULL)
        chunk = get_available_chunk(pushbuffer);

    return chunk;
}

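// Try to claim a chunk for the given push. On success the chunk is removed
// from both the idle and available masks and its current_push is set, so no
// other push can claim it until uvm_pushbuffer_end_push() updates it again.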
static bool try_claim_chunk(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_pushbuffer_chunk_t **chunk_out)
{
    uvm_pushbuffer_chunk_t *chunk;

    uvm_spin_lock(&pushbuffer->lock);

    chunk = pick_chunk(pushbuffer);
    if (!chunk)
        goto done;

    chunk->current_push = push;
    clear_chunk(pushbuffer, chunk, pushbuffer->idle_chunks);
    clear_chunk(pushbuffer, chunk, pushbuffer->available_chunks);

done:
    uvm_spin_unlock(&pushbuffer->lock);
    *chunk_out = chunk;

    return chunk != NULL;
}

static char *get_base_cpu_va(uvm_pushbuffer_t *pushbuffer)
{
    // Confidential Computing pushes are assembled in protected sysmem and then
    // either safely moved (through encrypt/decrypt) to protected vidmem, or
    // signed and moved to unprotected sysmem.
    //
    // The protected sysmem base is aligned to 4KB. This is enough to give the
    // same alignment behaviour for inline buffers as the other two backing
    // memory locations.
    if (g_uvm_global.conf_computing_enabled)
        return (char*)(UVM_ALIGN_UP((uintptr_t)pushbuffer->memory_protected_sysmem, UVM_PAGE_SIZE_4K));

    return (char *)uvm_rm_mem_get_cpu_va(pushbuffer->memory);
}

static NvU32 *chunk_get_next_push_start_addr(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    char *push_start = get_base_cpu_va(pushbuffer);
    push_start += chunk_get_offset(pushbuffer, chunk);
    push_start += chunk->next_push_start;

    UVM_ASSERT(((NvU64)push_start) % sizeof(NvU32) == 0);

    return (NvU32*)push_start;
}

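// Claim a chunk for the push, spinning until one becomes available. Channel
// manager progress is updated between attempts so completed GPFIFO entries can
// free up pushbuffer space; a channel error aborts the wait.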
static NV_STATUS claim_chunk(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_pushbuffer_chunk_t **chunk_out)
{
    NV_STATUS status = NV_OK;
    uvm_channel_manager_t *channel_manager = pushbuffer->channel_manager;
    uvm_spin_loop_t spin;

    if (try_claim_chunk(pushbuffer, push, chunk_out))
        return NV_OK;

    uvm_channel_manager_update_progress(channel_manager);

    uvm_spin_loop_init(&spin);
    while (!try_claim_chunk(pushbuffer, push, chunk_out) && status == NV_OK) {
        UVM_SPIN_LOOP(&spin);
        status = uvm_channel_manager_check_errors(channel_manager);
        uvm_channel_manager_update_progress(channel_manager);
    }

    return status;
}

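// Begin a push by reserving pushbuffer space and setting push->begin and
// push->next. Rough usage sketch (callers normally go through the higher-level
// push helpers rather than calling this pair directly):
//
//     status = uvm_pushbuffer_begin_push(pushbuffer, push);
//     // ... write methods starting at push->begin via push->next ...
//     uvm_pushbuffer_end_push(pushbuffer, push, gpfifo);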
NV_STATUS uvm_pushbuffer_begin_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    uvm_pushbuffer_chunk_t *chunk;
    NV_STATUS status;

    UVM_ASSERT(pushbuffer);
    UVM_ASSERT(push);
    UVM_ASSERT(push->channel);

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC pushes use static PB and don't count against max concurrent
        // pushes.
        push->begin = (void*)UVM_ALIGN_UP((uintptr_t)push->channel->conf_computing.static_pb_protected_sysmem,
                                          UVM_PAGE_SIZE_4K);
        push->next = push->begin;
        return NV_OK;
    }

    // Note that this semaphore is uvm_up()ed in end_push().
    uvm_down(&pushbuffer->concurrent_pushes_sema);

    status = claim_chunk(pushbuffer, push, &chunk);
    if (status != NV_OK) {
        uvm_up(&pushbuffer->concurrent_pushes_sema);
        return status;
    }

    UVM_ASSERT(chunk);

    push->begin = chunk_get_next_push_start_addr(pushbuffer, chunk);
    push->next = push->begin;

    return NV_OK;
}

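// Pending GPFIFO entries are kept in submission order, so the first entry in
// the list tracks the GPU get and the last entry tracks the CPU put within the
// chunk (see chunk_get_gpu_get() and chunk_get_cpu_put() below).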
static uvm_gpfifo_entry_t *chunk_get_first_gpfifo(uvm_pushbuffer_chunk_t *chunk)
{
    return list_first_entry_or_null(&chunk->pending_gpfifos, uvm_gpfifo_entry_t, pending_list_node);
}

static uvm_gpfifo_entry_t *chunk_get_last_gpfifo(uvm_pushbuffer_chunk_t *chunk)
{
    return list_last_entry_or_null(&chunk->pending_gpfifos, uvm_gpfifo_entry_t, pending_list_node);
}

// Get the cpu put within the chunk (in range [0, UVM_PUSHBUFFER_CHUNK_SIZE])
static NvU32 chunk_get_cpu_put(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    uvm_gpfifo_entry_t *gpfifo = chunk_get_last_gpfifo(chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (gpfifo != NULL)
        return gpfifo->pushbuffer_offset + gpfifo->pushbuffer_size - chunk_get_offset(pushbuffer, chunk);
    else
        return 0;
}

// Get the gpu get within the chunk (in range [0, UVM_PUSHBUFFER_CHUNK_SIZE))
static NvU32 chunk_get_gpu_get(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    uvm_gpfifo_entry_t *gpfifo = chunk_get_first_gpfifo(chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (gpfifo != NULL)
        return gpfifo->pushbuffer_offset - chunk_get_offset(pushbuffer, chunk);
    else
        return 0;
}

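// Recompute the chunk's state from its current put/get positions: mark it idle
// and/or available as appropriate and pick where the next push should start.
// Called with the pushbuffer lock held.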
static void update_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    NvU32 gpu_get = chunk_get_gpu_get(pushbuffer, chunk);
    NvU32 cpu_put = chunk_get_cpu_put(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (gpu_get == cpu_put) {
        // cpu_put can be equal to gpu_get both when the chunk is full and when
        // it is empty. The two cases can be told apart by checking whether the
        // pending GPFIFOs list is empty.
        if (!list_empty(&chunk->pending_gpfifos))
            return;

        // Chunk completely idle
        set_chunk(pushbuffer, chunk, pushbuffer->idle_chunks);
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        UVM_ASSERT_MSG(cpu_put == 0, "cpu put %u\n", cpu_put);

        // For a completely idle chunk, always start at the very beginning. This
        // helps avoid the waste that can happen at the very end of the chunk
        // described at the top of uvm_pushbuffer.h.
        chunk->next_push_start = 0;
    }
    else if (gpu_get > cpu_put) {
        if (gpu_get - cpu_put >= UVM_MAX_PUSH_SIZE) {
            // Enough space between put and get
            set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
            chunk->next_push_start = cpu_put;
        }
    }
    else if (UVM_PUSHBUFFER_CHUNK_SIZE >= cpu_put + UVM_MAX_PUSH_SIZE) {
        UVM_ASSERT_MSG(gpu_get < cpu_put, "gpu_get %u cpu_put %u\n", gpu_get, cpu_put);

        // Enough space at the end
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        chunk->next_push_start = cpu_put;
    }
    else if (gpu_get >= UVM_MAX_PUSH_SIZE) {
        UVM_ASSERT_MSG(gpu_get < cpu_put, "gpu_get %u cpu_put %u\n", gpu_get, cpu_put);

        // Enough space at the beginning
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        chunk->next_push_start = 0;
    }
}

void uvm_pushbuffer_destroy(uvm_pushbuffer_t *pushbuffer)
{
    if (pushbuffer == NULL)
        return;

    proc_remove(pushbuffer->procfs.info_file);

    uvm_rm_mem_free(pushbuffer->memory_unprotected_sysmem);
    uvm_kvfree(pushbuffer->memory_protected_sysmem);
    uvm_rm_mem_free(pushbuffer->memory);
    uvm_kvfree(pushbuffer);
}

static uvm_pushbuffer_chunk_t *offset_to_chunk(uvm_pushbuffer_t *pushbuffer, NvU32 offset)
{
    UVM_ASSERT(offset < UVM_PUSHBUFFER_SIZE);
    return &pushbuffer->chunks[offset / UVM_PUSHBUFFER_CHUNK_SIZE];
}

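// A push never spans a chunk boundary, so a GPFIFO entry's pushbuffer range
// always maps to a single chunk (asserted below).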
static uvm_pushbuffer_chunk_t *gpfifo_to_chunk(uvm_pushbuffer_t *pushbuffer, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk = offset_to_chunk(pushbuffer, gpfifo->pushbuffer_offset);
    UVM_ASSERT(offset_to_chunk(pushbuffer, gpfifo->pushbuffer_offset + gpfifo->pushbuffer_size - 1) == chunk);
    return chunk;
}

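// Confidential Computing only: decrypt a completed push from its unprotected
// sysmem copy back into the CPU-side protected backing before the push's
// on_complete() callback runs. A no-op if no crypto bundle was used.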
static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
{
    NV_STATUS status;
    void *auth_tag_cpu_va;
    void *push_protected_cpu_va;
    void *push_unprotected_cpu_va;
    NvU32 pushbuffer_offset = gpfifo->pushbuffer_offset;
    NvU32 push_info_index = gpfifo->push_info - channel->push_infos;
    uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
    uvm_push_crypto_bundle_t *crypto_bundle = channel->conf_computing.push_crypto_bundles + push_info_index;

    if (channel->conf_computing.push_crypto_bundles == NULL)
        return;

    // When the crypto bundle is used, the push size cannot be zero
    if (crypto_bundle->push_size == 0)
        return;

    UVM_ASSERT(!uvm_channel_is_wlc(channel));
    UVM_ASSERT(!uvm_channel_is_lcic(channel));

    push_protected_cpu_va = get_base_cpu_va(pushbuffer) + pushbuffer_offset;
    push_unprotected_cpu_va = (char *)uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem) + pushbuffer_offset;
    auth_tag_cpu_va = uvm_channel_get_push_crypto_bundle_auth_tags_cpu_va(channel, push_info_index);

    status = uvm_conf_computing_cpu_decrypt(channel,
                                            push_protected_cpu_va,
                                            push_unprotected_cpu_va,
                                            &crypto_bundle->iv,
                                            crypto_bundle->key_version,
                                            crypto_bundle->push_size,
                                            auth_tag_cpu_va);

    // A decryption failure here is not fatal because it does not
    // prevent UVM from running fine in the future and cannot be used
    // maliciously to leak information or otherwise derail UVM from its
    // regular duties.
    UVM_ASSERT_MSG_RELEASE(status == NV_OK, "Pushbuffer decryption failure: %s\n", nvstatusToString(status));

    // Avoid reusing the bundle across multiple pushes
    crypto_bundle->push_size = 0;
}

void uvm_pushbuffer_mark_completed(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk;
    bool need_to_update_chunk = false;
    uvm_push_info_t *push_info = gpfifo->push_info;
    uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);

    UVM_ASSERT(gpfifo->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL);

    chunk = gpfifo_to_chunk(pushbuffer, gpfifo);

    if (push_info->on_complete != NULL) {
        decrypt_push(channel, gpfifo);
        push_info->on_complete(push_info->on_complete_data);
        push_info->on_complete = NULL;
        push_info->on_complete_data = NULL;
    }

    uvm_spin_lock(&pushbuffer->lock);

    if (gpfifo == chunk_get_first_gpfifo(chunk))
        need_to_update_chunk = true;
    else if (gpfifo == chunk_get_last_gpfifo(chunk))
        need_to_update_chunk = true;

    list_del(&gpfifo->pending_list_node);

    // If current_push is not NULL, updating the chunk is delayed until
    // uvm_pushbuffer_end_push() is called for that push.
    if (need_to_update_chunk && chunk->current_push == NULL)
        update_chunk(pushbuffer, chunk);

    uvm_spin_unlock(&pushbuffer->lock);
}

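// Return the offset of the push's first method relative to the pushbuffer
// base. WLC pushes always report offset 0, see below.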
NvU32 uvm_pushbuffer_get_offset_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    NvU32 offset;

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC channels use a private static PB and their gpfifo entries are
        // not added to any chunk's list. This only needs to return a legal
        // offset. Completion cleanup will not find WLC gpfifo entries as
        // either the first or last entry of any chunk.
        return 0;
    }

    offset = (char*)push->begin - get_base_cpu_va(pushbuffer);

    UVM_ASSERT(((NvU64)offset) % sizeof(NvU32) == 0);

    return offset;
}

NvU64 uvm_pushbuffer_get_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    NvU64 pushbuffer_base;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    bool is_proxy_channel = uvm_channel_is_proxy(push->channel);

    pushbuffer_base = uvm_rm_mem_get_gpu_va(pushbuffer->memory, gpu, is_proxy_channel).address;

    if (uvm_channel_is_wlc(push->channel) || uvm_channel_is_lcic(push->channel)) {
        // We need to use the same static locations for PB as the fixed
        // schedule because that's what the channels are initialized to use.
        return uvm_channel_get_static_pb_protected_vidmem_gpu_va(push->channel);
    }
    else if (uvm_channel_is_sec2(push->channel)) {
        // SEC2 PBs are in unprotected sysmem
        pushbuffer_base = uvm_pushbuffer_get_sec2_gpu_va_base(pushbuffer);
    }

    return pushbuffer_base + uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
}

void *uvm_pushbuffer_get_unprotected_cpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    char *pushbuffer_base;

    if (uvm_channel_is_wlc(push->channel)) {
        // Reuse the existing WLC static pb for initialization
        UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
        return uvm_channel_get_static_pb_unprotected_sysmem_cpu(push->channel);
    }

    pushbuffer_base = uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem);

    return pushbuffer_base + uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
}

NvU64 uvm_pushbuffer_get_unprotected_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    NvU64 pushbuffer_base;

    if (uvm_channel_is_wlc(push->channel)) {
        // Reuse the existing WLC static pb for initialization
        UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));

        return uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(push->channel);
    }

    pushbuffer_base = uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory_unprotected_sysmem, uvm_push_get_gpu(push));

    return pushbuffer_base + uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
}

void uvm_pushbuffer_end_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk;

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC channels use a static pushbuffer and don't count towards max
        // concurrent pushes. Initializing the entry's list node makes sure the
        // deletion in uvm_pushbuffer_mark_completed() doesn't crash.
        INIT_LIST_HEAD(&gpfifo->pending_list_node);
        return;
    }

    chunk = gpfifo_to_chunk(pushbuffer, gpfifo);

    uvm_channel_pool_assert_locked(push->channel->pool);

    uvm_spin_lock(&pushbuffer->lock);

    list_add_tail(&gpfifo->pending_list_node, &chunk->pending_gpfifos);

    update_chunk(pushbuffer, chunk);

    UVM_ASSERT(chunk->current_push == push);
    chunk->current_push = NULL;

    uvm_spin_unlock(&pushbuffer->lock);

    // uvm_pushbuffer_end_push() needs to be called with the channel lock held
    // while the concurrent pushes sema has a higher lock order. To keep the
    // code structure simple, just up out of order here.
    uvm_up_out_of_order(&pushbuffer->concurrent_pushes_sema);
}

bool uvm_pushbuffer_has_space(uvm_pushbuffer_t *pushbuffer)
{
    bool has_space;

    uvm_spin_lock(&pushbuffer->lock);

    has_space = pick_chunk(pushbuffer) != NULL;

    uvm_spin_unlock(&pushbuffer->lock);

    return has_space;
}

static void uvm_pushbuffer_print_common(uvm_pushbuffer_t *pushbuffer, struct seq_file *s)
{
    NvU32 i;

    UVM_SEQ_OR_DBG_PRINT(s, "Pushbuffer for GPU %s\n", uvm_gpu_name(pushbuffer->channel_manager->gpu));
    UVM_SEQ_OR_DBG_PRINT(s, " has space: %d\n", uvm_pushbuffer_has_space(pushbuffer));

    uvm_spin_lock(&pushbuffer->lock);

    for (i = 0; i < UVM_PUSHBUFFER_CHUNKS; ++i) {
        uvm_pushbuffer_chunk_t *chunk = &pushbuffer->chunks[i];
        NvU32 cpu_put = chunk_get_cpu_put(pushbuffer, chunk);
        NvU32 gpu_get = chunk_get_gpu_get(pushbuffer, chunk);
        UVM_SEQ_OR_DBG_PRINT(s, " chunk %u put %u get %u next %u available %d idle %d\n",
                             i,
                             cpu_put, gpu_get, chunk->next_push_start,
                             test_bit(i, pushbuffer->available_chunks) ? 1 : 0,
                             test_bit(i, pushbuffer->idle_chunks) ? 1 : 0);
    }

    uvm_spin_unlock(&pushbuffer->lock);
}

void uvm_pushbuffer_print(uvm_pushbuffer_t *pushbuffer)
{
    uvm_pushbuffer_print_common(pushbuffer, NULL);
}

NvU64 uvm_pushbuffer_get_gpu_va_base(uvm_pushbuffer_t *pushbuffer)
{
    return uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory, pushbuffer->channel_manager->gpu);
}

NvU64 uvm_pushbuffer_get_sec2_gpu_va_base(uvm_pushbuffer_t *pushbuffer)
{
    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    return uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory_unprotected_sysmem, pushbuffer->channel_manager->gpu);
}