/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_api.h"
#include "uvm_pushbuffer.h"
#include "uvm_channel.h"
#include "uvm_global.h"
#include "uvm_lock.h"
#include "uvm_procfs.h"
#include "uvm_push.h"
#include "uvm_kvmalloc.h"
#include "uvm_gpu.h"
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_conf_computing.h"
// Print pushbuffer state into a seq_file if provided or with UVM_DBG_PRINT() if not.
static void uvm_pushbuffer_print_common(uvm_pushbuffer_t *pushbuffer, struct seq_file *s);

static int nv_procfs_read_pushbuffer_info(struct seq_file *s, void *v)
{
    uvm_pushbuffer_t *pushbuffer = (uvm_pushbuffer_t *)s->private;

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;

    uvm_pushbuffer_print_common(pushbuffer, s);

    uvm_up_read(&g_uvm_global.pm.lock);

    return 0;
}

static int nv_procfs_read_pushbuffer_info_entry(struct seq_file *s, void *v)
{
    UVM_ENTRY_RET(nv_procfs_read_pushbuffer_info(s, v));
}

UVM_DEFINE_SINGLE_PROCFS_FILE(pushbuffer_info_entry);

static NV_STATUS create_procfs(uvm_pushbuffer_t *pushbuffer)
{
    uvm_gpu_t *gpu = pushbuffer->channel_manager->gpu;

    // The pushbuffer info file is for debug only
    if (!uvm_procfs_is_debug_enabled())
        return NV_OK;

    pushbuffer->procfs.info_file = NV_CREATE_PROC_FILE("pushbuffer",
                                                       gpu->procfs.dir,
                                                       pushbuffer_info_entry,
                                                       pushbuffer);
    if (pushbuffer->procfs.info_file == NULL)
        return NV_ERR_OPERATING_SYSTEM;

    return NV_OK;
}

NV_STATUS uvm_pushbuffer_create(uvm_channel_manager_t *channel_manager, uvm_pushbuffer_t **pushbuffer_out)
{
    NV_STATUS status;
    int i;
    uvm_gpu_t *gpu = channel_manager->gpu;
    NvU64 pushbuffer_alignment;

    uvm_pushbuffer_t *pushbuffer = uvm_kvmalloc_zero(sizeof(*pushbuffer));
    if (pushbuffer == NULL)
        return NV_ERR_NO_MEMORY;

    pushbuffer->channel_manager = channel_manager;

    uvm_spin_lock_init(&pushbuffer->lock, UVM_LOCK_ORDER_LEAF);

    // Currently the pushbuffer supports UVM_PUSHBUFFER_CHUNKS concurrent
    // pushes.
    uvm_sema_init(&pushbuffer->concurrent_pushes_sema, UVM_PUSHBUFFER_CHUNKS, UVM_LOCK_ORDER_PUSH);

    UVM_ASSERT(channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS ||
               channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_VID);

    // The pushbuffer allocation is aligned to UVM_PUSHBUFFER_SIZE and its size
    // (UVM_PUSHBUFFER_SIZE) is a power of 2. These constraints guarantee that
    // the entire pushbuffer belongs to a 1TB (2^40) segment. Thus, we can set
    // the Esched/PBDMA segment base for all channels during their
    // initialization and it is immutable for the channels' entire lifetime.
    BUILD_BUG_ON_NOT_POWER_OF_2(UVM_PUSHBUFFER_SIZE);
    BUILD_BUG_ON(UVM_PUSHBUFFER_SIZE >= (1ull << 40));

    if (gpu->uvm_test_force_upper_pushbuffer_segment)
        pushbuffer_alignment = (1ull << 40);
    else
        pushbuffer_alignment = UVM_PUSHBUFFER_SIZE;

    status = uvm_rm_mem_alloc_and_map_cpu(gpu,
                                          (channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS) ?
                                              UVM_RM_MEM_TYPE_SYS :
                                              UVM_RM_MEM_TYPE_GPU,
                                          UVM_PUSHBUFFER_SIZE,
                                          pushbuffer_alignment,
                                          &pushbuffer->memory);
    if (status != NV_OK)
        goto error;

    if (g_uvm_global.conf_computing_enabled) {
        UVM_ASSERT(channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS);

        // Move the above allocation to unprotected_sysmem
        pushbuffer->memory_unprotected_sysmem = pushbuffer->memory;
        pushbuffer->memory = NULL;

        // Make sure the base can be at least 4KB aligned. Pushes can include
        // inline buffers with specific alignment requirements. A different
        // base alignment between the backing memory locations would change
        // the alignment of those buffers.
        pushbuffer->memory_protected_sysmem = uvm_kvmalloc_zero(UVM_PUSHBUFFER_SIZE + UVM_PAGE_SIZE_4K);
        if (!pushbuffer->memory_protected_sysmem) {
            status = NV_ERR_NO_MEMORY;
            goto error;
        }

        status = uvm_rm_mem_alloc(gpu,
                                  UVM_RM_MEM_TYPE_GPU,
                                  UVM_PUSHBUFFER_SIZE,
                                  pushbuffer_alignment,
                                  &pushbuffer->memory);
        if (status != NV_OK)
            goto error;

        status = uvm_rm_mem_map_gpu(pushbuffer->memory_unprotected_sysmem, gpu, pushbuffer_alignment);
        if (status != NV_OK)
            goto error;
    }

    // Verify the GPU can access the pushbuffer.
    UVM_ASSERT((uvm_pushbuffer_get_gpu_va_base(pushbuffer) + UVM_PUSHBUFFER_SIZE - 1) < gpu->parent->max_host_va);

    bitmap_fill(pushbuffer->idle_chunks, UVM_PUSHBUFFER_CHUNKS);
    bitmap_fill(pushbuffer->available_chunks, UVM_PUSHBUFFER_CHUNKS);

    for (i = 0; i < UVM_PUSHBUFFER_CHUNKS; ++i)
        INIT_LIST_HEAD(&pushbuffer->chunks[i].pending_gpfifos);

    status = create_procfs(pushbuffer);
    if (status != NV_OK)
        goto error;

    *pushbuffer_out = pushbuffer;

    return status;

error:
    uvm_pushbuffer_destroy(pushbuffer);
    return status;
}

static uvm_pushbuffer_chunk_t *get_chunk_in_mask(uvm_pushbuffer_t *pushbuffer, unsigned long *mask)
{
    NvU32 index = find_first_bit(mask, UVM_PUSHBUFFER_CHUNKS);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (index == UVM_PUSHBUFFER_CHUNKS)
        return NULL;

    return &pushbuffer->chunks[index];
}

static uvm_pushbuffer_chunk_t *get_available_chunk(uvm_pushbuffer_t *pushbuffer)
{
    return get_chunk_in_mask(pushbuffer, pushbuffer->available_chunks);
}

static uvm_pushbuffer_chunk_t *get_idle_chunk(uvm_pushbuffer_t *pushbuffer)
{
    return get_chunk_in_mask(pushbuffer, pushbuffer->idle_chunks);
}

static NvU32 chunk_get_index(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    NvU32 index = chunk - pushbuffer->chunks;
    UVM_ASSERT(index < UVM_PUSHBUFFER_CHUNKS);
    return index;
}

static NvU32 chunk_get_offset(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    return chunk_get_index(pushbuffer, chunk) * UVM_PUSHBUFFER_CHUNK_SIZE;
}

static void set_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk, unsigned long *mask)
{
    NvU32 index = chunk_get_index(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    __set_bit(index, mask);
}

static void clear_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk, unsigned long *mask)
{
    NvU32 index = chunk_get_index(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    __clear_bit(index, mask);
}

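// Pick a chunk to claim for a new push. Idle chunks are preferred over merely
// available ones, so that pushes can start at the very beginning of a chunk.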
static uvm_pushbuffer_chunk_t *pick_chunk(uvm_pushbuffer_t *pushbuffer)
{
    uvm_pushbuffer_chunk_t *chunk = get_idle_chunk(pushbuffer);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (chunk == NULL)
        chunk = get_available_chunk(pushbuffer);

    return chunk;
}

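// Try to claim a chunk for the given push without waiting. On success, the
// chunk is removed from both the idle and available masks and *chunk_out is
// set; otherwise *chunk_out is set to NULL and false is returned.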
static bool try_claim_chunk(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_pushbuffer_chunk_t **chunk_out)
{
    uvm_pushbuffer_chunk_t *chunk;

    uvm_spin_lock(&pushbuffer->lock);

    chunk = pick_chunk(pushbuffer);
    if (!chunk)
        goto done;

    chunk->current_push = push;
    clear_chunk(pushbuffer, chunk, pushbuffer->idle_chunks);
    clear_chunk(pushbuffer, chunk, pushbuffer->available_chunks);

done:
    uvm_spin_unlock(&pushbuffer->lock);
    *chunk_out = chunk;

    return chunk != NULL;
}

static char *get_base_cpu_va(uvm_pushbuffer_t *pushbuffer)
{
    // Confidential Computing pushes are assembled in protected sysmem and
    // safely moved (through encrypt/decrypt) to protected vidmem, or signed
    // and moved to unprotected sysmem.
    //
    // The protected sysmem base is aligned to 4kB. This is enough to give
    // inline buffers the same alignment behaviour as the other two backing
    // memory locations.
    if (g_uvm_global.conf_computing_enabled)
        return (char *)UVM_ALIGN_UP((uintptr_t)pushbuffer->memory_protected_sysmem, UVM_PAGE_SIZE_4K);

    return (char *)uvm_rm_mem_get_cpu_va(pushbuffer->memory);
}

static NvU32 *chunk_get_next_push_start_addr(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    char *push_start = get_base_cpu_va(pushbuffer);
    push_start += chunk_get_offset(pushbuffer, chunk);
    push_start += chunk->next_push_start;

    UVM_ASSERT(((NvU64)push_start) % sizeof(NvU32) == 0);

    return (NvU32 *)push_start;
}

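// Claim a chunk for the given push, spinning until one frees up. While
// spinning, channel manager progress is updated to retire completed GPFIFO
// entries, and any channel error aborts the wait.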
static NV_STATUS claim_chunk(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_pushbuffer_chunk_t **chunk_out)
{
    NV_STATUS status = NV_OK;
    uvm_channel_manager_t *channel_manager = pushbuffer->channel_manager;
    uvm_spin_loop_t spin;

    if (try_claim_chunk(pushbuffer, push, chunk_out))
        return NV_OK;

    uvm_channel_manager_update_progress(channel_manager);

    uvm_spin_loop_init(&spin);
    while (!try_claim_chunk(pushbuffer, push, chunk_out) && status == NV_OK) {
        UVM_SPIN_LOOP(&spin);
        status = uvm_channel_manager_check_errors(channel_manager);
        uvm_channel_manager_update_progress(channel_manager);
    }

    return status;
}

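// Reserve pushbuffer space for a push: take a slot in the concurrent pushes
// semaphore, claim a chunk, and point push->begin and push->next at the
// chunk's next push start address. WLC channels bypass all of this and use
// their static pushbuffers instead.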
NV_STATUS uvm_pushbuffer_begin_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    uvm_pushbuffer_chunk_t *chunk;
    NV_STATUS status;

    UVM_ASSERT(pushbuffer);
    UVM_ASSERT(push);
    UVM_ASSERT(push->channel);

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC pushes use static PB and don't count against max concurrent
        // pushes.
        push->begin = (void *)UVM_ALIGN_UP((uintptr_t)push->channel->conf_computing.static_pb_protected_sysmem,
                                           UVM_PAGE_SIZE_4K);
        push->next = push->begin;
        return NV_OK;
    }

    // Note that this semaphore is uvm_up()ed in end_push().
    uvm_down(&pushbuffer->concurrent_pushes_sema);

    status = claim_chunk(pushbuffer, push, &chunk);
    if (status != NV_OK) {
        uvm_up(&pushbuffer->concurrent_pushes_sema);
        return status;
    }

    UVM_ASSERT(chunk);

    push->begin = chunk_get_next_push_start_addr(pushbuffer, chunk);
    push->next = push->begin;

    return NV_OK;
}

static uvm_gpfifo_entry_t *chunk_get_first_gpfifo(uvm_pushbuffer_chunk_t *chunk)
{
    return list_first_entry_or_null(&chunk->pending_gpfifos, uvm_gpfifo_entry_t, pending_list_node);
}

static uvm_gpfifo_entry_t *chunk_get_last_gpfifo(uvm_pushbuffer_chunk_t *chunk)
{
    return list_last_entry_or_null(&chunk->pending_gpfifos, uvm_gpfifo_entry_t, pending_list_node);
}

// Get the CPU put within the chunk (in range [0, UVM_PUSHBUFFER_CHUNK_SIZE])
static NvU32 chunk_get_cpu_put(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    uvm_gpfifo_entry_t *gpfifo = chunk_get_last_gpfifo(chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (gpfifo != NULL)
        return gpfifo->pushbuffer_offset + gpfifo->pushbuffer_size - chunk_get_offset(pushbuffer, chunk);
    else
        return 0;
}

// Get the GPU get within the chunk (in range [0, UVM_PUSHBUFFER_CHUNK_SIZE))
static NvU32 chunk_get_gpu_get(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    uvm_gpfifo_entry_t *gpfifo = chunk_get_first_gpfifo(chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (gpfifo != NULL)
        return gpfifo->pushbuffer_offset - chunk_get_offset(pushbuffer, chunk);
    else
        return 0;
}

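// Re-evaluate whether the chunk is idle and/or has room for another push,
// mark it in the corresponding masks, and update next_push_start accordingly.
// Called with the pushbuffer lock held after the chunk's pending GPFIFO list
// or current push changes.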
static void update_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    NvU32 gpu_get = chunk_get_gpu_get(pushbuffer, chunk);
    NvU32 cpu_put = chunk_get_cpu_put(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (gpu_get == cpu_put) {
        // cpu_put can be equal to gpu_get both when the chunk is full and
        // when it is empty. We can tell the cases apart by checking whether
        // the pending GPFIFOs list is empty.
        if (!list_empty(&chunk->pending_gpfifos))
            return;

        // Chunk completely idle
        set_chunk(pushbuffer, chunk, pushbuffer->idle_chunks);
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        UVM_ASSERT_MSG(cpu_put == 0, "cpu put %u\n", cpu_put);

        // For a completely idle chunk, always start at the very beginning.
        // This helps avoid the waste that can happen at the very end of the
        // chunk, described at the top of uvm_pushbuffer.h.
        chunk->next_push_start = 0;
    }
    else if (gpu_get > cpu_put) {
        if (gpu_get - cpu_put >= UVM_MAX_PUSH_SIZE) {
            // Enough space between put and get
            set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
            chunk->next_push_start = cpu_put;
        }
    }
    else if (UVM_PUSHBUFFER_CHUNK_SIZE >= cpu_put + UVM_MAX_PUSH_SIZE) {
        UVM_ASSERT_MSG(gpu_get < cpu_put, "gpu_get %u cpu_put %u\n", gpu_get, cpu_put);

        // Enough space at the end
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        chunk->next_push_start = cpu_put;
    }
    else if (gpu_get >= UVM_MAX_PUSH_SIZE) {
        UVM_ASSERT_MSG(gpu_get < cpu_put, "gpu_get %u cpu_put %u\n", gpu_get, cpu_put);

        // Enough space at the beginning
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        chunk->next_push_start = 0;
    }
}

void uvm_pushbuffer_destroy(uvm_pushbuffer_t *pushbuffer)
{
    if (pushbuffer == NULL)
        return;

    proc_remove(pushbuffer->procfs.info_file);

    uvm_rm_mem_free(pushbuffer->memory_unprotected_sysmem);
    uvm_kvfree(pushbuffer->memory_protected_sysmem);
    uvm_rm_mem_free(pushbuffer->memory);
    uvm_kvfree(pushbuffer);
}

static uvm_pushbuffer_chunk_t *offset_to_chunk(uvm_pushbuffer_t *pushbuffer, NvU32 offset)
{
    UVM_ASSERT(offset < UVM_PUSHBUFFER_SIZE);
    return &pushbuffer->chunks[offset / UVM_PUSHBUFFER_CHUNK_SIZE];
}

static uvm_pushbuffer_chunk_t *gpfifo_to_chunk(uvm_pushbuffer_t *pushbuffer, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk = offset_to_chunk(pushbuffer, gpfifo->pushbuffer_offset);
    UVM_ASSERT(offset_to_chunk(pushbuffer, gpfifo->pushbuffer_offset + gpfifo->pushbuffer_size - 1) == chunk);
    return chunk;
}

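// Decrypt a completed push from the unprotected sysmem pushbuffer back into
// its protected sysmem location (Confidential Computing only). Only invoked
// for pushes with an on_complete callback; a no-op when the channel has no
// push crypto bundles or the push's bundle was not used.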
static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
{
    NV_STATUS status;
    void *auth_tag_cpu_va;
    void *push_protected_cpu_va;
    void *push_unprotected_cpu_va;
    NvU32 pushbuffer_offset = gpfifo->pushbuffer_offset;
    NvU32 push_info_index = gpfifo->push_info - channel->push_infos;
    uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
    uvm_push_crypto_bundle_t *crypto_bundle = channel->conf_computing.push_crypto_bundles + push_info_index;

    if (channel->conf_computing.push_crypto_bundles == NULL)
        return;

    // When the crypto bundle is used, the push size cannot be zero
    if (crypto_bundle->push_size == 0)
        return;

    UVM_ASSERT(!uvm_channel_is_wlc(channel));
    UVM_ASSERT(!uvm_channel_is_lcic(channel));

    push_protected_cpu_va = get_base_cpu_va(pushbuffer) + pushbuffer_offset;
    push_unprotected_cpu_va = (char *)uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem) + pushbuffer_offset;
    auth_tag_cpu_va = uvm_channel_get_push_crypto_bundle_auth_tags_cpu_va(channel, push_info_index);

    status = uvm_conf_computing_cpu_decrypt(channel,
                                            push_protected_cpu_va,
                                            push_unprotected_cpu_va,
                                            &crypto_bundle->iv,
                                            crypto_bundle->key_version,
                                            crypto_bundle->push_size,
                                            auth_tag_cpu_va);

    // A decryption failure here is not fatal: it does not prevent UVM from
    // running fine in the future, and it cannot be used maliciously to leak
    // information or otherwise derail UVM from its regular duties.
    UVM_ASSERT_MSG_RELEASE(status == NV_OK, "Pushbuffer decryption failure: %s\n", nvstatusToString(status));

    // Avoid reusing the bundle across multiple pushes
    crypto_bundle->push_size = 0;
}

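// Process a completed GPFIFO entry: run the push's on_complete callback (if
// any), remove the entry from its chunk's pending list, and update the chunk
// state when the completed entry was the first or last pending entry.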
void uvm_pushbuffer_mark_completed(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk;
    bool need_to_update_chunk = false;
    uvm_push_info_t *push_info = gpfifo->push_info;
    uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);

    UVM_ASSERT(gpfifo->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL);

    chunk = gpfifo_to_chunk(pushbuffer, gpfifo);

    if (push_info->on_complete != NULL) {
        decrypt_push(channel, gpfifo);
        push_info->on_complete(push_info->on_complete_data);
        push_info->on_complete = NULL;
        push_info->on_complete_data = NULL;
    }

    uvm_spin_lock(&pushbuffer->lock);

    if (gpfifo == chunk_get_first_gpfifo(chunk))
        need_to_update_chunk = true;
    else if (gpfifo == chunk_get_last_gpfifo(chunk))
        need_to_update_chunk = true;

    list_del(&gpfifo->pending_list_node);

    // If current_push is not NULL, updating the chunk is delayed until
    // uvm_pushbuffer_end_push() is called for that push.
    if (need_to_update_chunk && chunk->current_push == NULL)
        update_chunk(pushbuffer, chunk);

    uvm_spin_unlock(&pushbuffer->lock);
}

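// Return the push's byte offset from the pushbuffer base. The same offset is
// applied to the pushbuffer's different CPU and GPU mappings below.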
NvU32 uvm_pushbuffer_get_offset_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    NvU32 offset;

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC channels use a private static PB and their gpfifo entries are
        // not added to any chunk's list. This only needs to return a legal
        // offset: completion cleanup will never find WLC gpfifo entries as
        // either the first or last entry of any chunk.
        return 0;
    }

    offset = (char *)push->begin - get_base_cpu_va(pushbuffer);

    UVM_ASSERT(((NvU64)offset) % sizeof(NvU32) == 0);

    return offset;
}

NvU64 uvm_pushbuffer_get_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    NvU64 pushbuffer_base;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    bool is_proxy_channel = uvm_channel_is_proxy(push->channel);

    pushbuffer_base = uvm_rm_mem_get_gpu_va(pushbuffer->memory, gpu, is_proxy_channel).address;

    if (uvm_channel_is_wlc(push->channel) || uvm_channel_is_lcic(push->channel)) {
        // We need to use the same static PB locations as the fixed schedule,
        // because that's what the channels are initialized to use.
        return uvm_channel_get_static_pb_protected_vidmem_gpu_va(push->channel);
    }
    else if (uvm_channel_is_sec2(push->channel)) {
        // SEC2 PBs are in unprotected sysmem
        pushbuffer_base = uvm_pushbuffer_get_sec2_gpu_va_base(pushbuffer);
    }

    return pushbuffer_base + uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
}

void *uvm_pushbuffer_get_unprotected_cpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    char *pushbuffer_base;

    if (uvm_channel_is_wlc(push->channel)) {
        // Reuse the existing WLC static PB for initialization
        UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
        return uvm_channel_get_static_pb_unprotected_sysmem_cpu(push->channel);
    }

    pushbuffer_base = uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem);

    return pushbuffer_base + uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
}

NvU64 uvm_pushbuffer_get_unprotected_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    NvU64 pushbuffer_base;

    if (uvm_channel_is_wlc(push->channel)) {
        // Reuse the existing WLC static PB for initialization
        UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));

        return uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(push->channel);
    }

    pushbuffer_base = uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory_unprotected_sysmem, uvm_push_get_gpu(push));

    return pushbuffer_base + uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
}

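// Finish a push: queue its GPFIFO entry on the chunk's pending list, release
// the chunk's current_push claim, update the chunk state, and release a slot
// in the concurrent pushes semaphore. WLC pushes only initialize the entry's
// list node, since they never claimed a chunk.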
void uvm_pushbuffer_end_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk;

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC channels use a static pushbuffer and don't count towards max
        // concurrent pushes. Initializing the list node here makes sure the
        // deletion in uvm_pushbuffer_mark_completed() doesn't crash.
        INIT_LIST_HEAD(&gpfifo->pending_list_node);
        return;
    }

    chunk = gpfifo_to_chunk(pushbuffer, gpfifo);

    uvm_channel_pool_assert_locked(push->channel->pool);

    uvm_spin_lock(&pushbuffer->lock);

    list_add_tail(&gpfifo->pending_list_node, &chunk->pending_gpfifos);

    update_chunk(pushbuffer, chunk);

    UVM_ASSERT(chunk->current_push == push);
    chunk->current_push = NULL;

    uvm_spin_unlock(&pushbuffer->lock);

    // uvm_pushbuffer_end_push() needs to be called with the channel lock
    // held, while the concurrent pushes sema has a higher lock order. To keep
    // the code structure simple, just up out of order here.
    uvm_up_out_of_order(&pushbuffer->concurrent_pushes_sema);
}

bool uvm_pushbuffer_has_space(uvm_pushbuffer_t *pushbuffer)
{
    bool has_space;

    uvm_spin_lock(&pushbuffer->lock);

    has_space = pick_chunk(pushbuffer) != NULL;

    uvm_spin_unlock(&pushbuffer->lock);

    return has_space;
}

static void uvm_pushbuffer_print_common(uvm_pushbuffer_t *pushbuffer, struct seq_file *s)
{
    NvU32 i;

    UVM_SEQ_OR_DBG_PRINT(s, "Pushbuffer for GPU %s\n", uvm_gpu_name(pushbuffer->channel_manager->gpu));
    UVM_SEQ_OR_DBG_PRINT(s, " has space: %d\n", uvm_pushbuffer_has_space(pushbuffer));

    uvm_spin_lock(&pushbuffer->lock);

    for (i = 0; i < UVM_PUSHBUFFER_CHUNKS; ++i) {
        uvm_pushbuffer_chunk_t *chunk = &pushbuffer->chunks[i];
        NvU32 cpu_put = chunk_get_cpu_put(pushbuffer, chunk);
        NvU32 gpu_get = chunk_get_gpu_get(pushbuffer, chunk);
        UVM_SEQ_OR_DBG_PRINT(s, " chunk %u put %u get %u next %u available %d idle %d\n",
                             i,
                             cpu_put, gpu_get, chunk->next_push_start,
                             test_bit(i, pushbuffer->available_chunks) ? 1 : 0,
                             test_bit(i, pushbuffer->idle_chunks) ? 1 : 0);
    }

    uvm_spin_unlock(&pushbuffer->lock);
}

void uvm_pushbuffer_print(uvm_pushbuffer_t *pushbuffer)
{
    uvm_pushbuffer_print_common(pushbuffer, NULL);
}

NvU64 uvm_pushbuffer_get_gpu_va_base(uvm_pushbuffer_t *pushbuffer)
{
    return uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory, pushbuffer->channel_manager->gpu);
}

NvU64 uvm_pushbuffer_get_sec2_gpu_va_base(uvm_pushbuffer_t *pushbuffer)
{
    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    return uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory_unprotected_sysmem, pushbuffer->channel_manager->gpu);
}