/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_api.h"
#include "uvm_pushbuffer.h"
#include "uvm_channel.h"
#include "uvm_global.h"
#include "uvm_lock.h"
#include "uvm_procfs.h"
#include "uvm_push.h"
#include "uvm_kvmalloc.h"
#include "uvm_gpu.h"
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_conf_computing.h"

// Print pushbuffer state into a seq_file if provided or with UVM_DBG_PRINT() if not.
static void uvm_pushbuffer_print_common(uvm_pushbuffer_t *pushbuffer, struct seq_file *s);

static int nv_procfs_read_pushbuffer_info(struct seq_file *s, void *v)
{
    uvm_pushbuffer_t *pushbuffer = (uvm_pushbuffer_t *)s->private;

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;

    uvm_pushbuffer_print_common(pushbuffer, s);

    uvm_up_read(&g_uvm_global.pm.lock);

    return 0;
}

static int nv_procfs_read_pushbuffer_info_entry(struct seq_file *s, void *v)
{
    UVM_ENTRY_RET(nv_procfs_read_pushbuffer_info(s, v));
}

UVM_DEFINE_SINGLE_PROCFS_FILE(pushbuffer_info_entry);

static NV_STATUS create_procfs(uvm_pushbuffer_t *pushbuffer)
{
    uvm_gpu_t *gpu = pushbuffer->channel_manager->gpu;

    // The pushbuffer info file is for debug only
    if (!uvm_procfs_is_debug_enabled())
        return NV_OK;

    pushbuffer->procfs.info_file = NV_CREATE_PROC_FILE("pushbuffer",
                                                       gpu->procfs.dir,
                                                       pushbuffer_info_entry,
                                                       pushbuffer);
    if (pushbuffer->procfs.info_file == NULL)
        return NV_ERR_OPERATING_SYSTEM;

    return NV_OK;
}

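// Create the pushbuffer for the given channel manager. Without Confidential
// Computing, the pushbuffer is a single allocation (pushbuffer->memory) in
// sysmem or vidmem. With Confidential Computing, three backing allocations
// are used: protected vidmem (pushbuffer->memory), CPU-only protected sysmem
// (memory_protected_sysmem, where pushes are assembled) and unprotected
// sysmem (memory_unprotected_sysmem, the staging copy used for
// encrypt/decrypt or signing); see get_base_cpu_va().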
NV_STATUS uvm_pushbuffer_create(uvm_channel_manager_t *channel_manager, uvm_pushbuffer_t **pushbuffer_out)
{
    NV_STATUS status;
    int i;
    uvm_gpu_t *gpu = channel_manager->gpu;
    NvU64 pushbuffer_alignment;

    uvm_pushbuffer_t *pushbuffer = uvm_kvmalloc_zero(sizeof(*pushbuffer));
    if (pushbuffer == NULL)
        return NV_ERR_NO_MEMORY;

    pushbuffer->channel_manager = channel_manager;

    uvm_spin_lock_init(&pushbuffer->lock, UVM_LOCK_ORDER_LEAF);

    // Currently the pushbuffer supports up to UVM_PUSHBUFFER_CHUNKS
    // concurrent pushes.
    uvm_sema_init(&pushbuffer->concurrent_pushes_sema, UVM_PUSHBUFFER_CHUNKS, UVM_LOCK_ORDER_PUSH);

    UVM_ASSERT(channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS ||
               channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_VID);

    // The pushbuffer allocation is aligned to UVM_PUSHBUFFER_SIZE and its size
    // (UVM_PUSHBUFFER_SIZE) is a power of 2. These constraints guarantee that
    // the entire pushbuffer belongs to a 1TB (2^40) segment. Thus, we can set
    // the Esched/PBDMA segment base for all channels during their
    // initialization and it remains immutable for the channels' entire
    // lifetime.
    BUILD_BUG_ON_NOT_POWER_OF_2(UVM_PUSHBUFFER_SIZE);
    BUILD_BUG_ON(UVM_PUSHBUFFER_SIZE >= (1ull << 40));

    if (gpu->uvm_test_force_upper_pushbuffer_segment)
        pushbuffer_alignment = (1ull << 40);
    else
        pushbuffer_alignment = UVM_PUSHBUFFER_SIZE;

    status = uvm_rm_mem_alloc_and_map_cpu(gpu,
                                          (channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS) ?
                                              UVM_RM_MEM_TYPE_SYS:
                                              UVM_RM_MEM_TYPE_GPU,
                                          UVM_PUSHBUFFER_SIZE,
                                          pushbuffer_alignment,
                                          &pushbuffer->memory);
    if (status != NV_OK)
        goto error;

    if (uvm_conf_computing_mode_enabled(gpu)) {
        UVM_ASSERT(channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS);

        // Move the above allocation to unprotected_sysmem
        pushbuffer->memory_unprotected_sysmem = pushbuffer->memory;
        pushbuffer->memory = NULL;

        // Make sure the base can be at least 4KB aligned. Pushes can include
        // inline buffers with specific alignment requirements. A different
        // base alignment between backing memory locations would change that.
        pushbuffer->memory_protected_sysmem = uvm_kvmalloc_zero(UVM_PUSHBUFFER_SIZE + UVM_PAGE_SIZE_4K);
        if (!pushbuffer->memory_protected_sysmem) {
            status = NV_ERR_NO_MEMORY;
            goto error;
        }

        status = uvm_rm_mem_alloc(gpu,
                                  UVM_RM_MEM_TYPE_GPU,
                                  UVM_PUSHBUFFER_SIZE,
                                  pushbuffer_alignment,
                                  &pushbuffer->memory);
        if (status != NV_OK)
            goto error;

        status = uvm_rm_mem_map_gpu(pushbuffer->memory_unprotected_sysmem, gpu, pushbuffer_alignment);
        if (status != NV_OK)
            goto error;
    }

    // Verify the GPU can access the pushbuffer.
    UVM_ASSERT((uvm_pushbuffer_get_gpu_va_base(pushbuffer) + UVM_PUSHBUFFER_SIZE - 1) < gpu->parent->max_host_va);

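    // All chunks start out both idle and available.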
    bitmap_fill(pushbuffer->idle_chunks, UVM_PUSHBUFFER_CHUNKS);
    bitmap_fill(pushbuffer->available_chunks, UVM_PUSHBUFFER_CHUNKS);

    for (i = 0; i < UVM_PUSHBUFFER_CHUNKS; ++i)
        INIT_LIST_HEAD(&pushbuffer->chunks[i].pending_gpfifos);

    status = create_procfs(pushbuffer);
    if (status != NV_OK)
        goto error;

    *pushbuffer_out = pushbuffer;

    return status;

error:
    uvm_pushbuffer_destroy(pushbuffer);
    return status;
}

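// Return the chunk corresponding to the first bit set in the given mask, or
// NULL if no bit is set. The pushbuffer lock must be held.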
static uvm_pushbuffer_chunk_t *get_chunk_in_mask(uvm_pushbuffer_t *pushbuffer, unsigned long *mask)
{
    NvU32 index = find_first_bit(mask, UVM_PUSHBUFFER_CHUNKS);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (index == UVM_PUSHBUFFER_CHUNKS)
        return NULL;

    return &pushbuffer->chunks[index];
}

static uvm_pushbuffer_chunk_t *get_available_chunk(uvm_pushbuffer_t *pushbuffer)
{
    return get_chunk_in_mask(pushbuffer, pushbuffer->available_chunks);
}

static uvm_pushbuffer_chunk_t *get_idle_chunk(uvm_pushbuffer_t *pushbuffer)
{
    return get_chunk_in_mask(pushbuffer, pushbuffer->idle_chunks);
}

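// Index of the chunk within the pushbuffer's chunks array, derived from the
// chunk pointer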
static NvU32 chunk_get_index(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    NvU32 index = chunk - pushbuffer->chunks;
    UVM_ASSERT(index < UVM_PUSHBUFFER_CHUNKS);
    return index;
}

static NvU32 chunk_get_offset(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    return chunk_get_index(pushbuffer, chunk) * UVM_PUSHBUFFER_CHUNK_SIZE;
}

static void set_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk, unsigned long *mask)
{
    NvU32 index = chunk_get_index(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    __set_bit(index, mask);
}

static void clear_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk, unsigned long *mask)
{
    NvU32 index = chunk_get_index(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    __clear_bit(index, mask);
}

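// Pick a chunk for a new push, preferring fully idle chunks over merely
// available ones. An idle chunk allows the push to start at the very
// beginning of the chunk (see update_chunk()).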
static uvm_pushbuffer_chunk_t *pick_chunk(uvm_pushbuffer_t *pushbuffer)
{
    uvm_pushbuffer_chunk_t *chunk = get_idle_chunk(pushbuffer);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (chunk == NULL)
        chunk = get_available_chunk(pushbuffer);

    return chunk;
}

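// Try to claim a chunk for the given push. On success, the chunk is removed
// from both the idle and available masks, *chunk_out is set and true is
// returned.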
static bool try_claim_chunk(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_pushbuffer_chunk_t **chunk_out)
{
    uvm_pushbuffer_chunk_t *chunk;

    uvm_spin_lock(&pushbuffer->lock);

    chunk = pick_chunk(pushbuffer);
    if (!chunk)
        goto done;

    chunk->current_push = push;
    clear_chunk(pushbuffer, chunk, pushbuffer->idle_chunks);
    clear_chunk(pushbuffer, chunk, pushbuffer->available_chunks);

done:
    uvm_spin_unlock(&pushbuffer->lock);
    *chunk_out = chunk;

    return chunk != NULL;
}

static char *get_base_cpu_va(uvm_pushbuffer_t *pushbuffer)
{
    // In Confidential Computing, pushes are assembled in protected sysmem and
    // then either safely moved (through encrypt/decrypt) to protected vidmem,
    // or signed and moved to unprotected sysmem.
    if (uvm_conf_computing_mode_enabled(pushbuffer->channel_manager->gpu)) {
        // Align the protected sysmem base to 4KB. This should be enough to
        // give the same alignment behaviour for inline buffers as the other
        // two backing memory locations.
        return (char*)(UVM_ALIGN_UP((uintptr_t)pushbuffer->memory_protected_sysmem, UVM_PAGE_SIZE_4K));
    }

    return (char *)uvm_rm_mem_get_cpu_va(pushbuffer->memory);
}

static NvU32 *chunk_get_next_push_start_addr(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    char *push_start = get_base_cpu_va(pushbuffer);
    push_start += chunk_get_offset(pushbuffer, chunk);
    push_start += chunk->next_push_start;

    UVM_ASSERT(((NvU64)push_start) % sizeof(NvU32) == 0);

    return (NvU32*)push_start;
}

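// Claim a chunk for the given push, spinning until one becomes available.
// While spinning, poll the channel manager for completed GPFIFO entries so
// that chunk space can be reclaimed, and bail out if a channel error is
// detected.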
static NV_STATUS claim_chunk(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_pushbuffer_chunk_t **chunk_out)
{
    NV_STATUS status = NV_OK;
    uvm_channel_manager_t *channel_manager = pushbuffer->channel_manager;
    uvm_spin_loop_t spin;

    if (try_claim_chunk(pushbuffer, push, chunk_out))
        return NV_OK;

    uvm_channel_manager_update_progress(channel_manager);

    uvm_spin_loop_init(&spin);
    while (!try_claim_chunk(pushbuffer, push, chunk_out) && status == NV_OK) {
        UVM_SPIN_LOOP(&spin);
        status = uvm_channel_manager_check_errors(channel_manager);
        uvm_channel_manager_update_progress(channel_manager);
    }

    return status;
}

NV_STATUS uvm_pushbuffer_begin_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    uvm_pushbuffer_chunk_t *chunk;
    NV_STATUS status;

    UVM_ASSERT(pushbuffer);
    UVM_ASSERT(push);
    UVM_ASSERT(push->channel);

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC pushes use a static PB and don't count against the max
        // concurrent pushes.
        push->begin = (void*)UVM_ALIGN_UP((uintptr_t)push->channel->conf_computing.static_pb_protected_sysmem,
                                          UVM_PAGE_SIZE_4K);
        push->next = push->begin;
        return NV_OK;
    }

    // Note that this semaphore is uvm_up()ed in end_push().
    uvm_down(&pushbuffer->concurrent_pushes_sema);

    status = claim_chunk(pushbuffer, push, &chunk);
    if (status != NV_OK) {
        uvm_up(&pushbuffer->concurrent_pushes_sema);
        return status;
    }

    UVM_ASSERT(chunk);

    push->begin = chunk_get_next_push_start_addr(pushbuffer, chunk);
    push->next = push->begin;

    return NV_OK;
}

static uvm_gpfifo_entry_t *chunk_get_first_gpfifo(uvm_pushbuffer_chunk_t *chunk)
{
    return list_first_entry_or_null(&chunk->pending_gpfifos, uvm_gpfifo_entry_t, pending_list_node);
}

static uvm_gpfifo_entry_t *chunk_get_last_gpfifo(uvm_pushbuffer_chunk_t *chunk)
{
    return list_last_entry_or_null(&chunk->pending_gpfifos, uvm_gpfifo_entry_t, pending_list_node);
}

// Get the cpu put within the chunk (in range [0, UVM_PUSHBUFFER_CHUNK_SIZE])
static NvU32 chunk_get_cpu_put(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    uvm_gpfifo_entry_t *gpfifo = chunk_get_last_gpfifo(chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (gpfifo != NULL)
        return gpfifo->pushbuffer_offset + gpfifo->pushbuffer_size - chunk_get_offset(pushbuffer, chunk);
    else
        return 0;
}

// Get the gpu get within the chunk (in range [0, UVM_PUSHBUFFER_CHUNK_SIZE))
static NvU32 chunk_get_gpu_get(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    uvm_gpfifo_entry_t *gpfifo = chunk_get_first_gpfifo(chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (gpfifo != NULL)
        return gpfifo->pushbuffer_offset - chunk_get_offset(pushbuffer, chunk);
    else
        return 0;
}

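// Recompute whether the chunk can accept a new push, and where that push
// should start, based on the current GPU get and CPU put within the chunk.
// The chunk is marked available if there is at least UVM_MAX_PUSH_SIZE of
// contiguous free space between put and get, at the end of the chunk, or at
// its beginning.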
static void update_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    NvU32 gpu_get = chunk_get_gpu_get(pushbuffer, chunk);
    NvU32 cpu_put = chunk_get_cpu_put(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (gpu_get == cpu_put) {
        // cpu_put can be equal to gpu_get both when the chunk is full and when
        // it is empty. The two cases can be told apart by checking whether the
        // pending GPFIFOs list is empty.
        if (!list_empty(&chunk->pending_gpfifos))
            return;

        // Chunk completely idle
        set_chunk(pushbuffer, chunk, pushbuffer->idle_chunks);
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        UVM_ASSERT_MSG(cpu_put == 0, "cpu put %u\n", cpu_put);

        // For a completely idle chunk, always start at the very beginning. This
        // helps avoid the waste that can happen at the very end of the chunk
        // described at the top of uvm_pushbuffer.h.
        chunk->next_push_start = 0;
    }
    else if (gpu_get > cpu_put) {
        if (gpu_get - cpu_put >= UVM_MAX_PUSH_SIZE) {
            // Enough space between put and get
            set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
            chunk->next_push_start = cpu_put;
        }
    }
    else if (UVM_PUSHBUFFER_CHUNK_SIZE >= cpu_put + UVM_MAX_PUSH_SIZE) {
        UVM_ASSERT_MSG(gpu_get < cpu_put, "gpu_get %u cpu_put %u\n", gpu_get, cpu_put);

        // Enough space at the end
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        chunk->next_push_start = cpu_put;
    }
    else if (gpu_get >= UVM_MAX_PUSH_SIZE) {
        UVM_ASSERT_MSG(gpu_get < cpu_put, "gpu_get %u cpu_put %u\n", gpu_get, cpu_put);

        // Enough space at the beginning
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        chunk->next_push_start = 0;
    }
}

void uvm_pushbuffer_destroy(uvm_pushbuffer_t *pushbuffer)
{
    if (pushbuffer == NULL)
        return;

    proc_remove(pushbuffer->procfs.info_file);

    uvm_rm_mem_free(pushbuffer->memory_unprotected_sysmem);
    uvm_kvfree(pushbuffer->memory_protected_sysmem);
    uvm_rm_mem_free(pushbuffer->memory);
    uvm_kvfree(pushbuffer);
}

static uvm_pushbuffer_chunk_t *offset_to_chunk(uvm_pushbuffer_t *pushbuffer, NvU32 offset)
{
    UVM_ASSERT(offset < UVM_PUSHBUFFER_SIZE);
    return &pushbuffer->chunks[offset / UVM_PUSHBUFFER_CHUNK_SIZE];
}

static uvm_pushbuffer_chunk_t *gpfifo_to_chunk(uvm_pushbuffer_t *pushbuffer, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk = offset_to_chunk(pushbuffer, gpfifo->pushbuffer_offset);
    UVM_ASSERT(offset_to_chunk(pushbuffer, gpfifo->pushbuffer_offset + gpfifo->pushbuffer_size - 1) == chunk);
    return chunk;
}

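// Decrypt a completed push from the unprotected sysmem staging copy back into
// the CPU-visible protected pushbuffer location, using the crypto bundle and
// authentication tag saved for the push. No-op for channels that don't keep
// crypto bundles, or when the bundle was not used for this push.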
static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
{
    NV_STATUS status;
    NvU32 auth_tag_offset;
    void *auth_tag_cpu_va;
    void *push_protected_cpu_va;
    void *push_unprotected_cpu_va;
    NvU32 pushbuffer_offset = gpfifo->pushbuffer_offset;
    NvU32 push_info_index = gpfifo->push_info - channel->push_infos;
    uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
    uvm_push_crypto_bundle_t *crypto_bundle = channel->conf_computing.push_crypto_bundles + push_info_index;

    if (channel->conf_computing.push_crypto_bundles == NULL)
        return;

    // When the crypto bundle is used, the push size cannot be zero
    if (crypto_bundle->push_size == 0)
        return;

    UVM_ASSERT(!uvm_channel_is_wlc(channel));
    UVM_ASSERT(!uvm_channel_is_lcic(channel));

    push_protected_cpu_va = (char *)get_base_cpu_va(pushbuffer) + pushbuffer_offset;
    push_unprotected_cpu_va = (char *)uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem) + pushbuffer_offset;
    auth_tag_offset = push_info_index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
    auth_tag_cpu_va = (char *)uvm_rm_mem_get_cpu_va(channel->conf_computing.push_crypto_bundle_auth_tags) +
                              auth_tag_offset;

    status = uvm_conf_computing_cpu_decrypt(channel,
                                            push_protected_cpu_va,
                                            push_unprotected_cpu_va,
                                            &crypto_bundle->iv,
                                            crypto_bundle->push_size,
                                            auth_tag_cpu_va);

    // A decryption failure here is not fatal because it does not
    // prevent UVM from running fine in the future and cannot be used
    // maliciously to leak information or otherwise derail UVM from its
    // regular duties.
    UVM_ASSERT_MSG_RELEASE(status == NV_OK, "Pushbuffer decryption failure: %s\n", nvstatusToString(status));

    // Avoid reusing the bundle across multiple pushes
    crypto_bundle->push_size = 0;
}

void uvm_pushbuffer_mark_completed(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk;
    bool need_to_update_chunk = false;
    uvm_push_info_t *push_info = gpfifo->push_info;
    uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);

    UVM_ASSERT(gpfifo->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL);

    chunk = gpfifo_to_chunk(pushbuffer, gpfifo);

    if (push_info->on_complete != NULL) {
        decrypt_push(channel, gpfifo);
        push_info->on_complete(push_info->on_complete_data);
        push_info->on_complete = NULL;
        push_info->on_complete_data = NULL;
    }

    uvm_spin_lock(&pushbuffer->lock);

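    // Removing the first or the last pending GPFIFO entry changes the chunk's
    // gpu_get or cpu_put, respectively, so the chunk state may need to be
    // recomputed.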
    if (gpfifo == chunk_get_first_gpfifo(chunk))
        need_to_update_chunk = true;
    else if (gpfifo == chunk_get_last_gpfifo(chunk))
        need_to_update_chunk = true;

    list_del(&gpfifo->pending_list_node);

    // If current_push is not NULL, updating the chunk is delayed until
    // uvm_pushbuffer_end_push() is called for that push.
    if (need_to_update_chunk && chunk->current_push == NULL)
        update_chunk(pushbuffer, chunk);

    uvm_spin_unlock(&pushbuffer->lock);
}

NvU32 uvm_pushbuffer_get_offset_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    NvU32 offset;

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC channels use a private static PB and their gpfifo entries are
        // not added to any chunk's list, so this only needs to return a legal
        // offset. Completion cleanup will not find WLC gpfifo entries as
        // either the first or the last entry of any chunk.
        return 0;
    }

    offset = (char*)push->begin - get_base_cpu_va(pushbuffer);

    UVM_ASSERT(((NvU64)offset) % sizeof(NvU32) == 0);

    return offset;
}

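// Return the GPU VA of the pushbuffer location used for the given push. WLC
// and LCIC channels use their static protected vidmem PB, SEC2 channels fetch
// from unprotected sysmem, and all other channels use the main pushbuffer
// allocation.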
NvU64 uvm_pushbuffer_get_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    NvU64 pushbuffer_base;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    bool is_proxy_channel = uvm_channel_is_proxy(push->channel);

    pushbuffer_base = uvm_rm_mem_get_gpu_va(pushbuffer->memory, gpu, is_proxy_channel).address;

    if (uvm_channel_is_wlc(push->channel) || uvm_channel_is_lcic(push->channel)) {
        // We need to use the same static locations for the PB as the fixed
        // schedule because that's what the channels are initialized to use.
        return uvm_rm_mem_get_gpu_uvm_va(push->channel->conf_computing.static_pb_protected_vidmem, gpu);
    }
    else if (uvm_channel_is_sec2(push->channel)) {
        // SEC2 PBs are in unprotected sysmem
        pushbuffer_base = uvm_pushbuffer_get_sec2_gpu_va_base(pushbuffer);
    }

    return pushbuffer_base + uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
}

void *uvm_pushbuffer_get_unprotected_cpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    char *pushbuffer_base;

    if (uvm_channel_is_wlc(push->channel)) {
        // Reuse the existing WLC static PB for initialization
        UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
        return push->channel->conf_computing.static_pb_unprotected_sysmem_cpu;
    }

    pushbuffer_base = uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem);

    return pushbuffer_base + uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
}

NvU64 uvm_pushbuffer_get_unprotected_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    NvU64 pushbuffer_base;

    if (uvm_channel_is_wlc(push->channel)) {
        // Reuse the existing WLC static PB for initialization
        UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
        return uvm_rm_mem_get_gpu_uvm_va(push->channel->conf_computing.static_pb_unprotected_sysmem,
                                         uvm_push_get_gpu(push));
    }

    pushbuffer_base = uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory_unprotected_sysmem, uvm_push_get_gpu(push));

    return pushbuffer_base + uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
}

void uvm_pushbuffer_end_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk;

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC channels use a static pushbuffer and don't count towards the max
        // concurrent pushes. Initializing the list node as a list head makes
        // sure the deletion in uvm_pushbuffer_mark_completed() doesn't crash.
        INIT_LIST_HEAD(&gpfifo->pending_list_node);
        return;
    }

    chunk = gpfifo_to_chunk(pushbuffer, gpfifo);

    uvm_channel_pool_assert_locked(push->channel->pool);

    uvm_spin_lock(&pushbuffer->lock);

    list_add_tail(&gpfifo->pending_list_node, &chunk->pending_gpfifos);

    update_chunk(pushbuffer, chunk);

    UVM_ASSERT(chunk->current_push == push);
    chunk->current_push = NULL;

    uvm_spin_unlock(&pushbuffer->lock);

    // uvm_pushbuffer_end_push() needs to be called with the channel lock held,
    // while the concurrent pushes semaphore has a higher lock order. To keep
    // the code structure simple, just up out of order here.
    uvm_up_out_of_order(&pushbuffer->concurrent_pushes_sema);
}

bool uvm_pushbuffer_has_space(uvm_pushbuffer_t *pushbuffer)
{
    bool has_space;

    uvm_spin_lock(&pushbuffer->lock);

    has_space = pick_chunk(pushbuffer) != NULL;

    uvm_spin_unlock(&pushbuffer->lock);

    return has_space;
}

void uvm_pushbuffer_print_common(uvm_pushbuffer_t *pushbuffer, struct seq_file *s)
{
    NvU32 i;

    UVM_SEQ_OR_DBG_PRINT(s, "Pushbuffer for GPU %s\n", uvm_gpu_name(pushbuffer->channel_manager->gpu));
    UVM_SEQ_OR_DBG_PRINT(s, " has space: %d\n", uvm_pushbuffer_has_space(pushbuffer));

    uvm_spin_lock(&pushbuffer->lock);

    for (i = 0; i < UVM_PUSHBUFFER_CHUNKS; ++i) {
        uvm_pushbuffer_chunk_t *chunk = &pushbuffer->chunks[i];
        NvU32 cpu_put = chunk_get_cpu_put(pushbuffer, chunk);
        NvU32 gpu_get = chunk_get_gpu_get(pushbuffer, chunk);
        UVM_SEQ_OR_DBG_PRINT(s, " chunk %u put %u get %u next %u available %d idle %d\n",
                             i,
                             cpu_put,
                             gpu_get,
                             chunk->next_push_start,
                             test_bit(i, pushbuffer->available_chunks) ? 1 : 0,
                             test_bit(i, pushbuffer->idle_chunks) ? 1 : 0);
    }

    uvm_spin_unlock(&pushbuffer->lock);
}

void uvm_pushbuffer_print(uvm_pushbuffer_t *pushbuffer)
{
    uvm_pushbuffer_print_common(pushbuffer, NULL);
}

NvU64 uvm_pushbuffer_get_gpu_va_base(uvm_pushbuffer_t *pushbuffer)
{
    return uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory, pushbuffer->channel_manager->gpu);
}

NvU64 uvm_pushbuffer_get_sec2_gpu_va_base(uvm_pushbuffer_t *pushbuffer)
{
    UVM_ASSERT(uvm_conf_computing_mode_enabled(pushbuffer->channel_manager->gpu));

    return uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory_unprotected_sysmem, pushbuffer->channel_manager->gpu);
}