/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_gpu_semaphore.h"
#include "uvm_lock.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_channel.h" // For UVM_GPU_SEMAPHORE_MAX_JUMP
#include "uvm_conf_computing.h"

#define UVM_SEMAPHORE_SIZE 4
#define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
#define UVM_SEMAPHORE_COUNT_PER_PAGE (UVM_SEMAPHORE_PAGE_SIZE / UVM_SEMAPHORE_SIZE)
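
// For reference (assuming a typical 4 KiB PAGE_SIZE kernel configuration):
// each pool page then holds 4096 / 4 = 1024 semaphore payloads.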

// The top nibble of the canary base is intentionally 0. The rest of the value
// is arbitrary. See the comments below on make_canary.
#define UVM_SEMAPHORE_CANARY_BASE     0x0badc0de
#define UVM_SEMAPHORE_CANARY_MASK     0xf0000000

struct uvm_gpu_semaphore_pool_struct
{
    // The GPU owning the pool
    uvm_gpu_t *gpu;

    // List of all the semaphore pages belonging to the pool
    struct list_head pages;

    // Pages aperture.
    uvm_aperture_t aperture;

    // Count of free semaphores among all the pages
    NvU32 free_semaphores_count;

    // Lock protecting the state of the pool
    uvm_mutex_t mutex;
};

struct uvm_gpu_semaphore_pool_page_struct
{
    // Allocation backing the page
    uvm_rm_mem_t *memory;

    // Pool the page is part of
    uvm_gpu_semaphore_pool_t *pool;

    // Node in the list of all pages in a semaphore pool
    struct list_head all_pages_node;

    // Mask indicating free semaphore indices within the page
    DECLARE_BITMAP(free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
};

static bool gpu_semaphore_pool_is_secure(uvm_gpu_semaphore_pool_t *pool)
{
    return uvm_conf_computing_mode_enabled(pool->gpu) && (pool->aperture == UVM_APERTURE_VID);
}

static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
{
    return gpu_semaphore_pool_is_secure(semaphore->page->pool);
}

static NvU32 get_index(uvm_gpu_semaphore_t *semaphore)
{
    NvU32 offset;
    NvU32 index;

    if (gpu_semaphore_is_secure(semaphore))
        return semaphore->conf_computing.index;

    UVM_ASSERT(semaphore->payload != NULL);
    UVM_ASSERT(semaphore->page != NULL);

    offset = (char*)semaphore->payload - (char*)uvm_rm_mem_get_cpu_va(semaphore->page->memory);
    UVM_ASSERT(offset % UVM_SEMAPHORE_SIZE == 0);

    index = offset / UVM_SEMAPHORE_SIZE;
    UVM_ASSERT(index < UVM_SEMAPHORE_COUNT_PER_PAGE);

    return index;
}

// Use canary values on debug builds to catch semaphore use-after-free. We can
// catch release-after-free by simply setting the payload to a known value at
// free then checking it on alloc or pool free, but catching acquire-after-free
// is a little trickier.
//
// In order to make still-pending GEQ acquires stall indefinitely we need to
// reduce the current payload as much as we can, subject to two restrictions:
//
// 1) The pending acquires could be comparing against values much less than and
//    much greater than the current payload, so we have to set the payload to a
//    value reasonably less than the acquires which we might expect to be
//    pending.
//
// 2) Going over halfway past a pending acquire on the 32-bit number wheel will
//    cause Host to wrap and think the acquire succeeded. So we shouldn't reduce
//    by more than 2^31.
//
// To handle these restrictions we'll deal with quadrants of 2^32, under the
// assumption that it's unlikely for a payload to outpace a pending acquire by
// more than 2^30.
//
// We also need the base value to have some 0s in the upper significant bits,
// otherwise those bits might carry us past the quadrant boundary when we
// OR them in.
static NvU32 make_canary(NvU32 payload)
{
    NvU32 prev_quadrant = payload - (1 << 30);
    return (prev_quadrant & UVM_SEMAPHORE_CANARY_MASK) | UVM_SEMAPHORE_CANARY_BASE;
}

static bool is_canary(NvU32 val)
{
    return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
}
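
// Worked examples of the two helpers above (illustrative values only, worked
// out by hand; all arithmetic is modulo 2^32):
//
//   make_canary(0x00000000): 0x00000000 - 0x40000000 = 0xc0000000
//                            (0xc0000000 & 0xf0000000) | 0x0badc0de = 0xcbadc0de
//   make_canary(0x50001234): 0x50001234 - 0x40000000 = 0x10001234
//                            (0x10001234 & 0xf0000000) | 0x0badc0de = 0x1badc0de
//
//   is_canary(0xcbadc0de) is true since 0xcbadc0de & 0x0fffffff == 0x0badc0de,
//   while a regular payload such as 0x00000042 is not a canary.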

static bool semaphore_uses_canary(uvm_gpu_semaphore_pool_t *pool)
{
    // A pool allocated in the CPR of vidmem cannot be read/written from the
    // CPU.
    return !gpu_semaphore_pool_is_secure(pool) && UVM_IS_DEBUG();
}

// Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
// pool?
static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
{
    return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
}

// Secure semaphore pools are allocated in the CPR of vidmem and only mapped to
// the owning GPU, since no other processor has access to it.
static NV_STATUS pool_alloc_secure_page(uvm_gpu_semaphore_pool_t *pool,
                                        uvm_gpu_semaphore_pool_page_t *pool_page,
                                        uvm_rm_mem_type_t memory_type)
{
    NV_STATUS status;

    UVM_ASSERT(gpu_semaphore_pool_is_secure(pool));
    status = uvm_rm_mem_alloc(pool->gpu,
                              memory_type,
                              UVM_SEMAPHORE_PAGE_SIZE,
                              UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                              &pool_page->memory);

    if (status != NV_OK)
        return status;

    return NV_OK;
}

static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
{
    NV_STATUS status;
    uvm_gpu_semaphore_pool_page_t *pool_page;
    NvU32 *payloads;
    size_t i;
    uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;

    uvm_assert_mutex_locked(&pool->mutex);

    pool_page = uvm_kvmalloc_zero(sizeof(*pool_page));

    if (!pool_page)
        return NV_ERR_NO_MEMORY;

    pool_page->pool = pool;

    // Whenever the Confidential Computing feature is enabled, engines can
    // access semaphores only in the CPR of vidmem. Mapping to other GPUs is
    // also disabled.
    if (gpu_semaphore_pool_is_secure(pool)) {
        status = pool_alloc_secure_page(pool, pool_page, memory_type);

        if (status != NV_OK)
            goto error;
    }
    else {
        status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
                                              memory_type,
                                              UVM_SEMAPHORE_PAGE_SIZE,
                                              0,
                                              &pool_page->memory);
        if (status != NV_OK)
            goto error;
    }

    // Verify the GPU can access the semaphore pool.
    UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));

    // All semaphores are initially free
    bitmap_fill(pool_page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);

    list_add(&pool_page->all_pages_node, &pool->pages);
    pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;

    if (semaphore_uses_canary(pool)) {
        payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
        for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
            payloads[i] = make_canary(0);
    }

    return NV_OK;

error:
    uvm_kvfree(pool_page);
    return status;
}

static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
{
    uvm_gpu_semaphore_pool_t *pool;

    UVM_ASSERT(page);
    pool = page->pool;

    uvm_assert_mutex_locked(&pool->mutex);

    // Assert that no semaphores are still allocated
    UVM_ASSERT(bitmap_full(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE));
    UVM_ASSERT_MSG(pool->free_semaphores_count >= UVM_SEMAPHORE_COUNT_PER_PAGE,
                   "count: %u\n",
                   pool->free_semaphores_count);

    if (semaphore_uses_canary(pool)) {
        size_t i;
        NvU32 *payloads = uvm_rm_mem_get_cpu_va(page->memory);
        for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
            UVM_ASSERT(is_canary(payloads[i]));
    }

    pool->free_semaphores_count -= UVM_SEMAPHORE_COUNT_PER_PAGE;
    list_del(&page->all_pages_node);
    uvm_rm_mem_free(page->memory);
    uvm_kvfree(page);
}

NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaphore_t *semaphore)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_semaphore_pool_page_t *page;

    memset(semaphore, 0, sizeof(*semaphore));

    uvm_mutex_lock(&pool->mutex);

    if (pool->free_semaphores_count == 0)
        status = pool_alloc_page(pool);

    if (status != NV_OK)
        goto done;

    list_for_each_entry(page, &pool->pages, all_pages_node) {
        NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
        if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
            continue;

        if (gpu_semaphore_pool_is_secure(pool)) {
            semaphore->conf_computing.index = semaphore_index;
        }
        else {
            semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) +
                                          semaphore_index * UVM_SEMAPHORE_SIZE);
        }

        semaphore->page = page;

        if (semaphore_uses_canary(pool))
            UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));

        uvm_gpu_semaphore_set_payload(semaphore, 0);

        __clear_bit(semaphore_index, page->free_semaphores);
        --pool->free_semaphores_count;

        goto done;
    }

    UVM_ASSERT_MSG(0, "Failed to find a semaphore after allocating a new page\n");
    status = NV_ERR_GENERIC;

done:
    uvm_mutex_unlock(&pool->mutex);

    return status;
}

void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_pool_page_t *page;
    uvm_gpu_semaphore_pool_t *pool;
    NvU32 index;

    UVM_ASSERT(semaphore);

    // uvm_gpu_semaphore_t is to be embedded in other structures, so it should
    // always be accessible, but it may not be initialized in error cases.
    // Early out if page is NULL, indicating the semaphore hasn't been
    // allocated successfully.
    page = semaphore->page;
    if (page == NULL)
        return;

    pool = page->pool;
    index = get_index(semaphore);

    // Write a known value lower than the current payload in an attempt to
    // catch release-after-free and acquire-after-free.
    if (semaphore_uses_canary(pool))
        uvm_gpu_semaphore_set_payload(semaphore, make_canary(uvm_gpu_semaphore_get_payload(semaphore)));

    uvm_mutex_lock(&pool->mutex);

    semaphore->page = NULL;
    semaphore->payload = NULL;

    ++pool->free_semaphores_count;
    __set_bit(index, page->free_semaphores);

    uvm_mutex_unlock(&pool->mutex);
}
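
// A minimal usage sketch of the allocation API above (illustrative only, not a
// real caller; actual users embed the semaphore in larger structures and add
// their own error handling):
//
//     uvm_gpu_semaphore_t sema;
//     NV_STATUS status = uvm_gpu_semaphore_alloc(pool, &sema);
//     if (status != NV_OK)
//         return status;
//
//     // Payload starts at 0; GPU work released against it bumps the value
//     UVM_ASSERT(uvm_gpu_semaphore_get_payload(&sema) == 0);
//
//     uvm_gpu_semaphore_free(&sema);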

NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
    uvm_gpu_semaphore_pool_t *pool;
    pool = uvm_kvmalloc_zero(sizeof(*pool));

    if (!pool)
        return NV_ERR_NO_MEMORY;

    uvm_mutex_init(&pool->mutex, UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL);

    INIT_LIST_HEAD(&pool->pages);

    pool->free_semaphores_count = 0;
    pool->gpu = gpu;
    pool->aperture = UVM_APERTURE_SYS;

    *pool_out = pool;

    return NV_OK;
}

NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
    NV_STATUS status;

    UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));

    status = uvm_gpu_semaphore_pool_create(gpu, pool_out);
    if (status == NV_OK)
        (*pool_out)->aperture = UVM_APERTURE_VID;

    return status;
}

void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool)
{
    uvm_gpu_semaphore_pool_page_t *page;
    uvm_gpu_semaphore_pool_page_t *next_page;

    if (!pool)
        return;

    // No other thread should be touching the pool once it's being destroyed
    uvm_assert_mutex_unlocked(&pool->mutex);

    // Keep pool_free_page happy
    uvm_mutex_lock(&pool->mutex);

    list_for_each_entry_safe(page, next_page, &pool->pages, all_pages_node)
        pool_free_page(page);

    UVM_ASSERT_MSG(pool->free_semaphores_count == 0, "unused: %u", pool->free_semaphores_count);
    UVM_ASSERT(list_empty(&pool->pages));

    uvm_mutex_unlock(&pool->mutex);

    uvm_kvfree(pool);
}

NV_STATUS uvm_gpu_semaphore_pool_map_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_semaphore_pool_page_t *page;

    UVM_ASSERT(pool);
    UVM_ASSERT(gpu);

    uvm_mutex_lock(&pool->mutex);

    list_for_each_entry(page, &pool->pages, all_pages_node) {
        status = uvm_rm_mem_map_gpu(page->memory, gpu, 0);
        if (status != NV_OK)
            goto done;
    }

done:
    uvm_mutex_unlock(&pool->mutex);

    return status;
}

void uvm_gpu_semaphore_pool_unmap_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu)
{
    uvm_gpu_semaphore_pool_page_t *page;

    UVM_ASSERT(pool);
    UVM_ASSERT(gpu);

    uvm_mutex_lock(&pool->mutex);

    list_for_each_entry(page, &pool->pages, all_pages_node)
        uvm_rm_mem_unmap_gpu(page->memory, gpu);

    uvm_mutex_unlock(&pool->mutex);
}

NvU64 uvm_gpu_semaphore_get_gpu_uvm_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu)
{
    return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, false);
}

NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu)
{
    return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, true);
}

NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
    NvU32 index = get_index(semaphore);
    NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space).address;

    return base_va + UVM_SEMAPHORE_SIZE * index;
}
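
// For example (illustrative addresses only): a semaphore at index 3 in a page
// whose memory is mapped at GPU VA 0x7f0000000000 resolves to
// 0x7f0000000000 + 3 * UVM_SEMAPHORE_SIZE = 0x7f000000000c.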

NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
{
    if (gpu_semaphore_is_secure(semaphore))
        return UVM_GPU_READ_ONCE(semaphore->conf_computing.cached_payload);

    return UVM_GPU_READ_ONCE(*semaphore->payload);
}

void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload)
{
    // Provide a guarantee that all memory accesses prior to setting the payload
    // won't be moved past it.
    // Use a big hammer mb() as set_payload() is not used in any performance path
    // today.
    // This could likely be optimized to be either an smp_store_release() or use
    // an smp_mb__before_atomic() barrier. The former is a recent addition to the
    // kernel, though, and it's not clear whether combining the latter with a
    // regular 32-bit store is well defined in all cases. Both also seem to risk
    // being optimized out on non-SMP configs (we need them for interacting with
    // the GPU correctly even on non-SMP).
    mb();

    if (gpu_semaphore_is_secure(semaphore))
        UVM_GPU_WRITE_ONCE(semaphore->conf_computing.cached_payload, payload);
    else
        UVM_GPU_WRITE_ONCE(*semaphore->payload, payload);
}

// This function is intended to catch channels which have been left dangling in
// trackers after their owning GPUs have been destroyed.
static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
    uvm_gpu_t *gpu = tracking_sem->semaphore.page->pool->gpu;
    uvm_gpu_t *table_gpu;

    UVM_ASSERT_MSG(gpu->magic == UVM_GPU_MAGIC_VALUE, "Corruption detected: magic number is 0x%llx\n", gpu->magic);

    // It's ok for the GPU to not be in the global table, since add_gpu operates
    // on trackers before adding the GPU to the table, and remove_gpu operates
    // on trackers after removing the GPU. We rely on the magic value to catch
    // those cases.
    //
    // But if a pointer is in the table it must match.
    table_gpu = uvm_gpu_get(gpu->global_id);
    if (table_gpu)
        UVM_ASSERT(table_gpu == gpu);

    // Return a boolean so this function can be used in assertions for
    // conditional compilation
    return true;
}

bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
    uvm_gpu_t *gpu = tracking_semaphore->semaphore.page->pool->gpu;

    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));

    return uvm_conf_computing_mode_enabled(gpu);
}

NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
{
    NV_STATUS status;
    uvm_lock_order_t order = UVM_LOCK_ORDER_LEAF;

    memset(tracking_sem, 0, sizeof(*tracking_sem));

    status = uvm_gpu_semaphore_alloc(pool, &tracking_sem->semaphore);
    if (status != NV_OK)
        return status;

    UVM_ASSERT(uvm_gpu_semaphore_get_payload(&tracking_sem->semaphore) == 0);

    if (uvm_conf_computing_mode_enabled(pool->gpu))
        order = UVM_LOCK_ORDER_SECURE_SEMAPHORE;

    if (tracking_semaphore_uses_mutex(tracking_sem))
        uvm_mutex_init(&tracking_sem->m_lock, order);
    else
        uvm_spin_lock_init(&tracking_sem->s_lock, order);

    atomic64_set(&tracking_sem->completed_value, 0);
    tracking_sem->queued_value = 0;

    return NV_OK;
}

void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
    uvm_gpu_semaphore_free(&tracking_sem->semaphore);
}

static bool should_skip_secure_semaphore_update(NvU32 last_observed_notifier, NvU32 gpu_notifier)
{
    // Skip the update if there is no new value, or if the GPU is currently
    // writing the new encrypted material, in which case reading it now would
    // yield corrupted data.
    return (last_observed_notifier == gpu_notifier) || (gpu_notifier % 2);
}
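
// Note on the notifier protocol assumed by the retry loop below: the writer is
// expected to bump the notifier to an odd value before updating the encrypted
// payload and auth tag, and to the next even value once the update is
// complete, so a sequence such as 2 -> 3 -> 4 brackets one update. The loop
// therefore only trusts data read while the notifier held the same even value
// before and after the reads, in the style of a seqlock.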

static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
{
    UvmCslIv local_iv;
    NvU32 local_payload;
    NvU32 new_sem_value;
    NvU32 gpu_notifier;
    NvU32 last_observed_notifier;
    NvU32 new_gpu_notifier = 0;
    NvU32 iv_index = 0;

    // A channel can have multiple entries pending, and the tracking semaphore
    // update of each entry can race with this function. Since the semaphore
    // needs to be updated to release a used entry, we never need more than
    // 'num_gpfifo_entries' retries.
    unsigned tries_left = channel->num_gpfifo_entries;
    NV_STATUS status = NV_OK;
    NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
    UvmCslIv *ivs_cpu_addr = semaphore->conf_computing.ivs;
    void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
    NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
    NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);

    UVM_ASSERT(uvm_channel_is_secure_ce(channel));

    last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
    gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
    UVM_ASSERT(last_observed_notifier <= gpu_notifier);

    if (should_skip_secure_semaphore_update(last_observed_notifier, gpu_notifier))
        return;

    do {
        gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);

        // Odd notifier value means there's an update in progress.
        if (gpu_notifier % 2)
            continue;

        // Make sure no memory accesses happen before we read the notifier
        smp_mb__after_atomic();

        iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
        memcpy(local_auth_tag, auth_tag_cpu_addr, sizeof(local_auth_tag));
        local_payload = UVM_READ_ONCE(*payload_cpu_addr);
        memcpy(&local_iv, &ivs_cpu_addr[iv_index], sizeof(local_iv));

        // Make sure the second read of notifier happens after
        // all memory accesses.
        smp_mb__before_atomic();
        new_gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
        tries_left--;
    } while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));

    if (!tries_left) {
        status = NV_ERR_INVALID_STATE;
        goto error;
    }

    if (gpu_notifier == new_gpu_notifier) {
        status = uvm_conf_computing_cpu_decrypt(channel,
                                                &new_sem_value,
                                                &local_payload,
                                                &local_iv,
                                                sizeof(new_sem_value),
                                                &local_auth_tag);

        if (status != NV_OK)
            goto error;

        uvm_gpu_semaphore_set_payload(semaphore, new_sem_value);
        UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
    }

    return;

error:
    // Both decryption failure and running out of retries are fatal errors. In
    // testing, decryption always succeeded on the first try; needing this many
    // retries would indicate active tampering with the data structures.
    uvm_global_set_fatal_error(status);
}

static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
    NvU64 old_value = atomic64_read(&tracking_semaphore->completed_value);
    // The semaphore value is the bottom 32 bits of completed_value
    NvU32 old_sem_value = (NvU32)old_value;
    NvU32 new_sem_value;
    NvU64 new_value;

    if (tracking_semaphore_uses_mutex(tracking_semaphore))
        uvm_assert_mutex_locked(&tracking_semaphore->m_lock);
    else
        uvm_assert_spinlock_locked(&tracking_semaphore->s_lock);

    if (tracking_semaphore->semaphore.conf_computing.encrypted_payload) {
        // TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
        //                     mechanism to all semaphore
        uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
        uvm_gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
    }

    new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);

    // The following logic to update the completed value is very subtle; it
    // helps to read https://www.kernel.org/doc/Documentation/memory-barriers.txt
    // before going through this code.

    if (old_sem_value == new_sem_value) {
        // No progress since the last update.
        // No additional memory barrier required in this case as completed_value
        // is always updated under the lock that this thread just acquired.
        // That guarantees full ordering with all the accesses the thread that
        // updated completed_value did under the lock including the GPU
        // semaphore read.
        return old_value;
    }

    // Replace the bottom 32-bits with the new semaphore value
    new_value = (old_value & 0xFFFFFFFF00000000ull) | new_sem_value;

    // If we've wrapped around, add 2^32 to the value
    // Notably the user of the GPU tracking semaphore needs to guarantee that
    // the value is updated often enough to notice the wrap around each time it
    // happens. In case of a channel tracking semaphore that's released for each
    // push, it's easily guaranteed because of the small number of GPFIFO
    // entries available per channel (there could be at most as many pending
    // pushes as GPFIFO entries).
    if (unlikely(new_sem_value < old_sem_value))
        new_value += 1ULL << 32;
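
    // Worked example (illustrative): if completed_value was 0x1fffffff0 (one
    // prior wrap, 32-bit payload 0xfffffff0) and the payload now reads
    // 0x00000010, the new payload is smaller than the old one, so another
    // wrap is recorded and the result is 0x200000010.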

    // Check for unexpected large jumps of the semaphore value
    UVM_ASSERT_MSG_RELEASE(new_value - old_value <= UVM_GPU_SEMAPHORE_MAX_JUMP,
                           "GPU %s unexpected semaphore (CPU VA 0x%llx) jump from 0x%llx to 0x%llx\n",
                           tracking_semaphore->semaphore.page->pool->gpu->parent->name,
                           (NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
                           old_value, new_value);

    // Use an atomic write even though the lock is held so that the value can
    // be (carefully) read atomically outside of the lock.
    //
    // atomic64_set() on its own doesn't imply any memory barriers and we need
    // prior memory accesses (in particular the read of the GPU semaphore
    // payload) by this thread to be visible to other threads that see the newly
    // set completed_value. smp_mb__before_atomic() provides that ordering.
    //
    // Also see the comment and matching smp_mb__after_atomic() barrier in
    // uvm_gpu_tracking_semaphore_is_value_completed().
    //
    // Notably as of 4.3, atomic64_set_release() and atomic64_read_acquire()
    // have been added that are exactly what we need and could be slightly
    // faster on arm and powerpc than the implementation below. But at least in
    // 4.3 the implementation looks broken for arm32 (it maps directly to
    // smp_load_acquire() and that doesn't support 64-bit reads on 32-bit
    // architectures) so instead of dealing with that just use a slightly bigger
    // hammer.
    smp_mb__before_atomic();
    atomic64_set(&tracking_semaphore->completed_value, new_value);

    // For this thread, we don't want any later accesses to be ordered above the
    // GPU semaphore read. This could be accomplished by using a
    // smp_load_acquire() for reading it, but given that it's also a pretty
    // recent addition to the kernel, just leverage smp_mb__after_atomic() that
    // guarantees that no accesses will be ordered above the atomic (and hence
    // the GPU semaphore read).
    //
    // Notably the soon following unlock is a release barrier that allows later
    // memory accesses to be reordered above it and hence doesn't provide the
    // necessary ordering with the GPU semaphore read.
    //
    // Also notably this would still need to be handled if we ever switch to
    // atomic64_set_release() and atomic64_read_acquire() for accessing
    // completed_value.
    smp_mb__after_atomic();

    return new_value;
}

NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
    NvU64 completed;

    // Check that the GPU which owns the semaphore is still present
    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));

    if (tracking_semaphore_uses_mutex(tracking_semaphore))
        uvm_mutex_lock(&tracking_semaphore->m_lock);
    else
        uvm_spin_lock(&tracking_semaphore->s_lock);

    completed = update_completed_value_locked(tracking_semaphore);

    if (tracking_semaphore_uses_mutex(tracking_semaphore))
        uvm_mutex_unlock(&tracking_semaphore->m_lock);
    else
        uvm_spin_unlock(&tracking_semaphore->s_lock);

    return completed;
}

bool uvm_gpu_tracking_semaphore_is_value_completed(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value)
{
    NvU64 completed = atomic64_read(&tracking_sem->completed_value);

    // Check that the GPU which owns the semaphore is still present
    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_sem));

    if (completed >= value) {
        // atomic64_read() doesn't imply any memory barriers and we need all
        // subsequent memory accesses in this thread to be ordered after the
        // atomic read of the completed value above as that will also order them
        // with any accesses (in particular the GPU semaphore read) performed by
        // the other thread prior to it setting the completed_value we read.
        // smp_mb__after_atomic() provides that ordering.
        //
        // Also see the comment in update_completed_value_locked().
        smp_mb__after_atomic();

        return true;
    }

    return uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) >= value;
}
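
// A minimal polling sketch on top of the tracking semaphore API above
// (illustrative only; real callers in the driver use their own wait loops with
// error and preemption checks):
//
//     NvU64 value = tracking_sem->queued_value;
//
//     while (!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, value))
//         cpu_relax();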