/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_gpu_semaphore.h"
#include "uvm_lock.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_channel.h" // For UVM_GPU_SEMAPHORE_MAX_JUMP
#include "uvm_conf_computing.h"

#define UVM_SEMAPHORE_SIZE 4
#define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
#define UVM_SEMAPHORE_COUNT_PER_PAGE (PAGE_SIZE / UVM_SEMAPHORE_SIZE)
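
// Illustrative sizing note (not from the original sources): each semaphore
// payload is a 4-byte NvU32, so with a typical 4 KiB PAGE_SIZE a single pool
// page backs UVM_SEMAPHORE_COUNT_PER_PAGE == 1024 semaphores.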

// The top nibble of the canary base is intentionally 0. The rest of the value
// is arbitrary. See the comments below on make_canary.
#define UVM_SEMAPHORE_CANARY_BASE     0x0badc0de
#define UVM_SEMAPHORE_CANARY_MASK     0xf0000000

struct uvm_gpu_semaphore_pool_struct
{
    // The GPU owning the pool
    uvm_gpu_t *gpu;

    // List of all the semaphore pages belonging to the pool
    struct list_head pages;

    // Pages aperture.
    uvm_aperture_t aperture;

    // Count of free semaphores among all the pages
    NvU32 free_semaphores_count;

    // Lock protecting the state of the pool
    uvm_mutex_t mutex;
};

struct uvm_gpu_semaphore_pool_page_struct
{
    // Allocation backing the page
    uvm_rm_mem_t *memory;

    struct {
        // Unprotected sysmem storing encrypted value of semaphores
        uvm_rm_mem_t *encrypted_payload_memory;

        // Unprotected sysmem storing encryption auth tags
        uvm_rm_mem_t *auth_tag_memory;

        // Unprotected sysmem storing plain text notifier values
        uvm_rm_mem_t *notifier_memory;
    } conf_computing;

    // Pool the page is part of
    uvm_gpu_semaphore_pool_t *pool;

    // Node in the list of all pages in a semaphore pool
    struct list_head all_pages_node;

    // Mask indicating free semaphore indices within the page
    DECLARE_BITMAP(free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
};

static bool gpu_semaphore_pool_is_secure(uvm_gpu_semaphore_pool_t *pool)
{
    return g_uvm_global.conf_computing_enabled && (pool->aperture == UVM_APERTURE_VID);
}

static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
{
    return gpu_semaphore_pool_is_secure(semaphore->page->pool);
}

// Use canary values on debug builds to catch semaphore use-after-free. We can
// catch release-after-free by simply setting the payload to a known value at
// free then checking it on alloc or pool free, but catching acquire-after-free
// is a little trickier.
//
// In order to make still-pending GEQ acquires stall indefinitely we need to
// reduce the current payload as much as we can, subject to two restrictions:
//
// 1) The pending acquires could be comparing against values much less than and
//    much greater than the current payload, so we have to set the payload to a
//    value reasonably less than the acquires which we might expect to be
//    pending.
//
// 2) Going over halfway past a pending acquire on the 32-bit number wheel will
//    cause Host to wrap and think the acquire succeeded. So we shouldn't reduce
//    by more than 2^31.
//
// To handle these restrictions we'll deal with quadrants of 2^32, under the
// assumption that it's unlikely for a payload to outpace a pending acquire by
// more than 2^30.
//
// We also need the base value to have some 0s in the upper significant bits;
// otherwise those bits might carry us past the quadrant boundary when we OR
// them in.
static NvU32 make_canary(NvU32 payload)
{
    NvU32 prev_quadrant = payload - (1 << 30);
    return (prev_quadrant & UVM_SEMAPHORE_CANARY_MASK) | UVM_SEMAPHORE_CANARY_BASE;
}
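
// Illustrative example (values computed from the definitions above, not taken
// from the original comments): make_canary(0) yields
// ((NvU32)(0 - (1 << 30)) & 0xf0000000) | 0x0badc0de == 0xcbadc0de, which is
// the value pool_alloc_page() seeds every free payload slot with on debug
// builds. Relative to a pending acquire near 0, that payload sits a bit under
// 2^30 behind on the 32-bit wheel, so per the reasoning above a dangling GEQ
// acquire keeps stalling instead of spuriously completing.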

static bool is_canary(NvU32 val)
{
    return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
}

static bool semaphore_uses_canary(uvm_gpu_semaphore_pool_t *pool)
{
    // A pool allocated in the CPR of vidmem cannot be read/written from the
    // CPU.
    return !gpu_semaphore_pool_is_secure(pool) && UVM_IS_DEBUG();
}

// Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
// pool?
static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
{
    return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
}

static void pool_page_free_buffers(uvm_gpu_semaphore_pool_page_t *page)
{
    uvm_rm_mem_free(page->memory);
    page->memory = NULL;

    if (gpu_semaphore_pool_is_secure(page->pool)) {
        uvm_rm_mem_free(page->conf_computing.encrypted_payload_memory);
        uvm_rm_mem_free(page->conf_computing.auth_tag_memory);
        uvm_rm_mem_free(page->conf_computing.notifier_memory);

        page->conf_computing.encrypted_payload_memory = NULL;
        page->conf_computing.auth_tag_memory = NULL;
        page->conf_computing.notifier_memory = NULL;
    }
    else {
        UVM_ASSERT(!page->conf_computing.encrypted_payload_memory);
        UVM_ASSERT(!page->conf_computing.auth_tag_memory);
        UVM_ASSERT(!page->conf_computing.notifier_memory);
    }
}

static NV_STATUS pool_page_alloc_buffers(uvm_gpu_semaphore_pool_page_t *page)
{
    NV_STATUS status;
    uvm_gpu_semaphore_pool_t *pool = page->pool;
    uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
    size_t align = 0;
    bool map_all = true;
    align = gpu_semaphore_pool_is_secure(pool) ? UVM_CONF_COMPUTING_BUF_ALIGNMENT : 0;
    map_all = gpu_semaphore_pool_is_secure(pool) ? false : true;

    if (map_all)
        status = uvm_rm_mem_alloc_and_map_all(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
    else
        status = uvm_rm_mem_alloc(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);

    if (status != NV_OK)
        goto error;

    if (!gpu_semaphore_pool_is_secure(pool))
        return NV_OK;

    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
                                          UVM_RM_MEM_TYPE_SYS,
                                          UVM_SEMAPHORE_PAGE_SIZE,
                                          UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                                          &page->conf_computing.encrypted_payload_memory);
    if (status != NV_OK)
        goto error;

    BUILD_BUG_ON(UVM_CONF_COMPUTING_AUTH_TAG_SIZE % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
                                          UVM_RM_MEM_TYPE_SYS,
                                          UVM_SEMAPHORE_COUNT_PER_PAGE * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
                                          UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
                                          &page->conf_computing.auth_tag_memory);
    if (status != NV_OK)
        goto error;

    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
                                          UVM_RM_MEM_TYPE_SYS,
                                          UVM_SEMAPHORE_COUNT_PER_PAGE * sizeof(NvU32),
                                          0,
                                          &page->conf_computing.notifier_memory);
    if (status != NV_OK)
        goto error;

    return NV_OK;
error:
    pool_page_free_buffers(page);
    return status;
}

static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
{
    NV_STATUS status;
    uvm_gpu_semaphore_pool_page_t *pool_page;

    uvm_assert_mutex_locked(&pool->mutex);

    pool_page = uvm_kvmalloc_zero(sizeof(*pool_page));

    if (!pool_page)
        return NV_ERR_NO_MEMORY;

    pool_page->pool = pool;

    status = pool_page_alloc_buffers(pool_page);
    if (status != NV_OK)
        goto error;

    // Verify the GPU can access the semaphore pool.
    UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));

    // All semaphores are initially free
    bitmap_fill(pool_page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);

    list_add(&pool_page->all_pages_node, &pool->pages);
    pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;

    if (semaphore_uses_canary(pool)) {
        size_t i;
        NvU32 *payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);

        for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
            payloads[i] = make_canary(0);
    }

    return NV_OK;

error:
    uvm_kvfree(pool_page);
    return status;
}

static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
{
    uvm_gpu_semaphore_pool_t *pool;

    UVM_ASSERT(page);
    pool = page->pool;

    uvm_assert_mutex_locked(&pool->mutex);

    // Assert that no semaphores are still allocated
    UVM_ASSERT(bitmap_full(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE));
    UVM_ASSERT_MSG(pool->free_semaphores_count >= UVM_SEMAPHORE_COUNT_PER_PAGE,
                   "count: %u\n",
                   pool->free_semaphores_count);

    if (semaphore_uses_canary(pool)) {
        size_t i;
        NvU32 *payloads = uvm_rm_mem_get_cpu_va(page->memory);
        for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
            UVM_ASSERT(is_canary(payloads[i]));
    }

    pool->free_semaphores_count -= UVM_SEMAPHORE_COUNT_PER_PAGE;
    list_del(&page->all_pages_node);
    pool_page_free_buffers(page);
    uvm_kvfree(page);
}

NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaphore_t *semaphore)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_semaphore_pool_page_t *page;

    memset(semaphore, 0, sizeof(*semaphore));

    uvm_mutex_lock(&pool->mutex);

    if (pool->free_semaphores_count == 0)
        status = pool_alloc_page(pool);

    if (status != NV_OK)
        goto done;

    list_for_each_entry(page, &pool->pages, all_pages_node) {
        const NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);

        UVM_ASSERT(semaphore_index <= UVM_SEMAPHORE_COUNT_PER_PAGE);

        if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
            continue;

        semaphore->page = page;
        semaphore->index = semaphore_index;

        if (gpu_semaphore_pool_is_secure(pool)) {

            // Reset the notifier to avoid a false tampering detection when
            // checking for an updated value.
            *uvm_gpu_semaphore_get_notifier_cpu_va(semaphore) = semaphore->conf_computing.last_observed_notifier;
        }

        if (semaphore_uses_canary(pool))
            UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));

        uvm_gpu_semaphore_set_payload(semaphore, 0);

        __clear_bit(semaphore_index, page->free_semaphores);
        --pool->free_semaphores_count;

        goto done;
    }

    UVM_ASSERT_MSG(0, "Failed to find a semaphore after allocating a new page\n");
    status = NV_ERR_GENERIC;

done:
    uvm_mutex_unlock(&pool->mutex);

    return status;
}
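
// Minimal usage sketch for the pool API above (illustrative only; error
// handling and the surrounding driver context are omitted):
//
//     uvm_gpu_semaphore_t sema;
//
//     if (uvm_gpu_semaphore_alloc(pool, &sema) == NV_OK) {
//         // ... use the semaphore, e.g. have the GPU release it and the CPU
//         // poll it via uvm_gpu_semaphore_get_payload() ...
//         uvm_gpu_semaphore_free(&sema);
//     }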

void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_pool_page_t *page;
    uvm_gpu_semaphore_pool_t *pool;

    UVM_ASSERT(semaphore);

    // uvm_gpu_semaphore_t is to be embedded in other structures so it should always
    // be accessible, but it may not be initialized in error cases. Early out if
    // page is NULL indicating the semaphore hasn't been allocated successfully.
    page = semaphore->page;
    if (page == NULL)
        return;

    pool = page->pool;

    // Write a known value lower than the current payload in an attempt to catch
    // release-after-free and acquire-after-free.
    if (semaphore_uses_canary(pool))
        uvm_gpu_semaphore_set_payload(semaphore, make_canary(uvm_gpu_semaphore_get_payload(semaphore)));

    uvm_mutex_lock(&pool->mutex);

    semaphore->page = NULL;

    ++pool->free_semaphores_count;
    __set_bit(semaphore->index, page->free_semaphores);

    uvm_mutex_unlock(&pool->mutex);
}

NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
    uvm_gpu_semaphore_pool_t *pool;
    pool = uvm_kvmalloc_zero(sizeof(*pool));

    if (!pool)
        return NV_ERR_NO_MEMORY;

    uvm_mutex_init(&pool->mutex, UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL);

    INIT_LIST_HEAD(&pool->pages);

    pool->free_semaphores_count = 0;
    pool->gpu = gpu;
    pool->aperture = UVM_APERTURE_SYS;

    *pool_out = pool;

    return NV_OK;
}

NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
    NV_STATUS status;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    status = uvm_gpu_semaphore_pool_create(gpu, pool_out);
    if (status == NV_OK)
        (*pool_out)->aperture = UVM_APERTURE_VID;

    return status;
}

void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool)
{
    uvm_gpu_semaphore_pool_page_t *page;
    uvm_gpu_semaphore_pool_page_t *next_page;

    if (!pool)
        return;

    // No other thread should be touching the pool once it's being destroyed
    uvm_assert_mutex_unlocked(&pool->mutex);

    // Keep pool_free_page happy
    uvm_mutex_lock(&pool->mutex);

    list_for_each_entry_safe(page, next_page, &pool->pages, all_pages_node)
        pool_free_page(page);

    UVM_ASSERT_MSG(pool->free_semaphores_count == 0, "unused: %u", pool->free_semaphores_count);
    UVM_ASSERT(list_empty(&pool->pages));

    uvm_mutex_unlock(&pool->mutex);

    uvm_kvfree(pool);
}

NV_STATUS uvm_gpu_semaphore_pool_map_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_semaphore_pool_page_t *page;

    UVM_ASSERT(pool);
    UVM_ASSERT(gpu);

    uvm_mutex_lock(&pool->mutex);

    list_for_each_entry(page, &pool->pages, all_pages_node) {
        status = uvm_rm_mem_map_gpu(page->memory, gpu, 0);
        if (status != NV_OK)
            goto done;
    }

done:
    uvm_mutex_unlock(&pool->mutex);

    return status;
}

void uvm_gpu_semaphore_pool_unmap_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu)
{
    uvm_gpu_semaphore_pool_page_t *page;

    UVM_ASSERT(pool);
    UVM_ASSERT(gpu);

    uvm_mutex_lock(&pool->mutex);

    list_for_each_entry(page, &pool->pages, all_pages_node)
        uvm_rm_mem_unmap_gpu(page->memory, gpu);

    uvm_mutex_unlock(&pool->mutex);
}

NvU64 uvm_gpu_semaphore_get_gpu_uvm_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu)
{
    return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, false);
}

NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu)
{
    return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, true);
}

NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
    NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space).address;

    return base_va + semaphore->index * UVM_SEMAPHORE_SIZE;
}
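
// For example (purely illustrative): the semaphore at index 3 within its page
// resolves to base_va + 3 * UVM_SEMAPHORE_SIZE, i.e. 12 bytes past the page's
// base VA; the CPU and conf-computing getters below use the same indexing.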

NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    char *base_va;

    if (gpu_semaphore_is_secure(semaphore))
        return &semaphore->conf_computing.cached_payload;

    base_va = uvm_rm_mem_get_cpu_va(semaphore->page->memory);
    return (NvU32*)(base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
}

NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    char *encrypted_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.encrypted_payload_memory);

    return (NvU32*)(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
}

uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    NvU64 encrypted_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.encrypted_payload_memory,
                                                        semaphore->page->pool->gpu);

    return uvm_gpu_address_virtual_unprotected(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
}

uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_notifier_t *notifier_base_va =
        uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.notifier_memory);

    return notifier_base_va + semaphore->index;
}

uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    NvU64 notifier_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.notifier_memory,
                                                       semaphore->page->pool->gpu);

    return uvm_gpu_address_virtual_unprotected(notifier_base_va +
                                               semaphore->index * sizeof(uvm_gpu_semaphore_notifier_t));
}

void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    char *auth_tag_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.auth_tag_memory);

    return (void*)(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
}

uvm_gpu_address_t uvm_gpu_semaphore_get_auth_tag_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    NvU64 auth_tag_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.auth_tag_memory,
                                                       semaphore->page->pool->gpu);

    return uvm_gpu_address_virtual_unprotected(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
}

NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
{
    return UVM_GPU_READ_ONCE(*uvm_gpu_semaphore_get_cpu_va(semaphore));
}

void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload)
{
    // Provide a guarantee that all memory accesses prior to setting the payload
    // won't be moved past it.
    // Use a big hammer mb() as set_payload() is not used in any performance path
    // today.
    // This could likely be optimized to be either an smp_store_release() or use
    // an smp_mb__before_atomic() barrier. The former is a recent addition to the
    // kernel, though, and it's not clear whether combining the latter with a
    // regular 32-bit store is well defined in all cases. Both also seem to risk
    // being optimized out on non-SMP configs (we need them for interacting with
    // the GPU correctly even on non-SMP).
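    //
    // Illustrative ordering example (an assumed usage pattern, not a specific
    // call site): if the CPU fills a buffer and then calls
    // uvm_gpu_semaphore_set_payload(sema, N), the mb() below keeps the buffer
    // writes from being reordered past the payload store, so a GPU engine that
    // acquires the semaphore against N can safely read that buffer.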
    mb();

    UVM_GPU_WRITE_ONCE(*uvm_gpu_semaphore_get_cpu_va(semaphore), payload);
}

// This function is intended to catch channels which have been left dangling in
// trackers after their owning GPUs have been destroyed.
static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
    uvm_gpu_t *gpu = tracking_sem->semaphore.page->pool->gpu;
    uvm_gpu_t *table_gpu;

    UVM_ASSERT_MSG(gpu->magic == UVM_GPU_MAGIC_VALUE, "Corruption detected: magic number is 0x%llx\n", gpu->magic);

    // It's ok for the GPU to not be in the global table, since add_gpu operates
    // on trackers before adding the GPU to the table, and remove_gpu operates
    // on trackers after removing the GPU. We rely on the magic value to catch
    // those cases.
    //
    // But if a pointer is in the table it must match.
    table_gpu = uvm_gpu_get(gpu->id);
    if (table_gpu)
        UVM_ASSERT(table_gpu == gpu);

    // Return a boolean so this function can be used in assertions for
    // conditional compilation
    return true;
}

bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));

    return g_uvm_global.conf_computing_enabled;
}

NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
{
    NV_STATUS status;
    uvm_lock_order_t order = UVM_LOCK_ORDER_LEAF;

    memset(tracking_sem, 0, sizeof(*tracking_sem));

    status = uvm_gpu_semaphore_alloc(pool, &tracking_sem->semaphore);
    if (status != NV_OK)
        return status;

    UVM_ASSERT(uvm_gpu_semaphore_get_payload(&tracking_sem->semaphore) == 0);

    if (g_uvm_global.conf_computing_enabled)
        order = UVM_LOCK_ORDER_SECURE_SEMAPHORE;

    if (tracking_semaphore_uses_mutex(tracking_sem))
        uvm_mutex_init(&tracking_sem->m_lock, order);
    else
        uvm_spin_lock_init(&tracking_sem->s_lock, order);

    atomic64_set(&tracking_sem->completed_value, 0);
    tracking_sem->queued_value = 0;

    return NV_OK;
}
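
// Minimal usage sketch for the tracking semaphore API (illustrative only;
// assumes the caller owns a semaphore pool and omits error handling):
//
//     uvm_gpu_tracking_semaphore_t tracking;
//
//     if (uvm_gpu_tracking_semaphore_alloc(pool, &tracking) == NV_OK) {
//         // ... GPU work releases the semaphore with increasing payloads ...
//         if (uvm_gpu_tracking_semaphore_is_value_completed(&tracking, value))
//             ; // Everything up to 'value' has completed.
//         uvm_gpu_tracking_semaphore_free(&tracking);
//     }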

void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
    uvm_gpu_semaphore_free(&tracking_sem->semaphore);
}

static void gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
{
    NvU32 local_payload;
    uvm_gpu_semaphore_notifier_t gpu_notifier;
    uvm_gpu_semaphore_notifier_t new_gpu_notifier = 0;

    // A channel can have multiple entries pending and the tracking semaphore
    // update of each entry can race with this function. Since the semaphore
    // needs to be updated to release a used entry, we never need more
    // than 'num_gpfifo_entries' retries.
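    //
    // The notifier is used like a seqlock here (description inferred from the
    // checks below, not taken from the original comments): the GPU side is
    // expected to move it to an odd value before overwriting the encrypted
    // payload and auth tag, and back to an even value afterwards. Reading the
    // same even notifier value before and after copying the payload therefore
    // means the copy is consistent; otherwise we retry.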
    unsigned tries_left = channel->num_gpfifo_entries;
    NV_STATUS status = NV_OK;
    NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
    uvm_gpu_semaphore_notifier_t *semaphore_notifier_cpu_addr = uvm_gpu_semaphore_get_notifier_cpu_va(semaphore);

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(uvm_channel_is_ce(channel));

    do {
        gpu_notifier = UVM_READ_ONCE(*semaphore_notifier_cpu_addr);

        UVM_ASSERT(gpu_notifier >= semaphore->conf_computing.last_observed_notifier);

        // Odd notifier value means there's an update in progress.
        if (gpu_notifier % 2)
            continue;

        // There's no change since last time
        if (gpu_notifier == semaphore->conf_computing.last_observed_notifier)
            return;

        // Make sure the payload and auth tag reads below cannot be reordered
        // before the notifier read above.
        smp_mb__after_atomic();

        memcpy(local_auth_tag, uvm_gpu_semaphore_get_auth_tag_cpu_va(semaphore), sizeof(local_auth_tag));
        local_payload = UVM_READ_ONCE(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(semaphore));

        // Make sure the second read of notifier happens after
        // all memory accesses.
        smp_mb__before_atomic();
        new_gpu_notifier = UVM_READ_ONCE(*semaphore_notifier_cpu_addr);
        tries_left--;
    } while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));

    if (!tries_left) {
        status = NV_ERR_INVALID_STATE;
    }
    else {
        NvU32 key_version;
        const NvU32 iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
        NvU32 new_semaphore_value;

        UVM_ASSERT(gpu_notifier == new_gpu_notifier);
        UVM_ASSERT(gpu_notifier % 2 == 0);

        // CPU decryption is guaranteed to use the same key version as the
        // associated GPU encryption, because if there was any key rotation in
        // between, then key rotation waited for all channels to complete before
        // proceeding. The wait implies that the semaphore value matches the
        // last one encrypted on the GPU, so this CPU decryption should happen
        // before the key is rotated.
        key_version = uvm_channel_pool_key_version(channel->pool);

        status = uvm_conf_computing_cpu_decrypt(channel,
                                                &new_semaphore_value,
                                                &local_payload,
                                                &semaphore->conf_computing.ivs[iv_index],
                                                key_version,
                                                sizeof(new_semaphore_value),
                                                &local_auth_tag);

        if (status != NV_OK)
            goto error;

        uvm_gpu_semaphore_set_payload(semaphore, new_semaphore_value);
        UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);

        return;
    }

error:
    // Both a decryption failure and running out of retries are fatal errors.
    // In testing, every decryption succeeded within a single try; needing this
    // many retries would indicate active tampering with the data structures.
    uvm_global_set_fatal_error(status);
}

static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
    NvU64 old_value = atomic64_read(&tracking_semaphore->completed_value);
    // The semaphore value is the bottom 32 bits of completed_value
    NvU32 old_sem_value = (NvU32)old_value;
    NvU32 new_sem_value;
    NvU64 new_value;

    if (tracking_semaphore_uses_mutex(tracking_semaphore))
        uvm_assert_mutex_locked(&tracking_semaphore->m_lock);
    else
        uvm_assert_spinlock_locked(&tracking_semaphore->s_lock);

    if (gpu_semaphore_is_secure(&tracking_semaphore->semaphore)) {
        // TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
        //                     mechanism to all semaphore
        uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
        gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
    }

    new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);

    // The following logic to update the completed value is very subtle; it
    // helps to read https://www.kernel.org/doc/Documentation/memory-barriers.txt
    // before going through this code.

    if (old_sem_value == new_sem_value) {
        // No progress since the last update.
        // No additional memory barrier required in this case as completed_value
        // is always updated under the lock that this thread just acquired.
        // That guarantees full ordering with all the accesses the thread that
        // updated completed_value did under the lock including the GPU
        // semaphore read.
        return old_value;
    }

    // Replace the bottom 32 bits with the new semaphore value
    new_value = (old_value & 0xFFFFFFFF00000000ull) | new_sem_value;

    // If we've wrapped around, add 2^32 to the value
    // Notably the user of the GPU tracking semaphore needs to guarantee that
    // the value is updated often enough to notice the wrap around each time it
    // happens. In case of a channel tracking semaphore that's released for each
    // push, it's easily guaranteed because of the small number of GPFIFO
    // entries available per channel (there could be at most as many pending
    // pushes as GPFIFO entries).
    if (unlikely(new_sem_value < old_sem_value))
        new_value += 1ULL << 32;
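
    // Worked example (illustrative values, not from the original comments): if
    // completed_value was 0x1fffffff0 (so old_sem_value is 0xfffffff0) and the
    // payload now reads 0x10, new_value is first 0x100000010 and, because the
    // 32-bit payload wrapped, becomes 0x200000010, i.e. a jump of 0x20.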

    // Check for unexpected large jumps of the semaphore value
    UVM_ASSERT_MSG_RELEASE(new_value - old_value <= UVM_GPU_SEMAPHORE_MAX_JUMP,
                           "GPU %s unexpected semaphore (CPU VA 0x%llx) jump from 0x%llx to 0x%llx\n",
                           uvm_gpu_name(tracking_semaphore->semaphore.page->pool->gpu),
                           (NvU64)(uintptr_t)uvm_gpu_semaphore_get_cpu_va(&tracking_semaphore->semaphore),
                           old_value, new_value);

    // Use an atomic write even though the lock is held so that the value can
    // be (carefully) read atomically outside of the lock.
    //
    // atomic64_set() on its own doesn't imply any memory barriers and we need
    // prior memory accesses (in particular the read of the GPU semaphore
    // payload) by this thread to be visible to other threads that see the newly
    // set completed_value. smp_mb__before_atomic() provides that ordering.
    //
    // Also see the comment and matching smp_mb__after_atomic() barrier in
    // uvm_gpu_tracking_semaphore_is_value_completed().
    //
    // Notably as of 4.3, atomic64_set_release() and atomic64_read_acquire()
    // have been added that are exactly what we need and could be slightly
    // faster on arm and powerpc than the implementation below. But at least in
    // 4.3 the implementation looks broken for arm32 (it maps directly to
    // smp_load_acquire() and that doesn't support 64-bit reads on 32-bit
    // architectures) so instead of dealing with that just use a slightly bigger
    // hammer.
    smp_mb__before_atomic();
    atomic64_set(&tracking_semaphore->completed_value, new_value);

    // For this thread, we don't want any later accesses to be ordered above the
    // GPU semaphore read. This could be accomplished by using a
    // smp_load_acquire() for reading it, but given that it's also a pretty
    // recent addition to the kernel, just leverage smp_mb__after_atomic() that
    // guarantees that no accesses will be ordered above the atomic (and hence
    // the GPU semaphore read).
    //
    // Notably the soon following unlock is a release barrier that allows later
    // memory accesses to be reordered above it and hence doesn't provide the
    // necessary ordering with the GPU semaphore read.
    //
    // Also notably this would still need to be handled if we ever switch to
    // atomic64_set_release() and atomic64_read_acquire() for accessing
    // completed_value.
    smp_mb__after_atomic();

    return new_value;
}

NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
    NvU64 completed;

    // Check that the GPU which owns the semaphore is still present
    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));

    if (tracking_semaphore_uses_mutex(tracking_semaphore))
        uvm_mutex_lock(&tracking_semaphore->m_lock);
    else
        uvm_spin_lock(&tracking_semaphore->s_lock);

    completed = update_completed_value_locked(tracking_semaphore);

    if (tracking_semaphore_uses_mutex(tracking_semaphore))
        uvm_mutex_unlock(&tracking_semaphore->m_lock);
    else
        uvm_spin_unlock(&tracking_semaphore->s_lock);

    return completed;
}

bool uvm_gpu_tracking_semaphore_is_value_completed(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value)
{
    NvU64 completed = atomic64_read(&tracking_sem->completed_value);

    // Check that the GPU which owns the semaphore is still present
    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_sem));

    if (completed >= value) {
        // atomic64_read() doesn't imply any memory barriers and we need all
        // subsequent memory accesses in this thread to be ordered after the
        // atomic read of the completed value above as that will also order them
        // with any accesses (in particular the GPU semaphore read) performed by
        // the other thread prior to it setting the completed_value we read.
        // smp_mb__after_atomic() provides that ordering.
        //
        // Also see the comment in update_completed_value_locked().
        smp_mb__after_atomic();

        return true;
    }

    return uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) >= value;
}