/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_gpu_semaphore.h"
#include "uvm_lock.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_channel.h" // For UVM_GPU_SEMAPHORE_MAX_JUMP
#include "uvm_conf_computing.h"

#define UVM_SEMAPHORE_SIZE 4
#define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
#define UVM_SEMAPHORE_COUNT_PER_PAGE (PAGE_SIZE / UVM_SEMAPHORE_SIZE)
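
// Sizing note (illustrative): with UVM_SEMAPHORE_SIZE of 4 bytes and a typical
// 4KiB PAGE_SIZE, UVM_SEMAPHORE_COUNT_PER_PAGE works out to 4096 / 4 = 1024
// semaphores per pool page; kernels configured with larger pages scale this
// count up accordingly.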

// The top nibble of the canary base is intentionally 0. The rest of the value
// is arbitrary. See the comments below on make_canary.
#define UVM_SEMAPHORE_CANARY_BASE 0x0badc0de
#define UVM_SEMAPHORE_CANARY_MASK 0xf0000000

struct uvm_gpu_semaphore_pool_struct
{
    // The GPU owning the pool
    uvm_gpu_t *gpu;

    // List of all the semaphore pages belonging to the pool
    struct list_head pages;

    // Pages aperture.
    uvm_aperture_t aperture;

    // Count of free semaphores among all the pages
    NvU32 free_semaphores_count;

    // Lock protecting the state of the pool
    uvm_mutex_t mutex;
};

struct uvm_gpu_semaphore_pool_page_struct
{
    // Allocation backing the page
    uvm_rm_mem_t *memory;

    struct {
        // Unprotected sysmem storing encrypted value of semaphores
        uvm_rm_mem_t *encrypted_payload_memory;

        // Unprotected sysmem storing encryption auth tags
        uvm_rm_mem_t *auth_tag_memory;

        // Unprotected sysmem storing plain text notifier values
        uvm_rm_mem_t *notifier_memory;
    } conf_computing;

    // Pool the page is part of
    uvm_gpu_semaphore_pool_t *pool;

    // Node in the list of all pages in a semaphore pool
    struct list_head all_pages_node;

    // Mask indicating free semaphore indices within the page
    DECLARE_BITMAP(free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
};

static bool gpu_semaphore_pool_is_secure(uvm_gpu_semaphore_pool_t *pool)
{
    return g_uvm_global.conf_computing_enabled && (pool->aperture == UVM_APERTURE_VID);
}

static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
{
    return gpu_semaphore_pool_is_secure(semaphore->page->pool);
}

// Use canary values on debug builds to catch semaphore use-after-free. We can
// catch release-after-free by simply setting the payload to a known value at
// free then checking it on alloc or pool free, but catching acquire-after-free
// is a little trickier.
//
// In order to make still-pending GEQ acquires stall indefinitely we need to
// reduce the current payload as much as we can, subject to two restrictions:
//
// 1) The pending acquires could be comparing against values much less than and
//    much greater than the current payload, so we have to set the payload to a
//    value reasonably less than the acquires which we might expect to be
//    pending.
//
// 2) Going over halfway past a pending acquire on the 32-bit number wheel will
//    cause Host to wrap and think the acquire succeeded. So we shouldn't reduce
//    by more than 2^31.
//
// To handle these restrictions we'll deal with quadrants of 2^32, under the
// assumption that it's unlikely for a payload to outpace a pending acquire by
// more than 2^30.
//
// We also need the base value to have some 0s in the upper significant bits,
// otherwise those bits might carry us past the quadrant boundary when we OR
// them in.
static NvU32 make_canary(NvU32 payload)
{
    NvU32 prev_quadrant = payload - (1 << 30);
    return (prev_quadrant & UVM_SEMAPHORE_CANARY_MASK) | UVM_SEMAPHORE_CANARY_BASE;
}

static bool is_canary(NvU32 val)
{
    return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
}
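
// Worked example (illustrative): for a current payload of 0x80000000,
// prev_quadrant = 0x80000000 - 2^30 = 0x40000000, so the canary becomes
// (0x40000000 & 0xf0000000) | 0x0badc0de = 0x4badc0de. That value sits roughly
// 2^30 below the old payload on the 32-bit wheel: far enough back that pending
// GEQ acquires keep waiting, but well under the 2^31 limit beyond which Host
// would consider them satisfied. is_canary() recognizes the value by masking
// off the quadrant nibble and comparing the rest against
// UVM_SEMAPHORE_CANARY_BASE.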

static bool semaphore_uses_canary(uvm_gpu_semaphore_pool_t *pool)
{
    // A pool allocated in the CPR of vidmem cannot be read/written from the
    // CPU.
    return !gpu_semaphore_pool_is_secure(pool) && UVM_IS_DEBUG();
}

// Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
// pool?
static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
{
    return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
}
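
// For example (illustrative, assuming a parent GPU reporting max_host_va of
// 1ULL << 40): a pool page mapped at UVM VA 0xFFFFFF000 with a 4KiB size ends
// at 0xFFFFFFFFF, which is below 2^40 and therefore addressable by Host/Esched;
// a mapping whose last byte falls at or above 2^40 would fail this check.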

static void pool_page_free_buffers(uvm_gpu_semaphore_pool_page_t *page)
{
    uvm_rm_mem_free(page->memory);
    page->memory = NULL;

    if (gpu_semaphore_pool_is_secure(page->pool)) {
        uvm_rm_mem_free(page->conf_computing.encrypted_payload_memory);
        uvm_rm_mem_free(page->conf_computing.auth_tag_memory);
        uvm_rm_mem_free(page->conf_computing.notifier_memory);

        page->conf_computing.encrypted_payload_memory = NULL;
        page->conf_computing.auth_tag_memory = NULL;
        page->conf_computing.notifier_memory = NULL;
    }
    else {
        UVM_ASSERT(!page->conf_computing.encrypted_payload_memory);
        UVM_ASSERT(!page->conf_computing.auth_tag_memory);
        UVM_ASSERT(!page->conf_computing.notifier_memory);
    }
}

static NV_STATUS pool_page_alloc_buffers(uvm_gpu_semaphore_pool_page_t *page)
{
    NV_STATUS status;
    uvm_gpu_semaphore_pool_t *pool = page->pool;
    uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
    size_t align = gpu_semaphore_pool_is_secure(pool) ? UVM_CONF_COMPUTING_BUF_ALIGNMENT : 0;
    bool map_all = !gpu_semaphore_pool_is_secure(pool);

    if (map_all)
        status = uvm_rm_mem_alloc_and_map_all(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
    else
        status = uvm_rm_mem_alloc(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);

    if (status != NV_OK)
        goto error;

    if (!gpu_semaphore_pool_is_secure(pool))
        return NV_OK;

    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
                                          UVM_RM_MEM_TYPE_SYS,
                                          UVM_SEMAPHORE_PAGE_SIZE,
                                          UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                                          &page->conf_computing.encrypted_payload_memory);
    if (status != NV_OK)
        goto error;

    BUILD_BUG_ON(UVM_CONF_COMPUTING_AUTH_TAG_SIZE % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
                                          UVM_RM_MEM_TYPE_SYS,
                                          UVM_SEMAPHORE_COUNT_PER_PAGE * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
                                          UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
                                          &page->conf_computing.auth_tag_memory);
    if (status != NV_OK)
        goto error;

    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
                                          UVM_RM_MEM_TYPE_SYS,
                                          UVM_SEMAPHORE_COUNT_PER_PAGE * sizeof(uvm_gpu_semaphore_notifier_t),
                                          0,
                                          &page->conf_computing.notifier_memory);
    if (status != NV_OK)
        goto error;

    return NV_OK;

error:
    pool_page_free_buffers(page);
    return status;
}
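
// Per-semaphore slot layout implied by the allocations above (illustrative):
// semaphore i uses bytes [i * UVM_SEMAPHORE_SIZE, (i + 1) * UVM_SEMAPHORE_SIZE)
// of page->memory for its payload and, when the pool is secure, the same
// offsets in encrypted_payload_memory for the encrypted copy, entry i of
// notifier_memory for its notifier, and the bytes starting at
// i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE of auth_tag_memory for its
// authentication tag. The uvm_gpu_semaphore_get_*_va() helpers below compute
// exactly these offsets.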

static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
{
    NV_STATUS status;
    uvm_gpu_semaphore_pool_page_t *pool_page;

    uvm_assert_mutex_locked(&pool->mutex);

    pool_page = uvm_kvmalloc_zero(sizeof(*pool_page));

    if (!pool_page)
        return NV_ERR_NO_MEMORY;

    pool_page->pool = pool;

    status = pool_page_alloc_buffers(pool_page);
    if (status != NV_OK)
        goto error;

    // Verify the GPU can access the semaphore pool.
    UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));

    // All semaphores are initially free
    bitmap_fill(pool_page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);

    list_add(&pool_page->all_pages_node, &pool->pages);
    pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;

    if (semaphore_uses_canary(pool)) {
        size_t i;
        NvU32 *payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);

        for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
            payloads[i] = make_canary(0);
    }

    return NV_OK;

error:
    uvm_kvfree(pool_page);
    return status;
}

static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
{
    uvm_gpu_semaphore_pool_t *pool;

    UVM_ASSERT(page);
    pool = page->pool;

    uvm_assert_mutex_locked(&pool->mutex);

    // Assert that no semaphores are still allocated
    UVM_ASSERT(bitmap_full(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE));
    UVM_ASSERT_MSG(pool->free_semaphores_count >= UVM_SEMAPHORE_COUNT_PER_PAGE,
                   "count: %u\n",
                   pool->free_semaphores_count);

    if (semaphore_uses_canary(pool)) {
        size_t i;
        NvU32 *payloads = uvm_rm_mem_get_cpu_va(page->memory);
        for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
            UVM_ASSERT(is_canary(payloads[i]));
    }

    pool->free_semaphores_count -= UVM_SEMAPHORE_COUNT_PER_PAGE;
    list_del(&page->all_pages_node);
    pool_page_free_buffers(page);
    uvm_kvfree(page);
}

NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaphore_t *semaphore)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_semaphore_pool_page_t *page;

    memset(semaphore, 0, sizeof(*semaphore));

    uvm_mutex_lock(&pool->mutex);

    if (pool->free_semaphores_count == 0)
        status = pool_alloc_page(pool);

    if (status != NV_OK)
        goto done;

    list_for_each_entry(page, &pool->pages, all_pages_node) {
        const NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);

        UVM_ASSERT(semaphore_index <= UVM_SEMAPHORE_COUNT_PER_PAGE);

        if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
            continue;

        semaphore->page = page;
        semaphore->index = semaphore_index;

        if (gpu_semaphore_pool_is_secure(pool)) {

            // Reset the notifier to prevent a false positive tampering
            // detection when checking for an updated value.
            *uvm_gpu_semaphore_get_notifier_cpu_va(semaphore) = semaphore->conf_computing.last_observed_notifier;
        }

        if (semaphore_uses_canary(pool))
            UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));

        uvm_gpu_semaphore_set_payload(semaphore, 0);

        __clear_bit(semaphore_index, page->free_semaphores);
        --pool->free_semaphores_count;

        goto done;
    }

    UVM_ASSERT_MSG(0, "Failed to find a semaphore after allocating a new page\n");
    status = NV_ERR_GENERIC;

done:
    uvm_mutex_unlock(&pool->mutex);

    return status;
}

void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_pool_page_t *page;
    uvm_gpu_semaphore_pool_t *pool;

    UVM_ASSERT(semaphore);

    // uvm_gpu_semaphore_t is meant to be embedded in other structures, so it
    // should always be accessible, but it may not have been initialized in
    // error paths. Early out if page is NULL, indicating the semaphore was
    // never successfully allocated.
    page = semaphore->page;
    if (page == NULL)
        return;

    pool = page->pool;

    // Write a known value lower than the current payload in an attempt to
    // catch release-after-free and acquire-after-free.
    if (semaphore_uses_canary(pool))
        uvm_gpu_semaphore_set_payload(semaphore, make_canary(uvm_gpu_semaphore_get_payload(semaphore)));

    uvm_mutex_lock(&pool->mutex);

    semaphore->page = NULL;

    ++pool->free_semaphores_count;
    __set_bit(semaphore->index, page->free_semaphores);

    uvm_mutex_unlock(&pool->mutex);
}
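
// Minimal usage sketch (illustrative only, excluded from compilation): a
// typical lifetime pairs uvm_gpu_semaphore_alloc() with
// uvm_gpu_semaphore_free(), with the CPU reading the payload that the GPU
// advances. The function name below is hypothetical and error handling is
// reduced to the bare minimum.
#if 0
static NV_STATUS example_semaphore_roundtrip(uvm_gpu_semaphore_pool_t *pool)
{
    uvm_gpu_semaphore_t sem;
    NV_STATUS status = uvm_gpu_semaphore_alloc(pool, &sem);

    if (status != NV_OK)
        return status;

    // Freshly allocated semaphores always start at payload 0
    UVM_ASSERT(uvm_gpu_semaphore_get_payload(&sem) == 0);

    // A push would normally release the semaphore from the GPU; the CPU write
    // here only stands in for that.
    uvm_gpu_semaphore_set_payload(&sem, 1);

    uvm_gpu_semaphore_free(&sem);
    return NV_OK;
}
#endif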

NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
    uvm_gpu_semaphore_pool_t *pool;
    pool = uvm_kvmalloc_zero(sizeof(*pool));

    if (!pool)
        return NV_ERR_NO_MEMORY;

    uvm_mutex_init(&pool->mutex, UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL);

    INIT_LIST_HEAD(&pool->pages);

    pool->free_semaphores_count = 0;
    pool->gpu = gpu;
    pool->aperture = UVM_APERTURE_SYS;

    *pool_out = pool;

    return NV_OK;
}

NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
    NV_STATUS status;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    status = uvm_gpu_semaphore_pool_create(gpu, pool_out);
    if (status == NV_OK)
        (*pool_out)->aperture = UVM_APERTURE_VID;

    return status;
}

void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool)
{
    uvm_gpu_semaphore_pool_page_t *page;
    uvm_gpu_semaphore_pool_page_t *next_page;

    if (!pool)
        return;

    // No other thread should be touching the pool once it's being destroyed
    uvm_assert_mutex_unlocked(&pool->mutex);

    // Keep pool_free_page happy
    uvm_mutex_lock(&pool->mutex);

    list_for_each_entry_safe(page, next_page, &pool->pages, all_pages_node)
        pool_free_page(page);

    UVM_ASSERT_MSG(pool->free_semaphores_count == 0, "unused: %u", pool->free_semaphores_count);
    UVM_ASSERT(list_empty(&pool->pages));

    uvm_mutex_unlock(&pool->mutex);

    uvm_kvfree(pool);
}

NV_STATUS uvm_gpu_semaphore_pool_map_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_semaphore_pool_page_t *page;

    UVM_ASSERT(pool);
    UVM_ASSERT(gpu);

    uvm_mutex_lock(&pool->mutex);

    list_for_each_entry(page, &pool->pages, all_pages_node) {
        status = uvm_rm_mem_map_gpu(page->memory, gpu, 0);
        if (status != NV_OK)
            goto done;
    }

done:
    uvm_mutex_unlock(&pool->mutex);

    return status;
}

void uvm_gpu_semaphore_pool_unmap_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu)
{
    uvm_gpu_semaphore_pool_page_t *page;

    UVM_ASSERT(pool);
    UVM_ASSERT(gpu);

    uvm_mutex_lock(&pool->mutex);

    list_for_each_entry(page, &pool->pages, all_pages_node)
        uvm_rm_mem_unmap_gpu(page->memory, gpu);

    uvm_mutex_unlock(&pool->mutex);
}

NvU64 uvm_gpu_semaphore_get_gpu_uvm_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu)
{
    return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, false);
}

NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu)
{
    return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, true);
}

NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
    NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space).address;

    return base_va + semaphore->index * UVM_SEMAPHORE_SIZE;
}

NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    char *base_va;

    if (gpu_semaphore_is_secure(semaphore))
        return &semaphore->conf_computing.cached_payload;

    base_va = uvm_rm_mem_get_cpu_va(semaphore->page->memory);
    return (NvU32*)(base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
}

NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    char *encrypted_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.encrypted_payload_memory);

    return (NvU32*)(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
}

uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    NvU64 encrypted_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.encrypted_payload_memory,
                                                        semaphore->page->pool->gpu);

    return uvm_gpu_address_virtual_unprotected(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
}

uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_notifier_t *notifier_base_va =
        uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.notifier_memory);

    return notifier_base_va + semaphore->index;
}

uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    NvU64 notifier_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.notifier_memory,
                                                       semaphore->page->pool->gpu);

    return uvm_gpu_address_virtual_unprotected(notifier_base_va +
                                               semaphore->index * sizeof(uvm_gpu_semaphore_notifier_t));
}

void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    char *auth_tag_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.auth_tag_memory);

    return (void*)(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
}

uvm_gpu_address_t uvm_gpu_semaphore_get_auth_tag_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    NvU64 auth_tag_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.auth_tag_memory,
                                                       semaphore->page->pool->gpu);

    return uvm_gpu_address_virtual_unprotected(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
}

NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
{
    return UVM_GPU_READ_ONCE(*uvm_gpu_semaphore_get_cpu_va(semaphore));
}

void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload)
{
    // Provide a guarantee that all memory accesses prior to setting the payload
    // won't be moved past it.
    // Use a big hammer mb() as set_payload() is not used in any performance path
    // today.
    // This could likely be optimized to be either an smp_store_release() or use
    // an smp_mb__before_atomic() barrier. The former is a relatively recent
    // addition to the kernel, though, and it's not clear whether combining the
    // latter with a regular 32-bit store is well defined in all cases. Both also
    // seem to risk being optimized out on non-SMP configs (we need them for
    // interacting with the GPU correctly even on non-SMP).
    mb();

    UVM_GPU_WRITE_ONCE(*uvm_gpu_semaphore_get_cpu_va(semaphore), payload);
}

// This function is intended to catch channels which have been left dangling in
// trackers after their owning GPUs have been destroyed.
static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
    uvm_gpu_t *gpu = tracking_sem->semaphore.page->pool->gpu;
    uvm_gpu_t *table_gpu;

    UVM_ASSERT_MSG(gpu->magic == UVM_GPU_MAGIC_VALUE, "Corruption detected: magic number is 0x%llx\n", gpu->magic);

    // It's ok for the GPU to not be in the global table, since add_gpu operates
    // on trackers before adding the GPU to the table, and remove_gpu operates
    // on trackers after removing the GPU. We rely on the magic value to catch
    // those cases.
    //
    // But if a pointer is in the table it must match.
    table_gpu = uvm_gpu_get(gpu->id);
    if (table_gpu)
        UVM_ASSERT(table_gpu == gpu);

    // Return a boolean so this function can be used in assertions for
    // conditional compilation
    return true;
}

bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));

    return g_uvm_global.conf_computing_enabled;
}

NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
{
    NV_STATUS status;
    uvm_lock_order_t order = UVM_LOCK_ORDER_LEAF;

    memset(tracking_sem, 0, sizeof(*tracking_sem));

    status = uvm_gpu_semaphore_alloc(pool, &tracking_sem->semaphore);
    if (status != NV_OK)
        return status;

    UVM_ASSERT(uvm_gpu_semaphore_get_payload(&tracking_sem->semaphore) == 0);

    if (g_uvm_global.conf_computing_enabled)
        order = UVM_LOCK_ORDER_SECURE_SEMAPHORE;

    if (tracking_semaphore_uses_mutex(tracking_sem))
        uvm_mutex_init(&tracking_sem->m_lock, order);
    else
        uvm_spin_lock_init(&tracking_sem->s_lock, order);

    atomic64_set(&tracking_sem->completed_value, 0);
    tracking_sem->queued_value = 0;

    return NV_OK;
}

void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
    uvm_gpu_semaphore_free(&tracking_sem->semaphore);
}

static void gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
{
    NvU32 local_payload;
    uvm_gpu_semaphore_notifier_t gpu_notifier;
    uvm_gpu_semaphore_notifier_t new_gpu_notifier = 0;

    // A channel can have multiple entries pending and the tracking semaphore
    // update of each entry can race with this function. Since the semaphore
    // needs to be updated to release a used entry, we never need more
    // than 'num_gpfifo_entries' retries.
    unsigned tries_left = channel->num_gpfifo_entries;
    NV_STATUS status = NV_OK;
    NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
    uvm_gpu_semaphore_notifier_t *semaphore_notifier_cpu_addr = uvm_gpu_semaphore_get_notifier_cpu_va(semaphore);

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(uvm_channel_is_ce(channel));

    do {
        gpu_notifier = UVM_READ_ONCE(*semaphore_notifier_cpu_addr);

        UVM_ASSERT(gpu_notifier >= semaphore->conf_computing.last_observed_notifier);

        // Odd notifier value means there's an update in progress.
        if (gpu_notifier % 2)
            continue;

        // There's no change since last time
        if (gpu_notifier == semaphore->conf_computing.last_observed_notifier)
            return;

        // Make sure no memory accesses happen before we read the notifier
        smp_mb__after_atomic();

        memcpy(local_auth_tag, uvm_gpu_semaphore_get_auth_tag_cpu_va(semaphore), sizeof(local_auth_tag));
        local_payload = UVM_READ_ONCE(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(semaphore));

        // Make sure the second read of notifier happens after
        // all memory accesses.
        smp_mb__before_atomic();
        new_gpu_notifier = UVM_READ_ONCE(*semaphore_notifier_cpu_addr);
        tries_left--;
    } while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));

    if (!tries_left) {
        status = NV_ERR_INVALID_STATE;
    }
    else {
        NvU32 key_version;
        const NvU32 iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
        NvU32 new_semaphore_value;

        UVM_ASSERT(gpu_notifier == new_gpu_notifier);
        UVM_ASSERT(gpu_notifier % 2 == 0);

        // CPU decryption is guaranteed to use the same key version as the
        // associated GPU encryption, because if there was any key rotation in
        // between, then key rotation waited for all channels to complete before
        // proceeding. The wait implies that the semaphore value matches the
        // last one encrypted on the GPU, so this CPU decryption should happen
        // before the key is rotated.
        key_version = uvm_channel_pool_key_version(channel->pool);

        status = uvm_conf_computing_cpu_decrypt(channel,
                                                &new_semaphore_value,
                                                &local_payload,
                                                &semaphore->conf_computing.ivs[iv_index],
                                                key_version,
                                                sizeof(new_semaphore_value),
                                                &local_auth_tag);

        if (status != NV_OK)
            goto error;

        uvm_gpu_semaphore_set_payload(semaphore, new_semaphore_value);
        UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);

        return;
    }

error:
    // Decryption failure is a fatal error, and so is running out of retries.
    // In testing, every decryption succeeded within a single try; needing to
    // exhaust the retry budget would be considered active tampering with the
    // data structures.
    uvm_global_set_fatal_error(status);
}
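
// The loop above follows a seqlock-style protocol (described here for
// illustration): before re-encrypting the payload the GPU bumps the notifier
// to an odd value, and after writing the encrypted payload and auth tag it
// bumps it again to the next even value. The CPU therefore snapshots the
// notifier, copies out the payload and tag, and re-reads the notifier; only a
// stable, even value (e.g. 4 -> 4) proves the copy is consistent, while
// observing 4 -> 5 or 5 -> 6 means an update raced with the read and the copy
// must be retried.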

static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
    NvU64 old_value = atomic64_read(&tracking_semaphore->completed_value);
    // The semaphore value is the bottom 32 bits of completed_value
    NvU32 old_sem_value = (NvU32)old_value;
    NvU32 new_sem_value;
    NvU64 new_value;

    if (tracking_semaphore_uses_mutex(tracking_semaphore))
        uvm_assert_mutex_locked(&tracking_semaphore->m_lock);
    else
        uvm_assert_spinlock_locked(&tracking_semaphore->s_lock);

    if (gpu_semaphore_is_secure(&tracking_semaphore->semaphore)) {
        // TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
        // mechanism to all semaphores
        uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
        gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
    }

    new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);

    // The following logic to update the completed value is very subtle, it
    // helps to read https://www.kernel.org/doc/Documentation/memory-barriers.txt
    // before going through this code.

    if (old_sem_value == new_sem_value) {
        // No progress since the last update.
        // No additional memory barrier required in this case as completed_value
        // is always updated under the lock that this thread just acquired.
        // That guarantees full ordering with all the accesses the thread that
        // updated completed_value did under the lock including the GPU
        // semaphore read.
        return old_value;
    }

    // Replace the bottom 32-bits with the new semaphore value
    new_value = (old_value & 0xFFFFFFFF00000000ull) | new_sem_value;

    // If we've wrapped around, add 2^32 to the value
    // Notably the user of the GPU tracking semaphore needs to guarantee that
    // the value is updated often enough to notice the wrap around each time it
    // happens. In case of a channel tracking semaphore that's released for each
    // push, it's easily guaranteed because of the small number of GPFIFO
    // entries available per channel (there could be at most as many pending
    // pushes as GPFIFO entries).
    if (unlikely(new_sem_value < old_sem_value))
        new_value += 1ULL << 32;
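
    // Worked example (illustrative): with old_value == 0x1FFFFFFF0 the bottom
    // 32 bits are 0xFFFFFFF0. If the GPU semaphore now reads 0x00000010, the
    // replacement above yields 0x100000010; since 0x10 < 0xFFFFFFF0 the wrap
    // is detected and 2^32 is added, giving a completed value of 0x200000010,
    // so the 64-bit count keeps increasing monotonically across the 32-bit
    // wrap.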

    // Check for unexpected large jumps of the semaphore value
    UVM_ASSERT_MSG_RELEASE(new_value - old_value <= UVM_GPU_SEMAPHORE_MAX_JUMP,
                           "GPU %s unexpected semaphore (CPU VA 0x%llx) jump from 0x%llx to 0x%llx\n",
                           uvm_gpu_name(tracking_semaphore->semaphore.page->pool->gpu),
                           (NvU64)(uintptr_t)uvm_gpu_semaphore_get_cpu_va(&tracking_semaphore->semaphore),
                           old_value, new_value);

    // Use an atomic write even though the lock is held so that the value can
    // be (carefully) read atomically outside of the lock.
    //
    // atomic64_set() on its own doesn't imply any memory barriers and we need
    // prior memory accesses (in particular the read of the GPU semaphore
    // payload) by this thread to be visible to other threads that see the newly
    // set completed_value. smp_mb__before_atomic() provides that ordering.
    //
    // Also see the comment and matching smp_mb__after_atomic() barrier in
    // uvm_gpu_tracking_semaphore_is_value_completed().
    //
    // Notably as of 4.3, atomic64_set_release() and atomic64_read_acquire()
    // have been added that are exactly what we need and could be slightly
    // faster on arm and powerpc than the implementation below. But at least in
    // 4.3 the implementation looks broken for arm32 (it maps directly to
    // smp_load_acquire() and that doesn't support 64-bit reads on 32-bit
    // architectures) so instead of dealing with that just use a slightly bigger
    // hammer.
    smp_mb__before_atomic();
    atomic64_set(&tracking_semaphore->completed_value, new_value);

    // For this thread, we don't want any later accesses to be ordered above the
    // GPU semaphore read. This could be accomplished by using a
    // smp_load_acquire() for reading it, but given that it's also a pretty
    // recent addition to the kernel, just leverage smp_mb__after_atomic() that
    // guarantees that no accesses will be ordered above the atomic (and hence
    // the GPU semaphore read).
    //
    // Notably the soon following unlock is a release barrier that allows later
    // memory accesses to be reordered above it and hence doesn't provide the
    // necessary ordering with the GPU semaphore read.
    //
    // Also notably this would still need to be handled if we ever switch to
    // atomic64_set_release() and atomic64_read_acquire() for accessing
    // completed_value.
    smp_mb__after_atomic();

    return new_value;
}

NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
    NvU64 completed;

    // Check that the GPU which owns the semaphore is still present
    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));

    if (tracking_semaphore_uses_mutex(tracking_semaphore))
        uvm_mutex_lock(&tracking_semaphore->m_lock);
    else
        uvm_spin_lock(&tracking_semaphore->s_lock);

    completed = update_completed_value_locked(tracking_semaphore);

    if (tracking_semaphore_uses_mutex(tracking_semaphore))
        uvm_mutex_unlock(&tracking_semaphore->m_lock);
    else
        uvm_spin_unlock(&tracking_semaphore->s_lock);

    return completed;
}

bool uvm_gpu_tracking_semaphore_is_value_completed(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value)
{
    NvU64 completed = atomic64_read(&tracking_sem->completed_value);

    // Check that the GPU which owns the semaphore is still present
    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_sem));

    if (completed >= value) {
        // atomic64_read() doesn't imply any memory barriers and we need all
        // subsequent memory accesses in this thread to be ordered after the
        // atomic read of the completed value above as that will also order them
        // with any accesses (in particular the GPU semaphore read) performed by
        // the other thread prior to it setting the completed_value we read.
        // smp_mb__after_atomic() provides that ordering.
        //
        // Also see the comment in update_completed_value_locked().
        smp_mb__after_atomic();

        return true;
    }

    return uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) >= value;
}