/*******************************************************************************
    Copyright (c) 2021-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_common.h"
#include "uvm_global.h"
#include "uvm_conf_computing.h"
#include "uvm_kvmalloc.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "uvm_mem.h"
#include "uvm_processors.h"
#include "uvm_tracker.h"
#include "nv_uvm_interface.h"
#include "uvm_va_block.h"

// Amount of encrypted data on a given engine that triggers key rotation. This
// is a UVM internal threshold, different from that of RM, and used only during
// testing.
//
// Key rotation is triggered when the total encryption size or the total
// decryption size (whichever comes first) reaches this lower threshold on the
// engine.
#define UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD (UVM_SIZE_1MB * 8)

// The maximum number of secure operations per push is:
// UVM_MAX_PUSH_SIZE / min(CE encryption size, CE decryption size)
// + 1 (tracking semaphore) = 128 * 1024 / 56 + 1 = 2342
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN 2342lu

// Channels use 32-bit counters, so the value after rotation is 0xffffffff.
// Setting the limit to this value (or higher) will result in rotation
// on every check. However, pre-emptive rotation when submitting control
// GPFIFO entries relies on the fact that multiple successive checks after
// rotation do not trigger more rotations if there was no IV used in between.
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MAX 0xfffffffelu

// Attempt rotation when two billion IVs are left. The IV rotation call can
// fail if the necessary locks are not available, so multiple attempts may be
// needed for IV rotation to succeed.
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT (1lu << 31)

// Start rotating after 500 encryptions/decryptions when running tests.
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_TESTS ((1lu << 32) - 500lu)
static ulong uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT;

module_param(uvm_conf_computing_channel_iv_rotation_limit, ulong, S_IRUGO);
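
// Illustrative note, not part of the original file: since the parameter is
// read-only at runtime (S_IRUGO), the limit can only be changed at module load
// time. A hypothetical invocation, assuming the module is loaded as nvidia-uvm:
//
//     modprobe nvidia-uvm uvm_conf_computing_channel_iv_rotation_limit=4294966796
//
// The value above equals (1lu << 32) - 500, i.e. the testing limit. Values
// outside [UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN, _MAX] are reset to the
// default by uvm_conf_computing_gpu_init().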

static UvmGpuConfComputeMode uvm_conf_computing_get_mode(const uvm_parent_gpu_t *parent)
{
    return parent->rm_info.gpuConfComputeCaps.mode;
}

bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
{
    return uvm_conf_computing_get_mode(gpu->parent) == UVM_GPU_CONF_COMPUTE_MODE_HCC;
}

void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
{
    uvm_parent_gpu_t *other_parent;
    UvmGpuConfComputeMode parent_mode = uvm_conf_computing_get_mode(parent);

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    // The Confidential Computing state of the GPU should match that of the
    // system.
    UVM_ASSERT((parent_mode != UVM_GPU_CONF_COMPUTE_MODE_NONE) == g_uvm_global.conf_computing_enabled);

    // All GPUs derive Confidential Computing status from their parent. By
    // current policy all parent GPUs have identical Confidential Computing
    // status.
    for_each_parent_gpu(other_parent)
        UVM_ASSERT(parent_mode == uvm_conf_computing_get_mode(other_parent));
}

static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                      uvm_conf_computing_dma_buffer_t *dma_buffer)
{
    uvm_assert_mutex_locked(&dma_buffer_pool->lock);

    list_del(&dma_buffer->node);
    uvm_tracker_wait_deinit(&dma_buffer->tracker);

    uvm_mem_free(dma_buffer->alloc);
    uvm_mem_free(dma_buffer->auth_tag);
    uvm_kvfree(dma_buffer);
}

static uvm_gpu_t *dma_buffer_pool_to_gpu(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    return container_of(dma_buffer_pool, uvm_gpu_t, conf_computing.dma_buffer_pool);
}

// Allocate and map a new DMA stage buffer to CPU and GPU (VA)
static NV_STATUS dma_buffer_create(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                   uvm_conf_computing_dma_buffer_t **dma_buffer_out)
{
    uvm_gpu_t *dma_owner;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_mem_t *alloc = NULL;
    NV_STATUS status = NV_OK;
    size_t auth_tags_size = (UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;

    dma_buffer = uvm_kvmalloc_zero(sizeof(*dma_buffer));
    if (!dma_buffer)
        return NV_ERR_NO_MEMORY;

    dma_owner = dma_buffer_pool_to_gpu(dma_buffer_pool);
    uvm_tracker_init(&dma_buffer->tracker);
    INIT_LIST_HEAD(&dma_buffer->node);

    status = uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE, dma_owner, NULL, &alloc);
    if (status != NV_OK)
        goto err;

    dma_buffer->alloc = alloc;

    status = uvm_mem_map_gpu_kernel(alloc, dma_owner);
    if (status != NV_OK)
        goto err;

    status = uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(auth_tags_size, dma_owner, NULL, &alloc);
    if (status != NV_OK)
        goto err;

    dma_buffer->auth_tag = alloc;

    status = uvm_mem_map_gpu_kernel(alloc, dma_owner);
    if (status != NV_OK)
        goto err;

    *dma_buffer_out = dma_buffer;

    return status;

err:
    dma_buffer_destroy_locked(dma_buffer_pool, dma_buffer);
    return status;
}

void uvm_conf_computing_dma_buffer_pool_sync(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    uvm_conf_computing_dma_buffer_t *dma_buffer;

    if (dma_buffer_pool->num_dma_buffers == 0)
        return;

    uvm_mutex_lock(&dma_buffer_pool->lock);
    list_for_each_entry(dma_buffer, &dma_buffer_pool->free_dma_buffers, node)
        uvm_tracker_wait(&dma_buffer->tracker);
    uvm_mutex_unlock(&dma_buffer_pool->lock);
}

static void conf_computing_dma_buffer_pool_deinit(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_conf_computing_dma_buffer_t *next_buff;

    if (dma_buffer_pool->num_dma_buffers == 0)
        return;

    // Because the pool is torn down at the same time the GPU is unregistered,
    // the lock is required only to quiet assertions, not for functional
    // reasons (see dma_buffer_destroy_locked()).
    uvm_mutex_lock(&dma_buffer_pool->lock);

    list_for_each_entry_safe(dma_buffer, next_buff, &dma_buffer_pool->free_dma_buffers, node) {
        dma_buffer_destroy_locked(dma_buffer_pool, dma_buffer);
        dma_buffer_pool->num_dma_buffers--;
    }

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers == 0);
    UVM_ASSERT(list_empty(&dma_buffer_pool->free_dma_buffers));
    uvm_mutex_unlock(&dma_buffer_pool->lock);
}

static void dma_buffer_pool_add(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                uvm_conf_computing_dma_buffer_t *dma_buffer)
{
    uvm_assert_mutex_locked(&dma_buffer_pool->lock);
    list_add_tail(&dma_buffer->node, &dma_buffer_pool->free_dma_buffers);
}

static NV_STATUS conf_computing_dma_buffer_pool_init(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    size_t i;
    size_t num_dma_buffers = 32;
    NV_STATUS status = NV_OK;

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers == 0);
    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    INIT_LIST_HEAD(&dma_buffer_pool->free_dma_buffers);
    uvm_mutex_init(&dma_buffer_pool->lock, UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL);
    dma_buffer_pool->num_dma_buffers = num_dma_buffers;

    uvm_mutex_lock(&dma_buffer_pool->lock);
    for (i = 0; i < num_dma_buffers; i++) {
        uvm_conf_computing_dma_buffer_t *dma_buffer;

        status = dma_buffer_create(dma_buffer_pool, &dma_buffer);
        if (status != NV_OK)
            break;

        dma_buffer_pool_add(dma_buffer_pool, dma_buffer);
    }
    uvm_mutex_unlock(&dma_buffer_pool->lock);

    if (i < num_dma_buffers)
        conf_computing_dma_buffer_pool_deinit(dma_buffer_pool);

    return status;
}

static NV_STATUS dma_buffer_pool_expand_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    size_t i;
    uvm_gpu_t *gpu;
    size_t nb_to_alloc;
    NV_STATUS status = NV_OK;

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers > 0);

    gpu = dma_buffer_pool_to_gpu(dma_buffer_pool);
    nb_to_alloc = dma_buffer_pool->num_dma_buffers;
    for (i = 0; i < nb_to_alloc; ++i) {
        uvm_conf_computing_dma_buffer_t *dma_buffer;

        status = dma_buffer_create(dma_buffer_pool, &dma_buffer);
        if (status != NV_OK)
            break;

        dma_buffer_pool_add(dma_buffer_pool, dma_buffer);
    }

    dma_buffer_pool->num_dma_buffers += i;

    if (i == 0)
        return status;

    return NV_OK;
}

NV_STATUS uvm_conf_computing_dma_buffer_alloc(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                              uvm_conf_computing_dma_buffer_t **dma_buffer_out,
                                              uvm_tracker_t *out_tracker)
{
    uvm_conf_computing_dma_buffer_t *dma_buffer = NULL;
    NV_STATUS status;

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers > 0);

    // TODO: Bug 3385623: Heuristically expand DMA memory pool
    uvm_mutex_lock(&dma_buffer_pool->lock);
    if (list_empty(&dma_buffer_pool->free_dma_buffers)) {
        status = dma_buffer_pool_expand_locked(dma_buffer_pool);

        if (status != NV_OK) {
            uvm_mutex_unlock(&dma_buffer_pool->lock);
            return status;
        }
    }

    // We're guaranteed that at least one DMA stage buffer is available at this
    // point.
    dma_buffer = list_first_entry(&dma_buffer_pool->free_dma_buffers, uvm_conf_computing_dma_buffer_t, node);
    list_del_init(&dma_buffer->node);
    uvm_mutex_unlock(&dma_buffer_pool->lock);

    status = uvm_tracker_wait_for_other_gpus(&dma_buffer->tracker, dma_buffer->alloc->dma_owner);
    if (status != NV_OK)
        goto error;

    if (out_tracker)
        status = uvm_tracker_add_tracker_safe(out_tracker, &dma_buffer->tracker);
    else
        status = uvm_tracker_wait(&dma_buffer->tracker);

    if (status != NV_OK)
        goto error;

    uvm_page_mask_zero(&dma_buffer->encrypted_page_mask);
    *dma_buffer_out = dma_buffer;

    return status;

error:
    uvm_tracker_deinit(&dma_buffer->tracker);
    uvm_conf_computing_dma_buffer_free(dma_buffer_pool, dma_buffer, NULL);
    return status;
}

void uvm_conf_computing_dma_buffer_free(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                        uvm_conf_computing_dma_buffer_t *dma_buffer,
                                        uvm_tracker_t *tracker)
{
    NV_STATUS status;

    if (!dma_buffer)
        return;

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers > 0);

    uvm_tracker_remove_completed(&dma_buffer->tracker);
    if (tracker) {
        uvm_tracker_remove_completed(tracker);
        status = uvm_tracker_add_tracker_safe(&dma_buffer->tracker, tracker);
        if (status != NV_OK)
            UVM_ASSERT(status == uvm_global_get_status());
    }

    uvm_mutex_lock(&dma_buffer_pool->lock);
    dma_buffer_pool_add(dma_buffer_pool, dma_buffer);
    uvm_mutex_unlock(&dma_buffer_pool->lock);
}
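
// Illustrative usage sketch, not part of the driver: a typical caller grabs a
// staging buffer from the GPU's pool, stages data through dma_buffer->alloc
// and dma_buffer->auth_tag, and returns the buffer together with a tracker
// covering any work still using it. Variable names below are hypothetical.
//
//     uvm_conf_computing_dma_buffer_t *dma_buffer;
//     uvm_tracker_t tracker = UVM_TRACKER_INIT();
//     NV_STATUS status;
//
//     status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
//     if (status != NV_OK)
//         return status;
//
//     // ... encrypt into dma_buffer->alloc, push GPU work, add it to tracker ...
//
//     uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, &tracker);
//     uvm_tracker_deinit(&tracker);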

static void dummy_iv_mem_deinit(uvm_gpu_t *gpu)
{
    uvm_mem_free(gpu->conf_computing.iv_mem);
}

static NV_STATUS dummy_iv_mem_init(uvm_gpu_t *gpu)
{
    NV_STATUS status;

    if (!uvm_conf_computing_mode_is_hcc(gpu))
        return NV_OK;

    status = uvm_mem_alloc_sysmem_dma(sizeof(UvmCslIv), gpu, NULL, &gpu->conf_computing.iv_mem);
    if (status != NV_OK)
        return status;

    status = uvm_mem_map_gpu_kernel(gpu->conf_computing.iv_mem, gpu);
    if (status != NV_OK)
        goto error;

    return NV_OK;

error:
    dummy_iv_mem_deinit(gpu);
    return status;
}

// The production key rotation defaults are such that key rotations rarely
// happen. During UVM testing, more frequent rotations are triggered by relying
// on internal encryption usage accounting. When key rotations are triggered by
// UVM, the driver does not rely on channel key rotation notifiers.
//
// TODO: Bug 4612912: UVM should be able to programmatically set the rotation
// lower threshold. This function, and all the metadata associated with it
// (per-pool encryption accounting, for example), can be removed at that point.
static bool key_rotation_is_notifier_driven(void)
{
    return !uvm_enable_builtin_tests;
}

NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
{
    NV_STATUS status;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;

    status = conf_computing_dma_buffer_pool_init(&gpu->conf_computing.dma_buffer_pool);
    if (status != NV_OK)
        return status;

    status = dummy_iv_mem_init(gpu);
    if (status != NV_OK)
        goto error;

    if (uvm_enable_builtin_tests &&
        uvm_conf_computing_channel_iv_rotation_limit == UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT)
        uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_TESTS;

    if (uvm_conf_computing_channel_iv_rotation_limit < UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN ||
        uvm_conf_computing_channel_iv_rotation_limit > UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MAX) {
        UVM_ERR_PRINT("Value of uvm_conf_computing_channel_iv_rotation_limit: %lu is outside of the safe "
                      "range: <%lu, %lu>. Using the default value instead (%lu)\n",
                      uvm_conf_computing_channel_iv_rotation_limit,
                      UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN,
                      UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MAX,
                      UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT);
        uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT;
    }

    return NV_OK;

error:
    uvm_conf_computing_gpu_deinit(gpu);
    return status;
}

void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu)
{
    dummy_iv_mem_deinit(gpu);
    conf_computing_dma_buffer_pool_deinit(&gpu->conf_computing.dma_buffer_pool);
}

void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv)
{
    NV_STATUS status;
    uvm_channel_pool_t *pool;

    if (uvm_channel_is_lcic(channel))
        pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
    else
        pool = channel->pool;

    uvm_mutex_lock(&channel->csl.ctx_lock);

    if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
        status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, size);

        // Informing RM of an encryption/decryption should not fail
        UVM_ASSERT(status == NV_OK);

        if (!key_rotation_is_notifier_driven())
            atomic64_add(size, &pool->conf_computing.key_rotation.encrypted);
    }

    status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);

    // IV rotation is done preemptively as needed, so the above
    // call cannot return failure.
    UVM_ASSERT(status == NV_OK);

    uvm_mutex_unlock(&channel->csl.ctx_lock);
}

void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv)
{
    NV_STATUS status;

    uvm_mutex_lock(&channel->csl.ctx_lock);
    status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, 1, iv);
    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // IV rotation is done preemptively as needed, so the above
    // call cannot return failure.
    UVM_ASSERT(status == NV_OK);
}

void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
                                    void *dst_cipher,
                                    const void *src_plain,
                                    UvmCslIv *encrypt_iv,
                                    size_t size,
                                    void *auth_tag_buffer)
{
    NV_STATUS status;
    uvm_channel_pool_t *pool;

    UVM_ASSERT(size);

    if (uvm_channel_is_lcic(channel))
        pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
    else
        pool = channel->pool;

    uvm_mutex_lock(&channel->csl.ctx_lock);

    status = nvUvmInterfaceCslEncrypt(&channel->csl.ctx,
                                      size,
                                      (NvU8 const *) src_plain,
                                      encrypt_iv,
                                      (NvU8 *) dst_cipher,
                                      (NvU8 *) auth_tag_buffer);

    // IV rotation is done preemptively as needed, so the above
    // call cannot return failure.
    UVM_ASSERT(status == NV_OK);

    if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
        status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, size);

        // Informing RM of an encryption/decryption should not fail
        UVM_ASSERT(status == NV_OK);

        if (!key_rotation_is_notifier_driven())
            atomic64_add(size, &pool->conf_computing.key_rotation.decrypted);
    }

    uvm_mutex_unlock(&channel->csl.ctx_lock);
}

NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
                                         void *dst_plain,
                                         const void *src_cipher,
                                         const UvmCslIv *src_iv,
                                         NvU32 key_version,
                                         size_t size,
                                         const void *auth_tag_buffer)
{
    NV_STATUS status;

    // The CSL context associated with a channel can be used by multiple
    // threads. The IV sequence is thus guaranteed only while the channel is
    // "locked for push". The channel/push lock is released in
    // "uvm_channel_end_push", and at that time the GPU encryption operations
    // have not executed, yet. Therefore the caller has to use
    // "uvm_conf_computing_log_gpu_encryption" to explicitly store IVs needed
    // to perform CPU decryption and pass those IVs to this function after the
    // push that did the encryption completes.
    UVM_ASSERT(src_iv);

    uvm_mutex_lock(&channel->csl.ctx_lock);
    status = nvUvmInterfaceCslDecrypt(&channel->csl.ctx,
                                      size,
                                      (const NvU8 *) src_cipher,
                                      src_iv,
                                      key_version,
                                      (NvU8 *) dst_plain,
                                      NULL,
                                      0,
                                      (const NvU8 *) auth_tag_buffer);

    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, channel %s, GPU %s\n",
                      nvstatusToString(status),
                      channel->name,
                      uvm_gpu_name(uvm_channel_get_gpu(channel)));
    }

    uvm_mutex_unlock(&channel->csl.ctx_lock);

    return status;
}
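
// Illustrative sketch of the IV hand-off described above, not part of the
// driver; see uvm_conf_computing_util_memcopy_gpu_to_cpu() below for the real
// in-tree user of this pattern. The IV is captured while the channel is still
// locked for push, and only consumed once the push has completed. Variable
// names are hypothetical.
//
//     UvmCslIv decrypt_iv;
//     NvU32 key_version;
//
//     uvm_conf_computing_log_gpu_encryption(push.channel, size, &decrypt_iv);
//     key_version = uvm_channel_pool_key_version(push.channel->pool);
//     gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
//
//     status = uvm_push_end_and_wait(&push);
//     if (status == NV_OK)
//         status = uvm_conf_computing_cpu_decrypt(push.channel, dst_plain, src_cipher,
//                                                 &decrypt_iv, key_version, size, auth_tag);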

NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
                                           void *dst_plain,
                                           const void *src_cipher,
                                           const void *auth_tag_buffer,
                                           NvU8 valid)
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;

    // There is no dedicated lock for the CSL context associated with replayable
    // faults. The mutual exclusion required by the RM CSL API is enforced by
    // relying on the GPU replayable service lock (ISR lock), since fault
    // decryption is invoked as part of fault servicing.
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    status = nvUvmInterfaceCslLogEncryption(csl_context, UVM_CSL_OPERATION_DECRYPT, fault_entry_size);

    // Informing RM of an encryption/decryption should not fail
    UVM_ASSERT(status == NV_OK);

    status = nvUvmInterfaceCslDecrypt(csl_context,
                                      fault_entry_size,
                                      (const NvU8 *) src_cipher,
                                      NULL,
                                      NV_U32_MAX,
                                      (NvU8 *) dst_plain,
                                      &valid,
                                      sizeof(valid),
                                      (const NvU8 *) auth_tag_buffer);

    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, GPU %s\n",
                      nvstatusToString(status),
                      uvm_parent_gpu_name(parent_gpu));
    }

    return status;
}

void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;

    // See comment in uvm_conf_computing_fault_decrypt
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    status = nvUvmInterfaceCslLogEncryption(csl_context, UVM_CSL_OPERATION_DECRYPT, fault_entry_size);

    // Informing RM of an encryption/decryption should not fail
    UVM_ASSERT(status == NV_OK);

    status = nvUvmInterfaceCslIncrementIv(csl_context, UVM_CSL_OPERATION_DECRYPT, 1, NULL);

    UVM_ASSERT(status == NV_OK);
}

void uvm_conf_computing_query_message_pools(uvm_channel_t *channel,
                                            NvU64 *remaining_encryptions,
                                            NvU64 *remaining_decryptions)
{
    NV_STATUS status;

    UVM_ASSERT(channel);
    UVM_ASSERT(remaining_encryptions);
    UVM_ASSERT(remaining_decryptions);

    uvm_mutex_lock(&channel->csl.ctx_lock);
    status = nvUvmInterfaceCslQueryMessagePool(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, remaining_encryptions);
    UVM_ASSERT(status == NV_OK);
    UVM_ASSERT(*remaining_encryptions <= NV_U32_MAX);

    status = nvUvmInterfaceCslQueryMessagePool(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, remaining_decryptions);
    UVM_ASSERT(status == NV_OK);
    UVM_ASSERT(*remaining_decryptions <= NV_U32_MAX);

    // LCIC channels never use CPU encrypt/GPU decrypt
    if (uvm_channel_is_lcic(channel))
        UVM_ASSERT(*remaining_encryptions == NV_U32_MAX);

    uvm_mutex_unlock(&channel->csl.ctx_lock);
}

static NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit_internal(uvm_channel_t *channel, NvU64 limit)
{
    NV_STATUS status = NV_OK;
    NvU64 remaining_encryptions, remaining_decryptions;
    bool rotate_encryption_iv, rotate_decryption_iv;

    UVM_ASSERT(uvm_channel_is_locked_for_push(channel) ||
               (uvm_channel_is_lcic(channel) && uvm_channel_manager_is_wlc_ready(channel->pool->manager)));

    uvm_conf_computing_query_message_pools(channel, &remaining_encryptions, &remaining_decryptions);

    // Ignore the decryption limit for SEC2: only CE channels support
    // GPU encrypt/CPU decrypt. However, RM reports _some_ decrementing
    // value for the SEC2 decryption counter.
    rotate_decryption_iv = (remaining_decryptions <= limit) && uvm_channel_is_ce(channel);
    rotate_encryption_iv = remaining_encryptions <= limit;

    if (!rotate_encryption_iv && !rotate_decryption_iv)
        return NV_OK;

    // Wait for all in-flight pushes. The caller needs to guarantee that no
    // concurrent pushes are created, e.g. by only calling rotate after
    // a channel is locked_for_push.
    status = uvm_channel_wait(channel);
    if (status != NV_OK)
        return status;

    uvm_mutex_lock(&channel->csl.ctx_lock);

    if (rotate_encryption_iv)
        status = nvUvmInterfaceCslRotateIv(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT);

    if (status == NV_OK && rotate_decryption_iv)
        status = nvUvmInterfaceCslRotateIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT);

    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // Change the error to out of resources if the available IVs are running
    // too low.
    if (status == NV_ERR_STATE_IN_USE &&
        (remaining_encryptions < UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN ||
         remaining_decryptions < UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN))
        return NV_ERR_INSUFFICIENT_RESOURCES;

    return status;
}

NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit(uvm_channel_t *channel, NvU64 limit, bool retry_if_busy)
{
    NV_STATUS status;

    do {
        status = uvm_conf_computing_rotate_channel_ivs_below_limit_internal(channel, limit);
    } while (retry_if_busy && status == NV_ERR_STATE_IN_USE);

    // Hide "busy" error. The rotation will be retried at the next opportunity.
    if (!retry_if_busy && status == NV_ERR_STATE_IN_USE)
        status = NV_OK;

    return status;
}

NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs(uvm_channel_t *channel)
{
    return uvm_conf_computing_rotate_channel_ivs_below_limit(channel, uvm_conf_computing_channel_iv_rotation_limit, false);
}

NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *channel)
{
    return uvm_conf_computing_rotate_channel_ivs_below_limit(channel, uvm_conf_computing_channel_iv_rotation_limit, true);
}
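
// Illustrative note, not part of the original file: per the assertion in
// uvm_conf_computing_rotate_channel_ivs_below_limit_internal(), these helpers
// expect the channel to be locked for push (or, for LCIC channels, WLC to be
// ready). A hypothetical caller that tolerates a temporarily busy CSL context:
//
//     status = uvm_conf_computing_maybe_rotate_channel_ivs(channel);
//     if (status != NV_OK)
//         return status;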

void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu)
{
    if (!g_uvm_global.conf_computing_enabled)
        return;

    // Key rotation cannot be enabled on UVM if it is disabled on RM
    if (!gpu->parent->rm_info.gpuConfComputeCaps.bKeyRotationEnabled)
        return;

    gpu->channel_manager->conf_computing.key_rotation_enabled = true;
}

void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)
{
    if (!g_uvm_global.conf_computing_enabled)
        return;

    gpu->channel_manager->conf_computing.key_rotation_enabled = false;
}

bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
{
    return gpu->channel_manager->conf_computing.key_rotation_enabled;
}

bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)
{
    if (!uvm_conf_computing_is_key_rotation_enabled(pool->manager->gpu))
        return false;

    // TODO: Bug 4586447: key rotation must be disabled in the SEC2 engine,
    // because currently the encryption key is shared between UVM and RM, but
    // UVM is not able to idle SEC2 channels owned by RM.
    if (uvm_channel_pool_is_sec2(pool))
        return false;

    // Key rotation happens as part of channel reservation, and LCIC channels
    // are never reserved directly. Rotation of keys in LCIC channels happens
    // as the result of key rotation in WLC channels.
    //
    // Return false even if there is nothing fundamental prohibiting direct key
    // rotation on LCIC pools.
    if (uvm_channel_pool_is_lcic(pool))
        return false;

    return true;
}

static bool conf_computing_is_key_rotation_pending_use_stats(uvm_channel_pool_t *pool)
{
    NvU64 decrypted, encrypted;

    UVM_ASSERT(!key_rotation_is_notifier_driven());

    decrypted = atomic64_read(&pool->conf_computing.key_rotation.decrypted);

    if (decrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
        return true;

    encrypted = atomic64_read(&pool->conf_computing.key_rotation.encrypted);

    if (encrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
        return true;

    return false;
}

static bool conf_computing_is_key_rotation_pending_use_notifier(uvm_channel_pool_t *pool)
{
    // If key rotation is pending for the pool's engine, then the key rotation
    // notifier in any of the engine channels can be used by UVM to detect the
    // situation. Note that RM doesn't update all the notifiers in a single
    // atomic operation, so it is possible that the channel read by UVM (the
    // first one in the pool) indicates that a key rotation is pending, but
    // another channel in the pool (temporarily) indicates the opposite, or vice
    // versa.
    uvm_channel_t *first_channel = pool->channels;

    UVM_ASSERT(key_rotation_is_notifier_driven());
    UVM_ASSERT(first_channel != NULL);

    return first_channel->channel_info.keyRotationNotifier->status == UVM_KEY_ROTATION_STATUS_PENDING;
}

bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool)
{
    if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
        return false;

    if (key_rotation_is_notifier_driven())
        return conf_computing_is_key_rotation_pending_use_notifier(pool);
    else
        return conf_computing_is_key_rotation_pending_use_stats(pool);
}

NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool)
{
    NV_STATUS status;

    UVM_ASSERT(uvm_conf_computing_is_key_rotation_enabled_in_pool(pool));
    UVM_ASSERT(pool->conf_computing.key_rotation.csl_contexts != NULL);
    UVM_ASSERT(pool->conf_computing.key_rotation.num_csl_contexts > 0);

    // NV_ERR_STATE_IN_USE indicates that RM was not able to acquire the
    // required locks at this time. This status is not interpreted as an error,
    // but as a sign for UVM to try again later. This is the same "protocol"
    // used in IV rotation.
    status = nvUvmInterfaceCslRotateKey(pool->conf_computing.key_rotation.csl_contexts,
                                        pool->conf_computing.key_rotation.num_csl_contexts);

    if (status == NV_OK) {
        pool->conf_computing.key_rotation.version++;

        if (!key_rotation_is_notifier_driven()) {
            atomic64_set(&pool->conf_computing.key_rotation.decrypted, 0);
            atomic64_set(&pool->conf_computing.key_rotation.encrypted, 0);
        }
    }
    else if (status != NV_ERR_STATE_IN_USE) {
        UVM_DBG_PRINT("nvUvmInterfaceCslRotateKey() failed in engine %u: %s\n",
                      pool->engine_index,
                      nvstatusToString(status));
    }

    return status;
}
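
// Illustrative sketch of the check-then-rotate pattern, not part of the
// driver; the caller context is hypothetical. NV_ERR_STATE_IN_USE is treated
// as "try again later", matching the IV rotation protocol above:
//
//     if (uvm_conf_computing_is_key_rotation_pending_in_pool(pool)) {
//         status = uvm_conf_computing_rotate_pool_key(pool);
//         if (status == NV_ERR_STATE_IN_USE)
//             status = NV_OK; // retry at the next opportunity
//         if (status != NV_OK)
//             return status;
//     }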

__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
                                                     uvm_gpu_address_t dst_gpu_address,
                                                     void *src_plain,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_gpu_address_t src_gpu_address, auth_tag_gpu_address;
    void *dst_cipher, *auth_tag;
    va_list args;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
    if (status != NV_OK)
        return status;

    va_start(args, format);
    status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, tracker, &push, format, args);
    va_end(args);

    if (status != NV_OK)
        goto out;

    dst_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
    uvm_conf_computing_cpu_encrypt(push.channel, dst_cipher, src_plain, NULL, size, auth_tag);

    src_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
    gpu->parent->ce_hal->decrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

    status = uvm_push_end_and_wait(&push);

out:
    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
    return status;
}

__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
                                                     void *dst_plain,
                                                     uvm_gpu_address_t src_gpu_address,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
    void *src_cipher, *auth_tag;
    va_list args;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
    if (status != NV_OK)
        return status;

    va_start(args, format);
    status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, tracker, &push, format, args);
    va_end(args);

    if (status != NV_OK)
        goto out;

    uvm_conf_computing_log_gpu_encryption(push.channel, size, dma_buffer->decrypt_iv);
    dma_buffer->key_version[0] = uvm_channel_pool_key_version(push.channel->pool);

    dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
    gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

    status = uvm_push_end_and_wait(&push);
    if (status != NV_OK)
        goto out;

    src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
    status = uvm_conf_computing_cpu_decrypt(push.channel,
                                            dst_plain,
                                            src_cipher,
                                            dma_buffer->decrypt_iv,
                                            dma_buffer->key_version[0],
                                            size,
                                            auth_tag);

out:
    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
    return status;
}