/*******************************************************************************
    Copyright (c) 2021-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_common.h"
#include "uvm_global.h"
#include "uvm_conf_computing.h"
#include "uvm_kvmalloc.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "uvm_mem.h"
#include "uvm_processors.h"
#include "uvm_tracker.h"
#include "nv_uvm_interface.h"
#include "uvm_va_block.h"

// Amount of encrypted data on a given engine that triggers key rotation. This
// is a UVM internal threshold, different from that of RM, and used only during
// testing.
//
// Key rotation is triggered when the total encryption size, or the total
// decryption size (whichever comes first) reaches this lower threshold on the
// engine.
#define UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD (UVM_SIZE_1MB * 8)

// The maximum number of secure operations per push is:
// UVM_MAX_PUSH_SIZE / min(CE encryption size, CE decryption size)
// + 1 (tracking semaphore) = 128 * 1024 / 56 + 1 = 2342
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN 2342lu

// Channels use 32-bit counters so the value after rotation is 0xffffffff.
// Setting the limit to this value (or higher) will result in rotation
// on every check. However, pre-emptive rotation when submitting control
// GPFIFO entries relies on the fact that multiple successive checks after
// rotation do not trigger more rotations if there was no IV used in between.
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MAX 0xfffffffelu

// Attempt rotation when two billion IVs are left. The IV rotation call can
// fail if the necessary locks are not available, so multiple attempts may be
// needed for IV rotation to succeed.
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT (1lu << 31)

// Start rotating after 500 encryptions/decryptions when running tests.
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_TESTS ((1lu << 32) - 500lu)

static ulong uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT;

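// The rotation limit can be overridden at module load time only: S_IRUGO
// exposes the parameter read-only through sysfs.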
module_param(uvm_conf_computing_channel_iv_rotation_limit, ulong, S_IRUGO);

static UvmGpuConfComputeMode uvm_conf_computing_get_mode(const uvm_parent_gpu_t *parent)
{
    return parent->rm_info.gpuConfComputeCaps.mode;
}

bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
{
    return uvm_conf_computing_get_mode(gpu->parent) == UVM_GPU_CONF_COMPUTE_MODE_HCC;
}

void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
{
    uvm_parent_gpu_t *other_parent;
    UvmGpuConfComputeMode parent_mode = uvm_conf_computing_get_mode(parent);

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    // The Confidential Computing state of the GPU should match that of the
    // system.
    UVM_ASSERT((parent_mode != UVM_GPU_CONF_COMPUTE_MODE_NONE) == g_uvm_global.conf_computing_enabled);

    // All GPUs derive Confidential Computing status from their parent. By
    // current policy all parent GPUs have identical Confidential Computing
    // status.
    for_each_parent_gpu(other_parent)
        UVM_ASSERT(parent_mode == uvm_conf_computing_get_mode(other_parent));
}

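// Unlink a DMA buffer from its pool and release its tracker and allocations.
// The pool lock must be held by the caller.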
static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                      uvm_conf_computing_dma_buffer_t *dma_buffer)
{
    uvm_assert_mutex_locked(&dma_buffer_pool->lock);

    list_del(&dma_buffer->node);
    uvm_tracker_wait_deinit(&dma_buffer->tracker);

    uvm_mem_free(dma_buffer->alloc);
    uvm_mem_free(dma_buffer->auth_tag);
    uvm_kvfree(dma_buffer);
}

static uvm_gpu_t *dma_buffer_pool_to_gpu(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    return container_of(dma_buffer_pool, uvm_gpu_t, conf_computing.dma_buffer_pool);
}

// Allocate and map a new DMA stage buffer to CPU and GPU (VA)
static NV_STATUS dma_buffer_create(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                   uvm_conf_computing_dma_buffer_t **dma_buffer_out)
{
    uvm_gpu_t *dma_owner;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_mem_t *alloc = NULL;
    NV_STATUS status = NV_OK;
    size_t auth_tags_size = (UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;

    dma_buffer = uvm_kvmalloc_zero(sizeof(*dma_buffer));
    if (!dma_buffer)
        return NV_ERR_NO_MEMORY;

    dma_owner = dma_buffer_pool_to_gpu(dma_buffer_pool);
    uvm_tracker_init(&dma_buffer->tracker);
    INIT_LIST_HEAD(&dma_buffer->node);

    status = uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE, dma_owner, NULL, &alloc);
    if (status != NV_OK)
        goto err;

    dma_buffer->alloc = alloc;

    status = uvm_mem_map_gpu_kernel(alloc, dma_owner);
    if (status != NV_OK)
        goto err;

    status = uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(auth_tags_size, dma_owner, NULL, &alloc);
    if (status != NV_OK)
        goto err;

    dma_buffer->auth_tag = alloc;

    status = uvm_mem_map_gpu_kernel(alloc, dma_owner);
    if (status != NV_OK)
        goto err;

    *dma_buffer_out = dma_buffer;

    return status;

err:
    dma_buffer_destroy_locked(dma_buffer_pool, dma_buffer);
    return status;
}

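// Wait for the work tracked by every free buffer in the pool to complete.
// Buffers currently checked out of the pool are not waited on.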
void uvm_conf_computing_dma_buffer_pool_sync(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    uvm_conf_computing_dma_buffer_t *dma_buffer;

    if (dma_buffer_pool->num_dma_buffers == 0)
        return;

    uvm_mutex_lock(&dma_buffer_pool->lock);
    list_for_each_entry(dma_buffer, &dma_buffer_pool->free_dma_buffers, node)
        uvm_tracker_wait(&dma_buffer->tracker);
    uvm_mutex_unlock(&dma_buffer_pool->lock);
}

static void conf_computing_dma_buffer_pool_deinit(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_conf_computing_dma_buffer_t *next_buff;

    if (dma_buffer_pool->num_dma_buffers == 0)
        return;

    // Because the pool is torn down at the same time the GPU is unregistered,
    // the lock is required only to quiet assertions, not for functional
    // reasons (see dma_buffer_destroy_locked()).
    uvm_mutex_lock(&dma_buffer_pool->lock);

    list_for_each_entry_safe(dma_buffer, next_buff, &dma_buffer_pool->free_dma_buffers, node) {
        dma_buffer_destroy_locked(dma_buffer_pool, dma_buffer);
        dma_buffer_pool->num_dma_buffers--;
    }

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers == 0);
    UVM_ASSERT(list_empty(&dma_buffer_pool->free_dma_buffers));
    uvm_mutex_unlock(&dma_buffer_pool->lock);
}

static void dma_buffer_pool_add(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                uvm_conf_computing_dma_buffer_t *dma_buffer)
{
    uvm_assert_mutex_locked(&dma_buffer_pool->lock);
    list_add_tail(&dma_buffer->node, &dma_buffer_pool->free_dma_buffers);
}

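// Create the initial pool. The pool size below is a fixed starting point; the
// pool grows on demand in dma_buffer_pool_expand_locked(). On any allocation
// failure the partially initialized pool is torn down.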
static NV_STATUS conf_computing_dma_buffer_pool_init(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    size_t i;
    size_t num_dma_buffers = 32;
    NV_STATUS status = NV_OK;

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers == 0);
    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    INIT_LIST_HEAD(&dma_buffer_pool->free_dma_buffers);
    uvm_mutex_init(&dma_buffer_pool->lock, UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL);
    dma_buffer_pool->num_dma_buffers = num_dma_buffers;

    uvm_mutex_lock(&dma_buffer_pool->lock);
    for (i = 0; i < num_dma_buffers; i++) {
        uvm_conf_computing_dma_buffer_t *dma_buffer;

        status = dma_buffer_create(dma_buffer_pool, &dma_buffer);
        if (status != NV_OK)
            break;

        dma_buffer_pool_add(dma_buffer_pool, dma_buffer);
    }
    uvm_mutex_unlock(&dma_buffer_pool->lock);

    if (i < num_dma_buffers)
        conf_computing_dma_buffer_pool_deinit(dma_buffer_pool);

    return status;
}

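// Double the number of buffers in the pool, stopping early on allocation
// failure. Returns NV_OK as long as at least one buffer was added.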
static NV_STATUS dma_buffer_pool_expand_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    size_t i;
    uvm_gpu_t *gpu;
    size_t nb_to_alloc;
    NV_STATUS status = NV_OK;

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers > 0);

    gpu = dma_buffer_pool_to_gpu(dma_buffer_pool);
    nb_to_alloc = dma_buffer_pool->num_dma_buffers;
    for (i = 0; i < nb_to_alloc; ++i) {
        uvm_conf_computing_dma_buffer_t *dma_buffer;

        status = dma_buffer_create(dma_buffer_pool, &dma_buffer);
        if (status != NV_OK)
            break;

        dma_buffer_pool_add(dma_buffer_pool, dma_buffer);
    }

    dma_buffer_pool->num_dma_buffers += i;

    if (i == 0)
        return status;

    return NV_OK;
}

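// Take a buffer from the pool, expanding the pool if it is empty. Pending work
// on the buffer is either waited for (out_tracker == NULL) or handed to the
// caller through out_tracker.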
NV_STATUS uvm_conf_computing_dma_buffer_alloc(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                              uvm_conf_computing_dma_buffer_t **dma_buffer_out,
                                              uvm_tracker_t *out_tracker)
{
    uvm_conf_computing_dma_buffer_t *dma_buffer = NULL;
    NV_STATUS status;

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers > 0);

    // TODO: Bug 3385623: Heuristically expand DMA memory pool
    uvm_mutex_lock(&dma_buffer_pool->lock);
    if (list_empty(&dma_buffer_pool->free_dma_buffers)) {
        status = dma_buffer_pool_expand_locked(dma_buffer_pool);

        if (status != NV_OK) {
            uvm_mutex_unlock(&dma_buffer_pool->lock);
            return status;
        }
    }

    // We're guaranteed that at least one DMA stage buffer is available at this
    // point.
    dma_buffer = list_first_entry(&dma_buffer_pool->free_dma_buffers, uvm_conf_computing_dma_buffer_t, node);
    list_del_init(&dma_buffer->node);
    uvm_mutex_unlock(&dma_buffer_pool->lock);

    status = uvm_tracker_wait_for_other_gpus(&dma_buffer->tracker, dma_buffer->alloc->dma_owner);
    if (status != NV_OK)
        goto error;

    if (out_tracker)
        status = uvm_tracker_add_tracker_safe(out_tracker, &dma_buffer->tracker);
    else
        status = uvm_tracker_wait(&dma_buffer->tracker);

    if (status != NV_OK)
        goto error;

    uvm_page_mask_zero(&dma_buffer->encrypted_page_mask);
    *dma_buffer_out = dma_buffer;

    return status;

error:
    uvm_tracker_deinit(&dma_buffer->tracker);
    uvm_conf_computing_dma_buffer_free(dma_buffer_pool, dma_buffer, NULL);
    return status;
}

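// Return a buffer to the pool. If a tracker is passed, its entries are folded
// into the buffer's own tracker rather than waited on here, so the buffer is
// not reused until that work completes.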
void uvm_conf_computing_dma_buffer_free(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                        uvm_conf_computing_dma_buffer_t *dma_buffer,
                                        uvm_tracker_t *tracker)
{
    NV_STATUS status;

    if (!dma_buffer)
        return;

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers > 0);

    uvm_tracker_remove_completed(&dma_buffer->tracker);
    if (tracker) {
        uvm_tracker_remove_completed(tracker);
        status = uvm_tracker_add_tracker_safe(&dma_buffer->tracker, tracker);
        if (status != NV_OK)
            UVM_ASSERT(status == uvm_global_get_status());
    }

    uvm_mutex_lock(&dma_buffer_pool->lock);
    dma_buffer_pool_add(dma_buffer_pool, dma_buffer);
    uvm_mutex_unlock(&dma_buffer_pool->lock);
}

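// gpu->conf_computing.iv_mem is a small sysmem allocation, mapped for both CPU
// and GPU access, sized to hold a single UvmCslIv and only needed in HCC mode.
// As the "dummy" name suggests, it appears to serve as a placeholder IV
// destination whose contents UVM does not consume.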
static void dummy_iv_mem_deinit(uvm_gpu_t *gpu)
{
    uvm_mem_free(gpu->conf_computing.iv_mem);
}

static NV_STATUS dummy_iv_mem_init(uvm_gpu_t *gpu)
{
    NV_STATUS status;

    if (!uvm_conf_computing_mode_is_hcc(gpu))
        return NV_OK;

    status = uvm_mem_alloc_sysmem_dma(sizeof(UvmCslIv), gpu, NULL, &gpu->conf_computing.iv_mem);
    if (status != NV_OK)
        return status;

    status = uvm_mem_map_gpu_kernel(gpu->conf_computing.iv_mem, gpu);
    if (status != NV_OK)
        goto error;

    return NV_OK;

error:
    dummy_iv_mem_deinit(gpu);
    return status;
}

// The production key rotation defaults are such that key rotations rarely
// happen. During UVM testing more frequent rotations are triggered by relying
// on internal encryption usage accounting. When key rotations are triggered by
// UVM, the driver does not rely on channel key rotation notifiers.
//
// TODO: Bug 4612912: UVM should be able to programmatically set the rotation
// lower threshold. This function, and all the metadata associated with it
// (per-pool encryption accounting, for example) can be removed at that point.
static bool key_rotation_is_notifier_driven(void)
{
    return !uvm_enable_builtin_tests;
}

NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
{
    NV_STATUS status;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;

    status = conf_computing_dma_buffer_pool_init(&gpu->conf_computing.dma_buffer_pool);
    if (status != NV_OK)
        return status;

    status = dummy_iv_mem_init(gpu);
    if (status != NV_OK)
        goto error;

    if (uvm_enable_builtin_tests &&
        uvm_conf_computing_channel_iv_rotation_limit == UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT)
        uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_TESTS;

    if (uvm_conf_computing_channel_iv_rotation_limit < UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN ||
        uvm_conf_computing_channel_iv_rotation_limit > UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MAX) {
        UVM_ERR_PRINT("Value of uvm_conf_computing_channel_iv_rotation_limit: %lu is outside of the safe "
                      "range: <%lu, %lu>. Using the default value instead (%lu)\n",
                      uvm_conf_computing_channel_iv_rotation_limit,
                      UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN,
                      UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MAX,
                      UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT);
        uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT;
    }

    return NV_OK;

error:
    uvm_conf_computing_gpu_deinit(gpu);
    return status;
}

void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu)
{
    dummy_iv_mem_deinit(gpu);
    conf_computing_dma_buffer_pool_deinit(&gpu->conf_computing.dma_buffer_pool);
}

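// Account for a GPU-side encryption and fetch the IV needed to decrypt it on
// the CPU. Note the direction: a GPU encryption is logged as a
// UVM_CSL_OPERATION_DECRYPT because the CPU is the decrypting end. For LCIC
// channels the accounting is charged to the paired WLC channel's pool.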
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv)
{
    NV_STATUS status;
    uvm_channel_pool_t *pool;

    if (uvm_channel_is_lcic(channel))
        pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
    else
        pool = channel->pool;

    uvm_mutex_lock(&channel->csl.ctx_lock);

    if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
        status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, size);

        // Informing RM of an encryption/decryption should not fail
        UVM_ASSERT(status == NV_OK);

        if (!key_rotation_is_notifier_driven())
            atomic64_add(size, &pool->conf_computing.key_rotation.encrypted);
    }

    status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);

    // IV rotation is done preemptively as needed, so the above
    // call cannot return failure.
    UVM_ASSERT(status == NV_OK);

    uvm_mutex_unlock(&channel->csl.ctx_lock);
}

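// Reserve the IV that a later CPU encryption on this channel will use; the
// returned IV is meant to be passed as encrypt_iv to
// uvm_conf_computing_cpu_encrypt().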
void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv)
{
    NV_STATUS status;

    uvm_mutex_lock(&channel->csl.ctx_lock);
    status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, 1, iv);
    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // IV rotation is done preemptively as needed, so the above
    // call cannot return failure.
    UVM_ASSERT(status == NV_OK);
}

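// Encrypt src_plain into dst_cipher on the CPU, producing the authentication
// tag the GPU needs to decrypt the data. Mirroring the GPU path above, a CPU
// encryption is accounted against the pool's "decrypted" counter, since the
// GPU is the decrypting end.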
void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
                                    void *dst_cipher,
                                    const void *src_plain,
                                    UvmCslIv *encrypt_iv,
                                    size_t size,
                                    void *auth_tag_buffer)
{
    NV_STATUS status;
    uvm_channel_pool_t *pool;

    UVM_ASSERT(size);

    if (uvm_channel_is_lcic(channel))
        pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
    else
        pool = channel->pool;

    uvm_mutex_lock(&channel->csl.ctx_lock);

    status = nvUvmInterfaceCslEncrypt(&channel->csl.ctx,
                                      size,
                                      (NvU8 const *) src_plain,
                                      encrypt_iv,
                                      (NvU8 *) dst_cipher,
                                      (NvU8 *) auth_tag_buffer);

    // IV rotation is done preemptively as needed, so the above
    // call cannot return failure.
    UVM_ASSERT(status == NV_OK);

    if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
        status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, size);

        // Informing RM of an encryption/decryption should not fail
        UVM_ASSERT(status == NV_OK);

        if (!key_rotation_is_notifier_driven())
            atomic64_add(size, &pool->conf_computing.key_rotation.decrypted);
    }

    uvm_mutex_unlock(&channel->csl.ctx_lock);
}

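// key_version identifies the pool key generation the ciphertext was produced
// under (see uvm_channel_pool_key_version()); it is passed along because the
// key may have rotated between the GPU encryption and this CPU decryption.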
NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
                                         void *dst_plain,
                                         const void *src_cipher,
                                         const UvmCslIv *src_iv,
                                         NvU32 key_version,
                                         size_t size,
                                         const void *auth_tag_buffer)
{
    NV_STATUS status;

    // The CSL context associated with a channel can be used by multiple
    // threads. The IV sequence is thus guaranteed only while the channel is
    // "locked for push". The channel/push lock is released in
    // "uvm_channel_end_push", and at that time the GPU encryption operations
    // have not executed, yet. Therefore the caller has to use
    // "uvm_conf_computing_log_gpu_encryption" to explicitly store IVs needed
    // to perform CPU decryption and pass those IVs to this function after the
    // push that did the encryption completes.
    UVM_ASSERT(src_iv);

    uvm_mutex_lock(&channel->csl.ctx_lock);
    status = nvUvmInterfaceCslDecrypt(&channel->csl.ctx,
                                      size,
                                      (const NvU8 *) src_cipher,
                                      src_iv,
                                      key_version,
                                      (NvU8 *) dst_plain,
                                      NULL,
                                      0,
                                      (const NvU8 *) auth_tag_buffer);

    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, channel %s, GPU %s\n",
                      nvstatusToString(status),
                      channel->name,
                      uvm_gpu_name(uvm_channel_get_gpu(channel)));
    }

    uvm_mutex_unlock(&channel->csl.ctx_lock);

    return status;
}

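// Decrypt a single replayable fault entry on the CPU. The entry's "valid"
// byte is passed as additional authenticated data, so tampering with it fails
// authentication.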
NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
                                           void *dst_plain,
                                           const void *src_cipher,
                                           const void *auth_tag_buffer,
                                           NvU8 valid)
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;

    // There is no dedicated lock for the CSL context associated with replayable
    // faults. The mutual exclusion required by the RM CSL API is enforced by
    // relying on the GPU replayable service lock (ISR lock), since fault
    // decryption is invoked as part of fault servicing.
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    status = nvUvmInterfaceCslLogEncryption(csl_context, UVM_CSL_OPERATION_DECRYPT, fault_entry_size);

    // Informing RM of an encryption/decryption should not fail
    UVM_ASSERT(status == NV_OK);

    status = nvUvmInterfaceCslDecrypt(csl_context,
                                      fault_entry_size,
                                      (const NvU8 *) src_cipher,
                                      NULL,
                                      NV_U32_MAX,
                                      (NvU8 *) dst_plain,
                                      &valid,
                                      sizeof(valid),
                                      (const NvU8 *) auth_tag_buffer);

    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, GPU %s\n",
                      nvstatusToString(status),
                      uvm_parent_gpu_name(parent_gpu));
    }

    return status;
}

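// Advance the CPU-side decrypt IV without decrypting anything, keeping the
// CSL context in step with a fault entry whose decryption UVM skips.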
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;

    // See comment in uvm_conf_computing_fault_decrypt
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    status = nvUvmInterfaceCslLogEncryption(csl_context, UVM_CSL_OPERATION_DECRYPT, fault_entry_size);

    // Informing RM of an encryption/decryption should not fail
    UVM_ASSERT(status == NV_OK);

    status = nvUvmInterfaceCslIncrementIv(csl_context, UVM_CSL_OPERATION_DECRYPT, 1, NULL);

    UVM_ASSERT(status == NV_OK);
}

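// Report how many IVs remain before the channel must rotate, for the CPU
// encrypt (ENCRYPT) and CPU decrypt (DECRYPT) directions respectively.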
void uvm_conf_computing_query_message_pools(uvm_channel_t *channel,
                                            NvU64 *remaining_encryptions,
                                            NvU64 *remaining_decryptions)
{
    NV_STATUS status;

    UVM_ASSERT(channel);
    UVM_ASSERT(remaining_encryptions);
    UVM_ASSERT(remaining_decryptions);

    uvm_mutex_lock(&channel->csl.ctx_lock);
    status = nvUvmInterfaceCslQueryMessagePool(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, remaining_encryptions);
    UVM_ASSERT(status == NV_OK);
    UVM_ASSERT(*remaining_encryptions <= NV_U32_MAX);

    status = nvUvmInterfaceCslQueryMessagePool(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, remaining_decryptions);
    UVM_ASSERT(status == NV_OK);
    UVM_ASSERT(*remaining_decryptions <= NV_U32_MAX);

    // LCIC channels never use CPU encrypt/GPU decrypt
    if (uvm_channel_is_lcic(channel))
        UVM_ASSERT(*remaining_encryptions == NV_U32_MAX);

    uvm_mutex_unlock(&channel->csl.ctx_lock);
}

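// Rotate the channel's encryption and/or decryption IVs if the remaining
// count has dropped to the given limit. The channel must be idle while
// rotating, hence the uvm_channel_wait() below.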
static NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit_internal(uvm_channel_t *channel, NvU64 limit)
{
    NV_STATUS status = NV_OK;
    NvU64 remaining_encryptions, remaining_decryptions;
    bool rotate_encryption_iv, rotate_decryption_iv;

    UVM_ASSERT(uvm_channel_is_locked_for_push(channel) ||
               (uvm_channel_is_lcic(channel) && uvm_channel_manager_is_wlc_ready(channel->pool->manager)));

    uvm_conf_computing_query_message_pools(channel, &remaining_encryptions, &remaining_decryptions);

    // Ignore the decryption limit for SEC2: only CE channels support
    // GPU encrypt/CPU decrypt. However, RM reports _some_ decrementing
    // value for the SEC2 decryption counter.
    rotate_decryption_iv = (remaining_decryptions <= limit) && uvm_channel_is_ce(channel);
    rotate_encryption_iv = remaining_encryptions <= limit;

    if (!rotate_encryption_iv && !rotate_decryption_iv)
        return NV_OK;

    // Wait for all in-flight pushes. The caller needs to guarantee that there
    // are no concurrent pushes created, e.g. by only calling rotate after
    // a channel is locked_for_push.
    status = uvm_channel_wait(channel);
    if (status != NV_OK)
        return status;

    uvm_mutex_lock(&channel->csl.ctx_lock);

    if (rotate_encryption_iv)
        status = nvUvmInterfaceCslRotateIv(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT);

    if (status == NV_OK && rotate_decryption_iv)
        status = nvUvmInterfaceCslRotateIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT);

    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // Change the error to out of resources if the available IVs are running
    // too low
    if (status == NV_ERR_STATE_IN_USE &&
        (remaining_encryptions < UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN ||
         remaining_decryptions < UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN))
        return NV_ERR_INSUFFICIENT_RESOURCES;

    return status;
}

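// Wrapper handling RM's "busy" protocol: NV_ERR_STATE_IN_USE means the
// required locks were unavailable, so either retry in a loop or swallow the
// error and rely on a later attempt.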
NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit(uvm_channel_t *channel, NvU64 limit, bool retry_if_busy)
{
    NV_STATUS status;

    do {
        status = uvm_conf_computing_rotate_channel_ivs_below_limit_internal(channel, limit);
    } while (retry_if_busy && status == NV_ERR_STATE_IN_USE);

    // Hide "busy" error. The rotation will be retried at the next opportunity.
    if (!retry_if_busy && status == NV_ERR_STATE_IN_USE)
        status = NV_OK;

    return status;
}

NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs(uvm_channel_t *channel)
{
    return uvm_conf_computing_rotate_channel_ivs_below_limit(channel,
                                                             uvm_conf_computing_channel_iv_rotation_limit,
                                                             false);
}

NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *channel)
{
    return uvm_conf_computing_rotate_channel_ivs_below_limit(channel,
                                                             uvm_conf_computing_channel_iv_rotation_limit,
                                                             true);
}

void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu)
{
    if (!g_uvm_global.conf_computing_enabled)
        return;

    // Key rotation cannot be enabled in UVM if it is disabled in RM
    if (!gpu->parent->rm_info.gpuConfComputeCaps.bKeyRotationEnabled)
        return;

    gpu->channel_manager->conf_computing.key_rotation_enabled = true;
}

void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)
{
    if (!g_uvm_global.conf_computing_enabled)
        return;

    gpu->channel_manager->conf_computing.key_rotation_enabled = false;
}

bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
{
    return gpu->channel_manager->conf_computing.key_rotation_enabled;
}

bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)
{
    if (!uvm_conf_computing_is_key_rotation_enabled(pool->manager->gpu))
        return false;

    // TODO: Bug 4586447: key rotation must be disabled in the SEC2 engine,
    // because currently the encryption key is shared between UVM and RM, but
    // UVM is not able to idle SEC2 channels owned by RM.
    if (uvm_channel_pool_is_sec2(pool))
        return false;

    // Key rotation happens as part of channel reservation, and LCIC channels
    // are never reserved directly. Rotation of keys in LCIC channels happens
    // as the result of key rotation in WLC channels.
    //
    // Return false even though there is nothing fundamental prohibiting direct
    // key rotation on LCIC pools.
    if (uvm_channel_pool_is_lcic(pool))
        return false;

    return true;
}

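// Test-only path: compare UVM's own encryption/decryption byte accounting
// against the internal lower threshold to decide whether rotation is due.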
static bool conf_computing_is_key_rotation_pending_use_stats(uvm_channel_pool_t *pool)
{
    NvU64 decrypted, encrypted;

    UVM_ASSERT(!key_rotation_is_notifier_driven());

    decrypted = atomic64_read(&pool->conf_computing.key_rotation.decrypted);

    if (decrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
        return true;

    encrypted = atomic64_read(&pool->conf_computing.key_rotation.encrypted);

    if (encrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
        return true;

    return false;
}

static bool conf_computing_is_key_rotation_pending_use_notifier(uvm_channel_pool_t *pool)
{
    // If key rotation is pending for the pool's engine, then the key rotation
    // notifier in any of the engine channels can be used by UVM to detect the
    // situation. Note that RM doesn't update all the notifiers in a single
    // atomic operation, so it is possible that the channel read by UVM (the
    // first one in the pool) indicates that a key rotation is pending, but
    // another channel in the pool (temporarily) indicates the opposite, or vice
    // versa.
    uvm_channel_t *first_channel = pool->channels;

    UVM_ASSERT(key_rotation_is_notifier_driven());
    UVM_ASSERT(first_channel != NULL);

    return first_channel->channel_info.keyRotationNotifier->status == UVM_KEY_ROTATION_STATUS_PENDING;
}

bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool)
{
    if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
        return false;

    if (key_rotation_is_notifier_driven())
        return conf_computing_is_key_rotation_pending_use_notifier(pool);
    else
        return conf_computing_is_key_rotation_pending_use_stats(pool);
}

NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool)
{
    NV_STATUS status;

    UVM_ASSERT(uvm_conf_computing_is_key_rotation_enabled_in_pool(pool));
    UVM_ASSERT(pool->conf_computing.key_rotation.csl_contexts != NULL);
    UVM_ASSERT(pool->conf_computing.key_rotation.num_csl_contexts > 0);

    // NV_ERR_STATE_IN_USE indicates that RM was not able to acquire the
    // required locks at this time. This status is not interpreted as an error,
    // but as a sign for UVM to try again later. This is the same "protocol"
    // used in IV rotation.
    status = nvUvmInterfaceCslRotateKey(pool->conf_computing.key_rotation.csl_contexts,
                                        pool->conf_computing.key_rotation.num_csl_contexts);

    if (status == NV_OK) {
        pool->conf_computing.key_rotation.version++;

        if (!key_rotation_is_notifier_driven()) {
            atomic64_set(&pool->conf_computing.key_rotation.decrypted, 0);
            atomic64_set(&pool->conf_computing.key_rotation.encrypted, 0);
        }
    }
    else if (status != NV_ERR_STATE_IN_USE) {
        UVM_DBG_PRINT("nvUvmInterfaceCslRotateKey() failed in engine %u: %s\n",
                      pool->engine_index,
                      nvstatusToString(status));
    }

    return status;
}

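// Copy size bytes from CPU memory to a GPU virtual address: the data is
// CPU-encrypted into a staging DMA buffer, then the CE decrypts it directly
// into dst_gpu_address. A minimal usage sketch ("mem", "init_data" and
// "init_len" are hypothetical names; "mem" is assumed to be a uvm_mem_t
// already mapped on the GPU in the kernel address space):
//
//     uvm_gpu_address_t dst = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
//
//     status = uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
//                                                         dst,
//                                                         init_data,
//                                                         init_len,
//                                                         NULL,
//                                                         "init %s",
//                                                         uvm_gpu_name(gpu));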
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
                                                     uvm_gpu_address_t dst_gpu_address,
                                                     void *src_plain,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_gpu_address_t src_gpu_address, auth_tag_gpu_address;
    void *dst_cipher, *auth_tag;
    va_list args;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
    if (status != NV_OK)
        return status;

    va_start(args, format);
    status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, tracker, &push, format, args);
    va_end(args);

    if (status != NV_OK)
        goto out;

    dst_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
    uvm_conf_computing_cpu_encrypt(push.channel, dst_cipher, src_plain, NULL, size, auth_tag);

    src_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
    gpu->parent->ce_hal->decrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

    status = uvm_push_end_and_wait(&push);

out:
    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
    return status;
}

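// The reverse staging path: the CE encrypts size bytes from src_gpu_address
// into a staging DMA buffer, and once the push completes the CPU decrypts the
// staged ciphertext into dst_plain, using the IV and key version recorded at
// push time.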
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
                                                     void *dst_plain,
                                                     uvm_gpu_address_t src_gpu_address,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
    void *src_cipher, *auth_tag;
    va_list args;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
    if (status != NV_OK)
        return status;

    va_start(args, format);
    status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, tracker, &push, format, args);
    va_end(args);

    if (status != NV_OK)
        goto out;

    uvm_conf_computing_log_gpu_encryption(push.channel, size, dma_buffer->decrypt_iv);
    dma_buffer->key_version[0] = uvm_channel_pool_key_version(push.channel->pool);

    dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
    gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

    status = uvm_push_end_and_wait(&push);
    if (status != NV_OK)
        goto out;

    src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
    status = uvm_conf_computing_cpu_decrypt(push.channel,
                                            dst_plain,
                                            src_cipher,
                                            dma_buffer->decrypt_iv,
                                            dma_buffer->key_version[0],
                                            size,
                                            auth_tag);

out:
    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
    return status;
}